Saturday, August 31, 2013

Search and test all links available in web page

Another small utility to check whether all hyperlinks inside a web page are working. Users can modify this utility and use it according to their requirements. The nice part of this code is that it stores the search output in a file and displays the storage path in the GUI window at the end of the process. To test the script, simply store the code in a *.py file and double-click it. A popup will appear on the desktop. Provide the URL of the web page and click the "Show" button.
Searching will start and the results will be printed on the Windows command-line console, provided Python 3.3.2 is installed and placed on the PATH properly.
#!python
'''
Created on Aug 19, 2013
@requires: Tested in Windows XP
@author: Jaydeb Chakraborty
@version: Python Version-3.3.2

'''
import http.client
import logging
import os
import re
import urllib.error
import urllib.request
from tkinter import Tk, Label, Button, Entry

""" 
Store all URLs in a file and get HTTP return code 
"""
def show():
    strn = entry.get()  
    if re.match('(?:ftp|https)://', strn):
        mesg="Currently HTTPS|FTP is not supported"
        t = Label(w, text=mesg)
        t.pack()
    else:
        l=strn.replace('http://', '')
        mesg='Please check output in ' + (os.environ.get('TEMP', '')) + '\INFO.log'
        t = Label(w, text=mesg)
        t.pack()                
         
    if strn:  
        stdinfo=((os.environ.get('TEMP', ''))+ '\INFO.log')
        logging.basicConfig(filename='%s' % stdinfo, format='%(asctime)s %(message)s', level=logging.INFO)
        logging.warning('*******  Accessing : %s' % strn + ' ********')
        logging.warning('**********************************************')
        local_filename, headers = urllib.request.urlretrieve('http://' + l)
        f = open(local_filename) 
        for lines in f:
            myString_list = [item for item in lines.split(" ")]
            for item in myString_list:      
                try:
                    o = re.search("(?Phttp?://[^\s]+)", item.expandtabs()).group("url")
                    url = re.sub(r'\?.*|".*', "", o)
                    conn = urllib.request.urlopen(url)
                    access = conn.getcode()
                    logging.warning('URL : %s' % url + ' -- Returncode is : %s' % access)
                    print(url, access)
                except :
                    pass 
                
       
# Build the main window. ``w`` and ``entry`` stay at module level because
# show() reads them when the "Show" button is pressed.
w = Tk()
w.title('Test Links in web page')
w.maxsize(1000, 40000)

# Create each button first and pack it afterwards: pack() returns None, so
# chaining it onto the constructor would bind None instead of the widget.
# "Quit" closes the window via w.destroy, which ends mainloop(); the builtin
# quit() is an interactive-shell helper and is not guaranteed to exist.
quitBotton = Button(w, text='Quit', command=w.destroy)
quitBotton.pack()
showBotton = Button(w, text='Show', command=show)
showBotton.pack()

Label(w, text="        Please provide URL...        ").pack()
entry = Entry(w)
entry.pack()
res = Label(w)
res.pack()

w.mainloop()

No comments:

Post a Comment