Another small utility that checks whether all hyperlinks inside a web page are working. Users can modify this utility and use it according to their requirements. The nice part of this code is that it stores the search output in a file and displays the storage path in the GUI window at the end of the process. To test the script, simply store the code in a *.py file and double-click on it. A popup will appear on the desktop. Provide the URL of the web page and click the "Show" button.
Searching will start and the results will be printed to the Windows command-line console, provided Python 3.3.2 is installed and placed on the PATH properly.
#!python
'''
Created on Aug 19, 2013

Small Tk utility that checks every http(s) link found in a web page and
records each link's HTTP status code in INFO.log under the TEMP directory.

@requires: Tested in Windows XP
@author: Jaydeb Chakraborty
@version: Python Version-3.3.2
'''
from tkinter import Tk, Label, Button, Entry
import http.client
import urllib.error
import urllib.request
import re
import os
import logging

# Seconds to wait on a single request, so one dead host cannot freeze the
# GUI callback forever.
REQUEST_TIMEOUT = 10

# A link is "http://" or "https://" followed by everything up to whitespace.
URL_PATTERN = re.compile(r'(?P<url>https?://\S+)')

# Everything from the first '?', quote or angle bracket onwards is either a
# query string or leftover HTML markup, and is cut off the matched link.
URL_TAIL = re.compile(r'[?"\'<>].*')

# Failures that mean "this URL could not be reached": URLError and socket
# errors are OSError, malformed URLs raise ValueError, and protocol-level
# problems raise HTTPException.
NETWORK_ERRORS = (OSError, ValueError, http.client.HTTPException)


def log_path():
    """Return the path of INFO.log inside the directory named by TEMP."""
    return os.path.join(os.environ.get('TEMP', ''), 'INFO.log')


def extract_url(token):
    """Return the http(s) URL embedded in *token*, or None if there is none.

    The query string and any trailing HTML (closing quote, rest of the tag)
    are stripped, so 'href="http://host/a?x=1">text' yields 'http://host/a'.
    """
    match = URL_PATTERN.search(token)
    if match is None:
        return None
    return URL_TAIL.sub('', match.group('url'))


def check_url(url):
    """Return the HTTP status code the server answers with for *url*.

    Error statuses such as 404 or 500 are returned rather than raised,
    because reporting them is the whole point of a link checker.  Any other
    failure (unreachable host, malformed URL) propagates to the caller.
    """
    try:
        response = urllib.request.urlopen(url, timeout=REQUEST_TIMEOUT)
    except urllib.error.HTTPError as err:
        code = err.code
        err.close()
        return code
    with response:
        return response.getcode()


def fetch_page(url):
    """Download *url* and return its body as text.

    Undecodable bytes are replaced instead of raising, since only the
    ASCII link text is of interest.
    """
    with urllib.request.urlopen(url, timeout=REQUEST_TIMEOUT) as response:
        return response.read().decode('utf-8', 'replace')


def show():
    """Store all URLs in a file and get HTTP return code.

    Reads the page address from the module-level ``entry`` widget, downloads
    the page over HTTP, checks every distinct http(s) link found in it and
    writes one line per link to the log file.  Status messages are shown in
    the module-level ``res`` label.
    """
    strn = entry.get().strip()
    if not strn:
        # Nothing to check; do not claim that output was written.
        res.config(text='Please provide URL...')
        return
    if re.match('(?:ftp|https)://', strn):
        res.config(text='Currently HTTPS|FTP is not supported')
        return

    # The scheme is optional in the entry box.
    page_url = strn if strn.startswith('http://') else 'http://' + strn

    stdinfo = log_path()
    res.config(text='Please check output in ' + stdinfo)
    # basicConfig is a no-op after the first call, so repeated clicks are safe.
    logging.basicConfig(filename=stdinfo,
                        format='%(asctime)s %(message)s',
                        level=logging.INFO)
    logging.info('******* Accessing : %s ********', strn)
    logging.info('**********************************************')

    try:
        page = fetch_page(page_url)
    except NETWORK_ERRORS as err:
        logging.info('Could not open %s : %s', page_url, err)
        res.config(text='Could not open ' + page_url)
        return

    checked = set()
    for token in page.split():
        url = extract_url(token)
        if url is None or url in checked:
            continue
        checked.add(url)
        try:
            access = check_url(url)
        except NETWORK_ERRORS as err:
            # One unreachable link must not abort the rest of the scan.
            logging.info('URL : %s -- Failed : %s', url, err)
            print(url, err)
            continue
        logging.info('URL : %s -- Returncode is : %s', url, access)
        print(url, access)


if __name__ == '__main__':
    w = Tk()
    Button(w, text='Quit', command=w.destroy).pack()
    Button(w, text='Show', command=show).pack()
    Label(w, text=" Please provide URL... ").pack()
    entry = Entry(w)
    entry.pack()
    # Status line updated by show().
    res = Label(w)
    res.pack()
    w.title('Test Links in web page')
    w.maxsize(1000, 40000)
    w.mainloop()