A small utility that checks whether all the hyperlinks inside a web page are working. Users can modify this utility and adapt it to their own requirements. The nice part of this code is that it stores the search output in a file and displays the storage path in the GUI window at the end of the process. To test the script, simply save the code in a *.py file and double-click it. A popup window will appear on the desktop. Enter the URL of the web page and click the "Show" button.
The search will then start, and the results will be printed to the Windows command-line console, provided that Python 3.3.2 is installed and is on the PATH.
#!python
'''
Created on Aug 19, 2013
@requires: Tested in Windows XP
@author: Jaydeb Chakraborty
@version: Python Version-3.3.2
'''
import logging
import os
import re
import urllib.error
import urllib.request
from tkinter import Tk, Label, Button, Entry
# Seconds to wait for any single HTTP request before giving up, so that one
# unresponsive server cannot hang the whole scan.
_REQUEST_TIMEOUT = 10

# An absolute http(s) URL.  The match stops at whitespace and at the quote /
# angle-bracket characters that delimit HTML attributes and tags.  The group
# must be named: the pattern is read back with ``.group('url')``.
_LINK_PATTERN = re.compile(r'''(?P<url>https?://[^\s"'<>]+)''')


def _log_path():
    """Return the path of ``INFO.log`` inside the directory named by ``TEMP``.

    When the ``TEMP`` environment variable is not set, the result is the
    relative path ``INFO.log``.
    """
    return os.path.join(os.environ.get('TEMP', ''), 'INFO.log')


def _extract_links(html):
    """Return the distinct URLs found in *html*, in order of first appearance.

    NOTE: everything from the first ``?`` onwards is stripped from each URL,
    so parameterised links are checked by their base path only.
    """
    links = []
    seen = set()
    for match in _LINK_PATTERN.finditer(html):
        url = match.group('url').split('?', 1)[0]
        if url not in seen:
            seen.add(url)
            links.append(url)
    return links


def _check_link(url):
    """Request *url* and return its HTTP status code.

    An HTTP error status (for example 404) is returned as a code rather than
    raised, because reporting broken links is the purpose of the tool.  Any
    other failure is logged and printed, and ``None`` is returned.
    """
    try:
        with urllib.request.urlopen(url, timeout=_REQUEST_TIMEOUT) as conn:
            return conn.getcode()
    except urllib.error.HTTPError as err:
        return err.code
    except Exception as err:
        # Best-effort boundary: one unreachable or malformed link must not
        # abort the scan of the remaining links, but it must not vanish
        # silently either.
        logging.warning('URL : %s -- Request failed : %s', url, err)
        print(url, err)
        return None


def show():
    """Check every link on the page named in the entry field.

    Reads the address from the module-level ``entry`` widget, downloads that
    page over HTTP, requests each absolute link found in it, and records the
    HTTP return code of every link in ``INFO.log`` (see ``_log_path``) as well
    as printing it to the console.  A label is added to the module-level
    window ``w`` telling the user where the log file is, or that the given
    scheme is not supported.

    Nothing happens when the entry field is empty.
    """
    strn = entry.get().strip()
    if not strn:
        return

    if re.match('(?:ftp|https)://', strn):
        Label(w, text="Currently HTTPS|FTP is not supported").pack()
        return

    # Only a leading scheme is normalised; an 'http://' that appears later in
    # the address (e.g. inside a query parameter) is left alone.
    page_url = strn if strn.startswith('http://') else 'http://' + strn

    log_file = _log_path()
    Label(w, text='Please check output in ' + log_file).pack()

    # basicConfig only configures the root logger on its first call, so
    # repeated clicks keep appending to the same file.
    logging.basicConfig(filename=log_file,
                        format='%(asctime)s %(message)s',
                        level=logging.INFO)
    logging.warning('******* Accessing : %s ********', strn)
    logging.warning('**********************************************')

    try:
        with urllib.request.urlopen(page_url, timeout=_REQUEST_TIMEOUT) as page:
            # Undecodable bytes are replaced rather than raised: only the
            # URLs embedded in the markup matter here.
            html = page.read().decode('utf-8', errors='replace')
    except Exception as err:
        # Report the failure instead of letting it escape into the Tk
        # callback machinery.
        logging.warning('URL : %s -- Request failed : %s', page_url, err)
        print(page_url, err)
        return

    for url in _extract_links(html):
        access = _check_link(url)
        if access is None:
            continue  # failure already logged by _check_link
        logging.warning('URL : %s -- Returncode is : %s', url, access)
        print(url, access)
# Build the main window.  ``w`` and ``entry`` are module-level on purpose:
# the ``show`` callback reads both of them.
w = Tk()

# pack() returns None, so each button is created first and packed afterwards;
# otherwise the names below would be bound to None instead of the widgets.
# Quit destroys the root window, which ends mainloop() and with it the script
# (the ``quit`` builtin is an interactive-shell helper injected by ``site``
# and is not guaranteed to exist).
quitBotton = Button(w, text='Quit', command=w.destroy)
quitBotton.pack()
showBotton = Button(w, text='Show', command=show)
showBotton.pack()

Label(w, text=" Please provide URL... ").pack()
entry = Entry(w)
entry.pack()
res = Label(w)
res.pack()

w.title('Test Links in web page')
w.maxsize(1000, 40000)
w.mainloop()