#!/usr/bin/env python2.4
# Simple RSS generation script for unofficial RSS syndication of
# WTF Comics - http://www.wtfcomics.com
#
# Bruce A. Locke (blocke@shivan.org)
# License: Public Domain (Do whatever you want with it)

import re
import string
import urllib
from xml.sax.saxutils import escape

try:
    from urllib.request import urlopen  # Python 3 location
except ImportError:
    from urllib import urlopen  # Python 2 location

# Page that is scraped for the newest comic number and the news postings.
_PAGE_URL = "http://www.wtfcomics.com/"

# Link used for the channel and for the "new comic" item.
_SITE_LINK = "http://www.wtfcomics.com"

# Link used for every news item.
_NEWS_LINK = "http://www.wtfcomics.com/#news"

# Matches the JavaScript statement that assigns a quoted value to
# ``newest_issues``; the comic number is the first quoted string inside it.
_COMIC_NUMBER_RE = re.compile(r"(var.+newest_issues.+=.*\".+\".*;)")

# Each news posting has the following structure (minus \n, \r, \t and
# space):
#
#   <delimiter> 01/02/03____ <delimiter> <delimiter> content <delimiter>
#
# NOTE(review): in the copy of this script that was reviewed, the markup that
# delimits a posting appears to have been stripped, leaving only a line break
# at each delimiter position.  The pattern and _NEWS_BODY_END below keep those
# line breaks (written as \n) rather than guessing at the lost tags; restore
# the real delimiters from the site's markup before relying on the news items.
_NEWS_HEADER_RE = re.compile(
    r"\n[\r\n\t]+([0-9/]+)_*"
    r"\n[\r\n\t ]+"
    r"\n[\r\n\t ]*",
    re.I | re.S)

# Text that terminates the body of a news posting (see the note above).
_NEWS_BODY_END = "\n"

# HTML shown in the description of the "new comic" item.
_HEADLINE_HTML = (
    '<p> WTF Comic #%s is now available. '
    '( <a href="http://pub35.ezboard.com/fwtfcomicsfrm2">Forums</a> )</p>')


def _fetch_page():
    """Download the front page and return it as a ``str``."""
    response = urlopen(_PAGE_URL)
    try:
        data = response.read()
    finally:
        # Release the connection even if the read fails.
        response.close()
    if not isinstance(data, str):
        # Python 3 hands back bytes.  latin-1 can decode any byte sequence;
        # NOTE(review): the site's real encoding is not known here - confirm.
        data = data.decode("latin-1")
    return data


def _find_comic_number(page):
    """Return the first quoted value of the ``newest_issues`` assignment.

    Raises ValueError when the page contains no such assignment.
    """
    match = _COMIC_NUMBER_RE.search(page)
    if match is None:
        raise ValueError(
            "could not find the newest_issues variable in the page")
    # The pattern guarantees at least two double quotes in the match, so
    # element 1 of the split is the text between the first pair.
    return match.group(1).split('"')[1]


def _find_news(page):
    """Return the news postings found in *page* as (date, body) pairs."""
    # With one capturing group, split() yields
    # [text before first posting, date, text, date, text, ...].
    parts = _NEWS_HEADER_RE.split(page)
    dates = parts[1::2]
    tails = parts[2::2]
    return [(date, tail.split(_NEWS_BODY_END)[0])
            for date, tail in zip(dates, tails)]


def _render_feed(comic_number, news):
    """Return the RSS 2.0 document for the comic number and news pairs.

    The element names follow the RSS 2.0 channel/item layout; they were
    reconstructed because the reviewed copy of the script had lost them.
    The result has not been checked against a feed validator.
    """
    # Descriptions carry HTML inside XML: values taken from the page are
    # escaped once for the HTML they are embedded in, then the finished HTML
    # is escaped again for the XML text node.
    headline_html = _HEADLINE_HTML % escape(comic_number)
    lines = [
        '<rss version="2.0">',
        '<channel>',
        '<title>WTF Comics</title>',
        '<link>%s</link>' % _SITE_LINK,
        '<description>A humorous comic based in the world of Everquest '
        'by J. Waller</description>',
        '<language>en</language>',
        '<item>',
        '<title>WTF #%s is available!</title>' % escape(comic_number),
        '<link>%s</link>' % _SITE_LINK,
        '<description>',
        escape(headline_html),
        '</description>',
        '</item>',
    ]
    for date, body in news:
        # The body is already HTML from the page; only carriage returns are
        # dropped before it is wrapped in a paragraph.
        body_html = "<p>" + body.replace("\r", "") + "</p>"
        lines.extend([
            '<item>',
            '<title>News: %s</title>' % escape(date),
            '<link>%s</link>' % _NEWS_LINK,
            '<description>',
            escape(body_html),
            '</description>',
            '</item>',
        ])
    lines.append('</channel>')
    lines.append('</rss>')
    return "\n".join(lines)


def check_wtf_comics(page=None):
    """Print an RSS 2.0 feed for WTF Comics to standard output.

    page -- optional text of the front page.  When omitted (the default) the
            page is downloaded from the site; passing it allows the script to
            be exercised offline, e.g. with a saved copy of the page.

    Raises ValueError when the newest comic number cannot be found.
    """
    if page is None:
        page = _fetch_page()
    comic_number = _find_comic_number(page)
    news = _find_news(page)
    print(_render_feed(comic_number, news))


if __name__ == '__main__':
    check_wtf_comics()