#!/usr/bin/env python import os, re, sys, time, stat, string import os.path import shutil # Copyright 2003, Russell Nelson http://angry-economist.russnelson.com # Modified by Rich Magahiz (C) 2003 http://www.magahiz.com:8080/frabjous # Licensed under the Open Software License. # http://opensource.org/licenses/osl.html # Like the GPL, only it's a contract. # how to use publish: # create model.html. # create model.rss. # create content files. Everything that isn't model.html or summary*.html is content. # Put a line containing the word # STATICCONTENT # into any .html files which should not be processed # run 'publish' in the directory holding model.html # it writes index.html, archive.html and rss.xml # to the root directory specified as ROOTDIR # and links to the subdirectory specified as SUBDIR # assumptions about editing: # that the editor writes a foobar.html file # that this script gets run when the editor is satisfied # and wishes to publish that file. # that the editor edits the foobar.html file and runs # this script again to publish the change. # that all old content is preserved and linked-to. # that the permalink always points to the newest version. # assumptions about model.html: # that it has one INSERTCONTENT line, where the content should go. # that later on it has one line, where the date/time stamp # should go. # assumptions about model.rss: # that it takes the form of an RDF 1.0 file # that it has in the rdf:li resource line which is to be filled in with # directory name and file name # that it has an line which is to be filled in with directory # name, file name, title, directory name, file name, description # each of these substitution lines is a proper format line %s for the # fields which are to be substituted # assumptions about all .html files: # files whose names begin in "summary-" are skipped. # that they contain META description lines describing the content # that they have a line with TITLESTART/TITLEEND comment tags # surrounding the title. 
# that the content begins after the next blank line following that line. # that their content ends in a line with . # that they have a line which begins with 'Last Modified:' and # that the rest of this line is a date/time stamp. # if anything isn't obvious about these assumptions, look at my files # on http://www.magahiz.com:8080/frabjous/blog # Version 1.0 - 20030330- first release. # Version 1.1 - 20030411- moved RSS out into its own file. # Version 1.2 - 20030501- Modified by Rich Magahiz # Version 1.3 - 20040615- rewritefile returns a value, also streamline the loop # Version 1.4 - 20041016- port to Windows # Version 1.5 - 20041025- Use shutil.copyfile instead of linking # Version 1.6 - 20041203- rm.htm is a second copy of index.html # Globals - customize here # Filenames SUMMARY = r'summary-' INDEX = "index.html" ARCHIVE = "archive.html" MODEL = "model.html" RSS = "model.rss" # Path where index and archive will be published to #ROOTDIR = "." ROOTDIR = r"/cygdrive/c/Program Files/Apache Group/Apache2/htdocs/magahiz/frabjous" # Path underneath root where the content lives SUBDIR = "blog" #SUBDIR = None # Number of articles to keep LINECOUNTGLOBAL = 250 # Number of rss items to publish RSSCOUNTGLOBAL = 12 # Minimum number of rewrites it takes to generate the new index/archive MINREWRITES = 0 INITREWRITES = 0 REWFAIL = -1 # Return success REWSUCCESS = 0 # Cutoff to ignore all old timestamps CUTOFF = 1097948675 # Function to process new content files. # we have found content with no timestamp in its name - it must be new. # Insert a link to the content it updates (fnold). Preserve the # edit time of the file. 
# rewritefile(fn, fnold, mtime, atime) -> REWFAIL or REWSUCCESS
#
# Copies fn line by line into fn + ".new".  If any line contains
# STATICCONTENT the copy is abandoned: a message is printed, both files are
# closed and REWFAIL is returned, leaving fn itself untouched.  Otherwise,
# after each copied line that passes the re.match() test, one extra line
# formatted from fnold and the ctime of fnold's modification time is written.
# Finally fn + ".new" is renamed over fn, fn and "rewritten." are printed,
# the file times are set with os.utime(fn, (atime, mtime)) and REWSUCCESS is
# returned.
#
#   fn            path of the content file to rewrite
#   fnold         file name of the version being superseded; it is stat'ed
#                 under ROOTDIR/SUBDIR and substituted into the inserted line
#   mtime, atime  times handed to os.utime() so that the rewrite preserves
#                 the edit time of the file
#
# NOTE(review): the path for fnold is built as
# ROOTDIR + os.stat + SUBDIR + os.stat + fnold.  Adding a string to the
# os.stat function raises TypeError; os.sep, which the same kind of path uses
# elsewhere in this file, looks intended -- confirm and fix.
# NOTE(review): on the STATICCONTENT bail-out the partially written
# fn + ".new" is closed but never removed, so it stays on disk.
# NOTE(review): in this copy the statements are collapsed onto three physical
# lines, and both the re.match() pattern and the text of the outf.write()
# literal are empty even though two values are substituted into it --
# presumably markup was lost when this copy was made.  As shown, re.match('')
# matches every line.  Restore both literals from a pristine copy before
# relying on this function.
def rewritefile(fn, fnold, mtime, atime): inf = open(fn) outf = open(fn+".new", "w") while 1: inl = inf.readline() if not inl: break # Check whether it is a static file, bail if so if re.search('STATICCONTENT',inl): print "Static content, skipping rewrite of",fn inf.close() outf.close() # inf.remove() return REWFAIL outf.write(inl) #if re.match("Last modified:", inl): if re.match('',inl): outf.write('

\n' % (fnold, time.ctime(os.stat(ROOTDIR + os.stat + SUBDIR + os.stat + fnold)[stat.ST_MTIME]))) inf.close() outf.close() os.rename(fn+".new", fn) print fn,"rewritten." os.utime(fn, (atime, mtime)) return REWSUCCESS # Function to write out the content, index, archive, and rss files. def writecontent(): rewritecount = INITREWRITES RSSCOUNT = RSSCOUNTGLOBAL LINECOUNT = LINECOUNTGLOBAL keydict = {} # Read the RSS file here and fill the lists rsstop, rssbot, rssend rssmodel = open(ROOTDIR + os.sep + SUBDIR + os.sep + RSS) rssline = None rssitem = None rssswitch = 0 rsstop = [] rssbot = [] rssend = [] for rssread in rssmodel.readlines(): if re.search(r' newest[fn]: newest[fn] = mtime else: # It's a new content file times = os.stat(ROOTDIR + os.sep + SUBDIR + os.sep + fn) mtime = times[stat.ST_MTIME] atime = times[stat.ST_ATIME] now[fn] = mtime # remember the oldest date of this file. if not oldest.has_key(fn) or mtime < oldest[fn]: oldest[fn] = mtime # Throw away the initial files list # we sort articles by their initial publication dates, not their current date files = oldest.items() files.sort(lambda a,b:cmp(b[1],a[1])) # we only keep enough articles to fill at least LINECOUNT lines. for fn,mtime in files: #print "File",fn good2write = 0 if newest.has_key(fn): pass # print fn,"mtime",mtime,"now",now[fn], # print "newest",newest[fn], # print else: print fn,"mtime",mtime,"now",now[fn], print rewritecount = rewritecount + 1 # is the newest earlier than cutoff? skip if so if newest.has_key(fn) and newest[fn] < CUTOFF: pass else: # is the newest timestamp not ours? 
if newest.has_key(fn) and newest[fn] != now[fn]: mtime = now[fn] # Rechristen it with a new timestamped name (fnver) fnver = "%s-%d.html" % (fn[:-5], newest[fn]) good2write = rewritefile(ROOTDIR + os.sep + SUBDIR + os.sep + fn, fnver, mtime, mtime) if(good2write == REWFAIL): print "Skipping",fn," good2write=",good2write continue print "rewriting",fn,"quoting",fnver, mtime rewritecount = rewritecount + 1 # make a link to our timestamped file. if not newest.has_key(fn) or newest[fn] != now[fn]: # This is broken on Windows but works on Unix/Cygwin # os.link(fullpath + fn, fullpath + "%s-%d.html" % (fn[:-5], now[fn])) shutil.copyfile(fullpath + fn, fullpath + "%s-%d.html" % (fn[:-5], now[fn])) # Now we get to open up the content file inf = open(fullpath + fn) # Process the META lines first description = "Frabjous Times" keyword = [] # get the description and keywords allkeys = [] title = None while 1: line = inf.readline() if not line: break descmatch = re.search(r'="[dD]escription" (content|CONTENT)="(.*)"', line) keymatch = re.search(r'="[Kk]ey[Ww]ords" (content|CONTENT)="(.*)"', line) titlematch = re.search(r'(.*)', line) if descmatch: description = descmatch.group(2) continue if keymatch: allkeys = string.split(string.strip(keymatch.group(2)),',') keyword.extend(allkeys) for eachkey in allkeys: keydict[eachkey] = '' # print "adding key ",eachkey continue # get the title out of the file. Title must be on one line inside TITLE{START|END} comment tags if titlematch: title = titlematch.group(1) # print "Title",title break # print good2write if(good2write > REWFAIL): # print "Adding ",title,"to archive" archive.write('

' + "\n") archive.write(description + "\n") archive.write("

\n") if (SUBDIR == '.'): archive.write('

%s - %s

\n' % (fn, title, time.ctime(mtime))) else: archive.write('

%s - %s

\n' % (SUBDIR, fn, title, time.ctime(mtime))) # Write up to RSSCOUNT items to the rss file # print "RSSCOUNT=",RSSCOUNT if RSSCOUNT > 0: #rss.write(rssline % (title, SUBDIR, fn)) # 0.9 format # print "rssline=",rssline # print "SUBDIR, fn, title, SUBDIR, fn, description",SUBDIR, fn, title, SUBDIR, fn, description rsstop.append(rssline % (SUBDIR, fn, title, SUBDIR, fn, description)) # print "",rssitem,SUBDIR,fn rssbot.append(rssitem % (SUBDIR, fn)) RSSCOUNT = RSSCOUNT - 1 # Now we need to decide whether to write to the index file if LINECOUNT < 0: # Nah, it's too full skippedstories = skippedstories + 1 # print "Skipping",title else: # print "Adding",title # print everything from the title through the TITLESTART line. if (SUBDIR == '.'): indexf.write('

%s

%s [permalink]\n' % (title, time.ctime(mtime), fn)) else: indexf.write('

%s

%s [permalink]\n' % (title, time.ctime(mtime), SUBDIR, fn)) # print "Writing",title while 1: LINECOUNT = LINECOUNT - 1 line = inf.readline() if not line: break if re.search(r'', line): # print "Wrote content of ",title break indexf.write(line) indexf.write('

Keywords: ' + string.join(keyword,"|") + "

\n") inf.close() indexf.write('

%d more stories in the archive.\n' % skippedstories) # Write the lists out to the rss file for topline in rsstop: rss.write(topline) for botline in rssbot: rss.write(botline) for endline in rssend: rss.write(endline) rss.close() return rewritecount # Main program # we rewrite the index file and the archive file modelled on what # we find in model.html. # If all the files live in one directory, uncomment the following indexf = open(ROOTDIR + os.sep + "index.new", "w") archive = open(ROOTDIR + os.sep + "archive.new", "w") rss = open(ROOTDIR + os.sep + "rss.new", "w") modtime = time.ctime(time.time()) model = open(ROOTDIR + os.sep + SUBDIR + os.sep + "model.html") while 1: inl = model.readline() if not inl: break # where we see the INSERTCONTENT line in model.html, we insert our own content. #if re.match(r'\s*

$', inl): if re.search(r'', inl): #archive.write('

\n') rewritecount = writecontent() archive.write('