#!/usr/bin/python

# This is a basic first-cut code to html converter.
# Unlike any of the other modules I've looked at, it makes a rudimentary attempt
# to cross-reference variables (eventually, it will need to be two-pass.)
# Note that it doesn't use any python-internal tools to do the parsing, just
# code and regexps; thus there are some holes.  However, this gets me off the
# ground far enough to start putting blobs of python code up on the web site.
#
# Future directions include more literate-programming oriented features, like
# taking block comments [like this] and formatting them more smartly, or perhaps
# even extracting the __doc__ value.  Perhaps even having a sidebar with notes
# that are somehow tagged as less-inline than normal comments.
#
# For now, though, this is enough to put in Makefiles and check in for the web site.
#
# Copyright 2003 Mark W. Eichin <eichin@thok.org> The Herd Of Kittens
#

import re
import sys

try:
    from htmlentitydefs import entitydefs  # Python 2 module name
except ImportError:
    from html.entities import entitydefs  # same table under its Python 3 name

verbs = [
    "assert",
    "break",
    "class",
    "continue",
    "def",
    "del",
    "elif",
    "else",
    "except",
    "exec",
    "finally",
    "for",
    "from",
    "global",
    "if",
    "import",
    "pass",
    "print",
    "raise",
    "return",
    "try",
    "while",
    "yield",
]

# -- single token, outdent --
# break
# continue
# pass
# return
# -- single token with colon, indent --
# else:
# except:
# finally:
# try:
# -- token EXPR --
# assert EXPR
# del EXPR
# exec EXPR
# -- token EXPR, outdent --
# raise EXPR
# return EXPR
# yield EXPR
# -- token EXPR colon, indent --
# if EXPR:
# elif EXPR:
# while EXPR:
# except TYPE:
# class NAME:
# -- something complex, outdent --
# class NAME(BASECLASS)
# def NAME(ARGS):
# except TYPE, VAR:
# for NAME in EXPR:
# -- something else --
# from NAME import COMMANAMES
# global COMMANAMES
# import COMMANAMES
# print >> FD, EXPRS
# print EXPRS


def link_target_wrap(s, name):
    """Return the markup *s* wrapped in an ``<a name=...>`` anchor called *name*."""
    return '<a name="%s">%s</a>' % (name, s)


def link_ref_wrap(s, name):
    """Return the markup *s* wrapped in a link to the in-page anchor *name*."""
    return '<a href="#%s">%s</a>' % (name, s)


# Only plain (optionally dotted) identifiers are recorded in the known_*
# tables: the recorded names are later used to build regular expressions,
# so pieces such as "*", "(a" or "" must never get in there.
_DOTTED_NAME = re.compile(r"[A-Za-z_][\w.]*$")

known_names = {}

_DEF_RE = re.compile(
    r"(?P<def>def)(?P<ws>\s+)(?P<name>[A-Za-z_]\w*)"
    r"(?P<args>\([^)]*\):)(?P<tail>.*)"
)


def handle_def(stream, line, state):
    """Write a ``def NAME(ARGS):`` line, turning NAME into a link target.

    The function name is recorded in ``known_names``.  Lines that do not fit
    the simple pattern (for example nested parentheses in the argument list)
    are handed to ``emit_rest`` unchanged.

    stream -- object with a ``write`` method receiving the HTML
    line   -- the source line with its indentation already removed
    state  -- formatter state, passed through to ``emit_rest``
    """
    m = _DEF_RE.match(line)
    if not m:
        return emit_rest(stream, line, state)
    g = m.groupdict()
    stream.write(span_token("verb", g["def"]))
    stream.write(g["ws"])
    known_names[g["name"]] = 1
    stream.write(span_token("name", link_target_wrap(g["name"], g["name"])))
    # Default values may contain "<", "&" or quotes, so escape the arguments.
    stream.write(httpquote(g["args"]))
    # Anything after the colon (a one-line body, a trailing comment) used to
    # be dropped silently; format it like ordinary code instead.
    if g["tail"]:
        emit_rest(stream, g["tail"], state)


def handle_class(stream, line, state):
    """Write a ``class`` line; no special treatment yet, see ``emit_rest``."""
    return emit_rest(stream, line, state)


known_imports = {}
known_aliases = {}

_IMPORT_RE = re.compile(r"(?P<imp>import)(?P<ws>\s+)(?P<names>.*)")
_FROM_RE = re.compile(
    r"(?P<imp>from)(?P<ws>\s+)(?P<name>\S+)"
    r"(?P<ws1>\s+)(?P<imp2>import)(?P<ws2>\s+)(?P<rest>.*)"
)
# Separators inside a name list.  The capturing group makes re.split() return
# them too, so the original spacing and punctuation can be written back.
_NAME_LIST_SEP = re.compile(r"(\s*,\s*|\s+|[()])")


def _write_import_names(stream, text, table, css_class):
    """Write the name list of an import statement.

    Each plain name is recorded in *table* and written as a link target with
    CSS class *css_class*.  ``as`` is written as a verb and the name after it
    is recorded in ``known_aliases``.  A trailing ``#`` comment is written as
    a comment span, and every other piece is written HTML-escaped.
    """
    pieces = _NAME_LIST_SEP.split(text)
    after_as = False
    for idx, piece in enumerate(pieces):
        if idx % 2:
            # Odd positions hold the separators captured by the split.
            stream.write(piece)
            continue
        if piece.startswith("#"):
            comment = "".join(pieces[idx:])
            stream.write(span_token("comment", urlify(httpquote(comment))))
            return
        if piece == "as":
            stream.write(span_token("verb", piece))
            after_as = True
            continue
        if _DOTTED_NAME.match(piece):
            if after_as:
                known_aliases[piece] = 1
                stream.write(span_token("alias", link_target_wrap(piece, piece)))
            else:
                table[piece] = 1
                stream.write(span_token(css_class, link_target_wrap(piece, piece)))
        else:
            stream.write(httpquote(piece))
        after_as = False


def handle_import(stream, line, state):
    """Write an ``import ...`` or ``from ... import ...`` line.

    Imported module names are recorded in ``known_imports`` and names pulled
    in by ``from ... import`` in ``known_aliases``.  Lines matching neither
    form are handed to ``emit_rest`` unchanged.
    """
    m = _IMPORT_RE.match(line)
    if m:
        g = m.groupdict()
        stream.write(span_token("verb", g["imp"]))
        stream.write(g["ws"])
        _write_import_names(stream, g["names"], known_imports, "import")
        return
    m = _FROM_RE.match(line)
    if not m:
        return emit_rest(stream, line, state)
    g = m.groupdict()
    stream.write(span_token("verb", g["imp"]))
    stream.write(g["ws"])
    module = g["name"]
    if _DOTTED_NAME.match(module):
        known_imports[module] = 1
        stream.write(span_token("import", link_target_wrap(module, module)))
    else:
        # e.g. the "." of a relative import: nothing sensible to link to
        stream.write(httpquote(module))
    stream.write(g["ws1"])
    stream.write(span_token("verb", g["imp2"]))
    stream.write(g["ws2"])
    _write_import_names(stream, g["rest"], known_aliases, "alias")


known_variables = {}
# there needs to be a way to reset scoping, probably with levels set in state
# or maybe stuffing the whole thing in state

_ASSIGN_RE = re.compile(r"(?P<lhs>\S+)(?P<eq>\s*=\s*)(?P<rest>.*)")
_LHS_RE = re.compile(r"(?P<v>\w*)(?P<rest>.*)")


def handle_assignment(stream, line, state):
    """Write a line containing ``=``, cross-referencing the leading name.

    The first use of a name becomes a link target and is recorded in
    ``known_variables``; later uses link back to it.  The right-hand side is
    formatted by ``emit_rest``.
    """
    m = _ASSIGN_RE.match(line)
    if not m:
        return emit_rest(stream, line, state)
    g = m.groupdict()
    # needs to handle multiple LHS variables...
    vname, trailer = _LHS_RE.match(g["lhs"]).groups()
    if not vname:
        # Targets such as "[a]" have no leading name.  Recording "" would
        # later make every word boundary look like a known variable.
        return emit_rest(stream, line, state)
    if vname in known_variables:
        stream.write(span_token("noun", link_ref_wrap(vname, vname)))
    else:
        known_variables[vname] = 1
        stream.write(span_token("noun", link_target_wrap(vname, vname)))
    # The rest of the target (".attr", "[key]", a "<" of "<=") is raw source.
    stream.write(httpquote(trailer))
    stream.write(g["eq"])
    emit_rest(stream, g["rest"], state)


special_verbs = {
    "def": handle_def,
    "class": handle_class,
    "import": handle_import,
    "from": handle_import,
}

exprs = [
    "and",
    "in",
    "is",
    "lambda",
    "not",
    "or",
    # and some that should be handled better
    "for",
    "return",
    "len",
]

# Map every character to itself or, where a named entity exists, to "&name;".
invdefs = {}
for _code in range(256):
    _ch = "%c" % _code
    invdefs[_ch] = _ch
for _entity, _ch in entitydefs.items():
    if len(_ch) == 1:
        invdefs[_ch] = "&%s;" % _entity

_URL_SPLIT = re.compile(r"(https?://[^\s&]*)")


def make_one_url(s):
    """Return *s* as a hyperlink if it starts with http:// or https://."""
    if s.startswith("http://") or s.startswith("https://"):
        return '<a href="%s">%s</a>' % (s, s)
    return s


def urlify(s):
    """Turn each URL in the already HTML-escaped text *s* into a hyperlink.

    A URL ends at whitespace or at ``&``, i.e. at the next escaped character.
    """
    return "".join([make_one_url(part) for part in _URL_SPLIT.split(s)])


def httpquote(s):
    """Return *s* with characters replaced by their named HTML entities.

    Characters missing from ``invdefs`` are passed through unchanged instead
    of raising KeyError.
    """
    return "".join([invdefs.get(ch, ch) for ch in s])


# break into tokens: quote, triplequote, tick, other
# backslash-foo yields other.
munge_other = "munge_other"
munge_quote = "munge_quote"
munge_3quote = "munge_3quote"
munge_tick = "munge_tick"
mquotes = [munge_quote, munge_3quote, munge_tick]

# The tokenizer runs on text that has usually been through httpquote(), where
# a double quote shows up as this entity; raw quotes are recognised as well.
_QUOTE_ENTITY = "&quot;"

# String delimiters in the order they are tried (longest spelling first
# within each family).
_QUOTE_MARKERS = [
    (munge_tick, "'"),
    (munge_3quote, '"""'),
    (munge_quote, '"'),
    (munge_3quote, _QUOTE_ENTITY * 3),
    (munge_quote, _QUOTE_ENTITY),
]

# Runs of ordinary text.  Backslash is excluded (it was not, by accident, in
# the original character class) so that an escape such as \" in the middle of
# a string becomes its own token instead of letting the quote end the string.
_PLAIN_RUN = re.compile(r"[^\\'\"&]*")
_ENTITY_RUN = re.compile(r"&[^\\'\"&]*")


def check_token(s, mungetag, match):
    """Return ``(mungetag, match, rest)`` if *s* starts with *match*, else None."""
    if s.startswith(match):
        return (mungetag, s[:len(match)], s[len(match):])
    return None


def string_munge_one_token(s):
    """Split the first token off *s*; return ``(tag, token, rest)``.

    A backslash together with the character after it is "other", a string
    delimiter gets its own tag, and anything else is a run of "other" text
    up to the next backslash, quote or ``&``.
    """
    # strip the quoted char too (deal with newline case later)
    if s.startswith("\\"):
        return (munge_other, s[:2], s[2:])
    for tag, marker in _QUOTE_MARKERS:
        found = check_token(s, tag, marker)
        if found:
            return found
    if s.startswith("&"):
        piece = _ENTITY_RUN.match(s).group(0)
    else:
        piece = _PLAIN_RUN.match(s).group(0)
    return (munge_other, piece, s[len(piece):])


def string_munge(s):
    """Tokenize *s* completely; return a list of ``[tag, token]`` pairs."""
    reslist = []
    while s:
        tag, piece, s = string_munge_one_token(s)
        reslist.append([tag, piece])
    return reslist


def span_mark(t):
    """Return *t* preceded by the opening tag of a string-literal span."""
    return """<span class="python-quote">%s""" % t


def span_end(t):
    """Return *t* followed by the tag closing a string-literal span."""
    return """%s</span>""" % t


def span_token(tag, str):
    """Return *str* wrapped in a span of CSS class ``python-<tag>``."""
    return """<span class="python-%s">%s</span>""" % (tag, str)


# Words that can safely be embedded in a \b...\b pattern.
_PLAIN_WORD = re.compile(r"\w(?:[\w.]*\w)?$")


def markup_words(s, tables):
    """Replace known words in *s* with markup, in one pass over the text.

    tables -- sequence of ``(words, render)`` pairs in priority order, where
              *words* is an iterable of strings and ``render(word)`` returns
              the replacement markup; a word listed in several tables is
              rendered by the first one.

    Doing all replacements in a single pass means the markup inserted for one
    word is never rescanned, so a variable called ``name``, ``a`` or ``span``
    cannot damage spans and links that were already produced.  Words directly
    after ``&`` are skipped because they are entity names such as ``&lt;``.
    Empty words and words containing characters other than word characters
    and dots are ignored.
    """
    renderers = {}
    for words, render in tables:
        for word in words:
            if word not in renderers and _PLAIN_WORD.match(word):
                renderers[word] = render
    if not renderers:
        return s
    # Longest first, so "os.path" is preferred over "os".
    alternatives = sorted(renderers, key=len, reverse=True)
    pattern = r"(?<!&)\b(?:%s)\b" % "|".join([re.escape(w) for w in alternatives])

    def substitute(match):
        word = match.group(0)
        return renderers[word](word)

    return re.sub(pattern, substitute, s)


def _token_markup(word):
    """Render a builtin-ish word (``None`` and the entries of ``exprs``)."""
    return span_token("token", word)


def _reference_markup(css_class):
    """Return a renderer that links a word to its anchor in span *css_class*."""
    def render(word):
        return span_token(css_class, link_ref_wrap(word, word))
    return render


def txt_process(s):
    """Mark up the known words in a chunk of text that is outside any string.

    ``None`` and the ``exprs`` words become token spans; names recorded in
    ``known_names``, ``known_imports``, ``known_aliases`` and
    ``known_variables`` become links to their anchors, in that priority.
    """
    # gets chunks of text that are "other"
    tables = [
        (["None"], _token_markup),
        (exprs, _token_markup),
        (known_names, _reference_markup("name")),
        (known_imports, _reference_markup("import")),
        (known_aliases, _reference_markup("alias")),
        (known_variables, _reference_markup("noun")),
    ]
    return markup_words(s, tables)


def string_span_munge(s, st):
    """Mark up string literals, comments and known words in escaped text *s*.

    st -- ``(state, ticktype)``: *state* is "outside" or "inside" a string
          literal and *ticktype* is the tag of the delimiter that opened it.

    Returns ``(state, ticktype, html)``; the first two are carried over to
    the next line so that multi-line strings keep their span open.
    """
    state, ticktype = st
    res = []
    tokens = string_munge(s)
    for idx, (t, p) in enumerate(tokens):
        if state == "outside":
            if t == munge_other:
                hash_at = p.find("#")
                if hash_at > -1:
                    # A "#" outside any string starts a comment running to the
                    # end of the line.  Emit it as one span so an apostrophe in
                    # it cannot open a "string" that leaks onto later lines.
                    res.append(txt_process(p[:hash_at]))
                    rest_of_line = "".join([piece for _tag, piece in tokens[idx + 1:]])
                    res.append(span_token("comment", p[hash_at:] + rest_of_line))
                    break
                res.append(txt_process(p))
            else:
                state = "inside"
                ticktype = t
                res.append(span_mark(p))
        elif state == "inside":
            if t == ticktype:
                state = "outside"
                res.append(span_end(p))
            else:
                res.append(p)
        else:
            raise Exception("bugcheck %s" % state)
    return (state, ticktype, "".join(res))


def emit_rest(stream, s, state):
    """Escape raw source text *s*, mark it up and write it to *stream*.

    Reads and updates ``state["string"]``, the ``(state, ticktype)`` pair
    tracked by ``string_span_munge``.
    """
    st, ty, res = string_span_munge(httpquote(s), state["string"])
    state["string"] = (st, ty)
    stream.write(urlify(res))


_LEADING_WORD = re.compile(r"([A-Za-z_]\w*)(.*)$")


def format_line(s, stream, state):
    """Write the HTML for one source line *s*, followed by a newline.

    s      -- the raw line, with or without its line ending
    stream -- object with a ``write`` method receiving the HTML
    state  -- dict whose "string" entity tracks an open string literal
    """
    # Drop the line ending first so that the newline of a blank line does not
    # end up inside the indent span, which doubled every blank line.
    text = s.rstrip("\r\n")
    line = text.lstrip()
    indent = text[:len(text) - len(line)]
    stream.write("""<span class="python-indent">%s</span>""" % indent)
    if state["string"][0] == "inside":
        # Continuation of a multi-line string: text, not code.
        emit_rest(stream, line, state)
        stream.write("\n")
        return
    if line.startswith("#"):
        stream.write(span_token("comment", urlify(httpquote(line))))
        stream.write("\n")
        return
    # Look at the leading identifier rather than splitting on the first
    # space, so that "pass", "else:" and "x=1" are recognised too.
    m = _LEADING_WORD.match(line)
    if m:
        first_word, rest = m.groups()
    else:
        first_word, rest = None, line
    if first_word in special_verbs:
        special_verbs[first_word](stream, line, state)
    elif first_word in verbs:
        stream.write(span_token("verb", first_word))
        emit_rest(stream, rest, state)
    elif "=" in line:
        handle_assignment(stream, line, state)
    else:
        emit_rest(stream, line, state)
    stream.write("\n")


def format_stream(instream, outstream):
    """Convert all lines read from *instream* into a ``<pre>`` block.

    instream  -- object with a ``readline`` method
    outstream -- object with a ``write`` method
    """
    outstream.write("""<pre class="python-src">""" + "\n")
    st = {"string": ("outside", None)}
    while True:
        line = instream.readline()
        if not line:
            break
        format_line(line, outstream, st)
    outstream.write("""</pre>""" + "\n")


#H1, H2, H3 { text-align: center; }
#.gutter td { font-family: sans-serif; text-align: right; }
#td.gutter { background: #c0d8c0; }
#BODY { background: #ffffd8; }
#P, TD { color: black; }
CSS = """<style type="text/css" media=screen>
<!--
span.python-verb { color: cyan; }
span.python-comment { color: red; }
span.python-quote { color: orange; }
span.python-token { color: blue; }
span.python-name { color: green; }
span.python-import { color: green; }
span.python-alias { color: green; }
span.python-noun { color: brown; }
pre.python-src { color: grey; background: black; }
h1 { color: yellow; background: grey; }
body { color: yellow; background: grey; }
-->
</style>
"""


def main(argv):
    """Write a complete HTML page for the file named in ``argv[1]`` to stdout.

    Returns the process exit status: 0 on success, 2 when the command line
    does not consist of exactly one file name.
    """
    if len(argv) != 2:
        progname = "py2html"
        if argv:
            progname = argv[0]
        sys.stderr.write("usage: %s FILE\n" % progname)
        return 2
    source_name = argv[1]
    # The file name ends up in the title, in link text and in an attribute.
    shown_name = httpquote(source_name)
    out = sys.stdout
    source = open(source_name)
    try:
        out.write("""<html><head><title>%s</title></head>""" % shown_name + "\n")
        out.write(CSS + "\n")
        out.write("""<body>%s<h1>%s</h1>""" % (
            '<a href="index.html">[back]</a>',
            '<a href="%s">%s</a>' % (shown_name, shown_name)) + "\n")
        format_stream(source, out)
        out.write("""</body></html>""" + "\n")
    finally:
        source.close()
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))