#!/usr/bin/python
# This is a basic first-cut code to html converter.
# Unlike any of the other modules I've looked at, it makes a rudimentary attempt
# to cross-reference variables (eventually, it will need to be two-pass.)
# Note that it doesn't use any python-internal tools to do the parsing, just
# code and regexps; thus there are some holes. However, this gets me off the
# ground far enough to start putting blobs of python code up on the web site.
#
# Future directions include more literate-programming oriented features, like
# taking block comments [like this] and formatting them more smartly, or perhaps
# even extracting the __doc__ value. Perhaps even having a sidebar with notes
# that are somehow tagged as less-inline than normal comments.
#
# For now, though, this is enough to put in Makefiles and check in for the web site.
#
# Copyright 2003 Mark W. Eichin <eichin@thok.org> The Herd Of Kittens
#
import re
import sys
# Statement keywords: when one of these is the first word of a line it is
# rendered with the "python-verb" style.
verbs = [
    "assert", "break", "class", "continue",
    "def", "del", "elif", "else",
    "except", "exec", "finally", "for",
    "from", "global", "if", "import",
    "pass", "print", "raise", "return",
    "try", "while", "yield",
]
# -- single token, outdent --
# break
# continue
# pass
# return
# -- single token with colon, indent --
# else:
# except:
# finally:
# try:
# -- token EXPR --
# assert EXPR
# del EXPR
# exec EXPR
# -- token EXPR, outdent --
# raise EXPR
# return EXPR
# yield EXPR
# -- token EXPR colon, indent --
# if EXPR:
# elif EXPR:
# while EXPR:
# except TYPE:
# class NAME:
# -- something complex with colon, indent --
# class NAME(BASECLASS):
# def NAME(ARGS):
# except TYPE, VAR:
# for NAME in EXPR:
# -- something else --
# from NAME import COMMANAMES
# global COMMANAMES
# import COMMANAMES
# print >> FD, EXPRS
# print EXPRS
def link_target_wrap(s, name):
    """Wrap text *s* in an HTML anchor that defines the link target *name*."""
    anchor_fields = (name, s)
    return '<a name="%s">%s</a>' % anchor_fields
def link_ref_wrap(s, name):
    """Wrap text *s* in an HTML link pointing at the in-page anchor *name*."""
    link_fields = (name, s)
    return '<a href="#%s">%s</a>' % link_fields
# Registry of defined names seen so far (used as a set: name -> 1).
# txt_process links later occurrences of these names back to their anchors.
known_names = {}
def handle_def(stream, line, state):
    """Render a ``def NAME(ARGS):`` line and register NAME as a link target.

    stream -- object with a write() method receiving the HTML
    line   -- the source line with indentation and newline already removed
    state  -- formatter state dict, passed through to emit_rest

    Lines that do not look like a simple one-line ``def`` header fall back
    to emit_rest unchanged.
    """
    # The trailing "rest" group keeps whatever follows the colon (a one-line
    # body or a trailing comment); without it that text would be dropped.
    # The name must be a non-empty identifier so that an empty string is
    # never registered as a known name.
    m = re.match(r"(?P<def>def)(?P<ws>\s+)(?P<name>\w+)"
                 r"(?P<args>\([^)]*\):)(?P<rest>.*)", line)
    if not m:
        return emit_rest(stream, line, state)
    g = m.groupdict()
    stream.write(span_token("verb", g["def"]))
    stream.write(g["ws"])
    known_names[g["name"]] = 1
    stream.write(span_token("name", link_target_wrap(g["name"], g["name"])))
    # Default values may contain <, > or & -- escape before writing as HTML.
    stream.write(httpquote(g["args"]))
    emit_rest(stream, g["rest"], state)
def handle_class(stream, line, state):
    """Render a ``class NAME...`` line and register NAME as a link target.

    stream -- object with a write() method receiving the HTML
    line   -- the source line with indentation and newline already removed
    state  -- formatter state dict, passed through to emit_rest

    The keyword is highlighted as a verb and the class name is recorded in
    known_names, mirroring what handle_def does for functions.  Everything
    after the name (base classes, colon, trailing text) goes through
    emit_rest.  Lines that do not match fall back to emit_rest unchanged.
    """
    m = re.match(r"(?P<cls>class)(?P<ws>\s+)(?P<name>\w+)(?P<rest>.*)", line)
    if not m:
        return emit_rest(stream, line, state)
    g = m.groupdict()
    stream.write(span_token("verb", g["cls"]))
    stream.write(g["ws"])
    known_names[g["name"]] = 1
    stream.write(span_token("name", link_target_wrap(g["name"], g["name"])))
    emit_rest(stream, g["rest"], state)
# Registries filled by handle_import (used as sets: name -> 1).
# known_imports holds module names; known_aliases holds the names listed
# after "from MODULE import".  txt_process links later occurrences of both.
known_imports = {}
known_aliases = {}
def _emit_import_names(stream, text, tag, registry):
    """Write an import name list, tagging and registering each real name.

    Separators (commas, whitespace, parentheses, backslashes) are written
    back exactly as they appeared, so spacing is never lost.  Only tokens
    that look like (dotted) identifiers are recorded in *registry*; "as" is
    shown as a keyword and anything else (such as "*") is written escaped
    but not registered.  A trailing "#" comment is rendered as a comment.
    """
    comment = ""
    hash_at = text.find("#")
    if hash_at >= 0:
        text, comment = text[:hash_at], text[hash_at:]
    # The capturing group makes re.split return names and separators alike.
    for piece in re.split(r"([^,\s()\\]+)", text):
        if not piece:
            continue
        if piece == "as":
            stream.write(span_token("verb", piece))
        elif re.match(r"[A-Za-z_][\w.]*$", piece):
            registry[piece] = 1
            stream.write(span_token(tag, link_target_wrap(piece, piece)))
        else:
            stream.write(httpquote(piece))
    if comment:
        stream.write(span_token("comment", urlify(httpquote(comment))))


def handle_import(stream, line, state):
    """Render an ``import ...`` or ``from MODULE import ...`` line.

    stream -- object with a write() method receiving the HTML
    line   -- the source line with indentation and newline already removed
    state  -- formatter state dict, passed through to emit_rest

    Module names are recorded in known_imports and names imported with
    ``from`` in known_aliases, each becoming a link target.  Lines matching
    neither form fall back to emit_rest unchanged.
    """
    m = re.match(r"(?P<imp>import)(?P<ws>\s+)(?P<names>.*)", line)
    if m:
        stream.write(span_token("verb", m.group("imp")))
        stream.write(m.group("ws"))
        _emit_import_names(stream, m.group("names"), "import", known_imports)
        return
    # One pattern for the whole "from" form: a line that does not fit simply
    # fails to match instead of crashing on a half-parsed remainder.
    m = re.match(r"(?P<kw>from)(?P<ws>\s+)(?P<name>\S+)(?P<ws1>\s+)"
                 r"(?P<imp>import)(?P<ws2>\s*)(?P<rest>.*)", line)
    if not m:
        return emit_rest(stream, line, state)
    g = m.groupdict()
    stream.write(span_token("verb", g["kw"]))
    stream.write(g["ws"])
    if re.match(r"[A-Za-z_][\w.]*$", g["name"]):
        known_imports[g["name"]] = 1
        stream.write(span_token("import",
                                link_target_wrap(g["name"], g["name"])))
    else:
        # e.g. a relative "." -- show it, but do not register it as a name.
        stream.write(httpquote(g["name"]))
    stream.write(g["ws1"])
    stream.write(span_token("verb", g["imp"]))
    stream.write(g["ws2"])
    _emit_import_names(stream, g["rest"], "alias", known_aliases)
# Registry of variables that have been assigned so far (used as a set:
# name -> 1).  handle_assignment adds to it; txt_process links references.
known_variables = {}
# there needs to be a way to reset scoping, probably with levels set in state
# or maybe stuffing the whole thing in state
def handle_assignment(stream, line, state):
    """Render a ``NAME... = EXPR`` line, cross-referencing the variable.

    stream -- object with a write() method receiving the HTML
    line   -- the source line with indentation and newline already removed
    state  -- formatter state dict, passed through to emit_rest

    The first assignment to a variable becomes its link target; later ones
    link back to it.  Lines that are not a plain single-target assignment
    (comparisons, keyword arguments inside a call, targets that do not start
    with a name) are passed to emit_rest unchanged.
    """
    # (?!=) keeps "==" from being read as an assignment operator.
    m = re.match(r"(?P<lhs>\S+)(?P<eq>\s*=(?!=)\s*)(?P<rest>.*)", line)
    if not m:
        return emit_rest(stream, line, state)
    g = m.groupdict()
    lhs = g["lhs"]
    # needs to handle multiple LHS variables...
    mm = re.match(r"(?P<v>\w*)(?P<rest>.*)", lhs)
    vname = mm.group("v")
    trailer = mm.group("rest")
    # "a<=b", "a!=b", "a==b": the "=" belongs to a comparison operator.
    is_comparison = lhs[-1] in "=!<>"
    # "foo(x=1)": the "=" sits inside an open bracket, so it is a keyword
    # argument rather than an assignment to foo.
    unbalanced = [1 for o, c in ("()", "[]", "{}")
                  if lhs.count(o) != lhs.count(c)]
    # An empty name must never be registered: it would later be turned into
    # a pattern that matches at every word boundary.
    if not vname or is_comparison or unbalanced:
        return emit_rest(stream, line, state)
    if vname in known_variables:
        stream.write(span_token("noun", link_ref_wrap(vname, vname)))
    else:
        known_variables[vname] = 1
        stream.write(span_token("noun", link_target_wrap(vname, vname)))
    # The remainder of the target (attribute, subscript, augmented-assignment
    # operator) is raw source text and must be escaped for HTML.
    stream.write(httpquote(trailer))
    stream.write(g["eq"])
    emit_rest(stream, g["rest"], state)
# Keywords whose lines get a dedicated handler instead of the generic
# "highlight the verb, then emit the rest" treatment.  Each handler is
# called as handler(stream, line, state).
special_verbs = {
    "def": handle_def,
    "class": handle_class,
    "import": handle_import,
    "from": handle_import,
}
# Words highlighted with the "python-token" style wherever they occur in
# ordinary (non-string) text.
exprs = [
    "and",
    "in",
    "is",
    "lambda",
    "not",
    "or",
    # and some that should be handled better
    "for",
    "return",
    "len",
]
from htmlentitydefs import entitydefs
# Character -> HTML text table used by httpquote.  Every character with
# code 0..255 starts out mapping to itself; characters that are the value of
# a single-character entity definition are then replaced by "&name;".
invdefs = {}
for code in range(256):
    plain = chr(code)
    invdefs[plain] = plain
for entity_name, entity_char in entitydefs.items():
    if len(entity_char) != 1:
        continue
    invdefs[entity_char] = "&%s;" % entity_name
def make_one_url(s):
    """Return *s* wrapped in a link if it is an http(s) URL, else unchanged."""
    if s.startswith("http://") or s.startswith("https://"):
        return '<a href="%s">%s</a>' % (s, s)
    return s


def urlify(s):
    """Turn every http:// or https:// URL found in HTML text *s* into a link.

    *s* is already-escaped HTML that may contain generated markup.  A URL
    ends at whitespace, at "&" (the start of an escaped character), at a
    quote character, or at "<" / ">".  Stopping at quotes and angle brackets
    keeps a URL inside a quoted string from swallowing the closing quote and
    the "</span>" tag that follows it.
    """
    # The capturing group makes re.split return the URLs as well as the text
    # between them, so the pieces can simply be joined back together.
    parts = re.split(r"""(https?://[^\s&'"<>]*)""", s)
    return "".join([make_one_url(p) for p in parts])
def httpquote(s):
    """Return *s* with each character replaced by its invdefs entry.

    Characters that have a named HTML entity in the table come out as
    "&name;"; a character missing from the table raises KeyError.
    """
    escaped = []
    for ch in s:
        escaped.append(invdefs[ch])
    return "".join(escaped)
# break into tokens: quote, triplequote, tick, other
# backslash-foo yields other.
# Token tags produced by string_munge_one_token.  "other" is ordinary text;
# the remaining three are quote delimiters.
munge_other, munge_quote, munge_3quote, munge_tick = (
    "munge_other",
    "munge_quote",
    "munge_3quote",
    "munge_tick",
)
mquotes = [munge_quote, munge_3quote, munge_tick]
def check_token(s, mungetag, match):
    """Split the prefix *match* off *s* if *s* starts with it.

    Returns (mungetag, prefix, remainder) on success and None otherwise.
    """
    if not s.startswith(match):
        return None
    cut = len(match)
    return (mungetag, s[:cut], s[cut:])
def string_munge_one_token(s):
    """Split one token off the front of the HTML-escaped text *s*.

    Returns (tag, token, remainder).  The tag is munge_tick, munge_quote or
    munge_3quote for a quote delimiter (raw or in its httpquote-escaped
    form) and munge_other for anything else.  *s* must be non-empty.
    """
    # strip the quoted char too (deal with newline case later)
    if s.startswith("\\"):
        return (munge_other, s[:2], s[2:])
    # Order matters: the triple quote has to be tried before the single one.
    delimiters = (
        (munge_tick, "'"),
        (munge_3quote, '"""'),
        (munge_quote, '"'),
        (munge_3quote, httpquote('"""')),
        (munge_quote, httpquote('"')),
    )
    for tag, delimiter in delimiters:
        r = check_token(s, tag, delimiter)
        if r:
            return r
    # Ordinary text runs up to the next backslash, quote or "&".  A leading
    # "&" (an escaped character that is not a quote) is kept in the run.
    # The backslash has to be excluded here: otherwise it is swallowed into
    # the run and the quote it escapes is then mistaken for a real
    # delimiter, closing the string early.
    m = re.match(r"""(&?[^\\'"&]*)""", s)
    piece = m.group(1)
    return (munge_other, piece, s[len(piece):])
def string_munge(s):
    """Tokenise *s* completely and return a list of [tag, piece] pairs."""
    tokens = []
    remaining = s
    while remaining:
        split = string_munge_one_token(remaining)
        tokens.append([split[0], split[1]])
        remaining = split[2]
    return tokens
def span_mark(t):
    """Return *t* preceded by the opening tag of a "python-quote" span."""
    opening = """<span class="python-quote">%s"""
    return opening % t
def span_end(t):
    """Return *t* followed by a closing span tag."""
    closing = """%s</span>"""
    return closing % t
def span_token(tag, str):
    """Wrap *str* in a span whose CSS class is "python-" followed by *tag*."""
    fields = (tag, str)
    return """<span class="python-%s">%s</span>""" % fields
def txt_process(s):
    """Highlight and cross-reference the words in a chunk of "other" text.

    *s* is HTML-escaped text from outside any string literal.  ``None`` and
    the words in exprs are wrapped as tokens; known names, imports, aliases
    and variables are wrapped in their own style and linked to their anchor.

    Everything is replaced in a single pass.  Running one substitution per
    word would let later words match inside markup inserted for earlier
    ones (a variable called "a" or "name" would rewrite the generated
    "<a name=...>" tags).
    """
    # gets chunks of text that are "other"
    markup = {}
    # Filled lowest priority first, so that a word present in several
    # categories ends up with the markup of the highest-priority one.
    for tag, registry in (("noun", known_variables),
                          ("alias", known_aliases),
                          ("import", known_imports),
                          ("name", known_names)):
        for word in registry:
            # Skip anything that is not a (dotted) identifier, e.g. "" or
            # "*": it cannot be matched sensibly between word boundaries.
            if re.match(r"\w+(?:\.\w+)*$", word):
                markup[word] = span_token(tag, link_ref_wrap(word, word))
    for word in exprs + ["None"]:
        markup[word] = span_token("token", word)
    # Longest first, so "os.path" is preferred over "os" at one position.
    words = list(markup)
    words.sort(key=len, reverse=True)
    # (?<!&) protects escaped characters such as "&lt;" from a variable that
    # happens to be called "lt"; re.escape keeps "." in dotted names literal.
    pattern = r"(?<!&)\b(?:%s)\b" % "|".join([re.escape(w) for w in words])
    return re.sub(pattern, lambda m: markup[m.group(0)], s)
def string_span_munge(s, st):
    """Mark up string literals and plain text in the escaped text *s*.

    s  -- HTML-escaped source text (a line or the tail of a line)
    st -- (state, ticktype) carried over from the previous call; state is
          "outside" or "inside" a string literal and ticktype is the tag of
          the delimiter that opened the current string

    Returns (state, ticktype, html) with the updated quoting state.
    Raises Exception if the incoming state is neither of the two values.
    """
    state, ticktype = st
    res = []
    pieces = string_munge(s)
    for idx, (t, p) in enumerate(pieces):
        if state == "outside":
            if t == "munge_other":
                hash_at = p.find("#")
                if hash_at > -1:
                    # A "#" outside any string starts a comment that runs to
                    # the end of the text.  Quote characters inside it must
                    # not open a string, or the state would leak into the
                    # following lines.
                    res.append(txt_process(p[:hash_at]))
                    comment = p[hash_at:] + "".join(
                        [q for _, q in pieces[idx + 1:]])
                    res.append(span_token("comment", comment))
                    break
                res.append(txt_process(p))
            else:
                state = "inside"
                ticktype = t
                res.append(span_mark(p))
        elif state == "inside":
            if t == ticktype:
                # Only the delimiter that opened the string can close it.
                state = "outside"
                res.append(span_end(p))
            else:
                res.append(p)
        else:
            raise Exception("bugcheck %s" % state)
    return (state, ticktype, "".join(res))
def emit_rest(stream, s, state):
    """Escape, mark up and write the source text *s* to *stream*.

    The quoting state stored in state["string"] is read before and updated
    after the text is processed.
    """
    escaped = httpquote(s)
    quote_state, quote_tag, html = string_span_munge(escaped, state["string"])
    state["string"] = (quote_state, quote_tag)
    stream.write(urlify(html))
def format_line(s, stream, state):
    """Write one source line *s* to *stream* as HTML, followed by a newline.

    s      -- one line of source, with or without its line ending
    stream -- object with a write() method receiving the HTML
    state  -- formatter state dict; state["string"] holds the quoting state
              carried from line to line
    """
    # Drop the line ending first: otherwise a blank line's newline is
    # captured as "indent" and the line is emitted twice.
    m = re.match(r"^(\s*)(.*)$", s.rstrip("\r\n"))
    indent, line = m.groups()
    stream.write("""<span class="python-indent">%s</span>""" % indent)
    if state["string"][0] != "outside":
        # Continuation of a multi-line string: nothing on this line is a
        # comment, keyword or assignment until the string is closed.
        emit_rest(stream, line, state)
        stream.write("\n")
        return
    if line.startswith("#"):
        stream.write(span_token("comment", urlify(httpquote(line))))
        stream.write("\n")
        return
    # Take the leading word by pattern rather than by splitting on a space,
    # so bare keywords such as "else:", "try:" or "pass" are recognised too.
    first = re.match(r"(\w+)(.*)$", line)
    first_word = None
    rest = line
    if first:
        first_word, rest = first.groups()
    if first_word in special_verbs:
        special_verbs[first_word](stream, line, state)
    elif first_word in verbs:
        stream.write(span_token("verb", first_word))
        emit_rest(stream, rest, state)
    elif "=" in line:
        handle_assignment(stream, line, state)
    else:
        emit_rest(stream, line, state)
    stream.write("\n")
def format_stream(instream, outstream):
    """Convert all of *instream* to HTML inside one <pre> on *outstream*.

    instream  -- object with a readline() method
    outstream -- object with a write() method
    """
    outstream.write("""<pre class="python-src">""")
    outstream.write("\n")
    st = {"string": ("outside", None)}
    line = instream.readline()
    while line:
        format_line(line, outstream, st)
        line = instream.readline()
    outstream.write("""</pre>""")
    outstream.write("\n")
#H1, H2, H3 { text-align: center; }
#.gutter td { font-family: sans-serif; text-align: right; }
#td.gutter { background: #c0d8c0; }
#BODY { background: #ffffd8; }
#P, TD { color: black; }
# Style sheet printed into the generated page by the script entry point.
# It defines one rule per "python-*" span class plus the colours of the
# <pre class="python-src"> block, the heading and the page body.
CSS = """<style type="text/css" media=screen>
<!--
span.python-verb { color: cyan; }
span.python-comment { color: red; }
span.python-quote { color: orange; }
span.python-token { color: blue; }
span.python-name { color: green; }
span.python-import { color: green; }
span.python-alias { color: green; }
span.python-noun { color: brown; }
pre.python-src { color: grey; background: black; }
h1 { color: yellow; background: grey; }
body { color: yellow; background: grey; }
-->
</style>
"""
def main(argv):
    """Write a complete HTML page for the file named in argv[1] to stdout.

    argv -- command-line argument list (program name, then one file name)

    Returns 0 on success and 2 after printing a usage message when the
    argument count is wrong.
    """
    if len(argv) != 2:
        sys.stderr.write("usage: %s FILE\n" % argv[0])
        return 2
    filename = argv[1]
    # The file name is placed in the title, a link target and a heading, so
    # it has to be escaped like any other text written into the page.
    shown = httpquote(filename)
    f = open(filename)
    try:
        # print with a single parenthesised argument behaves the same as the
        # print statement here.
        print("""<html><head><title>%s</title></head>""" % shown)
        print(CSS)
        print("""<body>%s<h1>%s</h1>""" % (
            '<a href="index.html">[back]</a>',
            '<a href="%s">%s</a>' % (shown, shown)))
        format_stream(f, sys.stdout)
        print("""</body></html>""")
    finally:
        f.close()
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))