#!/usr/bin/python
"""Fold old per-image .jpg.xml caption files into the kimdaba album index.

Usage: run from the directory holding the kimdaba index.xml, passing the
year directory to scan; add --no-act for a dry run that writes the
would-be album to /tmp/c2k.xml and deletes nothing.
"""

import os
import sys

try:
    import elementtree.ElementTree as ElementTree
except ImportError:
    # The standalone elementtree package was merged into the standard
    # library as xml.etree.ElementTree.
    import xml.etree.ElementTree as ElementTree

# tool for finding old .capt or .xml files, slurping them into the
# kimdaba index.xml, and purging the old files.

# sample foo.jpg.xml looks like:
# <?xml version="1.0" encoding="UTF-8"?>
# <image><description>
# <field name="title">xmas wreath</field>
# </description>
# <bins></bins>
# <exif></exif>
# </image>
#
# more complete form:
# <?xml version="1.0" encoding="UTF-8"?><image><description>
# <field name="location">Concord, MA</field>
# <field name="people"></field>
# <field name="description">Snow still on the ground in late march.</field>
# <field name="event"></field>
# <field name="title">Snow in the yard</field>
# </description>
# <bins></bins>
# <exif>
# <tag name="MeteringMode">
# </tag>
# <tag name="JPEG_Type">
# Baseline
# </tag>
# </exif>
# </image>

# <exif> is entirely stuff in the .jpg
# <bins> is stuff pushed back in later
# so ignore both... but where is requested rotation stored???
#
#
# foo.jpg.capt looks like:
# title: xmas wreath
#
# capts (mixed-case tag names):
# Description
# Title
# Event
# Location
# Mail (only one value, and not *about* the picture)
# People (comma separated, ? means unk. or partly unk)
#
# (generate mappings in advance...)


def get_fields_from_xml(pathpart):
    """Read the description fields from the file pathpart + ".xml".

    Returns a dict mapping each <field>'s "name" attribute to its text.
    An empty <field> element maps to None.
    """
    print("GET: %s" % pathpart)
    xstuff = ElementTree.ElementTree(file=pathpart + ".xml")
    # <field name="description">Snow still on the ground in late march.</field>
    fields = {}
    for field in xstuff.findall("description/field"):
        fields[field.get("name")] = field.text
    return fields


known_fields = set(["location", "people", "description", "event", "title",
                    "mail"])

# map title-cased capt values to corresponding Persons options on the
# kimdaba side
peoplemap = {}

# get this from options/option off the top level...
# however, to get a clean run you can prime it with values here
known_people = set()


def load_known_people(options):
    """Add every value of the "Persons" option(s) in options to known_people.

    options is an iterable of <option> elements
    (album.findall("options/option")).  The resulting set is printed.
    """
    for option in options:
        if option.get("name") == "Persons":
            for value in option.findall("value"):
                known_people.add(value.get("value"))
    print(known_people)


def add_img_keyword(img, kind, value):
    """Record value under the <option name=kind> of the image element img.

    kind must be "Keywords", "Persons" or "Locations".  The <options> and
    <option> elements are created when missing.  A value that is already
    listed under that option is not added a second time.  Always returns
    True.
    """
    assert kind in ("Keywords", "Persons", "Locations"), kind
    for optionset in img.findall("options/option"):
        if optionset.get("name") == kind:
            break
    else:
        options = img.find("options")
        if options is None:
            options = img.makeelement("options", {})
            img.append(options)
        # Keep a reference to the option we just created: looking it up
        # again with find("options/option") would return the *first*
        # option, which may be of a different kind.
        optionset = options.makeelement("option", {"name": kind})
        options.append(optionset)
    already_listed = [v.get("value") for v in optionset.findall("value")]
    if value not in already_listed:
        optionset.append(optionset.makeelement("value", {"value": value}))
    return True


def add_img_person(img, person):
    """Record person under the "Persons" option of img; returns True."""
    return add_img_keyword(img, "Persons", person)


class converter(object):
    """Per-field converters, looked up by name as convert_<fieldname>.

    Each converter takes the image element and the field text and returns
    a true value only when it changed the image.
    """

    @classmethod
    def convert(cls, img, fieldname, fieldvalue):
        """Dispatch fieldvalue to convert_<fieldname>.

        Raises KeyError when no such converter exists.  A field with no
        text (None, empty or whitespace only) is skipped and reported as
        "nothing done" (False).
        """
        methname = "convert_%s" % fieldname
        if not hasattr(cls, methname):
            raise KeyError("No converter for %s" % fieldname)
        # Empty <field> elements parse to None; there is nothing to carry
        # over, and passing None on would crash or poison the album.
        if fieldvalue is None or not fieldvalue.strip():
            return False
        return getattr(cls, methname)(img, fieldvalue)

    @classmethod
    def convert_location(cls, img, fieldvalue):
        """Only report the location; nothing is stored on the image."""
        print("WHERE: %s" % fieldvalue)

    @classmethod
    def convert_people(cls, img, fieldvalue):
        """Add each comma separated person to the image's Persons option.

        Names are stripped and title-cased, then mapped through peoplemap
        or accepted as-is when in known_people; anything else is only
        reported with a "WHO:" line.  Always returns True.
        """
        if fieldvalue == "sassafras foxy":
            fieldvalue = "sassafras, foxy"
        for person in fieldvalue.split(","):
            person = person.strip().title()
            if not person:
                # stray or trailing comma
                continue
            if person in peoplemap:
                add_img_person(img, peoplemap[person])
            elif person in known_people:
                add_img_person(img, person)
            else:
                print("WHO: %s" % person)
        return True  # only if not printed?

    @classmethod
    def convert_description(cls, img, fieldvalue):
        """Store the text as the image's "description" attribute."""
        img.set("description", fieldvalue)
        return True

    @classmethod
    def convert_event(cls, img, fieldvalue):
        """Only report the event; nothing is stored on the image."""
        print("EVENT: %s" % fieldvalue)
        # sometimes this appends to description...

    @classmethod
    def convert_title(cls, img, fieldvalue):
        """Store the text as the image's "label" attribute."""
        img.set("label", fieldvalue)
        return True

    @classmethod
    def convert_mail(cls, img, fieldvalue):
        """Discard the field."""
        # the mail tag was a hint that one of the tools should
        # *send it as email* to that person, not that it was anything
        # about the picture, so just discard it explicitly.
        return


def convert_fields(img, fields):
    """Run every entry of the fields dict through converter.convert.

    Returns True when at least one converter reported a change to img.
    """
    did_something = False
    for field in fields:
        did_something = (converter.convert(img, field, fields[field])
                         or did_something)
    return did_something


def _remove_if_present(path):
    """Delete path when it exists; a missing file is not an error."""
    if os.path.exists(path):
        os.remove(path)


def process_bins(workdir, fake=False):
    """Process any bins/capt remnants into the xml file.

    Every image in the default kimdaba album whose "file" attribute matches
    a <name>.jpg.xml found in workdir has its fields converted onto the
    album entry.  When anything changed and fake is false, the album file
    is replaced and the matching .capt/.xml remnants are deleted; when fake
    is true the result goes to /tmp/c2k.xml and the deletions are only
    printed.

    Raises Exception when a remnant contains a field not in known_fields.
    """
    # Imported here because this is the only function that needs the
    # album module; the conversion helpers stay importable without it.
    import kimdaba_album

    # grab the filenames, then walk the xml
    suffix = ".xml"
    # Slice the suffix off instead of str.replace(): replace() would strip
    # the first ".xml" in the name, which need not be the trailing one.
    has_xml = set([os.path.join(workdir, f[:-len(suffix)])
                   for f in os.listdir(workdir)
                   if f.endswith(".jpg" + suffix)])

    albumfile = kimdaba_album.kimdaba_default_album()
    album = kimdaba_album.parse(albumfile)
    load_known_people(album.findall("options/option"))

    did_something = False
    nuke_remnants = []
    for img in album.findall("images/image"):
        pathpart = img.get("file")
        if pathpart in has_xml:
            fields = get_fields_from_xml(pathpart)
            # make sure we don't have anything strange...
            unkfields = set(fields) - known_fields
            if unkfields:
                print("Unknown fields %s in %s" % (unkfields, pathpart))
                raise Exception("Bad Field %s" % unkfields)
            # mapper class?
            if convert_fields(img, fields):
                did_something = True
                nuke_remnants.append(pathpart)

    if did_something:
        if not fake:
            kimdaba_album.safe_replace(albumfile, album)
            for remnant in nuke_remnants:
                # Only the .xml is known to exist; a missing .capt must not
                # abort the cleanup after the album was already replaced.
                _remove_if_present(remnant + ".capt")
                _remove_if_present(remnant + ".xml")
            print("NOW re-run kimdaba, to get the new keywords in the menu")
        else:
            print("diff -u %s /tmp/c2k.xml" % albumfile)
            album.write("/tmp/c2k.xml")
            for remnant in nuke_remnants:
                print("rm %s .capt/.xml" % remnant)


def main(argv):
    """Command line entry point: [--no-act] YEARDIR."""
    args = [arg for arg in argv[1:] if arg != "--no-act"]
    no_act = len(args) != len(argv) - 1
    if len(args) != 1:
        sys.exit("usage: %s [--no-act] YEARDIR" % argv[0])
    # workpath should be the yeardir; we should be in
    # the dir with index.xml when we run this
    process_bins(args[0], fake=no_act)


if __name__ == "__main__":
    main(sys.argv)