#!/usr/bin/env python

# $Id: csvtoxml.py.html,v 1.2 2006/09/19 09:41:53 john Exp $

# Command-line help text.  The single %s is filled in with the program name
# (sys.argv[0]) when usage() writes it to stderr.
USAGE = """\
Usage: %s [-d DELIMITER] [-i INPUT-ENCODING] [-o OUTPUT-ENCODING]
\t[-t TABLE-ELEMENT] [-r ROW-ELEMENT] [-k ID-COLUMN] INPUT-CSV OUTPUT-XML
Convert a CSV input file (with a header row) into an XML file.
"""

import csv, sys, string

# Built-in default for each command-line option; the trailing comment names
# the option flag that overrides it in main().
DefaultDelimiter = ','          # -d
DefaultTableElement = 'TABLE'   # -t
DefaultRowElement = 'ROW'       # -r
DefaultInputEnc = 'utf-8'       # -i
DefaultOutputEnc = 'utf-8'      # -o
DefaultIdColumn = 'Id'          # -k
# Template for the XML declaration; %s is replaced by the output encoding.
xmlsig = "<?xml version='1.0' encoding='%s'?>\n"

# The same defaults keyed by the placeholder names used in USAGE; usage()
# prints this table after the usage text.
defaults = {
    'DELIMITER':        DefaultDelimiter,
    'INPUT-ENCODING':   DefaultInputEnc,
    'OUTPUT-ENCODING':  DefaultOutputEnc,
    'TABLE-ELEMENT':    DefaultTableElement,
    'ROW-ELEMENT':      DefaultRowElement,
    'ID-COLUMN':        DefaultIdColumn,
}

def newchar(c):
    """Return *c* if it is an ASCII letter or digit, otherwise a space.

    Used to build the ``newchars`` translation table below, which blanks out
    every character that should not end up in a generated element name.
    """
    # ascii_letters rather than the locale-dependent string.letters, so that
    # bytes above 0x7F are never kept, whatever locale happens to be active.
    if c in string.ascii_letters or c in string.digits:
        return c
    return ' '

# 256-entry translation table indexed by character code: letters and digits
# map to themselves, everything else maps to a space.
newchars = ''.join([newchar(chr(x)) for x in range(256)])

def escape_data(data, inputenc, outputenc):
    """Transcode one CSV field and escape it for inclusion in XML.

    data      -- the field value as a byte string
    inputenc  -- name of the encoding *data* is in
    outputenc -- name of the encoding of the returned byte string

    Returns *data* re-encoded in *outputenc* with ``&``, ``<``, ``>`` and
    ``"`` replaced by entity references, so the result can be used both as
    element content and inside a double-quoted attribute value.  Characters
    that *outputenc* cannot represent are emitted as numeric character
    references instead of raising UnicodeEncodeError.

    Raises UnicodeDecodeError if *data* is not valid in *inputenc*.
    """
    text = data.decode(inputenc)
    # '&' has to be replaced first; otherwise the '&' of the entities added
    # by the later replacements would itself be escaped.
    text = text.replace('&', '&amp;')
    text = text.replace('<', '&lt;')
    text = text.replace('>', '&gt;')
    text = text.replace('"', '&quot;')
    # Escaping is done on the decoded text, before encoding, so the markup
    # characters are found regardless of how outputenc represents them.
    return text.encode(outputenc, 'xmlcharrefreplace')

def create_header_name(name):
    """Turn a CSV column heading into an XML element name.

    Every character that the ``newchars`` table maps to a space (anything
    other than a letter or digit) acts as a word separator; each word is
    capitalised and the words are joined, e.g. ``'first name'`` becomes
    ``'FirstName'``.

    An XML element name may not be empty or start with a digit, so such a
    result is prefixed with an underscore (``'2nd'`` -> ``'_2nd'``,
    ``''`` -> ``'_'``).
    """
    words = string.translate(name, newchars).split()
    element = ''.join([word.capitalize() for word in words])
    if not element or element[0] in string.digits:
        element = '_' + element
    return element

def csvtoxml(input, output, delimiter=DefaultDelimiter,
             inputenc=DefaultInputEnc, outputenc=DefaultOutputEnc,
             tableelement=DefaultTableElement, rowelement=DefaultRowElement,
             idcolumn=DefaultIdColumn):
    """Convert the CSV file *input* into the XML file *output*.

    input        -- path of the CSV file; its first record is the header row
    output       -- path of the XML file to write (opened with mode 'w')
    delimiter    -- field delimiter handed to csv.reader
    inputenc     -- encoding of the CSV data
    outputenc    -- encoding of the XML data, also named in the XML
                    declaration
    tableelement -- name of the enclosing (root) element
    rowelement   -- name of the element written for each CSV record
    idcolumn     -- column whose value becomes the ``id`` attribute of each
                    row element.  It is matched case-insensitively against
                    the header names as converted by create_header_name();
                    if nothing matches (or the value has no lower() method,
                    e.g. None) rows are written without an ``id``.

    Header names become child element names.  A record with fewer fields
    than the header only produces the elements it has values for, and has
    no ``id`` attribute if the id field itself is missing; fields beyond
    the header are dropped.  An input without any record yields an empty
    table element.
    """
    # Binary mode is what the Python 2 csv module requires of its input file.
    infile = open(input, 'rb')
    try:
        reader = csv.reader(infile, 'excel', delimiter=delimiter)
        outfile = open(output, 'w')
        try:
            write = outfile.write
            write(xmlsig % outputenc)
            write('<%s>\n' % tableelement)

            # Consume just the first record as the header; the loop body
            # never runs for an empty file, leaving the header empty.
            header = []
            for names in reader:
                header = [create_header_name(n) for n in names]
                break

            try:
                lowered = [h.lower() for h in header]
                idindex = lowered.index(idcolumn.lower())
            except (ValueError, AttributeError):
                idindex = -1

            for row in reader:
                row = [escape_data(field, inputenc, outputenc)
                       for field in row]
                # Blank lines come back from csv.reader as [], and short
                # records may simply lack the id field.
                if 0 <= idindex < len(row):
                    # The value sits inside a double-quoted attribute, so a
                    # literal quote must not survive.
                    rowid = row[idindex].replace('"', '&quot;')
                    write('    <%s id="%s">\n' % (rowelement, rowid))
                else:
                    write('    <%s>\n' % rowelement)
                for key, col in zip(header, row):
                    write('        <%s>%s</%s>\n' % (key, col, key))
                write('    </%s>\n' % rowelement)
            write('</%s>\n' % tableelement)
        finally:
            outfile.close()
    finally:
        infile.close()

def usage(status):
    """Write the usage text and the option defaults to stderr, then exit.

    status -- exit status handed to sys.exit()
    """
    err = sys.stderr
    err.write(USAGE % sys.argv[0])
    report = ["Defaults:"]
    for option in defaults:
        report.append("%-20s\"%s\"" % (option, defaults[option]))
    # One line per entry, each terminated by a newline.
    err.write('\n'.join(report) + '\n')
    sys.exit(status)

def main():
    """Parse the command line and run the conversion.

    Expects exactly two positional arguments (input CSV path, output XML
    path).  The options -d, -i, -o, -t, -r and -k override the matching
    defaults; any other accepted option (-h, -?) prints the usage text and
    exits with status 1, as does a wrong number of positional arguments.
    An option-parsing error is reported on stderr and exits with status 1.
    """
    import getopt
    try:
        opts, args = getopt.getopt(sys.argv[1:], 'd:i:o:t:r:k:h?')
    except getopt.error:
        # Fetch the exception through sys.exc_info() so that this clause
        # needs no version-specific "except ... , name" syntax.
        problem = sys.exc_info()[1]
        sys.stderr.write(str(problem) + '\n')
        sys.exit(1)
    if len(args) != 2:
        usage(1)
    source, target = args

    # Keyword arguments for csvtoxml(), starting from the defaults.
    settings = {
        'delimiter': DefaultDelimiter,
        'inputenc': DefaultInputEnc,
        'outputenc': DefaultOutputEnc,
        'tableelement': DefaultTableElement,
        'rowelement': DefaultRowElement,
        'idcolumn': DefaultIdColumn,
    }
    # Which keyword each option flag overrides.
    keyword_for = {
        '-d': 'delimiter',
        '-i': 'inputenc',
        '-o': 'outputenc',
        '-t': 'tableelement',
        '-r': 'rowelement',
        '-k': 'idcolumn',
    }
    for flag, value in opts:
        if flag in keyword_for:
            settings[keyword_for[flag]] = value
        else:
            usage(1)
    csvtoxml(source, target, **settings)

# Run the converter only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == '__main__':
    main()