#!/usr/bin/env python
# $Id: csvtoxml.py.html,v 1.2 2006/09/19 09:41:53 john Exp $
# Command-line synopsis printed by usage(); the single %s placeholder is
# filled with the program name.
USAGE = (
    "Usage: %s [-d DELIMITER] [-i INPUT-ENCODING] [-o OUTPUT-ENCODING]\n"
    "\t[-t TABLE-ELEMENT] [-r ROW-ELEMENT] [-k ID-COLUMN] INPUT-CSV OUTPUT-XML\n"
    "Convert a CSV input file (with a header row) into an XML file.\n"
)
import csv, sys, string
# Built-in default for each setting; the trailing comment names the
# command-line switch that overrides it.
DefaultDelimiter = ','          # -d
DefaultTableElement = 'TABLE'   # -t
DefaultRowElement = 'ROW'       # -r
DefaultInputEnc = 'utf-8'       # -i
DefaultOutputEnc = 'utf-8'      # -o
DefaultIdColumn = 'Id'          # -k

# XML declaration template; %s is filled with the output encoding name.
xmlsig = "<?xml version='1.0' encoding='%s'?>\n"

# The same defaults keyed by the placeholder names used in the usage text.
defaults = dict([
    ('DELIMITER', DefaultDelimiter),
    ('INPUT-ENCODING', DefaultInputEnc),
    ('OUTPUT-ENCODING', DefaultOutputEnc),
    ('TABLE-ELEMENT', DefaultTableElement),
    ('ROW-ELEMENT', DefaultRowElement),
    ('ID-COLUMN', DefaultIdColumn),
])
def newchar(c):
    """Return *c* unchanged if it is an ASCII letter or digit, else a space.

    c -- a one-character string.

    The result is used as the replacement for *c* when header text is
    turned into an XML element name.
    """
    # string.ascii_letters instead of string.letters: the latter is
    # locale-dependent and no longer exists in Python 3.  Restricting the
    # test to ASCII keeps locale-specific high bytes out of element names.
    if c in string.ascii_letters or c in string.digits:
        return c
    return ' '
# 256-entry translation table indexed by character code: entry i is
# newchar(chr(i)), i.e. letters and digits are kept and every other
# character becomes a space.  range() replaces the Python 2-only xrange(),
# and map() avoids leaking a loop variable into the module namespace.
newchars = ''.join(map(newchar, map(chr, range(256))))
def escape_data(data, inputenc, outputenc):
    """Return the CSV cell *data* with XML markup characters escaped.

    data      -- the cell value: a byte string encoded in *inputenc*, or an
                 already-decoded text string.
    inputenc  -- codec name used to decode byte-string input.
    outputenc -- codec name used to encode the result of byte-string input.

    Byte strings are decoded, escaped and re-encoded, and come back as byte
    strings in *outputenc*.  Text strings are escaped and returned as text;
    the two encoding arguments are not used for them.

    '&', '<' and '>' are replaced by '&amp;', '&lt;' and '&gt;'.

    Raises UnicodeError if the bytes cannot be decoded or the text cannot be
    encoded, and LookupError for an unknown codec name.
    """
    is_bytes = isinstance(data, bytes)
    if is_bytes:
        data = data.decode(inputenc)
    # Escape the decoded text rather than the encoded bytes so the result is
    # right even for encodings that are not ASCII-compatible.  '&' must be
    # handled first, otherwise the ampersands introduced by the other two
    # replacements would be escaped a second time.
    data = data.replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;')
    if is_bytes:
        data = data.encode(outputenc)
    return data
def create_header_name(name):
    """Turn a CSV column heading into an XML element name.

    Every character that the module-level ``newchars`` table maps to a
    space acts as a word separator.  Each remaining word is capitalised
    with string.capwords() (first character upper-cased, the rest
    lower-cased) and the words are joined without separators, so
    "first name" becomes "FirstName".

    name -- the heading text.
    Returns the generated name; it is empty when *name* contains no
    character that the table keeps.
    """
    # Method form of the deprecated string.translate() function, which
    # Python 3 removed.  Appending the empty slice name[:0] coerces the
    # table to the same string type as *name*, as string.translate() did.
    spaced = name.translate(newchars + name[:0])
    words = string.capwords(spaced).split()
    return ''.join(words)
def csvtoxml(input, output, delimiter=DefaultDelimiter,
             inputenc=DefaultInputEnc, outputenc=DefaultOutputEnc,
             tableelement=DefaultTableElement, rowelement=DefaultRowElement,
             idcolumn=DefaultIdColumn):
    """Convert the CSV file *input* into the XML file *output*.

    The first CSV row is the header; each heading is turned into an element
    name with create_header_name().  Every following row becomes one
    *rowelement* element holding one child element per cell, and all rows
    are wrapped in a single *tableelement* element.  Cells are paired with
    headings by position; surplus cells or headings are dropped.

    input        -- path of the CSV file to read.
    output       -- path of the XML file to write (an existing file is
                    overwritten).
    delimiter    -- field separator handed to csv.reader.
    inputenc     -- encoding of the CSV data.
    outputenc    -- encoding of the XML data; also named in the XML
                    declaration.
    tableelement -- name of the root element.
    rowelement   -- name of the per-row element.
    idcolumn     -- name of the column whose value is copied into each
                    row's ``id`` attribute.  It is compared
                    case-insensitively with the generated element names.
                    If no column matches, or idcolumn is not a string
                    (e.g. None), rows get no ``id`` attribute.

    An input file without any rows yields an empty *tableelement*.  A row
    too short to contain the id column is written without an ``id``
    attribute.  Both files are closed before returning, also on error.
    """
    if str is bytes:
        # Python 2: csv yields byte strings and escape_data() transcodes
        # each cell, so the files are opened without an encoding.
        read_opts, write_opts = {}, {}
    else:
        # Python 3: csv needs text, so the file objects do the decoding and
        # encoding; newline='' is what the csv documentation requires.
        read_opts = {'newline': '', 'encoding': inputenc}
        write_opts = {'encoding': outputenc}

    with open(input, 'r', **read_opts) as infile:
        with open(output, 'w', **write_opts) as outfile:
            reader = csv.reader(infile, 'excel', delimiter=delimiter)
            write = outfile.write
            write(xmlsig % outputenc)
            write('<%s>\n' % tableelement)

            # next() with a default: an empty input simply has no columns
            # instead of aborting with StopIteration after partial output.
            header = [create_header_name(title) for title in next(reader, [])]

            try:
                id_index = [key.lower() for key in header].index(idcolumn.lower())
            except (ValueError, AttributeError):
                # ValueError: no such column.  AttributeError: idcolumn has
                # no lower(), which callers may use to switch ids off.
                id_index = -1

            for row in reader:
                cells = [escape_data(cell, inputenc, outputenc) for cell in row]
                if 0 <= id_index < len(cells):
                    # The id sits inside a double-quoted attribute, so a
                    # literal quote in the value must be escaped as well.
                    row_id = cells[id_index].replace('"', '&quot;')
                    write(' <%s id="%s">\n' % (rowelement, row_id))
                else:
                    write(' <%s>\n' % rowelement)
                for key, cell in zip(header, cells):
                    write(' <%s>%s</%s>\n' % (key, cell, key))
                write(' </%s>\n' % rowelement)

            write('</%s>\n' % tableelement)
def usage(status):
    """Write the usage text and the option defaults to stderr, then exit.

    status -- exit status handed to sys.exit().

    Does not return: sys.exit() raises SystemExit.
    """
    # sys.stderr.write() with an explicit newline produces the same output
    # as the "print >> sys.stderr" statement, and items() the same pairs as
    # iteritems(); both forms work on Python 2 and Python 3.
    err = sys.stderr
    err.write(USAGE % sys.argv[0])
    err.write("Defaults:\n")
    for key, val in defaults.items():
        err.write("%-20s\"%s\"\n" % (key, val))
    sys.exit(status)
def main():
    """Command-line entry point: parse sys.argv and run csvtoxml().

    Options -d, -i, -o, -t, -r and -k override the delimiter, input
    encoding, output encoding, table element, row element and id column.
    -h or -? prints the usage text and exits with status 0.

    A getopt error (unknown option, missing option value) prints getopt's
    message to stderr and exits with status 1.  Anything other than exactly
    two positional arguments (INPUT-CSV OUTPUT-XML) prints the usage text
    and exits with status 1.
    """
    import getopt

    try:
        opts, args = getopt.getopt(sys.argv[1:], 'd:i:o:t:r:k:h?')
    except getopt.error as err:
        sys.stderr.write(str(err) + '\n')
        sys.exit(1)

    # Maps each value-taking switch to the csvtoxml() keyword it sets.
    option_targets = {
        '-d': 'delimiter',
        '-i': 'inputenc',
        '-o': 'outputenc',
        '-t': 'tableelement',
        '-r': 'rowelement',
        '-k': 'idcolumn',
    }
    settings = {
        'delimiter': DefaultDelimiter,
        'inputenc': DefaultInputEnc,
        'outputenc': DefaultOutputEnc,
        'tableelement': DefaultTableElement,
        'rowelement': DefaultRowElement,
        'idcolumn': DefaultIdColumn,
    }

    # Options are handled before the positional-argument check so that a
    # bare "-h" reaches the help branch.
    for opt, value in opts:
        if opt in option_targets:
            settings[option_targets[opt]] = value
        else:
            # Only -h and -? remain; asking for help is not an error.
            usage(0)

    if len(args) != 2:
        usage(1)
    csv_path, xml_path = args
    csvtoxml(csv_path, xml_path, **settings)
# Run the converter only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()