Tabular data structure conversion in Python
This is my OLD blog. I've copied this post over to my NEW blog at:
http://www.saltycrane.com/blog/2007/12/tabular-data-structure-conversion-in/
You should be redirected in 2 seconds.
Here is a Python library to convert between various tabular data
structures including list of lists, list of dicts, dict of lists, and
dict of dicts.
My
original
attempts at these conversions required that the data be
rectangular (e.g. each column has the same number of
elements). However, further research led me
to
this ASPN Recipe which uses map
to transpose a list
of lists even if it is not
rectangular. With
help from the mailing list, I rewrote the recipe without using
lambda
. (I did this
because
Guido suggested not to use map
with lambda
for the sake of clarity.)
The table below lists all the conversions available between the 8
types of tabular data structures. The function names link to the
function definition below. I
used
list comprehensions wherever possible and
a
functional/
declarative
approach in general. It is likely there is a better way to do many of
these conversions. (After all,
I
just learned how to use zip()
.) In particular, the
functions with the comment "Better way?" use a number of the
other conversion functions in series to achieve the desired
result. All of these could be optimized. Feedback on better methods
is welcome.
TO | |||||||||
lorl list of lists where each inner list is a row |
locl list of lists where each inner list is a column |
lord list of dicts where each dict is a row |
locd list of dicts where each dict is a column |
dorl dict of lists where each list is a row |
docl dict of lists where each list is a column |
dord dict of dicts where each inner dict is a row |
docd dict of dicts where each inner dict is a column |
||
F R O M |
lorl list of lists where each inner list is a row |
lorl2 locl() |
lorl2 lord() |
lorl2 locd() |
lorl2 dorl() |
lorl2 docl() |
lorl2 dord() |
lorl2 docd() |
|
locl list of lists where each inner list is a column |
locl2 lorl() |
locl2 lord() |
locl2 locd() |
locl2 dorl() |
locl2 docl() |
locl2 dord() |
locl2 docd() |
||
lord list of dicts where each dict is a row |
lord2 lorl() |
lord2 locl() |
lord2 locd() |
lord2 dorl() |
lord2 docl() |
lord2 dord() |
lord2 docd() |
||
locd list of dicts where each dict is a column |
locd2 lorl() |
locd2 locl() |
locd2 lord() |
locd2 dorl() |
locd2 docl() |
locd2 dord() |
locd2 docd() |
||
dorl dict of lists where each list is a row |
dorl2 lorl() |
dorl2 locl() |
dorl2 lord() |
dorl2 locd() |
dorl2 docl() |
dorl2 dord() |
dorl2 docd() |
||
docl dict of lists where each list is a column |
docl2 lorl() |
docl2 locl() |
docl2 lord() |
docl2 locd() |
docl2 dorl() |
docl2 dord() |
docl2 docd() |
||
dord dict of dicts where each inner dict is a row |
dord2 lorl() |
dord2 locl() |
dord2 lord() |
dord2 locd() |
dord2 dorl() |
dord2 docl() |
dord2 docd() |
||
docd dict of dicts where each inner dict is a column |
docd2 lorl() |
docd2 locl() |
docd2 lord() |
docd2 locd() |
docd2 dorl() |
docd2 docl() |
docd2 dord()
Example data structures
Here are examples of the 8 different tabular
data structures. Note that if a transpose is performed (i.e. rows switched with
columns or vice versa), the output is padded with None
.
Otherwise, it is left as is.
# Example data for each of the 8 tabular structures.  The last row / last
# column is deliberately one element short to show non-rectangular input.

# lorl- list of lists where each inner list is a row
lorl = [
    ['a1', 'b1', 'c1'],  # row 1
    ['a2', 'b2', 'c2'],  # row 2
    ['a3', 'b3', 'c3'],  # row 3
    ['a4', 'b4', ],      # row 4
]

# locl- list of lists where each inner list is a column
locl = [
    ['a1', 'a2', 'a3', 'a4'],  # col a
    ['b1', 'b2', 'b3', 'b4'],  # col b
    ['c1', 'c2', 'c3', ],      # col c
]

# lord- list of dicts where each dict is a row
lord = [
    {'a': 'a1', 'b': 'b1', 'c': 'c1'},  # row 1
    {'a': 'a2', 'b': 'b2', 'c': 'c2'},  # row 2
    {'a': 'a3', 'b': 'b3', 'c': 'c3'},  # row 3
    {'a': 'a4', 'b': 'b4', },           # row 4
]

# locd- list of dicts where each dict is a column
locd = [
    {1: 'a1', 2: 'a2', 3: 'a3', 4: 'a4'},  # col a
    {1: 'b1', 2: 'b2', 3: 'b3', 4: 'b4'},  # col b
    {1: 'c1', 2: 'c2', 3: 'c3', },         # col c
]

# dorl- dict of lists where each list is a row
dorl = {
    1: ['a1', 'b1', 'c1'],  # row 1
    2: ['a2', 'b2', 'c2'],  # row 2
    3: ['a3', 'b3', 'c3'],  # row 3
    4: ['a4', 'b4', ],      # row 4
}

# docl- dict of lists where each list is a column
docl = {
    'a': ['a1', 'a2', 'a3', 'a4'],  # column a
    'b': ['b1', 'b2', 'b3', 'b4'],  # column b
    'c': ['c1', 'c2', 'c3', ],      # column c
}

# dord- dict of dicts where each inner dict is a row
dord = {
    1: {'a': 'a1', 'b': 'b1', 'c': 'c1'},  # row 1
    2: {'a': 'a2', 'b': 'b2', 'c': 'c2'},  # row 2
    3: {'a': 'a3', 'b': 'b3', 'c': 'c3'},  # row 3
    4: {'a': 'a4', 'b': 'b4', },           # row 4
}

# docd- dict of dicts where each inner dict is a column
docd = {
    'a': {1: 'a1', 2: 'a2', 3: 'a3', 4: 'a4'},  # column a
    'b': {1: 'b1', 2: 'b2', 3: 'b3', 4: 'b4'},  # column b
    'c': {1: 'c1', 2: 'c2', 3: 'c3', },         # column c
}

# list of row keys and column keys
rowkeys = [1, 2, 3, 4]
colkeys = ['a', 'b', 'c']
Code
Below is the library of functions.
#!/usr/bin/python
"""tabular.py
Functions to convert tabular data structures
The following data structures are supported:
lorl- list of lists where each inner list is a row
locl- list of lists where each inner list is a column
lord- list of dicts where each dict is a row
locd- list of dicts where each dict is a column
dorl- dict of lists where each list is a row
docl- dict of lists where each list is a column
dord- dict of dicts where each inner dict is a row
docd- dict of dicts where each inner dict is a column
"""
#-------------------------------------------------------
# from lorl to ...
#-------------------------------------------------------
def lorl2locl(lorl):
    """Transpose a list of rows into a list of columns.

    The input need not be rectangular: rows shorter than the longest row
    are padded with None, so every returned column has one entry per row.
    An empty input yields an empty list.
    """
    # The old map(None, *lorl) idiom exists only on Python 2, and with a
    # single row it yields bare items instead of 1-tuples (so list(col)
    # would split a string cell into characters).  zip_longest always
    # yields tuples and pads with None by default.
    try:
        from itertools import zip_longest
    except ImportError:  # Python 2.6 / 2.7 spelling
        from itertools import izip_longest as zip_longest
    return [list(col) for col in zip_longest(*lorl)]
def lorl2lord(lorl, colkeys):
    """Turn each row list into a dict keyed by the matching column key."""
    lord = []
    for row in lorl:
        # zip stops at the shorter sequence, so short rows simply get
        # fewer keys.
        lord.append(dict(zip(colkeys, row)))
    return lord
def lorl2locd(lorl, rowkeys):
    """Convert a list of row lists to a list of column dicts.

    Implemented as a two-step pipeline: lorl2locl, then locl2locd.
    """
    # better way?
    columns = lorl2locl(lorl)
    return locl2locd(columns, rowkeys)
def lorl2dorl(lorl, rowkeys):
    """Key each row list by the row key at the same position."""
    rows = list(lorl)
    return dict(zip(rowkeys, rows))
def lorl2docl(lorl, colkeys):
    """Convert a list of row lists to a dict of column lists.

    Implemented as a two-step pipeline: lorl2locl, then locl2docl.
    """
    # better way?
    columns = lorl2locl(lorl)
    return locl2docl(columns, colkeys)
def lorl2dord(lorl, rowkeys, colkeys):
    """Convert a list of row lists to a dict of row dicts."""
    row_dicts = []
    for row in lorl:
        row_dicts.append(dict(zip(colkeys, row)))
    return dict(zip(rowkeys, row_dicts))
def lorl2docd(lorl, rowkeys, colkeys):
    """Convert a list of row lists to a dict of column dicts.

    The rows are first transposed with lorl2locl; each resulting column
    is then keyed by rowkeys and stored under its column key.
    """
    # better way?
    col_dicts = []
    for col in lorl2locl(lorl):
        col_dicts.append(dict(zip(rowkeys, col)))
    return dict(zip(colkeys, col_dicts))
#-------------------------------------------------------
# from locl to ...
#-------------------------------------------------------
def locl2lorl(locl):
    """Transpose a list of columns into a list of rows.

    The input need not be rectangular: columns shorter than the longest
    column are padded with None, so every returned row has one entry per
    column.  An empty input yields an empty list.
    """
    # The old map(None, *locl) idiom exists only on Python 2, and with a
    # single column it yields bare items instead of 1-tuples (so
    # list(row) would split a string cell into characters).  zip_longest
    # always yields tuples and pads with None by default.
    try:
        from itertools import zip_longest
    except ImportError:  # Python 2.6 / 2.7 spelling
        from itertools import izip_longest as zip_longest
    return [list(row) for row in zip_longest(*locl)]
def locl2lord(locl, colkeys):
    """Convert a list of column lists to a list of row dicts.

    Implemented as a two-step pipeline: locl2lorl, then lorl2lord.
    """
    # better way?
    rows = locl2lorl(locl)
    return lorl2lord(rows, colkeys)
def locl2locd(locl, rowkeys):
    """Turn each column list into a dict keyed by the matching row key."""
    locd = []
    for col in locl:
        # zip stops at the shorter sequence, so short columns simply get
        # fewer keys.
        locd.append(dict(zip(rowkeys, col)))
    return locd
def locl2dorl(locl, rowkeys):
    """Convert a list of column lists to a dict of row lists.

    The columns are transposed with locl2lorl and each resulting row is
    stored under the row key at the same position.
    """
    # better way?
    rows = locl2lorl(locl)
    return dict(zip(rowkeys, rows))
def locl2docl(locl, colkeys):
    """Key each column list by the column key at the same position."""
    docl = {}
    for ckey, col in zip(colkeys, locl):
        docl[ckey] = col
    return docl
def locl2dord(locl, rowkeys, colkeys):
    """Convert a list of column lists to a dict of row dicts.

    The columns are transposed with locl2lorl; each resulting row is
    keyed by colkeys and stored under its row key.
    """
    # better way?
    row_dicts = []
    for row in locl2lorl(locl):
        row_dicts.append(dict(zip(colkeys, row)))
    return dict(zip(rowkeys, row_dicts))
def locl2docd(locl, rowkeys, colkeys):
    """Convert a list of column lists to a dict of column dicts."""
    col_dicts = []
    for col in locl:
        col_dicts.append(dict(zip(rowkeys, col)))
    return dict(zip(colkeys, col_dicts))
#-------------------------------------------------------
# from lord to ...
#-------------------------------------------------------
def lord2lorl(lord, colkeys):
    """Convert a list of row dicts to a list of row lists.

    Values appear in colkeys order; a column key absent from a row is
    skipped rather than padded.
    """
    lorl = []
    for row in lord:
        values = []
        for ckey in colkeys:
            if ckey in row:
                values.append(row[ckey])
        lorl.append(values)
    return lorl
def lord2locl(lord, colkeys):
    """Convert a list of row dicts to a list of column lists.

    Implemented as a two-step pipeline: lord2lorl, then lorl2locl.
    """
    # better way?
    rows = lord2lorl(lord, colkeys)
    return lorl2locl(rows)
def lord2locd(lord, rowkeys, colkeys):
    """Convert a list of row dicts to a list of column dicts.

    One dict is produced per column key; it maps each row key to that
    row's value for the column, omitting rows that lack the column.
    """
    locd = []
    for ckey in colkeys:
        col = {}
        for rkey, row in zip(rowkeys, lord):
            if ckey in row:
                col[rkey] = row[ckey]
        locd.append(col)
    return locd
def lord2dorl(lord, rowkeys, colkeys):
    """Convert a list of row dicts to a dict of row lists.

    Each row's values are listed in colkeys order (missing columns are
    skipped) and stored under the row key at the same position.
    """
    rows = []
    for row in lord:
        rows.append([row[ckey] for ckey in colkeys if ckey in row])
    return dict(zip(rowkeys, rows))
def lord2docl(lord, colkeys):
    """Convert a list of row dicts to a dict of column lists.

    For each column key, collects that column's value from every row
    that has it, in row order.
    """
    docl = {}
    for ckey in colkeys:
        column = []
        for row in lord:
            if ckey in row:
                column.append(row[ckey])
        docl[ckey] = column
    return docl
def lord2dord(lord, rowkeys):
    """Key each row dict by the row key at the same position."""
    dord = {}
    for rkey, row in zip(rowkeys, lord):
        dord[rkey] = row
    return dord
def lord2docd(lord, rowkeys, colkeys):
    """Convert a list of row dicts to a dict of column dicts.

    The result maps each column key to a dict of {row key: value}.  A row
    that lacks a column is simply left out of that column's dict.
    """
    docd = {}
    for ckey in colkeys:
        col = {}
        # Pair each row with its key *before* filtering.  Filtering first
        # and zipping afterwards shifts later values onto the wrong row
        # keys whenever a row in the middle lacks this column.
        for rkey, row in zip(rowkeys, lord):
            if ckey in row:
                col[rkey] = row[ckey]
        docd[ckey] = col
    return docd
#-------------------------------------------------------
# from locd to ...
#-------------------------------------------------------
def locd2lorl(locd, rowkeys):
    """Convert a list of column dicts to a list of row lists.

    Implemented as a two-step pipeline: locd2locl, then locl2lorl.
    """
    # better way?
    columns = locd2locl(locd, rowkeys)
    return locl2lorl(columns)
def locd2locl(locd, rowkeys):
    """Convert a list of column dicts to a list of column lists.

    Values appear in rowkeys order; a row key absent from a column is
    skipped rather than padded.
    """
    locl = []
    for col in locd:
        values = []
        for rkey in rowkeys:
            if rkey in col:
                values.append(col[rkey])
        locl.append(values)
    return locl
def locd2lord(locd, rowkeys, colkeys):
    """Convert a list of column dicts to a list of row dicts.

    One dict is produced per row key; it maps each column key to that
    column's value for the row, omitting columns that lack the row.
    """
    lord = []
    for rkey in rowkeys:
        row = {}
        for ckey, col in zip(colkeys, locd):
            if rkey in col:
                row[ckey] = col[rkey]
        lord.append(row)
    return lord
def locd2dorl(locd, rowkeys):
    """Convert a list of column dicts to a dict of row lists.

    For each row key, collects that row's value from every column that
    has it, in column order.
    """
    dorl = {}
    for rkey in rowkeys:
        row = []
        for col in locd:
            if rkey in col:
                row.append(col[rkey])
        dorl[rkey] = row
    return dorl
def locd2docl(locd, rowkeys, colkeys):
    """Convert a list of column dicts to a dict of column lists.

    Each column's values are listed in rowkeys order (missing rows are
    skipped) and stored under the column key at the same position.
    """
    columns = []
    for col in locd:
        columns.append([col[rkey] for rkey in rowkeys if rkey in col])
    return dict(zip(colkeys, columns))
def locd2dord(locd, rowkeys, colkeys):
    """Convert a list of column dicts to a dict of row dicts.

    The result maps each row key to a dict of {column key: value}.  A
    column that lacks a row is simply left out of that row's dict.
    """
    dord = {}
    for rkey in rowkeys:
        row = {}
        # Pair each column with its key *before* filtering.  Filtering
        # first and zipping afterwards shifts later values onto the wrong
        # column keys whenever a column in the middle lacks this row.
        for ckey, col in zip(colkeys, locd):
            if rkey in col:
                row[ckey] = col[rkey]
        dord[rkey] = row
    return dord
def locd2docd(locd, colkeys):
    """Key each column dict by the column key at the same position."""
    docd = {}
    for ckey, col in zip(colkeys, locd):
        docd[ckey] = col
    return docd
#-------------------------------------------------------
# from dorl to ...
#-------------------------------------------------------
def dorl2lorl(dorl, rowkeys):
    """Return the row lists of dorl in rowkeys order.

    Raises KeyError if a row key is not present in dorl.
    """
    lorl = []
    for rkey in rowkeys:
        lorl.append(dorl[rkey])
    return lorl
def dorl2locl(dorl, rowkeys):
    """Convert a dict of row lists to a list of column lists.

    Implemented as a two-step pipeline: dorl2lorl, then lorl2locl.
    """
    # better way?
    rows = dorl2lorl(dorl, rowkeys)
    return lorl2locl(rows)
def dorl2lord(dorl, rowkeys, colkeys):
    """Convert a dict of row lists to a list of row dicts.

    Rows are emitted in rowkeys order, each keyed by colkeys.  Raises
    KeyError if a row key is not present in dorl.
    """
    lord = []
    for rkey in rowkeys:
        lord.append(dict(zip(colkeys, dorl[rkey])))
    return lord
def dorl2locd(dorl, rowkeys):
    """Convert a dict of row lists to a list of column dicts.

    Implemented as a pipeline: dorl2lorl, lorl2locl, then locl2locd.
    """
    # better way?
    rows = dorl2lorl(dorl, rowkeys)
    columns = lorl2locl(rows)
    return locl2locd(columns, rowkeys)
def dorl2docl(dorl, rowkeys, colkeys):
    """Convert a dict of row lists to a dict of column lists.

    Implemented as a pipeline: dorl2lorl, lorl2locl, then locl2docl.
    """
    # better way?
    rows = dorl2lorl(dorl, rowkeys)
    columns = lorl2locl(rows)
    return locl2docl(columns, colkeys)
def dorl2dord(dorl, rowkeys, colkeys):
    """Convert a dict of row lists to a dict of row dicts.

    Implemented as a two-step pipeline: dorl2lorl, then lorl2dord.
    """
    # better way?
    rows = dorl2lorl(dorl, rowkeys)
    return lorl2dord(rows, rowkeys, colkeys)
def dorl2docd(dorl, rowkeys, colkeys):
    """Convert a dict of row lists to a dict of column dicts.

    Implemented as a pipeline: dorl2lorl, lorl2locl, then locl2docd.
    """
    # better way?
    rows = dorl2lorl(dorl, rowkeys)
    columns = lorl2locl(rows)
    return locl2docd(columns, rowkeys, colkeys)
#-------------------------------------------------------
# from docl to ...
#-------------------------------------------------------
def docl2lorl(docl, colkeys):
    """Convert a dict of column lists to a list of row lists.

    Implemented as a two-step pipeline: docl2locl, then locl2lorl.
    """
    # better way?
    columns = docl2locl(docl, colkeys)
    return locl2lorl(columns)
def docl2locl(docl, colkeys):
    """Return the column lists of docl in colkeys order.

    Raises KeyError if a column key is not present in docl.
    """
    locl = []
    for ckey in colkeys:
        locl.append(docl[ckey])
    return locl
def docl2lord(docl, rowkeys, colkeys):
    """Convert a dict of column lists to a list of row dicts.

    Implemented as a pipeline: docl2locl, locl2lorl, then lorl2lord.
    The rowkeys argument is accepted but not used.
    """
    # better way?
    columns = docl2locl(docl, colkeys)
    rows = locl2lorl(columns)
    return lorl2lord(rows, colkeys)
def docl2locd(docl, rowkeys, colkeys):
    """Convert a dict of column lists to a list of column dicts.

    Columns are emitted in colkeys order, each keyed by rowkeys.  Raises
    KeyError if a column key is not present in docl.
    """
    locd = []
    for ckey in colkeys:
        locd.append(dict(zip(rowkeys, docl[ckey])))
    return locd
def docl2dorl(docl, rowkeys, colkeys):
    """Convert a dict of column lists to a dict of row lists.

    Implemented as a pipeline: docl2locl, locl2lorl, then lorl2dorl.
    """
    # better way?
    columns = docl2locl(docl, colkeys)
    rows = locl2lorl(columns)
    return lorl2dorl(rows, rowkeys)
def docl2dord(docl, rowkeys, colkeys):
    """Convert a dict of column lists to a dict of row dicts.

    Implemented as a pipeline: docl2locl, locl2lorl, then lorl2dord.
    """
    # better way?
    columns = docl2locl(docl, colkeys)
    rows = locl2lorl(columns)
    return lorl2dord(rows, rowkeys, colkeys)
def docl2docd(docl, rowkeys, colkeys):
    """Convert a dict of column lists to a dict of column dicts.

    Implemented as a two-step pipeline: docl2locl, then locl2docd.
    """
    # better way?
    columns = docl2locl(docl, colkeys)
    return locl2docd(columns, rowkeys, colkeys)
#-------------------------------------------------------
# from dord to ...
#-------------------------------------------------------
def dord2lorl(dord, rowkeys, colkeys):
    """Convert a dict of row dicts to a list of row lists.

    Rows are emitted in rowkeys order and values in colkeys order.  Row
    keys absent from dord, and column keys absent from a row, are
    skipped.
    """
    lorl = []
    for rkey in rowkeys:
        if rkey not in dord:
            continue
        values = []
        for ckey in colkeys:
            if ckey in dord[rkey]:
                values.append(dord[rkey][ckey])
        lorl.append(values)
    return lorl
def dord2locl(dord, rowkeys, colkeys):
    """Convert a dict of row dicts to a list of column lists.

    Implemented as a two-step pipeline: dord2lorl, then lorl2locl.
    """
    # better way?
    rows = dord2lorl(dord, rowkeys, colkeys)
    return lorl2locl(rows)
def dord2lord(dord, rowkeys):
    """Return the row dicts of dord in rowkeys order.

    Raises KeyError if a row key is not present in dord.
    """
    lord = []
    for rkey in rowkeys:
        lord.append(dord[rkey])
    return lord
def dord2locd(dord, rowkeys, colkeys):
    """Convert a dict of row dicts to a list of column dicts.

    Implemented as a two-step pipeline: dord2lord, then lord2locd.
    """
    # better way?
    row_dicts = dord2lord(dord, rowkeys)
    return lord2locd(row_dicts, rowkeys, colkeys)
def dord2dorl(dord, rowkeys, colkeys):
    """Convert a dict of row dicts to a dict of row lists.

    Each row's values are listed in colkeys order, skipping column keys
    the row does not have.  Raises KeyError if a row key is not present
    in dord.
    """
    dorl = {}
    for rkey in rowkeys:
        values = []
        for ckey in colkeys:
            if ckey in dord[rkey]:
                values.append(dord[rkey][ckey])
        dorl[rkey] = values
    return dorl
def dord2docl(dord, rowkeys, colkeys):
    """Convert a dict of row dicts to a dict of column lists.

    Implemented as a pipeline: dord2lorl, lorl2locl, then locl2docl.
    """
    # better way?
    rows = dord2lorl(dord, rowkeys, colkeys)
    columns = lorl2locl(rows)
    return locl2docl(columns, colkeys)
def dord2docd(dord, rowkeys, colkeys):
    """Convert a dict of row dicts to a dict of column dicts.

    Implemented as a pipeline: dord2lorl, lorl2locl, then locl2docd.
    """
    # better way?
    rows = dord2lorl(dord, rowkeys, colkeys)
    columns = lorl2locl(rows)
    return locl2docd(columns, rowkeys, colkeys)
#-------------------------------------------------------
# from docd to ...
#-------------------------------------------------------
def docd2lorl(docd, rowkeys, colkeys):
    """Convert a dict of column dicts to a list of row lists.

    Implemented as a two-step pipeline: docd2locl, then locl2lorl.
    """
    # better way?
    columns = docd2locl(docd, rowkeys, colkeys)
    return locl2lorl(columns)
def docd2locl(docd, rowkeys, colkeys):
    """Convert a dict of column dicts to a list of column lists.

    Columns are emitted in colkeys order and values in rowkeys order.
    Column keys absent from docd, and row keys absent from a column, are
    skipped.
    """
    locl = []
    for ckey in colkeys:
        if ckey not in docd:
            continue
        values = []
        for rkey in rowkeys:
            if rkey in docd[ckey]:
                values.append(docd[ckey][rkey])
        locl.append(values)
    return locl
def docd2lord(docd, rowkeys, colkeys):
    """Convert a dict of column dicts to a list of row dicts.

    Implemented as a two-step pipeline: docd2locd, then locd2lord.
    """
    # better way?
    col_dicts = docd2locd(docd, colkeys)
    return locd2lord(col_dicts, rowkeys, colkeys)
def docd2locd(docd, colkeys):
    """Return the column dicts of docd in colkeys order.

    Raises KeyError if a column key is not present in docd.
    """
    locd = []
    for ckey in colkeys:
        locd.append(docd[ckey])
    return locd
def docd2dorl(docd, rowkeys, colkeys):
    """Convert a dict of column dicts to a dict of row lists.

    Implemented as a pipeline: docd2locl, locl2lorl, then lorl2dorl.
    """
    # better way?
    columns = docd2locl(docd, rowkeys, colkeys)
    rows = locl2lorl(columns)
    return lorl2dorl(rows, rowkeys)
def docd2docl(docd, rowkeys, colkeys):
    """Convert a dict of column dicts to a dict of column lists.

    Each column's values are listed in rowkeys order, skipping row keys
    the column does not have.  Raises KeyError if a column key is not
    present in docd.
    """
    docl = {}
    for ckey in colkeys:
        values = []
        for rkey in rowkeys:
            if rkey in docd[ckey]:
                values.append(docd[ckey][rkey])
        docl[ckey] = values
    return docl
def docd2dord(docd, rowkeys, colkeys):
    """Convert a dict of column dicts to a dict of row dicts.

    Implemented as a pipeline: docd2locl, locl2lorl, then lorl2dord.
    """
    # better way?
    columns = docd2locl(docd, rowkeys, colkeys)
    rows = locl2lorl(columns)
    return lorl2dord(rows, rowkeys, colkeys)
No comments:
Post a Comment