Writing a list of dictionaries in CSV - python

The problem is as follows: you have a list of dictionaries in the format
[{'a': 10, 'b': 11, 'c': 12, 'd': 13, 'e': 14},
{'a': 20, 'b': 21, 'c': 22, 'd': 23, 'e': 24},
{'a': 30, 'b': 31, 'c': 32, 'd': 33, 'e': 34},
{'a': 40, 'b': 41, 'c': 42, 'd': 43, 'e': 44}]
which you want to write to a CSV file that looks like this:
"a","b","c","d","e"
10,11,12,13,14
20,21,22,23,24
30,31,32,33,34
40,41,42,43,44
The problem is that when you run this code:
def write_csv_from_list_dict(filename, table, fieldnames, separator, quote):
    """
    Write a list of dictionaries to a CSV file.

    The first row written is ``fieldnames``; every following row holds the
    values of one dictionary from ``table``, in ``fieldnames`` order.
    Non-numeric values are quoted (csv.QUOTE_NONNUMERIC).

    :param filename: path of the CSV file to create or overwrite
    :param table: list of dictionaries, each containing every key in fieldnames
    :param fieldnames: list of keys giving the column order
    :param separator: field delimiter passed to csv.writer
    :param quote: quote character passed to csv.writer
    :raises KeyError: if a dictionary lacks one of the fieldnames
    """
    import csv  # local import keeps the snippet self-contained

    # Build the rows under a separate name. The original rebound ``table`` to
    # an empty list here, which threw the input away before it was read.
    # Rows are built before the file is opened so that a missing key does not
    # leave a half-written file behind.
    rows = [[record[fieldname] for fieldname in fieldnames]
            for record in table]

    # ``with`` closes the file even when a write fails; the original called
    # close() on a misspelled name (``file_handler``).
    with open(filename, 'wt', newline='') as file_handle:
        csv_write = csv.writer(file_handle,
                               delimiter=separator,
                               quotechar=quote,
                               quoting=csv.QUOTE_NONNUMERIC)
        csv_write.writerow(fieldnames)
        csv_write.writerows(rows)
it raises the error
(Exception: AttributeError) "'list' object has no attribute 'keys'"
at line 148, in _dict_to_list wrong_fields = rowdict.keys() - self.fieldnames
Why to be so hard to say, explicitly to close a file, not a string.

The below code should work
import csv

data = [{'a': 10, 'b': 11, 'c': 12, 'd': 13, 'e': 14},
        {'a': 20, 'b': 21, 'c': 22, 'd': 23, 'e': 24},
        {'a': 30, 'b': 31, 'c': 32, 'd': 33, 'e': 34},
        {'a': 40, 'b': 41, 'c': 42, 'd': 43, 'e': 44}]

# The column order is taken from the first dictionary.
keys = list(data[0].keys())

# csv.DictWriter looks every value up by key, so a dictionary whose keys are
# in a different order still lands in the right columns, and values that
# contain commas or quotes are escaped properly (a plain ','.join is not).
# newline='' is what the csv module expects; lineterminator='\n' keeps the
# file looking exactly like the listing shown below.
with open('data.csv', 'w', newline='') as f:
    writer = csv.DictWriter(f, fieldnames=keys, lineterminator='\n')
    writer.writeheader()
    writer.writerows(data)
data.csv
a,b,c,d,e
10,11,12,13,14
20,21,22,23,24
30,31,32,33,34
40,41,42,43,44

Related

Converting a list of dictionaries to a list by using certain keys

I have a list of dictionaries in which the dictionary size (number of keys) is not constant. Here is an example:
data = [{'t': 1633098324445950024,
'y': 1633098324445929497,
'q': 1636226,
'i': '57337',
'x': 12,
's': 15,
'c': [14, 37, 41],
'p': 139.55,
'z': 3},
{'t': 1633098324445958000,
'y': 1633098324445929497,
'q': 1636229,
'i': '57340',
'x': 12,
's': 100,
'c': [14, 41],
'p': 139.55,
'z': 3},
{'t': 1633098324445958498,
'y': 1633098324445594112,
'q': 1636230,
'i': '31895',
'x': 11,
's': 60,
'c': [14, 37, 41],
'p': 139.55,
'z': 3},
{'t': 1633098324446013523,
'y': 1633098324445649152,
'q': 1636231,
'i': '31896',
'x': 11,
's': 52,
'c': [14, 37, 41],
'p': 139.55,
'z': 3},
{'t': 1633098324472392943,
'y': 1633098324472133407,
'q': 1636256,
'i': '3417',
'x': 15,
's': 100,
'p': 139.555,
'z': 3},
{'t': 1633098324478972256,
'y': 1633098324478000000,
'f': 1633098324478949693,
'q': 1636260,
'i': '58051',
'x': 4,
'r': 12,
's': 100,
'p': 139.555,
'z': 3}]
As the sample shows, the dictionaries have different lengths and do not necessarily share the same keys. I need to extract certain elements based on their keys. I am using [(d['t'],d['p'],d['s']) for d in data] and the result looks like this:
[(1633098324445950024, 139.55, 15),
(1633098324445958000, 139.55, 100),
(1633098324445958498, 139.55, 60),
(1633098324446013523, 139.55, 52),
(1633098324472392943, 139.555, 100),
(1633098324478972256, 139.555, 100)]
But I also need the values for the 'c' key, and when I run the following I get a KeyError:
[(d['t'],d['p'],d['s'],d['c']) for d in data]
Traceback (most recent call last):
File "<ipython-input-108-8533763f150e>", line 1, in <module>
[(d['t'],d['p'],d['s'],d['c']) for d in data]
File "<ipython-input-108-8533763f150e>", line 1, in <listcomp>
[(d['t'],d['p'],d['s'],d['c']) for d in data]
KeyError: 'c'
One approach:
res = [(d['t'], d['p'], d['s']) + ((d['c'],) if 'c' in d else tuple()) for d in data]
pprint.pprint(res)
Output
[(1633098324445950024, 139.55, 15, [14, 37, 41]),
(1633098324445958000, 139.55, 100, [14, 41]),
(1633098324445958498, 139.55, 60, [14, 37, 41]),
(1633098324446013523, 139.55, 52, [14, 37, 41]),
(1633098324472392943, 139.555, 100),
(1633098324478972256, 139.555, 100)]
Why don't you try this:
value_list = [(d.get('t', ''), d.get('p', ''), d.get('s', ''), d.get('c', [])) for d in data]
print(value_list)
Output:
[(1633098324445950024, 139.55, 15, [14, 37, 41]),
(1633098324445958000, 139.55, 100, [14, 41]),
(1633098324445958498, 139.55, 60, [14, 37, 41]),
(1633098324446013523, 139.55, 52, [14, 37, 41]),
(1633098324472392943, 139.555, 100, []),
(1633098324478972256, 139.555, 100, [])]
def convert_to_list(list_of_dicts, keys):
    """
    Convert a list of dictionaries to a list by using certain keys.

    Each dictionary is reduced to the requested keys. A key that a given
    dictionary does not contain is left out of that dictionary's result
    instead of raising KeyError, so dictionaries with differing key sets can
    be processed together.

    :param list_of_dicts: list of dictionaries
    :param keys: list of keys
    :return: list of dictionaries, one per input dictionary
    """
    return [{key: dic[key] for key in keys if key in dic}
            for dic in list_of_dicts]


if __name__ == '__main__':
    list_of_dicts = [{'name': 'John', 'age': 21}, {'name': 'Mark', 'age': 25}]
    keys = ['name', 'age']
    print(convert_to_list(list_of_dicts, keys))

Python3 Merge two dictionaries based on sub dictionaries keys w/o for loop

Maybe this is impossible, but I would like to merge two dictionaries based on the keys of their sub-dictionaries, without writing a for loop over each key of a given dictionary.
Given :
dict1 = { 'APPL' : { 'cp': 1, 'sed': 'bull'}, 'BAC' : { 'cp': 1, 'sed': 'bull'}}
dict2 = { 'APPL' : { 'tp': 100}}
dict3 = dict1 | dict2 ## python ≥3.9 only
print(dict3)
{'APPL': {'tp': 100}, 'BAC': {'cp': 1, 'sed': 'bull'}}
dict1.update(dict2)
print(dict1)
{'APPL': {'tp': 100}, 'BAC': {'cp': 1, 'sed': 'bull'}}
Desired output
{'APPL': {'tp': 100,'cp': 1, 'sed': 'bull'}, 'BAC': {'cp': 1, 'sed': 'bull'}}
I can do it now with a for loop , just wondering if there is a more elegant solution
Do:
dict1 = {
    'APPL': {'cp': 1, 'sed': 'bull'},
    'BAC': {'cp': 1, 'sed': 'bull'},
}
dict2 = {'APPL': {'tp': 100}}

# For every key of dict1, start from a copy of its sub-dictionary and overlay
# the matching sub-dictionary of dict2 (an empty one when dict2 lacks the key).
res = {}
for key, value in dict1.items():
    merged = dict(value)
    merged.update(dict2.get(key, {}))
    res[key] = merged
print(res)
Output
{'APPL': {'cp': 1, 'sed': 'bull', 'tp': 100}, 'BAC': {'cp': 1, 'sed': 'bull'}}
No, this isn't possible without iteration. You could use a custom joiner function and then reduce a list of dictionaries however:
data = [{'BAC': {'a': 40}, 'XYZ': {'c': 81, 'b': 16}, 'ABC': {'b': 85}},
{'APPL': {'b': 55},
'BAC': {'b': 16, 'f': 59},
'ABC': {'d': 9, 'c': 43},
'XYZ': {'b': 82}},
{'ABC': {'b': 43, 'c': 35},
'APPL': {'f': 17, 'a': 1, 'd': 16},
'BAC': {'f': 35, 'a': 1},
'XYZ': {'a': 2, 'c': 55}},
{'BAC': {'f': 4, 'd': 87},
'XYZ': {'d': 31, 'f': 92},
'APPL': {'b': 18, 'a': 74, 'c': 69}},
{'XYZ': {'d': 84, 'f': 49},
'ABC': {'d': 88, 'a': 82, 'f': 96},
'APPL': {'a': 23},
'BAC': {'b': 40}},
{'BAC': {'c': 88, 'd': 38},
'APPL': {'c': 48, 'b': 30},
'ABC': {'d': 95, 'b': 38},
'XYZ': {'d': 90, 'a': 5}}]
def join_dict(d1, d2):
    """
    Merge two dictionaries of dictionaries, combining sub-dictionaries by key.

    For a key present in both inputs the two sub-dictionaries are merged,
    with values from ``d2`` winning on common inner keys. A key present in
    only one input keeps (a copy of) that input's sub-dictionary; the
    original version raised KeyError in that case.

    :param d1: dictionary mapping keys to dictionaries
    :param d2: dictionary mapping keys to dictionaries
    :return: new dictionary; keys of d1 first, then keys found only in d2
    """
    # {**d1, **d2} is used only for its keys: every key of d1 in order,
    # followed by the keys that appear only in d2.
    return {k: {**d1.get(k, {}), **d2.get(k, {})} for k in {**d1, **d2}}
>>> import functools
>>> functools.reduce(join_dict, data)
{'XYZ': {'a': 39, 'f': 78, 'c': 42, 'd': 30, 'b': 24},
'ABC': {'c': 22, 'f': 69, 'a': 8, 'b': 51, 'd': 70},
'APPL': {'d': 19, 'b': 35, 'a': 6, 'f': 33, 'c': 64},
'BAC': {'f': 97, 'c': 38, 'd': 1, 'b': 63, 'a': 91}}
Of course, this will overwrite any common values in the sub-dictionaries. Assuming that isn't an issue for you, this should work fine as a "more elegant solution".

Merging two or more dictionaries when they have the same key value pairs [closed]

Closed. This question needs to be more focused. It is not currently accepting answers.
Want to improve this question? Update the question so it focuses on one problem only by editing this post.
Closed 1 year ago.
Improve this question
I am trying to merge two or more dictionaries in a list by grouping them on the same set of key-value pairs. When dictionaries share the specified key-value pairs, their remaining keys are collected under 'other_cols'. Below is what my input looks like and what I am expecting as an output.
input_list = [{'a': 1, 'b' : 2, 'c': 3, 'd': 4},
{'a': 1, 'b' : 2, 'c': 5, 'd': 6},
{'a': 9, 'b' : 10, 'c': 11, 'd': 12},
{'a': 9, 'b' : 10, 'c': 13, 'd': 14},
{'a': 9, 'b' : 10, 'c': 15, 'd': 16},
{'a': 17, 'b' : 18, 'c': 19, 'd': 20},
{'a': 1, 'b' : 2, 'c': 7, 'd': 8}]
merge_by_keys = ['a','b']
# One entry per distinct ('a', 'b') pair; the remaining columns of every
# matching input dictionary are collected under 'other_cols'.
expected_output_list = [{'a': 1, 'b' : 2, 'other_cols':[{'c': 3, 'd': 4},
                                                        {'c': 5, 'd': 6},
                                                        {'c': 7, 'd': 8}]},
                        {'a': 9, 'b' : 10, 'other_cols':[{'c': 11, 'd': 12},
                                                         {'c': 13, 'd': 14},
                                                         {'c': 15, 'd': 16}]},
                        {'a': 17, 'b' : 18, 'other_cols':[{'c': 19, 'd': 20}]}]
This looks like what you are looking for:
The most interesting line is:
out[tuple((entry[x],x) for x in merge_by_keys)].append({k: v for k, v in entry.items() if k not in merge_by_keys})
Make sure you understand it. Ask if you have questions.
from collections import defaultdict
data = [{'a': 1, 'b': 2, 'c': 3, 'd': 4},
        {'a': 1, 'b': 2, 'c': 5, 'd': 6},
        {'a': 9, 'b': 10, 'c': 11, 'd': 12},
        {'a': 9, 'b': 10, 'c': 13, 'd': 14},
        {'a': 9, 'b': 10, 'c': 15, 'd': 16},
        {'a': 17, 'b': 18, 'c': 19, 'd': 20},
        {'a': 1, 'b': 2, 'c': 7, 'd': 8}]
merge_by_keys = ['a', 'b']

# Group the remaining columns of every entry under a hashable key made of
# (value, key) pairs, one pair per merge key.
out = defaultdict(list)
for entry in data:
    out[tuple((entry[x],x) for x in merge_by_keys)].append({k: v for k, v in entry.items() if k not in merge_by_keys})

# Turn every group back into a dictionary: the merge keys first, then the
# collected remainder under 'other'.
result = []
for k, v in out.items():
    group = {name: value for value, name in k}
    group['other'] = v
    result.append(group)

for entry in result:
    print(entry)
output
{'a': 1, 'b': 2, 'other': [{'c': 3, 'd': 4}, {'c': 5, 'd': 6}, {'c': 7, 'd': 8}]}
{'a': 9, 'b': 10, 'other': [{'c': 11, 'd': 12}, {'c': 13, 'd': 14}, {'c': 15, 'd': 16}]}
{'a': 17, 'b': 18, 'other': [{'c': 19, 'd': 20}]}
Here's one way to do it, using a dictionary to group entries and turning its values into a list at the end.
input_list = [{'a': 1, 'b' : 2, 'c': 3, 'd': 4},
              {'a': 1, 'b' : 2, 'c': 5, 'd': 6},
              {'a': 9, 'b' : 10, 'c': 11, 'd': 12},
              {'a': 9, 'b' : 10, 'c': 13, 'd': 14},
              {'a': 9, 'b' : 10, 'c': 15, 'd': 16},
              {'a': 17, 'b' : 18, 'c': 19, 'd': 20},
              {'a': 1, 'b' : 2, 'c': 7, 'd': 8}]
merge_keys = ['a','b']

grouped = dict()
for d in input_list:
    # The tuple of merge-key values identifies the group.
    groupKey = tuple(map(d.get, merge_keys))
    # The first entry of a group creates its dictionary, holding only the
    # merge keys; later entries get the existing one back.
    groupDict = grouped.setdefault(groupKey, {k: d[k] for k in merge_keys})
    # Copy the remaining columns instead of pop()-ing them out of ``d``, so
    # the dictionaries in input_list are left untouched.
    otherCols = {k: v for k, v in d.items() if k not in merge_keys}
    groupDict.setdefault('other_cols', []).append(otherCols)
result = list(grouped.values())
print(result)
[{'a': 1, 'b': 2, 'other_cols': [{'c': 3, 'd': 4},
{'c': 5, 'd': 6},
{'c': 7, 'd': 8}]},
{'a': 9, 'b': 10, 'other_cols': [{'c': 11, 'd': 12},
{'c': 13, 'd': 14},
{'c': 15, 'd': 16}]},
{'a': 17, 'b': 18, 'other_cols': [{'c': 19, 'd': 20}]}]

Converting pyspark dataframe into dictionary: result different than expected

Let's imagine I have the following pyspark dataframe:
data = [("USA",20,40,60),
("India",50,40,30),
("Nepal",20,50,30),
("Ireland",40,60,70),
("Norway",50,50,60)
]
columns = ["country", "A", "B", "C"]
df = spark.createDataFrame(data=data,schema=columns)
To create a dictionary from it, I followed the following approach:
import pyspark.sql.functions as F
list_test = [row.asDict() for row in df.collect()]
dict_test = {country['country']: country for country in list_test}
The result is as follows:
{'USA': {'country': 'USA', 'A': 20, 'B': 40, 'C': 60}, 'India': {'country': 'India', 'A': 50, 'B': 40, 'C': 30}, 'Nepal': {'country': 'Nepal', 'A': 20, 'B': 50, 'C': 30}, 'Ireland': {'country': 'Ireland', 'A': 40, 'B': 60, 'C': 70}, 'Norway': {'country': 'Norway', 'A': 50, 'B': 50, 'C': 60}}
However, what I wanted was the following:
{'USA': {'A': 20, 'B': 40, 'C': 60}, 'India': {'A': 50, 'B': 40, 'C': 30}, 'Nepal': {'A': 20, 'B': 50, 'C': 30}, 'Ireland': {'A': 40, 'B': 60, 'C': 70}, 'Norway': {'A': 50, 'B': 50, 'C': 60}}
How can I obtain this? I'm not sure I understand what I'm doing wrong.
You can do a dict comprehension to remove the unwanted item:
# Build one dictionary per row returned by df.collect(), via each row's asDict().
list_test = [row.asDict() for row in df.collect()]
# Key every entry by its 'country' value; the inner dictionary keeps all other
# columns and leaves 'country' itself out.
dict_test = {country['country']: {k:v for k,v in country.items() if k != 'country'} for country in list_test}
print(dict_test)
{'USA': {'A': 20, 'B': 40, 'C': 60}, 'India': {'A': 50, 'B': 40, 'C': 30}, 'Nepal': {'A': 20, 'B': 50, 'C': 30}, 'Ireland': {'A': 40, 'B': 60, 'C': 70}, 'Norway': {'A': 50, 'B': 50, 'C': 60}}
Here's another way by collecting the json string directly from the DataFrame after some transformations then using json.loads to get dict object:
import json
from pyspark.sql.functions import to_json, collect_list, struct, map_from_arrays
# Pipeline, innermost call first:
#   1. groupBy() with no columns + agg(): collect the "country" column into a
#      "countries" list and a struct of columns A, B, C into a "values" list.
#   2. select(): combine the two lists with map_from_arrays and serialize the
#      map with to_json into a single "json_str" column.
#   3. collect()[0].json_str: fetch that string from the first collected row.
#   4. json.loads(): parse the string into a Python dict.
# NOTE(review): this assumes both collect_list calls yield their elements in
# the same row order, so countries and values line up — confirm.
dict_test = json.loads(
df.groupBy().agg(
collect_list("country").alias("countries"),
collect_list(struct("A", "B", "C")).alias("values")
).select(
to_json(map_from_arrays("countries", "values")).alias("json_str")
).collect()[0].json_str
)
print(dict_test)
#{'USA': {'A': 20, 'B': 40, 'C': 60}, 'India': {'A': 50, 'B': 40, 'C': 30}, 'Nepal': {'A': 20, 'B': 50, 'C': 30}, 'Ireland': {'A': 40, 'B': 60, 'C': 70}, 'Norway': {'A': 50, 'B': 50, 'C': 60}}

How to sort a complex nested dictionary to a nested list

What is the best way to sort a nested dictionary in Python 2.6 by value? I would like to sort by the length of the inner dictionary followed by the inner dictionary with the largest value. For example:
d = {1: {'AA': {'a': 100, 'b': 1, 'c': 45}},
2: {'AA': {'c': 2}},
3: {'BB': {'d': 122, 'a': 4, 't': 22, 'r': 23, 'w': 12}},
4: {'CC': {'y': 12, 'g': 15, 'b': 500}}}
The desired solution is a nested list:
lst = [[3, 'BB', {'d': 122, 'a': 4, 't': 22, 'r': 23, 'w': 12}],
[4, 'CC', {'y': 12, 'g': 15, 'b': 500}],
[1, 'AA', {'a': 100, 'b': 1, 'c': 45}],
[2, 'AA', {'c': 2}]]
With your corrected data-structure:
d = {1: {'AA': {'a': 100, 'b': 1, 'c': 45}},
     2: {'AA': {'c': 2}},
     3: {'BB': {'d': 122, 'a': 4, 't': 22, 'r': 23, 'w': 12}},
     4: {'CC': {'y': 12, 'g': 15, 'b': 500}}}


def sortkey(x):
    """
    Return the sort key for one ``(number, {label: inner_dict})`` item.

    :param x: a ``(key, value)`` pair from ``d.items()``; the value is a
        dictionary whose first (only) entry maps a label to the inner dict
    :return: tuple ``(len(inner_dict), max(inner_dict.values()))``
    :raises ValueError: if the inner dictionary is empty (max of nothing)
    """
    num, d1 = x
    # next(iter(...)) fetches the first item on both Python 2.6+ and
    # Python 3, where items() is a view that cannot be indexed with [0].
    key, d2 = next(iter(d1.items()))
    return len(d2), max(d2.values())


# list(...) around keys()/values() is required on Python 3, where they are
# views that do not support ``+``; on Python 2 it is a harmless copy.
exactly_how_you_want_it = [[k] + list(v.keys()) + list(v.values())
                           for k, v in sorted(d.items(), reverse=True,
                                              key=sortkey)]
for item in exactly_how_you_want_it:
    print(item)  # a single parenthesized argument prints the same on 2 and 3
results:
[3, 'BB', {'a': 4, 'r': 23, 'd': 122, 'w': 12, 't': 22}]
[4, 'CC', {'y': 12, 'b': 500, 'g': 15}]
[1, 'AA', {'a': 100, 'c': 45, 'b': 1}]
[2, 'AA', {'c': 2}]

Categories

Resources