I want to import a CSV into multiple dictionaries in python
queryInclude,yahoo,value1
queryInclude,yahoo,value2
queryInclude,yahoo,value3
queryExclude,yahoo,value4
queryExclude,yahoo,value5
queryInclude,google,value6
queryExclude,google,value7
My ideal result would have row[0]=dictionary, row[1]=key, and row[2]=value or list of values
queryInclude = {
"yahoo": ["value1", "value2", "value3"],
"google": ["value6"] }
queryExclude = {
"yahoo": ["value4", "value5"],
"google": ["value7"] }
Here's my code:
import csv
# Accumulators shared by every row of the CSV.
queryList=[]
queryDict={}
with open('dictionary.csv') as csvfile:
    reader = csv.reader(csvfile, delimiter=',', quotechar='|')
    for row in reader:
        # NOTE(review): list.append() mutates queryList in place and returns
        # None, so every key is mapped to None (see the printed output).
        # row[0], the name of the target dictionary, is never consulted.
        queryDict[row[1]] = queryList.append(row[2])
        print queryDict
{'yahoo': None}
{'yahoo': None}
{'yahoo': None}
{'yahoo': None}
{'yahoo': None}
{'google': None, 'yahoo': None}
{'google': None, 'yahoo': None}
I have the flexibility to change the CSV format if needed. My ideal result posted above is what I already had hard-coded into my app. I'm trying to make it easier to add more values down the road. I've spent many hours researching this and will continue to update if I make any more progress. My thought process looks like this... not sure how close I am to understanding how to structure my loops and combine like values while iterating through the CSV rows...
for row in reader:
where row[0] = queryInclude:
create a dictionary combining keys into a list of values
where row[0] = queryExclude:
create a dictionary combining keys into a list of values
Using defaultdict means you don't have to special-case the first element added under a key. Its argument declares the default value used when a key is not present, and must be a callable that creates that default object:
#! python3
import csv
from io import StringIO
from collections import defaultdict
from pprint import pprint
# In-memory stand-in for the CSV file: dictionary name, key, value.
data = StringIO('''\
queryInclude,yahoo,value1
queryInclude,yahoo,value2
queryInclude,yahoo,value3
queryExclude,yahoo,value4
queryExclude,yahoo,value5
queryInclude,google,value6
queryExclude,google,value7
''')

# Two-level grouping: D[dictionary_name][key] -> list of values.  Both levels
# are created on first access, so no "first element" special case is needed.
D = defaultdict(lambda: defaultdict(list))
for d, k, v in csv.reader(data):
    D[d][k].append(v)

# pprint renders a defaultdict together with its factory (including the
# lambda's memory address), so print a plain-dict copy to get stable output.
pprint({name: dict(groups) for name, groups in D.items()})
Output:
{'queryExclude': {'google': ['value7'],
'yahoo': ['value4', 'value5']},
'queryInclude': {'google': ['value6'],
'yahoo': ['value1', 'value2', 'value3']}}
Is this helping?
import csv
from io import StringIO

# In-memory stand-in for the CSV file: dictionary name, provider, value.
csvfile = StringIO("""queryInclude,yahoo,value1
queryInclude,yahoo,value2
queryInclude,yahoo,value3
queryExclude,yahoo,value4
queryExclude,yahoo,value5
queryInclude,google,value6
queryExclude,google,value7""")
reader = csv.reader(csvfile, delimiter=',', quotechar='|')

# dict1[dictionary_name][provider] -> list of values, in file order.
dict1 = {}
for row in reader:
    key1, provider, value1 = row
    # setdefault creates the missing level the first time a key is seen; it
    # replaces dict.has_key(), which does not exist on Python 3.
    dict1.setdefault(key1, {}).setdefault(provider, []).append(value1)
Related
How can I extract the T3 Period, Year and maximum value?
file.json
[
{"Fecha":"2022-08-01T00:00:00.000+02:00", "T3_TipoDato":"Avance", "T3_Periodo":"M08", "Anyo":2022, "value":10.4},
{"Fecha":"2022-07-01T00:00:00.000+02:00", "T3_TipoDato":"Definitivo", "T3_Periodo":"M07", "Anyo":2022, "value":10.8},
{"Fecha":"2022-06-01T00:00:00.000+02:00", "T3_TipoDato":"Definitivo", "T3_Periodo":"M06", "Anyo":2022, "value":10.2}
]
My code:
import json
# Parse the whole JSON document.  For the sample file shown above the result
# is a list of dicts, despite the variable's name.
with open("file.json") as f:
    distros_dict = json.load(f)
print (distros_dict)
Here is my suggestion.
Load the data from the file into a list.
Loop through every dict in the list to edit it.
(In my example, I deleted two keys from every dict in the list.)
import json
# Load the records; the top-level JSON value is expected to be a list of dicts.
with open('file.json', "r", encoding='utf-8') as f:
    distros_dict = json.load(f)

# Drop the fields that are not wanted in the result.  A default is passed to
# pop() so a record that lacks one of them does not raise KeyError.
for item in distros_dict:
    item.pop('Fecha', None)
    item.pop('T3_TipoDato', None)

# Keep only the record with the largest "value".  max() needs a single pass and
# returns the first maximal record, the same one a descending sort puts at [0].
distros_dict = max(distros_dict, key=lambda i: i['value'])
Try this:
from json import load
# Read the records, then pick the one whose "value" field is largest.
with open("file.json") as f:
    records = load(f)
dictionary_max = max(records, key=lambda record: record["value"])

# Project the winning record down to the three fields of interest.
result = {
    field: dictionary_max[field]
    for field in ("T3_Periodo", "Anyo", "value")
}
print(result)
output:
{'T3_Periodo': 'M07', 'Anyo': 2022, 'value': 10.8}
In my current code, it seems to only take into account one value for my Subject key when there should be more (you can only see Economics in my JSON tree and not Maths). I've tried for hours and I can't get it to work.
Here is my sample dataset - I have many more subjects in my full data set:
ID,Name,Date,Subject,Start,Finish
0,Ladybridge High School,01/11/2019,Maths,05:28,06:45
0,Ladybridge High School,02/11/2019,Maths,05:30,06:45
0,Ladybridge High School,01/11/2019,Economics,11:58,12:40
0,Ladybridge High School,02/11/2019,Economics,11:58,12:40
1,Loreto Sixth Form,01/11/2019,Maths,05:28,06:45
1,Loreto Sixth Form,02/11/2019,Maths,05:30,06:45
1,Loreto Sixth Form,01/11/2019,Economics,11:58,12:40
1,Loreto Sixth Form,02/11/2019,Economics,11:58,12:40
Here is my Python code:
# Top-level container; one entry per distinct school name is appended below.
timetable = {"Timetable": []}
with open("C:/Users/kspv914/Downloads/Personal/Project Dawn/Timetable Sample.csv") as f:
    # Materialise every CSV row as a plain dict keyed by the header names.
    csv_data = [{k: v for k, v in row.items()} for row in csv.DictReader(f, skipinitialspace=True)]
# Collect the distinct names; the set removes duplicates.
name_array = []
for name in [row["Name"] for row in csv_data]:
    name_array.append(name)
name_set = set(name_array)
for name in name_set:
    timetable["Timetable"].append({"Name": name, "Date": {}})
# Attach each row to the entry whose "Name" matches.
for row in csv_data:
    for entry in timetable["Timetable"]:
        if entry["Name"] == row["Name"]:
            # NOTE(review): this resets the date's dict on every matching row,
            # discarding subjects stored for the same date by earlier rows.
            entry["Date"][row["Date"]] = {}
            entry["Date"][row["Date"]][row["Subject"]] = {
                "Start": row["Start"],
                "Finish": row["Finish"]
            }
Here is my JSON tree:
You're making date dict empty and then adding a subject.
Do something like this:
# Result shape: {"Timetable": [{"Name": ..., "Date": {date: {subject: {...}}}}]}
timetable = {"Timetable": []}
with open("a.csv") as f:
    csv_data = [dict(row) for row in csv.DictReader(f, skipinitialspace=True)]

# Index the per-name entries so each row finds its entry with one lookup
# instead of rescanning the whole list.  Entries are created in first-seen
# order, which keeps the output order stable from run to run (iterating a set
# of names does not).
entries_by_name = {}
for row in csv_data:
    name = row["Name"]
    entry = entries_by_name.get(name)
    if entry is None:
        entry = {"Name": name, "Date": {}}
        entries_by_name[name] = entry
        timetable["Timetable"].append(entry)

    dates = entry["Date"]
    # Only create the per-date dict once; recreating it on every row would
    # throw away the subjects already stored for that date.
    if row["Date"] not in dates:
        dates[row["Date"]] = {}
    dates[row["Date"]][row["Subject"]] = {
        "Start": row["Start"],
        "Finish": row["Finish"],
    }
I've just added an if condition before assigning {} to entry["Date"][row["Date"]], so the dict for a date is only created the first time that date is seen.
It will give output like the one shown in the image below:
You are overwriting your dict entries with entry["Date"][row["Date"]][row["Subject"]] =. The first time "math" is met, the entry is created. The second time it is overwritten.
Your expected result should be a list, not a dict. Every entry should be appended to the list with timetable_list.append().
Here is a simple snippet that converts the whole CSV file into JSON without losing data:
import csv
import json
# Read every CSV row (a mapping keyed by the header names) into a list.
with open("ex1.csv") as f:
    data = list(csv.DictReader(f))

# Wrap the rows under a single "Timetable" key and pretty-print them as JSON.
payload = {"Timetable": data}
print(json.dumps(payload, indent=4))
I am trying to write a code that will take in json values from Kafka and output them to a .csv file. The issue is that, for grades, the values have either science and math OR just english as nested objects.
This is what the data looks like:
{'id': 0, 'name': 'Susan', 'lastName': 'Johnsan', 'grades': {'science':
78, 'math': 89}}
{'id': 1, 'name': 'Mary', 'lastName': 'Davids', 'grades': {'english':
85}}
However when I run my code I keep getting the error TypeError: string indices must be integers.
from kafka import KafkaConsumer
import json
import csv
import sys
from datetime import datetime
import os
# connect to kafka topic
kaf = KafkaConsumer('students.all.events')
outputfile = 'C:\\Users\\Documents\\students_output.csv'
outfile = open(outputfile, mode='w', newline='')
# Column order of the pipe-delimited output file.
master_key = ['id', 'name', 'lastName', 'science', 'math', 'english']
writer = csv.DictWriter(outfile, master_key, delimiter="|")
writer.writeheader()
'''
writer = csv.writer(outfile)
writer.writerow(['JSON_Data'])
'''
# Counter used to stop once it reaches 5000.
i = 1
for row in kaf:
    if i < 5000:
        json_row = json.loads(row.value)
        print('Row: ', i)
        print(json_row)
        # NOTE(review): the name `dict` shadows the builtin from here on.
        dict = {'id': json_row['id'], 'name': json_row['name'], 'lastName': json_row['lastName']}
        # NOTE(review): in the sample data 'grades' is a dict, and iterating a
        # dict yields its keys, so `value` is a str such as 'science'.
        # Indexing that str with another str raises
        # "TypeError: string indices must be integers".
        for value in json_row['grades']:
            if value['science'] is not None:
                dict['science'] = value['science']
                dict['math'] = value['math']
            elif value['english'] is not None:
                dict['english'] = value['english']
        writer.writerow(dict)
        i += 1
    else:
        break
outfile.close()
Please check if the value variable is actually of type dict, because the error you get, in general, means that you are trying to access a string object in a dict[key] way.
It looks like you have a typo - at least in the code that you pasted here. There is an extra double quote after the lastName key.
Based on the help @TenorFlyy gave me, I changed my code to fix the issue:
from kafka import KafkaConsumer
import json
import csv
import sys
from datetime import datetime
import os
# connect to kafka topic
kaf = KafkaConsumer('students.all.events')
outputfile = 'C:\\Users\\Documents\\students_output.csv'
# Column order of the pipe-delimited output file.
master_key = ['id', 'name', 'lastName', 'science', 'math', 'english']

# The with-block guarantees the CSV is flushed and closed even if a message
# fails to parse part-way through the stream.
with open(outputfile, mode='w', newline='') as outfile:
    writer = csv.DictWriter(outfile, master_key, delimiter="|")
    writer.writeheader()

    # Same bound as the counter-based loop: messages numbered 1..4999 are
    # written, and the loop stops when message 5000 arrives.
    for i, row in enumerate(kaf, start=1):
        if i >= 5000:
            break
        json_row = json.loads(row.value)
        print('Row: ', i)
        print(json_row)
        # Flatten the message: the identity fields plus whichever grades are
        # present.  DictWriter leaves the columns of absent grades empty.
        # (Named `record` rather than `dict` to avoid shadowing the builtin.)
        record = {'id': json_row['id'], 'name': json_row['name'], 'lastName': json_row['lastName']}
        record.update(json_row['grades'])
        writer.writerow(record)
How can I get a nested dictionary, where both the keys and the subkeys are precisely in the same order as in the csv file?
I tried
import csv
from collections import OrderedDict
filename = "test.csv"
# Outer mapping; an OrderedDict keeps keys in the order they are inserted.
aDict = OrderedDict()
with open(filename, 'r') as f:
    csvReader = csv.DictReader(f)
    for row in csvReader:
        # Remove the "key" column from the row and use its value as the outer
        # key; the remaining columns become the sub-dictionary.
        key = row.pop("key")
        aDict[key] = row
where test.csv looks like
key,number,letter
eins,1,a
zwei,2,b
drei,3,c
But the sub-dictionaries are not ordered (rows letter and number are changed). So how can I populate aDict[key] in an ordered manner?
You have to build the dictionaries and sub-dictionaries yourself from rows returned from csv.reader which are sequences, instead of using csv.DictReader.
Fortunately that's fairly easy:
import csv
from collections import OrderedDict
filename = 'test.csv'
aDict = OrderedDict()
# Text mode with newline='' is what the csv module expects on Python 3; a file
# opened with 'rb' yields bytes, which csv.reader rejects.
with open(filename, 'r', newline='') as f:
    csvReader = csv.reader(f)
    # The header row fixes the column order used for every sub-dictionary.
    fields = next(csvReader)
    for row in csvReader:
        temp = OrderedDict(zip(fields, row))
        # Pull the "key" column out; what is left is the ordered sub-dictionary.
        key = temp.pop("key")
        aDict[key] = temp

import json  # just to create output
print(json.dumps(aDict, indent=4))
Output:
{
"eins": {
"number": "1",
"letter": "a"
},
"zwei": {
"number": "2",
"letter": "b"
},
"drei": {
"number": "3",
"letter": "c"
}
}
This is one way:
import csv
from collections import OrderedDict
filename = "test.csv"
aDict = OrderedDict()
with open(filename, 'r') as f:
    csvReader = csv.DictReader(f)
    # DictReader already parses the header row, so reuse its fieldnames rather
    # than reading the first line separately and seeking back.  "key" is
    # excluded by name, so it no longer has to be the first column.  An empty
    # file has no fieldnames (None), which is treated as "no columns".
    order = [name for name in (csvReader.fieldnames or []) if name != "key"]
    for row in csvReader:
        key = row.pop("key")
        # Rebuild the sub-dictionary in header order.
        aDict[key] = OrderedDict((k, row[k]) for k in order)
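On Python versions before 3.6, csv.DictReader loads the rows into a regular dict and not an ordered one (3.6 and 3.7 return an OrderedDict, and 3.8+ return a dict, which preserves insertion order). On those older versions you'll have to read the csv manually into an OrderedDict to get the order you need: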
from collections import OrderedDict
filename = "test.csv"
dictRows = []
with open(filename, 'r') as f:
    # Lazily split each line on commas.  NOTE: this plain split does not
    # understand quoted fields that themselves contain commas.
    rows = (line.strip().split(',') for line in f)
    # read column names from first row
    # (the next() builtin works on Python 2.6+ and 3; generators have no
    # .next() method on Python 3)
    columns = next(rows)
    for row in rows:
        dictRows.append(OrderedDict(zip(columns, row)))
You can take advantage of the existing csv.DictReader class, but alter the rows it returns. To do that, add the following class to the beginning of your script:
class OrderedDictReader(csv.DictReader):
    """csv.DictReader variant whose rows are OrderedDicts in header order."""

    def __next__(self):
        """Return the next row as an OrderedDict keyed in fieldnames order.

        Raises StopIteration (from csv.DictReader) when the input is exhausted.
        """
        # Get a row using csv.DictReader
        row = super().__next__()
        # Create a new row using OrderedDict, following the header order
        return OrderedDict((k, row[k]) for k in self.fieldnames)

    # Python 3 drives iteration through __next__; the Python 2 spelling is kept
    # as an alias so existing reader.next() calls continue to work.
    next = __next__
Then, use this class in place of csv.DictReader:
csvReader = OrderedDictReader(f)
The rest of your code remains the same.
I have a Python code, see below, which takes a JSON file in the structure:
{
"name":"Winking Entertainment",
"imports":"Translink Capital"
},
{
"name":"Wochacha",
"imports":"Sequoia Capital"
},
{
"name":"Wuhan Kindstar Diagnostics",
"imports":"Baird Venture Partners"
},
And aggregates repeat values in "imports" and turns the matching strings into a single array for that entry. (see snippet below)
import json
from collections import defaultdict
def map_names_to_imports(raw_data):
    """Group the "name" values of raw_data by their "imports" value.

    Despite the function's name, the returned defaultdict is keyed by each
    row's 'imports' string and holds the list of matching 'name' strings.
    """
    name_to_imports = defaultdict(list)
    for row in raw_data:
        name_to_imports[row['imports']].append(row['name'])
    return name_to_imports


def reformat(name_to_imports):
    """Turn the grouping into a list of {'name': ..., 'imports': [...]} dicts."""
    output = []
    for name, imports in name_to_imports.items():
        # set() removes duplicate entries; the order of the resulting list is
        # whatever order the set iterates in.
        # NOTE(review): a plain dict literal is used here; on interpreters
        # where dict does not preserve insertion order, the key order in the
        # serialized JSON is not guaranteed.
        new_dict = {
            'name': name,
            'imports': list(set(imports))
        }
        output.append(new_dict)
    return output


def run(raw_data):
    """Aggregate raw_data and write the result as JSON to clean-data2.json."""
    name_to_imports = map_names_to_imports(raw_data)
    output = reformat(name_to_imports)
    # NOTE(review): the file is opened in binary mode but receives the str
    # returned by json.dumps; confirm the target Python version accepts that.
    with open('clean-data2.json','wb') as f:
        f.write(json.dumps(output))


if __name__ == '__main__':
    # The file object passed to json.load is never explicitly closed.
    raw_data = json.load(open('bricinvestors.json'))
    run(raw_data)
The issue I am having is my Json file is not coming out the right way.
For some reason, name and imports are getting reversed. So my output looks like:
{"imports": ["SinoHub"], "name": "Iroquois Capital"}, {"imports": ["Qunar.com", "Lashou.com"], "name": "Tenaya Capital"}
In fact, I want to keep the {"name": "string", "imports": "string"} format -- and not the other way around.
What should I do?
Thanks.
If you're using Python 2.7+, you could use collections.OrderedDict as your input to json.loads(), instead of the standard Python dict. The standard library dict class doesn't guarantee the ordering of keys.
Building on dano's answer, you could use the OrderedDict.setdefault method instead of using a defaultdict:
import json
import collections
OrderedDict = collections.OrderedDict
def map_names_to_imports(raw_data):
    """Group the 'name' values of raw_data by their 'imports' value.

    Returns an OrderedDict keyed by each row's 'imports' string, in order of
    first appearance, whose values are lists of the matching 'name' strings.
    """
    grouped = OrderedDict()
    for record in raw_data:
        group_key = record['imports']
        # Create the bucket the first time this 'imports' value is seen.
        if group_key not in grouped:
            grouped[group_key] = []
        grouped[group_key].append(record['name'])
    return grouped
def reformat(name_to_imports):
    """Convert a grouping into a list of ordered {'name', 'imports'} records.

    name_to_imports maps a string to a list of strings.  Each item becomes an
    OrderedDict with 'name' first and 'imports' second; 'imports' holds the
    list with duplicates removed, in first-seen order.
    """
    return [
        OrderedDict([
            ('name', name),
            # OrderedDict.fromkeys de-duplicates while keeping first-seen
            # order; list(set(...)) would return the items in an arbitrary
            # order that can change from run to run.
            ('imports', list(OrderedDict.fromkeys(imports))),
        ])
        for name, imports in name_to_imports.items()
    ]
def run(raw_data):
    """Aggregate raw_data and write the result as JSON to 'clean-data2.json'."""
    name_to_imports = map_names_to_imports(raw_data)
    output = reformat(name_to_imports)
    # json.dumps returns str, so the file must be opened in text mode; writing
    # a str to a file opened with 'wb' raises TypeError on Python 3.
    with open('clean-data2.json', 'w') as f:
        f.write(json.dumps(output))
if __name__ == '__main__':
    # Close the input file deterministically instead of leaving the handle
    # open.  object_pairs_hook keeps each JSON object's keys in file order.
    with open('bricinvestors.json') as source:
        raw_data = json.load(source, object_pairs_hook=OrderedDict)
    run(raw_data)
Final version, which is based in large part on @unutbu's answer.
import json
import collections
OrderedDict = collections.OrderedDict
def map_names_to_imports(raw_data):
    """Collect, for every distinct 'imports' value, the 'name' values seen with it.

    The result is an OrderedDict: keys are 'imports' strings in order of first
    appearance, values are lists of 'name' strings in input order.
    """
    buckets = OrderedDict()
    for entry in raw_data:
        bucket_key = entry['imports']
        bucket = buckets.get(bucket_key)
        if bucket is None:
            # First occurrence of this 'imports' value: start a new list.
            bucket = buckets[bucket_key] = []
        bucket.append(entry['name'])
    return buckets
def reformat(name_to_imports):
    """Build the output records from a mapping of string -> list of strings.

    Returns a list with one OrderedDict per item of name_to_imports, keyed
    'name' then 'imports'.  The 'imports' list has duplicates removed and keeps
    the order in which values first appeared.
    """
    the_output = []
    for name, imports in name_to_imports.items():
        # De-duplicate without losing order: a set would hand the values back
        # in an arbitrary order, making the written file differ between runs.
        unique_imports = list(OrderedDict.fromkeys(imports))
        the_output.append(OrderedDict([('name', name),
                                       ('imports', unique_imports)]))
    return the_output
def run(raw_data):
    """Aggregate raw_data and write it as JSON to 'data/clean-data2.json'."""
    the_output = reformat(map_names_to_imports(raw_data))
    # Serialize before opening the file, as the original ordering does.
    serialized = json.dumps(the_output)
    with open('data/clean-data2.json', 'w+', encoding='utf8') as f:
        f.write(serialized)
if __name__ == '__main__':
    # Close the input file deterministically, and read it with the same
    # encoding the output is written in.  object_pairs_hook keeps each JSON
    # object's keys in file order.
    with open('data/bricsinvestorsfirst.json', encoding='utf8') as source:
        raw_data = json.load(source, object_pairs_hook=OrderedDict)
    run(raw_data)