Read CSV data and add it into dictionary - python

This is an empty dictionary:
d = {}
This is the CSV file data:
M, Max, Sporting, Football, Cricket
M, Jack, Sporting, Cricket, Tennis
M, Kevin, Sporting, Cricket, Basketball
M, Ben, Sporting, Football, Rugby
I tried the following code to append data from the CSV to the dictionary:
with open('example.csv', "r") as csvfile:
    csv_reader = csv.reader(csvfile)
    for row in csv_reader:
        if row:
            d.setdefault(row[0], {})[row[1]] = {row[2]: [row[3]]}
But it gives me an error:
d.setdefault(row[0], {})[row[1]] = {row[2]: [row[3]]}
IndexError: list index out of range
Is there any way I can add data from the CSV to the dictionary in the form:
d = {'M': {'Max': {'Sporting': ['Football', 'Cricket']}, 'Jack': {'Sporting': ['Cricket', 'Tennis']}}}
I am new to this, so any help is appreciated.

import csv

d = {}
with open('JJ.csv', "r") as csvfile:
    csv_reader = csv.reader(csvfile)
    for row in csv_reader:
        if row:
            d.setdefault(row[0], {})[row[1]] = {row[2]: [row[3], row[4]]}
print(d)
{'M': {' Max': {' Sporting': [' Football', ' Cricket']}, ' Jack': {' Sporting': [' Cricket', ' Tennis']}, ' Kevin': {' Sporting': [' Cricket', ' Basketball']}, ' Ben': {' Sporting': [' Football', ' Rugby']}}}
To remove the leading/trailing spaces in the output, you can use the line below instead. There might be a better way that I'm not aware of right now.
d.setdefault(row[0],{})[row[1].strip()] = {row[2].strip(): [row[3].strip(),row[4].strip()]}
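If the extra spaces come only from the space that follows each comma, csv.reader's skipinitialspace flag handles that for you; here is a minimal sketch of the same loop, assuming the JJ.csv layout above:
import csv

d = {}
with open('JJ.csv', "r") as csvfile:
    # skipinitialspace drops the whitespace that follows each delimiter,
    # so no per-field .strip() calls are needed
    csv_reader = csv.reader(csvfile, skipinitialspace=True)
    for row in csv_reader:
        if row:
            d.setdefault(row[0], {})[row[1]] = {row[2]: [row[3], row[4]]}
print(d)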

You can use a nested collections.defaultdict tree and check if the rows are long enough:
from collections import defaultdict

def tree():
    return defaultdict(tree)

d = tree()
# ...
for row in csv_reader:
    if len(row) >= 3:
        d[row[0]][row[1]][row[2]] = row[3:]
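For completeness, a minimal sketch of how the tree could be wired into the CSV loop, assuming the same example.csv and skipinitialspace to drop the stray spaces:
import csv
from collections import defaultdict

def tree():
    return defaultdict(tree)

d = tree()
with open('example.csv', 'r') as csvfile:
    csv_reader = csv.reader(csvfile, skipinitialspace=True)
    for row in csv_reader:
        # skip short or blank rows instead of raising IndexError
        if len(row) >= 3:
            d[row[0]][row[1]][row[2]] = row[3:]
print(d)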

Change "for column in csv_reader:" to "for row in csv_reader:"

Straightforwardly:
import csv, collections

with open('example.csv', 'r') as f:
    reader = csv.reader(f, skipinitialspace=True)
    result = collections.defaultdict(dict)
    for r in reader:
        if not result[r[0]].get(r[1]):
            result[r[0]][r[1]] = {}
        if not result[r[0]][r[1]].get(r[2]):
            result[r[0]][r[1]][r[2]] = r[-2:]
print(dict(result))
The output:
{'M': {'Kevin': {'Sporting': ['Cricket', 'Basketball']}, 'Max': {'Sporting': ['Football', 'Cricket']}, 'Jack': {'Sporting': ['Cricket', 'Tennis']}, 'Ben': {'Sporting': ['Football', 'Rugby']}}}

Related

Writing dictionaries by Arrays into a csv file and inserting new lines by Array

I am trying to insert a new line into a CSV file that I am writing this data into. The data is:
data = [[{'Hi': 'O'}, {'mr': 'O'}, {'you': 'O'}, {'president': 'O'}, {'USA': 'Country'}, {'for': 'O'}, {'answering': 'O'}, {'football': 'O'}, {'questions': 'O'}, {'music': 'JAZZ'}], [{'Hi': 'O'}, {'You': 'O'}, {'have': 'O'}, {'granted': 'STATE'}, {'purchased': 'O'}, {'GHC3': 'O'}, {'Bundle': 'O'}, {'248803151': 'O'}]]
This is the code I have, but I am not sure how to re-code it to write a new line per array in the data.
from collections import defaultdict
import csv

def convert_to_biolu(dico, biolu_list=defaultdict(list)):  # dico here is output_data
    for dict_item in dico:  # you can list as many input dicts as you want
        for key, value in dict_item.items():
            if value not in biolu_list[key]:
                biolu_list[key].append(value)
    return biolu_list

def save_to_file(path, data_):
    data_ = [convert_to_biolu(item) for item in data][-1]
    with open(path, 'w', newline='') as file:
        fieldnames = ['word', 'label']
        writer = csv.DictWriter(file, fieldnames=fieldnames)
        writer.writeheader()
        for key, val in data_.items():
            writer.writerow({'word': key, 'label': " ".join(val)})
You can write CSVs without the csv module.
I prefer to do it myself, like this:
def write_csv_with_spaces(data, filename):
    with open(filename, 'w+') as file:
        for group in data:
            for record in group:
                # one "key,value" pair per dict, comma-joined on a single line
                file.write(','.join([str(key) + ',' + str(value) for key, value in record.items()]) + '\n')
            file.write('\n')
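If you would rather stay with the csv module, the same effect can be had by writing an empty row between groups; a rough sketch (the helper name is made up for illustration):
import csv

def write_grouped_csv(data, filename):
    # hypothetical helper: one "word,label" row per dict,
    # plus an empty row after each inner list to separate the groups
    with open(filename, 'w', newline='') as f:
        writer = csv.writer(f)
        for group in data:
            for item in group:
                for word, label in item.items():
                    writer.writerow([word, label])
            writer.writerow([])  # blank line between arrays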

Save text elements separated by tab and commas in lists

I have the following text format in Python:
126 attr1,attr7,attr4 and attr8
1 attr6,attr2,attr9,attr78,attr23,attr56,attr75,attr77
5 attr5,attr3,attr2
7 attr0
67 attr12,attr13,attr14
I want to save the IDs (126, 1, 5, etc.) in a list and each line's attributes in a list or dict. I saved the IDs with the following code, but I can't save the attributes. Here is my code:
file = open("myfile.txt","r")
lines = file.readlines()
nodes = []
skills = [] #or dict()
for x in lines:
nodes.append(x.split('\t')[0])
skills.append(x.split(',')[0]) #i want a list of lists or a dict with attrs
I think that this will do the trick:
for x in lines:
    x = x.split('\t')
    nodes.append(x[0])
    skills.append(x[1].split(','))
I would rather advise using a single dictionary with the IDs as keys and the attributes as a list of values:
d = {}
file = open("myfile.txt", "r")
lines = file.readlines()
for line in lines:
    splitted = line.split()
    d.update({splitted[0]: splitted[1].split(',')})
print(d)
# {'126': ['attr1', 'attr7', 'attr4', 'attr8'],
# '1': ['attr6', 'attr2', 'attr9', 'attr78', 'attr23', 'attr56', 'attr75', 'attr77'],
# '5': ['attr5', 'attr3', 'attr2'],
# '7': ['attr0'],
# '67': ['attr12', 'attr13', 'attr14']}
If you want the attributes kept as comma-joined strings:
attr_dict = {}
with open('file.txt', 'r') as f:
    for line in f:
        attr_dict[line.split()[0]] = line.split()[1:]
print(attr_dict)
output:
{'126': ['attr1,attr7,attr4', 'and', 'attr8'], '7': ['attr0'], '5': ['attr5,attr3,attr2'], '1': ['attr6,attr2,attr9,attr78,attr23,attr56,attr75,attr77'], '67': ['attr12,attr13,attr14']}
If you want individual elements:
attr_dict = {}
with open('file.txt', 'r') as f:
    for line in f:
        data = line.split()
        for sub_data in data:
            attr_dict[line.split()[0]] = sub_data.split(',')
print(attr_dict)
output:
{'126': ['attr8'], '7': ['attr0'], '5': ['attr5', 'attr3', 'attr2'], '1': ['attr6', 'attr2', 'attr9', 'attr78', 'attr23', 'attr56', 'attr75', 'attr77'], '67': ['attr12', 'attr13', 'attr14']}
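Note that the snippet above overwrites the value on every whitespace chunk, which is why '126' ends up with only ['attr8']. A small sketch that accumulates instead (the literal word 'and' from the first line stays in the list):
attr_dict = {}
with open('file.txt', 'r') as f:
    for line in f:
        data = line.split()
        attr_dict[data[0]] = []
        # extend rather than assign, so every chunk on the line is kept
        for sub_data in data[1:]:
            attr_dict[data[0]].extend(sub_data.split(','))
print(attr_dict)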

How to read a file block-wise in python

I am a bit stuck reading a file block-wise, and I am having difficulty getting some selected data from each block.
Here is my file content:
DATA.txt
#-----FILE-----STARTS-----HERE--#
#--COMMENTS CAN BE ADDED HERE--#
BLOCK IMPULSE DATE 01-JAN-2010 6 DEHDUESO203028DJE \
SEQUENCE=ai=0:at=221:ae=3:lu=100:lo=NNU:ei=1021055:lr=1: \
USERID=ID=291821 NO_USERS=3 GROUP=ONE id_info=1021055 \
CREATION_DATE=27-JUNE-2013 SN=1021055 KEY ="22WS \
DE34 43RE ED54 GT65 HY67 AQ12 ES23 54CD 87BG 98VC \
4325 BG56"
BLOCK PASSION DATE 01-JAN-2010 6 DEHDUESO203028DJE \
SEQUENCE=ai=0:at=221:ae=3:lu=100:lo=NNU:ei=324356:lr=1: \
USERID=ID=291821 NO_USERS=1 GROUP=ONE id_info=324356 \
CREATION_DATE=27-MAY-2012 SN=324356 KEY ="22WS \
DE34 43RE 342E WSEW T54R HY67 TFRT 4ER4 WE23 XS21 \
CD32 12QW"
BLOCK VICTOR DATE 01-JAN-2010 6 DEHDUESO203028DJE \
SEQUENCE=ai=0:at=221:ae=3:lu=100:lo=NNU:ei=324356:lr=1: \
USERID=ID=291821 NO_USERS=5 GROUP=ONE id_info=324356 \
CREATION_DATE=27-MAY-2012 SN=324356 KEY ="22WS \
DE34 43RE 342E WSEW T54R HY67 TFRT 4ER4 WE23 XS21 \
CD32 12QW"
#--BLOCK--ENDS--HERE#
#--NEW--BLOCKS--CAN--BE--APPENDED--HERE--#
I am only interested in the block name, NO_USERS, and id_info of each block.
These three values should be saved to a data structure (let's say a dict), which is then stored in a list:
[{Name: IMPULSE, NO_USER=3, id_info=1021055}, {Name: PASSION, NO_USER=1, id_info=324356}, ...]
Any other data structure that can hold the info would also be fine.
So far I have tried getting the block names by reading line by line:
fOpen = open('DATA.txt')
unique = []
for row in fOpen:
    if "BLOCK" in row:
        unique.append(row.split()[1])
print unique
I am thinking of a regular expression approach, but I have no idea where to start.
Any help would be appreciated. Meanwhile I am also trying; I will update if I get something. Please help.
You could use itertools.groupby to find each block, use a regex to extract the info, and put the values in dicts:
from itertools import groupby
import re

with open("test.txt") as f:
    data = []
    # find NO_USERS= 1+ digits or id_info= 1+ digits
    r = re.compile(r"NO_USERS=\d+|id_info=\d+")
    grps = groupby(f, key=lambda x: x.strip().startswith("BLOCK"))
    for k, v in grps:
        # if k is True we have a BLOCK line
        if k:
            # get name after BLOCK
            name = next(v).split(None, 2)[1]
            # get the lines after BLOCK and take the second of those
            t = next(grps)[1]
            # we want two lines after BLOCK
            _, l = next(t), next(t)
            d = dict(s.split("=") for s in r.findall(l))
            # add name to dict
            d["Name"] = name
            # add dict to data list
            data.append(d)
print(data)
Output:
[{'NO_USERS': '3', 'id_info': '1021055', 'Name': 'IMPULSE'},
{'NO_USERS': '1', 'id_info': '324356', 'Name': 'PASSION'},
{'NO_USERS': '5', 'id_info': '324356', 'Name': 'VICTOR'}]
Or without groupby: since your file follows a fixed format, we just need to extract the second line after each BLOCK line:
with open("test.txt") as f:
data = []
r = re.compile("NO_USERS=\d+|id_info=\d+")
for line in f:
# if True we have a new block
if line.startswith("BLOCK"):
# call next twice to get thw second line after BLOCK
_, l = next(f), next(f)
# get name after BLOCK
name = line.split(None,2)[1]
# find our substrings from l
d = dict(s.split("=") for s in r.findall(l))
d["Name"] = name
data.append(d)
print(data)
Output:
[{'NO_USERS': '3', 'id_info': '1021055', 'Name': 'IMPULSE'},
{'NO_USERS': '1', 'id_info': '324356', 'Name': 'PASSION'},
{'NO_USERS': '5', 'id_info': '324356', 'Name': 'VICTOR'}]
To extract values you can iterate:
for dct in data:
    print(dct["NO_USERS"])
Output:
3
1
5
If you want a dict of dicts and to access each section by index from 1 to n, you can store them as nested dicts using 1 to n as the keys:
from itertools import count
import re

with open("test.txt") as f:
    data, cn = {}, count(1)
    r = re.compile(r"NO_USERS=\d+|id_info=\d+")
    for line in f:
        if line.startswith("BLOCK"):
            _, l = next(f), next(f)
            name = line.split(None, 2)[1]
            d = dict(s.split("=") for s in r.findall(l))
            d["Name"] = name
            data[next(cn)] = d
data["num_blocks"] = next(cn) - 1
Output:
from pprint import pprint as pp
pp(data)
{1: {'NO_USERS': '3', 'Name': 'IMPULSE', 'id_info': '1021055'},
2: {'NO_USERS': '1', 'Name': 'PASSION', 'id_info': '324356'},
3: {'NO_USERS': '5', 'Name': 'VICTOR', 'id_info': '324356'},
'num_blocks': 3}
'num_blocks' will tell you exactly how many blocks you extracted.
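As a quick usage example, the numbered keys plus 'num_blocks' let you walk the blocks in order:
for i in range(1, data["num_blocks"] + 1):
    block = data[i]
    print("%s NO_USERS=%s id_info=%s" % (block["Name"], block["NO_USERS"], block["id_info"]))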

How to store CSV data in a Nested Dictionary that has a dictionary and a list?

I have the following CSV data:
Rule1,Status1,1
Rule1,Status2,1
Rule1,Status3,1
Rule1,Status4,2
Rule2,Status1,2
Rule2,Status2,1
Rule2,Status3,1
Rule2,Status4,3
I have unique rules (first column) stored in a list called Rules. I want my dictionary to look like the following:
DictionaryFull = {
'Rule1' : {1 : [Status1, Status2, Status3], 2 : [Status4]},
'Rule2' : {1 : [Status2, Status3], 2 : [Status1], 3 : [Status4]}
}
Here is what I tried:
openfile = open('data.csv', 'rU')
finalfile = csv.reader(openfile, delimiter=',')
FullDictionary = {}
for row in finalfile:
    for j in range(0, 300):  # 300 is the number of rules
        if Rules[j] not in FullDictionary:
            for i in range(1, 71):  # these are the third-column numbers 1 - 71
                if i == int(row[2]) and row[0] == Rules[j]:
                    FullDictionary = {Rules[j]: {i: [].append(row[1])}}
print FullDictionary
But I am getting the following as the result:
{'Rule1': {1 : None}} and so on
Am I doing something wrong? How do I accomplish this task of having a dictionary that contains both another dictionary and a list?
I tried this:
def something():
    full_dictionary = {}
    with open(DataFilePath) as f:
        reader = csv.reader(f)
    for row in reader:
        rule = row[2]
        status = row[0]
        num = int(row[5])
        r = full_dictionary.setdefault(rule, {})
        r.setdefault(num, []).append(status)
    print full_dictionary
The error: ValueError: I/O operation on closed file
How about using collections.defaultdict:
import csv
from collections import defaultdict

full_dictionary = defaultdict(lambda: defaultdict(list))
with open('data.csv') as f:
    reader = csv.reader(f)
    for rule, status, num in reader:
        full_dictionary[rule][num].append(status)
print full_dictionary
output:
defaultdict(<function <lambda> at 0x00000000025A6438>, {
'Rule2': defaultdict(<type 'list'>, {
'1': ['Status2', 'Status3'],
'3': ['Status4'],
'2': ['Status1']
}),
'Rule1': defaultdict(<type 'list'>, {
'1': ['Status1', 'Status2', 'Status3'],
'2': ['Status4']
})
})
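If the defaultdict repr in that output bothers you, a small sketch to convert it into plain dicts before printing (works on Python 2.7+):
plain = {rule: dict(statuses) for rule, statuses in full_dictionary.items()}
print(plain)
# e.g. {'Rule1': {'1': ['Status1', 'Status2', 'Status3'], '2': ['Status4']}, ...}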
If you don't want to use defaultdict, you have to take care of new keys yourself.
For example, using dict.setdefault:
import csv

full_dictionary = {}
with open('data.csv') as f:
    reader = csv.reader(f)
    for rule, status, num in reader:
        r = full_dictionary.setdefault(rule, {})
        r.setdefault(num, []).append(status)
print full_dictionary
output:
{'Rule1': {'1': ['Status1', 'Status2', 'Status3'], '2': ['Status4']},
'Rule2': {'1': ['Status2', 'Status3'], '2': ['Status1'], '3': ['Status4']}}
list.append returns None, so your assignment Rules[j] = [].append(row[1]) is setting Rules[j] = None.
Amend that to:
FullDictionary = {Rules[j]: {i: [row[1]]}}
or
old_value = Rules[j].get(i, [])
old_value.append(row[1])
depending on what you wish to achieve.
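A two-line check makes the None point concrete:
lst = []
result = lst.append('Status1')
print(result)  # None  (append mutates lst in place)
print(lst)     # ['Status1']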

Making a CSV list in Python

I am new to Python and need help. I am trying to make a list of comma-separated values.
I have this data:
EasternMountain 84,844 39,754 24,509 286 16,571 3,409 315
EasternHill 346,373 166,917 86,493 1,573 66,123 23,924 1,343
EasternTerai 799,526 576,181 206,807 2,715 6,636 1,973 5,214
CentralMountain 122,034 103,137 13,047 8 2,819 2,462 561
Now how do I get something like this:
"EasternMountain": 84844,
"EasternHill":346373,
and so on??
So far I have been able to do this:
fileHandle = open("testData", "r")
data = fileHandle.readlines()
fileHandle.close()
dataDict = {}
for i in data:
temp = i.split(" ")
dataDict[temp[0]]=temp[1]
with_comma='"'+temp[0]+'"'+':'+temp[1]+','
print with_comma
Use the csv module
import csv

with open('k.csv', 'r') as csvfile:
    reader = csv.reader(csvfile, delimiter=' ')
    my_dict = {}
    for row in reader:
        my_dict[row[0]] = [''.join(e.split(',')) for e in row[1:]]
print my_dict
k.csv is a text file containing:
EasternMountain 84,844 39,754 24,509 286 16,571 3,409 315
EasternHill 346,373 166,917 86,493 1,573 66,123 23,924 1,343
EasternTerai 799,526 576,181 206,807 2,715 6,636 1,973 5,214
CentralMountain 122,034 103,137 13,047 8 2,819 2,462 561
Output:
{'EasternHill': ['346373', '166917', '86493', '1573', '66123', '23924', '1343', ''], 'EasternTerai': ['799526', '576181', '206807', '2715', '6636', '1973', '5214', ''], 'CentralMountain': ['122034', '103137', '13047', '8', '2819', '2462', '561', ''], 'EasternMountain': ['84844', '39754', '24509', '286', '16571', '3409', '315', '']}
Try this:
def parser(file_path):
    d = {}
    with open(file_path) as f:
        for line in f:
            if not line:
                continue
            parts = line.split()
            d[parts[0]] = [part.replace(',', '') for part in parts[1:]]
    return d
Running it:
result = parser("testData")
for key, value in result.items():
    print key, ':', value
Result:
EasternHill : ['346373', '166917', '86493', '1573', '66123', '23924', '1343']
EasternTerai : ['799526', '576181', '206807', '2715', '6636', '1973', '5214']
CentralMountain : ['122034', '103137', '13047', '8', '2819', '2462', '561']
EasternMountain : ['84844', '39754', '24509', '286', '16571', '3409', '315']
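If you only want the first number per row as an int, which is the exact form shown in the question, here is a minimal variation of the parser above (the function name is made up for illustration):
def first_value_parser(file_path):
    # hypothetical variant: keep only the first numeric column per row
    d = {}
    with open(file_path) as f:
        for line in f:
            parts = line.split()
            if len(parts) < 2:
                continue
            d[parts[0]] = int(parts[1].replace(',', ''))
    return d

print(first_value_parser("testData"))
# {'EasternMountain': 84844, 'EasternHill': 346373, ...}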
