I'm trying to build a nested dictionary in which the username is the outer key and the remaining fields are the inner keys. For some reason it does not handle the lines separately to create one nested dictionary per line; instead, it gets stuck on the first line of the text file. Please help.
MY TEXT FILE
shanm = null|Shanmugaraja|09/04/2002|0149606345|020409140817|0102393345|4770 4848 0109 0294
jiken = null|Soo Jiken|08/06/2000|0149600239|020908140213|011349780|8900 2828 1129 0889
MY CODE FOR NESTED DICTIONARY
# Asker's attempt: build {username: {field: value}} from "something.txt".
with open("something.txt", 'r') as f:
data_dict = {}
data_dict2 = {}
data3 = {}
for line in f:
# NOTE(review): f.read() consumes everything after the current line, so the
# iteration ends after the first line (matches "OUTPUT RECEIVED" below).
f.read()
k, v = line.strip().split("=")
# The whole line (not just v) is split on '|', so element 0 is
# "username = null" and the real fields start at index 1.
listDetails = line.strip().split('|')
data_dict = {"Full Name": listDetails[1]}
data_dict.update({"Date of Birth": listDetails[2]})
data_dict.update({"Phone Number": listDetails[3]})
data_dict.update({"Identification Card (IC) Number": listDetails[4]})
data_dict.update({"Emergency Contact Number": listDetails[5]})
data_dict.update({"Credit /Debit Card Details ": listDetails[6]})
# k still carries the space that precedes "=", hence keys like 'shanm '.
data3[k] = data_dict
# NOTE(review): data3 accumulates every record; if this print sits inside the
# loop (indentation was lost) it shows the growing dict on each iteration.
print(data3)
DESIRED OUTPUT
{'shanm ': {'Full Name': 'Shanmugaraja', 'Date of Birth': '09/04/2002', 'Phone Number': '0149606345', 'Identification Card (IC) Number': '020409140817', 'Emergency Contact Number': '0102393345', 'Credit /Debit Card Details ': '4770 4848 0109 0294'}}
{'jiken ': {'Full Name': 'Soo Ji', 'Date of Birth': '08/06/2000', 'Phone Number': '0149600239', 'Identification Card (IC) Number': '020908140213', 'Emergency Contact Number': '011349780', 'Credit /Debit Card Details ': '8900 2828 1129 0889'}}
OUTPUT RECEIVED
{'shanm ': {'Full Name': 'Shanmugaraja', 'Date of Birth': '09/04/2002', 'Phone Number': '0149606345', 'Identification Card (IC) Number': '020409140817', 'Emergency Contact Number': '0102393345', 'Credit /Debit Card Details ': '4770 4848 0109 0294'}}
#NOT READING THE NEXT PART OF THE LINE
OR (WITHOUT "F.READ()"
{'shanm ': {'Full Name': 'Shanmugaraja', 'Date of Birth': '09/04/2002', 'Phone Number': '0149606345', 'Identification Card (IC) Number': '020409140817', 'Emergency Contact Number': '0102393345', 'Credit /Debit Card Details ': '4770 2828 0109 0394'}}
{'shanm ': {'Full Name': 'Shanmugaraja', 'Date of Birth': '09/04/2002', 'Phone Number': '0149606345', 'Identification Card (IC) Number': '020409140817', 'Emergency Contact Number': '0102393345', 'Credit /Debit Card Details ': '4770 2828 0109 0394'}, 'jiken ': {'Full Name': 'Soo Jiken', 'Date of Birth': '08/06/2000', 'Phone Number': '0149600239', 'Identification Card (IC) Number': '020908140213', 'Emergency Contact Number': '011349780', 'Credit /Debit Card Details ': '8900 2828 1129 0889'}}
READS EVERYTHING ALL OVER AGAIN
try updating your code to this:
# Build {username: {field: value}} from lines shaped like
#   shanm = null|Full Name|DOB|Phone|IC|Emergency contact|Card
# and print one single-entry dict per input line.
data3 = {}
with open("something.txt", 'r') as f:
    # Iterate the file object directly. Never call f.read() inside the loop:
    # it consumes every remaining line and the loop stops after one pass.
    for line in f:
        line = line.strip()
        if not line:
            continue  # tolerate blank lines such as a trailing newline
        # Split on the first "=" only, so a "=" inside a field cannot break
        # the two-way unpacking.
        k, v = line.split("=", 1)
        listDetails = v.split('|')
        # listDetails[0] is the "null" placeholder; real fields start at 1.
        data_dict = {
            "Full Name": listDetails[1],
            "Date of Birth": listDetails[2],
            "Phone Number": listDetails[3],
            "Identification Card (IC) Number": listDetails[4],
            "Emergency Contact Number": listDetails[5],
            "Credit /Debit Card Details ": listDetails[6],
        }
        # Keep every record for later lookup by username ...
        data3[k] = data_dict
        # ... but print only the current one, so each line of output holds
        # exactly one user instead of the ever-growing data3.
        print({k: data_dict})
Looks like you want a list of dictionaries. If so, you could do it like this:
# Build a list of single-entry dicts, one per line of something.txt:
#   [{username: {field: value, ...}}, ...]
keys = ['Full name', 'Date of Birth', 'Phone Number',
        'Identification Card (IC) Number', 'Emergency Contact Number', 'Credit /Debit Card Details']
lod = []
with open('something.txt') as infile:
    for line in infile:
        if not line.strip():
            continue  # skip blank lines instead of failing to unpack
        key, raw_values = line.split('=', 1)
        # strip() removes the newline that would otherwise stay glued to the
        # last field; [1:] drops the leading "null" placeholder.
        d = dict(zip(keys, raw_values.strip().split('|')[1:]))
        lod.append({key.strip(): d})
for d in lod:
    # Each dict has exactly one key: the username.
    name = next(iter(d))
    print(d[name]['Full name'])
# Build {username: {field: value}} so each record can be fetched by username.
keys = ["Full Name", "Date of Birth", "Phone Number",
        "Identification Card (IC) Number", "Emergency Contact Number",
        "Credit /Debit Card Details "]
data = {}
with open("something.txt", 'r') as f:
    for line in f:
        line = line.strip()
        if not line:
            continue  # skip blank lines instead of failing to unpack
        # Split on the first "=" and strip the key, so the result does not
        # depend on the exact spacing around "=" and has no trailing space.
        key, values = line.split("=", 1)
        key = key.strip()
        # values[0] is the "null" placeholder; the real fields follow it.
        values = values.split('|')
        data[key] = dict(zip(keys, values[1:]))
print(data)
print(data.get('jiken'))
Probably you want to store each dict data in some sort of container type like list.
UPDATE: I edited my code to create a dict and access each record by username.
Related
This question already has answers here:
Remove duplicate dict in list in Python
(16 answers)
Closed 8 months ago.
I have a bit complex list of dictionaries which looks like
[
{'Name': 'Something XYZ', 'Address': 'Random Address', 'Customer Number': '-', 'User Info': [{'Registration Number': '17002', 'First Name': 'John', 'Middle Name': '', 'Last Name': 'Denver'}, {'Registration Number': '27417', 'First Name': 'Robert', 'Middle Name': '', 'Last Name': 'Patson'}]},
{'Name': 'Something XYZ', 'Address': 'Random Address', 'Customer Number': '-', 'User Info': [{'Registration Number': '27417', 'First Name': 'Robert', 'Middle Name': '', 'Last Name': 'Patson'}, {'Registration Number': '17002', 'First Name': 'John', 'Middle Name': '', 'Last Name': 'Denver'}]}
]
Expected is below
[
{'Name': 'Something XYZ', 'Address': 'Random Address', 'Customer Number': '-', 'User Info': [{'Registration Number': '17002', 'First Name': 'John', 'Middle Name': '', 'Last Name': 'Denver'}, {'Registration Number': '27417', 'First Name': 'Robert', 'Middle Name': '', 'Last Name': 'Patson'}]},
]
I want to remove the duplicate dictionaries in this list but I don't know how to deal with User Info because the order of the items might be different. A duplicate case would be where all the dictionary items are exactly the same and in the case of User Info order doesn't matter.
I think the best way is to build a hash of User Info by summing the hash values of its elements (a sum is unaffected by a change in element order).
def deepHash(value):
    """Return an order-insensitive hash of a nested list/dict structure.

    Lists contribute the sum of their elements' hashes and dicts the sum of
    ``hash(key) * hash(value)`` over their items, so reordering list elements
    or dict entries does not change the result. Any other value is hashed
    through its ``str()`` form.

    Equal hashes are a necessary condition for equality, not a proof of it:
    sums can collide, and values with the same ``str()`` (``1`` and ``"1"``)
    hash alike.
    """
    # isinstance() also covers subclasses (OrderedDict, defaultdict, ...);
    # an exact type() comparison would send those to the str() fallback,
    # which is sensitive to ordering.
    if isinstance(value, list):
        return sum(deepHash(x) for x in value)
    if isinstance(value, dict):
        return sum(deepHash(k) * deepHash(v) for k, v in value.items())
    return hash(str(value))
and you can simply compare the hashes of your inputs:
assert deepHash({"a": [1,2,3], "c": "d"}) == deepHash({"c": "d", "a": [3,2,1]})
I have a long dict that was created by merging lists of tuples. The dict contains the values from those lists, so its entries follow a repeating order, like
value:key1, value:key2, value:key3, value:key1, value:key2, value:key3
But that's not a rule! There are places where key2 is missing and places where there is, for example, a key4.
The values have different keys.
So it look like much like this
value:key1, value:key2, value:key1, value:key2, value:key4
I would like to create a csv file from this data. I would like to look over the dict, look at the keys, add these keys to csv header, if it doesn't contain that key and add value to that keys and none if there is not a value.
So I have this
{'www.example1.com': 'url', 'FAILURE TO APPEAR (FOR FELONY OFFENSE) - FELONY': 'Charge', 'SIMULTANEOUS POSSESSION OF DRUGS AND FIREARMS - FELONY': 'Offense Date', 'POSSESSION WITH INTENT TO DELIVER METHAMPHETAMINE OR COCAINE': 'Court Type', 'Count=3': 'Court Date', '10-30-2019': 'Bond', '11-16-2019': 'Charging Agency', '': 'DEGREE', '181680713': 'ID', '24': 'Age', 'H': 'Race', 'M': 'Sex', 'BRO': 'Eye Color', 'BLK': 'Hair Color', '175 lb (79 kg)': 'Weight', '5′ 10″ (1.78 m)': 'Height', 'address example': 'Address', '11/16/2019 at 22:07': 'Admit Date', 'Benton Co': 'Confining Agency',
'www.example2.com': 'url', '32-5a-191.4': 'STATUTE', '000-0000 (ALABAMA STATE TROOPERS)': 'COURT CASE NUMBER', 'IGNITION INTERLOCK VIOLATION': 'Description', 'V': 'LEVEL', '$1000.00': 'Bond Set Amount', '181727213': 'ID', 'name example': 'Name', 'W': 'Race', 'MALE': 'Gender', 'Released': 'Inmate Status', 'some number': 'Booking No', 'some number': 'Inmate Number', '11/18/2019 at 16:49': 'Booking Date', '11/18/2019 at 20:35': 'Release Date', '33': 'Arrest Age', 'some address': 'Address Given'}
and I would like to have a csv file like this
url | Charge | Statute
1 www.example1.com SIMULTANEOUS none
2 www.example2.com none 32-5a-191.4
order in header is not important.
I tried this code, but it overwrites the data in the first row instead of appending new rows...
# Asker's attempt: group values by column name and write them out as CSV.
res = defaultdict(list)
# NOTE(review): iterating a dict yields only its keys, so unpacking pairs
# here presumably needs my_dict.items() (or my_dict is still a list of
# tuples at this point) - confirm.
d = dict((y, x) for x, y in my_dict)
for key, val in sorted(d.items()):
res[val].append(key)
df = pd.DataFrame.from_dict(res, orient='index').fillna(np.nan).T
df.to_csv("file.csv")
In your example, I see that every new row starts with a url entry.
I think this code will do it.
from collections import defaultdict
import pandas as pd

# Flat {value: column_name} mapping. Every entry whose column is 'url' marks
# the start of a new record.
my_dict = {
    'www.example1.com': 'url',
    'FAILURE TO APPEAR (FOR FELONY OFFENSE) - FELONY': 'Charge',
    'SIMULTANEOUS POSSESSION OF DRUGS AND FIREARMS - FELONY': 'Offense Date',
    'POSSESSION WITH INTENT TO DELIVER METHAMPHETAMINE OR COCAINE': 'Court Type',
    'Count=3': 'Court Date',
    '10-30-2019': 'Bond',
    '11-16-2019': 'Charging Agency',
    '': 'DEGREE',
    '181680713': 'ID',
    '24': 'Age',
    'H': 'Race',
    'M': 'Sex',
    'BRO': 'Eye Color',
    'BLK': 'Hair Color',
    '175 lb (79 kg)': 'Weight',
    '5′ 10″ (1.78 m)': 'Height',
    'address example': 'Address',
    '11/16/2019 at 22:07': 'Admit Date',
    'Benton Co': 'Confining Agency',
    'www.example2.com': 'url',
    '32-5a-191.4': 'STATUTE',
    '000-0000 (ALABAMA STATE TROOPERS)': 'COURT CASE NUMBER',
    'IGNITION INTERLOCK VIOLATION': 'Description',
    'V': 'LEVEL',
    '$1000.00': 'Bond Set Amount',
    '181727213': 'ID',
    'name example': 'Name',
    'W': 'Race',
    'MALE': 'Gender',
    'Released': 'Inmate Status',
    # NOTE: the next two entries share the key 'some number'; a dict keeps
    # only the last one, so 'Booking No' is silently dropped.
    'some number': 'Booking No',
    'some number': 'Inmate Number',
    '11/18/2019 at 16:49': 'Booking Date',
    '11/18/2019 at 20:35': 'Release Date',
    '33': 'Arrest Age',
    'some address': 'Address Given'
}

# Regroup the flat mapping into one {column_name: value} dict per record.
items = []
curr_dict = None
for value, column in my_dict.items():
    # Start a new record at every 'url' entry. Also start one if the data
    # does not begin with a 'url', so the first assignment below never hits
    # curr_dict being None.
    if column == 'url' or curr_dict is None:
        curr_dict = {}
        items.append(curr_dict)
    # An empty value is written as the literal text 'None'.
    curr_dict[column] = value if value else 'None'

# Columns absent from a record become NaN in the frame; fill them with the
# same 'None' text before writing the CSV.
df = pd.DataFrame(items).fillna('None')
df.to_csv("file.csv", index = False)
I have a list of string as follows:
e = ['Website: Alabama Office of the Attorney General',
'Toll Free: 1-800-392-5658',
'Website: State Banking Department',
'Toll Free: 1-866-465-2279',
'Website: Department of Insurance',
'Phone Number: 334-241-4141',
'Website: Securities Commission',
'Phone Number: 334-242-2984',
'Website: Public Service Commission',
'Toll Free: 1-800-392-8050']
I want to form dictionaries by splitting the strings at ":" and form dictionaries of each two elements in the list like:
e = [{'Website': 'Alabama Office of the Attorney General',
'Toll Free': '1-800-392-5658'},
{'Website': 'State Banking Department',
'Toll Free': '1-866-465-2279'},
{'Website': 'Department of Insurance',
'Phone Number': '334-241-4141'},
{'Website': 'Securities Commission',
'Phone Number': '334-242-2984'},
{'Website': 'Public Service Commission',
'Toll Free': '1-800-392-8050'}]
Thank you for your help as always.
If you want a dictionary per two lines. You can use:
ei = iter(e)
[{k:v for k,v in (x.split(':',1) for x in xs)} for xs in zip(ei,ei)]
generating:
>>> [{k:v for k,v in (x.split(':',1) for x in xs)} for xs in zip(ei,ei)]
[{'Website': ' Alabama Office of the Attorney General', 'Toll Free': ' 1-800-392-5658'}, {'Website': ' State Banking Department', 'Toll Free': ' 1-866-465-2279'}, {'Website': ' Department of Insurance', 'Phone Number': ' 334-241-4141'}, {'Website': ' Securities Commission', 'Phone Number': ' 334-242-2984'}, {'Website': ' Public Service Commission', 'Toll Free': ' 1-800-392-8050'}]
Or better formatted:
>>> [{k:v for k,v in (x.split(':',1) for x in xs)} for xs in zip(ei,ei)]
[{'Website': ' Alabama Office of the Attorney General', 'Toll Free': ' 1-800-392-5658'},
{'Website': ' State Banking Department', 'Toll Free': ' 1-866-465-2279'},
{'Website': ' Department of Insurance', 'Phone Number': ' 334-241-4141'},
{'Website': ' Securities Commission', 'Phone Number': ' 334-242-2984'},
{'Website': ' Public Service Commission', 'Toll Free': ' 1-800-392-8050'}]
If you want to remove the spaces in the value, we can use strip():
ei = iter(e)
[{k:v.strip() for k,v in (x.split(':',1) for x in xs)} for xs in zip(ei,ei)]
If there are n lines per dictionary, we can use:
n = 2
ei = iter(e)
[{k:v for k,v in (x.split(':',1) for x in xs)} for xs in zip(*((ei,)*n))]
# Assumption : Total 2*n entries are present
# Assumption: e holds "label: value" strings in consecutive pairs.
ans = []
# zip over the even and odd positions walks the list two items at a time; a
# trailing unpaired item is ignored instead of raising IndexError.
for first, second in zip(e[::2], e[1::2]):
    pair = {}
    for entry in (first, second):
        # partition() splits on the first ':' only, so a value that itself
        # contains ':' (a URL, a time) is kept whole.
        label, _, value = entry.strip().partition(':')
        # Strip the space that follows the colon in the input strings.
        pair[label] = value.strip()
    ans.append(pair)
Input :
{'Name': 'A','Blood Group': 'O +ve', 'Age': '1', 'Sex': 'M','Phone Number': '01234567', 'Mobile Number': '9876543210', 'Date of Birth': '01-01-95'}
1.
# Group the two phone fields under a nested 'Contact Info' dict.
d.update({'Contact Info': {'Mobile Number': d['Mobile Number'],
                           'Phone Number': d['Phone Number']}})
2.
d['Contact Info']={}
d['Contact Info']['Mobile Number']=d['Mobile Number']
Can you suggest a better or different way to create a dictionary key whose value is itself a dictionary?
Original Code:
import csv
import copy
from collections import namedtuple
d = {}
ls = []


def nest():
    """Read details.csv and collect one dict snapshot per row in ``ls``.

    Each row is merged into the module-level ``d`` and a copy of ``d`` taken
    after the merge is appended to the module-level ``ls``. The collected
    list is printed once all rows have been read.
    """
    with open("details.csv", 'r') as f:
        reader = csv.DictReader(f)
        for row in reader:
            d.update(row)
            # Append a copy: d is mutated again on the next row, so storing
            # d itself would leave every list entry pointing at one dict.
            ls.append(copy.deepcopy(d))
    # Function-call form of print works on both Python 2 and Python 3.
    print(ls)
nest()
This is how I would update my dict of dicts:
I would create a function that takes three arguments (the key of the sub-dict, the subkey within that sub-dict, and the new value), looks up the sub-dict to be updated, and then updates that value.
d = {
    'Name': 'A',
    'Personal Details': {'Blood Group': 'O +ve', 'Age': '1', 'Sex': 'M'},
    'Contact Info': {'Phone Number': '01234567', 'Mobile Number': '9876543210'},
    'Date of Birth': '01-01-95'
}


def updateInfo(toBeUpdated, subkey, ValueToUpdate):
    """Set ``d[toBeUpdated][subkey]`` to ``ValueToUpdate`` and print ``d``.

    Args:
        toBeUpdated: top-level key of ``d`` whose value is a nested dict.
        subkey: key inside that nested dict to create or overwrite.
        ValueToUpdate: the new value.

    Nothing is changed, and a message is printed instead, when
    ``toBeUpdated`` is missing from ``d`` or does not hold a dict.
    """
    if toBeUpdated not in d:
        print ("No %s to update" % (toBeUpdated))
        return
    section = d[toBeUpdated]
    # A plain value such as d['Name'] cannot take a subkey; report it rather
    # than failing with a TypeError on item assignment.
    if not isinstance(section, dict):
        print ("%s is not a nested dict" % (toBeUpdated))
        return
    # section is the very object stored in d, so assigning through it is
    # enough; no need to write it back.
    section[subkey] = ValueToUpdate
    print (d)


updateInfo('Contact Info','Mobile Number','999 999 9999')
the result I get from this:
{'Name': 'A', 'Personal Details': {'Blood Group': 'O +ve', 'Age': '1', 'Sex': 'M'}, 'Contact Info': {'Phone Number': '01234567', 'Mobile Number': '999 999 9999'}, 'Date of Birth': '01-01-95'}
In python I need to create 43 instances of a class 'Student' that includes the variables first_name, middle_name, last_name, student_id by reading in a file (Students.txt) and parsing it. The text file appears like this:
Last Name Midle Name First Name Student ID
----------------------------------------------
Howard Moe howar1m
Howard Curly howar1c
Fine Lary fine1l
Howard Shemp howar1s
Besser Joe besse1j
DeRita Joe Curly derit1cj
Tiure Desilijic Jaba tiure1jd
Tharen Bria thare1b
Tai Besadii Durga tai1db
Hego Damask hego1d
Lannister Tyrion lanni1t
Stark Arya stark1a
Clegane Sandor clega1s
Targaryen Daenerys targa1d
Bombadil Tom bomba1t
Brandybuck Meriadoc brand1m
Took Pregrin took1p
McCoy Leonard mccoy1l
Scott Montgomery scott1m
Crusher Wesley crush1w
Montoya Inigo monto1i
Rugen Tyrone rugen1t
Solo Han solo1h
Corey Carl corey1c
Flaumel Evelyn flaum1e
Taltos Vlad talto1v
e'Drien Morrolan edrie1m
Watson John watso1j
McCoy Ebenezar mccoy1e
Carpenter Molly carpe1m
Graystone Zoe grays1z
Adama William adama1w
Adama Joseph Leland adama1l
Roslin Laura rosli1l
Baltar Gaius balta1g
Tigh Ellen tigh1e
Tigh Saul tigh1s
Cottle Sherman cottl1s
Zarek Thomas zarek1t
Murphy James Alexander murph1a
Sobchak Walter sobch1w
Dane Alexander dane1a
Gruber Hans grube1h
Biggs John Gil biggs1gj
The class student is:
class Student (object):
    """A student record: first, middle and last name plus a student id."""

    def __init__(self, first_name, middle_name, last_name, student_id):
        # Double-underscore attributes are name-mangled by Python (stored as
        # _Student__first_name, ...), which keeps them private to the class.
        self.__first_name = first_name
        self.__middle_name = middle_name
        self.__last_name = last_name
        self.__student_id = student_id
What would be the easiest way to read into 'Students.txt' and create each instance of student?
Step by step tutorial
To read the file content, use io.open. Don't forget to specify the file encoding if any name contains accented characters.
with io.open('students.txt', mode="r", encoding="utf8") as fd:
content = fd.read()
Here, you read the whole content and store it in memory (amount of data is small). You can also use an iterator.
Then, you can split the content line by line with str.splitlines():
lines = content.splitlines()
# print(lines)
You get something like:
['Last Name Midle Name First Name Student ID ',
'----------------------------------------------',
'Howard Moe howar1m ',
'Howard Curly howar1c ',
'Fine Lary fine1l ',
'Howard Shemp howar1s ',
'Besser Joe besse1j ',
'DeRita Joe Curly derit1cj ',
'Tiure Desilijic Jaba tiure1jd ',
'Tharen Bria thare1b ']
You have (nearly) fixed-length lines, so you can use slices to extract the fields.
Here is what you can do for the header:
header = lines.pop(0)
fields = header[0:8], header[11:21], header[23:33], header[36:46]
# print(fields)
You get:
('Last Nam', 'Midle Name', 'First Name', 'Student ID')
You can drop the line of hyphens:
lines.pop(0)
For each line, you can extract values using slices too. Note: slice indices are slightly different:
for line in lines:
record = line[0:8], line[12:21], line[23:34], line[36:46]
# print(record)
You'll get values with trailing space:
('Howard ', ' ', ' Moe ', 'howar1m ')
('Howard ', ' ', ' Curly ', 'howar1c ')
('Fine ', ' ', ' Lary ', 'fine1l ')
('Howard ', ' ', ' Shemp ', 'howar1s ')
('Besser ', ' ', ' Joe ', 'besse1j ')
('DeRita ', 'Joe ', ' Curly ', 'derit1cj ')
('Tiure ', 'Desilijic', ' Jaba ', 'tiure1jd ')
('Tharen ', ' ', ' Bria ', 'thare1b ')
To avoid trailing spaces, use str.strip() function:
for line in lines:
record = line[0:8], line[12:21], line[23:34], line[36:46]
record = [v.strip() for v in record]
# print(record)
You get:
['Howard', '', 'Moe', 'howar1m']
['Howard', '', 'Curly', 'howar1c']
['Fine', '', 'Lary', 'fine1l']
['Howard', '', 'Shemp', 'howar1s']
['Besser', '', 'Joe', 'besse1j']
['DeRita', 'Joe', 'Curly', 'derit1cj']
['Tiure', 'Desilijic', 'Jaba', 'tiure1jd']
['Tharen', '', 'Bria', 'thare1b']
At this point, I recommend you to store your record as a dict in a list:
records = []
for line in lines:
    # Fixed-width slices: last name, middle name, first name, student id.
    record = line[0:8], line[12:21], line[23:34], line[36:46]
    record = [v.strip() for v in record]
    # Pair each value with its column title from `fields`. `header` is the
    # raw header string, so zipping with it would pair the values with
    # single characters.
    records.append(dict(zip(fields, record)))
You get:
[{'First Name': 'Moe', 'Last Nam': 'Howard', 'Midle Name': '', 'Student ID': 'howar1m'},
{'First Name': 'Curly', 'Last Nam': 'Howard', 'Midle Name': '', 'Student ID': 'howar1c'},
{'First Name': 'Lary', 'Last Nam': 'Fine', 'Midle Name': '', 'Student ID': 'fine1l'},
{'First Name': 'Shemp', 'Last Nam': 'Howard', 'Midle Name': '', 'Student ID': 'howar1s'},
{'First Name': 'Joe', 'Last Nam': 'Besser', 'Midle Name': '', 'Student ID': 'besse1j'},
{'First Name': 'Curly', 'Last Nam': 'DeRita', 'Midle Name': 'Joe', 'Student ID': 'derit1cj'},
{'First Name': 'Jaba', 'Last Nam': 'Tiure', 'Midle Name': 'Desilijic', 'Student ID': 'tiure1jd'},
{'First Name': 'Bria', 'Last Nam': 'Tharen', 'Midle Name': '', 'Student ID': 'thare1b'}]
But you can also use a class:
class Student(object):
    """A student record with a readable ``repr`` for debugging."""

    def __init__(self, first_name, middle_name, last_name, student_id):
        self.first_name = first_name
        self.middle_name = middle_name
        self.last_name = last_name
        self.student_id = student_id

    def __repr__(self):
        # Fields appear in constructor order: first, middle, last, id.
        fmt = "<Student('{first_name}', '{middle_name}', '{last_name}', '{student_id}')>"
        return fmt.format(first_name=self.first_name, middle_name=self.middle_name, last_name=self.last_name, student_id=self.student_id)
And construct a list of students:
students = []
for line in lines:
    # The file's columns run last name, middle name, first name, student id,
    # while Student() expects (first, middle, last, id). Unpack by name and
    # pass the values in constructor order; Student(*record) would store the
    # last name as the first name.
    last_name, middle_name, first_name, student_id = [
        v.strip() for v in (line[0:8], line[12:21], line[23:34], line[36:46])
    ]
    # repr() therefore shows the first name first, e.g.
    # <Student('Moe', '', 'Howard', 'howar1m')>
    students.append(Student(first_name, middle_name, last_name, student_id))
You get:
[<Student('Howard', '', 'Moe', 'howar1m')>,
<Student('Howard', '', 'Curly', 'howar1c')>,
<Student('Fine', '', 'Lary', 'fine1l')>,
<Student('Howard', '', 'Shemp', 'howar1s')>,
<Student('Besser', '', 'Joe', 'besse1j')>,
<Student('DeRita', 'Joe', 'Curly', 'derit1cj')>,
<Student('Tiure', 'Desilijic', 'Jaba', 'tiure1jd')>,
<Student('Tharen', '', 'Bria', 'thare1b')>]
# Build one Student per data line of students.txt by splitting on whitespace.
list_of_students = []
with open('students.txt') as f:
    for line_number, line in enumerate(f, start=1):
        data = line.split()
        # Lines 1 and 2 are the column titles and the dashed separator;
        # blank lines carry no record either.
        if line_number <= 2 or not data:
            continue
        # Columns run last name, [middle name], first name, student id.
        if len(data) == 3:
            last_name, first_name, student_id = data
            middle_name = ''
        elif len(data) == 4:
            last_name, middle_name, first_name, student_id = data
        else:
            raise ValueError(
                "line %d: expected 3 or 4 fields, got %d" % (line_number, len(data))
            )
        list_of_students.append(
            Student(first_name, middle_name, last_name, student_id)
        )
I'm not sure exactly how your input file is laid out, so there's a little processing here to handle the cases where there is no middle name.