Adding multiple dictionaries to a key in python dictionary - python

I am trying to add multiple dictionaries to a key.
e.g.
value = { column1 : {entry1 : val1}
{entry2 : val2}
column2 : {entry3 : val3}
{entry4 : val4}
}
What exactly I am trying to do with this code is:
There is a file.txt which has columns and valid entries for that header. I am trying to make a dictionary with columns as key and for each column another dictionary for each valid entry.
So I am parsing the text file line by line to find the pattern for column and entries and storing it in a variable, check if the column(which is a key) already exists in the dictionary, if exists then add another dictionary to the column, if not create a new entry. I Hope this makes sense.
Sample contents of file.txt
blah blah Column1 blah blah
entry1 val1
entry2 val2
blah blah Column2 blah blah
entry3 val3
entry4 val4
My code:
from __future__ import unicode_literals
import os, re, string, gzip, fnmatch, io
from array import *
header = re.compile(...) #some regex
valid_entries = re.compile(---) #some regex
matches=[]
entries=[]
value = {'MONTH OF INTERVIEW' : {'01': 'MIN VALUE'}}
counter = 0
name = ''
f =open(r'C:/file.txt')
def exists(data, name):
# Reports whether `name` is one of the keys of `data`.
for key in data.keys():
if key == name :
print "existing key : " + name
return True
# NOTE(review): the paste lost its indentation; if this `else` pairs with the
# `if` above, the loop returns False as soon as the first key differs from
# `name`, so later keys are never compared - confirm against the original layout.
else :
return False
for line in f:
col = ''
ent = ''
line = re.sub(ur'\u2013', '-', line)
line = re.sub(ur'\u2026', '_', line)
m = header.match(line)
v = valid_entries.match(line)
if m:
name= ''
matches.append(m.groups())
_,_, name,_,_= m.groups()
#print "name : " + name
if v:
entries.append(v.groups())
ent,col= v.groups()
#print v.groups()
#print "col :" + col
#print "ent :" + ent
if (name is not None) and (ent is not None) and (col is not None):
print value
if exists(value, name):
print 'inside existing loop'
value[name].update({ent:col})
else:
value.update({name:{ent:col}})
print value
The problem with this code is that it replaces the values of the sub-dictionary, and it also does not add all the values to the dictionary.
I am new to python, so this could be a naive approach to handle this kind of situation. If you think there is a better way of getting what I want, I would really appreciate if you tell me.

Dictionaries have only one value per key. The trick is to make that value a container too, like a list:
# Each column maps to a list that holds one single-pair dict per entry.
value = {
    'column1': [{entry1: val1}, {entry2: val2}],
    'column2': [{entry3: val3}, {entry4: val4}],
}
Use dict.setdefault() to insert a list value when there is no value yet:
if name is not None and ent is not None and col is not None:
value.setdefault(name, []).append({ent: col})
You could just make the values one dictionary with multiple (ent, col) key-value pairs here:
if name is not None and ent is not None and col is not None:
value.setdefault(name, {})[ent] = col
Your exists() function was overcomplicating a task dictionaries excel at; testing for a key is done using in instead:
if name in value:
would have sufficed.

I would keep the values as lists of dictionaries, so you can extend or append to them:
>>> d = {}
>>> d[1] = [{'a': 1}]
>>> d[1].append({'b':2})
>>> d
{1: [{'a': 1}, {'b': 2}]}

You can use defaultdict and regex for this (demo here):
import re
from collections import defaultdict

# Compiled once; raw strings keep "\d" from being read as a string escape.
KEY_RE = re.compile(r'Column\d')
VALUE_RE = re.compile(r'(?<=entry\d ).*')

d = defaultdict(list)  # dict with default value: []
lastKey = None
# The 'rU' mode was removed in Python 3.11; the default text mode is enough.
with open('/path/to/file.txt') as f:  # read the contents from the file
    for line in f:
        m = KEY_RE.search(line)  # search the current line for a key
        if m:
            lastKey = m.group()
            continue
        m = VALUE_RE.search(line)  # search the current line for a value
        # Ignore values that appear before the first header, otherwise they
        # would be collected under the key None.
        if m and lastKey is not None:
            d[lastKey].append(m.group())  # append the value
Output:
[('Column1', ['val1', 'val2']), ('Column2', ['val3', 'val4'])]
Note: Of course, the above code assumes your file.txt was formatted as in your example. For your real file.txt data you might have to adjust the regex.

Related

Problem of incorrect output for dictionary returned from file

File contains student ID and ID of the solved problem.
Example:
1,2
1,4
1,3
2,1
2,2
2,3
2,4
The task is to write a function which will take a filename as an argument and return a dictionary with a student ID and amount of solved tasks.
Example output:
{1:3, 2:4}
My code doesn't produce the correct output. Please help me find the mistake and a solution.
import collections
def solved_tasks(filename):
# Reads "student_id,task_id" lines from `filename`.
with open(filename) as f:
for line in f.readlines():
key,value = line.strip().split(',')
# NOTE: this rebinds `dictionary` to a brand-new one-entry dict for every
# line, so nothing accumulates between lines; the Counter tallies the
# characters of the task ID, not the number of rows per student.
dictionary = {key: collections.Counter(str(value))}
return dictionary
Since you only care about the sum, not the individual exercises, you can use a Counter on the first column:
def solved_tasks(filename):
    """Count solved tasks per student.

    Every non-blank line of *filename* is expected to look like
    ``student_id,task_id``.  Returns a dict mapping the integer student ID
    to the number of rows recorded for that student.
    """
    with open(filename) as in_stream:
        counts = collections.Counter(
            line.partition(',')[0].strip()  # first column ...
            for line in in_stream
            # Lines read from a file keep their "\n", so a bare `if line`
            # never filters anything; strip before testing for emptiness.
            if line.strip()  # ... of every non-blank row
        )
    return {int(key): value for key, value in counts.items()}
Assuming that you want to save the repeated instances of student id, you can use a defaultdict and save the problems solved by each student as a list in your dictionary:
import collections


def solved_tasks(filename):
    """Group solved task IDs by student ID.

    Every non-blank line of *filename* is expected to look like
    ``student_id,task_id``.  Returns a ``defaultdict(list)`` mapping each
    student ID (as a string) to the list of task IDs in file order.
    """
    # Built per call: a module-level accumulator would keep growing every
    # time the function is called again.
    dictionary = collections.defaultdict(list)
    with open(filename) as f:
        for line in f:
            line = line.strip()
            if not line:  # tolerate blank / trailing empty lines
                continue
            key, value = line.split(',')
            dictionary[key].append(value)
    return dictionary
Output:
defaultdict(<type 'list'>, {'1': ['2', '4', '3'], '2': ['1', '2', '3', '4']})
If you want the sum:
def solved_tasks(filename):
    """Count the rows recorded for each student ID.

    Every non-blank line of *filename* is expected to look like
    ``student_id,task_id``.  Returns a ``defaultdict(int)`` mapping each
    student ID (as a string) to its number of rows.
    """
    # An int-valued defaultdict is required here: `+= 1` on a
    # defaultdict(list) would raise TypeError.
    dictionary = collections.defaultdict(int)
    with open(filename) as f:
        for line in f:
            line = line.strip()
            if not line:  # tolerate blank / trailing empty lines
                continue
            key, _ = line.split(',')
            dictionary[key] += 1
    return dictionary
Output:
defaultdict(<type 'int'>, {'1': 3, '2': 4})
you can count how often a key appears
# Sample data: one "student_id,task_id" pair per line; the last pair is a
# deliberate duplicate.
marks = """1,2
1,4
1,3
2,1
2,2
2,3
2,4
2,4"""
# Named `solved` rather than `dict` so the builtin is not shadowed.
solved = {}
for line in marks.split("\n"):
    key, value = line.strip().split(",")
    # .get() supplies an empty list the first time a student ID is seen.
    solved[key] = solved.get(key, []) + [value]
for key in solved:
    solved[key] = len(set(solved[key]))  # eliminate duplicates
The dict.get(key, []) call returns its default argument (here an empty list) when the key does not yet exist in the dictionary.
#Edit: You said there may contain duplicates. This method would eliminate all duplicates.
#Edit: Added multilines with """
def solved_tasks(filename):
    """Count solved tasks per student.

    Every non-blank line of *filename* is expected to look like
    ``student_id,task_id``.  Returns a dict mapping the integer student ID
    to the number of rows recorded for that student; an empty file gives
    an empty dict.
    """
    res = {}
    with open(filename, "r") as f:
        for line in f:
            # Take the whole first column, not just its first character, so
            # that multi-digit student IDs are counted correctly.
            student = line.partition(",")[0].strip()
            if not student:  # blank line
                continue
            student = int(student)
            res[student] = res.get(student, 0) + 1
    return res

How to add multiple values to dictionary keys in Python

I am trying to add a value (string) to the dictionary I already have in this form:
item1
item2
item3
myDictionary = {'key':[item1, item2]}
###Adding the third value (item3) to the already {'key': ['item1', 'item2']} dictionary.
myDictionary['key'] = [myDictionary['key'], item3]
###Ending with 3rd item being placed as a separate list of values:
#{'key': [['item1', 'item2'], item3]}
#instead of getting the expected and desirable:
{'key': ['item1', 'item2', 'item3']}
I already tried the solution from "How to add multiple values in dictionary having specific key", but myDictionary[key].append(value) raises AttributeError: 'str' object has no attribute 'append'. Using [myDictionary['key'], item3] was therefore the way I added the second value for the key, but it does not work for the third.
As requested, this is the actual code that I am trying to run:
# For every netstat line starting with "en", record the 4th
# whitespace-separated field under the adapter name (the 1st field).
currHostDict = {}
for x in netstatinfo:
result = open("result.txt", "a+")
z = re.match("^en", x)
if z:
adapter = re.split(" +", x)[0]
macIp = re.split(" +", x)[3]
if adapter in currHostDict:
#currHostDict[adapter] = [currHostDict[adapter], macIp]
print(type(currHostDict))
# .extend() only exists on lists, so the stored value must be a list here.
currHostDict[adapter].extend([macIp])
#currHostDict[adapter] = [currHostDict[adapter].extend(macIp)]
#currHostDict[adapter] = [currHostDict[adapter].append(macIp)]
#currHostDict[adapter] = "test"
else:
# NOTE: the first value for an adapter is stored as a bare str, not a list,
# so the .extend() call above fails on the adapter's second value.
currHostDict[adapter] = macIp
And this one raises an AttributeError: 'str' object has no attribute 'extend' error.
I can also confirm that running this simplified code:
item1 = "item1"
item2 = "item2"
item3 = "item3"
currHostDict = {'en0':[item1,item2]}
currHostDict['en0'].extend([item3])
print(currHostDict)
outputs the expected {'en0': ['item1', 'item2', 'item3']}.
However, this assumes the dictionary already has at least one key and at least one value for that key as I am creating in the original code through currHostDict[adapter] = macIp
Please also note that the else statement will always run first so the dictionary is always filled out with at least one key and its value.
In your code, the problem comes from the line:
currHostDict[adapter] = macIp
Because macIp is a str and not a list of str.
The obvious fix is the following:
currHostDict[adapter] = [macIp]
Moreover, I advise you to check the defaultdict structure. If you request a non-existent key in a defaultdict, it will initialize it with the given default value.
import re
from collections import defaultdict

netstatinfo = ["A - - item1", "B - - item4", "A - - item2", "A - - item3"]
# Missing adapters start out with an empty list, so append() always works.
currHostDict = defaultdict(list)
for x in netstatinfo:
    fields = re.split(" +", x)  # split once, reuse for both columns
    adapter = fields[0]
    macIp = fields[3]
    currHostDict[adapter].append(macIp)
print(dict(currHostDict))
# {'A': ['item1', 'item2', 'item3'], 'B': ['item4']}

Python Dictionary should return a value rather than None

I have a csv file which consists
a,b
2,3
4,5
4,7
4,7
(where a,b is a column values)
Below i have two functions where the first function will read the csv and assign 'a' as key and 'b' as value in a dictionary
Second function i will pass 'a' value as an argument and it returns b as a value when i use that function.If there is no value for 'a' i get None.
def x (id):
dict1= {}
with open(file, 'r', encoding='utf') as f:
for i in csv.DictReader(f, skipinitialspace= True)
dict1[i['a']] = row['b']
print('im happy now' )
def getx (a):
return dict1.get(a, None)
It works perfectly.
Now I have a csv file with four column values
a,b,c,d
1,2,r,4
2,g,4,6
3,d,4,6
For this i have written a code like
def x ():
dict1= {}
with open(file, 'r', encoding='utf') as f:
for i in csv.DictReader(f, skipinitialspace= True)
dict1[i['a']] = dict(dict1[i['b']] = dict(dict1[i['c']] = row['d']))
print('im happy now' )
def getx (a):
return dict1.get(dict1['a']['b']['c'], None)
My logic is to show
dict1[i['a']] = dict(dict1[i['b']] = dict(dict1[i['c']] = row['d']))
as
dict1 :{
'a':{
'b':{
'c':2,
'c':4,
'c':4
}
}
}
I'm not sure if what i have written above is right.
I need to return 'd' as the value when I look up dict1[a][b][c]. Currently it returns an empty value.
Expected value is for the combination of a,b,c i need a value as d.
For eg: from the above csv.. For the combination of 1,2,r i need to return 4 as an output
Update:
I realized that the column 'a' has duplicate values and it cant be handled in dictionary keys which skips the duplicate key record.
from collections import defaultdict
dict_1 = defaultdict(list)
with open('file.txt','r') as f:
for r in f.readline():
i= r['a']
j= r['b']
k= r['c']
l =r['d']
details = [j,k,l]
dict_1[i].append((details))
print(dict_1)
which gives me
{'1' :[('k', '3', '5'),('e','3','2')], '4' :[('r','3','2'),('e','2','1')],....................}
If i have dict_1 in my first function as above.
Now, any suggestions like how can i get the value of 'd' by passing a,b,c as an argument in my second function, else NONE?
You don't need the csv module for that. Also, you can't have duplicate keys in a dictionary, you'd only overwrite previously set values.
# Shared lookup table: dict1[a][b][c] -> d.  It lives at module level so that
# getx() can see what x() loaded.
dict1 = {}


def x():
    """Load the comma-separated file named by the global ``file`` into dict1.

    Every non-blank line must hold exactly four comma-separated fields
    ``a,b,c,d``.  Any previous content of ``dict1`` is discarded.
    """
    dict1.clear()
    with open(file, 'r', encoding='utf-8') as f:
        for row in f:
            row = row.strip()  # drop the newline that would otherwise stay in d
            if not row:
                continue
            a, b, c, d = row.split(',')
            # setdefault keeps earlier (b, c) entries for the same a instead
            # of replacing the whole nested dict.
            dict1.setdefault(a, {}).setdefault(b, {})[c] = d


def getx(a, b, c):
    """Return the d value stored for (a, b, c), or None if there is none."""
    try:
        return dict1[a][b][c]
    except KeyError:
        return None

open files for each of many dictionaries in python

I have two main dictionaries:
dict_main1 = {}
dict_main2 = {}
And then I open many dictionaries (below only 6 of 26 I have) which store the values of the main dictionaries depending on one particular string:
string1 = {}
string2 = {}
string3 = {}
string4 = {}
string5 = {}
string6 = {}
for key, value in dict_main1.items():
if 'string1' in key:
string1[key] = dict_main1[key]
elif 'string2' in key:
string2[key] = dict_main1[key]
elif 'string3' in key:
string3[key] = dict_main1[key]
......
for key, value in dict_main2.items():
if 'string4' in key:
string4[key] = dict_main2[key]
elif 'string5' in key:
string5[key] = dict_main2[key]
elif 'string6' in key:
string6[key] = dict_main2[key]
......
How can I open a file for each of the string# dictionaries in a Pythonic way? I would like to avoid doing it one by one (as in the example below):
FI = open ('string1', w)
for key, value in string1.items():
OUT = key + '\n' + value + '\n'
FI.write(OUT)
First of all you don't need 99999 dicts, just use one with dicts inside it.
for example:
from collections import defaultdict

# One entry per substring to group by; extend the list as needed.
my_keys = ['str1', 'str2']
# Maps each substring to the {key: value} pairs of dict_main whose key
# contains that substring.
container_dict = defaultdict(dict)
for key, value in dict_main.items():
    for k in my_keys:
        if k in key:
            container_dict[k][key] = value
now for the files, just use for:
# Iterating a dict directly yields only its keys; .items() is needed to
# unpack (name, sub-dict) pairs.
for string, strings_dict in container_dict.items():
    # Text mode, because str values are written below.
    with open(string, "w") as f:
        # Same layout as in the question: key and value on alternating lines.
        for key, value in strings_dict.items():
            f.write(key + '\n' + value + '\n')
I didn't run this code, so there may be some bugs, but I think it should be OK.
It might be useful to use a single dictionary data structure rather than maintaining 26 different dictionaries.
def split_dict(d, corpus):
    """Split *d* into one sub-dict per word in *corpus*.

    Each key of *d* is filed under the first word of *corpus* that occurs
    in it as a substring.  Keys that contain none of the words are skipped.
    Returns a dict mapping every corpus word to its (possibly empty)
    sub-dict.
    """
    # A dict comprehension: `{{} for w in corpus}` would try to build a set
    # of dicts and fail because dicts are unhashable.
    dicts = {w: {} for w in corpus}
    for k, v in d.items():
        word = next((w for w in corpus if w in k), None)
        if word is None:  # no corpus word occurs in this key
            continue
        dicts[word][k] = v
    return dicts
dict_main = {...}
corpus = [f'string{i}' for i in range(1, 4)]
dict_split = split_dict(dict_main, corpus)
Now, just loop through dict_split:
# .items() is required: iterating the dict itself yields only the words.
for word, d in dict_split.items():
    # One output file per word, named after the word itself.
    with open(word, 'w') as f:
        for key, value in d.items():
            f.write(f'{key}\n{value}\n')

Extracting columns from a string or list in python

I'm trying to extract columns from a string of values in python. The string of values looks like follows -
CN=Unix ADISID,OU=SA,OU=DGO,DC=dom,DC=ab,DC=com,1001
CN=1002--DS,OU=Process,DC=dom,DC=ab,DC=com,1002
CN=1003--Cyb,OU=SA,OU=DGO,DC=dom,DC=ab,DC=com,1003
CN=Doe--Joe,OU=Adm,DC=dom,DC=ab,DC=com,d1004
CN=cruise--bob,OU=SA,OU=DGO,DC=dom,DC=ab,DC=com,d1005
Now I would like to extract columns from this string with column headers like CN, OU1, OU2,DC1, DC2, DC3,ID. The number of OU and DC values are different in every line so if they are not present in a line, I would like to keep that column as blank. Also, I'm using the following piece of code to generate the above string.
result = l.search_s(base, ldap.SCOPE_SUBTREE, criteria, attributes)
results=""
for i in [entry for dn, entry in result if isinstance(entry, dict)]:
results += str(i.get('distinguishedName')[0] +","+ i.get('sAMAccountName')[0] + "\n").replace("\, ","--")
print results
Will it be easier if I create results as a list to begin with?
To get the "fields left blank" behavior, you're going to have to count the max number of each field. I believe that CN is unique, so that should always be 1.
import collections

result = l.search_s(base, ldap.SCOPE_SUBTREE, criteria, attributes)

# Build one record per entry: a mapping from attribute type (CN, OU, DC) to
# the list of its values in DN order, plus the account name under 'id'.
users = []
for _, entry in result:
    if not isinstance(entry, dict):
        continue
    # Escaped commas inside a value are replaced first so that the split on
    # ',' only separates DN components.
    dn = entry.get('distinguishedName')[0].replace(r'\, ', '--').split(',')
    info = collections.defaultdict(list)
    info['id'] = entry.get('sAMAccountName')[0]
    for part in dn:
        key, value = part.split('=', 1)
        info[key].append(value)
    users.append(info)

# Column counts: the widest OU / DC run decides how many columns each gets.
max_cn = max(len(u['CN']) for u in users)
assert max_cn == 1
max_ou = max(len(u['OU']) for u in users)
max_dc = max(len(u['DC']) for u in users)  # the data uses DC=, not DN=
numflds = max_cn + max_ou + max_dc

# One row per user: CN, OU1..OUn, DC1..DCn, ID, with '' in unused columns.
fields = []
for u in users:
    f = list(u['CN'])  # copy the CN value(s) rather than nesting the list
    ou = u['OU'] + [''] * max_ou
    f.extend(ou[:max_ou])
    dc = u['DC'] + [''] * max_dc
    f.extend(dc[:max_dc])
    f.append(u['id'])
    fields.append(f)  # keep the finished row
For each line:
pairs = [kv.split('=') for kv in line.split(',')]
for pair in pairs:
if len(pair) == 1:
pair.insert(0, 'ID')
Now you have something like this:
[['CN', 'Unix ADISID'],
['OU', 'SA'],
['OU', 'DGO'],
['DC', 'dom'],
['DC', 'ab'],
['DC', 'com'],
['ID', '1001']]
Then:
from collections import defaultdict
mapping = defaultdict(list)
for k,v in pairs:
mapping[k].append(v)
Which gives you:
{'CN': ['Unix ADISID'],
'DC': ['dom', 'ab', 'com'],
'ID': ['1001'],
'OU': ['SA', 'DGO']}

Categories

Resources