How to convert coma delimited string into python dictionary? - python

Please help me with the following issue: I have delimited strings (in the examples below the delimiter is a dot) and a value that belongs to each string. The number of delimiters is unpredictable, but is at most 6. I have to convert them into a nested Python dictionary. For example:
aa.bb.cc 6 => mydict['aa']['bb']['cc']=6
aa.bb.dd.ee 8 = mydict['aa']['bb']['dd']['ee']=8
My python version is 2.7.9

One way to do it is to use defaultdict
from collections import defaultdict as defd

# Module-level tree that create_dict() fills in when no target is given.
dic = defd(dict)


def create_dict(astr, target=None):
    """Store the value of a 'dotted.key.path value' line in a nested dict.

    'aa.bb.cc 6' results in target['aa']['bb']['cc'] == 6.

    astr   -- dotted key path and an integer value, separated by whitespace.
    target -- mapping to update; defaults to the module-level ``dic``.

    Raises ValueError if the line is not exactly "path value" or the value
    is not an integer, and TypeError if the path runs through a key that
    already holds a leaf value.
    """
    if target is None:
        target = dic
    # split() without arguments tolerates repeated and surrounding blanks.
    path, val = astr.split()
    keys = path.split('.')
    val = int(val)
    node = target
    # Walk (and create) every level except the last, which receives the value.
    for k in keys[:-1]:
        if k not in node:
            node[k] = defd(dict)
        node = node[k]
        if not isinstance(node, dict):
            raise TypeError('%r already holds a leaf value in %r' % (k, astr))
    node[keys[-1]] = val


create_dict('aa.bb.cc 6')
create_dict('aa.bb.dd.ee 8')
dic['aa']['bb']['cc']  ## returns 6
dic['aa']['bb']['dd']['ee']  ## returns 8

I'm going to assume you want one dictionary with multiple keys (you do say "a dictionary") :
import re

s1 = 'a.bb.dd.ee 8'
s2 = 'aa.bb.cc 6'


def create_d(s):
    """Return a flat dict mapping every dotted key component of ``s`` to its value.

    ``s`` looks like 'a.bb.dd.ee 8': key components separated by dots, then
    a space and an integer. Every component becomes its own key holding that
    same integer; no nesting is produced.
    """
    fields = re.split(r'[. ]', s)
    # The last field is the value; everything before it is a key.
    v = int(fields.pop())
    return dict.fromkeys(fields, v)


mydict = create_d(s1)
# Single-argument print(...) behaves the same on Python 2 and Python 3.
print(mydict)
mydict.update(create_d(s2))
print(mydict)
Gives:
{'a': 8, 'ee': 8, 'dd': 8, 'bb': 8}
{'a': 8, 'aa': 6, 'bb': 6, 'ee': 8, 'dd': 8, 'cc': 6}

You can try following simple code
str1 = 'aa.bb.cc 6'
str2 = 'aa.bb.cc.dd 8'
dictstr = {}
# Key each value by the tuple of its dotted path components. The value is
# stored as the string read from the line (it is not converted to int).
for line in (str1, str2):
    strdelim = line.split(' ')
    dictstr[tuple(strdelim[0].split('.'))] = strdelim[1]
# Single-argument print(...) behaves the same on Python 2 and Python 3.
print(dictstr)
Result
{('aa', 'bb', 'cc'): '6', ('aa', 'bb', 'cc', 'dd'): '8'}

Related

Delete a key and its value from a dictionary if either is longer than 5 symbols

How to create a new dictionary where all keys and values are converted to string type? If string length is more than 5 symbols it must be excluded from the answer (both the key and value)
My code now looks like this:
file1data = {"1":"2",2:5,"12dade":-1,"1231":14,"-2":7}
key_values = file1data.items()
# Stringify every key and every value; nothing is filtered by length here.
new_dictionary = {}
for key, value in key_values:
    new_dictionary[str(key)] = str(value)
print(str(new_dictionary))
It converts all keys and values to strings, but it does not detect whether a string is longer than 5 characters.
For example if is given this dictionary: {"1":"2",2:5,"12dade":-1,"1231":14,"-2":7}
The result should be: {"1":"2","2":"5","1231":"14","-2":"7"}
Add if statement inside dict comprehension like so
file1data = {"1": "2", 2: 5, "12dade": -1, "1231": 14, "-2": 7}
key_values = file1data.items()
new_dictionary = {}
for key, value in key_values:
    key_text, value_text = str(key), str(value)
    # Drop the pair when either side is longer than five characters.
    if len(key_text) > 5 or len(value_text) > 5:
        continue
    new_dictionary[key_text] = value_text
print(str(new_dictionary))
A simple way to do it is to iterate through the keys of the dictionary and check if you should delete it or not if yes add it to a list and delete it later
file1data = {"1":"2",2:5,"12dade":-1,"1231":14,"-2":7}
# Collect the keys to remove first: a dict must not change size while it is
# being iterated. A pair is removed when its key OR its value is longer than
# five characters once converted to a string.
delete = [
    key for key, value in file1data.items()
    if len(str(key)) > 5 or len(str(value)) > 5
]
for i in delete:
    del file1data[i]
I know that it is not the most compact way to do it but it works
Use a comprehension:
d = {'1': '2', 2: 5, '12dade': -1, '1231': 14, '-2': 7}
# Keep a pair only if both its key and its value print in at most five
# characters; the kept keys and values are left in their original types.
out = dict(
    pair for pair in d.items()
    if all(len(str(part)) <= 5 for part in pair)
)
print(out)
# Output
{'1': '2', 2: 5, '1231': 14, '-2': 7}
Without converting to string twice:
# Builds a dict of stringified key/value pairs, keeping a pair only when both
# strings are at most five characters long. The assignment expressions
# (":=", Python 3.8+) bind sk and sv inside the conditions, so each str()
# result is computed once and reused as the new key/value.
# This is a bare expression: assign it to a name to keep the result.
{sk: sv
for k, v in file1data.items()
if len(sk := str(k)) <= 5
if len(sv := str(v)) <= 5}
You can achieve this by using an if clause inside of the dictionary comprehension, like so:
file_data = {"1": "2", 2: 5, "12dade": -1, "1231": 14, "-2": 7}
# Convert every pair to strings first, then keep the pairs in which neither
# string is longer than five characters.
stringified = ((str(key), str(value)) for key, value in file_data.items())
filtered_file_data = {
    key: value
    for key, value in stringified
    if max(len(key), len(value)) <= 5
}
print(filtered_file_data)
Output:
{'1': '2', '2': '5', '1231': '14', '-2': '7'}

Printing dictionary without brackets

I'm trying to write a script for an exercise that sorts the characters of a string and counts the most frequently recurring characters, but I can't seem to print the result as a plain string because it is a collection of tuples that I turn into a dictionary. Any idea on how I could do this would be much appreciated.
import sys
# Lower-case the first command-line argument, then sort its characters.
stringInput = (sys.argv[1]).lower()
stringInput = sorted(stringInput)
DictCount = {}
Dictionary = {}
# Build a dict from an iterable of (key, value) pairs. The DictStr argument
# is overwritten immediately, so the value passed in is never used.
def ListDict(tup, DictStr):
DictStr = dict(tup)
return DictStr
# Tally how many times each character occurs.
for chars in stringInput:
if chars in Dictionary:
Dictionary[chars] += 1
else:
Dictionary[chars] = 1
# (character, count) pairs ordered by count, highest first.
ListChar = sorted(Dictionary.items(), reverse=True, key=lambda x: x[1])
# The first five pairs; the fixed indexes require at least five distinct characters.
Characters = (ListChar[0], ListChar[1], ListChar[2], ListChar[3], ListChar[4])
# Prints the dict itself, hence the braces and quotes in the current output.
print(ListDict(Characters, DictCount))
current output:
python3 CountPopularChars.py sdsERwweYxcxeewHJesddsdskjjkjrFGe21DS2145o9003gDDS
{'d': 7, 's': 7, 'e': 6, 'j': 4, 'w': 3}
desired output:
d:7,s:7,e:6,j:4,w:3
create your output in this way:
# Join only the first five (character, count) pairs, which is what the desired
# output shows. Assumes ListChar is the count-sorted list built by the
# question's script (verify before reusing elsewhere).
output = ','.join(f"{k}:{v}" for k, v in ListChar[:5])
print(output)
Output:
e:17,d:7,a:3,b:1,c:1
Or just:
>>> dct = {'d': 7, 's': 7, 'e': 6, 'j': 4, 'w': 3}
>>> ','.join(f'{k}:{v}' for k,v in dct.items())
'd:7,s:7,e:6,j:4,w:3'
Try:
yourDict = {'d': 7, 's': 7, 'e': 6, 'j': 4, 'w': 3}
# Render each (key, value) item as "key:value" and separate items with commas.
print(','.join("{}:{}".format(*item) for item in yourDict.items()))
Output:
d:7,s:7,e:6,j:4,w:3
Your code is highly redundant. You could write it in a much more concise way by using collections.Counter to help:
from collections import Counter

# The input is hard coded here to keep the example self-contained.
stringInput = 'sdsERwweYxcxeewHJesddsdskjjkjrFGe21DS2145o9003gDDS'.lower()
c = Counter(stringInput)
# most_common() lists every (character, count) pair, highest count first;
# characters with equal counts keep the order in which they were first seen.
ListChar = c.most_common()
top_five = ListChar[:5]
print(','.join(f"{k}:{v}" for k, v in top_five))

How do I swap two random values from a dictionary?

I need to swap two random values in a dictionary.
def alphabetcreator():
    """Return a random substitution table for the lowercase English alphabet.

    The result is a dict whose keys are 'a'..'z' and whose values are the
    same 26 letters in a random order, each used exactly once.
    """
    # Local imports: the visible snippet does not import these itself.
    import random
    import string

    engalpha = list(string.ascii_lowercase)
    # sample() over the whole population yields a random permutation.
    newalpha = random.sample(engalpha, len(engalpha))
    alphasmerged = dict(zip(engalpha, newalpha))
    return alphasmerged
This code gives me my two different alphabets, putting them into a dictionary so I can translate between one and the other. I now need to randomly swap two of the values whilst keeping all the rest the same. How can I do this?
You can first use random.sample to randomly pick two different values from a collection.
From the doc:
Return a k length list of unique elements chosen from the population sequence or set. Used for random sampling without replacement.
Use this function on the keys of your dictionary to have two distinct keys.
In Python 3 before 3.11, you can directly use it on a dict_keys object; from Python 3.11 on, random.sample only accepts sequences, so pass list(d) instead.
In Python 2, you can either convert d.keys() into a list, or directly pass the dictionary to the sample.
>>> import random
>>> d = {'a': 1, 'b': 2}
>>> k1, k2 = random.sample(d.keys(), 2) # Python 3
>>> k1, k2 = random.sample(d, 2) # Python 2
>>> k1, k2
['a', 'b']
Then, you can in-place-ly swap two values of a collection.
>>> d[k1], d[k2] = d[k2], d[k1]
>>> d
{'b': 1, 'a': 2}
d = {12: 34, 67: 89}
# Choose one entry at random and re-insert it with key and value exchanged.
k = random.choice(list(d))
v = d.pop(k)
d[v] = k
which when running, gave the random output of d as:
{12: 34, 89: 67}
You can try this:
import random

engalpha =['a','b','c','d','e','f','g','h','i','j','k','l','m','n','o','p','q','r','s','t','u','v','w','x','y','z']
# Random substitution table: each letter maps to a letter of a shuffled alphabet.
new_dict = dict(zip(engalpha, map(chr, random.sample(range(97,123), 26))))
# Pick two distinct keys and exchange their values in a copy, leaving every
# other entry (and new_dict itself) untouched.
key_val, other_key = random.sample(engalpha, 2)
final_dict = dict(new_dict)
final_dict[key_val], final_dict[other_key] = new_dict[other_key], new_dict[key_val]
Regarding your recent comment:
import random

s = {'a': 'h', 'b': 'd', 'c': 'y'}
# Two distinct (key, value) pairs chosen at random.
random_dict = random.sample(list(s.items()), 2)
(first_key, first_val), (second_key, second_val) = random_dict
# Cross the values over directly; sorting keys and values separately only
# produces a swap when both happen to sort in the same relative order.
new_dict = {first_key: second_val, second_key: first_val}
# Entries that were not picked keep their original value.
final_dict = {a: new_dict.get(a, b) for a, b in s.items()}
Output (randomly generated):
{'a': 'y', 'c': 'h', 'b': 'd'}

How to Convert Dict to string

number_pad = {"b":2,"a":2,"c":2,"x":4,"y":4,"z":4}
My print statement is
print(get_number,('00bx'))
How to get the output like this: 0024.
I tried this:
get_number = ""
# NOTE(review): num_pad and get_phone_number are not defined anywhere in the
# visible snippet (the dict above is named number_pad), so this loop raises
# NameError as written.
for key,val in num_pad.items():
get_number = get_phone_number + str(val)
Is there any way to relate the letters to the numbers?
You can use dict.get(...) for your task:
number_pad = {"b":2,"a":2,"c":2,"x":4,"y":4,"z":4}
text = '00bx'
re_text = ""
for t in text:
    # Characters with no entry in number_pad (here the digits) are kept as-is,
    # because dict.get falls back to the character itself.
    re_text += str(number_pad.get(t, t))
print(re_text) # output: 0024
Or you can condense it to this:
# One-expression form: look each character up in number_pad, fall back to the
# character itself when it has no entry, and glue the results together.
re_text = "".join(map(str, (number_pad.get(t, t) for t in text)))
print(re_text) # output: 0024
Is this essentially what you are after?
# Digits for the letters 'b' and 'x', concatenated as text.
"".join(map(str, (number_pad['b'], number_pad['x'])))
Maybe you can try the map function with a lambda expression.
number_pad = {'b': 2, 'a': 2, 'c': 2, 'x': 4, 'y': 4, 'z': 4}
# Letters found in number_pad become their digit; any other character is
# passed through unchanged.
print(''.join(str(number_pad.get(c, c)) for c in '00bx'))

Python + CSV : Sum up Similar Values from a CSV columns

INPUT file:
$ cat dummy.csv
OS,A,B,C,D,E
Ubuntu,0,1,0,1,1
Windows,0,0,1,1,1
Mac,1,0,1,0,0
Ubuntu,1,1,1,1,0
Windows,0,0,1,1,0
Mac,1,0,1,1,1
Ubuntu,0,1,0,1,1
Ubuntu,0,0,1,1,1
Ubuntu,1,0,1,0,0
Ubuntu,1,1,1,1,0
Mac,0,0,1,1,0
Mac,1,0,1,1,1
Windows,1,1,1,1,0
Ubuntu,0,0,1,1,0
Windows,1,0,1,1,1
Mac,0,1,0,1,1
Windows,0,0,1,1,1
Mac,1,0,1,0,0
Windows,1,1,1,1,0
Mac,0,0,1,1,0
Expected output:
OS,A,B,C,D,E
Mac,4,1,6,5,3
Ubuntu,3,4,5,6,3
Windows,3,2,6,6,3
I generated above output using Excel's Pivot Table.
MyCode:
import csv
import pprint
from collections import defaultdict
# A missing OS key is created on first access as an empty dict.
d = defaultdict(dict)
with open('dummy.csv') as csvfile:
reader = csv.DictReader(csvfile)
for row in reader:
# NOTE: the inner dict starts out empty, so "+=" on a column that has not
# been stored yet raises KeyError (the traceback shown below). The values
# yielded by DictReader are also still strings at this point.
d[row['OS']]['A'] += row['A']
d[row['OS']]['B'] += row['B']
d[row['OS']]['C'] += row['C']
d[row['OS']]['D'] += row['D']
d[row['OS']]['E'] += row['E']
pprint.pprint(d)
Error:
$ python3 dummy.py
Traceback (most recent call last):
File "dummy.py", line 10, in <module>
d[row['OS']]['A'] += row['A']
KeyError: 'A'
My Idea was to get the CSV values accumulated into a dictionary and later print it. However, I get above error when I try to add the values.
This seems like achievable with built-in csv module. I thought this was an easier one :( Any pointers would be of great help.
There are two problems. The nested dictionaries don't initially have any keys set, so d[row['OS']]['A'] raises a KeyError. The other issue is that you need to convert the column values to int before adding them.
You could use Counter as values in defaultdict since there missing keys default to 0:
import csv
from collections import Counter, defaultdict

# One Counter per OS; a Counter reports 0 for a column it has not seen yet,
# so "+=" works on the first row too.
d = defaultdict(Counter)
# newline='' is the way the csv module documentation says to open its files.
with open('dummy.csv', newline='') as csvfile:
    reader = csv.DictReader(csvfile)
    for row in reader:
        # pop() removes the OS column so that only the numeric columns remain.
        nested = d[row.pop('OS')]
        for k, v in row.items():
            nested[k] += int(v)
print(*d.items(), sep='\n')
Output:
('Ubuntu', Counter({'D': 6, 'C': 5, 'B': 4, 'E': 3, 'A': 3}))
('Windows', Counter({'C': 6, 'D': 6, 'E': 3, 'A': 3, 'B': 2}))
('Mac', Counter({'C': 6, 'D': 5, 'A': 4, 'E': 3, 'B': 1}))
Something like this? You can write the dataframe to csv file to get the desired format.
import pandas as pd

# Load the question's CSV file; the clipboard variant is kept for reference.
# df0=pd.read_clipboard(sep=',')
df0 = pd.read_csv('dummy.csv')
df = df0.copy()
# Sum every numeric column per distinct OS value.
df = df.groupby(by='OS').sum()
print(df)
Output:
A B C D E
OS
Mac 4 1 6 5 3
Ubuntu 3 4 5 6 3
Windows 3 2 6 6 3
df.to_csv('file01')
file01
OS,A,B,C,D,E
Mac,4,1,6,5,3
Ubuntu,3,4,5,6,3
Windows,3,2,6,6,3
You got that exception because for the first time, row['OS'] does not exist in d, so 'A' does not exist in d[row['OS']]. Try the following to fix that:
import csv
from collections import defaultdict

# A missing OS key is created on first access as an empty dict of totals.
d = defaultdict(dict)
with open('dummy.csv', newline='') as csvfile:
    reader = csv.DictReader(csvfile)
    for row in reader:
        totals = d[row['OS']]
        for column in ('A', 'B', 'C', 'D', 'E'):
            # get(..., 0) covers the first time a column is seen for this OS.
            totals[column] = totals.get(column, 0) + int(row[column])
Output:
>>> import pprint
>>>
>>> pprint.pprint(dict(d))
{'Mac': {'A': 4, 'B': 1, 'C': 6, 'D': 5, 'E': 3},
'Ubuntu': {'A': 3, 'B': 4, 'C': 5, 'D': 6, 'E': 3},
'Windows': {'A': 3, 'B': 2, 'C': 6, 'D': 6, 'E': 3}}
This does not answer your question exactly, as it is indeed possible to solve the problem using csv, but it is worth mentioning that pandas is perfect for this sort of thing:
In [1]: import pandas as pd
In [2]: df = pd.read_csv('dummy.csv')
In [3]: df.groupby('OS').sum()
Out[3]:
A B C D E
OS
Mac 4 1 6 5 3
Ubuntu 3 4 5 6 3
Windows 3 2 6 6 3
d is a defaultdict(dict), so d[row['OS']] is a valid expression: a missing OS key is created on the fly as an empty dict. d[row['OS']]['A'] += ... then has to read the current value of 'A' from that empty dict, and because no such key exists yet the lookup raises KeyError.
I assume your input file is called input_file.csv.
You can also process your data and have your desired output using groupby from itertools module and two dicts like the example below:
from itertools import groupby

# Read every line as a list of fields; the with-block closes the file.
with open("input_file.csv", 'r') as csv_in:
    data = [line.strip("\n").split(",") for line in csv_in]

a, b = {}, {}
# groupby only merges *consecutive* rows with the same first field, so the
# runs belonging to one OS are accumulated under a single key of ``a``.
for k, v in groupby(data[1:], lambda x: x[0]):
    a.setdefault(k, []).extend(row[1:] for row in v)

# zip(*rows) walks the rows column by column, whatever the column count is;
# each column total is rendered and the totals are joined with commas.
for key, rows in a.items():
    b[key] = ','.join(
        str(sum(int(cell) for cell in column)) for column in zip(*rows)
    )
Output:
print(','.join(data[0]))
for k in b.keys():
print("{0},{1}".format(k, b[k]))
>>> OS,A,B,C,D,E
>>> Ubuntu,3,4,5,6,3
>>> Windows,3,2,6,6,3
>>> Mac,4,1,6,5,3
This extends niemmi's solution to format the output to be the same as the OP's example:
import csv
from collections import Counter, defaultdict

# One Counter of column totals per OS.
d = defaultdict(Counter)
with open('dummy.csv') as csv_file:
    reader = csv.DictReader(csv_file)
    field_names = reader.fieldnames
    for row in reader:
        counter = d[row.pop('OS')]
        # items() and single-argument print(...) work on Python 2 and 3 alike.
        for key, value in row.items():
            counter[key] += int(value)

print(','.join(field_names))
# Emit the totals in header order so each value lines up with its column
# name even when the header is not in alphabetical order.
value_columns = [name for name in field_names if name != 'OS']
for os_name, counter in sorted(d.items()):
    print("%s,%s" % (os_name, ','.join(str(counter[name]) for name in value_columns)))
Output
OS,A,B,C,D,E
Mac,4,1,6,5,3
Ubuntu,3,4,5,6,3
Windows,3,2,6,6,3
Update: Fixed the output.

Categories

Resources