Python multilevel dict to strings - python

I have a Python dictionary in which some of the values are themselves dictionaries. I'm trying to generate a dot-delimited string of the keys in the structure with the value at the end. With the example below I'd want FIELD0, FIELD1 and NAME. I could write a for loop or a recursive function to process the data, but I wondered whether there is a prebuilt method for collapsing a multilevel dictionary to delimited strings.
I was trying the following but as you know it will just append the sub dictionaries.
'.'.join('%s %s\n' % i for i in a.items())
{'BOGUS1': 'BOGUS_VAL1',
'BOGUS2': 'BOGUS_VAL1',
'FIELD0': {'F0_VAL1': 1, 'F0_VAL2': 2},
'FIELD1': {'F1_VAL1': 80, 'F1_VAL2': 67, 'F1_VAL3': 100},
'FOOBAR1': 'FB_VAL1',
'NAME': 'VALUE'}
BOGUS2.BOGUS_VAL1
.NAME.VALUE
.BOGUS1.BOGUS_VAL1
.FIELD0.{'F0_VAL1': 1, 'F0_VAL2': 2}
.FIELD1.{'F1_VAL2': 67, 'F1_VAL3': 100, 'F1_VAL1': 80}
.FOOBAR1.FB_VAL1
# Wanted results
FIELD0.F0_VAL1 1
FIELD0.F0_VAL2 2
FIELD1.F1_VAL1 80
FIELD1.F1_VAL2 67
FIELD1.F1_VAL3 100
NAME VALUE

How about something like this:
def dotnotation(d, prefix=''):
    """Print every leaf of a nested dict as ``dotted.key = value``.

    Args:
        d: Dictionary to walk; values that are dicts are descended into.
        prefix: Dotted path of the enclosing keys, including the trailing
            dot. Callers normally leave it at the default.
    """
    for k, v in d.items():
        path = prefix + str(k)
        # isinstance (rather than an exact type comparison) also descends
        # into dict subclasses such as OrderedDict.
        if isinstance(v, dict):
            dotnotation(v, path + '.')
        else:
            print(path + ' = ' + str(v))
Also the formatting can be changed according to the stored types. This should work with your example.

Here is my approach:
def dotted_keys(dic):
    """Generate dot-notation keys for every leaf of a nested dictionary.

    The dictionary is walked breadth-first: all leaves of one level are
    yielded before the leaves of the dictionaries nested inside it.

    Args:
        dic: Dictionary with string keys; values may be nested dicts.

    Yields:
        The full dotted path of each non-dict value, e.g. ``'A.B.C'``.
    """
    from collections import deque

    queue = deque([(None, dic)])  # A queue of (full dotted prefix, object)
    while queue:
        prefix, current = queue.popleft()
        for k, v in current.items():
            # Build the full path here so nested dicts keep their ancestors'
            # keys instead of only their immediate parent key.
            path = k if prefix is None else prefix + '.' + k
            if isinstance(v, dict):
                queue.append((path, v))
            else:
                yield path
def dict_search(dic, dotted_key, default=None):
    """Look up a dotted key in a nested dictionary.

    Args:
        dic: The (possibly nested) dictionary to search.
        dotted_key: Path of keys joined by dots, e.g. ``'FIELD0.F0_VAL2'``.
        default: Value returned when the path cannot be followed.

    Returns:
        The value stored at the path, or ``default`` if any component is
        missing or an intermediate value is not a dictionary.

    Example: dict_search(d, 'FIELD0.F0_VAL2')
    """
    current = dic
    for k in dotted_key.split('.'):
        # Guard against descending into a non-dict (e.g. a string, where
        # ``in`` would be a substring test and indexing would raise).
        if not isinstance(current, dict) or k not in current:
            return default
        current = current[k]
    return current
if __name__ == '__main__':
d = {
'BOGUS1': 'BOGUS_VAL1',
'BOGUS2': 'BOGUS_VAL1',
'FIELD0': {'F0_VAL1': 1, 'F0_VAL2': 2, 'XYZ': {'X1': 9}},
'FIELD1': {'F1_VAL1': 80, 'F1_VAL2': 67, 'F1_VAL3': 100},
'FOOBAR1': 'FB_VAL1',
'NAME': 'VALUE'
}
for k in dotted_keys(d):
print(k, '=', dict_search(d, k))
Output:
BOGUS2 = BOGUS_VAL1
NAME = VALUE
BOGUS1 = BOGUS_VAL1
FOOBAR1 = FB_VAL1
FIELD0.F0_VAL1 = 1
FIELD0.F0_VAL2 = 2
FIELD1.F1_VAL2 = 67
FIELD1.F1_VAL3 = 100
FIELD1.F1_VAL1 = 80
XYZ.X1 = None
The dotted_keys function generates the keys in dotted notation, while the dict_search function takes a dotted key and returns the corresponding value.

Related

How to deal with columns in pandas dataframe?

I want to do something with column data which is a list. like:
inputs:
col-A
[{'name':'1','age':'12'}, {'name':'2','age':'12'}]
[{'name':'3','age':'18'}, {'name':'7','age':'15'}]
....
outputs:
col-A
[{'1-age':'12'}, {'2-age':'12'}]
[{'3-age':'18'}, {'7-age':'15'}]
....
My code is:
def deal(dict_col, prefix_key):
key_value = dict_col[prefix_key]+'-'
dict_col.pop(prefix_key, None)
items = copy.deepcopy(dict_col)
for key, value in items.items():
dict_col[key_value+key] = dict_col.pop(key)
return dict_col
prefix = "name"
[[deal(sub_item, prefix) for sub_item in item] for item in df[col-A]]
Some items will be processed multiple times.
Because the return value of deal method will be swapped to item in real time?
For example:
For deal method we
input:
{'name':'1','age':'12'}
output:
{'1-age':'12'}
Then the next input may be {'1-age':'12'} , and now we have no name or age to deal with.
How to solve this problem?
You can use the pandas apply method for it here some code:
import pandas as pd
d = {'col-A' : [[{'name' : '1', 'age': '12'}, {'name' : '2', 'age': '12'}],[{'name' : '3', 'age': '18'},{'name' : '7', 'age': '15'}]]}
df = pd.DataFrame(d)
def deal(row, prefix):
    """Prefix the keys of each dict in ``row`` with that dict's ``prefix`` value.

    For every dictionary in ``row`` a new dictionary is built whose keys are
    ``"<value of prefix key>-<original key>"``. The prefix key itself is left
    out, so ``{'name': '1', 'age': '12'}`` becomes ``{'1-age': '12'}``.
    The input dictionaries are not modified.

    Args:
        row: Iterable of dictionaries (one cell of the column).
        prefix: Name of the key whose value becomes the key prefix.

    Returns:
        A new list of new dictionaries.

    Raises:
        KeyError: If a dictionary does not contain ``prefix``.
    """
    out_list = []
    for sub_dict in row:
        out_str = sub_dict[prefix] + '-'
        # Skip the prefix key itself; it is already encoded in every new key.
        out_list.append(
            {out_str + k: v for k, v in sub_dict.items() if k != prefix}
        )
    return out_list
prefix = 'name'
df['col-A'] = df['col-A'].apply(lambda x : deal(x, prefix))
print(df)
You could push some of the code in a one-liner if you like that more:
def deal(row, prefix):
    """Prefix the keys of each dict in ``row`` with that dict's ``prefix`` value.

    Compact variant: returns a new list of new dictionaries in which every
    key other than ``prefix`` is renamed to ``"<sub_dict[prefix]>-<key>"``.

    Raises:
        KeyError: If a dictionary does not contain ``prefix``.
    """
    return [
        {sub_dict[prefix] + '-' + k: v for k, v in sub_dict.items() if k != prefix}
        for sub_dict in row
    ]
prefix = 'name'
# Apply deal() to every cell of the column (closing parenthesis restored).
df['col-A'] = df['col-A'].apply(lambda x: deal(x, prefix))
Just for the fun of it, you could even bring it down to one single line (not recommended due to poor readability):
prefix = "name"
df['col-A'] = df['col-A'].apply(lambda row : [dict((sub_dict[prefix] + '-' + k , sub_dict[k]) for k in sub_dict.keys() if k != prefix) for sub_dict in row])
I believe you need .get function for select with default value if not exist key in dict:
def deal(dict_col, prefix_key):
    """Rename the keys of ``dict_col`` in place using its ``prefix_key`` value.

    The entry under ``prefix_key`` is removed and every remaining key is
    renamed to ``"<prefix value>-<key>"``. When ``prefix_key`` is absent the
    placeholder ``'not_exist'`` is used as the prefix value.

    Args:
        dict_col: Dictionary to rewrite; it is mutated.
        prefix_key: Key whose value supplies the prefix.

    Returns:
        The same (mutated) ``dict_col`` object.
    """
    key_value = dict_col.pop(prefix_key, 'not_exist') + '-'
    # Iterate over a snapshot of the keys because the dict is mutated in the
    # loop; a deep copy of the values is not needed for that.
    for key in list(dict_col):
        dict_col[key_value + key] = dict_col.pop(key)
    return dict_col

Display a json without only one field

How can I display this JSON without the users field?
I don't want to do this:
result = [x['date']['nom']['count'] for x in hits]
{'date': '04-04-2019', 'nom': 'Iphone', 'count': 0, 'users': [1]}
Looks like what you want is:
result = [{k:v for k, v in entry.items() if k != 'users'} for entry in hits]
A more general case is probably:
from typing import Any, List, Mapping, Optional


def filter_dict(d: Mapping[str, Any],
                blacklist: Optional[List[str]] = None) -> Mapping[str, Any]:
    """Return ``d`` without the keys listed in ``blacklist``.

    Args:
        d: Source mapping; it is never modified.
        blacklist: Keys to drop. ``None`` means "drop nothing".

    Returns:
        ``d`` itself when ``blacklist`` is ``None``; otherwise a new dict
        holding the entries whose key is not in ``blacklist``.
    """
    if blacklist is None:
        return d
    return {k: v for k, v in d.items() if k not in blacklist}
result = [filter_dict(entry, blacklist=['users']) for entry in hits]
I find this solution:
def removekey(d, key):
    """Return a shallow copy of ``d`` without ``key``.

    The original dictionary is left untouched. A missing ``key`` is ignored
    so that entries which never had the field can be processed by the same
    list comprehension without raising.
    """
    r = dict(d)
    r.pop(key, None)
    return r
texte = [removekey(x['_source'], 'users') for x in hits]

Parsing a file with special format using python to a list of dictionaries

I have a file with the following format:
X ={
a= "someText";
b = 0;
c = 1;
d ={
t = "someText3";
};
f ="someText2";
};
X ={
a= "someText4";
b = 20;
c = 40;
f ="someText6";
d ={
t = "someText5";
};
};
I am looking for a smart and robust way to parse it to a list of dict like the following:
X[0] = {'a':"someText",'b':0, 'c':0, 'd':{ 't':'SomeText3' }, 'f':"someText2"}
X[1] = {'a':"someText4",'b':20, 'c':40, 'd':{ 't':'SomeText5' }, 'f':"someText6"}
Note that there might be nested dictionaries and the variables can have different order of occurrence.
My method is to keep track of the level by searching '={' and '};' and construct the list. I wonder if there is an elegant method to parse it.
The simple parser below implements a recursive descent algorithm on simple dictionary schemes:
import re
from collections import namedtuple
s = """
X ={
a= "someText";
b = 0;
c = 1;
d ={
t = "someText3";
};
f ="someText2";
};
"""
s1 = """
X ={
a= "someText4";
b = 20;
c = 40;
f ="someText6";
d ={
t = "someText5";
};
};
"""
token = namedtuple('token', ['type', 'value'])


class Parser:
    """Parse one ``X ={ key = value; ... };`` record into a dictionary.

    The outer ``X`` name and the final ``};`` are discarded; the remaining
    tokens are folded into ``self.d``. One level of nested ``key ={ ... };``
    blocks is collected in ``self.current_d`` and attached to ``self.d`` when
    its closing ``};`` is seen. All scalar values are kept as strings
    (quotes stripped), including numbers.
    """

    # Token grammar: quoted string, identifier, integer, '{' or '};'.
    lang = r'"[a-zA-Z0-9]+"|[a-zA-Z]+|\d+|\{|\};'
    token_types = {'int': r'\d+', 'key': '[a-zA-Z]+', 'start': '{', 'end': '};'}

    def __init__(self, s):
        # Tokenize once; drop the leading record name and the trailing '};'.
        self.starting_with = Parser.tokenize(s)[1:-1]
        self.tokens = iter(self.starting_with)
        self.starts = []      # stack of currently open '{'
        self.ends = []
        self.k_list = []      # every key seen so far, in order
        self.k = None         # most recent key
        self.d = {}           # parsed result
        self.current_d = {}   # entries of the nested block being read

    def parse(self):
        """Consume all remaining tokens and populate ``self.d``.

        Implemented as a loop (one iteration per token) so that long inputs
        cannot exhaust the interpreter's recursion limit.
        """
        for current in self.tokens:
            if current.type == 'start':
                self.starts.append(current.value)
            elif current.type == 'key':
                self.k = current.value
                self.k_list.append(self.k)
            elif current.type == 'end':
                self.starts.pop()
                # The key that introduced the block just closed sits
                # len(self.starts) + 1 positions from the end of k_list.
                self.d[self.k_list[-len(self.starts) - 1]] = self.current_d
                self.current_d = {}
            else:
                raw = current.value
                is_quoted = raw.startswith('"') and raw.endswith('"')
                value = raw[1:-1] if is_quoted else raw
                if len(self.starts) == 1:
                    self.d[self.k] = value
                else:
                    self.current_d[self.k_list[-1]] = value

    @classmethod
    def tokenize(cls, s):
        """Split ``s`` into a list of ``token(type, value)`` tuples."""
        tokens = []
        for text in re.findall(cls.lang, s):
            if text.startswith('"') and text.endswith('"'):
                kind = 'string'
            else:
                # First token type (in declaration order) whose pattern
                # occurs in the text.
                kind = [name for name, pattern in cls.token_types.items()
                        if re.findall(pattern, text)][0]
            tokens.append(token(kind, text))
        return tokens
dictionaries = [s, s1]
X = []
for d in dictionaries:
p = Parser(d)
p.parse()
X.append(p.d)
print(X[0])
print(X[1])
Output:
{'a': 'someText', 'c': '1', 'b': '0', 'd': {'t': 'someText3'}, 'f': 'someText2'}
{'a': 'someText4', 'c': '40', 'b': '20', 'd': {'t': 'someText5'}, 'f': 'someText6'}
Here is an implementation using parsy (which works similarly to pyparsing but is more modern and has much nicer documentation, and generally results in much neater code, but does require Python 3.3 or greater):
from collections import defaultdict
from parsy import generate, regex, seq, string, whitespace
lexeme = lambda parser: whitespace.optional() >> parser << whitespace.optional()
variable = lexeme(regex(r"[A-Za-z]+"))
string_literal = lexeme(string('"') >> regex(r'[^"]*') << string('"'))
int_literal = lexeme(regex(r'[0-9]+').map(int))
# Declared through @generate so the parser is built lazily: dict_literal is
# only defined further down and itself refers back to value.
@generate
def value():
    """Parse a value: a nested dict literal, a quoted string or an integer."""
    return (yield dict_literal | string_literal | int_literal)
statement = seq(variable << lexeme(string("=")),
value << lexeme(string(";")))
dict_literal = lexeme(string("{")) >> statement.many().map(dict) << lexeme(string("}"))
file_format = statement.many()
def parse(text_input):
    """Parse ``text_input`` and group the top-level values by variable name.

    Returns a plain dict mapping each top-level name to the list of values
    assigned to it, in order of appearance.
    """
    grouped = {}
    for name, parsed in file_format.parse(text_input):
        grouped.setdefault(name, []).append(parsed)
    return grouped
Output for your example:
{'X': [{'a': 'someText',
'b': 0,
'c': 1,
'd': {'t': 'someText3'},
'f': 'someText2'},
{'a': 'someText4',
'b': 20,
'c': 40,
'd': {'t': 'someText5'},
'f': 'someText6'}]}
The parsing is done by file_format.parse, the parse function I've added then combines that basic parse into a dictionary with multiple entries for each top level variable, and returns that value. It doesn't print it exactly as per your example because that probably isn't what you need if you want to use the values from Python.
You might want to adjust this according to your needs. Also, you may need to adjust all of the sub-parsers according to your actual rules (e.g. can variable names contain numbers? Are there escapes for string literals?).
You can do this without having an IQ of 170, by using pyparsing. Mind you, I've found that it takes some time to learn it.
I have defined the grammar of your input in seven lines. The result list is used to house the labelled pieces that pyparsing finds. Then the final lines of the code construct what you want from the parsed items. The bits of code that involve previous constitute a hideous kluge that I needed because my grammar finds the var elements twice. Perhaps you can find the flaw?
input = '''\
X ={
a= "someText";
b = 0;
c = 1;
d ={
t = "someText3";
};
f ="someText2";
};
X ={
a= "someText4";
b = 20;
c = 40;
f ="someText6";
d ={
t = "someText5";
};
};'''
import pyparsing as pp
result = []
var = pp.Word(pp.alphas).setParseAction(lambda s: result.append(('var', s[0])))
equals = pp.Literal('=')
semicolon = pp.Literal(';')
a_string = pp.QuotedString('"').setParseAction(lambda s: result.append(('string', s[0])))
number = pp.Word(pp.nums).setParseAction(lambda s: result.append(('number', s[0])))
open_curly = pp.Literal('{').setParseAction(lambda s: result.append(('dict_open', None)))
close_curly = pp.Literal('}').setParseAction(lambda s: result.append(('dict_close', None)))
one_dict = pp.Forward()
simple = var + equals + pp.Or([a_string, number]) + semicolon
declaration = one_dict | simple
one_dict << var + equals + open_curly + pp.OneOrMore(declaration) + close_curly + semicolon
dict_list = pp.OneOrMore(one_dict)
dict_list.parseString(input)
count = 0
previous = None
for item in result:
if item[0] == 'var':
if item[1] == 'X':
print ('\nX[{:d}] = '.format(count), end='')
count += 1
else:
if item == previous:
continue
print ('{}: '.format(item[1]), end='')
previous = item
elif item[0] == 'dict_open':
print ('{ ', end='')
elif item[0] == 'dict_close':
print ('}', end='')
elif item[0] == 'number':
print ('{}, '.format(item[1]), end='')
elif item[0] == 'string':
print ('"{}", '.format(item[1]), end='')
else:
pass
print ()
Result:
X[0] = { a: "someText", b: 0, c: 1, d: { t: "someText3", }f: "someText2", }
X[1] = { a: "someText4", b: 20, c: 40, f: "someText6", d: { t: "someText5", }}
Edit: If it's possible for dictionaries to be empty then substitute the following line in the code above.
one_dict << var + equals + open_curly + pp.ZeroOrMore(declaration) + close_curly + semicolon
I find plex even easier to apply here. Just eight expressions to scan for.
from io import StringIO
input = StringIO(
'''X ={
a= "someText";
b = 0;
c = 1;
d ={
t = "someText3";
};
f ="someText2";
};
X ={
a= "someText4";
b = 20;
c = 40;
f ="someText6";
d ={
t = "someText5";
};
};''')
from plex import *
from io import StringIO
space = Any(' \t\n')
lexicon = Lexicon([
(Rep1(Range('AZaz')), 'var'),
(Str('"') + Rep(AnyBut('"')) + Str('"'), 'quoted'),
(Rep1(Range('09')), 'number'),
(space, IGNORE),
(Str('='), IGNORE),
(Str(';'), IGNORE),
(Str('{'), 'open_curly'),
(Str('}'), 'close_curly'),
])
scanner = Scanner(lexicon, input)
count = 0
while True:
token = scanner.read()
if token[0] is None:
break
elif token[0] in ['var', 'number']:
if token[1]=='X':
print ('\nX[{:d}] = '.format(count),end='')
count += 1
else:
print ('{}: '.format(token[1]),end='')
elif token[0]=='quoted':
print('{}, '.format(token[1]), end='')
elif token[0] == 'open_curly':
print ('{} '.format(token[1]), end='')
elif token[0] == 'close_curly':
print ('{}, '.format(token[1]), end='')
else:
pass
print ()
Result:
X[0] = { a: "someText", b: 0: c: 1: d: { t: "someText3", }, f: "someText2", },
X[1] = { a: "someText4", b: 20: c: 40: f: "someText6", d: { t: "someText5", }, },
The heavy downside is that it's distributed for Py2 only, AFAIK. However, I was able to make it work for Py3 in about two hours.

Reverse string translation with dictionary

I am trying to reverse the use of the translate function. I pass a dictionary into str.maketrans, which translates the original string correctly, as per the dictionary.
cipher_dictionary = {'a': 'h5c', 'b': 'km3', 'c': '5fv'}
def cipher(text):
trans = str.maketrans(cipher_dictionary)
return text.translate(trans)
Above is the sample dictionary, together with the function that I use to translate strings. Translating abc gives me h5ckm35fv, which is desired.
Now, to reverse it, I am trying to use the following function.
def decipher(text):
reverse = {value: key for key, value in cipher_dictionary.items()}
trans = str.maketrans(reverse)
return text.translate(trans)
Using it raises an error.
Traceback (most recent call last):
File "C:\Users\lukas\Desktop\cipher.py", line 21, in <module>
deciphered = decipher(ciphered)
File "C:\Users\lukas\Desktop\cipher.py", line 13, in decipher
trans = str.maketrans(reverse)
ValueError: string keys in translate table must be of length 1
I am aware that this is because the values in cipher_dictionary aren't equal length to a, b and c. How can I go about rewriting the decipher function, to make h5ckm35fv translate back into abc?
cipher_dictionary = {'a': 'h5c', 'b': 'km3', 'c': '5fv'}
def cipher(text):
trans = str.maketrans(cipher_dictionary)
return text.translate(trans)
def decipher(text):
reverse = {value: key for key, value in cipher_dictionary.items()}
trans = str.maketrans(reverse)
return text.translate(trans)
if __name__ == '__main__':
text_to_cipher = 'abc'
ciphered = cipher(text_to_cipher)
print(ciphered)
deciphered = decipher(ciphered)
print(deciphered)
Running any of the functions provided in answers works perfectly, except for when there is white space in the input.
Text to cipher: some white space
Ciphered text: px3h54oa4b83 ky6u1v0t6yq3b83 px3sy9h5c5fvb83
Traceback (most recent call last):
File "C:\Users\Lukasz\Desktop\Python\Cipher\cip.py", line 45, in <module>
deciphered = decipher(ciphered)
File "C:\Users\Lukasz\Desktop\Python\Cipher\cip.py", line 36, in decipher
decoded_text = ''.join(reverse[text[i:i+3]] for i in range(0, len(text), 3))
File "C:\Users\Lukasz\Desktop\Python\Cipher\cip.py", line 36, in <genexpr>
decoded_text = ''.join(reverse[text[i:i+3]] for i in range(0, len(text), 3))
KeyError: ' ky'
def decipher(sentence):
    """Decode a space-separated sentence of fixed-width (3 character) codes.

    Each word is cut into 3-character chunks, every chunk is looked up in the
    inverted ``cipher_dictionary``, and the decoded words are re-joined with
    single spaces.
    """
    lookup = {}
    for plain, code in cipher_dictionary.items():
        lookup[code] = plain

    words = []
    for word in sentence.split(' '):
        letters = [lookup[word[pos:pos + 3]] for pos in range(0, len(word), 3)]
        words.append(''.join(letters))
    return ' '.join(words)
Assuming that every letter is being encoded into a set of 3 letters.
Assuming that the values in the dictionary form a prefix-free code, you can keep trying prefixes of the unprocessed ciphertext until you find a match in the reverse dictionary:
def decipher(text, d):
    """Decode ``text`` that was encoded with the prefix-free code ``d``.

    Starting at the current position, ever longer chunks are tried until one
    is a known code word; its plaintext is emitted and the scan restarts
    right after it.

    Args:
        text: The ciphertext.
        d: Mapping of plaintext symbol -> code word (values must form a
            prefix-free code for the result to be unambiguous).

    Returns:
        The decoded string. Trailing characters that never complete a code
        word are ignored.
    """
    r = {v: k for k, v in d.items()}  # Reversed dictionary
    decoded = []
    index = 0
    length = 1
    while index + length <= len(text):
        chunk = text[index:index + length]
        if chunk in r:
            decoded.append(r[chunk])
            index += length
            length = 1
        else:
            length += 1
    return ''.join(decoded)
If the values of the dictionary do not form a prefix free code, then the algorithm involves backtracking, and will return one possible plaintext if the cipher is non bijective:
def decipher2(text, d):
    """Decode ``text`` with backtracking, for codes that are not prefix-free.

    Prefixes of increasing length are tried; when a prefix is a code word the
    rest of the text is decoded recursively, and if that fails the next
    longer prefix is tried instead.

    Args:
        text: The ciphertext.
        d: Mapping of plaintext symbol -> code word.

    Returns:
        One possible plaintext (the first one found). An empty ``text``
        decodes to an empty string.

    Raises:
        ValueError: If ``text`` cannot be split into code words.
    """
    r = {v: k for k, v in d.items()}  # Reversed dictionary, built once

    def _decode(rest):
        for length in range(1, len(rest) + 1):
            chunk = rest[:length]
            if chunk not in r:
                continue
            if length == len(rest):
                return r[chunk]
            try:
                return r[chunk] + _decode(rest[length:])
            except ValueError:
                # This split leads to a dead end; try a longer prefix.
                continue
        raise ValueError('Malformed input.')

    if not text:
        return ''
    return _decode(text)
If you know that all cipher values are of length 3 (i.e. that all values in cipher_dictionary are three characters long), then:
def decrypt(ciphertext, cipher_dict, chunk_size=3):
    """Decode ``ciphertext`` made of fixed-width code words.

    Args:
        ciphertext: The encoded string.
        cipher_dict: Mapping of plaintext symbol -> code word.
        chunk_size: Width of every code word (3 by default).

    Returns:
        The decoded string.

    Raises:
        KeyError: If a chunk is not a known code word (this includes a short
            trailing chunk when the length is not a multiple of
            ``chunk_size``).
    """
    decipher_dict = {v: k for k, v in cipher_dict.items()}
    return ''.join(
        decipher_dict[ciphertext[i:i + chunk_size]]
        for i in range(0, len(ciphertext), chunk_size)
    )
On the other hand, if you don't know that all values are of length 3 (or if they are not of constant size), then try this:
def decrypt(ciphertext, cipher_dict):
    """Greedily decode ``ciphertext`` whose code words may differ in length.

    The text is scanned left to right; as soon as the characters since the
    last match form a known code word, its plaintext is emitted.

    Args:
        ciphertext: The encoded string.
        cipher_dict: Mapping of plaintext symbol -> code word.

    Returns:
        The decoded string. Because matching is greedy (shortest match
        first, no backtracking), the result is only reliable for prefix-free
        codes; trailing characters that complete no code word are ignored.
    """
    decipher_dict = {v: k for k, v in cipher_dict.items()}
    answer = []
    start = 0
    # ``end`` must run up to and including len(ciphertext), otherwise the
    # final code word is never examined.
    for end in range(1, len(ciphertext) + 1):
        chunk = ciphertext[start:end]
        if chunk not in decipher_dict:
            continue
        answer.append(decipher_dict[chunk])
        start = end
    return ''.join(answer)
The problem with this is that it is a greedy algorithm and incurs all the shortcomings of its naïveté.
UPDATE:
If you want to do this with sentences (words separated by whitespace):
encryptedSentence = '...'
# Decrypt word by word; split() treats any run of whitespace as a separator.
answer = [decrypt(word, cipher_dict) for word in encryptedSentence.split()]
decryptedSentence = ' '.join(answer)

best way to parse a line in python to a dictionary

I have a file with lines like
account = "TEST1" Qty=100 price = 20.11 subject="some value" values="3=this, 4=that"
There is no special delimiter and each key has a value that is surrounded by double quotes if its a string but not if it is a number. There is no key without a value though there may exist blank strings which are represented as "" and there is no escape character for a quote as it is not needed
I want to know what is a good way to parse this kind of line with python and store the values as key-value pairs in a dictionary
We're going to need a regex for this.
import re, decimal
r= re.compile('([^ =]+) *= *("[^"]*"|[^ ]*)')
d= {}
for k, v in r.findall(line):
if v[:1]=='"':
d[k]= v[1:-1]
else:
d[k]= decimal.Decimal(v)
>>> d
{'account': 'TEST1', 'subject': 'some value', 'values': '3=this, 4=that', 'price': Decimal('20.11'), 'Qty': Decimal('100.0')}
You can use float instead of decimal if you prefer, but it's probably a bad idea if money is involved.
Maybe a bit simpler to follow is the pyparsing rendition:
from pyparsing import *
# define basic elements - use re's for numerics, faster and easier than
# composing from pyparsing objects
integer = Regex(r'[+-]?\d+')
real = Regex(r'[+-]?\d+\.\d*')
ident = Word(alphanums)
value = real | integer | quotedString.setParseAction(removeQuotes)
# define a key-value pair, and a configline as one or more of these
# wrap configline in a Dict so that results are accessible by given keys
kvpair = Group(ident + Suppress('=') + value)
configline = Dict(OneOrMore(kvpair))
src = 'account = "TEST1" Qty=100 price = 20.11 subject="some value" ' \
'values="3=this, 4=that"'
configitems = configline.parseString(src)
Now you can access your pieces using the returned configitems ParseResults object:
>>> print configitems.asList()
[['account', 'TEST1'], ['Qty', '100'], ['price', '20.11'],
['subject', 'some value'], ['values', '3=this, 4=that']]
>>> print configitems.asDict()
{'account': 'TEST1', 'Qty': '100', 'values': '3=this, 4=that',
'price': '20.11', 'subject': 'some value'}
>>> print configitems.dump()
[['account', 'TEST1'], ['Qty', '100'], ['price', '20.11'],
['subject', 'some value'], ['values', '3=this, 4=that']]
- Qty: 100
- account: TEST1
- price: 20.11
- subject: some value
- values: 3=this, 4=that
>>> print configitems.keys()
['account', 'subject', 'values', 'price', 'Qty']
>>> print configitems.subject
some value
A recursive variation of bobince's parses values with embedded equals as dictionaries:
>>> import re
>>> import pprint
>>>
>>> def parse_line(line):
... d = {}
... a = re.compile(r'\s*(\w+)\s*=\s*("[^"]*"|[^ ,]*),?')
... float_re = re.compile(r'^\d.+$')
... int_re = re.compile(r'^\d+$')
... for k,v in a.findall(line):
... if int_re.match(k):
... k = int(k)
... if v[-1] == '"':
... v = v[1:-1]
... if '=' in v:
... d[k] = parse_line(v)
... elif int_re.match(v):
... d[k] = int(v)
... elif float_re.match(v):
... d[k] = float(v)
... else:
... d[k] = v
... return d
...
>>> line = 'account = "TEST1" Qty=100 price = 20.11 subject="some value" values=
"3=this, 4=that"'
>>> pprint.pprint(parse_line(line))
{'Qty': 100,
'account': 'TEST1',
'price': 20.109999999999999,
'subject': 'some value',
'values': {3: 'this', 4: 'that'}}
If you don't want to use a regex, another option is just to read the string a character at a time:
def parse_key_values(text):
    """Parse ``key = value`` pairs from ``text`` one character at a time.

    Values may be bare tokens or double-quoted strings (which may contain
    spaces and ``=``). Spaces around ``=`` are allowed. All values are
    returned as strings; an empty quoted value (``""``) is kept as ``''``.

    Args:
        text: The line to parse.

    Returns:
        A dict mapping each key to its string value.
    """
    inside_quotes = False
    quoted = False   # a quote was seen for the current value (allows "")
    key = None
    value = ''
    pairs = {}
    for c in text:
        if c == '"':
            inside_quotes = not inside_quotes
            quoted = True
        elif inside_quotes:
            # Everything between quotes is literal, including ' ' and '='.
            value += c
        elif c == '=':
            # What was collected so far is the key; start collecting a value.
            key, value, quoted = value, '', False
        elif c == ' ':
            # A space ends a pair only once both key and value are present;
            # spaces around '=' are otherwise ignored.
            if key and (value or quoted):
                pairs[key] = value
                key, value, quoted = None, '', False
        else:
            value += c
    # Flush the last pair; skipped when the input ended right after a pair.
    if key:
        pairs[key] = value
    return pairs


string = 'account = "TEST1" Qty=100 price = 20.11 subject="some value" values="3=this, 4=that"'
print(parse_key_values(string))

Categories

Resources