I have a list of dictionaries, something like this:
input = [
    {"0_question": "how are you"},
    {"0_answer": "good"},
    {"4_question": "what time is it"},
    {"4_answer": "morning"},
]
and I want this:
expected_result = {
    0: {"how are you": "good"},
    4: {"what time is it": "morning"},
}
This is what I tried, but it does not produce the expected result:
x = {}
l = []
for i, d in enumerate(input):
    for k, v in d.items():
        l.append(v)
        x[l[i]] = l[i+1]
Split each original dict key into its numeric part and its type part:
a = [
    {"0_question": "how are you"},
    {"0_answer": "good"},
    {"4_question": "what time is it"},
    {"4_answer": "morning"},
]

dic = {}
for item in a:
    value = list(item.values())[0]
    num, itemType = list(item.keys())[0].split('_')
    num = int(num)
    if itemType == 'question':
        dic[num] = {value: ''}
    else:
        key = list(dic[num].keys())[0]
        dic[num][key] = value
print(dic)
Out:
{0: {'how are you': 'good'}, 4: {'what time is it': 'morning'}}
You can use a nested loop and the modulo operator (%). Use regex to find the question/answer number:
import re

l = [
    {"0_question": "how are you"},
    {"0_answer": "good"},
    {"4_question": "what time is it"},
    {"4_answer": "morning"},
    {"157_question": "which fruit do you want"},
    {"157_answer": "apple"}
]
new_d = {}
for i, d in enumerate(l):
    for k, v in d.items():
        result = re.search(r"\d+", k).group(0)
        num = int(result)
        if ((i + 1) % 2) == 0:
            for new_k in new_d[num].keys():
                new_d[num][new_k] = v
        else:
            new_d[num] = {v: ""}
print(new_d)
Output
{0: {'how are you': 'good'}, 4: {'what time is it': 'morning'}, 157: {'which fruit do you want': 'apple'}}
Use this code:
a = [{"0_question": "how are you"},
     {"0_answer": "good"},
     {"4_question": "what time is it"},
     {"4_answer": "morning"}]

dic = {}
for i in range(len(a) - 1):
    if i % 2 == 0:
        num = int(list(a[i].keys())[0].split('_')[0])
        dic[num] = {list(a[i].values())[0]: list(a[i + 1].values())[0]}
print(dic)
This has fewer lines and hopefully increases readability. The downside is it uses more resources by restructuring the initial data.
sample_data = [{"0_question": "how are you"},
               {"0_answer": "good"},
               {"4_question": "what time is it"},
               {"4_answer": "morning"}]
restructured_data = {k: v for data in sample_data for k, v in data.items()}
nums = list(set([int(k.split("_")[0]) for k, v in restructured_data.items()]))
new_dict = {num: {restructured_data[f'{num}_question']: restructured_data[f'{num}_answer']} for num in nums}
print(new_dict)
output:
{0: {'how are you': 'good'}, 4: {'what time is it': 'morning'}}
data = [{"0_question": "how are you"},
        {"0_answer": "good"},
        {"4_question": "what time is it"},
        {"4_answer": "morning"}]
# unpack the dict items into an iterable data structure
# (a list in my case)
unpacked_data = list(map(lambda dict_: list(dict_.items()), data))

# dict_items is not subscriptable, so unpack it again,
# also extracting the number from the key
processed_data = list(
    map(
        lambda values_tuple: (int(values_tuple[0][0].split('_')[0]), values_tuple[0][1]),
        unpacked_data
    )
)

result_dict = {}
# according to the input data we assume the items come in (question, answer) pairs;
# using zip and slicing for pairwise iteration
for question, answer in zip(processed_data[0::2], processed_data[1::2]):
    result_dict[question[0]] = {question[1]: answer[1]}
print(result_dict)
UPD: this may not be the most optimal solution, but I hope it is clear enough to follow.
Check the code below:
{int(list(i1.keys())[0].split('_')[0]): {list(i1.values())[0]: list(i2.values())[0]} for i1, i2 in zip(input[::2], input[1::2])}
Output:
{0: {'how are you': 'good'}, 4: {'what time is it': 'morning'}}
testlist = ["13A", "13B", "13C", "23D", "5D", "9B", "9C", "9D"]
What I want the list to be:
["13A-C", "23D", "5D", "9B-D"]
Bonus points if you can sort it (5,9,13,23).
For those interested, this is my current WIP script:
testlist = ["13A", "13B", "13C", "23D", "5D", "9B", "9C", "9D"]
newlist = []
lenlist = len(testlist)
for i in range(lenlist):
    # check values of first
    indexnum = testlist[i][:-1]
    indexchar = testlist[i][-1]
    if i == 0:
        newlist.append(testlist[i])
    if indexnum == (testlist[i-1][:-1]):
        newlistvalue = (indexnum + (testlist[i-1][-1]) + "-" + (testlist[i][-1]))
        if ((indexchar == "B") and ((testlist[i-1][-1]) == "A")) or ((indexchar == "D") and ((testlist[i-1][-1]) == "C")):
            newlist.append(newlistvalue)
        lastval = newlist[len(newlist)-1][-1]
        lastval2 = newlist[(len(newlist)-2)]
        #print(lastval2)
        if (indexchar == "C") and (lastval == "B"):
            newlistvalue = lastval2[:-1] + indexchar
            #print(newlistvalue)
            newlist.pop()
            newlist.pop()
            #print(newlistvalue)
            newlist.append(newlistvalue)
    else:
        newlist.append(testlist[i])
print(newlist)
#print(newlistvalue)
First you'd need to create a dict mapping each number to its letters; I assume there will only be one letter at the end of each string. Then you need to sort and format it. You can use the following:
from collections import defaultdict

pairs = defaultdict(list)
for s in testlist:
    pairs[s[:-1]].append(s[-1])

result = [f'{k}{"-".join(dict.fromkeys([v[0], v[-1]]))}'
          for k, v in sorted(pairs.items(), key=lambda x: int(x[0]))]
['5D', '9B-D', '13A-C', '23D']
On the assumption that each string in the list ends with exactly one letter, you could do this:
import re

testlist = ["13A", "13C", "13B", "23D", "5D", "9B", "9C", "9D"]

def seq(lst):
    return lst[0] if len(lst) == 1 else f'{lst[0]}-{lst[-1]}'

def key(e):
    return int(re.search(r'\d+', e)[0])

d = {}
for e in testlist:
    d.setdefault(int(e[:-1]), []).append(e[-1])

print(sorted([f'{k}{seq(sorted(v))}' for k, v in d.items()], key=key))
Output:
['5D', '9B-D', '13A-B', '23D']
Note: there is a subtle change to the OP's data to show that this code can handle out-of-sequence values.
I have a requirement where I have keys as strings joined by dots (.), with a value associated with each key string, and I want to create a nested dictionary from them.
key1 = "A.B.C.D"
text_to_be_inserted1_for_key1 = "Test1"
key2 = "A.B.C.E"
text_to_be_inserted_for_key2 = "Test2"
Expected result:
dict = {
    "A": {
        "B": {
            "C": {
                "D": text_to_be_inserted1_for_key1,
                "E": text_to_be_inserted_for_key2
            }
        }
    }
}
from collections import defaultdict

def deep_dict():
    return defaultdict(deep_dict)

result = deep_dict()

def deep_insert(key, value):
    d = result
    keys = key.split(".")
    for subkey in keys[:-1]:
        d = d[subkey]
    d[keys[-1]] = value

deep_insert("A.B.C.D", "Test1")
deep_insert("A.B.C.E", "Test2")

import json
print(json.dumps(result, indent=4))
You may, for each part of the key except the last one, create a mapping from that part to a nested dict, and for the last part create the mapping to the value:
def insert(keys, values):
    res = {}
    for k, v in zip(keys, values):
        res_tmp = res
        levels = k.split(".")
        for level in levels[:-1]:
            res_tmp = res_tmp.setdefault(level, {})
        res_tmp[levels[-1]] = v
    return res
Usage:
key1 = "A.B.C.D"
value_key1 = "Test1"
key2 = "A.B.C.E"
value_key2 = "Test2"
result = insert([key1, key2], [value_key1, value_key2])
print(result) # {'A': {'B': {'C': {'D': 'Test1', 'E': 'Test2'}}}}
You can solve for each key separately and then merge the results:
from copy import deepcopy

def dict_of_dicts_merge(x, y):
    z = {}
    overlapping_keys = x.keys() & y.keys()
    for key in overlapping_keys:
        z[key] = dict_of_dicts_merge(x[key], y[key])
    for key in x.keys() - overlapping_keys:
        z[key] = deepcopy(x[key])
    for key in y.keys() - overlapping_keys:
        z[key] = deepcopy(y[key])
    return z
key1 = "A.B.C.D"
text_to_be_inserted_for_key1 = "Test1"
key2 = "A.B.C.E"
text_to_be_inserted_for_key2 = "Test2"

dict1 = {}
newdict = {}
olddict = {}

keys_for_1 = key1.split(".")
keys_for_1.reverse()
olddict[keys_for_1[0]] = text_to_be_inserted_for_key1
for i in range(1, len(keys_for_1)):
    newdict = {}
    newdict[keys_for_1[i]] = olddict
    olddict = newdict
save1 = newdict

newdict = {}
olddict = {}
keys_for_2 = key2.split(".")
keys_for_2.reverse()
olddict[keys_for_2[0]] = text_to_be_inserted_for_key2
for i in range(1, len(keys_for_2)):
    newdict = {}
    newdict[keys_for_2[i]] = olddict
    olddict = newdict
save2 = newdict

dict1 = dict_of_dicts_merge(save1, save2)
print(dict1)
I have a list of words as below:
Data = ['pre_bbc', 'pre_nbc', 'pre_fox', 'bread_post', 'pre_news', 'lucky_post',
        'banana_post', 'mike', 'john', 'edward_lear', 'winelistpdf', 'cookbookspdf']
Assuming I have no idea what the prefix or suffix is beforehand, and '_' is not always there to separate the suffix/prefix, is there a way using Python to categorize this list into groups? Let's say the result I want is as below:
List0 = ['pre_bbc', 'pre_nbc', 'pre_fox', 'pre_news']
List1 = ['bread_post', 'lucky_post', 'banana_post']
List2 = ['winelistpdf', 'cookbookspdf']
Orphan_list =['mike', 'john', 'edward_lear']
There could be some tricky cases in which a word contains both a prefix and a suffix, like 'pre_voa_post'; I think this can be put into both lists. Also, let's assume all the elements in this list are unique.
Thanks!
This was a pretty challenging one! There are a few conditions to consider here if this needs to be fairly universal.
Minimum length for an affix
Delimiters that denote affixes
Multiple affixes
import json

def get_affix_groups(words, min=3, delimiter="_"):
    """Get groups from a word list that have matching affixes."""
    groups = {}
    for word in words:
        for item in [w for w in words if w != word]:
            for n in range(len(word) - min):
                try:
                    prefix, *_, suffix = word.split(delimiter)
                except ValueError:
                    prefix = word[:n + min]
                    suffix = word[-(n + min):]
                if item.startswith(prefix):
                    prefix_group = groups.setdefault(prefix, {word})
                    groups[prefix].add(item)
                if item.endswith(suffix):
                    suffix_group = groups.setdefault(suffix, {word})
                    groups[suffix].add(item)
    all_words = [i for w in groups.values() for i in w]
    groups["orphans"] = {word for word in words if word not in all_words}
    return groups
data = [
    "pre_bbc",
    "pre_nbc",
    "pre_fox",
    "bread_post",
    "pre_news",
    "lucky_post",
    "banana_post",
    "mike",
    "john",
    "edward_lear",
    "winelistpdf",
    "cookbookspdf",
    "pre_voa_post"
]

# Print the resulting dict in a human-readable format
print(json.dumps(get_affix_groups(data), default=list, indent=2))
Output
{
  "pre": [
    "pre_fox",
    "pre_voa_post",
    "pre_bbc",
    "pre_news",
    "pre_nbc"
  ],
  "post": [
    "lucky_post",
    "pre_voa_post",
    "bread_post",
    "banana_post"
  ],
  "pdf": [
    "cookbookspdf",
    "winelistpdf"
  ],
  "orphans": [
    "john",
    "edward_lear",
    "mike"
  ]
}
If you really need these to be variables, you can use exec(), but it's considered bad practice.
for affix, group in get_affix_groups(data).items():
    exec(f"{affix} = {group}")
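If you only need convenient access rather than real top-level variables, plain dict lookups on the returned mapping avoid exec() entirely (a minimal sketch, reusing the get_affix_groups function and data from above):
groups = get_affix_groups(data)
pre_words = groups["pre"]          # same information as the exec'd variable, without dynamic names
print(sorted(pre_words))
print(sorted(groups["orphans"]))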
Tested with:
Data = ['pre_voa_post', 'argument', 'thermodynamic', 'winelistpdf',
        'pre_bbc', 'anteroom', 'pre_nbc', 'thermostat', 'pre_fox',
        'antedate', 'blabla', 'enchantment', 'pre_news', 'lucky_post',
        'banana_post', 'mike', 'john', 'thermometer', 'toto', 'antenatal',
        'edward_lear', 'cookbookspdf']
Function
def test(Data):
    suffixes = Data.copy()
    prefixes = Data.copy()
    my_suffixes = {}
    my_prefixes = {}
    Orphan_list = []
    Orphan_s = []
    Orphan_p = []

    while len(prefixes) > 1:
        first_p = prefixes.pop(0)
        prefix = ''
        for elt_pref in prefixes:
            i = min(len(first_p), len(elt_pref))
            while i > 1:
                if first_p[0:i] == elt_pref[0:i]:
                    prefix = first_p[0:i]
                    my_prefixes[prefix] = [first_p, elt_pref, ]
                    prefixes.remove(elt_pref)
                    var = 0
                    while var < len(prefixes):
                        sec_elt = prefixes[var]
                        if sec_elt.startswith(prefix):
                            my_prefixes[prefix].append(sec_elt)
                            prefixes.remove(sec_elt)
                        else:
                            var += 1
                    break
                else:
                    i -= 1
        if prefix == '':
            Orphan_p.append(first_p)
    if prefixes:
        Orphan_p.append(prefixes[0])

    while len(suffixes) > 1:
        first_s = suffixes.pop(0)
        suffix = ''
        for elt_suf in suffixes:
            j = min(len(first_s), len(elt_suf))
            while j > 2:
                if first_s[-j:] == elt_suf[-j:]:
                    suffix = first_s[-j:]
                    my_suffixes[suffix] = [first_s, elt_suf, ]
                    suffixes.remove(elt_suf)
                    var = 0
                    while var < len(suffixes):
                        elt_suf3 = suffixes[var]
                        if elt_suf3.endswith(suffix):
                            my_suffixes[suffix].append(elt_suf3)
                            suffixes.remove(elt_suf3)
                        else:
                            var += 1
                    break
                else:
                    j -= 1
        if suffix == '':
            Orphan_s.append(first_s)
    if suffixes:
        Orphan_s.append(suffixes[0])

    Orphan_list = list(set(Orphan_p) & set(Orphan_s))
    print("my_suffixes", my_suffixes)
    print("my_prefixes", my_prefixes)
    print("Orphan_list", Orphan_list)
Result:
my_suffixes {'_post': ['pre_voa_post', 'bread_post', 'lucky_post', 'banana_post'],
'ment': ['argument', 'enchantment'],
'pdf': ['winelistpdf', 'cookbookspdf']}
my_prefixes {'pre_': ['pre_voa_post', 'pre_bbc', 'pre_nbc', 'pre_fox', 'pre_news'],
'thermo': ['thermodynamic', 'thermostat', 'thermometer'],
'ante': ['anteroom', 'antedate', 'antenatal']}
Orphan_list ['toto', 'mike', 'john', 'blabla', 'edward_lear']
This should not be a valid question, but:
def partition(list_of_pref, list_of_words):
    ret_list = []
    for l in list_of_pref:
        this_list = []
        ret_list.append(this_list)
        for word in list_of_words:
            if word.startswith(l):
                this_list.append(word)
    return ret_list

partition(['pre', 'banana'], ['pre_bbc', 'pre_nbc', 'pre_fox', 'bread_post', 'pre_news', 'lucky_post', 'banana_post', 'mike', 'john', 'edward_lear', 'winelistpdf', 'cookbookspdf'])
Out[4]: [['pre_bbc', 'pre_nbc', 'pre_fox', 'pre_news'], ['banana_post']]
Do the same with the list of prefixes, or generate them by iterating on split('_') inside your function, and you are done.
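A rough sketch of that suggestion, assuming the prefixes are generated from the part before '_' (the helper name below is illustrative, not from the answer above):
def partition_by_generated_prefixes(list_of_words):
    # Derive candidate prefixes from the part before '_', then group with startswith,
    # mirroring the partition() function above.
    prefixes = sorted({w.split('_')[0] for w in list_of_words if '_' in w})
    return {p: [w for w in list_of_words if w.startswith(p + '_')] for p in prefixes}

words = ['pre_bbc', 'pre_nbc', 'pre_fox', 'bread_post', 'pre_news',
         'lucky_post', 'banana_post', 'mike', 'john', 'edward_lear']
print(partition_by_generated_prefixes(words))
# {'banana': ['banana_post'], 'bread': ['bread_post'], 'edward': ['edward_lear'],
#  'lucky': ['lucky_post'], 'pre': ['pre_bbc', 'pre_nbc', 'pre_fox', 'pre_news']}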
I want to do something with column data where each value is a list of dicts, like:
inputs:
col-A
[{'name':'1','age':'12'}, {'name':'2','age':'12'}]
[{'name':'3','age':'18'}, {'name':'7','age':'15'}]
....
outputs:
col-A
[{'1-age':'12'}, {'2-age':'12'}]
[{'3-age':'18'}, {'7-age':'15'}]
....
My code is:
import copy

def deal(dict_col, prefix_key):
    key_value = dict_col[prefix_key] + '-'
    dict_col.pop(prefix_key, None)
    items = copy.deepcopy(dict_col)
    for key, value in items.items():
        dict_col[key_value + key] = dict_col.pop(key)
    return dict_col

prefix = "name"
[[deal(sub_item, prefix) for sub_item in item] for item in df['col-A']]
Some items will be processed multiple times. Is that because the return value of the deal method is swapped back into the item in real time?
For example:
For the deal method we have
input:
{'name':'1','age':'12'}
output:
{'1-age':'12'}
Then the next input may be {'1-age': '12'}, and now we have no name or age to deal with.
How to solve this problem?
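To make the problem concrete, here is a small sketch of what happens when the in-place deal above is applied twice to the same dict (illustrative only, using the question's own function):
import copy

def deal(dict_col, prefix_key):
    key_value = dict_col[prefix_key] + '-'
    dict_col.pop(prefix_key, None)
    items = copy.deepcopy(dict_col)
    for key, value in items.items():
        dict_col[key_value + key] = dict_col.pop(key)
    return dict_col

item = {'name': '1', 'age': '12'}
deal(item, 'name')
print(item)          # {'1-age': '12'} -- the original dict was mutated in place
deal(item, 'name')   # KeyError: 'name' -- nothing left to prefix on the second pass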
You can use the pandas apply method for this; here is some code:
import pandas as pd

d = {'col-A': [[{'name': '1', 'age': '12'}, {'name': '2', 'age': '12'}],
               [{'name': '3', 'age': '18'}, {'name': '7', 'age': '15'}]]}
df = pd.DataFrame(d)

def deal(row, prefix):
    out_list = []
    for sub_dict in row:
        out_dict = {}
        out_str = sub_dict.get(prefix) + '-'
        for k, v in sub_dict.items():
            out_dict[out_str + k] = v
        out_list.append(out_dict)
    return out_list

prefix = 'name'
df['col-A'] = df['col-A'].apply(lambda x: deal(x, prefix))
print(df)
You could compress some of the code into a one-liner if you like that more:
def deal(row, prefix):
    out_list = []
    for sub_dict in row:
        out_dict = dict((sub_dict[prefix] + '-' + k, sub_dict[k]) for k in sub_dict.keys() if k != prefix)
        out_list.append(out_dict)
    return out_list

prefix = 'name'
df['col-A'] = df['col-A'].apply(lambda x: deal(x, prefix))
Just for the fun of it, you could even bring it down to one single line (not recommended due to poor readability):
prefix = "name"
df['col-A'] = df['col-A'].apply(lambda row : [dict((sub_dict[prefix] + '-' + k , sub_dict[k]) for k in sub_dict.keys() if k != prefix) for sub_dict in row])
I believe you need the .get function to select with a default value when the key does not exist in the dict:
import copy

def deal(dict_col, prefix_key):
    key_value = dict_col.get(prefix_key, 'not_exist') + '-'
    dict_col.pop(prefix_key, None)
    items = copy.deepcopy(dict_col)
    for key, value in items.items():
        dict_col[key_value + key] = dict_col.pop(key)
    return dict_col
I have a file with the following format:
X ={
    a= "someText";
    b = 0;
    c = 1;
    d ={
        t = "someText3";
    };
    f ="someText2";
};
X ={
    a= "someText4";
    b = 20;
    c = 40;
    f ="someText6";
    d ={
        t = "someText5";
    };
};
I am looking for a smart and robust way to parse it into a list of dicts like the following:
X[0] = {'a': "someText", 'b': 0, 'c': 1, 'd': {'t': 'someText3'}, 'f': "someText2"}
X[1] = {'a': "someText4", 'b': 20, 'c': 40, 'd': {'t': 'someText5'}, 'f': "someText6"}
Note that there might be nested dictionaries and the variables can have different order of occurrence.
My method is to keep track of the level by searching '={' and '};' and construct the list. I wonder if there is an elegant method to parse it.
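That level-tracking idea can be sketched roughly as follows (a minimal sketch only, assuming one 'key = value;' or 'key ={' per line and no escaped quotes; the function name, regexes, and file name are illustrative):
import re

def parse_blocks(text):
    # Track nesting by watching for lines that open ('key ={') or close ('};') a dict.
    root = []      # one dict per top-level 'X ={ ... };' block
    stack = []     # currently open dicts, innermost last
    for line in text.splitlines():
        line = line.strip()
        if not line:
            continue
        open_match = re.match(r'(\w+)\s*=\s*\{$', line)
        value_match = re.match(r'(\w+)\s*=\s*(".*"|\d+)\s*;$', line)
        if open_match:                      # 'key ={' opens a nested dict
            new = {}
            if stack:
                stack[-1][open_match.group(1)] = new
            else:
                root.append(new)
            stack.append(new)
        elif line == '};':                  # close the innermost open dict
            stack.pop()
        elif value_match:                   # plain 'key = value;'
            key, raw = value_match.groups()
            stack[-1][key] = raw.strip('"') if raw.startswith('"') else int(raw)
    return root

# X = parse_blocks(open('input.txt').read())   # hypothetical file containing the blocks above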
The simple parser below implements a recursive descent algorithm on simple dictionary schemes:
import re
from collections import namedtuple

s = """
X ={
    a= "someText";
    b = 0;
    c = 1;
    d ={
        t = "someText3";
    };
    f ="someText2";
};
"""

s1 = """
X ={
    a= "someText4";
    b = 20;
    c = 40;
    f ="someText6";
    d ={
        t = "someText5";
    };
};
"""
token = namedtuple('token', ['type', 'value'])
class Parser:
    lang = r'"[a-zA-Z0-9]+"|[a-zA-Z]+|\d+|\{|\};'
    token_types = {'int': r'\d+', 'key': '[a-zA-Z]+', 'start': '{', 'end': '};'}

    def __init__(self, s):
        self.starting_with = Parser.tokenize(s)[1:-1]
        self.tokens = iter(Parser.tokenize(s)[1:-1])
        self.starts = []
        self.ends = []
        self.k_list = []
        self.k = None
        self.d = {}
        self.current_d = {}

    def parse(self):
        current = next(self.tokens, None)
        if current:
            if current.type == 'start':
                self.starts.append(current.value)
                self.parse()
            if current.type == 'key':
                self.k = current.value
                self.k_list.append(self.k)
                self.parse()
            if current.type not in ['start', 'end', 'key']:
                if len(self.starts) == 1:
                    self.d[self.k] = current.value[1:-1] if current.value.startswith('"') and current.value.endswith('"') else current.value
                    self.parse()
                else:
                    self.current_d[self.k_list[-1]] = current.value[1:-1] if current.value.startswith('"') and current.value.endswith('"') else current.value
                    self.parse()
            if current.type == 'end':
                end = self.starts.pop()
                self.d[self.k_list[-len(self.starts) - 1]] = self.current_d
                self.current_d = {}
                self.parse()

    @classmethod
    def tokenize(cls, s):
        return [token('string' if i.startswith('"') and i.endswith('"') else [a for a, b in cls.token_types.items() if re.findall(b, i)][0], i) for i in re.findall(cls.lang, s)]
dictionaries = [s, s1]
X = []
for d in dictionaries:
    p = Parser(d)
    p.parse()
    X.append(p.d)

print(X[0])
print(X[1])
Output:
{'a': 'someText', 'c': '1', 'b': '0', 'd': {'t': 'someText3'}, 'f': 'someText2'}
{'a': 'someText4', 'c': '40', 'b': '20', 'd': {'t': 'someText5'}, 'f': 'someText6'}
Here is an implementation using parsy (which works similarly to pyparsing but is more modern and has much nicer documentation, and generally results in much neater code, but does require Python 3.3 or greater):
from collections import defaultdict
from parsy import generate, regex, seq, string, whitespace

lexeme = lambda parser: whitespace.optional() >> parser << whitespace.optional()

variable = lexeme(regex(r"[A-Za-z]+"))
string_literal = lexeme(string('"') >> regex(r'[^"]*') << string('"'))
int_literal = lexeme(regex(r'[0-9]+').map(int))

@generate
def value():
    return (yield dict_literal | string_literal | int_literal)

statement = seq(variable << lexeme(string("=")),
                value << lexeme(string(";")))
dict_literal = lexeme(string("{")) >> statement.many().map(dict) << lexeme(string("}"))
file_format = statement.many()

def parse(text_input):
    output = defaultdict(list)
    for key, val in file_format.parse(text_input):
        output[key].append(val)
    return dict(output)
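A usage sketch for the parse function above (assuming the two X blocks from the question are stored together in one file; the file name is illustrative):
from pprint import pprint

with open("input.txt") as f:      # hypothetical file holding both 'X ={ ... };' blocks
    text = f.read()

pprint(parse(text))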
Output for your example:
{'X': [{'a': 'someText',
        'b': 0,
        'c': 1,
        'd': {'t': 'someText3'},
        'f': 'someText2'},
       {'a': 'someText4',
        'b': 20,
        'c': 40,
        'd': {'t': 'someText5'},
        'f': 'someText6'}]}
The parsing is done by file_format.parse, the parse function I've added then combines that basic parse into a dictionary with multiple entries for each top level variable, and returns that value. It doesn't print it exactly as per your example because that probably isn't what you need if you want to use the values from Python.
You might want to adjust this according to your needs. Also, you may need to adjust all of the sub-parsers according to your actual rules (e.g. can variable names contain numbers? Are there escapes for string literals?).
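For example, adjusting those two sub-parser definitions near the top might look like this (a sketch only; the exact rules depend on your real format):
# Allow digits and underscores after the first character of a variable name.
variable = lexeme(regex(r"[A-Za-z][A-Za-z0-9_]*"))

# Accept backslash escapes inside string literals (kept verbatim, not unescaped).
string_literal = lexeme(string('"') >> regex(r'(?:\\.|[^"\\])*') << string('"'))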
You can do this without having an IQ of 170, by using pyparsing. Mind you, I've found that it takes some time to learn it.
I have defined the grammar of your input in seven lines. result is used to house the labelled pieces that pyparsing finds. Then the final lines of the code construct what you want from the parsed items. The bits of code that involve previous constitute a hideous kluge that I needed because my grammar finds the var elements twice. Perhaps you can find the flaw?
input = '''\
X ={
    a= "someText";
    b = 0;
    c = 1;
    d ={
        t = "someText3";
    };
    f ="someText2";
};
X ={
    a= "someText4";
    b = 20;
    c = 40;
    f ="someText6";
    d ={
        t = "someText5";
    };
};'''
import pyparsing as pp
result = []
var = pp.Word(pp.alphas).setParseAction(lambda s: result.append(('var', s[0])))
equals = pp.Literal('=')
semicolon = pp.Literal(';')
a_string = pp.QuotedString('"').setParseAction(lambda s: result.append(('string', s[0])))
number = pp.Word(pp.nums).setParseAction(lambda s: result.append(('number', s[0])))
open_curly = pp.Literal('{').setParseAction(lambda s: result.append(('dict_open', None)))
close_curly = pp.Literal('}').setParseAction(lambda s: result.append(('dict_close', None)))
one_dict = pp.Forward()
simple = var + equals + pp.Or([a_string, number]) + semicolon
declaration = one_dict | simple
one_dict << var + equals + open_curly + pp.OneOrMore(declaration) + close_curly + semicolon
dict_list = pp.OneOrMore(one_dict)
dict_list.parseString(input)
count = 0
previous = None
for item in result:
    if item[0] == 'var':
        if item[1] == 'X':
            print('\nX[{:d}] = '.format(count), end='')
            count += 1
        else:
            if item == previous:
                continue
            print('{}: '.format(item[1]), end='')
            previous = item
    elif item[0] == 'dict_open':
        print('{ ', end='')
    elif item[0] == 'dict_close':
        print('}', end='')
    elif item[0] == 'number':
        print('{}, '.format(item[1]), end='')
    elif item[0] == 'string':
        print('"{}", '.format(item[1]), end='')
    else:
        pass
print()
Result:
X[0] = { a: "someText", b: 0, c: 1, d: { t: "someText3", }f: "someText2", }
X[1] = { a: "someText4", b: 20, c: 40, f: "someText6", d: { t: "someText5", }}
Edit: If it's possible for dictionaries to be empty then substitute the following line in the code above.
one_dict << var + equals + open_curly + pp.ZeroOrMore(declaration) + close_curly + semicolon
I find plex even easier to apply here. Just eight expressions to scan for.
from io import StringIO

input = StringIO('''X ={
    a= "someText";
    b = 0;
    c = 1;
    d ={
        t = "someText3";
    };
    f ="someText2";
};
X ={
    a= "someText4";
    b = 20;
    c = 40;
    f ="someText6";
    d ={
        t = "someText5";
    };
};''')
from plex import *

space = Any(' \t\n')
lexicon = Lexicon([
    (Rep1(Range('AZaz')), 'var'),
    (Str('"') + Rep(AnyBut('"')) + Str('"'), 'quoted'),
    (Rep1(Range('09')), 'number'),
    (space, IGNORE),
    (Str('='), IGNORE),
    (Str(';'), IGNORE),
    (Str('{'), 'open_curly'),
    (Str('}'), 'close_curly'),
])
scanner = Scanner(lexicon, input)
count = 0
while True:
    token = scanner.read()
    if token[0] is None:
        break
    elif token[0] in ['var', 'number']:
        if token[1] == 'X':
            print('\nX[{:d}] = '.format(count), end='')
            count += 1
        else:
            print('{}: '.format(token[1]), end='')
    elif token[0] == 'quoted':
        print('{}, '.format(token[1]), end='')
    elif token[0] == 'open_curly':
        print('{} '.format(token[1]), end='')
    elif token[0] == 'close_curly':
        print('{}, '.format(token[1]), end='')
    else:
        pass
print()
Result:
X[0] = { a: "someText", b: 0: c: 1: d: { t: "someText3", }, f: "someText2", },
X[1] = { a: "someText4", b: 20: c: 40: f: "someText6", d: { t: "someText5", }, },
The heavy downside is that it's distributed for Py2 only, AFAIK. However, I was able to make it work for Py3 in about two hours.