Merging Python list of dicts grouped by key value - python

I want to merge two lists of dicts on the id. List x contains many distinct ids, with a different number of rows for each id. List y contains multiple key values and always has fewer rows than list x.
x = [{'costgroup': '1', 'POC1': '2', 'post': '5','id': '1'},
{'costgroup': '2', 'POC1': '1', 'post': '4','id': '1'},
{'costgroup': '3', 'POC1': '5', 'post': '2', 'id': '1'},
{'costgroup': '1', 'POC1': '2', 'post': '5','id': '2'},
{'costgroup': '2', 'POC1': '1', 'post': '4','id': '2'},
{'costgroup': '3', 'POC1': '5', 'post': '2', 'id': '2'},
{'costgroup': '3', 'POC1': '5', 'post': '2', 'id': '2'}]
# Lookup rows: one dict per id, carrying the 'laminate' value to merge into x.
y = [{'id': '1', 'laminate': 'D'},
     {'id': '2', 'laminate': 'T'}]
The output that I want is the following:
z = [{'costgroup': '1', 'POC1': '2', 'post': '5','id': '1','laminate':'D'},
{'costgroup': '2', 'POC1': '1', 'post': '4','id': '1','laminate': 'D'},
{'costgroup': '3', 'POC1': '5', 'post': '2', 'id': '1','laminate': 'D'},
{'costgroup': '1', 'POC1': '2', 'post': '5','id': '2','laminate': 'T'},
{'costgroup': '2', 'POC1': '1', 'post': '4','id': '2','laminate': 'T'},
{'costgroup': '3', 'POC1': '5', 'post': '2', 'id': '2','laminate': 'T'},
{'costgroup': '3', 'POC1': '5', 'post': '2', 'id': '2','laminate': 'T'}]
This is easy to achieve using pandas
dfx = pd.DataFrame(x)
dfy = pd.DataFrame(y)
pd.merge(dfx,dfy, how ='left', left_on = 'id', right_on = 'id' )
But I am going to run this in an AWS Lambda function, so I don't want the overhead of pandas, and the output needs to be a list of dicts. I tried the code below, which gets me closer, but then I had to add something to find the distinct values of the id and iterate through them. I still don't have the output that I need.
valuelist = ['1']
def copyf(dictlist, key, valuelist):
    """Return the dicts in ``dictlist`` whose ``key`` value is in ``valuelist``.

    Args:
        dictlist: Iterable of dicts to filter.
        key: Key looked up in every dict.
        valuelist: Container of accepted values for ``key``.

    Returns:
        A new list holding the matching dicts (the same objects, not
        copies), in their original order.

    Raises:
        KeyError: If a dict in ``dictlist`` does not contain ``key``.
    """
    return [d for d in dictlist if d[key] in valuelist]
y1 = copyf(y, 'id', valuelist)
x1 = copyf(x, 'id', valuelist)
y1.append(x1)
The above provides this output, which is interesting but not what I need.
[{'distance': '2', 'id': '1', 'laminate': 'D'},
[{'POC1': '2', 'costgroup': '1', 'id': '1', 'post': '5'},
{'POC1': '1', 'costgroup': '2', 'id': '1', 'post': '4'},
{'POC1': '5', 'costgroup': '3', 'id': '1', 'post': '2'}]]

def merge(d1, d2):
    """Given two dicts, merge them into a new dict as a shallow copy.

    Neither input is modified. When a key is present in both dicts, the
    value from ``d2`` wins.

    Args:
        d1: Base dict; copied before merging.
        d2: Dict whose items are applied on top of the copy of ``d1``.

    Returns:
        A new dict holding the items of ``d1`` updated with those of ``d2``.
    """
    result = d1.copy()
    result.update(d2)
    return result
result = [merge(d1, d2) for d1 in x for d2 in y if d1["id"] == d2["id"]]
print(result)
Gives
[{'POC1': '2', 'costgroup': '1', 'id': '1', 'laminate': 'D', 'post': '5'},
{'POC1': '1', 'costgroup': '2', 'id': '1', 'laminate': 'D', 'post': '4'},
{'POC1': '5', 'costgroup': '3', 'id': '1', 'laminate': 'D', 'post': '2'},
{'POC1': '2', 'costgroup': '1', 'id': '2', 'laminate': 'T', 'post': '5'},
{'POC1': '1', 'costgroup': '2', 'id': '2', 'laminate': 'T', 'post': '4'},
{'POC1': '5', 'costgroup': '3', 'id': '2', 'laminate': 'T', 'post': '2'},
{'POC1': '5', 'costgroup': '3', 'id': '2', 'laminate': 'T', 'post': '2'}]
Merge function from here: How to merge two Python dictionaries in a single expression?
There's a more concise syntax in Python 3.5 but you're on 2.7.

import copy
x = [{'costgroup': '1', 'POC1': '2', 'post': '5', 'id': '1'},
     {'costgroup': '2', 'POC1': '1', 'post': '4', 'id': '1'},
     {'costgroup': '3', 'POC1': '5', 'post': '2', 'id': '1'},
     {'costgroup': '1', 'POC1': '2', 'post': '5', 'id': '2'},
     {'costgroup': '2', 'POC1': '1', 'post': '4', 'id': '2'},
     {'costgroup': '3', 'POC1': '5', 'post': '2', 'id': '2'},
     {'costgroup': '3', 'POC1': '5', 'post': '2', 'id': '2'}]
y = [{'id': '1', 'laminate': 'D'},
     {'id': '2', 'laminate': 'T'}]
# Create the id -> laminate mapping once, so every row of x is matched with
# a single dict lookup instead of a scan over y. If y repeats an id, the
# last entry wins.
m = {d['id']: d['laminate'] for d in y}
# Create the final output.
z = []
for d in x:
    # Make a shallow copy of the row from x so that the input data is not
    # overwritten (copy.deepcopy(d) would be the alternative for nested
    # values).
    item = dict(d)
    # .get() mirrors a left join: a row whose id is absent from y is kept,
    # with laminate set to None, instead of raising KeyError.
    item['laminate'] = m.get(d['id'])
    z.append(item)
print(z)
this produces
[
{'laminate': 'D', 'post': '5', 'POC1': '2', 'id': '1', 'costgroup': '1'},
{'laminate': 'D', 'post': '4', 'POC1': '1', 'id': '1', 'costgroup': '2'},
{'laminate': 'D', 'post': '2', 'POC1': '5', 'id': '1', 'costgroup': '3'},
{'laminate': 'T', 'post': '5', 'POC1': '2', 'id': '2', 'costgroup': '1'},
{'laminate': 'T', 'post': '4', 'POC1': '1', 'id': '2', 'costgroup': '2'},
{'laminate': 'T', 'post': '2', 'POC1': '5', 'id': '2', 'costgroup': '3'},
{'laminate': 'T', 'post': '2', 'POC1': '5', 'id': '2', 'costgroup': '3'}]

# Nested-loop join: pair every row of x with every row of y sharing its id.
z = []
for dx in x:
    for dy in y:
        if dx['id'] == dy['id']:
            # Copy-then-update instead of dict(dx.items() + dy.items()):
            # on Python 3, items() returns views that cannot be added
            # together. Values from dy win on duplicate keys, as before.
            merged = dict(dx)
            merged.update(dy)
            z.append(merged)
# Parenthesised form prints the same on Python 2 and Python 3.
print(z)

Related

Change one dict/list python structure to another

I have the following data
data={
None: [
{'ne': '1', 'na': '1'},
{'ne': '2', 'na': '2'},
{'ne': '3', 'na': '3'},
{'ne': '4', 'na': '4'}
],
'AO': [
{'ne': '2', 'na': '2'},
{'ne': '6', 'na': '6'}
],
'NZ': [
{'ne': '1', 'na': '1'}
]
}
and I want to have a list from it like this:
[
{'ne': '1', 'na': '1', 'country': [None, 'NZ']},
{'ne': '2', 'na': '2', 'country': [None, 'AO']},
{'ne': '3', 'na': '3', 'country': [None]},
{'ne': '4', 'na': '4', 'country': [None]},
{'ne': '6', 'na': '6', 'country': ['AO']}
]
my code is doing it fine but it's far from being "pythonic" because I'm a newbie at python:
data = {None: [{'ne': '1', 'na': '1'}, {'ne': '2', 'na': '2'}, {'ne': '3', 'na': '3'}, {'ne': '4', 'na': '4'}], 'AO': [{'ne': '2', 'na': '2'}, {'ne': '6', 'na': '6'}], 'NZ': [{'ne': '1', 'na': '1'}]}
# Flatten the mapping into one list, tagging each row with its country key.
# The rows are the same dict objects stored in data, so data is modified too.
data_list = []
for k, d in data.items():
    for dd in d:
        dd['country'] = k
        data_list.append(dd)
# help_dict records, per 'ne' value, whether that value was already emitted.
help_dict = {}
for item in data_list:
    help_dict[item['ne']] = False
final_list = []
for idx, val in enumerate(data_list):
    if not help_dict[val['ne']]:
        # First occurrence of this 'ne': turn its country into a list and
        # collect the country of every other row with the same 'ne'.
        val['country'] = [val['country']]
        for idx2, val2 in enumerate(data_list):
            if idx2 != idx and val['ne'] == val2['ne']:
                val['country'].append(val2['country'])
        help_dict[val['ne']] = True
        final_list.append(val)
print(final_list)
can someone help me with a better way to do this?
# Flatten every country's rows into a single list.
new = [x for value in data.values() for x in value]
# remove duplicate dictionaries (going through a set does not keep the order)
new = [dict(t) for t in {tuple(d.items()) for d in new}]
for d in new:
    # The loop variable is called rows, not data, so it no longer shadows
    # the mapping being iterated (on Python 2 the old name leaked out of the
    # comprehension and replaced data after the first pass).
    d['country'] = [key for key, rows in data.items() if d in rows]
print(new)
>>> [{'ne': '2', 'na': '2', 'country': [None, 'AO']},
{'ne': '4', 'na': '4', 'country': [None]},
{'ne': '1', 'na': '1', 'country': [None, 'NZ']},
{'ne': '6', 'na': '6', 'country': ['AO']},
{'ne': '3', 'na': '3', 'country': [None]}]
If you want to preserve the order
# Flatten every country's rows into a single list, in dict iteration order.
new = [x for value in data.values() for x in value]
seen = set()
new_l = []
for d in new:
    # Rows are deduplicated on their (key, value) tuple; keeping only the
    # first occurrence preserves the input order.
    t = tuple(d.items())
    if t not in seen:
        seen.add(t)
        new_l.append(d)
for d in new_l:
    # NOTE: new_l holds the same dict objects as data, so this also adds
    # 'country' to those rows inside data. The loop variable is called
    # rows so that it does not shadow data.
    d['country'] = [key for key, rows in data.items() if d in rows]
print(new_l)
>>> [{'ne': '1', 'na': '1', 'country': [None, 'NZ']},
{'ne': '2', 'na': '2', 'country': [None, 'AO']},
{'ne': '3', 'na': '3', 'country': [None]},
{'ne': '4', 'na': '4', 'country': [None]},
{'ne': '6', 'na': '6', 'country': ['AO']}]
This is a really naive approach to solve your problem, due to it requiring that the inner dictionaries are sorted in the same order for it to "match" earlier found dictionaries.
For more complex dictionaries inside of the country this might not give correct results:
data = {
    None: [
        {'ne': '1', 'na': '1'},
        {'ne': '2', 'na': '2'},
        {'ne': '3', 'na': '3'},
        {'ne': '4', 'na': '4'}
    ],
    'AO': [
        {'ne': '2', 'na': '2'},
        {'ne': '6', 'na': '6'}
    ],
    'NZ': [
        {'ne': '1', 'na': '1'}
    ]
}
# Maps the string form of each inner dictionary to a merged row, which is
# that dictionary plus a "country" list.
d = {}
for country in data:
    for dictionary in data[country]:
        # Create a key that is a string of the dictionary, and value is dictionary plus country
        x = d.setdefault(str(dictionary), dictionary | {"country": []})
        # If you're using Python < 3.9, use this instead:
        # x = d.setdefault(str(dictionary), {**dictionary, "country": []})
        # setdefault returns the stored row, so the append below lands on
        # the row shared by every equal-looking dictionary.
        x["country"].append(country)
# pprint only used to represent data better
import pprint
pprint.pp(list(d.values()))
Output:
[{'ne': '1', 'na': '1', 'country': [None, 'NZ']},
{'ne': '2', 'na': '2', 'country': [None, 'AO']},
{'ne': '3', 'na': '3', 'country': [None]},
{'ne': '4', 'na': '4', 'country': [None]},
{'ne': '6', 'na': '6', 'country': ['AO']}]
Firstly, I'm assuming ne and na are always the same.
An optimal intermediate data structure is a dict with ne/na as keys and country lists as values:
{'1': [None, 'NZ'],
'2': [None, 'AO'],
'3': [None],
'4': [None],
'6': ['AO']}
Once you have that goal in mind, it's super simple to do it Pythonically:
# Intermediate structure: 'ne' value -> list of the country keys it appears under.
inter = {}
for k, dicts in data.items():
    for d in dicts:
        # setdefault creates the empty list the first time an 'ne' is seen.
        inter.setdefault(d['ne'], []).append(k)
dict.setdefault() is used to get the value if it exists, or if not, set it to a default, which is an empty list here. It's functionally the same as this:
# Long-hand equivalent of inter.setdefault(d['ne'], []).append(k).
ne = d['ne']
if ne not in inter:
    inter[ne] = []
inter[ne].append(k)
You could also use collections.defaultdict(list) to do the same thing even more easily.
And once you have that dict, you just need to unpack it into a list of dicts:
result = [{'ne': ne, 'na': ne, 'country': c} for ne, c in inter.items()]
Which becomes:
[{'ne': '1', 'na': '1', 'country': [None, 'NZ']},
{'ne': '2', 'na': '2', 'country': [None, 'AO']},
{'ne': '3', 'na': '3', 'country': [None]},
{'ne': '4', 'na': '4', 'country': [None]},
{'ne': '6', 'na': '6', 'country': ['AO']}]

Zip method fails in creating a dictionary when a mix of foreign languages are used

I've two lists that I'm trying to combine into a dictionary via the zip method with the aim to do some text analysis.
The final output is, however, corrupted as soon as an item in a foreign language (Arabic, I'd say) is found in the list.
See a compromised extract of my two lists:
['virus',
'corona',
'health',
'like',
'forever',
'must',
'10000',
'claim',
'ensured',
'have',
'wealth',
'cities',
'way',
'chickens',
'get',
'straight',
'amp',
'fighting',
'please',
'shit',
'me',
'still',
'cases',
'pandemic',
'pregnancies',
'teenage',
'wrong',
'ziko',
'lockdown',
'cancel',
'exams',
'self',
'wearing',
'because',
'harder',
'hit',
'mad',
'racis',
'going',
'labour',
'minister',
'plant',
'said',
'saplings',
'state',
'vanish',
'fight',
'coronavirus',
'curfew',
'ma',
'needs',
'quite',
'thinking',
'unnecessary',
'periodcorona',
'traced',
'real',
'behind',
'dumb',
'hahaha',
'joke',
'long',
'spent',
'twist',
'worst',
'tested',
'children',
'exposed',
'gassed',
'missing',
'pee',
'raped',
'sitting',
'staff',
'theyre',
'again',
'spread',
'baits',
'ignored',
'many',
'mocki',
'tantrums',
'threw',
'free',
'may',
'people',
'let',
'odd',
'rally',
'spike',
'suddenly',
'cancelcbseboardexams2020',
'depression',
'baker',
'bun',
'looking',
'mother',
'ntonntoni',
'oven',
'son',
'threads',
'wat',
'got',
'three',
'brother',
'give',
'patients',
'perfect',
'save',
'ummah',
'u',
'coro',
'patient',
'classes',
'month',
'postpone',
'government',
'one',
'mask',
'go',
'2020',
'95',
'atrocities',
'committed',
'fan',
'positive',
'say',
'someth',
'adversity',
'comes',
'infection',
'resilience',
'shared',
'story',
'story2',
'women',
'young',
'at',
'attempt',
'blame',
'hapless',
'humans',
'shift',
'911',
'begging',
'called',
'pants',
'pretty',
'removed',
'sure',
'system',
'goal',
'trending',
'believe',
'whatever',
'coronarvirues',
'updates',
'owns',
'236941',
'4308692',
'recovered',
'recoveries',
'today',
'catching',
'fear',
'players',
'professional',
'says',
'snooker',
'travel',
'itll',
'kill',
'remains',
'calling',
'create',
'environmen',
'find',
'report',
'start',
'try',
'ways',
'less',
'days',
'followers',
'old',
'thats',
'discussing',
'kills',
'peroxide',
'room',
'tha',
'wash',
'bec',
'dying',
'entry',
'hospitals',
'refused',
'saw',
'video',
'visited',
'sir',
'aka',
'lost',
'trust',
'unleashing',
'whole',
'world',
'immune',
'china',
'gift',
'send',
'cit',
'folks',
'mow',
'nothing',
'police',
'see',
'winning',
'yep',
'adding',
'water',
'final',
'wants',
'عاوزينبديلللخريجين',
'itself',
'addimistrstions',
'bat',
'bidden',
'breaking',
'fund',
'grant',
'institutes',
'provide',
'virology',
'attention',
'freestyle',
'know',
'pay',
'time',
'took',
'verse',
'vir',
'part',
'response',
'bit',
'control',
'disappointed',
'lose',
'praised',
'seemed',
'congress',
'produced',
'doctor',
'highest',
'humanity',
'rank',
'sunset',
'taking',
'watch',
'caring',
'comm',
'community',
'crisis',
'family',
'share',
'us',
'districts',
'entering',
'handled',
'surrounding',
'attack',
'country',
'also',
'come',
'8k',
'became',
'bullish',
'ca',
'fly',
'holding',
'north',
'poiting',
'spesk',
'tightenned',
'up',
'hospital',
'since',
'cer',
'infects',
'public',
'tell',
'vertebrate',
'eat',
'friends',
'fucked',
'ho',
'used',
'cosplay',
'cosplayer',
'costume',
'eeyore',
'home',
'tiger',
'quarantine',
'officials',
't',
'truly',
'anticorona',
'bring',
'forget',
'mall',
'morning',
'image',
'stars',
'yellow',
'app',
'contained',
'information',
'mean',
'all',
'cannot',
'justice',
'kept',
'truth',
'every',
'research',
'30',
'bio',
'lab',
'new',
'weapons',
'ans',
'argue',
'covic419',
'honest',
'oo',
'scam',
'want']
and
['41',
'27',
'12',
'11',
'11',
'11',
'10',
'10',
'10',
'10',
'10',
'6',
'6',
'6',
'5',
'5',
'5',
'5',
'4',
'4',
'4',
'4',
'4',
'4',
'4',
'4',
'4',
'4',
'4',
'4',
'4',
'4',
'4',
'3',
'3',
'3',
'3',
'3',
'3',
'3',
'3',
'3',
'3',
'3',
'3',
'3',
'3',
'3',
'3',
'3',
'3',
'3',
'3',
'3',
'3',
'3',
'3',
'3',
'3',
'3',
'3',
'3',
'3',
'3',
'3',
'2',
'2',
'2',
'2',
'2',
'2',
'2',
'2',
'2',
'2',
'2',
'2',
'2',
'2',
'2',
'2',
'2',
'2',
'2',
'2',
'2',
'2',
'2',
'2',
'2',
'2',
'2',
'2',
'2',
'2',
'2',
'2',
'2',
'2',
'2',
'2',
'2',
'2',
'2',
'2',
'2',
'2',
'2',
'2',
'2',
'2',
'2',
'2',
'2',
'2',
'2',
'2',
'2',
'2',
'2',
'2',
'2',
'2',
'2',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1']
Below a screenshot of the output produced with a dict(zip(tmp_list[0], tmp_list[1])).
The same happens if I do create the dictionary in this way.
# Build word -> count, pairing the two lists positionally and converting
# each count string to an int.
words_dict = {}
for word, count in zip(tmp_list[0], tmp_list[1]):
    words_dict[word] = int(count)
The only way to work around the problem is to encode each string as UTF-8, by doing something like this:
dict(zip([x.encode('utf-8') for x in tmp_list[0]], tmp_list[1]))
My questions are:
Is there any other efficient way?
Why is that? At the end of the day, the value I'm passing in is a string, so why are the compiler and the dictionary flipping the key with the value?
This is just a problem of output. The Arabic text is displayed right-to-left, and that affects the entire line apparently, so the dict value is printed left of the key. The dict structure itself is just fine.

arranging dictionary elements in sorted order

In my scenario, I have data in the form of a sorted list:
sample = ['1:4', '2:2', '3:1', '4:1', '5:1', 'B5S94Y:1', 'DB:4', 'ICEauthority:1', 'JPG:1', 'MZPZ5Y:1', 'Mdg80A:1', 'TAG:1', 'V2XO5Y:1', 'Xauthority:1', 'apmrc:1', 'asc:1', 'bak:1', 'baseA:4', 'baseB:4', 'bash:1', 'bash_history:1', 'bash_logout:1', 'bashrc:1', 'bat:1', 'bau:1', 'bdic:1', 'bin:5', 'c:282', 'cache-6:55', 'cache:103', 'cfg:3', 'conf:15', 'converted-launchers:1', 'cson:2', 'css:34', 'dat:2', 'db-journal:3', 'db-shm:1', 'db-wal:1', 'db:17', 'dbf:1', 'dbt:1', 'deb:3', 'desktop:2', 'dic:1', 'dirs:1', 'dll:42', 'dmrc:1', 'docx:9', 'exc:1', 'exe:20', 'final:1', 'fingerprint:4', 'fmt:1', 'gif:5', 'gitconfig:1', 'gitignore:6', 'gitkeep:2', 'gitmodules:1', 'gpg-agent:1', 'gpg:1', 'gradle:3', 'gvdb:1', 'gz:169', 'h:14', 'htm:1', 'html:43', 'icc:1', 'ico:1', 'ics:3', 'idx:5', 'info:1', 'ini:14', 'ino:1', 'isrunning:1', 'jar:1', 'java:7', 'journal:2', 'jpg:16', 'js-20170612122310:1', 'js-20170816210634:1', 'js:245', 'json:367', 'jsonlz4:39', 'kbx:1', 'keyring:1', 'keystore:1', 'ldb:23', 'list:2', 'little:2', 'locale:1', 'localstorage-journal:71', 'localstorage:71', 'lock:1', 'log:41', 'lst:2', 'lsup7I:1', 'lz4:1', 'm:1', 'map:2', 'md:92', 'md~:1', 'metadata-v2:6', 'metadata:6', 'mozlz4:2', 'name:1', 'nls:1', 'odb:2', 'odt:1', 'old:24', 'orig:1', 'otf:31', 'out:19', 'pack:5', 'pak:1', 'parentlock:2', 'pb:32', 'pdf:18', 'pf2:5', 'php:12', 'pl:1', 'pma:2', 'png:2348', 'pro:1', 'profile:1', 'properties:2', 'pset:42', 'pub:1', 'py:41', 'pyc:4', 'rb:1', 'rcache:2', 'rdf:2', 'reg:3', 'run:1', 'sample:72', 'sbstore:42', 'sdv:1', 'sh:27', 'so:2', 'source:1', 'spec:9', 'sqlite-journal:1', 'sqlite-shm:4', 'sqlite-wal:4', 'sqlite3-journal:1', 'sqlite3:1', 'sqlite:24', 'stamp:2', 'stderr:8', 'stdout:8', 'sth:19', 'sublime_session:1', 'sudo_as_admin_successful:1', 'svg:124', 'swo:1', 'swp:2', 'sys:1', 'tdb:3', 'thm:1', 'trashinfo:3', 'tsv:1', 'ttf:107', 'tvc:1', 'txt:63', 'update-timestamp:1', 'usage:6', 'viminfo:1', 'vxd:1', 'woff:1', 'x86_64-pc-linux-gnu:1', 'xba:1', 
'xbel:1', 'xcu:1', 'xinputrc:1', 'xlb:2', 'xlc:2', 'xml:35', 'xpi:9', 'xsession-errors:1', 'yml:1', 'zcompdump:1', 'zip:5', 'zsh-template:1', 'zsh-theme:143', 'zsh-update:1', 'zsh:235', 'zsh_history:1', 'zshrc:1']
So after converting this above list to a dictionary with the following code:
dict_val = {a:b for a, b in [i.split(":") for i in sample]}
The output will be:
{'baseA': '4', 'baseB': '4', 'cache-6': '55', 'Xauthority': '1', 'gitmodules': '1', 'apmrc': '1', 'gz': '169', 'dbf': '1', 'kbx': '1', 'sqlite-shm': '4', 'dbt': '1', 'gitignore': '6', 'xml': '35', 'sbstore': '42', 'cache': '103', 'jar': '1', 'desktop': '2', 'source': '1', 'sqlite3-journal': '1', 'TAG': '1', '4': '1', 'tsv': '1', 'spec': '9', 'bin': '5', 'docx': '9', 'woff': '1', 'db': '17', 'V2XO5Y': '1', 'dat': '2', 'fingerprint': '4', 'lz4': '1', 'name': '1', 'bat': '1', 'bau': '1', 'converted-launchers': '1', 'h': '14', 'list': '2', 'lst': '2', 'gradle': '3', 'zsh-update': '1', 'mozlz4': '2', 'stderr': '8', 'sublime_session': '1', 'bak': '1', 'isrunning': '1', 'locale': '1', 'cfg': '3', 'htm': '1', 'odt': '1', 'xlb': '2', 'md~': '1', 'pma': '2', 'sqlite-journal': '1', 'odb': '2', 'dic': '1', 'tvc': '1', 'out': '19', 'ico': '1', 'icc': '1', 'dll': '42', '3': '1', 'rb': '1', 'ics': '3', 'py': '41', 'journal': '2', 'metadata': '6', 'dirs': '1', 'run': '1', 'tdb': '3', 'DB': '4', 'zshrc': '1', 'xpi': '9', 'pub': '1', 'js': '245', 'asc': '1', 'ldb': '23', 'xlc': '2', 'xbel': '1', 'properties': '2', 'bash': '1', 'sys': '1', 'c': '282', 'zip': '5', 'idx': '5', 'lsup7I': '1', 'zcompdump': '1', 'rdf': '2', 'dmrc': '1', 'Mdg80A': '1', 'pdf': '18', 'reg': '3', 'jsonlz4': '39', 'bashrc': '1', 'db-journal': '3', 'pf2': '5', 'localstorage': '71', 'old': '24', 'txt': '63', 'orig': '1', 'gvdb': '1', 'little': '2', 'pyc': '4', 'java': '7', 'log': '41', 'swo': '1', 'stamp': '2', 'vxd': '1', 'fmt': '1', 'gpg': '1', 'zsh-template': '1', 'pb': '32', 'gif': '5', 'json': '367', '2': '2', 'js-20170612122310': '1', 'swp': '2', 'bash_logout': '1', 'final': '1', 'pl': '1', 'gpg-agent': '1', 'sdv': '1', 'x86_64-pc-linux-gnu': '1', 'parentlock': '2', 'cson': '2', 'rcache': '2', 'otf': '31', 'usage': '6', 'bash_history': '1', 'localstorage-journal': '71', 'update-timestamp': '1', 'png': '2348', 'exc': '1', 'info': '1', 'md': '92', 'js-20170816210634': '1', 'sth': '19', 'yml': '1', 
'sqlite-wal': '4', 'deb': '3', 'zsh': '235', 'pack': '5', 'zsh_history': '1', 'sqlite': '24', 'stdout': '8', 'lock': '1', 'pro': '1', 'gitkeep': '2', 'jpg': '16', 'sample': '72', 'ino': '1', 'pset': '42', 'ini': '14', 'conf': '15', 'xcu': '1', 'sudo_as_admin_successful': '1', 'xsession-errors': '1', 'keystore': '1', 'nls': '1', 'sh': '27', 'bdic': '1', '1': '4', 'html': '43', '5': '1', 'MZPZ5Y': '1', 'sqlite3': '1', 'pak': '1', 'ttf': '107', 'css': '34', 'profile': '1', 'map': '2', 'metadata-v2': '6', 'm': '1', 'zsh-theme': '143', 'trashinfo': '3', 'ICEauthority': '1', 'php': '12', 'B5S94Y': '1', 'viminfo': '1', 'exe': '20', 'db-shm': '1', 'xinputrc': '1', 'svg': '124', 'keyring': '1', 'JPG': '1', 'thm': '1', 'gitconfig': '1', 'so': '2', 'xba': '1', 'db-wal': '1'}
So I want to know why the order of the output data has changed compared to the input data and how to get the output in the same sorted order as the input is given!!
dicts in python versions older than 3.6 do not have any notion of ordering. If you want a dictionary that maintains order, you'll want the collections.OrderedDict data structure.
from collections import OrderedDict
mapping = OrderedDict(i.split(":") for i in sample)
mapping is an OrderedDict, a subclass of dict which supports all the basic dict functionality, plus the ordering.
Also, do not use dict as a variable name, it shadows the builtin class dict.

creating dictionary from the list with each elements as key:value pairs in python [duplicate]

This question already has answers here:
Python: how to build a dict from plain list of keys and values
(5 answers)
Closed 5 years ago.
I have a list where the content of list are key:value pairs something like shown below
sample =['ldb:21', 'baseB:4', 'cache-6:55', 'Xauthority:1', 'baseA:4',
'apmrc:1', 'gz:169', 'dbf:1', 'lst:2', 'sqlite-shm:4', 'ttf:107',
'gitignore:6', 'xml:35', 'sbstore:42', 'cache:103', 'jar:1',
'desktop:2', 'source:1', 'sqlite3-journal:1', 'TAG:1', '4:1',
'usage:6', 'yml:1', 'bin:5', 'docx:9', 'woff:1', 'db:17',
'gpg-agent:1', 'V2XO5Y:1', 'dat:2', 'fingerprint:4', 'lz4:1',
'cson:2', 'name:1', 'bat:1', 'bau:1', 'converted-launchers:1',
'h:14', 'list:2', 'xlb:2', 'dic:1', 'zsh-update:1',
'mozlz4:2', 'stderr:8', 'sublime_session:1', 'bak:1', 'dll:42',
'old:24', 'locale:1', 'cfg:3', 'htm:1', 'odt:1', 'keyring:1',
'md~:1', 'pma:2', 'sqlite-journal:1', 'odb:2', 'gradle:3', 'tvc:1',
'out:19', 'ico:1', 'icc:1', 'gpg:1', 'dbt:1', '3:1', 'rb:1',
'ics:3', 'reg:3', 'metadata:6', 'dirs:1', 'run:1', 'tdb:3',
'journal:2', 'zshrc:1', 'little:2', 'pub:1', 'js:245',
'asc:1', 'xbel:1', 'properties:2', 'bash:1', 'sys:1', 'c:282',
'zip:5', 'idx:5', 'lsup7I:1', 'zcompdump:1', 'rdf:2', 'dmrc:1',
'Mdg80A:1', 'pdf:18', 'xlc:2', 'jsonlz4:39', 'bashrc:1',
'db-journal:3', 'pf2:5', 'localstorage:71', 'isrunning:1',
'txt:63', 'orig:1', 'gvdb:1', 'xpi:9', 'php:12',
'gitmodules:1', 'log:41', 'swo:1', 'stamp:2', 'vxd:1',
'fmt:1', 'py:41']
I want to convert the above list into dictionary key:value pairs.
Then I want to convert it into json format.
You can try this:
sample =['ldb:21', 'baseB:4', 'cache-6:55', 'Xauthority:1', 'baseA:4', 'apmrc:1', 'gz:169', 'dbf:1', 'lst:2', 'sqlite-shm:4', 'ttf:107', 'gitignore:6', 'xml:35', 'sbstore:42', 'cache:103', 'jar:1', 'desktop:2', 'source:1', 'sqlite3-journal:1', 'TAG:1', '4:1', 'usage:6', 'yml:1', 'bin:5', 'docx:9', 'woff:1', 'db:17', 'gpg-agent:1', 'V2XO5Y:1', 'dat:2', 'fingerprint:4', 'lz4:1', 'cson:2', 'name:1', 'bat:1', 'bau:1', 'converted-launchers:1', 'h:14', 'list:2', 'xlb:2', 'dic:1', 'zsh-update:1', 'mozlz4:2', 'stderr:8', 'sublime_session:1', 'bak:1', 'dll:42', 'old:24', 'locale:1', 'cfg:3', 'htm:1', 'odt:1', 'keyring:1', 'md~:1', 'pma:2', 'sqlite-journal:1', 'odb:2', 'gradle:3', 'tvc:1', 'out:19', 'ico:1', 'icc:1', 'gpg:1', 'dbt:1', '3:1', 'rb:1', 'ics:3', 'reg:3', 'metadata:6', 'dirs:1', 'run:1', 'tdb:3', 'journal:2', 'zshrc:1', 'little:2', 'pub:1', 'js:245', 'asc:1', 'xbel:1', 'properties:2', 'bash:1', 'sys:1', 'c:282', 'zip:5', 'idx:5', 'lsup7I:1', 'zcompdump:1', 'rdf:2', 'dmrc:1', 'Mdg80A:1', 'pdf:18', 'xlc:2', 'jsonlz4:39', 'bashrc:1', 'db-journal:3', 'pf2:5', 'localstorage:71', 'isrunning:1', 'txt:63', 'orig:1', 'gvdb:1', 'xpi:9', 'php:12', 'gitmodules:1', 'log:41', 'swo:1', 'stamp:2', 'vxd:1', 'fmt:1', 'py:41']
final_data = {a:b for a, b in [i.split(":") for i in sample]}
print(final_data)
Output:
{'ldb': '21', 'baseB': '4', 'cache-6': '55', 'Xauthority': '1', 'baseA': '4', 'apmrc': '1', 'gz': '169', 'dbf': '1', 'lst': '2', 'dll': '42', 'ttf': '107', 'gitignore': '6', 'xml': '35', 'sbstore': '42', 'cache': '103', 'jar': '1', 'desktop': '2', 'source': '1', 'sqlite3-journal': '1', 'TAG': '1', '4': '1', 'usage': '6', 'yml': '1', 'bin': '5', 'docx': '9', 'woff': '1', 'dbt': '1', 'V2XO5Y': '1', 'dat': '2', 'fingerprint': '4', 'lz4': '1', 'name': '1', 'bat': '1', 'bau': '1', 'converted-launchers': '1', 'h': '14', 'list': '2', 'xlb': '2', 'gradle': '3', 'zsh-update': '1', 'stderr': '8', 'sublime_session': '1', 'bak': '1', 'old': '24', 'locale': '1', 'cfg': '3', 'htm': '1', 'odt': '1', 'md~': '1', 'pma': '2', 'sqlite-journal': '1', 'odb': '2', 'dic': '1', 'tvc': '1', 'out': '19', 'ico': '1', 'icc': '1', 'sqlite-shm': '4', '3': '1', 'rb': '1', 'ics': '3', 'py': '41', 'reg': '3', 'metadata': '6', 'dirs': '1', 'run': '1', 'tdb': '3', 'journal': '2', 'zshrc': '1', 'xpi': '9', 'pub': '1', 'js': '245', 'asc': '1', 'xbel': '1', 'properties': '2', 'bash': '1', 'c': '282', 'swo': '1', 'idx': '5', 'lsup7I': '1', 'rdf': '2', 'dmrc': '1', 'Mdg80A': '1', 'pdf': '18', 'xlc': '2', 'jsonlz4': '39', 'bashrc': '1', 'db-journal': '3', 'pf2': '5', 'localstorage': '71', 'isrunning': '1', 'txt': '63', 'orig': '1', 'gvdb': '1', 'little': '2', 'gitmodules': '1', 'log': '41', 'zip': '5', 'stamp': '2', 'vxd': '1', 'fmt': '1', 'gpg': '1', 'gpg-agent': '1', 'cson': '2', 'zcompdump': '1', 'mozlz4': '2', 'db': '17', 'sys': '1', 'php': '12', 'keyring': '1'}
Just use dict, splitting on the colons.
dict(item.split(':') for item in sample)
Or the equivalent functional approach,
from operator import methodcaller
dict(map(methodcaller('split', ':'), sample))
From there json.dumps to get a JSON formatted string.
sample =['ldb:21', 'baseB:4', 'cache-6:55', 'Xauthority:1', 'baseA:4', 'apmrc:1', 'gz:169', 'dbf:1', 'lst:2', 'sqlite-shm:4', 'ttf:107', 'gitignore:6', 'xml:35', 'sbstore:42', 'cache:103', 'jar:1', 'desktop:2', 'source:1', 'sqlite3-journal:1', 'TAG:1', '4:1', 'usage:6', 'yml:1', 'bin:5', 'docx:9', 'woff:1', 'db:17', 'gpg-agent:1', 'V2XO5Y:1', 'dat:2', 'fingerprint:4', 'lz4:1', 'cson:2', 'name:1', 'bat:1', 'bau:1', 'converted-launchers:1', 'h:14', 'list:2', 'xlb:2', 'dic:1', 'zsh-update:1', 'mozlz4:2', 'stderr:8', 'sublime_session:1', 'bak:1', 'dll:42', 'old:24', 'locale:1', 'cfg:3', 'htm:1', 'odt:1', 'keyring:1', 'md~:1', 'pma:2', 'sqlite-journal:1', 'odb:2', 'gradle:3', 'tvc:1', 'out:19', 'ico:1', 'icc:1', 'gpg:1', 'dbt:1', '3:1', 'rb:1', 'ics:3', 'reg:3', 'metadata:6', 'dirs:1', 'run:1', 'tdb:3', 'journal:2', 'zshrc:1', 'little:2', 'pub:1', 'js:245', 'asc:1', 'xbel:1', 'properties:2', 'bash:1', 'sys:1', 'c:282', 'zip:5', 'idx:5', 'lsup7I:1', 'zcompdump:1', 'rdf:2', 'dmrc:1', 'Mdg80A:1', 'pdf:18', 'xlc:2', 'jsonlz4:39', 'bashrc:1', 'db-journal:3', 'pf2:5', 'localstorage:71', 'isrunning:1', 'txt:63', 'orig:1', 'gvdb:1', 'xpi:9', 'php:12', 'gitmodules:1', 'log:41', 'swo:1', 'stamp:2', 'vxd:1', 'fmt:1', 'py:41']
# Split every "key:value" entry on the colon and store it in a dict.
dict_data = {}
for data in sample:
    item = data.split(':')
    dict_data[item[0]] = item[1]
# Parenthesised form prints the same on Python 2 and Python 3.
print(dict_data)
output:
{'ldb': '21', 'baseB': '4', 'cache-6': '55', 'Xauthority': '1', 'baseA': '4', 'apmrc': '1', 'gz': '169', 'dbf': '1', 'lst': '2', 'dll': '42', 'ttf': '107', 'gitignore': '6', 'xml': '35', 'sbstore': '42', 'cache': '103', 'jar': '1', 'desktop': '2', 'source': '1', 'sqlite3-journal': '1', 'TAG': '1', '4': '1', 'usage': '6', 'yml': '1', 'bin': '5', 'docx': '9', 'woff': '1', 'dbt': '1', 'V2XO5Y': '1', 'dat': '2', 'fingerprint': '4', 'lz4': '1', 'name': '1', 'bat': '1', 'bau': '1', 'converted-launchers': '1', 'h': '14', 'list': '2', 'xlb': '2', 'gradle': '3', 'zsh-update': '1', 'stderr': '8', 'sublime_session': '1', 'bak': '1', 'old': '24', 'locale': '1', 'cfg': '3', 'htm': '1', 'odt': '1', 'md~': '1', 'pma': '2', 'sqlite-journal': '1', 'odb': '2', 'dic': '1', 'tvc': '1', 'out': '19', 'ico': '1', 'icc': '1', 'sqlite-shm': '4', '3': '1', 'rb': '1', 'ics': '3', 'py': '41', 'reg': '3', 'metadata': '6', 'dirs': '1', 'run': '1', 'tdb': '3', 'journal': '2', 'zshrc': '1', 'xpi': '9', 'pub': '1', 'js': '245', 'asc': '1', 'xbel': '1', 'properties': '2', 'bash': '1', 'c': '282', 'swo': '1', 'idx': '5', 'lsup7I': '1', 'rdf': '2', 'dmrc': '1', 'Mdg80A': '1', 'pdf': '18', 'xlc': '2', 'jsonlz4': '39', 'bashrc': '1', 'db-journal': '3', 'pf2': '5', 'localstorage': '71', 'isrunning': '1', 'txt': '63', 'orig': '1', 'gvdb': '1', 'little': '2', 'gitmodules': '1', 'log': '41', 'zip': '5', 'stamp': '2', 'vxd': '1', 'fmt': '1', 'gpg': '1', 'gpg-agent': '1', 'cson': '2', 'zcompdump': '1', 'mozlz4': '2', 'db': '17', 'sys': '1', 'php': '12', 'keyring': '1'}
Iterate over your list and split the pair into two to form a dictionary
dicta = {a:b for a,b in [pair.split(":") for pair in sample]}
[pair.split(":") for pair in sample] gives you the list of lists:
[["ldb", "21"], ["baseB", "4"], ... ]
You can put key as string and value as int, as it seems to be count.
dicta = {a:int(b) for a,b in [pair.split(":") for pair in sample]}

How to convert list into a dictionary by python

old_list = [ ['ID0', 'ID1'], ['4', '8'], ['5', '6'] ]
I want to convert the list into a new list of dictionaries, using these keys:
key = ['id', 'frame', 'length']
new_list = [{'id': 'ID0', 'frame': '4', 'length': '5'}, {'id': 'ID1', 'frame': '8', 'length': '6'}]
Here's a one-line approach:
>>> [{'id':x, 'frame':y, 'length':z} for x,y,z in zip(*old_list)]
[{'length': '5', 'frame': '4', 'id': 'ID0'}, {'length': '6', 'frame': '8', 'id': 'ID1'}]
# zip walks the three inner lists in lockstep, giving one (id, frame, length)
# triple per output row.
new_list = []
for x, y, z in zip(old_list[0], old_list[1], old_list[2]):
    # Named row rather than dict, so the builtin dict is not shadowed.
    row = {'id': x, 'frame': y, 'length': z}
    new_list.append(row)

Categories

Resources