arranging dictionary elements in sorted order - python

In my scenario, I have data in the form of a sorted list:
sample = ['1:4', '2:2', '3:1', '4:1', '5:1', 'B5S94Y:1', 'DB:4', 'ICEauthority:1', 'JPG:1', 'MZPZ5Y:1', 'Mdg80A:1', 'TAG:1', 'V2XO5Y:1', 'Xauthority:1', 'apmrc:1', 'asc:1', 'bak:1', 'baseA:4', 'baseB:4', 'bash:1', 'bash_history:1', 'bash_logout:1', 'bashrc:1', 'bat:1', 'bau:1', 'bdic:1', 'bin:5', 'c:282', 'cache-6:55', 'cache:103', 'cfg:3', 'conf:15', 'converted-launchers:1', 'cson:2', 'css:34', 'dat:2', 'db-journal:3', 'db-shm:1', 'db-wal:1', 'db:17', 'dbf:1', 'dbt:1', 'deb:3', 'desktop:2', 'dic:1', 'dirs:1', 'dll:42', 'dmrc:1', 'docx:9', 'exc:1', 'exe:20', 'final:1', 'fingerprint:4', 'fmt:1', 'gif:5', 'gitconfig:1', 'gitignore:6', 'gitkeep:2', 'gitmodules:1', 'gpg-agent:1', 'gpg:1', 'gradle:3', 'gvdb:1', 'gz:169', 'h:14', 'htm:1', 'html:43', 'icc:1', 'ico:1', 'ics:3', 'idx:5', 'info:1', 'ini:14', 'ino:1', 'isrunning:1', 'jar:1', 'java:7', 'journal:2', 'jpg:16', 'js-20170612122310:1', 'js-20170816210634:1', 'js:245', 'json:367', 'jsonlz4:39', 'kbx:1', 'keyring:1', 'keystore:1', 'ldb:23', 'list:2', 'little:2', 'locale:1', 'localstorage-journal:71', 'localstorage:71', 'lock:1', 'log:41', 'lst:2', 'lsup7I:1', 'lz4:1', 'm:1', 'map:2', 'md:92', 'md~:1', 'metadata-v2:6', 'metadata:6', 'mozlz4:2', 'name:1', 'nls:1', 'odb:2', 'odt:1', 'old:24', 'orig:1', 'otf:31', 'out:19', 'pack:5', 'pak:1', 'parentlock:2', 'pb:32', 'pdf:18', 'pf2:5', 'php:12', 'pl:1', 'pma:2', 'png:2348', 'pro:1', 'profile:1', 'properties:2', 'pset:42', 'pub:1', 'py:41', 'pyc:4', 'rb:1', 'rcache:2', 'rdf:2', 'reg:3', 'run:1', 'sample:72', 'sbstore:42', 'sdv:1', 'sh:27', 'so:2', 'source:1', 'spec:9', 'sqlite-journal:1', 'sqlite-shm:4', 'sqlite-wal:4', 'sqlite3-journal:1', 'sqlite3:1', 'sqlite:24', 'stamp:2', 'stderr:8', 'stdout:8', 'sth:19', 'sublime_session:1', 'sudo_as_admin_successful:1', 'svg:124', 'swo:1', 'swp:2', 'sys:1', 'tdb:3', 'thm:1', 'trashinfo:3', 'tsv:1', 'ttf:107', 'tvc:1', 'txt:63', 'update-timestamp:1', 'usage:6', 'viminfo:1', 'vxd:1', 'woff:1', 'x86_64-pc-linux-gnu:1', 'xba:1', 
'xbel:1', 'xcu:1', 'xinputrc:1', 'xlb:2', 'xlc:2', 'xml:35', 'xpi:9', 'xsession-errors:1', 'yml:1', 'zcompdump:1', 'zip:5', 'zsh-template:1', 'zsh-theme:143', 'zsh-update:1', 'zsh:235', 'zsh_history:1', 'zshrc:1']
So after converting this above list to a dictionary with the following code:
dict_val = {a:b for a, b in [i.split(":") for i in sample]}
The output will be:
{'baseA': '4', 'baseB': '4', 'cache-6': '55', 'Xauthority': '1', 'gitmodules': '1', 'apmrc': '1', 'gz': '169', 'dbf': '1', 'kbx': '1', 'sqlite-shm': '4', 'dbt': '1', 'gitignore': '6', 'xml': '35', 'sbstore': '42', 'cache': '103', 'jar': '1', 'desktop': '2', 'source': '1', 'sqlite3-journal': '1', 'TAG': '1', '4': '1', 'tsv': '1', 'spec': '9', 'bin': '5', 'docx': '9', 'woff': '1', 'db': '17', 'V2XO5Y': '1', 'dat': '2', 'fingerprint': '4', 'lz4': '1', 'name': '1', 'bat': '1', 'bau': '1', 'converted-launchers': '1', 'h': '14', 'list': '2', 'lst': '2', 'gradle': '3', 'zsh-update': '1', 'mozlz4': '2', 'stderr': '8', 'sublime_session': '1', 'bak': '1', 'isrunning': '1', 'locale': '1', 'cfg': '3', 'htm': '1', 'odt': '1', 'xlb': '2', 'md~': '1', 'pma': '2', 'sqlite-journal': '1', 'odb': '2', 'dic': '1', 'tvc': '1', 'out': '19', 'ico': '1', 'icc': '1', 'dll': '42', '3': '1', 'rb': '1', 'ics': '3', 'py': '41', 'journal': '2', 'metadata': '6', 'dirs': '1', 'run': '1', 'tdb': '3', 'DB': '4', 'zshrc': '1', 'xpi': '9', 'pub': '1', 'js': '245', 'asc': '1', 'ldb': '23', 'xlc': '2', 'xbel': '1', 'properties': '2', 'bash': '1', 'sys': '1', 'c': '282', 'zip': '5', 'idx': '5', 'lsup7I': '1', 'zcompdump': '1', 'rdf': '2', 'dmrc': '1', 'Mdg80A': '1', 'pdf': '18', 'reg': '3', 'jsonlz4': '39', 'bashrc': '1', 'db-journal': '3', 'pf2': '5', 'localstorage': '71', 'old': '24', 'txt': '63', 'orig': '1', 'gvdb': '1', 'little': '2', 'pyc': '4', 'java': '7', 'log': '41', 'swo': '1', 'stamp': '2', 'vxd': '1', 'fmt': '1', 'gpg': '1', 'zsh-template': '1', 'pb': '32', 'gif': '5', 'json': '367', '2': '2', 'js-20170612122310': '1', 'swp': '2', 'bash_logout': '1', 'final': '1', 'pl': '1', 'gpg-agent': '1', 'sdv': '1', 'x86_64-pc-linux-gnu': '1', 'parentlock': '2', 'cson': '2', 'rcache': '2', 'otf': '31', 'usage': '6', 'bash_history': '1', 'localstorage-journal': '71', 'update-timestamp': '1', 'png': '2348', 'exc': '1', 'info': '1', 'md': '92', 'js-20170816210634': '1', 'sth': '19', 'yml': '1', 
'sqlite-wal': '4', 'deb': '3', 'zsh': '235', 'pack': '5', 'zsh_history': '1', 'sqlite': '24', 'stdout': '8', 'lock': '1', 'pro': '1', 'gitkeep': '2', 'jpg': '16', 'sample': '72', 'ino': '1', 'pset': '42', 'ini': '14', 'conf': '15', 'xcu': '1', 'sudo_as_admin_successful': '1', 'xsession-errors': '1', 'keystore': '1', 'nls': '1', 'sh': '27', 'bdic': '1', '1': '4', 'html': '43', '5': '1', 'MZPZ5Y': '1', 'sqlite3': '1', 'pak': '1', 'ttf': '107', 'css': '34', 'profile': '1', 'map': '2', 'metadata-v2': '6', 'm': '1', 'zsh-theme': '143', 'trashinfo': '3', 'ICEauthority': '1', 'php': '12', 'B5S94Y': '1', 'viminfo': '1', 'exe': '20', 'db-shm': '1', 'xinputrc': '1', 'svg': '124', 'keyring': '1', 'JPG': '1', 'thm': '1', 'gitconfig': '1', 'so': '2', 'xba': '1', 'db-wal': '1'}
So I want to know why the order of the output data has changed compared to the input data, and how to get the output in the same sorted order as the input.

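dicts in Python versions older than 3.6 do not have any notion of ordering (insertion order is preserved as an implementation detail in CPython 3.6 and is guaranteed by the language only from 3.7 onward). If you want a dictionary that maintains order on any version, you'll want the collections.OrderedDict data structure.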
from collections import OrderedDict
mapping = OrderedDict(i.split(":") for i in sample)
mapping is an OrderedDict, a subclass of dict which supports all the basic dict functionality, plus the ordering.
Also, do not use dict as a variable name, it shadows the builtin class dict.

Related

Is there a way to convert from string to integer while sorting in python using itemgetter or lambda?

I have a dictionary that has all ids as string (id_categ, id_macrocateg, id_microcateg). I don't want to iterate all the dict to convert them in integer and after sort them. I want to know if there is a way with itemgetter or lambda to convert all ids to int while sorting.
An example of a list of dictionaries could be this (for real, the list could change dynamically):
l_dictt = [
{'id_categ': '1', 'id_macrocateg': '1', 'id_microcateg': '1'},
{'id_categ': '1', 'id_macrocateg': '1', 'id_microcateg': '111'},
{'id_categ': '14', 'id_macrocateg': '1', 'id_microcateg': '35'},
{'id_categ': '19', 'id_macrocateg': '1', 'id_microcateg': '9'},
{'id_categ': '19', 'id_macrocateg': '1', 'id_microcateg': '19'},
{'id_categ': '19', 'id_macrocateg': '1', 'id_microcateg': '26'},
{'id_categ': '2', 'id_macrocateg': '50', 'id_microcateg': '554'},
{'id_categ': '21', 'id_macrocateg': '1', 'id_microcateg': '4'},
{'id_categ': '21', 'id_macrocateg': '1', 'id_microcateg': '16'},
{'id_categ': '21', 'id_macrocateg': '1', 'id_microcateg': '17'},
{'id_categ': '21', 'id_macrocateg': '1', 'id_microcateg': '20'},
{'id_categ': '21', 'id_macrocateg': '1', 'id_microcateg': '21'},
{'id_categ': '21', 'id_macrocateg': '1', 'id_microcateg': '24'},
{'id_categ': '21', 'id_macrocateg': '1', 'id_microcateg': '31'},
{'id_categ': '10', 'id_macrocateg': '2', 'id_microcateg': '11'},
{'id_categ': '10', 'id_macrocateg': '2', 'id_microcateg': '12'},
{'id_categ': '10', 'id_macrocateg': '2', 'id_microcateg': '13'},
{'id_categ': '12', 'id_macrocateg': '2', 'id_microcateg': '10'},
{'id_categ': '12', 'id_macrocateg': '2', 'id_microcateg': '27'},
{'id_categ': '12', 'id_macrocateg': '2', 'id_microcateg': '28'},
{'id_categ': '30', 'id_macrocateg': '6', 'id_microcateg': '112'},
{'id_categ': '30', 'id_macrocateg': '6', 'id_microcateg': '112'}]
the output I want is:
l_dictt = [
{'id_categ': '1', 'id_macrocateg': '1', 'id_microcateg': '1'},
{'id_categ': '1', 'id_macrocateg': '1', 'id_microcateg': '111'},
{'id_categ': '14', 'id_macrocateg': '1', 'id_microcateg': '35'},
{'id_categ': '19', 'id_macrocateg': '1', 'id_microcateg': '9'},
{'id_categ': '19', 'id_macrocateg': '1', 'id_microcateg': '19'},
{'id_categ': '19', 'id_macrocateg': '1', 'id_microcateg': '26'},
{'id_categ': '21', 'id_macrocateg': '1', 'id_microcateg': '4'},
{'id_categ': '21', 'id_macrocateg': '1', 'id_microcateg': '16'},
{'id_categ': '21', 'id_macrocateg': '1', 'id_microcateg': '17'},
{'id_categ': '21', 'id_macrocateg': '1', 'id_microcateg': '20'},
{'id_categ': '21', 'id_macrocateg': '1', 'id_microcateg': '21'},
{'id_categ': '21', 'id_macrocateg': '1', 'id_microcateg': '24'},
{'id_categ': '21', 'id_macrocateg': '1', 'id_microcateg': '31'},
{'id_categ': '10', 'id_macrocateg': '2', 'id_microcateg': '11'},
{'id_categ': '10', 'id_macrocateg': '2', 'id_microcateg': '12'},
{'id_categ': '10', 'id_macrocateg': '2', 'id_microcateg': '13'},
{'id_categ': '12', 'id_macrocateg': '2', 'id_microcateg': '10'},
{'id_categ': '12', 'id_macrocateg': '2', 'id_microcateg': '27'},
{'id_categ': '12', 'id_macrocateg': '2', 'id_microcateg': '28'},
{'id_categ': '30', 'id_macrocateg': '6', 'id_microcateg': '112'},
{'id_categ': '30', 'id_macrocateg': '6', 'id_microcateg': '112'},
{'id_categ': '2', 'id_macrocateg': '50', 'id_microcateg': '554'}]
if those 3 keys were integers, as I wanted sort first by id_macrocateg, after by id_categ and at the end id_microcateg, I could do this:
for dictt in sorted(l_dictt, key=itemgetter('id_macrocateg', 'id_categ', 'id_microcateg'):
print(dictt)
But as they are strings I cannot do that.
I have tried:
from operator import itemgetter
for dictt in sorted(l_dictt, key=lambda x: int(itemgetter("id_macrocateg, id_categ, id_microcateg")(x))):
print(dictt)
and also this:
from operator import itemgetter
for dictt in sorted(l_dictt, key=lambda x: int(itemgetter("id_macrocateg")(x)),int(itemgetter("id_categ")(x))),int(itemgetter(("id_microcateg")(x))):
print(dictt)
Your third attempt is just missing some parentheses to ensure that all three calls to int are part of the body of the lambda expression.
from operator import itemgetter
# The key is ONE tuple of three ints, so the whole tuple must be closed before
# the closing parenthesis of sorted(); each itemgetter is built from the key
# name first and only then applied to the row x.
for dictt in sorted(l_dictt, key=lambda x: (int(itemgetter("id_macrocateg")(x)),
                                            int(itemgetter("id_categ")(x)),
                                            int(itemgetter("id_microcateg")(x)))):
    print(dictt)
though itemgetter really just gets in the way here.
# Index the row passed to the lambda (x), not the list being sorted, and
# compare the ids numerically by converting each one to int.
for dictt in sorted(l_dictt, key=lambda x: (int(x["id_macrocateg"]),
                                            int(x["id_categ"]),
                                            int(x["id_microcateg"]))):
    print(dictt)
You could use map with itemgetter, similar to your second attempt:
from operator import itemgetter
# Splitting up for readability: getter(x) returns the three id strings as a
# tuple, and map(int, ...) converts them so the sort compares numbers.
getter = itemgetter("id_macrocateg", "id_categ", "id_microcateg")
for dictt in sorted(l_dictt, key=lambda x: tuple(map(int, getter(x)))):
    print(dictt)
though again a generator expression with a list of keys may be simpler:
# Sort priority is the order of the names in this tuple.
keys = ("id_macrocateg", "id_categ", "id_microcateg")
for dictt in sorted(l_dictt, key=lambda x: [int(x[k]) for k in keys]):
    print(dictt)
(using a list only because there is no tuple comprehension; lists and tuples sort in the same fashion.)
Answer after updating question:
Now, you want the list to be sorted on id_macrocateg, id_categ and id_microcateg, so we get it by:
sorted(l_dictt, key=lambda x: x['id_macrocateg'].zfill(10)+x['id_categ'].zfill(10)+x['id_microcateg'].zfill(10))
Here I assumed that each id has at most 10 digits (i.e. is no larger than '9999999999'); the width passed to zfill can be increased if longer ids are possible.
The results
{'id_categ': '1', 'id_macrocateg': '1', 'id_microcateg': '1'}
{'id_categ': '1', 'id_macrocateg': '1', 'id_microcateg': '111'}
{'id_categ': '14', 'id_macrocateg': '1', 'id_microcateg': '35'}
{'id_categ': '19', 'id_macrocateg': '1', 'id_microcateg': '9'}
{'id_categ': '19', 'id_macrocateg': '1', 'id_microcateg': '19'}
{'id_categ': '19', 'id_macrocateg': '1', 'id_microcateg': '26'}
{'id_categ': '21', 'id_macrocateg': '1', 'id_microcateg': '4'}
{'id_categ': '21', 'id_macrocateg': '1', 'id_microcateg': '16'}
{'id_categ': '21', 'id_macrocateg': '1', 'id_microcateg': '17'}
{'id_categ': '21', 'id_macrocateg': '1', 'id_microcateg': '20'}
{'id_categ': '21', 'id_macrocateg': '1', 'id_microcateg': '21'}
{'id_categ': '21', 'id_macrocateg': '1', 'id_microcateg': '24'}
{'id_categ': '21', 'id_macrocateg': '1', 'id_microcateg': '31'}
{'id_categ': '10', 'id_macrocateg': '2', 'id_microcateg': '11'}
{'id_categ': '10', 'id_macrocateg': '2', 'id_microcateg': '12'}
{'id_categ': '10', 'id_macrocateg': '2', 'id_microcateg': '13'}
{'id_categ': '12', 'id_macrocateg': '2', 'id_microcateg': '10'}
{'id_categ': '12', 'id_macrocateg': '2', 'id_microcateg': '27'}
{'id_categ': '12', 'id_macrocateg': '2', 'id_microcateg': '28'}
{'id_categ': '30', 'id_macrocateg': '6', 'id_microcateg': '112'}
{'id_categ': '30', 'id_macrocateg': '6', 'id_microcateg': '112'}
{'id_categ': '2', 'id_macrocateg': '50', 'id_microcateg': '554'}
Initial answer before updating the question:
You can sort that dictionary by the value of id_categ while keeping its values untouched as follows:
for dictt in sorted(l_dictt, key=lambda x: int(x['id_categ'])):
print(dictt)
The results
{'id_categ': '1', 'id_macrocateg': '1', 'id_microcateg': '1'}
{'id_categ': '1', 'id_macrocateg': '1', 'id_microcateg': '111'}
{'id_categ': '2', 'id_macrocateg': '50', 'id_microcateg': '554'}
{'id_categ': '10', 'id_macrocateg': '2', 'id_microcateg': '11'}
{'id_categ': '10', 'id_macrocateg': '2', 'id_microcateg': '12'}
{'id_categ': '10', 'id_macrocateg': '2', 'id_microcateg': '13'}
{'id_categ': '12', 'id_macrocateg': '2', 'id_microcateg': '10'}
{'id_categ': '12', 'id_macrocateg': '2', 'id_microcateg': '27'}
{'id_categ': '12', 'id_macrocateg': '2', 'id_microcateg': '28'}
{'id_categ': '14', 'id_macrocateg': '1', 'id_microcateg': '35'}
{'id_categ': '19', 'id_macrocateg': '1', 'id_microcateg': '9'}
{'id_categ': '19', 'id_macrocateg': '1', 'id_microcateg': '19'}
{'id_categ': '19', 'id_macrocateg': '1', 'id_microcateg': '26'}
{'id_categ': '21', 'id_macrocateg': '1', 'id_microcateg': '4'}
{'id_categ': '21', 'id_macrocateg': '1', 'id_microcateg': '16'}
{'id_categ': '21', 'id_macrocateg': '1', 'id_microcateg': '17'}
{'id_categ': '21', 'id_macrocateg': '1', 'id_microcateg': '20'}
{'id_categ': '21', 'id_macrocateg': '1', 'id_microcateg': '21'}
{'id_categ': '21', 'id_macrocateg': '1', 'id_microcateg': '24'}
{'id_categ': '21', 'id_macrocateg': '1', 'id_microcateg': '31'}
{'id_categ': '30', 'id_macrocateg': '6', 'id_microcateg': '112'}
{'id_categ': '30', 'id_macrocateg': '6', 'id_microcateg': '112'}

Zip method fails in creating a dictionary when a mix of foreign languages are used

I've two lists that I'm trying to combine into a dictionary via the zip method with the aim to do some text analysis.
The final output is however compromised by the time an item with a foreign language - I'd say Arabic - is found in the list.
See a compromised extract of my two lists:
['virus',
'corona',
'health',
'like',
'forever',
'must',
'10000',
'claim',
'ensured',
'have',
'wealth',
'cities',
'way',
'chickens',
'get',
'straight',
'amp',
'fighting',
'please',
'shit',
'me',
'still',
'cases',
'pandemic',
'pregnancies',
'teenage',
'wrong',
'ziko',
'lockdown',
'cancel',
'exams',
'self',
'wearing',
'because',
'harder',
'hit',
'mad',
'racis',
'going',
'labour',
'minister',
'plant',
'said',
'saplings',
'state',
'vanish',
'fight',
'coronavirus',
'curfew',
'ma',
'needs',
'quite',
'thinking',
'unnecessary',
'periodcorona',
'traced',
'real',
'behind',
'dumb',
'hahaha',
'joke',
'long',
'spent',
'twist',
'worst',
'tested',
'children',
'exposed',
'gassed',
'missing',
'pee',
'raped',
'sitting',
'staff',
'theyre',
'again',
'spread',
'baits',
'ignored',
'many',
'mocki',
'tantrums',
'threw',
'free',
'may',
'people',
'let',
'odd',
'rally',
'spike',
'suddenly',
'cancelcbseboardexams2020',
'depression',
'baker',
'bun',
'looking',
'mother',
'ntonntoni',
'oven',
'son',
'threads',
'wat',
'got',
'three',
'brother',
'give',
'patients',
'perfect',
'save',
'ummah',
'u',
'coro',
'patient',
'classes',
'month',
'postpone',
'government',
'one',
'mask',
'go',
'2020',
'95',
'atrocities',
'committed',
'fan',
'positive',
'say',
'someth',
'adversity',
'comes',
'infection',
'resilience',
'shared',
'story',
'story2',
'women',
'young',
'at',
'attempt',
'blame',
'hapless',
'humans',
'shift',
'911',
'begging',
'called',
'pants',
'pretty',
'removed',
'sure',
'system',
'goal',
'trending',
'believe',
'whatever',
'coronarvirues',
'updates',
'owns',
'236941',
'4308692',
'recovered',
'recoveries',
'today',
'catching',
'fear',
'players',
'professional',
'says',
'snooker',
'travel',
'itll',
'kill',
'remains',
'calling',
'create',
'environmen',
'find',
'report',
'start',
'try',
'ways',
'less',
'days',
'followers',
'old',
'thats',
'discussing',
'kills',
'peroxide',
'room',
'tha',
'wash',
'bec',
'dying',
'entry',
'hospitals',
'refused',
'saw',
'video',
'visited',
'sir',
'aka',
'lost',
'trust',
'unleashing',
'whole',
'world',
'immune',
'china',
'gift',
'send',
'cit',
'folks',
'mow',
'nothing',
'police',
'see',
'winning',
'yep',
'adding',
'water',
'final',
'wants',
'عاوزينبديلللخريجين',
'itself',
'addimistrstions',
'bat',
'bidden',
'breaking',
'fund',
'grant',
'institutes',
'provide',
'virology',
'attention',
'freestyle',
'know',
'pay',
'time',
'took',
'verse',
'vir',
'part',
'response',
'bit',
'control',
'disappointed',
'lose',
'praised',
'seemed',
'congress',
'produced',
'doctor',
'highest',
'humanity',
'rank',
'sunset',
'taking',
'watch',
'caring',
'comm',
'community',
'crisis',
'family',
'share',
'us',
'districts',
'entering',
'handled',
'surrounding',
'attack',
'country',
'also',
'come',
'8k',
'became',
'bullish',
'ca',
'fly',
'holding',
'north',
'poiting',
'spesk',
'tightenned',
'up',
'hospital',
'since',
'cer',
'infects',
'public',
'tell',
'vertebrate',
'eat',
'friends',
'fucked',
'ho',
'used',
'cosplay',
'cosplayer',
'costume',
'eeyore',
'home',
'tiger',
'quarantine',
'officials',
't',
'truly',
'anticorona',
'bring',
'forget',
'mall',
'morning',
'image',
'stars',
'yellow',
'app',
'contained',
'information',
'mean',
'all',
'cannot',
'justice',
'kept',
'truth',
'every',
'research',
'30',
'bio',
'lab',
'new',
'weapons',
'ans',
'argue',
'covic419',
'honest',
'oo',
'scam',
'want']
and
['41',
'27',
'12',
'11',
'11',
'11',
'10',
'10',
'10',
'10',
'10',
'6',
'6',
'6',
'5',
'5',
'5',
'5',
'4',
'4',
'4',
'4',
'4',
'4',
'4',
'4',
'4',
'4',
'4',
'4',
'4',
'4',
'4',
'3',
'3',
'3',
'3',
'3',
'3',
'3',
'3',
'3',
'3',
'3',
'3',
'3',
'3',
'3',
'3',
'3',
'3',
'3',
'3',
'3',
'3',
'3',
'3',
'3',
'3',
'3',
'3',
'3',
'3',
'3',
'3',
'2',
'2',
'2',
'2',
'2',
'2',
'2',
'2',
'2',
'2',
'2',
'2',
'2',
'2',
'2',
'2',
'2',
'2',
'2',
'2',
'2',
'2',
'2',
'2',
'2',
'2',
'2',
'2',
'2',
'2',
'2',
'2',
'2',
'2',
'2',
'2',
'2',
'2',
'2',
'2',
'2',
'2',
'2',
'2',
'2',
'2',
'2',
'2',
'2',
'2',
'2',
'2',
'2',
'2',
'2',
'2',
'2',
'2',
'2',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1',
'1']
Below a screenshot of the output produced with a dict(zip(tmp_list[0], tmp_list[1])).
The same happens if I do create the dictionary in this way.
words_dict = {}
for i, _ in enumerate(tmp_list[0]):
words_dict[tmp_list[0][i]] = int(tmp_list[1][i])
The only way to circumvent the problem is to encode the strings in UTF-8 by doing something like:
dict(zip([x.encode('utf-8') for x in tmp_list[0]], tmp_list[1]))
My questions are:
Is there any other efficient way?
Why is that? At the end of the day, the value I'm passing in is a string, so why are the interpreter and the dictionary flipping the key with the value?
This is just a problem of output. The Arabic text is displayed right-to-left, and that affects the entire line apparently, so the dict value is printed left of the key. The dict structure itself is just fine.

creating dictionary from the list with each elements as key:value pairs in python [duplicate]

This question already has answers here:
Python: how to build a dict from plain list of keys and values
(5 answers)
Closed 5 years ago.
I have a list where the content of list are key:value pairs something like shown below
sample =['ldb:21', 'baseB:4', 'cache-6:55', 'Xauthority:1', 'baseA:4',
'apmrc:1', 'gz:169', 'dbf:1', 'lst:2', 'sqlite-shm:4', 'ttf:107',
'gitignore:6', 'xml:35', 'sbstore:42', 'cache:103', 'jar:1',
'desktop:2', 'source:1', 'sqlite3-journal:1', 'TAG:1', '4:1',
'usage:6', 'yml:1', 'bin:5', 'docx:9', 'woff:1', 'db:17',
'gpg-agent:1', 'V2XO5Y:1', 'dat:2', 'fingerprint:4', 'lz4:1',
'cson:2', 'name:1', 'bat:1', 'bau:1', 'converted-launchers:1',
'h:14', 'list:2', 'xlb:2', 'dic:1', 'zsh-update:1',
'mozlz4:2', 'stderr:8', 'sublime_session:1', 'bak:1', 'dll:42',
'old:24', 'locale:1', 'cfg:3', 'htm:1', 'odt:1', 'keyring:1',
'md~:1', 'pma:2', 'sqlite-journal:1', 'odb:2', 'gradle:3', 'tvc:1',
'out:19', 'ico:1', 'icc:1', 'gpg:1', 'dbt:1', '3:1', 'rb:1',
'ics:3', 'reg:3', 'metadata:6', 'dirs:1', 'run:1', 'tdb:3',
'journal:2', 'zshrc:1', 'little:2', 'pub:1', 'js:245',
'asc:1', 'xbel:1', 'properties:2', 'bash:1', 'sys:1', 'c:282',
'zip:5', 'idx:5', 'lsup7I:1', 'zcompdump:1', 'rdf:2', 'dmrc:1',
'Mdg80A:1', 'pdf:18', 'xlc:2', 'jsonlz4:39', 'bashrc:1',
'db-journal:3', 'pf2:5', 'localstorage:71', 'isrunning:1',
'txt:63', 'orig:1', 'gvdb:1', 'xpi:9', 'php:12',
'gitmodules:1', 'log:41', 'swo:1', 'stamp:2', 'vxd:1',
'fmt:1', 'py:41']
I want to convert the above list into dictionary key:value pairs.
Then I want to convert it into json format.
You can try this:
sample =['ldb:21', 'baseB:4', 'cache-6:55', 'Xauthority:1', 'baseA:4', 'apmrc:1', 'gz:169', 'dbf:1', 'lst:2', 'sqlite-shm:4', 'ttf:107', 'gitignore:6', 'xml:35', 'sbstore:42', 'cache:103', 'jar:1', 'desktop:2', 'source:1', 'sqlite3-journal:1', 'TAG:1', '4:1', 'usage:6', 'yml:1', 'bin:5', 'docx:9', 'woff:1', 'db:17', 'gpg-agent:1', 'V2XO5Y:1', 'dat:2', 'fingerprint:4', 'lz4:1', 'cson:2', 'name:1', 'bat:1', 'bau:1', 'converted-launchers:1', 'h:14', 'list:2', 'xlb:2', 'dic:1', 'zsh-update:1', 'mozlz4:2', 'stderr:8', 'sublime_session:1', 'bak:1', 'dll:42', 'old:24', 'locale:1', 'cfg:3', 'htm:1', 'odt:1', 'keyring:1', 'md~:1', 'pma:2', 'sqlite-journal:1', 'odb:2', 'gradle:3', 'tvc:1', 'out:19', 'ico:1', 'icc:1', 'gpg:1', 'dbt:1', '3:1', 'rb:1', 'ics:3', 'reg:3', 'metadata:6', 'dirs:1', 'run:1', 'tdb:3', 'journal:2', 'zshrc:1', 'little:2', 'pub:1', 'js:245', 'asc:1', 'xbel:1', 'properties:2', 'bash:1', 'sys:1', 'c:282', 'zip:5', 'idx:5', 'lsup7I:1', 'zcompdump:1', 'rdf:2', 'dmrc:1', 'Mdg80A:1', 'pdf:18', 'xlc:2', 'jsonlz4:39', 'bashrc:1', 'db-journal:3', 'pf2:5', 'localstorage:71', 'isrunning:1', 'txt:63', 'orig:1', 'gvdb:1', 'xpi:9', 'php:12', 'gitmodules:1', 'log:41', 'swo:1', 'stamp:2', 'vxd:1', 'fmt:1', 'py:41']
final_data = {a:b for a, b in [i.split(":") for i in sample]}
print(final_data)
Output:
{'ldb': '21', 'baseB': '4', 'cache-6': '55', 'Xauthority': '1', 'baseA': '4', 'apmrc': '1', 'gz': '169', 'dbf': '1', 'lst': '2', 'dll': '42', 'ttf': '107', 'gitignore': '6', 'xml': '35', 'sbstore': '42', 'cache': '103', 'jar': '1', 'desktop': '2', 'source': '1', 'sqlite3-journal': '1', 'TAG': '1', '4': '1', 'usage': '6', 'yml': '1', 'bin': '5', 'docx': '9', 'woff': '1', 'dbt': '1', 'V2XO5Y': '1', 'dat': '2', 'fingerprint': '4', 'lz4': '1', 'name': '1', 'bat': '1', 'bau': '1', 'converted-launchers': '1', 'h': '14', 'list': '2', 'xlb': '2', 'gradle': '3', 'zsh-update': '1', 'stderr': '8', 'sublime_session': '1', 'bak': '1', 'old': '24', 'locale': '1', 'cfg': '3', 'htm': '1', 'odt': '1', 'md~': '1', 'pma': '2', 'sqlite-journal': '1', 'odb': '2', 'dic': '1', 'tvc': '1', 'out': '19', 'ico': '1', 'icc': '1', 'sqlite-shm': '4', '3': '1', 'rb': '1', 'ics': '3', 'py': '41', 'reg': '3', 'metadata': '6', 'dirs': '1', 'run': '1', 'tdb': '3', 'journal': '2', 'zshrc': '1', 'xpi': '9', 'pub': '1', 'js': '245', 'asc': '1', 'xbel': '1', 'properties': '2', 'bash': '1', 'c': '282', 'swo': '1', 'idx': '5', 'lsup7I': '1', 'rdf': '2', 'dmrc': '1', 'Mdg80A': '1', 'pdf': '18', 'xlc': '2', 'jsonlz4': '39', 'bashrc': '1', 'db-journal': '3', 'pf2': '5', 'localstorage': '71', 'isrunning': '1', 'txt': '63', 'orig': '1', 'gvdb': '1', 'little': '2', 'gitmodules': '1', 'log': '41', 'zip': '5', 'stamp': '2', 'vxd': '1', 'fmt': '1', 'gpg': '1', 'gpg-agent': '1', 'cson': '2', 'zcompdump': '1', 'mozlz4': '2', 'db': '17', 'sys': '1', 'php': '12', 'keyring': '1'}
Just use dict, splitting on the colons.
dict(item.split(':') for item in sample)
Or the equivalent functional approach,
from operator import methodcaller
dict(map(methodcaller('split', ':'), sample))
From there json.dumps to get a JSON formatted string.
sample =['ldb:21', 'baseB:4', 'cache-6:55', 'Xauthority:1', 'baseA:4', 'apmrc:1', 'gz:169', 'dbf:1', 'lst:2', 'sqlite-shm:4', 'ttf:107', 'gitignore:6', 'xml:35', 'sbstore:42', 'cache:103', 'jar:1', 'desktop:2', 'source:1', 'sqlite3-journal:1', 'TAG:1', '4:1', 'usage:6', 'yml:1', 'bin:5', 'docx:9', 'woff:1', 'db:17', 'gpg-agent:1', 'V2XO5Y:1', 'dat:2', 'fingerprint:4', 'lz4:1', 'cson:2', 'name:1', 'bat:1', 'bau:1', 'converted-launchers:1', 'h:14', 'list:2', 'xlb:2', 'dic:1', 'zsh-update:1', 'mozlz4:2', 'stderr:8', 'sublime_session:1', 'bak:1', 'dll:42', 'old:24', 'locale:1', 'cfg:3', 'htm:1', 'odt:1', 'keyring:1', 'md~:1', 'pma:2', 'sqlite-journal:1', 'odb:2', 'gradle:3', 'tvc:1', 'out:19', 'ico:1', 'icc:1', 'gpg:1', 'dbt:1', '3:1', 'rb:1', 'ics:3', 'reg:3', 'metadata:6', 'dirs:1', 'run:1', 'tdb:3', 'journal:2', 'zshrc:1', 'little:2', 'pub:1', 'js:245', 'asc:1', 'xbel:1', 'properties:2', 'bash:1', 'sys:1', 'c:282', 'zip:5', 'idx:5', 'lsup7I:1', 'zcompdump:1', 'rdf:2', 'dmrc:1', 'Mdg80A:1', 'pdf:18', 'xlc:2', 'jsonlz4:39', 'bashrc:1', 'db-journal:3', 'pf2:5', 'localstorage:71', 'isrunning:1', 'txt:63', 'orig:1', 'gvdb:1', 'xpi:9', 'php:12', 'gitmodules:1', 'log:41', 'swo:1', 'stamp:2', 'vxd:1', 'fmt:1', 'py:41']
# Build the mapping one "key:value" string at a time.
dict_data = {}
for data in sample:
    # split(':') yields [key, value]; both stay strings.
    item = data.split(':')
    dict_data[item[0]] = item[1]
# print() with a single argument behaves the same on Python 2 and Python 3.
print(dict_data)
output:
{'ldb': '21', 'baseB': '4', 'cache-6': '55', 'Xauthority': '1', 'baseA': '4', 'apmrc': '1', 'gz': '169', 'dbf': '1', 'lst': '2', 'dll': '42', 'ttf': '107', 'gitignore': '6', 'xml': '35', 'sbstore': '42', 'cache': '103', 'jar': '1', 'desktop': '2', 'source': '1', 'sqlite3-journal': '1', 'TAG': '1', '4': '1', 'usage': '6', 'yml': '1', 'bin': '5', 'docx': '9', 'woff': '1', 'dbt': '1', 'V2XO5Y': '1', 'dat': '2', 'fingerprint': '4', 'lz4': '1', 'name': '1', 'bat': '1', 'bau': '1', 'converted-launchers': '1', 'h': '14', 'list': '2', 'xlb': '2', 'gradle': '3', 'zsh-update': '1', 'stderr': '8', 'sublime_session': '1', 'bak': '1', 'old': '24', 'locale': '1', 'cfg': '3', 'htm': '1', 'odt': '1', 'md~': '1', 'pma': '2', 'sqlite-journal': '1', 'odb': '2', 'dic': '1', 'tvc': '1', 'out': '19', 'ico': '1', 'icc': '1', 'sqlite-shm': '4', '3': '1', 'rb': '1', 'ics': '3', 'py': '41', 'reg': '3', 'metadata': '6', 'dirs': '1', 'run': '1', 'tdb': '3', 'journal': '2', 'zshrc': '1', 'xpi': '9', 'pub': '1', 'js': '245', 'asc': '1', 'xbel': '1', 'properties': '2', 'bash': '1', 'c': '282', 'swo': '1', 'idx': '5', 'lsup7I': '1', 'rdf': '2', 'dmrc': '1', 'Mdg80A': '1', 'pdf': '18', 'xlc': '2', 'jsonlz4': '39', 'bashrc': '1', 'db-journal': '3', 'pf2': '5', 'localstorage': '71', 'isrunning': '1', 'txt': '63', 'orig': '1', 'gvdb': '1', 'little': '2', 'gitmodules': '1', 'log': '41', 'zip': '5', 'stamp': '2', 'vxd': '1', 'fmt': '1', 'gpg': '1', 'gpg-agent': '1', 'cson': '2', 'zcompdump': '1', 'mozlz4': '2', 'db': '17', 'sys': '1', 'php': '12', 'keyring': '1'}
Iterate over your list and split the pair into two to form a dictionary
dicta = {a:b for a,b in [pair.split(":") for pair in sample]}
[pair.split(":") for pair in sample] gives you the list of lists:
[["ldb", "21"], ["baseB", "4"], ... ]
You can put key as string and value as int, as it seems to be count.
dicta = {a:int(b) for a,b in [pair.split(":") for pair in sample]}

Python for loops and data

I have data that looks like this:
The columns are Name, ID, Dev ID, Date
('Anthony', '1', '10', '4/3/2017')
('Anthony', '1', '11', '5/2/2017')
('Anthony', '1', '13', '12/30/2017')
('Anthony', '1', '15', '8/20/2017')
('Anthony', '4', '17', '2/3/2018')
('Anthony', '4', '18', '3/28/2017')
('Bob', '1', '111', '4/3/2017')
('Bob', '1', '200', '5/2/2017')
('Bob', '1', '113', '12/30/2017')
('Bob', '1', '115', '8/20/2017')
('Bob', '4', '117', '2/3/2018')
('Bob', '4', '118', '3/28/2017')
I'm trying to find unique Name's and ID's and then compare any dates and return only the one furthest in the future.
Ideally I want output that looks like:
('Anthony', '1', '12/30/2017')
('Anthony', '4', '2/3/2018')
('Bob', '1', '12/30/2017')
('Bob', '4', '2/3/2018')
I'm struggling because I have multiple keys and I can't figure out how to make it work. Any ideas?
Edit: This is only a sample I have 30ish people names and 10 unique id's. So i'm looking to make a For loop to figure this out.
You can use itertools.groupby combined with max to get output similar to what you're looking for.
import itertools
from datetime import datetime

data = [('Anthony', '1', '10', '4/3/2017'),
        ('Anthony', '1', '11', '5/2/2017'),
        ('Anthony', '1', '13', '12/30/2017'),
        ('Anthony', '1', '15', '8/20/2017'),
        ('Anthony', '4', '17', '2/3/2018'),
        ('Anthony', '4', '18', '3/28/2017'),
        ('Bob', '1', '111', '4/3/2017'),
        ('Bob', '1', '200', '5/2/2017'),
        ('Bob', '1', '113', '12/30/2017'),
        ('Bob', '1', '115', '8/20/2017'),
        ('Bob', '4', '117', '2/3/2018'),
        ('Bob', '4', '118', '3/28/2017')]


def name_and_id(row):
    """Return the (name, id) pair that identifies the group a row belongs to."""
    return (row[0], row[1])


groups_with_max_date = []
# itertools.groupby only merges ADJACENT rows that share a key, so sort by the
# same key first; otherwise rows of one (name, id) pair that are not contiguous
# would be reported as several separate groups.
for key, group in itertools.groupby(sorted(data, key=name_and_id), name_and_id):
    # convert to datetime and get max of group
    group_max = max(group, key=lambda q: datetime.strptime(q[3], '%m/%d/%Y'))
    groups_with_max_date.append(group_max)
groups_with_max_date
Gives us:
[('Anthony', '1', '13', '12/30/2017'),
('Anthony', '4', '17', '2/3/2018'),
('Bob', '1', '113', '12/30/2017'),
('Bob', '4', '117', '2/3/2018')]
The solution using datetime object, dict.setdefault(), max and datetime.strptime functions:
import datetime
l = [('Anthony', '1', '10', '4/3/2017'),('Anthony', '1', '11', '5/2/2017'),('Anthony', '1', '13', '12/30/2017'),('Anthony', '1', '15', '8/20/2017'),
     ('Anthony', '4', '17', '2/3/2018'),('Anthony', '4', '18', '3/28/2017'),('Bob', '1', '111', '4/3/2017'),('Bob', '1', '200', '5/2/2017'),
     ('Bob', '1', '113', '12/30/2017'),('Bob', '1', '115', '8/20/2017'),('Bob', '4', '117', '2/3/2018'),('Bob', '4', '118', '3/28/2017')]
d = {}
for t in l:
    # Group the date strings by the first two values of each tuple. A tuple key
    # is used instead of a '-'-joined string so that a name containing '-'
    # cannot be split incorrectly when the key is unpacked again below.
    d.setdefault((t[0], t[1]), []).append(t[3])
# For every (name, id) group keep the date string whose parsed date is latest.
result = [(*k, max(v, key=lambda dt: datetime.datetime.strptime(dt, '%m/%d/%Y'))) for k, v in sorted(d.items())]
print(result)
The output:
[('Anthony', '1', '12/30/2017'), ('Anthony', '4', '2/3/2018'), ('Bob', '1', '12/30/2017'), ('Bob', '4', '2/3/2018')]

Insert into a list 4 digit pin and then empty the list

I'm trying to write code that stores a 4-digit PIN in a list; if the PIN is correct it will do some stuff, then empty the list and ask for the 4-digit PIN again.
A part of my code till now is:
def getKey(self):
    """Scan the keypad once and return the pressed key, if any.

    Rows are scanned first (a row reading 0 is taken as pressed), then the
    columns are scanned with that row driven high (a column reading 1 is taken
    as pressed). self.exit() is called before every return.

    Returns:
        self.KEYPAD[row][column] for the detected key, or None when no row
        or no column was detected.
    """
    # Drive every column low as an output.
    for col_pin in self.COLUMN:
        GPIO.setup(col_pin, GPIO.OUT)
        GPIO.output(col_pin, GPIO.LOW)

    # Configure every row as a pulled-up input.
    for row_pin in self.ROW:
        GPIO.setup(row_pin, GPIO.IN, pull_up_down=GPIO.PUD_UP)

    # Look for a row reading 0; with no break, the last matching row wins.
    # A valid key press leaves rowVal between 0 and 3.
    rowVal = -1
    for row_index, row_pin in enumerate(self.ROW):
        if GPIO.input(row_pin) == 0:
            rowVal = row_index

    # Outside 0..3 means no button was pressed, so stop here.
    if not 0 <= rowVal <= 3:
        self.exit()
        return None

    # Turn the columns into pulled-down inputs.
    for col_pin in self.COLUMN:
        GPIO.setup(col_pin, GPIO.IN, pull_up_down=GPIO.PUD_DOWN)

    # Drive the detected row high as an output.
    active_row_pin = self.ROW[rowVal]
    GPIO.setup(active_row_pin, GPIO.OUT)
    GPIO.output(active_row_pin, GPIO.HIGH)

    # Look for a column reading 1; again the last matching column wins.
    # A valid key press leaves colVal between 0 and 2.
    colVal = -1
    for col_index, col_pin in enumerate(self.COLUMN):
        if GPIO.input(col_pin) == 1:
            colVal = col_index

    # Outside 0..2 means the button is no longer pressed, so stop here.
    if not 0 <= colVal <= 2:
        self.exit()
        return None

    # Report the key found at the detected row/column.
    self.exit()
    return self.KEYPAD[rowVal][colVal]
list1 =[] #declare the list
#Loop while waiting for a key press
kp = keypad()
while True:
digit = None
while digit == None:
digit = kp.getKey()
print digit #Print the pressed key
x=0
# a loop that will add the 4 digits into the list
while len(list1) < 4:
list1.insert(x, str(digit))
x=x+1
print list1
The output of this code is a bunch of those lists:
1

1

1

1

Any ideas? I know there is a major mistake somewhere in my code, but I'm so confused right now.
try something like this
last_digit = None
while True:
list1 = []
digit = None
while len(list1) < 4:
while digit == last_digit or digit is None:
digit = kp.getKey()
if digit is None:
last_digit = None
list1.append(digit)
last_digit = digit
print "List:",list1
You need to do this because kp.getKey() just returns whatever key is currently held down, and since you are calling it many times per second you would need very quick fingers to register only a single result.
It would probably be better to encapsulate your logic into discrete blocks:
def getKeyUp():
    """Block until a key is pressed and then released, and return that key.

    Polls the module-level keypad object `kp`. The keypad is read exactly once
    per loop iteration: reading it twice (once to test, once to store) lets the
    key be released between the two reads, which would overwrite the tracked
    key with None.

    Returns:
        The last non-None value kp.getKey() reported before it reported None.
    """
    # Wait for a key to go down, remembering it as soon as it is seen.
    released = kp.getKey()
    while released is None:
        released = kp.getKey()
    # Keep tracking the held key; once nothing is held, report the last one.
    while True:
        current = kp.getKey()
        if current is None:
            return released
        released = current
code = [getKeyUp() for _ in range(4)]
print "Got Code:",code
I could not replicate your precise problem, but your logic does seem off. Try:
while True:
list1 = []
while len(list1) < 4:
digit = None
while digit is None:
digit = kp.getKey()
list1.append(digit)
# list1 has four entries

Categories

Resources