Python how to compare string in a list with dict - python

I have a code like this:
It will print Student
d= u'pen hahahahaha'
area = [u'Apple',u'Banana',u'lemon']
area2 = [ u'pen',u'book',u'chair' ]
area3 = [u'father',u'mother']
if any(d.startswith(i) for i in area):
category = 'Fruit'
print 'Fruit'
elif any(d.startswith(i) for i in area2):
category = 'Student'
print 'Student'
elif any(d.startswith(i) for i in area3):
category = 'family'
print 'family'
I want to know how to edit it to a mode like this:
aa = [{"Fruit":[u'Apple',u'Banana',u'lemon']},
{"Student":[ u'pen',u'book',u'chair' ]},
{"Family":[u'father',u'mother']}]
So I can compare if 'pen hahahahaha' in {"Student":[ u'pen',u'book',u'chair' ]}
save category = 'Student'
I think for a while but have no idea,please guide me.Thank you

You can use loop:
categories = {
"Fruit": [u'Apple', u'Banana', u'lemon'],
"Student": [u'pen', u'book', u'chair'],
"Family": [u'father', u'mother']
}
def get_category(value):
for cat, cat_entries in categories.iteritems():
for cat_entry in cat_entries:
if value.startswith(cat_entry):
return cat
return None
print get_category('pen hahahahaha')
Output:
Student

Make aa a dictionary like:
aa = {"Fruit":[u'Apple',u'Banana',u'lemon'],
"Student":[ u'pen',u'book',u'chair' ],
"Family":[u'father',u'mother']}
obj = 'pen'
for key in aa:
if obj in aa[key]:
print(obj + ' is in ' + key)
edit:
May be this will suit your requirement more
aa = {"Fruit":[u'Apple',u'Banana',u'lemon'],
"Student":[ u'pen',u'book',u'chair' ],
"Family":[u'father',u'mother']}
obj = u'pen hahhah'
for key in aa:
for item in aa[key]:
if obj.startswith(item):
print(obj + ' is in ' + key)

aa = [{"Fruit":[u'Apple',u'Banana',u'lemon']},
{"Student":[ u'pen',u'book',u'chair' ]},
{"Family":[u'father',u'mother']}]
d=u'pen haaaaaa'
print [ x.keys()[0] for x in aa for y in x.values()[0] if y in d.split() ]

Related

How to make a nested dictionary based on a list of URLs?

I have this list of hierarchical URLs:
data = ["https://python-rq.org/","https://python-rq.org/a","https://python-rq.org/a/b","https://python-rq.org/c"]
And I want to dynamically make a nested dictionary for every URL for which there exists another URL that is a subdomain/subfolder of it.
I already tried the follwoing but it is not returning what I expect:
result = []
for key,d in enumerate(data):
form_dict = {}
r_pattern = re.search(r"(http(s)?://(.*?)/)(.*)",d)
r = r_pattern.group(4)
if r == "":
parent_url = r_pattern.group(3)
else:
parent_url = r_pattern.group(3) + "/"+r
print(parent_url)
temp_list = data.copy()
temp_list.pop(key)
form_dict["name"] = parent_url
form_dict["children"] = []
for t in temp_list:
child_dict = {}
if parent_url in t:
child_dict["name"] = t
form_dict["children"].append(child_dict.copy())
result.append(form_dict)
This is the expected output.
{
"name":"https://python-rq.org/",
"children":[
{
"name":"https://python-rq.org/a",
"children":[
{
"name":"https://python-rq.org/a/b",
"children":[
]
}
]
},
{
"name":"https://python-rq.org/c",
"children":[
]
}
]
}
Any advice?
This was a nice problem. I tried going on with your regex method but got stuck and found out that split was actually appropriate for this case. The following works:
data = ["https://python-rq.org/","https://python-rq.org/a","https://python-rq.org/a/b","https://python-rq.org/c"]
temp_list = data.copy()
# This removes the last "/" if any URL ends with one. It makes it a lot easier
# to match the URLs and is not necessary to have a correct link.
data = [x[:-1] if x[-1]=="/" else x for x in data]
print(data)
result = []
# To find a matching parent
def find_match(d, res):
for t in res:
if d == t["name"]:
return t
elif ( len(t["children"])>0 ):
temp = find_match(d, t["children"])
if (temp):
return temp
return None
while len(data) > 0:
d = data[0]
form_dict = {}
l = d.split("/")
# I removed regex as matching the last parentheses wasn't working out
# split does just what you need however
parent = "/".join(l[:-1])
data.pop(0)
form_dict["name"] = d
form_dict["children"] = []
option = find_match(parent, result)
if (option):
option["children"].append(form_dict)
else:
result.append(form_dict)
print(result)
[{'name': 'https://python-rq.org', 'children': [{'name': 'https://python-rq.org/a', 'children': [{'name': 'https://python-rq.org/a/b', 'children': []}]}, {'name': 'https://python-rq.org/c', 'children': []}]}]

matching key against different pairs in python

Due to different names of an attribute I need to match a key of a key value pare against a regex.
The possible names are defined in a dict:
MyAttr = [
('ref_nr', 'Reference|Referenz|Referenz-Nr|Referenznummer'),
('color', 'Color|color|tinta|farbe|Farbe'),
]
The import attributes from an item in another dict:
ImportAttr = [
('Referenz', 'Ref-Val'),
('color', 'red'),
]
Now I would like to return the value of the import attributes, if it is a known attribute (defined in my first dict MyAttr) matching different spelling of the attribute in question.
for key, value in ImportAttr:
if key == "Referenz-Nr" : ref = value
if key == "Farbe" : color = value
The goal is to return the value of a possible attribute if it is a known one.
print(ref)
print(color)
Should return the value if "Referenz-Nr" and "Farbe" are known attributes.
Obviously this pseudo code does not work, I just can't get my head around a function implementing regex for a key search.
It was not clear for me but maybe you want it:
#!/usr/bin/python3
MyAttr = [
('ref_nr', 'Reference|Referenz|Referenz-Nr|Referenznummer'),
('color', 'Color|color|tinta|farbe|Farbe')
]
ImportAttr = [
('Referenz', 'Ref-Val'),
('color', 'red'),
]
ref, color = None, None
for key, value in ImportAttr:
if key in MyAttr[0][1].split('|'):
ref = value
if key in MyAttr[1][1].split('|'):
color = value
print("ref: ", ref)
print("color: ", color)
The split can split the string into a list of string by the separator ("|" character here) then you can check is the key in that list or not.
The following solution is a little bit tricky. If you don't want to hardcode the positions into your source you can use locals().
#!/usr/bin/python3
MyAttr = [
('ref', 'Reference|Referenz|Referenz-Nr|Referenznummer'),
('color', 'Color|color|tinta|farbe|Farbe')
]
ImportAttr = [
('Referenz', 'Ref-Val'),
('color', 'red'),
]
ref, color = None, None
for var, names in MyAttr:
for key, value in ImportAttr:
if key in names.split('|'):
locals()[var] = value
break
print("ref: ", ref)
print("color: ", color)
If you want, you can also use pandas to solve this problem for the large data sets in this way.
get_references_and_colors.py
import pandas as pd
import re
import json
def get_references_and_colors(lookups, attrs):
responses = []
refs = pd.Series(re.split(r"\|", lookups[0][0]))
colors = pd.Series(re.split(r"\|", lookups[1][0]))
d = {"ref": refs, "color": colors}
df = pd.DataFrame(d).fillna('') # To drop NaN entries, in case if refs
# & colors are not of same length
# ref color
# 0 Reference Color
# 1 Referenz color
# 2 Referenz-Nr tinta
# 3 Referenznummer farbe
# 4 Farbe
for key, value in attrs:
response = {}
response["for_attr"] = key
df2 = df.loc[df["ref"] == key]; # find in 'ref' column
if not df2.empty:
response["ref"] = value
else:
df3 = df.loc[df["color"] == key]; # find in 'color' column
if not df3.empty:
response["color"] = value
else:
response["color"] = None # Not Available
response["ref"] = None
responses.append(response)
return responses
if __name__ == "__main__":
LOOKUPS = [
('Reference|Referenz|Referenz-Nr|Referenznummer', 'a'),
('Color|color|tinta|farbe|Farbe', 'b'),
]
ATTR = [
('Referenz', 'Ref-Val'),
('color', 'red'),
('color2', 'orange'), # improper
('tinta', 'Tinta-col')
]
responses = get_references_and_colors(LOOKUPS, ATTR) # dictionary
pretty_response = json.dumps(responses, indent=4) # for pretty printing
print(pretty_response)
Output
[
{
"for_attr": "Referenz",
"ref": "Ref-Val"
},
{
"for_attr": "color",
"color": "red"
},
{
"for_attr": "color2",
"color": null,
"ref": null
},
{
"for_attr": "tinta",
"color": "Tinta-col"
}
]

Count and remove duplicates in keys while preserving values

I have collated some data and made them into a dictionary as follows:
gen_dict = {
"item_C_v001" : "jack",
"item_C_v002" : "kris",
"item_A_v003" : "john",
"item_B_v006" : "peter",
"item_A_v005" : "john",
"item_A_v004" : "dave"
}
I am trying to print out the results in the following format:
Item Name | No. of Vers. | User
item_A | 3 | dave, john
item_B | 1 | peter
item_C | 2 | jack, kris
where it will tabulates similar versions into 1 line, while counting how many versions there are and at the same time, stating the user names..
I am having trouble of integrating in the user names. I used the set() command, and that seems to apply for all my 3 rows of output.
Even so, while my 'Item Name' and 'no. of Vers.' column does seems correct, are there any ways in which I can check if the number of versions it found does adhere to the name? It is possible for me to count it manually if I have a small data but what if I got big data?
strip_ver_list = []
user_list = []
for item_name, user in gen_dict.iteritems():
# Strip out the version digits
strip_ver = item_name[:-3]
strip_ver_list.append(strip_ver)
user_list.append(user)
# This will count and remove the duplicates
versions_num = dict((duplicate, strip_ver_list.count(duplicate)) for duplicate in strip_ver_list)
for name, num in sorted(versions_num.iteritems()):
print "Version Name : {0}\nNo. of Versions : {1}\nUsers : {2}".format(name, num, set(user_list))
This is the ouput I have gotten:
Item Name | No. of Vers. | User
item_A | 3 | set(['dave', 'john', 'jack', 'kris', 'peter'])
item_B | 1 | set(['dave', 'john', 'jack', 'kris', 'peter'])
item_C | 2 | set(['dave', 'john', 'jack', 'kris', 'peter'])
This is the only method I can think up of.. But if there are any other viable methods to get around this, please do share with me
I would use a defaultdict to aggregate the data. Roughly:
>>> from collections import defaultdict
>>> gen_dict = {
... "item_C_v001" : "jack",
... "item_C_v002" : "kris",
... "item_A_v003" : "john",
... "item_B_v006" : "peter",
... "item_A_v005" : "john",
... "item_A_v004" : "dave"
... }
Now ...
>>> versions_num = defaultdict(lambda:dict(versions=set(), users = set()))
>>> for item_name, user in gen_dict.items():
... strip_ver = item_name[:-5]
... version_num = item_name[-3:]
... versions_num[strip_ver]['versions'].add(version_num)
... versions_num[strip_ver]['users'].add(user)
...
Finally,
>>> for item, data in versions_num.items():
... print("Item {} \tno. of Versions: {}\tUsers:{}".format(item, len(data['versions']), ",".join(data['users'])))
...
Item item_B no. of Versions: 1 Users:peter
Item item_A no. of Versions: 3 Users:john,dave
Item item_C no. of Versions: 2 Users:kris,jack
>>>
And if you want it sorted:
>>> for item, data in sorted(versions_num.items()):
... print("Item {} \tno. of Versions: {}\tUsers:{}".format(item, len(data['versions']), ",".join(data['users'])))
...
Item item_A no. of Versions: 3 Users:john,dave
Item item_B no. of Versions: 1 Users:peter
Item item_C no. of Versions: 2 Users:kris,jack
You need to group the lists by the item name and extract the users from each group, otherwise the user_list will always be a global list of users:
from itertools import groupby
# split the item_version
sorted_ver_num = sorted(k.rsplit("_", 1) + [v] for k, v in gen_dict.items())
# group the results by the item name
for k, g in groupby(sorted_ver_num, key = lambda x: x[0]):
# extract the user list within each group
# user_list = [user for *_, user in g]
user_list = [user for _, _, user in g]
print("Version Name : {0}\nNo. of Versions : {1}\nUsers : {2}".format(k, len(user_list), set(user_list)))
Version Name : item_A
No. of Versions : 3
Users : {'dave', 'john'}
Version Name : item_B
No. of Versions : 1
Users : {'peter'}
Version Name : item_C
No. of Versions : 2
Users : {'kris', 'jack'}
I would use a defaultdict to keep track of the users, and an ordinary dict to keep track of the count. The dict.get() method allows you to return a default value if the key is not found, in this case 0, and you just add 1 to it each time the key is found.
from collections import defaultdict
gen_dict = {
"item_C_v001" : "jack",
"item_C_v002" : "kris",
"item_A_v003" : "john",
"item_B_v006" : "peter",
"item_A_v005" : "john",
"item_A_v004" : "dave"
}
user_dict = defaultdict(set)
count_dict = {}
for item_name, user in gen_dict.iteritems():
user_dict[item_name[:-3]].add(user) # Sure you want -3 not -5?
count_dict[item_name[:-3]] = count_dict.get(item_name[:-3], 0) + 1
for name, num in sorted(count_dict.iteritems()):
print "Version Name : {0}\nNo. of Versions : {1}\nUsers : {2}".format(
name, num, ', '.join(item for item in user_dict[name]))
Example in IPython:
In [1]: gen_dict = {
...: "item_C_v001" : "jack",
...: "item_C_v002" : "kris",
...: "item_A_v003" : "john",
...: "item_B_v006" : "peter",
...: "item_A_v005" : "john",
...: "item_A_v004" : "dave"
...: }
Get the keys, we'll be needing them more then once.
In [2]: keys = tuple(gen_dict.keys())
Find the set of items.
In [3]: items = set(j[:-5] for j in keys)
Table header and template.
In [4]: header = 'Item Name | No. of Vers. | User'
In [5]: template = '{:14}|{:<15}|{}'
Print relevant information for all items.
In [6]: print(header)
Item Name | No. of Vers. | User
In [7]: for i in items:
...: relevant = tuple(j for j in keys if j.startswith(i))
...: users = set(gen_dict[x] for x in relevant)
...: print(template.format(i, len(relevant), ' '.join(users)))
...:
item_A |3 |john dave
item_B |1 |peter
item_C |2 |kris jack

Update keys in nested dictionary with some exception in key strings

So I've a list of students which looks something like this :
students = [ {'name': 'Jack' , 'status' : 'Average' , 'subjects' : { 'subject1' : 'English' , 'subject2' : 'Math' } , 'height' : '20cm' },
{'name': 'Tom' , 'status' : 'Good' , 'subjects' : { 'subject1' : 'English' , 'subject2' : 'Science' } , 'height' : '30cm' }
]
So the above list is of size 2. Assume that the size is pretty big, lets say 50 or 60 or more.
I want to return a list students_output & for each student I want to return a dictionary which contains the following values for each student which are fetched from the above list but have slightly modified 'keys'. The end output should be something like this :
students_output = [ {'student_name': 'Jack' , 'student_status' : 'Average' , 'student_subjects' : { 'student_subject1' : 'English' , 'student_subject2' : 'Math' } , 'child_height' : '20cm' },
{'student_name': 'Tom' , 'student_status' : 'Good' , 'student_subjects' : { 'student_subject1' : 'English' , 'student_subject2' : 'Science' } , 'child_height' : '30cm' }
]
I am not able to understand how I can create an effective loop so that the keys in my resultant data structure are maintained as provided in the output and i can fetch the data from the first list.
for example, in students_output, I know
students_output[0]['student_name']=students[0]['name']
But can anyone help me do it iteratively ?
In order to achieve this, you have to concatenate "student_" at the start of each key with some exception as "height" key. You may do it via combination of list comprehension and dict comprehension expression as:
students = [
{'name': 'Jack' , 'status' : 'Average' , 'subjects' : { 'subject1' : 'English' , 'subject2' : 'Math' } , 'height' : '20cm' },
{'name': 'Tom' , 'status' : 'Good' , 'subjects' : { 'subject1' : 'English' , 'subject2' : 'Science' } , 'height' : '30cm' }
]
def get_key(key):
return {
'height': 'child_height', # All exception you need in `key`
# apart from concatenating `"student_"`
}.get(key, 'student_' + key)
new_list = [{
get_key(k): ({
get_key(kk):v for kk, vv in v.items()} if isinstance(v, dict) else v) \
for k, v in s.items()
} for s in students]
Value hold by new_list will be:
[{'student_name': 'Jack', 'child_height': '20cm', 'student_status': 'Average', 'student_subjects': {'student_subject1': {'subject1': 'English', 'subject2': 'Math'}, 'student_subject2': {'subject1': 'English', 'subject2': 'Math'}}},
{'student_name': 'Tom', 'child_height': '30cm', 'student_status': 'Good', 'student_subjects': {'student_subject1': {'subject1': 'English', 'subject2': 'Science'}, 'student_subject2': {'subject1': 'English', 'subject2': 'Science'}}}]
Here's a quick-and-dirty function that will do what you need:
In [10]: def rename_keys(students):
...: d = {}
...: for k,v in students.items():
...: if isinstance(v,dict):
...: k = "student_" + k
...: v = rename_keys(v)
...: d[k] = v
...: elif k == 'height':
...: k = "child_height"
...: d[k] = v
...: else:
...: k = "student_" + k
...: d[k] = v
...: return d
...:
...:
In [11]: [rename_keys(d) for d in students]
Out[11]:
[{'child_height': '20cm',
'student_name': 'Jack',
'student_status': 'Average',
'student_subjects': {'student_subject1': 'English',
'student_subject2': 'Math'}},
{'child_height': '30cm',
'student_name': 'Tom',
'student_status': 'Good',
'student_subjects': {'student_subject1': 'English',
'student_subject2': 'Science'}}]
And really, this doesn't have to be recursive, you could substitute the recursive call with a dictionary comprehension:
v = {'student_'+key:value for key,value in v.items()}
You can use the following function inside a list comprehension like this:
def new_dict(d):
res = {}
for key, value in d.iteritems():
student_or_child = 'student' if key != 'height' else 'child'
if type(value) == dict:
res['{}_{}'.format(student_or_child, key)] = new_dict(value)
else:
res['{}_{}'.format(student_or_child, key)] = value
return res
The above function takes a dict as argument, for each key, value in the passed dict, if value is of type dict then the same function is called on value, and the result is added to res dict, else the same value is added.
Now, with a list comprehension, we can do:
[new_dict(d) for d in students]
Output:
>>> [new_dict(d) for d in students]
[{'child_height': '20cm', 'student_name': 'Jack', 'student_status': 'Average', 'student_subjects': {'student_subject1': 'English', 'student_subject2': 'Math'}}, {'child_height': '30cm', 'student_name': 'Tom', 'student_status': 'Good', 'student_subjects': {'student_subject1': 'English', 'student_subject2': 'Science'}}]

Turn a simple dictionary into dictionary with nested lists

Given the following data received from a web form:
for key in request.form.keys():
print key, request.form.getlist(key)
group_name [u'myGroup']
category [u'social group']
creation_date [u'03/07/2013']
notes [u'Here are some notes about the group']
members[0][name] [u'Adam']
members[0][location] [u'London']
members[0][dob] [u'01/01/1981']
members[1][name] [u'Bruce']
members[1][location] [u'Cardiff']
members[1][dob] [u'02/02/1982']
How can I turn it into a dictionary like this? It's eventually going to be used as JSON but as JSON and dictionaries are easily interchanged my goal is just to get to the following structure.
event = {
group_name : 'myGroup',
notes : 'Here are some notes about the group,
category : 'social group',
creation_date : '03/07/2013',
members : [
{
name : 'Adam',
location : 'London',
dob : '01/01/1981'
}
{
name : 'Bruce',
location : 'Cardiff',
dob : '02/02/1982'
}
]
}
Here's what I have managed so far. Using the following list comprehension I can easily make sense of the ordinary fields:
event = [ (key, request.form.getlist(key)[0]) for key in request.form.keys() if key[0:7] != "catches" ]
but I'm struggling with the members list. There can be any number of members. I think I need to separately create a list for them and add that to a dictionary with the non-iterative records. I can get the member data like this:
tmp_members = [(key, request.form.getlist(key)) for key in request.form.keys() if key[0:7]=="members"]
Then I can pull out the list index and field name:
member_arr = []
members_orig = [ (key, request.form.getlist(key)[0]) for key in request.form.keys() if key[0:7] ==
"members" ]
for i in members_orig:
p1 = i[0].index('[')
p2 = i[0].index(']')
members_index = i[0][p1+1:p2]
p1 = i[0].rfind('[')
members_field = i[0][p1+1:-1]
But how do I add this to my data structure. The following won't work because I could be trying to process members[1][name] before members[0][name].
members_arr[int(members_index)] = {members_field : i[1]}
This seems very convoluted. Is there a simper way of doing this, and if not how can I get this working?
You could store the data in a dictionary and then use the json library.
import json
json_data = json.dumps(dict)
print(json_data)
This will print a json string.
Check out the json library here
Yes, convert it to a dictionary, then use json.dumps(), with some optional parameters, to print out the JSON in the format you need:
eventdict = {
'group_name': 'myGroup',
'notes': 'Here are some notes about the group',
'category': 'social group',
'creation_date': '03/07/2013',
'members': [
{'name': 'Adam',
'location': 'London',
'dob': '01/01/1981'},
{'name': 'Bruce',
'location': 'Cardiff',
'dob': '02/02/1982'}
]
}
import json
print json.dumps(eventdict, indent=4)
The order of the key:value pairs is not always consistent, but if you're just looking for pretty-looking JSON that can be parsed by a script, while remaining human-readable, this should work. You can also sort the keys alphabetically, using:
print json.dumps(eventdict, indent=4, sort_keys=True)
The following python functions can be used to create a nested dictionary from the flat dictionary. Just pass in the html form output to decode().
def get_key_name(str):
first_pos = str.find('[')
return str[:first_pos]
def get_subkey_name(str):
'''Used with lists of dictionaries only'''
first_pos = str.rfind('[')
last_pos = str.rfind(']')
return str[first_pos:last_pos+1]
def get_key_index(str):
first_pos = str.find('[')
last_pos = str.find(']')
return str[first_pos:last_pos+1]
def decode(idic):
odic = {} # Initialise an empty dictionary
# Scan all the top level keys
for key in idic:
# Nested entries have [] in their key
if '[' in key and ']' in key:
if key.rfind('[') == key.find('[') and key.rfind(']') == key.find(']'):
print key, 'is a nested list'
key_name = get_key_name(key)
key_index = int(get_key_index(key).replace('[','',1).replace(']','',1))
# Append can't be used because we may not get the list in the correct order.
try:
odic[key_name][key_index] = idic[key][0]
except KeyError: # List doesn't yet exist
odic[key_name] = [None] * (key_index + 1)
odic[key_name][key_index] = idic[key][0]
except IndexError: # List is too short
odic[key_name] = odic[key_name] + ([None] * (key_index - len(odic[key_name]) + 1 ))
# TO DO: This could be a function
odic[key_name][key_index] = idic[key][0]
else:
key_name = get_key_name(key)
key_index = int(get_key_index(key).replace('[','',1).replace(']','',1))
subkey_name = get_subkey_name(key).replace('[','',1).replace(']','',1)
try:
odic[key_name][key_index][subkey_name] = idic[key][0]
except KeyError: # Dictionary doesn't yet exist
print "KeyError"
# The dictionaries must not be bound to the same object
odic[key_name] = [{} for _ in range(key_index+1)]
odic[key_name][key_index][subkey_name] = idic[key][0]
except IndexError: # List is too short
# The dictionaries must not be bound to the same object
odic[key_name] = odic[key_name] + [{} for _ in range(key_index - len(odic[key_name]) + 1)]
odic[key_name][key_index][subkey_name] = idic[key][0]
else:
# This can be added to the output dictionary directly
print key, 'is a simple key value pair'
odic[key] = idic[key][0]
return odic

Categories

Resources