Create a dict by splitting a string in an ordered dict - python

I have an ordered dict that represents field definitions, i.e. name, type, width and precision.
it looks like this:
OrderedDict([(u'CODE_MUN', 'str:8'), (u'CODE_DR_AN', 'str:8'),
(u'AREA', 'float:31.2'), (u'PERIMETER', 'float:31.4')])
I would like to create a dict for each item that would be like this:
{'name' : 'CODE_MUN', 'type': 'str', 'width': 8, 'precision':0} for fields without precision
and
{'name' : 'AREA', 'type': 'float', 'width': 31, 'precision':2 } for fields with precision
# Build one definition dict per (name, spec) pair of fieldsDict.
# NOTE(review): the loop body has lost its indentation in this listing.
for keys, values in fieldsDict.iteritems():
dict = {}
dict['name'] = keys
# NOTE(review): ':.' matches a colon followed by ANY single character, so the
# split also consumes the first character after the colon; a character class
# '[:.]' (split on ':' or on '.') is presumably what was intended.
props = re.split(':.', values)
dict['type'] = props[0]
dict['width'] = props[1]
# Raises IndexError whenever the split produced fewer than three parts.
dict['precision'] = props[2]
Of course, I get an IndexError when no precision is defined. What would be the best way to handle that?

Use a try-except block.
def parse_field_defs(fieldsDict):
    """Convert a ``{name: 'type:width[.precision]'}`` mapping to field dicts.

    :param fieldsDict: mapping of field name to a spec string such as
        ``'str:8'`` or ``'float:31.2'``
    :return: list of ``{'name', 'type', 'width', 'precision'}`` dicts in the
        iteration order of ``fieldsDict``; ``width`` and ``precision`` are
        ints and ``precision`` is 0 when the spec has no ``.precision`` part
    :raises ValueError: if the width or precision is not an integer
    """
    import re  # local import keeps the snippet self-contained

    fields = []
    for name, spec in fieldsDict.items():
        # '[:.]' splits on ':' or '.'. The pattern ':.' would instead match a
        # colon plus the following character and swallow the first digit of
        # the width.
        props = re.split('[:.]', spec)
        field = {'name': name, 'type': props[0], 'width': int(props[1])}
        try:
            field['precision'] = int(props[2])
        except IndexError:
            # No '.precision' suffix in the spec.
            field['precision'] = 0
        fields.append(field)
    return fields
You could also test for length using an if-else block. The methods are pretty close and I doubt this is a situation where it really matters, but for more on asking forgiveness vs permission you can see this question.

You have to check whether the precision is there or not.
from collections import OrderedDict
import re
fieldsDict = OrderedDict([(u'CODE_MUN', 'str:8'), (u'CODE_DR_AN', 'str:8'),
                          (u'AREA', 'float:31.2'), (u'PERIMETER', 'float:31.4')])


def parse_field_def(name, spec):
    """Build the definition dict for one field.

    :param name: field name
    :param spec: ``'type:width'`` or ``'type:width.precision'``
    :return: ``{'name', 'type', 'width', 'precision'}`` with integer width and
        precision; precision is 0 when the spec does not carry one
    :raises ValueError: if the width or precision is not an integer
    """
    # '[:.]' splits on ':' or '.'. The pattern ':.' would match a colon plus
    # the next character and eat the first digit of the width.
    props = re.split('[:.]', spec)
    has_precision = len(props) >= 3
    return {
        'name': name,
        'type': props[0],
        'width': int(props[1]),
        'precision': int(props[2]) if has_precision else 0,
    }


for keys, values in fieldsDict.items():
    print(parse_field_def(keys, values))
This might help.

Related

key error when trying to split two entries in a dictionary in python

i have a dictionary with entries that have the ip and ports displayed like this
{'source': '192.168.4.1:80', 'destination': '168.20.10.1:443'}
but i want it to display it like
{'src_ip': '192.168.4.1', 'src_port': 80, 'dest_ip': '168.20.10.1', 'dest_port': 443}
so i want to split the first two entries into 4 new ones and delete the two old ones.
my code currently looks like this:
# Sample entry from the question.
# NOTE(review): `log entry` (with a space) is not a valid identifier;
# presumably `log_entry` was meant.
log entry = {'source': '192.168.4.1:80', 'destination': '168.20.10.1:443'}
# Replace the 'source' and 'destination' "ip:port" strings of log_entry with
# separate src_ip / src_port / dest_ip / dest_port entries. The dict is
# modified in place and also returned.
# NOTE(review): the function body has lost its indentation in this listing.
def split_ip_port(log_entry):
# Plain indexing: raises KeyError when the entry has no 'source' key.
# Presumably this (and the 'destination' lookup below) is the KeyError seen
# when an entry holding only one of the two keys is passed in.
u_source = log_entry['source']
if ':' in u_source:
src_list = u_source.split(':')
src_ip = src_list[0]
# The port is stored as the split-off string; there is no int() conversion.
src_port = src_list[1]
log_entry.update({'src_ip': src_ip})
log_entry.update({'src_port': src_port})
del log_entry['source']
# Same pattern for the destination; raises KeyError without a 'destination' key.
u_dest = log_entry['destination']
if ':' in u_dest:
dest_list = u_dest.split(':')
dest_ip = dest_list[0]
dest_port = dest_list[1]
print(dest_list)
log_entry.update({'dest_ip': dest_ip})
log_entry.update({'dest_port': dest_port})
del log_entry['destination']
return log_entry
When I test an entry that has only a source, it gives me KeyError: 'destination', and when I test an entry that has only a destination, it gives me KeyError: 'source'. What is happening here?
When you split value (e.g., log_entry['source'].split(":") ) it returns list ['192.168.4.1','80']. Then you have to return value by index from list, [0] index in list is '192.168.4.1'. Then you have to assign it to new key in your dict, log_entry['src_ip']
# Split each "ip:port" value once. The port is converted to int so the result
# matches the desired {'src_port': 80, ..., 'dest_port': 443} shape.
src_ip, src_port = log_entry['source'].split(":", 1)
dest_ip, dest_port = log_entry['destination'].split(":", 1)
log_entry['src_ip'] = src_ip
log_entry['src_port'] = int(src_port)
log_entry['dest_ip'] = dest_ip
log_entry['dest_port'] = int(dest_port)
# Drop the combined entries only after both have been parsed successfully.
del log_entry['source']
del log_entry['destination']
Since the original code works, here is just a suggestion to simplify it: you could split each source/destination value into its address and port, and then build a new dictionary like this:
orig_dc = {'source': '192.168.4.1:80', 'destination': '168.20.10.1:443'}

# Build a new dict with separate ip / port entries instead of mutating orig_dc.
new_dc = {}
for k, v in orig_dc.items():
    orig, port = v.split(':', 1)
    # Compare with ==. `k in 'source'` is a substring test and would also be
    # true for keys such as 'our' or ''.
    prefix = 'src' if k == 'source' else 'dest'
    new_dc[prefix + '_ip'] = orig
    new_dc[prefix + '_port'] = int(port)

expected = {'src_ip': '192.168.4.1', 'src_port': 80,
            'dest_ip': '168.20.10.1', 'dest_port': 443}
assert new_dc == expected

Nested and escaped JSON payload to flattened dictionary - python

I'm looking for any suggestions to resolve an issue I'm facing. It might seem as a simple problem, but after a few days trying to find an answer - I think it is not anymore.
I'm receiving data (StringType) in a following JSON-like format, and there is a requirement to turn it into flat key-value pair dictionary. Here is a payload sample:
s = """{"status": "active", "name": "{\"first\": \"John\", \"last\": \"Smith\"}", "street_address": "100 \"Y\" Street"}"""
and the desired output should look like this:
{'status': 'active', 'name_first': 'John', 'name_last': 'Smith', 'street_address': '100 "Y" Street'}
The issue is that I can't find a way to turn the original string (s) into a dictionary. If I can achieve that, the flattening part works perfectly fine.
import json
import collections
import ast
#############################################################
# Flatten complex structure into a flat dictionary
#############################################################
def flatten_dictionary(dictionary, parent_key=False, separator='_', value_to_str=True):
    """
    Turn a nested complex json into a flattened dictionary

    String values that themselves contain JSON are decoded and flattened too.
    List elements are keyed by their index. Empty dicts and empty lists map
    to None.

    :param dictionary: The dictionary to flatten
    :param parent_key: The string to prepend to dictionary's keys
    :param separator: The string used to separate flattened keys
    :param value_to_str: Force all returned values to string type
    :return: A flattened dictionary
    """
    # collections.MutableMapping was removed in Python 3.10; the ABC lives in
    # collections.abc.
    from collections.abc import MutableMapping

    items = []
    for key, value in dictionary.items():
        new_key = str(parent_key) + separator + key if parent_key else key
        try:
            value = json.loads(value)
        except (TypeError, ValueError):
            # TypeError: value is not a str/bytes (already decoded).
            # ValueError (incl. JSONDecodeError): plain text, keep it as is.
            pass
        if isinstance(value, MutableMapping):
            if value:
                # Pass separator AND value_to_str down so nested levels obey
                # the caller's settings.
                items.extend(
                    flatten_dictionary(value, new_key, separator, value_to_str).items())
            else:
                items.append((new_key, None))
        elif isinstance(value, list):
            if value:
                for index, element in enumerate(value):
                    # Hand the raw element to the recursive call. Stringifying
                    # it first would turn a nested dict into its repr, which
                    # is not JSON and could never be flattened.
                    items.extend(
                        flatten_dictionary({str(index): element}, new_key,
                                           separator, value_to_str).items())
            else:
                items.append((new_key, None))
        else:
            items.append((new_key, str(value) if value_to_str else value))
    return dict(items)
# Data sample: the payload as a string and as a dictionary.
# NOTE(review): s is a non-raw literal, so every \" collapses to a plain " and
# no backslash reaches the string. The text held in s is therefore neither
# valid JSON nor a valid Python literal, which is why all three parsers below
# reject it. If the real payload does carry the backslashes (TODO confirm), an
# r"""...""" literal would reproduce it faithfully.
s = """{"status": "active", "name": "{\"first\": \"John\", \"last\": \"Smith\"}", "street_address": "100 \"Y\" Street"}"""
d = {"status": "active", "name": "{\"first\": \"John\", \"last\": \"Smith\"}", "street_address": "100 \"Y\" Street"}
# Works for dictionary type
print(flatten_dictionary(d))
# Doesn't work for string type, for any of the below methods
# NOTE(review): eval() executes arbitrary code; avoid it on received payloads.
e = eval(s)
# a = ast.literal_eval(s)
# j = json.loads(s)
Try:
import json
import re
def jsonify(s):
    """Parse the JSON-like payload s, dropping quotes nested in street_address.

    :param s: payload text in which nested objects appear as quoted strings
        and the street_address value contains one unescaped quoted word
    :return: the decoded dictionary; the inner quotes of street_address are
        removed
    :raises ValueError: if the repaired text is still not valid JSON
    """
    # Unwrap objects that were embedded as quoted strings: "{...}" -> {...}
    s = s.replace('"{', '{').replace('}"', '}')
    # Join the three pieces of the address, leaving out the two inner quotes.
    s = re.sub(r'street_address":\s+"(.+)"(.+)"(.+)"', r'street_address": "\1\2\3"', s)
    return json.loads(s)
If you must keep the quotes around Y, try:
def jsonify(s):
    """Parse the JSON-like payload s, keeping quotes nested in street_address.

    :param s: payload text in which nested objects appear as quoted strings
        and the street_address value may contain one unescaped quoted word
    :return: the decoded dictionary; street_address keeps its inner quotes
    :raises ValueError: if the repaired text is still not valid JSON
    """
    # Unwrap objects that were embedded as quoted strings: "{...}" -> {...}
    s = s.replace('"{', '{').replace('}"', '}')
    # Escape the two inner quotes so the value becomes a valid JSON string.
    # Removing them and re-inserting them with str.replace afterwards would
    # quote every occurrence of the word, and would fail when nothing matched.
    s = re.sub(r'(street_address":\s+")(.+)"(.+)"(.+")', r'\1\2\\"\3\\"\4', s)
    return json.loads(s)
A more generalized attempt:
def jsonify(s):
    """Parse the JSON-like payload s, keeping one quoted word inside any value.

    Generalised variant: any ``"key": "text "word" text"`` pair that follows a
    ``,`` or ``}`` is repaired, at any nesting depth.

    :param s: payload text in which nested objects appear as quoted strings
    :return: the decoded dictionary; repaired values keep their inner quotes
    :raises ValueError: if the repaired text is still not valid JSON
    """
    pattern = r'(?<=[,}])\s*"(.[^\{\}:,]+?)":\s+"([^\{\}:,]+?)"([^\{\}:,]+?)"([^\{\}:,]+?)"([,\}])'
    # Unwrap objects that were embedded as quoted strings: "{...}" -> {...}
    s = s.replace('"{', '{').replace('}"', '}')
    # Escape the inner quotes in place. Patching the decoded dict afterwards
    # only works for top-level keys and re-quotes every occurrence of the word.
    s = re.sub(pattern, r'"\1": "\2\\"\3\\"\4"\5', s)
    return json.loads(s)

List comprehension with string split

The following code takes two inputs, item_name and sale_type. It will look through item_dict to find if any keys that contain part of the item_name or fully matches it and output the values.
I am trying to convert the following for loop into a list comprehension. I am fairly comfortable with the basic list comprehensions but in this case I require the text to be split and obtain the relevant results. I am not sure if what I am asking for is possible.
item_name = "GalaxyDevices"
sale_type = "buy"
item_dict = {"buy_Galaxy": [11111, 2232], "sell_Galaxy": [2111]}

# Collect the values of every "<sale type>_<item>" key whose sale type matches
# and whose item part occurs inside item_name.
results = []
for key, value in item_dict.items():
    # Keep the split parts under their own name instead of rebinding `key`.
    parts = key.split("_")
    if parts[0] == sale_type and parts[1] in item_name:
        results.extend(value)
print(results)
input / output:
item_name = "GalaxyDevices"
sale_type = "buy"
>>> [11111, 2232]
My failed attempt:
results = [value.split("_") for key, value in item_dict.items()]
Many thanks!
I think this should work:
# Flatten the value lists of all keys whose "<sale type>_<item>" parts match.
results = [
    element
    for label, elements in item_dict.items()
    # Single-element list: binds the split parts once per key.
    for parts in [label.split('_')]
    if parts[0] == sale_type and parts[1] in item_name
    for element in elements
]
Use nested list comprehension and set to determine if any has intersection.
This however requires 3 rules:
Name has some kind of separator - CamelCase separation requires re.
Part of each name should not match any in dict. i.e. Hello_world matching Hello_buy and world_buy at same time for best result.
For one-liner:
# Ask for the name and transaction type, then keep the values of every key
# that contains the type and shares a word with the name.
name = input("name >> ")
type_ = input("transaction type >> ")
results = [value for key, value in item_dict.items() if type_ in key and set(name.split()) & set(key.split("_"))]
Explanation
# Outer comprehension: look up the value of every key that passes both filters.
results = [item_dict[key] for key in
# Inner list: only the keys that contain the transaction type as a substring.
[key_ for key_ in item_dict.keys() if type_ in key_]
# Keep a key when the whitespace-separated words of `name` and the
# "_"-separated parts of the key have at least one element in common.
if set(name.split()) & set(key.split("_"))]
The above is equivalent to:
def intersecting_words(name_, keys_):
    """Return the words of name_ that also occur among the key parts keys_."""
    # Split the name into words. set(name_) alone would be a set of single
    # characters and would never intersect with whole key parts.
    return set(name_.split()) & set(keys_)


# Keys that contain the transaction type and share a word with the name.
key_matching_part = [key for key in item_dict.keys()
                     if type_ in key and intersecting_words(name, key.split("_"))]
results = [item_dict[key] for key in key_matching_part]
Full test:
item_dict = {
    "Guido_buy": 100,
    "Guido_sell": -100,
    "Ramalho_buy": 200,
    "Ramalho_sell": -200,
}


def _values_for(person, kind):
    """Return the item_dict values whose key contains kind and shares a word with person."""
    wanted = set(person.split())
    hits = []
    for label in item_dict.keys():
        if kind in label and wanted & set(label.split("_")):
            hits.append(item_dict[label])
    return hits


name = "Guido van Rossum"
type_ = "buy"
results = _values_for(name, type_)
print(results)

name = "Luciano Ramalho"
type_ = "sell"
results = _values_for(name, type_)
print(results)
Output:
[100]
[-200]

Python Methods Refactoring for Filtering Dictionaries

I have a list of Dictionaries where the Dictionaries have the following structure:
{
"subject" : "subjectValue",
"object" : "objectValue",
"prediction" : "predictionValue"
}
Currently i have three Methods to filter all Dictionaries from the list for each of the fields like this:
def getSubjects(value, objectValue, predictionValue):
    """Return the "subject" of every statement matching both filters.

    :param value: iterable of dicts with "subject", "object", "prediction" keys
    :param objectValue: required value of the "object" field
    :param predictionValue: required value of the "prediction" field
    :return: list of matching subjects (empty when nothing matches)
    """
    return [
        stmt["subject"]
        for stmt in value
        if stmt["object"] == objectValue and stmt["prediction"] == predictionValue
    ]
def getObjects(value, subjectValue, predictionValue):
    """Return the "object" of every statement matching both filters.

    :param value: iterable of dicts with "subject", "object", "prediction" keys
    :param subjectValue: required value of the "subject" field
    :param predictionValue: required value of the "prediction" field
    :return: list of matching objects (empty when nothing matches)
    """
    return [
        stmt["object"]
        for stmt in value
        if stmt["subject"] == subjectValue and stmt["prediction"] == predictionValue
    ]
def getPredictions(value, objectValue, subjectValue):
    """Return the "prediction" of every statement matching both filters.

    :param value: iterable of dicts with "subject", "object", "prediction" keys
    :param objectValue: required value of the "object" field
    :param subjectValue: required value of the "subject" field
    :return: list of matching predictions (empty when nothing matches)
    """
    return [
        stmt["prediction"]
        for stmt in value
        if stmt["object"] == objectValue and stmt["subject"] == subjectValue
    ]
I also have the following three methods to just get one of the Dictionaries out of the list:
def getSubject(value, objectValue, predictionValue):
    """Return the "subject" of the first statement matching both filters.

    :param value: iterable of dicts with "subject", "object", "prediction" keys
    :param objectValue: required value of the "object" field
    :param predictionValue: required value of the "prediction" field
    :return: the first matching subject
    :raises StopIteration: if no statement matches
    """
    return next(
        stmt["subject"]
        for stmt in value
        if stmt["object"] == objectValue and stmt["prediction"] == predictionValue
    )
def getObject(value, subjectValue, predictionValue):
    """Return the "object" of the first statement matching both filters.

    :param value: iterable of dicts with "subject", "object", "prediction" keys
    :param subjectValue: required value of the "subject" field
    :param predictionValue: required value of the "prediction" field
    :return: the first matching object
    :raises StopIteration: if no statement matches
    """
    return next(
        stmt["object"]
        for stmt in value
        if stmt["subject"] == subjectValue and stmt["prediction"] == predictionValue
    )
def getPrediction(value, objectValue, subjectValue):
    """Return the "prediction" of the first statement matching both filters.

    :param value: iterable of dicts with "subject", "object", "prediction" keys
    :param objectValue: required value of the "object" field
    :param subjectValue: required value of the "subject" field
    :return: the first matching prediction
    :raises StopIteration: if no statement matches
    """
    return next(
        stmt["prediction"]
        for stmt in value
        if stmt["object"] == objectValue and stmt["subject"] == subjectValue
    )
Is there a better way to achieve this? Maybe in a single method, or in a more Pythonic way?
You can get rid of the next() functions by always returning an iterator instead of a list. Then you can either call next() or list() on the returned value. To generalize, you could require that the function is called with keyword arguments representing the keys you want to match along with the key you want to return. The signature would look like:
def pluckFilter(ds, key, **filters):
And you could call it like:
pluckFilter(listOfDicts, 'object', subject = 'subjectValue2', prediction = 'predictionValue' )
This would allow a pretty simple implementation (although you might want to validate the input):
listOfDicts = [{
"subject" : "subjectValue",
"object" : "objectValue",
"prediction" : "predictionValue"
},
{
"subject" : "subjectValue2",
"object" : "objectValue",
"prediction" : "predictionValue"
}
]
def pluckFilter(ds, key, **filters):
    """Lazily yield d[key] for every dict in ds that matches all filters.

    :param ds: iterable of dicts
    :param key: key whose value is returned for each matching dict
    :param filters: ``field=value`` pairs that a dict must all equal to match;
        with no filters every dict matches
    :return: an iterator, usable with next() or list()
    :raises KeyError: while iterating, if a dict lacks ``key`` or a filter field
    """
    # A generator expression is lazy on every Python version, so both
    # next(...) and list(...) work on the result.
    return (
        d[key]
        for d in ds
        if all(d[field] == wanted for field, wanted in filters.items())
    )
list(pluckFilter(listOfDicts, 'subject', object = 'objectValue', prediction = 'predictionValue' ))
# ['subjectValue', 'subjectValue2']
next(pluckFilter(listOfDicts, 'subject', object = 'objectValue', prediction = 'predictionValue' ))
# 'subjectValue'
list(pluckFilter(listOfDicts, 'object', subject = 'subjectValue2', prediction = 'predictionValue' ))
# ['objectValue']
I'd recommend using a SQLite3 database or a Pandas Data Frame. SQLite3 tutorial
import sqlite3
conn = sqlite3.connect("database.db")
try:
    # `with conn` runs the statements as one transaction: it commits on
    # success and rolls back on an exception. Without a commit the INSERT
    # would be discarded when the connection is closed.
    with conn:
        conn.execute("CREATE TABLE IF NOT EXISTS my_table(subject REAL, object REAL, prediction REAL)")
        conn.execute("INSERT INTO my_table(subject, object, prediction) VALUES (?, ?, ?)",
                     (10, 20, 30))
    # Read the rows back and print each one.
    for row in conn.execute("SELECT * FROM my_table"):
        print(row)
finally:
    # The context manager above does not close the connection; do it here,
    # even when a statement failed.
    conn.close()
Why won't you use pandas?
You can convert the list of dictionaries into pandas DataFrame. And then use convenient filtering to get the same values as the functions you have:
df = pd.DataFrame(list_of_dict, index=range(len(list_of_dict)))
e.g. the equivalent of getSubjects (all matching subjects):
# Boolean-mask the rows on both fields, then select the 'subject' column.
subjects = df.loc[(df["object"] == 'objectValue') & (df["prediction"] == 'predictionValue'), 'subject']

How can I sort a list of dicts arbitrarily by a particular key?

How can I sort my list of dicts by their name values according to an arbitrary ordering? I want dicts with a name of 720p to come first, then dicts with a name of 1080p, and finally dicts with a name of 360p.
# Collect one hoster dict per entry of the decoded JSON list.
# NOTE(review): the loop body has lost its indentation in this listing;
# aResult and sName are defined outside the visible snippet.
hosters = []
for entry in json.loads(aResult[1][0]):
# Skip entries lacking either of the two keys read below.
if 'file' not in entry or 'label' not in entry: continue
# The stored name is sName + ' - ' + label, not the bare label.
sLabel = sName + ' - ' + entry['label'].encode('utf-8')
hoster = dict()
hoster['link'] = entry['file']
hoster['name'] = sLabel
hoster['resolveable'] = True
hosters.append(hoster)
You're going to need to use a custom sorting function. Something like this should work:
def sort_by_resolution(hoster):
    """Sort key that ranks a hoster dict by its 'name' in the desired order.

    :param hoster: dict with a 'name' entry
    :return: the position of the name in 720p, 1080p, 360p; any other name
        gets the next position, so it sorts after the known ones
    :raises KeyError: if hoster has no 'name' entry
    """
    desired_order = ['720p', '1080p', '360p']
    try:
        return desired_order.index(hoster['name'])
    except ValueError:
        # Name is not one of the ranked resolutions.
        return len(desired_order)
sorted(foo, key=sort_by_resolution)
# [{'name': '720p'}, {'name': '1080p'}, {'name': '360p'}]

Categories

Resources