zip Cuts off After First Letter

zip Cuts off After First Letter - python

I'm trying to run the following code:
def by_primary_key(table, key, fields) -> object:
key_columns = get_key_columns(table, key )
print("key columns in get by primary key " , key_columns)
print("key, " , key )
zip_it = list(zip(key_columns, key))
print("zip_it", zip_it )
dictt = dict(zip_it)
print("dict", dictt)
The output I want for zip_it is: [('playerID', 'willite01')]
but the output the program produces is:
key columns in get by primary key ['playerID']
key, willite01
zip_it [('playerID', 'w')]
dict {'playerID': 'w'}
Where am I going wrong?

The following worked
key_columns = get_key_columns(table, key )
lst = []
lst.append(key)
tmp = dict(zip(key_columns, lst))
result = find_by_template1(table, tmp, fields)
return result

Related

Nested and escaped JSON payload to flattened dictionary - python

I'm looking for any suggestions to resolve an issue I'm facing. It might seem as a simple problem, but after a few days trying to find an answer - I think it is not anymore.
I'm receiving data (StringType) in a following JSON-like format, and there is a requirement to turn it into flat key-value pair dictionary. Here is a payload sample:
s = """{"status": "active", "name": "{\"first\": \"John\", \"last\": \"Smith\"}", "street_address": "100 \"Y\" Street"}"""
and the desired output should look like this:
{'status': 'active', 'name_first': 'John', 'name_last': 'Smith', 'street_address': '100 "Y" Street'}
The issue is I can't find a way to turn original string (s) into a dictionary. If I can achieve that the flattening part is working perfectly fine.
import json
import collections
import ast
#############################################################
# Flatten complex structure into a flat dictionary
#############################################################
def flatten_dictionary(dictionary, parent_key=False, separator='_', value_to_str=True):
"""
Turn a nested complex json into a flattened dictionary
:param dictionary: The dictionary to flatten
:param parent_key: The string to prepend to dictionary's keys
:param separator: The string used to separate flattened keys
:param value_to_str: Force all returned values to string type
:return: A flattened dictionary
"""
items = []
for key, value in dictionary.items():
new_key = str(parent_key) + separator + key if parent_key else key
try:
value = json.loads(value)
except BaseException:
value = value
if isinstance(value, collections.MutableMapping):
if not value.items():
items.append((new_key,None))
else:
items.extend(flatten_dictionary(value, new_key, separator).items())
elif isinstance(value, list):
if len(value):
for k, v in enumerate(value):
items.extend(flatten_dictionary({str(k): (str(v) if value_to_str else v)}, new_key).items())
else:
items.append((new_key,None))
else:
items.append((new_key, (str(value) if value_to_str else value)))
return dict(items)
# Data sample; sting and dictionary
s = """{"status": "active", "name": "{\"first\": \"John\", \"last\": \"Smith\"}", "street_address": "100 \"Y\" Street"}"""
d = {"status": "active", "name": "{\"first\": \"John\", \"last\": \"Smith\"}", "street_address": "100 \"Y\" Street"}
# Works for dictionary type
print(flatten_dictionary(d))
# Doesn't work for string type, for any of the below methods
e = eval(s)
# a = ast.literal_eval(s)
# j = json.loads(s)

Try:
import json
import re
def jsonify(s):
s = s.replace('"{','{').replace('}"','}')
s = re.sub(r'street_address":\s+"(.+)"(.+)"(.+)"', r'street_address": "\1\2\3"',s)
return json.loads(s)
If you must keep the quotes around Y, try:
def jsonify(s):
s = s.replace('"{','{').replace('}"','}')
search = re.search(r'street_address":\s+"(.+)"(.+)"(.+)"',s)
if search:
s = re.sub(r'street_address":\s+"(.+)"(.+)"(.+)"', r'street_address": "\1\2\3"',s)
dict_version = json.loads(s)
dict_version['street_address'] = dict_version['street_address'].replace(search.group(2),'"'+search.group(2)+'"')
return dict_version
A more generalized attempt:
def jsonify(s):
pattern = r'(?<=[,}])\s*"(.[^\{\}:,]+?)":\s+"([^\{\}:,]+?)"([^\{\}:,]+?)"([^\{\}:,]+?)"([,\}])'
s = s.replace('"{','{').replace('}"','}')
search = re.search(pattern,s)
matches = []
if search:
matches = re.findall(pattern,s)
s = re.sub(pattern, r'"\1": "\2\3\4"\5',s)
dict_version = json.loads(s)
for match in matches:
dict_version[match[0]] = dict_version[match[0]].replace(match[2],'"'+match[2]+'"')
return dict_version

Performance problem when using pandas apply on big dataframes

Im having some performance issues with the code below, mostly because of the apply function that im using on a huge dataframe. I want to update the semi_dict dictionary with some other data that im calculating with the some functions. Is it any way to improve this?
def my_function_1(semi_dict, row):
#do some calculation/other stuff based on the row data and append it to the dictionary
random_dict = dict(data=some_data, more_data=more_data)
semi_dict["data"].append(random_dict)
def my_function_2(semi_dict, row):
#do some calculation/other stuff based on the row data and append it to the dictionary
random_dict = dict(data=some_data, more_data=more_data)
semi_dict["data2"].append(random_dict)
dictionary_list = []
for v in values:
df_1_rows = df_1_rows[(df_1_rows.values == v)]
df_2_rows = df_2_rows[(df_2_rows.values == v)]
semi_dict = dict(value=v, data=[], data2=[])
function = partial(my_function_1, semi_dict)
function_2 = partial(my_function_2, semi_dict)
df_1_rows.apply(lambda row : function(row), axis=1)
df_2_rows.apply(lambda row : function_2(row), axis=1)
dictionary_list.append(semi_dict)

This answer uses dictionary merge from How to merge dictionaries of dictionaries?, but depending on your use case, you might not need it in the end:
import pandas as pd
import random
len_df = 10
row_values = list("ABCD")
extra_col_values = list("12345")
df_1 = pd.DataFrame([[random.choice(row_values), random.choice(extra_col_values)] for _ in range(len_df)], columns=['col1', 'extra1'])
df_2 = pd.DataFrame([[random.choice(row_values), random.choice(extra_col_values)] for _ in range(len_df)], columns=['col2', 'extra2'])
def make_dict(df):
# some calculations on the df
return {
'data': df.head(1).values.tolist(),
}
def make_dict_2(df):
# some calculations on the df
return {
'data_2': df.head(1).values.tolist(),
}
def merge(a, b, path=None):
"merges b into a, taken from https://stackoverflow.com/questions/7204805/how-to-merge-dictionaries-of-dictionaries "
if path is None: path = []
for key in b:
if key in a:
if isinstance(a[key], dict) and isinstance(b[key], dict):
merge(a[key], b[key], path + [str(key)])
elif a[key] == b[key]:
pass # same leaf value
else:
raise Exception('Conflict at %s' % '.'.join(path + [str(key)]))
else:
a[key] = b[key]
return a
dict1 = df_1.groupby('col1').apply(make_dict).to_dict()
dict2 = df_2.groupby('col2').apply(make_dict_2).to_dict()
result = merge(dict1, dict2)
result

List comprehension with string split

The following code takes two inputs, item_name and sale_type. It will look through item_dict to find if any keys that contain part of the item_name or fully matches it and output the values.
I am trying to convert the following for loop into a list comprehension. I am fairly comfortable with the basic list comprehensions but in this case I require the text to be split and obtain the relevant results. I am not sure if what I am asking for is possible.
item_name = "GalaxyDevices"
sale_type = "buy"
item_dict = {"buy_Galaxy": [11111, 2232], "sell_Galaxy": [2111]}
results = []
for key, value in item_dict.items():
key = key.split("_")
if key[0] != sale_type:
continue
if key[1] in item_name:
results.extend(value)
print(results)
input / output:
item_name = "GalaxyDevices"
sale_type = "buy"
>>> [11111, 2232]
My failed attempt:
results = [value.split("_") for key, value in item_dict.items()]
Many thanks!

I think this should work:
results = [
value
for key, values in item_dict.items()
if (key.split('_')[0] == sale_type
and key.split('_')[1] in item_name)
for value in values
]

Use nested list comprehension and set to determine if any has intersection.
This however requires 3 rules:
Name has some kind of separator - CamelCase separation requires re.
Part of each name should not match any in dict. i.e. Hello_world matching Hello_buy and world_buy at same time for best result.
For one-liner:
name = input("name >> ")
type_ = input("transaction type >> ")
results = [item_dict[key] for key in [key_ for key_ in item_dict.keys() if type_ in key_] if set(name.split()) & set(key.split("_"))]
Explanation
results = [item_dict[key] for key in
[key_ for key_ in item_dict.keys() if type_ in key_]
if set(name.split()) & set(key.split("_"))]
Above equals to:
intersecting_words = lambda name_, keys_: set(name) & set(keys_)
key_matching_part = [key for key in item_dict.keys() if intersecting_words(name, key.split("_"))]
results = [item_dict[key] for key in key_matching_part]
Full test:
item_dict = {"Guido_buy": 100, "Guido_sell": -100,
"Ramalho_buy": 200, "Ramalho_sell": -200}
name = "Guido van Rossum"
type_ = "buy"
results = [item_dict[key] for key in [key_ for key_ in item_dict.keys() if type_ in key_] if set(name.split()) & set(key.split("_"))]
print(results)
name = "Luciano Ramalho"
type_ = "sell"
results = [item_dict[key] for key in [key_ for key_ in item_dict.keys() if type_ in key_] if set(name.split()) & set(key.split("_"))]
print(results)
Output:
[100]
[-200]

dictionary comprehension in Python

Is there a way to the follwing in dictionary comprehension?
bmcdsreg = {}
for key, val in bms.iteritems():
bmcdsreg[key] = {}
for reg in bmmaps.columns:
bmcdsreg[key][reg]= val*bmmaps[reg]
I have the following version where the keys are interchanged:
bmcdsreg = {reg: {key: val*bmmaps[reg] for key, val in bms.iteritems()}
for reg in bmmaps.columns}

Here is how:
bmcdsreg = {
key: {reg: val * bmmaps[reg] for reg in bmmaps.columns}
for key, val in bms.items()
}

python key dict error multilevel dict

I'm not entirely sure why im getting a dictionary key error. I'm trying to create a multi level dict with = sign and getting a key error on metrics, but not on the first two.
doc['timestamp']
and
doc['instance_id']
both work fine, but when it gets to metrics it gives me a metrics key error. I'm not entirely sure why.
doc = {}
doc['timestamp'] = datetime.now()
#doc['instance_id'] = get_cloud_app_name()
doc['instance_id'] = "MyMac"
cpu_dict_returned = get_cpu_info()
doc['metrics']['cpu_usage']['user_cpu'] = cpu_dict_returned['user_cpu']
doc['metrics']["cpu_usage"]['system_cpu'] = cpu_dict_returned['system_cpu']
doc['metrics']["cpu_usage"]['idle_cpu'] = cpu_dict_returned['idle_cpu']
doc['metrics']["cpu_usage"]['cpu_count'] = cpu_dict_returned['cpu_count']

You must create the sub-dictionnaries before using them:
doc = {}
doc['timestamp'] = datetime.now()
doc['instance_id'] = "MyMac"
cpu_dict_returned = get_cpu_info()
doc['metrics'] = {}
doc['metrics']['cpu_usage'] = {}
doc['metrics']['cpu_usage']['user_cpu'] = cpu_dict_returned['user_cpu']
doc['metrics']["cpu_usage"]['system_cpu'] = cpu_dict_returned['system_cpu']
doc['metrics']["cpu_usage"]['idle_cpu'] = cpu_dict_returned['idle_cpu']
doc['metrics']["cpu_usage"]['cpu_count'] = cpu_dict_returned['cpu_count']

You can do this more succinctly using a dictionary comprehension:
doc = {}
doc['timestamp'] = datetime.now()
doc['instance_id'] = "MyMac"
cpu_dict_returned = get_cpu_info()
doc['metrics'] = {
'cpu_usage':
{k: cpu_dict_returned.get(k)
for k in ['user_cpu', 'system_cpu', 'idle_cpu', 'cpu_count']}
}
Note that the sub dictionary cpu_usage is first created, and then the nested dictionary is inserted.

Develop Reference

Python is a programming language that lets you work quickly and integrate systems more effectively.

zip Cuts off After First Letter - python

The following worked key_columns = get_key_columns(table, key ) lst = [] lst.append(key) tmp = dict(zip(key_columns, lst)) result = find_by_template1(table, tmp, fields) return result

Related

Nested and escaped JSON payload to flattened dictionary - python

Performance problem when using pandas apply on big dataframes

List comprehension with string split

dictionary comprehension in Python

python key dict error multilevel dict

Categories

Resources