Because an attribute can appear under several different names, I need to match the key of a key-value pair against a regex.
The possible names are defined in a list of (name, pattern) tuples:
MyAttr = [
('ref_nr', 'Reference|Referenz|Referenz-Nr|Referenznummer'),
('color', 'Color|color|tinta|farbe|Farbe'),
]
The import attributes from an item in another dict:
ImportAttr = [
('Referenz', 'Ref-Val'),
('color', 'red'),
]
Now I would like to return the value of the import attributes, if it is a known attribute (defined in my first dict MyAttr) matching different spelling of the attribute in question.
for key, value in ImportAttr:
if key == "Referenz-Nr" : ref = value
if key == "Farbe" : color = value
The goal is to return the value of a possible attribute if it is a known one.
print(ref)
print(color)
Should return the value if "Referenz-Nr" and "Farbe" are known attributes.
Obviously this pseudo code does not work, I just can't get my head around a function implementing regex for a key search.
The question was not entirely clear to me, but maybe this is what you want:
#!/usr/bin/python3
# Canonical attribute name paired with its '|'-separated accepted spellings.
MyAttr = [
    ('ref_nr', 'Reference|Referenz|Referenz-Nr|Referenznummer'),
    ('color', 'Color|color|tinta|farbe|Farbe')
]
# (key, value) pairs exactly as they arrive from the import.
ImportAttr = [
    ('Referenz', 'Ref-Val'),
    ('color', 'red'),
]

ref = None
color = None

# Split each spelling list once instead of on every loop iteration.
_ref_spellings = MyAttr[0][1].split('|')
_color_spellings = MyAttr[1][1].split('|')

for key, value in ImportAttr:
    # Both checks run for every pair; the last matching pair wins.
    if key in _ref_spellings:
        ref = value
    if key in _color_spellings:
        color = value

print("ref: ", ref)
print("color: ", color)
The split method breaks the string into a list of strings at the separator (the "|" character here); you can then check whether the key is in that list.
The following solution is a little bit tricky. If you don't want to hardcode the positions into your source you can use locals().
#!/usr/bin/python3
# Target variable name paired with the '|'-separated spellings that map to it.
MyAttr = [
    ('ref', 'Reference|Referenz|Referenz-Nr|Referenznummer'),
    ('color', 'Color|color|tinta|farbe|Farbe')
]
# (key, value) pairs exactly as they arrive from the import.
ImportAttr = [
    ('Referenz', 'Ref-Val'),
    ('color', 'red'),
]

ref = None
color = None

for var, names in MyAttr:
    accepted = names.split('|')
    for key, value in ImportAttr:
        if key in accepted:
            # NOTE: at module scope locals() is the module namespace, so this
            # rebinds the variable named by `var`. Inside a function the same
            # write would not update the local variable.
            locals()[var] = value
            # Only the first matching import pair is used for each variable.
            break

print("ref: ", ref)
print("color: ", color)
If you want, you can also use pandas to solve this problem for the large data sets in this way.
get_references_and_colors.py
import pandas as pd
import re
import json
def get_references_and_colors(lookups, attrs):
    """Classify each imported attribute as a reference, a color, or unknown.

    Args:
        lookups: Sequence of at least two entries. Element 0 of the first
            entry is the '|'-separated list of accepted spellings for the
            reference attribute; element 0 of the second entry is the same
            for the color attribute.
        attrs: Iterable of ``(key, value)`` pairs to classify.

    Returns:
        A list with one dict per input pair, in input order. Every dict has
        ``"for_attr"`` set to the key. A known reference spelling adds
        ``"ref": value``; a known color spelling adds ``"color": value``;
        an unknown key adds both ``"color"`` and ``"ref"`` set to ``None``.
        The reference spellings are checked first.
    """
    # Keep each spelling list in its own Series. Putting both into a single
    # DataFrame pads the shorter column, and the padding value ('' after
    # fillna) would then match an empty key as if it were a real spelling.
    refs = pd.Series(lookups[0][0].split("|"))
    colors = pd.Series(lookups[1][0].split("|"))

    responses = []
    for key, value in attrs:
        response = {"for_attr": key}
        if refs.eq(key).any():  # found among the reference spellings
            response["ref"] = value
        elif colors.eq(key).any():  # found among the color spellings
            response["color"] = value
        else:
            response["color"] = None  # Not Available
            response["ref"] = None
        responses.append(response)
    return responses
if __name__ == "__main__":
LOOKUPS = [
('Reference|Referenz|Referenz-Nr|Referenznummer', 'a'),
('Color|color|tinta|farbe|Farbe', 'b'),
]
ATTR = [
('Referenz', 'Ref-Val'),
('color', 'red'),
('color2', 'orange'), # improper
('tinta', 'Tinta-col')
]
responses = get_references_and_colors(LOOKUPS, ATTR) # dictionary
pretty_response = json.dumps(responses, indent=4) # for pretty printing
print(pretty_response)
Output
[
{
"for_attr": "Referenz",
"ref": "Ref-Val"
},
{
"for_attr": "color",
"color": "red"
},
{
"for_attr": "color2",
"color": null,
"ref": null
},
{
"for_attr": "tinta",
"color": "Tinta-col"
}
]
Related
I would like to pass a dataframe to @pytest.mark.parametrize. The dataframes are stored in conftest.py. The unit tests that reference the dataframes without using @pytest.mark.parametrize execute successfully.
However, when I apply @pytest.mark.parametrize, the code raises TypeError: 'function' object is not subscriptable
The dataframes are developed as functions in conftest.py. For example:
#pytest.fixture(scope="module")
def df_vartypes():
data = {
"Name": ["tom", "nick", "krish", "jack"],
"City": ["London", "Manchester", "Liverpool", "Bristol"],
"Age": [20, 21, 19, 18],
"Marks": [0.9, 0.8, 0.7, 0.6],
"dob": pd.date_range("2020-02-24", periods=4, freq="T"),
}
df = pd.DataFrame(data)
return df
The unit tests:
_cat_num_vars = [
(df_enc, "var_A", ["var_A"], []),
(df_enc_numeric, "var_B", [], ["var_B"]),
# TODO: Datetime test
(df_vartypes, None, ["Name", "City"], ["Age", "Marks"]),
(df_enc_numeric, None, [], ["var_A", "var_B", "target"])
]
#pytest.mark.parametrize(
"_df, _variables, _categorical_vars, _numerical_vars", _cat_num_vars
)
def test_find_categorical_and_numeric_vars_pass_diff_var_permutations(
_df, _variables, _categorical_vars, _numerical_vars
):
assert (_find_categorical_and_numerical_variables(
_df, _variables) == (_categorical_vars, _numerical_vars)
)
Traceback:
X = <function df_vartypes at 0x7fa8a1647310>, variables = None
def _find_categorical_and_numerical_variables(
X: pd.DataFrame, variables: Variables = None
) -> Tuple[List[Union[str, int]], List[Union[str, int]]]:
"""
Find numerical and categorical variables.
Parameters
----------
X : pandas DataFrame
variables : List of variables. Defaults to None.
Returns
-------
variables : Tuple with List of numerical and list of categorical variables.
"""
# If the user passes just 1 variable outside a list.
if isinstance(variables, (str, int)):
if is_categorical(X[variables]) or is_object(X[variables]):
variables_cat = [variables]
variables_num = []
elif is_numeric(X[variables]):
variables_num = [variables]
variables_cat = []
else:
raise TypeError("The variable entered is neither numerical "
"nor categorical.")
# If user leaves default None parameter.
elif variables is None:
# find categorical variables
if variables is None:
variables_cat = [
column
> for column in X.select_dtypes(include=["O", "category"]).columns
if _is_categorical_and_is_not_datetime(X[column])
]
E AttributeError: 'function' object has no attribute 'select_dtypes'
feature_engine/variable_manipulation.py:321: AttributeError
I have a JSON config and, based on user input, I need to filter it down to only a specific section. I tried running the code below, but it returns only part of the expected result.
Config:
superset_config = """
[ {
"Area":"Texas",
"Fruits": {
"RED": {
"Apple":["val1"],
"Grapes":["green"]
},
"YELLOW": {"key2":["val2"]}
}
},
{
"Area":"Dallas",
"Fruits": {
"GREEN": { "key3": ["val3"]}
}
}
]
"""
User Input:
inputs = ['Apple'] # input list
Code:
import json
derived_config = []
for each_src in json.loads(superset_config):
temp = {}
for src_keys in each_src:
if src_keys=='Fruits':
temp_inner ={}
for key,value in each_src[src_keys].items():
metrics = {key_inner:value_inner for key_inner,value_inner in value.items() if key_inner in inputs}
temp_inner[key]=metrics
temp[src_keys] = temp_inner
else:
temp[src_keys] = each_src[src_keys]
derived_config.append(temp)
what do I get from above code:
derived_config= [
{'Area': 'Texas',
'Fruits': {'RED': {'Apple': 'val1'},
'YELLOW': {}
}
},
{'Area': 'Dallas',
'Fruits': {'GREEN': {}
}
}
]
what is needed: I need below results
derived_config= [
{'Area': 'Texas',
'Fruits': {'RED': {'Apple': 'val1'}
}
}
]
can anyone please help? thanks.
Maybe something like this:
import json
inputs = ['Apple'] # input list
derived_config = []
for each_src in json.loads(superset_config):
filtered_fruits = {k: v for k, v in (each_src.get('Fruits') or {}).items()
if any(input_ in v for input_ in inputs)}
if filtered_fruits:
each_src['Fruits'] = filtered_fruits
derived_config.append(each_src)
print(derived_config)
Edit: Based on the comments, it looks like you might want to filter the inner Fruits map based on the input list of fruits as well. In that case, we don't need to use the any function as above.
There is also a risk that we unintentionally mutate the original source config. For example, if you save the result of json.loads(superset_config) to a variable and then filter several different fruits from it, you will likely mutate the original config object. If you call json.loads each time, you don't need to worry about mutating the object; however, be aware that list and dict are mutable types in Python, so this can be a concern.
The solution below avoids any possibility of mutating the original source object. But again, if you are calling json.loads each time anyway, you don't need to worry about this and are free to modify the original config object.
import json
# Note: If you are using Python 3.9+, you can just use the standard collections
# for `dict` and `list`, as they now support parameterized values.
from typing import Dict, Any, List
# The inferred type of the 'Fruits' key in the superset config.
# This is a mapping of fruit color to a `FruitMap`.
Fruits = Dict[str, 'FruitMap']
FruitMap = Dict[str, Any]
# The inferred type of the superset config.
Config = List[Dict[str, Any]]


def get_fruits_config(src_config: Config, fruit_names: List[str]) -> Config:
    """
    Return the entries of the superset config that contain a requested fruit.

    Each returned entry is a shallow copy of the source entry whose 'Fruits'
    mapping is reduced to the colors, and within each color the fruits, named
    in ``fruit_names``. Entries left with no matching fruit are omitted. The
    source entries themselves are not modified.
    """
    selected: Config = []
    for entry in src_config:
        by_color: Fruits = {}
        # A missing or empty 'Fruits' key is treated as "no fruits".
        for color, fruit_map in (entry.get('Fruits') or {}).items():
            wanted = {
                name: details
                for name, details in fruit_map.items()
                if name in fruit_names
            }
            if wanted:
                by_color[color] = wanted
        if not by_color:
            continue
        # Copy before replacing 'Fruits' so the caller's entry is untouched.
        trimmed = entry.copy()
        trimmed['Fruits'] = by_color
        selected.append(trimmed)
    return selected
Usage:
inputs = ['Apple'] # input list
config = json.loads(superset_config)
derived_config = get_fruits_config(config, inputs)
print(derived_config)
# prints:
# [{'Area': 'Texas', 'Fruits': {'RED': {'Apple': ['val1']}}}]
I have a question about how to customize an OrderedDict and convert it into JSON or a dictionary, while being able to rename the keys and change the structure. I have the data below:
result= OrderedDict([('index', 'cfs_fsd_00001'),
('host', 'GIISSP707'),
('source', 'D:\\usrLLSS_SS'),
('_time', '2018-11-02 14:43:30.000 EDT'),
('count', '153')])
...However, I want to change the format like this:
{
"servarname": {
"index": "cfs_fsd_00001",
"host": "GIISSP707"
},
"times": '2018-11-02 14:43:30.000 EDT',
"metricTags": {
"source": 'D:\\ddevel.log'"
},
"metricName": "serverice count",
"metricValue": 153,
"metricType": "count"
}
I would really appreciate your help. Basically, the output I get is flat, but I want to customize the structure. The original structure is
OrderedDict([('index', 'cfs_fsd_00001'),('host', 'GIISSP707').....]).
The output I want to achieve is {"servarname"{"index":"cfs_fsd_00001","host":"GIISSP707"},......
You can simply reference the result dict with the respective keys that you want your target data structure to have:
{
"servarname": {
"index": result['index'],
"host": result['host']
},
"times": result['_time'],
"metricTags": {
"source": result['source']
},
"metricName": "serverice count",
"metricValue": result['count'],
"metricType": "count"
}
I'm not sure how flexible your method needs to be. I assume your OrderedDict has a few common keys and that you want to find the metric in it, then reformat everything into a new dict. Here is a short function, implemented in Python 3, that I hope helps.
from collections import OrderedDict
import json
def reformat_ordered_dict(dict_result):
    """Reconstruct the flat OrderedDict result into the nested metric format

    The input must contain the common keys 'index', 'host', 'source' and
    '_time'. The first key that is not one of those is treated as the metric:
    its name becomes 'metricType' and its value becomes 'metricValue'. Keys
    are re-mapped as follows: 'index' and 'host' are packed into
    'servarname', '_time' becomes 'times', and 'source' is packed into
    'metricTags'.

    :param dict_result: the OrderedDict
    :return: a new OrderedDict with the keys 'servarname', 'times',
        'metricTags', 'metricName', 'metricValue' and 'metricType', in that
        order
    :raises AssertionError: if a common key is missing or no metric key exists
    """
    common_keys = ('index', 'host', 'source', '_time')

    # Raise explicitly instead of using `assert`: assert statements are
    # removed when Python runs with -O, which would silently disable this
    # validation. AssertionError is kept so existing callers see the same type.
    if not all(common_key in dict_result for common_key in common_keys):
        raise AssertionError('You have to provide all the commen keys!')

    # The metric is the first key that is not one of the common keys.
    metric = next((key for key in dict_result if key not in common_keys), None)
    if metric is None:
        raise AssertionError('Cannot find metric in the OrderedDict!')

    # write common keys
    reformated = OrderedDict()
    reformated["servarname"] = OrderedDict([
        ("index", dict_result['index']),
        ("host", dict_result['host'])
    ])
    reformated["times"] = dict_result['_time']
    reformated["metricTags"] = {"source": dict_result['source']}

    # write metric
    # 'metricName' is a fixed label; customize it if the metric name is
    # needed here instead.
    reformated['metricName'] = "serverice count"
    reformated['metricValue'] = dict_result[metric]
    reformated['metricType'] = metric
    return reformated
if __name__ == '__main__':
result= OrderedDict([('index', 'cfs_fsd_00001'),
('host', 'GIISSP707'),
('source', 'D:\\usrLLSS_SS'),
('_time', '2018-11-02 14:43:30.000 EDT'),
('count', '153')])
reformated = reformat_ordered_dict(result)
print(json.dumps(reformated))
I'm new to Python and was wondering how I could get the estimatedWait and routeName from this string.
{
"lastUpdated": "07:52",
"filterOut": [],
"arrivals": [
{
"routeId": "B16",
"routeName": "B16",
"destination": "Kidbrooke",
"estimatedWait": "due",
"scheduledTime": "06: 53",
"isRealTime": true,
"isCancelled": false
},
{
"routeId":"B13",
"routeName":"B13",
"destination":"New Eltham",
"estimatedWait":"29 min",
"scheduledTime":"07:38",
"isRealTime":true,
"isCancelled":false
}
],
"serviceDisruptions":{
"infoMessages":[],
"importantMessages":[],
"criticalMessages":[]
}
}
And then save this to another string which would be displayed on the lxterminal of the raspberry pi 2. I would like only the 'routeName' of B16 to be saved to the string. How do I do that?
You just have to deserialise the object and then use the index to access the data you want.
To find only the B16 entries you can filter the arrivals list.
import json
obj = json.loads(json_string)
# Keep only the B16 arrivals. Build a real list: on Python 3 filter() returns
# a lazy iterator, which is always truthy and cannot be indexed.
b16_objs = [a for a in obj['arrivals'] if a['routeName'] == 'B16']
if b16_objs:
    # get the first item
    b16 = b16_objs[0]
    my_estimatedWait = b16['estimatedWait']
    print(my_estimatedWait)
You can use string.find() to get the indices of those value identifiers
and extract them.
Example:
def get_vaules(string):
    """Extract the first estimatedWait and routeName values from JSON text.

    This is a plain text scan, not a JSON parse: for each key it returns the
    characters between the quotes of the first string value that follows the
    key. Whitespace around the colon is tolerated. Values containing escaped
    quotes are not handled; deserialize with the json module when that
    matters.

    Returns a tuple ``(estimatedWait, routeName)``; a key that is not found
    yields an empty string.
    """
    import re

    def _first_string_value(key):
        # Capture the text between the quotes of the value, not the key
        # itself. Slicing from the key's own opening quote yields nothing.
        match = re.search(r'"%s"\s*:\s*"([^"]*)"' % re.escape(key), string)
        return match.group(1) if match else ''

    estimatedWait = _first_string_value('estimatedWait')
    routeName = _first_string_value('routeName')
    return estimatedWait, routeName
Or you could just deserialize the json object (highly recommended)
import json
def get_values(string):
    """Return (estimatedWait, routeName) of the first arrival in a JSON document.

    `string` is parsed with json.loads; the values are read from the first
    element of its 'arrivals' list.
    """
    first_arrival = json.loads(string)['arrivals'][0]
    return first_arrival['estimatedWait'], first_arrival['routeName']
Parsing values from a JSON file using Python?
Given the following data received from a web form:
for key in request.form.keys():
print key, request.form.getlist(key)
group_name [u'myGroup']
category [u'social group']
creation_date [u'03/07/2013']
notes [u'Here are some notes about the group']
members[0][name] [u'Adam']
members[0][location] [u'London']
members[0][dob] [u'01/01/1981']
members[1][name] [u'Bruce']
members[1][location] [u'Cardiff']
members[1][dob] [u'02/02/1982']
How can I turn it into a dictionary like this? It's eventually going to be used as JSON but as JSON and dictionaries are easily interchanged my goal is just to get to the following structure.
event = {
group_name : 'myGroup',
notes : 'Here are some notes about the group,
category : 'social group',
creation_date : '03/07/2013',
members : [
{
name : 'Adam',
location : 'London',
dob : '01/01/1981'
}
{
name : 'Bruce',
location : 'Cardiff',
dob : '02/02/1982'
}
]
}
Here's what I have managed so far. Using the following list comprehension I can easily make sense of the ordinary fields:
event = [ (key, request.form.getlist(key)[0]) for key in request.form.keys() if key[0:7] != "catches" ]
but I'm struggling with the members list. There can be any number of members. I think I need to separately create a list for them and add that to a dictionary with the non-iterative records. I can get the member data like this:
tmp_members = [(key, request.form.getlist(key)) for key in request.form.keys() if key[0:7]=="members"]
Then I can pull out the list index and field name:
member_arr = []
members_orig = [ (key, request.form.getlist(key)[0]) for key in request.form.keys() if key[0:7] ==
"members" ]
for i in members_orig:
p1 = i[0].index('[')
p2 = i[0].index(']')
members_index = i[0][p1+1:p2]
p1 = i[0].rfind('[')
members_field = i[0][p1+1:-1]
But how do I add this to my data structure. The following won't work because I could be trying to process members[1][name] before members[0][name].
members_arr[int(members_index)] = {members_field : i[1]}
This seems very convoluted. Is there a simpler way of doing this, and if not, how can I get this working?
You could store the data in a dictionary and then use the json library.
import json
json_data = json.dumps(dict)
print(json_data)
This will print a json string.
Check out the json library here
Yes, convert it to a dictionary, then use json.dumps(), with some optional parameters, to print out the JSON in the format you need:
eventdict = {
'group_name': 'myGroup',
'notes': 'Here are some notes about the group',
'category': 'social group',
'creation_date': '03/07/2013',
'members': [
{'name': 'Adam',
'location': 'London',
'dob': '01/01/1981'},
{'name': 'Bruce',
'location': 'Cardiff',
'dob': '02/02/1982'}
]
}
import json
print json.dumps(eventdict, indent=4)
The order of the key:value pairs is not always consistent, but if you're just looking for pretty-looking JSON that can be parsed by a script, while remaining human-readable, this should work. You can also sort the keys alphabetically, using:
print json.dumps(eventdict, indent=4, sort_keys=True)
The following python functions can be used to create a nested dictionary from the flat dictionary. Just pass in the html form output to decode().
def get_key_name(str):
    """Return the part of a form key that precedes its first '['.

    For 'members[0][name]' this is 'members'. A key without any '[' is
    returned unchanged, rather than losing its last character to a -1 index.
    """
    return str.partition('[')[0]
def get_subkey_name(str):
    '''Return the last bracketed segment of a key, brackets included.

    Used with lists of dictionaries only. For 'members[0][name]' the result
    is '[name]'.
    '''
    return str[str.rfind('['):str.rfind(']') + 1]
def get_key_index(str):
    """Return the first bracketed segment of a key, brackets included.

    For 'members[0][name]' the result is '[0]'.
    """
    return str[str.find('['):str.find(']') + 1]
def _padded_slots(odic, name, index, make_blank):
    """Return the list at odic[name], created or extended so `index` is valid."""
    slots = odic.setdefault(name, [])
    # Entries can arrive in any order, so pad up to the requested position
    # with fresh placeholders (a new object per slot, never a shared one).
    slots.extend(make_blank() for _ in range(index + 1 - len(slots)))
    return slots


def decode(idic):
    """Build a nested dictionary from a flat mapping of bracketed form keys.

    Each value in `idic` is indexed with ``[0]``, i.e. it is expected to be a
    sequence whose first element is the submitted value. Keys are handled as
    follows:

    * ``name`` (no brackets): stored as ``result['name'] = value``.
    * ``name[i]`` (one bracket pair): stored at position ``i`` of the list
      ``result['name']``; unfilled positions hold ``None``.
    * ``name[i][field]``: stored under ``field`` in the dictionary at
      position ``i`` of the list ``result['name']``; unfilled positions hold
      empty dictionaries.

    The text inside the first bracket pair must parse as an integer,
    otherwise ``ValueError`` is raised. Nothing is printed while decoding.
    """
    odic = {}  # Initialise an empty dictionary
    # Scan all the top level keys
    for key in idic:
        value = idic[key][0]

        # Keys without brackets can be added to the output directly.
        if '[' not in key or ']' not in key:
            odic[key] = value
            continue

        name, _, remainder = key.partition('[')
        index = int(remainder.partition(']')[0])

        if key.count('[') == 1 and key.count(']') == 1:
            # Exactly one bracket pair: a plain list entry.
            _padded_slots(odic, name, index, lambda: None)[index] = value
        else:
            # The last bracket pair names the field inside the list entry.
            field = key[key.rfind('[') + 1:key.rfind(']')]
            _padded_slots(odic, name, index, dict)[index][field] = value
    return odic