Get data from a dictionary using a path held in a variable in Python

I get a dictionary from the database with roughly the structure below, along with the path to the desired field as a string of the form ['extra_fields']['period']. Can you please tell me how to turn this string into actual access to the data in the dictionary, i.e. into this piece of Python code: clt['extra_fields']['period'] or clt.get('extra_fields').get('period')? I'm sitting here thinking and can't figure out the best way to do it. The path string can be anything, not just as in the example (so hardcoding is not an option).
clt = {
    "first_name": "test",
    "last_name": "test",
    "phone": "123",
    "client_id": 123,
    "email": "test@mail.ru",
    "father_name": "",
    "source": "test",
    "extra_fields": {
        "amount": 50000,
        "period": 90,
        "utm_source": "TEST_SOURCE",
        "utm_campaign": "TEST_CAMPAIGN",
        "keyword": "TEST_CREATIVE_ID"
    }
}

Assuming the path is correct, you can either use it with eval, or parse the keys out of the string and then use those parsed keys to access the data. Note that eval executes arbitrary code, so only use it if the path string comes from a trusted source.
An example with eval is as follows (considering the dictionary access path is correct):
>>> clt = {
...     "first_name": "test",
...     "last_name": "test",
...     "phone": "123",
...     "client_id": 123,
...     "email": "test@mail.ru",
...     "father_name": "",
...     "source": "test",
...     "extra_fields": {
...         "amount": 50000,
...         "period": 90,
...         "utm_source": "TEST_SOURCE",
...         "utm_campaign": "TEST_CAMPAIGN",
...         "keyword": "TEST_CREATIVE_ID"
...     }
... }
>>>
>>> path = "['extra_fields']['period']"
>>>
>>> data = eval(f"clt{path}")
>>> data
90
>>>

You can convert the path to a list of keys using a regex, and get the value using recursion:
import re

def get_value(_clt, path):
    value = _clt.get(path[0])
    # Only recurse while there are keys left to consume
    if type(value) is dict and len(path) > 1:
        return get_value(value, path[1:])
    return value

path = "['extra_fields']['period']"
path = re.findall(r'\[(.*?)]', path.replace("'", ''))
print(get_value(clt, path))  # 90

path = "['last_name']"
path = re.findall(r'\[(.*?)]', path.replace("'", ''))
print(get_value(clt, path))  # test
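A safer alternative to eval, for what it's worth, is to split the path string into keys and fold over them with functools.reduce. A minimal sketch of my own, assuming the same bracketed path format as above:

import re
from functools import reduce

def get_by_path(d, path):
    # "['extra_fields']['period']" -> ['extra_fields', 'period']
    keys = re.findall(r"\['(.*?)'\]", path)
    # Walk the nested dicts one key at a time; .get() avoids a KeyError,
    # returning an empty dict for missing intermediate keys
    return reduce(lambda acc, key: acc.get(key, {}), keys, d)

print(get_by_path(clt, "['extra_fields']['period']"))  # 90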

Related

Use a nested dict (or json) with django-environ?

I've got a nested dict format that I wanted to setup in an environment.
It looks like this:
DEPARTMENTS_INFORMATION={
    "Pants": {
        "name": "Pants Department",
        "email": "pants@department.com",
        "ext": "x2121"
    },
    "Shirts": {
        "name": "Shirt Department",
        "email": "shirts@department.com",
        "ext": "x5151"
    },
    "Socks": {
        "name": "Sock Department",
        "email": "socks@department.com",
        "ext": " "
    }
}
I am using django-environ for this and tried using it like this:
DEPARTMENTS = env.dict("DEPARTMENTS_INFORMATION", default={})
But it's giving me this error:
ValueError: dictionary update sequence element #0 has length 1; 2 is required
I'm not sure how to make the nested dictionary an environment variable - any help appreciated!
You can create 2 files:
file.env (you have to write the dict in one line)
DEPARTMENTS_INFORMATION={"Pants": {"name": "Pants Department","email": "pants@department.com","ext": "x2121"},"Shirts": {"name": "Shirt Department","email": "shirts@department.com","ext": "x5151"},"Socks": {"name": "Sock Department","email": "socks@department.com","ext": " "}}
main.py
import environ

# start environ
env = environ.Env()
# load the env file
environ.Env.read_env("file.env")
# read the data
data = env.json("DEPARTMENTS_INFORMATION")
print(data)
Hope this helps.
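As far as I know, env.json just runs json.loads on the raw variable, so the value in file.env has to be valid JSON on a single line (double quotes, no trailing commas). A quick way to sanity-check the string before wiring it into django-environ:

import json

raw = '{"Pants": {"name": "Pants Department", "email": "pants@department.com", "ext": "x2121"}}'
data = json.loads(raw)  # raises json.JSONDecodeError if the value is malformed
print(data["Pants"]["email"])  # pants@department.com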

copying data from json response [Python]

I have a scenario where I extract data from a JSON response obtained from a GET request, rebuild the JSON data by changing some values (the idter value), and then immediately send the rebuilt JSON back with a PUT request.
below is the target json response.
target_json = {
    "name": "toggapp",
    "ts": [
        1234,
        3456
    ],
    "gs": [
        {
            "id": 4491,
            "con": "mno"
        },
        {
            "id": 4494,
            "con": "hkl"
        }
    ],
    "idter": 500,
    "datapart": False
}
from the above json I am trying to change the idter value to my custom value and rebuild it into json data again and post the new json data.
Here is what I have tried :
headers = {'Authorization': 'bearer ' + auth_token, 'Content-Type': 'application/json', 'Accept': 'application/json'}
testid = [7865, 7536, 7789]
requiredbdy = []
for key in testid:
    get_metadata_targetjson = requests.get('https://myapp.com/%s' % key, headers=headers)
    metadata = get_metadata_targetjson.json()
    for key1 in metadata:
        requiredbdy.append(
            {
                "metadata": [{
                    "name": key1['name'],
                    "ts": key1['ts'],
                    "gs": key1['gs'],
                    "idter": 100,  # custom value which I want to change
                    "datapart": False
                }]
            }
        )
    send_metadata_newjson = requests.put('https://myapp.com/%s' % key, headers=headers, data=requiredbdy)
    print(send_metadata_newjson.status_code)
Is this approach fine or How do I proceed in order to achieve this scenario.
You can use the built-in json module for this like so
import json
my_json = """
{
    "name": "toggapp",
    "ts": [
        1234,
        3456
    ],
    "gs": [
        {
            "id": 4491,
            "con": "mno"
        },
        {
            "id": 4494,
            "con": "hkl"
        }
    ],
    "idter": 500,
    "datapart": false
}
"""
json_obj = json.loads(my_json)
json_obj['idter'] = 600
print(json.dumps(json_obj))
Prints
{"name": "toggapp", "ts": [1234, 3456], "gs": [{"id": 4491, "con": "mno"}, {"id": 4494, "con": "hkl"}], "idter": 600, "datapart": false}
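Putting it together with the requests calls from the question, here is a minimal sketch of the GET, modify, PUT round trip; the URL, auth_token and testid values are placeholders taken from the question:

import requests

headers = {
    'Authorization': 'bearer ' + auth_token,
    'Content-Type': 'application/json',
    'Accept': 'application/json',
}
testid = [7865, 7536, 7789]

for key in testid:
    resp = requests.get('https://myapp.com/%s' % key, headers=headers)
    metadata = resp.json()    # parsed into a dict, no manual json.loads needed
    metadata['idter'] = 100   # change just the field you care about
    put_resp = requests.put('https://myapp.com/%s' % key, headers=headers,
                            json=metadata)  # json= serializes the dict for you
    print(put_resp.status_code)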
There's this small script I used to find entries in some very long and unnerving JSONs. It's not very beautiful and badly documented, but maybe it helps in your scenario.
from RecursiveSearch import Retriever

def alter_data(json_data, key, original, newval):
    '''
    Alter *all* values of said keys
    '''
    retr = Retriever(json_data)
    for item_no, item in enumerate(retr.__track__(key)):  # i.e. all 'value'
        # Pick parent objects with a last element False in the __track__() result,
        # indicating that `key` is either a dict key or a set element
        if not item[-1]:
            parent = retr.get_parent(key, item_no)
            try:
                if parent[key] == original:
                    parent[key] = newval
            except TypeError:
                # It's a set, this is not the key you're looking for
                pass

if __name__ == '__main__':
    alter_data(notification, key='value',
               original='********** THIS SHOULD BE UPDATED **********',
               newval='*UPDATED*')
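If you don't have that helper module around, a small self-contained recursive walk can do the same job. A sketch of my own (not the Retriever API above) that replaces every occurrence of key whose current value equals original, descending through nested dicts and lists:

def replace_matching(obj, key, original, newval):
    """Recursively replace obj[key] == original with newval."""
    if isinstance(obj, dict):
        for k, v in obj.items():
            if k == key and v == original:
                obj[k] = newval
            else:
                replace_matching(v, key, original, newval)
    elif isinstance(obj, list):
        for item in obj:
            replace_matching(item, key, original, newval)

data = {"a": [{"value": "old"}, {"value": "keep"}]}
replace_matching(data, key="value", original="old", newval="new")
print(data)  # {'a': [{'value': 'new'}, {'value': 'keep'}]}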

how to extract specific data from json and put in to csv using python

I have a JSON which is in nested form. I would like to extract specific data from the JSON and put it into a CSV using pandas in Python.
data = {
    "class": "hudson.model.Hudson",
    "jobs": [
        {
            "_class": "hudson.model.FreeStyleProject",
            "name": "git_checkout",
            "url": "http://localhost:8080/job/git_checkout/",
            "builds": [
                {
                    "_class": "hudson.model.FreeStyleBuild",
                    "duration": 1201,
                    "number": 6,
                    "result": "FAILURE",
                    "url": "http://localhost:8080/job/git_checkout/6/"
                }
            ]
        },
        {
            "_class": "hudson.model.FreeStyleProject",
            "name": "output",
            "url": "http://localhost:8080/job/output/",
            "builds": []
        },
        {
            "_class": "org.jenkinsci.plugins.workflow.job.WorkflowJob",
            "name": "pipeline_test",
            "url": "http://localhost:8080/job/pipeline_test/",
            "builds": [
                {
                    "_class": "org.jenkinsci.plugins.workflow.job.WorkflowRun",
                    "duration": 9274,
                    "number": 85,
                    "result": "SUCCESS",
                    "url": "http://localhost:8080/job/pipeline_test/85/"
                },
                {
                    "_class": "org.jenkinsci.plugins.workflow.job.WorkflowRun",
                    "duration": 4251,
                    "number": 84,
                    "result": "SUCCESS",
                    "url": "http://localhost:8080/job/pipeline_test/84/"
                }
            ]
        }
    ]
}
From the above JSON I want to fetch each job's name value and each build's result value. I am new to Python, any help will be appreciated.
Till now I have tried
main_data = data['jobs']
json_normalize(main_data, ['builds'],
               record_prefix='jobs_', errors='ignore')
which gives only the build key values and not the name of the job.
Can anyone help?
Considering that only the first build's result value needs to end up in the csv column, you can achieve this using pandas.
#data = ... #same dictionary as in the question
main_data = data.get('jobs')
res = {'name': [], 'result': []}
for name_dict in main_data:
    res['name'].append(name_dict.get('name', 'NA'))
    resultval = name_dict['builds'][0].get('result') if len(name_dict['builds']) > 0 else 'NA'
    res['result'].append(resultval)
print(res)

import pandas as pd
df = pd.DataFrame(res)
df.to_csv("/home/file_timer/jobs.csv", index=False)
Check the csv file output
name,result
git_checkout,FAILURE
output,NA
pipeline_test,SUCCESS
If you want to skip the 'NA' results, then:
main_data = data.get('jobs')
res = {'name': [], 'result': []}
for name_dict in main_data:
    if len(name_dict['builds']) == 0:
        continue
    res['name'].append(name_dict.get('name', 'NA'))
    resultval = name_dict['builds'][0].get('result')
    res['result'].append(resultval)
print(res)

import pandas as pd
df = pd.DataFrame(res)
df.to_csv("/home/akash.pagar/shell_learning/file_timer/jobs.csv", index=False)
Output will be like
name,result
git_checkout,FAILURE
pipeline_test,SUCCESS
Simply with build number,
for job in data.get('jobs'):
    for build in job.get('builds'):
        print(job.get('name'), build.get('number'), build.get('result'))
gives the result
git_checkout 6 FAILURE
pipeline_test 85 SUCCESS
pipeline_test 84 SUCCESS
If you want to get the result of the latest build, and are pretty sure the build numbers are always in descending order,
for job in data.get('jobs'):
    if job.get('builds'):
        print(job.get('name'), job.get('builds')[0].get('result'))
and if you are not sure about the order,
for job in data.get('jobs'):
    if job.get('builds'):
        print(job.get('name'), sorted(job.get('builds'), key=lambda k: k.get('number'))[-1].get('result'))
then the result will be:
git_checkout FAILURE
pipeline_test SUCCESS
Assuming last build is the last element of its list and you don't care about jobs with no builds, this does:
import pandas as pd
#data = ... #same format as in the question
z = [(job["name"], job["builds"][-1]["result"]) for job in data["jobs"] if len(job["builds"])]
df = pd.DataFrame(data=z, columns=["name", "result"])
#df.to_csv #TODO
Also we don't necessarily need pandas to create the csv file.
You could do:
import csv
#z = ... #see previous code block
with open("f.csv", 'w', newline='') as fp:  # newline='' avoids blank lines on Windows
    csv.writer(fp).writerows([("name", "result")] + z)
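Since the question already tried json_normalize, it may be worth showing the meta parameter, which is what carries the job-level name onto each build row. A sketch, assuming a pandas version where json_normalize is exposed as pd.json_normalize:

import pandas as pd

# One row per build; meta= pulls the parent job's "name" onto every row
df = pd.json_normalize(data["jobs"], record_path="builds",
                       meta=["name"], record_prefix="build_")
df[["name", "build_result"]].to_csv("jobs.csv", index=False)

Jobs with an empty builds list simply contribute no rows, which matches the skip-'NA' output above.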

Pull key from json file when values is known (groovy or python)

Is there any way to pull the key from JSON if the only thing I know is the value? (In groovy or python)
An example:
I know the "_number" value and I need a key.
So let's say, known _number is 2 and as an output, I should get dsf34f43f34f34f
{
    "id": "8e37ecadf4908f79d58080e6ddbc",
    "project": "some_project",
    "branch": "master",
    "current_revision": "3rtgfgdfg2fdsf",
    "revisions": {
        "43g5g534534rf34f43f": {
            "_number": 3,
            "created": "2019-04-16 09:03:07.459000000",
            "uploader": {
                "_account_id": 4
            },
            "description": "Rebase"
        },
        "dsf34f43f34f34f": {
            "_number": 2,
            "created": "2019-04-02 10:54:14.682000000",
            "uploader": {
                "_account_id": 2
            },
            "description": "Rebase"
        }
    }
}
With Groovy:
def json = new groovy.json.JsonSlurper().parse("x.json" as File)
println(json.revisions.findResult{ it.value._number==2 ? it.key : null })
// => dsf34f43f34f34f
Python 3 (assuming the data is saved in data.json):
import json

with open('data.json') as f:
    json_data = json.load(f)

for rev, revdata in json_data['revisions'].items():
    if revdata['_number'] == 2:
        print(rev)
Prints all revs where _number equals 2.
Using a set comprehension (d being the parsed JSON dict):
print({k for k, v in d['revisions'].items() if v.get('_number') == 2})
OUTPUT:
{'dsf34f43f34f34f'}
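If you only expect one match and want the key itself rather than a set, a generator with next() stops at the first hit; a small sketch, assuming json_data is the parsed dict from the answer above:

key = next((k for k, v in json_data['revisions'].items()
            if v.get('_number') == 2), None)  # None if nothing matches
print(key)  # dsf34f43f34f34f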

Using .values() with list of dictionaries?

I'm comparing json files between two different API endpoints to see which json records need an update, which need a create and what needs a delete. So, by comparing the two json files, I want to end up with three json files, one for each operation.
The json at both endpoints is structured like this (but they use different keys for same sets of values; different problem):
{
    "records": [{
        "id": "id-value-here",
        "c": {
            "d": "eee"
        },
        "f": {
            "l": "last",
            "f": "first"
        },
        "g": ["100", "89", "9831", "09112", "800"]
    }, {
        …
    }]
}
So the json is represented as a list of dictionaries (with further nested lists and dictionaries).
If a given json endpoint (j1) id value ("id":) exists in the other endpoint json (j2), then that record should be added to j_update.
So far I have something like this, but I can see that .values() doesn't work because it's trying to operate on the list instead of on all the listed dictionaries(?):
j_update = {r for r in j1['records'] if r['id'] in
            j2.values()}
This doesn't return an error, but it creates an empty set using test json files.
Seems like this should be simple, but I think I'm tripping over the nesting of dictionaries in a list that represents the json. Do I need to flatten j2, or is there a simpler dictionary method Python has to achieve this?
====edit j1 and j2====
have same structure, use different keys; toy data
j1
{
    "records": [{
        "field_5": 2329309841,
        "field_12": {
            "email": "cmix@etest.com"
        },
        "field_20": {
            "last": "Mixalona",
            "first": "Clara"
        },
        "field_28": ["9002329309999", "9002329309112"],
        "field_44": ["1002329309832"]
    }, {
        "field_5": 2329309831,
        "field_12": {
            "email": "mherbitz345@test.com"
        },
        "field_20": {
            "last": "Herbitz",
            "first": "Michael"
        },
        "field_28": ["9002329309831", "9002329309112", "8002329309999"],
        "field_44": ["1002329309832"]
    }, {
        "field_5": 2329309855,
        "field_12": {
            "email": "nkatamaran@test.com"
        },
        "field_20": {
            "first": "Noriss",
            "last": "Katamaran"
        },
        "field_28": ["9002329309111", "8002329309112"],
        "field_44": ["1002329309877"]
    }]
}
j2
{
    "records": [{
        "id": 2329309831,
        "email": {
            "email": "mherbitz345@test.com"
        },
        "name_primary": {
            "last": "Herbitz",
            "first": "Michael"
        },
        "assign": ["8003329309831", "8007329309789"],
        "hr_id": ["1002329309877"]
    }, {
        "id": 2329309884,
        "email": {
            "email": "yinleeshu@test.com"
        },
        "name_primary": {
            "last": "Lee Shu",
            "first": "Yin"
        },
        "assign": ["8002329309111", "9003329309831", "9002329309111", "8002329309999", "8002329309112"],
        "hr_id": ["1002329309832"]
    }, {
        "id": 23293098338,
        "email": {
            "email": "amlouis@test.com"
        },
        "name_primary": {
            "last": "Maxwell Louis",
            "first": "Albert"
        },
        "assign": ["8002329309111", "8007329309789", "9003329309831", "8002329309999", "8002329309112"],
        "hr_id": ["1002329309877"]
    }]
}
If you read the json, you get a dict. You are looking for a particular key inside the list of values.
if 'records' in j2:
    r = j2['records'][0].get('id', [])  # defaults if id does not exist
It is prettier to do a recursive search, but I don't know how your data is organized well enough to quickly come up with a solution.
To give an idea of a recursive search, consider this example:
def recursiveSearch(dictionary, target):
    if target in dictionary:
        return dictionary[target]
    for key, value in dictionary.items():
        if isinstance(value, dict):
            result = recursiveSearch(value, target)
            if result is not None:
                return result

a = {'test': 'b', 'test1': dict(x=dict(z=3), y=2)}
print(recursiveSearch(a, 'z'))
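Note that this sketch only descends into dicts, while JSON data commonly nests dicts inside lists too. A variant of my own that also walks lists:

def recursiveSearchAny(obj, target):
    if isinstance(obj, dict):
        if target in obj:
            return obj[target]
        values = obj.values()
    elif isinstance(obj, list):
        values = obj
    else:
        return None  # scalar, nothing to descend into
    for value in values:
        result = recursiveSearchAny(value, target)
        if result is not None:
            return result

print(recursiveSearchAny({'records': [{'id': 2329309831}]}, 'id'))  # 2329309831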
You tried:
j_update = {r for r in j1['records'] if r['id'] in j2.values()}
Aside from the r['id'] vs r['field_5'] problem, you have:
>>> list(j2.values())
[[{'id': 2329309831, ...}, ...]]
The id are buried inside a list and a dict, thus the test r['id'] in j2.values() always return False.
The basic solution is fairly simple.
First, create a set of j2 ids:
>>> present_in_j2 = set(record["id"] for record in j2["records"])
Then, rebuild the json structure of j1, keeping only the records whose field_5 is also present in j2:
>>> {"records":[record for record in j1["records"] if record["field_5"] in present_in_j2]}
{'records': [{'field_5': 2329309831, 'field_12': {'email': 'mherbitz345#test.com'}, 'field_20': {'last': 'Herbitz', 'first': 'Michael'}, 'field_28': ['9002329309831', '9002329309112', '8002329309999'], 'field_44': ['1002329309832']}]}
It works, but it's not totally satisfying because of the weird keys of j1. Let's try to convert j1 to a more friendly format:
def map_keys(json_value, conversion_table):
    """Map the keys of a json value

    This is a recursive DFS"""
    def map_keys_aux(json_value):
        """Capture the conversion table"""
        if isinstance(json_value, list):
            return [map_keys_aux(v) for v in json_value]
        elif isinstance(json_value, dict):
            return {conversion_table.get(k, k): map_keys_aux(v) for k, v in json_value.items()}
        else:
            return json_value
    return map_keys_aux(json_value)
The function focuses on dictionary keys: conversion_table.get(k, k) is conversion_table[k] if the key is present in the conversion table, or the key itself otherwise.
>>> j1toj2 = {"field_5":"id", "field_12":"email", "field_20":"name_primary", "field_28":"assign", "field_44":"hr_id"}
>>> mapped_j1 = map_keys(j1, j1toj2)
Now, the code is cleaner and the output may be more useful for a PUT:
>>> d1 = {record["id"]:record for record in mapped_j1["records"]}
>>> present_in_j2 = set(record["id"] for record in j2["records"])
>>> {"records":[record for record in mapped_j1["records"] if record["id"] in present_in_j2]}
{'records': [{'id': 2329309831, 'email': {'email': 'mherbitz345#test.com'}, 'name_primary': {'last': 'Herbitz', 'first': 'Michael'}, 'assign': ['9002329309831', '9002329309112', '8002329309999'], 'hr_id': ['1002329309832']}]}
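The question also asks for create and delete buckets besides j_update. Once the keys are mapped, plain set membership gives all three; a sketch, assuming create means present in j1 but not in j2, and delete the reverse:

j1_ids = {record["id"] for record in mapped_j1["records"]}
j2_ids = {record["id"] for record in j2["records"]}

j_update = {"records": [r for r in mapped_j1["records"] if r["id"] in j2_ids]}
j_create = {"records": [r for r in mapped_j1["records"] if r["id"] not in j2_ids]}
j_delete = {"records": [r for r in j2["records"] if r["id"] not in j1_ids]}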
