Related
I would like to modify the value of a field on a specific index of a nested type depending on another value of the same nested object or a field outside of the nested object.
As example, I have the current mapping of my index feed:
{
"feed": {
"mappings": {
"properties": {
"attacks_ids": {
"type": "keyword"
},
"created_by": {
"type": "keyword"
},
"date": {
"type": "date"
},
"groups_related": {
"type": "keyword"
},
"indicators": {
"type": "nested",
"properties": {
"date": {
"type": "date"
},
"description": {
"type": "text"
},
"role": {
"type": "keyword"
},
"type": {
"type": "keyword"
},
"value": {
"type": "keyword"
}
}
},
"malware_families": {
"type": "keyword"
},
"published": {
"type": "boolean"
},
"references": {
"type": "keyword"
},
"tags": {
"type": "keyword"
},
"targeted_countries": {
"type": "keyword"
},
"title": {
"type": "text"
},
"tlp": {
"type": "keyword"
}
}
}
}
}
Take the following document as example:
{
"took": 194,
"timed_out": false,
"_shards": {
"total": 1,
"successful": 1,
"skipped": 0,
"failed": 0
},
"hits": {
"total": {
"value": 1,
"relation": "eq"
},
"max_score": 1,
"hits": [
{
"_index": "feed",
"_type": "_doc",
"_id": "W3CS7IABovFpcGfZjfyu",
"_score": 1,
"_source": {
"title": "Test",
"date": "2022-05-22T16:21:09.159711",
"created_by": "finch",
"tlp": "white",
"published": true,
"references": [
"test",
"test"
],
"tags": [
"tag1",
"tag2"
],
"targeted_countries": [
"Italy",
"Germany"
],
"malware_families": [
"family1",
"family2"
],
"groups_related": [
"group1",
"griup2"
],
"attacks_ids": [
""
],
"indicators": [
{
"value": "testest",
"description": "This is a test",
"type": "sha256",
"role": "file",
"date": "2022-05-22T16:21:09.159560"
},
{
"value": "testest2",
"description": "This is a test 2",
"type": "ipv4",
"role": "c2",
"date": "2022-05-22T16:21:09.159699"
}
]
}
}
]
}
}
I would like to make this update: indicators[0].value = 'changed'
if _id == 'W3CS7IABovFpcGfZjfyu'
or if title == 'some_title'
or if indicators[0].role == 'c2'
I already tried with a script, but it seems I can't manage to get it work, I hope the explanation is clear, ask any question if not, thank you.
Edit 1:
I managed to make it work, however it needs the _id, still looking for a way to do that without it.
My partial solution:
update = Pulse.get(id="XHCz7IABovFpcGfZWfz9") #Pulse is my document
update.update(script="for (indicator in ctx._source.indicators) {if (indicator.value=='changed2') {indicator.value='changed3'}}")
# Modify depending on the value of a field inside the same nested object
I wrote a script to pull data from Verizon's connectivity management API with the following. Below is the section that requests the line information based on the search item, in this case, the SIM or iccid. I did not include the previous parts because they are just to connect to the API and get credentials.
header = {
'accept': 'application/json',
'VZ-M2M-Token': session_token,
'Authorization': 'Bearer' + bearer_token,
'Content-Type': 'application/json',
}
data = '{ "deviceId": { "id": ' + SIM +', "kind": "ICCID" }}'
response = requests.post('https://thingspace.verizon.com/api/m2m/v1/devices/actions/list', headers=header, data=data)
And the response I get is a JSON Array which looks like
{
"hasMoreData": false,
"devices": [
{
"accountName": "123456789-00001",
"billingCycleEndDate": "2020-10-31T20:00:00-04:00",
"carrierInformations": [
{
"carrierName": "Verizon Wireless",
"servicePlan": "3rrrrx48wwwwrjgjtyjtyjtyjtyj",
"state": "active"
}
],
"connected": true,
"createdAt": "2016-11-04T11:06:28-04:00",
"deviceIds": [
{
"id": "5256694405",
"kind": "mdn"
},
{
"id": "3114949302094150",
"kind": "imsi"
},
{
"id": "35922505468230",
"kind": "imei"
},
{
"id": "891480000054957290575",
"kind": "iccId"
},
{
"id": "15256694405",
"kind": "msisdn"
},
{
"id": "5256694405",
"kind": "min"
}
],
"extendedAttributes": [
{
"key": "PrimaryPlaceOfUseTitle"
},
{
"key": "PrimaryPlaceOfUseFirstName",
"value": "5256694405",
},
{
"key": "PrimaryPlaceOfUseMiddleName"
},
{
"key": "PrimaryPlaceOfUseLastName",
"value": "ESN"
},
{
"key": "PrimaryPlaceOfUseSuffix"
},
{
"key": "PrimaryPlaceOfUseAddressLine1"
},
{
"key": "PrimaryPlaceOfUseAddressLine2"
},
{
"key": "PrimaryPlaceOfUseCity"
},
{
"key": "PrimaryPlaceOfUseState"
},
{
"key": "PrimaryPlaceOfUseCountry"
},
{
"key": "PrimaryPlaceOfUseZipCode"
},
{
"key": "PrimaryPlaceOfUseZipCode4"
},
{
"key": "PrimaryPlaceOfUseCBRPhone"
},
{
"key": "PrimaryPlaceOfUseCBRPhoneType"
},
{
"key": "PrimaryPlaceOfUseEmailAddress"
},
{
"key": "AccountNumber",
"value": "5256694405-00001"
},
{
"key": "SmsrOid"
},
{
"key": "ProfileStatus"
},
{
"key": "PromoCodes",
"value": ""
},
{
"key": "PromotionStartDate",
"value": ""
},
{
"key": "PromotionScheduledEndDate",
"value": ""
},
{
"key": "LeadId",
"value": ""
},
{
"key": "CustomerName",
"value": ""
},
{
"key": "CustomerAddressLine1",
"value": ""
},
{
"key": "CustomerAddressLine2",
"value": ""
},
{
"key": "CustomerAddressCity",
"value": ""
},
{
"key": "CustomerAddressState",
"value": ""
},
{
"key": "CustomerAddressZipCode",
"value": ""
},
{
"key": "ServiceZipCode",
"value": ""
},
{
"key": "SkuNumber",
"value": "VZW080000460053"
},
{
"key": "CostCenterCode"
},
{
"key": "PreIMEI",
"value": "3592254564568445"
},
{
"key": "PreSKU",
"value": "VZW080000100037"
},
{
"key": "SIMOTADate",
"value": "4/30/2020 1:22:18 PM"
},
{
"key": "RoamingStatus",
"value": "NotRoaming"
},
{
"key": "LastRoamingStatusUpdate",
"value": "9/24/2020 5:40:26 PM"
}
],
"groupNames": [
"Default: 0220433754-00001"
],
"ipAddress": "100.100.100.100",
"lastActivationBy": "User Verizon",
"lastActivationDate": "2016-11-04T11:06:28-04:00",
"lastConnectionDate": "2020-09-24T13:40:26-04:00"
}
]
}
I added a part to my script to pull the mdn, iccid and the imei from the array with the code that is below.
def puller(line_json):
line_data = json.loads(line_json)
mdn = (line_data['devices'][0]['deviceIds'][0]['id'])
iccid = (line_data['devices'][0]['deviceIds'][3]['id'])
imei = (line_data['devices'][0]['deviceIds'][2]['id'])
print('phone = ' ,mdn)
print('SIM = ' , iccid)
print('IMEI = ' , imei)
I tested this code and it works the way it should with one test ID. I then proceeded to test with another test ID and I learned that the array structure is not always the same. That second JSON array is below. I am wondering is there a better way to find the specific values that I want, but in the way that I am not specifically telling the script where in the structure the item will be as I did above.
{
"hasMoreData": false,
"devices": [
{
"accountName": "02234234234-00001",
"billingCycleEndDate": "2020-10-31T20:00:00-04:00",
"carrierInformations": [
{
"carrierName": "Verizon Wireless",
"servicePlan": "37776xdsfewsfwe576193",
"state": "active"
}
],
"connected": true,
"createdAt": "2016-05-24T15:55:06-04:00",
"deviceIds": [
{
"id": "0945437676404",
"kind": "esn"
},
{
"id": "1234565799",
"kind": "mdn"
},
{
"id": "31148454545458767",
"kind": "imsi"
},
{
"id": "01426786678211",
"kind": "imei"
},
{
"id": "89148000006456456454",
"kind": "iccId"
},
{
"id": "1234565799",
"kind": "min"
}
],
"extendedAttributes": [
{
"key": "PrimaryPlaceOfUseTitle"
},
{
"key": "PrimaryPlaceOfUseFirstName",
"value": "096114564506772"
},
{
"key": "PrimaryPlaceOfUseMiddleName"
},
{
"key": "PrimaryPlaceOfUseLastName",
"value": "096546454806772"
},
{
"key": "PrimaryPlaceOfUseSuffix"
},
{
"key": "PrimaryPlaceOfUseAddressLine1"
},
{
"key": "PrimaryPlaceOfUseAddressLine2"
},
{
"key": "PrimaryPlaceOfUseCity"
},
{
"key": "PrimaryPlaceOfUseState"
},
{
"key": "PrimaryPlaceOfUseCountry"
},
{
"key": "PrimaryPlaceOfUseZipCode"
},
{
"key": "PrimaryPlaceOfUseZipCode4"
},
{
"key": "PrimaryPlaceOfUseCBRPhone"
},
{
"key": "PrimaryPlaceOfUseCBRPhoneType"
},
{
"key": "PrimaryPlaceOfUseEmailAddress"
},
{
"key": "AccountNumber",
"value": "02242342354-00001"
},
{
"key": "SmsrOid"
},
{
"key": "ProfileStatus"
},
{
"key": "PromoCodes",
"value": ""
},
{
"key": "PromotionStartDate",
"value": ""
},
{
"key": "PromotionScheduledEndDate",
"value": ""
},
{
"key": "LeadId",
"value": ""
},
{
"key": "CustomerName",
"value": ""
},
{
"key": "CustomerAddressLine1",
"value": ""
},
{
"key": "CustomerAddressLine2",
"value": ""
},
{
"key": "CustomerAddressCity",
"value": ""
},
{
"key": "CustomerAddressState",
"value": ""
},
{
"key": "CustomerAddressZipCode",
"value": ""
},
{
"key": "ServiceZipCode",
"value": ""
},
{
"key": "SkuNumber",
"value": "VZW12000364343005"
},
{
"key": "CostCenterCode"
},
{
"key": "PreIMEI"
},
{
"key": "PreSKU",
"value": "VZW12000334340005"
},
{
"key": "SIMOTADate",
"value": "3/13/2020 10:52:07 AM"
},
{
"key": "RoamingStatus",
"value": "NotRoaming"
},
{
"key": "LastRoamingStatusUpdate",
"value": "10/20/2020 6:14:20 PM"
}
],
"groupNames": [
"Default: 02342343754-00001"
],
"ipAddress": "101.101.101.101",
"lastActivationBy": "User Verizon",
"lastActivationDate": "2016-05-24T15:55:16-04:00",
"lastConnectionDate": "2020-10-20T14:14:20-04:00"
}
]
}
I tried to use this block of code from some research I did to find the value that I was looking for; in this case, the mdn. Problem I have is that the response returns a blank set of brackets with no information, so I know there is something I probably did wrong.
def json_extract(obj, kind):
"""Recursively fetch values from nested JSON."""
arr = []
def extract(obj, arr, kind):
"""Recursively search for values of key in JSON tree."""
if isinstance(obj, dict):
for k, v in obj.items():
if isinstance(v, (dict, list)):
extract(v, arr, kind)
elif k == kind:
arr.append(v)
elif isinstance(obj, list):
for item in obj:
extract(item, arr, kind)
return arr
values = extract(obj, arr, kind)
return values
names = json_extract(response , 'mdn')
print(names)
I understood that you are trying to find, mdn, iccid and imei'id from the json object above,so, instead of recursion and the complicated coding that you have done there, it is easier to use python's inbuilt libraries to help you out:
You can use the next function for your purpose:
# load your json data
line_data = json.loads(data)
# narrow your focus on the array in question
device_ids = line_data['devices'][0]['deviceIds']
# This gets the first item's id attribute from the list that matches the condition, and returns None if no item matches.
mdn = next((x['id'] for x in device_ids if x['kind'] == "mdn"), None)
iccid = next((x['id'] for x in device_ids if x['kind'] == "iccid"), None)
imei = next((x['id'] for x in device_ids if x['kind'] == "imei"), None)
You will need to handle the None if it was unable to find such element in the array.
Reference : Find object in list that has attribute equal to some value (that meets any condition)
I am trying to retain the whole contents of a nested dictionary but only with its contents reordered..
This is an example of my nested dictionaries (pardon the long example..) -
{
"pages": {
"rotatingTest": {
"elements": {
"apvfafwkbnjn2bjt": {
"name": "animRot_tilt40_v001",
"data": {
"description": "tilt testing",
"project": "TEST",
"created": "26/11/18 16:32",
},
"type": "AnimWidget",
"uid": "apvfafwkbnjn2bjt"
},
"p0pkje1hjcc9jukq": {
"name": "poseRot_positionD_v003",
"data": {
"description": "posing test for positionD",
"created": "10/01/18 14:16",
"project": "TEST",
},
"type": "PosedWidget",
"uid": "p0pkje1hjcc9jukq"
},
"k1gzzc5uy1ynqtnj": {
"name": "animRot_positionH_v001",
"data": {
"description": "rotational posing test for positionH",
"created": "13/06/18 14:19",
"project": "TEST",
},
"type": "AnimWidget",
"uid": "k1gzzc5uy1ynqtnj"
}
}
},
"panningTest": {
"elements": {
"7lyuri8g8u5ctwsa": {
"name": "posePan_positionZ_v001",
"data": {
"description": "panning test for posZ",
"created": "04/10/18 12:43",
"project": "TEST",
},
"type": "PosedWidget",
"uid": "7lyuri8g8u5ctwsa"
}
}
},
"zoomingTest": {
"elements": {
"prtn0i6ehudhz475": {
"name": "posZoom_positionH_v010",
"data": {
"description": "zoom test",
"created": "11/10/18 12:42",
"project": "TEST",
},
"type": "PosedWidget",
"uid": "prtn0i6ehudhz475"
}
}
}
},
"page_order": [
"rotatingTest",
"zoomingTest",
"panningTest"
]
}
and this is my code:
for k1, v1 in test_dict.get('pages', {}).items():
return (sorted(v1.get('elements').items(), key=lambda (k2,v2): v2['data']['created']))
In the code, keys such as the page_order, pages etc are missing...
Or if there is/ are any commands where it will enables me to retain the 'whole' of the dictionary?
Appreciate in advance for any advice.
If you're using Python 3.7, a dict will preserve insert order. Otherwise, you need to use an OrderedDict.Additionally, you need to convert the date string to a date to get the correct sort order:
from datetime import datetime
def sortedPage(d):
return {k: {'elements': dict(sorted(list(v['elements'].items()), key=lambda tuple: datetime.strptime(tuple[1]['data']['created'], '%d/%m/%y %H:%M')))} for k,v in d.items()}
output = {k: sortedPage(v) if k == 'pages' else v for k,v in input.items()}
I am trying to get user details of persons who has put likes, comments on Facebook posts. I am using python facebook-sdk package. Code is as follows.
import facebook as fi
import json
graph = fi.GraphAPI('Access Token')
data = json.dumps(graph.get_object('DSIfootcandy/posts'))
From the above, I am getting a highly nested json. Here I will put only a json string for one post in the fb.
{
"paging": {
"next": "https://graph.facebook.com/v2.0/425073257683630/posts?access_token=&limit=25&until=1449201121&__paging_token=enc_AdD0DL6sN3aDZCwfYY25rJLW9IZBZCLM1QfX0venal6rpjUNvAWZBOoxTjbOYZAaFiBImzMqiv149HPH5FBJFo0nSVOPqUy78S0YvwZDZD",
"previous": "https://graph.facebook.com/v2.0/425073257683630/posts?since=1450843741&access_token=&limit=25&__paging_token=enc_AdCYobFJpcNavx6STzfPFyFe6eQQxRhkObwl2EdulwL7mjbnIETve7sJZCPMwVm7lu7yZA5FoY5Q4sprlQezF4AlGfZCWALClAZDZD&__previous=1"
},
"data": [
{
"picture": "https://fbcdn-photos-e-a.akamaihd.net/hphotos-ak-xfa1/v/t1.0-0/p130x130/1285_5066979392443_n.png?oh=b37a42ee58654f08af5abbd4f52b1ace&oe=570898E7&__gda__=1461440649_aa94b9ec60f22004675c4a527e8893f",
"is_hidden": false,
"likes": {
"paging": {
"cursors": {
"after": "MTU3NzQxODMzNTg0NDcwNQ==",
"before": "MTU5Mzc1MjA3NDE4ODgwMA=="
}
},
"data": [
{
"id": "1593752074188800",
"name": "Maduri Priyadarshani"
},
{
"id": "427605680763414",
"name": "Darshi Mashika"
},
{
"id": "599793563453832",
"name": "Shakeer Nimeshani Shashikala"
},
{
"id": "1577418335844705",
"name": "Däzlling Jalali Muishu"
}
]
},
"from": {
"category": "Retail and Consumer Merchandise",
"name": "Footcandy",
"category_list": [
{
"id": "2239",
"name": "Retail and Consumer Merchandise"
}
],
"id": "425073257683630"
},
"name": "Timeline Photos",
"privacy": {
"allow": "",
"deny": "",
"friends": "",
"description": "",
"value": ""
},
"is_expired": false,
"comments": {
"paging": {
"cursors": {
"after": "WTI5dGJXVnVkRjlqZFhKemIzSUVXdNVFExTURRd09qRTBOVEE0TkRRNE5EVT0=",
"before": "WTI5dGJXVnVkRjlqZFhKemIzNE16Y3dNVFExTVRFNE9qRTBOVEE0TkRRME5UVT0="
}
},
"data": [
{
"from": {
"name": "NiFû Shafrà",
"id": "1025030640553"
},
"like_count": 0,
"can_remove": false,
"created_time": "2015-12-23T04:20:55+0000",
"message": "wow lovely one",
"id": "50018692683829_500458145118",
"user_likes": false
},
{
"from": {
"name": "Shamnaz Lukmanjee",
"id": "160625809961884"
},
"like_count": 0,
"can_remove": false,
"created_time": "2015-12-23T04:27:25+0000",
"message": "Nice",
"id": "500186926838929_500450145040",
"user_likes": false
}
]
},
"actions": [
{
"link": "https://www.facebook.com/425073257683630/posts/5001866838929",
"name": "Comment"
},
{
"link": "https://www.facebook.com/42507683630/posts/500186926838929",
"name": "Like"
}
],
"updated_time": "2015-12-23T04:27:25+0000",
"link": "https://www.facebook.com/DSIFootcandy/photos/a.438926536298302.1073741827.4250732576630/50086926838929/?type=3",
"object_id": "50018692838929",
"shares": {
"count": 3
},
"created_time": "2015-12-23T04:09:01+0000",
"message": "Reach new heights in the cute and extremely comfortable \"Silviar\" www.focandy.lk",
"type": "photo",
"id": "425077683630_50018926838929",
"status_type": "added_photos",
"icon": "https://www.facebook.com/images/icons/photo1.gif"
}
]
}
Now I need to get this data into a dataframe as follows(no need to get all).
item | Like_id |Like_username | comments_userid |comments_username|comment(msg)|
-----+---------+--------------+-----------------+-----------------+------------+
Bag | 45546 | noel | 641 | James | nice work |
-----+---------+--------------+-----------------+-----------------+------------+
Any Help will be Highly Appreciated.
Not exactly like your intended format, but here is the making of a solution :
import pandas
DictionaryObject_as_List = str(mydict).replace("{","").replace("}","").replace("[","").replace("]","").split(",")
newlist = []
for row in DictionaryObject_as_List :
row = row.replace('https://',' ').split(":")
exec('newlist.append ( ' + "[" + " , ".join(row)+"]" + ')')
DataFrame_Object = pandas.DataFrame(newlist)
print DataFrame_Object
In Python I'm currently working with a very large JSON file with some deep dictionaries and arrays. I'm having an issue where it's not constant. For example that's below, it's essentially countries, with regions/states, cities, and suburbs. The issue is that if there is only one suburb, it'll return a dictionary, though if there's more than one, it's a array with a dictionary making me have to add another line of code to go deeper. Sure, can ifelse/for it, but this is only a very small portion of the inconstancy and it's just not proper going ifelse all the time.
What I'd like to do is simply search anything within Belgium for the dictionary entry "code": "8400" and return it's location within the JSON file. What would be my best approach in order to do something like this? Thanks!
***SNIP***
{
"code": "BE",
"name": "Belgium",
"regions": {
"region": [
{
"code": "45",
"name": "Flanders",
"places": {
"place": [
{
"code": "1790",
"name": "Affligem"
},
{
"code": "8570",
"name": "Anzegem"
},
{
"code": "8630",
"name": "Diksmuide"
},
{
"code": "9600",
"name": "Ronse"
}
]
},
"subregions": {
"subregion": [
{
"code": "46",
"name": "Coast",
"places": {
"place": [
{
"code": "8300",
"name": "Knokke-Heist"
},
{
"code": "8400",
"name": "Oostende",
"subplaces": {
"subplace": {
"code": "8450",
"name": "Bredene"
}
}
},
{
"code": "8420",
"name": "De Haan"
},
{
"code": "8430",
"name": "Middelkerke"
},
{
"code": "8434",
"name": "Westende-Bad"
},
{
"code": "8490",
"name": "Jabbeke"
},
{
"code": "8660",
"name": "De Panne"
},
{
"code": "8670",
"name": "Oostduinkerke"
}
]
}
},
{
"code": "47",
"name": "Cities",
"places": {
"place": [
{
"code": "1000",
"name": "Brussels"
},
{
"code": "2000",
"name": "Antwerp"
},
{
"code": "8000",
"name": "Bruges"
},
{
"code": "8340",
"name": "Damme"
},
{
"code": "9000",
"name": "Gent"
}
]
}
},
{
"code": "48",
"name": "Interior",
"places": {
"place": [
{
"code": "2260",
"name": "Westerlo"
},
{
"code": "2400",
"name": "Mol"
},
{
"code": "2590",
"name": "Berlaar"
},
{
"code": "8500",
"name": "Kortrijk",
"subplaces": {
"subplace": {
"code": "8940",
"name": "Wervik"
}
}
},
{
"code": "8610",
"name": "Handzame"
},
{
"code": "8755",
"name": "Ruiselede"
},
{
"code": "8900",
"name": "Ieper"
},
{
"code": "8970",
"name": "Poperinge"
}
]
}
},
EDIT:
I was asked to show how I'm currently getting through this JSON file. Root is a dictionary containing numbers that equal the city/suburb I'm trying to search for. It doesn't define whether it is a city or suburb before hand. Below is my lazyly coded search while I was trying to learn how to dig through this JSON file, until I realized how complicated it was getting and got a bit stuck.
SNIP
for k in dataDict['countries']['country']:
if k['code'] == root['country']:
for y in k['regions']['region']['places']['place']:
if y['code'] == root['place']:
city = y['name']
else:
try:
for p in y['subplaces']['subplace']:
if p['code'] == root['place']:
city = p['name']
except:
pass
If I understand well, each dictionary has the following structure:
{"code": # some int
"name": # some str
none / "country" / "place" / whatever # some dict or list
You can write a recursive function that handle one and only one dict:
def foo(my_dict):
if my_dict['code'] == root['place']:
city = my_dict['name']
elif "country" in my_dict:
city = foo(my_dict['country'])
elif "place" in my_dict:
#
# and so on...
else:
city = None
return city
Hope this example will help you.