"KeyError: 'added' "when importing different lists of json - python

I'm getting a "KeyError: 'added'" error when I try to import one of the three lists inside a JSON file. The list I'm trying to import is the one called "added". This worked when it was just one list without the name "added" on top, but now it seems I can't access the list anymore. Basically, I want to import each list individually.
This is my code that fetches the JSON before importing it into the database:
import requests
import json
from users.models import Facility, FacilityAddress, FacilityInspectionInfo, FacilityComplaints
from django.core.management.base import BaseCommand
IMPORT_URL = 'https://url/imports.json'
class Command(BaseCommand):

    def import_facility_from_file(self, data):
        UUID = data.get('UUID', None)
        Name = data.get('Name', None)
        PrimaryAddress = data["AddressInfo"]["PrimaryAddress"]
        """ This is what happens after the database entry related code """

    def handle(self, *args, **options):
        """
        Call the function to import data from json url
        """
        headers = {'Content-Type': 'application/json'}
        response = requests.get(
            url=IMPORT_URL,
            headers=headers,
        )
        response.raise_for_status()
        data = response.json()
        for key, data_object in data.items():
            self.import_facility_from_file(data_object)
The new version of the JSON file that I'm trying to use, which causes the error:
{
  "added": {
    "125hk24h5kjh43k5": {
      "UUID": "125hk24h5kjh43k5",
      "Name": "Test Facility 1",
      "AddressInfo": {"PrimaryAddress": "1234 Drive RD"},
      "ImporterLastModifiedTimestamp": 1643721420
    }
  },
  "deleted": ["235hk24h5kjh43k5,235hk345789h43k5"],
  "modified": {
    "995hk24h5kjh43k5": {
      "UUID": "995hk24h5kjh43k5",
      "Name": "Test Facility 2",
      "AddressInfo": {"PrimaryAddress": "2345 Test RD"},
      "ImporterLastModifiedTimestamp": 1643721420
    }
  }
}
The old version of the JSON file that worked perfectly with the code I initially wrote:
{"00016ed7be4872a19d6e16afc98a7389b2bb324a2":
{"UUID":"00016ed7be4872a19d6e1ed6f36b647f3eb41cadedd2130b103a5851caebc26fbbbf24c2f1a64d2cf34ac4e03aaa30309816f58c397e6afc98a7389b2bb324a2","Name":"Test Facility","IssuedNumber":"123456","Licensee":"Test Licensee","Email":"test#example.com","AdministratorName":"Test Name","TelephoneNumber":"(123) 456-7890324879","ImporterLastModifiedTimestamp":"1362985200",
"AddressInfo":{"PrimaryAddress":"123 Fake Road","SecondaryAddress":"","City":"Testcity","RegionOrState":"TX","PostalCode":"12345","Geolocation":"00.0000,-00.0000"},"Capacity":100,"MostRecentLicenseTimestamp":1575180000,"ClosedTimestamp":0,
"InspectionInfo":{"ComplaintRelatedVisits":0,"InspectionRelatedVisits":0,"NumberOfVisits":0,"LastVisitTimestamp":0},
"Complaints":{"ComplaintsTypeA":0,"ComplaintsTypeB":0,"SubstantiatedAllegations":0,"TotalAllegations":0}},
"00016ed7be4872a15435435435b2bb324a2":
{"UUID":"000c93dcb7a0b3d5783bb330892aff6abdb9fb57a7d3701c2d903f3640877579f3173ecd8a80532f6c3d53dbacde78a6a54ae42fef321a5793f5a01934f8de7a","Name":"Test Facility 2","IssuedNumber":"123456","Licensee":"Test Licensee","Email":"test#example.com","AdministratorName":"Test Name","TelephoneNumber":"(123) 456-7890324879","ImporterLastModifiedTimestamp":"1362985200",
"AddressInfo":{"PrimaryAddress":"123 Fake Road","SecondaryAddress":"","City":"Testcity","RegionOrState":"TX","PostalCode":"12345","Geolocation":"00.0000,-00.0000"},"Capacity":100,"MostRecentLicenseTimestamp":1575180000,"ClosedTimestamp":0,
"InspectionInfo":{"ComplaintRelatedVisits":0,"InspectionRelatedVisits":0,"NumberOfVisits":0,"LastVisitTimestamp":0},
"Complaints":{"ComplaintsTypeA":0,"ComplaintsTypeB":0,"SubstantiatedAllegations":0,"TotalAllegations":0}},
"00234324324343243afc98a7389b2bb324a2":
{"UUID":"fffd4dec10054e6e1deb2a2266a7c6bb0136ba46222e734ceed5855651f735cfbe0bb66cfaf27c3d175ae261a8f6df0c36b5390d15c70b07d67e35e1081aaf6d","Name":"Test Facility 3","IssuedNumber":"123456","Licensee":"Test Licensee","Email":"test#example.com","AdministratorName":"Test Name","TelephoneNumber":"(123) 456-7890324879","ImporterLastModifiedTimestamp":"1362985200",
"AddressInfo":{"PrimaryAddress":"123 Fake Road","SecondaryAddress":"","City":"Testcity","RegionOrState":"TX","PostalCode":"12345","Geolocation":"00.0000,-00.0000"},"Capacity":100,"MostRecentLicenseTimestamp":1575180000,"ClosedTimestamp":0,
"InspectionInfo":{"ComplaintRelatedVisits":0,"InspectionRelatedVisits":0,"NumberOfVisits":0,"LastVisitTimestamp":0},
"Complaints":{"ComplaintsTypeA":0,"ComplaintsTypeB":0,"SubstantiatedAllegations":0,"TotalAllegations":0}}}
So I tried this to get the UUID and the other information from the modified JSON file:
UUID = data["added"]["UUID"]
but I'm getting this error:
KeyError: 'added'

It sounds like the JSON format has changed underneath you and you need to adapt.
How about processing all the items in both the "added" and "modified" sections?
for key, data_object in data.items():
    if key in ["added", "modified"]:
        for inner_key, facility in data_object.items():
            self.import_facility_from_file(facility)

You get AttributeError: 'list' object has no attribute 'get' because there is a list (the "deleted" value) mixed in among your dictionaries, so you can add a check for the type like this:
for key, data_object in data.items():
    if isinstance(data_object, dict):
        for facility in data_object.values():
            self.import_facility_from_file(facility)
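Putting the two answers together, here is a sketch (my own combination, not code from either answer) of what handle() could look like for the new format; the handling of the "deleted" list is only a placeholder, since the question does not say what should happen to those UUIDs:
def handle(self, *args, **options):
    headers = {'Content-Type': 'application/json'}
    response = requests.get(url=IMPORT_URL, headers=headers)
    response.raise_for_status()
    data = response.json()
    # "added" and "modified" map UUIDs to facility dicts; import each entry.
    for section in ("added", "modified"):
        for facility in data.get(section, {}).values():
            self.import_facility_from_file(facility)
    # "deleted" is a plain list of UUID strings; act on them as your models require.
    for removed_uuid in data.get("deleted", []):
        pass  # e.g. look up and delete the matching Facility record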

Related

Getting username from a multi-valued lookup field in Sharepoint List

I am working with a SharePoint list via the REST API. I managed to get all of the string field values, but I'm having problems when it comes to multi-valued User/Lookup field values.
This is the code I am currently using:
from office365.runtime.auth.user_credential import UserCredential
from office365.sharepoint.client_context import ClientContext
site_url = "https://xxx.sharepoint.com/sites/xxx/"
sp_list = "xxx"
ctx = ClientContext(site_url).with_credentials(UserCredential("xxx", "xxx"))
sp_lists = ctx.web.lists
s_list = sp_lists.get_by_title(sp_list)
l_items = s_list.get_items()
ctx.load(l_items)
ctx.execute_query()
for item in l_items:
    print(item.properties["mvlf"])
In this case "mvlf" in the last line of code is a multi-valued lookup field that contains all the user data (username, email, etc.), but I am only getting the ID number.
Any help would be great!
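One thing worth trying (a sketch based on the select/expand pattern of the Office365-REST-Python-Client library; "mvlf/Title" and "mvlf/EMail" are assumed internal field names on the lookup's target list, not taken from the question) is to ask SharePoint to expand the lookup field so the related user fields come back instead of only the IDs:
items = s_list.items.select(["Id", "Title", "mvlf/Id", "mvlf/Title", "mvlf/EMail"]).expand(["mvlf"]).get().execute_query()
for item in items:
    print(item.properties.get("mvlf"))
If the expand succeeds, "mvlf" should arrive as a list of dicts holding the projected user fields rather than bare ID numbers.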

how to use the curl POST function in Linux terminal to create a document in python

I'm trying to add a new record (a MongoDB document) through a Python service and I think I'm stuck on an issue caused by the curl POST command. I have attached the Python file and the error received when posting to my URL. Could anyone kindly point me in the right direction?
I don't understand the error well enough to tell whether the problem comes from the Python code, but I do suspect an issue with the curl command.
#!/usr/bin/python
import json
from bson import json_util
from bson.json_util import dumps
import bottle
from bottle import route, run, request, abort

# imports for database
from pymongo import MongoClient

connection = MongoClient('localhost', 27017)
db = connection['city']
collection = db['inspections']

# set up URI paths for REST service
@route('/hello', method='GET')
def get_hello():
    word = '"' + request.GET.get('name', None) + '"'
    string = "{hello:" + word + "}"
    return json.loads(json.dumps(string, indent=4, default=json_util.default))

@route('/strings', method='POST')
def run_post():
    first = '"' + request.json.get('string1') + '"'
    second = '"' + request.json.get('string2') + '"'
    data = "{first:" + first + ",second:" + second + "}"
    return json.loads(json.dumps(data, indent=4, default=json_util.default))

@route('/create', method='POST')
def run_create():
    myid = request.json.get('id')
    print(myid)
    cert_number = request.json.get('certificate_number')
    bus_name = request.json.get('business_name')
    date = request.json.get('date')
    result = request.json.get('result')
    sector = request.json.get('sector')
    added_id = collection.insert({"id": myid, "certificate_number": cert_number, "business_name": bus_name, "date": date, "result": result, "sector": sector})
    added_doc = collection.find_one({"_id": added_id})
    return json.loads(json.dumps(added_doc, indent=4, default=json_util.default))

# url does not allow spacing when passing an argument,
# therefore i use underscores when passing the business_name and then remove them
# when creating the query
@route('/read', method='GET')
def get_read():
    word = request.params.get('business_name')
    word = word.replace("_", " ")
    found_doc = collection.find({"business_name": {'$regex': word}})  # will still get results when user passes a parameter with white space
    return dumps(found_doc)

@route('/update', method='GET')
def get_update(rslt="Violation Issued"):
    myid = request.query.id
    query = {"id": myid}
    new_update = {"$set": {"result": rslt}}
    collection.update_one(query, new_update)
    updated_doc = collection.find_one({"id": myid})
    return json.loads(json.dumps(updated_doc, indent=4, default=json_util.default))

@route('/delete', method='GET')
def get_delete():
    myid = request.query.id
    query = {"id": myid}
    print(query)
    result = collection.delete_one(query)
    return "document with id " + myid + " has been deleted from the City Collection"

if __name__ == '__main__':
    run(debug=True, reloader=True)
    # run(host='localhost', port=8080)
Error: (the returned HTML and the Python error were attached as screenshots and are not reproduced here)
The problem is that at one point in the JSON in your curl request you used “ instead of ". Therefore the JSON parser throws an error.
So instead of
"business_name" : “ACME Test INC."
write:
"business_name" : "ACME Test INC."
Not sure if you solved this, but here we go. Jakob was correct that you used “ instead of ".
Next, get the values from the document you are inserting:
data = request.json  # contains the parsed request body
Assign a value to the variables you need, such as the id:
myid = data['id']
Store all the values in a dictionary variable (I think it is much cleaner this way):
document = {"id":myid,"certificate_number":cert_number,"business_name":bus_name,"date":date,"result":result,"sector":sector}
Lastly, use insert_one and catch errors:
try:
    collection.insert_one(document)
    print("CREATED NEW DOCUMENT")
except Exception as e:
    print("ERROR: ", e)
Also, there are several ways to fix the "space" problem you mention for cURL requests.
One way is simply to replace each space with %20, like so:
ACME%20TEST%20INC.
I hope that helps.
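For reference, a curl call written with plain straight quotes that should reach the /create route looks roughly like this (the field values are invented, and the host/port assume Bottle's defaults):
curl -X POST http://localhost:8080/create \
     -H "Content-Type: application/json" \
     -d '{"id": "10021-2015-ENFO", "certificate_number": "9278806", "business_name": "ACME TEST INC.", "date": "Feb 20 2015", "result": "No Violation Issued", "sector": "Cigarette Retail Dealer"}'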

how to convert lines of json into a hashmap of composite keys?

Background on the actual problem: I am trying to create an AWS Lambda function in Python that accumulates records from a DynamoDB stream into an S3 object. If you don't understand this context you can just ignore it, the question is really a pure Python question.
I got the code below barely working: the file is successfully concatenated with new records from the stream, in the desired format (one JSON object per line). But what I really want is to treat the file as a hashmap, keyed by the fields in keys (4th line of the function definition), which are a subset of the fields in new, so that any incoming record overwrites an old record containing the same key values.
What is the obvious / idiomatic way to change the line journal += data so that, instead of a concatenation, I get an overwrite of lines with the same keys value?
import json
import boto3
import re
import uuid
from decimal import Decimal
import six
import sys
from datetime import datetime
from boto3.dynamodb.types import TypeSerializer

s3 = boto3.resource('s3')

def lambda_handler(event, context):
    object = s3.Object('some.bucket', 'address/dynamo-stream.json')
    journal = object.get()['Body'].read().decode('utf-8')
    for record in event['Records']:
        keys = record['dynamodb'].get('Keys')
        new = record['dynamodb'].get('NewImage')
        if new:
            data = json.dumps(loads(new))
            journal += data + "\n"
    object.put(Body=journal)
    return "ok"
# below: code from https://github.com/Alonreznik/dynamodb-json/blob/master/dynamodb_json/json_util.py
[...]
def loads(s, as_dict=False, *args, **kwargs):
[...]
More explanation:
The variable keys is a subset of new in the sense that, for any json value of new in the format
{ "k1":"v1", "k2:v2", "k3:v3", ... "kN:vN" }
keys will have the value
{ "k1":"v1", "k2:v2" }

Get List of Distinct From Nested List

I'm brand new to Python, coming over from a JavaScript/Node.js upbringing. I have a Python dictionary with a nested list that I'm having lots of trouble accessing. I need to pull out the nested list so I can check for unique e-mails. Here is what my GET request pulls back:
{"data": [{"login_date": "2014-04-17T19:14:29+08:00", "email": "walter.elwyn#yahoo.com"}, {"login_date": "2014-04-22T09:31:56+04:00", "email": "helyn67#cruickshankmckenzie.com"}]
The actual data dictionary that gets returned is much longer but you get the point... so I have two questions:
how do I access a specific point by e-mail in the content dictionary
how do I loop over the dictionary for unique emails?
Here is my code:
from flask import Flask
import requests
import urllib2
app = Flask(__name__)
@app.route('/')
def hello_world():
    content = urllib2.urlopen('https://9g9xhayrh5.execute-api.us-west-2.amazonaws.com/test/data').read()
    print content
    return 'check console'

if __name__ == '__main__':
    app.run()
If you have a structure like this:
>>> response = {"data": [{"login_date": "2014-04-17T19:14:29+08:00", "email": "walter.elwyn#yahoo.com"}, {"login_date": "2014-04-22T09:31:56+04:00", "email": "helyn67#cruickshankmckenzie.com"}]}
Then what you have is a dict with key data
>>> response['data']
[{'login_date': '2014-04-17T19:14:29+08:00', 'email': 'walter.elwyn#yahoo.com'}, {'login_date': '2014-04-22T09:31:56+04:00', 'email': 'helyn67#cruickshankmckenzie.com'}]
You can get a list of emails with a list comprehension:
>>> [user['email'] for user in response['data']]
['walter.elwyn#yahoo.com', 'helyn67#cruickshankmckenzie.com']
How do I loop over the dictionary for unique emails?
To make that list unique, you can use a set comprehension:
>>> {user['email'] for user in response['data']}
set(['helyn67#cruickshankmckenzie.com', 'walter.elwyn#yahoo.com'])
How do I access a specific point by e-mail in the content dictionary
If you want to filter for a given email, you can use filter()
>>> filter(lambda user: user['email'] == 'walter.elwyn#yahoo.com', response['data'])
[{'login_date': '2014-04-17T19:14:29+08:00', 'email': 'walter.elwyn#yahoo.com'}]
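If you need repeated lookups by a particular address, a dict comprehension keyed by email (a small sketch, not part of the original answer) avoids re-scanning the list each time:
>>> by_email = {user['email']: user for user in response['data']}
>>> by_email['walter.elwyn#yahoo.com']
{'login_date': '2014-04-17T19:14:29+08:00', 'email': 'walter.elwyn#yahoo.com'}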
If I do content['data'] in my case, I get an internal server error
That's because to do content['data'], content needs to be a dict-like object. Let's use requests lib to do what you want:
>>> import requests
>>> content = requests.get('https://9g9xhayrh5.execute-api.us-west-2.amazonaws.com/test/data')
>>> response = content.json() # reads the data as JSON, into a dict-like object
>>> response.keys()
[u'data']
Now you can use response['data']

cURL method in Python for JSON feed [duplicate]

This question already has answers here:
How to download a file over HTTP?
(30 answers)
Closed 7 years ago.
While building a flask website, I'm using an external JSON feed to feed the local mongoDB with content. This feed is parsed and fed while repurposing keys from the JSON to keys in Mongo.
One of the available keys from the feed is called "img_url" and contains, guess what, a URL to an image.
Is there a way, in Python, to mimic a PHP-style cURL? I'd like to grab that key, download the image, and store it somewhere locally while keeping the other associated keys, and have that as an entry in my db.
Here is my script up to now:
import json
import sys
import urllib2
from datetime import datetime

import pymongo
import pytz

from utils import slugify
# from utils import logger

client = pymongo.MongoClient()
db = client.artlogic

def fetch_artworks():
    # logger.debug("downloading artwork data from Artlogic")
    AL_artworks = []
    AL_artists = []
    url = "http://feeds.artlogic.net/artworks/artlogiconline/json/"
    while True:
        f = urllib2.urlopen(url)
        data = json.load(f)
        AL_artworks += data['rows']
        # logger.debug("retrieved page %s of %s of artwork data" % (data['feed_data']['page'], data['feed_data']['no_of_pages']))
        # Stop: we are at the last page
        if data['feed_data']['page'] == data['feed_data']['no_of_pages']:
            break
        url = data['feed_data']['next_page_link']
    # Now we have a list called 'artworks' in which all the descriptions are stored.
    # We are going to put them into the mongoDB database,
    # making sure that if the artwork is already encoded (an object with the same id
    # already is in the database) we update the existing description instead of
    # inserting a new one ('upsert').
    # logger.debug("updating local mongodb database with %s entries" % len(artworks))
    for artwork in AL_artworks:
        # Mongo does not like keys that have a dot in their name;
        # this property does not seem to be used anyway, so let us
        # delete it:
        if 'artworks.description2' in artwork:
            del artwork['artworks.description2']
        # upsert into the database:
        db.AL_artworks.update({"id": artwork['id']}, artwork, upsert=True)
        # artwork['artist_id'] is not functioning properly
        db.AL_artists.update({"artist": artwork['artist']},
                             {"artist_sort": artwork['artist_sort'],
                              "artist": artwork['artist'],
                              "slug": slugify(artwork['artist'])},
                             upsert=True)
    # db.meta.update({"subject": "artworks"}, {"updated": datetime.now(pytz.utc), "subject": "artworks"}, upsert=True)
    return AL_artworks

if __name__ == "__main__":
    fetch_artworks()
First, you might like the requests library.
Otherwise, if you want to stick to the stdlib, it will be something along the lines of:
import os
import urllib2
import uuid

def fetchfile(url, dst):
    fi = urllib2.urlopen(url)
    fo = open(dst, 'wb')
    while True:
        chunk = fi.read(4096)
        if not chunk:
            break
        fo.write(chunk)

fetchfile(
    data['feed_data']['next_page_link'],
    os.path.join('/var/www/static', uuid.uuid1().get_hex())
)
With the appropriate exception handling added (I can elaborate if you want, but I'm sure the documentation will be clear enough).
You could put the fetchfile() into a pool of async jobs to fetch many files at once.
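For instance, a rough sketch (my addition, not part of the original answer) using a multiprocessing pool, reusing fetchfile() from above and the same /var/www/static destination:
import multiprocessing
import os
import uuid

def fetch_one(job):
    url, dst = job
    fetchfile(url, dst)

def fetch_all(urls, dst_dir='/var/www/static'):
    jobs = [(url, os.path.join(dst_dir, uuid.uuid1().get_hex())) for url in urls]
    pool = multiprocessing.Pool(4)   # four worker processes; tune to taste
    pool.map(fetch_one, jobs)        # blocks until every download has finished
    pool.close()
    pool.join()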
https://docs.python.org/2/library/json.html
https://docs.python.org/2/library/urllib2.html
https://docs.python.org/2/library/tempfile.html
https://docs.python.org/2/library/multiprocessing.html
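Picking up the requests suggestion from above, a rough equivalent of fetchfile() with that library (a sketch; assumes requests is installed) would be:
import requests

def fetchfile_requests(url, dst):
    r = requests.get(url, stream=True)
    r.raise_for_status()
    with open(dst, 'wb') as fo:
        for chunk in r.iter_content(4096):
            fo.write(chunk)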
