Requirement: To read data from S3 to pass into API
Error: "error": {"code": "ModelStateInvalid", "message": "The request has exceeded the maximum number of validation errors.", "target": "HttpRequest"
When I pass the data directly in the code, as in the document below, it works fine:
def create_doc(self, client):
    """Send one hard-coded transaction document through ``client``.

    ``self.n_docs`` is coerced to ``int``, the document is passed to
    ``client.cr_transc`` and the reply body is parsed as JSON.
    """
    self.n_docs = int(self.n_docs)
    # Address payload, built separately to keep the document literal shallow.
    single_location = {
        'city': 'ABC',
        'country': 'US',
        'line1': 'Main',
        'postalCode': '00000',
        'region': 'CA',
    }
    document = {
        'addresses': {'SingleLocation': single_location},
        'commit': False,
    }
    jsn = client.cr_transc(document).json()
But when I keep the data in a file in S3 and read it from there, the call fails with the error above:
# Failing variant from the question: the document is now fetched from S3 instead of being built inline.
def create_doc(self,client):
self.n_docs = int(self.n_docs)
# NOTE(review): data_from_s3 is declared below with a ``self`` parameter but is called here as a bare name — confirm how it is meant to be invoked.
# NOTE(review): data_from_s3 returns whatever S3Hook.read_key returns; if that is text rather than a dict, ``document`` differs in type from the inline version — confirm.
document = data_from_s3()
response = client.cr_transc(document)
jsn = response.json()
# Reads the object stored under self.data_key in self.bucket_name through S3Hook.read_key and returns the result unchanged.
def data_from_s3(self):
s3 = S3Hook()
data = s3.read_key(bucket_name = self.bucket_name, key = self.data_key)
return data
Below link is for read_key method in airflow
https://airflow.apache.org/docs/apache-airflow/1.10.6/_modules/airflow/hooks/S3_hook.html#S3Hook:~:text=%5Bdocs%5D%20%20%20%20def-,read_key,-(self%2C
Checking the source code:
def read_key(self, key, bucket_name=None):
"""
Reads a key from S3
:param key: S3 key that will point to the file
:type key: str
:param bucket_name: Name of the bucket in which the file is stored
:type bucket_name: str
:return: the body of the object, decoded as UTF-8 text
:rtype: str
"""
obj = self.get_key(key, bucket_name)
# The body is read as bytes and decoded, so callers receive a str.
return obj.get()['Body'].read().decode('utf-8')
This returns a str. You might need to use the json module to convert it:
import json
def create_doc(self, client):
    """Send the JSON document stored in S3 through ``client``.

    ``data_from_s3`` hands back the S3 object as text, so it is parsed with
    ``json.loads`` into a dict before being passed to ``client.cr_transc``.

    :param client: object exposing ``cr_transc(document)``; the value it
        returns must provide a ``json()`` method
    :raises ValueError: if the S3 object is not valid JSON
        (``json.JSONDecodeError`` is a ``ValueError`` subclass)
    """
    self.n_docs = int(self.n_docs)
    # data_from_s3 is an instance method, so it has to be reached through self.
    document = json.loads(self.data_from_s3())  # str -> dict
    response = client.cr_transc(document)
    jsn = response.json()
def data_from_s3(self):
    """Return what ``S3Hook.read_key`` yields for ``self.data_key`` in ``self.bucket_name``."""
    hook = S3Hook()
    return hook.read_key(key=self.data_key, bucket_name=self.bucket_name)
I have the exact same request as below:
Convert a column of json strings into columns of data
Following the suggested solutions and getting the following error: What am I doing incorrectly? I'm learning to use python and make API requests.
***df['json'] = df['json'].map(address',': dict(eval(address)))***
^
`**SyntaxError: invalid syntax**
Below is my code
import requests
import json
import pandas as pd
import dictionary as dict
# Root of the API; both endpoints below are formed by appending to it.
Base_url = 'MY_URL'
TOKEN_EndPoint = Base_url + 'token'
Account_EndPoint = Base_url + 'MY_URL'

# Form fields posted to TOKEN_EndPoint to obtain an access token.
data = {'username': 'MY_USERNAME', 'password': 'MY_PASSWORD', 'grant_type': 'MY_GRANT_TYPE'}
# Requests a bearer token, posts one address lookup with it, then tries to spread the JSON reply into DataFrame columns.
def main():
# Exchange the credentials in ``data`` for an access token.
results = requests.post(url=TOKEN_EndPoint, data=data)
MyToken = results.json()['access_token']
print(MyToken)
MyInputs = GetSourceAddress()
callData = {
'ClientKey': 'MY_CLIENTKEY',
'StreetName': MyInputs['StreetName'],
'CityName' : MyInputs['CityName'],
# 'StateCode' : MyInputs['StateCode'],
'PostalCode' : MyInputs['PostalCode'],
'ManagerVersion': '2'
}
# The payload is serialised to a JSON string to match the content-type header below.
PostFields = json.dumps(callData)
MyHeader = {'Authorization': 'Bearer ' + MyToken,
'content-type': 'application/json'}
results = requests.post(url = Account_EndPoint, data = PostFields,
headers = MyHeader)
address = results.json()
# results.json() = pd.DataFrame(address)
# NOTE(review): ['address'] is a list holding the literal string 'address', not the parsed reply bound to ``address`` above.
df = pd.DataFrame(['address'], columns=['json'])
# NOTE(review): this is the line the SyntaxError points at. The surrounding ** appears to be bold markup from the post, and
# ``address',': ...`` is not a valid expression; Series.map expects a callable or mapping (e.g. ``lambda address: ...``).
# NOTE(review): ``dict`` here is the module imported via ``import dictionary as dict``, not the builtin, and eval() on API
# data is unsafe — confirm what was intended.
**df['json'] = df['json'].map(address',': dict(eval(address)))**
address = df['json'].apply(pd.Series)
# NOTE(review): presumably ``address`` is a DataFrame at this point, in which case iterating it yields column labels; the loop variable also rebinds ``address`` — confirm.
for address in address:
print(address)
def GetSourceAddress():
    """Return the placeholder address fields that ``main`` copies into its request."""
    # The optional 'StateCode' and 'Unit' entries are currently disabled.
    return {
        'StreetName': 'MY_STREETNAME',
        'CityName': 'MY_CITYNAME',
        'PostalCode': 'MY_ZIPCODE',
    }
# Maps fields of ``result`` onto the StreetName/CityName/StateCode/PostalCode keys and returns them as a dict.
# NOTE(review): the function name is masked with ``****`` in this listing, which is not a valid Python identifier.
def GetAddressFrom****(result):
inputs = {
# NOTE(review): the street comes from a Shipping* field while the other values use Billing* fields — confirm this mix is intended.
'StreetName': result['ShippingStreet'],
'CityName': result['BillingCity'],
'StateCode': result['BillingState'],
'PostalCode': result['BillingPostalCode'],
# 'Unit': ''
}
return inputs
# Runs the whole lookup as soon as this module is loaded.
main()
Thanks
I am getting the following error when I try to use a JSON value as the parent node with the anytree module. How can I use the JSON value as a parent?
Here is my code
def Get_Full_Name():
    """Return the value produced by ``names.get_full_name()``."""
    return names.get_full_name()
def Type_Name():
    """Return one of the location types, picked uniformly at random.

    Every entry of the list is eligible. Drawing an index from
    ``range(1, 3)`` would skip index 0, so ``'Room'`` could never be
    selected; ``random.choice`` avoids that index arithmetic.

    :return: ``'Room'``, ``'Building'`` or ``'Bed'``
    """
    ListType = ['Room', 'Building', 'Bed']
    return random.choice(ListType)
# Headers for the POST request below.
headers = {
'accept': 'text/plain',
'Content-Type': 'application/json-patch+json',
}
name = Get_Full_Name()
# NOTE(review): ListValue is not used again in this snippet.
ListValue = Type_Name()
params = (
('name', name),
('type', 'BuildingGroup'),
)
# Request body: a JSON array holding the generated name.
data = '["{}"]'.format(name)
response = requests.post('http://SampleApiURL/', headers=headers,
params=params, data=data)
response_body = response.json()
# presumably response_body is a plain string: it is concatenated here and appended to the URL below — confirm
print("Parent: " + response_body)
headers = {
'accept': 'text/plain',
}
response2 = requests.get('http://SAmpleApiUrl/' + response_body, headers=headers)
response_body1 = response2.json()
# NOTE(review): this is the call behind the "is not of type 'NodeMixin'" error quoted below: the dict is passed
# positionally and ends up being treated as the parent node. Passing its fields as keyword attributes, e.g.
# AnyNode(**response_body1), looks like the intended usage — confirm against the anytree documentation.
RootNode = AnyNode(response_body1)
#ChildL2 = AnyNode(response_content="someValue", parent=RootNode)
exporter = JsonExporter(indent=4, sort_keys=True)
print (exporter.export(RootNode))
The error I am getting is
Parent node {'locationId': 'LocationId', 'name': 'Andrew Rice', 'type': 'BuildingGroup', 'patientId': None, 'children': None} is not of type 'NodeMixin'.
How can I use a Json as a parent Node?
I have a custom data file formatted like this:
{
data = {
friends = {
max = 0 0,
min = 0 0,
},
family = {
cars = {
van = "honda",
car = "ford",
bike = "trek",
},
presets = {
location = "italy",
size = 10,
travelers = False,
},
version = 1,
},
},
}
I want to collect the blocks of data, meaning the string between each set of {}, while maintaining the hierarchy. This data is not in a typical JSON format, so parsing it as JSON is not a possible solution.
My idea was to create a class object like so
class Block:
    """A header string paired with the list of blocks nested under it."""

    def __init__(self, header, children):
        # Both values are stored as given; no copying or validation.
        self.header, self.children = header, children
I would then loop through the data line by line, 'somehow' collecting the necessary data, so that my resulting output would look something like this...
Block("data = {}", [
Block("friends = {max = 0 0,\n min = 0 0,}", []),
Block("family = {version = 1}", [...])
])
In short I'm looking for help on ways I can serialize this into useful data I can then easily manipulate. So my approach is to break into objects by using the {} as dividers.
If anyone has suggestions on ways to better approach this I'm all up for ideas. Thank you again.
So far I've just implemented the basic snippets of code
class Block:
    """A piece of content paired with the list of blocks nested under it."""

    def __init__(self, content, children):
        # Both values are stored as given; no copying or validation.
        self.content, self.children = content, children
def GetBlock(strArr=None):
    """Print how many lines were passed in; block parsing is not implemented yet.

    :param strArr: list of lines read from the data file; ``None`` (the
        default) is treated as an empty list
    :return: ``None``
    """
    # A ``[]`` default would be a single list shared by every call, so None
    # is used as the "nothing passed" sentinel instead.
    if strArr is None:
        strArr = []
    # Parenthesised form works on both Python 2 and Python 3.
    print(len(strArr))
# Characters that open and close a block in the data file.
blockStart, blockEnd = "{", "}"

# Load the file as a list of lines (line endings kept) and pass it to GetBlock.
with open(filepath, 'r') as file:
    data = list(file)
blocks = GetBlock(strArr=data)
You can create a to_block function that takes the lines from your file as an iterator and recursively creates a nested dictionary from those. (Of course you could also use a custom Block class, but I don't really see the benefit in doing so.)
def to_block(lines):
    """Recursively build a nested dict from ``key = value`` lines.

    ``lines`` is consumed as one shared iterator: when a value opens a new
    block (it ends with ``{``), the recursive call keeps reading from the
    same iterator until it meets the closing ``}`` / ``},`` line and then
    returns. The opening ``{`` line of the outermost block must already
    have been skipped by the caller. Leaf values are kept as raw strings,
    trailing comma included.

    :param lines: iterable of text lines (an iterator, a file object or a list)
    :return: dict mapping each key to its raw value string or to a nested dict
    :raises ValueError: if a non-blank line has no ``" = "`` separator
    """
    # iter() returns an existing iterator unchanged, and lets a plain list be
    # shared with the recursive calls instead of being restarted by each one.
    lines = iter(lines)
    block = {}
    for line in lines:
        if not line.strip():
            continue  # tolerate blank lines between entries
        if line.strip().endswith(("}", "},")):
            break
        # Split on the first separator only, so a value may contain " = ".
        parts = line.split(" = ", 1)
        if len(parts) != 2:
            raise ValueError("expected 'key = value', got %r" % line.strip())
        key, value = parts[0].strip(), parts[1].strip()
        if value.endswith("{"):
            value = to_block(lines)
        block[key] = value
    return block
When calling it, you have to strip the first line, though. Also, evaluating the "leaves" to e.g. numbers or strings is left as an exercise to the reader.
>>> to_block(iter(data.splitlines()[1:]))
{'data': {'family': {'version': '1,',
'cars': {'bike': '"trek",', 'car': '"ford",', 'van': '"honda",'},
'presets': {'travelers': 'False,', 'size': '10,', 'location': '"italy",'}},
'friends': {'max': '0 0,', 'min': '0 0,'}}}
Or when reading from a file:
with open("data.txt") as f:
    # Skip the opening "{" line; the default keeps an empty file from
    # raising StopIteration, in which case to_block simply sees no lines.
    next(f, None)
    res = to_block(f)
Alternatively, you can do some preprocessing to transform that string into a JSON(-ish) string and then use json.loads. However, I would not go all the way here but instead just wrap the values in "" (and replace the original " with ' before that); otherwise there is too much risk of accidentally turning a string with spaces into a list or similar. You can sort those out once you've created the JSON data.
>>> data = data.replace('"', "'")
>>> data = re.sub(r'= (.+),$', r'= "\1",', data, flags=re.M)
>>> data = re.sub(r'^\s*(\w+) = ', r'"\1": ', data, flags=re.M)
>>> data = re.sub(r',$\s*}', r'}', data, flags=re.M)
>>> json.loads(data)
{'data': {'family': {'version': '1',
'presets': {'size': '10', 'travelers': 'False', 'location': "'italy'"},
'cars': {'bike': "'trek'", 'van': "'honda'", 'car': "'ford'"}},
'friends': {'max': '0 0', 'min': '0 0'}}}
You can also do this with ast or json, with the help of regex substitutions.
import ast
import json
import re

# Sample input in the custom "key = value" block format.
a = """{
data = {
friends = {
max = 0 0,
min = 0 0,
},
family = {
cars = {
van = "honda",
car = "ford",
bike = "trek",
},
presets = {
location = "italy",
size = 10,
travelers = False,
},
version = 1,
},
},
}"""

# --- with ast ---
# Quote every key and turn `key = ` into `"key":` (raw strings keep the
# regex escapes intact).
a = re.sub(r"(\w+)\s*=\s*", r'"\1":', a)
# Turn space-separated integer runs such as `0 0` into list literals: [0,0].
a = re.sub(r":\s*((?:\d+)(?: \d+)+)", lambda x: ':[' + x.group(1).replace(" ", ",") + "]", a)
# The text is now a valid Python literal (trailing commas are allowed).
# print(...) with one argument behaves the same on Python 2 and Python 3.
print(ast.literal_eval(a))
# Output as printed under Python 2:
#{'data': {'friends': {'max': [0, 0], 'min': [0, 0]}, 'family': {'cars': {'car': 'ford', 'bike': 'trek', 'van': 'honda'}, 'presets': {'travelers': False, 'location': 'italy', 'size': 10}, 'version': 1}}}

# --- with json ---
# JSON forbids trailing commas, so drop any comma directly before a `}`.
a = re.sub(r",(\s*\})", r"\1", a)
# Map Python literals onto their JSON spellings.
a = a.replace(":True", ":true").replace(":False", ":false").replace(":None", ":null")
print(json.loads(a))
# Output as printed under Python 2:
#{u'data': {u'friends': {u'max': [0, 0], u'min': [0, 0]}, u'family': {u'cars': {u'car': u'ford', u'bike': u'trek', u'van': u'honda'}, u'presets': {u'travelers': False, u'location': u'italy', u'size': 10}, u'version': 1}}}
I tried to compare DRF response and input value.
# Test case from the question; test_return_correct_note contains the assertion that fails.
class ViewTest(TransactionTestCase):
reset_sequences = True
# Evaluated once, when the class body runs; setUp stores this value on the note it creates.
current_date_time = timezone.now()
# Creates an authenticated API client, one tag, and one note dated current_date_time.
def setUp(self):
self.client = APIClient()
self.user = User.objects.create_user('hiren', 'a#b.com', 'password')
self.client.force_authenticate(user=self.user)
self.tag = Tag.objects.create(name="Test tag")
Notes.objects.create(tag=self.tag, content="test content", date=self.current_date_time)
# Fetches note 1 and compares the decoded JSON body with an expected dict.
def test_return_correct_note(self):
response = self.client.get('/api/notes/1/')
# NOTE(review): per the failure output quoted below, response.json() carries 'date' as an ISO-8601 string while
# this expected dict holds a datetime object, so the two sides cannot compare equal.
self.assertEqual(response.json(), {'content': 'test content', 'id': 1,
'tag': 1, 'date': self.current_date_time})
Then I got this error :
AssertionError: {'date': '2016-04-09T07:35:28.039393Z', 'co[37 chars]': 1} != {'tag': 1, 'content': 'test content', 'id':[69 chars]TC>)}
{'content': 'test content',
- 'date': '2016-04-09T07:35:28.039393Z',
+ 'date': datetime.datetime(2016, 4, 9, 7, 35, 28, 39393, tzinfo=<UTC>),
'id': 1,
'tag': 1}
What is the correct way to compare django datetime ?
You could either convert the Python datetime object into an ISO time string, or parse the ISO time string into a Python datetime object.
For example
...
'tag': 1, 'date': self.current_date_time.strftime('%Y-%m-%dT%H:%M:%S.%fZ')})
You can make use of the to_representation(...) method of the DateTimeField class as follows:
from django.test import TransactionTestCase
class ViewTest(TransactionTestCase):
    """Checks the note detail endpoint against the note created in ``setUp``."""

    reset_sequences = True
    # Evaluated once, when the class body runs; shared by setUp and the test.
    current_date_time = timezone.now()

    def setUp(self):
        """Create an authenticated client, one tag and one note."""
        self.client = APIClient()
        self.user = User.objects.create_user("hiren", "a#b.com", "password")
        self.client.force_authenticate(user=self.user)
        self.tag = Tag.objects.create(name="Test tag")
        note_fields = {
            "tag": self.tag,
            "content": "test content",
            "date": self.current_date_time,
        }
        Notes.objects.create(**note_fields)

    def test_return_correct_note(self):
        """The JSON body for note 1 matches the stored note."""
        from rest_framework.fields import DateTimeField

        response = self.client.get("/api/notes/1/")
        actual = response.json()
        # Render the datetime through the DRF field so both sides of the
        # comparison hold the same representation.
        rendered_date = DateTimeField().to_representation(self.current_date_time)
        expected = {
            "content": "test content",
            "id": 1,
            "tag": 1,
            "date": rendered_date,
        }
        self.assertEqual(actual, expected)