Add JSON to a file - python

I have this python code:
import json

def write_json(new_data, filename='test.json'):
    with open(filename, 'r+') as file:
        file_data = json.load(file)
        file_data.append(new_data)
        file.seek(0)
        json.dump(file_data, file, indent=4)
e = {
    "1": [
        {
            "id": "df8ec0tdrhseragerse4-a3e0-8aa2da5119d3",
            "name": "Deezomiro"
        }
    ]
}
write_json(e)
This should add the JSON to the end of the current test.json file. However, when I run it I get this error:
Traceback (most recent call last):
  File "C:\Users\ArtyF\AppData\Roaming\Microsoft\Windows\Network Shortcuts\HWMonitor\copenheimer\test.py", line 16, in <module>
    write_json(e)
  File "C:\Users\ArtyF\AppData\Roaming\Microsoft\Windows\Network Shortcuts\HWMonitor\copenheimer\test.py", line 5, in write_json
    file_data.append(new_data)
AttributeError: 'dict' object has no attribute 'append'
How can I make this code add this json to the end of the file?

As the error says, dictionaries don't have an append method. They do have update, which merges the new keys into the existing dict; after updating, the result still has to be written back to the file:

file_data.update(new_data)
file.seek(0)
json.dump(file_data, file, indent=4)
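
Putting it together, a minimal sketch of the corrected function (the truncate() call is an extra precaution not in the original answer, for the case where the rewritten JSON is shorter than the old file contents):

import json

def write_json(new_data, filename='test.json'):
    # r+ opens for reading and writing, so the existing JSON can be loaded first
    with open(filename, 'r+') as file:
        file_data = json.load(file)   # existing top-level dict
        file_data.update(new_data)    # merge new keys; duplicates are replaced
        file.seek(0)                  # rewind before rewriting
        json.dump(file_data, file, indent=4)
        file.truncate()               # drop leftover bytes from the old content

Note that update overwrites the value of any key that already exists; if the goal is to append to the list stored under "1", that list has to be extended instead.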


JSON Encoding Error While Loading String from a File

After opening and before loading a JSON file in Python, the code ends up getting a string filled with Unicode blocks between every character. It seems to be an encoding problem; is there any easy way to solve it?
import json
import io

# read file
with open('BOVA111618484700 (1).json', 'r', encoding="ASCII") as myfile:
    data = myfile.read()
print(data)

# parse file
obj = json.loads(data)
print(data) shows:
[�
�{�
�"�d�a�t�a�h�o�r�a�"�:� �"�2�0�2�1�.�0�4�.�1�5� �1�1�:�0�5�:�0�0�"�,�
�"�m�i�l�i�s�e�c�o�n�d�s�"�:� �"�1�6�1�8�4�8�4�7�0�0�2�3�4�"�,�
�"�b�i�d�"�:� �"�1�1�6�.�3�2�"�,�
�"�a�s�k�"�:� �"�1�1�6�.�3�6�"�,�
�"�l�a�s�t�"�:� �"�1�1�6�.�3�2�"�,�
�"�v�o�l�u�m�e�"�:� �"�1�"�,�
�"�f�l�a�g�s�"�:� �"�2�"�
�}�,� #json string continues...
when it should show:
[
    {
        "datahora": "2021.04.15 11:05:00",
        "miliseconds": "1618484700234",
        "bid": "116.32",
        "ask": "116.36",
        "last": "116.32",
        "volume": "1",
        "flags": "2"
    }, #json string continues...
After the print, the json.loads call raises this error:
JSONDecodeError: Expecting value: line 1 column 2 (char 1)
Thanks @Grismar and @tevemadar, the encoding of the file was actually "UTF-16 LE"; passing this to the open function solved everything!
import json
import io

# read file
with open('BOVA111618484700 (1).json', 'r', encoding="UTF-16 LE") as myfile:
    data = myfile.read()
print(data)

# parse file
obj = json.loads(data)
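
If the encoding isn't known up front, one way to guess it is to check the first bytes of the file for a byte-order mark. This is a minimal sketch, not a general-purpose detector; it only recognizes the common BOM-marked encodings, and the filename is the one from the question:

import codecs

def sniff_encoding(filename, default='utf-8'):
    # read just enough bytes to look for a byte-order mark
    with open(filename, 'rb') as f:
        head = f.read(4)
    if head.startswith(codecs.BOM_UTF16_LE) or head.startswith(codecs.BOM_UTF16_BE):
        return 'utf-16'      # Python consumes the BOM and picks the endianness
    if head.startswith(codecs.BOM_UTF8):
        return 'utf-8-sig'   # strips the UTF-8 BOM on read
    return default

with open('BOVA111618484700 (1).json', 'r',
          encoding=sniff_encoding('BOVA111618484700 (1).json')) as myfile:
    data = myfile.read()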

How to properly write a python function that reads keys from a separate json file

I am working on a python script that will read json and look for a specific key. So far, I can get it to work if I include my json values as a variable. However, my end goal is to read the json values from a file. Here is the code that I've been working on:
import jwt
import os
jwks = {
    "keys": [
        {
            "kty": "RSA",
            "use": "sig",
            "kid": "123456ABCDEF",
            "x5t": "123456ABCDEF",
            "n": "qa9f6h6h52XbX0iAgxKgEDlRpbJw",
            "e": "AQAB",
            "x5c": [
                "43aw7PQjxt4/MpfNMS2BfZ5F8GVSVG7qNb352cLLeJg5rc398Z"
            ]
        },
        {
            "kty": "RSA",
            "use": "sig",
            "kid": "987654ghijklmnoP",
            "x5t": "987654ghijklmnoP",
            "n": "qa9f6h6h52XbX0iAgxKgEDlRpbJw",
            "e": "AQAB",
            "x5c": [
                "1234R46Qjxt4/MpfNMS2BfZ5F8GVSVG7qNb352cLLeJg5rc398Z"
            ]
        }
    ]
}
class InvalidAuthorizationToken(Exception):
    def __init__(self, details):
        super().__init__('Invalid authorization token: ' + details)

def get_jwk(kid):
    for jwk in jwks.get('keys'):
        if jwk.get('kid') == kid:
            print('This is jwk:', jwk)
            return jwk
    raise InvalidAuthorizationToken('kid not recognized')

# Execute
get_jwk('123456ABCDEF')
get_jwk('987654ghijklmnoP')
Here, what I am trying to do is move those same values into a separate file (jwks-keys) and read them in as a variable. However, I'm getting the following error and don't understand what I've done wrong. How do I properly construct this function?
Here's the trace:
Traceback (most recent call last):
  File "printjwks2.py", line 59, in <module>
    get_jwk('123456ABCDEF')
  File "printjwks2.py", line 51, in get_jwk
    for jwk in jwks.get('keys'):
AttributeError: 'str' object has no attribute 'get'
Here's the function:
def get_jwk(kid):
    with open('testkeys/jwks-keys', 'r') as az:
        jwks = az.read()
    for jwk in jwks.get('keys'):
        if jwk.get('kid') == kid:
            print(jwk)
            return jwk
    raise InvalidAuthorizationToken('kid not recognized')
with open('testkeys/jwks-keys', 'r') as az:
    jwks = az.read()

Here jwks is just a string object, because az.read() returns the file contents as a string. So you have to deserialize the JSON data into Python objects before applying jwks.get('keys').
>>> sample_json = '{"json_key": "json_value"}'
>>> sample_json.get("json_key")
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
AttributeError: 'str' object has no attribute 'get'
>>> import json
>>> json.loads(sample_json).get("json_key")
'json_value'
Since you are reading JSON from a file, you can use json.load to deserialize fp (a .read()-supporting text file or binary file containing a JSON document) to a Python object
>>> import json
>>> with open('testkeys/jwks-keys', 'r') as az:
...     jwks = json.load(az)
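
Putting it together, a sketch of the corrected function, assuming testkeys/jwks-keys contains the same JSON structure as the jwks variable shown above:

import json

def get_jwk(kid):
    # json.load deserializes the file into a dict instead of a string
    with open('testkeys/jwks-keys', 'r') as az:
        jwks = json.load(az)
    for jwk in jwks.get('keys'):
        if jwk.get('kid') == kid:
            print('This is jwk:', jwk)
            return jwk
    raise InvalidAuthorizationToken('kid not recognized')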

UnicodeDecodeError for md5 id bulk importing data into elasticsearch

I have written a simple Python script to import data into elasticsearch using the bulk API.
# -*- encoding: utf-8 -*-
import csv
import datetime
import hashlib

from elasticsearch import Elasticsearch
from elasticsearch.helpers import bulk
from dateutil.relativedelta import relativedelta

ORIGINAL_FORMAT = '%y-%m-%d %H:%M:%S'
INDEX_PREFIX = 'my-log'
INDEX_DATE_FORMAT = '%Y-%m-%d'
FILE_ADDR = '/media/zeinab/ZiZi/Elastic/python/elastic-test/elasticsearch-import-data/sample_data/sample.csv'

def set_data(input_file):
    with open(input_file) as csvfile:
        reader = csv.DictReader(csvfile)
        for row in reader:
            sendtime = datetime.datetime.strptime(row['sendTime'].split('.')[0], ORIGINAL_FORMAT)
            yield {
                "_index": '{0}-{1}_{2}'.format(
                    INDEX_PREFIX,
                    sendtime.replace(day=1).strftime(INDEX_DATE_FORMAT),
                    (sendtime.replace(day=1) + relativedelta(months=1)).strftime(INDEX_DATE_FORMAT)),
                "_type": 'data',
                '_id': hashlib.md5("{0}{1}{2}{3}{4}".format(sendtime, row['IMSI'], row['MSISDN'], int(row['ruleRef']), int(row['sponsorRef']))).digest(),
                "_source": {
                    'body': {
                        'status': int(row['status']),
                        'sendTime': sendtime
                    }
                }
            }

if __name__ == "__main__":
    es = Elasticsearch(['http://{0}:{1}'.format('my.host.ip.addr', 9200)])
    es.indices.delete(index='*')
    success, _ = bulk(es, set_data(FILE_ADDR))
This comment helped me write the set_data method.
Unfortunately I get this exception:
/usr/bin/python2.7 /media/zeinab/ZiZi/Elastic/python/elastic-test/elasticsearch-import-data/import_bulk_data.py
Traceback (most recent call last):
  File "/media/zeinab/ZiZi/Elastic/python/elastic-test/elasticsearch-import-data/import_bulk_data.py", line 59, in <module>
    success, _ = bulk(es, set_data(source_file))
  File "/usr/local/lib/python2.7/dist-packages/elasticsearch/helpers/__init__.py", line 257, in bulk
    for ok, item in streaming_bulk(client, actions, **kwargs):
  File "/usr/local/lib/python2.7/dist-packages/elasticsearch/helpers/__init__.py", line 180, in streaming_bulk
    client.transport.serializer):
  File "/usr/local/lib/python2.7/dist-packages/elasticsearch/helpers/__init__.py", line 60, in _chunk_actions
    action = serializer.dumps(action)
  File "/usr/local/lib/python2.7/dist-packages/elasticsearch/serializer.py", line 50, in dumps
    raise SerializationError(data, e)
elasticsearch.exceptions.SerializationError: ({u'index': {u'_type': 'data', u'_id': '8\x1dI\xa2\xe9\xa2H-\xa6\x0f\xbd=\xa7CY\xa3', u'_index': 'my-log-2017-04-01_2017-05-01'}}, UnicodeDecodeError('utf8', '8\x1dI\xa2\xe9\xa2H-\xa6\x0f\xbd=\xa7CY\xa3', 3, 4, 'invalid start byte'))
Process finished with exit code 1
I can insert this data into elasticsearch successfully using the index API:
es.index(
    index='{0}-{1}_{2}'.format(
        INDEX_PREFIX,
        sendtime.replace(day=1).strftime(INDEX_DATE_FORMAT),
        (sendtime.replace(day=1) + relativedelta(months=1)).strftime(INDEX_DATE_FORMAT)
    ),
    doc_type='data',
    id=hashlib.md5("{0}{1}{2}{3}{4}".format(sendtime, row['IMSI'], row['MSISDN'], int(row['ruleRef']), int(row['sponsorRef']))).digest(),
    body={
        'status': int(row['status']),
        'sendTime': sendtime
    }
)
But the issue with the index API is that it's very slow; it needs about 2 seconds to import just 50 records. I hoped the bulk API would help with the speed.
According to the hashlib documentation, the digest method will
Return the digest of the data passed to the update() method so far. This is a bytes object of size digest_size which may contain bytes in the whole range from 0 to 255.
So the resulting bytes may not be decodable to unicode.
>>> id_ = hashlib.md5('abc'.encode('utf-8')).digest()
>>> id_
b'\x90\x01P\x98<\xd2O\xb0\xd6\x96?}(\xe1\x7fr'
>>> id_.decode('utf-8')
Traceback (most recent call last):
File "<console>", line 1, in <module>
UnicodeDecodeError: 'utf-8' codec can't decode byte 0x90 in position 0: invalid start byte
The hexdigest method will produce a string as output; from the docs:
Like digest() except the digest is returned as a string object of double length, containing only hexadecimal digits. This may be used to exchange the value safely in email or other non-binary environments.
>>> id_ = hashlib.md5('abc'.encode('utf-8')).hexdigest()
>>> id_
'900150983cd24fb0d6963f7d28e17f72'
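
So the fix is to swap digest() for hexdigest() when building the _id in set_data. A sketch of just that line, with everything else unchanged from the question's code:

# hexdigest() returns plain ASCII hex, which the bulk serializer can encode
'_id': hashlib.md5("{0}{1}{2}{3}{4}".format(
    sendtime, row['IMSI'], row['MSISDN'],
    int(row['ruleRef']), int(row['sponsorRef']))).hexdigest(),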

Python: Unable to convert JSON file to CSV [duplicate]

This question already has an answer here:
Python TypeError: expected string or buffer
(1 answer)
Closed 5 years ago.
I have the code below, which should convert a JSON file to a CSV file:
import json
import csv

infractions = open("C:\\Users\\Alan\\Downloads\\open.json", "r")
infractions_parsed = json.loads(infractions)
infractions_data = infractions_parsed['infractions']

# open a file for writing
csv_data = open('Data.csv', 'w')

# create the csv writer object
csvwriter = csv.writer(csv_data)

count = 0
for inf in infractions_data:
    if count == 0:
        header = inf.keys()
        csvwriter.writerow(header)
        count += 1
    csvwriter.writerow(inf.values())
employ_data.close()
However, I get this error. Any reason why this should be?
C:\Users\Alan\Desktop>python monkeytennis.py
Traceback (most recent call last):
File "monkeytennis.py", line 5, in <module>
infractions_parsed = json.loads(infractions)
File "C:\Python27\lib\json\__init__.py", line 339, in loads
return _default_decoder.decode(s)
File "C:\Python27\lib\json\decoder.py", line 364, in decode
obj, end = self.raw_decode(s, idx=_w(s, 0).end())
TypeError: expected string or buffer
The JSON is in this format:
{
    "count": 666,
    "query": "righthere",
    "infractions": [{
        "status": "open",
        "severity": 2.0,
        "title": "Blah blah blah",
        "coals": [1, 1],
        "date": "2017-04-22T23:10:07",
        "name": "Joe Bloggs"
    },...
infractions is a file object, which can't be passed directly to json.loads(). Either read it first:
infractions_parsed = json.loads(infractions.read())
or use json.load (without the 's'), which accepts a file-like object:
infractions_parsed = json.load(infractions)
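
For completeness, a minimal sketch of the whole corrected script. Note that the original also calls employ_data.close() on a name that was never defined; using with blocks sidesteps that:

import json
import csv

# json.load accepts the open file object directly
with open("C:\\Users\\Alan\\Downloads\\open.json", "r") as infractions:
    infractions_parsed = json.load(infractions)
infractions_data = infractions_parsed['infractions']

with open('Data.csv', 'w') as csv_data:
    csvwriter = csv.writer(csv_data)
    for count, inf in enumerate(infractions_data):
        if count == 0:
            csvwriter.writerow(inf.keys())   # header row from the first record
        csvwriter.writerow(inf.values())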

Error getting json using oauthlib python

I'm trying to get pretty JSON but always get the same error, expected string or buffer. My code is below:
import urllib2
import json
import logging

from libs.requests_oauthlib import OAuth1Session
import libs.requests2

coke = OAuth1Session('user_key',
                     client_secret='user_secret',
                     resource_owner_key='key',
                     resource_owner_secret='key_secret')

headers = {'content-type': 'application/json'}
url = "http://test.hdp.pengostores.mx/api/rest/orders"
response = coke.get(url, headers=headers)
self.response.out.write(response.text)
My log:
<type 'exceptions.Exception'> (/base/data/home/apps/s~precise-line-76299minutos/devvic.398776853921596377/delivery/handlers.py:5278)
Traceback (most recent call last):
  File "/base/data/home/apps/s~precise-line-76299minutos/devvic.398776853921596377/delivery/handlers.py", line 5274, in get
    response = json.loads(coke.get(url,headers=headers))
  File "/base/data/home/runtimes/python27/python27_dist/lib/python2.7/json/__init__.py", line 338, in loads
    return _default_decoder.decode(s)
  File "/base/data/home/runtimes/python27/python27_dist/lib/python2.7/json/decoder.py", line 365, in decode
    obj, end = self.raw_decode(s, idx=_w(s, 0).end())
TypeError: expected string or buffer
In the error message you use
json.loads( coke.get(...) )
but get() returns a response object; you have to use get().text or get().content (instead of get()) to get text or bytes.
Or try
data = coke.get(...).json()
to get the JSON converted directly to Python data.
EDIT: to pretty-print Python data, use pprint.pprint(data), or convert the Python data to a JSON string using json.dumps().
See example from doc: https://docs.python.org/2/library/json.html
Pretty printing:
>>> import json
>>> print json.dumps({'4': 5, '6': 7}, sort_keys=True,
... indent=4, separators=(',', ': '))
{
"4": 5,
"6": 7
}
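
Applied to the question's code, a minimal sketch (url, headers, and the self.response handler are the ones from the question):

response = coke.get(url, headers=headers)
data = response.json()   # parse the response body as JSON
# re-serialize with indentation for pretty output
self.response.out.write(json.dumps(data, sort_keys=True, indent=4))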
