Delete info from a JSON using Python - python

I am currently exporting a database from Firebase into a JSON file and want to upload this to BigQuery. However, some of the field names in the database hold nested information and BigQuery does not accept it this way. How can I delete 'Peripherals' from every dataset in my JSON in which it is present? It is not present in every dataset, though. I have provided an example of what the JSON data looks like below. Thanks for the help!
{"AppName": "DataWorks", "foundedPeripheralCount": 1, "version": "1.6.1(8056)", "deviceType": "iPhone 6", "createdAt": "2017-04-05T07:05:30.408Z", "updatedAt": "2017-04-05T07:08:49.569Z", "Peripherals": {"1CA726ED-32B1-43B4-9071-B58BBACE20A8": "Arduino"}, "connectedPeripheralCount": 1, "iOSVersion": "10.2.1"}
{"objectId": "20H5Hg2INB", "foundedPeripheralCount": 0, "DeviceVendorID": "5B7F085E-B3B6-4270-97DC-F42903CDEAC1", "version": "1.3.5(5801)", "deviceType": "iPhone 6", "createdAt": "2015-11-10T06:16:45.459Z", "updatedAt": "2015-11-10T06:16:45.459Z", "connectedPeripheralCount": 0, "iOSVersion": "9.1"}
{"AppName": "DataWorks", "foundedPeripheralCount": 2, "version": "1.6.2(8069)", "deviceType": "iPhone 6s", "createdAt": "2017-04-12T10:05:05.937Z", "updatedAt": "2017-07-06T07:33:02.006Z", "Peripherals": {"060EBAFD-3120-4AAD-8B0A-EC14A323FA25": "28902 ", "identifierInternalSensors": "Internal Sensors", "0521A273-FAA5-462E-B9EC-FBB3D60F5E99": "28895 "}, "connectedPeripheralCount": 8, "iOSVersion": "10.2.1"}
I have tried this
import json
# NOTE(review): json.load() parses exactly ONE JSON document, but this file
# holds one JSON object per line (JSON Lines / newline-delimited JSON), which
# is why it raises "ValueError: Extra data" at line 2 — see traceback below.
with open('firetobq_peripheral.json') as out_file:
    out = json.load(out_file)
    for element in out:
        # Would also raise KeyError for rows that have no 'Peripherals' key.
        del element['Peripherals']
print(out)
but I receive this error
Traceback (most recent call last):
File "editjson.py", line 3, in <module>
out = json.load(out_file)
File "/System/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/json/__init__.py", line 290, in load
**kw)
File "/System/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/json/__init__.py", line 338, in loads
return _default_decoder.decode(s)
File "/System/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/json/decoder.py", line 369, in decode
raise ValueError(errmsg("Extra data", s, end, len(s)))
ValueError: Extra data: line 2 column 1 - line 629 column 1 (char 311 - 203056)

It looks like the data in 'firetobq_peripheral.json' is not valid json. If each row is on a new line you can use this code:
import json

def strip_peripherals_jsonl(in_path='firetobq_peripheral.json',
                            out_path='firetobq_peripheral.json'):
    """Read newline-delimited JSON, drop the 'Peripherals' key from every
    record, and write the result back as a single pretty-printed JSON array.

    Parameters:
        in_path:  file with one JSON object per line (JSON Lines).
        out_path: destination for the cleaned JSON array (defaults to
                  overwriting the input, matching the original script).
    Returns:
        The list of cleaned record dicts.
    """
    records = []
    with open(in_path, 'r') as in_file:
        for line in in_file:          # iterate lazily; no need for readlines()
            line = line.strip()
            if not line:              # tolerate blank lines between records
                continue
            record = json.loads(line)
            # pop() removes the key even when its value is falsy (e.g. {}):
            # the original `if d.get('Peripherals')` silently kept those.
            record.pop('Peripherals', None)
            records.append(record)
    with open(out_path, 'w') as out_file:
        # json.dump produces the brackets/commas correctly — no manual
        # '[', ',' and ']' bookkeeping required.
        json.dump(records, out_file, indent=2)
    return records

if __name__ == '__main__':
    strip_peripherals_jsonl()
Use this code for properly formatted json data:
import json

def strip_peripherals_json(in_path='firetobq_peripheral.json',
                           out_path='firetobq_peripheral.json'):
    """Load a JSON array of records, drop 'Peripherals' from each, rewrite.

    Parameters:
        in_path:  file containing one JSON array of objects.
        out_path: destination file (defaults to overwriting the input,
                  matching the original script).
    Returns:
        The list of cleaned record dicts.
    """
    with open(in_path, 'r') as in_file:
        dicts = json.load(in_file)
    for d in dicts:
        # pop() with a default handles both a missing key and a falsy value
        # (the original `if d.get('Peripherals')` missed "Peripherals": {}).
        d.pop('Peripherals', None)
    with open(out_path, 'w') as out_file:
        json.dump(dicts, out_file, indent=2)
    return dicts

if __name__ == '__main__':
    strip_peripherals_json()

Related

Why does this json file cause an 'expecting value' error?

I'm trying to load a file as JSON.
Here's the data file:
{"Title":"Assignment Checker",
"Q1_Solution":10517,
"Q2_Solution":12,
"Q3_Solution":52,
"Q4_Solution":84,
"Q5_Solution":50,
"Q6_Solution":1971,
"Q7_Solution":("Hip", "Flat", "Gambrel", "Mansard", "Shed", "Gable")}
Here's the code that fails:
f = open("checkerData.json")  # NOTE(review): prefer `with open(...)` so the handle closes even on error
checkerData = json.load(f)    # raises JSONDecodeError here: the data file uses (...) where JSON requires [...]
f.close()
I get this error:
JSONDecodeError: Expecting value: line 8 column 16 (char 166)
Parentheses are not valid in JSON to represent a list (array).
Fix the JSON to use [ and ] instead.
This is valid:
{
"Title": "Assignment Checker",
"Q1_Solution": 10517,
"Q2_Solution": 12,
"Q3_Solution": 52,
"Q4_Solution": 84,
"Q5_Solution": 50,
"Q6_Solution": 1971,
"Q7_Solution": [
"Hip",
"Flat",
"Gambrel",
"Mansard",
"Shed",
"Gable"
]
}

Python organising data From TXT and writing to csv

I have a data dump in this format:
[
{
"vaultId": "429d60edc11df0a576cd9173e8c0d0de8792538862db0122848b87a96ecdf537",
"loanSchemeId": "MIN150",
"ownerAddress": "df1q7crh6d3dscj3sajpklehpnwdwhvs0l0jm9fr7s",
"state": "inLiquidation",
"liquidationHeight": 1488540,
"batchCount": 2,
"liquidationPenalty": 5,
"batches": [
{
"index": 0,
"collaterals": [
"282.34948388#DFI"
],
"loan": "6.38045570#BABA"
},
{
"index": 1,
"collaterals": [
"300.16175126#DFI"
],
"loan": "0.26745972#GOOGL"
}
]
},
]
which I have written to a txt file. I am trying to use Pandas to make a dataframe and then write it to CSV, however I am receiving this error: ValueError: Length mismatch: Expected axis has 1729 elements, new values have 5 elements. This is the code I have used to form the dataframe. Many thanks in advance.
import pandas as pd
import simplejson as json
# NOTE(review): read_json needs strictly valid JSON — the sample above has a
# trailing comma after the last object, which is what produces
# "ValueError: Expected object or value" in the traceback below.
df = pd.read_json('auctions.txt')
# NOTE(review): 'batches' holds a LIST of batch dicts per vault; without an
# explode('batches') first, apply(pd.Series) expands positionally and the
# column count cannot match the 3 target names — see the accepted fix below.
df[['index', 'collaterals', 'loan']] = df.batches.apply(pd.Series)
Edit:
This is my error with changed code
Traceback (most recent call last):
File "c:\Users\iones\Documents\DeFI Chain Auction Bot\start.py", line 3, in <module>
df = pd.read_json('auctions.json')
File "C:\Users\iones\Documents\DeFI Chain Auction Bot\.venv\lib\site-packages\pandas\util\_decorators.py", line 207, in wrapper
return func(*args, **kwargs)
File "C:\Users\iones\Documents\DeFI Chain Auction Bot\.venv\lib\site-packages\pandas\util\_decorators.py", line 311, in wrapper
return func(*args, **kwargs)
File "C:\Users\iones\Documents\DeFI Chain Auction Bot\.venv\lib\site-packages\pandas\io\json\_json.py", line 614, in read_json
return json_reader.read()
File "C:\Users\iones\Documents\DeFI Chain Auction Bot\.venv\lib\site-packages\pandas\io\json\_json.py", line 748, in read
obj = self._get_object_parser(self.data)
File "C:\Users\iones\Documents\DeFI Chain Auction Bot\.venv\lib\site-packages\pandas\io\json\_json.py", line 770, in _get_object_parser
obj = FrameParser(json, **kwargs).parse()
File "C:\Users\iones\Documents\DeFI Chain Auction Bot\.venv\lib\site-packages\pandas\io\json\_json.py", line 885, in parse
self._parse_no_numpy()
File "C:\Users\iones\Documents\DeFI Chain Auction Bot\.venv\lib\site-packages\pandas\io\json\_json.py", line 1140, in _parse_no_numpy
loads(json, precise_float=self.precise_float), dtype=None
ValueError: Expected object or value
Load the JSON using read_json and then convert the batch dictionary into columns
import pandas as pd
df = pd.read_json('data.json')
# NOTE(review): this assumes each row's 'batches' entry is a single dict;
# when it is still a list of dicts, run df = df.explode('batches') first,
# as done in the final fixed version further down.
df[['index', 'collaterals', 'loan']] = df.batches.apply(pd.Series)
If you want the collaterals in separate rows
df = df.explode('collaterals')
Output
print(df[['vaultId', 'liquidationHeight', 'index', 'collaterals', 'loan']])
vaultId liquidationHeight index collaterals loan
0 6af21886adcb92c4669a8a901975eb9b9d5544c67e4292... 1489770 0 2326.00000000#DFI 2.24978028#GOOGL
1 6af21886adcb92c4669a8a901975eb9b9d5544c67e4292... 1489770 1 2326.00000000#DFI 2.24978028#GOOGL
EDIT:
According to what you have said, it looks like the data is corrupt i.e. in an invalid JSON format. Please correct it at source. I have used the below data sample.
{
"vaultId": "6af21886adcb92c4669a8a901975eb9b9d5544c67e429267841491649810958a",
"ownerAddress": "df1qhh9ek2d98mxjeh58xdsfj7ad2k7q4d4kwshsxr",
"liquidationHeight": 1489770,
"batchCount": 2,
"batches": [
{
"index": 0,
"collaterals": [
"2326.00000000#DFI"
],
"loan": "2.24978028#GOOGL"
},
{
"index": 1,
"collaterals": [
"2326.00000000#DFI"
],
"loan": "2.24978028#GOOGL"
}
]
}
Fixed by using this
def jsontocsv(json_path='auctions.json', csv_path='auctions.csv'):
    """Flatten the vault-auction JSON into one row per batch and write a CSV.

    Parameters:
        json_path: JSON file containing an array of vault objects, each with
                   a 'batches' list of {'index', 'collaterals', 'loan'} dicts.
                   Defaults keep the original hard-coded filenames.
        csv_path:  destination CSV file.
    Returns:
        The flattened DataFrame (also printed and written to csv_path).
    """
    df = pd.read_json(json_path)
    # One row per batch: 'batches' holds a list of dicts per vault.
    df = df.explode('batches')
    # Expand each batch dict into its own three columns.
    df[['index', 'collaterals', 'loan']] = df.batches.apply(pd.Series)
    print(df[['vaultId', 'liquidationHeight', 'index', 'collaterals', 'loan']])
    df.to_csv(csv_path)
    return df

/model/train http API giving 500 error when providing “nlu” data in json

I am trying to train a model using the HTTP API and JSON data; below is the code.
import requests
import json
# NOTE(review): the 500 is caused by the "config" string below, not by the
# request itself. Rasa parses it as YAML, and ruamel fails with "did not find
# expected key" at line 11 (see traceback) because "analyzer", "min_ngram",
# "max_ngram" and "epochs" are flush-left instead of being indented two
# spaces under their respective "- name: ..." pipeline items.
data = {
    "config": "language: en\npipeline:\n- name: WhitespaceTokenizer\n- name: RegexFeaturizer\n- name: LexicalSyntacticFeaturizer\n- name: CountVectorsFeaturizer\n- name: CountVectorsFeaturizer\nanalyzer: \"char_wb\"\nmin_ngram: 1\nmax_ngram: 4\n- name: DIETClassifier\nepochs: 100\n- name: EntitySynonymMapper\n- name: ResponseSelector\nepochs: 100",
    # The NLU training data must be embedded as a JSON *string*, hence dumps().
    "nlu": json.dumps({
        "rasa_nlu_data": {
            "regex_features": [],
            "entity_synonyms": [],
            "common_examples": [
                {
                    "text": "i m looking for a place to eat",
                    "intent": "restaurant_search",
                    "entities": []
                },
                {
                    "text": "I want to grab lunch",
                    "intent": "restaurant_search",
                    "entities": []
                },
                {
                    "text": "I am searching for a dinner spot",
                    "intent": "restaurant_search",
                    "entities": []
                },
            ]
        }
    }),
    "force": False,
    "save_to_default_model_directory": True
}
r = requests.post('http://localhost:5005/model/train', json=data)
It gives me 500 error. Below is the log for error:
2020-09-30 07:40:37,511 [DEBUG] Traceback (most recent call last):
File "/home/Documents/practice/rasa/test1/venv/lib/python3.6/site-packages/rasa/server.py", line 810, in train
None, functools.partial(train_model, **info)
File "/usr/lib/python3.6/concurrent/futures/thread.py", line 56, in run
result = self.fn(*self.args, **self.kwargs)
File "/home/Documents/practice/rasa/test1/venv/lib/python3.6/site-packages/rasa/train.py", line 50, in train
additional_arguments=additional_arguments,
File "uvloop/loop.pyx", line 1456, in uvloop.loop.Loop.run_until_complete
File "/home/Documents/practice/rasa/test1/venv/lib/python3.6/site-packages/rasa/train.py", line 83, in train_async
config, domain, training_files
File "/home/Documents/practice/rasa/test1/venv/lib/python3.6/site-packages/rasa/importers/importer.py", line 79, in load_from_config
config = io_utils.read_config_file(config_path)
File "/home/Documents/practice/rasa/test1/venv/lib/python3.6/site-packages/rasa/utils/io.py", line 188, in read_config_file
content = read_yaml(read_file(filename))
File "/home/Documents/practice/rasa/test1/venv/lib/python3.6/site-packages/rasa/utils/io.py", line 124, in read_yaml
return yaml_parser.load(content) or {}
File "/home/Documents/practice/rasa/test1/venv/lib/python3.6/site-packages/ruamel/yaml/main.py", line 343, in load
return constructor.get_single_data()
File "/home/Documents/practice/rasa/test1/venv/lib/python3.6/site-packages/ruamel/yaml/constructor.py", line 111, in get_single_data
node = self.composer.get_single_node()
File "_ruamel_yaml.pyx", line 706, in _ruamel_yaml.CParser.get_single_node
File "_ruamel_yaml.pyx", line 724, in _ruamel_yaml.CParser._compose_document
File "_ruamel_yaml.pyx", line 775, in _ruamel_yaml.CParser._compose_node
File "_ruamel_yaml.pyx", line 891, in _ruamel_yaml.CParser._compose_mapping_node
File "_ruamel_yaml.pyx", line 904, in _ruamel_yaml.CParser._parse_next_event
ruamel.yaml.parser.ParserError: while parsing a block mapping
in "<unicode string>", line 1, column 1
did not find expected key
in "<unicode string>", line 11, column 1
When I train model using terminal commands and json file, it is trained successfully. I think I am missing some formatting required for /model/train api. Can someone tell me where am I going wrong?
I am using rasa version 1.10.14.
Thank you in advance.
Turns out that the string in config was not proper. It was giving error when training model due to double quotes used with escape characters. I made some tweaks in the config and it trained the model successfully

Python: Unable to convert JSON file to CSV [duplicate]

This question already has an answer here:
Python TypeError: expected string or buffer
(1 answer)
Closed 5 years ago.
I have the code below which should convert a JSON file to a CSV file
import json
import csv
infractions = open("C:\\Users\\Alan\\Downloads\\open.json","r")
# BUG (the posted TypeError): json.loads() expects a string, but `infractions`
# is a file object — use json.load(infractions) or json.loads(infractions.read()).
infractions_parsed = json.loads(infractions)
infractions_data = infractions_parsed['infractions']
# open a file for writing
csv_data = open('Data.csv', 'w')
# create the csv writer object
csvwriter = csv.writer(csv_data)
count = 0
for inf in infractions_data:
    if count == 0:
        # First record: emit its keys as the CSV header row.
        header = inf.keys()
        csvwriter.writerow(header)
        count += 1
    csvwriter.writerow(inf.values())
# BUG: `employ_data` is never defined, so this line would raise NameError;
# presumably csv_data.close() was intended.
employ_data.close()
However, I get this error. Any reason why this should be?
C:\Users\Alan\Desktop>python monkeytennis.py
Traceback (most recent call last):
File "monkeytennis.py", line 5, in <module>
infractions_parsed = json.loads(infractions)
File "C:\Python27\lib\json\__init__.py", line 339, in loads
return _default_decoder.decode(s)
File "C:\Python27\lib\json\decoder.py", line 364, in decode
obj, end = self.raw_decode(s, idx=_w(s, 0).end())
TypeError: expected string or buffer
JSON is in format:
{
"count": 666,
"query": "righthere",
"infractions": [{
"status": "open",
"severity": 2.0,
"title": "Blah blah blah",
"coals": [1, 1],
"date": "2017-04-22T23:10:07",
"name": "Joe Bloggs"
},...
infractions is a file object, which can't be passed directly to json.loads(). Either read it first:
infractions_parsed = json.loads(infractions.read())
or use json.load (without the 's') which does expect a buffer.
infractions_parsed = json.load(infractions)

Cannot read the dumped JSON file back Python

I am trying to load a JSON file of mine which I have created from copying content from another JSON file. But I keep on getting the Error ValueError: Expecting property name: line 1 column 1 (char 1) when I try to read JSON data from the file in which I copied all the data, My JSON data is of the Format
{
"server": {
"ipaddress": "IP_Sample",
"name": "name_Sample",
"type": "type_Sample",
"label": "label_Sample",
"keyword": "kwd_Sample",
"uid": "uid_Sample",
"start_time": "start_Sample",
"stop_time": "stop_Sample"
}
}
And my load and write methods are
def load(self, filename):
    # Reads the whole file then parses it; json.load(inputfile) would do both.
    inputfile = open(filename,'r')
    self.data = json.loads(inputfile.read())
    print (self.data)
    inputfile.close()
    return
def write(self, filename):
    file = open(filename, "w")  # NOTE: shadows the Python 2 builtin `file`
    tempObject = self.data
    print type(tempObject)  # Python 2 print statement
    #json.dump(filename, self.data)  # argument order reversed: json.dump(obj, fp)
    print self.data["server"]
    # BUG: this writes Python's repr (u'...' keys, single quotes), NOT JSON —
    # reading it back with json.loads then fails with "Expecting property name".
    print >> file, self.data
    file.close()
    return
I cannot figure out where I am going wrong, can anybody help me with that..
To save and load JSON to and from a file, use an open file object. Your code indicates you tried to save the filename to self.data, which is not a fileobject...
The following code works:
def write(self, filename):
    """Persist self.data to *filename* as JSON text."""
    # Serialise to a string first, then hand it to the file; the
    # with-block guarantees the handle closes even if writing fails.
    with open(filename, 'w') as sink:
        sink.write(json.dumps(self.data))
def load(self, filename):
    """Populate self.data from the JSON document stored in *filename*."""
    # Read the whole document and parse it; the with-block closes the
    # handle whether or not parsing succeeds.
    with open(filename, 'r') as source:
        self.data = json.loads(source.read())
I use the open files as context managers, to ensure they are closed when done reading or writing.
Your other attempt, print >> file, self.data, simply prints the python representation to the file, not JSON:
>>> print example
{u'server': {u'uid': u'uid_Sample', u'keyword': u'kwd_Sample', u'ipaddress': u'IP_Sample', u'start_time': u'start_Sample', u'label': u'label_Sample', u'stop_time': u'stop_Sample', u'type': u'type_Sample', u'name': u'name_Sample'}}
which, when read back from the file would give the error message you indicated:
>>> json.loads("{u'server': {u'uid': u'uid_Sample', u'keyword': u'kwd_Sample', u'ipaddress': u'IP_Sample', u'start_time': u'start_Sample', u'label': u'label_Sample', u'stop_time': u'stop_Sample', u'type': u'type_Sample', u'name': u'name_Sample'}}")
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "/opt/local/Library/Frameworks/Python.framework/Versions/2.6/lib/python2.6/json/__init__.py", line 307, in loads
return _default_decoder.decode(s)
File "/opt/local/Library/Frameworks/Python.framework/Versions/2.6/lib/python2.6/json/decoder.py", line 319, in decode
obj, end = self.raw_decode(s, idx=_w(s, 0).end())
File "/opt/local/Library/Frameworks/Python.framework/Versions/2.6/lib/python2.6/json/decoder.py", line 336, in raw_decode
obj, end = self._scanner.iterscan(s, **kw).next()
File "/opt/local/Library/Frameworks/Python.framework/Versions/2.6/lib/python2.6/json/scanner.py", line 55, in iterscan
rval, next_pos = action(m, context)
File "/opt/local/Library/Frameworks/Python.framework/Versions/2.6/lib/python2.6/json/decoder.py", line 171, in JSONObject
raise ValueError(errmsg("Expecting property name", s, end))
ValueError: Expecting property name: line 1 column 1 (char 1)
You'd have to print the json.dumps() output instead:
>>> print json.dumps(example)
'{"server": {"uid": "uid_Sample", "keyword": "kwd_Sample", "ipaddress": "IP_Sample", "start_time": "start_Sample", "label": "label_Sample", "stop_time": "stop_Sample", "type": "type_Sample", "name": "name_Sample"}}'

Categories

Resources