I'm trying to remove the very first character (") from a file that contains a JSON string. I'm using Python for this. Below is my code:
jsonOutput = 'JsonString_{}.{}'.format(str(uuid.uuid1()), "json")
jsonOutput_File = os.path.join(arcpy.env.scratchFolder, jsonOutput)

with open(jsonOutput_File, 'w') as json_file:
    json.dump(jsonString, json_file)
I was able to remove the very last character using the code below:
with open(jsonOutput_File, 'r+') as read_json_file:
    read_json_file.seek(-1, os.SEEK_END)
    read_json_file.truncate()
Basically, when I dump the JSON string to a file, the string gets surrounded by double quotes. I'm trying to remove these double quotes from the first and last positions in the file.
If you already have a JSON string, simply write it to the file.
Encoding the JSON string to JSON again using json.dump() is the real problem, and it cannot be fixed simply by stripping a leading and a trailing quote.
Consider the following minimal and complete example:
import json
import os
import uuid

myobject = {"hello": "world"}
jsonString = json.dumps(myobject)

jsonOutput = 'JsonString_{}.{}'.format(str(uuid.uuid1()), "json")
jsonOutput_File = os.path.join("d:\\", jsonOutput)

with open(jsonOutput_File, 'w') as json_file:
    json.dump(jsonString, json_file)
The output is a file with the content:
"{\"hello\": \"world\"}"
Removing the quotes will not make it valid JSON.
Instead, avoid the duplicate JSON encoding: either remove json.dumps(), which converts the object to JSON the first time, or remove json.dump(), which does it a second time.
Solution 1:
import json
import os
import uuid

myobject = {"hello": "world"}
# <-- deleted line here

jsonOutput = 'JsonString_{}.{}'.format(str(uuid.uuid1()), "json")
jsonOutput_File = os.path.join("d:\\", jsonOutput)

with open(jsonOutput_File, 'w') as json_file:
    json.dump(myobject, json_file)  # <-- changed to object here
Solution 2:
import json
import os
import uuid

myobject = {"hello": "world"}
jsonString = json.dumps(myobject)

jsonOutput = 'JsonString_{}.{}'.format(str(uuid.uuid1()), "json")
jsonOutput_File = os.path.join("d:\\", jsonOutput)

with open(jsonOutput_File, 'w') as json_file:
    json_file.write(jsonString)  # <-- Note this line
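With either solution the file now contains plain, unescaped JSON ({"hello": "world"}), so it round-trips cleanly. A quick sanity check, reusing jsonOutput_File from above:

# Read the file back; it should parse as valid JSON with no stray quotes
with open(jsonOutput_File) as json_file:
    restored = json.load(json_file)

print(restored)  # {'hello': 'world'}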
Related
How to dump data into a JSON file
As you can see in the Python code below, I am trying to dump data into a JSON file, but I am struggling to get it working:
import time
import json
import os

def long_function(name):
    cache_path = 'cache.json'
    if not os.path.isfile(cache_path):
        with open(cache_path, 't') as json_file:
            cache_file_data = [name]
            jsondump(cache_file_data, json_file)
    else:
        with open(cache_path, 'r') as json_file:
            cache_file_data = json.load(json_file)
            if name in cache_file_data:
                print("Name already exist")
                return name
            else:
                cache_file_data.append(name)
    for e in range(5):
        time.sleep(1)
        print(e+1)
    with open(cache_path, 'w') as json_file:
        jsondump(cache_file_data, json_file)
    print("New Name added in cache")
    return name

print(long_function('nitu'))
So please help me resolve this problem.
import json

# JSON data (a multi-line string needs triple quotes):
x = '''{ "organization":"New_holn",
"city":"Noida",
"country":"India"}'''

# Python object to be appended
y = {"pin": 117845}

# parsing JSON string:
z = json.loads(x)

# appending the data
z.update(y)

# the result is a JSON string:
print(json.dumps(z))
Follow this pattern. The error in your code is that you have not defined the file mode correctly in the if branch. Use

with open(cache_path, "w") as json_file:

instead of

with open(cache_path, "t") as json_file:

The second thing is that you are not actually dumping the data: jsondump is not defined, so call json.dump(cache_file_data, json_file) instead.
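Putting both fixes together, a minimal corrected sketch of the question's function (same logic; only the file mode and the dump call change):

import time
import json
import os

def long_function(name):
    cache_path = 'cache.json'
    if not os.path.isfile(cache_path):
        # "w" creates the file for writing; "t" alone is not a valid mode
        with open(cache_path, 'w') as json_file:
            cache_file_data = [name]
            json.dump(cache_file_data, json_file)  # json.dump, not jsondump
    else:
        with open(cache_path, 'r') as json_file:
            cache_file_data = json.load(json_file)
        if name in cache_file_data:
            print("Name already exists")
            return name
        cache_file_data.append(name)
    for e in range(5):
        time.sleep(1)
        print(e + 1)
    with open(cache_path, 'w') as json_file:
        json.dump(cache_file_data, json_file)
    print("New name added in cache")
    return name

print(long_function('nitu'))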
I have a bunch of JSON files with multiple lines that look like this:
file1
{"id":1,"name":"Eric","height":1.80, ...},
{"id":2,"name":"Bob","height":1.90, ...}
...
file2
{"id":3,"name":"Jenny","height":1.50, ...},
{"id":4,"name":"Marlene","height":1.60, ...}
...
I want to build a generator to yield each line as a dictionary. My current code:
from typing import Iterator, Dict, Any, Optional
import io
import os

def json_gen2(file_list: list) -> Iterator[Dict[str, Any]]:
    import json
    for file in file_list:
        with open(file) as json_file:
            data = []
            for line in json_file:
                data = json.load(line)
                if not data:
                    break
                yield data

datapath = os.path.normcase(os.getcwd()) + '/data/log_data'
file_list = get_files(datapath)  # create path list of json files

jsonfile = json_gen2(file_list)
next(jsonfile)
When I run this, I get an error message. Please help :)
Oops, I misread. You are doing the same thing I was suggesting. Your error is due to using load instead of loads. Each line returned by

for line in json_file:
    data = json.load(line)

is a string, and you are attempting to read it as a file object; json.loads() is the variant that parses a string.
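A minimal corrected sketch of the generator with loads swapped in; the rstrip(',') is an assumption, added to tolerate the trailing commas shown in the sample files:

import json
from typing import Any, Dict, Iterator

def json_gen2(file_list: list) -> Iterator[Dict[str, Any]]:
    for file in file_list:
        with open(file) as json_file:
            for line in json_file:
                line = line.strip().rstrip(',')  # assumed: drop trailing commas as in the samples
                if not line:
                    continue  # skip blank lines instead of stopping
                yield json.loads(line)  # loads parses a string; load expects a file object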
I am trying to get the output of this request (https://api.opendota.com/api/players/7841909) into a file, line by line.
For some reason the output is stored as bytes and not str, which I can change with str().
I tried to use a regular expression to store just the information between the braces, and I also tried the csv module, which led to storing only digits.
What did I do wrong? The following version ignores the line breaks and the delimiters. :/
import requests
import csv
import re

dotaId = "7841909"  # thus stored as a string
pfad = "https://api.opendota.com/api/players/" + dotaId + "/matches"
req = requests.get(pfad)

with open('%s.csv' % dotaId, 'w') as file:
    clean_line = re.findall(r'\{(.*?)\}', req.text)
    file.write(str(clean_line))
Your object clean_line is a list, which you are writing to the file as a single line.
It is better to use the csv writer module and write the content row by row:
with open('new_file.csv', 'w', newline='') as file:
    writer = csv.writer(file, quotechar="'")
    clean_lines = re.findall(r'\{(.*?)\}', req.text)
    for line in clean_lines:
        writer.writerow([str(line)])
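Since the endpoint already returns JSON, an alternative sketch is to skip the regex entirely, parse the response with requests' built-in .json(), and let csv.DictWriter handle quoting. This assumes the response body is a list of flat objects, which is what the /matches endpoint appears to return:

import csv
import requests

req = requests.get("https://api.opendota.com/api/players/7841909/matches")
matches = req.json()  # parse the response body into a list of dicts

if matches:
    with open('7841909.csv', 'w', newline='') as f:
        writer = csv.DictWriter(f, fieldnames=matches[0].keys())
        writer.writeheader()
        writer.writerows(matches)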
I am using the following function to get JSON from the Flickr API. The string it returns is a properly formatted chunk of JSON:
def get_photo_data(photo_id):
    para = {}
    para["photo_id"] = photo_id
    para["method"] = "flickr.photos.getInfo"
    para["format"] = "json"
    para["api_key"] = FLICKR_KEY
    request_data = params_unique_combination("https://api.flickr.com/services/rest/", para)
    if request_data in CACHE_DICTION:
        return CACHE_DICTION[request_data]
    else:
        response = requests.get("https://api.flickr.com/services/rest/", para)
        CACHE_DICTION[request_data] = response.text[14:-1]
        cache_file = open(CACHE_FNAME, 'w')
        cache_file.write(json.dumps(CACHE_DICTION))
        cache_file.close()
        return response.text[14:-1]
The issue I am having is that when I write the JSON to my cache file, it keeps adding backslashes, as in this example:
"https://api.flickr.com/services/rest/format-json_method-flickr.photos.getInfo_photo_id-34869402493": "{\"photo\":{\"id\":\"34869402493\",\"secret\":\"56fcf0342c\",\"server\":\"4057\",\"farm\":5,\"dateuploaded\":\"1499030213\",\"isfavorite\":0,\"license\":\"0\",\"safety_level\":\"0\",\"rotation\":0,\"originalsecret\":\"c4d1d316ed\",\"originalformat\":\"jpg\",\"owner\":{\"nsid\":\"150544082#N05\",\"username\":\"ankitrana_\",\"realname\":\"Ankit Rana\",\"location\":\"Cincinnati, USA\",\"iconserver\":\"4236\",\"iconfarm\":5,\"path_alias\":\"ankitrana_\"},\"title\":{\"_content\":\"7\"},\"description\":{\"_content\":\"\"},\"visibility\":{\"ispublic\":1,\"isfriend\":0,\"isfamily\":0},\"dates\":{\"posted\":\"1499030213\",\"taken\":\"2017-06-19 13:43:38\",\"takengranularity\":\"0\",\"takenunknown\":\"0\",\"lastupdate\":\"1499041020\"},\"views\":\"41\",\"editability\":{\"cancomment\":0,\"canaddmeta\":0},\"publiceditability\":{\"cancomment\":1,\"canaddmeta\":0},\"usage\":{\"candownload\":1,\"canblog\":0,\"canprint\":0,\"canshare\":1},\"comments\":{\"_content\":\"0\"},\"notes\":{\"note\":[]},\"people\":{\"haspeople\":0},\"tags\":{\"tag\":[{\"id\":\"150538742-34869402493-5630\",\"author\":\"150544082#N05\",\"authorname\":\"ankitrana_\",\"raw\":\"cincinnati\",\"_content\":\"cincinnati\",\"machine_tag\":0},{\"id\":\"150538742-34869402493-226\",\"author\":\"150544082#N05\",\"authorname\":\"ankitrana_\",\"raw\":\"ohio\",\"_content\":\"ohio\",\"machine_tag\":false},
... etc., etc.}
How can I store the JSON to the existing file without these additional \ characters, as it is represented when I print the string?
Use your_string.decode('string_escape') to unescape \" to " (Python 2).

Update:

Your string is escaped because of json.dumps(): it converts the object to a JSON string, and when you later read it back with json.loads(), the result is unescaped.

You can save it without backslashes using str():

cache_file.write(str(CACHE_DICTION))
# {'myparam' :'"162000","photo":...'

But the problem is that this saves the file with single quotes, which is not valid JSON and is not compatible with json.loads().

My suggestion is to keep your code as above, except when you want to store the raw response to the file CACHE_FNAME as JSON:
cache_file = open(CACHE_FNAME, 'w')
cache_file.write(response.text)
cache_file.close()
# {"photos":{"page":1,"pages":6478,..}
You could try removing the "\" characters with the str.replace function in Python.

Add this code after the following line:

cache_file = open(CACHE_FNAME, 'w')

json_item = str(json.dumps(CACHE_DICTION))
json_item = json_item.replace("\\", "")  # note: replace returns a new string

and change this line:

cache_file.write(json.dumps(CACHE_DICTION))

to

cache_file.write(json_item)

Let me know if this works for you.
Just replace \ with an empty string.
I did the same thing while I was working with JSON.

json_new = json_string.replace('\\', '')  # json_string holds your JSON text
I have some simple code to ingest some JSON Twitter data and output some specific fields into separate columns of a CSV file. My problem is that I cannot for the life of me figure out the proper way to encode the output as UTF-8. Below is the closest I've been able to get, with the help of a member here, but it still isn't running correctly and fails because of the unique characters in the tweet text field.
import json
import sys
import csv
import codecs

def main():
    writer = csv.writer(codecs.getwriter("utf-8")(sys.stdout), delimiter="\t")
    for line in sys.stdin:
        line = line.strip()
        data = []
        try:
            data.append(json.loads(line))
        except ValueError as detail:
            continue
        for tweet in data:
            ## deletes any rate limited data
            if tweet.has_key('limit'):
                pass
            else:
                writer.writerow([
                    tweet['id_str'],
                    tweet['user']['screen_name'],
                    tweet['text']
                ])

if __name__ == '__main__':
    main()
From the docs:
https://docs.python.org/2/howto/unicode.html
a = "string"
encodedstring = a.encode('utf-8')
If that does not work, see:
Python DictWriter writing UTF-8 encoded CSV files
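Applied to the question's loop, that means encoding each text field to UTF-8 bytes before handing it to the writer. A Python 2 sketch, assuming a plain csv.writer on sys.stdout rather than the codecs wrapper, with a stand-in tweet record:

import csv
import sys

# Python 2 sketch: plain byte-oriented writer instead of the codecs wrapper
writer = csv.writer(sys.stdout, delimiter="\t")

tweet = {'id_str': '1', 'user': {'screen_name': 'someone'}, 'text': u'caf\xe9'}  # stand-in record
writer.writerow([
    tweet['id_str'],
    tweet['user']['screen_name'].encode('utf-8'),
    tweet['text'].encode('utf-8'),
])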
I have had the same problem. I have a large amount of data from the Twitter firehose, so every possible complication case has arisen!
I've solved it as follows using try/except:
If the dict value is a string (isinstance(value, basestring)), I try to encode it straight away. If it is not a string, I make it a string and then encode it.
If this fails, it's because some joker is tweeting odd symbols to mess up my script. If that is the case, I first decode and then re-encode: value.decode('utf-8').encode('utf-8') for strings, and str(value.decode('utf-8')).encode('utf-8') for non-strings.
Have a go with this:
import csv

def export_to_csv(list_of_tweet_dicts, export_name="flat_twitter_output.csv"):
    utf8_flat_tweets = []
    keys = []
    for tweet in list_of_tweet_dicts:
        tmp_tweet = tweet
        for key, value in tweet.iteritems():
            if key not in keys: keys.append(key)
            # convert fields to utf-8 if text
            try:
                if isinstance(value, basestring):
                    tmp_tweet[key] = value.encode('utf-8')
                else:
                    tmp_tweet[key] = str(value).encode('utf-8')
            except:
                if isinstance(value, basestring):
                    tmp_tweet[key] = value.decode('utf-8').encode('utf-8')
                else:
                    tmp_tweet[key] = str(value.decode('utf-8')).encode('utf-8')
        utf8_flat_tweets.append(tmp_tweet)
        del tmp_tweet
    list_of_tweet_dicts = utf8_flat_tweets
    del utf8_flat_tweets
    with open(export_name, 'w') as f:
        dict_writer = csv.DictWriter(f, fieldnames=keys, quoting=csv.QUOTE_ALL)
        dict_writer.writeheader()
        dict_writer.writerows(list_of_tweet_dicts)
    print "exported tweets to '" + export_name + "'"
    return list_of_tweet_dicts
Hope that helps you.