list.append() is replacing every element with the new one - Python

I have a loop in which I edit a JSON object and append it to a list. But outside the loop, the values of all the old elements have changed to the new one.
My question is similar to this one here, but I still can't find a solution to my problem.
This is my code:
import json
from random import randint
from time import gmtime, strftime

json_data = open(filepath).read()
data = json.loads(json_data)
dataNew = []
# opening file to write json
with open(filepath2, 'w') as outfile:
    for i in range(50):
        random_index_IntentNames = randint(0, len(intent_names) - 1)
        random_index_SessionIds = randint(0, len(session_id) - 1)
        timestamp = strftime("%Y-%m-%d %H:%M:%S", gmtime())
        data["result"]["metadata"]["intentName"] = intent_names[random_index_IntentNames]
        data["sessionId"] = session_id[random_index_SessionIds]
        data["timestamp"] = timestamp
        dataNew.append(data)
    json.dump(dataNew, outfile, indent=2)

Every item in your list is just a reference to a single object in memory. Similar to what was posted in your linked answer, you need to append copies of the dict.
import copy
my_list = []
a = {1: 2, 3: 4}
b = a # Referencing the same object
c = copy.copy(a) # Creating a different object
my_list.append(a)
my_list.append(b)
my_list.append(c)
a[1] = 'hi' # Modify the dict, which will change both a and b, but not c
print(my_list)
You might be interested in "Is Python call-by-value or call-by-reference? Neither." for further reading.
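A minimal sketch of the point that article makes: assignment binds a name to an existing object, it never copies.
a = {1: 2}
b = a            # no copy: b is a second name for the same object
print(a is b)    # True
b = {1: 2}       # rebinding b points it at a brand-new, equal object
print(a is b)    # False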

data is a dict, which means it's mutable and its value is passed by reference; you have to use [copy.deepcopy()](https://docs.python.org/2/library/copy.html#copy.deepcopy) if you want to keep the original data unchanged:
from copy import deepcopy

json_data = open(filepath).read()
data = json.loads(json_data)
dataNew = []
# opening file to write json
with open(filepath2, 'w') as outfile:
    for i in range(50):
        random_index_IntentNames = randint(0, len(intent_names) - 1)
        random_index_SessionIds = randint(0, len(session_id) - 1)
        timestamp = strftime("%Y-%m-%d %H:%M:%S", gmtime())
        # Create a deep copy, modify it and append it to the new list
        new_data = deepcopy(data)
        new_data["result"]["metadata"]["intentName"] = intent_names[random_index_IntentNames]
        new_data["sessionId"] = session_id[random_index_SessionIds]
        new_data["timestamp"] = timestamp
        dataNew.append(new_data)
    json.dump(dataNew, outfile, indent=2)
NOTE: If data doesn't store mutable items, you can use dict.copy in order to avoid modifying the original value.
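A minimal sketch of the difference, using a nested structure like the question's data:
import copy

nested = {"result": {"metadata": {"intentName": "a"}}}

shallow = nested.copy()  # copies only the outer dict; inner dicts are shared
shallow["result"]["metadata"]["intentName"] = "b"
print(nested["result"]["metadata"]["intentName"])  # "b": the original was mutated too

deep = copy.deepcopy(nested)  # copies the whole tree
deep["result"]["metadata"]["intentName"] = "c"
print(nested["result"]["metadata"]["intentName"])  # still "b": deepcopy isolated it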
Good Luck!

I was able to find the solution myself: I moved the assignment of "data" inside the loop, and it worked. json.loads builds a brand-new object on every iteration, so each appended dict is independent:
json_data = open(filepath).read()
dataNew = []
# opening file to write json
with open(filepath2, 'w') as outfile:
    for i in range(50):
        random_index_IntentNames = randint(0, len(intent_names) - 1)
        random_index_SessionIds = randint(0, len(session_id) - 1)
        timestamp = strftime("%Y-%m-%d %H:%M:%S", gmtime())
        data = json.loads(json_data)
        data["result"]["metadata"]["intentName"] = intent_names[random_index_IntentNames]
        data["sessionId"] = session_id[random_index_SessionIds]
        data["timestamp"] = timestamp
        dataNew.append(data)
    json.dump(dataNew, outfile, indent=2)

Related

Handle index positions in a Python script to delete JSON objects from a JSON file - Resolved

I have a file (my_file.json) with contents as below:
[
    {
        "use":"abcd",
        "contact":"xyz",
        "name":"my_script.py",
        "time":"11:22:33"
    },
    {
        "use":"abcd",
        "contact":"xyz",
        "name":"some_other_script.py",
        "time":"11:22:33"
    },
    {
        "use":"apqwkndf",
        "contact":"xyz",
        "name":"my_script.py",
        "time":"11:22:33"
    },
    {
        "use":"kjdshfjkasd",
        "contact":"xyz",
        "name":"my_script.py",
        "time":"11:22:33"
    }
]
I used the following Python code to delete the objects that have "name":"my_script.py":
#!/usr/bin/python
import json

obj = json.load(open("my_file.json"))
index_list = []
for i in xrange(len(obj)):
    if obj[i]["name"] == "my_script.py":
        index_list.append(i)
for x in range(len(index_list)):
    obj.pop(index_list[x])
open("output_my_file.json", "w").write(json.dumps(obj, indent=4, separators=(',', ': ')))
but it seems I am stuck, because after popping an index the positions in the actual obj shift, which leads to deleting the wrong index, or sometimes the pop index goes out of range. Any other solution?
Try popping in reverse order:
for x in reversed(range(len(index_list))):
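For example, with data shaped like the question's (a minimal sketch; popping the highest index first keeps the remaining indices valid):
obj = [{"name": "my_script.py"}, {"name": "other.py"}, {"name": "my_script.py"}]
index_list = [0, 2]

for x in reversed(range(len(index_list))):
    obj.pop(index_list[x])

print(obj)  # [{'name': 'other.py'}]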
Alternatively, create a new list, assigning only the objects without "name": "my_script.py" to it:
obj = [i for i in obj if i["name"] != "my_script.py"]
import json

with open('my_file.json') as f:
    data = json.load(f)
data = [item for item in data if item.get('name') != 'my_script.py']
with open('output_my_file.json', 'w') as f:
    json.dump(data, f, indent=4)
Try:
import json

json_file = json.load(open("file.json"))
for json_dict in json_file:
    json_dict.pop("name", None)
print(json.dumps(json_file, indent=4))
You don't need the last line with json.dumps; I just have it there so the output looks more readable when printed.
As a general rule of thumb, you usually don't want to change an iterable while iterating over it.
I suggest you save the elements you do want in the first loop:
import json

with open('path/to/file', 'r') as f:
    data = json.load(f)

items_to_keep = []
for item in data:
    if item['name'] != 'my_script.py':
        items_to_keep.append(item)

with open('path/to/file', 'w') as f:
    json.dump(items_to_keep, f, ...)
The filtering can be reduced to a single line (called a list comprehension):
import json

with open('path/to/file', 'r') as f:
    data = json.load(f)

items_to_keep = [item for item in data if item['name'] != 'my_script.py']

with open('path/to/file', 'w') as f:
    json.dump(items_to_keep, f, ...)

"Remove" in else clause changes results of loop over json dict

I am iterating over a dict created from a JSON file, which works fine, but as soon as I remove some of the entries in the else clause, the results change (normally it prints 35 nuts_ids, but with the remove in the else clause only 32 are printed). So it seems that the remove influences the iteration, but why? Shouldn't the key be safe? How can I do this appropriately without losing data?
import json

with open("test.json") as json_file:
    json_data = json.load(json_file)

for g in json_data["features"]:
    poly = g["geometry"]
    cntr_code = g["properties"]["CNTR_CODE"]
    nuts_id = g["properties"]["NUTS_ID"]
    name = g["properties"]["NUTS_NAME"]
    if cntr_code == "AT":
        print(nuts_id)
        # do plotting etc
    else:  # delete it if it is not part of a specific country
        json_data["features"].remove(g)  # line in question
# do something else with the json_data
It is not good practice to delete items while iterating over the object. Instead, filter the list down to only the elements you need.
Ex:
import json

with open("test.json") as json_file:
    json_data = json.load(json_file)

# Filter out other country codes.
json_data_features = [g for g in json_data["features"] if g["properties"]["CNTR_CODE"] == "AT"]
json_data["features"] = json_data_features

for g in json_data["features"]:
    poly = g["geometry"]
    cntr_code = g["properties"]["CNTR_CODE"]
    nuts_id = g["properties"]["NUTS_ID"]
    name = g["properties"]["NUTS_NAME"]
    # do plotting etc
# do something else with the json_data
Always remember the cardinal rule: never modify an object you are iterating over.
You can take a copy of the features list with .copy() and iterate over that copy instead:
import json

with open("test.json") as json_file:
    json_data = json.load(json_file)

# Take a copy of the features list
json_data_copy = json_data['features'].copy()

# Iterate over the copy
for g in json_data_copy:
    poly = g["geometry"]
    cntr_code = g["properties"]["CNTR_CODE"]
    nuts_id = g["properties"]["NUTS_ID"]
    name = g["properties"]["NUTS_NAME"]
    if cntr_code == "AT":
        print(nuts_id)
        # do plotting etc
    else:  # delete it if it is not part of a specific country
        json_data["features"].remove(g)  # safe now: we iterate over the copy

Read CSV file and filter results

I'm writing a script where one of its functions is to read a CSV file that contains URLs in one of its columns. Unfortunately the system that creates those CSVs doesn't put double quotes around values in the URL column, so when a URL contains commas it breaks all my CSV parsing.
This is the code I'm using:
import csv

with open(accesslog, 'r') as csvfile, open('results.csv', 'w') as enhancedcsv:
    reader = csv.DictReader(csvfile)
    for row in reader:
        self.uri = row['URL']
        self.OriCat = row['Category']
        self.query(self.uri)
        print(self.URL + "," + self.ServerIP + "," + self.OriCat + "," + self.NewCat)
This is a sample URL that is breaking the parsing; this URL comes in the column named "URL" (note the commas at the end):
ams1-ib.adnxs.com/ww=1238&wh=705&ft=2&sv=43&tv=view5-1&ua=chrome&pl=mac&x=1468251839064740641,439999,v,mac,webkit_chrome,view5-1,0,,2,
The field following the URL always comes with a numeric value in parentheses, e.g. (9999), so this could be used to determine where the URL with commas ends.
How can I deal with a situation like this using the csv module?
You will have to do it a little more manually. Try this:
def process(lines, delimiter=','):
    header = None
    url_index_from_start = None
    url_index_from_end = None
    for line in lines:
        if not header:
            header = [l.strip() for l in line.split(delimiter)]
            url_index_from_start = header.index('URL')
            url_index_from_end = len(header) - url_index_from_start
        else:
            data = [l.strip() for l in line.split(delimiter)]
            url_from_start = url_index_from_start
            url_from_end = len(data) - url_index_from_end
            values = data[:url_from_start] + data[url_from_end + 1:] + [delimiter.join(data[url_from_start:url_from_end + 1])]
            keys = header[:url_index_from_start] + header[url_index_from_end + 1:] + [header[url_index_from_start]]
            yield dict(zip(keys, values))
Usage:
lines = ['Header1, Header2, URL, Header3',
         'Content1, "Content2", abc,abc,,abc, Content3']
result = list(process(lines))
assert result[0]['Header1'] == 'Content1'
assert result[0]['Header2'] == '"Content2"'
assert result[0]['Header3'] == 'Content3'
assert result[0]['URL'] == 'abc,abc,,abc'
print(result)
Result:
[{'URL': 'abc,abc,,abc', 'Header2': '"Content2"', 'Header3': 'Content3', 'Header1': 'Content1'}]
Have you considered using Pandas to read your data in?
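A minimal sketch, assuming the URL field were properly quoted; pandas, like the csv module, cannot recover unquoted embedded commas by itself, so you would still need to pre-process the file first (accesslog is the path from the question):
import pandas as pd

df = pd.read_csv(accesslog)
urls = df['URL']
categories = df['Category']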
Another possible solution would be to use regular expressions to pre-process the data...
import re

# read the file once
f = open(filein, 'r')
filedata = f.read()
f.close()

# make a list of everything you want to change
# (regex is your pattern for the unquoted URLs, not shown here)
old = re.findall(regex, filedata)

# append quotes and create a new list
new = []
for url in old:
    url2 = "\"" + url + "\""
    new.append(url2)

# combine the lists
old_new = list(zip(old, new))

# then use the list to update the file,
# accumulating the replacements instead of overwriting them
for o, n in old_new:
    filedata = filedata.replace(o, n)

f = open(filein, 'w')
f.write(filedata)
f.close()

Append JSON to file

I am trying to append values to a JSON file. How can I append the data? I have been trying so many ways, but none of them are working.
Code:
def all(title, author, body, type):
    title = "hello"
    author = "njas"
    body = "vgbhn"
    data = {
        "id": id,
        "author": author,
        "body": body,
        "title": title,
        "type": type
    }
    data_json = json.dumps(data)
    #data = ast.literal_eval(data)
    #print data_json
    if os.path.isfile("offline_post.json"):
        with open('offline_post.json', 'a') as f:
            new = json.loads(f)
            new.update(a_dict)
            json.dump(new, f)
    else:
        open('offline_post.json', 'a')
        with open('offline_post.json', 'a') as f:
            new = json.loads(f)
            new.update(a_dict)
            json.dump(new, f)
How can I append data to the JSON file when this function is called?
I suspect you left out that you're getting a TypeError in the blocks where you're trying to write the file. Here's where you're trying to write:
with open('offline_post.json', 'a') as f:
    new = json.loads(f)
    new.update(a_dict)
    json.dump(new, f)
There are a couple of problems here. First, you're passing a file object to json.loads, which expects a string. You probably meant to use json.load.
Second, you're opening the file in append mode, which places the pointer at the end of the file. When you run the json.load, you're not going to get anything because it's reading at the end of the file. You would need to seek to 0 before loading (edit: this would fail anyway, as append mode is not readable).
Third, when you json.dump the new data to the file, it's going to append it to the file in addition to the old data. From the structure, it appears you want to replace the contents of the file (as the new data contains the old data already).
You probably want to use r+ mode, seeking back to the start of the file between the read and write, and truncating at the end just in case the size of the data structure ever shrinks.
with open('offline_post.json', 'r+') as f:
    new = json.load(f)
    new.update(a_dict)
    f.seek(0)
    json.dump(new, f)
    f.truncate()
Alternatively, you can open the file twice:
with open('offline_post.json', 'r') as f:
    new = json.load(f)
new.update(a_dict)
with open('offline_post.json', 'w') as f:
    json.dump(new, f)
This is a different approach: I just wanted to append without reloading all the data. I'm running on a Raspberry Pi, so I want to look after memory. The test code:
import os

json_file_exists = 0
filename = "/home/pi/scratch_pad/test.json"

# remove the last run's json data
try:
    os.remove(filename)
except OSError:
    pass

count = 0
boiler = 90
tower = 78

while count < 10:
    if json_file_exists == 0:
        # create the json file
        with open(filename, mode='w') as fw:
            json_string = "[\n\t{\"boiler\":" + str(boiler) + ",\"tower\":" + str(tower) + "}\n]"
            fw.write(json_string)
        json_file_exists = 1
    else:
        # append to the json file
        char = b""
        boiler = boiler + .01
        tower = tower + .02
        # strip trailing bytes until the closing brace of the last record is exposed
        while char != b"}":
            with open(filename, mode='rb+') as f:
                f.seek(-1, 2)
                size = f.tell()
                char = f.read()
                if char == b"}":
                    break
                f.truncate(size)  # drop the byte just read
        with open(filename, mode='a') as fw:
            json_string = "\n\t,{\"boiler\":" + str(boiler) + ",\"tower\":" + str(tower) + "}\n]"
            fw.write(json_string)
    count = count + 1

KeyError: u'somestring' Json

I am trying to make a point system for my Twitch bot and I am encountering KeyErrors when trying to make a new entry for some odd reason. Here is my code:
import urllib2, json

def updateUsers(chan):
    j = urllib2.urlopen('http://tmi.twitch.tv/group/user/' + chan + '/chatters')
    j_obj = json.load(j)
    with open('dat.dat', 'r') as data_file:
        data = json.load(data_file)
    for usr in j_obj['chatters']['viewers']:
        data[usr]['Points'] = "0"  # Where the KeyError: u'someguysusername' occurs
    with open('dat.dat', 'w') as out_file:
        json.dump(data, out_file)

updateUsers('tryhard_clan')
If you want to see the JSON itself, go to http://tmi.twitch.tv/group/user/tryhard_clan/chatters
I'm storing user data in a file in this format:
{"users": {"cupcake": {"Points": "0"}}}
A slightly more concise form than what @Raunak suggested:
data.setdefault(usr, {})['Points'] = "0"
that will set data[usr] to an empty dict if it's not already there, and set the 'Points' element in any case.
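A quick sketch of that behaviour:
data = {}
for usr in ["cupcake", "muffin", "cupcake"]:
    data.setdefault(usr, {})["Points"] = "0"
print(data)  # {'cupcake': {'Points': '0'}, 'muffin': {'Points': '0'}}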
It happens that the variable usr doesn't resolve to an existing key in data. Do this instead:
if usr not in data:
    data[usr] = {}
data[usr]['Points'] = "0"
