I am reading through a .json file and parsing some of the data to save into an object. There are only 2000 or so items in the JSON that I need to iterate over, but the script I currently have takes much longer than I'd like.
data_file = 'v1/data/data.json'

user = User.objects.get(username='lsv')
format = Format(format='Limited')
format.save()

lost_cards = []
lost_cards_file = 'v1/data/LostCards.txt'

with open(data_file) as file:
    data = json.load(file)
    for item in data:
        if item['model'] == 'cards.cardmodel':
            if len(Card.objects.filter(name=item['fields']['name'])) == 0:
                print(f"card not found: {item['fields']['name']}")
                lost_cards.append(item['fields']['name'])
            try:
                Rating(
                    card=Card.objects.get(name=item['fields']['name'], set__code=item['fields']['set']),
                    rating=item['fields']['rating'],
                    reason=item['fields']['reason'],
                    format=format,
                    rator=user
                ).save()
            except Exception as e:
                print(e, item['fields']['name'], item['fields']['set'])
                break

with open(lost_cards_file, 'w') as file:
    file.write(str(lost_cards))
The code is working as expected, but it's taking a lot longer than I'd like. I'm hoping there is a built-in JSON or iterator function that could accelerate this process.
There is. It's called the json module.
with open(data_file, 'r') as input_file:
    dictionary_from_json = json.load(input_file)
should do it.
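That said, the snippet above is already the standard way to parse JSON, so the time is more likely going into the two ORM queries made per item. A hedged sketch (assuming the Django models from the question) that looks cards up from an in-memory dict and batch-inserts the ratings with bulk_create():

# Sketch only: assumes the Card/Rating/Format models and the `data`,
# `lost_cards`, `format`, and `user` names from the question.
cards_by_key = {
    (c.name, c.set.code): c
    for c in Card.objects.select_related('set')  # one query for all cards
}
ratings = []
for item in data:
    if item['model'] != 'cards.cardmodel':
        continue
    fields = item['fields']
    card = cards_by_key.get((fields['name'], fields['set']))
    if card is None:
        lost_cards.append(fields['name'])  # same bookkeeping as the original loop
        continue
    ratings.append(Rating(card=card, rating=fields['rating'],
                          reason=fields['reason'], format=format, rator=user))
Rating.objects.bulk_create(ratings)  # one batched INSERT instead of ~2000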
Here is the code; it runs in a loop but stores only the very last record:
try:
    with open('records.csv', 'a') as csv_file:
        for url in urls:
            if url.strip() != '':
                print('Processing URL:- ' + url)
                data = fetch_data(url)
                csv_file.write(data)
                status = 'OK'
        csv_file.close()
except Exception, e:
    status = 'FAIL'
    print str(e)
finally:
    with open('process.log', 'a') as the_file:
        ts = time.strftime("%H:%M")
        the_file.write(ts + '\t' + url + '\t' + status + '\n')
    if driver is not None:
        driver.quit()
records.csv saves only the last record.
file.write does not add newlines, so the loop is most likely processing every URL but writing them all onto a single line, which makes it look like only one record was saved.
Some nitpicks:
You don't need to call csv_file.close(), since you're using a with block.
You don't need to set status="OK" in every iteration of the loop, just once above the loop.
It might be faster to process data separately from I/O, as #bruno suggests below.
datalines = []
for url in urls:
    if url.strip():
        datalines.append(fetch_data(url))

with open('records.csv', 'a') as csv_file:
    # one write, with explicit newlines between records
    csv_file.write("\n".join(datalines) + "\n")
I have a text file named 'triple_response.txt' which contains text such as:
(1,(db_name,string),DSP)
(1,(rel, id),2)
(2,(rel_name, string),DataSource)
(2,(tuple, id),201)
(2,(tuple, id),202)
(2,(tuple, id),203)
(201,(src_id,varchar),Pos201510070)
(201,(src_name,varchar),Postgres)
(201,(password,varchar),root)
(201,(host,varchar),localhost)
(201,(created_date,date),2015-10-07)
(201,(user_name,varchar),postgres)
(201,(src_type,varchar),Structured)
(201,(db_name,varchar),postgres)
(201,(port,numeric),None)
(202,(src_id,varchar),pos201510060)
(202,(src_name,varchar),Postgres)
(202,(password,varchar),root)
(202,(host,varchar),localhost)
(202,(created_date,date),2015-10-06)
(202,(user_name,varchar),postgres)
(202,(src_type,varchar),Structured)
(202,(db_name,varchar),DSP)
(202,(port,numeric),5432)
(203,(src_id,varchar),pos201510060)
(203,(src_name,varchar),Postgres)
(203,(password,varchar),root)
(203,(host,varchar),localhost)
(203,(created_date,date),2015-10-06)
(203,(user_name,varchar),postgres)
(203,(src_type,varchar),Structured)
(203,(db_name,varchar),maindb)
(203,(port,numeric),5432)
I am trying to convert these contents into JSON using a Python script:
import re
import collections
import json, jsonpickle

def convertToJSON(File):
    word_list = []
    row_list = []
    try:
        with open(File, 'r') as f:
            for word in f:
                word_list.append(word)
        with open(File, 'r+') as f:
            for row in f:
                print row
                row_list.append(row.split())
            column_list = zip(*row_list)
    except IOError:
        print "Error in opening file.."
    triple = ""
    for t in word_list:
        triple += t
    tripleList = re.findall(r"\([^\(^\)]*\)", triple)
    idList = re.split(r"\([^\(^\)]*\)", triple)
    i = 0
    jsonDummy = []
    jsonData = {}
    for trip in tripleList:
        nameAndType = re.split(r",|:", trip)
        if(i == 0):
            key = re.compile("[^\w']|_").sub("", idList[i])
        else:
            try:
                key = re.compile("[^\w']|_").sub("", idList[i].split("(")[1])
            except IndexError:
                pass
        i = i + 1
        if(idList[i].find('(') != -1):
            try:
                content = re.compile("[^\w']|_").sub("", idList[i].split(")")[0])
            except IndexError:
                pass
        else:
            content = re.compile("[^\w']|_").sub("", idList[i])
        try:
            trip = trip[1:-1]
            tripKey = trip[1]
        except IndexError:
            tripKey = ''
        name = re.compile("[^\w']").sub("", nameAndType[0])
        try:
            typeName = re.compile("[^\w']|_").sub("", nameAndType[1])
        except IndexError:
            typeName = 'String'
        tripDict = dict()
        value = dict()
        value[name] = content
        tripDict[key] = value
        jsonDummy.append(tripDict)
    for j in jsonDummy:
        for k, v in j.iteritems():
            jsonData.setdefault(k, []).append(v)
    data = dict()
    data['data'] = jsonData
    obj = {}
    obj = jsonpickle.encode(data, unpicklable=False)
    return obj
    pass
I am calling convertToJSON() from within the same file:
print convertToJSON("triple_response.txt")
I get the output I expect:
{"data": {"1": [{"db_name": "DSP"}, {"rel": "2"}], "201": [{"src_id": "Pos201510070"}, {"src_name": "Postgres"}, {"password": "root"}, {"host": "localhost"}, {"created_date": "20151007"}, {"user_name": "postgres"}, {"src_type": "Structured"}, {"db_name": "postgres"}, {"port": "None"}], "203": [{"src_id": "pos201510060"}, {"src_name": "Postgres"}, {"password": "root"}, {"host": "localhost"}, {"created_date": "20151006"}, {"user_name": "postgres"}, {"src_type": "Structured"}, {"db_name": "maindb"}, {"port": "5432"}], "2": [{"rel_name": "DataSource"}, {"tuple": "201"}, {"tuple": "202"}, {"tuple": "203"}], "202": [{"src_id": "pos201510060"}, {"src_name": "Postgres"}, {"password": "root"}, {"host": "localhost"}, {"created_date": "20151006"}, {"user_name": "postgres"}, {"src_type": "Structured"}, {"db_name": "DSP"}, {"port": "5432"}]}}
Now the problem I am facing: I am calling this from outside the class, as:
def extractConvertData(self):
    triple_response = SPO(source, db_name, table_name, response)
    try:
        _triple_file = open('triple_response.txt', 'w+')
        _triple_file.write(triple_response)
        print "written data in file.."
        with open('triple_response.txt', 'r+') as f:
            for word in f:
                print word
        jsonData = convertToJSON(str('triple_response.txt'))
    except IOError:
        print "Not able to open a file"
    print "Converted into JSON"
    print jsonData
    pass
Here the same convertToJSON() code is not working. It gives neither output nor any error; it is unable to read the content of 'triple_response.txt' at this line:
with open('triple_response.txt', 'r+') as f:
    for word in f:
        print word
Can anyone tell me the solution to this problem?
_triple_file is never closed (except implicitly when you end the Python process, which is a terrible practice).
You can get platform-specific behavior when you have dangling filehandles like that (what is your platform? Unix? Windows?). Probably the write to _triple_file is not getting flushed.
So don't leave it dangling. Make sure to close it after you write it with _triple_file.write(triple_response). Then assert that the file length is non-zero using os.stat(); otherwise raise an exception.
Also, you have only one big try...except clause to catch all errors; that is too much in one bite. Break it into two separate try...except clauses: one for writing _triple_file, and one for reading it back. (Btw, you might like to use the tempfile library instead, to sidestep needing to know your intermediate file's pathname.)
Something like the following untested pseudocode:
import os

triple_response = SPO(source, db_name, table_name, response)

try:
    _triple_file = open('triple_response.txt', 'w+')
    _triple_file.write(triple_response)
    _triple_file.close()
except IOError:
    print "Not able to write intermediate JSON file"
    raise

# fail fast if the intermediate file came out empty
assert os.stat('triple_response.txt').st_size > 0, "Error: intermediate JSON file was empty"

try:
    with open('triple_response.txt', 'r+') as f:
        for word in f:
            print word
    jsonData = convertToJSON(str('triple_response.txt'))
except IOError:
    print "Not able to read back intermediate JSON file"
    #raise # if you want to reraise the exception
...
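As for the tempfile suggestion, a minimal sketch (assuming the triple_response string and convertToJSON() from the question; delete=False is needed so the file survives the close and can be reopened by name):

import tempfile

# NamedTemporaryFile gives a real on-disk path without hard-coding a filename
tmp = tempfile.NamedTemporaryFile(mode='w', suffix='.txt', delete=False)
tmp.write(triple_response)
tmp.close()  # flushed and closed before anything reads it back
jsonData = convertToJSON(tmp.name)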
I am stuck on why words.txt is not showing the full grid. Below are the tasks I must carry out:
Write code to prompt the user for a filename, and attempt to open the file whose name is supplied. If the file cannot be opened, the user should be asked to supply another filename; this should continue until a file has been successfully opened.
The file will contain on each line a row from the words grid. Write code to read, in turn, each line of the file, remove the newline character and append the resulting string to a list of strings. After the input is complete the grid should be displayed on the screen.
Below is the code I have written so far; any help would be appreciated:
file = input("Enter a filename: ")
try:
    a = open(file)
    with open(file) as a:
        x = [line.strip() for line in a]
        print (a)
except IOError as e:
    print ("File Does Not Exist")
Note: always avoid variable names like file and list, since they shadow built-in Python names.
while True:
    filename = raw_input(' filename: ')
    try:
        lines = [line.strip() for line in open(filename)]
        print lines
        break
    except IOError as e:
        print 'No file found'
        continue
The below implementation should work:
# loop
while(True):
    # don't use name 'file', it's a data type
    the_file = raw_input("Enter a filename: ")
    try:
        with open(the_file) as a:
            x = [line.strip() for line in a]
            # I think you meant to print x, not a
            print(x)
            break
    except IOError as e:
        print("File Does Not Exist")
You need a while loop:
while True:
    file = input("Enter a filename: ")
    try:
        a = open(file)
        with open(file) as a:
            x = [line.strip() for line in a]
            print (a)
            break
    except IOError:
        pass
This will keep asking until a valid file is provided.
I am looking to take a list that contains data and append to it, so that the additions are saved for the next time I execute the code. So far I have this, which works, but when I execute the code again it does not include the new data that was entered. Any suggestions?
def the_list():
    data = ['data1', 'data2', 'data3 ', 'data4', 'data5']
    for i in data:
        print (i)
    print (' would you like to add')
    a = input()
    if a == ('yes'):
        b = input()
        data.append(b)
        print (data)

the_list()
If you mean to save data across executions, everything in memory of the "live" program gets discarded at the end of execution, so you must save data to a file (or some other persistent medium), then read it back in.
You can save to a file as simple strings, or you can use pickle to easily serialize objects like lists.
Using simple strings
Wrap your program with code to load and save data to a file:
data = []
try:
    old_data = open("save_data", "r").read().split('\n')
    data.extend(old_data)
except:
    print ("Unable to load old data!")
    data = ['data1', 'data2', 'data3 ', 'data4', 'data5']

#YOUR PROGRAM HERE

try:
    with open("save_data", "w") as out:
        out.write('\n'.join(data))
except:
    print ("Unable to write data!")
Of course, you have to do some work to prevent having duplicates in data if they must not appear.
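For example, one way (of several) to drop duplicates while preserving order:

# keep only the first occurrence of each item
seen = set()
data = [x for x in data if not (x in seen or seen.add(x))]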
Using pickle
import pickle

data_fn = "save_data"
data = []
try:
    with open(data_fn, "rb") as data_in:
        old_data = pickle.load(data_in)
        data.extend(old_data)
except:
    print ("Unable to load last data")
    data = ['data1', 'data2', 'data3 ', 'data4', 'data5']

#REST OF YOUR PROGRAM

try:
    with open(data_fn, "wb") as data_out:  # "wb": pickle wants binary mode, matching the "rb" read
        pickle.dump(data, data_out)
except:
    print ("Unable to save data")
def the_list():
    try:
        with open('data.txt', 'r') as f:
            data = [line.strip() for line in f]
    except IOError:
        data = []
    for i in data:
        print(i)
    print (' would you like to add')
    a = raw_input()
    if a == 'yes':
        b = raw_input()  # raw_input, not input: input() would eval the text in Python 2
        data.append(b)
        print (data)
    with open('data.txt', 'w') as nf:  # 'w', not 'r': we are writing the list back out
        nf.write('\n'.join(data))

the_list()
but when I execute the code again it does not include the new data that was entered
Of course, because the list is in memory. You would have to save it to a file or to a database in order for it to persist.
For example, read about Input and Output in Python.
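For completeness, the json module used elsewhere in this thread also works for persisting a simple list between runs; a minimal sketch, assuming the items are plain strings:

import json

def load_data(path='data.json'):
    try:
        with open(path) as f:
            return json.load(f)
    except (IOError, ValueError):  # no file yet, or unreadable contents
        return ['data1', 'data2', 'data3', 'data4', 'data5']

def save_data(data, path='data.json'):
    with open(path, 'w') as f:
        json.dump(data, f)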