Iterating the content of a text file in python - python

I have a text file named 'triple_response.txt' which contains the following text:
(1,(db_name,string),DSP)
(1,(rel, id),2)
(2,(rel_name, string),DataSource)
(2,(tuple, id),201)
(2,(tuple, id),202)
(2,(tuple, id),203)
(201,(src_id,varchar),Pos201510070)
(201,(src_name,varchar),Postgres)
(201,(password,varchar),root)
(201,(host,varchar),localhost)
(201,(created_date,date),2015-10-07)
(201,(user_name,varchar),postgres)
(201,(src_type,varchar),Structured)
(201,(db_name,varchar),postgres)
(201,(port,numeric),None)
(202,(src_id,varchar),pos201510060)
(202,(src_name,varchar),Postgres)
(202,(password,varchar),root)
(202,(host,varchar),localhost)
(202,(created_date,date),2015-10-06)
(202,(user_name,varchar),postgres)
(202,(src_type,varchar),Structured)
(202,(db_name,varchar),DSP)
(202,(port,numeric),5432)
(203,(src_id,varchar),pos201510060)
(203,(src_name,varchar),Postgres)
(203,(password,varchar),root)
(203,(host,varchar),localhost)
(203,(created_date,date),2015-10-06)
(203,(user_name,varchar),postgres)
(203,(src_type,varchar),Structured)
(203,(db_name,varchar),maindb)
(203,(port,numeric),5432)
I am trying to convert these contents into JSON using a python script:
import re
import collections
import json, jsonpickle
def convertToJSON(File):
    """Convert a file of ``(id,(name,type),value)`` triples into a JSON string.

    Each triple contributes ``{name: value}`` to the list stored under its
    ``id``; the result is wrapped as ``{"data": {id: [...]}}`` and encoded
    with ``jsonpickle``. The ``type`` component is parsed over but not used.
    Underscores and all non-word characters are stripped from ids and values
    (so ``2015-10-07`` becomes ``20151007``); names keep their underscores.

    :param File: path of the text file holding the triples.
    :returns: the JSON text; when the file cannot be opened an error line is
        printed and the encoding of ``{"data": {}}`` is returned.
    """
    # Innermost parenthesised group, e.g. "(db_name,string)".
    inner_group = re.compile(r"\([^\(^\)]*\)")
    # Removes everything except word characters and apostrophes, plus "_".
    strip_value = re.compile(r"[^\w']|_")
    # Same, but underscores survive so names such as "db_name" stay intact.
    strip_name = re.compile(r"[^\w']")

    try:
        with open(File, 'r') as f:
            triple = f.read()
    except IOError:
        # Best effort, as before: report and carry on with no input.
        print("Error in opening file..")
        triple = ""

    # Splitting on the inner groups leaves the text between them:
    # idList[i] holds the id in front of group i, idList[i + 1] the value
    # behind it (followed by the next line's id, if any).
    tripleList = inner_group.findall(triple)
    idList = inner_group.split(triple)

    jsonData = {}
    key = ""
    for i, trip in enumerate(tripleList):
        before = idList[i]
        after = idList[i + 1]

        if i == 0:
            key = strip_value.sub("", before)
        elif "(" in before:
            # "before" looks like ",value)\n(id," - the id follows the "(".
            key = strip_value.sub("", before.split("(")[1])
        # Otherwise no new id was found; keep the previous key.

        if "(" in after:
            # Another triple follows: the value is the part before ")".
            content = strip_value.sub("", after.split(")")[0])
        else:
            content = strip_value.sub("", after)

        name = strip_name.sub("", re.split(r",|:", trip)[0])
        jsonData.setdefault(key, []).append({name: content})

    return jsonpickle.encode({'data': jsonData}, unpicklable=False)
I am calling this function convertToJSON() within the same file as:
print convertToJSON("triple_response.txt")
I am getting the output as i expect like:
{"data": {"1": [{"db_name": "DSP"}, {"rel": "2"}], "201": [{"src_id": "Pos201510070"}, {"src_name": "Postgres"}, {"password": "root"}, {"host": "localhost"}, {"created_date": "20151007"}, {"user_name": "postgres"}, {"src_type": "Structured"}, {"db_name": "postgres"}, {"port": "None"}], "203": [{"src_id": "pos201510060"}, {"src_name": "Postgres"}, {"password": "root"}, {"host": "localhost"}, {"created_date": "20151006"}, {"user_name": "postgres"}, {"src_type": "Structured"}, {"db_name": "maindb"}, {"port": "5432"}], "2": [{"rel_name": "DataSource"}, {"tuple": "201"}, {"tuple": "202"}, {"tuple": "203"}], "202": [{"src_id": "pos201510060"}, {"src_name": "Postgres"}, {"password": "root"}, {"host": "localhost"}, {"created_date": "20151006"}, {"user_name": "postgres"}, {"src_type": "Structured"}, {"db_name": "DSP"}, {"port": "5432"}]}}
The problem I am facing arises when I call this function from outside the class, like this:
# Writes the SPO triples to 'triple_response.txt', echoes the file back, and
# converts it to JSON with convertToJSON().
def extractConvertData(self):
# SPO, source, db_name, table_name and response are not defined in this snippet.
triple_response = SPO(source, db_name, table_name, response)
try:
# NOTE(review): this handle is never closed or flushed in this method, so the
# written text may still be buffered when the same file is reopened below.
_triple_file = open('triple_response.txt','w+')
_triple_file.write(triple_response)
print "written data in file.."
# Read the file back line by line and echo each line.
with open('triple_response.txt','r+') as f:
for word in f:
print word
jsonData = convertToJSON(str('triple_response.txt'))
# A single handler covers both the write and the read-back.
except IOError:
print "Not able to open a file"
print "Converted into JSON"
print jsonData
pass
Here the same convertToJSON() code does not work.
It produces neither output nor an error; it is unable to read the content of the 'triple_response.txt' file in these lines:
with open('triple_response.txt','r+') as f:
for word in f:
print word
Any one can tell me solution to this problem..

_triple_file is never closed (except implicitly when you end the Python process, which is a terrible practice).
You can get platform-specific behavior when you have dangling filehandles like that (what is your platform? Unix? Windows?). Probably the write to _triple_file is not getting flushed.
So don't leave it dangling. Make sure to close it after you write it: (_triple_file.write(triple_response)). And in fact then assert that that file length is non-zero, using os.stat(), otherwise raise an Exception.
Also, you only have one big try...except clause to catch all errors, this is too much in one bite. Break it into two separate try...except clauses for writing _triple_file, and then reading it back. (Btw you might like to use tempfile library instead, to sidestep needing to know your intermediate file's pathname).
Something like the following untested pseudocode:
import os

triple_response = SPO(source, db_name, table_name, response)

# Step 1: write the intermediate file. The with-block closes (and therefore
# flushes) the handle even if write() raises, so nothing is left dangling.
try:
    with open('triple_response.txt', 'w') as _triple_file:
        _triple_file.write(triple_response)
except IOError:
    print("Not able to write intermediate JSON file")
    raise

# Step 2: fail loudly if nothing reached the disk.
if os.stat('triple_response.txt').st_size == 0:
    raise RuntimeError("Error: intermediate JSON file was empty")

# Step 3: read the file back and convert it.
try:
    with open('triple_response.txt', 'r') as f:
        for word in f:
            print(word)
    jsonData = convertToJSON('triple_response.txt')
except IOError:
    print("Not able to read back intermediate JSON file")
    #raise # if you want to reraise the exception
...

Related

Try except block python varies result

I am reading some .json files. Some of the files are empty. So I am using try-except block to read.
If it is empty then except condition will be executed. My code snippet looks like this:
# Loops over jsonList, loads each JSON file, creates the output directories
# when needed and collects the series folder name of every work item.
# jsonList and outPath are defined outside this snippet.
# NOTE(review): the name `exec` shadows the Python 3 builtin of the same name.
exec = True
for jsonidx, jsonpath in enumerate(jsonList):
print("\n")
print(">>> Reading {} of {} json file: {}".format(jsonidx, len(jsonList), os.path.basename(jsonpath)))
try:
with open(jsonpath, "r") as read_file:
jsondata = json.load(read_file)
outPath = r'{}'.format(outPath)
dicomPath = os.path.join(outPath, 'dicoms')
nrrdPath = os.path.join(outPath, 'nrrds')
if exec: # if you want to execute the move
if not os.path.isdir(outPath): # if the outPath directory doesn't exist.
os.mkdir(outPath)
os.mkdir(dicomPath)
os.mkdir(nrrdPath)
thisJsonDcm = []
for widx, jw in enumerate(jsondata['workItemList']):
# print('\n')
print('-------------------- Extracting workitem #{} --------------------'.format(widx))
seriesName = jw['imageSeriesSet'][0]['seriesLocalFolderName'] # this is dicom folder whole path
thisJsonDcm.append(seriesName)
# The bare except catches every exception raised anywhere in the try body
# (file access, os.mkdir, missing keys), not only an empty or invalid JSON
# file, yet it always reports "Json empty".
except:
print("Json empty")
The code ran perfectly at the first couple of times or so, where it iterates the second for loop with jsondata["workItemList"].
But when I run later again, the second for loop doesn't iterate and all the iterations show the print statement inside except json empty.
Does try-except block have any state or specific behavior?? Do I need to delete or refresh something after running the first time to repeat again?
All you need is json.decoder.JSONDecodeError exception.
It looks like this:
try:
pass
"""Some stuff with json..."""
except json.decoder.JSONDecodeError:
print("Json empty")
More about in Json Docs
Or you can handle error only when loading json
# Set to False to skip creating the output directories.
# (Name kept from the question; note that it shadows the builtin exec.)
exec = True
for jsonidx, jsonpath in enumerate(jsonList):
    print("\n")
    print(">>> Reading {} of {} json file: {}".format(jsonidx, len(jsonList), os.path.basename(jsonpath)))

    # Only the load itself is guarded, so an empty/invalid file is reported as
    # such while any other failure further down surfaces with its real error.
    with open(jsonpath, "r") as read_file:
        try:
            jsondata = json.load(read_file)
        except json.decoder.JSONDecodeError:
            print("Json empty")
            continue

    outPath = r'{}'.format(outPath)
    dicomPath = os.path.join(outPath, 'dicoms')
    nrrdPath = os.path.join(outPath, 'nrrds')
    if exec:  # if you want to execute the move
        # makedirs creates outPath (and missing parents) as needed and is a
        # no-op for directories that already exist, so a partially created
        # tree from an earlier run is completed instead of being skipped.
        os.makedirs(dicomPath, exist_ok=True)
        os.makedirs(nrrdPath, exist_ok=True)

    thisJsonDcm = []
    for widx, jw in enumerate(jsondata['workItemList']):
        print('-------------------- Extracting workitem #{} --------------------'.format(widx))
        seriesName = jw['imageSeriesSet'][0]['seriesLocalFolderName']  # this is dicom folder whole path
        thisJsonDcm.append(seriesName)
You can read more about try/except in Python Documentation

How to Create a Function to extract data from a JSON file

I am currently extracting trade data from a JSON file to create graphs. I have made a file to load in my JSON data, but I want to create a function that allows me to extract specific data points (like a getter?). How would I go about this?
So the class below is meant to store the data, but I'm not sure how to connect it back to my loaded JSON file.
This is my function so far
class TradeInfo:
    """Holds four values describing one trade entry.

    The constructor stores its arguments unchanged as the attributes
    ``symbol``, ``volume``, ``pChange`` and ``gcount``. The attributes can be
    read directly; the ``get*`` methods are kept for existing callers.
    """

    def __init__(self, sym, vol, pChange, gcount):
        self.symbol = sym
        self.volume = vol
        self.pChange = pChange
        self.gcount = gcount

    def __repr__(self):
        return "TradeInfo(sym={!r}, vol={!r}, pChange={!r}, gcount={!r})".format(
            self.symbol, self.volume, self.pChange, self.gcount
        )

    def getSymbol(self):
        """Return the stored ``symbol``."""
        return self.symbol

    def getVolume(self):
        """Return the stored ``volume``."""
        return self.volume

    def getPriceChange(self):
        """Return the stored ``pChange``."""
        return self.pChange

    def getCount(self):
        """Return the stored ``gcount``."""
        return self.gcount
and below is the output I receive when I load my JSON file in a separate function
enter image description here
This is the code to load my JSON file
def loadfile(infileName, biDir=True):
    """Load a JSON file and return its content re-serialised as indented text.

    :param infileName: path of the JSON file to read.
    :param biDir: accepted for backward compatibility; not used.
    :returns: the parsed document dumped back to a string with ``indent=4``,
        or ``None`` when the file cannot be opened or read (the error is
        printed).
    """
    try:
        with open(infileName) as f:
            parsed = json.load(f)
    except IOError as e:
        print("Error in file processing: " + str(e))
        # Previously execution fell through to ``return fileObj`` with the
        # name unbound, which raised UnboundLocalError.
        return None
    return json.dumps(parsed, indent=4)
Let's say your JSON looks like this:
{
"marketId": "LTC-AUD",
"bestBid": "67.62",
"bestAsk": "68.15",
"lastPrice": "67.75",
"volume24h": "190.19169781",
"volumeQte24h": "12885.48752662",
"price24h": "1.37",
"pricePct24h": "2.06",
"low24h": "65.89",
"high24h": "69.48",
"timestamp": "2020-10-10T11:14:19.270000Z"
}
So your loadfile function should look something like this:
import json
def load_file(infile_name) -> "dict | None":
    """Parse the JSON file at *infile_name* and return the resulting object.

    Returns ``None`` (after printing the error) when the file cannot be
    opened or read.
    """
    try:
        with open(infile_name) as f:
            return json.load(f)
    except IOError as e:
        # str(e) is required here: concatenating the exception object itself
        # to a string raises TypeError and hides the real I/O error.
        print("Error in file processing: " + str(e))
        return None
data = load_file("sample_json.json")
print(json.dumps(data, indent=2, sort_keys=True))
print(data['timestamp'])
Output:
{
"bestAsk": "68.15",
"bestBid": "67.62",
"high24h": "69.48",
"lastPrice": "67.75",
"low24h": "65.89",
"marketId": "LTC-AUD",
"price24h": "1.37",
"pricePct24h": "2.06",
"timestamp": "2020-10-10T11:14:19.270000Z",
"volume24h": "190.19169781",
"volumeQte24h": "12885.48752662"
}
2020-10-10T11:14:19.270000Z
I've simplified your function and removed a redundant argument biDir because you're not using it anywhere.

Iterating through JSON is requiring more for loops than I'd like

I am reading through a .json file and parsing some of the data to save into an Object. There are only 2000 or so items within the JSON that I need to iterate over, but the script I currently have running takes a lot longer than I'd like.
# Imports ratings from a JSON dump: for every 'cards.cardmodel' item a Rating
# row is saved, and the names of cards that cannot be found are written to a
# text file. User, Format, Card and Rating are defined outside this snippet.
data_file = 'v1/data/data.json'
user = User.objects.get(username='lsv')
# NOTE(review): the name `format` shadows the builtin of the same name.
format = Format(format='Limited')
format.save()
lost_cards = []
lost_cards_file = 'v1/data/LostCards.txt'
with open(data_file) as file:
data = json.load(file)
for item in data:
if item['model'] == 'cards.cardmodel':
# One filter() lookup per item, by name only, to detect missing cards ...
if len(Card.objects.filter(name=item['fields']['name'])) == 0:
print(f"card not found: {item['fields']['name']}")
lost_cards.append(item['fields']['name'])
try:
# ... followed by a get() by name and set code, and a save(), per item.
Rating(
card=Card.objects.get(name=item['fields']['name'], set__code=item['fields']['set']),
rating=item['fields']['rating'],
reason=item['fields']['reason'],
format=format,
rator=user
).save()
except Exception as e:
print(e, item['fields']['name'], item['fields']['set'])
# NOTE(review): indentation was lost in this paste; this break presumably
# belongs to the except branch, stopping the import at the first failure.
break
# The list of missing card names is written as its str() representation.
with open(lost_cards_file, 'w') as file:
file.write(str(lost_cards))
The code is working as expected, but it's taking a lot longer than I'd like. I'm hoping there is a built-in JSON or iterator function that could accelerate this process.
There is. It's called the json module.
with open(data_file, 'r') as input_file:
dictionary_from_json = json.load(input_file)
should do it.

python does not read the second line of the file

i have a text file with the below contents
url1,user1,xxxxxxxxx
url2,user2,yyyyyyyyy
I have a block of code that is supposed to get the value xxxxxxxxx or yyyyyyyyy based on the env value passed(prod or test)
#!/usr/bin/python
import os
# Reads "file.txt" and stores the third comma-separated field of one of its
# lines in self.api, chosen by self.env.
class test:
def __init__(self, env):
self.env = env
def func(self):
# Collects the third field of each line read so far.
res = []
try:
if os.path.exists("file.txt"):
try:
with open("file.txt", 'r') as fp:
for line in fp:
print("line is " +line)
# The line is not stripped, so the field keeps its trailing newline.
line_api = line.split(',')[2]
print(line_api)
res.append(line_api)
print(res)
# NOTE(review): indentation was lost in this paste; if this if/else sits
# inside the for loop, res holds a single element on the first pass and
# res[1] raises IndexError - confirm against the original file.
if self.env == "prod":
self.api = res[0]
print(self.api)
else:
self.api = res[1]
print(self.api)
# Any exception is only printed, not re-raised.
except Exception as e:
print(e)
except Exception as e:
print(e)
value when else part is executed
list index out of range
Now when the env passed is prod the function works, but when the value is test and the else part is executed, the list res contains only one value (xxxxxx), so the code self.api = res[1] fails. In that case print(res) prints only ['xxxxxxxxxxxx\n'], but when the url passed is url1, print(res) prints both values: ['xxxxxxxxxxx\n', 'yyyyyyyyy \n']
What is wrong with my code?
The issue with your code is that you split each line that you read in with line_api = line.split(',')[2] and the [2] is referencing the third element that exists in that list which is xxxxxxxxx or yyyyyyyyy, then when you call self.api = res[0] you reference the first (and only) element in that list.
self.api = res[1] will always throw an error because it will never exist in the 1 element list. I'm not sure what the goal was for this else statement, but I would suggest using DirtyBit's elegant solution
Since it is hard to debug your already excessive code, Here is a shorter snippet:
Using startswith():
list.txt:
url1,user1,xxxxxxxxx
url2,user2,yyyyyyyyy
Hence:
logFile = "list.txt"
def getUrlValue(url):
    """Print the third comma-separated field of each matching line in ``logFile``.

    A line matches when its first field is exactly *url*; blank lines and
    lines with fewer than three fields are skipped.
    """
    # Including the separator keeps "url1" from also matching "url10,...".
    prefix = url + ","
    with open(logFile) as f:
        for raw in f:
            line = raw.strip()
            if not line.startswith(prefix):
                continue
            fields = line.split(',')
            if len(fields) > 2:
                print(fields[2])
getUrlValue("url1")
getUrlValue("url2")
OUTPUT:
xxxxxxxxx
yyyyyyyyy

KeyError: u'somestring' Json

I am trying to make a point system for my Twitch bot and I am encountering KeyErrors when trying to make a new entry for some odd reason. Here is my code:
import urllib2, json
# Fetches the chatter list for channel `chan`, sets every viewer's 'Points'
# to "0" in the data loaded from 'dat.dat', and writes the data back.
def updateUsers(chan):
j = urllib2.urlopen('http://tmi.twitch.tv/group/user/' + chan + '/chatters')
j_obj = json.load(j)
with open('dat.dat', 'r') as data_file:
data = json.load(data_file)
for usr in j_obj['chatters']['viewers']:
# data[usr] is read before it is assigned, so a viewer with no entry yet
# raises KeyError here.
# NOTE(review): the stored format shown in the post nests users under a
# top-level "users" key, whereas this indexes `data` directly - confirm.
data[usr]['Points'] = "0" # Where the KeyError: u'someguysusername' occurs
with open('dat.dat', 'w') as out_file:
json.dump(data, out_file)
updateUsers('tryhard_clan')
If you want to see the Json itself go to http://tmi.twitch.tv/group/user/tryhard_clan/chatters
I'm storing user data in a file in this format:
{"users": {"cupcake": {"Points": "0"}}}
A slightly more concise form than @Raunak suggested:
data.setdefault (usr, {}) ['Points'] = "0"
that will set data[usr] to an empty dict if it's not already there, and set the 'Points' element in any case.
It happens because the variable usr doesn't resolve to an existing key in data. Do this instead:
if usr not in data:
data[usr] = {}
data[usr]['Points'] = "0"

Categories

Resources