Python CSV function outputs blank CSV when not enough rows?

I have a function which takes a list of custom objects, conforms some values, then writes them to a CSV file. Something really strange is happening: when the list contains only a few objects, the resulting CSV file is always blank. When the list is longer, the function works fine. Is it some kind of weird anomaly with the temporary file, perhaps?
I should point out that this function returns the temporary file to a web server, allowing the user to download the CSV. The web server function is below the main function.
def makeCSV(things):
    import csv
    from tempfile import NamedTemporaryFile
    # make the csv headers from an object
    headers = [h for h in dir(things[0]) if not h.startswith('_')]
    # this just pretties up the object and returns it as a dict
    def cleanVals(item):
        new_item = {}
        for h in headers:
            try:
                new_item[h] = getattr(item, h)
            except:
                new_item[h] = ''
            if isinstance(new_item[h], list):
                if new_item[h]:
                    new_item[h] = [z.__str__() for z in new_item[h]]
                    new_item[h] = ', '.join(new_item[h])
                else:
                    new_item[h] = ''
            new_item[h] = new_item[h].__str__()
        return new_item
    things = map(cleanVals, things)
    f = NamedTemporaryFile(delete=True)
    dw = csv.DictWriter(f, sorted(headers), restval='', extrasaction='ignore')
    dw.writer.writerow(dw.fieldnames)
    for t in things:
        try:
            dw.writerow(t)
            # I can always see the dicts here...
            print t
        except Exception as e:
            # and there are no exceptions
            print e
    return f
Web server function:
f = makeCSV(search_results)
response = FileResponse(f.name)
response.headers['Content-Disposition'] = (
    "attachment; filename=export_%s.csv" % collection)
return response
Any help or advice greatly appreciated!

Summarizing eumiro's answer: the file needs to be flushed. Call f.flush() at the end of makeCSV().
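Why only short lists are affected: a few rows never fill Python's in-memory write buffer, so nothing reaches the file on disk before the web server reads it; longer lists fill the buffer and force at least a partial flush, which is why they appear to work. A minimal sketch of the fix, applied to the tail of makeCSV() above (the seek is an optional extra, assuming the caller might read from the handle itself rather than reopening f.name):

    for t in things:
        dw.writerow(t)
    f.flush()  # force the buffered rows out to the file on disk
    f.seek(0)  # optional: rewind in case the caller reads this handle directly
    return f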

Related

Trying to load pickled data to a list isn't appending properly

I'm writing a to-do list application, and to store the task class objects I'm pickling a list of the objects created. However, when I load the data, the list appears empty. The way I structured it is to create an empty list each session, then append the contents of the pickle file. When a new task is created, it is appended to the list, and the whole list is then pickled again and reloaded.
This is my first real software project, so my code looks pretty rough. I reviewed it and can't find any glaring errors, but obviously I am doing something wrong.
Here is the relevant code:
import _pickle as pickle
import os.path
from os import path
from datetime import datetime

# checks if data exists, and creates file if it does not
if path.exists('./tasks.txt') != True:
    open("./tasks.txt", 'wb')
else:
    pass

# define class for tasks
class task:
    def __init__(self, name, due, category):
        self.name = name
        self.due = datetime.strptime(due, '%B %d %Y %I:%M%p')
        self.category = category

    def expand(self):  # returns the contents of the task
        return str(self.name) + " is due in " + str((self.due - datetime.now()))

data = []

# load data to list
def load_data():
    with open('tasks.txt', 'rb') as file:
        while True:
            data = []
            try:
                data.append(pickle.load(file))
            except EOFError:
                break

...

# returns current task list
def list_tasks():
    clear()
    if not data:
        print("Nothing to see here.")
    else:
        i = 1
        for task in data:
            print("%s. %s" % (i, task.expand()))
            i = i + 1

# define function to add tasks
def addTask(name, due, category):
    newTask = task(name, due, category)
    data.append(newTask)
    with open('./tasks.txt', 'wb') as file:
        pickle.dump(data, file)
    load_data()
    list_tasks()

...

load_data()
list_tasks()
startup()
ask()
data = []

# load data to list
def load_data():
    with open('tasks.txt', 'rb') as file:
        while True:
            data = []
            try:
                data.append(pickle.load(file))
            except EOFError:
                break
That second data = [] doesn't look right. Having data = [] both inside and outside the function creates two separate data objects, and the one you're appending to won't be accessible anywhere else. Even if it were accessible, it would still be empty, since it's reset to [] on every iteration of the while loop. Try erasing the inner data = []. Then the data.append call will affect the globally visible data, and its contents won't be reset on each pass through the loop.
Additionally, going by the rest of your code, it looks like data is supposed to be a list of tasks. But if you pickle a list of tasks and then run data.append(pickle.load(file)), data becomes a list of lists of tasks instead. One way to keep things flat is to use extend instead of append.
data = []

# load data to list
def load_data():
    with open('tasks.txt', 'rb') as file:
        while True:
            try:
                data.extend(pickle.load(file))
            except EOFError:
                break
I think it may also be possible to load the data with a single load call, rather than many calls in a loop. It depends on whether your tasks.txt file is the result of a single pickle.dump call, or whether you appended to it multiple times with multiple pickle.dump calls while the file was opened in append mode.
def load_data():
    with open('tasks.txt', 'rb') as file:
        return pickle.load(file)

data = load_data()
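For completeness: addTask in the question already rewrites the entire list with a single pickle.dump in 'wb' mode, so the single-call load above pairs with it directly. A minimal sketch of that matched save/load pair (the save_data name is mine, not from the question):

def save_data():
    with open('tasks.txt', 'wb') as file:
        pickle.dump(data, file)  # one dump call writes the whole task list

def load_data():
    with open('tasks.txt', 'rb') as file:
        return pickle.load(file)  # one load call reads the whole list back

data = load_data()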

Iterating through JSON is requiring more for loops than I'd like

I am reading through a .json file and parsing some of the data to save into an Object. There are only 2000 or so items within the JSON that I need to iterate over, but the script I currently have running takes a lot longer than I'd like.
data_file = 'v1/data/data.json'
user = User.objects.get(username='lsv')
format = Format(format='Limited')
format.save()
lost_cards = []
lost_cards_file = 'v1/data/LostCards.txt'

with open(data_file) as file:
    data = json.load(file)
    for item in data:
        if item['model'] == 'cards.cardmodel':
            if len(Card.objects.filter(name=item['fields']['name'])) == 0:
                print(f"card not found: {item['fields']['name']}")
                lost_cards.append(item['fields']['name'])
            try:
                Rating(
                    card=Card.objects.get(name=item['fields']['name'], set__code=item['fields']['set']),
                    rating=item['fields']['rating'],
                    reason=item['fields']['reason'],
                    format=format,
                    rator=user
                ).save()
            except Exception as e:
                print(e, item['fields']['name'], item['fields']['set'])
                break

with open(lost_cards_file, 'w') as file:
    file.write(str(lost_cards))
The code is working as expected, but it's taking a lot longer than I'd like. I'm hoping there is a built-in JSON or iterator function that could accelerate this process.
There is. It's called the json module.
with open(data_file, 'r') as input_file:
    dictionary_from_json = json.load(input_file)
should do it.
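That said, json.load already parses the whole file in one call, so the loop itself is unlikely to be the bottleneck; the two database queries per item are. A hedged sketch (assuming the Django models from the question, and that name plus set code uniquely identify a card) that fetches all cards once and does the lookups in memory:

# Build an in-memory index once instead of issuing two queries per JSON item.
cards_by_key = {(c.name, c.set.code): c
                for c in Card.objects.select_related('set')}

for item in data:
    if item['model'] != 'cards.cardmodel':
        continue
    fields = item['fields']
    card = cards_by_key.get((fields['name'], fields['set']))
    if card is None:
        print(f"card not found: {fields['name']}")
        lost_cards.append(fields['name'])
        continue
    Rating(card=card, rating=fields['rating'], reason=fields['reason'],
           format=format, rator=user).save()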

python does not read the second line of the file

I have a text file with the below contents:
url1,user1,xxxxxxxxx
url2,user2,yyyyyyyyy
I have a block of code that is supposed to get the value xxxxxxxxx or yyyyyyyyy based on the env value passed (prod or test):
#!/usr/bin/python
import os

class test:
    def __init__(self, env):
        self.env = env

    def func(self):
        res = []
        try:
            if os.path.exists("file.txt"):
                try:
                    with open("file.txt", 'r') as fp:
                        for line in fp:
                            print("line is " + line)
                            line_api = line.split(',')[2]
                            print(line_api)
                            res.append(line_api)
                            print(res)
                            if self.env == "prod":
                                self.api = res[0]
                                print(self.api)
                            else:
                                self.api = res[1]
                                print(self.api)
                except Exception as e:
                    print(e)
        except Exception as e:
            print(e)
Output when the else part is executed:
list index out of range
Now when the env passed is prod the function works, but when the value is test and the else part is executed, the list res contains only one value and self.api = res[1] fails. print(res) prints only ['xxxxxxxxxxxx\n'], whereas in the working prod case print(res) eventually prints both values: ['xxxxxxxxxxx\n', 'yyyyyyyyy \n'].
What is wrong with my code?
The issue with your code is that the if/else check sits inside the for loop, so it runs as soon as the first line has been read. You split each line with line_api = line.split(',')[2], where [2] picks the third field (xxxxxxxxx or yyyyyyyyy), and append it to res. On the first iteration res holds exactly one element, so self.api = res[0] succeeds, but self.api = res[1] raises an IndexError before the second line is ever read; the exception is caught and printed, and the loop never continues. I'm not sure what the goal was for this else statement, but I would suggest using DirtyBit's elegant solution below.
Since it is hard to debug your already extensive code, here is a shorter snippet.
Using startswith():
list.txt:
url1,user1,xxxxxxxxx
url2,user2,yyyyyyyyy
Hence:
logFile = "list.txt"

def getUrlValue(url):
    with open(logFile) as f:
        content = f.readlines()
    # you may also want to remove empty lines
    content = [l.strip() for l in content if l.strip()]
    for line in content:
        if line.startswith(url):
            print(line.split(',')[2])

getUrlValue("url1")
getUrlValue("url2")
OUTPUT:
xxxxxxxxx
yyyyyyyyy
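To tie this back to the env argument from the question, a small variation (the getApiValue helper and the env-to-url mapping are illustrative assumptions, not from the answer) returns the value instead of printing it:

ENV_TO_URL = {"prod": "url1", "test": "url2"}  # assumed mapping, per the question

def getApiValue(env, log_file="list.txt"):
    url = ENV_TO_URL[env]
    with open(log_file) as f:
        for line in f:
            line = line.strip()
            if line and line.startswith(url):
                return line.split(',')[2]
    return None  # no matching line found

print(getApiValue("prod"))  # xxxxxxxxx
print(getApiValue("test"))  # yyyyyyyyy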

Iterating the content of a text file in python

I have a text file named 'triple_response.txt' which contains some text such as:
(1,(db_name,string),DSP)
(1,(rel, id),2)
(2,(rel_name, string),DataSource)
(2,(tuple, id),201)
(2,(tuple, id),202)
(2,(tuple, id),203)
(201,(src_id,varchar),Pos201510070)
(201,(src_name,varchar),Postgres)
(201,(password,varchar),root)
(201,(host,varchar),localhost)
(201,(created_date,date),2015-10-07)
(201,(user_name,varchar),postgres)
(201,(src_type,varchar),Structured)
(201,(db_name,varchar),postgres)
(201,(port,numeric),None)
(202,(src_id,varchar),pos201510060)
(202,(src_name,varchar),Postgres)
(202,(password,varchar),root)
(202,(host,varchar),localhost)
(202,(created_date,date),2015-10-06)
(202,(user_name,varchar),postgres)
(202,(src_type,varchar),Structured)
(202,(db_name,varchar),DSP)
(202,(port,numeric),5432)
(203,(src_id,varchar),pos201510060)
(203,(src_name,varchar),Postgres)
(203,(password,varchar),root)
(203,(host,varchar),localhost)
(203,(created_date,date),2015-10-06)
(203,(user_name,varchar),postgres)
(203,(src_type,varchar),Structured)
(203,(db_name,varchar),maindb)
(203,(port,numeric),5432)
I am trying to convert these contents into JSON using a python script:
import re
import collections
import json, jsonpickle

def convertToJSON(File):
    word_list = []
    row_list = []
    try:
        with open(File, 'r') as f:
            for word in f:
                word_list.append(word)
        with open(File, 'r+') as f:
            for row in f:
                print row
                row_list.append(row.split())
            column_list = zip(*row_list)
    except IOError:
        print "Error in opening file.."
    triple = ""
    for t in word_list:
        triple += t
    tripleList = re.findall(r"\([^\(^\)]*\)", triple)
    idList = re.split(r"\([^\(^\)]*\)", triple)
    i = 0
    jsonDummy = []
    jsonData = {}
    for trip in tripleList:
        nameAndType = re.split(r",|:", trip)
        if(i == 0):
            key = re.compile("[^\w']|_").sub("", idList[i])
        else:
            try:
                key = re.compile("[^\w']|_").sub("", idList[i].split("(")[1])
            except IndexError:
                pass
        i = i + 1
        if(idList[i].find('(') != -1):
            try:
                content = re.compile("[^\w']|_").sub("", idList[i].split(")")[0])
            except IndexError:
                pass
        else:
            content = re.compile("[^\w']|_").sub("", idList[i])
        try:
            trip = trip[1:-1]
            tripKey = trip[1]
        except IndexError:
            tripKey = ''
        name = re.compile("[^\w']").sub("", nameAndType[0])
        try:
            typeName = re.compile("[^\w']|_").sub("", nameAndType[1])
        except IndexError:
            typeName = 'String'
        tripDict = dict()
        value = dict()
        value[name] = content
        tripDict[key] = value
        jsonDummy.append(tripDict)
    for j in jsonDummy:
        for k, v in j.iteritems():
            jsonData.setdefault(k, []).append(v)
    data = dict()
    data['data'] = jsonData
    obj = {}
    obj = jsonpickle.encode(data, unpicklable=False)
    return obj
I am calling this function convertToJSON() within the same file as:
print convertToJSON("triple_response.txt")
I am getting the output as I expect, like:
{"data": {"1": [{"db_name": "DSP"}, {"rel": "2"}], "201": [{"src_id": "Pos201510070"}, {"src_name": "Postgres"}, {"password": "root"}, {"host": "localhost"}, {"created_date": "20151007"}, {"user_name": "postgres"}, {"src_type": "Structured"}, {"db_name": "postgres"}, {"port": "None"}], "203": [{"src_id": "pos201510060"}, {"src_name": "Postgres"}, {"password": "root"}, {"host": "localhost"}, {"created_date": "20151006"}, {"user_name": "postgres"}, {"src_type": "Structured"}, {"db_name": "maindb"}, {"port": "5432"}], "2": [{"rel_name": "DataSource"}, {"tuple": "201"}, {"tuple": "202"}, {"tuple": "203"}], "202": [{"src_id": "pos201510060"}, {"src_name": "Postgres"}, {"password": "root"}, {"host": "localhost"}, {"created_date": "20151006"}, {"user_name": "postgres"}, {"src_type": "Structured"}, {"db_name": "DSP"}, {"port": "5432"}]}}
Now here is the problem I am facing: I am calling this from outside the class, as:
def extractConvertData(self):
    triple_response = SPO(source, db_name, table_name, response)
    try:
        _triple_file = open('triple_response.txt', 'w+')
        _triple_file.write(triple_response)
        print "written data in file.."
        with open('triple_response.txt', 'r+') as f:
            for word in f:
                print word
        jsonData = convertToJSON(str('triple_response.txt'))
    except IOError:
        print "Not able to open a file"
    print "Converted into JSON"
    print jsonData
    pass
The same convertToJSON() code is not working here...
It neither gives any output nor raises any error; it is not able to read the content from the 'triple_response.txt' file at the line:

with open('triple_response.txt', 'r+') as f:
    for word in f:
        print word

Can anyone tell me the solution to this problem?
_triple_file is never closed (except implicitly when you end the Python process, which is a terrible practice).
You can get platform-specific behavior when you have dangling filehandles like that (what is your platform? Unix? Windows?). Probably the write to _triple_file is not getting flushed.
So don't leave it dangling. Make sure to close it after you write it: _triple_file.close() right after _triple_file.write(triple_response). Then assert that the file length is non-zero using os.stat(), and raise an exception otherwise.
Also, you have only one big try...except clause to catch all errors, which is too much in one bite. Break it into two separate try...except clauses: one for writing _triple_file, one for reading it back. (Btw, you might like to use the tempfile library instead, to sidestep needing to know your intermediate file's pathname.)
Something like the following untested pseudocode:
import os

triple_response = SPO(source, db_name, table_name, response)

try:
    _triple_file = open('triple_response.txt', 'w+')
    _triple_file.write(triple_response)
    _triple_file.close()
except IOError:
    print "Not able to write intermediate JSON file"
    raise

# raise if the intermediate file came out empty
assert os.stat('triple_response.txt').st_size > 0, "Error: intermediate JSON file was empty"

try:
    with open('triple_response.txt', 'r+') as f:
        for word in f:
            print word
    jsonData = convertToJSON(str('triple_response.txt'))
except IOError:
    print "Not able to read back intermediate JSON file"
    # raise  # if you want to reraise the exception
...
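And here is a minimal sketch of the tempfile variant suggested above (assuming convertToJSON accepts any pathname; the delete=False flag and the explicit cleanup are my choices, not part of the original answer):

import os
from tempfile import NamedTemporaryFile

tmp = NamedTemporaryFile(mode='w+', suffix='.txt', delete=False)
try:
    tmp.write(triple_response)
finally:
    tmp.close()  # close deterministically so the data is flushed to disk

assert os.stat(tmp.name).st_size > 0, "intermediate file is empty"
jsonData = convertToJSON(tmp.name)
os.remove(tmp.name)  # clean up the intermediate file once it has been consumed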

python implementation of 'readAsDataURL'

I'm having some trouble getting the URI for a certain file, like .mp4/.ogg/etc.
The thing is that I need to do it in Python, where the webserver is running.
Initially, I proceeded like this:
def __parse64(self, path_file):
    string_file = open(path_file, 'r').readlines()
    new_string_file = ''
    for line in string_file:
        striped_line = line.strip()
        separated_lines = striped_line.split('\n')
        new_line = ''
        for l in separated_lines:
            new_line += l
        new_string_file += new_line
    self.encoded_string_file = b64.b64encode(new_string_file)
But this way doesn't give what I need, if you compare the result with the one given here.
What I need is a way to implement the readAsDataURL() function from the FileReader class (see the code at the link above) in Python.
UPDATE:
The solution given by @SeanVieira returns a valid data field for the URI.

def __parse64(self, path_file):
    file_data = open(path_file, 'rb').read(-1)
    self.encoded_string_file = b64.b64encode(file_data)

Now how can I complete the URI with the previous fields? For example: data:video/mp4;base64,<data>
Thanks!
The problem is that you are treating binary-encoded data as text data, which is breaking your code.
Try:
def __parse64(self, path_file):
    file_data = open(path_file, 'rb').read(-1)
    # This slurps the whole file as binary.
    self.encoded_string_file = b64.b64encode(file_data)
@SeanVieira's answer will not work if the file is very large (more than 7 MB).
This function will work for all cases (tested on Python version 3.4):
def __parse64(self, path_file):
    data = bytearray()
    with open(path_file, "rb") as f:
        b = f.read(1)
        while b != b"":
            data.append(int.from_bytes(b, byteorder='big'))
            b = f.read(1)
    self.encoded_string_file = base64.b64encode(data)
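To also answer the asker's follow-up about completing the URI: a short sketch (the make_data_url helper and its use of the standard mimetypes module are my additions, not from either answer) that prefixes the base64 payload with the guessed MIME type:

import base64
import mimetypes

def make_data_url(path_file):
    # Guess the MIME type from the extension, e.g. 'video/mp4' for .mp4 files.
    mime, _ = mimetypes.guess_type(path_file)
    mime = mime or 'application/octet-stream'  # fallback for unknown types
    with open(path_file, 'rb') as f:
        payload = base64.b64encode(f.read()).decode('ascii')
    return "data:%s;base64,%s" % (mime, payload)

# e.g. make_data_url('clip.mp4') -> 'data:video/mp4;base64,AAAA...'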
