I am working on a project in which a user uploads a file from the front end; without being stored, the file goes to the backend, where some processing is done and the results are returned. Here are the functions in views.py that handle this:
def midpro(request, *args):
    global file
    if(request.method == 'POST'):
        try:
            file = request.FILES['data']
        except:
            print("No File")
        if(file!=None):
            if(file.name[-1:-4:-1]!="vsc"):
                return render(request, 'mapp/nocsv.html')
            else:
                return linml(request)
    return render(request, 'mapp/nofile.html')
def linml(request, *args):
    global retdata
    global file
    ans = list()
    col = ['D1', 'D2']
    if(file!=None):
        ins = mapp.Mapp(file)
        retdata = ins.linml()
        for i in zip(col, retdata):
            ans.append(i)
    context = {
        'ans':ans,
        'data':file
    }
    return render(request, 'mapp/linml.html', context)
The code inside the Mapp class is:
class Mapp:
    def __init__(self, file):
        self.file = file

    def linml(self, *args):
        data = pd.read_csv(self.file)
        data = np.array(data)
        return([np.mean(data), np.var(data)])
pd is the alias for the pandas library and np is the alias for the numpy library.
The error "I/O operation on closed file" occurs at the data = pd.read_csv(self.file) step.
Can anyone tell me how I can resolve this issue?
Also, if I try to explicitly open the file with:
with open(self.file) as f:
it shows the error "expected str, bytes or os.PathLike object, not InMemoryUploadedFile", which I guess means that the file is already open.
So, can anyone please tell me why "I/O operation on closed file" is happening?
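For what it's worth, a likely cause is that the global file still refers to the upload from an earlier request, and Django closes uploaded files once a request has been processed. One way to avoid both errors is to drop the global, read the upload's bytes while the request is being handled, and hand pandas a fresh buffer. A minimal sketch (the buffer handoff is my own restructuring, not the original view signatures):

import io

def midpro(request, *args):
    if request.method == 'POST' and 'data' in request.FILES:
        upload = request.FILES['data']
        if not upload.name.endswith('.csv'):
            return render(request, 'mapp/nocsv.html')
        # Read the bytes now, while the uploaded file is still open,
        # and pass a fresh in-memory buffer instead of the upload object.
        buffer = io.BytesIO(upload.read())
        return linml(request, buffer)
    return render(request, 'mapp/nofile.html')

linml would then take the buffer as a parameter and pass it to Mapp, instead of reading the global.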
I have written a test as below:
class TestLoader(TestCase):
    @pytest.fixture(autouse=True)
    @patch('loaders.myloader.DSFactory')
    def _initialize_(self, mock_ds_factory):
        self.loader = MyLoader()
        mock_ds = Mock()
        mock_ds_factory.get_ds_for_env.return_value = mock_ds
        self.loader.ds = mock_ds

    def test_load(self):
        self.loader.ds.read_file.return_value = json.dumps(self.get_data())
        self.loader.load("test_s3_key")  # I am getting the error mentioned below on this line

    @staticmethod
    def get_data():
        return {"key1": "value1", "key2": "value2"}
The associated source is located at loaders -> myloader.py, which is as follows:
import json

from common.ds_factory import DSFactory

class MyLoader:
    def __init__(self):
        self.ds = DSFactory.get_ds_for_env()

    def load(self, file_key):
        print(f"ds : {self.ds}")
        print(f"file read is : {self.ds.read_file(S3_BUCKET, file_key)}")
        data_dict = json.loads(self.ds.read_file(S3_BUCKET, file_key))
But while testing, I am getting the following error:
ds is :<MagicMock name='DSFactory.get_ds_for_env()' id='140634163567528'>
file read is :<MagicMock name='DSFactory.get_ds_for_env().read_file()' id='140635257259568'>
E TypeError: the JSON object must be str, bytes or bytearray, not 'MagicMock'
I don't understand why, even after mocking the return value of read_file with
self.loader.ds.read_file.return_value = json.dumps(self.get_data())
I am still getting a MagicMock object. I am stuck and have no clue how to resolve this.
Your code:
from common.ds_factory import DSFactory

class MyLoader:
    def __init__(self):
        self.ds = DSFactory.get_ds_for_env()

    def load(self, file_key):
        data_dict = json.loads(self.datastore.read_file(S3_BUCKET, file_key))
The issue I can see here is that datastore is not defined; it should be self.ds.read_file.
Please print self.ds.read_file(S3_BUCKET, file_key) and verify the output.
The error is coming from the AWS S3 bucket's JSON structure: it seems it is not sending the JSON value as a string, but as a MagicMock instead.
To learn more about MagicMock, visit: https://medium.com/ryans-dev-notes/python-mock-and-magicmock-b3295c2cc7eb
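If it helps, here is a minimal sketch of another way to wire up the patching, using setUp and patcher.start() so the mock is guaranteed to be in place before MyLoader() is constructed (the module path is taken from the question; the rest is my own arrangement):

import json
from unittest import TestCase
from unittest.mock import Mock, patch

from loaders.myloader import MyLoader

class TestLoader(TestCase):
    def setUp(self):
        # Patch DSFactory where myloader looks it up, and start the
        # patcher manually so it is active before MyLoader() runs.
        patcher = patch('loaders.myloader.DSFactory')
        self.addCleanup(patcher.stop)
        mock_factory = patcher.start()
        self.mock_ds = Mock()
        mock_factory.get_ds_for_env.return_value = self.mock_ds
        self.loader = MyLoader()

    def test_load(self):
        # read_file now returns a real JSON string, so json.loads succeeds
        self.mock_ds.read_file.return_value = json.dumps({"key1": "value1"})
        self.loader.load("test_s3_key")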
I want to parse a large Wikipedia dump iteratively. I found a tutorial for this here: https://towardsdatascience.com/wikipedia-data-science-working-with-the-worlds-largest-encyclopedia-c08efbac5f5c
However, when I want to read in the data like this:
import subprocess
import xml.sax

data_path = 'C:\\Users\\Me\\datasets\\dewiki-latest-pages-articles1.xml-p1p262468.bz2'

class WikiXmlHandler(xml.sax.handler.ContentHandler):
    """Content handler for Wiki XML data using SAX"""
    def __init__(self):
        xml.sax.handler.ContentHandler.__init__(self)
        self._buffer = None
        self._values = {}
        self._current_tag = None
        self._pages = []

    def characters(self, content):
        """Characters between opening and closing tags"""
        if self._current_tag:
            self._buffer.append(content)

    def startElement(self, name, attrs):
        """Opening tag of element"""
        if name in ('title', 'text'):
            self._current_tag = name
            self._buffer = []

    def endElement(self, name):
        """Closing tag of element"""
        if name == self._current_tag:
            self._values[name] = ' '.join(self._buffer)
        if name == 'page':
            self._pages.append((self._values['title'], self._values['text']))

# Object for handling xml
handler = WikiXmlHandler()

# Parsing object
parser = xml.sax.make_parser()
parser.setContentHandler(handler)

# Iteratively process file
for line in subprocess.Popen(['bzcat'],
                             stdin=open(data_path),
                             stdout=subprocess.PIPE, shell=True).stdout:
    parser.feed(line)
    # Stop when 3 articles have been found
    if len(handler._pages) > 3:
        break
it seems like nothing happens: the handler._pages list, where the parsed articles should be stored, stays empty. I also added shell=True because otherwise I get the error message FileNotFoundError: [WinError 2].
I have never worked with subprocesses in Python, so I don't know what the problem might be.
I also tried to specify the data_path differently (with / and //).
Thank you in advance.
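For reference, one way to sidestep the external bzcat process entirely (and with it the FileNotFoundError, since bzcat does not exist on Windows) is to stream-decompress the dump with Python's built-in bz2 module. A sketch reusing the handler and parser from above:

import bz2

# Stream-decompress the dump in pure Python; no bzcat subprocess needed.
with bz2.open(data_path, 'rt', encoding='utf-8') as f:
    for line in f:
        parser.feed(line)
        # Stop when 3 articles have been found
        if len(handler._pages) > 3:
            break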
I have a dictionary called users that saves the data a user inputs in a TextInput in Kivy. It works fine, but when I rerun the program the info is all gone; it is not saved, and I need to add the user again. Also, it's an ATM system, so I edit the values in that dictionary, which I thought meant I can't save it to a file.
class Data:
    users = {}

    def add_user(self, email, password, name, lastname, country, num, day, month, year, gender, balance, created):
        if email not in self.users:
            self.users[email] = [password, name, lastname, country, num, day, month, year, gender, balance, created]
            return 1
        else:
            print("Email exists!")
            return -1

    def get_user(self, email):
        if email in self.users:
            return self.users[email]
        else:
            return -1

    def validate(self, email, password):
        if self.get_user(email) != -1:
            return self.users[email][0] == password
        else:
            return False

class Depositpage(Screen, Widget, Data):
    def __init__(self, **kwargs):
        super(Depositpage, self).__init__(**kwargs)
        btn1 = Button(text='Add', size_hint=(0.08, 0.06),
                      pos_hint={'x': 0.903, 'top': 0.599},
                      color=(0, 0, 0, 1), background_color=(0, 0, 0, 0))
        btn1.bind(on_release=lambda x: self.add())
        self.txt1 = TextInput(multiline=False, size_hint=(0.45, 0.13),
                              pos_hint={'x': 0.27, 'top': 0.475}, font_size=43)
        # self.ballabel = Label(text="text", font_size=20,
        #                       pos_hint={'x': -0.04, 'top': 1.27}, color=(0, 0, 0, 1))
        self.add_widget(self.txt1)
        self.add_widget(btn1)
        # self.add_widget(self.ballabel)

    def add(self):
        result = int(self.users['mo@gmail.com'][9]) + int(self.txt1.text)
        self.users['mo@gmail.com'][9] = result
        print(f"add {self.users['mo@gmail.com'][9]}")
        print(self.users['mo@gmail.com'][9])
A dictionary is not designed to store data persistently.
You can, however, dump it to a JSON file and then load it from there where you need it:
import json

with open('my_dict.json', 'w') as f:
    json.dump(my_dict, f)

# elsewhere...
with open('my_dict.json') as f:
    my_dict = json.load(f)
Loading from JSON is fairly efficient.
Another option would be the pickle or marshal modules, but unlike JSON the files they generate aren't human-readable: they turn many Python data types into a stream of bytes and then recreate the objects from those bytes.
Data persistence in Python:
https://docs.python.org/3/library/persistence.html
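To illustrate the pickle option mentioned above, a minimal sketch (note the binary file modes, and that pickle files should only be loaded from sources you trust):

import pickle

# save the dict as a byte stream (not human-readable, unlike JSON)
with open('my_dict.pkl', 'wb') as f:
    pickle.dump(my_dict, f)

# elsewhere...
with open('my_dict.pkl', 'rb') as f:
    my_dict = pickle.load(f)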
The code below works. See how you can adapt it to your code.
import json

data = {'x': 7}

# save the dict to disk
with open('data.json', 'w') as f:
    f.write(json.dumps(data))

# read the json into a dict
with open('data.json', 'r') as f:
    data_from_disk = json.loads(f.read())

print('data_from_disk: ' + str(data_from_disk))
output
data_from_disk: {'x': 7}
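Applied to the Data class from the question, the same idea might look like the sketch below; the save and load method names and the users.json path are my own choices, not part of the original code:

import json

class Data:
    users = {}

    def save(self, path='users.json'):
        # write the current users dict to disk after each change
        with open(path, 'w') as f:
            json.dump(self.users, f)

    def load(self, path='users.json'):
        # restore the users dict at startup, if a saved file exists
        try:
            with open(path) as f:
                Data.users = json.load(f)
        except FileNotFoundError:
            pass

Calling save() at the end of add() and add_user(), and load() once at startup, would keep the balances across runs.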
I have a class named ExcelFile; its job is to manage Excel files (read, extract data, and different things for the stack).
I want to implement a system for managing errors/exceptions.
For example, ExcelFile has a method load(), which acts like a setup:
def load(self):
    """
    Setup for excel file
    Load workbook, worksheet and other characteristics (data lines, header...)

    :return: Setup successful or not
    :rtype: bool

    Current usage
    :Example:

    >>> excefile = ExcelFile('test.xls')
    >>> excefile.load()
    True
    >>> excefile.nb_rows()
    4
    """
    self.workbook = xlrd.open_workbook(self.url)
    self.sheet = self.workbook.sheet_by_index(0)
    self.header_row_index = self.get_header_row_index()
    if self.header_row_index is None:  # If the file doesn't have a header (or it is not valid)
        return False
    self.header_fields = self.sheet.row_values(self.header_row_index)
    self.header_fields_col_ids = self.get_col_ids(self.header_fields)  # Mapping between header fields and col ids
    self.nb_rows = self.count_rows()
    self.row_start_data = self.header_row_index + self.HEADER_ROWS
    return True
As you can see, I can encounter two different errors:
The file is not an Excel file (raises xlrd.XLRDError)
The file has an invalid header (so I return False)
I want to implement a good system for managing ExcelFile errors, because this class is used a lot in the stack.
This is my first idea for handling that:
Implement a standard exception:
class ExcelFileException(Exception):

    def __init__(self, message, type=None):
        self.message = message
        self.type = type

    def __str__(self):
        return "{} : {} ({})".format(self.__class__.__name__, self.message, self.type)
Rewrite the load method:
def load(self):
    """
    Setup for excel file
    Load workbook, worksheet and other characteristics (data lines, header...)

    :return: Setup successful or not
    :rtype: bool

    Current usage
    :Example:

    >>> excefile = ExcelFile('test.xls')
    >>> excefile.load()
    True
    >>> excefile.nb_rows()
    4
    """
    try:
        self.workbook = xlrd.open_workbook(self.url)
    except xlrd.XLRDError as e:
        raise ExcelFileException("Unsupported file type", e.__class__.__name__)
    self.sheet = self.workbook.sheet_by_index(0)
    self.header_row_index = self.get_header_row_index()
    if self.header_row_index is None:  # If the file doesn't have a header (or it is not valid)
        raise ExcelFileException("Invalid or empty header")
    self.header_fields = self.sheet.row_values(self.header_row_index)
    self.header_fields_col_ids = self.get_col_ids(self.header_fields)  # Mapping between header fields and col ids
    self.nb_rows = self.count_rows()
    self.row_start_data = self.header_row_index + self.HEADER_ROWS
    return True
And this is an example in a calling method. A big problem is that I have to manage a dict named "report" with error descriptions in French, for customer success and others.
...
def foo():
    ...
    file = ExcelFile(location)
    try:
        file.load()
    except ExcelFileException as e:
        log.warn(e.__str__())
        if e.type == 'XLRDError':
            self.report['errors'] = 'Long description of the error, in French (error is about invalid file type)'
        else:
            self.report['errors'] = 'Long description of the error, in French (error is about invalid header)'
    ...
What do you think about that? Do you have a better way?
Thank you
You could change your exception to log the errors in your dict:
class ExcelFileException(Exception):

    def __init__(self, message, report, type=None):
        report['errors'].append(message)
        self.message = message
        self.type = type

    def __str__(self):
        return "{} : {} ({})".format(self.__class__.__name__, self.message, self.type)
When you raise the exception:
raise ExcelFileException("Invalid or empty header", report)
the errors will then be present in report['errors'].
The error can also be fixed by installing the missing optional dependency xlrd:
pip install xlrd
There are more Python packages available for working with Excel.
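Another option, sketched below, is to raise one exception subclass per failure mode and keep the customer-facing French messages in a single mapping, so callers never have to compare type strings (all names here are my own, not from the question):

class ExcelFileException(Exception):
    """Base class for ExcelFile errors."""

class UnsupportedFileTypeError(ExcelFileException):
    pass

class InvalidHeaderError(ExcelFileException):
    pass

# Customer-facing descriptions live in one place, keyed by exception class.
FRENCH_MESSAGES = {
    UnsupportedFileTypeError: "Le fichier n'est pas un fichier Excel valide.",
    InvalidHeaderError: "L'en-tête du fichier est invalide ou vide.",
}

def load_with_report(excel_file, report):
    try:
        return excel_file.load()
    except ExcelFileException as e:
        report['errors'] = FRENCH_MESSAGES.get(type(e), str(e))
        return False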
I currently use the following code to decompress a gzipped response fetched with urllib2:
import gzip
import StringIO
import urllib2

opener = urllib2.build_opener()
response = opener.open(req)
data = response.read()

if response.headers.get('content-encoding', '') == 'gzip':
    data = StringIO.StringIO(data)
    gzipper = gzip.GzipFile(fileobj=data)
    html = gzipper.read()
Does it also handle deflate responses, or do I need to write separate code for them?
You can try:
if response.headers.get('content-encoding', '') == 'deflate':
    html = zlib.decompress(response.read())
If that fails, here is another way; I found it in the requests source code:
if response.headers.get('content-encoding', '') == 'deflate':
    html = zlib.decompressobj(-zlib.MAX_WBITS).decompress(response.read())
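Combining both variants into one helper is probably the safest bet, since servers disagree on whether "deflate" means zlib-wrapped (RFC 1950) or raw deflate data. A sketch:

import zlib

def decompress_deflate(data):
    # Try the zlib-wrapped framing first, then fall back to raw
    # deflate, which some servers send despite the spec.
    try:
        return zlib.decompress(data)
    except zlib.error:
        return zlib.decompressobj(-zlib.MAX_WBITS).decompress(data)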
There is a better way, outlined at:
http://rationalpie.wordpress.com/2010/06/02/python-streaming-gzip-decompression/
The author explains how to decompress chunk by chunk rather than all at once in memory; this is the preferred method when larger files are involved.
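In that spirit, a minimal sketch of chunked gzip decompression with a zlib decompression object (the 16 + MAX_WBITS window value tells zlib to expect a gzip header):

import zlib

def iter_decompress(chunks):
    # Decompress a gzip stream piece by piece, so the whole
    # response never has to sit in memory at once.
    d = zlib.decompressobj(16 + zlib.MAX_WBITS)
    for chunk in chunks:
        yield d.decompress(chunk)
    yield d.flush()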
I also found this site helpful for testing:
http://carsten.codimi.de/gzip.yaws/
To answer the comment above: the HTTP spec (http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.3) says:
If no Accept-Encoding field is present in a request, the server MAY assume that the client will accept any content coding. In this case, if "identity" is one of the available content-codings, then the server SHOULD use the "identity" content-coding, unless it has additional information that a different content-coding is meaningful to the client.
I take that to mean it should use identity. I've never seen a server that doesn't.
You can see how urllib3 handles this in its code:
class DeflateDecoder(object):

    def __init__(self):
        self._first_try = True
        self._data = binary_type()  # six's alias for bytes
        self._obj = zlib.decompressobj()

    def __getattr__(self, name):
        return getattr(self._obj, name)

    def decompress(self, data):
        if not data:
            return data

        if not self._first_try:
            return self._obj.decompress(data)

        self._data += data
        try:
            return self._obj.decompress(data)
        except zlib.error:
            self._first_try = False
            self._obj = zlib.decompressobj(-zlib.MAX_WBITS)
            try:
                return self.decompress(self._data)
            finally:
                self._data = None
class GzipDecoder(object):

    def __init__(self):
        self._obj = zlib.decompressobj(16 + zlib.MAX_WBITS)

    def __getattr__(self, name):
        return getattr(self._obj, name)

    def decompress(self, data):
        if not data:
            return data
        return self._obj.decompress(data)
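For context, a hypothetical way to drive either decoder over an iterable of response chunks (the chunks variable is mine, not from urllib3):

decoder = DeflateDecoder()  # or GzipDecoder() for gzip content
html = b''.join(decoder.decompress(chunk) for chunk in chunks)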