Read/Write access issue with Python on Jupyter Server

I am working remotely on a Jupyter notebook server, and when I create a file using:
file = open("test.txt", "w")
file.write("test")
file.close()
Everything works as expected and the file test.txt is written to the working directory. My problem arises when trying to use the Pandas to_hdf command:
data.to_hdf('raw_data.h5','raw_data_santodomingo',mode='w',format='f',data_columns=True)
I get the following error:
Opening raw_data.h5 in read-only mode
---------------------------------------------------------------------------
OSError Traceback (most recent call last)
/opt/conda/lib/python3.6/site-packages/pandas/io/pytables.py in open(self, mode, **kwargs)
586 try:
--> 587 self._handle = tables.open_file(self._path, self._mode, **kwargs)
588 except (IOError) as e: # pragma: no cover
/opt/conda/lib/python3.6/site-packages/tables/file.py in open_file(filename, mode, title, root_uep, filters, **kwargs)
319 # Finally, create the File instance, and return it
--> 320 return File(filename, mode, title, root_uep, filters, **kwargs)
321
/opt/conda/lib/python3.6/site-packages/tables/file.py in __init__(self, filename, mode, title, root_uep, filters, **kwargs)
783 # Now, it is time to initialize the File extension
--> 784 self._g_new(filename, mode, **params)
785
tables/hdf5extension.pyx in tables.hdf5extension.File._g_new()
/opt/conda/lib/python3.6/site-packages/tables/utils.py in check_file_access(filename, mode)
178 raise IOError("directory ``%s`` exists but it can not be "
--> 179 "written" % (parentname,))
180 elif mode == 'a':
OSError: directory ``.`` exists but it can not be written
During handling of the above exception, another exception occurred:
OSError Traceback (most recent call last)
<ipython-input-182-479f2e98ea81> in <module>()
----> 1 pre_clean_data.to_hdf('raw_data.h5','raw_data_santodomingo',mode='w',format='f',data_columns=True)
/opt/conda/lib/python3.6/site-packages/pandas/core/generic.py in to_hdf(self, path_or_buf, key, **kwargs)
1136
1137 from pandas.io import pytables
-> 1138 return pytables.to_hdf(path_or_buf, key, self, **kwargs)
1139
1140 def to_msgpack(self, path_or_buf=None, encoding='utf-8', **kwargs):
/opt/conda/lib/python3.6/site-packages/pandas/io/pytables.py in to_hdf(path_or_buf, key, value, mode, complevel, complib, append, **kwargs)
267 if isinstance(path_or_buf, string_types):
268 with HDFStore(path_or_buf, mode=mode, complevel=complevel,
--> 269 complib=complib) as store:
270 f(store)
271 else:
/opt/conda/lib/python3.6/site-packages/pandas/io/pytables.py in __init__(self, path, mode, complevel, complib, fletcher32, **kwargs)
446 self._fletcher32 = fletcher32
447 self._filters = None
--> 448 self.open(mode=mode, **kwargs)
449
450 @property
/opt/conda/lib/python3.6/site-packages/pandas/io/pytables.py in open(self, mode, **kwargs)
589 if 'can not be written' in str(e):
590 print('Opening %s in read-only mode' % self._path)
--> 591 self._handle = tables.open_file(self._path, 'r', **kwargs)
592 else:
593 raise
/opt/conda/lib/python3.6/site-packages/tables/file.py in open_file(filename, mode, title, root_uep, filters, **kwargs)
318
319 # Finally, create the File instance, and return it
--> 320 return File(filename, mode, title, root_uep, filters, **kwargs)
321
322
/opt/conda/lib/python3.6/site-packages/tables/file.py in __init__(self, filename, mode, title, root_uep, filters, **kwargs)
782
783 # Now, it is time to initialize the File extension
--> 784 self._g_new(filename, mode, **params)
785
786 # Check filters and set PyTables format version for new files.
tables/hdf5extension.pyx in tables.hdf5extension.File._g_new()
/opt/conda/lib/python3.6/site-packages/tables/utils.py in check_file_access(filename, mode)
154 # The file should be readable.
155 if not os.access(filename, os.F_OK):
--> 156 raise IOError("``%s`` does not exist" % (filename,))
157 if not os.path.isfile(filename):
158 raise IOError("``%s`` is not a regular file" % (filename,))
OSError: ``raw_data.h5`` does not exist
These lines seem pertinent and make me think write permission is the issue:
/opt/conda/lib/python3.6/site-packages/tables/utils.py in check_file_access(filename, mode)
178 raise IOError("directory ``%s`` exists but it can not be "
--> 179 "written" % (parentname,))
180 elif mode == 'a':
OSError: directory ``.`` exists but it can not be written
And:
/opt/conda/lib/python3.6/site-packages/tables/utils.py in check_file_access(filename, mode)
154 # The file should be readable.
155 if not os.access(filename, os.F_OK):
--> 156 raise IOError("``%s`` does not exist" % (filename,))
157 if not os.path.isfile(filename):
158 raise IOError("``%s`` is not a regular file" % (filename,))
OSError: ``raw_data.h5`` does not exist
However, that confuses me, since I can write text files to the working directory as shown above. Any assistance is appreciated.
EDIT: If I use the full path '/home/joyvan/work/raw_data.h5' I get a different error readout.
data.to_hdf('/home/joyvan/work/raw_data.h5','raw_data_santodomingo',mode='w',format='f',data_columns=True)
produces
OSError Traceback (most recent call last)
<ipython-input-185-de493145e6a7> in <module>()
----> 1 pre_clean_data.to_hdf('/home/joyvan/work/raw_data.h5','raw_data_santodomingo',mode='w',format='f',data_columns=True)
/opt/conda/lib/python3.6/site-packages/pandas/core/generic.py in to_hdf(self, path_or_buf, key, **kwargs)
1136
1137 from pandas.io import pytables
-> 1138 return pytables.to_hdf(path_or_buf, key, self, **kwargs)
1139
1140 def to_msgpack(self, path_or_buf=None, encoding='utf-8', **kwargs):
/opt/conda/lib/python3.6/site-packages/pandas/io/pytables.py in to_hdf(path_or_buf, key, value, mode, complevel, complib, append, **kwargs)
267 if isinstance(path_or_buf, string_types):
268 with HDFStore(path_or_buf, mode=mode, complevel=complevel,
--> 269 complib=complib) as store:
270 f(store)
271 else:
/opt/conda/lib/python3.6/site-packages/pandas/io/pytables.py in __init__(self, path, mode, complevel, complib, fletcher32, **kwargs)
446 self._fletcher32 = fletcher32
447 self._filters = None
--> 448 self.open(mode=mode, **kwargs)
449
450 @property
/opt/conda/lib/python3.6/site-packages/pandas/io/pytables.py in open(self, mode, **kwargs)
585
586 try:
--> 587 self._handle = tables.open_file(self._path, self._mode, **kwargs)
588 except (IOError) as e: # pragma: no cover
589 if 'can not be written' in str(e):
/opt/conda/lib/python3.6/site-packages/tables/file.py in open_file(filename, mode, title, root_uep, filters, **kwargs)
318
319 # Finally, create the File instance, and return it
--> 320 return File(filename, mode, title, root_uep, filters, **kwargs)
321
322
/opt/conda/lib/python3.6/site-packages/tables/file.py in __init__(self, filename, mode, title, root_uep, filters, **kwargs)
782
783 # Now, it is time to initialize the File extension
--> 784 self._g_new(filename, mode, **params)
785
786 # Check filters and set PyTables format version for new files.
tables/hdf5extension.pyx in tables.hdf5extension.File._g_new()
/opt/conda/lib/python3.6/site-packages/tables/utils.py in check_file_access(filename, mode)
172 parentname = '.'
173 if not os.access(parentname, os.F_OK):
--> 174 raise IOError("``%s`` does not exist" % (parentname,))
175 if not os.path.isdir(parentname):
176 raise IOError("``%s`` is not a directory" % (parentname,))
OSError: ``/home/joyvan/work`` does not exist

I ran into a similar problem. It turned out that the user I was running the script as did not have sufficient permission to write to the directory.
I ran the same script as the root user and it worked.
Note: this is late and not a direct answer to the OP's question, but I hit a similar situation and am writing up the solution that worked for me.
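To confirm whether directory permissions are really the culprit, you can probe the working directory from inside the notebook before calling to_hdf (a minimal sketch; PyTables does a similar os.access check in check_file_access, as seen in the traceback):
import os

target_dir = os.getcwd()  # or '/home/joyvan/work' -- whichever directory to_hdf writes into
print("exists:  ", os.path.isdir(target_dir))
print("writable:", os.access(target_dir, os.W_OK))
One caveat: os.access tests against the real (not effective) user and group IDs, so it can report False even when a plain open(..., 'w') succeeds, which would explain the behaviour described in the question.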

Related

Altair saver - frequent errors saving charts

I've been getting frequent but not constant errors using Altair saver to save .png files via selenium and chromedriver. First I get a "headless chrome" error:
WebDriverException: Message: unknown error: session deleted because of
page crash from unknown error: cannot determine loading status from
tab crashed (Session info: headless chrome=90.0.4430.212)
Followed by an "invalid session" error when I try to save it again:
InvalidSessionIdException: Message: invalid session id
Based on my reading of Stack Overflow and elsewhere, the initial error leaves a chromedriver session open, and that causes the second error. It resolves itself if I close out of everything and reopen, but that's not really a workable solution. Any advice on making saving Altair charts more reliable?
Versions are up to date and compatible as far as I know: altair 4.1.0, altair-saver 0.5.0, selenium 3.141.0, Chrome 90.0.4430.212, ChromeDriver 90.0.4430.24, chromedriver-binary 90.0.4430.24.0
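A retry wrapper along these lines (a sketch only, reusing the chart object from above; the retry count and delay are arbitrary) can paper over a transient page crash, but it does not help once the session has gone stale, which matches the behaviour described above:
import time
from selenium.common.exceptions import WebDriverException

for attempt in range(3):
    try:
        chart.save('chart.png', scale_factor=3)
        break  # saved successfully
    except WebDriverException as err:
        # intermittent "page crash" -- wait briefly and retry
        print(f"attempt {attempt + 1} failed: {err}")
        time.sleep(2)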
Full traceback of the headless chrome error:
---------------------------------------------------------------------------
WebDriverException Traceback (most recent call last)
<ipython-input-37-cf20dd94d732> in <module>
---> 30 chart.save('chart.png', scale_factor=3)
c:\programdata\anaconda3\envs\[...]\lib\site-packages\altair\vegalite\v4\api.py in save(self, fp, format, override_data_transformer, scale_factor, vegalite_version, vega_version, vegaembed_version, **kwargs)
474 if override_data_transformer:
475 with data_transformers.disable_max_rows():
--> 476 result = save(**kwds)
477 else:
478 result = save(**kwds)
c:\programdata\anaconda3\envs\[...]\lib\site-packages\altair\utils\save.py in save(chart, fp, vega_version, vegaembed_version, format, mode, vegalite_version, embed_options, json_kwds, webdriver, scale_factor, **kwargs)
119 webdriver=webdriver,
120 scale_factor=scale_factor,
--> 121 **kwargs,
122 )
123 if format == "png":
c:\programdata\anaconda3\envs\[...]\lib\site-packages\altair\utils\mimebundle.py in spec_to_mimebundle(spec, format, mode, vega_version, vegaembed_version, vegalite_version, **kwargs)
58 "see http://github.com/altair-viz/altair_saver/".format(fmt=format)
59 )
---> 60 return altair_saver.render(spec, format, mode=mode, **kwargs)
61 if format == "html":
62 html = spec_to_html(
c:\programdata\anaconda3\envs\[...]\lib\site-packages\altair_saver\_core.py in render(chart, fmts, mode, embed_options, method, **kwargs)
255 Saver = _select_saver(method, mode=mode, fmt=fmt)
256 saver = Saver(spec, mode=mode, embed_options=embed_options, **kwargs)
--> 257 mimebundle.update(saver.mimebundle(fmt))
258
259 return mimebundle
c:\programdata\anaconda3\envs\[...]\lib\site-packages\altair_saver\savers\_saver.py in mimebundle(self, fmts)
88 vegalite_version=self._package_versions["vega-lite"],
89 )
---> 90 bundle[mimetype] = self._serialize(fmt, "mimebundle")
91 return bundle
92
c:\programdata\anaconda3\envs\[...]\lib\site-packages\altair_saver\savers\_selenium.py in _serialize(self, fmt, content_type)
282
283 def _serialize(self, fmt: str, content_type: str) -> MimebundleContent:
--> 284 out = self._extract(fmt)
285 if fmt == "png":
286 assert isinstance(out, str)
c:\programdata\anaconda3\envs\[...]\lib\site-packages\altair_saver\savers\_selenium.py in _extract(self, fmt)
262
263 url = self._serve(html, js_resources)
--> 264 driver.get("about:blank")
265 driver.get(url)
266 try:
c:\programdata\anaconda3\envs\[...]\lib\site-packages\selenium\webdriver\remote\webdriver.py in get(self, url)
331 Loads a web page in the current browser session.
332 """
--> 333 self.execute(Command.GET, {'url': url})
334
335 @property
c:\programdata\anaconda3\envs\[...]\lib\site-packages\selenium\webdriver\remote\webdriver.py in execute(self, driver_command, params)
319 response = self.command_executor.execute(driver_command, params)
320 if response:
--> 321 self.error_handler.check_response(response)
322 response['value'] = self._unwrap_value(
323 response.get('value', None))
c:\programdata\anaconda3\envs\[...]\lib\site-packages\selenium\webdriver\remote\errorhandler.py in check_response(self, response)
240 alert_text = value['alert'].get('text')
241 raise exception_class(message, screen, stacktrace, alert_text)
--> 242 raise exception_class(message, screen, stacktrace)
243
244 def _value_or_default(self, obj, key, default):
WebDriverException: Message: unknown error: session deleted because of page crash
from unknown error: cannot determine loading status
from tab crashed
(Session info: headless chrome=90.0.4430.212)
And invalid session error:
---------------------------------------------------------------------------
InvalidSessionIdException Traceback (most recent call last)
<ipython-input-38-cf20dd94d732> in <module>
29
---> 30 chart.save('chart.png', scale_factor=3)
c:\programdata\anaconda3\envs\[...]\lib\site-packages\altair\vegalite\v4\api.py in save(self, fp, format, override_data_transformer, scale_factor, vegalite_version, vega_version, vegaembed_version, **kwargs)
474 if override_data_transformer:
475 with data_transformers.disable_max_rows():
--> 476 result = save(**kwds)
477 else:
478 result = save(**kwds)
c:\programdata\anaconda3\envs\[...]\lib\site-packages\altair\utils\save.py in save(chart, fp, vega_version, vegaembed_version, format, mode, vegalite_version, embed_options, json_kwds, webdriver, scale_factor, **kwargs)
119 webdriver=webdriver,
120 scale_factor=scale_factor,
--> 121 **kwargs,
122 )
123 if format == "png":
c:\programdata\anaconda3\envs\[...]\lib\site-packages\altair\utils\mimebundle.py in spec_to_mimebundle(spec, format, mode, vega_version, vegaembed_version, vegalite_version, **kwargs)
58 "see http://github.com/altair-viz/altair_saver/".format(fmt=format)
59 )
---> 60 return altair_saver.render(spec, format, mode=mode, **kwargs)
61 if format == "html":
62 html = spec_to_html(
c:\programdata\anaconda3\envs\[...]\lib\site-packages\altair_saver\_core.py in render(chart, fmts, mode, embed_options, method, **kwargs)
255 Saver = _select_saver(method, mode=mode, fmt=fmt)
256 saver = Saver(spec, mode=mode, embed_options=embed_options, **kwargs)
--> 257 mimebundle.update(saver.mimebundle(fmt))
258
259 return mimebundle
c:\programdata\anaconda3\envs\[...]\lib\site-packages\altair_saver\savers\_saver.py in mimebundle(self, fmts)
88 vegalite_version=self._package_versions["vega-lite"],
89 )
---> 90 bundle[mimetype] = self._serialize(fmt, "mimebundle")
91 return bundle
92
c:\programdata\anaconda3\envs\[...]\lib\site-packages\altair_saver\savers\_selenium.py in _serialize(self, fmt, content_type)
282
283 def _serialize(self, fmt: str, content_type: str) -> MimebundleContent:
--> 284 out = self._extract(fmt)
285 if fmt == "png":
286 assert isinstance(out, str)
c:\programdata\anaconda3\envs\[...]\lib\site-packages\altair_saver\savers\_selenium.py in _extract(self, fmt)
262
263 url = self._serve(html, js_resources)
--> 264 driver.get("about:blank")
265 driver.get(url)
266 try:
c:\programdata\anaconda3\envs\[...]\lib\site-packages\selenium\webdriver\remote\webdriver.py in get(self, url)
331 Loads a web page in the current browser session.
332 """
--> 333 self.execute(Command.GET, {'url': url})
334
335 @property
c:\programdata\anaconda3\envs\[...]\lib\site-packages\selenium\webdriver\remote\webdriver.py in execute(self, driver_command, params)
319 response = self.command_executor.execute(driver_command, params)
320 if response:
--> 321 self.error_handler.check_response(response)
322 response['value'] = self._unwrap_value(
323 response.get('value', None))
c:\programdata\anaconda3\envs\[...]\lib\site-packages\selenium\webdriver\remote\errorhandler.py in check_response(self, response)
240 alert_text = value['alert'].get('text')
241 raise exception_class(message, screen, stacktrace, alert_text)
--> 242 raise exception_class(message, screen, stacktrace)
243
244 def _value_or_default(self, obj, key, default):
InvalidSessionIdException: Message: invalid session id

SQLAlchemy-TypeError: _get_column_info() got an unexpected keyword argument 'generated'

I'm trying to get the metadata of a table from a Redshift database.
I'm getting the error below even though the connection is fine:
"TypeError: _get_column_info() got an unexpected keyword argument 'generated'"
I tried with another database on a different server and it works fine, but I'm not sure what the issue is with this server's tables.
Can you please help me out with a solution?
Table = sa.Table("Tablename", metadata, autoload=True, autoload_with=engine)
TypeError Traceback (most recent call last)
<ipython-input-98-366ec112cf52> in <module>
----> 1 Table=sa.Table("dim_dealer" ,metadata,autoload=True,autoload_with=engine)
<string> in __new__(cls, *args, **kw)
~\AppData\Local\Continuum\anaconda3\lib\site-packages\sqlalchemy\util\deprecations.py in warned(fn, *args, **kwargs)
126 )
127
--> 128 return fn(*args, **kwargs)
129
130 doc = fn.__doc__ is not None and fn.__doc__ or ""
~\AppData\Local\Continuum\anaconda3\lib\site-packages\sqlalchemy\sql\schema.py in __new__(cls, *args, **kw)
494 except:
495 with util.safe_reraise():
--> 496 metadata._remove_table(name, schema)
497
498 @property
~\AppData\Local\Continuum\anaconda3\lib\site-packages\sqlalchemy\util\langhelpers.py in __exit__(self, type_, value, traceback)
66 self._exc_info = None # remove potential circular references
67 if not self.warn_only:
---> 68 compat.reraise(exc_type, exc_value, exc_tb)
69 else:
70 if not compat.py3k and self._exc_info and self._exc_info[1]:
~\AppData\Local\Continuum\anaconda3\lib\site-packages\sqlalchemy\util\compat.py in reraise(tp, value, tb, cause)
151 if value.__traceback__ is not tb:
152 raise value.with_traceback(tb)
--> 153 raise value
154
155 def u(s):
~\AppData\Local\Continuum\anaconda3\lib\site-packages\sqlalchemy\sql\schema.py in __new__(cls, *args, **kw)
489 metadata._add_table(name, schema, table)
490 try:
--> 491 table._init(name, metadata, *args, **kw)
492 table.dispatch.after_parent_attach(table, metadata)
493 return table
~\AppData\Local\Continuum\anaconda3\lib\site-packages\sqlalchemy\sql\schema.py in _init(self, name, metadata, *args, **kwargs)
583 include_columns,
584 _extend_on=_extend_on,
--> 585 resolve_fks=resolve_fks,
586 )
587
~\AppData\Local\Continuum\anaconda3\lib\site-packages\sqlalchemy\sql\schema.py in _autoload(self, metadata, autoload_with, include_columns, exclude_columns, resolve_fks, _extend_on)
607 exclude_columns,
608 resolve_fks,
--> 609 _extend_on=_extend_on,
610 )
611 else:
~\AppData\Local\Continuum\anaconda3\lib\site-packages\sqlalchemy\engine\base.py in run_callable(self, callable_, *args, **kwargs)
2148 """
2149 with self._contextual_connect() as conn:
-> 2150 return conn.run_callable(callable_, *args, **kwargs)
2151
2152 def execute(self, statement, *multiparams, **params):
~\AppData\Local\Continuum\anaconda3\lib\site-packages\sqlalchemy\engine\base.py in run_callable(self, callable_, *args, **kwargs)
1602
1603 """
-> 1604 return callable_(self, *args, **kwargs)
1605
1606 def _run_visitor(self, visitorcallable, element, **kwargs):
~\AppData\Local\Continuum\anaconda3\lib\site-packages\sqlalchemy\engine\default.py in reflecttable(self, connection, table, include_columns, exclude_columns, resolve_fks, **opts)
429 insp = reflection.Inspector.from_engine(connection)
430 return insp.reflecttable(
--> 431 table, include_columns, exclude_columns, resolve_fks, **opts
432 )
433
~\AppData\Local\Continuum\anaconda3\lib\site-packages\sqlalchemy\engine\reflection.py in reflecttable(self, table, include_columns, exclude_columns, resolve_fks, _extend_on)
638
639 for col_d in self.get_columns(
--> 640 table_name, schema, **table.dialect_kwargs
641 ):
642 found_table = True
~\AppData\Local\Continuum\anaconda3\lib\site-packages\sqlalchemy\engine\reflection.py in get_columns(self, table_name, schema, **kw)
371
372 col_defs = self.dialect.get_columns(
--> 373 self.bind, table_name, schema, info_cache=self.info_cache, **kw
374 )
375 for col_def in col_defs:
<string> in get_columns(self, connection, table_name, schema, **kw)
~\AppData\Local\Continuum\anaconda3\lib\site-packages\sqlalchemy\engine\reflection.py in cache(fn, self, con, *args, **kw)
54 ret = info_cache.get(key)
55 if ret is None:
---> 56 ret = fn(self, con, *args, **kw)
57 info_cache[key] = ret
58 return ret
~\AppData\Local\Continuum\anaconda3\lib\site-packages\sqlalchemy_redshift\dialect.py in get_columns(self, connection, table_name, schema, **kw)
459 default=col.default, notnull=col.notnull, domains=domains,
460 enums=[], schema=col.schema, encode=col.encode,
--> 461 comment=col.comment)
462 columns.append(column_info)
463 return columns
~\AppData\Local\Continuum\anaconda3\lib\site-packages\sqlalchemy_redshift\dialect.py in _get_column_info(self, *args, **kwargs)
666 column_info = super(RedshiftDialect, self)._get_column_info(
667 *args,
--> 668 **kw
669 )
670 if isinstance(column_info['type'], VARCHAR):
TypeError: _get_column_info() got an unexpected keyword argument 'generated'
print(repr(metadata.tables[Table]))
Thanks in advance
It looks like a backwards-compatibility bug between SQLAlchemy and SQLAlchemy-Redshift.
The private method RedshiftDialect._get_column_info is overridden in SQLAlchemy-Redshift. The generated keyword argument was added to this method in SQLAlchemy v1.3.16, which caused the compatibility error. A fix was implemented: the generated keyword should only be passed on sufficiently new versions of SQLAlchemy. Unfortunately, the version check doesn't work:
if sa.__version__ >= '1.3.16':
    # SQLAlchemy 1.3.16 introduced generated columns,
    # not supported in redshift
    kw['generated'] = ''
As you can see, this condition is truthy for your SQLAlchemy version ("1.3.7") because the versions are compared as strings, character by character, so '1.3.7' >= '1.3.16' evaluates to True. I think I will make a pull request to correct this behaviour.
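To see the failure mode concretely (a quick sketch; version_tuple is an illustrative helper, not part of either library):
print('1.3.7' >= '1.3.16')   # True -- lexicographic: '7' > '1' at the first differing character

def version_tuple(v):
    # illustrative helper: '1.3.7' -> (1, 3, 7), so the comparison is numeric
    return tuple(int(part) for part in v.split('.'))

print(version_tuple('1.3.7') >= version_tuple('1.3.16'))  # False, as intended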
I think the simplest solution for you for now is to update your SQLAlchemy package to version 1.3.10 or newer. In that case the condition works as expected.
Update: this bug was fixed in SQLAlchemy-Redshift v0.8.0.

"Error while extracting" from tensorflow datasets

I want to train a TensorFlow image segmentation model on COCO, and thought I would leverage the dataset builder already included. The download seems to complete, but it crashes while extracting the zip files.
Running TF 2.0.0 in a Jupyter notebook under a conda environment. The computer is 64-bit Windows 10. The Oxford-IIIT Pet dataset used in the official image segmentation tutorial works fine.
Below is the error message (my local user name is replaced with %user%).
---------------------------------------------------------------------------
OutOfRangeError Traceback (most recent call last)
~\.conda\envs\tf-tutorial\lib\site-packages\tensorflow_datasets\core\download\extractor.py in _sync_extract(self, from_path, method, to_path)
88 try:
---> 89 for path, handle in iter_archive(from_path, method):
90 path = tf.compat.as_text(path)
~\.conda\envs\tf-tutorial\lib\site-packages\tensorflow_datasets\core\download\extractor.py in iter_zip(arch_f)
176 with _open_or_pass(arch_f) as fobj:
--> 177 z = zipfile.ZipFile(fobj)
178 for member in z.infolist():
~\.conda\envs\tf-tutorial\lib\zipfile.py in __init__(self, file, mode, compression, allowZip64)
1130 if mode == 'r':
-> 1131 self._RealGetContents()
1132 elif mode in ('w', 'x'):
~\.conda\envs\tf-tutorial\lib\zipfile.py in _RealGetContents(self)
1193 try:
-> 1194 endrec = _EndRecData(fp)
1195 except OSError:
~\.conda\envs\tf-tutorial\lib\zipfile.py in _EndRecData(fpin)
263 # Determine file size
--> 264 fpin.seek(0, 2)
265 filesize = fpin.tell()
~\.conda\envs\tf-tutorial\lib\site-packages\tensorflow_core\python\util\deprecation.py in new_func(*args, **kwargs)
506 instructions)
--> 507 return func(*args, **kwargs)
508
~\.conda\envs\tf-tutorial\lib\site-packages\tensorflow_core\python\lib\io\file_io.py in seek(self, offset, whence, position)
166 elif whence == 2:
--> 167 offset += self.size()
168 else:
~\.conda\envs\tf-tutorial\lib\site-packages\tensorflow_core\python\lib\io\file_io.py in size(self)
101 """Returns the size of the file."""
--> 102 return stat(self.__name).length
103
~\.conda\envs\tf-tutorial\lib\site-packages\tensorflow_core\python\lib\io\file_io.py in stat(filename)
726 """
--> 727 return stat_v2(filename)
728
~\.conda\envs\tf-tutorial\lib\site-packages\tensorflow_core\python\lib\io\file_io.py in stat_v2(path)
743 file_statistics = pywrap_tensorflow.FileStatistics()
--> 744 pywrap_tensorflow.Stat(compat.as_bytes(path), file_statistics)
745 return file_statistics
OutOfRangeError: C:\Users\%user%\tensorflow_datasets\downloads\images.cocodataset.org_zips_train20147eQIfmQL3bpVDgkOrnAQklNLVUtCsFrDPwMAuYSzF3U.zip; Unknown error
During handling of the above exception, another exception occurred:
ExtractError Traceback (most recent call last)
<ipython-input-27-887fa0198611> in <module>
1 cocoBuilder = tfds.builder('coco')
2 info = cocoBuilder.info
----> 3 cocoBuilder.download_and_prepare()
~\.conda\envs\tf-tutorial\lib\site-packages\tensorflow_datasets\core\api_utils.py in disallow_positional_args_dec(fn, instance, args, kwargs)
50 _check_no_positional(fn, args, ismethod, allowed=allowed)
51 _check_required(fn, kwargs)
---> 52 return fn(*args, **kwargs)
53
54 return disallow_positional_args_dec(wrapped) # pylint: disable=no-value-for-parameter
~\.conda\envs\tf-tutorial\lib\site-packages\tensorflow_datasets\core\dataset_builder.py in download_and_prepare(self, download_dir, download_config)
285 self._download_and_prepare(
286 dl_manager=dl_manager,
--> 287 download_config=download_config)
288
289 # NOTE: If modifying the lines below to put additional information in
~\.conda\envs\tf-tutorial\lib\site-packages\tensorflow_datasets\core\dataset_builder.py in _download_and_prepare(self, dl_manager, download_config)
946 super(GeneratorBasedBuilder, self)._download_and_prepare(
947 dl_manager=dl_manager,
--> 948 max_examples_per_split=download_config.max_examples_per_split,
949 )
950
~\.conda\envs\tf-tutorial\lib\site-packages\tensorflow_datasets\core\dataset_builder.py in _download_and_prepare(self, dl_manager, **prepare_split_kwargs)
802 # Generating data for all splits
803 split_dict = splits_lib.SplitDict()
--> 804 for split_generator in self._split_generators(dl_manager):
805 if splits_lib.Split.ALL == split_generator.split_info.name:
806 raise ValueError(
~\.conda\envs\tf-tutorial\lib\site-packages\tensorflow_datasets\image\coco.py in _split_generators(self, dl_manager)
237 root_url = 'http://images.cocodataset.org/'
238 extracted_paths = dl_manager.download_and_extract({
--> 239 key: root_url + url for key, url in urls.items()
240 })
241
~\.conda\envs\tf-tutorial\lib\site-packages\tensorflow_datasets\core\download\download_manager.py in download_and_extract(self, url_or_urls)
357 with self._downloader.tqdm():
358 with self._extractor.tqdm():
--> 359 return _map_promise(self._download_extract, url_or_urls)
360
361 @property
~\.conda\envs\tf-tutorial\lib\site-packages\tensorflow_datasets\core\download\download_manager.py in _map_promise(map_fn, all_inputs)
393 """Map the function into each element and resolve the promise."""
394 all_promises = utils.map_nested(map_fn, all_inputs) # Apply the function
--> 395 res = utils.map_nested(_wait_on_promise, all_promises)
396 return res
~\.conda\envs\tf-tutorial\lib\site-packages\tensorflow_datasets\core\utils\py_utils.py in map_nested(function, data_struct, dict_only, map_tuple)
127 return {
128 k: map_nested(function, v, dict_only, map_tuple)
--> 129 for k, v in data_struct.items()
130 }
131 elif not dict_only:
~\.conda\envs\tf-tutorial\lib\site-packages\tensorflow_datasets\core\utils\py_utils.py in <dictcomp>(.0)
127 return {
128 k: map_nested(function, v, dict_only, map_tuple)
--> 129 for k, v in data_struct.items()
130 }
131 elif not dict_only:
~\.conda\envs\tf-tutorial\lib\site-packages\tensorflow_datasets\core\utils\py_utils.py in map_nested(function, data_struct, dict_only, map_tuple)
141 return tuple(mapped)
142 # Singleton
--> 143 return function(data_struct)
144
145
~\.conda\envs\tf-tutorial\lib\site-packages\tensorflow_datasets\core\download\download_manager.py in _wait_on_promise(p)
377
378 def _wait_on_promise(p):
--> 379 return p.get()
380
381 else:
~\.conda\envs\tf-tutorial\lib\site-packages\promise\promise.py in get(self, timeout)
508 target = self._target()
509 self._wait(timeout or DEFAULT_TIMEOUT)
--> 510 return self._target_settled_value(_raise=True)
511
512 def _target_settled_value(self, _raise=False):
~\.conda\envs\tf-tutorial\lib\site-packages\promise\promise.py in _target_settled_value(self, _raise)
512 def _target_settled_value(self, _raise=False):
513 # type: (bool) -> Any
--> 514 return self._target()._settled_value(_raise)
515
516 _value = _reason = _target_settled_value
~\.conda\envs\tf-tutorial\lib\site-packages\promise\promise.py in _settled_value(self, _raise)
222 if _raise:
223 raise_val = self._fulfillment_handler0
--> 224 reraise(type(raise_val), raise_val, self._traceback)
225 return self._fulfillment_handler0
226
~\.conda\envs\tf-tutorial\lib\site-packages\six.py in reraise(tp, value, tb)
694 if value.__traceback__ is not tb:
695 raise value.with_traceback(tb)
--> 696 raise value
697 finally:
698 value = None
~\.conda\envs\tf-tutorial\lib\site-packages\promise\promise.py in handle_future_result(future)
840 # type: (Any) -> None
841 try:
--> 842 resolve(future.result())
843 except Exception as e:
844 tb = exc_info()[2]
~\.conda\envs\tf-tutorial\lib\concurrent\futures\_base.py in result(self, timeout)
423 raise CancelledError()
424 elif self._state == FINISHED:
--> 425 return self.__get_result()
426
427 self._condition.wait(timeout)
~\.conda\envs\tf-tutorial\lib\concurrent\futures\_base.py in __get_result(self)
382 def __get_result(self):
383 if self._exception:
--> 384 raise self._exception
385 else:
386 return self._result
~\.conda\envs\tf-tutorial\lib\concurrent\futures\thread.py in run(self)
54
55 try:
---> 56 result = self.fn(*self.args, **self.kwargs)
57 except BaseException as exc:
58 self.future.set_exception(exc)
~\.conda\envs\tf-tutorial\lib\site-packages\tensorflow_datasets\core\download\extractor.py in _sync_extract(self, from_path, method, to_path)
92 except BaseException as err:
93 msg = 'Error while extracting %s to %s : %s' % (from_path, to_path, err)
---> 94 raise ExtractError(msg)
95 # `tf.io.gfile.Rename(overwrite=True)` doesn't work for non empty
96 # directories, so delete destination first, if it already exists.
ExtractError: Error while extracting C:\Users\%user%\tensorflow_datasets\downloads\images.cocodataset.org_zips_train20147eQIfmQL3bpVDgkOrnAQklNLVUtCsFrDPwMAuYSzF3U.zip to C:\Users\%user%\tensorflow_datasets\downloads\extracted\ZIP.images.cocodataset.org_zips_train20147eQIfmQL3bpVDgkOrnAQklNLVUtCsFrDPwMAuYSzF3U.zip : C:\Users\%user%\tensorflow_datasets\downloads\images.cocodataset.org_zips_train20147eQIfmQL3bpVDgkOrnAQklNLVUtCsFrDPwMAuYSzF3U.zip; Unknown error
The message seems cryptic to me. The folder it is trying to extract to does not exist when the notebook is started; it is created by TensorFlow, and only by that command. I obviously tried deleting it completely and running again, to no effect.
The code that leads to the error is (everything runs fine until the last line):
from __future__ import absolute_import, division, print_function, unicode_literals  # must come before other imports
import tensorflow as tf
from tensorflow_examples.models.pix2pix import pix2pix
import tensorflow_datasets as tfds
from IPython.display import clear_output
import matplotlib.pyplot as plt
dataset, info = tfds.load('coco', with_info=True)
I also tried breaking the last command down into assigning the tfds.builder object and then running download_and_prepare, and again got the same error.
There is enough space on disk: after the download there are still 50+ GB available, while the dataset is supposed to be 37 GB in its largest (2014) version.
I had a similar problem with Windows 10 & COCO 2017. My solution was simple: extract the ZIP file manually to the folder path given in the error message.
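In code form, the manual extraction looks roughly like this (a sketch: the archive name and destination are copied from the ExtractError message above, and the hash suffix will differ on your machine):
import os
import zipfile

# Paths reconstructed from the error message; adjust %user% and the hash
# suffix tensorflow_datasets generated for your download.
downloads = os.path.join(os.path.expanduser("~"), "tensorflow_datasets", "downloads")
archive = os.path.join(downloads, "images.cocodataset.org_zips_train20147eQIfmQL3bpVDgkOrnAQklNLVUtCsFrDPwMAuYSzF3U.zip")
dest = os.path.join(downloads, "extracted", "ZIP.images.cocodataset.org_zips_train20147eQIfmQL3bpVDgkOrnAQklNLVUtCsFrDPwMAuYSzF3U.zip")

with zipfile.ZipFile(archive) as zf:
    zf.extractall(dest)  # extract to the folder tensorflow_datasets expects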

Opening a FITS file with Astropy results in FileNotFoundError

When I use
fits_datasweep_gal = fits.open('Macintosh HD/Users/lingxuan/Downloads/datasweep-index-gal.fits')
to open a FITS file in a Jupyter notebook, it returns:
FileNotFoundError Traceback (most recent call last)
<ipython-input-13-e5886f60eba2> in <module>
----> 1 fits_datasweep_gal = fits.open('Macintosh HD/Users/lingxuan/Downloads/datasweep-index-gal.fits')
~/anaconda3/lib/python3.7/site-packages/astropy/io/fits/hdu/hdulist.py in fitsopen(name, mode, memmap, save_backup, cache, lazy_load_hdus, **kwargs)
149
150 return HDUList.fromfile(name, mode, memmap, save_backup, cache,
--> 151 lazy_load_hdus, **kwargs)
152
153
~/anaconda3/lib/python3.7/site-packages/astropy/io/fits/hdu/hdulist.py in fromfile(cls, fileobj, mode, memmap, save_backup, cache, lazy_load_hdus, **kwargs)
388 return cls._readfrom(fileobj=fileobj, mode=mode, memmap=memmap,
389 save_backup=save_backup, cache=cache,
--> 390 lazy_load_hdus=lazy_load_hdus, **kwargs)
391
392 @classmethod
~/anaconda3/lib/python3.7/site-packages/astropy/io/fits/hdu/hdulist.py in _readfrom(cls, fileobj, data, mode, memmap, save_backup, cache, lazy_load_hdus, **kwargs)
1037 if not isinstance(fileobj, _File):
1038 # instantiate a FITS file object (ffo)
-> 1039 fileobj = _File(fileobj, mode=mode, memmap=memmap, cache=cache)
1040 # The Astropy mode is determined by the _File initializer if the
1041 # supplied mode was None
~/anaconda3/lib/python3.7/site-packages/astropy/utils/decorators.py in wrapper(*args, **kwargs)
501 # one with the name of the new argument to the function
502 kwargs[new_name[i]] = value
--> 503 return function(*args, **kwargs)
504
505 return wrapper
~/anaconda3/lib/python3.7/site-packages/astropy/io/fits/file.py in __init__(self, fileobj, mode, memmap, overwrite, cache)
176 self._open_fileobj(fileobj, mode, overwrite)
177 elif isinstance(fileobj, str):
--> 178 self._open_filename(fileobj, mode, overwrite)
179 else:
180 self._open_filelike(fileobj, mode, overwrite)
~/anaconda3/lib/python3.7/site-packages/astropy/io/fits/file.py in _open_filename(self, filename, mode, overwrite)
553
554 if not self._try_read_compressed(self.name, magic, mode, ext=ext):
--> 555 self._file = fileobj_open(self.name, IO_FITS_MODES[mode])
556 self.close_on_error = True
557
~/anaconda3/lib/python3.7/site-packages/astropy/io/fits/util.py in fileobj_open(filename, mode)
386 """
387
--> 388 return open(filename, mode, buffering=0)
389
390
FileNotFoundError: [Errno 2] No such file or directory: 'Macintosh HD/Users/lingxuan/Downloads/datasweep-index-gal.fits'
What should I do?
Remove the Macintosh HD part from the path. On macOS, "Macintosh HD" is the volume's display name, not part of the POSIX path, which starts at the root /:
fits_datasweep_gal = fits.open('/Users/lingxuan/Downloads/datasweep-index-gal.fits')
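More generally, building the path with os.path.expanduser and checking it before opening makes this kind of failure obvious (a small sketch):
import os
from astropy.io import fits

# '~' expands to /Users/lingxuan on macOS; no 'Macintosh HD' prefix is needed
path = os.path.expanduser('~/Downloads/datasweep-index-gal.fits')
if not os.path.exists(path):
    raise FileNotFoundError(f"check the path: {path}")
fits_datasweep_gal = fits.open(path)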

Trying to download specific type of file from S3 using Boto3 - TypeError: expected string or bytes-like object

I'm trying to download only the most recent .csv files from my S3 bucket and am running into an error that says "TypeError: expected string or bytes-like object."
I currently have working code that identifies the last modified S3 objects, sorts these objects, and puts them into a list named latest_files.
import boto3

session = boto3.Session()
s3_resource = boto3.resource('s3')
my_bucket = s3_resource.Bucket('chansbucket')
get_last_modified = lambda obj: int(obj.last_modified.strftime('%s'))
unsorted = []
# filter through the bucket and append each object to the unsorted list
for file in my_bucket.objects.filter():
    unsorted.append(file)
# keep the keys of the five most recently modified objects
latest_files = [obj.key for obj in sorted(unsorted, key=get_last_modified, reverse=True)][0:5]
Now I want to loop through latest_files and download only those that end with .csv.
for file in latest_files:
    if file.endswith('.csv'):
        s3_resource.meta.client.download_file(my_bucket, file, '/Users/mikechan/projects/TT_product_analyses/raw_csv_files/' + file)
Here's where I get the error TypeError: expected string or bytes-like object
Here's the traceback:
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-27-ca90c5ad9c53> in <module>()
1 for file in latest_files:
2 if file.endswith('.csv'):
----> 3 s3_resource.meta.client.download_file(my_bucket, str(file), '/Users/mikechan/projects/TT_product_analyses/raw_csv_files/' + str(file))
4
5
~/anaconda/lib/python3.6/site-packages/boto3/s3/inject.py in download_file(self, Bucket, Key, Filename, ExtraArgs, Callback, Config)
170 return transfer.download_file(
171 bucket=Bucket, key=Key, filename=Filename,
--> 172 extra_args=ExtraArgs, callback=Callback)
173
174
~/anaconda/lib/python3.6/site-packages/boto3/s3/transfer.py in download_file(self, bucket, key, filename, extra_args, callback)
305 bucket, key, filename, extra_args, subscribers)
306 try:
--> 307 future.result()
308 # This is for backwards compatibility where when retries are
309 # exceeded we need to throw the same error from boto3 instead of
~/anaconda/lib/python3.6/site-packages/s3transfer/futures.py in result(self)
71 # however if a KeyboardInterrupt is raised we want want to exit
72 # out of this and propogate the exception.
---> 73 return self._coordinator.result()
74 except KeyboardInterrupt as e:
75 self.cancel()
~/anaconda/lib/python3.6/site-packages/s3transfer/futures.py in result(self)
231 # final result.
232 if self._exception:
--> 233 raise self._exception
234 return self._result
235
~/anaconda/lib/python3.6/site-packages/s3transfer/tasks.py in _main(self, transfer_future, **kwargs)
253 # Call the submit method to start submitting tasks to execute the
254 # transfer.
--> 255 self._submit(transfer_future=transfer_future, **kwargs)
256 except BaseException as e:
257 # If there was an exception raised during the submission of task
~/anaconda/lib/python3.6/site-packages/s3transfer/download.py in _submit(self, client, config, osutil, request_executor, io_executor, transfer_future, bandwidth_limiter)
351 Bucket=transfer_future.meta.call_args.bucket,
352 Key=transfer_future.meta.call_args.key,
--> 353 **transfer_future.meta.call_args.extra_args
354 )
355 transfer_future.meta.provide_transfer_size(
~/.local/lib/python3.6/site-packages/botocore/client.py in _api_call(self, *args, **kwargs)
318 "%s() only accepts keyword arguments." % py_operation_name)
319 # The "self" in this scope is referring to the BaseClient.
--> 320 return self._make_api_call(operation_name, kwargs)
321
322 _api_call.__name__ = str(py_operation_name)
~/.local/lib/python3.6/site-packages/botocore/client.py in _make_api_call(self, operation_name, api_params)
594 }
595 request_dict = self._convert_to_request_dict(
--> 596 api_params, operation_model, context=request_context)
597
598 service_id = self._service_model.service_id.hyphenize()
~/.local/lib/python3.6/site-packages/botocore/client.py in _convert_to_request_dict(self, api_params, operation_model, context)
628 context=None):
629 api_params = self._emit_api_params(
--> 630 api_params, operation_model, context)
631 request_dict = self._serializer.serialize_to_request(
632 api_params, operation_model)
~/.local/lib/python3.6/site-packages/botocore/client.py in _emit_api_params(self, api_params, operation_model, context)
658 service_id=service_id,
659 operation_name=operation_name),
--> 660 params=api_params, model=operation_model, context=context)
661 return api_params
662
~/.local/lib/python3.6/site-packages/botocore/hooks.py in emit(self, event_name, **kwargs)
354 def emit(self, event_name, **kwargs):
355 aliased_event_name = self._alias_event_name(event_name)
--> 356 return self._emitter.emit(aliased_event_name, **kwargs)
357
358 def emit_until_response(self, event_name, **kwargs):
~/.local/lib/python3.6/site-packages/botocore/hooks.py in emit(self, event_name, **kwargs)
226 handlers.
227 """
--> 228 return self._emit(event_name, kwargs)
229
230 def emit_until_response(self, event_name, **kwargs):
~/.local/lib/python3.6/site-packages/botocore/hooks.py in _emit(self, event_name, kwargs, stop_on_response)
209 for handler in handlers_to_call:
210 logger.debug('Event %s: calling handler %s', event_name, handler)
--> 211 response = handler(**kwargs)
212 responses.append((handler, response))
213 if stop_on_response and response is not None:
~/.local/lib/python3.6/site-packages/botocore/handlers.py in validate_bucket_name(params, **kwargs)
216 return
217 bucket = params['Bucket']
--> 218 if VALID_BUCKET.search(bucket) is None:
219 error_msg = (
220 'Invalid bucket name "%s": Bucket name must match '
TypeError: expected string or bytes-like object
Can you help? I feel like it's something pretty simple, but I'm a total noob and have been banging my head against my desk forever on this. Any help is appreciated.
Thanks!
The issue with this line:
s3_resource.meta.client.download_file(my_bucket, file, '/Users/mikechan/projects/TT_product_analyses/raw_csv_files/' + file)
is that
my_bucket = s3_resource.Bucket('chansbucket')
returns a Bucket object, while download_file() wants the bucket name as a string, such as:
s3.meta.client.download_file('mybucket', 'hello.txt', '/tmp/hello.txt')
Also, I think the latest_files = ... line should not be indented.
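Putting that together, the loop might look like this (a sketch: it passes the bucket name, via my_bucket.name from the question's code, instead of the Bucket object):
# download_file(Bucket, Key, Filename) expects the bucket *name* string
for key in latest_files:
    if key.endswith('.csv'):
        s3_resource.meta.client.download_file(
            my_bucket.name,  # 'chansbucket' -- a string, not the Bucket object
            key,
            '/Users/mikechan/projects/TT_product_analyses/raw_csv_files/' + key,
        )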
