ImageDataBunch not loading in my files/labels

ImageDataBunch not loading in my files/labels - python

I have a dataset of images stored on my local disk that I name as such:
path = '../input/resized/Resized Dataset.zip/Resized Data'
And I am calling an instance of an ImageDataBunch from the fastai.vision library using the following code:
data = ImageDataBunch.from_folder(path, train=".", ds_tfms=get_transforms(),size=224, valid_pct=0.2)
but for some reason I am getting an error
File "../src/script.py", line 17, in <module>
data = ImageDataBunch.from_folder(path, train=".", ds_tfms=get_transforms(),size=20, valid_pct=0.2)
File "/opt/conda/lib/python3.6/site-packages/fastai/vision/data.py", line 110, in from_folder
src = src.label_from_folder(classes=classes)
File "/opt/conda/lib/python3.6/site-packages/fastai/data_block.py", line 463, in _inner
self.train = ft(*args, from_item_lists=True, **kwargs)
File "/opt/conda/lib/python3.6/site-packages/fastai/data_block.py", line 292, in label_from_folder
label_cls=label_cls, **kwargs)
File "/opt/conda/lib/python3.6/site-packages/fastai/data_block.py", line 287, in label_from_func
return self._label_from_list([func(o) for o in self.items], label_cls=label_cls, **kwargs)
File "/opt/conda/lib/python3.6/site-packages/fastai/data_block.py", line 262, in _label_from_list
label_cls = self.get_label_cls(labels, label_cls=label_cls, **kwargs)
File "/opt/conda/lib/python3.6/site-packages/fastai/data_block.py", line 251, in get_label_cls
it = index_row(labels,0)
File "/opt/conda/lib/python3.6/site-packages/fastai/core.py", line 250, in index_row
return a[idxs]
IndexError: index 0 is out of bounds for axis 0 with size 0
I have used multiple different datasets and have the same problem

Related

ValueError: read of closed file while trying to convert .avro to .csv using Dask

import dask.bag as db
class Converter():
    """Convert a large Avro file to CSV using Dask.

    The reader and writer are static helpers, so they can be called either
    on the class or on an instance.
    """

    def __init__(self, input, output):
        """Store the source and destination paths.

        Args:
            input: Path of the Avro file to read.
            output: Path of the CSV file to write.
        """
        self.input = input
        self.output = output

    @staticmethod
    def large_file_reader(file_path: str):
        """Read an Avro file and return it as a Dask DataFrame.

        Args:
            file_path: Path of the Avro file.

        Returns:
            The DataFrame produced by ``to_dataframe()`` on the Avro bag.
        """
        temp_data = db.read_avro(file_path)
        data = temp_data.to_dataframe()
        # Sanity check that the file is readable: print the first rows.
        print(data.head(6))
        return data

    @staticmethod
    def large_file_writer(data, file_path: str) -> bool:
        """Materialize ``data`` and write it to ``file_path`` as CSV.

        Args:
            data: Object exposing ``compute()`` whose result has ``to_csv``
                (the frame returned by ``large_file_reader``).
            file_path: Path of the CSV file to write.

        Returns:
            True once the CSV has been written; failures propagate as the
            exception raised by ``compute()`` or ``to_csv()``.
        """
        # NOTE(review): compute() materializes the whole frame before the
        # write starts; confirm this fits in memory for multi-GB inputs.
        data.compute().to_csv(file_path, index=False)
        return True

    def large_file_processor(self):
        """Read ``self.input`` and write its contents to ``self.output``."""
        data = Converter.large_file_reader(self.input)
        Converter.large_file_writer(data=data, file_path=self.output)
if __name__ == "__main__":
    # Script entry point: convert the Avro input file to a CSV output file.
    c = Converter("/Users/csv_to_avro_new.avro", "/Users/test_avro_new.csv")
    c.large_file_processor()
Traceback (most recent call last):
File "/Users/PycharmProjects/ms--py/new.py", line 41, in <module>
c.large_file_processor()
File "/Users/PycharmProjects/ms--py/new.py", line 36, in large_file_processor
Converter.large_file_writer(data=data, file_path=output_file_path)
File "/Users/PycharmProjects/ms--py/new.py", line 28, in large_file_writer
data.compute().to_csv(file_path, index=False)
File "/Users/PycharmProjects/data-ingest/lib/python3.10/site-packages/dask/base.py", line 315, in compute
(result,) = compute(self, traverse=False, **kwargs)
File "/Users/PycharmProjects/data-ingest/lib/python3.10/site-packages/dask/base.py", line 600, in compute
results = schedule(dsk, keys, **kwargs)
File "/Users/PycharmProjects/data-ingest/lib/python3.10/site-packages/dask/threaded.py", line 89, in get
results = get_async(
File "/Users/PycharmProjects/data-ingest/lib/python3.10/site-packages/dask/local.py", line 511, in get_async
raise_exception(exc, tb)
File "/Users/PycharmProjects/data-ingest/lib/python3.10/site-packages/dask/local.py", line 319, in reraise
raise exc
File "/Users/PycharmProjects/data-ingest/lib/python3.10/site-packages/dask/local.py", line 224, in execute_task
result = _execute_task(task, data)
File "/Users/PycharmProjects/data-ingest/lib/python3.10/site-packages/dask/core.py", line 119, in _execute_task
return func(*(_execute_task(a, cache) for a in args))
File "/Users/PycharmProjects/data-ingest/lib/python3.10/site-packages/dask/core.py", line 119, in <genexpr>
return func(*(_execute_task(a, cache) for a in args))
File "/Users/PycharmProjects/data-ingest/lib/python3.10/site-packages/dask/core.py", line 119, in _execute_task
return func(*(_execute_task(a, cache) for a in args))
File "/Users/PycharmProjects/data-ingest/lib/python3.10/site-packages/dask/bag/avro.py", line 150, in read_chunk
chunk = read_block(f, off, l, head["sync"])
File "/Users/PycharmProjects/data-ingest/lib/python3.10/site-packages/fsspec/utils.py", line 244, in read_block
found_start_delim = seek_delimiter(f, delimiter, 2**16)
File "/Users/PycharmProjects/data-ingest/lib/python3.10/site-packages/fsspec/utils.py", line 187, in seek_delimiter
current = file.read(blocksize)
File "/Users/PycharmProjects/data-ingest/lib/python3.10/site-packages/fsspec/implementations/local.py", line 337, in read
return self.f.read(*args, **kwargs)
ValueError: read of closed file
Process finished with exit code 1
Added the error traceback; please check it. Thanks.
Error
ValueError: read of closed file
I tried converting between other formats, such as csv to json, in the same way, and it worked.
But I am not able to convert avro to csv, avro to json, or avro to parquet.
My file size is more than 2 GB. That's the reason I am using Dask.
Thanks in advance.

Open PIL image from zip (Kaggle competition)

I am trying to read an image from a Kaggle competition (it is an old competition, but I would like to practice):
https://www.kaggle.com/competitions/dogs-vs-cats-redux-kernels-edition
I am trying to read images from the training file zip using this code:
def get_files_names(zip_file_path):
    """Return the list of member names stored in the zip archive.

    Args:
        zip_file_path: Path of the zip archive to inspect.

    Returns:
        The archive's member names, as reported by ``ZipFile.namelist()``.
    """
    with ZipFile(zip_file_path) as archive:
        member_names = archive.namelist()
    return member_names
def get_image(zip_path, image_name):
    """Open one image stored inside a zip archive and return it fully loaded.

    Args:
        zip_path: Path of the zip archive.
        image_name: Name of the archive member holding the image.

    Returns:
        The PIL image with its pixel data already decoded, so it stays
        usable after the archive has been closed.
    """
    with ZipFile(zip_path) as myzip:
        with myzip.open(image_name) as myfile:
            img = Image.open(myfile)
            # Image.open() is lazy: it only reads the header and defers
            # decoding until the pixels are needed. Decode now, while the
            # archive member is still open; deferring it until after the
            # with-blocks exit makes a later show()/np.array() fail with
            # "'NoneType' object has no attribute 'seek'".
            img.load()
    return img
# List the archive members, then open and display the member at index 1.
# NOTE(review): train_file_path is not defined in this snippet; presumably
# the path of the competition's training zip. Confirm.
names = get_files_names(train_file_path)
img = get_image(train_file_path, names[1])
# The traceback quoted below originates from this call.
img.show()
I am getting this error:
Traceback (most recent call last):
File "/cats_vs_dogs/unrelated_file.py", line 46, in <module>
img.show()
File "/Users/user/.pyenv/versions/3.7.8-thesis/lib/python3.7/site-packages/PIL/Image.py", line 2205, in show
_show(self, title=title, command=command)
File "/Users/user/.pyenv/versions/3.7.8-thesis/lib/python3.7/site-packages/PIL/Image.py", line 3167, in _show
_showxv(image, **options)
File "/Users/user/.pyenv/versions/3.7.8-thesis/lib/python3.7/site-packages/PIL/Image.py", line 3181, in _showxv
ImageShow.show(image, title, **options)
File "/Users/user/.pyenv/versions/3.7.8-thesis/lib/python3.7/site-packages/PIL/ImageShow.py", line 56, in show
if viewer.show(image, title=title, **options):
File "/Users/user/.pyenv/versions/3.7.8-thesis/lib/python3.7/site-packages/PIL/ImageShow.py", line 81, in show
return self.show_image(image, **options)
File "/Users/user/.pyenv/versions/3.7.8-thesis/lib/python3.7/site-packages/PIL/ImageShow.py", line 107, in show_image
return self.show_file(self.save_image(image), **options)
File "/Users/user/.pyenv/versions/3.7.8-thesis/lib/python3.7/site-packages/PIL/ImageShow.py", line 103, in save_image
return image._dump(format=self.get_format(image), **self.options)
File "/Users/user/.pyenv/versions/3.7.8-thesis/lib/python3.7/site-packages/PIL/Image.py", line 636, in _dump
self.load()
File "/Users/user/.pyenv/versions/3.7.8-thesis/lib/python3.7/site-packages/PIL/ImageFile.py", line 247, in load
s = read(self.decodermaxblock)
File "/Users/user/.pyenv/versions/3.7.8-thesis/lib/python3.7/site-packages/PIL/JpegImagePlugin.py", line 400, in load_read
s = self.fp.read(read_bytes)
File "/Users/user/.pyenv/versions/3.7.8/lib/python3.7/zipfile.py", line 930, in read
data = self._read1(n)
File "/Users/user/.pyenv/versions/3.7.8/lib/python3.7/zipfile.py", line 998, in _read1
data += self._read2(n - len(data))
File "/Users/user/.pyenv/versions/3.7.8/lib/python3.7/zipfile.py", line 1030, in _read2
data = self._fileobj.read(n)
File "/Users/user/.pyenv/versions/3.7.8/lib/python3.7/zipfile.py", line 753, in read
self._file.seek(self._pos)
AttributeError: 'NoneType' object has no attribute 'seek'
If I extract the file into finder (using mac), then I see this image:
Also, if I try to convert the RGB image into a numpy array with np.array(img), I get this result:
What am I doing wrong?

Issue TypeError: argument must be a string or number

There is only one categorical column and I want to encode it. It works fine in my notebook, but when the notebook is uploaded to the AIcrowd platform it fails with the error below.
There are 3 categorical features in total: one is the target feature, one is the row id, and after excluding those two from training I am left with one feature.
df[['intersection_pos_rel_centre']]
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()
# LabelEncoder works on a 1-D array of uniformly typed labels, so select the
# column as a Series (single brackets) rather than a one-column DataFrame.
# Casting to str keeps mixed values (e.g. strings plus missing values, which
# then become the label 'nan') from raising
# "TypeError: argument must be a string or number".
df['intersection_pos_rel_centre'] = le.fit_transform(
    df['intersection_pos_rel_centre'].astype(str)
)
df[['intersection_pos_rel_centre']]
My error is
Selecting runtime language: python
[NbConvertApp] Converting notebook predict.ipynb to notebook
[NbConvertApp] Executing notebook with kernel: python
Traceback (most recent call last):
File "/opt/conda/bin/jupyter-nbconvert", line 11, in <module>
sys.exit(main())
File "/opt/conda/lib/python3.8/site-packages/jupyter_core/application.py", line 254, in launch_instance
return super(JupyterApp, cls).launch_instance(argv=argv, **kwargs)
File "/opt/conda/lib/python3.8/site-packages/traitlets/config/application.py", line 845, in launch_instance
app.start()
File "/opt/conda/lib/python3.8/site-packages/nbconvert/nbconvertapp.py", line 350, in start
self.convert_notebooks()
File "/opt/conda/lib/python3.8/site-packages/nbconvert/nbconvertapp.py", line 524, in convert_notebooks
self.convert_single_notebook(notebook_filename)
File "/opt/conda/lib/python3.8/site-packages/nbconvert/nbconvertapp.py", line 489, in convert_single_notebook
output, resources = self.export_single_notebook(notebook_filename, resources, input_buffer=input_buffer)
File "/opt/conda/lib/python3.8/site-packages/nbconvert/nbconvertapp.py", line 418, in export_single_notebook
output, resources = self.exporter.from_filename(notebook_filename, resources=resources)
File "/opt/conda/lib/python3.8/site-packages/nbconvert/exporters/exporter.py", line 181, in from_filename
return self.from_file(f, resources=resources, **kw)
File "/opt/conda/lib/python3.8/site-packages/nbconvert/exporters/exporter.py", line 199, in from_file
return self.from_notebook_node(nbformat.read(file_stream, as_version=4), resources=resources, **kw)
File "/opt/conda/lib/python3.8/site-packages/nbconvert/exporters/notebook.py", line 32, in from_notebook_node
nb_copy, resources = super().from_notebook_node(nb, resources, **kw)
File "/opt/conda/lib/python3.8/site-packages/nbconvert/exporters/exporter.py", line 143, in from_notebook_node
nb_copy, resources = self._preprocess(nb_copy, resources)
File "/opt/conda/lib/python3.8/site-packages/nbconvert/exporters/exporter.py", line 318, in _preprocess
nbc, resc = preprocessor(nbc, resc)
File "/opt/conda/lib/python3.8/site-packages/nbconvert/preprocessors/base.py", line 47, in __call__
return self.preprocess(nb, resources)
File "/opt/conda/lib/python3.8/site-packages/nbconvert/preprocessors/execute.py", line 79, in preprocess
self.execute()
File "/opt/conda/lib/python3.8/site-packages/nbclient/util.py", line 74, in wrapped
return just_run(coro(*args, **kwargs))
File "/opt/conda/lib/python3.8/site-packages/nbclient/util.py", line 53, in just_run
return loop.run_until_complete(coro)
File "/opt/conda/lib/python3.8/asyncio/base_events.py", line 616, in run_until_complete
return future.result()
File "/opt/conda/lib/python3.8/site-packages/nbclient/client.py", line 553, in async_execute
await self.async_execute_cell(
File "/opt/conda/lib/python3.8/site-packages/nbconvert/preprocessors/execute.py", line 123, in async_execute_cell
cell, resources = self.preprocess_cell(cell, self.resources, cell_index)
File "/opt/conda/lib/python3.8/site-packages/nbconvert/preprocessors/execute.py", line 146, in preprocess_cell
cell = run_sync(NotebookClient.async_execute_cell)(self, cell, index, store_history=self.store_history)
File "/opt/conda/lib/python3.8/site-packages/nbclient/util.py", line 74, in wrapped
return just_run(coro(*args, **kwargs))
File "/opt/conda/lib/python3.8/site-packages/nbclient/util.py", line 53, in just_run
return loop.run_until_complete(coro)
File "/opt/conda/lib/python3.8/site-packages/nest_asyncio.py", line 98, in run_until_complete
return f.result()
File "/opt/conda/lib/python3.8/asyncio/futures.py", line 178, in result
raise self._exception
File "/opt/conda/lib/python3.8/asyncio/tasks.py", line 280, in __step
result = coro.send(None)
File "/opt/conda/lib/python3.8/site-packages/nbclient/client.py", line 852, in async_execute_cell
self._check_raise_for_error(cell, exec_reply)
File "/opt/conda/lib/python3.8/site-packages/nbclient/client.py", line 760, in _check_raise_for_error
raise CellExecutionError.from_cell_and_msg(cell, exec_reply_content)
nbclient.exceptions.CellExecutionError: An error occurred while executing the following cell:
------------------
df[['intersection_pos_rel_centre']]
from sklearn.preprocessing import LabelEncoder
le=LabelEncoder()
df[['intersection_pos_rel_centre']]=le.fit_transform(df[['intersection_pos_rel_centre']])
df[['intersection_pos_rel_centre']]
------------------
TypeError: argument must be a string or number

Problem with adding Excel files at Pandas | wrapper return func

Hi everybody, I have a problem loading an Excel file with pandas.
I took the file from an archive; if I load it directly, it gives me an error. If I copy and paste the Excel file, there is no problem.
The code is very easy:
data = pd.read_excel(r"C:\Users\obett\Desktop\Corporate Governance\pandas.xlsx")
and this is the error:
Traceback (most recent call last):
File "C:/Users/obett/PycharmProjects/pythonProject6/main.py", line 24, in <module>
data = pd.read_excel(r"C:\Users\obett\Desktop\Corporate Governance\Aida_Export_67.xlsx")
File "C:\Users\obett\PycharmProjects\pythonProject6\venv\lib\site-packages\pandas\util\_decorators.py", line 299, in wrapper
return func(*args, **kwargs)
File "C:\Users\obett\PycharmProjects\pythonProject6\venv\lib\site-packages\pandas\io\excel\_base.py", line 344, in read_excel
data = io.parse(
File "C:\Users\obett\PycharmProjects\pythonProject6\venv\lib\site-packages\pandas\io\excel\_base.py", line 1170, in parse
return self._reader.parse(
File "C:\Users\obett\PycharmProjects\pythonProject6\venv\lib\site-packages\pandas\io\excel\_base.py", line 492, in parse
data = self.get_sheet_data(sheet, convert_float)
File "C:\Users\obett\PycharmProjects\pythonProject6\venv\lib\site-packages\pandas\io\excel\_openpyxl.py", line 549, in get_sheet_data
converted_row = [self._convert_cell(cell, convert_float) for cell in row]
File "C:\Users\obett\PycharmProjects\pythonProject6\venv\lib\site-packages\pandas\io\excel\_openpyxl.py", line 549, in <listcomp>
converted_row = [self._convert_cell(cell, convert_float) for cell in row]
File "C:\Users\obett\PycharmProjects\pythonProject6\venv\lib\site-packages\pandas\io\excel\_openpyxl.py", line 514, in _convert_cell
elif cell.is_date:
File "C:\Users\obett\PycharmProjects\pythonProject6\venv\lib\site-packages\openpyxl\cell\read_only.py", line 101, in is_date
return Cell.is_date.__get__(self)
File "C:\Users\obett\PycharmProjects\pythonProject6\venv\lib\site-packages\openpyxl\cell\cell.py", line 256, in is_date
self.data_type == 'n' and is_date_format(self.number_format)
File "C:\Users\obett\PycharmProjects\pythonProject6\venv\lib\site-packages\openpyxl\cell\read_only.py", line 66, in number_format
_id = self.style_array.numFmtId
File "C:\Users\obett\PycharmProjects\pythonProject6\venv\lib\site-packages\openpyxl\cell\read_only.py", line 56, in style_array
return self.parent.parent._cell_styles[self._style_id]
IndexError: list index out of range
Thank you very much

Tensorflow Dataset map: input seems to be a placeholder which causes an error in tf.read_file

I'm trying to read in data using the Tensorflow Dataset API. I have loaded filenames and label filenames into arrays which I load into a dataset. I then try to map these filenames to the actual image files, but get an error that seems to state that the input to the mapping function receives placeholders rather than actual tensors.
class DatasetReader:
    """Build a batched, repeating dataset of decoded image/annotation pairs.

    The record fields are converted to tensors, sliced into a dataset, mapped
    through ``_input_parser``, batched and then repeated.
    """

    def __init__(self, records_list, batch_size=1):
        """Create the dataset pipeline.

        Args:
            records_list: Iterable of mappings, each providing the keys
                'image', 'filename' and 'annotation'.
            batch_size: Number of parsed elements per batch.
        """
        self.batch_size = batch_size
        self.records = {}
        self.records["image"] = tf.convert_to_tensor([record['image'] for record in records_list])
        self.records["filename"] = tf.convert_to_tensor([record['filename'] for record in records_list])
        self.records["annotation"] = tf.convert_to_tensor([record['annotation'] for record in records_list])
        self.dataset = Dataset.from_tensor_slices(self.records)
        # Parse each element before batching.
        # NOTE(review): tf.read_file needs a rank-0 filename; verify that each
        # record's 'filename' and 'annotation' value is a single string.
        self.dataset = self.dataset.map(self._input_parser)
        self.dataset = self.dataset.batch(batch_size)
        self.dataset = self.dataset.repeat()

    def _input_parser(self, record):
        """Read and decode the image and annotation files of one record.

        Args:
            record: Mapping with 'filename' and 'annotation' entries naming
                the files to read.

        Returns:
            Whatever ``self._augment_image(image, annotation)`` returns; that
            method is not part of this snippet.
        """
        # The image is read from record['filename']; the 'image' entry is
        # not needed here.
        filename = record['filename']
        annotation_file = record['annotation']
        image = tf.image.decode_image(tf.read_file(filename))
        annotation = tf.image.decode_image(tf.read_file(annotation_file))
        return self._augment_image(image, annotation)
The error I'm getting is in the line image = tf.image.decode_image(tf.read_file(filename)). The stack trace is below.
File "FCN.py", line 269, in <module>
tf.app.run()
File "/home/ubuntu/anaconda2/lib/python2.7/site-packages/tensorflow/python/platform/app.py", line 48, in run
_sys.exit(main(_sys.argv[:1] + flags_passthrough))
File "FCN.py", line 179, in main
train_records, valid_records, image_options_train, image_options_val, FLAGS.batch_size, FLAGS.batch_size)
File "/home/ubuntu/FCN.tensorflow/TFReader.py", line 89, in from_records
train_reader = DatasetReader(train_records, train_image_options, train_batch_size)
File "/home/ubuntu/FCN.tensorflow/TFReader.py", line 34, in __init__
self.dataset = self.dataset.map(self._input_parser)
File "/home/ubuntu/anaconda2/lib/python2.7/site-packages/tensorflow/contrib/data/python/ops/dataset_ops.py", line 964, in map
return MapDataset(self, map_func, num_threads, output_buffer_size)
File "/home/ubuntu/anaconda2/lib/python2.7/site-packages/tensorflow/contrib/data/python/ops/dataset_ops.py", line 1735, in __init__
self._map_func.add_to_graph(ops.get_default_graph())
File "/home/ubuntu/anaconda2/lib/python2.7/site-packages/tensorflow/python/framework/function.py", line 449, in add_to_graph
self._create_definition_if_needed()
File "/home/ubuntu/anaconda2/lib/python2.7/site-packages/tensorflow/contrib/data/python/framework/function.py", line 168, in _create_definition_if_needed
outputs = self._func(*inputs)
File "/home/ubuntu/anaconda2/lib/python2.7/site-packages/tensorflow/contrib/data/python/ops/dataset_ops.py", line 1723, in tf_map_func
ret = map_func(nested_args)
File "/home/ubuntu/FCN.tensorflow/TFReader.py", line 42, in _input_parser
image = tf.image.decode_image(tf.read_file(filename))
File "/home/ubuntu/anaconda2/lib/python2.7/site-packages/tensorflow/python/ops/gen_io_ops.py", line 223, in read_file
result = _op_def_lib.apply_op("ReadFile", filename=filename, name=name)
File "/home/ubuntu/anaconda2/lib/python2.7/site-packages/tensorflow/python/framework/op_def_library.py", line 767, in apply_op
op_def=op_def)
File "/home/ubuntu/anaconda2/lib/python2.7/site-packages/tensorflow/contrib/data/python/framework/function.py", line 80, in create_op
data_types, **kwargs)
File "/home/ubuntu/anaconda2/lib/python2.7/site-packages/tensorflow/python/framework/function.py", line 665, in create_op
**kwargs)
File "/home/ubuntu/anaconda2/lib/python2.7/site-packages/tensorflow/python/framework/ops.py", line 2632, in create_op
set_shapes_for_outputs(ret)
File "/home/ubuntu/anaconda2/lib/python2.7/site-packages/tensorflow/python/framework/ops.py", line 1911, in set_shapes_for_outputs
shapes = shape_func(op)
File "/home/ubuntu/anaconda2/lib/python2.7/site-packages/tensorflow/python/framework/ops.py", line 1861, in call_with_requiring
return call_cpp_shape_fn(op, require_shape_fn=True)
File "/home/ubuntu/anaconda2/lib/python2.7/site-packages/tensorflow/python/framework/common_shapes.py", line 595, in call_cpp_shape_fn
require_shape_fn)
File "/home/ubuntu/anaconda2/lib/python2.7/site-packages/tensorflow/python/framework/common_shapes.py", line 659, in _call_cpp_shape_fn_impl
raise ValueError(err.message)
ValueError: Shape must be rank 0 but is rank 1 for 'ReadFile' (op: 'ReadFile') with input shapes: [?].

You cannot pass in a rank-1 tensor to tf.read_file. Here are some examples:
import tensorflow as tf
# Correct: a plain Python string is accepted as the filename.
tf.image.decode_image(tf.read_file("filename"))
# Correct: a rank-0 (scalar) string tensor is accepted too.
tf.image.decode_image(tf.read_file(tf.convert_to_tensor("filename")))
# Wrong: a list is not accepted as the filename.
tf.image.decode_image(tf.read_file(["filename"]))
# Wrong: a rank-1 tensor is not accepted either; this is the case behind the
# "Shape must be rank 0 but is rank 1 for 'ReadFile'" error.
tf.image.decode_image(tf.read_file(tf.convert_to_tensor(["filename"])))
In your code, it seems like self.records["filename"] is a rank-1 tensor; you might have mistakenly passed it as a parameter to tf.read_file in _input_parser.

Develop Reference

Python is a programming language that lets you work quickly and integrate systems more effectively.

ImageDataBunch not loading in my files/labels - python

Related

ValueError: read of closed file while trying to convert .avro to .csv using Dask

Open PIL image from zip (Kaggle competition)

Issue TypeError: argument must be a string or number

Problem with adding Excel files at Pandas | wrapper return func

Tensorflow Dataset map: input seems to be a placeholder which causes an error in tf.read_file

Categories

Resources