I am trying to import CSV files and analyse them for missing values. Below is my code:
import pandas as pd
import numpy as np
import smtplib
import glob
import os
import sys
# Collect every *.csv file located directly in `path`.
path = "/home/tmp"
all_files = glob.glob(os.path.join(path,"*.csv"))
for fname in all_files:
    print(fname)
    # skiprows=1 discards the first line of the file; header=None makes
    # pandas treat every remaining line as a data row.
    # NOTE(review): this is the call that raises EmptyDataError in the
    # traceback that follows - presumably one of the matched files has
    # nothing left to parse once its first line is skipped; confirm by
    # checking which fname was printed last.
    df_header = pd.read_csv(fname,header=None,skiprows=1,encoding='cp1252')
    print(df_header)
Error msg:
df_header = pd.read_csv(fname,header=None,skiprows=1,encoding='cp1252')
File "/usr/lib64/python2.7/site-packages/pandas/io/parsers.py", line 678, in parser_f
return _read(filepath_or_buffer, kwds)
File "/usr/lib64/python2.7/site-packages/pandas/io/parsers.py", line 440, in _read
parser = TextFileReader(filepath_or_buffer, **kwds)
File "/usr/lib64/python2.7/site-packages/pandas/io/parsers.py", line 787, in __init__
self._make_engine(self.engine)
File "/usr/lib64/python2.7/site-packages/pandas/io/parsers.py", line 1014, in _make_engine
self._engine = CParserWrapper(self.f, **self.options)
File "/usr/lib64/python2.7/site-packages/pandas/io/parsers.py", line 1708, in __init__
self._reader = parsers.TextReader(src, **kwds)
File "pandas/_libs/parsers.pyx", line 542, in pandas._libs.parsers.TextReader.__cinit__
pandas.errors.EmptyDataError: No columns to parse from file
Please suggest a solution for fixing this.
Sample Data
Output of head -n5 path:
Related
I am having this very weird error with python pandas:
import pandas as pd
df = pd.read_csv('C:\Temp\test.csv', index_col=None, comment='#', sep=',')
The test.csv is a very simple CSV file created in Notepad:
aaa,bbb,date
hhhhh,wws,20220701
Now I get the error:
File "C:\test\untitled0.py", line 10, in <module>
df = pd.read_csv('C:\temp\test.csv', index_col=None, comment='#', sep=',')
File "C:\...\lib\site-packages\pandas\util\_decorators.py", line 311, in wrapper
return func(*args, **kwargs)
File "C:\...\lib\site-packages\pandas\io\parsers\readers.py", line 586, in read_csv
return _read(filepath_or_buffer, kwds)
File "C:\...\lib\site-packages\pandas\io\parsers\readers.py", line 482, in _read
parser = TextFileReader(filepath_or_buffer, **kwds)
File "C:\...\lib\site-packages\pandas\io\parsers\readers.py", line 811, in __init__
self._engine = self._make_engine(self.engine)
File "C:\...\lib\site-packages\pandas\io\parsers\readers.py", line 1040, in _make_engine
return mapping[engine](self.f, **self.options) # type: ignore[call-arg]
File "C:\...\lib\site-packages\pandas\io\parsers\c_parser_wrapper.py", line 51, in __init__
self._open_handles(src, kwds)
File "C:\...\lib\site-packages\pandas\io\parsers\base_parser.py", line 229, in _open_handles
errors=kwds.get("encoding_errors", "strict"),
File "C:\...\lib\site-packages\pandas\io\common.py", line 707, in get_handle
newline="",
OSError: [Errno 22] Invalid argument: 'C:\temp\test.csv'
I also tried to use Excel to export a CSV file, and get the same error.
Does anyone know what goes wrong?
In a python string, the backslash in '\t' is an escape character which causes those two characters ( \ followed by t) to mean tab. You can get around this using raw strings by prefacing the opening quote with the letter 'r':
r'C:\Temp\test.csv'
HELP, does someone understand why it is giving me this error?
import tensorflow
import keras
import pandas as pd
import numpy as np
import sklearn
from sklearn import linear_model
from sklearn.utils import shuffle
# Load the semicolon-separated dataset. The file name is a relative path, so
# it is looked up in the current working directory of the process.
data = pd.read_csv("student-mat.csv", sep=";")
# Keep only the six columns of interest, in this order.
data = data[["G1","G2","G3","studytime","failures","absences"]]
# Show the first rows as a quick sanity check.
print(data.head())
Traceback (most recent call last):
File "/Users/tanyapaul/PycharmProjects/pythonProject5/main.py", line 12, in <module>
data = pd.read_csv("student-mat.csv", sep=";")
File "/opt/anaconda3/envs/tensor/lib/python3.6/site-packages/pandas/io/parsers.py", line 688, in read_csv
return _read(filepath_or_buffer, kwds)
File "/opt/anaconda3/envs/tensor/lib/python3.6/site-packages/pandas/io/parsers.py", line 454, in _read
parser = TextFileReader(fp_or_buf, **kwds)
File "/opt/anaconda3/envs/tensor/lib/python3.6/site-packages/pandas/io/parsers.py", line 948, in __init__
self._make_engine(self.engine)
File "/opt/anaconda3/envs/tensor/lib/python3.6/site-packages/pandas/io/parsers.py", line 1180, in _make_engine
self._engine = CParserWrapper(self.f, **self.options)
File "/opt/anaconda3/envs/tensor/lib/python3.6/site-packages/pandas/io/parsers.py", line 2010, in __init__
self._reader = parsers.TextReader(src, **kwds)
File "pandas/_libs/parsers.pyx", line 382, in pandas._libs.parsers.TextReader.__cinit__
File "pandas/_libs/parsers.pyx", line 674, in pandas._libs.parsers.TextReader._setup_parser_source
FileNotFoundError: [Errno 2] No such file or directory: 'student-mat.csv'
Thank you for your help :)
My problem is that the files and folders aren't retrieved, and the directory isn't created either.
Fetching the data from the internet:
import os
import tarfile
from six.moves import urllib
# Base URL of the repository branch hosting the dataset (no trailing slash).
DOWNLOAD_ROOT = "https://raw.githubusercontent.com/ageron/handson-ml/master"
# Relative local directory for the dataset files.
HOUSING_PATH = os.path.join("datasets", "housing")
# The separating "/" must be explicit: DOWNLOAD_ROOT does not end with one, so
# plain concatenation would yield ".../masterdatasets/housing/housing.tgz".
HOUSING_URL = DOWNLOAD_ROOT + "/datasets/housing/housing.tgz"
def fetch_housing_data(housing_url=HOUSING_URL, housing_path=HOUSING_PATH):
    """Download the housing archive and unpack it into *housing_path*.

    Args:
        housing_url: URL of the ``housing.tgz`` archive to download.
        housing_path: Directory that receives the downloaded archive and its
            extracted contents. It is created when it does not exist yet.

    Nothing is created or downloaded until this function is called.
    """
    if not os.path.isdir(housing_path):
        os.makedirs(housing_path)
    tgz_path = os.path.join(housing_path, "housing.tgz")
    urllib.request.urlretrieve(housing_url, tgz_path)
    # The context manager closes the archive even when extraction raises.
    # NOTE(review): extractall() trusts the member paths stored in the
    # archive, so only use this with archives from a trusted source.
    with tarfile.open(tgz_path) as housing_tgz:
        housing_tgz.extractall(path=housing_path)
This is where I'll be loading the data into pandas:
import pandas as pd
def load_housing_data(housing_path=HOUSING_PATH):
    """Read ``housing.csv`` from *housing_path* and return it as a DataFrame."""
    return pd.read_csv(os.path.join(housing_path, "housing.csv"))
Retrieving the first five rows
# Load housing.csv from the default HOUSING_PATH directory.
# NOTE(review): nothing in the code shown calls fetch_housing_data() before
# this point, so presumably the directory and the CSV do not exist yet - confirm.
housing = load_housing_data()
# First rows of the frame (head() returns five rows by default).
housing.head()
This is the error I am getting. Please help me resolve it.
Traceback (most recent call last):
File "/Users/kutloano/Documents/Prog/ml/getData.py", line 24, in <module>
housing = load_housing_data()
File "/Users/kutloano/Documents/Prog/ml/getData.py", line 22, in load_housing_data
return pd.read_csv(csv_path)
File "/Users/kutloano/anaconda3/lib/python3.7/site-packages/pandas/io/parsers.py", line 702, in parser_f
return _read(filepath_or_buffer, kwds)
File "/Users/kutloano/anaconda3/lib/python3.7/site-packages/pandas/io/parsers.py", line 429, in _read
parser = TextFileReader(filepath_or_buffer, **kwds)
File "/Users/kutloano/anaconda3/lib/python3.7/site-packages/pandas/io/parsers.py", line 895, in __init__
self._make_engine(self.engine)
File "/Users/kutloano/anaconda3/lib/python3.7/site-packages/pandas/io/parsers.py", line 1122, in _make_engine
self._engine = CParserWrapper(self.f, **self.options)
File "/Users/kutloano/anaconda3/lib/python3.7/site-packages/pandas/io/parsers.py", line 1853, in __init__
self._reader = parsers.TextReader(src, **kwds)
File "pandas/_libs/parsers.pyx", line 387, in pandas._libs.parsers.TextReader.__cinit__
File "pandas/_libs/parsers.pyx", line 705, in pandas._libs.parsers.TextReader._setup_parser_source
FileNotFoundError: [Errno 2] File b'datasets/housing/housing.csv' does not exist: b'datasets/housing/housing.csv'
I have written a script to extract a table from a CSV file and write a new CSV file that contains this table.
So I have the code below:
import csv
import pandas as pd
with open("C:\\OpenFace\\x64\\Release\\processed\\webcam_2019-04-22-1552.csv") as csvfile:
    # First pass: parse the file to learn its column names.
    ddf= pd.read_table(csvfile,sep=" ")
    first_letters = ['eye']
    headers = ddf.dtypes.index
    # NOTE(review): the first read_table call has already consumed csvfile,
    # so this second read of the same handle presumably starts at end-of-file;
    # that matches the EmptyDataError reported for this line - confirm.
    # NOTE(review): name[0] is a single character, so it is never equal to
    # the three-letter entry 'eye' in first_letters.
    df= pd.read_table(csvfile,sep=" ",names=[name for name in headers if (name[0] in first_letters)])
print(df)
I'm trying to get only the columns whose names start with "eye",
but I get this error:
Traceback (most recent call last):
File "getpoints.py", line 8, in <module>
df= pd.read_table(csvfile,sep=" ",names=[name for name in headers if
(name[0] in first_letters)])
File "C:\Python36\lib\site-packages\pandas\io\parsers.py", line 678, in parser_f
return _read(filepath_or_buffer, kwds)
File "C:\Python36\lib\site-packages\pandas\io\parsers.py", line 440, in _read
parser = TextFileReader(filepath_or_buffer, **kwds)
File "C:\Python36\lib\site-packages\pandas\io\parsers.py", line 787, in __init__
self._make_engine(self.engine)
File "C:\Python36\lib\site-packages\pandas\io\parsers.py", line 1014, in _make_engine
self._engine = CParserWrapper(self.f, **self.options)
File "C:\Python36\lib\site-packages\pandas\io\parsers.py", line 1708, in __init__
self._reader = parsers.TextReader(src, **kwds)
File "pandas\_libs\parsers.pyx", line 542, in pandas._libs.parsers.TextReader.__cinit__
pandas.errors.EmptyDataError: No columns to parse from file
How to solve this?
Any ideas?
Thanks.
import csv
import pandas as pd
# Read only the header row to learn the column names; the data rows are left
# for pandas to parse afterwards.
with open("C:/path/to/.csv", newline="") as f:
    # Use the same delimiter as the read_csv call so both see the same
    # column names. An empty file yields no header, hence the [] default.
    columns = next(csv.reader(f, delimiter=" "), [])
# Keep only the columns whose name starts with "eye".
columns = [name for name in columns if name.startswith("eye")]
# usecols selects those columns by name; the file's own header row still
# supplies the names (names= would relabel columns instead of selecting them).
df = pd.read_csv("C:/path/to/.csv", sep=" ", usecols=columns)
I am trying to download HF data from the Netfonds website by directly using Dr. Yves Hilpisch's sample code. However, I ran into the following error message:
ValueError: No columns to parse from file
Can anyone help with this? Thanks a lot.
Here is the sample code:
import numpy as np
import pandas as pd
import datetime as dt
from urllib import urlretrieve
# URL template: the three %s slots are filled with year, month and day.
url1='http://hopey.netfonds.no/posdump.php?'
url2='date=%s%s%s&paper=AAPL.O&csv_format=csv'
url=url1+url2
year='2014'
month='09'
days=['23','24']
# Accumulate one day's data after another in a single frame.
AAPL=pd.DataFrame()
for day in days:
    # NOTE(review): read_csv fetches the URL directly; the ValueError reported
    # for this call presumably means the response contained no CSV content
    # for the requested date - confirm by opening the URL.
    # NOTE(review): DataFrame.append was removed in pandas 2.0; pd.concat is
    # the replacement on current versions.
    AAPL=AAPL.append(pd.read_csv(url % (year,month,day),
                                 index_col=0, header=0, parse_dates=True))
AAPL.columns=['bid','bdepth','bdeptht','offer','odepth','odeptht']
AAPL.info()
The error message is like this :
Traceback (most recent call last):
File "<ipython-input-87-27cc48982059>", line 18, in <module>
index_col=0, header=0, parse_dates=True))
File "C:\Users\jinj\AppData\Local\Continuum\Miniconda\lib\site-packages\pandas\io\parsers.py", line 474, in parser_f
return _read(filepath_or_buffer, kwds)
File "C:\Users\jinj\AppData\Local\Continuum\Miniconda\lib\site-packages\pandas\io\parsers.py", line 250, in _read
parser = TextFileReader(filepath_or_buffer, **kwds)
File "C:\Users\jinj\AppData\Local\Continuum\Miniconda\lib\site-packages\pandas\io\parsers.py", line 566, in __init__
self._make_engine(self.engine)
File "C:\Users\jinj\AppData\Local\Continuum\Miniconda\lib\site-packages\pandas\io\parsers.py", line 705, in _make_engine
self._engine = CParserWrapper(self.f, **self.options)
File "C:\Users\jinj\AppData\Local\Continuum\Miniconda\lib\site-packages\pandas\io\parsers.py", line 1072, in __init__
self._reader = _parser.TextReader(src, **kwds)
File "pandas\parser.pyx", line 512, in pandas.parser.TextReader.__cinit__ (pandas\parser.c:4814)
ValueError: No columns to parse from file