pandas read_csv error when loading a plain CSV file - python

I am having this very weird error with python pandas:
import pandas as pd
df = pd.read_csv('C:\Temp\test.csv', index_col=None, comment='#', sep=',')
The test.csv is a very simple CSV file created in Notepad:
aaa,bbb,date
hhhhh,wws,20220701
Now I get the error:
File "C:\test\untitled0.py", line 10, in <module>
df = pd.read_csv('C:\temp\test.csv', index_col=None, comment='#', sep=',')
File "C:\...\lib\site-packages\pandas\util\_decorators.py", line 311, in wrapper
return func(*args, **kwargs)
File "C:\...\lib\site-packages\pandas\io\parsers\readers.py", line 586, in read_csv
return _read(filepath_or_buffer, kwds)
File "C:\...\lib\site-packages\pandas\io\parsers\readers.py", line 482, in _read
parser = TextFileReader(filepath_or_buffer, **kwds)
File "C:\...\lib\site-packages\pandas\io\parsers\readers.py", line 811, in __init__
self._engine = self._make_engine(self.engine)
File "C:\...\lib\site-packages\pandas\io\parsers\readers.py", line 1040, in _make_engine
return mapping[engine](self.f, **self.options) # type: ignore[call-arg]
File "C:\...\lib\site-packages\pandas\io\parsers\c_parser_wrapper.py", line 51, in __init__
self._open_handles(src, kwds)
File "C:\...\lib\site-packages\pandas\io\parsers\base_parser.py", line 229, in _open_handles
errors=kwds.get("encoding_errors", "strict"),
File "C:\...\lib\site-packages\pandas\io\common.py", line 707, in get_handle
newline="",
OSError: [Errno 22] Invalid argument: 'C:\temp\test.csv'
I also tried to use Excel to export a CSV file, and get the same error.
Does anyone know what goes wrong?

In a python string, the backslash in '\t' is an escape character which causes those two characters ( \ followed by t) to mean tab. You can get around this using raw strings by prefacing the opening quote with the letter 'r':
r'C:\Temp\test.csv'

Related

pandas read_csv throwing ValueError: Invalid file path or buffer object type: <class 'list'>

I want to read a CSV file passed as a command-line argument. I thought I could pass argparse's FileType object directly to read_csv, but I'm getting errors.
from argparse import ArgumentParser, FileType
from pandas import read_csv
if __name__ == "__main__":
parser = ArgumentParser()
parser.add_argument("input_file_path", help="Input CSV file", type=FileType('r'), nargs=1)
df = read_csv(parser.parse_args().input_file_path, sep="|")
print(df.to_string())
Pandas read_csv is unable to read FileType object when I execute the program as given below - what is missing?
python csv_splitter.py test.csv
Traceback (most recent call last):
File "csv_splitter.py", line 7, in <module>
df = read_csv(parser.parse_args().input_file_path, sep="|")
File "C:\Users\kakkrah\AppData\Roaming\Python\Python38\site-packages\pandas\io\parsers.py", line 605, in read_csv
return _read(filepath_or_buffer, kwds)
File "C:\Users\kakkrah\AppData\Roaming\Python\Python38\site-packages\pandas\io\parsers.py", line 457, in _read
parser = TextFileReader(filepath_or_buffer, **kwds)
File "C:\Users\kakkrah\AppData\Roaming\Python\Python38\site-packages\pandas\io\parsers.py", line 814, in __init__
self._engine = self._make_engine(self.engine)
File "C:\Users\kakkrah\AppData\Roaming\Python\Python38\site-packages\pandas\io\parsers.py", line 1045, in _make_engine
return mapping[engine](self.f, **self.options) # type: ignore[call-arg]
File "C:\Users\kakkrah\AppData\Roaming\Python\Python38\site-packages\pandas\io\parsers.py", line 1862, in __init__
self._open_handles(src, kwds)
File "C:\Users\kakkrah\AppData\Roaming\Python\Python38\site-packages\pandas\io\parsers.py", line 1357, in _open_handles
self.handles = get_handle(
File "C:\Users\kakkrah\AppData\Roaming\Python\Python38\site-packages\pandas\io\common.py", line 558, in get_handle
ioargs = _get_filepath_or_buffer(
File "C:\Users\kakkrah\AppData\Roaming\Python\Python38\site-packages\pandas\io\common.py", line 371, in _get_filepath_or_buffer
raise ValueError(msg)
ValueError: Invalid file path or buffer object type: <class 'list'>
pd.read_csv cannot read a list of files, only one at a time.
To read multiple files into one dataframe, use pd.concat with a generator:
df = pd.concat(pd.read_csv(p) for p in paths)
Or pd.concat with map:
df = pd.concat(map(pd.read_csv, paths))
In OP's case, even though nargs=1 limits the arg parser to consuming 1 file, it still returns a list of that 1 file object:
print(parser.parse_args().input_file_path)
# [ <_io.TextIOWrapper> ]
So just index the single file:
df = pd.read_csv(parser.parse_args().input_file_path[0])
# ^^^

Lineterminator giving extra row using pandas

I think I have a problem with the line terminator when reading data from a text file using pandas. It produces an unneeded extra row of NaN at the end of each column. My data has 12 rows plus a header, but the code below produces 13 rows plus a header. How can I get it to output the correct data?
Data was written to text file using
with open(filepath, 'a', newline='') as filey:
csv_writer = csv.writer(filey, delimiter = '\t', lineterminator='\r\n')
Code
import pandas as pd
path_to_results = r"C:\\...\\Desktop\\Results\\_results.txt"
data = pd.read_csv(path_to_results,sep='\t',lineterminator='\r')
data = pd.DataFrame(data)
#print(data.head())
print(data["Vi_V"])
print(data["mass_g"])
using Lineterminator \r :
Name: Vi_V, dtype: object
0 0.24
1 0.47
...
11 3.66
12 NaN
using Lineterminator \r\n :
Traceback (most recent call last):
File "c:/Users.../g_00.py", line 5, in <module>
data = pd.read_csv(path_to_results,sep='\t',lineterminator='\r\n')
File "C:...\AppData\Local\Programs\Python\Python37\lib\site-packages\pandas\io\parsers.py", line 676, in parser_f
return _read(filepath_or_buffer, kwds)
File "C:...\AppData\Local\Programs\Python\Python37\lib\site-packages\pandas\io\parsers.py", line 448, in _read
parser = TextFileReader(fp_or_buf, **kwds)
File "C:...\AppData\Local\Programs\Python\Python37\lib\site-packages\pandas\io\parsers.py", line 880, in __init__
self._make_engine(self.engine)
File "C:...\AppData\Local\Programs\Python\Python37\lib\site-packages\pandas\io\parsers.py", line 1114, in _make_engine
self._engine = CParserWrapper(self.f, **self.options)
File "C:...\AppData\Local\Programs\Python\Python37\lib\site-packages\pandas\io\parsers.py", line 1891, in __init__
self._reader = parsers.TextReader(src, **kwds)
File "pandas\_libs\parsers.pyx", line 395, in pandas._libs.parsers.TextReader.__cinit__
ValueError: Only length-1 line terminators supported
dee cue gave the answer in a comment:
"Have you tried leaving out the line terminator parameter? Try to see if python on Windows automatically reads new line as \r\n. – dee cue Oct 12 at 10:01"
It worked.

Issues reading with pandas.read_table

I am trying to read in and manipulate some text files I have as outputs with statistics from an MRI analysis I ran.
With a for loop I would like to go into each subject's folder, convert their txt file with summary statistics into a data_frame, drop some of the rows that are not necessary, and concatenate each subject's now-cleaned data_frame with a master data_frame. I seem to be reading in the txt file and getting it into the data_frame. However, I am running into two issues I can't troubleshoot when trying to drop rows.
The txt file is organized like so.....
# Title Pathway Statistics
#
# generating_program
/cell_root/software/freesurfer/6.0.0/sys/bin/dmri_pathstats
# cvs_version
Count 2000
Volume 98
Len_Min 67
Len_Max 92
Len_Avg 81.219
Len_Center 87
AD_Avg 0.00152315
AD_Avg_Weight 0.00151198
AD_Avg_Center 0.00141413
Although there is one row with many spaces that might be related to the issue in terms of reading the data in?
# cmdline
/cell_root/software/freesurfer/6.0.0/sys/bin/dmri_pathstats --intrc
/homes/dcallow/dti_freesurf/trac/Ex.AES115.long.base_AES115/dpath/fmajor_PP_avg33_mni_bbr --dtbase
/homes/dcallow/dti_freesurf/trac/Ex.AES115.long.base_AES115/dmri/dtifit --path fmajor --subj Ex.AES115.long.base_AES115 --out
/homes/dcallow/dti_freesurf/trac/Ex.AES115.long.base_AES115/dpath/fmajor_PP_avg33_mni_bbr/pathstats.overall.txt --outvox
/homes/dcallow/dti_freesurf/trac/Ex.AES115.long.base_AES115/dpath/fmajor_PP_avg33_mni_bbr/pathstats.byvoxel.txt
I tried removing the drop line however I then get a different error that occurs earlier in the code which is also odd?
Traceback (most recent call last):
File "./txt_2_excel.sh", line 18, in
df=pd.read_table('pathstats.overall.txt', delim_whitespace=True,names=['measure','value','excess1','excess2','excess3','excess4'])
File "/Users/amos/anaconda3/lib/python3.7/site-packages/pandas/io/parsers.py",
line 678, in parser_f
return _read(filepath_or_buffer, kwds)
File "/Users/amos/anaconda3/lib/python3.7/site-packages/pandas/io/parsers.py",
line 440, in _read
parser = TextFileReader(filepath_or_buffer, **kwds)
File "/Users/amos/anaconda3/lib/python3.7/site-packages/pandas/io/parsers.py",
line 787, in init
self._make_engine(self.engine)
File "/Users/amos/anaconda3/lib/python3.7/site-packages/pandas/io/parsers.py",
line 1014, in _make_engine
self._engine = CParserWrapper(self.f, **self.options)
File "/Users/amos/anaconda3/lib/python3.7/site-packages/pandas/io/parsers.py",
line 1708, in init
self._reader = parsers.TextReader(src, **kwds)
File "pandas/_libs/parsers.pyx", line 384, in pandas._libs.parsers.TextReader.cinit
File "pandas/_libs/parsers.pyx", line 695, in pandas._libs.parsers.TextReader._setup_parser_source
FileNotFoundError: File b'pathstats.overall.txt' does not exist
#!/Users/amos/anaconda3/bin/python
# Pythono3 code to rename multiple
# files in a directory or folder
# importing os module
import os
import pandas as pd
#set working directory to where files are stored
os.chdir("/Volumes/DANIEL/trac_stats")
df_master = pd.DataFrame()
for tract in os.listdir("/Volumes/DANIEL/tract_names/"):
for subj in os.listdir("/Volumes/DANIEL/trac/"):
os.chdir("/Volumes/DANIEL/trac/{0}/dpath/{1}/".format(subj,tract))
os.getcwd()
df=pd.read_table('pathstats.overall.txt', delim_whitespace=True,names=['measure','value','excess1','excess2','excess3','excess4'])
df=df.drop([0,1,2,3,4,5,6,7,8,9,10,11,14,15,16,17,18,20,22,23,25,26,28,29,31,32])
df['subj']=subj
df_master=pd.concat([df_master,df])
print(df_master)
os.chdir("/Volumes/DANIEL/trac_stats/")
df_master.to_excel('trac_stats')
This should produce an Excel sheet named trac_stats with the columns ['measure','value','excess1','excess2','excess3','excess4'] and rows 12, 13, 19, 21, 24, and 27 of data for each subject.
I get the following error
File "./txt_2_excel.sh", line 20, in File
"/Users/amos/anaconda3/lib/python3.7/site-packages/pandas/core/frame.py",
line 3697, in drop errors=errors) File
"/Users/amos/anaconda3/lib/python3.7/site-packages/pandas/core/generic.py",
line 3111, in drop obj = obj._drop_axis(labels, axis, level=level,
errors=errors) File
"/Users/amos/anaconda3/lib/python3.7/site-packages/pandas/core/generic.py",
line 3143, in _drop_axis new_axis = axis.drop(labels, errors=errors)
File
"/Users/amos/anaconda3/lib/python3.7/site-packages/pandas/core/indexes/base.py",
line 4404, in drop '{} not found in axis'.format(labels[mask]))
KeyError: '[32] not found in axis'

How to write a custom table to a new file from a CSV file

I have written a script to extract a table from a CSV file and write a new CSV file that contains this table.
So I have this code below :
import csv
import pandas as pd
with open("C:\\OpenFace\\x64\\Release\\processed\\webcam_2019-04-22-1552.csv") as csvfile:
ddf= pd.read_table(csvfile,sep=" ")
first_letters = ['eye']
headers = ddf.dtypes.index
df= pd.read_table(csvfile,sep=" ",names=[name for name in headers if (name[0] in first_letters)])
print(df)
I'm trying to get only the columns whose names start with "eye",
but I get this error:
Traceback (most recent call last):
File "getpoints.py", line 8, in <module>
df= pd.read_table(csvfile,sep=" ",names=[name for name in headers if
(name[0] in first_letters)])
File "C:\Python36\lib\site-packages\pandas\io\parsers.py", line 678, in parser_f
return _read(filepath_or_buffer, kwds)
File "C:\Python36\lib\site-packages\pandas\io\parsers.py", line 440, in _read
parser = TextFileReader(filepath_or_buffer, **kwds)
File "C:\Python36\lib\site-packages\pandas\io\parsers.py", line 787, in __init__
self._make_engine(self.engine)
File "C:\Python36\lib\site-packages\pandas\io\parsers.py", line 1014, in _make_engine
self._engine = CParserWrapper(self.f, **self.options)
File "C:\Python36\lib\site-packages\pandas\io\parsers.py", line 1708, in __init__
self._reader = parsers.TextReader(src, **kwds)
File "pandas\_libs\parsers.pyx", line 542, in pandas._libs.parsers.TextReader.__cinit__
pandas.errors.EmptyDataError: No columns to parse from file
How to solve this?
Any ideas?
Thanks.
import csv
import pandas as pd
def read_prefixed_columns(path, prefix="eye", sep=" "):
    """Load only the columns whose header name starts with ``prefix``.

    The earlier version read the header with ``csv.reader`` and passed the
    filtered names via ``names=``. That relabels the first columns of the
    file rather than selecting the wanted ones, and it also relied on
    Python 2 only constructs (``reader.next()``, ``"rb"`` mode) and a
    ``break`` outside any loop. ``usecols`` is the parameter that actually
    selects columns.

    Parameters
    ----------
    path : str or path-like
        Location of the delimited text file; the first row must be the header.
    prefix : str, default "eye"
        Columns are kept when their header name starts with this string.
    sep : str, default " "
        Field delimiter, passed through to ``pandas.read_csv``.

    Returns
    -------
    pandas.DataFrame
        The rows of the file restricted to the matching columns.
    """
    # A callable ``usecols`` is evaluated against each header name, so the
    # file is parsed once and no separate header-reading pass is needed.
    return pd.read_csv(path, sep=sep, usecols=lambda name: name.startswith(prefix))


if __name__ == "__main__":
    df = read_prefixed_columns("C:/path/to/.csv")

pandas cannot converge

I get an error that a file does not exist while I have the file there in the folder, would you please tell me where I am making a mistake?
pd.DataFrame.from_csv
I am getting an error shown below.
Traceback (most recent call last):
File "main.py", line 194, in <module>
start_path+end_res)
File "/Users/admin/Desktop/script/mergeT.py", line 5, in merge
df_peak = pd.DataFrame.from_csv(peak_score, index_col = False, sep='\t')
File "/Library/Python/2.7/site-packages/pandas/core/frame.py", line 1231, in from_csv
infer_datetime_format=infer_datetime_format)
File "/Library/Python/2.7/site-packages/pandas/io/parsers.py", line 645, in parser_f
return _read(filepath_or_buffer, kwds)
File "/Library/Python/2.7/site-packages/pandas/io/parsers.py", line 388, in _read
parser = TextFileReader(filepath_or_buffer, **kwds)
File "/Library/Python/2.7/site-packages/pandas/io/parsers.py", line 729, in __init__
self._make_engine(self.engine)
File "/Library/Python/2.7/site-packages/pandas/io/parsers.py", line 922, in _make_engine
self._engine = CParserWrapper(self.f, **self.options)
File "/Library/Python/2.7/site-packages/pandas/io/parsers.py", line 1389, in __init__
self._reader = _parser.TextReader(src, **kwds)
File "pandas/parser.pyx", line 373, in pandas.parser.TextReader.__cinit__ (pandas/parser.c:4175)
File "pandas/parser.pyx", line 667, in pandas.parser.TextReader._setup_parser_source (pandas/parser.c:8440)
IOError: File results\scoring\fed\score_peak.txt does not exist
I have tried to set a path to the exact file
for example
As per documentation of pandas 0.19.1 pandas.DataFrame.from_csv does not support index_col = False. Try to use pandas.read_csv instead (with the same parameters). Also make sure you are using the up to date version of pandas.
See if this works:
import pandas as pd
def merge(peak_score, profile_score, res_file):
    """Join two tab-separated score tables side by side and save the mismatches.

    Both inputs are read with ``pandas.read_csv`` and concatenated
    column-wise. Rows where ``prot_a_p`` equals ``prot_b_p`` are discarded,
    remaining missing values are replaced with 0, and the result is written
    to ``res_file`` as CSV.

    Parameters
    ----------
    peak_score : str or file-like
        Tab-separated table that provides the ``prot_a_p`` column.
    profile_score : str or file-like
        Tab-separated table that provides the ``prot_b_p`` column.
    res_file : str or file-like
        Destination for the filtered, NaN-filled CSV output.

    Returns
    -------
    None
    """
    df_peak = pd.read_csv(peak_score, index_col=False, sep='\t')
    df_profile = pd.read_csv(profile_score, index_col=False, sep='\t')
    result = pd.concat([df_peak, df_profile], axis=1)
    # Parenthesised form is valid on both Python 2 and Python 3.
    print(result.head())
    # Element-wise comparison replaces the manual zip/append loop and the
    # temporary 'test' column. NaN != NaN is True, so rows with missing
    # values are kept, exactly as the ``a == b`` check in the loop did.
    differs = result['prot_a_p'] != result['prot_b_p']
    result = result[differs].fillna(0)
    result.to_csv(res_file)
if __name__ == '__main__':
pass
Regarding the path issue when changing from Windows to OS X:
In all flavours of Unix, paths are written with slashes /, while in Windows backslashes \ are used. Since OS X is a descendant of Unix, as other users have correctly pointed out, when you change there from Windows you need to adapt your paths.

Categories

Resources