Python Pandas print error in Eclipse's PyDev: unknown encoding: MS874 - python

I am trying to use Pandas library to read csv files, using Eclipse's PyDev.
foo.csv file:
"head1", "head2",
"A", "123"
test.py:
import pandas as pd
data = pd.read_csv('foo.csv');
print data
I ran this and got an error:
Traceback (most recent call last):
File "C:\Users\qqq\studyspace\macd\test3.py", line 4, in <module>
print data
File "C:\Python27\lib\site-packages\pandas\core\frame.py", line 666, in __str__
return self.__bytes__()
File "C:\Python27\lib\site-packages\pandas\core\frame.py", line 676, in __bytes__
return self.__unicode__().encode(encoding, 'replace')
File "C:\Python27\lib\site-packages\pandas\core\frame.py", line 691, in __unicode__
fits_horizontal = self._repr_fits_horizontal_()
File "C:\Python27\lib\site-packages\pandas\core\frame.py", line 651, in _repr_fits_horizontal_
d.to_string(buf=buf)
File "C:\Python27\lib\site-packages\pandas\core\frame.py", line 1488, in to_string
formatter.to_string()
File "C:\Python27\lib\site-packages\pandas\core\format.py", line 314, in to_string
strcols = self._to_str_columns()
File "C:\Python27\lib\site-packages\pandas\core\format.py", line 258, in _to_str_columns
str_index = self._get_formatted_index()
File "C:\Python27\lib\site-packages\pandas\core\format.py", line 472, in _get_formatted_index
fmt_index = [index.format(name=show_index_names, formatter=fmt)]
File "C:\Python27\lib\site-packages\pandas\core\index.py", line 450, in format
return self._format_with_header(header, **kwargs)
File "C:\Python27\lib\site-packages\pandas\core\index.py", line 472, in _format_with_header
result = _trim_front(format_array(values, None, justify='left'))
File "C:\Python27\lib\site-packages\pandas\core\format.py", line 1321, in format_array
return fmt_obj.get_result()
File "C:\Python27\lib\site-packages\pandas\core\format.py", line 1448, in get_result
return _make_fixed_width(fmt_values, self.justify)
File "C:\Python27\lib\site-packages\pandas\core\format.py", line 1495, in _make_fixed_width
max_len = np.max([_strlen(x) for x in strings])
File "C:\Python27\lib\site-packages\pandas\core\format.py", line 184, in _strlen
return len(x.decode(encoding))
LookupError: unknown encoding: MS874
I have tried to run this in IPython, and it does not give the error, so I think the problem is with my Eclipse setting. I use Eclipse Juno and I installed Pandas via Python(x,y).
I have tried to solve it blindly like this
import pandas as pd
data = pd.read_csv('foo.csv');
b = True;
while(b):
try:
print data
b = False
except:
print 'foooo'
And it just printed 'foooo' forever.

I have found the solution.
Right click on the project => Properties => Resource => Text file encoding. Choose other => UTF-8.

Related

Error shows up when using df.to_parquet("filename")

I want to save the data set as a parquet file, called power.parquet, and I use df.to_parquet(<filename>). But it gives me this errer "ValueError: Error converting column "Global_reactive_power" to bytes using encoding UTF8. Original error: bad argument type for built-in operation" And I installed the fastparquet package.
from fastparquet import write, ParquetFile
dat.to_parquet("power.parquet")
df_parquet = ParquetFile("power.parquet").to_pandas()
df_parquet.head() # Test your final value
`*Traceback (most recent call last):
File "/opt/anaconda3/lib/python3.9/site-packages/fastparquet/writer.py", line 259, in convert
out = array_encode_utf8(data)
File "fastparquet/speedups.pyx", line 50, in fastparquet.speedups.array_encode_utf8
TypeError: bad argument type for built-in operation
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/var/folders/4f/bm2th1p56tz4rq_zffc8g3940000gn/T/ipykernel_85477/3080656655.py", line 1, in <module>
dat.to_parquet("power.parquet", compression="GZIP")
File "/opt/anaconda3/lib/python3.9/site-packages/dask/dataframe/core.py", line 4560, in to_parquet
return to_parquet(self, path, *args, **kwargs)
File "/opt/anaconda3/lib/python3.9/site-packages/dask/dataframe/io/parquet/core.py", line 732, in to_parquet
return compute_as_if_collection(
File "/opt/anaconda3/lib/python3.9/site-packages/dask/base.py", line 315, in compute_as_if_collection
return schedule(dsk2, keys, **kwargs)
File "/opt/anaconda3/lib/python3.9/site-packages/dask/threaded.py", line 79, in get
results = get_async(
File "/opt/anaconda3/lib/python3.9/site-packages/dask/local.py", line 507, in get_async
raise_exception(exc, tb)
File "/opt/anaconda3/lib/python3.9/site-packages/dask/local.py", line 315, in reraise
raise exc
File "/opt/anaconda3/lib/python3.9/site-packages/dask/local.py", line 220, in execute_task
result = _execute_task(task, data)
File "/opt/anaconda3/lib/python3.9/site-packages/dask/core.py", line 119, in _execute_task
return func(*(_execute_task(a, cache) for a in args))
File "/opt/anaconda3/lib/python3.9/site-packages/dask/utils.py", line 35, in apply
return func(*args, **kwargs)
File "/opt/anaconda3/lib/python3.9/site-packages/dask/dataframe/io/parquet/fastparquet.py", line 1167, in write_partition
rg = make_part_file(
File "/opt/anaconda3/lib/python3.9/site-packages/fastparquet/writer.py", line 716, in make_part_file
rg = make_row_group(f, data, schema, compression=compression,
File "/opt/anaconda3/lib/python3.9/site-packages/fastparquet/writer.py", line 701, in make_row_group
chunk = write_column(f, coldata, column,
File "/opt/anaconda3/lib/python3.9/site-packages/fastparquet/writer.py", line 554, in write_column
repetition_data, definition_data, encode[encoding](data, selement), 8 * b'\x00'
File "/opt/anaconda3/lib/python3.9/site-packages/fastparquet/writer.py", line 354, in encode_plain
out = convert(data, se)
File "/opt/anaconda3/lib/python3.9/site-packages/fastparquet/writer.py", line 284, in convert
raise ValueError('Error converting column "%s" to bytes using '
ValueError: Error converting column "Global_reactive_power" to bytes using encoding UTF8. Original error: bad argument type for built-in operation
*
I tried by adding object_coding = "bytes".I want to solve this problem.

Problem with adding Excel files at Pandas | wrapper return func

Hi everybody I have a problem uploading a excel file with Pandas
I have taken the file in archive, if I uploaded it directly it gaves me an error. If I cope and paste the excel file there is no problem.
The code is very easy:
data = pd.read_excel(r"C:\Users\obett\Desktop\Corporate Governance\pandas.xlsx")
and this is the error:
Traceback (most recent call last):
File "C:/Users/obett/PycharmProjects/pythonProject6/main.py", line 24, in <module>
data = pd.read_excel(r"C:\Users\obett\Desktop\Corporate Governance\Aida_Export_67.xlsx")
File "C:\Users\obett\PycharmProjects\pythonProject6\venv\lib\site-packages\pandas\util\_decorators.py", line 299, in wrapper
return func(*args, **kwargs)
File "C:\Users\obett\PycharmProjects\pythonProject6\venv\lib\site-packages\pandas\io\excel\_base.py", line 344, in read_excel
data = io.parse(
File "C:\Users\obett\PycharmProjects\pythonProject6\venv\lib\site-packages\pandas\io\excel\_base.py", line 1170, in parse
return self._reader.parse(
File "C:\Users\obett\PycharmProjects\pythonProject6\venv\lib\site-packages\pandas\io\excel\_base.py", line 492, in parse
data = self.get_sheet_data(sheet, convert_float)
File "C:\Users\obett\PycharmProjects\pythonProject6\venv\lib\site-packages\pandas\io\excel\_openpyxl.py", line 549, in get_sheet_data
converted_row = [self._convert_cell(cell, convert_float) for cell in row]
File "C:\Users\obett\PycharmProjects\pythonProject6\venv\lib\site-packages\pandas\io\excel\_openpyxl.py", line 549, in <listcomp>
converted_row = [self._convert_cell(cell, convert_float) for cell in row]
File "C:\Users\obett\PycharmProjects\pythonProject6\venv\lib\site-packages\pandas\io\excel\_openpyxl.py", line 514, in _convert_cell
elif cell.is_date:
File "C:\Users\obett\PycharmProjects\pythonProject6\venv\lib\site-packages\openpyxl\cell\read_only.py", line 101, in is_date
return Cell.is_date.__get__(self)
File "C:\Users\obett\PycharmProjects\pythonProject6\venv\lib\site-packages\openpyxl\cell\cell.py", line 256, in is_date
self.data_type == 'n' and is_date_format(self.number_format)
File "C:\Users\obett\PycharmProjects\pythonProject6\venv\lib\site-packages\openpyxl\cell\read_only.py", line 66, in number_format
_id = self.style_array.numFmtId
File "C:\Users\obett\PycharmProjects\pythonProject6\venv\lib\site-packages\openpyxl\cell\read_only.py", line 56, in style_array
return self.parent.parent._cell_styles[self._style_id]
IndexError: list index out of range
Thank you very much

Astropy get_gcrs_posvel in Python 2.7

In Python 2.7 I am trying to calculate the position and velocity of an observatory by doing
>>> from astropy.time import Time
>>> from astropy.coordinates import SkyCoord, EarthLocation, ICRS
>>> from astropy import units as u
>>> from astropy import coordinates
>>> time=Time(58121.93, format='mjd')
>>> location=EarthLocation(53.2367, 2.3085, 100, ellipsoid = None)
>>> op, ov = location.get_gcrs_posvel(time)
But I get the error:
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "C:\p\lib\site-packages\astropy\coordinates\earth.py", line 653, in get_gcrs_posvel
gcrs_data = self._get_gcrs(obstime).data
File "C:\p\lib\site-packages\astropy\coordinates\earth.py", line 633, in _get_gcrs
return itrs.transform_to(GCRS(obstime=obstime))
File "C:\p\lib\site-packages\astropy\coordinates\baseframe.py", line 934, in transform_to
return trans(self, new_frame)
File "C:\p\lib\site-packages\astropy\coordinates\transformations.py", line 1314, in __call__
curr_coord = t(curr_coord, curr_toframe)
File "C:\p\lib\site-packages\astropy\coordinates\transformations.py", line 847, in __call__
reprwithoutdiff = supcall(from_diffless, toframe)
File "C:\p\lib\site-packages\astropy\coordinates\builtin_frames\intermediate_rotation_transforms.py", line 72, in cirs_to_gcrs
return gcrs.transform_to(gcrs_frame)
File "C:\p\lib\site-packages\astropy\coordinates\baseframe.py", line 934, in transform_to
return trans(self, new_frame)
File "C:\p\lib\site-packages\astropy\coordinates\transformations.py", line 1314, in __call__
curr_coord = t(curr_coord, curr_toframe)
File "C:\p\lib\site-packages\astropy\coordinates\transformations.py", line 914, in __call__
return supcall(fromcoord, toframe)
File "C:\p\lib\site-packages\astropy\coordinates\builtin_frames\icrs_cirs_transforms.py", line 221, in gcrs_to_gcrs
return from_coo.transform_to(ICRS).transform_to(to_frame)
File "C:\p\lib\site-packages\astropy\coordinates\baseframe.py", line 934, in transform_to
return trans(self, new_frame)
File "C:\p\lib\site-packages\astropy\coordinates\transformations.py", line 1314, in __call__
curr_coord = t(curr_coord, curr_toframe)
File "C:\p\lib\site-packages\astropy\coordinates\transformations.py", line 914, in __call__
return supcall(fromcoord, toframe)
File "C:\p\lib\site-packages\astropy\coordinates\builtin_frames\icrs_cirs_transforms.py", line 188, in gcrs_to_icrs
i_ra, i_dec = aticq(gcrs_ra, gcrs_dec, astrom)
File "C:\p\lib\site-packages\astropy\coordinates\builtin_frames\utils.py", line 196, in aticq
before = norm(ppr-d)
File "C:\p\lib\site-packages\astropy\coordinates\builtin_frames\utils.py", line 125, in norm
return p/np.sqrt(np.einsum('...i,...i', p, p))[..., np.newaxis]
File "C:\p\lib\site-packages\numpy\core\einsumfunc.py", line 1087, in einsum
einsum_call=True)
File "C:\p\lib\site-packages\numpy\core\einsumfunc.py", line 688, in einsum_path
input_subscripts, output_subscript, operands = _parse_einsum_input(operands)
File "C:\p\lib\site-packages\numpy\core\einsumfunc.py", line 432, in _parse_einsum_input
raise TypeError("For this input type lists must contain "
TypeError: For this input type lists must contain either int or Ellipsis
I searched for this issue on Astropy's issue tracker and came across this:
numpy TypeError on coordinate transform (TypeError: For this input type lists must contain either int or Ellipsis)
It was fixed as of Astropy v2.0.4, so, pretty recent. You should be able to upgrade your Astropy.

pandas cannot converge

I get an error that a file does not exist while I have the file there in the folder, would you please tell me where I am making a mistake?
pd.DataFrame.from_csv
I am getting an error shown below.
Traceback (most recent call last):
File "main.py", line 194, in <module>
start_path+end_res)
File "/Users/admin/Desktop/script/mergeT.py", line 5, in merge
df_peak = pd.DataFrame.from_csv(peak_score, index_col = False, sep='\t')
File "/Library/Python/2.7/site-packages/pandas/core/frame.py", line 1231, in from_csv
infer_datetime_format=infer_datetime_format)
File "/Library/Python/2.7/site-packages/pandas/io/parsers.py", line 645, in parser_f
return _read(filepath_or_buffer, kwds)
File "/Library/Python/2.7/site-packages/pandas/io/parsers.py", line 388, in _read
parser = TextFileReader(filepath_or_buffer, **kwds)
File "/Library/Python/2.7/site-packages/pandas/io/parsers.py", line 729, in __init__
self._make_engine(self.engine)
File "/Library/Python/2.7/site-packages/pandas/io/parsers.py", line 922, in _make_engine
self._engine = CParserWrapper(self.f, **self.options)
File "/Library/Python/2.7/site-packages/pandas/io/parsers.py", line 1389, in __init__
self._reader = _parser.TextReader(src, **kwds)
File "pandas/parser.pyx", line 373, in pandas.parser.TextReader.__cinit__ (pandas/parser.c:4175)
File "pandas/parser.pyx", line 667, in pandas.parse**strong text**r.TextReader._setup_parser_source (pandas/parser.c:8440)
IOError: File results\scoring\fed\score_peak.txt does not exist
I have tried to set a path to the exact file
for example
As per documentation of pandas 0.19.1 pandas.DataFrame.from_csv does not support index_col = False. Try to use pandas.read_csv instead (with the same parameters). Also make sure you are using the up to date version of pandas.
See if this works:
import pandas as pd
def merge(peak_score, profile_score, res_file):
df_peak = pd.read_csv(peak_score, index_col = False, sep='\t')
df_profile = pd.read_csv(profile_score, index_col = False, sep='\t')
result = pd.concat([df_peak, df_profile], axis=1)
print result.head()
test = []
for a,b in zip(result['prot_a_p'],result['prot_b_p']):
if a == b:
test.append(1)
else:
test.append(0)
result['test']=test
result = result[result['test']==0]
del result['test']
result = result.fillna(0)
result.to_csv(res_file)
if __name__ == '__main__':
pass
Regarding the path issue when changing from Windows to OS X:
In all flavours of Unix, paths are written with slashes /, while in Windows backslashes \ are used. Since OS X is a descendant of Unix, as other users have correctly pointed out, when you change there from Windows you need to adapt your paths.

How to fix encoding in Python Mechanize?

here is the sample code:
from mechanize import Browser
br = Browser()
page = br.open('http://hunters.tclans.ru/news.php?readmore=2')
br.form = br.forms().next()
print br.form
The problem is that server return incorrect encoding (windows-cp1251). How can I manually set the encoding of the current page in mechanize?
Error:
Traceback (most recent call last):
File "/tmp/stackoverflow.py", line 5, in <module>
br.form = br.forms().next()
File "/usr/local/lib/python2.6/dist-packages/mechanize/_mechanize.py", line 426, in forms
return self._factory.forms()
File "/usr/local/lib/python2.6/dist-packages/mechanize/_html.py", line 559, in forms
self._forms_factory.forms())
File "/usr/local/lib/python2.6/dist-packages/mechanize/_html.py", line 225, in forms
_urlunparse=_rfc3986.urlunsplit,
File "/usr/local/lib/python2.6/dist-packages/ClientForm.py", line 967, in ParseResponseEx
_urlunparse=_urlunparse,
File "/usr/local/lib/python2.6/dist-packages/ClientForm.py", line 1104, in _ParseFileEx
fp.feed(data)
File "/usr/local/lib/python2.6/dist-packages/ClientForm.py", line 870, in feed
sgmllib.SGMLParser.feed(self, data)
File "/usr/lib/python2.6/sgmllib.py", line 104, in feed
self.goahead(0)
File "/usr/lib/python2.6/sgmllib.py", line 193, in goahead
self.handle_entityref(name)
File "/usr/local/lib/python2.6/dist-packages/ClientForm.py", line 751, in handle_entityref
'&%s;' % name, self._entitydefs, self._encoding))
File "/usr/local/lib/python2.6/dist-packages/ClientForm.py", line 238, in unescape
return re.sub(r"&#?[A-Za-z0-9]+?;", replace_entities, data)
File "/usr/lib/python2.6/re.py", line 151, in sub
return _compile(pattern, 0).sub(repl, string, count)
File "/usr/local/lib/python2.6/dist-packages/ClientForm.py", line 230, in replace_entities
repl = repl.encode(encoding)
LookupError: unknown encoding: windows-cp1251
I don't know about Mechanize, but you could hack codecs to accept wrong encoding names that have both ‘windows’ and ‘cp’:
>>> def fixcp(name):
... if name.lower().startswith('windows-cp'):
... try:
... return codecs.lookup(name[:8]+name[10:])
... except LookupError:
... pass
... return None
...
>>> codecs.register(fixcp)
>>> '\xcd\xe0\xef\xee\xec\xe8\xed\xe0\xe5\xec'.decode('windows-cp1251')
u'Напоминаем'
Fixed by setting
br._factory.encoding = enc
br._factory._forms_factory.encoding = enc
br._factory._links_factory._encoding = enc
(note the underscores) after br.open()

Categories

Resources