Version of raise is a syntax error in Python 3

As a rookie, I just started using the pandas-datareader library, and in particular the pandas read_html function, and came across the following error when trying to pull a table from a website.
import pandas as pd
from pandas_datareader import data
df_list=pd.read_html('https://www.mismarcadores.com/futbol/espana/laliga/clasificacion/')
print(len(df_list))
And I get this error, with the traceback pointing at the raise near line 346 of pandas' compat module:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-44-c546df3e8ebd> in <module>()
----> 1 df_list=pd.read_html('https://www.mismarcadores.com/futbol/espana/laliga/clasificacion/')
2 print(len(df_list))
~\Anaconda3\lib\site-packages\pandas\io\html.py in read_html(io, match, flavor, header, index_col, skiprows, attrs, parse_dates, tupleize_cols, thousands, encoding, decimal, converters, na_values, keep_default_na)
904 thousands=thousands, attrs=attrs, encoding=encoding,
905 decimal=decimal, converters=converters, na_values=na_values,
--> 906 keep_default_na=keep_default_na)
~\Anaconda3\lib\site-packages\pandas\io\html.py in _parse(flavor, io, match, attrs, encoding, **kwargs)
741 break
742 else:
--> 743 raise_with_traceback(retained)
744
745 ret = []
~\Anaconda3\lib\site-packages\pandas\compat\__init__.py in raise_with_traceback(exc, traceback)
342 if traceback == Ellipsis:
343 _, _, traceback = sys.exc_info()
--> 344 raise exc.with_traceback(traceback)
345 else:
346 # this version of raise is a syntax error in Python 3
ValueError: No tables found
Checking the HTML source, there is actually a table tag at that URL, and I do not understand why read_html does not pick it up...
Thanks a lot for your help.
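For what it's worth, a quick way to narrow this down (a sketch, not from the original post) is to fetch the page yourself and check whether any table tag is present in the raw HTML. read_html only parses what the server returns, so a site that builds its tables with JavaScript, or that rejects pandas' default user agent, will legitimately produce "No tables found":
import requests
import pandas as pd

url = 'https://www.mismarcadores.com/futbol/espana/laliga/clasificacion/'
# The browser-like user agent is an assumption; some sites refuse the default one
resp = requests.get(url, headers={'User-Agent': 'Mozilla/5.0'})
print(resp.status_code)
print('<table' in resp.text)  # False means the tables are built client-side

if '<table' in resp.text:
    # Parsing the already-fetched text rules out any user-agent problem
    df_list = pd.read_html(resp.text)
    print(len(df_list))
If the check prints False, the table is rendered in the browser, and a tool that executes JavaScript (or the site's underlying data endpoint) is needed instead.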

Related

not able to make a DataFrame with yFinance JSON values

I am trying to make a DataFrame with some of the information I receive from yfinance's info. I have a list of S&P 500 stock symbols, and I made a for loop over the symbols to retrieve the data:
for sym in symbol:
    x = yf.Ticker(sym)
    sector.append(x.info['forwardPE'])
However, every time I run it, it runs for a very long time and returns this error.
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-13-c87646d48ecd> in <module>
12 for sym in symbol:
13 x=yf.Ticker(sym)
---> 14 sector.append(x.info['forwardPE'])
15
~/opt/anaconda3/lib/python3.7/site-packages/yfinance/ticker.py in info(self)
136 #property
137 def info(self):
--> 138 return self.get_info()
139
140 #property
~/opt/anaconda3/lib/python3.7/site-packages/yfinance/base.py in get_info(self, proxy, as_dict, *args, **kwargs)
444
445 def get_info(self, proxy=None, as_dict=False, *args, **kwargs):
--> 446 self._get_fundamentals(proxy)
447 data = self._info
448 if as_dict:
~/opt/anaconda3/lib/python3.7/site-packages/yfinance/base.py in _get_fundamentals(self, kind, proxy)
283 # holders
284 url = "{}/{}/holders".format(self._scrape_url, self.ticker)
--> 285 holders = _pd.read_html(url)
286
287 if len(holders)>=3:
~/opt/anaconda3/lib/python3.7/site-packages/pandas/io/html.py in read_html(io, match, flavor, header, index_col, skiprows, attrs, parse_dates, thousands, encoding, decimal, converters, na_values, keep_default_na, displayed_only)
1098 na_values=na_values,
1099 keep_default_na=keep_default_na,
-> 1100 displayed_only=displayed_only,
1101 )
~/opt/anaconda3/lib/python3.7/site-packages/pandas/io/html.py in _parse(flavor, io, match, attrs, encoding, displayed_only, **kwargs)
913 break
914 else:
--> 915 raise retained
916
917 ret = []
~/opt/anaconda3/lib/python3.7/site-packages/pandas/io/html.py in _parse(flavor, io, match, attrs, encoding, displayed_only, **kwargs)
893
894 try:
--> 895 tables = p.parse_tables()
896 except ValueError as caught:
897 # if `io` is an io-like object, check if it's seekable
~/opt/anaconda3/lib/python3.7/site-packages/pandas/io/html.py in parse_tables(self)
211 list of parsed (header, body, footer) tuples from tables.
212 """
--> 213 tables = self._parse_tables(self._build_doc(), self.match, self.attrs)
214 return (self._parse_thead_tbody_tfoot(table) for table in tables)
215
~/opt/anaconda3/lib/python3.7/site-packages/pandas/io/html.py in _parse_tables(self, doc, match, attrs)
543
544 if not tables:
--> 545 raise ValueError("No tables found")
546
547 result = []
ValueError: No tables found
When I run it without the append (e.g. just x.info['forwardPE']), it runs fine and returns values one by one. Can anybody please help me with how I could fix this problem? Sorry for the horrible summarization, and thank you in advance.
You could put the line in a try block and except the errors to see which symbols aren't working properly. Since you have 500 tickers to go through, you may encounter more than one exception, so I'd recommend using a broad except Exception statement and (optionally) using traceback to get more info on each error:
import traceback
import yfinance as yf

symbol = ['TSLA', 'F', 'MNQ', 'MMM']
sector = []
for sym in symbol:
    try:
        x = yf.Ticker(sym)
        sector.append(x.info['forwardPE'])
    except Exception as error:
        print()
        print(f'{error} for symbol {sym}')
        print(traceback.format_exc())
print(sector)
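Judging from the traceback, the loop dies inside read_html on the Yahoo holders page, so the likely culprits are symbols whose pages have no holders tables at all (invalid or delisted tickers, for instance). With the try/except in place, those symbols get printed and skipped instead of killing the whole run.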

I can't seem to find a fix for the "ValueError: Unknown subheader signature" raised while reading a SAS file using pd.read_sas

I am trying to load a sas7bdat file in Python using pd.read_sas(), and I fail to load the data due to the below error.
ValueError Traceback (most recent call last)
<ipython-input-148-64f915da8256> in <module>
----> 1 df_sas = pd.read_sas('input_sasfile.sas7bdat', format='sas7bdat')
~\.conda\envs\overloaded-new\lib\site-packages\pandas\io\sas\sasreader.py in read_sas(filepath_or_buffer, format, index, encoding, chunksize, iterator)
121
122 reader = SAS7BDATReader(
--> 123 filepath_or_buffer, index=index, encoding=encoding, chunksize=chunksize
124 )
125 else:
~\.conda\envs\overloaded-new\lib\site-packages\pandas\io\sas\sas7bdat.py in __init__(self, path_or_buf, index, convert_dates, blank_missing, chunksize, encoding, convert_text, convert_header_text)
144
145 self._get_properties()
--> 146 self._parse_metadata()
147
148 def column_data_lengths(self):
~\.conda\envs\overloaded-new\lib\site-packages\pandas\io\sas\sas7bdat.py in _parse_metadata(self)
349 self.close()
350 raise ValueError("Failed to read a meta data page from the SAS file.")
--> 351 done = self._process_page_meta()
352
353 def _process_page_meta(self):
~\.conda\envs\overloaded-new\lib\site-packages\pandas\io\sas\sas7bdat.py in _process_page_meta(self)
355 pt = [const.page_meta_type, const.page_amd_type] + const.page_mix_types
356 if self._current_page_type in pt:
--> 357 self._process_page_metadata()
358 is_data_page = self._current_page_type & const.page_data_type
359 is_mix_page = self._current_page_type in const.page_mix_types
~\.conda\envs\overloaded-new\lib\site-packages\pandas\io\sas\sas7bdat.py in _process_page_metadata(self)
388 subheader_signature = self._read_subheader_signature(pointer.offset)
389 subheader_index = self._get_subheader_index(
--> 390 subheader_signature, pointer.compression, pointer.ptype
391 )
392 self._process_subheader(subheader_index, pointer)
~\.conda\envs\overloaded-new\lib\site-packages\pandas\io\sas\sas7bdat.py in _get_subheader_index(self, signature, compression, ptype)
401 else:
402 self.close()
--> 403 raise ValueError("Unknown subheader signature")
404 return index
405
ValueError: Unknown subheader signature
I found a relevant GitHub issue (https://github.com/pandas-dev/pandas/issues/24794), but it was closed because the problem was resolved by updating pandas.
Any help is greatly appreciated.
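Since the linked issue was closed as fixed by a pandas upgrade, the first things to try are checking the installed version and upgrading. The sketch below also falls back to the third-party sas7bdat package as an alternative reader; that package is an assumption on my part, not something from the thread, and must be installed first (pip install sas7bdat):
import pandas as pd
print(pd.__version__)  # the linked issue reports the error fixed in newer pandas

# After `pip install --upgrade pandas`, retry:
# df_sas = pd.read_sas('input_sasfile.sas7bdat', format='sas7bdat')

# Fallback via the sas7bdat package, which has its own parser:
from sas7bdat import SAS7BDAT
with SAS7BDAT('input_sasfile.sas7bdat') as reader:
    df_sas = reader.to_data_frame()
If both readers reject the file, the file itself may be truncated or written by a SAS version the parsers do not understand.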

appending json files in python

I am trying to append some JSON files in Python. I have the following code, which seems right; however, I am getting an error.
The code is as follows.
import pandas as pd

df1 = pd.DataFrame()
for i in range(0, 49):
    df = pd.read_json('/media/michael/extHDD/Kaggle/DeepFAke/DF_all/metadata{}.json'.format(i))
    df1.append(df.T)
The error is as follows.
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-76-ddb355627155> in <module>
3 df1=pd.DataFrame()
4 for i in range(0,49):
----> 5 df = pd.read_json ('/media/michael/extHDD/Kaggle/DeepFAke/DF_all/metadata{}.json'.format(i))
6 df1.append(df.T)
~/myenv/lib/python3.5/site-packages/pandas/io/json/_json.py in read_json(path_or_buf, orient, typ, dtype, convert_axes, convert_dates, keep_default_dates, numpy, precise_float, date_unit, encoding, lines, chunksize, compression)
590 return json_reader
591
--> 592 result = json_reader.read()
593 if should_close:
594 try:
~/myenv/lib/python3.5/site-packages/pandas/io/json/_json.py in read(self)
715 obj = self._get_object_parser(self._combine_lines(data.split("\n")))
716 else:
--> 717 obj = self._get_object_parser(self.data)
718 self.close()
719 return obj
~/myenv/lib/python3.5/site-packages/pandas/io/json/_json.py in _get_object_parser(self, json)
737 obj = None
738 if typ == "frame":
--> 739 obj = FrameParser(json, **kwargs).parse()
740
741 if typ == "series" or obj is None:
~/myenv/lib/python3.5/site-packages/pandas/io/json/_json.py in parse(self)
847
848 else:
--> 849 self._parse_no_numpy()
850
851 if self.obj is None:
~/myenv/lib/python3.5/site-packages/pandas/io/json/_json.py in _parse_no_numpy(self)
1091 if orient == "columns":
1092 self.obj = DataFrame(
-> 1093 loads(json, precise_float=self.precise_float), dtype=None
1094 )
1095 elif orient == "split":
ValueError: Expected object or value
The code works when I read each file individually. Would anyone be able to help me with this?
Thanks & Best Regards
Michael
The error occurs on the df = pd.read_json(...) line. It is likely that one of the files is nonexistent or malformed. My advice is to use a try/except to identify it:
for i in range(0, 49):
    try:
        df = pd.read_json('/media/michael/extHDD/Kaggle/DeepFAke/DF_all/metadata{}.json'.format(i))
    except:
        print('Error on iteration', i, ', file',
              '/media/michael/extHDD/Kaggle/DeepFAke/DF_all/metadata{}.json'.format(i))
        raise
    df1 = df1.append(df.T)  # append returns a new frame, so assign it back
Catching any exception is normally bad practice because it can hide truly abnormal conditions like an IO or memory error. That is the reason why I re-raise the original exception in the above code.
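Note also that DataFrame.append does not modify df1 in place; it returns a new frame, which is why the snippet assigns the result back to df1. Without that assignment, df1 would still be empty even once every file reads correctly.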

Python: how to read a url with ".data" suffix

I'm trying to read data from this url - "https://archive.ics.uci.edu/ml/machine-learning-databases/parkinsons/parkinsons.data" into a pandas dataframe.
I've used this technique:
park_df = pd.read_html('https://archive.ics.uci.edu/ml/machine-learning-databases/parkinsons/parkinsons.data', header=0, flavor='bs4')
but I get an error as shown below:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-18-804373f977ab> in <module>()
----> 1 park_df = pd.read_html('https://archive.ics.uci.edu/ml/machine-learning-databases/parkinsons/parkinsons.data', header=0, flavor='bs4')
~\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\io\html.py in read_html(io, match, flavor, header, index_col, skiprows, attrs, parse_dates, tupleize_cols, thousands, encoding, decimal, converters, na_values, keep_default_na, displayed_only)
985 decimal=decimal, converters=converters, na_values=na_values,
986 keep_default_na=keep_default_na,
--> 987 displayed_only=displayed_only)
~\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\io\html.py in _parse(flavor, io, match, attrs, encoding, displayed_only, **kwargs)
813 break
814 else:
--> 815 raise_with_traceback(retained)
816
817 ret = []
~\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\compat\__init__.py in raise_with_traceback(exc, traceback)
402 if traceback == Ellipsis:
403 _, _, traceback = sys.exc_info()
--> 404 raise exc.with_traceback(traceback)
405 else:
406 # this version of raise is a syntax error in Python 3
ValueError: No tables found
Can you suggest what I'm doing wrong here and what would be a better option? Please do open the URL to check how the data looks, with the header in the first row (containing the column names) and the data following below.
The read_html function is used to convert HTML tables to pandas DataFrames; since this file is in CSV format, use read_csv instead:
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/parkinsons/parkinsons.data'
df = pd.read_csv(url)
print (df.head())
name MDVP:Fo(Hz) MDVP:Fhi(Hz) MDVP:Flo(Hz) MDVP:Jitter(%) \
0 phon_R01_S01_1 119.992 157.302 74.997 0.00784
1 phon_R01_S01_2 122.400 148.650 113.819 0.00968
2 phon_R01_S01_3 116.682 131.111 111.555 0.01050
3 phon_R01_S01_4 116.676 137.871 111.366 0.00997
4 phon_R01_S01_5 116.014 141.781 110.655 0.01284
MDVP:Jitter(Abs) MDVP:RAP MDVP:PPQ Jitter:DDP MDVP:Shimmer ... \
0 0.00007 0.00370 0.00554 0.01109 0.04374 ...
1 0.00008 0.00465 0.00696 0.01394 0.06134 ...
2 0.00009 0.00544 0.00781 0.01633 0.05233 ...
3 0.00009 0.00502 0.00698 0.01505 0.05492 ...
4 0.00011 0.00655 0.00908 0.01966 0.06425 ...
Shimmer:DDA NHR HNR status RPDE DFA spread1 \
0 0.06545 0.02211 21.033 1 0.414783 0.815285 -4.813031
1 0.09403 0.01929 19.085 1 0.458359 0.819521 -4.075192
2 0.08270 0.01309 20.651 1 0.429895 0.825288 -4.443179
3 0.08771 0.01353 20.644 1 0.434969 0.819235 -4.117501
4 0.10470 0.01767 19.649 1 0.417356 0.823484 -3.747787
spread2 D2 PPE
0 0.266482 2.301442 0.284654
1 0.335590 2.486855 0.368674
2 0.311173 2.342259 0.332634
3 0.334147 2.405554 0.368975
4 0.234513 2.332180 0.410335
[5 rows x 24 columns]
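The .data suffix is just a file extension; what matters is that the content is comma-separated text, which is why read_csv works here. If a similar file had no header row (an assumption about other files, not this one), you would add header=None and an explicit names= list:
df = pd.read_csv(url, header=None, names=col_names)  # col_names is a hypothetical list of 24 labels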

incomplete read error returned in sqlite in pandas notebook

I am connecting to a website; I have created two tables to store the data, and I am trying to collect the data after a certain date, but an error is returned. I don't know if I need to add code to my handler section, but my code for collecting the data is shown below:
record_cnt = 0
for link in data_list_post:
    data = pd.read_table(link, sep=',')
    print('%s:%s rows %s columns' % (link[-10:-4], data.shape[0], data.shape[1]))
    record_cnt += data.shape[0]
    data.to_sql(name='post', con=conPost, flavor='sqlite', if_exists='append')
and the error I am returned is:
IncompleteRead: IncompleteRead(8437886 bytes read)
Full error traceback:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
C:\Program Files\Anaconda3\lib\http\client.py in _get_chunk_left(self)
540 try:
--> 541 chunk_left = self._read_next_chunk_size()
542 except ValueError:
C:\Program Files\Anaconda3\lib\http\client.py in _read_next_chunk_size(self)
507 try:
--> 508 return int(line, 16)
509 except ValueError:
ValueError: invalid literal for int() with base 16: b'00004000\r00:00,REGULAR,0000262144,0000327687 \n'
During handling of the above exception, another exception occurred:
IncompleteRead Traceback (most recent call last)
C:\Program Files\Anaconda3\lib\http\client.py in _readall_chunked(self)
557 while True:
--> 558 chunk_left = self._get_chunk_left()
559 if chunk_left is None:
C:\Program Files\Anaconda3\lib\http\client.py in _get_chunk_left(self)
542 except ValueError:
--> 543 raise IncompleteRead(b'')
544 if chunk_left == 0:
IncompleteRead: IncompleteRead(0 bytes read)
During handling of the above exception, another exception occurred:
IncompleteRead Traceback (most recent call last)
<ipython-input-13-e9dcb24183ff> in <module>()
1 record_cnt = 0
2 for link in data_list_post:
----> 3 data = pd.read_table(link, sep=',')
4 print('%s:%s rows %s columns' % (link[-10:-4],data.shape[0], data.shape[1])) #printing out values makes me feel safe....
5 record_cnt += data.shape[0]
C:\Program Files\Anaconda3\lib\site-packages\pandas\io\parsers.py in parser_f(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, squeeze, prefix, mangle_dupe_cols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, skipfooter, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, dayfirst, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, escapechar, comment, encoding, dialect, tupleize_cols, error_bad_lines, warn_bad_lines, skip_footer, doublequote, delim_whitespace, as_recarray, compact_ints, use_unsigned, low_memory, buffer_lines, memory_map, float_precision)
560 skip_blank_lines=skip_blank_lines)
561
--> 562 return _read(filepath_or_buffer, kwds)
563
564 parser_f.__name__ = name
C:\Program Files\Anaconda3\lib\site-packages\pandas\io\parsers.py in _read(filepath_or_buffer, kwds)
299 filepath_or_buffer, _, compression = get_filepath_or_buffer(
300 filepath_or_buffer, encoding,
--> 301 compression=kwds.get('compression', None))
302 kwds['compression'] = (inferred_compression if compression == 'infer'
303 else compression)
C:\Program Files\Anaconda3\lib\site-packages\pandas\io\common.py in get_filepath_or_buffer(filepath_or_buffer, encoding, compression)
315 # cat on the compression to the tuple returned by the function
316 to_return = (list(maybe_read_encoded_stream(req, encoding,
--> 317 compression)) +
318 [compression])
319 return tuple(to_return)
C:\Program Files\Anaconda3\lib\site-packages\pandas\io\common.py in maybe_read_encoded_stream(reader, encoding, compression)
235 reader = BytesIO(reader.read())
236 else:
--> 237 reader = StringIO(reader.read().decode(encoding, errors))
238 else:
239 if compression == 'gzip':
C:\Program Files\Anaconda3\lib\http\client.py in read(self, amt)
453
454 if self.chunked:
--> 455 return self._readall_chunked()
456
457 if self.length is None:
C:\Program Files\Anaconda3\lib\http\client.py in _readall_chunked(self)
563 return b''.join(value)
564 except IncompleteRead:
--> 565 raise IncompleteRead(b''.join(value))
566
567 def _readinto_chunked(self, b):
IncompleteRead: IncompleteRead(8437886 bytes read)
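The traceback shows the HTTP server ending a chunked response early while pandas streams the URL directly. A possible workaround (a sketch, under that assumption) is to download each file explicitly with a few retries and hand only completed text to pandas; data_list_post and conPost are the objects from the question:
import io
import time
import requests
import pandas as pd

def fetch_table(link, retries=3):
    # Retry the download a few times before giving up; a truncated chunked
    # response surfaces here as a requests.RequestException instead of
    # blowing up inside pandas
    for attempt in range(retries):
        try:
            resp = requests.get(link, timeout=60)
            resp.raise_for_status()
            return pd.read_csv(io.StringIO(resp.text))
        except requests.RequestException as err:
            print('retry %d for %s: %s' % (attempt + 1, link, err))
            time.sleep(2)
    raise RuntimeError('could not download %s' % link)

record_cnt = 0
for link in data_list_post:
    data = fetch_table(link)
    print('%s:%s rows %s columns' % (link[-10:-4], data.shape[0], data.shape[1]))
    record_cnt += data.shape[0]
    data.to_sql(name='post', con=conPost, flavor='sqlite', if_exists='append')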
