.describe() and .info() not working for me in Jupyter Notebook - python

I am trying to use the describe method to get summary statistics of my data, but I keep on getting this error message. Any way to sort this out? The .info() method is also giving me the same problem.
TypeError Traceback (most recent call last)
<ipython-input-28-614cd2726f37> in <module>
----> 1 players_final.describe()
~\anaconda3\lib\site-packages\pandas\core\generic.py in describe(self, percentiles, include, exclude)
10265 elif (include is None) and (exclude is None):
10266 # when some numerics are found, keep only numerics
> 10267 data = self.select_dtypes(include=[np.number])
10268 if len(data.columns) == 0:
10269 data = self
~\anaconda3\lib\site-packages\pandas\core\frame.py in select_dtypes(self, include, exclude)
3420 # the "union" of the logic of case 1 and case 2:
3421 # we get the included and excluded, and return their logical and
-> 3422 include_these = Series(not bool(include), index=self.columns)
3423 exclude_these = Series(not bool(exclude), index=self.columns)
3424
~\anaconda3\lib\site-packages\pandas\core\series.py in __init__(self, data, index, dtype, name, copy, fastpath)
309 data = data.copy()
310 else:
--> 311 data = sanitize_array(data, index, dtype, copy, raise_cast_failure=True)
312
313 data = SingleBlockManager(data, index, fastpath=True)
~\anaconda3\lib\site-packages\pandas\core\internals\construction.py in sanitize_array(data, index, dtype, copy, raise_cast_failure)
710 value = maybe_cast_to_datetime(value, dtype)
711
--> 712 subarr = construct_1d_arraylike_from_scalar(value, len(index), dtype)
713
714 else:
~\anaconda3\lib\site-packages\pandas\core\dtypes\cast.py in construct_1d_arraylike_from_scalar(value, length, dtype)
1231 value = ensure_str(value)
1232
-> 1233 subarr = np.empty(length, dtype=dtype)
1234 subarr.fill(value)
1235
TypeError: Cannot interpret '<attribute 'dtype' of 'numpy.generic' objects>' as a data type
​

Related

Numpy command to calculate sine (and cosine) consumes all RAM

I am trying to calculate sine and cosine of month number (e.g. Jan=1, Feb=2, ... Dec=12) for a series of observations that covers ~5 years:
def get_sin(value, max_value):
    """Return the sine of *value* mapped onto one full cycle of length *max_value*.

    Used for cyclical encoding (e.g. month 1..12 -> angle 0..2*pi).
    """
    # Same expression as the original: value * (2*pi / max_value).
    sine = np.sin(value * (2. * np.pi / max_value))
    return sine
def get_cosine(value, max_value):
    """Return the cosine of *value* mapped onto one full cycle of length *max_value*.

    Companion to get_sin for cyclical (sine/cosine) feature encoding.
    """
    # Same expression as the original: value * (2*pi / max_value).
    cosine = np.cos(value * (2. * np.pi / max_value))
    return cosine
I run the following command on the data:
df_ufvdate['month_sine'] = df_ufvdate.apply(lambda row: get_sin(month, 12), axis=1)
However, my desktop RAM is exhausted, and then I get the following MemoryError:
---------------------------------------------------------------------------
MemoryError Traceback (most recent call last)
Input In [466], in <cell line: 1>()
----> 1 df_ufvdate['month_sine'] = df_ufvdate.apply(lambda row: get_sin(month, 12), axis=1)
File ~\Anaconda3\lib\site-packages\pandas\core\frame.py:8839, in DataFrame.apply(self, func, axis, raw, result_type, args, **kwargs)
8828 from pandas.core.apply import frame_apply
8830 op = frame_apply(
8831 self,
8832 func=func,
(...)
8837 kwargs=kwargs,
8838 )
-> 8839 return op.apply().__finalize__(self, method="apply")
File ~\Anaconda3\lib\site-packages\pandas\core\apply.py:727, in FrameApply.apply(self)
724 elif self.raw:
725 return self.apply_raw()
--> 727 return self.apply_standard()
File ~\Anaconda3\lib\site-packages\pandas\core\apply.py:854, in FrameApply.apply_standard(self)
851 results, res_index = self.apply_series_generator()
853 # wrap results
--> 854 return self.wrap_results(results, res_index)
File ~\Anaconda3\lib\site-packages\pandas\core\apply.py:880, in FrameApply.wrap_results(self, results, res_index)
878 # see if we can infer the results
879 if len(results) > 0 and 0 in results and is_sequence(results[0]):
--> 880 return self.wrap_results_for_axis(results, res_index)
882 # dict of scalars
883
884 # the default dtype of an empty Series will be `object`, but this
885 # code can be hit by df.mean() where the result should have dtype
886 # float64 even if it's an empty Series.
887 constructor_sliced = self.obj._constructor_sliced
File ~\Anaconda3\lib\site-packages\pandas\core\apply.py:1027, in FrameColumnApply.wrap_results_for_axis(self, results, res_index)
1023 result.index = res_index
1025 # we may want to infer results
1026 else:
-> 1027 result = self.infer_to_same_shape(results, res_index)
1029 return result
File ~\Anaconda3\lib\site-packages\pandas\core\apply.py:1033, in FrameColumnApply.infer_to_same_shape(self, results, res_index)
1031 def infer_to_same_shape(self, results: ResType, res_index: Index) -> DataFrame:
1032 """infer the results to the same shape as the input object"""
-> 1033 result = self.obj._constructor(data=results)
1034 result = result.T
1036 # set the index
File ~\Anaconda3\lib\site-packages\pandas\core\frame.py:636, in DataFrame.__init__(self, data, index, columns, dtype, copy)
630 mgr = self._init_mgr(
631 data, axes={"index": index, "columns": columns}, dtype=dtype, copy=copy
632 )
634 elif isinstance(data, dict):
635 # GH#38939 de facto copy defaults to False only in non-dict cases
--> 636 mgr = dict_to_mgr(data, index, columns, dtype=dtype, copy=copy, typ=manager)
637 elif isinstance(data, ma.MaskedArray):
638 import numpy.ma.mrecords as mrecords
File ~\Anaconda3\lib\site-packages\pandas\core\internals\construction.py:494, in dict_to_mgr(data, index, columns, dtype, typ, copy)
487 arrays = [
488 arr if not is_datetime64tz_dtype(arr) else arr.copy() for arr in arrays
489 ]
491 if copy:
492 # arrays_to_mgr (via form_blocks) won't make copies for EAs
493 # dtype attr check to exclude EADtype-castable strs
--> 494 arrays = [
495 x
496 if not hasattr(x, "dtype") or not isinstance(x.dtype, ExtensionDtype)
497 else x.copy()
498 for x in arrays
499 ]
500 # TODO: can we get rid of the dt64tz special case above?
502 return arrays_to_mgr(arrays, columns, index, dtype=dtype, typ=typ, consolidate=copy)
File ~\Anaconda3\lib\site-packages\pandas\core\internals\construction.py:497, in <listcomp>(.0)
487 arrays = [
488 arr if not is_datetime64tz_dtype(arr) else arr.copy() for arr in arrays
489 ]
491 if copy:
492 # arrays_to_mgr (via form_blocks) won't make copies for EAs
493 # dtype attr check to exclude EADtype-castable strs
494 arrays = [
495 x
496 if not hasattr(x, "dtype") or not isinstance(x.dtype, ExtensionDtype)
--> 497 else x.copy()
498 for x in arrays
499 ]
500 # TODO: can we get rid of the dt64tz special case above?
502 return arrays_to_mgr(arrays, columns, index, dtype=dtype, typ=typ, consolidate=copy)
File ~\Anaconda3\lib\site-packages\pandas\core\generic.py:6032, in NDFrame.copy(self, deep)
5926 #final
5927 def copy(self: NDFrameT, deep: bool_t = True) -> NDFrameT:
5928 """
5929 Make a copy of this object's indices and data.
5930
(...)
6030 dtype: object
6031 """
-> 6032 data = self._mgr.copy(deep=deep)
6033 self._clear_item_cache()
6034 return self._constructor(data).__finalize__(self, method="copy")
File ~\Anaconda3\lib\site-packages\pandas\core\internals\managers.py:603, in BaseBlockManager.copy(self, deep)
600 else:
601 new_axes = list(self.axes)
--> 603 res = self.apply("copy", deep=deep)
605 res.axes = new_axes
607 if self.ndim > 1:
608 # Avoid needing to re-compute these
File ~\Anaconda3\lib\site-packages\pandas\core\internals\managers.py:304, in BaseBlockManager.apply(self, f, align_keys, ignore_failures, **kwargs)
302 applied = b.apply(f, **kwargs)
303 else:
--> 304 applied = getattr(b, f)(**kwargs)
305 except (TypeError, NotImplementedError):
306 if not ignore_failures:
File ~\Anaconda3\lib\site-packages\pandas\core\internals\blocks.py:643, in Block.copy(self, deep)
641 values = self.values
642 if deep:
--> 643 values = values.copy()
644 return type(self)(values, placement=self._mgr_locs, ndim=self.ndim)
File ~\Anaconda3\lib\site-packages\pandas\core\arrays\masked.py:680, in BaseMaskedArray.copy(self)
678 def copy(self: BaseMaskedArrayT) -> BaseMaskedArrayT:
679 data, mask = self._data, self._mask
--> 680 data = data.copy()
681 mask = mask.copy()
682 return type(self)(data, mask, copy=False)
MemoryError: Unable to allocate 404. KiB for an array with shape (51724,) and data type float64
I suppose there is something very inefficient with my coding. Can anybody suggest what I am doing wrong?
UPDATE:
I noticed something very weird about variable 'month'. I used
df_ufvdate['month'] = df_ufvdate['month'].astype('int64')
to convert 'month' into an integer and when I run df_ufvdate.info(max_cols=250, show_counts='True') I see that 'month' is type 'int64':
month 51724 non-null int64
However, when I run
df_ufvdate['month'].describe()
I get that 'month' is type 'float64':
count 51724.000000
mean 8.030895
std 3.693370
min 1.000000
25% 5.000000
50% 9.000000
75% 11.000000
max 12.000000
Name: month, dtype: float64
Here is more info on df_ufvdate:
<class 'pandas.core.frame.DataFrame'>
Int64Index: 51724 entries, 1 to 62618
Data columns (total 211 columns)
dtypes: Int64(34), float64(105), int64(1), object(71)
memory usage: 85.3+ MB
Here is my desktop specs:
Windows 64,
RAM: 24GB,
Jupyter: 6.4.8,
Python 3.9.12 (main, Apr 4 2022, 05:22:27) [MSC v.1916 64 bit (AMD64)]
I got it fixed:
def get_sin(row, column, max_value):
    """Row-wise sine for cyclical encoding: read row[column], map onto a 2*pi cycle.

    Intended for use with DataFrame.apply(..., axis=1), where *row* is a Series.
    """
    value = row[column]
    sine = np.sin(value * (2. * np.pi / max_value))
    return sine
def get_cosine(row, column, max_value):
    """Row-wise cosine for cyclical encoding: read row[column], map onto a 2*pi cycle.

    Intended for use with DataFrame.apply(..., axis=1), where *row* is a Series.
    """
    value = row[column]
    cosine = np.cos(value * (2. * np.pi / max_value))
    return cosine
and then these lambdas will do the trick:
df_ufvdate['month_sine'] = df_ufvdate.apply(lambda row: get_sin(row, 'month', 12), axis=1)
df_ufvdate['month_cosine'] = df_ufvdate.apply(lambda row: get_cosine(row, 'month', 12), axis=1)
Thank all who commented on this question!

could not convert string to float - object type

I'm working with a dataframe in Python using Pandas and Jupyter Notebook, and my dataframe has Longitude and Latitude columns with values like '-23,4588'. Somehow, every time I try to convert it to float, I get an error telling 'could not convert string to float'.
I tried to change the comma, tried to change the .csv column type to float, but nothing works.
A part of my code:
ValueError Traceback (most recent call last)
C:\TEMP/ipykernel_12640/4061618161.py in <module>
----> 1 newocorr_sjc['Latitude'] = newocorr_sjc['Latitude'].astype(float)
c:\users\caique.fernandes\appdata\local\programs\python\python39\lib\site-packages\pandas\core\generic.py in astype(self, dtype, copy, errors)
5875 else:
5876 # else, only a single dtype is given
-> 5877 new_data = self._mgr.astype(dtype=dtype, copy=copy, errors=errors)
5878 return self._constructor(new_data).__finalize__(self, method="astype")
5879
c:\users\caique.fernandes\appdata\local\programs\python\python39\lib\site-packages\pandas\core\internals\managers.py in astype(self, dtype, copy, errors)
629 self, dtype, copy: bool = False, errors: str = "raise"
630 ) -> "BlockManager":
--> 631 return self.apply("astype", dtype=dtype, copy=copy, errors=errors)
632
633 def convert(
c:\users\caique.fernandes\appdata\local\programs\python\python39\lib\site-packages\pandas\core\internals\managers.py in apply(self, f, align_keys, ignore_failures, **kwargs)
425 applied = b.apply(f, **kwargs)
426 else:
--> 427 applied = getattr(b, f)(**kwargs)
428 except (TypeError, NotImplementedError):
429 if not ignore_failures:
c:\users\caique.fernandes\appdata\local\programs\python\python39\lib\site-packages\pandas\core\internals\blocks.py in astype(self, dtype, copy, errors)
671 vals1d = values.ravel()
672 try:
--> 673 values = astype_nansafe(vals1d, dtype, copy=True)
674 except (ValueError, TypeError):
675 # e.g. astype_nansafe can fail on object-dtype of strings
c:\users\caique.fernandes\appdata\local\programs\python\python39\lib\site-packages\pandas\core\dtypes\cast.py in astype_nansafe(arr, dtype, copy, skipna)
1095 if copy or is_object_dtype(arr) or is_object_dtype(dtype):
1096 # Explicit copy, or required since NumPy can't view from / to object.
-> 1097 return arr.astype(dtype, copy=True)
1098
1099 return arr.view(dtype)
ValueError: could not convert string to float: '-23,5327'
Maybe you should use decimal=',' as argument of pd.read_csv:
df = pd.read_csv('data.csv', sep=';', decimal=',')
>>> df.select_dtypes(float)
17 22 23
0 17.5 -23.5327 -46.8182
1 56.3 -23.4315 -47.1269

ValueError: Shape of passed values is (37679, 43), indices imply (37679, 41)

I am trying to group horse data by races. I am using pivot function to try do this, but I keep getting a Value error.
def group_horse_and_result(element):
    """Sort key for pivoted MultiIndex columns (name, level) tuples.

    'placing' columns are pushed after all others by offsetting their level
    by 100; every other column sorts by its level alone.
    """
    if element[0] == 'placing':
        return 100 + element[1]
    else:
        return element[1]
data = data.pivot(index='id', columns='barrier', values=data.columns[2:])
rearranged_columns = sorted(list(data.columns.values), key=group_horse_and_result)
data = data[rearranged_columns]
print(data.head())
data.fillna(0)
And I keep getting this error result:
AssertionError Traceback (most recent call last)
<ipython-input-253-97da160dc172> in <module>
5 return element[1]
6
----> 7 data = data.pivot(index='race_id', columns='placing', values=data.columns[2:])
8 rearranged_columns = sorted(list(data.columns.values), key=group_horse_and_result)
9 data = data[rearranged_columns]
~\anaconda3\lib\site-packages\pandas\core\frame.py in pivot(self, index, columns, values)
6672 from pandas.core.reshape.pivot import pivot
6673
-> 6674 return pivot(self, index=index, columns=columns, values=values)
6675
6676 _shared_docs[
~\anaconda3\lib\site-packages\pandas\core\reshape\pivot.py in pivot(data, index, columns, values)
470 # Exclude tuple because it is seen as a single column name
471 values = cast(Sequence[Label], values)
--> 472 indexed = data._constructor(
473 data[values]._values, index=index, columns=values
474 )
~\anaconda3\lib\site-packages\pandas\core\frame.py in __init__(self, data, index, columns, dtype, copy)
495 mgr = init_dict({data.name: data}, index, columns, dtype=dtype)
496 else:
--> 497 mgr = init_ndarray(data, index, columns, dtype=dtype, copy=copy)
498
499 # For data is list-like, or Iterable (will consume into list)
~\anaconda3\lib\site-packages\pandas\core\internals\construction.py in init_ndarray(values, index, columns, dtype, copy)
232 block_values = [values]
233
--> 234 return create_block_manager_from_blocks(block_values, [columns, index])
235
236
~\anaconda3\lib\site-packages\pandas\core\internals\managers.py in create_block_manager_from_blocks(blocks, axes)
1663 ]
1664
-> 1665 mgr = BlockManager(blocks, axes)
1666 mgr._consolidate_inplace()
1667 return mgr
~\anaconda3\lib\site-packages\pandas\core\internals\managers.py in __init__(self, blocks, axes, do_integrity_check)
147
148 if do_integrity_check:
--> 149 self._verify_integrity()
150
151 # Populate known_consolidate, blknos, and blklocs lazily
~\anaconda3\lib\site-packages\pandas\core\internals\managers.py in _verify_integrity(self)
326 raise construction_error(tot_items, block.shape[1:], self.axes)
327 if len(self.items) != tot_items:
--> 328 raise AssertionError(
329 "Number of manager items must equal union of "
330 f"block items\n# manager items: {len(self.items)}, # "
AssertionError: Number of manager items must equal union of block items
# manager items: 42, # tot_items: 44
Is this something to do with my data pre-processing, or is my code wrong here? Relatively new to coding, so apologies if the wording of my question is off. The table shape is 37679,44.
It might be because of duplicates among the columns.
The duplicate columns can be identified using data.columns.duplicated().

Pandas error when filtering rows based on multiple column conditionals, "ValueError: invalid literal for int() with base 10: "

I am getting a 'ValueError: invalid literal for int() with base 10: ' when I try to filter the dataframe using multiple column conditions
Here is the code to set up the pandas dataframe. Warning: it'll download 6 mb of data. Can run in Google Colab if too concerned.
Code to import stuff and download the data
#Import stuff
import re
import os
import zipfile
from urllib.request import urlretrieve
from os.path import isfile, isdir
import requests
#Define Download Function
def download_file_from_google_drive(id, destination):
    """Download the Google Drive file with the given *id* into *destination*.

    Large files trigger a virus-scan confirmation page; when the first request
    sets a download-warning cookie, the request is re-issued with the confirm
    token so the actual file content is streamed.
    """
    URL = "https://docs.google.com/uc?export=download"
    session = requests.Session()
    response = session.get(URL, params={'id': id}, stream=True)
    token = get_confirm_token(response)
    if token:
        # Second request carries the confirmation token from the cookie.
        params = {'id': id, 'confirm': token}
        response = session.get(URL, params=params, stream=True)
    save_response_content(response, destination)
def get_confirm_token(response):
    """Return Google's download-warning cookie value from *response*, or None.

    The presence of a cookie whose name starts with 'download_warning' means
    Drive is asking for confirmation before serving a large file.
    """
    for key, value in response.cookies.items():
        if key.startswith('download_warning'):
            return value
    return None
def save_response_content(response, destination):
    """Stream the body of *response* to the file path *destination* in chunks."""
    CHUNK_SIZE = 32768
    with open(destination, "wb") as f:
        for chunk in response.iter_content(CHUNK_SIZE):
            if chunk:  # filter out keep-alive new chunks (empty bytes)
                f.write(chunk)
#Download data
download_file_from_google_drive('1sZk3WWgdyHLru7q1KSWQwCT4nwwzHlpY', 'TheAnimeList.csv')
Code to set up the pandas dataframe
download_file_from_google_drive('1sZk3WWgdyHLru7q1KSWQwCT4nwwzHlpY', 'TheAnimeList.csv')
animeuser = pd.read_csv('TheAnimeList.csv' )
animeuser = animeuser[['anime_id','title_english', 'popularity', 'rank']]
animeuser.head()
anime_id title_english popularity rank
0 11013 Inu X Boku Secret Service 231 1274.0
1 2104 My Bride is a Mermaid 366 727.0
2 5262 Shugo Chara!! Doki 1173 1508.0
3 721 Princess Tutu 916 307.0
4 12365 Bakuman. 426 50.0
I am trying to filter rows based on column conditionals. First I tried
animeuser = animeuser[ (animeuser.popularity >= 3000) | (animeuser.rank >= 3000) ]
But that gave me this error
TypeError Traceback (most recent call last)
<ipython-input-39-8fb6d8508f25> in <module>()
----> 1 animeuser = animeuser[ (animeuser.popularity >= 3000) | (animeuser.rank >= 3000) ]
TypeError: '>=' not supported between instances of 'method' and 'int'
Then I tried
animeuser = animeuser[ ( animeuser.astype(int)['popularity'] >= 3000 ) | ( animeuser.astype(int)['rank'] >= 3000 ) ]
But that gave me this error
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-40-a2ea65786b2a> in <module>()
----> 1 animeuser = animeuser[ ( animeuser.astype(int)['popularity'] >= 3000 ) | ( animeuser.astype(int)['rank'] >= 3000 ) ]
/usr/local/lib/python3.6/dist-packages/pandas/util/_decorators.py in wrapper(*args, **kwargs)
116 else:
117 kwargs[new_arg_name] = new_arg_value
--> 118 return func(*args, **kwargs)
119 return wrapper
120 return _deprecate_kwarg
/usr/local/lib/python3.6/dist-packages/pandas/core/generic.py in astype(self, dtype, copy, errors, **kwargs)
4002 # else, only a single dtype is given
4003 new_data = self._data.astype(dtype=dtype, copy=copy, errors=errors,
-> 4004 **kwargs)
4005 return self._constructor(new_data).__finalize__(self)
4006
/usr/local/lib/python3.6/dist-packages/pandas/core/internals.py in astype(self, dtype, **kwargs)
3460
3461 def astype(self, dtype, **kwargs):
-> 3462 return self.apply('astype', dtype=dtype, **kwargs)
3463
3464 def convert(self, **kwargs):
/usr/local/lib/python3.6/dist-packages/pandas/core/internals.py in apply(self, f, axes, filter, do_integrity_check, consolidate, **kwargs)
3327
3328 kwargs['mgr'] = self
-> 3329 applied = getattr(b, f)(**kwargs)
3330 result_blocks = _extend_blocks(applied, result_blocks)
3331
/usr/local/lib/python3.6/dist-packages/pandas/core/internals.py in astype(self, dtype, copy, errors, values, **kwargs)
542 def astype(self, dtype, copy=False, errors='raise', values=None, **kwargs):
543 return self._astype(dtype, copy=copy, errors=errors, values=values,
--> 544 **kwargs)
545
546 def _astype(self, dtype, copy=False, errors='raise', values=None,
/usr/local/lib/python3.6/dist-packages/pandas/core/internals.py in _astype(self, dtype, copy, errors, values, klass, mgr, **kwargs)
623
624 # _astype_nansafe works fine with 1-d only
--> 625 values = astype_nansafe(values.ravel(), dtype, copy=True)
626 values = values.reshape(self.shape)
627
/usr/local/lib/python3.6/dist-packages/pandas/core/dtypes/cast.py in astype_nansafe(arr, dtype, copy)
690 elif arr.dtype == np.object_ and np.issubdtype(dtype.type, np.integer):
691 # work around NumPy brokenness, #1987
--> 692 return lib.astype_intsafe(arr.ravel(), dtype).reshape(arr.shape)
693
694 if dtype.name in ("datetime64", "timedelta64"):
pandas/_libs/lib.pyx in pandas._libs.lib.astype_intsafe()
pandas/_libs/src/util.pxd in util.set_value_at_unsafe()
ValueError: invalid literal for int() with base 10: 'Inu X Boku Secret Service'
The string 'Inu X Boku Secret Service' belongs to the 'title_english' column in the very first row of the dataframe. But the 'rank' and 'popularity' columns seem to be floats and ints.
I even tried looking at the datatypes
animeuser.dtypes
anime_id int64
title_english object
popularity int64
rank float64
dtype: object
And everything seems to be in order.
The first error you are facing is because rank is a method of pandas.DataFrame. Methods have precedence over column access via attribute notation. So in order to access the data you need to use bracket notation: animeuser['rank'].
The second error occurs because you try to represent the whole data frame as int which is not possible for various columns. This would only be possible for the 'rank' and 'popularity' columns.
With statement
animeuser.astype(int)['popularity']
you are trying to convert all animeuser columns to int, and got an error on a string column. Try just
animeuser['popularity']

Received "ValueError: If using all scalar values, you must pass an index" in Python

I have run the following code on python in order to retrieve various crypto currency closing prices from their inception. I have run it successfully using the following tickers:
tickers = ['USDT_BTC','USDT_BCH','USDT_ETC','USDT_XMR','USDT_ETH','USDT_DASH',
'USDT_XRP','USDT_LTC','USDT_NXT','USDT_STR','USDT_REP','USDT_ZEC']
I now have changed it as follows (full code included) and get a ValueError.
[LN1]
def CryptoDataCSV(symbol, frequency):
    """Download Poloniex chart data for *symbol* and save it as '<symbol>.csv'.

    Params: String symbol, int frequency = 300,900,1800,7200,14400,86400
    (candle period in seconds). Fetches from the first available date.
    Raises ValueError via pd.read_json when the API returns an error object
    (e.g. {"error": "Invalid currency pair."}) instead of chart data.
    """
    url = ('https://poloniex.com/public?command=returnChartData&currencyPair='
           + symbol + '&end=9999999999&period=' + str(frequency) + '&start=0')
    df = pd.read_json(url)
    df.set_index('date', inplace=True)
    df.to_csv(symbol + '.csv')
    print('Processed: ' + symbol)
[LN2]
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
[LN3]
tickers = 'ETH_BTC','STR_BTC','XMR_BTC','XRP_BTC','LTC_BTC','DASH_BTC',
'ETC_BTC','POT_BTC','OMG_BTC','FCT_BTC','ZEC_BTC','BTS_BTC','VTC_BTC',
'XEM_BTC','MAID_BTC','DGB_BTC','STRAT_BTC','LSK_BTC','XVC_BTC','SC_BTC',
'DOGE_BTC','XBC_BTC','GNT_BTC','EMC2_BTC','CLAM_BTC','RIC_BTC','SYS_BTC',
'DCR_BTC','STEEM_BTC','ZRX_BTC','GAME_BTC','VIA_BTC','NXC_BTC','NXT_BTC'
,'VRC_BTC','NAV_BTC','PINK_BTC','STORJ_BTC','ARDR_BTC','BCN_BTC','CVC_BTC',
'EXP_BTC','LBC_BTC','GNO_BTC','GAS_BTC','OMNI_BTC','XCP_BTC','NEOS_BTC',
'BURST_BTC','AMP_BTC','FLDC_BTC','FLO_BTC','SBD_BTC','BLK_BTC','BTCD_BTC',
'NOTE_BTC','GRC_BTC','PPC_BTC','BTM_BTC','XPM_BTC','NMC_BTC','PASC_BTC',
'NAUT_BTC','BELA_BTC','SJCX_BTC','HUC_BTC','RADS_BTC']
[LN4]
for ticker in tickers:
CryptoDataCSV(ticker, 86400)
I now get the following error:
--------------------------------------------------------------------------- ValueError Traceback (most recent call
last) in ()
1 for ticker in tickers:
----> 2 CryptoDataCSV(ticker, 86400)
in CryptoDataCSV(symbol, frequency)
7 url ='https://poloniex.com/public?command=returnChartData&currencyPair='+symbol+'&end=9999999999&period='+str(frequency)+'&start=0'
8
----> 9 df = pd.read_json(url)
10
11 df.set_index('date',inplace=True)
~\Anaconda3\lib\site-packages\pandas\io\json\json.py in
read_json(path_or_buf, orient, typ, dtype, convert_axes,
convert_dates, keep_default_dates, numpy, precise_float, date_unit,
encoding, lines)
352 obj = FrameParser(json, orient, dtype, convert_axes, convert_dates,
353 keep_default_dates, numpy, precise_float,
--> 354 date_unit).parse()
355
356 if typ == 'series' or obj is None:
~\Anaconda3\lib\site-packages\pandas\io\json\json.py in parse(self)
420
421 else:
--> 422 self._parse_no_numpy()
423
424 if self.obj is None:
~\Anaconda3\lib\site-packages\pandas\io\json\json.py in
_parse_no_numpy(self)
637 if orient == "columns":
638 self.obj = DataFrame(
--> 639 loads(json, precise_float=self.precise_float), dtype=None)
640 elif orient == "split":
641 decoded = dict((str(k), v)
~\Anaconda3\lib\site-packages\pandas\core\frame.py in init(self,
data, index, columns, dtype, copy)
273 dtype=dtype, copy=copy)
274 elif isinstance(data, dict):
--> 275 mgr = self._init_dict(data, index, columns, dtype=dtype)
276 elif isinstance(data, ma.MaskedArray):
277 import numpy.ma.mrecords as mrecords
~\Anaconda3\lib\site-packages\pandas\core\frame.py in _init_dict(self,
data, index, columns, dtype)
409 arrays = [data[k] for k in keys]
410
--> 411 return _arrays_to_mgr(arrays, data_names, index, columns, dtype=dtype)
412
413 def _init_ndarray(self, values, index, columns, dtype=None, copy=False):
~\Anaconda3\lib\site-packages\pandas\core\frame.py in
_arrays_to_mgr(arrays, arr_names, index, columns, dtype) 5494 # figure out the index, if necessary 5495 if index is None:
-> 5496 index = extract_index(arrays) 5497 else: 5498 index = _ensure_index(index)
~\Anaconda3\lib\site-packages\pandas\core\frame.py in
extract_index(data) 5533 5534 if not indexes and not
raw_lengths:
-> 5535 raise ValueError('If using all scalar values, you must pass' 5536 ' an index') 5537
ValueError: If using all scalar values, you must pass an index
I just tested your data, and it appears that some of your currency pairs do not work at all, returning a json of the form:
{"error":"Invalid currency pair."}
When this is returned, pd.read_json throws an error, because it can't convert this to a dataframe.
The simplest workaround is to use a try-except brace and handle any non-working tickers.
broken_tickers = []
for t in tickers:
url ='https://poloniex.com/public?command=returnChartData&currencyPair={}&end=9999999999&period={}&start=0'.format(t, 86400)
try:
df = pd.read_json(url)
except ValueError:
broken_tickers.append(t)
continue
df.set_index('date')
df.to_csv('{}.csv'.format(t))
I've gotten rid of the function, I didn't really feel it necessary here but you can add it back in.

Categories

Resources