Give row number as index when reading csv - python

I have a csv file like the one here below:
30,60,14.3,53.6,0.71,403,0
30,60,15.3,54.9,0.72,403,0
30,60,16.5,56.2,0.73,403,0
30,60,17.9,57.5,0.74,403,0
No header, just data. The columns are
colNames = {
'doa_in1': np.float64, 'doa_in2': np.float64,
'doa_est1': np.float64, 'doa_est2': np.float64,
'rho': np.float64,
'seed': np.int32, 'matl_chan':np.int32
}
I read the csv with:
tmp_df = pd.read_csv(
io.BytesIO(tmp_csv), encoding='utf8',
header=None,
names=colNames.keys(), dtype=colNames,
converters={
'matl_chan': lambda x: bool(int(x))
}
)
This gives a warning, as I'm giving two possible conversions for matl_chan, but it's just a warning: pandas will use only what is in converters (i.e. the lambda function)
I would like to have as index for each row a number or something unique.
That's because, then I process tmp_df with this function
def remove_lines(df):
    """Drop rows whose total DOA estimation error exceeds THRES once
    MUSIC has started converging.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain columns 'doa_in1', 'doa_in2', 'doa_est1', 'doa_est2'.

    Returns
    -------
    pandas.DataFrame
        Copy of ``df`` with the offending rows removed.
    """
    THRES = 50
    THRES_angle = 10  # degrees
    # NOTE(review): stays True until a row labelled 0 is seen — rows before
    # that (if the index does not start at 0) are treated as converging.
    is_converging = True
    # Collect labels and drop once at the end: a single drop is O(n) instead
    # of O(n^2), and it no longer raises KeyError when the index contains
    # duplicate labels (the original re-dropped an already-removed label).
    to_drop = []
    for idx, row in df.iterrows():
        if idx == 0:
            is_converging = False
        # check if MUSIC started converging: both estimates within
        # THRES_angle of their true input angles
        if abs(row['doa_est1'] - row['doa_in1']) < THRES_angle:
            if abs(row['doa_est2'] - row['doa_in2']) < THRES_angle:
                is_converging = True
        # total absolute estimation error of this row
        err = abs(row['doa_est1'] - row['doa_in1']) + abs(row['doa_est2'] - row['doa_in2'])
        if err > THRES and is_converging:
            to_drop.append(idx)
    return df.drop(to_drop)
All rows, though, have index 30, so the function doesn't drop anything as I get this error:
KeyError: '[30] not found in axis'
The full stacktrace is
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
<ipython-input-143-b61c0402f9d7> in <module>
----> 1 df=get_dataframe()
<ipython-input-121-b76aab8b17ee> in get_dataframe()
24 continue
25
---> 26 tmp_df_sanitized = remove_lines(tmp_df)
27 all_dataframes.append(tmp_df_sanitized)
28
<ipython-input-142-31019390251a> in remove_lines(df)
62 err = abs(row['doa_est1']-row['doa_in1'])+abs(row['doa_est2']-row['doa_in2'])
63 if err > THRES and is_converging:
---> 64 df=df.drop(idx)
65 print("dropped {}".format(idx))
66 return df
/usr/lib/python3.7/site-packages/pandas/core/frame.py in drop(self, labels, axis, index, columns, level, inplace, errors)
3938 index=index, columns=columns,
3939 level=level, inplace=inplace,
-> 3940 errors=errors)
3941
3942 #rewrite_axis_style_signature('mapper', [('copy', True),
/usr/lib/python3.7/site-packages/pandas/core/generic.py in drop(self, labels, axis, index, columns, level, inplace, errors)
3778 for axis, labels in axes.items():
3779 if labels is not None:
-> 3780 obj = obj._drop_axis(labels, axis, level=level, errors=errors)
3781
3782 if inplace:
/usr/lib/python3.7/site-packages/pandas/core/generic.py in _drop_axis(self, labels, axis, level, errors)
3810 new_axis = axis.drop(labels, level=level, errors=errors)
3811 else:
-> 3812 new_axis = axis.drop(labels, errors=errors)
3813 result = self.reindex(**{axis_name: new_axis})
3814
/usr/lib/python3.7/site-packages/pandas/core/indexes/base.py in drop(self, labels, errors)
4962 if mask.any():
4963 if errors != 'ignore':
-> 4964 raise KeyError(
4965 '{} not found in axis'.format(labels[mask]))
4966 indexer = indexer[~mask]
KeyError: '[30] not found in axis'
Is there anyone who has a solution?
edit: to be clearer, I'd like to have the row index as [0,1,2,3] for the four row I put above

Related

How to correctly read csv file generated by groupby results?

I have calculated the mean value of DataFrame by two groups and saved the results to CSV file.
Then, I tried to read it again by read_csv(), but the .loc() function doesn't work for the loaded DataFrame.
Here's the code example:
import pandas as pd
import numpy as np
np.random.seed(100)
df = pd.DataFrame(np.random.randn(100, 3), columns=['a', 'b', 'value'])
a_bins = np.arange(-3, 4, 1)
b_bins = np.arange(-2, 4, 2)
# calculate the mean value
df['a_bins'] = pd.cut(df['a'], bins=a_bins)
df['b_bins'] = pd.cut(df['b'], bins=b_bins)
df_value_bin = df.groupby(['a_bins','b_bins']).agg({'value':'mean'})
# save to csv file
df_value_bin.to_csv('test.csv')
# read the exported file
df_test = pd.read_csv('test.csv')
When I type:
df_value_bin.loc[(1.5, -1)]
I got this output
value 0.254337
Name: ((1, 2], (-2, 0]), dtype: float64
But, if I use the same method to locate the value from the loaded CSV file:
df_test.loc[(1.5, -1)]
I got this Keyerror:
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
/tmp/ipykernel_33836/4042082162.py in <module>
----> 1 df_test.loc[(1.5, -1)]
~/miniconda3/lib/python3.9/site-packages/pandas/core/indexing.py in __getitem__(self, key)
923 with suppress(KeyError, IndexError):
924 return self.obj._get_value(*key, takeable=self._takeable)
--> 925 return self._getitem_tuple(key)
926 else:
927 # we by definition only have the 0th axis
~/miniconda3/lib/python3.9/site-packages/pandas/core/indexing.py in _getitem_tuple(self, tup)
1098 def _getitem_tuple(self, tup: tuple):
1099 with suppress(IndexingError):
-> 1100 return self._getitem_lowerdim(tup)
1101
1102 # no multi-index, so validate all of the indexers
~/miniconda3/lib/python3.9/site-packages/pandas/core/indexing.py in _getitem_lowerdim(self, tup)
836 # We don't need to check for tuples here because those are
837 # caught by the _is_nested_tuple_indexer check above.
--> 838 section = self._getitem_axis(key, axis=i)
839
840 # We should never have a scalar section here, because
~/miniconda3/lib/python3.9/site-packages/pandas/core/indexing.py in _getitem_axis(self, key, axis)
1162 # fall thru to straight lookup
1163 self._validate_key(key, axis)
-> 1164 return self._get_label(key, axis=axis)
1165
1166 def _get_slice_axis(self, slice_obj: slice, axis: int):
~/miniconda3/lib/python3.9/site-packages/pandas/core/indexing.py in _get_label(self, label, axis)
1111 def _get_label(self, label, axis: int):
1112 # GH#5667 this will fail if the label is not present in the axis.
-> 1113 return self.obj.xs(label, axis=axis)
1114
1115 def _handle_lowerdim_multi_index_axis0(self, tup: tuple):
~/miniconda3/lib/python3.9/site-packages/pandas/core/generic.py in xs(self, key, axis, level, drop_level)
3774 raise TypeError(f"Expected label or tuple of labels, got {key}") from e
3775 else:
-> 3776 loc = index.get_loc(key)
3777
3778 if isinstance(loc, np.ndarray):
~/miniconda3/lib/python3.9/site-packages/pandas/core/indexes/range.py in get_loc(self, key, method, tolerance)
386 except ValueError as err:
387 raise KeyError(key) from err
--> 388 raise KeyError(key)
389 return super().get_loc(key, method=method, tolerance=tolerance)
390
KeyError: 1.5
You should read the index as a MultiIndex, but you need to convert the strings to interval. You can use to_interval (all credits to korakot):
def to_interval(istr):
    """Parse an interval string such as '(1, 2]' into a pandas.Interval.

    The bracket characters decide which side(s) are closed; the two
    endpoints are parsed as integers.
    """
    inclusive_left = istr.startswith('[')
    inclusive_right = istr.endswith(']')
    if inclusive_left and inclusive_right:
        closed = 'both'
    elif inclusive_left:
        closed = 'left'
    elif inclusive_right:
        closed = 'right'
    else:
        closed = 'neither'
    lo, hi = (int(part) for part in istr[1:-1].split(','))
    return pd.Interval(lo, hi, closed)
df_test = pd.read_csv('test.csv', index_col=[0,1], converters={0: to_interval,1: to_interval})
Test:
df_test.loc[(1.5, -1)]
#value 0.254337
#Name: ((1, 2], (-2, 0]), dtype: float64

Python KeyError when trying to code textual to numerical

At first I imported pandas as pd and imported my data set as dF. Initially I was getting an indentation error at the elif. After playing around I got rid of that error and ran into the error below. I am using Python in a Jupyter notebook.
Null = dF.isnull() .any()
dF = dF.drop([["customerID", "gender", "SeniorCitizen", "Partner", "Dependents", "tenure", "PhoneService", "MultipleLines", "InternetService", "OnlineSecurity", "OnlineBackup", "DeviceProtection", "TechSupport", "StreamingTV", "StreamingMovies", "Contract", "PaperlessBilling", "PaymentMethod", "MonthlyCharges", "TotalCharges", "Churn"]], axis=1)
for column in range(len(list(dF.columns.values))):
for index, row in dF.iterrows():
if "No" in row[column] or "Female" in row[column]:
dF.iloc[index, column] = 0
elif "Yes" in row[column] or "Male" in row[column]:
dF.iloc[index, column] = 1
dF.to_excel('Cleaned.xlsx', index=False)
ERROR BELOW
KeyError Traceback (most recent call last)
<ipython-input-94-076be1113e81> in <module>
1 Null = dF.isnull() .any()
2
----> 3 dF = dF.drop([["customerID", "gender", "SeniorCitizen", "Partner", "Dependents", "tenure", "PhoneService", "MultipleLines", "InternetService", "OnlineSecurity", "OnlineBackup", "DeviceProtection", "TechSupport", "StreamingTV", "StreamingMovies", "Contract", "PaperlessBilling", "PaymentMethod", "MonthlyCharges", "TotalCharges", "Churn"]], axis=1)
4
5 for column in range(len(list(dF.columns.values))):
~\Anaconda3\lib\site-packages\pandas\core\frame.py in drop(self, labels, axis, index, columns, level, inplace, errors)
4100 level=level,
4101 inplace=inplace,
-> 4102 errors=errors,
4103 )
4104
~\Anaconda3\lib\site-packages\pandas\core\generic.py in drop(self, labels, axis, index, columns, level, inplace, errors)
3912 for axis, labels in axes.items():
3913 if labels is not None:
-> 3914 obj = obj._drop_axis(labels, axis, level=level, errors=errors)
3915
3916 if inplace:
~\Anaconda3\lib\site-packages\pandas\core\generic.py in _drop_axis(self, labels, axis, level, errors)
3944 new_axis = axis.drop(labels, level=level, errors=errors)
3945 else:
-> 3946 new_axis = axis.drop(labels, errors=errors)
3947 result = self.reindex(**{axis_name: new_axis})
3948
~\Anaconda3\lib\site-packages\pandas\core\indexes\base.py in drop(self, labels, errors)
5338 if mask.any():
5339 if errors != "ignore":
-> 5340 raise KeyError("{} not found in axis".format(labels[mask]))
5341 indexer = indexer[~mask]
5342 return self.delete(indexer)
KeyError: "[('customerID', 'gender', 'SeniorCitizen', 'Partner', 'Dependents', 'tenure', 'PhoneService', 'MultipleLines', 'InternetService', 'OnlineSecurity', 'OnlineBackup', 'DeviceProtection', 'TechSupport', 'StreamingTV', 'StreamingMovies', 'Contract', 'PaperlessBilling', 'PaymentMethod', 'MonthlyCharges', 'TotalCharges', 'Churn')] not found in axis"
I'm going to assume the dF is already populated. The error you are getting I believe is because you have double arrayed the columns to drop. That's why it's telling you that whole array is not a key in the dF. Because it's not.
Instead of
dF = dF.drop([["customerID", "gender", "SeniorCitizen", "Partner", "Dependents", "tenure", "PhoneService", "MultipleLines", "InternetService", "OnlineSecurity", "OnlineBackup", "DeviceProtection", "TechSupport", "StreamingTV", "StreamingMovies", "Contract", "PaperlessBilling", "PaymentMethod", "MonthlyCharges", "TotalCharges", "Churn"]], axis=1)
Try
dF = dF.drop(["customerID", "gender", "SeniorCitizen", "Partner", "Dependents", "tenure", "PhoneService", "MultipleLines", "InternetService", "OnlineSecurity", "OnlineBackup", "DeviceProtection", "TechSupport", "StreamingTV", "StreamingMovies", "Contract", "PaperlessBilling", "PaymentMethod", "MonthlyCharges", "TotalCharges", "Churn"], axis=1)

ValueError: 2 columns passed, passed data had 1170 columns

When I try to import a geojson file and convert it into a dataframe, the issue appears, I want a dataframe with columns: Zipcode, Latitude, Longitude. Here are my codes:
import urllib.request, json
import pandas as pd
with urllib.request.urlopen("http://bostonopendata-boston.opendata.arcgis.com/datasets/53ea466a189b4f43b3dfb7b38fa7f3b6_1.geojson") as url:
wuppertal_data = json.loads(url.read().decode())
neighborhoods_data = wuppertal_data['features']
results = pd.DataFrame()
for data in neighborhoods_data:
zipcode = data['properties']['ZIP5']
temp_df = pd.DataFrame(data['geometry']['coordinates'])
temp_df = temp_df.T
temp_df = pd.DataFrame(temp_df.iloc[:,0].tolist(), columns=['Latitude', 'Longitude'])
temp_df['Zipcode'] = zipcode
results = results.append(temp_df).reset_index(drop=True)
Result:
AssertionError Traceback (most recent call last)
D:\PYTHON3.7\lib\site-packages\pandas\core\internals\construction.py in _list_to_arrays(data, columns, coerce_float, dtype)
496 result = _convert_object_array(
--> 497 content, columns, dtype=dtype, coerce_float=coerce_float
498 )
D:\PYTHON3.7\lib\site-packages\pandas\core\internals\construction.py in _convert_object_array(content, columns, coerce_float, dtype)
580 raise AssertionError(
--> 581 f"{len(columns)} columns passed, passed data had "
582 f"{len(content)} columns"
AssertionError: 2 columns passed, passed data had 1170 columns
The above exception was the direct cause of the following exception:
ValueError Traceback (most recent call last)
<ipython-input-82-b1c5869e9ca3> in <module>
14 temp_df = pd.DataFrame(data['geometry']['coordinates'])
15 temp_df = temp_df.T
---> 16 temp_df = pd.DataFrame(temp_df.iloc[:,0].tolist(), columns=['Latitude', 'Longitude'])
17
18 temp_df['Neighborhood'] = neighborhood_name
D:\PYTHON3.7\lib\site-packages\pandas\core\frame.py in __init__(self, data, index, columns, dtype, copy)
472 if is_named_tuple(data[0]) and columns is None:
473 columns = data[0]._fields
--> 474 arrays, columns = to_arrays(data, columns, dtype=dtype)
475 columns = ensure_index(columns)
476
D:\PYTHON3.7\lib\site-packages\pandas\core\internals\construction.py in to_arrays(data, columns, coerce_float, dtype)
459 return [], [] # columns if columns is not None else []
460 if isinstance(data[0], (list, tuple)):
--> 461 return _list_to_arrays(data, columns, coerce_float=coerce_float, dtype=dtype)
462 elif isinstance(data[0], abc.Mapping):
463 return _list_of_dict_to_arrays(
D:\PYTHON3.7\lib\site-packages\pandas\core\internals\construction.py in _list_to_arrays(data, columns, coerce_float, dtype)
498 )
499 except AssertionError as e:
--> 500 raise ValueError(e) from e
501 return result
502
ValueError: 2 columns passed, passed data had 1170 columns
I don't quite understand the error. Can anyone help me out? I don't know what part is wrong.
The problem was that temp_df.iloc[:,0] sometimes had elements with more than 2 values — in which case it threw an error, since you supplied only 2 column labels — so to limit the number of values read from each element of the pd.Series to 2, just use temp_df.iloc[:,0].str[:2] instead.
Full code:
import urllib.request, json
import pandas as pd
# Download the Boston ZIP-code GeoJSON and flatten the first polygon ring of
# each feature into a DataFrame with columns Latitude, Longitude, Zipcode.
with urllib.request.urlopen("http://bostonopendata-boston.opendata.arcgis.com/datasets/53ea466a189b4f43b3dfb7b38fa7f3b6_1.geojson") as url:
    wuppertal_data = json.loads(url.read().decode())
neighborhoods_data = wuppertal_data['features']
results = pd.DataFrame()
for data in neighborhoods_data:
    zipcode = data['properties']['ZIP5']
    temp_df = pd.DataFrame(data['geometry']['coordinates'])
    temp_df = temp_df.T
    # .str[:2] keeps only the first two values of each coordinate point, so
    # every row matches the two requested columns (the original error came
    # from points carrying more values than the 2 column labels).
    # NOTE(review): GeoJSON coordinates are [longitude, latitude]; the column
    # labels here may be swapped — confirm against the source data.
    temp_df = pd.DataFrame(temp_df.iloc[:,0].str[:2].tolist(), columns=['Latitude', 'Longitude'])
    temp_df['Zipcode'] = zipcode
    results = results.append(temp_df).reset_index(drop=True)

Specify lambda function to continue even if there is an error

I am trying to run this line of code:
df['Zillow ID'] = df.apply(lambda row: get_zillow_id(key, row['Address'], row['Zipcode']), axis = 1)
But for some address and zipcodes the function get_zillow_id() fails. But I want the lambda function to just ignore the error for that particular address and zipcode and continue. How do I do that?
Here is the entire code:
from pyzillow.pyzillow import ZillowWrapper, GetDeepSearchResults, GetUpdatedPropertyDetails
import pandas as pd
import numpy as np
key = "X1-ZWz1gtmiat11xn_7ew1d"
# Create function to get zillow_id
def get_zillow_id(key, address, zipcode):
    """Return the Zillow property ID for an address/zipcode pair.

    key: Zillow API key passed to ZillowWrapper.
    Raises pyzillow's ZillowError when the API rejects the lookup
    (seen in the traceback below for unknown addresses).
    """
    zillow_data = ZillowWrapper(key)
    deep_search_response = zillow_data.get_deep_search_results(address, zipcode)
    result = GetDeepSearchResults(deep_search_response)
    return result.zillow_id
# Create function to get propery data
def get_property_data(key, address, zipcode):
    """Return the year the property was built (result.year_built).

    Performs a second API round-trip: first resolves the Zillow ID via
    get_zillow_id(), then fetches the updated property details for it.
    """
    zillow_data = ZillowWrapper(key)
    updated_property_details_response = zillow_data.get_updated_property_details(get_zillow_id(key, address, zipcode))
    result = GetUpdatedPropertyDetails(updated_property_details_response)
    return result.year_built
# Import data into dataframe
df = pd.read_csv('test.csv')
# Get zillow ids
df['Zillow ID'] = df.apply(lambda row: get_zillow_id(key, row['Address'], row['Zipcode']), axis = 1)
Here is a picture of the data frame:
Here is the error I am getting:
ZillowError Traceback (most recent call last)
~\AppData\Local\Continuum\anaconda3\lib\site-packages\IPython\core\interactiveshell.py in run_code(self, code_obj, result)
2861 #rprint('Running code', repr(code_obj)) # dbg
-> 2862 exec(code_obj, self.user_global_ns, self.user_ns)
2863 finally:
<ipython-input-40-55f38b77eeea> in <module>()
1 # Get zillow ids
----> 2 df['Zillow ID'] = df.apply(lambda row: get_zillow_id(key, row['Address'], row['Zipcode']), axis = 1)
~\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\core\frame.py in apply(self, func, axis, broadcast, raw, reduce, args, **kwds)
4261 reduce=reduce,
-> 4262 ignore_failures=ignore_failures)
4263 else:
~\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\core\frame.py in _apply_standard(self, func, axis, ignore_failures, reduce)
4357 for i, v in enumerate(series_gen):
-> 4358 results[i] = func(v)
4359 keys.append(v.name)
<ipython-input-40-55f38b77eeea> in <lambda>(row)
1 # Get zillow ids
----> 2 df['Zillow ID'] = df.apply(lambda row: get_zillow_id(key, row['Address'], row['Zipcode']), axis = 1)
<ipython-input-37-ce158395fdb8> in get_zillow_id(key, address, zipcode)
3 zillow_data = ZillowWrapper(key)
----> 4 deep_search_response = zillow_data.get_deep_search_results(address, zipcode)
5 result = GetDeepSearchResults(deep_search_response)
~\AppData\Local\Continuum\anaconda3\lib\site-packages\pyzillow\pyzillow.py in get_deep_search_results(self, address, zipcode)
30 }
---> 31 return self.get_data(url, params)
32
~\AppData\Local\Continuum\anaconda3\lib\site-packages\pyzillow\pyzillow.py in get_data(self, url, params)
81 if response.findall('message/code')[0].text is not '0':
---> 82 raise ZillowError(int(response.findall('message/code')[0].text))
83 else:
<class 'str'>: (<class 'TypeError'>, TypeError('__str__ returned non-string (type dict)',))
During handling of the above exception, another exception occurred:
TypeError Traceback (most recent call last)
~\AppData\Local\Continuum\anaconda3\lib\site-packages\IPython\core\interactiveshell.py in run_code(self, code_obj, result)
2877 if result is not None:
2878 result.error_in_exec = sys.exc_info()[1]
-> 2879 self.showtraceback(running_compiled_code=True)
2880 else:
2881 outflag = False
~\AppData\Local\Continuum\anaconda3\lib\site-packages\IPython\core\interactiveshell.py in showtraceback(self, exc_tuple, filename, tb_offset, exception_only, running_compiled_code)
1809 value, tb, tb_offset=tb_offset)
1810
-> 1811 self._showtraceback(etype, value, stb)
1812 if self.call_pdb:
1813 # drop into debugger
~\AppData\Local\Continuum\anaconda3\lib\site-packages\ipykernel\zmqshell.py in _showtraceback(self, etype, evalue, stb)
541 u'traceback' : stb,
542 u'ename' : unicode_type(etype.__name__),
--> 543 u'evalue' : py3compat.safe_unicode(evalue),
544 }
545
~\AppData\Local\Continuum\anaconda3\lib\site-packages\ipython_genutils\py3compat.py in safe_unicode(e)
63 """
64 try:
---> 65 return unicode_type(e)
66 except UnicodeError:
67 pass
TypeError: __str__ returned non-string (type dict)
You should try and understand exactly why your function will fail. Then use a try / except clause to ignore the specific problem you wish to avoid. For example, to ignore TypeError:
def get_zillow_id(key, address, zipcode):
    """Return the Zillow ID for an address, or None when the lookup fails.

    Wrapping the pyzillow calls in try/except lets a DataFrame.apply()
    pass continue past individual bad addresses instead of aborting.
    """
    try:
        zillow_data = ZillowWrapper(key)
        deep_search_response = zillow_data.get_deep_search_results(address, zipcode)
        result = GetDeepSearchResults(deep_search_response)
        return result.zillow_id
    # Python 3 requires a parenthesized tuple here; `except A, B:` is
    # Python-2-only syntax (and bound B to the exception, catching only A).
    except (TypeError, ZillowError):
        # ZillowError must be imported from the pyzillow library.
        return None
df['Zillow ID'] = df.apply(lambda row: get_zillow_id(key, row['Address'], row['Zipcode']),
axis=1)
If ZillowError is an actual error, you may need to import it from that library.

Received "ValueError: If using all scalar values, you must pass an index" in Python

I have run the following code on python in order to retrieve various crypto currency closing prices from their inception. I have run it successfully using the following tickers:
tickers = ['USDT_BTC','USDT_BCH','USDT_ETC','USDT_XMR','USDT_ETH','USDT_DASH',
'USDT_XRP','USDT_LTC','USDT_NXT','USDT_STR','USDT_REP','USDT_ZEC']
I now have changed it as follows (full code included) and get a ValueError.
[LN1]
def CryptoDataCSV(symbol, frequency):
    """Fetch Poloniex chart data for `symbol` and write it to <symbol>.csv.

    frequency: candle period in seconds (300, 900, 1800, 7200, 14400, 86400).
    The request spans the full available history (start=0 to the far future).
    """
    base = 'https://poloniex.com/public?command=returnChartData&currencyPair='
    url = base + symbol + '&end=9999999999&period=' + str(frequency) + '&start=0'
    chart = pd.read_json(url)
    chart.set_index('date', inplace=True)
    chart.to_csv(symbol + '.csv')
    print('Processed: ' + symbol)
[LN2]
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
[LN3]
tickers = 'ETH_BTC','STR_BTC','XMR_BTC','XRP_BTC','LTC_BTC','DASH_BTC',
'ETC_BTC','POT_BTC','OMG_BTC','FCT_BTC','ZEC_BTC','BTS_BTC','VTC_BTC',
'XEM_BTC','MAID_BTC','DGB_BTC','STRAT_BTC','LSK_BTC','XVC_BTC','SC_BTC',
'DOGE_BTC','XBC_BTC','GNT_BTC','EMC2_BTC','CLAM_BTC','RIC_BTC','SYS_BTC',
'DCR_BTC','STEEM_BTC','ZRX_BTC','GAME_BTC','VIA_BTC','NXC_BTC','NXT_BTC'
,'VRC_BTC','NAV_BTC','PINK_BTC','STORJ_BTC','ARDR_BTC','BCN_BTC','CVC_BTC',
'EXP_BTC','LBC_BTC','GNO_BTC','GAS_BTC','OMNI_BTC','XCP_BTC','NEOS_BTC',
'BURST_BTC','AMP_BTC','FLDC_BTC','FLO_BTC','SBD_BTC','BLK_BTC','BTCD_BTC',
'NOTE_BTC','GRC_BTC','PPC_BTC','BTM_BTC','XPM_BTC','NMC_BTC','PASC_BTC',
'NAUT_BTC','BELA_BTC','SJCX_BTC','HUC_BTC','RADS_BTC']
[LN4]
for ticker in tickers:
CryptoDataCSV(ticker, 86400)
I now get the following error:
--------------------------------------------------------------------------- ValueError Traceback (most recent call
last) in ()
1 for ticker in tickers:
----> 2 CryptoDataCSV(ticker, 86400)
in CryptoDataCSV(symbol, frequency)
7 url ='https://poloniex.com/public?command=returnChartData&currencyPair='+symbol+'&end=9999999999&period='+str(frequency)+'&start=0'
8
----> 9 df = pd.read_json(url)
10
11 df.set_index('date',inplace=True)
~\Anaconda3\lib\site-packages\pandas\io\json\json.py in
read_json(path_or_buf, orient, typ, dtype, convert_axes,
convert_dates, keep_default_dates, numpy, precise_float, date_unit,
encoding, lines)
352 obj = FrameParser(json, orient, dtype, convert_axes, convert_dates,
353 keep_default_dates, numpy, precise_float,
--> 354 date_unit).parse()
355
356 if typ == 'series' or obj is None:
~\Anaconda3\lib\site-packages\pandas\io\json\json.py in parse(self)
420
421 else:
--> 422 self._parse_no_numpy()
423
424 if self.obj is None:
~\Anaconda3\lib\site-packages\pandas\io\json\json.py in
_parse_no_numpy(self)
637 if orient == "columns":
638 self.obj = DataFrame(
--> 639 loads(json, precise_float=self.precise_float), dtype=None)
640 elif orient == "split":
641 decoded = dict((str(k), v)
~\Anaconda3\lib\site-packages\pandas\core\frame.py in init(self,
data, index, columns, dtype, copy)
273 dtype=dtype, copy=copy)
274 elif isinstance(data, dict):
--> 275 mgr = self._init_dict(data, index, columns, dtype=dtype)
276 elif isinstance(data, ma.MaskedArray):
277 import numpy.ma.mrecords as mrecords
~\Anaconda3\lib\site-packages\pandas\core\frame.py in _init_dict(self,
data, index, columns, dtype)
409 arrays = [data[k] for k in keys]
410
--> 411 return _arrays_to_mgr(arrays, data_names, index, columns, dtype=dtype)
412
413 def _init_ndarray(self, values, index, columns, dtype=None, copy=False):
~\Anaconda3\lib\site-packages\pandas\core\frame.py in
_arrays_to_mgr(arrays, arr_names, index, columns, dtype) 5494 # figure out the index, if necessary 5495 if index is None:
-> 5496 index = extract_index(arrays) 5497 else: 5498 index = _ensure_index(index)
~\Anaconda3\lib\site-packages\pandas\core\frame.py in
extract_index(data) 5533 5534 if not indexes and not
raw_lengths:
-> 5535 raise ValueError('If using all scalar values, you must pass' 5536 ' an index') 5537
ValueError: If using all scalar values, you must pass an index
I just tested your data, and it appears that some of your currency pairs do not work at all, returning a json of the form:
{"error":"Invalid currency pair."}
When this is returned, pd.read_json throws an error, because it can't convert this to a dataframe.
The simplest workaround is to use a try-except brace and handle any non-working tickers.
# Collect the currency pairs the Poloniex API rejects; for valid pairs,
# download the full chart history and save it to <ticker>.csv.
broken_tickers = []
for t in tickers:
    url ='https://poloniex.com/public?command=returnChartData&currencyPair={}&end=9999999999&period={}&start=0'.format(t, 86400)
    try:
        df = pd.read_json(url)
    except ValueError:
        # Invalid pairs return {"error": ...}, which read_json cannot turn
        # into a DataFrame — skip them instead of crashing.
        broken_tickers.append(t)
        continue
    # NOTE(review): set_index without inplace=True returns a new frame and
    # discards it — this line has no effect on the CSV written below.
    df.set_index('date')
    df.to_csv('{}.csv'.format(t))
I've gotten rid of the function, I didn't really feel it necessary here but you can add it back in.

Categories

Resources