Python Pandas NameError: name 'data' is not defined - python

I'm new to coding. When I attempt to run this it says:
NameError: name 'data' is not defined.
import numpy as np
import pandas as pd
from scipy import stats
from matplotlib import pyplot as plt
from matplotlib.ticker import MaxNLocator
import datetime
import json
from bs4 import BeautifulSoup
import requests
import time
def fetchCryptoClose(fsym, tsym):
# function fetches the close-price time-series from
# it may ignore USDT coin (due to near-zero pricing)
# daily sampled
cols = ['date', 'timestamp', fsym]
lst = ['time', 'open', 'high', 'low', 'close']
timestamp_today =
curr_timestamp = timestamp_today
for j in range(2):
df = pd.DataFrame(columns=cols)
url = "" + fsym + \
"&tsym=" + tsym + "&toTs=" + str(int(curr_timestamp)) + "&limit=3"
response = requests.get(url)
soup = BeautifulSoup(response.content, "html.parser")
dic = json.loads(soup.prettify())
for i in range(1, 4):
tmp = []
for e in enumerate(lst):
x = e[0]
y = dic['Data'][i][e[1]]
if(x == 0):
if(np.sum(tmp[-4::]) > 0): # remove for USDT
tmp = np.array(tmp)
tmp = tmp[[0,1,4]] # filter solely for close prices
df.loc[len(df)] = np.array(tmp)
# ensure a correct date format
df.index = pd.to_datetime(, format="%Y-%m-%d")
df.drop('date', axis=1, inplace=True)
curr_timestamp = int(df.ix[0][0])
if(j == 0):
df0 = df.copy()
data = pd.concat([df, df0], axis=0)
data.drop("timestamp", axis=1, inplace=True)
return data # DataFrame
# N-Cryptocurrency Portfolio (tickers)
fsym = ['BTC', 'ETH', 'XRP', 'LTC', 'DASH', 'XMR', 'ETC', 'MAID', 'XEM', 'REP']
# vs.
tsym = 'USD'
for e in enumerate(fsym):
print(e[0], e[1])
if(e[0] == 0):
data = fetchCryptoClose(e[1], tsym)
data = data.join(fetchCryptoClose(e[1], tsym))
# ensure values to be floats
# save portfolio to a file (HDF5 file format)
store = pd.HDFStore('portfolio2.h5')
store['data'] = data
# read in your portfolio from a file
df = pd.read_hdf('portfolio2.h5', 'data')

Don't use try-except-pass because will silence all your exceptions and you might never actually create `data.
Replace this code:
for e in enumerate(fsym):
print(e[0], e[1])
if(e[0] == 0):
data = fetchCryptoClose(e[1], tsym)
data = data.join(fetchCryptoClose(e[1], tsym))
with this:
for e in enumerate(fsym):
print(e[0], e[1])
if(e[0] == 0):
data = fetchCryptoClose(e[1], tsym)
data = data.join(fetchCryptoClose(e[1], tsym))
and see where your real exceptions are.


Best approach to iterate and append a custom function a on a new dataframe

I have the following custom function that generates a row with EMA data for a specific asset based on the current time.
Here's the complete code for the function:
def find_ema(futures_symbol):
futures_symbol = futures_symbol
def fetch_ohlc(symbol,timeframe, timesymbol):
symbol = symbol
timeframe = timeframe
timesymbol = timesymbol
#fetch data-binance api
candlestick_url = ''+symbol+'&contractType=PERPETUAL&interval='+str(timeframe)+timesymbol+'&limit=1500'
candlestick_chart = requests.get(candlestick_url).json()
candlestick_df = pd.DataFrame(candlestick_chart)
candlestick_df = candlestick_df.iloc[:,1:7]
candlestick_df.columns = ['open', 'high', 'low','close','volume', 'date']
candlestick_df['date'] = pd.to_datetime(candlestick_df['date'], unit='ms').round('1s')
candlestick_df.insert(0, 'date', candlestick_df.pop('date') )
# reset to midnight = pd.to_datetime(
min_date =
NextDay_Date = (min_date + datetime.timedelta(days=1)).replace(hour=0, minute=0, second=0, microsecond=0)
candlestick_df = candlestick_df[ >= NextDay_Date].copy()
candlestick_df = candlestick_df.set_index('date')
candlestick_df['symbol'] = symbol
ohlc_data = candlestick_df
cols= ['open', 'high', 'low','close','volume']
ohlc_data[cols] = ohlc_data[cols].apply(pd.to_numeric, errors = 'coerce')
ohlc_data[cols] = ohlc_data[cols].round(decimals=2)
return ohlc_data
#separate df for limited candle stick data
ohlc_smaller = fetch_ohlc(futures_symbol,5,'m')
ohlc_larger = fetch_ohlc(futures_symbol,1,'h')
ema_df = ohlc_smaller
#calculating ema with 200 row data
ema_df['15m'] = ohlc_smaller.resample('15T').apply({'close':'last'}).ewm(span = 200, min_periods = 200).mean()
ema_df['30m'] = ohlc_smaller.resample('30T').apply({'close':'last'}).ewm(span = 200, min_periods = 200).mean()
ema_df['1h'] = ohlc_larger.resample('60T').apply({'close':'last'}).ewm(span = 200, min_periods = 200).mean()
ema_df['2h'] = ohlc_larger.resample('120T').apply({'close':'last'}).ewm(span = 200, min_periods = 200).mean()
ema_df['4h'] = ohlc_larger.resample('240T').apply({'close':'last'}).ewm(span = 200, min_periods = 200).mean()
#forward fill larger tf data to smaller tf
ema_df = ema_df.fillna(method='ffill').tail(1)
ema_df.insert(0, 'symbol', ema_df.pop('symbol'))
ema_df = ema_df.drop(['high','low','close','volume'], axis=1)
return ema_df
When I apply this function to a single symbol it returns the dataframe perfectly. for example:
working example on single symbol
However, I now have a list of symbols on which I want to apply this function and create a new dataframe.
Here's how I am generating my list of symbols:
symbols_url = ''
symbols_data = requests.get(symbols_url).json()
symbols_df = pd.DataFrame(symbols_data)
symbols_df = symbols_df[symbols_df['symbol'].str.contains('USDT')]
futures_tickers_binance = list(symbols_df['symbol'])
#some ends with numbers(eg:Quarterly Contracts), hence filter:
futures_tickers_binance = list(filter(lambda x: x.endswith(('USDT')), futures_tickers_binance))
Here's what I thought would work:
for symbol in futures_tickers_binance:
for j in range(len(futures_tickers_binance)):
df = df.append(find_ema(futures_tickers_binance[j]))
df = df.drop_duplicates()
However, this returns a valueError:
ValueError: If using all scalar values, you must pass an index
Is there a way to apply this function and generate a new dataframe with the values for the complete list in a faster way?
Thank you in advance for your patience to read this!
The final result would look something like this, however my loop is not working the way it is supposed to be working:
Expected (almost) perfect result
Here's my complete code if needed:
import pandas as pd
import numpy as np
import as px
import plotly.graph_objects as go
import datetime
import requests
symbols_url = ''
symbols_data = requests.get(symbols_url).json()
symbols_df = pd.DataFrame(symbols_data)
symbols_df = symbols_df[symbols_df['symbol'].str.contains('USDT')]
futures_tickers_binance = list(symbols_df['symbol'])
#some ends with numbers(eg:Quarterly Contracts), hence filter:
futures_tickers_binance = list(filter(lambda x: x.endswith(('USDT')), futures_tickers_binance))
def find_ema(futures_symbol):
futures_symbol = futures_symbol
def fetch_ohlc(symbol,timeframe, timesymbol):
symbol = symbol
timeframe = timeframe
timesymbol = timesymbol
#fetch data-binance api
candlestick_url = ''+symbol+'&contractType=PERPETUAL&interval='+str(timeframe)+timesymbol+'&limit=1500'
candlestick_chart = requests.get(candlestick_url).json()
candlestick_df = pd.DataFrame(candlestick_chart)
candlestick_df = candlestick_df.iloc[:,1:7]
candlestick_df.columns = ['open', 'high', 'low','close','volume', 'date']
candlestick_df['date'] = pd.to_datetime(candlestick_df['date'], unit='ms').round('1s')
candlestick_df.insert(0, 'date', candlestick_df.pop('date') )
# reset to midnight = pd.to_datetime(
min_date =
NextDay_Date = (min_date + datetime.timedelta(days=1)).replace(hour=0, minute=0, second=0, microsecond=0)
candlestick_df = candlestick_df[ >= NextDay_Date].copy()
candlestick_df = candlestick_df.set_index('date')
candlestick_df['symbol'] = symbol
ohlc_data = candlestick_df
cols= ['open', 'high', 'low','close','volume']
ohlc_data[cols] = ohlc_data[cols].apply(pd.to_numeric, errors = 'coerce')
ohlc_data[cols] = ohlc_data[cols].round(decimals=2)
return ohlc_data
#separate df for limited candle stick data
ohlc_smaller = fetch_ohlc(futures_symbol,5,'m')
ohlc_larger = fetch_ohlc(futures_symbol,1,'h')
ema_df = ohlc_smaller
#calculating ema with 200 row data
ema_df['15m'] = ohlc_smaller.resample('15T').apply({'close':'last'}).ewm(span = 200, min_periods = 200).mean()
ema_df['30m'] = ohlc_smaller.resample('30T').apply({'close':'last'}).ewm(span = 200, min_periods = 200).mean()
ema_df['1h'] = ohlc_larger.resample('60T').apply({'close':'last'}).ewm(span = 200, min_periods = 200).mean()
ema_df['2h'] = ohlc_larger.resample('120T').apply({'close':'last'}).ewm(span = 200, min_periods = 200).mean()
ema_df['4h'] = ohlc_larger.resample('240T').apply({'close':'last'}).ewm(span = 200, min_periods = 200).mean()
#forward fill larger tf data to smaller tf
ema_df = ema_df.fillna(method='ffill').tail(1)
ema_df.insert(0, 'symbol', ema_df.pop('symbol'))
ema_df = ema_df.drop(['high','low','close','volume'], axis=1)
return ema_df
for symbol in futures_tickers_binance:
for j in range(len(futures_tickers_binance)):
df = df.append(find_ema(futures_tickers_binance[j]))
df = df.drop_duplicates()

Split json file into multiple csv files depending on date?

I am trying to split up a json file from alpha-vantages api into separate files depending on the date. I'm also trying to reformat the file to have blank values in the gaps where dates are missing. The following code is what I have come up with but it gives me the TypeError: 'list' object is not callable". I'm fairly new to python and pandas so I'm sure there is a better way to go about this.
import requests
import pandas as pd
from datetime import datetime, timedelta
from dateutil import parser
import numpy as np
from pandas import DataFrame
import json
symbol = "MSFT"
symbol_list = symbol.split(",")
def num_el(list):
count = 0
for element in list:
count += 1
return count
def csv_make(sy, dar, dat):
csv_file = open(f"{sy}_1min_{dar}.csv", "w", newline="")
i = 0
x = -1
n = num_el(symbol_list)
while i < n:
namesym = symbol_list[x]
ticker = namesym
api_key = 'APIKEYHERE'
url = f'{ticker}&outputsize=full&interval=1min&apikey={api_key}'
data = requests.get(url)
dsf = data.json()
daf = pd.DataFrame(dsf['Time Series (1min)'])
dxf: DataFrame = daf.T = 'time'
dxf['time'] = pd.to_datetime(dxf['time'])
dxf['minute'] = dxf['time'].dt.time
dxf['day'] = dxf['time']
dxf['date'] = dxf['time']
agg = dxf.groupby([dxf['day']])
length1 = dxf.groupby([dxf['day']]).size()
length = pd.DataFrame(length1) = 'day'
length_sum = length[0].sum()
v = 0
d = length_sum
b = len(length)
x2 = length_sum
while v < b:
a = length[0][v]
x2 -= length[0][v]
xd = agg.get_group(length['day'][v])
date = xd['date'][x2]
max_dt = parser.parse(str(max(xd['minute'])))
min_dt = parser.parse(str(min(xd['minute'])))
dt_range = []
while min_dt <= max_dt:
min_dt += timedelta(seconds=60)
complete_df = pd.DataFrame({'minute': dt_range})
xy = complete_df.astype('str')
yx = xd.astype('str')
dasf = xy.merge(yx, how='left', on='minute')
dasf['ev'] = np.where(dasf['1. open'].notnull(), 'False', 'True')
time = []
open = []
high = []
low = []
close = []
volume = []
empty_value = []
for ib in range(len(dasf)):
open.append(dasf['1. open'][ib])
high.append(dasf['2. high'][ib])
low.append(dasf['3. low'][ib])
close.append(dasf['4. close'][ib])
volume.append(dasf['5. volume'][ib])
time_df = pd.DataFrame(time).rename(columns={0: 'Time'})
open_df = pd.DataFrame(open).rename(columns={0: 'Open'})
high_df = pd.DataFrame(high).rename(columns={0: 'High'})
low_df = pd.DataFrame(low).rename(columns={0: 'Low'})
close_df = pd.DataFrame(close).rename(columns={0: 'Close'})
volume_df = pd.DataFrame(volume).rename(columns={0: 'Volume'})
empty_value_df = pd.DataFrame(empty_value).rename(columns={0: 'Empty Value'})
frames = [time_df, open_df, high_df, low_df, close_df, volume_df, empty_value_df]
df = pd.concat(frames, axis=1, join='inner')
df = df.set_index('Time')
ad = df.to_csv()
csv_make(namesym, date, ad)
v += 1
i += 1

Convert DF column values to column (like pivot)

I am scraping api data and totaling counts of different values into a dictionary 'building':'count' for each player (row). I would like to be able to analyze it further. An easy solution would be to pull the different unique 'buildings' (dictionary keys within the row) as dataframe columns and then do the equivalent of an index/match/match on them. The script currently gets the data, and I can extract the unique keys, but I am lost at how to make them into DF columns and then how to do the index/match/match. There may be a better approach from even before running the 'count' part of the script.
You should be able to run the script, no credentials are required to GET against the API. If you see the ranklist DF column with the building counts you will see what I am referencing.
Thank you for any guidance!
import requests
import pandas as pd
from datetime import datetime
from datetime import date
from datetime import timedelta
import operator
from time import sleep
ranklist = pd.DataFrame()
for i in range(430):
baserank_url = '' + str(i) + '/'
r = requests.get(baserank_url)
rank_json = r.json()
df = pd.DataFrame.from_dict(rank_json)
ranklist = ranklist.append(df)
print('Ranking list started succesfully!')
for row in ranklist.itertuples():
attempt = 0
while True:
if attempt == 6:
print(str(row.rank + 1) +' ' + str(attempt))
account_url = '' + str( + '/'
r = requests.get(account_url)
account_json = r.json()
playerid = account_json.get("player").get("id")
playerlevel = account_json.get("player").get("level")
datestart = datetime.strptime(account_json.get("player").get("dateJoined")[:10],'%Y-%m-%d').date()
yearsactive = round((today - datestart)/ timedelta(days=365.2425),2)
buildings = account_json.get("buildings")
certificates = account_json.get("certificates")
bnames = [d['name'] for d in buildings]
bnames = [n.replace('Park','Recreation').replace('Lake','Recreation').replace('Castle','Recreation') for n in bnames]
cnames = [d['name'] for d in certificates]
sptr = 'Yes' if 'Supporter' in cnames else 'No'
dictOfElems = dict()
for elem in bnames:
if elem in dictOfElems:
dictOfElems[elem] += 1
dictOfElems[elem] = 1
blist = {key:value for key, value in dictOfElems.items()}
blist = dict(sorted(blist.items(),key=operator.itemgetter(1),reverse=True))
levellist.append([playerid, playerlevel,sptr, datestart,yearsactive,blist])
attempt +=1
#get unique building values
bcodf= pd.DataFrame(bcolist,columns=['buildings'])
bcouni = list(set([a for b in bcodf.buildings.tolist() for a in b]))
leveldf = pd.DataFrame(levellist,columns=['id','level','sptr','datestart','yearsactive','blist'])
#clist = list(set([a for b in leveldf.cnames.tolist() for a in b]))
#bul = leveldf[blist].keys()
#buniq = list(set([a for b in leveldf.bul.tolist() for a in b]))
ranklist = ranklist.merge(leveldf, on='id', how='left')
ranklist['rank'] +=1

code showing empty dataframe

I write the below code for my project but dataframe df showing empty records.I want to know where i am lacking in the code:
import urllib
from urllib2 import *
import pandas as pd
def urlmake(req):
requests = [req]
for parms in requests:
url = 'http://localhost:8983/solr/data/select?indent=on&' + urllib.urlencode(parms)
connection = urlopen(url)
response = eval(
t = response['response']['numFound']
req2 = req['q'][13:17]
if(req2 == 'AXIS'):
for i in range(0,t):
t1 = float((response['response']['docs'][i]['message']).split(" ")[1])
t2 = response['response']['docs'][i]['customer_id']
df = df.append(pd.DataFrame(t2,t1))
ba_query = [{'q':'sender_name:*AXIS* AND message:*Avbl Lmt*','start':0,'rows':211,'wt':'json'}]
for i in range(0,len(ba_query)):
getting errror as:
UnboundLocalError: local variable 'df' referenced before assignment
import urllib
from urllib2 import *
import pandas as pd
df = pd.DataFrame(columns=['Customer_id','Spent'])
def urlmake(req):
requests = [req]
for parms in requests:
url = 'http://localhost:8983/solr/data/select?indent=on&' + urllib.urlencode(parms)
connection = urlopen(url)
response = eval(
t = response['response']['numFound']
req2 = req['q'][13:17]
if(req2 == 'AXIS'):
for i in range(0,t):
t1 = float((response['response']['docs'][i]['message']).split(" ")[1])
t2 = response['response']['docs'][i]['customer_id']
df = df.append({'Customer_id':t2, 'Spent':t1}, ignore_index=True) # HERE
See the comment in the code
Here's an MCVE of how your code should look:
import pandas as pd
import numpy as np
df = pd.DataFrame()
for iteration in range(0, 5):
dummy_data = np.random.rand(3, 3)
df = df.append(pd.DataFrame(dummy_data))
df.columns = ['a', 'b', 'c']
import pandas as pd
import numpy as np
def myfunc():
df = pd.DataFrame()
for iteration in range(0, 5):
dummy_data = np.random.rand(3, 3)
df = df.append(pd.DataFrame(dummy_data))
df.columns = ['a', 'b', 'c']
return df
df2 = myfunc()

Add each new dictionary result in the order of the columns of a dataframe

I am new to Python, but hope to explain the issue.
dfrow - is a dictionary of a single regression summary
results - is an empty dataframe with same columns as in dfrow
I would like to save regression results for each observation in the outer loop at the same time making sure column order in the inner loop. I am getting a result for the first observations but cannot move further, error saying:
Traceback (most recent call last):
File "<stdin>", line 109, in <module>
TypeError: 'numpy.int64' object is not iterable
when I run this code
import pandas as pd
import numpy as np
from statsmodels.tsa.arima_model import ARIMA
from statsmodels.stats import stattools as st
import statsmodels.api as sm
import collections
import datetime
import warnings
import scipy.stats
df_rent = import_rents()
df_return = import_ee_rets()
mostrecent = df_return.iloc[len(df_return) - 1]
mostrecentYYYY = mostrecent['Year']
mostrecentQ = mostrecent['Quarter']
mostrecentperiod = str(mostrecentYYYY) + "-Q" + str(mostrecentQ)
rentcols = df_rent.columns.values
colnames = []
#loop through the columns in df_rent until the column == the most recent period for which we have ee return data
for colname in rentcols:
if colname != mostrecentperiod:
rentcols = colnames
#subset df_rent to only include columns that also have ee return data
df_rent = df_rent[rentcols]
#change dtype of metro_code / metro columns to string for matching later
df_rent['metro_code'] = df_rent['metro_code'].apply(str)
df_return['Metro'] = df_return['Metro'].apply(str)
df = pd.read_csv('//x/Project/_data/raw_data/rent_change.csv')
metros = list(np.unique(df['metro_code']))
regress_result_names = [
regress_result_names = pd.Series(regress_result_names)
results = pd.DataFrame(columns=regress_result_names)
row = 0
for metro in metros:
for nlag in range(0, 5):
for nma in range(1, 11):
for AR in range(1, 5):
y = df_rent[df_rent['metro_code'] == str(metro)]
y = y.values.tolist()
y = y[0]
# delete first two columns of df_rent (they don't contain numeric data)
#y = rent time series data for specific metro
y = pd.Series(y)
#x1 = lagged moving average data for given params
df_return1 = df_return[df_return['Metro'] == str(metro)]
df_return1 = df_return1.reset_index(drop = True)
x1 = lagged_moving_avg(df = df_return1, metro_code = metro, nlag = nlag, nma = nma)
#y and x1 dataframe
y_label = 'y_Rent'
x_lagMA_label = 'x1_LaggedMA'
df1 = pd.DataFrame()
df1[y_label] = y
df1[x_lagMA_label] = x1
if mostrecentQ == 1:
currmonth = "01"
elif mostrecentQ == 2:
currmonth = "04"
elif mostrecentQ == 3:
currmonth = "07"
currmonth = "10"
#convert index to datetime to run the regressions
currpd = pd.to_datetime((str(mostrecentYYYY) + currmonth), format='%Y%m')
df1.index = pd.date_range(*(pd.to_datetime(['1990-01', currpd]) + pd.offsets.QuarterEnd()), freq='Q')
#drop any rows that have missing observations
df1 = df1.dropna()
#df1.to_csv('//Nisfile01/x/Project - Real Estate Database/real_estate/odil/XandY.csv', index=True)
reg = ARIMA(endog = df1[y_label], order = (AR, 0,0)).fit(trend = 'nc', disp = 0, tol=1e-20)
resid_reg = reg.resid
reg2 = sm.OLS(resid_reg, df1[x_lagMA_label]).fit()
resid_reg2 = reg2.resid
dfrow = {
'metro': metro,
'num_lag': nlag,
'num_ma': nma,
'num_AR': AR,
'beta_x1_retmov': reg2.params[0],
'x1_se': reg2.bse[0],
'x1_tstat': reg2.tvalues[0],
'x1_pval': reg2.pvalues[0],
'r-squared': reg2.rsquared,
'fstat_pvalue': reg2.f_pvalue,
'durbin-watson': st.durbin_watson(reg2.resid),
'resid_var': resid_reg2.var(),
#create df for output called results
for key in dfrow.keys():
results.loc[row, key] = list(dfrow[key])
row = row + 1
Any help is very much appreciated.
P.S. Sorry for the messy code
The offending line is results.loc[row, key] = list(dfrow[key]).
You are trying to convert a single value, in this case a numpy.int64 object, to a list. I assume that what you're trying to do, and correct me if I am wrong, is create a singleton list with the int64 inside it. If that's what you want to do, you should use:
results.loc[row, key] = [dfrow[key]]

