code showing empty dataframe - python

I write the below code for my project but dataframe df showing empty records.I want to know where i am lacking in the code:
import urllib
from urllib2 import *
import pandas as pd
def urlmake(req):
requests = [req]
for parms in requests:
url = 'http://localhost:8983/solr/data/select?indent=on&' + urllib.urlencode(parms)
connection = urlopen(url)
response = eval(connection.read())
t = response['response']['numFound']
req2 = req['q'][13:17]
print(req2)
if(req2 == 'AXIS'):
print('true')
for i in range(0,t):
t1 = float((response['response']['docs'][i]['message']).split(" ")[1])
#print(t1)
t2 = response['response']['docs'][i]['customer_id']
#print(t2)
df = df.append(pd.DataFrame(t2,t1))
ba_query = [{'q':'sender_name:*AXIS* AND message:*Avbl Lmt*','start':0,'rows':211,'wt':'json'}]
for i in range(0,len(ba_query)):
urlmake(ba_query[i])
getting errror as:
UnboundLocalError: local variable 'df' referenced before assignment

import urllib
from urllib2 import *
import pandas as pd
df = pd.DataFrame(columns=['Customer_id','Spent'])
def urlmake(req):
requests = [req]
for parms in requests:
url = 'http://localhost:8983/solr/data/select?indent=on&' + urllib.urlencode(parms)
connection = urlopen(url)
response = eval(connection.read())
t = response['response']['numFound']
req2 = req['q'][13:17]
print(req2)
if(req2 == 'AXIS'):
print('true')
for i in range(0,t):
t1 = float((response['response']['docs'][i]['message']).split(" ")[1])
#print(t1)
t2 = response['response']['docs'][i]['customer_id']
#print(t2)
df = df.append({'Customer_id':t2, 'Spent':t1}, ignore_index=True) # HERE
See the comment in the code
.
Here's an MCVE of how your code should look:
import pandas as pd
import numpy as np
df = pd.DataFrame()
for iteration in range(0, 5):
dummy_data = np.random.rand(3, 3)
df = df.append(pd.DataFrame(dummy_data))
df.columns = ['a', 'b', 'c']
New MCVE:
import pandas as pd
import numpy as np
def myfunc():
df = pd.DataFrame()
for iteration in range(0, 5):
dummy_data = np.random.rand(3, 3)
df = df.append(pd.DataFrame(dummy_data))
df.columns = ['a', 'b', 'c']
return df
df2 = myfunc()
print(df2)

Related

Can I change datatype list to bs4.element.ResultSet?

I wanted to get data from 2022-01 to 2022-04,
so I used this code.
from urllib.request import urlopen
import pandas as pd
from bs4 import BeautifulSoup
df = []
for i in month:
gu_code = 11680
numOfRows=1000
month = [202201, 202202, 220203, 202204]
url = "http://openapi.molit.go.kr:8081/OpenAPI_ToolInstallPackage/service/rest/RTMSOBJSvc/getRTMSDataSvcAptTrade?LAWD_CD="+str(gu_code)+"&DEAL_YMD="+ str(i) +"&numOfRows="+str(numOfRows)+"&serviceKey="+str(ServiceKey)
result = urlopen(url)
house = BeautifulSoup(result, 'lxml-xml')
te = house.find_all('item')
df.append(te)
after this, I used this code but the problem is df is the list. and len(df) is 4.
data = []
for i in range(k):
month = df[i].월.string.strip()
price = df[i].거래금액.string.strip()
built_yr = df[i].건축년도.string.strip()
dong_name = df[i].법정동.string.strip()
apt_name = df[i].아파트.string.strip()
size = df[i].전용면적.string.strip()
gu_code = df[i].지역코드.string.strip()
total = [dong_name, apt_name, price, month, built_yr, size, gu_code ]
data.append(total)
how can I solve this problem?
Do:
for i in range(len(df)):
Or a better way to do that is change your df list to a different variable, and iterate through that:
Note, your loop makes no sense. You are iterating through a list that is defined inside the loop. So I changed it to make sens, but have no idea what you are doing:
from urllib.request import urlopen
import pandas as pd
from bs4 import BeautifulSoup
df_list = []
gu_code = 11680
numOfRows=1000
month = [202201, 202202, 220203, 202204]
for i in month:
url = "http://openapi.molit.go.kr:8081/OpenAPI_ToolInstallPackage/service/rest/RTMSOBJSvc/getRTMSDataSvcAptTrade?LAWD_CD="+str(gu_code)+"&DEAL_YMD="+ str(i) +"&numOfRows="+str(numOfRows)+"&serviceKey="+str(ServiceKey)
result = urlopen(url)
house = BeautifulSoup(result, 'lxml-xml')
te = house.find_all('item')
df_list.append(te)
Then:
data = []
for df in df_list:
month = df[i].월.string.strip()
price = df[i].거래금액.string.strip()
built_yr = df[i].건축년도.string.strip()
dong_name = df[i].법정동.string.strip()
apt_name = df[i].아파트.string.strip()
size = df[i].전용면적.string.strip()
gu_code = df[i].지역코드.string.strip()
total = [dong_name, apt_name, price, month, built_yr, size, gu_code ]
data.append(total)

Python dictionary comprehension Scoping

I am running the code below and getting a NameError.
When I run it one line at a time, it works, but when I wrap the lines inside a function, I get NameError: name 'primes_cols' is not defined. Why is the code below producing a NameError when the variables are defined?
import pandas as pd
primes = pd.DataFrame(columns = ['A','B','C','D'], data=[[3,5,7,11]])
tens = pd.DataFrame(columns = ['E','F','G','H'], data=[[10,20,30,40]])
evens = pd.DataFrame(columns = ['I','J','K','L'], data=[[4,8,12,16]])
def process():
keys = ['primes','tens','evens']
# My Data
data = {key:eval(key) for key in keys}
# Trim Dataframes
primes_cols = ['A','B']
tens_cols = ['E','F']
evens_cols = ['I','J']
data = {key:value[eval(key + '_cols')] for (key, value) in data.items()}
return data
df = process()
The best approach (#juanpa.arrivillaga), as seen in the comments, Is to avoid eval
import pandas as pd
primes = pd.DataFrame(columns = ['A','B','C','D'], data=[[3,5,7,11]])
tens = pd.DataFrame(columns = ['E','F','G','H'], data=[[10,20,30,40]])
evens = pd.DataFrame(columns = ['I','J','K','L'], data=[[4,8,12,16]])
def process():
# Trim Dataframes
primes_cols = ['A','B']
tens_cols = ['E','F']
evens_cols = ['I','J']
keys = ['primes','tens','evens']
values = [primes, tens, evens]
cols = [primes_cols, tens_cols, evens_cols]
return {key:df[c] for key,df,c in zip(keys, values, cols)}

Split json file into multiple csv files depending on date?

I am trying to split up a json file from alpha-vantages api into separate files depending on the date. I'm also trying to reformat the file to have blank values in the gaps where dates are missing. The following code is what I have come up with but it gives me the TypeError: 'list' object is not callable". I'm fairly new to python and pandas so I'm sure there is a better way to go about this.
import requests
import pandas as pd
from datetime import datetime, timedelta
from dateutil import parser
import numpy as np
from pandas import DataFrame
import json
symbol = "MSFT"
symbol_list = symbol.split(",")
def num_el(list):
count = 0
for element in list:
count += 1
return count
def csv_make(sy, dar, dat):
csv_file = open(f"{sy}_1min_{dar}.csv", "w", newline="")
csv_file.write(dat)
csv_file.close()
i = 0
x = -1
n = num_el(symbol_list)
while i < n:
namesym = symbol_list[x]
ticker = namesym
api_key = 'APIKEYHERE'
url = f'https://www.alphavantage.co/query?function=TIME_SERIES_INTRADAY&symbol={ticker}&outputsize=full&interval=1min&apikey={api_key}'
data = requests.get(url)
dsf = data.json()
daf = pd.DataFrame(dsf['Time Series (1min)'])
dxf: DataFrame = daf.T
dxf.index.name = 'time'
dxf.reset_index(inplace=True)
dxf['time'] = pd.to_datetime(dxf['time'])
dxf['minute'] = dxf['time'].dt.time
dxf['day'] = dxf['time'].dt.day
dxf['date'] = dxf['time'].dt.date
agg = dxf.groupby([dxf['day']])
length1 = dxf.groupby([dxf['day']]).size()
length = pd.DataFrame(length1)
length.index.name = 'day'
length.reset_index(inplace=True)
length_sum = length[0].sum()
v = 0
d = length_sum
b = len(length)
x2 = length_sum
while v < b:
a = length[0][v]
x2 -= length[0][v]
xd = agg.get_group(length['day'][v])
date = xd['date'][x2]
max_dt = parser.parse(str(max(xd['minute'])))
min_dt = parser.parse(str(min(xd['minute'])))
dt_range = []
while min_dt <= max_dt:
dt_range.append(min_dt.strftime("%H:%M:%S"))
min_dt += timedelta(seconds=60)
complete_df = pd.DataFrame({'minute': dt_range})
xy = complete_df.astype('str')
yx = xd.astype('str')
dasf = xy.merge(yx, how='left', on='minute')
dasf['ev'] = np.where(dasf['1. open'].notnull(), 'False', 'True')
time = []
open = []
high = []
low = []
close = []
volume = []
empty_value = []
for ib in range(len(dasf)):
time.append(dasf['minute'][ib])
open.append(dasf['1. open'][ib])
high.append(dasf['2. high'][ib])
low.append(dasf['3. low'][ib])
close.append(dasf['4. close'][ib])
volume.append(dasf['5. volume'][ib])
empty_value.append(dasf['ev'][ib])
time_df = pd.DataFrame(time).rename(columns={0: 'Time'})
open_df = pd.DataFrame(open).rename(columns={0: 'Open'})
high_df = pd.DataFrame(high).rename(columns={0: 'High'})
low_df = pd.DataFrame(low).rename(columns={0: 'Low'})
close_df = pd.DataFrame(close).rename(columns={0: 'Close'})
volume_df = pd.DataFrame(volume).rename(columns={0: 'Volume'})
empty_value_df = pd.DataFrame(empty_value).rename(columns={0: 'Empty Value'})
frames = [time_df, open_df, high_df, low_df, close_df, volume_df, empty_value_df]
df = pd.concat(frames, axis=1, join='inner')
df = df.set_index('Time')
ad = df.to_csv()
csv_make(namesym, date, ad)
v += 1
i += 1

Convert DF column values to column (like pivot)

I am scraping api data and totaling counts of different values into a dictionary 'building':'count' for each player (row). I would like to be able to analyze it further. An easy solution would be to pull the different unique 'buildings' (dictionary keys within the row) as dataframe columns and then do the equivalent of an index/match/match on them. The script currently gets the data, and I can extract the unique keys, but I am lost at how to make them into DF columns and then how to do the index/match/match. There may be a better approach from even before running the 'count' part of the script.
You should be able to run the script, no credentials are required to GET against the API. If you see the ranklist DF column with the building counts you will see what I am referencing.
Thank you for any guidance!
import requests
import pandas as pd
from datetime import datetime
from datetime import date
from datetime import timedelta
import operator
from time import sleep
ranklist = pd.DataFrame()
for i in range(430):
baserank_url = 'https://www.simcompanies.com/api/v3/encyclopedia/ranking/' + str(i) + '/'
r = requests.get(baserank_url)
rank_json = r.json()
df = pd.DataFrame.from_dict(rank_json)
df=df.filter(['company','id','rank','value'])
ranklist = ranklist.append(df)
ranklist.to_csv(r'rank_dataframe.csv',index=False)
print('Ranking list started succesfully!')
levellist=[]
bcolist=[]
today= date.today()
for row in ranklist.itertuples():
attempt = 0
while True:
if attempt == 6:
break
try:
print(str(row.rank + 1) +' ' + str(attempt))
account_url = 'https://www.simcompanies.com/api/v2/players/' + str(row.id) + '/'
r = requests.get(account_url)
account_json = r.json()
playerid = account_json.get("player").get("id")
playerlevel = account_json.get("player").get("level")
datestart = datetime.strptime(account_json.get("player").get("dateJoined")[:10],'%Y-%m-%d').date()
yearsactive = round((today - datestart)/ timedelta(days=365.2425),2)
buildings = account_json.get("buildings")
certificates = account_json.get("certificates")
bnames = [d['name'] for d in buildings]
bnames = [n.replace('Park','Recreation').replace('Lake','Recreation').replace('Castle','Recreation') for n in bnames]
cnames = [d['name'] for d in certificates]
sptr = 'Yes' if 'Supporter' in cnames else 'No'
dictOfElems = dict()
for elem in bnames:
if elem in dictOfElems:
dictOfElems[elem] += 1
else:
dictOfElems[elem] = 1
blist = {key:value for key, value in dictOfElems.items()}
blist = dict(sorted(blist.items(),key=operator.itemgetter(1),reverse=True))
bcolist.append([blist.keys()])
levellist.append([playerid, playerlevel,sptr, datestart,yearsactive,blist])
except:
sleep(20)
attempt +=1
continue
break
#get unique building values
bcodf= pd.DataFrame(bcolist,columns=['buildings'])
bcouni = list(set([a for b in bcodf.buildings.tolist() for a in b]))
print(bcouni)
leveldf = pd.DataFrame(levellist,columns=['id','level','sptr','datestart','yearsactive','blist'])
#clist = list(set([a for b in leveldf.cnames.tolist() for a in b]))
#print(leveldf[blist])
#bul = leveldf[blist].keys()
#buniq = list(set([a for b in leveldf.bul.tolist() for a in b]))
#print(bul)
ranklist = ranklist.merge(leveldf, on='id', how='left')
ranklist['rank'] +=1
ranklist.to_csv(r'rank_dataframe.csv',index=False)

Python Pandas NameError: name 'data' is not defined

I'm new to coding. When I attempt to run this it says:
NameError: name 'data' is not defined.
import numpy as np
import pandas as pd
from scipy import stats
from matplotlib import pyplot as plt
from matplotlib.ticker import MaxNLocator
import datetime
import json
from bs4 import BeautifulSoup
import requests
import time
def fetchCryptoClose(fsym, tsym):
# function fetches the close-price time-series from cryptocompare.com
# it may ignore USDT coin (due to near-zero pricing)
# daily sampled
cols = ['date', 'timestamp', fsym]
lst = ['time', 'open', 'high', 'low', 'close']
timestamp_today = datetime.today().timestamp()
curr_timestamp = timestamp_today
for j in range(2):
df = pd.DataFrame(columns=cols)
url = "https://min-api.cryptocompare.com/data/histoday?fsym=" + fsym + \
"&tsym=" + tsym + "&toTs=" + str(int(curr_timestamp)) + "&limit=3"
response = requests.get(url)
soup = BeautifulSoup(response.content, "html.parser")
dic = json.loads(soup.prettify())
for i in range(1, 4):
tmp = []
for e in enumerate(lst):
x = e[0]
y = dic['Data'][i][e[1]]
if(x == 0):
tmp.append(str(timestamp2date(y)))
tmp.append(y)
if(np.sum(tmp[-4::]) > 0): # remove for USDT
tmp = np.array(tmp)
tmp = tmp[[0,1,4]] # filter solely for close prices
df.loc[len(df)] = np.array(tmp)
# ensure a correct date format
df.index = pd.to_datetime(df.date, format="%Y-%m-%d")
df.drop('date', axis=1, inplace=True)
curr_timestamp = int(df.ix[0][0])
if(j == 0):
df0 = df.copy()
else:
data = pd.concat([df, df0], axis=0)
data.drop("timestamp", axis=1, inplace=True)
return data # DataFrame
# N-Cryptocurrency Portfolio (tickers)
fsym = ['BTC', 'ETH', 'XRP', 'LTC', 'DASH', 'XMR', 'ETC', 'MAID', 'XEM', 'REP']
# vs.
tsym = 'USD'
for e in enumerate(fsym):
print(e[0], e[1])
if(e[0] == 0):
try:
data = fetchCryptoClose(e[1], tsym)
except:
pass
else:
try:
data = data.join(fetchCryptoClose(e[1], tsym))
except:
pass
# ensure values to be floats
# save portfolio to a file (HDF5 file format)
store = pd.HDFStore('portfolio2.h5')
store['data'] = data
store.close()
# read in your portfolio from a file
df = pd.read_hdf('portfolio2.h5', 'data')
print(df)
Don't use try-except-pass because will silence all your exceptions and you might never actually create `data.
Replace this code:
for e in enumerate(fsym):
print(e[0], e[1])
if(e[0] == 0):
try:
data = fetchCryptoClose(e[1], tsym)
except:
pass
else:
try:
data = data.join(fetchCryptoClose(e[1], tsym))
except:
pass
with this:
for e in enumerate(fsym):
print(e[0], e[1])
if(e[0] == 0):
data = fetchCryptoClose(e[1], tsym)
else:
data = data.join(fetchCryptoClose(e[1], tsym))
and see where your real exceptions are.

Categories

Resources