Task: to write a function for changing the timeframe on encodings.
Body of the program:
import pandas as pd
import numpy as np
from future_functions import *
# Load CSV Data
data = pd.read_csv('Data/EURUSDHours.csv')
data.columns = ['Date','open','high','low','close','AskVol']
data = data.set_index(pd.to_datetime(data.Date))
data = data[['open','high','low','close','AskVol']]
prices = data.drop_duplicates(keep=False)
hkaprices = prices.copy()
hkaprices['Symbol'] = 'SYMB'
HKA = OHLCresample(hkaprices,'15H')
, where future_functions is an attached file with a function.
The function itself:
def OHLCresample(DataFrame,TimeFrame,column='ask'):
grouped = DataFrame.groupby('Symbol')
if np.any(DataFrame.columns == 'Ask'):
if column == 'ask':
ask = grouped['Ask'].resample(TimeFrame).ohlc()
askVol = grouped['AskVol'].resample(TimeFrame).count()
resampled = pd.DataFrame(ask)
resampled['AskVol'] = askVol
elif column == 'bid':
bid = grouped['Bid'].resample(TimeFrame).ohlc()
bidVol = grouped['BidVol'].resample(TimeFrame).count()
resampled = pd.DataFrame(bid)
resampled['BidVol'] = bidVol
else:
raise ValueError('Column must be a string. Either ask or bid')
elif np.any(DataFrame.columns == 'close'):
open = grouped['open'].resample(TimeFrame).ohlc()
close = grouped['close'].resample(TimeFrame).ohlc()
high = grouped['high'].resample(TimeFrame).ohlc()
low = grouped['low'].resample(TimeFrame).ohlc()
askVol = grouped['AskVol'].resample(TimeFrame).ohlc()
resampled = pd.DataFrame(open)
resampled['high'] = high
resampled['low'] = low
resampled['close'] = close
resampled['AskVol'] = askVol
resampled = resampled.dropna()
return resampled
I receive an error:
KeyError: 'AskVol'
ValueError: Wrong number of items passed 4, placement implies 1
The data set can be taken following the link:
https://nofile.io/f/Q9AKjGbSUHd/EURUSDHours.csv
I understand that the problem is in dimensionality, but I do not know how to resolve it.
Related
I was making my automatic stock strategy yield calculation program with Python. Here's my code:
import FinanceDataReader as fdr
import numpy as np
# ...(more modules for python)
pd.options.display.float_format = '{:.5f}'.format
file_list = os.listdir('/home/sejahui/projects/stock_data_excel')
for i in range(20):
os.chdir('/home/sejahui/projects/stock_data_excel')
odd = file_list[i]
data = pd.read_excel('/home/sejahui/projects/stock_data_excel/'+str(odd))
def calMACD(data, short=5, long=25, signal=9):
data.sort_index()
data['MVA_25']=data['Close'].ewm(span=long, adjust=False).mean()
data['MVA_5']=data['Close'].ewm(span=short, adjust=False).mean()
data['MACD']=data['Close'].ewm(span=short, adjust=False).mean() - data['Close'].ewm(span=long, adjust=False).mean()
data['Signal']=data['MACD'].ewm(span=signal, adjust=False).mean( )
#data['Buy_sign']=(data['MACD']-data['Signal']) >=600
data['Buy_sign']=np.where(data['MACD']-data['Signal'] >=451, 'Buy' , 'Sell' )
#data['Target_1']=(data['Close']-data['Close'].shift(1))/data['Close'].shift(1)*100
#data['Target_1']=np.where(data['Buy_sign']=='Buy', (data['Change'])+1,1)
#data['Target_2']=np.where(data['Buy_sign']=='Sell', (data['Change'])+1,1)
#data['Real_world']= 1000000*data['Target_1']
#data['Real_world_2']= 1000000*data['Target_2']
#data['Condition'] = np.where(data['Real_world']<1000000, data['Real_world']-data['Real_world'].shift(-2),1)
##data['Condition_2'] = np.where(data['Real_world']<1000000, data['Target_1'].shift(-2),1)
#data['Moneyflow'] =
#plt.plot(data['Date'], data['Real_world'])
#data[data.Buy_sign !='Sell']
'''
data['Target_1']=np.where(data['Buy_sign']=='Buy', data['Change'],1)
data['Target_2']=np.where(data['Buy_sign']=='Sell', data ['Change'],1)
data['Yield']=np.where(data['Buy_sign']=='Sell', data['Target_1']/data['Target_2'],1 )
'''
'''
data['Result']=data['Target_1'].cumprod()
data['Result_2']=data['Target_2'].cumprod()
data['??????'] = data['Result'] - data['Result_2']
'''
return data
Adjusted = calMACD(data)
Adjusted.drop(['Change'], axis=1, inplace = True)
Filtered = Adjusted[Adjusted.Buy_sign!='Sell'].copy()
#print(Filtered)
#Filtered = (Adjusted.Buy_sign =='Buy') #(Adjusted.Condition = 1.0)
#Master = Adjusted.loc[Adjusted,['Date','Buy_sign','Target_1','Real_world',]]
#print(Adjusted)
def backtester(Filtered):
Filtered['Change'] = ((Filtered['Close'] - Filtered['Close'].shift(1)) / Filtered['Close'].shift(1))+1
#data['Target_1']=np.where(data['Buy_sign']=='Buy', (data['Change'])+1,1)
Filtered['Real_world'] = 1000000*Filtered['Change']
#Filtered['Condition'] = np.where(Filtered['Real_world']<1000000, Filtered['Real_world'].shift(-2)-Filtered['Real_world'],1)
Filtered['Condition'] = np.where(Filtered['Real_world']<1000000, Filtered['Change'].shift(-2),1)
#Filtered['Target_1'] = np.where(Filtered['Buy_sign']=='Buy', (Filtered['Change'])+1,1)
#Filtered['Condition'] = np.where(Filtered['Real_world']<1000000, Filtered['Real_world'].shift(-2)-Filtered['Real_world'],1)
return Filtered
s = backtester(Filtered)
e = s[s.Condition!=1.00000]
x = e.dropna()
y = x['Condition']
list_1 = []
write_wb = Workbook()
write_ws = write_wb.create_sheet('MACD&Signal gap data sheet')
write_ws = write_wb.active
write_ws['A1'] = 'Name'
write_ws['B1'] = 'Profit'
try:
print(geometric_mean(y)*1000000*12)
except StatisticsError as e:
print ('Sell is empty':',odd)
else:
d = (geometric_mean(y)*1000000*12)
print(d,odd)
list_1.insert(i,d)
Print(list_1)
Here's the part where I'm troubling with:
s = backtester(Filtered)
e = s[s.Condition!=1.00000]
x = e.dropna()
y = x['Condition']
list_1 = []
try:
print(geometric_mean(y)*1000000*12)
except StatisticsError as e:
print ('Sell is empty':',odd)
else:
d = (geometric_mean(y)*1000000*12)
print(d)
list_1.insert(d)
print(list_1)
When I initiate the code where I am having problems, list only saves the last result of 'try, except, else' function. My intention was saving all the results. What change should I give to save all the results?
Here's the output of the list:
[11772769.197974786]
Your problem is that you are using insert instead of append and the main difference that insert takes a second argument for the position that you want to insert your element at and when none is provided it is 0 by default so you are consistently inserting at the same index resulting in a list with only the last element at the first position.
To fix that simply use append instead.
else:
d = (geometric_mean(y)*1000000*12)
print(d)
list_1.append(d)
You want to use append, not insert. see Python Data Structures
Change list_1.insert(d) to list_1.append(d)
The insert is defaulting to index 0 and just updating it each time.
Edit: Just noticed your answer is in the question title.
I'm making a terminal stock portfolio tracker in Python using the yahoo_fin package and exporting the data in a table using tabulate. My variables are individual stock tickers (in this case, PTON, CHWY, and FUBO). Is there a way to have to have a set of variables run though the same line of function code? As you'll see in my current code, I have to edit each variable within the function to have the table work. Ideally, the code would just read each ticker through the same line of code instead of individually editing each set.
This is my first post so apologies if the formatting for the code is wrong. Thank you.
from yahoo_fin import stock_info as si
from datetime import datetime
from tabulate import tabulate
now = datetime.now()
time = now.strftime('%I:%M:%S %p')
# tape = "PTON, CHWY, FUBO"
print(time)
tkr1 = 'PTON'
tkr2 = 'CHWY'
tkr3 = 'FUBO'
# FIGURE OUT A WAY TO HAVE EACH TICKER RUN THROUGH THE PACKAGE WITHOUT HAVING TO MAKE DIFFERENT VARIABLES
# You could have like Run #1 defined as a variable, so Run1 = [], run2 = [] etc.
#1
output1 = si.get_live_price(tkr1)
# get quote_table
quote1 = si.get_quote_table(tkr1)
# get data from quote_table:
prevclose1 = quote1.get("Previous Close")
vol1 = quote1.get("Volume")
rangeday1 = quote1.get("Day's Range")
range52_1 = quote1.get('52 Week Range')
percentchange1 = (output1/prevclose1)-1
percentage1 = "{:.2%}".format(percentchange1)
earningsdate1 = quote1.get('Earnings Date')
#2
output2 = si.get_live_price(tkr2)
# get quote_table
quote2 = si.get_quote_table(tkr2)
# get data from quote_table:
prevclose2 = quote2.get("Previous Close")
vol2 = quote2.get("Volume")
rangeday2 = quote2.get("Day's Range")
range52_2 = quote2.get('52 Week Range')
percentchange2 = (output2/prevclose2)-1
percentage2 = "{:.2%}".format(percentchange2)
earningsdate2 = quote2.get('Earnings Date')
#3
output3 = si.get_live_price(tkr3)
# get quote_table
quote3 = si.get_quote_table(tkr3)
# get data from quote_table:
prevclose3 = quote3.get("Previous Close")
vol3 = quote3.get("Volume")
rangeday3 = quote3.get("Day's Range")
range52_3 = quote3.get('52 Week Range')
percentchange3 = (output3/prevclose3)-1
percentage3 = "{:.2%}".format(percentchange3)
earningsdate3 = quote3.get('Earnings Date')
#Tabulate
data = [[tkr1, round(output1, 2), percentage1,(f'{vol1:,}'),rangeday1,range52_1,earningsdate1],
[tkr2, round(output2, 2), percentage2,(f'{vol2:,}'),rangeday2,range52_2,earningsdate2],
[tkr3, round(output3, 2), percentage3,(f'{vol3:,}'),rangeday3,range52_3,earningsdate3]]
print(tabulate(data, headers=["Ticker", "Price", "% Chg", "Volume", "Day Range", "52 Range", "Earnings"]))
just make a function:
from yahoo_fin import stock_info as si
from datetime import datetime
from tabulate import tabulate
now = datetime.now()
time = now.strftime('%I:%M:%S %p')
# tape = "PTON, CHWY, FUBO"
print(time)
tkr1 = 'PTON'
tkr2 = 'CHWY'
tkr3 = 'FUBO'
def get_ticker_run(ticker_code):
output = si.get_live_price(ticker_code)
# get quote_table
quote = si.get_quote_table(ticker_code)
# get data from quote_table:
prevclose = quote.get("Previous Close")
vol = quote.get("Volume")
rangeday = quote.get("Day's Range")
range52 = quote.get('52 Week Range')
percentchange = (output/prevclose)-1
percentage = "{:.2%}".format(percentchange)
earningsdate = quote.get('Earnings Date')
return [ticker_code, round(output, 2), percentage,(f'{vol:,}'),rangeday,range52,earningsdate]
Finally, use it:
data = [
get_ticker_run(tkr1),
get_ticker_run(tkr2),
get_ticker_run(tkr3)
]
print(tabulate(data, headers=["Ticker", "Price", "% Chg", "Volume", "Day Range", "52 Range", "Earnings"]))
I'm working on a web scraping project, and have all the right code that returns me the json data in the format that I want if I used the #print command below, but when I got to run the same code except through Pandas Dataframe it only returns the first row of Data that I'm looking for. Just running the print, it returns the expected 17 rows of data I'm looking for. Dataframe to CSV gives me the first row only. Totally stumped! So grateful for anyone's help!
for item in response['body']:
DepartureDate = item['legs'][0][0]['departDate']
ReturnDate = item['legs'][1][0]['departDate']
Airline = item['legs'][0][0]['airline']['code']
Origin = item['legs'][0][0]['depart']
Destination = item['legs'][0][0]['destination']
OD = (Origin + Destination)
TrueBaseFare = item['breakdown']['baseFareAmount']
YQYR = item['breakdown']['fuelSurcharge']
TAX = item['breakdown']['totalTax']
TTL = item['breakdown']['totalFareAmount']
MARKEDUPTTL = item['breakdown']['totalCalculatedFareAmount']
MARKUP = ((MARKEDUPTTL - TTL) / (TTL)*100)
FBC = item['fareBasisCode']
#print(DepartureDate,ReturnDate,Airline,OD,TrueBaseFare,YQYR,TAX,TTL,MARKEDUPTTL,MARKUP,FBC)
MI = pd.DataFrame(
{'Dept': [DepartureDate],
'Ret': [ReturnDate],
'AirlineCode': [Airline],
'Routing': [OD],
'RealFare': [TrueBaseFare],
'Fuel': [YQYR],
'Taxes': [TAX],
'RealTotal': [TTL],
'AgencyTotal': [MARKEDUPTTL],
'Margin': [MARKUP],
'FareBasis': [FBC],
})
df = pd.DataFrame(MI)
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
pd.set_option('display.width', None)
df.to_csv('MITest7.csv')
When you print all your values after the cycle, you will see that you get only the last values. To resolve this problem you need to create lists and put there your values.
Try this:
DepartureDate = []
ReturnDate = []
Airline = []
Origin = []
Destination = []
OD = []
TrueBaseFare = []
YQYR = []
TAX = []
TTL = []
MARKEDUPTTL = []
MARKUP = []
FBC = []
for item in response['body']:
DepartureDate.append(item['legs'][0][0]['departDate'])
ReturnDate.append(item['legs'][1][0]['departDate'])
Airline.append(item['legs'][0][0]['airline']['code'])
Origin.append(item['legs'][0][0]['depart'])
Destination.append(item['legs'][0][0]['destination'])
OD.append((Origin[-1] + Destination[-1]))
TrueBaseFare.append(item['breakdown']['baseFareAmount'])
YQYR.append(item['breakdown']['fuelSurcharge'])
TAX.append(item['breakdown']['totalTax'])
TTL.append(item['breakdown']['totalFareAmount'])
MARKEDUPTTL.append(item['breakdown']['totalCalculatedFareAmount'])
MARKUP.append(((MARKEDUPTTL[-1] - TTL[-1]) / (TTL[-1])*100))
FBC.append(item['fareBasisCode'])
I have written the following code that has a function model_data to perform a particular set of tasks. I have to pass the list of Badges and the type of category 1 or 2 along with an empty dataframe data.
But while running the code I am getting an error. I searched SO for answers but this type of Question was not found.
CODE
#Model Function
def model_data(badge_list, data):
for key, value in badge_list.items():
#Check for Post Type
if (value == 1):
badge_type = posts.loc[posts.PostTypeId == '1']
elif (value == 2):
badge_type = posts.loc[posts.PostTypeId == '2']
#Obtain required fields from Badge Data
badge_type = badge_type[['OwnerUserId', 'Id','Score', 'CreationDate']]
badge_type.columns = ['UserId', 'Id', 'Score','CreationDate']
Badge = key
#Obtain time when user first obtained Badge
badge_data = user_badge_dt(Badge)
#Find the number of posts made before and after 1 week of Badge Attainment
post_data = post_details(df1 = badge_data, df2 = badge_type)
post_data.date = pd.to_datetime(post_data.date)
#Calculate APR
post_data = APR(post_data)
#Calculate Score
post_data = score(df = post_data, post_type = badge_type)
#Generate Final Dataframe with Badge Count
data1 = badge_number(post_data)
data1 = data1[['1','2','3','date','Score','APR']]
#Append Dataframe
data = data.append(data1)
return data
#Function Call
questionBadge_list = {'Good Question':1, 'Explainer':2}
data = pd.DataFrame()
badge1_data = model_data(badge_list = questionBadge_list, data = data)
ERROR
IndexError: Too many levels: Index has only 1 level, not 2
ERROR LINE
The code line badge_data = user_badge_dt(Badge) gives this error so I am adding the complete function.
#Function to obtain UserId with the date-time of obtaining given badge for the first time
def user_badge_dt(badge):
#Creating DataFrame to obtain all UserId and date-Time of given badge
df = badges[['UserId','Date']].loc[badges.Name == badge]
#Obtaining the first date-time of badge attainment
v = df.groupby("UserId", group_keys=False)['Date'].nsmallest(1)
v.index = v.index.droplevel(1)
df['date'] = df['UserId'].map(v)
df.drop(columns='Date',inplace=True)
#Removing all duplicate values of Users
df.drop_duplicates(subset='UserId', inplace=True )
return df
I am new to python and pandas and have a csv file that am reading into a panda data frame. Find it attached below.
I am trying to populate the the column, OND_ORIGIN and OND_DEST based on row values in the PLDATE.
The logic is for every flight flown on the same day, the OND_ORIGIN and OND_DEST should be the same as for departure_from and Arr_to columns
import pandas as pd
import numpy as np
import csv
location = r'C:\Users\bi.reports\Desktop\output.csv'
df = pd.read_csv(location,sep='\s*,\s*',engine='python')
for i, row in df.iterrows():
if row['COUPON_NUMBER'] == 1:
df.OND_ORIGIN = df.DEP_FROM
#df.OND_DEST = df.DEP_FROM
elif row['COUPON_NUMBER'] == 2:
#df.OND_ORIGIN = df.DEP_FROM
df.OND_DEST = df.ARR_TO
elif row['COUPON_NUMBER'] == 3:
#df.OND_ORIGIN = df.DEP_FROM
df.OND_DEST = df.ARR_TO
else:
df.OND_ORIGIN = df.DEP_FROM
#df.OND_DEST = df.ARR_TO
df.to_csv('out.csv', sep=',',index = False)
csv file in use
Try this:
df.loc[df['COUPON_NUMBER'] == 1, 'OND_ORIGIN'] = df.DEP_FROM
df.loc[df['COUPON_NUMBER'].isin([2,3]), 'OND_DEST'] = df.ARR_TO
df.loc[~df['COUPON_NUMBER'].isin([1,2,3]), 'OND_ORIGIN'] = df.DEP_FROM
or bit optimized:
df.loc[df['COUPON_NUMBER'].isin([2,3]), 'OND_DEST'] = df.ARR_TO
df.loc[~df['COUPON_NUMBER'].isin([2,3]), 'OND_ORIGIN'] = df.DEP_FROM