I have written a script that reads the HTML tables from the outlook application and has 4 columns (Ticker, Level, Pkey, Date). I need to put these values in an insert query, how can I bifurcate data frame into different columns so that the insert query can take references from different columns?
import pandas as pd
import win32com.client
import numpy as np
from sqlalchemy.engine import create_engine
import re
from datetime import datetime, timedelta
import requests
import sys
from bs4 import BeautifulSoup
from pprint import pprint
EMAIL_ACCOUNT = 'xyz'
EMAIL_SUBJ_SEARCH_STRING = ('tg')
#EMAIL_CONTNT = {'Ticker': [df.to_string()[-82:-74], 'TickerLevel': [price], 'DATE': [Date]}
out_app = win32com.client.gencache.EnsureDispatch("Outlook.Application")
out_namespace = out_app.GetNamespace("MAPI")
#lastWeekDateTime = dt.datetime.now() - dt.timedelta(days=1)
#lastWeekDateTime = lastWeekDateTime.strftime('%m/%d/%Y %H:%M %p')
root_folder = out_namespace.GetDefaultFolder(6)
out_iter_folder = root_folder.Folders['Email_Snapper']
#out_iter_folder = out_iter_folder.Restrict("[ReceivedTime] >= '" + lastWeekDateTime + "'")
item_count = out_iter_folder.Items.Count
Flag = False
cnt = 1
if item_count > 0:
for i in range(item_count, 0, -1):
message = out_iter_folder.Items[i]
if EMAIL_SUBJ_SEARCH_STRING in message.Subject and cnt <=1:
cnt=cnt+1
subject = message.Subject
if datetime.now().weekday() == 0:
DT = datetime.strftime(datetime.now() - timedelta(days=3), '%Y-%m')
else:
DT = datetime.strftime(datetime.now() - timedelta(days=1), '%Y-%m')
DT = subject[subject.find(DT): subject.find(DT) + 10]
Date = datetime.strptime(DT, '%Y-%m-%d').strftime("'%d%b%Y'")
print(Date)
Body_content = message.HTMLBody
Body_content = BeautifulSoup(Body_content,"lxml")
html_tables = Body_content.find_all('table')[0]
#Body_content = Body_content[:Body_content.find("Disclaimer")].strip()
df = pd.read_html(str(html_tables),header=0)[0]
Pkey = {'MSUSDSP5': 71763307, 'MSUSSPVP': 76366654,'MSCBICCO':137292386, 'MSCBWGSP':151971418, 'MSCBBGEC':151971419, 'MSUSEVHI':152547427, 'MSCBCTAD':152547246}
df['Pkey'] = df['Ticker'].map(Pkey)
df['Date'] = Date
print(df)
sql_query = 'call CORE_VALUATIONS.VALUATIONS.INSERTEQCLOSINGPRICE(%d, %s, %s, NULL, NULL)' % ((Pkey), Date, Level)
print(sql_query)
Related
import pandas as pd
from datetime import datetime, date
symbolDf = pd.read_csv('https://shoonya.finvasia.com/NFO_symbols.txt.zip')
symbolDf['Expiry'] = pd.to_datetime(symbolDf['Expiry']).apply(lambda x: x.date())
ocdf = symbolDf[(symbolDf.Symbol == 'BANKNIFTY') & (symbolDf.Expiry == date(2022,12,8)) ]
Pedf = ocdf[ocdf.OptionType == 'PE']
This following part is taking so long:
ltp = 100
strikeList = []
for i in Pedf.index:
strikeInfo = Pedf.loc[i]
res = api.get_quotes(exchange='NFO', token=str(strikeInfo.Token))
res = {'tsym': res['tsym'], 'lp': float(res['lp']),'lotSize':strikeInfo.LotSize,'token':res['token']}
strikeList.extend(res)
strikedf = pd.DataFrame(strikeList)
strikedf.sort_values(by = 'lp',inplace = True)
strikedf['diff'] = abs(strikedf['lp'] - ltp)
strikedf.sort_values(by = 'diff',inplace = True)
strikedf.iloc[0]
It gives the expected results; but it is taking too much time.
I am trying to split up a json file from alpha-vantages api into separate files depending on the date. I'm also trying to reformat the file to have blank values in the gaps where dates are missing. The following code is what I have come up with but it gives me the TypeError: 'list' object is not callable". I'm fairly new to python and pandas so I'm sure there is a better way to go about this.
import requests
import pandas as pd
from datetime import datetime, timedelta
from dateutil import parser
import numpy as np
from pandas import DataFrame
import json
symbol = "MSFT"
symbol_list = symbol.split(",")
def num_el(list):
count = 0
for element in list:
count += 1
return count
def csv_make(sy, dar, dat):
csv_file = open(f"{sy}_1min_{dar}.csv", "w", newline="")
csv_file.write(dat)
csv_file.close()
i = 0
x = -1
n = num_el(symbol_list)
while i < n:
namesym = symbol_list[x]
ticker = namesym
api_key = 'APIKEYHERE'
url = f'https://www.alphavantage.co/query?function=TIME_SERIES_INTRADAY&symbol={ticker}&outputsize=full&interval=1min&apikey={api_key}'
data = requests.get(url)
dsf = data.json()
daf = pd.DataFrame(dsf['Time Series (1min)'])
dxf: DataFrame = daf.T
dxf.index.name = 'time'
dxf.reset_index(inplace=True)
dxf['time'] = pd.to_datetime(dxf['time'])
dxf['minute'] = dxf['time'].dt.time
dxf['day'] = dxf['time'].dt.day
dxf['date'] = dxf['time'].dt.date
agg = dxf.groupby([dxf['day']])
length1 = dxf.groupby([dxf['day']]).size()
length = pd.DataFrame(length1)
length.index.name = 'day'
length.reset_index(inplace=True)
length_sum = length[0].sum()
v = 0
d = length_sum
b = len(length)
x2 = length_sum
while v < b:
a = length[0][v]
x2 -= length[0][v]
xd = agg.get_group(length['day'][v])
date = xd['date'][x2]
max_dt = parser.parse(str(max(xd['minute'])))
min_dt = parser.parse(str(min(xd['minute'])))
dt_range = []
while min_dt <= max_dt:
dt_range.append(min_dt.strftime("%H:%M:%S"))
min_dt += timedelta(seconds=60)
complete_df = pd.DataFrame({'minute': dt_range})
xy = complete_df.astype('str')
yx = xd.astype('str')
dasf = xy.merge(yx, how='left', on='minute')
dasf['ev'] = np.where(dasf['1. open'].notnull(), 'False', 'True')
time = []
open = []
high = []
low = []
close = []
volume = []
empty_value = []
for ib in range(len(dasf)):
time.append(dasf['minute'][ib])
open.append(dasf['1. open'][ib])
high.append(dasf['2. high'][ib])
low.append(dasf['3. low'][ib])
close.append(dasf['4. close'][ib])
volume.append(dasf['5. volume'][ib])
empty_value.append(dasf['ev'][ib])
time_df = pd.DataFrame(time).rename(columns={0: 'Time'})
open_df = pd.DataFrame(open).rename(columns={0: 'Open'})
high_df = pd.DataFrame(high).rename(columns={0: 'High'})
low_df = pd.DataFrame(low).rename(columns={0: 'Low'})
close_df = pd.DataFrame(close).rename(columns={0: 'Close'})
volume_df = pd.DataFrame(volume).rename(columns={0: 'Volume'})
empty_value_df = pd.DataFrame(empty_value).rename(columns={0: 'Empty Value'})
frames = [time_df, open_df, high_df, low_df, close_df, volume_df, empty_value_df]
df = pd.concat(frames, axis=1, join='inner')
df = df.set_index('Time')
ad = df.to_csv()
csv_make(namesym, date, ad)
v += 1
i += 1
I am retrieving data from http://data.rcc-acis.org/StnData. That is working well, I can plot the data as I want to visualize it using matplotlib. What I am struggling with is plotting a 4 polynomial line for the values. The code I have is listed below. I am using Python 3.7. Any help is greatly appreciated.
import urllib
import datetime
import urllib.request
import ast
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import scipy.interpolate as ntrpl
import numpy as np
import matplotlib as mpl
# create a static day of the month
#dayofmonth = ('01-01')
dayofmonth = ('11-26')
# testing out a range of years
y = datetime.datetime.today().year
years = list(range(y-1,y-50, -1))
i = 0
dateList=[]
minTList=[]
maxTList=[]
for year in years:
sdate = (str(year) + '-' + dayofmonth)
#print(sdate)
url = "http://data.rcc-acis.org/StnData"
values = {
"sid": "KGGW",
"date": sdate,
"elems": "maxt,mint",
"meta": "name",
"output": "json"
}
data = urllib.parse.urlencode(values).encode("utf-8")
req = urllib.request.Request(url, data)
response = urllib.request.urlopen(req)
results = response.read()
results = results.decode()
results = ast.literal_eval(results)
if i < 1:
n_label = results['meta']['name']
i = 2
for x in results["data"]:
date,maxT,minT = x
#setting the string of date to datetime
date = date[0:4]
date_obj = datetime.datetime.strptime(date,'%Y')
dateList.append(date_obj)
minTList.append(minT)
maxTList.append(maxT)
minT_int = list(map(int,minTList))
maxT_int = list(map(int,maxTList))
n_label = n_label + " " + dayofmonth
#p1 = np.polyfit(newDates,h_temps,3)
def plot_graph(dates,h_temps,l_temps,label):
#converting the dates into a readable format for pyplot
newDates = mdates.date2num(dates)
plt.gca().xaxis.set_major_formatter(mdates.DateFormatter('%Y'))
plt.title(label)
plt.gcf().autofmt_xdate()
plt.plot(newDates,l_temps)
plt.plot(newDates,h_temps)
#p1 = np.polyfit(x,y,3)
p1 = np.polyfit(newDates,h_temps,3)
print(p1)
plt.plot(newDates,p1)
plt.show()
#print
plot_graph(dateList,maxT_int,minT_int,n_label)
I'm new to coding. When I attempt to run this it says:
NameError: name 'data' is not defined.
import numpy as np
import pandas as pd
from scipy import stats
from matplotlib import pyplot as plt
from matplotlib.ticker import MaxNLocator
import datetime
import json
from bs4 import BeautifulSoup
import requests
import time
def fetchCryptoClose(fsym, tsym):
# function fetches the close-price time-series from cryptocompare.com
# it may ignore USDT coin (due to near-zero pricing)
# daily sampled
cols = ['date', 'timestamp', fsym]
lst = ['time', 'open', 'high', 'low', 'close']
timestamp_today = datetime.today().timestamp()
curr_timestamp = timestamp_today
for j in range(2):
df = pd.DataFrame(columns=cols)
url = "https://min-api.cryptocompare.com/data/histoday?fsym=" + fsym + \
"&tsym=" + tsym + "&toTs=" + str(int(curr_timestamp)) + "&limit=3"
response = requests.get(url)
soup = BeautifulSoup(response.content, "html.parser")
dic = json.loads(soup.prettify())
for i in range(1, 4):
tmp = []
for e in enumerate(lst):
x = e[0]
y = dic['Data'][i][e[1]]
if(x == 0):
tmp.append(str(timestamp2date(y)))
tmp.append(y)
if(np.sum(tmp[-4::]) > 0): # remove for USDT
tmp = np.array(tmp)
tmp = tmp[[0,1,4]] # filter solely for close prices
df.loc[len(df)] = np.array(tmp)
# ensure a correct date format
df.index = pd.to_datetime(df.date, format="%Y-%m-%d")
df.drop('date', axis=1, inplace=True)
curr_timestamp = int(df.ix[0][0])
if(j == 0):
df0 = df.copy()
else:
data = pd.concat([df, df0], axis=0)
data.drop("timestamp", axis=1, inplace=True)
return data # DataFrame
# N-Cryptocurrency Portfolio (tickers)
fsym = ['BTC', 'ETH', 'XRP', 'LTC', 'DASH', 'XMR', 'ETC', 'MAID', 'XEM', 'REP']
# vs.
tsym = 'USD'
for e in enumerate(fsym):
print(e[0], e[1])
if(e[0] == 0):
try:
data = fetchCryptoClose(e[1], tsym)
except:
pass
else:
try:
data = data.join(fetchCryptoClose(e[1], tsym))
except:
pass
# ensure values to be floats
# save portfolio to a file (HDF5 file format)
store = pd.HDFStore('portfolio2.h5')
store['data'] = data
store.close()
# read in your portfolio from a file
df = pd.read_hdf('portfolio2.h5', 'data')
print(df)
Don't use try-except-pass because will silence all your exceptions and you might never actually create `data.
Replace this code:
for e in enumerate(fsym):
print(e[0], e[1])
if(e[0] == 0):
try:
data = fetchCryptoClose(e[1], tsym)
except:
pass
else:
try:
data = data.join(fetchCryptoClose(e[1], tsym))
except:
pass
with this:
for e in enumerate(fsym):
print(e[0], e[1])
if(e[0] == 0):
data = fetchCryptoClose(e[1], tsym)
else:
data = data.join(fetchCryptoClose(e[1], tsym))
and see where your real exceptions are.
Views.py. Here, I am calling 'total_performance_ratio' function from 'yeartarg' function in order to calculate the target ratio. The 'total_performance_ratio' returned the http response as {"status": "[{\"estimated_code\": \"MRO\", \"percentage\": \"0.00\", \"estimatedvalue\": 496.55172413793105, \"won_deals\": 0, \"targetedvalue\": 0}]"}. How can we convert the http response to python object `def yeartarg(request,year=None):
now = datetime.datetime.now()
if year is None:
start_date = date(date.today().year, 1, 1)
else:
start_date = year+'-'+01+'-'+01
last_date = date(date.today().year, 12, 31)
if date.today() < last_date :
end_date = date.today()
else:
end_date = last_date
user = get_user_id(request)
uid = user[0]['user_id']
cur = connection.cursor()
cur.execute("select role_id from myapp_custom_user_model where user_ptr_id="+str(uid))
role = dictfetchall(cur)
cur.close()
user_level = role[0]['role_id']
if user_level==1:
user_performance = total_performance_ratio(request,start_date,end_date)
return JsonResponse({'status':user_performance})`
total_performance_ratio
def total_performance_ratio(request,start_date=None,end_date=None, amount=None):
now = datetime.datetime.now()
st_date, end_date = week_magic(date.today())
cur = connection.cursor()
cur.execute("select SUM(myapp_deal.deal_value) as d_value,myapp_currency_list.code from myapp_deal INNER JOIN myapp_currency_list on myapp_currency_list.id = myapp_deal.currency_id where myapp_deal.status=1 and myapp_deal.approved =0 and DATE_FORMAT(myapp_deal.closed_date,'%%Y-%%m-%%d') BETWEEN " '%s' " and " '%s' " group by myapp_deal.currency_id" %(start_date,end_date))
CompletedTarget = dictfetchall(cur)
cur.close()
TargetValue = 0
Wondeals = 0
monthly_target = total_monthly_target()
for data in CompletedTarget:
TargetValue += convertCurrency( data['d_value'],data['code'],monthly_target[0]['code'])
Wondeals += data['won_deals']
yearly_target = monthly_target[0]['monthly_target'] * 12
start_date = date(date.today().year, 1, 1)
last_date = date(date.today().year, 12, 31)
diff = last_date-start_date
completed_days = diff.days
now = datetime.datetime.now()
this_month_total_days=calendar.monthrange(now.year, now.month)[1]
one_day = float(monthly_target[0]['monthly_target'])/this_month_total_days
date_st_date= datetime.date(st_date.year,st_date.month,st_date.day)
str_st_date = date_st_date.strftime('%Y-%m-%d')
strf = datetime.datetime.strptime(str_st_date, "%Y-%m-%d")
date_f_strf= datetime.date(strf.year,strf.month,strf.day)
date_end_date= datetime.date(end_date.year,end_date.month,end_date.day)
str_end_date = date_end_date.strftime('%Y-%m-%d')
strf_end_date = datetime.datetime.strptime(str_end_date, "%Y-%m-%d")
end_date_f_strf= datetime.date(strf_end_date.year,strf_end_date.month,strf_end_date.day)
diff = end_date_f_strf - date_f_strf
completed_days = diff.days
target_upto_this = one_day * completed_days
percentage = (TargetValue/float(target_upto_this) ) * 100
if percentage >= 100:
percent = 100
else :
percent = number_format(percentage,2)
target1 =[]
target1.append({'targetedvalue':TargetValue, 'estimatedvalue':target_upto_this,'estimated_code':monthly_target[0]['code'],'percentage':percent,'won_deals':Wondeals})
data_json = json.dumps(target1)
return json_response({'status':data_json})
Change
return json_response({'status':data_json})
in your def total_performance_ratio(request,start_date=None,end_date=None, amount=None) function to
return {'status':data_json}
as json_response should be created in views only.
EDIT: If you want to use the outcome oftotal_performance_ratio as a separate view as well, then simply create
def total_performance_ratio_view(request,start_date=None,end_date=None, amount=None):
return JsonResponse(total_performance_ratio(request, start_date, end_date, amount))
This way the calculation of total performance ratio can be reused in multiple views.