Date picker component controlling the date of SQL query - python

I am implementing date picker to my dash web app. When the date is picked I want it to be passed to SQL code so it returns values from the selected date. Although it sounds pretty simple I'm stuck on the second day with it and have no idea how to make it work. In the code below I am trying to return date_value from the function and assign it to the variable datev and then insert it into SQL query inside the class Values. Result is the error pasted just below.
ERROR
File "C:\Users\patryk.suwala\Documents\pythonProject\pythonProject6\data.py", line 28, in
dates, datev = update_output()
File "C:\Users\patryk.suwala\Documents\pythonProject\pythonProject6\venv\lib\site-packages\dash_callback.py", line 143, in add_context
output_spec = kwargs.pop("outputs_list")
KeyError: 'outputs_list'
app.layout = html.Div([
dcc.DatePickerSingle(id='date-picker-single', date=date(1997, 5, 10)),
html.Div(id='output-container-date-picker-single')
])
#app.callback(
Output('output-container-date-picker-single', 'children'),
Input('my-date-picker-single', 'date'))
def update_output(date_value):
if date_value is not None:
date_object = date.fromisoformat(date_value)
date_string = date_object.strftime('%B %d, %Y')
return date_string, date_value # Include date_value in function return
dates, datev = update_output() # Assign date_value to the variable
class Values:
SLGordersAc = f"""SELECT COUNT(slg.id) AS slg_orders_accepted
FROM slg_failure slg
WHERE slg.slg_declined_reason_id = 0
AND slg.created_date = {datev}; """ #Insert variable into SQL query
cursor.execute(SLGordersAc)
resultSLGoA = cursor.fetchall()
[resultSLGoA] = resultSLGoA[0]
SLGo = dftab3.at[3, 'Value']
SLGo_PR = SLGo / OO
dftab3.loc[4, :] = 'SLG Orders %', round(SLGo_PR, 2)
dftab3.loc[5, :] = 'SLG Orders Accepted', resultSLGoA
SLGoA = dftab3.at[5, 'Value']
SLGoA_PR = SLGoA / OO
dftab3.loc[6, :] = 'SLG Orders Accepted %', round(SLGoA_PR, 2)

Related

How to optimize PRAW and pandas data collection to make it more pythonic?

I am using PRAW to get data from Reddit and created this function to do so on multiple subreddits.
It works, however, I am working on a more concise/pythonic version but can't figure out how I can create a single "for loop", doing the job of the 3 below.
subs = r.subreddit('Futurology+wallstreetbets+DataIsBeautiful+RenewableEnergy+Bitcoin')
#This function aim to scrap data from a list of subreddit.
#From these subreddit, I would like to get the #new, #hot and #rising posts
def get_data(size_new, size_hot, size_rising, subs_number):
posts = []
followers = []
targeted_date = '14-11-20 12:00:00'
targeted_date = datetime.datetime.strptime(targeted_date, '%d-%m-%y %H:%M:%S')
#getting x new posts
for subreddit in subs.new(limit = size_new):
date = subreddit.created
date = datetime.datetime.fromtimestamp(date)
if date >= targeted_date:
posts.append([date, subreddit.subreddit, subreddit.title, subreddit.selftext])
#getting x hot posts
for subreddit in subs.hot(limit = size_hot):
date = subreddit.created
date = datetime.datetime.fromtimestamp(date)
if date >= targeted_date:
posts.append([date, subreddit.subreddit, subreddit.title, subreddit.selftext])
#getting x rising posts
for subreddit in subs.rising(limit = size_rising):
date = subreddit.created
date = datetime.datetime.fromtimestamp(date)
if date >= targeted_date:
posts.append([date, subreddit.subreddit, subreddit.title, subreddit.selftext])
#getting subreddit subscribers number
for sub_name in subs_2:
for submission in r.subreddit(sub_name).hot(limit = 1):
followers.append([submission.subreddit, r.subreddit(sub_name).subscribers])
#creating 2 df
df_1 = pd.DataFrame(followers, columns = ['subreddit','subscribers'])
df = pd.DataFrame(posts, columns = ['date', 'subreddit', 'title', 'text']).drop_duplicates().sort_values(by = ['date']).reset_index(drop = True)
#concat the 2 df together
df = df.join(df_1.set_index('subreddit'), on = 'subreddit')
df = df[["date", "subreddit", "subscribers", "title", 'text']]
df = df[df.subscribers > subs_number].reset_index(drop = True)
return df
My request: how could it be more concise/optimized? What methodology are you using to make your code more readable or even better, optimize it for run time/computational resources?
Thank you
There are various principles to make better code, and various tools to use to find the 'code smells' that may be lurking in your code.
DRY - Don't Repeat Yourself
KISS - keep it stupid simple
SOLID
etc...
Taking a dive into the code that you posted using some of the principles on a surface level would refactor some of your code into looking like:
subs = r.subreddit('Futurology+wallstreetbets+DataIsBeautiful+RenewableEnergy+Bitcoin')
# check that the date is greater than the target date
# return true/false
def check_date(subreddit, targeted_date):
return subreddit.created >= targeted_date:
# get specific post data
def get_post_data(subreddit):
return [subreddit.created, subreddit.subreddit, subreddit.title, subreddit.selftext]
# get posts by sort type
def get_subreddit_post_types(subreddit_sort, targeted_date):
return [get_post_data(subreddit) for subreddit in subreddit_sort if check_date(subreddit, targeted_date)]
#This function aim to scrap data from a list of subreddit.
#From these subreddit, I would like to get the #new, #hot and #rising posts
def get_data(size_new, size_hot, size_rising, subs_number):
targeted_date = '14-11-20 12:00:00'
targeted_date = datetime.datetime.strptime(targeted_date, '%d-%m-%y %H:%M:%S').timestamp()
posts = []
followers = []
#getting x new posts
posts.extend(get_subreddit_post_types(subs.new(limit = size_new), targeted_date))
#getting x hot posts
posts.extend(get_subreddit_post_types(subs.hot(limit = size_hot), targeted_date))
#getting x rising posts
posts.extend(get_subreddit_post_types(subs.rising(limit = size_rising), targeted_date))
#getting subreddit subscribers number
for sub_name in subs_2:
for submission in r.subreddit(sub_name).hot(limit = 1):
followers.append([submission.subreddit, r.subreddit(sub_name).subscribers])
#creating 2 df
df_1 = pd.DataFrame(followers, columns = ['subreddit','subscribers'])
df = pd.DataFrame(posts, columns = ['date', 'subreddit', 'title', 'text']).drop_duplicates().sort_values(by = ['date']).reset_index(drop = True)
#concat the 2 df together
df = df.join(df_1.set_index('subreddit'), on = 'subreddit')
df = df[["date", "subreddit", "subscribers", "title", 'text']]
df = df[df.subscribers > subs_number].reset_index(drop = True)
return df
As for better optimizing your computational resources (what are you trying to optimize memory or runtime)? The same process applies to either is to look at your code to see what can be changed to decrease one versus the other.
From looking at your code something that would generally optimize what you wrote would be to look at what are the 'duplicate' posts that you are getting. If you could remove the duplicate check (as each of the hot/rising/new get posts from similar date ranges, but hot/rising may be completely encompassed inside of new) call from the posts that you gathered, so that you don't have to check that they are different, and possibly remove hot/rising calls (because those posts may be encompassed in new).

How can i sort Binance historical candles for multiple pairs across multiple timeframes

I'm downloading historical candlestick data for multiple crypto pairs across different timeframes from the binance api, i would like to know how to sort this data according to pair and timeframe and check which pair on which timeframe executes my code, the following code is what i use to get historical data
import requests
class BinanceFuturesClient:
def __init__(self):
self.base_url = "https://fapi.binance.com"
def make_requests(self, method, endpoint, data):
if method=="GET":
response = requests.get(self.base_url + endpoint, params=data)
return response.json()
def get_symbols(self):
symbols = []
exchange_info = self.make_requests("GET", "/fapi/v1/exchangeInfo", None)
if exchange_info is not None:
for symbol in exchange_info['symbols']:
if symbol['contractType'] == 'PERPETUAL' and symbol['quoteAsset'] == 'USDT':
symbols.append(symbol['pair'])
return symbols
def initial_historical_data(self, symbol, interval):
data = dict()
data['symbol'] = symbol
data['interval'] = interval
data['limit'] = 35
raw_candle = self.make_requests("GET", "/fapi/v1/klines", data)
candles = []
if raw_candle is not None:
for c in raw_candle:
candles.append(float(c[4]))
return candles[:-1]
running this code
print(binance.initial_historical_data("BTCUSDT", "5m"))
will return this as the output
[55673.63, 55568.0, 55567.89, 55646.19, 55555.0, 55514.53, 55572.46, 55663.91, 55792.83, 55649.43,
55749.98, 55680.0, 55540.25, 55470.44, 55422.01, 55350.0, 55486.56, 55452.45, 55507.03, 55390.23,
55401.39, 55478.63, 55466.48, 55584.2, 55690.03, 55760.81, 55515.57, 55698.35, 55709.78, 55760.42,
55719.71, 55887.0, 55950.0, 55980.47]
which is a list of closes
i want to loop through the code in such a manner that i can return all the close prices for the pairs and timeframes i need and sort it accordingly, i did give it a try but am just stuck at this point
period = ["1m", "3m", "5m", "15m"]
binance = BinanceFuturesClient()
symbols = binance.get_symbols()
for symbol in symbols:
for tf in period:
historical_candles = binance.initial_historical_data(symbol, tf)
# store values and run through strategy
You can use my code posted below. It requires python-binance package to be installed on your environment and API key/secret from your Binance account. Method tries to load data by weekly chunks (parameter step) and supports resending requests on failures after timeout. It may helps when you need to fetch huge amount of data.
import pandas as pd
import pytz, time, datetime
from binance.client import Client
from tqdm.notebook import tqdm
def binance_client(api_key, secret_key):
return Client(api_key=api_key, api_secret=secret_key)
def load_binance_data(client, symbol, start='1 Jan 2017 00:00:00', timeframe='1M', step='4W', timeout_sec=5):
tD = pd.Timedelta(timeframe)
now = (pd.Timestamp(datetime.datetime.now(pytz.UTC).replace(second=0)) - tD).strftime('%d %b %Y %H:%M:%S')
tlr = pd.DatetimeIndex([start]).append(pd.date_range(start, now, freq=step).append(pd.DatetimeIndex([now])))
print(f' >> Loading {symbol} {timeframe} for [{start} -> {now}]')
df = pd.DataFrame()
s = tlr[0]
for e in tqdm(tlr[1:]):
if s + tD < e:
_start, _stop = (s + tD).strftime('%d %b %Y %H:%M:%S'), e.strftime('%d %b %Y %H:%M:%S')
nerr = 0
while nerr < 3:
try:
chunk = client.get_historical_klines(symbol, timeframe.lower(), _start, _stop)
nerr = 100
except e as Exception:
nerr +=1
print(red(str(e)))
time.sleep(10)
if chunk:
data = pd.DataFrame(chunk, columns = ['timestamp', 'open', 'high', 'low', 'close', 'volume', 'close_time', 'quote_av', 'trades', 'tb_base_av', 'tb_quote_av', 'ignore' ])
data.index = pd.to_datetime(data['timestamp'].rename('time'), unit='ms')
data = data.drop(columns=['timestamp', 'close_time']).astype(float).astype({
'ignore': bool,
'trades': int,
})
df = df.append(data)
s = e
time.sleep(timeout_sec)
return df
How to use
c = binance_client(<your API code>, <your API secret>)
# loading daily data from 1/Mar/21 till now (your can use other timerames like 1m, 5m etc)
data = load_binance_data(c, 'BTCUSDT', '2021-03-01', '1D')
It returns indexed DataFrame with loaded data:
time
open
high
low
close
volume
quote_av
trades
tb_base_av
tb_quote_av
ignore
2021-03-02 00:00:00
49595.8
50200
47047.6
48440.7
64221.1
3.12047e+09
1855583
31377
1.52515e+09
False
2021-03-03 00:00:00
48436.6
52640
48100.7
50349.4
81035.9
4.10952e+09
2242131
40955.4
2.07759e+09
False
2021-03-04 00:00:00
50349.4
51773.9
47500
48374.1
82649.7
4.07984e+09
2291936
40270
1.98796e+09
False
2021-03-05 00:00:00
48374.1
49448.9
46300
48751.7
78192.5
3.72713e+09
2054216
38318.3
1.82703e+09
False
2021-03-06 00:00:00
48746.8
49200
47070
48882.2
44399.2
2.14391e+09
1476474
21500.6
1.03837e+09
False
Next steps are up to you and dependent on how would you like to design your data structure. In simplest case you could store data into dictionaries:
from collections import defaultdict
data = defaultdict(dict)
for symbol in ['BTCUSDT', 'ETHUSDT']:
for tf in ['1d', '1w']:
historical_candles = load_binance_data(c, symbol, '2021-05-01', timeframe=tf)
# store values and run through strategy
data[symbol][tf] = historical_candles
to get access to your OHLC you just need following: data['BTCUSDT']['1d'] etc.

How to make sure that the data in this PyTrends function comes out in YYYY-MM-DD format and not YYYY-MM-DD 00:00:00

I have the following function:
def my_funct(Keyword, Dates, Country, Col_name):
KEYWORDS=[Keyword]
KEYWORDS_CODES=[pytrend.suggestions(keyword=i)[0] for i in KEYWORDS]
df_CODES= pd.DataFrame(KEYWORDS_CODES)
EXACT_KEYWORDS=df_CODES['mid'].to_list()
DATE_INTERVAL= Dates
COUNTRY=[Country] #Use this link for iso country code
CATEGORY=0 # Use this link to select categories
SEARCH_TYPE='' #default is 'web searches',others include 'images','news','youtube','froogle' (google shopping)
Individual_EXACT_KEYWORD = list(zip(*[iter(EXACT_KEYWORDS)]*1))
Individual_EXACT_KEYWORD = [list(x) for x in Individual_EXACT_KEYWORD]
dicti = {}
i = 1
for Country in COUNTRY:
for keyword in Individual_EXACT_KEYWORD:
try:
pytrend.build_payload(kw_list=keyword,
timeframe = DATE_INTERVAL,
geo = Country,
cat=CATEGORY,
gprop=SEARCH_TYPE)
dicti[i] = pytrend.interest_over_time()
i+=1
time.sleep(6)
except requests.exceptions.Timeout:
print("Timeout occured")
df_trends = pd.concat(dicti, axis=1)
df_trends.columns = df_trends.columns.droplevel(0) #drop outside header
df_trends = df_trends.drop('isPartial', axis = 1) #drop "isPartial"
df_trends.reset_index(level=0,inplace=True) #reset_index
df_trends.columns=['date', Col_name] #change column names
return df_trends
Then I call the function using:
x1 = my_funct('Unemployment', '2004-01-04 2009-01-04', 'DK', 'Unemployment (Denmark)')
Then I put that into a df:
df1 = pd.DataFrame(x1)
Once I convert that df to excel, how do I ensure that it is in YYYY-MM-DD format without the dangling 00:00:00? Anytime I convert it comes out with hours and seconds.
I tried df1 = pd.DataFrame(x1).dt.strftime('%Y-%m-%d') but it says that this cannot be used?
Please help
Thanks
You are trying pass dt.strftime on the entire dataframe, but you need to pass it on the date column:
df1['date'] = df1['date'].dt.strftime('%Y-%m-%d')

Panda DataFrame Row Items IF Comparison doesnt return correct result

I retrieve data from quandl and load it to a pandas DF object.
Afterwards I calculate SMA values (SMA21, SMA55) based on "Last Price".
Adding those SMA values as a column do my DF object.
I iterate through DF to catch a buy signal.
I know the buy condition is holding true for some dates but my code does not printing anything out. I am expecting to print the buy condition at the very least.
as below you can see the following condition:
kitem['SMA21'] >= kitem['Last']
My code:
import requests
import pandas as pd
import json
class URL_Params:
def __init__ (self, endPoint, symboll, startDate, endDate, apiKey):
self.endPoint = endPoint
self.symboll = symboll
self.startDate = startDate
self.endDate = endDate
self.apiKey = apiKey
def createURL (self):
return self.endPoint + self.symboll + '?start_date=' + self.startDate + '&end_date=' + self.endDate + '&api_key=' + self.apiKey
def add_url(self, _url):
self.url_list
my_portfolio = {'BTC':1.0, 'XRP':0, 'DSH':0, 'XMR':0, 'TotalBTCValue':1.0}
_endPoint = 'https://www.quandl.com/api/v3/datasets/BITFINEX/'
_symbolls = ['BTCEUR','XRPBTC','DSHBTC','IOTBTC','XMRBTC']
_startDate = '2017-01-01'
_endDate = '2019-03-01'
_apiKey = '' #needs to be set for quandl
my_data = {}
my_conns = {}
my_col_names = ['Date', 'High', 'Low', 'Mid', 'Last', 'Bid', 'Ask', 'Volume']
orderbook = []
#create connection and load data for each pair/market.
#load them in a dict for later use
for idx_symbol in _symbolls:
my_url_params = URL_Params(_endPoint,idx_symbol,_startDate,_endDate,_apiKey)
response = requests.get(my_url_params.createURL())
my_data[idx_symbol] = json.loads(response.text)
#Prepare Data
my_raw_data_df_xrpbtc = pd.DataFrame(my_data['XRPBTC']['dataset']['data'], columns= my_data['XRPBTC']['dataset']['column_names'])
#Set Index to Date Column and Sort
my_raw_data_df_xrpbtc['Date'] = pd.to_datetime(my_raw_data_df_xrpbtc['Date'])
my_raw_data_df_xrpbtc.index = my_raw_data_df_xrpbtc['Date']
my_raw_data_df_xrpbtc = my_raw_data_df_xrpbtc.sort_index()
#Drop unrelated columns
my_raw_data_df_xrpbtc.drop(['Date'], axis=1, inplace=True)
my_raw_data_df_xrpbtc.drop(['Ask'], axis=1, inplace=True)
my_raw_data_df_xrpbtc.drop(['Bid'], axis=1, inplace=True)
my_raw_data_df_xrpbtc.drop(['Low'], axis=1, inplace=True)
my_raw_data_df_xrpbtc.drop(['High'], axis=1, inplace=True)
my_raw_data_df_xrpbtc.drop(['Mid'], axis=1, inplace=True)
#Calculate SMA values to create buy-sell signal
my_raw_data_df_xrpbtc['SMA21'] = my_raw_data_df_xrpbtc['Last'].rolling(21).mean()
my_raw_data_df_xrpbtc['SMA55'] = my_raw_data_df_xrpbtc['Last'].rolling(55).mean()
my_raw_data_df_xrpbtc['SMA200'] = my_raw_data_df_xrpbtc['Last'].rolling(200).mean()
#Check for each day if buy signal holds BUY if sell signal holds SELL
for idx,kitem in my_raw_data_df_xrpbtc.iterrows():
if (kitem['SMA21'] >= kitem['Last']) is True: #buy signal
print("buy0")
if my_portfolio['BTC'] > 0 is True:
print("buy1")
if (kitem['Last'] * my_portfolio['XRP']) >= (my_portfolio['BTC'] * 1.05) is True: #sell signal
print("sell0")
if my_portfolio['XRP'] > 0 is True:
print("sell1")
I know that there are lots of rows that holds true but my code never enters this path of code so it does not print out what I expect.
Could anyone please help/comment what might be wrong?
The reason is that your comparison is wrong. The result of kitem['SMA21'] >= kitem['Last'] will be a numpy.bool_. When you use is to compare it to True this will fail as it is not the same object.
If you change the comparison to == it will work as expected:
if (kitem['SMA21'] >= kitem['Last']) == True:

returning different time frames from datetime

I am parsing a file this way :
for d in csvReader:
print datetime.datetime.strptime(d["Date"]+"-"+d["Time"], "%d-%b-%Y-%H:%M:%S.%f").date()
date() returns : 2000-01-08, which is correct
time() returns : 06:20:00, which is also correct
How would I go about returning informations like "date+time" or "date+hours+minutes"
EDIT
Sorry I should have been more precise, here is what I am trying to achieve :
lmb = lambda d: datetime.datetime.strptime(d["Date"]+"-"+d["Time"], "%d-%b-%Y-%H:%M:%S.%f").date()
daily_quotes = {}
for k, g in itertools.groupby(csvReader, key = lmb):
lowBids = []
highBids = []
openBids = []
closeBids = []
for i in g:
lowBids.append(float(i["Low Bid"]))
highBids.append(float(i["High Bid"]))
openBids.append(float(i["Open Bid"]))
closeBids.append(float(i["Close Bid"]))
dayMin = min(lowBids)
dayMax = max(highBids)
open = openBids[0]
close = closeBids[-1]
daily_quotes[k.strftime("%Y-%m-%d")] = [dayMin,dayMax,open,close]
As you can see, right now I'm grouping values by day, I would like to group them by hour ( for which I would need date + hour ) or minutes ( date + hour + minute )
thanks in advance !
Don't use the date method of the datetime object you're getting from strptime. Instead, apply strftime directly to the return from strptime, which gets you access to all the member fields, including year, month, day, hour, minute, seconds, etc...
d = {"Date": "01-Jan-2000", "Time": "01:02:03.456"}
dt = datetime.datetime.strptime(d["Date"]+"-"+d["Time"], "%d-%b-%Y-%H:%M:%S.%f")
print dt.strftime("%Y-%m-%d-%H-%M-%S")

Categories

Resources