I am exploring the Azure management APIs. The ADF monitor pipeline endpoint returns only 100 records at a time, so I created a while loop, but for some reason I am not able to get the next continuation token.
ct = d.get('continuationToken','')
c = 1
while ct != '':
    req_body = self.getDataBody(ct)
    data = self.getResponse(data_url, data_headers, req_body)
    nct = self.getContinuationToken(data, c)
    c = c + 1
    print(c)
    if ct == nct:
        print(ct)
        print(nct)
        print('duplicate token')
        break
    ct = nct
    if ct == '':
        break
Here, in the next iteration, the token is not getting updated.
Update:
Following are the functions that the above code uses:
def getDataBody(self, ct):
    start_date = datetime.now().strftime("%Y-%m-%d")
    end_date = (datetime.now() + timedelta(days=1)).strftime("%Y-%m-%d")
    data_body = {'lastUpdatedAfter': start_date, 'lastUpdatedBefore': end_date}
    if ct != '':
        data_body['continuationToken'] = ct
    return data_body

def getResponse(self, url, headers, body):
    data = requests.post(url, headers=headers, data=body)
    return data.text

def getContinuationToken(self, data, c):
    d = json.loads(data)
    with open(f'data/{c}.json', 'w') as f:
        json.dump(d, f)
    return d.get('continuationToken', '')
You can try increasing the timeout of the ADF activity; the issue may be that the timeout setting in your current ADF activity is less than the time the API actually takes to execute.
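Another thing worth checking (an assumption on my side, not something confirmed above): requests.post(..., data=body) form-encodes a Python dict instead of sending JSON, so the continuationToken may never be posted in the shape the API expects. A minimal sketch of the change:

def getResponse(self, url, headers, body):
    # json= serializes the dict to a JSON request body and sets the
    # Content-Type: application/json header automatically.
    data = requests.post(url, headers=headers, json=body)
    return data.text

If the headers already declare application/json, an equivalent fix is data=json.dumps(body).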
Is it possible to use multiprocessing in Django on a request?
#so if I send a request to http://127.0.0.1:8000/wallet_verify
def wallet_verify(request):
    wallets = botactive.objects.all()
    #here I check whether the user wants to be included in the process: if they set it to True then I'll include them, else ignore them.
    for active in wallets:
        check_active = active.active
        if check_active == True:
            user_is_active = active.user
            #for the ones that want to be included I then go get their key data.
            #I need both the API key and secret, so I loop through to get the data for active users.
            database = Bybitapidatas.objects.filter(user=user_is_active)
            for apikey in database:
                apikey = apikey.apikey
            for apisecret in database:
                apisecret = apisecret.apisecret
            #since I am making a request to an exchange endpoint I can only include one API key and secret at a time, i.e. one person at a time. This is why I want to run in parallel.
            for a, b in zip(list(Bybitapidatas.objects.filter(user=user_is_active).values("apikey")), list(Bybitapidatas.objects.filter(user=user_is_active).values("apisecret"))):
                session = spot.HTTP(endpoint='https://api-testnet.bybit.com/', api_key=a['apikey'], api_secret=b['apisecret'])
                #here I check to see if they have balance to open trades, if they have selected to be included.
                GET_USDT_BALANCE = session.get_wallet_balance()['result']['balances']
                for i in GET_USDT_BALANCE:
                    if 'USDT' in i.values():
                        GET_USDT_BALANCE = session.get_wallet_balance()['result']['balances']
                        idx_USDT = GET_USDT_BALANCE.index(i)
                        GET_USDTBALANCE = session.get_wallet_balance()['result']['balances'][idx_USDT]['free']
                        print(round(float(GET_USDTBALANCE), 2))
                #if they don't have enough balance I skip the user.
                if round(float(GET_USDTBALANCE), 2) < 11:
                    pass
                else:
                    session.place_active_order(
                        symbol="BTCUSDT",
                        side="Buy",
                        type="MARKET",
                        qty=10,
                        timeInForce="GTC"
                    )
How can I run this process in parallel while looping through the database to also get the data for each individual user?
I am still new to coding, so I hope I explained it in a way that makes sense.
I have tried multiprocessing and pools, but then I get an error that the app has not started yet, and I have to run it outside of wallet_verify. Is there a way to do it inside wallet_verify, when I send the POST request?
Any help appreciated.
Filtering the database to get the users who have set active to True; listi ends up as [1, 3] (these are the user IDs returned):
processes = botactive.objects.filter(active=True).values_list('user')
listi = [row[0] for row in processes]
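As an aside (my own suggestion, not part of the original answer), values_list accepts flat=True, which returns bare values and makes the comprehension unnecessary:

listi = list(botactive.objects.filter(active=True).values_list('user', flat=True))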
Get the users from listi and perform the action:
def wallet_verify(listi):
    # print(listi)
    database = Bybitapidatas.objects.filter(user=listi)
    print("---------------------------------------------------- START")
    for apikey in database:
        apikey = apikey.apikey
        print(apikey)
    for apisecret in database:
        apisecret = apisecret.apisecret
        print(apisecret)
    start_time = time.time()
    session = spot.HTTP(endpoint='https://api-testnet.bybit.com/', api_key=apikey, api_secret=apisecret)
    GET_USDT_BALANCE = session.get_wallet_balance()['result']['balances']
    for i in GET_USDT_BALANCE:
        if 'USDT' in i.values():
            GET_USDT_BALANCE = session.get_wallet_balance()['result']['balances']
            idx_USDT = GET_USDT_BALANCE.index(i)
            GET_USDTBALANCE = session.get_wallet_balance()['result']['balances'][idx_USDT]['free']
            print(round(float(GET_USDTBALANCE), 2))
    if round(float(GET_USDTBALANCE), 2) < 11:
        pass
    else:
        session.place_active_order(
            symbol="BTCUSDT",
            side="Buy",
            type="MARKET",
            qty=10,
            timeInForce="GTC"
        )
    print("My program took", time.time() - start_time, "to run")
    print("---------------------------------------------------- END")
    return HttpResponse("Wallets verified")
verifyt is what I use for the multiprocessing, since I don't want wallet_verify to run without being requested to run. The initializer also starts the Django apps for each worker:
def verifyt(request):
    with ProcessPoolExecutor(max_workers=4, initializer=django.setup) as executor:
        results = executor.map(wallet_verify, listi)
    return HttpResponse("done")
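One caveat worth adding to this (my own note, and 'myproject.settings' below is a placeholder): django.setup() only works in a child process if DJANGO_SETTINGS_MODULE is set in that process's environment, so an explicit initializer can make the pool more robust:

import os
import django
from concurrent.futures import ProcessPoolExecutor

def init_worker():
    # Make sure each worker process knows which settings module to load
    # before django.setup() runs.
    os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'myproject.settings')  # hypothetical path
    django.setup()

def verifyt(request):
    with ProcessPoolExecutor(max_workers=4, initializer=init_worker) as executor:
        results = executor.map(wallet_verify, listi)
    return HttpResponse("done")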
I'm trying to use the Google Calendar API:
events_result = service.events().list(calendarId=calendarId,
                                      timeMax=now,
                                      alwaysIncludeEmail=True,
                                      maxResults=100, singleEvents=True,
                                      orderBy='startTime').execute()
Everything is OK when I have permission to access the calendarId, but it errors out when I don't have permission for that calendarId.
I built an autoload.py function, scheduled with the schedule Python package, to load events every 10 minutes. This function stops whenever an error comes up, and I then have to use an SSH terminal to restart autoload.py manually.
So I want to know:
How can I get the status_code so that, for example, if it is 404, Python will pass and continue?
Answer:
You can use a try/except block within a loop to go through all your calendars, and skip over accesses which throw an error.
Code Example:
To get the error code, make sure to import json:
import json
and then you can get the error code out of the Exception:
calendarIds = ["calendar ID 1", "calendar ID 2", "calendar Id 3", "etc"]
for i in calendarIds:
    try:
        events_result = service.events().list(calendarId=i,
                                              timeMax=now,
                                              alwaysIncludeEmail=True,
                                              maxResults=100, singleEvents=True,
                                              orderBy='startTime').execute()
    except Exception as e:
        print(json.loads(e.content)['error']['code'])
        continue
Further Reading:
Python Try Except - w3schools
Python For Loops - w3schools
Thanks to @Rafa Guillermo, I uploaded the full code to the autoload.py program, but I also wanted to know how to get the response JSON or status_code for a Google API request.
The solution:
try:
    # code here
except Exception as e:
    continue
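To get the response JSON or status code specifically (a sketch on my side, assuming the google-api-python-client library is in use): the client raises googleapiclient.errors.HttpError, which carries both the numeric status and the JSON error body:

import json
from googleapiclient.errors import HttpError

try:
    events_result = service.events().list(calendarId=calendarId,
                                          timeMax=now,
                                          maxResults=100, singleEvents=True,
                                          orderBy='startTime').execute()
except HttpError as e:
    print(e.resp.status)          # numeric status code, e.g. 404
    print(json.loads(e.content))  # full JSON error body
    if e.resp.status == 404:
        pass                      # skip calendars we cannot access

The full autoload.py code: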
import schedule
import time
from datetime import datetime
import dir
import sqlite3
from project.function import cmsCalendar as cal

db_file = str(dir.dir) + '/admin.sqlite'

def get_list_shop_from_db(db_file):
    cur = sqlite3.connect(db_file).cursor()
    query = cur.execute('SELECT * FROM Shop')
    colname = [d[0] for d in query.description]
    result_list = [dict(zip(colname, r)) for r in query.fetchall()]
    cur.close()
    cur.connection.close()
    return result_list

def auto_load_google_database(list_shop, calendarError=False):
    # enumerate keeps shopId for logging while iterating the shops directly
    # (the original list_shop[shopId] indexing skipped the first shop and
    # ran past the end of the list).
    for shopId, shop in enumerate(list_shop, start=1):
        try:
            print("writing to shop", shopId)
            service = cal.service_build()
            shop_step_time_db = shop['shop_step_time']
            shop_duration_db = shop['shop_duration']
            slot_available = shop['shop_slots']
            slot_available = int(slot_available)
            workers = shop['shop_workers']
            workers = int(workers)
            calendarId = shop['shop_calendarId']
            if slot_available > workers:
                a = workers
            else:
                a = slot_available
            if shop_duration_db == None:
                shop_duration_db = '30'
            if shop_step_time_db == None:
                shop_step_time_db = '15'
            shop_duration = int(shop_duration_db)
            shop_step_time = int(shop_step_time_db)
            shop_start_time = shop['shop_start_time']
            shop_start_time = datetime.strptime(shop_start_time, "%H:%M:%S.%f").time()
            shop_end_time = shop['shop_end_time']
            shop_end_time = datetime.strptime(shop_end_time, "%H:%M:%S.%f").time()
            # Capacity for each time slot, taken from the JSON file WorkShop.js
            booking_status = cal.auto_load_listtimes(service, shopId, calendarId, shop_step_time,
                                                     shop_duration, a,
                                                     shop_start_time,
                                                     shop_end_time)
        except Exception as e:
            continue

def main():
    list_shop = get_list_shop_from_db(db_file)
    auto_load_google_database(list_shop)

if __name__ == '__main__':
    main()
    schedule.every(5).minutes.do(main)
    while True:
        # Checks whether a scheduled task is pending to run or not.
        schedule.run_pending()
        time.sleep(1)
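One more note from my side, since the original complaint was the loop dying on errors: the schedule library does not catch exceptions raised by a job, so an uncaught error in main() would still kill the run_pending() loop. A small wrapper (a sketch, not from the original post) keeps the scheduler alive:

def safe(job):
    # Run a scheduled job, logging failures instead of letting them
    # propagate and stop the run_pending() loop.
    def wrapper():
        try:
            job()
        except Exception as e:
            print("scheduled job failed:", e)
    return wrapper

schedule.every(5).minutes.do(safe(main))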
I've built a small download manager to get data for the SHARADAR tables in Quandl (GIT).
This is functioning well, but the downloads are very slow for the larger files (up to 2 GB over 10 years).
I attempted to use asyncio, but this didn't speed up the downloads. This may be because Quandl doesn't allow concurrent downloads. Am I making an error in my code, or is this a restriction I will have to live with from Quandl?
import asyncio
import math
import time
import pandas as pd
import quandl
import update

def segment_dates(table, date_start, date_end):
    # Determine the number of days per asyncio loop, determined by the max size of the
    # range of data divided by the size of the files in 100 MB chunks.
    # Reduce this number for smaller, more frequent downloads.
    total_days = 40
    # Number of days per download should be:
    sizer = math.ceil(total_days / update.sharadar_tables[table][2])
    # Number of days between start and end.
    date_diff = date_end - date_start
    loop_count = int(math.ceil(date_diff.days / sizer))
    sd = date_start
    sync_li = []
    for _ in range(loop_count):
        ed = sd + pd.Timedelta(days=sizer)
        if ed > date_end:
            ed = date_end
        sync_li.append((sd, ed,))
        sd = ed + pd.Timedelta(days=1)
    return sync_li

async def get_data(table, kwarg):
    """
    Using the table name and kwargs, retrieves the most current data.
    :param table: Name of table to update.
    :param kwarg: Dictionary containing the parameters to send to Quandl.
    :return dataframe: Pandas dataframe containing latest data for the table.
    """
    return quandl.get_table("SHARADAR/" + table.upper(), paginate=True, **kwarg)

async def main():
    table = "SF1"
    # Name of the column that has the date field for this particular table.
    date_col = update.sharadar_tables[table][0]
    date_start = pd.to_datetime("2020-03-15")
    date_end = pd.to_datetime("2020-04-01")
    apikey = "API Key"
    quandl.ApiConfig.api_key = apikey
    # Get a list containing the start and end times for the loops.
    times = segment_dates(table, date_start, date_end)
    wait_li = []
    for t in times:
        kwarg = {date_col: {"gte": t[0].strftime("%Y-%m-%d"), "lte": t[1].strftime("%Y-%m-%d")}}
        wait_li.append(loop.create_task(get_data(table, kwarg)))
    await asyncio.wait(wait_li)
    return wait_li

if __name__ == "__main__":
    starter = time.time()
    try:
        loop = asyncio.get_event_loop()
        res = loop.run_until_complete(main())
        for r in res:
            df = r.result()
            print(df.shape)
            print(df.head())
    except:
        raise ValueError("error")
    finally:
        # loop.close()
        print("Finished in {}".format(time.time() - starter))
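For what it's worth, there does appear to be an error in the code itself (my reading, separate from any Quandl-side limits): quandl.get_table is a synchronous, blocking call, and get_data never awaits anything, so the tasks run one after another on the event loop rather than overlapping. A sketch of pushing the blocking calls onto worker threads instead, leaving the rest of the program as-is:

import asyncio
import quandl

async def get_data(table, kwarg):
    # quandl.get_table blocks, so hand it to the default thread pool;
    # awaiting the executor future lets several downloads be in flight at once.
    loop = asyncio.get_event_loop()
    return await loop.run_in_executor(
        None,
        lambda: quandl.get_table("SHARADAR/" + table.upper(), paginate=True, **kwarg),
    )

If the downloads still serialize after a change like this, that would point to a server-side restriction on concurrent connections rather than the client code.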
I'm extracting data using a Spotify API wrapper. The access token (which is a global variable) is valid for only 1 hour, so I need to refresh it during the for loop in one of the defined functions. I tried to update it using try/except, but I got the following error:
UnboundLocalError: local variable 'spotify' referenced before assignment
Here is the relevant code:
token = credentials.get_access_token()
spotify = spotipy.Spotify(auth=token)
...
def main():
    ...
    df_af = generate_audio_features_df(track_ids)
    ...

def generate_audio_features_df(track_ids):
    col_list = ['id', 'danceability']
    result = []
    count = 0
    for j in track_ids:
        try:
            r = spotify.audio_features(j)[0]
            features_list = [r['id'], r['danceability']]
            result.append(features_list)
            #display progress
            count += 1
            print("Added ", count, " track")
        except spotipy.client.SpotifyException:
            token = credentials.get_access_token()
            spotify = spotipy.Spotify(auth=token)
    df = pd.DataFrame(data=result, columns=col_list)
    return df

if __name__ == "__main__":
    main()
I would like the code to refresh the token and get back to the loop.
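For what it's worth (my explanation, not from the original post): assigning to spotify anywhere inside generate_audio_features_df makes Python treat the name as local for the entire function, so the read in the try block fails before the except branch ever assigns it. A minimal sketch of one fix, declaring the name global before reassigning it:

def generate_audio_features_df(track_ids):
    global spotify  # rebind the module-level client, not a function-local name
    col_list = ['id', 'danceability']
    result = []
    for j in track_ids:
        try:
            r = spotify.audio_features(j)[0]
            result.append([r['id'], r['danceability']])
        except spotipy.client.SpotifyException:
            # Token expired: refresh it and rebuild the client.
            # (As in the original, the current track id is skipped rather than retried.)
            token = credentials.get_access_token()
            spotify = spotipy.Spotify(auth=token)
    return pd.DataFrame(data=result, columns=col_list)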
I've been trying to extract websocket information from the Bitfinex websocket client service. Below is the code. The script works fine when I search for under 30 crypto pairs (i.e. "p" or "PAIRS" has 30 elements), but if I try to go higher the script never gets to the "save_data" coroutine. Any ideas why this could be happening?
I modified the script from https://mmquant.net/replicating-orderbooks-from-websocket-stream-with-python-and-asyncio/ ; kudos to Mmquant for making the code available and giving an awesome script description.
import aiohttp
import asyncio
import ujson
from tabulate import tabulate
from copy import deepcopy
import pandas as pd
from openpyxl import load_workbook
from datetime import datetime
import numpy as np
from collections import OrderedDict
from time import sleep

"""
Load the workbook to dump the API data into, and instruct it not to generate a new sheet.
The excel workbook must:
1. Be of the type ".xlsx", only because the load_workbook function was set to call a specific sheet with the .xlsx format. This can be changed.
2. Have the worksheets "apidata" and "Test". This can also be adjusted below.
3. Be named "bitfinexws.xlsx". This can be changed below.
4. Be in the same folder as this script.
"""
book = load_workbook('bitfinexwsasync.xlsx') #.xlsx Excel spreadsheet that will be used for the placement and extraction of data.
apdat = book['Sheet1'] #Assign a variable to the sheet where the trade ratios will be put. This is case sensitive.

#The next 3 lines are critical to allow overwriting of data and not creating a new worksheet when using pandas dataframes.
writer = pd.ExcelWriter('bitfinexwsasync.xlsx', engine='openpyxl')
writer.book = book
writer.sheets = dict((ws.title, ws) for ws in book.worksheets)
#Get a list of all the ratios and add the standard trade url "https://api.bitfinex.com/v1/book/" before the ratios.
burl = 'https://api.bitfinex.com/v1/book/' #This is the standard url for retrieving trade ratios; the pair symbol must be added after this.
sym = pd.read_json('https://api.bitfinex.com/v1/symbols', orient='values') #This is a list of all the symbols on the Bitfinex website.
p = [0]*len(sym)
for i in range(0, len(sym)):
    p[i] = sym.loc[i, 0]
p = tuple(p)
m = len(p) #Max number of trade ratios to extract for this script. The script cannot run the full set of 105 trade ratios; it will time out.
p = p[0:m]
d = []
e = []
j = []

"""
NOTE:
The script cannot run for the full 105 pairs; it times out and becomes unresponsive.
By testing the stability it was found that calling 21 pairs per script at a refresh rate of 5 seconds did not cause any time-out problems.
"""
print('________________________________________________________________________________________________________')
print('')
print('Bitfinex Websocket Trading Orderbook Extraction - Asynchronous.')
print('There are a total of ', len(sym), ' trade ratios in this exchange.')
print('Only ', m, ' trading pairs will be extracted by this script, namely:', p)
print('Process initiated at', datetime.now().strftime('%Y-%m-%d %H:%M:%S'), '.') #Tells me the date and time that the data extraction was initiated.
print('________________________________________________________________________________________________________')
print('')

# Pairs which we generate the orderbook for.
PAIRS = p

# If there are n pairs we need to subscribe to n websocket channels.
# This is the subscription message template.
# For details about settings refer to https://bitfinex.readme.io/v2/reference#ws-public-order-books.
SUB_MESG = {
    'event': 'subscribe',
    'channel': 'book',
    'freq': 'F0',  #Adjust for real time
    'len': '25',
    'prec': 'P0'
    # 'pair': <pair>
}
def build_book(res, pair):
    """ Updates orderbook.
    :param res: Orderbook update message.
    :param pair: Updated pair.
    """
    global orderbooks
    # Filter out subscription status messages.
    if res.data[0] == '[':
        # String to json
        data = ujson.loads(res.data)[1]
        # Build orderbook.
        # Observe the structure of the orderbook: the prices are keys for the corresponding count and amount.
        # Structuring data in this way significantly simplifies orderbook updates.
        if len(data) > 10:
            bids = {
                str(level[0]): [str(level[1]), str(level[2])]
                for level in data if level[2] > 0
            }
            asks = {
                str(level[0]): [str(level[1]), str(level[2])[1:]]
                for level in data if level[2] < 0
            }
            orderbooks[pair]['bids'] = bids
            orderbooks[pair]['asks'] = asks
        # Update orderbook and filter out heartbeat messages.
        elif data[0] != 'h':
            # Example update message structure: [1765.2, 0, 1], i.e. [price, count, amount].
            # Update algorithm pseudocode from the Bitfinex documentation:
            # 1. - When count > 0 then you have to add or update the price level.
            # 1.1- If amount > 0 then add/update bids.
            # 1.2- If amount < 0 then add/update asks.
            # 2. - When count = 0 then you have to delete the price level.
            # 2.1- If amount = 1 then remove from bids.
            # 2.2- If amount = -1 then remove from asks.
            data = [str(data[0]), str(data[1]), str(data[2])]
            if int(data[1]) > 0:  # 1.
                if float(data[2]) > 0:  # 1.1
                    orderbooks[pair]['bids'].update({data[0]: [data[1], data[2]]})
                elif float(data[2]) < 0:  # 1.2
                    orderbooks[pair]['asks'].update({data[0]: [data[1], str(data[2])[1:]]})
            elif data[1] == '0':  # 2.
                if data[2] == '1':  # 2.1
                    if orderbooks[pair]['bids'].get(data[0]):
                        del orderbooks[pair]['bids'][data[0]]
                elif data[2] == '-1':  # 2.2
                    if orderbooks[pair]['asks'].get(data[0]):
                        del orderbooks[pair]['asks'][data[0]]
async def save_data():
    """ Save the data to the excel spreadsheet specified. """
    #NOTE: adjusted this to run every 5 seconds, i.e. "await asyncio.sleep(10)" became "await asyncio.sleep(5)".
    global orderbooks
    while 1:
        d = []
        e = []
        j = []
        await asyncio.sleep(5)
        for pair in PAIRS:
            bids2 = [[v[1], v[0], k] for k, v in orderbooks[pair]['bids'].items()]
            asks2 = [[k, v[0], v[1]] for k, v in orderbooks[pair]['asks'].items()]
            bids2.sort(key=lambda x: float(x[2]), reverse=True)
            asks2.sort(key=lambda x: float(x[0]))
            table2 = [[*bid, *ask] for (bid, ask) in zip(bids2, asks2)]
            d.extend(table2)
            e.extend([0]*len(table2))
            e[len(e)-len(table2)] = pair
            j.extend([0]*len(d))
            j[0] = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
        s = pd.DataFrame(d, columns=['bid:amount', 'bid:count', 'bid:price', 'ask:price', 'ask:count', 'ask:amount'])
        r = pd.DataFrame(e, columns=['Trade pair'])
        u = pd.DataFrame(j, columns=['Last updated'])
        z = pd.concat([s, r, u], axis=1, join_axes=[s.index])
        z.to_excel(writer, 'Sheet1', startrow=0, startcol=0, index=False)
        writer.save()
        print('Update completed at', datetime.now().strftime('%Y-%m-%d %H:%M:%S'), '.')
async def get_book(pair, session):
    """ Subscribes for orderbook updates and fetches updates. """
    #print('enter get_book, pair: {}'.format(pair))
    pair_dict = deepcopy(SUB_MESG)  #Allows changes to be made within a variable.
    pair_dict.update({'pair': pair})  #Updates the SUB_MESG dictionary with the new pair to be evaluated. It will be added to the end of the dictionary.
    async with session.ws_connect('wss://api.bitfinex.com/ws/2') as ws:
        asyncio.ensure_future(ws.send_json(pair_dict))  #This replaced "ws.send_json(pair_dict)", as Python on Ubuntu required a link to asyncio for this function.
        while 1:  #Loops infinitely.
            res = await ws.receive()
            print(pair_dict['pair'], res.data)  # debug
            build_book(res, pair)
async def main():
    """ Driver coroutine. """
    async with aiohttp.ClientSession() as session:
        coros = [get_book(pair, session) for pair in PAIRS]
        # Append coroutine for printing orderbook snapshots every 10s.
        coros.append(save_data())
        await asyncio.wait(coros)

orderbooks = {
    pair: {}
    for pair in PAIRS
}

loop = asyncio.get_event_loop()
loop.run_until_complete(main())
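A side note from me, not part of the original post: on newer Python versions asyncio.wait() no longer accepts bare coroutines (deprecated in 3.8, removed in 3.12), so on a recent interpreter the driver should wrap them in tasks first. A minimal sketch of the change inside main():

# Inside main(), replace "await asyncio.wait(coros)" with:
tasks = [asyncio.ensure_future(c) for c in coros]
await asyncio.wait(tasks)

This doesn't by itself explain why save_data stalls above roughly 30 pairs, but it keeps the script running on current Python while that is investigated. Similarly, pd.concat's join_axes argument was removed in pandas 1.0; on current pandas, pd.concat([s, r, u], axis=1).reindex(s.index) is the equivalent call in save_data.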