This is a side project I am doing as I attempt to learn Python.
I am trying to write a Python script that iterates through a date range and uses each date in a GET request URL.
The URL uses a LastModified parameter and limits GET requests to a 24-hour period, so I would like to run the GET request for each day from the start date.
Below is what I have currently. The major issue I am having is how to separate the returned dates so that I can use each one individually in the GET; the GET will also need to be looped to use each date, I suppose.
Any pointers in the right direction would be helpful, as I am trying to learn as much as possible.
import datetime

start_date = datetime.date(2020, 1, 1)
end_date = datetime.date.today()
delta = datetime.timedelta(days=1)

while start_date <= end_date:
    last_mod = start_date + delta
    print(last_mod)
    start_date += delta
import requests
from requests.auth import HTTPBasicAuth

vend_key = 'REDACTED'
user_key = 'REDACTED'
metrc_license = 'A12-0000015-LIC'
base_url = 'https://sandbox-api-ca.metrc.com'
last_mod_date = ''
a = HTTPBasicAuth(vend_key, user_key)

def get(path):
    url = '{}/{}/?licenseNumber={}&lastModifiedStart={}'.format(base_url, path, metrc_license, last_mod_date)
    print('URL:', url)
    r = requests.get(url, auth=a)
    print("The server response is: ", r.status_code)
    if r.status_code == 200:
        return r.json()
    # Would like an elif: if r.status_code is 500, wait _ seconds and try again
    elif r.status_code == 500:
        print("500 error, try again.")
    else:
        print("Error")

print(get('/packages/v1/active'))
Here is an example of the output from the current script. I do not need it to print each date (so I can remove the print), but how can I make each date the loop produces its own variable to use in a loop of the GET?
2020-01-02
2020-01-03
2020-01-04
2020-01-05
2020-01-06
etc...
etc...
etc...
2020-05-24
2020-05-25
2020-05-26
2020-05-27
URL: https://sandbox-api-ca.metrc.com//packages/v1/active/?licenseNumber=A12-0000015-LIC&lastModifiedStart=2020-05-27
The server response is: 200
[]
It's super simple: you need to move the while loop that generates all these dates into your get() function. Here is what I mean:
import datetime

import requests
from requests.auth import HTTPBasicAuth

vend_key = 'REDACTED'
user_key = 'REDACTED'
metrc_license = 'A12-0000015-LIC'
base_url = 'https://sandbox-api-ca.metrc.com'
a = HTTPBasicAuth(vend_key, user_key)

def get(path):
    start_date = datetime.date(2020, 1, 1)
    end_date = datetime.date.today()
    delta = datetime.timedelta(days=1)
    results = []  # collect each day's response instead of returning on the first one
    while start_date <= end_date:
        last_mod_date = start_date + delta
        start_date += delta
        url = '{}/{}/?licenseNumber={}&lastModifiedStart={}'.format(base_url, path, metrc_license, last_mod_date)
        print('URL:', url)
        r = requests.get(url, auth=a)
        print("The server response is: ", r.status_code)
        if r.status_code == 200:
            results.append(r.json())
        # Would like an elif: if r.status_code is 500, wait _ seconds and try again
        elif r.status_code == 500:
            print("500 error, try again.")
        else:
            print("Error")
    return results

print(get('/packages/v1/active'))
One thing you could do is call your get function inside the while loop. First modify the get function to take a new parameter, date, and then use this parameter when you build your url.
For instance:
def get(path, date):
    url = '{}/{}/?licenseNumber={}&lastModifiedStart={}'.format(base_url, path, metrc_license, date)
    ...
And then call get inside the while loop.
while start_date <= end_date:
    last_mod = start_date + delta
    get(some_path, last_mod)
    start_date += delta
This would make a lot of GET requests in a short period of time, so you might want to be careful not to overload the server with requests.
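Putting both pieces together, here is a minimal sketch of that approach, including the retry-with-wait for 500 responses that the question's comment asks for (the retry count and wait time are arbitrary choices of mine, and the path is joined without the stray double slash seen in the original output):

import datetime
import time

import requests
from requests.auth import HTTPBasicAuth

vend_key = 'REDACTED'
user_key = 'REDACTED'
metrc_license = 'A12-0000015-LIC'
base_url = 'https://sandbox-api-ca.metrc.com'
a = HTTPBasicAuth(vend_key, user_key)

def get(path, date, retries=3, wait_seconds=5):
    url = '{}{}?licenseNumber={}&lastModifiedStart={}'.format(base_url, path, metrc_license, date)
    for attempt in range(retries):
        r = requests.get(url, auth=a)
        print('URL:', url, '->', r.status_code)
        if r.status_code == 200:
            return r.json()
        elif r.status_code == 500:
            # wait before retrying, as the comment in the question asks
            time.sleep(wait_seconds)
        else:
            print('Error:', r.status_code)
            return None
    return None

start_date = datetime.date(2020, 1, 1)
end_date = datetime.date.today()
delta = datetime.timedelta(days=1)

results = []
while start_date <= end_date:
    day_data = get('/packages/v1/active', start_date)
    if day_data:
        results.extend(day_data)
    start_date += delta

This keeps the date loop at the top level, so each date becomes the argument of one GET call, which is exactly the "each date as its own variable" behavior the question asks about.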
Related
This is my assignment:
You need to write a Python code that will read the current price of the XRP/USDT futures on the Binance exchange in real time (as fast as possible). If the price falls by 1% from the maximum price in the last hour, the program should print a message to the console, and the program should continue to work, constantly reading the current price.
I learned how to receive data, but how can I go further?
import requests
import json
import pandas as pd
import datetime

base = 'https://testnet.binancefuture.com'
path = '/fapi/v1/klines'
url = base + path
param = {'symbol': 'XRPUSDT', 'interval': '1h', 'limit': 10}
r = requests.get(url, params=param)
if r.status_code == 200:
    data = pd.DataFrame(r.json())
    print(data)
else:
    print('Error')
You can try this. I've defined a function for the price check, and the rest is the main loop:
import time
# reuses url, param, requests and pandas from the question's code

def price_check(df):
    max_value = max(df['Price'])  # max price within 1 hour
    min_value = min(df['Price'])  # min price within 1 hour
    if min_value / max_value < 0.99:  # 1% threshold
        print("Alert")

while True:  # you can adjust the check frequency with time.sleep()
    response = requests.get(url, params=param)
    if response.status_code == 200:
        data = pd.DataFrame(response.json())
        price_check(data)
    time.sleep(10)
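One caveat with the sketch above: pd.DataFrame(response.json()) on the klines endpoint yields integer-numbered columns, so df['Price'] would raise a KeyError. Here is a minimal sketch of building a 'Price' column first, assuming the close price is what you want (in a Binance kline row, index 4 is the close, returned as a string; using 1m candles with limit 60 to approximate "the last hour" is my choice):

import pandas as pd
import requests

url = 'https://testnet.binancefuture.com/fapi/v1/klines'
param = {'symbol': 'XRPUSDT', 'interval': '1m', 'limit': 60}  # ~ the last hour
r = requests.get(url, params=param)
if r.status_code == 200:
    data = pd.DataFrame(r.json())
    # index 4 of a kline row is the close price, returned as a string
    df = pd.DataFrame({'Price': data[4].astype(float)})
    price_check(df)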
import requests
import time

def get_price():
    url = "https://api.binance.com/api/v3/ticker/price?symbol=XRPUSDT"
    response = requests.get(url)
    return float(response.json()["price"])

def check_price_drop(price, highest_price):
    if price / highest_price < 0.99:
        print("Price dropped by 1%!")

highest_price = 0
while True:
    price = get_price()
    if price > highest_price:
        highest_price = price
    check_price_drop(price, highest_price)
    time.sleep(10)
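Caveat: highest_price in this version is the high since the script started, not the maximum of the last hour that the assignment specifies. Here is a minimal sketch of a rolling one-hour window using collections.deque (the 10-second poll interval is an arbitrary choice):

import time
from collections import deque

import requests

def get_price():
    url = "https://api.binance.com/api/v3/ticker/price?symbol=XRPUSDT"
    response = requests.get(url)
    return float(response.json()["price"])

window = deque()  # (timestamp, price) pairs from the last hour

while True:
    now = time.time()
    price = get_price()
    window.append((now, price))
    # drop readings older than one hour
    while window and window[0][0] < now - 3600:
        window.popleft()
    highest_price = max(p for _, p in window)
    if price / highest_price < 0.99:
        print("Price dropped by 1% from the 1-hour high!")
    time.sleep(10)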
I wrote a script to get historical data from the public Trades endpoint of the Kraken API; the code is as follows:
import pandas as pd
import json
import time
import urllib.request

def get_data(pair, since, until):
    global data
    global query
    global json_response
    global api_data
    data_columns = ["price", "volume", "time", "buy/sell", "market/limit", "miscellaneous"]
    data = pd.DataFrame(columns=data_columns)
    api_start = since
    app_start_time = time.time()
    counter = 1
    while api_start < until:
        last_time = time.time()
        api_domain = "https://api.kraken.com/0/public/Trades" + \
                     "?pair=%(pair)s&since=%(since)s" % {"pair": pair, "since": api_start}
        api_request = urllib.request.Request(api_domain)
        try:
            api_data = urllib.request.urlopen(api_request).read()
        except Exception:
            time.sleep(3)
            continue  # retry instead of falling through with stale api_data
        api_data = json.loads(api_data)
        if len(api_data["error"]) != 0:
            print(api_data["error"])
            time.sleep(3)
            continue
        query = pd.DataFrame(api_data["result"][pair], columns=data_columns)
        data = data.append(query, ignore_index=True)
        api_start = int(api_data["result"]["last"][:10])
        counter += 1
        time.sleep(1)
        print("Request number: %s" % counter)
        print("Time since start: %s minutes" % round((time.time() - app_start_time) / 60, 2))
        print("Time since last request: %s seconds" % round((time.time() - last_time), 2))
        print("last: %s" % api_start)
        print("")

get_data("XXBTZUSD", 1414761200, 1455761200)
After some successful responses, I get flawed responses: at some point, the UNIX timestamp simply jumps from 142894080.33775 to 1654992002.801943, resulting in wrong data.
Is that a problem with my code or with the API?
Thanks in advance.
Taking the liberty of simplifying your code, I cannot confirm your observation; I get proper timestamps.
Try this:
import json

import pandas as pd
import requests

def get_data(pair, since):
    url = f"https://api.kraken.com/0/public/Trades?pair={pair}&since={since}"
    api_data = requests.get(url)
    api_data = json.loads(api_data.content)
    return api_data

results = get_data("XBTUSD", 1414761200)
columns = ["price", "volume", "time", "buy/sell", "market/limit", "miscellaneous"]
df = pd.DataFrame(results["result"]["XXBTZUSD"], columns=columns)
df.time = df.time.astype(int)
df.head()
Print out:
price volume time buy/sell market/limit miscellaneous
0 340.09209 0.02722956 1414815678 s m
1 340.15346 0.21604000 1414820304 s m
2 340.00000 0.03395999 1414820304 s m
3 340.00001 0.01000000 1414821818 s l
4 340.00000 0.25668009 1414821818 s l
Edit:
Using pagination I can confirm the jump in timestamps. The problem very likely lies with the API.
import time
from datetime import datetime

def get_data(pair, since):
    url = f"https://api.kraken.com/0/public/Trades?pair={pair}&since={since}"
    api_data = requests.get(url)
    api_data = json.loads(api_data.content)
    return api_data

start_ts = 1414761200
frames = []
for _ in range(30):
    print(start_ts)
    print(datetime.fromtimestamp(int(start_ts)))
    tmp = get_data("XBTUSD", start_ts)
    start_ts = tmp["result"]["last"][:10]
    frames.append(pd.DataFrame(tmp["result"]["XXBTZUSD"]))
    time.sleep(2)
Print out after a couple of iterations:
1438313128
2015-07-31 05:25:28
1653648031
2022-05-27 12:40:31
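Given that the jump comes from the API, a defensive client-side option is to stop (or re-request) whenever the returned 'last' cursor leaps past the window you asked for. A minimal sketch, reusing get_data from above and the question's until bound of 1455761200:

import time

import pandas as pd

start_ts = 1414761200
until = 1455761200  # end of the window actually wanted
frames = []

while start_ts < until:
    api_data = get_data("XBTUSD", start_ts)
    next_ts = int(api_data["result"]["last"][:10])
    if next_ts > until:
        # the cursor leapt far past the requested window: bad page from the API
        print("Timestamp jumped past the requested window, stopping at", next_ts)
        break
    frames.append(pd.DataFrame(api_data["result"]["XXBTZUSD"]))
    start_ts = next_ts
    time.sleep(2)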
I am trying to download multiple netCDF4 files from GES DISC, but I seem to be having trouble with the authorization.
fpath is the location of the netCDF4 file. If I paste it into the address bar, a pop-up box appears for 'https://urs.earthdata.nasa.gov' requiring a username and password. If these are entered successfully, the file downloads. However, using fpath in requests.get() does not work.
requests.get() successfully connects if I use 'https://urs.earthdata.nasa.gov' instead of fpath, but then I cannot download the netCDF4 file.
I've tried the solution mentioned here, but no luck.
Any help would be appreciated.
Code example below:
import requests
from requests.auth import HTTPBasicAuth
from datetime import timedelta, date

def daterange(start_date, end_date):
    for n in range(int((end_date - start_date).days)):
        yield start_date + timedelta(n)

start_date = date(2016, 1, 1)
end_date = date(2016, 1, 2)

for single_date in daterange(start_date, end_date):
    YYYY = single_date.strftime("%Y")
    MM = single_date.strftime("%m")
    DD = single_date.strftime("%d")
    fpath1 = 'https://goldsmr4.gesdisc.eosdis.nasa.gov/opendap/MERRA2/M2I1NXASM.5.12.4/' + YYYY + '/' + MM + '/'
    fpath2 = 'MERRA2_400.inst1_2d_asm_Nx.' + YYYY + MM + DD + '.nc4.nc?'
    fpath3 = 'U2M[0:23][94:160][469:534],TROPT[0:23][94:160][469:534],TROPPB[0:23][94:160][469:534],' \
             'T2M[0:23][94:160][469:534],TQL[0:23][94:160][469:534],TOX[0:23][94:160][469:534],' \
             'PS[0:23][94:160][469:534],V50M[0:23][94:160][469:534],DISPH[0:23][94:160][469:534],' \
             'TO3[0:23][94:160][469:534],TS[0:23][94:160][469:534],T10M[0:23][94:160][469:534],' \
             'TROPPT[0:23][94:160][469:534],TQI[0:23][94:160][469:534],SLP[0:23][94:160][469:534],' \
             'TQV[0:23][94:160][469:534],V2M[0:23][94:160][469:534],TROPQ[0:23][94:160][469:534],' \
             'V10M[0:23][94:160][469:534],U50M[0:23][94:160][469:534],U10M[0:23][94:160][469:534],' \
             'QV2M[0:23][94:160][469:534],TROPPV[0:23][94:160][469:534],' \
             'QV10M[0:23][94:160][469:534],time,lat[94:160],lon[469:534]'
    fpath = fpath1 + fpath2 + fpath3
    print(fpath)
    # This successfully connects:
    # response = requests.get('https://urs.earthdata.nasa.gov', auth=HTTPBasicAuth('username', 'password'))
    # print(response)
    # This one does not:
    response = requests.get(fpath, auth=HTTPBasicAuth('username', 'password'))
    print(response)
Note - anyone can create a free account to access this data by going to this website
Thank you @Stovfl for pointing me in the right direction.
The guidance led me to this website, which contained information on how to set up a session for Earthdata.
The updated, complete code is below:
import requests
from datetime import timedelta, date

def daterange(start_date, end_date):
    for n in range(int((end_date - start_date).days)):
        yield start_date + timedelta(n)

start_date = date(2016, 1, 1)
end_date = date(2019, 7, 31)

# ***********************
# overriding requests.Session.rebuild_auth to maintain headers when redirected
# ***********************
class SessionWithHeaderRedirection(requests.Session):
    AUTH_HOST = 'urs.earthdata.nasa.gov'

    def __init__(self, username, password):
        super().__init__()
        self.auth = (username, password)

    # Overrides from the library to keep headers when redirected to or from the NASA auth host.
    def rebuild_auth(self, prepared_request, response):
        headers = prepared_request.headers
        url = prepared_request.url
        if 'Authorization' in headers:
            original_parsed = requests.utils.urlparse(response.request.url)
            redirect_parsed = requests.utils.urlparse(url)
            if (original_parsed.hostname != redirect_parsed.hostname) and \
                    redirect_parsed.hostname != self.AUTH_HOST and \
                    original_parsed.hostname != self.AUTH_HOST:
                del headers['Authorization']
        return

# create a session with the user credentials that will be used to authenticate access to the data
username = "USERNAME"
password = "PASSWORD"
session = SessionWithHeaderRedirection(username, password)

# ***********************
# Loop through Files
# ***********************
for single_date in daterange(start_date, end_date):
    YYYY = single_date.strftime("%Y")
    MM = single_date.strftime("%m")
    DD = single_date.strftime("%d")
    fpath1 = 'https://goldsmr4.gesdisc.eosdis.nasa.gov/opendap/MERRA2/M2I1NXASM.5.12.4/' + YYYY + '/' + MM + '/'
    fpath2 = 'MERRA2_400.inst1_2d_asm_Nx.' + YYYY + MM + DD + '.nc4.nc?'
    fpath3 = 'U2M[0:23][94:160][469:534],TROPT[0:23][94:160][469:534],TROPPB[0:23][94:160][469:534],' \
             'T2M[0:23][94:160][469:534],TQL[0:23][94:160][469:534],TOX[0:23][94:160][469:534],' \
             'PS[0:23][94:160][469:534],V50M[0:23][94:160][469:534],DISPH[0:23][94:160][469:534],' \
             'TO3[0:23][94:160][469:534],TS[0:23][94:160][469:534],T10M[0:23][94:160][469:534],' \
             'TROPPT[0:23][94:160][469:534],TQI[0:23][94:160][469:534],SLP[0:23][94:160][469:534],' \
             'TQV[0:23][94:160][469:534],V2M[0:23][94:160][469:534],TROPQ[0:23][94:160][469:534],' \
             'V10M[0:23][94:160][469:534],U50M[0:23][94:160][469:534],U10M[0:23][94:160][469:534],' \
             'QV2M[0:23][94:160][469:534],TROPPV[0:23][94:160][469:534],' \
             'QV10M[0:23][94:160][469:534],time,lat[94:160],lon[469:534]'
    url = fpath1 + fpath2 + fpath3
    # print(url)
    # extract the filename from the url to be used when saving the file
    filename = 'MERRA2_400.inst1_2d_asm_Nx.' + YYYY + MM + DD + '.nc4.nc'
    print(filename)
    try:
        # submit the request using the session
        response = session.get(url, stream=True)
        print(response.status_code)
        # raise an exception in case of http errors
        response.raise_for_status()
        # save the file
        with open(filename, 'wb') as fd:
            for chunk in response.iter_content(chunk_size=1024 * 1024):
                fd.write(chunk)
    except requests.exceptions.HTTPError as e:
        # handle any errors here
        print(e)
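As a side note, instead of hardcoding USERNAME and PASSWORD you could keep the credentials in a ~/.netrc file and read them with Python's standard netrc module; a minimal sketch, assuming the entry exists:

from netrc import netrc

# ~/.netrc needs a line like:
# machine urs.earthdata.nasa.gov login USERNAME password PASSWORD
username, _, password = netrc().authenticators('urs.earthdata.nasa.gov')
session = SessionWithHeaderRedirection(username, password)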
I want to repeatedly call a web URL that has a timestamp at the end.
Example URL:
'https://mywebApi/StartTime=2019-05-01%2000:00:00&&endTime=2019-05-01%2003:59:59'
StartTime=2019-05-01%2000:00:00
is the URL representation of the time 2019-05-01 00:00:00, and
endTime=2019-05-01%2003:59:59
is the URL representation of the time 2019-05-01 03:59:59.
The requirement is to make repeated calls with a 4-hour window. When adding 4 hours, the date may roll over.
Is there a lean way to generate the URL string?
Something like:
baseUrl = 'https://mywebApi/StartTime='
startTime = DateTime(2018-05-03 00:01:00)
terminationTime = DateTime(2019-05-03 00:05:00)

while (startTime < terminationTime):
    endTime = startTime + hours(4)
    url = baseUrl + str(startTime) + "endtime=" + str(endTime)
    # request get url
    startTime = startTime + hours(1)
You can use datetime.timedelta together with the strftime function as follows:

from datetime import datetime, timedelta

baseUrl = 'https://mywebApi/StartTime='
startTime = datetime(year=2018, month=5, day=3, hour=0, minute=1, second=0)
terminationTime = datetime(year=2018, month=5, day=3, hour=3, minute=59, second=59)

while startTime < terminationTime:
    endTime = startTime + timedelta(hours=4)
    url = (baseUrl + startTime.strftime("%Y-%m-%d %H:%M:%S").replace(" ", "%20")
           + "&&endTime=" + endTime.strftime("%Y-%m-%d %H:%M:%S").replace(" ", "%20"))
    # request get url
    startTime = endTime

The following link is useful: https://www.guru99.com/date-time-and-datetime-classes-in-python.html, or you can look at the official datetime documentation.
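Note that %20 is URL escaping rather than a strftime directive, which is why it is replaced into the string after formatting above. An alternative is to format with a plain space and let urllib.parse.quote do the escaping; a minimal sketch (keeping ':' unescaped to match the example URL):

from datetime import datetime, timedelta
from urllib.parse import quote

baseUrl = 'https://mywebApi/StartTime='
startTime = datetime(2018, 5, 3, 0, 1, 0)
terminationTime = datetime(2018, 5, 3, 3, 59, 59)

def stamp(dt):
    # quote() turns the space into %20; safe=':' keeps the colons literal
    return quote(dt.strftime("%Y-%m-%d %H:%M:%S"), safe=':')

while startTime < terminationTime:
    endTime = startTime + timedelta(hours=4)
    url = baseUrl + stamp(startTime) + "&&endTime=" + stamp(endTime)
    # request get url
    startTime = endTime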
Edit: using what u/John Gordan said to declare the initial dates.
A web scraper written in Python extracts water level data, one reading per hour.
When written to a .txt file using the code below, each line is prefixed with a datetime, so each line takes up something like 20 characters.
Example: "01/01-2010 11:10,-32"
Using the code below results in a file containing data from 01/01-2010 00:10 to 28/02-2010 23:50, which equals something like 60 days. 60 days with one reading per hour results in 1440 lines and approx. 30,000 characters. Microsoft Word, however, tells me the file contains 830,000 characters on 42,210 lines, which fits very well with the observed file size of 893 kB.
Apparently some lines and characters are hiding somewhere. I can't seem to find them anywhere.
import requests
import time

totaldata = []
filnavn = 'Vandstandsdata_Esbjerg_KDI_TEST_recode.txt'
file = open(filnavn, 'w')
file.write("")
file.close()

from datetime import timedelta, date
from bs4 import BeautifulSoup

def daterange(start_date, end_date):
    for n in range(int((end_date - start_date).days)):
        yield start_date + timedelta(n)

start_date = date(2010, 1, 1)
end_date = date(2010, 3, 1)
values = []
datoer = []

for single_date in daterange(start_date, end_date):
    valuesTemp = []
    datoerTemp = []
    dato = single_date.strftime("%d-%m-%y")
    url = "http://kysterne.kyst.dk/pages/10852/waves/showData.asp?targetDay=" + dato + "&ident=6401&subGroupGuid=16410"
    page = requests.get(url)
    if page.status_code == 200:
        soup = BeautifulSoup(page.content, 'html.parser')
        dataliste = list(soup.find_all(class_="dataTable"))
        #dataliste = list(dataliste.find_all('td'))
        #dataliste = dataliste[0].getText()
        print(url)
        dataliste = str(dataliste)
        dataliste = dataliste.splitlines()
        dataliste = dataliste[6:]  #18
        #print(dataliste[0])
        #print(dataliste[1])
        for e in range(0, len(dataliste), 4):  #4
            #print(dataliste[e])
            datoerTemp.append(dataliste[e])
            #print(" -------- \n")
        for e in range(1, len(dataliste), 4):  #4
            valuesTemp.append(dataliste[e])
        for e in valuesTemp:
            #print(e)
            e = e[4:]
            e = e[:-5]
            values.append(e)
        for e in datoerTemp:
            #print(e)
            e = e[4:]
            e = e[:-5]
            datoer.append(e)
        file = open(filnavn, 'a')
        for i in range(0, len(datoer), 6):
            file.write(datoer[i] + "," + values[i] + "\n")
        print("- skrevet til fil\n")  # "written to file"
        file.close()

print("done")
Ah, eureka.
Seconds before posting this question, I realized I had forgotten to reset the list.
I added:
datoer = []
at the top of the day loop, and everything now works as intended.
The old code wrote the data from a given day plus all data from all previous days, on every pass through the loop.
I hope someone can use this newbie experience.
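For anyone else hitting this, a sketch of where the reset belongs; note that values presumably needs the same treatment, since the write loop pairs datoer[i] with values[i]:

for single_date in daterange(start_date, end_date):
    # reset the per-day lists so each write only covers this day's readings
    datoer = []
    values = []
    valuesTemp = []
    datoerTemp = []
    ...  # rest of the scraping and file-writing code unchanged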