I wrote a script to fetch historical data from the public Trades endpoint of the Kraken API; the code is as follows:
import pandas as pd
import json
import time
import urllib.request
def get_data(pair, since, until):
    global data
    global query
    global json_response
    global api_data

    data_columns = ["price", "volume", "time", "buy/sell", "market/limit", "miscellaneous"]
    data = pd.DataFrame(columns=data_columns)

    api_start = since
    app_start_time = time.time()
    counter = 1

    while api_start < until:
        last_time = time.time()
        api_domain = "https://api.kraken.com/0/public/Trades" + \
                     "?pair=%(pair)s&since=%(since)s" % {"pair": pair, "since": api_start}
        api_request = urllib.request.Request(api_domain)
        try:
            api_data = urllib.request.urlopen(api_request).read()
        except Exception:
            time.sleep(3)
        api_data = json.loads(api_data)
        if len(api_data["error"]) != 0:
            print(api_data["error"])
            time.sleep(3)
            continue
        query = pd.DataFrame(api_data["result"][pair], columns=data_columns)
        data = data.append(query, ignore_index=True)
        api_start = int(api_data["result"]["last"][:10])
        counter += 1
        time.sleep(1)
        print("Request number: %s" % counter)
        print("Time since start: %s minutes" % round((time.time() - app_start_time) / 60, 2))
        print("Time since last request: %s seconds" % round((time.time() - last_time), 2))
        print("last: %s" % api_start)
        print("")

get_data("XXBTZUSD", 1414761200, 1455761200)
After some successful responses I start getting flawed responses: at some point the UNIX timestamp simply jumps from 142894080.33775 to 1654992002.801943, resulting in wrong data.
Is that a problem with my code or with the API?
Thanks in advance.
Taking the liberty of simplifying your code, I cannot confirm your observation; I get proper timestamps. Try this:
import json

import pandas as pd
import requests

def get_data(pair, since):
    url = f"https://api.kraken.com/0/public/Trades?pair={pair}&since={since}"
    api_data = requests.get(url)
    api_data = json.loads(api_data.content)
    return api_data

results = get_data("XBTUSD", 1414761200)

columns = ["price", "volume", "time", "buy/sell", "market/limit", "miscellaneous"]
df = pd.DataFrame(results["result"]["XXBTZUSD"], columns=columns)
df.time = df.time.astype(int)
df.head()
Print out:
price volume time buy/sell market/limit miscellaneous
0 340.09209 0.02722956 1414815678 s m
1 340.15346 0.21604000 1414820304 s m
2 340.00000 0.03395999 1414820304 s m
3 340.00001 0.01000000 1414821818 s l
4 340.00000 0.25668009 1414821818 s l
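(Optional: the time column above holds epoch seconds, so if you want human-readable timestamps you can convert it with pandas:)
# convert epoch seconds to pandas datetimes (optional)
df["time"] = pd.to_datetime(df["time"], unit="s")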
Edit:
Using pagination I can confirm the jump in timestamps. The problem very likely lies with the API.
import time
from datetime import datetime  # needed for the timestamp printout below

def get_data(pair, since):
    url = f"https://api.kraken.com/0/public/Trades?pair={pair}&since={since}"
    api_data = requests.get(url)
    api_data = json.loads(api_data.content)
    return api_data

start_ts = 1414761200
frames = []
for _ in range(30):
    print(start_ts)
    print(datetime.fromtimestamp(int(start_ts)))
    tmp = get_data("XBTUSD", start_ts)
    start_ts = tmp["result"]["last"][:10]
    frames.append(pd.DataFrame(tmp["result"]["XXBTZUSD"]))  # note: tmp, not results
    time.sleep(2)
Print out after a couple of iterations:
1438313128
2015-07-31 05:25:28
1653648031
2022-05-27 12:40:31
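Until this is resolved on Kraken's side, a pragmatic workaround (a sketch built on the frames list from the loop above; the until value is taken from the original question) is to filter the concatenated trades by their own timestamps, which drops the rows from the mis-paginated page:
until = 1455761200  # upper bound of the requested window (value from the question)

trades = pd.concat(frames, ignore_index=True)
# column 2 holds the trade timestamp; keep only rows inside the requested window
trades = trades[trades[2].astype(float) < until]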
Related
I have created a Python trading bot but I can't get the code to execute a trade. Just FYI, I am not an experienced coder and have only worked with computational numerical solutions; I have never used an API or websockets before (if that's even the right term).
Anyway, I can confirm that all trade conditions evaluate fine, but the following error is printed when the execute_trade() function is run:
Error executing trade: {'code': -1022, 'msg': 'Signature for this request is not valid.'}
import talib
import pandas as pd
import time
import requests
import json
import logging
import hmac
from datetime import datetime
import hashlib
from hashlib import sha256
import API
# Get current time
now = datetime.now()
# Create a logger
logging.basicConfig(filename='trading_bot.log', level=logging.INFO)
# Binance API endpoint for accessing historical data
api_endpoint = "https://api.binance.com/api/v3/klines"
# Binance API endpoint for placing trades
api_trade = "https://api.binance.com/api/v3/order"
# Binance API endpoint for checking account balance
api_balance = "https://api.binance.com/api/v3/account"
# Binance API keys
api_key = API.API_KEY
api_secret = API.API_SECRET
# Interval of historical data (1 minute)
interval = "1m"
# Currency pair
symbol = "DOGEBUSD"
# Risk management
stop_loss = 0.5 # stop loss in percentage
# Amount of currency to trade
amount = 10
# Timeframe for calculating indicators
timeframe = 30
# Parameters for Bollinger Bands
bb_window = 20
bb_deviation = 2
def hashing(query_string, secret):
    return hmac.new(
        secret.encode("utf-8"), query_string.encode("utf-8"), hashlib.sha256
    ).hexdigest()

# Function to retrieve historical data from Binance
def fetch_binance_data():
    try:
        params = {"symbol": symbol, "interval": interval}
        data = requests.get(api_endpoint, params=params).json()
        df = pd.DataFrame(data, columns=["timestamp", "open", "high", "low", "close", "volume", "close_time", "quote_asset_volume", "number_of_trades", "taker_buy_base_asset_volume", "taker_buy_quote_asset_volume", "ignore"])
        df["timestamp"] = pd.to_datetime(df["timestamp"], unit='ms')
        df.set_index("timestamp", inplace=True)
        df = df.astype(float)
    except Exception as e:
        logging.error("Error fetching data from Binance: %s", e)
        df = pd.DataFrame()
    return df
# Function to check account balance
def check_balance():
    try:
        params = {"timestamp": int(time.time()*1000)}
        query_string = '&'.join([f'{k}={v}' for k, v in params.items()])
        signature = hmac.new(api_secret.encode(), query_string.encode(), sha256).hexdigest()
        params["signature"] = signature
        headers = {"X-MBX-APIKEY": api_key}
        response = requests.get(api_balance, params=params, headers=headers)
        if response.status_code == 200:
            balance = response.json()
            print("Your current balance is: ", balance)
            return balance
        else:
            print("Error getting balance information.")
            return {}
    except Exception as e:
        logging.error("Error getting balance information: %s", e)
        return {}
# Function to calculate indicators
def calculate_indicators(df):
    try:
        # Calculate Bollinger Bands
        df["bb_upper"], df["bb_middle"], df["bb_lower"] = talib.BBANDS(df["close"], timeperiod=bb_window, nbdevup=bb_deviation, nbdevdn=bb_deviation)
        # Calculate RSI
        df["rsi"] = talib.RSI(df["close"], timeperiod=timeframe)
    except Exception as e:
        logging.error("Error calculating indicators: %s", e)
    return df

# Global variable to store the buy price
buy_price = 0
# Function to execute trade
def execute_trade(side):
    try:
        params = {"symbol": symbol, "side": side, "type": "MARKET", "quantity": amount}
        query_string = '&'.join([f'{k}={v}' for k, v in params.items()])
        timestamp = int(time.time()*1000)
        query_string += f"&timestamp={timestamp}"
        signature = hmac.new(api_secret.encode(), query_string.encode(), hashlib.sha256).hexdigest()
        headers = {"X-MBX-APIKEY": api_key, "X-MBX-SIGNATURE": signature}
        response = requests.post(api_trade, params=params, headers=headers)
        if response.status_code != 200:
            logging.error("Error executing trade: %s", response.json())
            print("Error executing trade: ", response.json())
        else:
            logging.info("Trade executed successfully: %s", response.json())
            price = data['close'].iloc[-1]
            print("Trade executed: ", side, " at price ", price, f"at {current_time}")
    except Exception as e:
        logging.error("Error executing trade: %s", e)
while True:
# Retrieve historical data from Binance
# Get current time
now = datetime.now()
current_time = now.strftime("%H:%M:%S")
data = fetch_binance_data()
# Check for enough data
if len(data) < timeframe:
print("Data insufficient")
continue
# Calculate indicators
# Indicator 1: RSI
data['RSI'] = talib.RSI(data['close'], timeperiod=14)
# Indicator 2: Bollinger Bands
data['upper'], data['middle'], data['lower'] = talib.BBANDS(data['close'], timeperiod=bb_window, nbdevup=bb_deviation, nbdevdn=bb_deviation, matype=talib.MA_Type.EMA)
price = data['close'].iloc[-1]
# Check for buy conditions
if (data['low'].iloc[-1] < data['middle'].iloc[-1] and data['RSI'].iloc[-1] < 50):
# Execute buy trade
if not trade_executed:
execute_trade("BUY")
trade_executed = True
print(f"Buy conditions triggered at {current_time} at {price}")
# Check for sell conditions
if (data['high'].iloc[-1] > data['middle'].iloc[-1] and data['RSI'].iloc[-1] > 60):
# Execute sell trade
if not trade_executed:
execute_trade("SELL")
trade_executed = True
print(f"Sell conditions triggered at {current_time} at {price}")
trade_executed = False
time.sleep(5)
# Main function
def main():
    while True:
        # Fetch historical data
        df = fetch_binance_data()
        # Check if dataframe is not empty
        if not df.empty:
            # Calculate indicators
            df = calculate_indicators(df)
            # Check trade
        time.sleep(20)

if __name__ == "__main__":
    main()
You should use python-binance, a Python wrapper for the Binance exchange REST API v3, instead of coding it yourself. Judging by the wrapper's own signing code, your signature logic seems to be right (maybe your default encoding is not utf-8):
# https://github.com/sammchardy/python-binance/blob/59e3c8045fa13ca0ba038d4a2e0481212b3b665f/binance/client.py#L219
def _generate_signature(self, data: Dict) -> str:
    assert self.API_SECRET, "API Secret required for private endpoints"
    ordered_data = self._order_params(data)
    query_string = '&'.join([f"{d[0]}={d[1]}" for d in ordered_data])
    m = hmac.new(self.API_SECRET.encode('utf-8'), query_string.encode('utf-8'), hashlib.sha256)
    return m.hexdigest()
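For completeness, here is a rough sketch of what placing the same market order looks like through the wrapper (assuming pip install python-binance and the API module holding your keys from the question; the wrapper builds the timestamp and signature for you):
from binance.client import Client

import API  # the module holding API_KEY / API_SECRET in the question

client = Client(API.API_KEY, API.API_SECRET)

# market buy of the symbol/amount used in the question
order = client.create_order(
    symbol="DOGEBUSD",
    side=Client.SIDE_BUY,
    type=Client.ORDER_TYPE_MARKET,
    quantity=10,
)
print(order)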
I'm currently trying to put two things together when checking multiple websites from my input CSV file:
Check the HTTP status
Check whether the website displays a specific keyword
then save the results to a new CSV file.
My input.csv:
id url
1 https://example123.com
2 https://envato.com/blog/30-outstanding-coming-soon-and-under-construction-website-templates/
3 https://mundoshoponline.com
My Code:
import requests
import pandas as pd
from bs4 import BeautifulSoup
import asyncio
import re
from concurrent.futures import ProcessPoolExecutor, as_completed

df = pd.read_csv('path/to/my/input.csv')
#my csv has urls in the 1st column
urls = df.T.values.tolist()[1]
results = {}
status = []

async def scrape(url):
    try:
        r = requests.get(url, timeout=(3, 6))
        r.raise_for_status()
        soup = BeautifulSoup(r.content, 'html.parser')
        #all keywords to check on the website
        data = {
            "coming soon": soup.body.findAll(text = re.compile("coming soon", re.I)),
            "Opening Soon": soup.body.findAll(text = re.compile("Opening Soon", re.I)),
            "Forbidden": soup.body.findAll(text = re.compile("Forbidden", re.I)),
            "Page not found": soup.body.findAll(text = re.compile("Page not found", re.I)),
            "Under Construction": soup.body.findAll(text = re.compile("Under Construction", re.I)),
            "Currently Unavailable": soup.body.findAll(text = re.compile("Currently Unavailable", re.I))}
        results[url] = data
    #check for http status and save to status list
    except (requests.exceptions.ConnectionError, requests.exceptions.Timeout):
        status.append("Down")
    except requests.exceptions.HTTPError:
        status.append("Other")
    else:
        status.append("OK")

async def main():
    await asyncio.wait([scrape(url) for url in urls])

loop = asyncio.get_event_loop()
loop.run_until_complete(main())
loop.close()

comingList = []
openingList = []
forbiddenList = []
notfoundList = []
underList = []
currentlyList = []

#mark x if there are any hits for specific keyword
for url in results:
    comingList.append("x" if len(results[url]["coming soon"]) > 0 else "")
    openingList.append("x" if len(results[url]["Opening Soon"]) > 0 else "")
    forbiddenList.append("x" if len(results[url]["Forbidden"]) > 0 else "")
    notfoundList.append("x" if len(results[url]["Page not found"]) > 0 else "")
    underList.append("x" if len(results[url]["Under Construction"]) > 0 else "")
    currentlyList.append("x" if len(results[url]["Currently Unavailable"]) > 0 else "")

df["comingSoon"] = pd.DataFrame(comingList, columns=['comingSoon'])
df["openingSoon"] = pd.DataFrame(openingList, columns=['openingSoon'])
df["forbidden"] = pd.DataFrame(forbiddenList, columns=['forbidden'])
df["notfound2"] = pd.DataFrame(notfoundList, columns=['notfound2'])
df["underConstruction"] = pd.DataFrame(underList, columns=['underConstruction'])
df["currentlyUnavailable"] = pd.DataFrame(currentlyList, columns=['currentlyUnavailable'])
df['status'] = status
print(df)
df.to_csv('path/to/my/output.csv', index=False)
However, whenever I run the above script with for url in urls:, it throws this error for some of my URLs, the script breaks, and output.csv is not generated:
Traceback (most recent call last):
File "path/to/myscan.py", line 51, in <module>
comingList.append("x" if len(results[url]["coming soon"]) > 0 else "")
KeyError: 'http://example123.com'
When running it with for url in results:, the generated output.csv (screenshot omitted) looks erroneous:
the first row has keywords marked as present (comingSoon and underConstruction columns) and status = Down, even though the website doesn't contain the 'coming soon' or 'under construction' strings.
Would someone be able to help me with this? I believe there might be an issue in my loop or try/except part of the code. I'm happy to provide more information if the above is not sufficient. Thank you in advance.
I think your main problem is that you are iterating over all of the urls, some of which may have failed and therefore do not exist as a key in results.
A much safer way is to iterate only over the subset of urls that you are sure have succeeded and therefore have a key in results, so instead of
for url in urls:
you could make it
for url in results:
To make the final results consistent with the input order of your urls:
import requests
import pandas as pd
from bs4 import BeautifulSoup
import asyncio
import re
from concurrent.futures import ProcessPoolExecutor, as_completed

df = pd.read_csv('./input.csv')
#my csv has urls in the 4th column
urls = ['example123.com', 'https://envato.com/blog/30-outstanding-coming-soon-and-under-construction-website-templates/', 'http://alotechgear.com']
results = {}
status = {}

async def scrape(url):
    try:
        r = requests.get(url, timeout=(3, 6))
        r.raise_for_status()
        soup = BeautifulSoup(r.content, 'html.parser')
        #all keywords to check on the website
        data = {
            "coming soon": soup.body.findAll(text = re.compile("coming soon", re.I)),
            "Opening Soon": soup.body.findAll(text = re.compile("Opening Soon", re.I)),
            "Forbidden": soup.body.findAll(text = re.compile("Forbidden", re.I)),
            "Page not found": soup.body.findAll(text = re.compile("Page not found", re.I)),
            "Under Construction": soup.body.findAll(text = re.compile("Under Construction", re.I)),
            "Currently Unavailable": soup.body.findAll(text = re.compile("Currently Unavailable", re.I))}
        results[url] = data
    #check for http status and save to status list
    except (requests.exceptions.ConnectionError, requests.exceptions.Timeout, requests.exceptions.MissingSchema):
        status[url] = "Down"
    except requests.exceptions.HTTPError:
        status[url] = "Other"
    else:
        status[url] = "OK"

async def main():
    await asyncio.wait([scrape(url) for url in urls])

loop = asyncio.get_event_loop()
loop.run_until_complete(main())
loop.close()

comingList = []
openingList = []
forbiddenList = []
notfoundList = []
underList = []
currentlyList = []
statusList = []

#mark x if there are any hits for specific keyword
for url in urls:
    if not results.get(url):
        statusList.append(status.get(url))
        notfoundList.append("x")
        comingList.append("-")
        openingList.append("-")
        forbiddenList.append("-")
        underList.append("-")
        currentlyList.append("-")
    else:
        statusList.append(status.get(url))
        comingList.append("x" if len(results[url].get("coming soon")) > 0 else "-")
        openingList.append("x" if len(results[url].get("Opening Soon")) > 0 else "-")
        forbiddenList.append("x" if len(results[url].get("Forbidden")) > 0 else "-")
        notfoundList.append("x" if len(results[url].get("Page not found")) > 0 else "-")
        underList.append("x" if len(results[url].get("Under Construction")) > 0 else "-")
        currentlyList.append("x" if len(results[url].get("Currently Unavailable")) > 0 else "-")

df["comingSoon"] = pd.DataFrame(comingList, columns=['comingSoon'])
df["openingSoon"] = pd.DataFrame(openingList, columns=['openingSoon'])
df["forbidden"] = pd.DataFrame(forbiddenList, columns=['forbidden'])
df["notfound2"] = pd.DataFrame(notfoundList, columns=['notfound2'])
df["underConstruction"] = pd.DataFrame(underList, columns=['underConstruction'])
df["currentlyUnavailable"] = pd.DataFrame(currentlyList, columns=['currentlyUnavailable'])
df['status'] = pd.DataFrame(statusList, columns=['Status'])
print(df)
df.to_csv('./output.csv', index=False)
sample result:
id url comingSoon openingSoon forbidden notfound2 underConstruction currentlyUnavailable status
0 1 https://example123.com - - - x - - Down
1 2 https://envato.com/blog/30-outstanding-c... x - - - x - OK
2 3 https://mundoshoponline.com - - - x - - Down
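A side note, separate from the fix above: requests.get is blocking, so the coroutines still fetch the pages one at a time. If you want real concurrency while keeping requests, one option (a sketch, not part of the original answer) is to push the blocking call onto a thread pool:
import asyncio
import functools

import requests

async def fetch(url):
    loop = asyncio.get_running_loop()
    # run the blocking requests call in the default thread pool
    return await loop.run_in_executor(
        None, functools.partial(requests.get, url, timeout=(3, 6))
    )

async def fetch_all(urls):
    # gather keeps the results in the same order as urls
    return await asyncio.gather(*(fetch(u) for u in urls), return_exceptions=True)

# usage: responses = asyncio.get_event_loop().run_until_complete(fetch_all(urls))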
I'm using the Medium API to get some information, but after a number of API calls the Python script ends with this error:
IndexError: list index out of range
Here is my Python code:
def get_post_responses(posts):
    #start = time.time()
    count = 0
    print('Retrieving the post responses...')
    responses = []
    for post in posts:
        url = MEDIUM + '/_/api/posts/' + post + '/responses'
        count = count + 1
        print("number of times api called", count)
        response = requests.get(url)
        response_dict = clean_json_response(response)
        responses += response_dict['payload']['value']
        #end = time.time()
        #four = end - start
        #global time_cal
        #time_cal.append(four)
    return responses

def check_if_high_recommends(response, recommend_min):
    if response['virtuals']['recommends'] >= recommend_min:
        return True

def check_if_recent(response):
    limit_date = datetime.now() - timedelta(days=360)
    creation_epoch_time = response['createdAt'] / 1000
    creation_date = datetime.fromtimestamp(creation_epoch_time)
    if creation_date >= limit_date:
        return True
It needs to work for users with more than 10,000 followers.
I found an answer to my question: I just need to wrap the call in a try/except.
response_dict = clean_json_response(response)
try:
    responses += response_dict['payload']['value']
except Exception:
    continue
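For completeness, a minimal sketch of how that guard sits inside the original loop (assuming the same MEDIUM base URL, requests, and clean_json_response helper from the question):
def get_post_responses(posts):
    responses = []
    for count, post in enumerate(posts, start=1):
        url = MEDIUM + '/_/api/posts/' + post + '/responses'
        print("number of times api called", count)
        response = requests.get(url)
        try:
            response_dict = clean_json_response(response)
            responses += response_dict['payload']['value']
        except (KeyError, IndexError):
            # skip posts whose response payload is missing or malformed
            continue
    return responses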
I am trying to scrape data using a loop; this is the code:
import requests
import json
import pandas as pd

parameters = ['a:1','a:2','a:3','a:4','a:3','a:4','a:5','a:6','a:7','a:8','a:9','a:10']
results = pd.DataFrame()
for item in parameters:
    key, value = item.split(':')
    url = "https://xxxx.000webhostapp.com/getNamesEnc02Motasel2.php?keyword=%s&type=2&limit=%s" % (key, value)
    r = requests.get(url)
    cont = json.loads(r.content)
    temp_df = pd.DataFrame(cont)
    results = results.append(temp_df)
results.to_csv('ScrapeData.csv', index=False)
This method works great, but the problem is that I need the parameters to go up to 'a:1000', and I think there is a better solution than duplicating entries in parameters as in my code.
I'd really appreciate your help.
You can use a for i in range(start, end) loop, like this:
import requests
import json
import pandas as pd

results = pd.DataFrame()
key = 'a'
# Goes from 1 to 1000 (including both)
for value in range(1, 1001):
    url = f'https://xxxx.000webhostapp.com/getNamesEnc02Motasel2.php?keyword={key}&type=2&limit={value}'
    r = requests.get(url)
    cont = json.loads(r.content)
    temp_df = pd.DataFrame(cont)
    results = results.append(temp_df)
results.to_csv('ScrapeData.csv', index=False)
value = 1
key = 'a'
while value <= 1000:
    url = .....%(key, str(value))
    ....
    ....
    value += 1
......
Use a counter
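As a small aside (not part of either answer above): if you would rather keep the original parameters-list structure, you can also generate the list instead of typing it out:
# builds ['a:1', 'a:2', ..., 'a:1000'] without writing them by hand
parameters = [f'a:{i}' for i in range(1, 1001)]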
I have a script that extracts data from an API, where the final output of requests.get(url=url, auth=(user, password)).json() is "all_results". The output is ~25K rows, but it contains nested fields.
The API is for portfolio data, and the children field is a dictionary holding ticker level information (so can be really large).
The script below flattens "all_results" and specifies only the columns I need:
final_df = pd.DataFrame()
for record in all_results:
    df = pd.DataFrame(record.get('children', {}))
    df['contactId'] = record.get('contactId')
    df['origin'] = record.get('origin')
    df['description'] = record.get('description')
    final_df = final_df.append(df)
It works perfectly with smaller samples; however, when I run it over the whole data set it takes hours. Can anyone propose something more efficient than my current script? It needs to run much faster than it currently does.
Thank you in advance!
-- Full script--
import requests
import pandas as pd

user = ''
password = ""

# Starting values
start = 0
rows = 1500
base_url = 'https://....?start={0}&rows={1}'

print("Connecting to API..")
url = base_url.format(start, rows)
req = requests.get(url=url, auth=(user, password))

print("Extracting data..")
out = req.json()
total_records = out['other']['numFound']
print("Total records found: " + str(total_records))
results = out['resultList']
all_results = results
print("First " + str(rows) + " rows were extracted")

# Results will be an empty list if no more results are found
while results:
    start += rows  # Rebuild url based on current start
    url = base_url.format(start, rows)
    req = requests.get(url=url, auth=(user, password))
    out = req.json()
    results = out['resultList']
    all_results += results
    print("Next " + str(rows) + " rows were extracted")

# all_results now contains all the responses of each request.
print("Total records returned from API: " + str(len(all_results)))  # should equal number of records in response

final_df = pd.DataFrame()
for record in all_results:
    df = pd.DataFrame(record.get('children', {}))
    df['contactId'] = record.get('contactId')
    df['origin'] = record.get('origin')
    df['description'] = record.get('description')
    final_df = final_df.append(df)

final_df = final_df.reset_index()
del final_df['index']
final_df['ticker'] = final_df['identifier'].str.split('#').str.get(0)  # extract ticker (anything before #)
final_df.drop_duplicates(keep='first')  # removes duplicates
print('DataFrame from API created successfully\n')
print(final_df.head(n=50))
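The slow part is final_df.append inside the loop, which copies the growing DataFrame on every iteration. A minimal sketch of a faster variant (using the same all_results list and column names from the script above) is to collect the per-record frames in a list and concatenate once at the end:
import pandas as pd

frames = []
for record in all_results:
    df = pd.DataFrame(record.get('children', {}))
    df['contactId'] = record.get('contactId')
    df['origin'] = record.get('origin')
    df['description'] = record.get('description')
    frames.append(df)

# a single concatenation instead of thousands of incremental appends
final_df = pd.concat(frames, ignore_index=True)
final_df['ticker'] = final_df['identifier'].str.split('#').str.get(0)
final_df = final_df.drop_duplicates(keep='first')  # note: assign the result, otherwise it is discarded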