Just started learning Python. I am trying to gather data by webscraping and tweet out info. But everytime I rerun the code. I get
Forbidden: 403 Forbidden
187 - Status is a duplicate.
How do I loop this script without getting this error?
Here's my code :
def scrape ():
page = requests.get("https://www.reuters.com/business/future-of-money/")
soup = BeautifulSoup(page.content, "html.parser")
home = soup.find(class_="editorial-franchise-layout__main__3cLBl")
posts = home.find_all(class_="text__text__1FZLe text__dark-grey__3Ml43 text__inherit-font__1Y8w3 text__inherit-size__1DZJi link__underline_on_hover__2zGL4")
top_post = posts[0].find("h3", class_="text__text__1FZLe text__dark-grey__3Ml43 text__medium__1kbOh text__heading_3__1kDhc heading__base__2T28j heading__heading_3__3aL54 hero-card__title__33EFM").find_all("span")[0].text.strip()
tweet (top_post)
def tweet (top_post):
api_key = 'deletedforprivacy'
api_key_secret = 'deletedforprivacy'
access_token = 'deletedforprivacy'
access_token_secret = 'deletedforprivacy'
authenticator = tweepy.OAuthHandler(api_key, api_key_secret)
authenticator.set_access_token(access_token, access_token_secret)
api = tweepy.API(authenticator, wait_on_rate_limit=True)
api.update_status(f"{top_post} \nSource : https://www.reuters.com/business/future-of-money/")
print(top_post)
scrape()
The twitter api checks if the content is duplicate and if it is duplicate it returns:
Request returned an error: 403 {"detail":"You are not allowed to create a Tweet with duplicate content.","type":"about:blank","title":"Forbidden","status":403}
I added an simple function to check if the previous content is same as the one about to be added
** Full Code**
from requests_oauthlib import OAuth1Session
import os
import json
import requests
from bs4 import BeautifulSoup
import time
user_id = 000000000000000 # Get userid from https://tweeterid.com/
bearer_token = "<BEARER_TOKEN>"
consumer_key = "<CONSUMER_KEY>"
consumer_secret = "<CONSUMER_SECRET>"
def init():
# Get request token
request_token_url = "https://api.twitter.com/oauth/request_token?oauth_callback=oob&x_auth_access_type=write"
oauth = OAuth1Session(consumer_key, client_secret=consumer_secret)
try:
fetch_response = oauth.fetch_request_token(request_token_url)
except ValueError:
print(
"There may have been an issue with the consumer_key or consumer_secret you entered."
)
resource_owner_key = fetch_response.get("oauth_token")
resource_owner_secret = fetch_response.get("oauth_token_secret")
print("Got OAuth token and secret")
# Get authorization
base_authorization_url = "https://api.twitter.com/oauth/authorize"
authorization_url = oauth.authorization_url(base_authorization_url)
print("Please go here and authorize: %s" % authorization_url)
verifier = input("Paste the PIN here: ")
# Get the access token
access_token_url = "https://api.twitter.com/oauth/access_token"
oauth = OAuth1Session(
consumer_key,
client_secret=consumer_secret,
resource_owner_key=resource_owner_key,
resource_owner_secret=resource_owner_secret,
verifier=verifier,
)
oauth_tokens = oauth.fetch_access_token(access_token_url)
access_token = oauth_tokens["oauth_token"]
access_token_secret = oauth_tokens["oauth_token_secret"]
# Make the request
oauth = OAuth1Session(
consumer_key,
client_secret=consumer_secret,
resource_owner_key=access_token,
resource_owner_secret=access_token_secret,
)
scraper(oauth, bearer_token)
def bearer_oauth(r):
"""
Method required by bearer token authentication.
"""
r.headers["Authorization"] = f"Bearer {bearer_token}"
r.headers["User-Agent"] = "v2UserTweetsPython"
return r
def previous_tweet():
url = "https://api.twitter.com/2/users/{}/tweets".format(user_id)
# Tweet fields are adjustable.
# Options include:
# attachments, author_id, context_annotations,
# conversation_id, created_at, entities, geo, id,
# in_reply_to_user_id, lang, non_public_metrics, organic_metrics,
# possibly_sensitive, promoted_metrics, public_metrics, referenced_tweets,
# source, text, and withheld
params = {"tweet.fields": "text"}
response = requests.request(
"GET", url, auth=bearer_oauth, params=params)
print(response.status_code)
if response.status_code != 200:
raise Exception(
"Request returned an error: {} {}".format(
response.status_code, response.text
)
)
# checking if this is the first post
if response.json() != {'meta': {'result_count': 0}}:
# Since twitter changes html to small url I am splitting at \n to match to new payload
previous_tweet_text = response.json()["data"][0]["text"].split("\n")[0]
previous_payload = {"text": f"{previous_tweet_text}"}
else:
previous_payload = {"text": f""}
return previous_payload
def scraper(oauth, bearer_token):
while True:
page = requests.get(
"https://www.reuters.com/business/future-of-money/")
soup = BeautifulSoup(page.content, "html.parser")
home = soup.find(class_="editorial-franchise-layout__main__3cLBl")
posts = home.find_all(
class_="text__text__1FZLe text__dark-grey__3Ml43 text__inherit-font__1Y8w3 text__inherit-size__1DZJi link__underline_on_hover__2zGL4")
top_post = posts[0].find(
"h3", class_="text__text__1FZLe text__dark-grey__3Ml43 text__medium__1kbOh text__heading_3__1kDhc heading__base__2T28j heading__heading_3__3aL54 hero-card__title__33EFM").find_all("span")[0].text.strip()
# Be sure to add replace the text of the with the text you wish to Tweet. You can also add parameters to post polls, quote Tweets, Tweet with reply settings, and Tweet to Super Followers in addition to other features.
payload = {
"text": f"{top_post}\nSource:https://www.reuters.com/business/future-of-money/"}
current_checker_payload = {"text": payload["text"].split("\n")[0]}
previous_payload = previous_tweet()
if previous_payload != current_checker_payload:
tweet(payload, oauth)
else:
print("Content hasn't changed")
time.sleep(60)
def tweet(payload, oauth):
# Making the request
response = oauth.post(
"https://api.twitter.com/2/tweets",
json=payload,
)
if response.status_code != 201:
raise Exception(
"Request returned an error: {} {}".format(
response.status_code, response.text)
)
print("Response code: {}".format(response.status_code))
# Showing the response as JSON
json_response = response.json()
print(json.dumps(json_response, indent=4, sort_keys=True))
if __name__ == "__main__":
init()
** Output**
Response code: 201
{
"data": {
"id": "1598558336672497664",
"text": "FTX ex-CEO Bankman-Fried claims he was unaware of improper use of customer funds -ABC News\nSource:URL" #couldn't post short url in stackoverflow
}
}
Content hasn't changed
Content hasn't changed
Content hasn't changed
Hope this helps. Happy Coding :)
I need to make a Twitter API request via python request, but I'm not succeeding.
here's what I'm doing:
api_key = ""
api_secret = ""
token = ""
token_secret = ""
assembly = OAuth1Session(api_key,api_secret,token, token_secret)
url = https://api.twitter.com/2/tweets?ids=1519296554556829696,1519296438659862530,1519294[…]lic_metrics,organic_metrics&expansions=attachments.media_keys
r = assembly.get(url)
print(r.json())
I am coding a twitter bot but I am having challenges along the way.
The code will get data from an API and tweet out data daily. Unfortunely, I am having trouble getting the return function on the last line to work that will actually send the tweet out.
import urllib.request
from pprint import pprint
import json
import datetime
import tweepy
import time
import os
import logging
def importVax():
link = 'https://data.ontario.ca/api/3/action/datastore_search?resource_id=8a89caa9-511c-4568-af89-7f2174b4378c&limit=100'
query = urllib.request.urlopen(link)
query = json.loads(query.read())
for r in query['result']['records']:
date = datetime.datetime.strptime(r['report_date'][0:10], "%Y-%m-%d").date()
previous_day_admin = r['previous_day_doses_administered']
total_admin = r['total_doses_administered']
total_complete = r['total_vaccinations_completed']
if previous_day_admin == '':
previous_day_admin = 0
print(date, previous_day_admin, total_admin)
if __name__ == '__main__':
importVax()
consumer_key = 'REDACTED'
consumer_secret ='REDACTED'
access_token = 'REDACTED'
access_token_secret = 'REDACTED'
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
api = tweepy.API(auth)
def daily_update(date, previous_day_admin, total_admin, total_complete):
message = str(
f'''
[{date}]
Doses Administered: {previous_day_admin}
Total Completed Vaccinations: {total_complete}
% Immune: {round(total_complete/14570000)}
'''
)
return api.update_status(message)
I have this python function and i wish to execute this using lambda handler function hence I have written this code. When I execute in Pycharm I don't see any output in console. Can someone guide what is the problem with below code?
import json
from json import loads
import requests
from requests import exceptions
from requests.auth import HTTPBasicAuth
def lambda_handler(event, context):
test_post_headers_body_json()
return {"statusCode": 200, "body": json.dumps("Hello from Lambda!")}
def test_post_headers_body_json():
client_id = "WJRYDHNGROIZHL8B"
client_secret = "V5VXK6FLG1YI0GD2XY3H"
user = "automation-store-admin1#abc.com"
password = "c0Ba5PBdvVl2"
access_point = "https://api.platform.abc.com/auth/oauth/token"
grant_type = "password"
headers = {"Content-Type": "application/x-www-form-urlencoded"}
# auth = auth.HTTPBasicAuth(client_id, client_secret)
data = {"grant_type": grant_type, "username": user, "password": password}
resp = None
try:
resp = requests.post(
access_point,
auth=HTTPBasicAuth(client_id, client_secret),
data=data,
headers=headers,
)
except exceptions.ConnectionError:
exit(1)
if resp.status_code == 200:
resp = loads(resp.text)
if "access_token" in resp:
print(resp["access_token"])
exit(0)
exit(1)
It is normal because when running you code, Python only declare the function not using it. You should add a __main__ entry point at the end of your file:
import json
from json import loads
import requests
from requests import exceptions
from requests.auth import HTTPBasicAuth
def lambda_handler(event, context):
test_post_headers_body_json()
return {"statusCode": 200, "body": json.dumps("Hello from Lambda!")}
def test_post_headers_body_json():
client_id = "WJRYDHNGROIZHL8B"
client_secret = "V5VXK6FLG1YI0GD2XY3H"
user = "automation-store-admin1#abc.com"
password = "c0Ba5PBdvVl2"
access_point = "https://api.platform.abc.com/auth/oauth/token"
grant_type = "password"
headers = {"Content-Type": "application/x-www-form-urlencoded"}
# auth = auth.HTTPBasicAuth(client_id, client_secret)
data = {"grant_type": grant_type, "username": user, "password": password}
resp = None
try:
resp = requests.post(
access_point,
auth=HTTPBasicAuth(client_id, client_secret),
data=data,
headers=headers,
)
except exceptions.ConnectionError:
exit(1)
if resp.status_code == 200:
resp = loads(resp.text)
if "access_token" in resp:
print(resp["access_token"])
exit(0)
exit(1)
# added part
if __name__ == '__main__':
test_post_headers_body_json()
i have been trying to perform Two-legged OAuth at : https://rest.immobilienscout24.de/restapi/api/ . I am following these instructions but i am receiving 401 error code.
Here is my code :
import requests
URL = "https://rest.immobilienscout24.de/restapi/api/"
consumer_key = 'ImmobilieAnalyseKey'
PARAMS = {'consumer_key': consumer_key}
r = requests.get(url=URL, params=PARAMS)
print(r)
I have tried this and it is returning 200 but i am not getting how can i get data with two legged.
from rauth import OAuth1Session
consumer_key = 'xxxxx'
consumer_secret = 'xxxxx'
base_url = 'https://rest.immobilienscout24.de/restapi/api/'
def test_get():
url = base_url
client = OAuth1Session(consumer_key,consumer_secret)
r = client.get(url)
print(r)
test_get()
EDIT :
ERROR RESPONSE :
<common:messages xmlns:common="http://rest.immobilienscout24.de/schema/common/1.0">
<message>
<messageCode>ERROR_BAD_REQUEST</messageCode>
<message>Missing signature method.</message>
</message>
</common:messages>