How do I loop this webscrape/tweet script 24/7?

How do I loop this webscrape/tweet script 24/7? - python

Just started learning Python. I am trying to gather data by webscraping and tweet out info. But everytime I rerun the code. I get
Forbidden: 403 Forbidden
187 - Status is a duplicate.
How do I loop this script without getting this error?
Here's my code :
def scrape ():
page = requests.get("https://www.reuters.com/business/future-of-money/")
soup = BeautifulSoup(page.content, "html.parser")
home = soup.find(class_="editorial-franchise-layout__main__3cLBl")
posts = home.find_all(class_="text__text__1FZLe text__dark-grey__3Ml43 text__inherit-font__1Y8w3 text__inherit-size__1DZJi link__underline_on_hover__2zGL4")
top_post = posts[0].find("h3", class_="text__text__1FZLe text__dark-grey__3Ml43 text__medium__1kbOh text__heading_3__1kDhc heading__base__2T28j heading__heading_3__3aL54 hero-card__title__33EFM").find_all("span")[0].text.strip()
tweet (top_post)
def tweet (top_post):
api_key = 'deletedforprivacy'
api_key_secret = 'deletedforprivacy'
access_token = 'deletedforprivacy'
access_token_secret = 'deletedforprivacy'
authenticator = tweepy.OAuthHandler(api_key, api_key_secret)
authenticator.set_access_token(access_token, access_token_secret)
api = tweepy.API(authenticator, wait_on_rate_limit=True)
api.update_status(f"{top_post} \nSource : https://www.reuters.com/business/future-of-money/")
print(top_post)
scrape()

The twitter api checks if the content is duplicate and if it is duplicate it returns:
Request returned an error: 403 {"detail":"You are not allowed to create a Tweet with duplicate content.","type":"about:blank","title":"Forbidden","status":403}
I added an simple function to check if the previous content is same as the one about to be added
** Full Code**
from requests_oauthlib import OAuth1Session
import os
import json
import requests
from bs4 import BeautifulSoup
import time
user_id = 000000000000000 # Get userid from https://tweeterid.com/
bearer_token = "<BEARER_TOKEN>"
consumer_key = "<CONSUMER_KEY>"
consumer_secret = "<CONSUMER_SECRET>"
def init():
# Get request token
request_token_url = "https://api.twitter.com/oauth/request_token?oauth_callback=oob&x_auth_access_type=write"
oauth = OAuth1Session(consumer_key, client_secret=consumer_secret)
try:
fetch_response = oauth.fetch_request_token(request_token_url)
except ValueError:
print(
"There may have been an issue with the consumer_key or consumer_secret you entered."
)
resource_owner_key = fetch_response.get("oauth_token")
resource_owner_secret = fetch_response.get("oauth_token_secret")
print("Got OAuth token and secret")
# Get authorization
base_authorization_url = "https://api.twitter.com/oauth/authorize"
authorization_url = oauth.authorization_url(base_authorization_url)
print("Please go here and authorize: %s" % authorization_url)
verifier = input("Paste the PIN here: ")
# Get the access token
access_token_url = "https://api.twitter.com/oauth/access_token"
oauth = OAuth1Session(
consumer_key,
client_secret=consumer_secret,
resource_owner_key=resource_owner_key,
resource_owner_secret=resource_owner_secret,
verifier=verifier,
)
oauth_tokens = oauth.fetch_access_token(access_token_url)
access_token = oauth_tokens["oauth_token"]
access_token_secret = oauth_tokens["oauth_token_secret"]
# Make the request
oauth = OAuth1Session(
consumer_key,
client_secret=consumer_secret,
resource_owner_key=access_token,
resource_owner_secret=access_token_secret,
)
scraper(oauth, bearer_token)
def bearer_oauth(r):
"""
Method required by bearer token authentication.
"""
r.headers["Authorization"] = f"Bearer {bearer_token}"
r.headers["User-Agent"] = "v2UserTweetsPython"
return r
def previous_tweet():
url = "https://api.twitter.com/2/users/{}/tweets".format(user_id)
# Tweet fields are adjustable.
# Options include:
# attachments, author_id, context_annotations,
# conversation_id, created_at, entities, geo, id,
# in_reply_to_user_id, lang, non_public_metrics, organic_metrics,
# possibly_sensitive, promoted_metrics, public_metrics, referenced_tweets,
# source, text, and withheld
params = {"tweet.fields": "text"}
response = requests.request(
"GET", url, auth=bearer_oauth, params=params)
print(response.status_code)
if response.status_code != 200:
raise Exception(
"Request returned an error: {} {}".format(
response.status_code, response.text
)
)
# checking if this is the first post
if response.json() != {'meta': {'result_count': 0}}:
# Since twitter changes html to small url I am splitting at \n to match to new payload
previous_tweet_text = response.json()["data"][0]["text"].split("\n")[0]
previous_payload = {"text": f"{previous_tweet_text}"}
else:
previous_payload = {"text": f""}
return previous_payload
def scraper(oauth, bearer_token):
while True:
page = requests.get(
"https://www.reuters.com/business/future-of-money/")
soup = BeautifulSoup(page.content, "html.parser")
home = soup.find(class_="editorial-franchise-layout__main__3cLBl")
posts = home.find_all(
class_="text__text__1FZLe text__dark-grey__3Ml43 text__inherit-font__1Y8w3 text__inherit-size__1DZJi link__underline_on_hover__2zGL4")
top_post = posts[0].find(
"h3", class_="text__text__1FZLe text__dark-grey__3Ml43 text__medium__1kbOh text__heading_3__1kDhc heading__base__2T28j heading__heading_3__3aL54 hero-card__title__33EFM").find_all("span")[0].text.strip()
# Be sure to add replace the text of the with the text you wish to Tweet. You can also add parameters to post polls, quote Tweets, Tweet with reply settings, and Tweet to Super Followers in addition to other features.
payload = {
"text": f"{top_post}\nSource:https://www.reuters.com/business/future-of-money/"}
current_checker_payload = {"text": payload["text"].split("\n")[0]}
previous_payload = previous_tweet()
if previous_payload != current_checker_payload:
tweet(payload, oauth)
else:
print("Content hasn't changed")
time.sleep(60)
def tweet(payload, oauth):
# Making the request
response = oauth.post(
"https://api.twitter.com/2/tweets",
json=payload,
)
if response.status_code != 201:
raise Exception(
"Request returned an error: {} {}".format(
response.status_code, response.text)
)
print("Response code: {}".format(response.status_code))
# Showing the response as JSON
json_response = response.json()
print(json.dumps(json_response, indent=4, sort_keys=True))
if __name__ == "__main__":
init()
** Output**
Response code: 201
{
"data": {
"id": "1598558336672497664",
"text": "FTX ex-CEO Bankman-Fried claims he was unaware of improper use of customer funds -ABC News\nSource:URL" #couldn't post short url in stackoverflow
}
}
Content hasn't changed
Content hasn't changed
Content hasn't changed
Hope this helps. Happy Coding :)

Related

Error 400 Bad Request when using Twitter API with requests

I´m playing around with the Twitter API and I try to use requests to send a tweet via Twitter (I want to port it on an embedded system).
At first, I checked the create_tweet.py example from Twitter and use some code to generate the OAuth keys:
import os
from requests_oauthlib import OAuth1Session
consumer_key = os.environ.get("CONSUMER_KEY")
consumer_secret = os.environ.get("CONSUMER_SECRET")
if(__name__ == "__main__"):
oauth = OAuth1Session(consumer_key, client_secret = consumer_secret)
try:
fetch_response = oauth.fetch_request_token("https://api.twitter.com/oauth/request_token?oauth_callback=oob&x_auth_access_type=write")
except ValueError:
print(
"There may have been an issue with the consumer_key or consumer_secret you entered."
)
resource_owner_key = fetch_response.get("oauth_token")
resource_owner_secret = fetch_response.get("oauth_token_secret")
print("Got OAuth token: %s" % resource_owner_key)
base_authorization_url = "https://api.twitter.com/oauth/authorize"
authorization_url = oauth.authorization_url(base_authorization_url)
print("Please go here and authorize: %s" % authorization_url)
access_token_url = "https://api.twitter.com/oauth/access_token"
oauth = OAuth1Session(
consumer_key,
client_secret = consumer_secret,
resource_owner_key = resource_owner_key,
resource_owner_secret = resource_owner_secret,
verifier = input("Paste the PIN here: "),
)
oauth_tokens = oauth.fetch_access_token(access_token_url)
print("oauth_token: {}".format(oauth_tokens["oauth_token"]))
print("oauth_token_secret: {}".format(oauth_tokens["oauth_token_secret"]))
I have copied the oauth_token and oauth_token_secret and added them to my second program to transmit the tweet:
import os
import json
import time
import string
import random
import requests
consumer_key = os.environ.get("CONSUMER_KEY")
payload = {"text": "Hello world1!"}
oauth_token = <oauth_token from program above>
oauth_token_secret = <oauth_token_secret from program above>
headers = {
"OAuth oauth_consumer_key": consumer_key,
"oauth_nonce": "".join(random.choice(string.ascii_uppercase + string.ascii_lowercase + string.digits) for x in range(32)),
"oauth_signature_method": "HMAC-SHA1",
"oauth_timestamp": str(int(time.time())),
"oauth_token": oauth_token_secret,
"oauth_version": "1.0",
"oauth_signature": oauth_token,
}
response = requests.post("https://api.twitter.com/2/tweets", headers = headers, json = payload)
if(response.status_code != 201):
raise Exception(
"Request returned an error: {} {}".format(response.status_code, response.text)
)
print("Response code: {}".format(response.status_code))
json_response = response.json()
print(json.dumps(json_response, indent = 4, sort_keys = True))
But I got a 400 Bad Request error.
What is the reason for the bad request error? How do I have to improve the request?

Kraken Futures API - authenticationError Python

I'm finding the Kraken Futures API confusing compared to other providers. Using a demo account I'm trying to make basic private requests and not working so far with authentication error. The code mainly comes from Kraken docs (non-futures)
Futures auth doc: https://support.kraken.com/hc/en-us/articles/360022635592-Generate-authentication-strings-REST-API-
api_sec = "MxA2FwIQxCxsfy2XDa4R8PwTjwLKjzT8GSOw+qOVuWGh3Lx6PtyW0f94J5XXKz9mP8bztRJSDQJVKBsHFicrDr/N"
api_url = "https://futures.kraken.com/derivatives/api/v3"
api_key = 'Y7kVv/hW0JWRRAhJtA8BuJkUX+E0gWmTL5NWf4lRPN8f+iYoJp9AoYwW'
def get_kraken_signature(urlpath, data, secret):
postdata = urllib.parse.urlencode(data)
encoded = (str(data['nonce']) + postdata).encode()
message = urlpath.encode() + hashlib.sha256(encoded).digest()
mac = hmac.new(base64.b64decode(secret), message, hashlib.sha256)
sigdigest = base64.b64encode(mac.digest())
return sigdigest.decode()
# Attaches auth headers and returns results of a get request
def kraken_request(uri_path, data, api_key, api_sec):
headers = {}
headers['API-Key'] = api_key
# get_kraken_signature() as defined in the 'Authentication' section
headers['API-Sign'] = get_kraken_signature(uri_path, data, api_sec)
req = requests.get((api_url + uri_path), headers=headers, data=data)
return req
# Construct the request and print the result
resp = kraken_request('/accounts', {
"nonce": str(int(1000*time.time()))
}, api_key, api_sec)
Output
{"result":"error","error":"authenticationError","serverTime":"2022-05-13T10:14:50.838Z"}

Only valid bearer authentication supported - Python - Spotify API

I was coding this script that can get into my liked videos and make a playlist on Spotify with the title and artist of each video.
I already tried to renew the Token from the Spotify API manager, but for some reason it's still showing the following error:
status': 400, 'message': 'Only valid bearer authentication supported'}}
Traceback (most recent call last):
File "/Users/gzangerme/Desktop/Python Project/SpotifyAutomation.py", line 159, in <module>
cp.add_song_to_playlist()
File "/Users/gzangerme/Desktop/Python Project/SpotifyAutomation.py", line 129, in add_song_to_playlist
self.get_liked_videos()
File "/Users/gzangerme/Desktop/Python Project/SpotifyAutomation.py", line 76, in get_liked_videos
"spotify_uri": self.get_spotify_uri(song_name, artist)
File "/Users/gzangerme/Desktop/Python Project/SpotifyAutomation.py", line 119, in get_spotify_uri
songs = response_json["tracks"]["items"]
KeyError: 'tracks'
I noticed that the KeyError is showing up because the call is returning an error.
Here follows the code for the project:
import json
import requests
import os
from secrets import spotify_user_id, spotify_token
import google_auth_oauthlib.flow
import googleapiclient.discovery
import googleapiclient.errors
import youtube_dl
class CreatePlaylist:
def __init__(self):
self.user_id = spotify_user_id
self.spotify_token = spotify_token
self.youtube_client = self.get_youtube_client()
self.all_song_info = {}
#Step 1
def get_youtube_client(self):
os.environ["OAUTHLIB_INSECURE_TRANSPORT"] = "1"
api_service_name = "youtube"
api_version = "v3"
#Get Credentials for API Client
flow = google_auth_oauthlib.flow.InstalledAppFlow.from_client_secrets_file('client_secret.json', scopes=['https://www.googleapis.com/auth/youtube'])
credentials = flow.run_console()
youtube_client = googleapiclient.discovery.build(api_service_name, api_version, credentials=credentials)
return youtube_client
#Step 2
def get_liked_video(self):
request = self.youtube_client.videos().list(
part="snippet,contentDetails,statistics",
myRating="Like"
)
response = request.execute()
#Get information on each video on the list
for item in response["items"]:
video_title = item["snippet"]["title"]
youtube_url = "https://www.youtube.com/watch?v={}".format(item["id"])
#use youtube_dl to colect the name of the song and the artist
video = youtube_dl.YoutubeDL({}).extract_info(youtube_url,download = False)
song_name = video["track"]
artist = video["artist"]
self.all_song_info[video_title] = {
"youtube_url":youtube_url,
"song_name":song_name,
"artist":artist,
"spotify_uri":self.get_spotify_uri(song_name,artist)
}
#Step 3
def create_playlist(self):
request_body = json.dumps({
"name":"Youtube Liked Videos",
"description":"Todos os Videos com Like do YT",
"public": True
})
query = "https://api.spotify.com/v1/users/{user_id}/playlists".format()
response = requests.post(
query,
data= request_body,
headers={
"Content-type": "application/json",
"Authorization": "Bearer {}".format(spotify_token)
}
)
response_json = response.json
#playlistId
return response_json["id"]
#Step 4
def get_spotify_uri(self, song_name, artist):
query = "https://api.spotify.com/v1/search".format(
song_name,
artist
)
response = requests.get(
query,
headers={
"Content-type": "application/json",
"Authorization": "Bearer {}".format(spotify_token)
}
)
response_json = response.json()
songs = response_json["tracks"]["items"]
#configurar para utilizar somente a primeira musica
uri = songs[0]["uri"]
return uri
#Step 5
def add_song_to_playlist(self):
self.get_liked_video()
uris = []
for song ,info in self.all_song_info():
uris.apend(info["spotify_uri"])
#create new playlist
playlist_id = self.create_playlist
#add musics to the created playlist
request_data = json.dumps(uris)
query = "https://api.spotify.com/v1/playlists/{playlist_id}/tracks".format(playlist_id)
response = requests.post(
query,
data=request_data,
headers = {
"Content-Type":"application/json",
"Authorization": "Bearer {}".format(self.spotify_token)
}
)
response_json = response.json()
return response_json
CreatePlaylist()

I think the spotify_token and spotify_user_id are the issue. If you go to:
https://pypi.org/project/spotify-token/ it is a Python script where you can generate a Spotify token.
As for the spotify_user_id that is your username on Spotify. To find your username, go to: https://www.spotify.com/us/ , click on Profile > Account > Account overview
Hope it helps.

Why is no ouput generated from this function?

I have this python function and i wish to execute this using lambda handler function hence I have written this code. When I execute in Pycharm I don't see any output in console. Can someone guide what is the problem with below code?
import json
from json import loads
import requests
from requests import exceptions
from requests.auth import HTTPBasicAuth
def lambda_handler(event, context):
test_post_headers_body_json()
return {"statusCode": 200, "body": json.dumps("Hello from Lambda!")}
def test_post_headers_body_json():
client_id = "WJRYDHNGROIZHL8B"
client_secret = "V5VXK6FLG1YI0GD2XY3H"
user = "automation-store-admin1#abc.com"
password = "c0Ba5PBdvVl2"
access_point = "https://api.platform.abc.com/auth/oauth/token"
grant_type = "password"
headers = {"Content-Type": "application/x-www-form-urlencoded"}
# auth = auth.HTTPBasicAuth(client_id, client_secret)
data = {"grant_type": grant_type, "username": user, "password": password}
resp = None
try:
resp = requests.post(
access_point,
auth=HTTPBasicAuth(client_id, client_secret),
data=data,
headers=headers,
)
except exceptions.ConnectionError:
exit(1)
if resp.status_code == 200:
resp = loads(resp.text)
if "access_token" in resp:
print(resp["access_token"])
exit(0)
exit(1)

It is normal because when running you code, Python only declare the function not using it. You should add a __main__ entry point at the end of your file:
import json
from json import loads
import requests
from requests import exceptions
from requests.auth import HTTPBasicAuth
def lambda_handler(event, context):
test_post_headers_body_json()
return {"statusCode": 200, "body": json.dumps("Hello from Lambda!")}
def test_post_headers_body_json():
client_id = "WJRYDHNGROIZHL8B"
client_secret = "V5VXK6FLG1YI0GD2XY3H"
user = "automation-store-admin1#abc.com"
password = "c0Ba5PBdvVl2"
access_point = "https://api.platform.abc.com/auth/oauth/token"
grant_type = "password"
headers = {"Content-Type": "application/x-www-form-urlencoded"}
# auth = auth.HTTPBasicAuth(client_id, client_secret)
data = {"grant_type": grant_type, "username": user, "password": password}
resp = None
try:
resp = requests.post(
access_point,
auth=HTTPBasicAuth(client_id, client_secret),
data=data,
headers=headers,
)
except exceptions.ConnectionError:
exit(1)
if resp.status_code == 200:
resp = loads(resp.text)
if "access_token" in resp:
print(resp["access_token"])
exit(0)
exit(1)
# added part
if __name__ == '__main__':
test_post_headers_body_json()

Adding location to Twitter sentiment analysis

I am trying to build a twitter sentiment analysis tool and want to add a geolocation - that looks for tweets within 10 miles of NYC. How do I do this? I I tried to add the location to the end of the url but it did not work.
Here is the code that I have so far:
import oauth2 as oauth
import urllib2 as urllib
# See assignment1.html instructions or README for how to get these credentials
api_key = ''
api_secret = ''
access_token_key = '
access_token_secret = ''
_debug = 0
oauth_token = oauth.Token(key=access_token_key, secret=access_token_secret)
oauth_consumer = oauth.Consumer(key=api_key, secret=api_secret)
signature_method_hmac_sha1 = oauth.SignatureMethod_HMAC_SHA1()
http_method = "GET"
http_handler = urllib.HTTPHandler(debuglevel=_debug)
https_handler = urllib.HTTPSHandler(debuglevel=_debug)
'''
Construct, sign, and open a twitter request
using the hard-coded credentials above.
'''
def twitterreq(url, method, parameters):
req = oauth.Request.from_consumer_and_token(oauth_consumer,
token=oauth_token,
http_method=http_method,
http_url=url,
parameters=parameters)
req.sign_request(signature_method_hmac_sha1, oauth_consumer, oauth_token)
headers = req.to_header()
if http_method == "POST":
encoded_post_data = req.to_postdata()
else:
encoded_post_data = None
url = req.to_url()
opener = urllib.OpenerDirector()
opener.add_handler(http_handler)
opener.add_handler(https_handler)
response = opener.open(url, encoded_post_data)
return response
def fetchsamples():
url = "https://stream.twitter.com/1.1/statuses/filter.json?
track=money&locations"
parameters = []
response = twitterreq(url, "GET", parameters)
for line in response:
print(line.strip())
if __name__ == '__main__':
fetchsamples()

Here is the API doc: https://developer.twitter.com/en/docs/tweets/filter-realtime/api-reference/post-statuses-filter.html
statuses/filter uses POST, not GET.
For tweets around NYC use "locations=-74,40,-73,41". (You will need to expand this bounding box to make it 10 miles around NYC.) However, when used with track the two filters are OR'ed. In other words, you will get tweets that match either your locations filter or your track filter. You wont get only the tweets that match both filters.
EDIT
url = "https://stream.twitter.com/1.1/statuses/filter.json?track=money&locations=-74,40,-73,41"
parameters = []
response = twitterreq(url, "POST", parameters)

Develop Reference

Python is a programming language that lets you work quickly and integrate systems more effectively.

How do I loop this webscrape/tweet script 24/7? - python

Related

Error 400 Bad Request when using Twitter API with requests

Kraken Futures API - authenticationError Python

Only valid bearer authentication supported - Python - Spotify API

Why is no ouput generated from this function?

Adding location to Twitter sentiment analysis

Categories

Resources