Twitter API: How to exclude retweets when searching tweets using Twython - python

I'm trying to exclude retweets and replies in my Twython search.
Here is my code:
from twython import Twython, TwythonError
app_key = "xxxx"
app_secret = "xxxx"
oauth_token = "xxxx"
oauth_token_secret = "xxxx"
naughty_words = [" -RT"]
good_words = ["search phrase", "another search phrase"]
filter = " OR ".join(good_words)
blacklist = " -".join(naughty_words)
keywords = filter + blacklist
twitter = Twython(app_key, app_secret, oauth_token, oauth_token_secret)
search_results = twitter.search(q=keywords, count=100)
The problem is that the -RT function isn't really working.
EDIT:
I've tried @forge's suggestion, and while it does print whether the tweets are retweets or replies, when I incorporate it into the code below, the bot still finds tweets, retweets, quotes and replies.
twitter = Twython(app_key, app_secret, oauth_token, oauth_token_secret) query = 'beer OR wine AND -filter:retweets AND -filter:replies'
response = twitter.search(q=query, count=100)
statuses = response['statuses']
try:
for tweet in statuses:
try:
twitter.retweet(id = tweet["id_str"])
except TwythonError as e:
print e
except TwythonError as e:
print e
Any ideas? Is there a filter:quotes?

The correct syntax is -filter:retweets.
If you would like to search on terms "search phrase" or "another search phrase" and exclude retweets, then the query should be:
query = "search_phrase OR another_search_phrase -filter:retweets"
To exclude replies as well, add -filter:replies like this:
query = "search_phrase OR another_search_phrase -filter:retweets AND -filter:replies"
This should be working, you can verify it by checking the status fields in_reply_to_status_id and retweeted_status:
Status is not a reply if in_reply_to_status_id is empty
Status is not a retweet if it doesn't have the field retweeted_status
With Twython:
import twython

twitter = twython.Twython(APP_KEY, APP_SECRET, OAUTH_TOKEN, OAUTH_TOKEN_SECRET)
# Terms are joined with OR; the minus-prefixed filters exclude retweets and replies.
query = 'wine OR beer -filter:retweets AND -filter:replies'
response = twitter.search(q=query, count=100)
statuses = response['statuses']
for status in statuses:
    # Not a reply when in_reply_to_status_id is None; not a retweet when the
    # status has no retweeted_status field. `in` replaces dict.has_key, which
    # no longer exists in Python 3.
    print(status['in_reply_to_status_id'], 'retweeted_status' in status)
# Every status should print None and False

Related

How do I loop this webscrape/tweet script 24/7?

Just started learning Python. I am trying to gather data by webscraping and tweet out the info. But every time I rerun the code, I get
Forbidden: 403 Forbidden
187 - Status is a duplicate.
How do I loop this script without getting this error?
Here's my code :
def scrape ():
page = requests.get("https://www.reuters.com/business/future-of-money/")
soup = BeautifulSoup(page.content, "html.parser")
home = soup.find(class_="editorial-franchise-layout__main__3cLBl")
posts = home.find_all(class_="text__text__1FZLe text__dark-grey__3Ml43 text__inherit-font__1Y8w3 text__inherit-size__1DZJi link__underline_on_hover__2zGL4")
top_post = posts[0].find("h3", class_="text__text__1FZLe text__dark-grey__3Ml43 text__medium__1kbOh text__heading_3__1kDhc heading__base__2T28j heading__heading_3__3aL54 hero-card__title__33EFM").find_all("span")[0].text.strip()
tweet (top_post)
def tweet (top_post):
api_key = 'deletedforprivacy'
api_key_secret = 'deletedforprivacy'
access_token = 'deletedforprivacy'
access_token_secret = 'deletedforprivacy'
authenticator = tweepy.OAuthHandler(api_key, api_key_secret)
authenticator.set_access_token(access_token, access_token_secret)
api = tweepy.API(authenticator, wait_on_rate_limit=True)
api.update_status(f"{top_post} \nSource : https://www.reuters.com/business/future-of-money/")
print(top_post)
scrape()
The twitter api checks if the content is duplicate and if it is duplicate it returns:
Request returned an error: 403 {"detail":"You are not allowed to create a Tweet with duplicate content.","type":"about:blank","title":"Forbidden","status":403}
I added a simple function to check whether the previous content is the same as the one about to be posted
**Full Code**
from requests_oauthlib import OAuth1Session
import os
import json
import requests
from bs4 import BeautifulSoup
import time
user_id = 000000000000000 # Get userid from https://tweeterid.com/
bearer_token = "<BEARER_TOKEN>"
consumer_key = "<CONSUMER_KEY>"
consumer_secret = "<CONSUMER_SECRET>"
def init():
    """Run the PIN-based OAuth 1.0a flow interactively, then start the scraper.

    Fetches a request token, prints the authorization URL, reads the PIN from
    standard input, exchanges it for an access token and passes an
    authenticated session to ``scraper``.

    Raises:
        ValueError: re-raised from ``fetch_request_token`` after a hint is
            printed, because nothing further can run without a request token.
    """
    # Get request token
    request_token_url = "https://api.twitter.com/oauth/request_token?oauth_callback=oob&x_auth_access_type=write"
    oauth = OAuth1Session(consumer_key, client_secret=consumer_secret)
    try:
        fetch_response = oauth.fetch_request_token(request_token_url)
    except ValueError:
        print(
            "There may have been an issue with the consumer_key or consumer_secret you entered."
        )
        # fetch_response is unbound here; stop instead of failing on it below.
        raise
    resource_owner_key = fetch_response.get("oauth_token")
    resource_owner_secret = fetch_response.get("oauth_token_secret")
    print("Got OAuth token and secret")

    # Get authorization
    base_authorization_url = "https://api.twitter.com/oauth/authorize"
    authorization_url = oauth.authorization_url(base_authorization_url)
    print("Please go here and authorize: %s" % authorization_url)
    verifier = input("Paste the PIN here: ")

    # Get the access token
    access_token_url = "https://api.twitter.com/oauth/access_token"
    oauth = OAuth1Session(
        consumer_key,
        client_secret=consumer_secret,
        resource_owner_key=resource_owner_key,
        resource_owner_secret=resource_owner_secret,
        verifier=verifier,
    )
    oauth_tokens = oauth.fetch_access_token(access_token_url)
    access_token = oauth_tokens["oauth_token"]
    access_token_secret = oauth_tokens["oauth_token_secret"]

    # Make the request with a session signed by the user's access token
    oauth = OAuth1Session(
        consumer_key,
        client_secret=consumer_secret,
        resource_owner_key=access_token,
        resource_owner_secret=access_token_secret,
    )
    scraper(oauth, bearer_token)
def bearer_oauth(r):
    """
    Auth hook for bearer-token requests: sets the Authorization and
    User-Agent headers on the outgoing request *r* and returns it.
    """
    outgoing = r.headers
    outgoing["Authorization"] = f"Bearer {bearer_token}"
    outgoing["User-Agent"] = "v2UserTweetsPython"
    return r
def previous_tweet():
    """Fetch the first tweet of the user's timeline as a comparable payload.

    Returns:
        dict: ``{"text": <first line of that tweet>}``, or ``{"text": ""}``
        when the timeline response contains no tweets. The first entry is
        presumably the most recent tweet (confirm against the API docs).

    Raises:
        Exception: if the timeline request does not return HTTP 200.
    """
    url = "https://api.twitter.com/2/users/{}/tweets".format(user_id)
    # Tweet fields are adjustable.
    # Options include:
    # attachments, author_id, context_annotations,
    # conversation_id, created_at, entities, geo, id,
    # in_reply_to_user_id, lang, non_public_metrics, organic_metrics,
    # possibly_sensitive, promoted_metrics, public_metrics, referenced_tweets,
    # source, text, and withheld
    params = {"tweet.fields": "text"}
    response = requests.request(
        "GET", url, auth=bearer_oauth, params=params)
    print(response.status_code)
    if response.status_code != 200:
        raise Exception(
            "Request returned an error: {} {}".format(
                response.status_code, response.text
            )
        )
    # Parse once. A missing or empty "data" list means there is no earlier
    # tweet, whatever else the response carries in "meta".
    tweets = response.json().get("data")
    if not tweets:
        return {"text": ""}
    # Twitter rewrites URLs into short links, so only the first line (the
    # headline) is kept for comparison with the new payload.
    return {"text": tweets[0]["text"].split("\n")[0]}
def _top_headline(url):
    """Return the top headline text scraped from *url*, or None if unavailable."""
    try:
        # A timeout keeps one stalled connection from freezing the whole loop.
        page = requests.get(url, timeout=30)
    except requests.RequestException as err:
        print(f"Could not fetch {url}: {err}")
        return None
    soup = BeautifulSoup(page.content, "html.parser")
    try:
        home = soup.find(class_="editorial-franchise-layout__main__3cLBl")
        posts = home.find_all(
            class_="text__text__1FZLe text__dark-grey__3Ml43 text__inherit-font__1Y8w3 text__inherit-size__1DZJi link__underline_on_hover__2zGL4")
        return posts[0].find(
            "h3", class_="text__text__1FZLe text__dark-grey__3Ml43 text__medium__1kbOh text__heading_3__1kDhc heading__base__2T28j heading__heading_3__3aL54 hero-card__title__33EFM").find_all("span")[0].text.strip()
    except (AttributeError, IndexError):
        # find() returned None or a result list was empty: the expected
        # markup is not on the page.
        return None


def scraper(oauth, bearer_token):
    """Poll the Reuters page once a minute and tweet the top headline when it changes.

    Args:
        oauth: authenticated session passed on to ``tweet`` for posting.
        bearer_token: accepted for call compatibility; not used in this function.

    Loops forever. A cycle in which the headline cannot be obtained is
    reported and skipped instead of ending the loop.
    """
    source_url = "https://www.reuters.com/business/future-of-money/"
    while True:
        top_post = _top_headline(source_url)
        if top_post is None:
            print("Could not find the top headline; will retry")
        else:
            # Replace this text with whatever you wish to Tweet. The payload can
            # also carry parameters for polls, quote Tweets, reply settings, etc.
            payload = {"text": f"{top_post}\nSource:{source_url}"}
            # Compare only the headline line with the previously posted tweet.
            current_checker_payload = {"text": payload["text"].split("\n")[0]}
            previous_payload = previous_tweet()
            if previous_payload != current_checker_payload:
                tweet(payload, oauth)
            else:
                print("Content hasn't changed")
        time.sleep(60)
def tweet(payload, oauth):
    """Post *payload* as a new tweet through the authenticated *oauth* session.

    Raises Exception when the API answers with any status other than 201;
    otherwise prints the status code followed by the pretty-printed JSON reply.
    """
    reply = oauth.post("https://api.twitter.com/2/tweets", json=payload)
    status = reply.status_code
    if status != 201:
        raise Exception(
            "Request returned an error: {} {}".format(status, reply.text)
        )
    print("Response code: {}".format(status))
    # Show the reply as sorted, indented JSON
    print(json.dumps(reply.json(), indent=4, sort_keys=True))
if __name__ == "__main__":
init()
**Output**
Response code: 201
{
"data": {
"id": "1598558336672497664",
"text": "FTX ex-CEO Bankman-Fried claims he was unaware of improper use of customer funds -ABC News\nSource:URL" #couldn't post short url in stackoverflow
}
}
Content hasn't changed
Content hasn't changed
Content hasn't changed
Hope this helps. Happy Coding :)

Error 400 Bad Request when using Twitter API with requests

I'm playing around with the Twitter API and I am trying to use requests to send a tweet (I want to port it to an embedded system).
At first, I checked the create_tweet.py example from Twitter and used some of its code to generate the OAuth keys:
import os
from requests_oauthlib import OAuth1Session
consumer_key = os.environ.get("CONSUMER_KEY")
consumer_secret = os.environ.get("CONSUMER_SECRET")
if(__name__ == "__main__"):
oauth = OAuth1Session(consumer_key, client_secret = consumer_secret)
try:
fetch_response = oauth.fetch_request_token("https://api.twitter.com/oauth/request_token?oauth_callback=oob&x_auth_access_type=write")
except ValueError:
print(
"There may have been an issue with the consumer_key or consumer_secret you entered."
)
resource_owner_key = fetch_response.get("oauth_token")
resource_owner_secret = fetch_response.get("oauth_token_secret")
print("Got OAuth token: %s" % resource_owner_key)
base_authorization_url = "https://api.twitter.com/oauth/authorize"
authorization_url = oauth.authorization_url(base_authorization_url)
print("Please go here and authorize: %s" % authorization_url)
access_token_url = "https://api.twitter.com/oauth/access_token"
oauth = OAuth1Session(
consumer_key,
client_secret = consumer_secret,
resource_owner_key = resource_owner_key,
resource_owner_secret = resource_owner_secret,
verifier = input("Paste the PIN here: "),
)
oauth_tokens = oauth.fetch_access_token(access_token_url)
print("oauth_token: {}".format(oauth_tokens["oauth_token"]))
print("oauth_token_secret: {}".format(oauth_tokens["oauth_token_secret"]))
I have copied the oauth_token and oauth_token_secret and added them to my second program to transmit the tweet:
import os
import json
import time
import string
import random
import requests
consumer_key = os.environ.get("CONSUMER_KEY")
payload = {"text": "Hello world1!"}
oauth_token = <oauth_token from program above>
oauth_token_secret = <oauth_token_secret from program above>
headers = {
"OAuth oauth_consumer_key": consumer_key,
"oauth_nonce": "".join(random.choice(string.ascii_uppercase + string.ascii_lowercase + string.digits) for x in range(32)),
"oauth_signature_method": "HMAC-SHA1",
"oauth_timestamp": str(int(time.time())),
"oauth_token": oauth_token_secret,
"oauth_version": "1.0",
"oauth_signature": oauth_token,
}
response = requests.post("https://api.twitter.com/2/tweets", headers = headers, json = payload)
if(response.status_code != 201):
raise Exception(
"Request returned an error: {} {}".format(response.status_code, response.text)
)
print("Response code: {}".format(response.status_code))
json_response = response.json()
print(json.dumps(json_response, indent = 4, sort_keys = True))
But I got a 400 Bad Request error.
What is the reason for the bad request error? How do I have to improve the request?

Getting all tweets from certain user with tweepy

I am trying to get all 50k tweets from @realDonaldTrump. I know there is a limit for Twitter API requests, so I am using max_id=oldest. But I only get 995 tweets.
import tweepy as tweepy
consumerKey = "xxx"
consumerSecret = "xxx"
accessToken = "xxx"
accessTokenSecret = "xxx"
auth = tweepy.OAuthHandler(consumerKey, consumerSecret)
auth.set_access_token(accessToken, accessTokenSecret)
api = tweepy.API(auth, wait_on_rate_limit=True)
alltweets = []
username="#realDonaldTrump"
new_tweets = api.user_timeline(username, tweet_mode = 'extended', count=200)
alltweets.extend(new_tweets)
oldest = alltweets[-1].id - 1
while len(new_tweets) > 0:
print(f"getting tweets before {oldest}")
new_tweets = api.user_timeline(username, max_id=oldest,tweet_mode = 'extended', count=200)
alltweets.extend(new_tweets)
oldest = alltweets[-1].id - 1
print(f"...{len(alltweets)} tweets downloaded so far")
outtweets = [[tweet.id_str, tweet.created_at, tweet.full_text] for tweet in alltweets]
With the free dev account you won't get more than the last 3200 tweets.
I suggest using a Cursor and pages.
..
c = tw.Cursor(api.user_timeline, id=userid, tweet_mode="extended", wait_on_rate_limit=True,count=200).pages()
while True:
    try:
        page = c.next()
        tweets.extend(page)
        # ..
    except tw.TweepError as e:
        # Bind the exception so it can be printed, then wait a minute before
        # asking for the next page again.
        print(e)
        time.sleep(60)
        continue
    except StopIteration:
        # The cursor has no more pages.
        break

Spotify & Youtube API integration: Liked Youtube music videos in Spotify

An overview of the project.
API Used
Using Spotify API to Create a Playlist and add music to the playlist
Using Youtube Data API to retrieve liked videos
OAuth 2.0 for verification
Goal:
The liked youtube videos of my youtube account should automatically come inside my Spotify newly created playlist
Code:
import json
import os
import google_auth_oauthlib.flow
import google.oauth2.credentials
import googleapiclient.discovery
import googleapiclient.errors
import requests
import youtube_dl
from secret import spotify_token, spotify_user_id
from exceptions import ResponseException
class CreatePlaylist:
def __init__(self):
self.user_id = spotify_user_id
self.spotify_token = spotify_token
self.youtube_client = self.get_youtube_client()
self.all_song_info = {}
# connect to youtube data api
def get_youtube_client(self):
# Disable OAuthlib's HTTPS verification when running locally.
# *DO NOT* leave this option enabled in production.
os.environ["OAUTHLIB_INSECURE_TRANSPORT"] = "1"
api_service_name = "youtube"
api_version = "v3"
client_secrets_file = "youtube_auth.json"
# Get credentials and create an API client
scopes = ["https://www.googleapis.com/auth/youtube.readonly"]
flow = google_auth_oauthlib.flow.InstalledAppFlow.from_client_secrets_file(
client_secrets_file, scopes)
credentials = flow.run_console()
# from the Youtube DATA API
youtube_client = googleapiclient.discovery.build(
api_service_name, api_version, credentials=credentials)
return youtube_client
# remove **kwarg that are not set
def get_liked_videos(self):
request = self.youtube_client.videos().list(
part="snippet,contentDetails,statistics", myRating="like"
)
response = request.execute()
# collect each video and get important information
for item in response['items']:
video_title = item['snippet']['title']
youtube_url = "https://www.youtube.com/watch?v={}".format(
item["id"])
# use youtube_dl to collect song name and artist name
video = youtube_dl.YoutubeDL({}).extract_info(
youtube_url, download=False)
song_name = video['track']
artist = video['artist']
# save all important info and skip any missing song and artist
self.all_song_info[video_title] = {
"youtube_url": youtube_url,
"song_name": song_name,
"artist": artist,
# add the uri, easy to get song to put into playlist
"spotify_uri": self.get_spotify_uri(song_name, artist)
}
# create a new playlist
def create_playlist(self):
request_body = json.dumps({
"name": "Youtube Liked Songs",
"description": "All liked youtube video songs",
"public": True
})
query = "https://api.spotify.com/v1/users/{}/playlists".format(
self.user_id)
response = requests.post(
query,
data=request_body,
headers={
"Content-Type": "application/json",
"Authorization": "Bearer {}".format(self.spotify_token)
}
)
response_json = response.json()
# playlis id
return response_json["id"]
# search for the song on Spotify
def get_spotify_uri(self, song_name, artist):
query = "https://api.spotify.com/v1/search?query=track%3A{}+artist%3A{}&type=track&offset=0&limit=20".format(
song_name,
artist
)
response = requests.get(
query,
headers={
"Content-Type": "application/json",
"Authorization": "Bearer {}".format(self.spotify_token)
}
)
response_json = response.json()
songs = response_json["tracks"]["items"]
# only use the first song
uri = songs[0]["uri"]
return uri
# add the song in new spotify_playlist
def add_song_to_playlist(self):
# populate dictionary with our liked songs
self.get_liked_videos()
# collect all of uri
uris = []
for song, info in self.all_song_info.items():
uris.append(info['spotify_uri'])
# create a new playlist
playlist_id = self.create_playlist()
# add all songs into new playlist
request_data = json.dumps(uris)
query = "https://api.spotify.com/v1/playlists/{}/tracks".format(
playlist_id)
response = requests.post(
query,
data=request_data,
headers={
"Content-Type": "application/json",
"Authorization": "Bearer {}".format(self.spotify_token)
})
if response.status_code < 200 or response.status_code > 300:
raise ResponseException(response.status_code)
response_json = response.json()
return response_json
if __name__ == '__main__':
cp = CreatePlaylist()
cp.add_song_to_playlist()
Output
A new playlist is created in my Spotify library, but the songs in it are not from my liked videos, and they are repeated: the playlist contains about 5-6 songs and all of them are the same.
Link of the song: https://www.youtube.com/watch?v=4awXLGzlf7E
Thanks in advance for any help.

How to search tweepy for more than one string?

In Python 3 and tweepy I have this script to do hashtags searches on Twitter:
import tweepy
consumer_key = ''
consumer_secret = ''
access_token = ''
access_token_secret = ''
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
api = tweepy.API(auth)
#test = api.get_user('some user')._json
#test
#The test worked
search_result = api.search('#maconhamedicinal' or '#cannabismedicinal')
search_result
[]
The result is an empty list. Please, does anyone know what the problem is?
keywords = ['#maconhamedicinal', '#cannabismedicinal']
results = []
for key in keywords:
    # One request per hashtag. In Python, 'a' or 'b' evaluates to just 'a',
    # so it cannot express a two-term search.
    results.extend(api.search(q=key, count=100))
for result in results:
    pass  # do whatever u wanna do

Categories

Resources