I am trying to get all tweets from a given account but I can get only last 20 tweets. How can I get all the tweets that user posted ?
Here is my code:
from bs4 import BeautifulSoup as bs
import urllib
#This function returns tweets from
#given username's account as a list
def get_tweets(username):
tweets = []
URL = "https://twitter.com/"+username
soup = bs(urllib.request.urlopen(URL), 'lxml')
for li in soup.find_all("li", {"data-item-type": "tweet"}):
text_p = li.find("p", class_="tweet-text")
if text_p is not None:
tweets.append(text_p.get_text())
return tweets
In Tweepy, you can get the user's timeline like this:
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
api = tweepy.API(auth)
for status in tweepy.Cursor(api.user_timeline, username).items():
print('status_id: {}, text: {}'.format(status.id, status.text.encode('utf-8')))
Notice that it's using Tweepy.Cursor, which iterates through the list until there aren't any more items.
Related
I am coding a twitter bot but I am having challenges along the way.
The code will get data from an API and tweet out data daily. Unfortunely, I am having trouble getting the return function on the last line to work that will actually send the tweet out.
import urllib.request
from pprint import pprint
import json
import datetime
import tweepy
import time
import os
import logging
def importVax():
link = 'https://data.ontario.ca/api/3/action/datastore_search?resource_id=8a89caa9-511c-4568-af89-7f2174b4378c&limit=100'
query = urllib.request.urlopen(link)
query = json.loads(query.read())
for r in query['result']['records']:
date = datetime.datetime.strptime(r['report_date'][0:10], "%Y-%m-%d").date()
previous_day_admin = r['previous_day_doses_administered']
total_admin = r['total_doses_administered']
total_complete = r['total_vaccinations_completed']
if previous_day_admin == '':
previous_day_admin = 0
print(date, previous_day_admin, total_admin)
if __name__ == '__main__':
importVax()
consumer_key = 'REDACTED'
consumer_secret ='REDACTED'
access_token = 'REDACTED'
access_token_secret = 'REDACTED'
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
api = tweepy.API(auth)
def daily_update(date, previous_day_admin, total_admin, total_complete):
message = str(
f'''
[{date}]
Doses Administered: {previous_day_admin}
Total Completed Vaccinations: {total_complete}
% Immune: {round(total_complete/14570000)}
'''
)
return api.update_status(message)
This is simple code I wrote to scrape data from twitter using tweepy.
import tweepy
import csv
import pandas as pd
from datetime import date
####input your credentials here
consumer_key = '(Hidden)'
consumer_secret = '(Hidden)'
access_token = '(Hidden)'
access_token_secret = '(Hidden)'
today = date.today()
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
api = tweepy.API(auth,wait_on_rate_limit=True)
csvFile = open('remotejob.csv', 'a')
#Use csv Writer
csvWriter = csv.writer(csvFile)
for tweet in tweepy.Cursor(api.search,q="#jobs #remote",count=5000,
lang="en",
since=today,tweet_mode = 'extended').items():
print (tweet.created_at, tweet.id)
csvWriter.writerow([tweet.created_at, tweet.full_text.encode('utf-8'), tweet.id, tweet.user.name.encode('utf-8'), tweet.user.screen_name.encode('utf-8'), tweet.user.statuses_count, tweet.retweet_count, tweet.favorite_count])
How can I schedule it to run every hour automatically?
My code is:
import tweepy
import csv
import pandas as pd
import sys
#input your credentials here
consumer_key = ''
consumer_secret = ''
access_token = ''
access_token_secret = ''
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
api = tweepy.API(auth,wait_on_rate_limit=True)
# Open/Create a file to append data
csvFile = open('amaravathi.csv', 'a')
#Use csv Writer
csvWriter = csv.writer(csvFile)
for tweet in tweepy.Cursor(api.search,q="#amaravathi",count=10,
lang="en").items():`
csvWriter.writerow([tweet.created_at, tweet.text.encode('utf-8')])
print(tweet.created_at,tweet.text)
csvFile.close()
In Python 3 and tweepy I have this script to do hashtags searches on Twitter:
import tweepy
consumer_key = ''
consumer_secret = ''
access_token = ''
access_token_secret = ''
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
api = tweepy.API(auth)
#test = api.get_user('some user')._json
#test
#The test worked
search_result = api.search('#maconhamedicinal' or '#cannabismedicinal')
search_result
[]
The result is an empty list. Please, does anyone know what the problem is?
keywords = ['#maconhamedicinal','#cannabismedicinal']
results = []
for key in keywords:
search_results = api.search(q=key, count=100)
results = results + search_results
for result in results:
# do whatever u wanna do
I'm trying to exclude retweets and replies in my Twython search.
Here is my code:
from twython import Twython, TwythonError
app_key = "xxxx"
app_secret = "xxxx"
oauth_token = "xxxx"
oauth_token_secret = "xxxx"
naughty_words = [" -RT"]
good_words = ["search phrase", "another search phrase"]
filter = " OR ".join(good_words)
blacklist = " -".join(naughty_words)
keywords = filter + blacklist
twitter = Twython(app_key, app_secret, oauth_token, oauth_token_secret)
search_results = twitter.search(q=keywords, count=100)
The problem is that the -RT function isn't really working.
EDIT:
I've tried #forge suggestion, and while it does print the if tweets are not retweets or replies, when I incorporate them into the code below, the bot still finds tweets, retweets, quotes and replies.
twitter = Twython(app_key, app_secret, oauth_token, oauth_token_secret) query = 'beer OR wine AND -filter:retweets AND -filter:replies'
response = twitter.search(q=query, count=100)
statuses = response['statuses']
try:
for tweet in statuses:
try:
twitter.retweet(id = tweet["id_str"])
except TwythonError as e:
print e
except TwythonError as e:
print e
Any ideas? Is there a filter:quotes?
The correct syntax is -filter:retweets.
If you would like to search on terms "search phrase" or "another search phrase" and exclude retweets, then the query should be:
query = "search_phrase OR another_search_phrase -filter:retweets"
To exclude replies as well, add -filter:replies like this:
query = "search_phrase OR another_search_phrase -filter:retweets AND -filter:replies"
This should be working, you can verify it by checking the status fields in_reply_to_status_id and retweeted_status:
Status is not a reply if in_reply_to_status_id is empty
Status is not a retweet if it doesn't have the field retweeted_status
With Twython:
import twython
twitter = twython.Twython(APP_KEY, APP_SECRET, OAUTH_TOKEN, OAUTH_TOKEN_SECRET)
query = 'wine OR beer -filter:retweets AND -filter:replies'
response = twitter.search(q=query, count=100)
statuses = response['statuses']
for status in statuses:
print status['in_reply_to_status_id'], status.has_key('retweeted_status')
# Output should be (None, False) to any status