This is driving me crazy. As you can see below I am trying to use a simple while loop to perform a couple of tweepy searches and append them into a data frame. For some reason however after pulling the first set of 100 tweets it just repeats that set instead of performing a new search. Any advice would be greatly appreciated.
import sys
import csv
import pandas as pd
import tweepy
from tweepy import OAuthHandler
consumer_key = ''
consumer_secret = ''
access_token = ''
access_secret = ''
auth = OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_secret)
api = tweepy.API(auth)
num_results = 200
result_count = 0
last_id = None
df = pd.DataFrame(columns=['Name', 'Location', 'Followers', 'Text', 'Coorinates'])
while result_count < num_results:
result = api.search(q='',count=100, geocode= "38.996918,-104.995826,190mi", since_id = last_id)
for tweet in result:
user = tweet.user
last_id = tweet.id_str
name = user.name
friends = user.friends_count
followers = user.followers_count
text = tweet.text.encode('utf-8')
location = user.location
coordinates = tweet.coordinates
df.loc[result_count] = pd.Series({'Name':name, 'Location':location, 'Followers':followers, 'Text':text, 'Coordinates':coordinates})
print(text)
result_count += 1
# Save to Excel
print("Writing all tables to Excel...")
df.to_csv('out.csv')
print("Excel Export Complete.")
The API.search method returns tweets that match a specified query. It's not a Streaming APi, so it returns all data at once.
Furthermore, in your query parameters, you have added count, that specifies the number of statuses to retrieve.
So the problem is that with your query you are returning the first 100 data of the complete set for each while iteration.
I suggest you to change the code in something like this
result = api.search(q='', geocode= "38.996918,-104.995826,190mi", since_id = last_id)
for tweet in result:
user = tweet.user
last_id = tweet.id_str
name = user.name
friends = user.friends_count
followers = user.followers_count
text = tweet.text.encode('utf-8')
location = user.location
coordinates = tweet.coordinates
df.loc[result_count] = pd.Series({'Name':name, 'Location':location, 'Followers':followers, 'Text':text, 'Coordinates':coordinates})
print(text)
Let me know.
Related
I can retrieve tweets with a specific hashtag using tweepy:
Code:
from os import access
import tweepy
import configparser
import pandas as pd
# config = configparser.ConfigParser()
# config.read('config.ini')
api_key = ''
api_key_secret = ''
access_token = ''
access_token_secret = ''
auth = tweepy.OAuthHandler(api_key, api_key_secret)
auth.set_access_token(access_token, access_token_secret)
api = tweepy.API(auth)
# user = '#veritasium'
keywords = '#SheHulk'
limit = 1200
tweets = tweepy.Cursor(api.search_tweets, q = keywords, count = 100, tweet_mode = 'extended').items(limit)
columns = ['User', 'Tweet']
data = []
for tweet in tweets:
data.append([tweet.user.screen_name, tweet.full_text])
df = pd.DataFrame(data, columns=columns)
df.to_excel("output.xlsx")
What I want to know is that if I can get the number of likes with every tweet that is retrieved. Any help would be appreciated.
In the Twitter API V1.1 (see documentation here), that field was called favorite_count.
for tweet in tweets:
print(f"That tweet has {tweet.favorite_count} likes").
I am trying to scrape tweets from a specified user based on a specific keyword using Tweepy. I tried using
if api.search(q="$"):
but I am running into an error. How can I solve this problem?
#Import the libraries
import tweepy
api_key = ""
api_key_secret = ""
access_token = ""
access_token_secret = ""
auth_handler = tweepy.OAuthHandler(consumer_key=api_key, consumer_secret=api_key_secret)
auth_handler.set_access_token(access_token, access_token_secret)
api = tweepy.API(auth_handler,wait_on_rate_limit=True)
user = api.get_user("TheShual")
print("User details:")
print(user.name)
print(user.description)
print(user.location)
userID = "TheShual"
tweets = api.user_timeline(screen_name=userID,
# 200 is the maximum allowed count
count=20,
include_rts = False,
# Necessary to keep full_text
# otherwise only the first 140 words are extracted
tweet_mode = 'extended'
)
for info in tweets[:10]:
if api.search(q="$"):
print(info.created_at)
print(info.full_text)
print("\n")
I apologize in advance if I don't know how to search the Tweepy documentation. I am quite new to python/programming in general.
I have written a small script to pull Twitter follower data for an account I manage for work. I would like to investigate when followers added us to see if our posts are increasing engagement. What I cannot figure out is if I can use the Tweepy module to pull this particular information (when the follower added us)?
Thank you in advance for any help. My MWE:
import tweepy
import pandas as pd
# Load API keys
consumer_key = "my_consumer_key"
consumer_secret = "my_consumer_secret"
access_token = "my_access_token"
access_token_secret = "my_access_token_secret"
# Authenticate access to Twitter API
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
api = tweepy.API(auth)
# Get the list of followers for the account
followers = api.followers_ids()
# Create a user map
userMap = {}
# Loop over all users
for f in followers:
# create a temporary list
tempList = []
try:
tweets = api.user_timeline(f, count = 33) # pull the 33 most recent tweets
except tweepy.TweepError:
print('Failed to run command.') # Tweepy throws an error if a user hasn't tweeted
# Loop over all tweets per each user f
for t in tweets:
tempList.append(t)
userMap[f] = tempList
# Create lists of pertinent data
dateList = []
favList = []
rtList = []
keyList = []
def genList(tweetList):
for tweets in tweetList:
for t in tweets:
keyList.append(str(t.id))
dateList.append(str(t.created_at))
favList.append(str(t.favorite_count))
rtList.append(str(t.retweet_count))
genList(userMap.values())
# Create a pandas data frame
df = pd.DataFrame(list(zip(keyList, dateList, favList, rtList)),
columns = ['userID', 'created_at', 'favorited', 'retweeted'])
This information is not provided by Twitter.
The followers/list (in Tweepy followers() method) returns a list of User objects. It looks like the only solution is to monitor the changes and manage the history yourself.
I am trying to extract the all tweets which contain specific keyword and its geo locations .
for example , I want download all the tweets in english which contains the keyword 'iphone' from 'france' and 'singapore'
My code
import tweepy
import csv
import pandas as pd
import sys
# API credentials here
consumer_key = 'INSERT CONSUMER KEY HERE'
consumer_secret = 'INSERT CONSUMER SECRET HERE'
access_token = 'INSERT ACCESS TOKEN HERE'
access_token_secret = 'INSERT ACCESS TOKEN SECRET HERE'
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
api = tweepy.API(auth,wait_on_rate_limit=True,wait_on_rate_limit_notify=True)
# Search word/hashtag value
HashValue = ""
# search start date value. the search will start from this date to the current date.
StartDate = ""
# getting the search word/hashtag and date range from user
HashValue = input("Enter the hashtag you want the tweets to be downloaded for: ")
StartDate = input("Enter the start date in this format yyyy-mm-dd: ")
# Open/Create a file to append data
csvFile = open(HashValue+'.csv', 'a')
#Use csv Writer
csvWriter = csv.writer(csvFile)
for tweet in tweepy.Cursor(api.search,q=HashValue,count=20,lang="en",since=StartDate, tweet_mode='extended').items():
print (tweet.created_at, tweet.full_text)
csvWriter.writerow([tweet.created_at, tweet.full_text.encode('utf-8')])
print ("Scraping finished and saved to "+HashValue+".csv")
#sys.exit()
How can this be done.
-Hello- Rahul
As I understand it you are looking to get geo data off searched tweets rather then filter search based on geocode.
Here is a code sample with the relevant fields you are interested in. These may or may not be provided depending on the tweeters privacy settings.
Note there is no "since" parameter on the search API:
https://tweepy.readthedocs.io/en/latest/api.html#help-methods
https://developer.twitter.com/en/docs/tweets/search/api-reference/get-search-tweets
Standard twitter api search goes back 7 days. The premium and enterprise APIs have 30 day search as well as Full Archive search, but you will pay $$$.
Unfortunately tweepy still hasn't had their models documented:
https://github.com/tweepy/tweepy/issues/720
So if you want to look at the tweet object you can use pprint package and run:
pprint(tweet.__dict__)
One difference I noticed was the "text" field in the JSON became "full_text" in the object.
There's also information on the original tweet in there if the one you found was a quote tweet, has the same info from what I could see.
Anyway here's the code, I added a max tweet count for looping through the cursor while I was testing to avoid blowing any API limits.
Let me know if you want csv code but it looks like you can handle that already.
import tweepy
# API credentials here
consumer_key = 'your-info'
consumer_secret = 'your-info'
access_token = 'your-info'
access_token_secret = 'your-info'
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
api = tweepy.API(auth,wait_on_rate_limit=True,wait_on_rate_limit_notify=True)
searchString = "iPhone"
cursor = tweepy.Cursor(api.search, q=searchString, count=20, lang="en", tweet_mode='extended')
maxCount = 1
count = 0
for tweet in cursor.items():
print()
print("Tweet Information")
print("================================")
print("Text: ", tweet.full_text)
print("Geo: ", tweet.geo)
print("Coordinates: ", tweet.coordinates)
print("Place: ", tweet.place)
print()
print("User Information")
print("================================")
print("Location: ", tweet.user.location)
print("Geo Enabled? ", tweet.user.geo_enabled)
count = count + 1
if count == maxCount:
break;
Will output something like this:
Tweet Information
================================
Text: NowPlaying : Hashfinger - Leaving
https://derp.com
#iPhone free app https://derp.com
#peripouwebradio
Geo: None
Coordinates: None
Place: None
User Information
================================
Location: Greece
Geo Enabled? True
I’m trying to extract tweets based on the country name but the code always retrieves small amounts of tweets (about 23, 50 and 70, not more than that). Does anyone know how to retrieve tweets around (1000-5000)?
# this is not my real credentials
Consume:
CONSUMER_KEY = ‘xxx’
CONSUMER_SECRET = ‘ttt’
# Access:
ACCESS_TOKEN = ‘rffg’
ACCESS_SECRET = ‘mmvvvt’
import tweepy
import csv
# get authorization
auth = tweepy.OAuthHandler(CONSUMER_KEY, CONSUMER_SECRET)
auth.set_access_token(ACCESS_TOKEN, ACCESS_SECRET)
api = tweepy.API(auth)
# get tweets from country
place = api.geo_search(query="Saudi Arabia", granularity="country" ,since= '10')
place_id = place[0].id
# print tweets and save to csv file
with open('tweets.csv', 'w', newline='', encoding='utf-8') as csvFile:
tweetWriter = csv.writer(csvFile, delimiter=',')
tweets = api.search(q='place:%s' % place_id, count=100, since='1')
count = 0
for tweet in tweets:
count += 1
# tweet.id = unique id for tweet, text = text, place.name = where it was posted, created_at = UTC time
tweetData = [tweet.id, tweet.user.name, tweet.text, tweet.place.name, tweet.created_at]
tweetWriter.writerow(tweetData)
print(count)