Extract date and time of a tweet tweepy Python - python

I have found a Python script that extracts tweets and stores them in a CSV file. I am not yet familiar with Python. Besides the tweets themselves, I also need to extract the date and time of each tweet. I have found how to extract other attributes, such as "retweeted" and "retweet_count", but I am still stuck on the date and time.
The script is here:
#!/usr/bin/env python
# encoding: utf-8
import tweepy #https://github.com/tweepy/tweepy
import csv
# Twitter API credentials
consumer_key = "..........................."
consumer_secret = "..........................."
access_key = "..........................."
access_secret = "..........................."
# Screen name of the account to download. NOTE: this must be the bare
# handle, not a hashtag — the original "#realDonaldTrump" (with '#') is a
# search term, and user_timeline would fail to find such a user.
screename = "realDonaldTrump"
def get_all_tweets(screen_name):
    """Download up to ~3240 recent tweets for *screen_name* and save them
    to '<screen_name>_tweets.csv'.

    Twitter only allows access to a user's most recent 3240 tweets with
    the user_timeline method. Each row contains the tweet id, the full
    created_at datetime plus separate date and time columns (answering the
    question asked above), the text, and the favorite/retweet fields.
    """
    # Authorize twitter, initialize tweepy.
    auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
    auth.set_access_token(access_key, access_secret)
    api = tweepy.API(auth)

    # Holds all fetched tweepy Tweet objects.
    alltweets = []

    # Initial request for the most recent tweets (200 is the max count).
    # BUG FIX: the original ignored its screen_name parameter and queried
    # the global `screename` instead, then rebound screen_name to
    # "Donald J. Trump", which silently changed the output filename.
    new_tweets = api.user_timeline(screen_name=screen_name, count=200)
    alltweets.extend(new_tweets)

    # BUG FIX: a user with no tweets made alltweets[-1] raise IndexError.
    if not alltweets:
        print("no tweets found for %s" % screen_name)
        return

    # The id of the oldest tweet fetched so far, less one.
    oldest = alltweets[-1].id - 1

    # Keep grabbing tweets until there are no tweets left to grab.
    while len(new_tweets) > 0:
        print("getting tweets before %s" % oldest)
        # All subsequent requests use the max_id param to prevent duplicates.
        new_tweets = api.user_timeline(screen_name=screen_name, count=200,
                                       max_id=oldest)
        alltweets.extend(new_tweets)
        # Update the id of the oldest tweet, less one.
        oldest = alltweets[-1].id - 1
        print("...%s tweets downloaded so far" % len(alltweets))

    # Transform the tweepy tweets into a 2D list that will populate the csv.
    # tweet.created_at is a datetime.datetime carrying both date and time;
    # split it into separate date and time columns as requested.
    outtweets = [[tweet.id_str,
                  tweet.created_at,
                  tweet.created_at.date(),
                  tweet.created_at.time(),
                  tweet.text,
                  tweet.favorite_count,
                  tweet.retweet_count,
                  tweet.favorited,
                  tweet.retweeted] for tweet in alltweets]

    # Write the csv. BUG FIX: open in text mode with newline='' and an
    # explicit encoding (the csv module's documented requirement on
    # Python 3); the original 'wb' mode only worked on Python 2, and the
    # text no longer needs a manual .encode("utf-8").
    with open('%s_tweets.csv' % screen_name, 'w', newline='',
              encoding='utf-8') as f:
        writer = csv.writer(f)
        writer.writerow(["id", "created_at", "date", "time", "text",
                         "favorite_count", "retweet_count",
                         "favorited", "retweeted"])
        writer.writerows(outtweets)
if __name__ == '__main__':
    # Entry point: download the timeline of the account configured above.
    get_all_tweets(screename)

The tweepy tweet model has created_at:
created_at
Creation time of the Tweet.
Type
datetime.datetime | None
An interesting fact is that you can derive the time from the tweet ID itself. Tweet IDs are k-sorted within a one-second bound. You can extract the timestamp (in milliseconds) for a tweet ID by right-shifting the ID by 22 bits and adding the Twitter epoch of 1288834974657.

Related

Tweepy + Twitter API v2: I want to extract the tweet.id for media_key only

What if I want to take the Tweet ID and media_keys and only get the Tweet ID that has media_keys?
I was trying to do it with this sample but I got stuck:
https://docs.tweepy.org/en/stable/examples.html
client = tweepy.Client(consumer_key=API_KEY, consumer_secret=API_SECRET,
                       access_token=ACCESS_TOKEN,
                       access_token_secret=ACCESS_TOKEN_SECRET,
                       bearer_token=Bearer_token)
counts = 10
search_result = client.get_list_tweets(id='list id', max_results=counts,
                                       expansions=["attachments.media_keys"])
tweets = search_result.data
includes = search_result.includes
medias = includes['media']

# BUG FIX: build the media_key -> media lookup ONCE; the original rebuilt
# this dict inside its per-tweet loop and never used the tweet id it bound.
mediass = {media['media_key']: media for media in medias}

# Only tweets whose attachments carry media_keys have attached media —
# print just those tweet ids together with their media objects.
# NOTE(review): attachments is populated because the request above asked
# for the attachments.media_keys expansion; it is None for media-less tweets.
for tweet in tweets:
    media_keys = (tweet.attachments or {}).get('media_keys', [])
    if media_keys:
        print(tweet.id, [mediass[k] for k in media_keys if k in mediass])
You can check the attachments field of Tweet objects to obtain media keys for media attached to the Tweet.

Tweepy double scraping

I have been using tweepy to scrape Twitter for about 9 months. On Friday of last week my scraper stopped working in two ways: 1) it started to return an empty list instead of previous tweets even when tweets are present on the user's profile, and 2) it scraped old tweets when only the most recent tweets should be scraped. Has anyone been experiencing the same issues? Any suggested fixes appreciated!
def get_tweets(username):
    """Fetch the most recent non-retweet tweet for *username* and return
    (first sentence of its text, tweet id).

    Returns (None, None) when the timeline is empty. BUG FIX: the original
    fell through with text_of_tweet=None in that case, and re.split then
    raised TypeError.
    """
    # Authorization to consumer key and consumer secret.
    auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
    # Access to user's access key and access secret.
    auth.set_access_token(access_key, access_secret)
    # NOTE(review): wait_on_rate_limit_notify was removed in tweepy 4.x;
    # if the library was upgraded this call raises TypeError — a likely
    # cause of a scraper that "suddenly stopped working". Kept for 3.x.
    api = tweepy.API(auth, wait_on_rate_limit=True,
                     wait_on_rate_limit_notify=True)

    text_of_tweet = None
    tweet_id = None
    number_of_tweets = 1

    # Scrape the most recent tweet on the user's timeline (no retweets).
    tweet = api.user_timeline(screen_name=username, count=number_of_tweets,
                              include_rts=False)

    for item in tweet:
        text_of_tweet = item.text
        tweet_id = item.id
        # Convert to pure ASCII only when non-ASCII characters are present.
        if not all(ord(c) < 128 for c in text_of_tweet):
            text_of_tweet = conv_true_ascii(text_of_tweet)

    # Empty timeline: bail out instead of crashing in re.split below.
    if text_of_tweet is None:
        return None, None

    # Keep only the first sentence, then only the first line of it.
    list_of_sentences = re.split(r'(?<=[^A-Z].[.?]) +(?=[A-Z])', text_of_tweet)
    text_of_tweet = list_of_sentences[0]
    text_of_tweet = text_of_tweet.split('\n')[0]

    return text_of_tweet, tweet_id
def conv_true_ascii(single_tweet):
    """Return *single_tweet* with every non-ASCII character removed.

    BUG FIX: the original round-tripped through str(bytes) and stripped the
    "b'" prefix and trailing quote with str.replace — but b'' * n is always
    empty (the padding did nothing), and .replace(edited_tweet[-1], '')
    deletes EVERY occurrence of the final character (e.g. all apostrophes
    in the tweet), while backslash escapes from the bytes repr leak through.
    Decoding the ASCII-filtered bytes directly is correct and simpler.
    """
    return single_tweet.encode('ascii', errors='ignore').decode('ascii')

Unretweet all the tweets using tweepy

The following code will un-retweet a single tweet by id:
# Calling the api.
api = tweepy.API(auth)
# The ID of the tweet to be un-retweeted. BUG FIX: the original left the
# right-hand side empty ("ID ="), which is a SyntaxError — fill in a real
# numeric tweet id here.
ID = 1234567890
# Un-retweeting the tweet.
api.unretweet(ID)
I want to know how to un-retweet all the tweets that have ever been retweeted.

How to extract tweets location which contain specific keyword using twitter API in Python

I am trying to extract all tweets that contain a specific keyword, along with their geo locations.
For example, I want to download all tweets in English that contain the keyword 'iphone' from 'France' and 'Singapore'.
My code
import tweepy
import csv
import pandas as pd
import sys

# API credentials here
consumer_key = 'INSERT CONSUMER KEY HERE'
consumer_secret = 'INSERT CONSUMER SECRET HERE'
access_token = 'INSERT ACCESS TOKEN HERE'
access_token_secret = 'INSERT ACCESS TOKEN SECRET HERE'

auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
api = tweepy.API(auth, wait_on_rate_limit=True, wait_on_rate_limit_notify=True)

# Getting the search word/hashtag and date range from the user.
HashValue = input("Enter the hashtag you want the tweets to be downloaded for: ")
# NOTE(review): the standard search API has no "since" parameter and only
# goes back ~7 days; the value below is passed through but may be ignored.
StartDate = input("Enter the start date in this format yyyy-mm-dd: ")

# BUG FIX: open the CSV in text mode with newline='' and an explicit
# encoding (the csv module's documented requirement). The original opened
# without either and wrote .encode('utf-8') bytes, which puts literal
# b'...' strings into the CSV on Python 3; it also never closed the file —
# the `with` block guarantees that now.
with open(HashValue + '.csv', 'a', newline='', encoding='utf-8') as csvFile:
    csvWriter = csv.writer(csvFile)
    for tweet in tweepy.Cursor(api.search, q=HashValue, count=20, lang="en",
                               since=StartDate, tweet_mode='extended').items():
        print(tweet.created_at, tweet.full_text)
        csvWriter.writerow([tweet.created_at, tweet.full_text])

print("Scraping finished and saved to " + HashValue + ".csv")
# sys.exit()
How can this be done?
- Rahul
As I understand it, you are looking to get geo data off searched tweets, rather than filtering the search based on a geocode.
Here is a code sample with the relevant fields you are interested in. These may or may not be provided depending on the tweeters privacy settings.
Note there is no "since" parameter on the search API:
https://tweepy.readthedocs.io/en/latest/api.html#help-methods
https://developer.twitter.com/en/docs/tweets/search/api-reference/get-search-tweets
Standard twitter api search goes back 7 days. The premium and enterprise APIs have 30 day search as well as Full Archive search, but you will pay $$$.
Unfortunately tweepy still hasn't had their models documented:
https://github.com/tweepy/tweepy/issues/720
So if you want to look at the tweet object you can use pprint package and run:
pprint(tweet.__dict__)
One difference I noticed was the "text" field in the JSON became "full_text" in the object.
There's also information on the original tweet in there if the one you found was a quote tweet, has the same info from what I could see.
Anyway here's the code, I added a max tweet count for looping through the cursor while I was testing to avoid blowing any API limits.
Let me know if you want csv code but it looks like you can handle that already.
import tweepy

# API credentials here
consumer_key = 'your-info'
consumer_secret = 'your-info'
access_token = 'your-info'
access_token_secret = 'your-info'

auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
api = tweepy.API(auth, wait_on_rate_limit=True, wait_on_rate_limit_notify=True)

searchString = "iPhone"
cursor = tweepy.Cursor(api.search, q=searchString, count=20, lang="en",
                       tweet_mode='extended')

# Cap how many tweets we walk through while testing, so we stay well
# inside the API rate limits.
maxCount = 1

for seen, tweet in enumerate(cursor.items(), start=1):
    print()
    print("Tweet Information")
    print("================================")
    print("Text: ", tweet.full_text)
    print("Geo: ", tweet.geo)
    print("Coordinates: ", tweet.coordinates)
    print("Place: ", tweet.place)
    print()
    print("User Information")
    print("================================")
    print("Location: ", tweet.user.location)
    print("Geo Enabled? ", tweet.user.geo_enabled)
    if seen == maxCount:
        break
Will output something like this:
Tweet Information
================================
Text: NowPlaying : Hashfinger - Leaving
https://derp.com
#iPhone free app https://derp.com
#peripouwebradio
Geo: None
Coordinates: None
Place: None
User Information
================================
Location: Greece
Geo Enabled? True

How to extract 1000 tweets using Python?

I'm trying to extract tweets based on a country name, but the code always retrieves a small number of tweets (about 23, 50, or 70 — not more than that). Does anyone know how to retrieve around 1000-5000 tweets?
# These are not my real credentials.
# BUG FIX: the original used curly "smart" quotes (‘xxx’) and a bare
# "Consume:" line, both of which are SyntaxErrors in Python.
# Consumer:
CONSUMER_KEY = 'xxx'
CONSUMER_SECRET = 'ttt'
# Access:
ACCESS_TOKEN = 'rffg'
ACCESS_SECRET = 'mmvvvt'

import tweepy
import csv

# Get authorization.
auth = tweepy.OAuthHandler(CONSUMER_KEY, CONSUMER_SECRET)
auth.set_access_token(ACCESS_TOKEN, ACCESS_SECRET)
api = tweepy.API(auth)

# Find the place id for the country. BUG FIX: geo_search and search have
# no "since" parameter — the original passed since='10' and since='1'.
place = api.geo_search(query="Saudi Arabia", granularity="country")
place_id = place[0].id

# Print tweets and save them to a csv file.
with open('tweets.csv', 'w', newline='', encoding='utf-8') as csvFile:
    tweetWriter = csv.writer(csvFile, delimiter=',')
    # A single api.search call returns at most 100 tweets, which is why
    # only ~23-70 came back. Cursor paginates, so up to 5000 can be
    # collected (still limited by the standard search API's ~7-day window).
    count = 0
    for tweet in tweepy.Cursor(api.search, q='place:%s' % place_id,
                               count=100).items(5000):
        count += 1
        # tweet.id = unique id, text = text, place.name = where it was
        # posted, created_at = UTC time. place can be None on some tweets.
        place_name = tweet.place.name if tweet.place else ''
        tweetData = [tweet.id, tweet.user.name, tweet.text, place_name,
                     tweet.created_at]
        tweetWriter.writerow(tweetData)
    print(count)

Categories

Resources