I am using an Academic Research account to retrieve tweet information, but I don't know how to get the status_id. I thought the conversation_id would be the same as the status_id, but when I trace it back, apparently it is not. What should I add to tweet_fields?
# Collect every page of full-archive search results.
# Each tweet's "id" field (requested in tweet_fields below) is its status id.
tweets = []
for response in tweepy.Paginator(
        client.search_all_tweets,
        query='query -is:retweet lang:en',
        user_fields=['username', 'public_metrics', 'description', 'location'],
        tweet_fields=['created_at', 'geo', 'public_metrics', 'text', 'id',
                      'conversation_id'],
        expansions=['author_id', 'geo.place_id'],
        # Each bound is a single ISO 8601 timestamp string, not a list.
        start_time='2020-01-01T00:00:00Z',
        end_time='2020-12-12T00:00:00Z'):
    time.sleep(1)  # wait one second between page requests
    tweets.append(response)

# Start with an empty result list; a bare `result` expression would raise
# NameError here because the name has not been defined yet.
result = []
You've already got it - "id" is the status id
Tweets are the basic atomic building block of all things Twitter.
Tweets are also known as “status updates.” The Tweet object has a long
list of ‘root-level’ attributes, including fundamental attributes such
as id, created_at, and text
https://developer.twitter.com/en/docs/twitter-api/v1/data-dictionary/object-model/tweet
It may be a bit confusing because references to that id are labeled things like "in_reply_to_status_id" - but there is no field called "status_id" - it's just id.
Related
Hi all. I'm trying to figure out how to filter for a specific country's tweets using search_recent_tweets. I take a country name as input, use pycountry to get the two-character country code, and then I can put some sort of location filter either in my query or in the search_recent_tweets parameters. Nothing I have tried so far in either place has worked.
######
import tweepy
from tweepy import OAuthHandler
from tweepy import API
import pycountry as pyc
# upload token
BEARER_TOKEN = 'XXXXXXXXX'

# get tweets
client = tweepy.Client(bearer_token=BEARER_TOKEN)

# TAKE USER INPUT
countryQuery = input("Find recent tweets about travel in a certain country (input country name): ")
# gets tweets containing women and safe for that country
keyword = 'women safe'

# Resolve the free-text country name to its ISO 3166-1 alpha-2 code.
country_code = str(pyc.countries.search_fuzzy(countryQuery)[0].alpha_2)

# Location filtering is done with the `place_country:` query operator, which
# takes the two-letter country code. `place_fields` only selects which place
# attributes come back; it never filters.
# NOTE(review): `place_country:` is an advanced operator -- confirm that the
# access level of this bearer token allows it.
query = f"{keyword} place_country:{country_code} -is:retweet"
posts = client.search_recent_tweets(
    query=query,
    max_results=100,
    tweet_fields=['id', 'text', 'entities', 'author_id', 'geo'],
    expansions=['geo.place_id'],
    place_fields=['country', 'country_code', 'full_name'],
)

# export tweets to json (one JSON object per line)
import json
with open('twitter.json', 'w') as fp:
    # posts.data is None when nothing matched, so fall back to an empty list.
    for tweet in posts.data or []:
        json.dump(tweet.data, fp)
        fp.write('\n')
        print("* " + str(tweet.text))
I have tried variations of:
# Failing attempt: place_fields names which place attributes to return (the
# API error quoted below lists the accepted names), so passing a country name
# or code here is rejected instead of filtering by location.
query = str(keyword+' -is:retweet') # search for keyword and no retweets
posts = client.search_recent_tweets(query=query, place_fields=[str(countryQuery), country_code], max_results=100, tweet_fields=['id', 'text', 'entities', 'author_id'])
and:
# Failing attempt: "place.fields=..." is embedded in the query text itself.
# NOTE(review): presumably the API treats it as literal search terms, which
# would explain the empty results described below -- confirm.
query = str(keyword+' place.fields='+str(countryQuery)+','+country_code+' -is:retweet') # search for keyword and no retweets
posts = client.search_recent_tweets(query=query, max_results=100, tweet_fields=['id', 'text', 'entities', 'author_id'])
These either ended up pulling me NoneType tweets aka nothing or causing a
"The place.fields query parameter value [Germany] is not one of [contained_within,country,country_code,full_name,geo,id,name,place_type]"
The documentation for search_recent_tweets seems like place.fields / place_fields / place_country should be supported.
Any advice would help!!!
I am trying to retrieve Twitter data with Tweepy using the code below, but I'm having difficulty collecting the media_fields data. In particular, I want to get the type of each media item, but I have not managed to.
As you can see below, the value is copied and exists in the cell that should be empty.
[enter image description here][1]
import tweepy
from twitter_authentication import bearer_token
import time
import pandas as pd
client = tweepy.Client(bearer_token, wait_on_rate_limit=True)

# Fetch every page of matching tweets together with the requested expansions.
hoax_tweets = []
for response in tweepy.Paginator(
        client.search_all_tweets,
        query='Covid hoax -is:retweet lang:en',
        user_fields=['username', 'public_metrics', 'description', 'location',
                     'verified', 'entities'],
        tweet_fields=['id', 'in_reply_to_user_id', 'referenced_tweets',
                      'context_annotations', 'source', 'created_at',
                      'entities', 'geo', 'withheld', 'public_metrics', 'text'],
        media_fields=['media_key', 'type', 'url', 'alt_text',
                      'public_metrics', 'preview_image_url'],
        expansions=['author_id', 'in_reply_to_user_id', 'geo.place_id',
                    'attachments.media_keys', 'referenced_tweets.id',
                    'referenced_tweets.id.author_id'],
        place_fields=['id', 'name', 'country_code', 'place_type', 'full_name',
                      'country', 'geo', 'contained_within'],
        start_time='2021-01-20T00:00:00Z',
        end_time='2021-01-21T00:00:00Z',
        max_results=100):
    time.sleep(1)
    hoax_tweets.append(response)

result = []
user_dict = {}
media_dict = {}

# Loop through each response object
for response in hoax_tweets:
    # Take all of the users, and put them into a dictionary of dictionaries
    # with the info we want to keep. A page may lack an expansion entirely,
    # hence the .get() with an empty default.
    for user in response.includes.get('users', []):
        user_dict[user.id] = {
            'username': user.username,
            'followers': user.public_metrics['followers_count'],
            'tweets': user.public_metrics['tweet_count'],
            'description': user.description,
            'location': user.location,
            'verified': user.verified,
        }

    # Media objects are linked to tweets through their media_key, so index
    # them by that key (no tweet exists yet at this point of the loop).
    for media in response.includes.get('media', []):
        media_dict[media.media_key] = {
            'media_key': media.media_key,
            'type': media.type,
        }

    # response.data is None for a page without tweets.
    for tweet in response.data or []:
        # For each tweet, find the author's information
        author_info = user_dict[tweet.author_id]

        # tweet.attachments is None for tweets without attachments; otherwise
        # its 'media_keys' entry lists the keys of the attached media.
        media_keys = (tweet.attachments or {}).get('media_keys', [])
        media_types = [media_dict[key]['type']
                       for key in media_keys if key in media_dict]

        # Put all of the information we want to keep in a single dictionary
        # for each tweet
        result.append({
            'author_id': tweet.author_id,
            'username': author_info['username'],
            'author_followers': author_info['followers'],
            'author_tweets': author_info['tweets'],
            'author_description': author_info['description'],
            'author_location': author_info['location'],
            'author_verified': author_info['verified'],
            'tweet_id': tweet.id,
            'text': tweet.text,
            'created_at': tweet.created_at,
            'retweets': tweet.public_metrics['retweet_count'],
            'replies': tweet.public_metrics['reply_count'],
            'likes': tweet.public_metrics['like_count'],
            'quote_count': tweet.public_metrics['quote_count'],
            'in_reply_to_user_id': tweet.in_reply_to_user_id,
            'media': tweet.attachments,
            # Types of this tweet's own media; None when it has no media.
            'media_type': media_types or None,
            'conversation': tweet.referenced_tweets,
        })

# Change this list of dictionaries into a dataframe
df = pd.DataFrame(result)
Also, when I change the code from 'media':tweet.attachments to 'media':tweet.attachments[0] to get the 'media_key' data, I get the following error message: "TypeError: 'NoneType' object is not subscriptable".
What am I doing wrong? Any suggestions would be appreciated.
[1]: https://i.stack.imgur.com/AxCcl.png
The "not subscriptable" error comes from the fact that tweet.attachments is None, hence the NoneType part. To make it work, you can add a check for None:
'media':tweet.attachments[0] if tweet.attachments else None
I have never used the Twitter API, but one thing to check is whether tweet attachments are always present or may be absent.
I'm having trouble getting the user_id from a specific tweet_id. Is there a way to get the author of a tweet id using Tweepy?
To get the author of that tweet, or of the retweeted/quoted status, try:
# `id` stands for the tweet id you want to look up.
status = api.get_status(id)
author = status.author
# retweeted_status / quoted_status only exist on retweets / quote tweets, so
# guard the lookups instead of raising AttributeError on ordinary tweets.
retweeted_status_author = (
    status.retweeted_status.author
    if hasattr(status, 'retweeted_status') else None
)
quoted_status_author = (
    status.quoted_status.author
    if hasattr(status, 'quoted_status') else None
)
I have three databases in GAE. Hobby, Attendee and Event.
class Hobby(db.Model):
    """A hobby that attendees can pick and that an event can be tagged with."""

    # Display name of the hobby.
    name = db.StringProperty()
# Fetch-or-create the first two hobbies and store each one's name on it.
for hobby_name in ('tennis', 'basketball'):
    htest = Hobby.get_or_insert(hobby_name)
    htest.name = hobby_name
    htest.put()
# 'food' is only fetched or created here; no name is assigned or saved.
htest = Hobby.get_or_insert('food')
class Event(db.Model):
    """An event; ``hobby`` references the one Hobby the event is tied to."""

    title = db.StringProperty(required=True)
    description = db.TextProperty()
    time = db.DateTimeProperty()
    location = db.TextProperty()
    creator = db.UserProperty()
    edit_link = db.TextProperty()
    gcal_event_link = db.TextProperty()
    gcal_event_xml = db.TextProperty()
    # The hobby chosen for this event when it was created.
    hobby = db.ReferenceProperty(Hobby)
class Attendee(db.Model):
    """An attendee with an email, a list of hobby keys and an event."""

    email = db.StringProperty()
    # Keys of the Hobby entities this attendee picked (any number of them).
    hobbies = db.ListProperty(db.Key)
    event = db.ReferenceProperty(Event)
Each Attendee can pick as many hobbies as they like. When an event is created, the user chooses a hobby to associate with the event, and an invitation is sent to every attendee who has chosen that hobby. The Hobby table is preloaded.
I want to make a query that does that.
after reading Nick's blog
http://blog.notdot.net/2010/10/Modeling-relationships-in-App-Engine
which was very helpful I feel like I'm supposed to use the method that was mentioned in there
# All attendees whose hobbies list contains the given hobby (first 100).
# NOTE(review): hobbies stores db.Key values, so `basketball` is presumably
# the key of the basketball Hobby entity -- confirm.
attendees = Attendee.all().filter('hobbies =', basketball).fetch(100)
however, i'm stuck there... any help would be really appreciated.
I think you should record the invitations sent in a table, say "invitationsSend", with two fields, event and attendee, which together form a unique primary key.
To build this, you will have to select the data between both your tables event and attendees :
insert into invitationsSend(select E.Event, A.Attendee from Event as E, Attendee as A where E.Hobby = A.Hobby)
But I'm not familiar with the "db.ListProperty" you use for "hobbies", and I do not know how to query that list. I would model it as a separate table with the columns "Attendee, Hobby", which together form the primary key.
Regards,
I am trying to create a query using django models. I have 4 models. The functionality is to display all a person's facebook friends that are not already friends and do not have a friend request waiting.
CustomUserFriends
id,
from_customuser_id,
to_customuser_id,
FacebookProfile
id,
facebook_id,
custom_user_id
CustomUser
id,
name,
FriendRequests
id,
requester (user_id of the person requesting),
requestee (user_id of the person requested),
Now I have a list of facebook ids as a variable example
facebook_ids = [12123,45433,455664,44445]
Essentially, the query I'm trying to create through Django models is: select all CustomUsers whose Facebook id (in the FacebookProfile table) is in that list, but who are not already friends with the user and do not have a pending friend request with them.
A friend is defined as having 2 records in the CustomUserFriends table, example
a friend relationship is
CustomUsers
id
1
2
CustomUserFriends
id from_custom_user_id to_custom_user_id
1 1 2
2 2 1
So, I wasn't entirely sure what you were trying to accomplish here. It was a toss up between getting all non-friends for a particular user or having a particular user and trying to find all of his friends who weren't friends with each other. I decided I'd do both and let you decide which one you wanted.
First, there are two functions. One is the main function we'll be using, the other is just for displaying the info.
def get_non_friends_for_user(u, friend_ids_filter=None):
    """Return the CustomUser queryset of users not connected to ``u``.

    A user counts as connected when a CustomUserFriends row links them to
    ``u`` in either direction, or when a FriendRequests row exists between
    them in either direction. ``u`` itself is excluded as well, so a user is
    never reported as their own non-friend.

    Args:
        u: object whose ``pk`` is the id of the user to inspect.
        friend_ids_filter: optional collection of user ids; when non-empty,
            the result is restricted to users whose id is in it.

    Returns:
        A CustomUser queryset.
    """
    # A user must not show up as a non-friend of themselves.
    excluded_ids = [u.pk]

    # Search the friends lists (both directions of the relationship)
    excluded_ids += list(CustomUserFriends.objects.filter(
        from_customuser_id=u.pk).values_list('to_customuser_id', flat=True))
    excluded_ids += list(CustomUserFriends.objects.filter(
        to_customuser_id=u.pk).values_list('from_customuser_id', flat=True))

    # Search the requests lists (sent and received)
    excluded_ids += list(FriendRequests.objects.filter(
        requester=u.pk).values_list('requestee', flat=True))
    excluded_ids += list(FriendRequests.objects.filter(
        requestee=u.pk).values_list('requester', flat=True))

    non_friends = CustomUser.objects.exclude(id__in=excluded_ids)
    if friend_ids_filter:
        non_friends = non_friends.filter(id__in=friend_ids_filter)
    return non_friends
def display_user_info(cu, non_friends):
    """Print ``cu``'s name followed by one tab-indented name per non-friend.

    Args:
        cu: object with a ``name`` attribute (the user being reported on).
        non_friends: iterable of objects with a ``name`` attribute.
    """
    # Blank separator line before each user's section.
    print('')
    # Use the `cu` parameter; `cuf` is not defined inside this function.
    print(cu.name)
    for non_friend in non_friends:
        print('\t%s' % non_friend.name)
Now, to get all people who are not friends of a particular user we just use that function
# Get all non-friends for custom_user.
# Note that custom_user must already be defined as a CustomUser instance
# (get_non_friends_for_user reads its .pk).
non_friends = get_non_friends_for_user(custom_user)
display_user_info(custom_user, non_friends)
To get the list of a user's friends that aren't friends with another of the user's friends, we can do this:
# Again, custom_user must already be defined as a CustomUser instance.
# For every friend of custom_user, find which of custom_user's other friends
# that friend is not yet connected to.
custom_user_non_friends = {}
custom_user_friends = CustomUserFriends.objects.filter(
    from_customuser_id=custom_user.pk)
friend_ids = list(custom_user_friends.values_list('to_customuser_id', flat=True))

# Load the friends as CustomUser objects: the helpers below need .pk and
# .name, which a raw to_customuser_id value does not provide.
for cu in CustomUser.objects.filter(id__in=friend_ids):
    # Add the queryset to the dictionary
    custom_user_non_friends[cu] = get_non_friends_for_user(cu, friend_ids)

for cu, non_friends in custom_user_non_friends.items():
    display_user_info(cu, non_friends)
And that should do it. I haven't tested any of this and it's all pretty much coming off the top of my head, so there may be some bugs. If it doesn't work for you or it's not what you were looking for, just post a comment and I'll see what I can do.