How to get the substring (photo URL) from snscrape? - Python

Edit: since I realize the media can also contain a video URL, my question is: how can I get only the photo URL in the following loop?
I want to add an attribute called photoUrl that holds the full URL from the media.
import snscrape.modules.twitter as sntwitter
import pandas as pd

# Creating list to append tweet data to
attributes_container = []

# Using TwitterSearchScraper to scrape data and append tweets to list
for i, tweet in enumerate(sntwitter.TwitterSearchScraper('sex for grades since:2021-07-05 until:2022-07-06').get_items()):
    if i > 150:
        break
    attributes_container.append([tweet.user.username, tweet.date, tweet.likeCount, tweet.sourceLabel, tweet.content, tweet.media])

# Creating a dataframe to load the list
tweets_df = pd.DataFrame(attributes_container, columns=["User", "Date Created", "Number of Likes", "Source of Tweet", "Tweet", "media"])
When I use snscrape to scrape tweets from Twitter, I want to pick the photo URLs out of the media. I get a media object like the following:
media=[Photo(previewUrl='https://pbs.twimg.com/media/FePrYL7WQAQDKEB?format=jpg', fullUrl='https://pbs.twimg.com/media/FePrYL7WQAQDKEB?format=jpg&name=large')]
How can I get the previewUrl ('https://pbs.twimg.com/media/FePrYL7WQAQDKEB?format=jpg') and the full URL separately in Python code?
Thanks

You can change your for loop to:
for i, tweet in enumerate(sntwitter.TwitterSearchScraper('sex for grades since:2021-07-05 until:2022-07-06').get_items()):
    if i > 150:
        break
    try:
        tweetMedia = tweet.media[0].fullUrl  # .previewUrl if you want previewUrl
    except (TypeError, IndexError, AttributeError):  # no media, or media without a fullUrl
        tweetMedia = tweet.media  # or None or '' or any default value
    attributes_container.append([tweet.user.username, tweet.date, tweet.likeCount, tweet.sourceLabel, tweet.content, tweetMedia])
and then you'll have the URLs (if there are any) for each tweet row.
If you want it all inside the append statement, you can just change that to:
attributes_container.append([
    tweet.user.username, tweet.date, tweet.likeCount,
    tweet.sourceLabel, tweet.content,
    (tweet.media[0].fullUrl if tweet.media
     and hasattr(tweet.media[0], 'fullUrl')
     else tweet.media)
])
instead of adding the try...except.
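Since the question mentions that media can contain videos as well as photos: if you want only photo URLs and want to skip videos entirely, you could filter by media type. A minimal sketch, assuming your snscrape version exposes a Photo class on the twitter module (the Photo(...) repr in the question suggests it does):
photo_urls = []
for item in (tweet.media or []):
    if isinstance(item, sntwitter.Photo):  # skips Video and Gif entries
        photo_urls.append(item.fullUrl)    # or item.previewUrl
attributes_container.append([tweet.user.username, tweet.date, tweet.likeCount, tweet.sourceLabel, tweet.content, photo_urls])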

Related

How to search a specific country's tweets with Tweepy client.search_recent_tweets()

Y'all, I'm trying to figure out how to filter for a specific country's tweets using search_recent_tweets. I take a country name as input, use pycountry to get the 2-character country code, and then I can put some sort of location filter either in my query or in the search_recent_tweets params. Nothing I have tried so far in either has worked.
import tweepy
from tweepy import OAuthHandler
from tweepy import API
import pycountry as pyc
import json

# upload token
BEARER_TOKEN = 'XXXXXXXXX'

# get tweets
client = tweepy.Client(bearer_token=BEARER_TOKEN)

# TAKE USER INPUT
countryQuery = input("Find recent tweets about travel in a certain country (input country name): ")
keyword = 'women safe'  # gets tweets containing women and safe for that country (safe will catch safety)

# get country code to plug in as param in search_recent_tweets
country_code = str(pyc.countries.search_fuzzy(countryQuery)[0].alpha_2)

# get 100 recent tweets containing keywords and from location = countryQuery
query = str(keyword + ' place_country=' + str(countryQuery) + ' -is:retweet')  # search for keyword and no retweets
posts = client.search_recent_tweets(query=query, max_results=100, tweet_fields=['id', 'text', 'entities', 'author_id'])
# expansions=geo.place_id, place.fields=[country_code],
# filter posts to remove retweets

# export tweets to json
with open('twitter.json', 'w') as fp:
    for tweet in posts.data:
        json.dump(tweet.data, fp)
        fp.write('\n')
        print("* " + str(tweet.text))
I have tried variations of:
query = str(keyword + ' -is:retweet')  # search for keyword and no retweets
posts = client.search_recent_tweets(query=query, place_fields=[str(countryQuery), country_code], max_results=100, tweet_fields=['id', 'text', 'entities', 'author_id'])
and:
query = str(keyword + ' place.fields=' + str(countryQuery) + ',' + country_code + ' -is:retweet')  # search for keyword and no retweets
posts = client.search_recent_tweets(query=query, max_results=100, tweet_fields=['id', 'text', 'entities', 'author_id'])
These either end up pulling NoneType tweets (i.e., nothing) or cause the error:
"The place.fields query parameter value [Germany] is not one of [contained_within,country,country_code,full_name,geo,id,name,place_type]"
The documentation for search_recent_tweets makes it look like place.fields / place_fields / place_country should be supported.
Any advice would help!
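A hedged note, since no answer is recorded here: in the Twitter API v2, place_country is a search operator that belongs inside the query string itself (written with a colon, not an equals sign), while place.fields only selects which fields are returned for expanded place objects; it is not a filter. At the time of writing, the place_country operator required Academic Research access, so it may be rejected on other access tiers. A minimal sketch:
# place_country is a query operator: 'place_country:DE', not 'place_country=Germany'
# (assumes your access level allows this operator)
query = keyword + ' place_country:' + country_code + ' -is:retweet'
posts = client.search_recent_tweets(query=query, max_results=100,
                                    tweet_fields=['id', 'text', 'entities', 'author_id'])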

Passing a parameter in a REST API request from another file or list variable - Python

I'm new to Python and APIs.
I have a list of values like below:
typeid = ['1', '12', '32', '1000', '9']
I have to pass these values as a parameter in an API request, so that it takes one typeid at a time and appends the JSON. I have the following code, but I'm not sure how it will move from one value to the next:
# activity type ids are stored as follows in another .py file: typeid=['1','12','32','1000','9']
# importing the file in the main program file:
from typeid_list import activitytypeids

act1 = requests.get(host + '/rest/v1/activities.json',
                    params={
                        'activityTypeIds': activitytypeids[0]
                    }).text
json_obj = json.loads(act1)
results.append(json_obj)
more_result = json_obj['moreResult']
while True:
    act1 = requests.get(host + '/rest/v1/activities.json',
                        params={
                            'activityTypeIds': activitytypeids[0]
                        }).text
    json_obj = json.loads(act1)
    results.append(json_obj)
    more_result = json_obj['moreResult']
    if not more_result:
        break
How do I pass the activity type ids in the request params one by one, so that I get the results for all type ids?
Take the code that gets one id and put it in a function that accepts an activity_id, and change every activitytypeids[0] to just activity_id:
from typeid_list import activitytypeids

def get_activity_id(activity_id):
    act1 = requests.get(host + '/rest/v1/activities.json',
                        params={
                            'activityTypeIds': activity_id
                        })
    return act1.json()
Then you can just iterate over your list:
results = [get_activity_id(id) for id in activitytypeids]
That said, it seems very surprising that a parameter named activityTypeIds only accepts one id... based on nothing more than the name, I would very much expect it to accept a list.
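Following up on that guess with a hedged sketch (check the API's docs for the exact convention; neither form is confirmed by the question): many REST APIs accept multiple values either comma-separated in one parameter, or as a repeated parameter, which requests produces automatically when given a list:
# comma-separated: activityTypeIds=1,12,32,1000,9
act = requests.get(host + '/rest/v1/activities.json',
                   params={'activityTypeIds': ','.join(activitytypeids)})
# repeated: activityTypeIds=1&activityTypeIds=12&... (requests encodes a list this way)
act = requests.get(host + '/rest/v1/activities.json',
                   params={'activityTypeIds': activitytypeids})
results = act.json()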

Running multiple querys on YouTube API by looping through title columns of CSV python

I am using YouTube's API to get comment data from a list of music videos. Right now I manually type in my query, write the data to a csv file, and repeat for each song, like so:
query = "song title"
query_results = service.search().list(
    part='snippet',
    q=query,
    order='relevance',  # You can consider using viewCount
    maxResults=20,
    type='video',  # Channels might appear in search results
    relevanceLanguage='en',
    safeSearch='moderate',
).execute()
What I would like to do is use the title and artist columns from a csv file containing the song titles I'm gathering data for, so I can run the program once without having to type in each song manually.
A friend suggested using something like this:
import pandas as pd

data = pd.read_csv("metadata.csv")

def songtitle():
    for i in data.index:
        title = data.loc[i, 'title']
        title = '\"' + title + '\"'
        artist = data.loc[i, 'artist']
    return (artist, title)
But I am not sure how to make this work: when I run it, it only returns the final row of data, and even if it ran correctly, I don't know how to get the entire program to repeat itself for every new song.
You can save the song titles and artists to lists, then loop over the titles to get the details:
def get_songTitles():
    data = pd.read_csv("metadata.csv")
    return data['artist'].tolist(), data['title'].tolist()

artist, song_titles = get_songTitles()

for song in song_titles:
    query_results = service.search().list(
        part='snippet',
        q=song,
        order='relevance',  # You can consider using viewCount
        maxResults=20,
        type='video',  # Channels might appear in search results
        relevanceLanguage='en',
        safeSearch='moderate',
    ).execute()
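If the artist should be part of each query as well (the question reads the artist column, but the loop above only uses the title), a hedged tweak is to zip the two lists and search for "artist title":
artists, song_titles = get_songTitles()
for artist, song in zip(artists, song_titles):
    query = artist + ' ' + song  # e.g. "Queen Bohemian Rhapsody"
    query_results = service.search().list(
        part='snippet',
        q=query,
        order='relevance',
        maxResults=20,
        type='video',
        relevanceLanguage='en',
        safeSearch='moderate',
    ).execute()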

Parsing the GitHub API: getting "string indices must be integers" error

I need to loop through commits and get name, date, and message info from the GitHub API:
https://api.github.com/repos/droptable461/Project-Project-Management/commits
I have tried many different things, but I keep getting stuck on a "string indices must be integers" error:
def git():
    # name, date, message
    # https://api.github.com/repos/droptable461/Project-Project-Management/commits
    # commit { author { name and date
    # commit { message
    # with urlopen('https://api.github.com/repos/droptable461/Project-Project-Management/commits') as response:
    #     source = response.read()
    #     data = json.loads(source)
    #     state = []
    #     for state in data['committer']:
    #         state.append(state['name'])
    #     print(state)
    link = 'https://api.github.com/repos/droptable461/Project-Project-Management/events'
    r = requests.get('https://api.github.com/repos/droptable461/Project-Project-Management/commits')
    # print(r)
    # one = r['commit']
    # print(one)
    for item in r.json():
        for c in item['commit']['committer']:
            print(c['name'], c['date'])
    return 'suc'
I need to get the person who made each commit, the date, and the message.
item['commit']['committer'] is a dictionary object, and therefore the line
for c in item['commit']['committer']:
iterates over the dictionary's keys. Since you are then calling [] with a string index on a string (each key), you get the error.
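To see why, a minimal illustration (hypothetical data, just to show the failure mode):
committer = {'name': 'droptable461', 'date': '2019-01-01T00:00:00Z'}
for c in committer:
    print(c)      # prints the keys: 'name', then 'date'
    # c['name']   # TypeError: string indices must be integers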
Instead, that code should look more like:
import requests

def git():
    r = requests.get('https://api.github.com/repos/droptable461/Project-Project-Management/commits')
    for item in r.json():
        committer = item['commit']['committer']
        print(committer['name'])
        print(committer['date'])
        print(item['commit']['message'])
    return 'suc'
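A hedged side note: this endpoint returns only the first page of commits (30 by default). If the repository has more, the standard GitHub pagination parameters page through them:
r = requests.get(
    'https://api.github.com/repos/droptable461/Project-Project-Management/commits',
    params={'per_page': 100, 'page': 1},
)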

Youtube API v3 and python to generate list of views/likes on own youtube videos

I'm trying to get a Raspberry Pi 3, Python-based birdhouse to read the popularity of its own uploaded videos (which lets it deduce which ones should be deleted, to avoid keeping hundreds of uploaded files).
I thought the best way to read the number of views/likes was to use yt_analytics_report.py, but it always returns 0 values. When I input:
$ python yt_analytics_report.py --filters="video==bb_o--nP1mg"
or
$ python yt_analytics_report.py --filters="channel==UCGXIq7h2UeFh9RhsSEsFsiA"
the output is:
$ python yt_analytics_report.py --filters="video==bb_o--nP1mg"
{'metrics': 'views,estimatedMinutesWatched,averageViewDuration', 'filters': 'video==bb_o--nP1mg', 'ids': 'channel==MINE', 'end_date': '2018-01-12', 'start_date': '2018-01-06'}
Please visit this URL to authorize this application: [note: here was the URL with the auth sequence etc., which I acknowledged]
followed by the result:
views  estimatedMinutesWatched  averageViewDuration
0.0    0.0                      0.0
I'm new to this; for the last 3 days I've been testing a variety of filters, but the result is always the same. I guess I'm doing something severely wrong.
The (auto sensor triggered) video uploads work excellently, so I presume the root cause is related to the way I'm using the yt-analytics example.
Any suggestions on the root cause, or alternative methods to retrieve views/likes of self-uploaded videos, are appreciated.
After a few days of trying, I have found a solution for generating, with Python and the YouTube API v3, a list that contains the views, likes, etc. of the uploaded videos of my own YouTube channel.
I would like to share the complete code in case anyone is facing the same challenge. The code contains remarks and referrals to additional information.
Please be aware that using the API consumes API credits. This implies that if you run this script continuously or often, you can run out of the daily maximum number of API credits set by Google.
# This Python 2.7.14 example shows how to retrieve, with YouTube API v3, a list of uploaded YouTube videos in a channel, and also
# shows additional statistics of each individual video, such as number of views, likes etc.
# Please notice that YOU HAVE TO change API_KEY and the YouTube channelID.
# Have a look at the referred examples to understand how the API works.
#
# The code consists of two parts:
# - The first part queries the videos in a channel and stores them in a list
# - The second part queries each individual video in detail
#
# Credits to the Coding 101 team, the guy previously guiding me to a query, and the Google API explorer, who/which got me on track.
#
# RESULTING EXAMPLE OUTPUT: The output will look a bit like this:
#
# https://www.youtube.com/watch?v=T3U2oz_Y8T0
# Upload date: 2018-01-13T09:43:27.000Z
# Number of views: 8
# Number of likes: 2
# Number of dislikes: 0
# Number of favorites:0
# Number of comments: 0
#
# https://www.youtube.com/watch?v=EFyC8nhusR8
# Upload date: 2018-01-06T14:24:34.000Z
# Number of views: 6
# Number of likes: 2
# Number of dislikes: 0
# Number of favorites:0
# Number of comments: 0
#
import urllib   # importing to use its urlencode function
import urllib2  # for making http requests
import json     # for decoding a JSON response

API_KEY = 'PLACE YOUR OWN YOUTUBE API KEY HERE'  # What? How? Learn here: https://www.youtube.com/watch?v=JbWnRhHfTDA
ChannelIdentifier = 'PLACE YOUR OWN YOUTUBE channelID HERE'  # What? How? Learn here: https://www.youtube.com/watch?v=tf42K4pPWkM

# This first part will query the list of videos uploaded to a specific channel.
# The identification is done through the ChannelIdentifier which you have defined as a variable.
# The results from this first part are stored in the list videoMetadata. This is used in the second part of the code below.
#
# This code is based on a very good example from Coding 101 which you can find here: https://www.youtube.com/watch?v=_M_wle0Iq9M
#
url = 'https://www.googleapis.com/youtube/v3/search?part=snippet&channelId=' + ChannelIdentifier + '&maxResults=50&type=video&key=' + API_KEY
response = urllib2.urlopen(url)  # makes the call to YouTube
videos = json.load(response)     # decodes the response so we can work with it
videoMetadata = []               # declaring our list
for video in videos['items']:
    if video['id']['kind'] == 'youtube#video':
        videoMetadata.append(video['id']['videoId'])  # appends each videoId to our list

# In this second part, a loop runs through the list videoMetadata.
# During each step the details of a specific video are retrieved and displayed.
# The structure of the API return can be tested with the API explorer (which you can execute without OAuth):
# https://developers.google.com/apis-explorer/#p/youtube/v3/youtube.videos.list?part=snippet%252CcontentDetails%252Cstatistics&id=Ks-_Mh1QhMc&_h=1&
#
for metadata in videoMetadata:
    print "https://www.youtube.com/watch?v=" + metadata  # here the videoId is printed
    SpecificVideoID = metadata
    SpecificVideoUrl = 'https://www.googleapis.com/youtube/v3/videos?part=snippet%2CcontentDetails%2Cstatistics&id=' + SpecificVideoID + '&key=' + API_KEY
    response = urllib2.urlopen(SpecificVideoUrl)  # makes the call for this specific video
    videos = json.load(response)                  # decodes the response so we can work with it
    for video in videos['items']:
        if video['kind'] == 'youtube#video':
            print "Upload date: " + video['snippet']['publishedAt']       # upload date of the specific video
            print "Number of views: " + video['statistics']['viewCount']  # number of views of the specific video
            print "Number of likes: " + video['statistics']['likeCount']  # etc.
            print "Number of dislikes: " + video['statistics']['dislikeCount']
            print "Number of favorites:" + video['statistics']['favoriteCount']
            print "Number of comments: " + video['statistics']['commentCount']
            print "\n"
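One caveat with the listing above: the search call returns at most 50 results per page, so a channel with more uploads needs paging. A hedged sketch of the paging loop, in the same Python 2 style, using the nextPageToken field the search endpoint returns:
videoMetadata = []
pageToken = ''
while True:
    videos = json.load(urllib2.urlopen(url + pageToken))  # url as defined above
    for video in videos['items']:
        if video['id']['kind'] == 'youtube#video':
            videoMetadata.append(video['id']['videoId'])
    if 'nextPageToken' in videos:
        pageToken = '&pageToken=' + videos['nextPageToken']
    else:
        break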
Building on Sefo's answer above, I was able to clean up the outputs a bit.
The first function creates a list of relevant videos (you could replace this however you want), and the second iterates through this list and grabs the statistics and basic text data associated with each individual video.
The output is a list of dictionaries, perfect for conversion into a pandas DataFrame.
def youtube_search_list(q, max_results=10):
    youtube = build(YOUTUBE_API_SERVICE_NAME, YOUTUBE_API_VERSION,
                    developerKey=DEVELOPER_KEY)
    # Call the search.list method to retrieve results matching the specified
    # query term.
    search_response = youtube.search().list(
        q=q,
        part='id,snippet',
        maxResults=max_results,
        order='viewCount'
    ).execute()
    return search_response

def youtube_search_video(q='spinners', max_results=10):
    youtube = build(YOUTUBE_API_SERVICE_NAME, YOUTUBE_API_VERSION,
                    developerKey=DEVELOPER_KEY)
    # Return list of matching records up to max_results
    search_result = youtube_search_list(q, max_results)
    videos_list = []
    for search_result in search_result.get("items", []):
        if search_result["id"]["kind"] == 'youtube#video':
            temp_dict_ = {}
            # Available from initial search
            temp_dict_['title'] = search_result['snippet']['title']
            temp_dict_['vidId'] = search_result['id']['videoId']
            # Secondary call to find statistics results for individual video
            response = youtube.videos().list(
                part='statistics, snippet',
                id=search_result['id']['videoId']
            ).execute()
            response_statistics = response['items'][0]['statistics']
            response_snippet = response['items'][0]['snippet']
            snippet_list = ['publishedAt', 'channelId', 'description',
                            'channelTitle', 'tags', 'categoryId',
                            'liveBroadcastContent', 'defaultLanguage']
            for val in snippet_list:
                try:
                    temp_dict_[val] = response_snippet[val]
                except KeyError:
                    # Not stored if not present
                    temp_dict_[val] = 'xxNoneFoundxx'
            stats_list = ['favoriteCount', 'viewCount', 'likeCount',
                          'dislikeCount', 'commentCount']
            for val in stats_list:
                try:
                    temp_dict_[val] = response_statistics[val]
                except KeyError:
                    # Not stored if not present
                    temp_dict_[val] = 'xxNoneFoundxx'
            # add back to main list
            videos_list.append(temp_dict_)
    return videos_list
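A short usage note for the functions above; DEVELOPER_KEY, YOUTUBE_API_SERVICE_NAME and YOUTUBE_API_VERSION are assumed to be defined as in the surrounding answers:
from googleapiclient.discovery import build
import pandas as pd

DEVELOPER_KEY = 'paste your API key here'
YOUTUBE_API_SERVICE_NAME = 'youtube'
YOUTUBE_API_VERSION = 'v3'

df = pd.DataFrame(youtube_search_video(q='spinners', max_results=10))
print(df[['title', 'viewCount', 'likeCount']].head())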
This code will help you a ton; I was struggling with this for a long time. Just provide the API key, the YouTube channel name, and the channel id in the search list:
from googleapiclient.discovery import build

DEVELOPER_KEY = "paste your API key here"
YOUTUBE_API_SERVICE_NAME = "youtube"
YOUTUBE_API_VERSION = "v3"

title = []
channelId = []
channelTitle = []
categoryId = []
videoId = []
viewCount = []
likeCount = []
dislikeCount = []
commentCount = []
favoriteCount = []
tags = []

max_search = 50
order = "relevance"
token = None
location = None
location_radius = None

youtube = build(YOUTUBE_API_SERVICE_NAME, YOUTUBE_API_VERSION,
                developerKey=DEVELOPER_KEY)

# this request gets the videos of the channel by its name
search_result = youtube.search().list(q="put the channel name here", type="video",
                                      pageToken=token, order=order, part="id,snippet",
                                      maxResults=max_search, location=location,
                                      locationRadius=location_radius,
                                      channelId='put the channel ID here').execute()

for search_result in search_result.get("items", []):
    if search_result["id"]["kind"] == 'youtube#video':
        title.append(search_result['snippet']['title'])  # the title of the video
        videoId.append(search_result['id']['videoId'])   # the ID of the video
        # a second request, because the statistics and snippet parts require it
        response = youtube.videos().list(part='statistics, snippet',
                                         id=search_result['id']['videoId']).execute()
        channelId.append(response['items'][0]['snippet']['channelId'])        # channel ID, constant here
        channelTitle.append(response['items'][0]['snippet']['channelTitle'])  # channel title, also constant
        categoryId.append(response['items'][0]['snippet']['categoryId'])      # the category of the video
        favoriteCount.append(response['items'][0]['statistics']['favoriteCount'])  # the favourite count of the video
        viewCount.append(response['items'][0]['statistics']['viewCount'])          # the view count
        # the likes and dislikes can be missing, which requires the if/else
        # instead of just behaving like viewCount
        if 'likeCount' in response['items'][0]['statistics'].keys():  # checks for a like count, stores 0 if absent
            likeCount.append(response['items'][0]['statistics']['likeCount'])
        else:
            likeCount.append('0')
        if 'dislikeCount' in response['items'][0]['statistics'].keys():  # checks for a dislike count, stores 0 if absent
            dislikeCount.append(response['items'][0]['statistics']['dislikeCount'])
        else:
            dislikeCount.append('0')
        if 'commentCount' in response['items'][0]['statistics'].keys():  # checks for a comment count, stores 0 if absent
            commentCount.append(response['items'][0]['statistics']['commentCount'])
        else:
            commentCount.append('0')
        if 'tags' in response['items'][0]['snippet'].keys():  # checks for tags, stores 0 if absent
            tags.append(response['items'][0]['snippet']['tags'])
        else:
            tags.append('0')

youtube_dict = {
    'tags': tags,
    'channelId': channelId,
    'channelTitle': channelTitle,
    'categoryId': categoryId,
    'title': title,
    'videoId': videoId,
    'viewCount': viewCount,
    'likeCount': likeCount,
    'dislikeCount': dislikeCount,
    'commentCount': commentCount,
    'favoriteCount': favoriteCount
}

for x in youtube_dict:
    print(x)
    for y in youtube_dict[x]:
        print(y)
Good luck!
