I'm using Raspberry Pi 3 & Python 2.7.9.
I'm trying to using youtube api. here's the code I use:
#!/usr/bin/python
from apiclient.discovery import build
from apiclient.errors import HttpError
from oauth2client.tools import argparser
import json
import urllib
# Set DEVELOPER_KEY to the API key value from the APIs & auth > Registered apps
# tab of
# https://cloud.google.com/console
# Please ensure that you have enabled the YouTube Data API for your project.
# API key from the Google Cloud Console (value redacted by the author).
DEVELOPER_KEY = "REPLACE_ME"
YOUTUBE_API_SERVICE_NAME = "youtube"
YOUTUBE_API_VERSION = "v3"
# NOTE(review): the Freebase search API has been shut down by Google, so this
# endpoint no longer returns JSON — likely the cause of the reported error.
FREEBASE_SEARCH_URL = "https://www.googleapis.com/freebase/v1/search?%s"
def get_topic_id(options):
# Retrieve a list of Freebase topics associated with the provided query term.
freebase_params = dict(query=options.query, key=DEVELOPER_KEY)
freebase_url = FREEBASE_SEARCH_URL % urllib.urlencode(freebase_params)
freebase_response = json.loads(urllib.urlopen(freebase_url).read())
if len(freebase_response["result"]) == 0:
exit("No matching terms were found in Freebase.")
# Display the list of matching Freebase topics.
mids = []
index = 1
print "The following topics were found:"
for result in freebase_response["result"]:
mids.append(result["mid"])
print " %2d. %s (%s)" % (index, result.get("name", "Unknown"),
result.get("notable", {}).get("name", "Unknown"))
index += 1
# Display a prompt for the user to select a topic and return the topic ID
# of the selected topic.
mid = None
while mid is None:
index = raw_input("Enter a topic number to find related YouTube %ss: " %
options.type)
try:
mid = mids[int(index) - 1]
except ValueError:
pass
return mid
def youtube_search(mid, options):
youtube = build(YOUTUBE_API_SERVICE_NAME, YOUTUBE_API_VERSION,
developerKey=DEVELOPER_KEY)
# Call the search.list method to retrieve results associated with the
# specified Freebase topic.
search_response = youtube.search().list(
topicId=mid,
type=options.type,
part="id,snippet",
maxResults=options.max_results
).execute()
# Print the title and ID of each matching resource.
for search_result in search_response.get("items", []):
if search_result["id"]["kind"] == "youtube#video":
print "%s (%s)" % (search_result["snippet"]["title"],
search_result["id"]["videoId"])
elif search_result["id"]["kind"] == "youtube#channel":
print "%s (%s)" % (search_result["snippet"]["title"],
search_result["id"]["channelId"])
elif search_result["id"]["kind"] == "youtube#playlist":
print "%s (%s)" % (search_result["snippet"]["title"],
search_result["id"]["playlistId"])
if __name__ == "__main__":
    # Command-line flags: the Freebase query term, the number of results to
    # request, and which YouTube resource type to search for.
    argparser.add_argument("--query", help="Freebase search term", default="Google")
    argparser.add_argument("--max-results", help="Max YouTube results",
                           default=25)
    argparser.add_argument("--type",
                           help="YouTube result type: video, playlist, or channel", default="channel")
    args = argparser.parse_args()
    mid = get_topic_id(args)
    try:
        youtube_search(mid, args)
    except HttpError, e:
        # Python 2 except syntax; e.resp.status holds the HTTP status code.
        print "An HTTP error %d occurred:\n%s" % (e.resp.status, e.content)
And I got error:
ValueError: No JSON object could be decoded
I really don't know how to handle this... does anyone know how to fix it?
I think it's related with OAuth 2.0 json file, but not sure... and I don't know how to use it
Related
Im trying to stop my RDS cluster using Lambda and Im using the python code shown below to target the clusters tags,
However when trying to create a CloudFormation stack, AWS is telling me that the YAML is not well formed.
For example it tells that Key = 'True' is not well formed.
If I remove that bit of code it then tells me the next bit of code, client = boto3.client('rds') is also not well formed.
I've put this code into an online Python validator and it didn't report any issues.
Can anyone help with this? Thanks
import boto3

# Tag used to select clusters: the loop below matches a tag whose Key equals
# `Key` and whose Value equals `Tag`.
# NOTE(review): the variable names look swapped (Key='True', Tag='AutoPower');
# confirm the intended tag is Key='AutoPower', Value='True'.
Tag = 'AutoPower'
Key = 'True'

client = boto3.client('rds')
# Bug fix: the boto3 RDS method is describe_db_clusters (plural) and the
# response key is 'DBClusters'; the original names raise AttributeError/KeyError.
response = client.describe_db_clusters()
for resp in response['DBClusters']:
    db_cluster_arn = resp['DBClusterArn']
    tag_response = client.list_tags_for_resource(ResourceName=db_cluster_arn)
    for tags in tag_response['TagList']:
        if tags['Key'] == str(Key) and tags['Value'] == str(Tag):
            status = resp['DBClusterStatus']
            ClusterID = resp['DBClusterIdentifier']
            # Bug fix: InstanceID was never defined (NameError); print the
            # cluster identifier instead.
            print(ClusterID)
            # Toggle the cluster: stop if running, start if stopped.
            if status == 'available':
                print("shutting down %s " % ClusterID)
                client.stop_db_cluster(DBClusterIdentifier=ClusterID)
            elif status == 'stopped':
                print("starting up %s " % ClusterID)
                client.start_db_cluster(DBClusterIdentifier=ClusterID)
            else:
                print("The database is " + status + " status!")
You are most likely having issues due to indentation issues between CloudFormation YAML and the inline function code. Here is an example of CloudFormation YAML that would use your code inline to create a function:
Resources:
  YourFunction:
    Type: AWS::Lambda::Function
    Properties:
      Code:
        # ZipFile takes the function source verbatim. Because Handler below is
        # "index.lambda_handler", the inline code must define a lambda_handler
        # function and import its own dependencies — the original inline body
        # had neither, so the function could not be invoked.
        ZipFile: |
          import boto3

          # NOTE(review): Key/Tag look swapped — the loop matches a tag whose
          # Key is 'True' and Value is 'AutoPower'; confirm intent.
          Tag = 'AutoPower'
          Key = 'True'

          def lambda_handler(event, context):
              client = boto3.client('rds')
              # Bug fix: the boto3 method is describe_db_clusters and the
              # response key is 'DBClusters'.
              response = client.describe_db_clusters()
              for resp in response['DBClusters']:
                  db_cluster_arn = resp['DBClusterArn']
                  tag_response = client.list_tags_for_resource(ResourceName=db_cluster_arn)
                  for tags in tag_response['TagList']:
                      if tags['Key'] == str(Key) and tags['Value'] == str(Tag):
                          status = resp['DBClusterStatus']
                          ClusterID = resp['DBClusterIdentifier']
                          # Bug fix: InstanceID was undefined (NameError).
                          print(ClusterID)
                          if status == 'available':
                              print("shutting down %s " % ClusterID)
                              client.stop_db_cluster(DBClusterIdentifier=ClusterID)
                          elif status == 'stopped':
                              print("starting up %s " % ClusterID)
                              client.start_db_cluster(DBClusterIdentifier=ClusterID)
                          else:
                              print("The database is " + status + " status!")
      Handler: index.lambda_handler
      Role: !Sub "arn:aws:iam::${AWS::AccountId}:role/YourRoleNameHere"
      Runtime: python3.9
      Description: This is an example of a function in CloudFormation
      FunctionName: Your_Function_Name
      MemorySize: 128
      Timeout: 180
In the python I am starting and I have a problem with the use of example scripts, however, there is a problem with the operation even using arguments when calling the script.
I will not say that this script solves a lot of my problems without servicing data writing.
And I will not say that it would be useful to run it :)
I've tried passing arguments when calling the script, but nothing works either way.
import httplib2
import os
import sys
import csv
from apiclient.discovery import build
from apiclient.errors import HttpError
from oauth2client.client import flow_from_clientsecrets
from oauth2client.file import Storage
import argparse
from oauth2client.tools import argparser, run_flow
# See: https://developers.google.com/api-client-library/python/guide/aaa_client_secrets
# OAuth client secrets file downloaded from the Google Developers Console.
CLIENT_SECRETS_FILE = "client_secret.json"
# Read-only scopes for the YouTube Data and YouTube Analytics APIs.
YOUTUBE_SCOPES = ["https://www.googleapis.com/auth/youtube.readonly",
                  "https://www.googleapis.com/auth/yt-analytics.readonly"]
YOUTUBE_API_SERVICE_NAME = "youtube"
YOUTUBE_API_VERSION = "v3"
YOUTUBE_ANALYTICS_API_SERVICE_NAME = "youtubeAnalytics"
YOUTUBE_ANALYTICS_API_VERSION = "v1"
# Message shown when client_secret.json is missing or malformed.
MISSING_CLIENT_SECRETS_MESSAGE = """
WARNING: Please configure OAuth 2.0
To make this sample run you will need to populate the client_secrets.json file
found at:
%s
with information from the Developers Console
https://console.developers.google.com/
For more information about the client_secrets.json file format, please visit:
https://developers.google.com/api-client-library/python/guide/aaa_client_secrets
""" % os.path.abspath(os.path.join(os.path.dirname(__file__),
                                   CLIENT_SECRETS_FILE))
def get_authenticated_services(args):
    """Authorize via OAuth2 and build a YouTube Analytics API client.

    Cached credentials are stored in "<script name>-oauth2.json" next to the
    script; if absent or invalid, run_flow starts the interactive OAuth
    consent flow in a browser.
    """
    flow = flow_from_clientsecrets(CLIENT_SECRETS_FILE,
                                   scope=" ".join(YOUTUBE_SCOPES),
                                   message=MISSING_CLIENT_SECRETS_MESSAGE)
    storage = Storage("%s-oauth2.json" % sys.argv[0])
    credentials = storage.get()
    if credentials is None or credentials.invalid:
        credentials = run_flow(flow, storage, args)
    http = credentials.authorize(httplib2.Http())
    youtube_analytics = build(YOUTUBE_ANALYTICS_API_SERVICE_NAME,
                              YOUTUBE_ANALYTICS_API_VERSION, http=http)
    return youtube_analytics
def run_analytics_report(youtube_analytics, options, count):
    """Run one Analytics API query and write/append rows to results.csv.

    On the first call (count == 0) the CSV is (re)created and the column
    headers are written; on later calls rows are appended.
    """
    # Call the Analytics API to retrieve a report. For a list of available
    # reports, see:
    # https://developers.google.com/youtube/analytics/v1/channel_reports
    analytics_query_response = youtube_analytics.reports().query(
        ids="channel==%s" % options.channel_id,
        metrics=options.metrics,
        dimensions=options.dimensions,
        filters=options.filters,
        start_date=options.start_date,
        end_date=options.end_date,
        #max_results=options.max_results,
        sort=options.sort
    ).execute()
    print "Analytics Data for Channel %s" % options.channel_id
    if count == 0:
        # First call: create the file and write the header row.
        with open('results.csv', 'w') as csv_out:
            csvWriter=csv.writer(csv_out, delimiter=',', lineterminator = '\n')
            headers = [ch["name"] for ch in analytics_query_response.get("columnHeaders", [])]
            headers.append("country")
            csvWriter.writerow(headers)
    else:
        # Subsequent calls: append this query's rows.
        with open('results.csv', 'a') as csv_out:
            csvWriter=csv.writer(csv_out, delimiter=',', lineterminator = '\n')
            for row in analytics_query_response.get("rows", []):
                values = []
                for value in row:
                    values.append(str(value))
                # options.filters is "country==XX": characters 9 and 10 are
                # the two-letter country code being appended to each row.
                values.append((options.filters[9]+""+options.filters[10]))
                csvWriter.writerow(values)
    # NOTE(review): with this structure the rows returned on the very first
    # call (count == 0) are never written — only the headers. The original
    # paste had no indentation, so confirm this matches the intended layout.
    print "Results exported to csv"
if __name__ == "__main__":
count = 0
now = datetime.now()
one_day_ago = (now - timedelta(days=1)).strftime("%Y-%m-%d")
one_week_ago = (now - timedelta(days=7)).strftime("%Y-%m-%d")
f = open('countries.csv', 'rb')
reader = csv.reader(f)
for row in reader:
argparser = argparse.ArgumentParser()
argparser.add_argument("--channel-id", help="Channel ID",
default="UCJ5v_MCY6GNUBTO8-D3XoAg")
argparser.add_argument("--metrics", help="Report metrics",
default="views,estimatedMinutesWatched")
argparser.add_argument("--dimensions", help="Report dimensions",
default="deviceType")
argparser.add_argument("--filters", help="Report filters",
default="country==" + ''.join(row))
argparser.add_argument("--start-date", default=one_week_ago,
help="Start date, in YYYY-MM-DD format")
argparser.add_argument("--end-date", default=one_day_ago,
help="End date, in YYYY-MM-DD format")
#argparser.add_argument("--max-results", help="Max results", default=10)
argparser.add_argument("--sort", help="Sort order", default="-views")
args = argparser.parse_args()
youtube_analytics = get_authenticated_services(args)
try:
run_analytics_report(youtube_analytics, args, count)
count = count + 1
except HttpError, e:
print "An HTTP error %d occurred:\n%s" % (e.resp.status, e.content)
I do not really know how to phrase my question.
When running the script even giving arguments shows the following message?
File "yt-mario.py", line 91
print "Analytics Data for Channel %s" % options.channel_id
^
SyntaxError: Missing parentheses in call to 'print'. Did you mean print("Analytics Data for Channel %s" % options.channel_id)?
SyntaxError: Missing parentheses in call to 'print'. Did you mean print("Analytics Data for Channel %s" % options.channel_id)?
This code is probably Python2.7, where you don't need to put parenthesis around the print statement.
Try running this using Python 2.7.
I've recently opened a question about the GA Management API where I explained that the example script there didn't work for me. So, looking at the Google Analytics GitHub, I found a totally different example which doesn't work either, but I get a different error:
The client secrets were invalid:
Missing property "client_secret" in a client type of "installed".
WARNING: Please configure OAuth 2.0
To make this sample run you will need to populate the client_secrets.json file
found at:
client_secrets.json
with information from the APIs Console <https://code.google.com/apis/console>.
However I did all this perfectly well, I have my client_secrets.json file. what are the differences between client_secrets authentification and p12 authentification ? Is it possible that the Management API work only with client_secrets authentification ? Does someone have a working code example ?
Thanks.
Here is the script :
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2014 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Reference command-line example for Google Analytics Management API v3.
This application demonstrates how to use the python client library to access
all the pieces of data returned by the Google Analytics Management API v3.
The application manages autorization by saving an OAuth2.0 token in a local
file and reusing the token for subsequent requests. It then traverses the
Google Analytics Management hiearchy. It first retrieves and prints all the
authorized user's accounts, next it prints all the web properties for the
first account, then all the profiles for the first web property and finally
all the goals for the first profile. The sample then prints all the
user's advanced segments.
Before You Begin:
Update the client_secrets.json file
You must update the clients_secrets.json file with a client id, client
secret, and the redirect uri. You get these values by creating a new project
in the Google APIs console and registering for OAuth2.0 for installed
applications: https://code.google.com/apis/console
Learn more about registering your analytics application here:
https://developers.google.com/analytics/devguides/config/mgmt/v3/mgmtAuthorization
Sample Usage:
$ python management_v3_reference.py
Also you can also get help on all the command-line flags the program
understands by running:
$ python management_v3_reference.py --help
"""
from __future__ import print_function
__author__ = 'api.nickm#gmail.com (Nick Mihailovski)'
import argparse
import sys
from googleapiclient.errors import HttpError
from googleapiclient import sample_tools
from oauth2client.client import AccessTokenRefreshError
def main(argv):
    """Authenticate, then walk the Management API hierarchy and print it."""
    # Authenticate and construct service.
    service, flags = sample_tools.init(
        argv, 'analytics', 'v3', __doc__, __file__,
        scope='https://www.googleapis.com/auth/analytics.readonly')
    # Traverse the Management hiearchy and print results or handle errors.
    try:
        traverse_hiearchy(service)
    except TypeError as error:
        # Handle errors in constructing a query.
        print(('There was an error in constructing your query : %s' % error))
    except HttpError as error:
        # Handle API errors.
        print(('Arg, there was an API error : %s : %s' %
               (error.resp.status, error._get_reason())))
    except AccessTokenRefreshError:
        # NOTE: the adjacent string literals concatenate without a space
        # ("re-run" + "the"), so the printed message runs the words together.
        print ('The credentials have been revoked or expired, please re-run'
               'the application to re-authorize')
def traverse_hiearchy(service):
    """Traverses the management API hiearchy and prints results.

    This retrieves and prints the authorized user's accounts. It then
    retrieves and prints all the web properties for the first account,
    retrieves and prints all the profiles for the first web property,
    and retrieves and prints all the goals for the first profile.
    Finally it prints the user's segments.

    Args:
      service: The service object built by the Google API Python client library.

    Raises:
      HttpError: If an error occured when accessing the API.
      AccessTokenRefreshError: If the current token was invalid.
    """
    accounts = service.management().accounts().list().execute()
    print_accounts(accounts)
    # Each level below only uses the FIRST item of the level above.
    if accounts.get('items'):
        firstAccountId = accounts.get('items')[0].get('id')
        webproperties = service.management().webproperties().list(
            accountId=firstAccountId).execute()
        print_webproperties(webproperties)
        if webproperties.get('items'):
            firstWebpropertyId = webproperties.get('items')[0].get('id')
            profiles = service.management().profiles().list(
                accountId=firstAccountId,
                webPropertyId=firstWebpropertyId).execute()
            print_profiles(profiles)
            if profiles.get('items'):
                firstProfileId = profiles.get('items')[0].get('id')
                goals = service.management().goals().list(
                    accountId=firstAccountId,
                    webPropertyId=firstWebpropertyId,
                    profileId=firstProfileId).execute()
                print_goals(goals)
    print_segments(service.management().segments().list().execute())
def print_accounts(accounts_response):
    """Print every account in an Accounts collection response."""
    print('------ Account Collection -------')
    print_pagination_info(accounts_response)
    print()
    items = accounts_response.get('items', [])
    for account in items:
        # (label, key) pairs, printed in the original order.
        for label, key in (('Account ID', 'id'),
                           ('Kind', 'kind'),
                           ('Self Link', 'selfLink'),
                           ('Account Name', 'name'),
                           ('Created', 'created'),
                           ('Updated', 'updated')):
            print('%s = %s' % (label, account.get(key)))
        child_link = account.get('childLink')
        print('Child link href = %s' % child_link.get('href'))
        print('Child link type = %s' % child_link.get('type'))
        print()
    if not items:
        print('No accounts found.\n')
def print_webproperties(webproperties_response):
    """Print every web property in a Webproperties collection response."""
    print('------ Web Properties Collection -------')
    print_pagination_info(webproperties_response)
    print()
    items = webproperties_response.get('items', [])
    for webproperty in items:
        # (label, key) pairs, printed in the original order.
        for label, key in (('Kind', 'kind'),
                           ('Account ID', 'accountId'),
                           ('Web Property ID', 'id'),
                           ('Internal Web Property ID', 'internalWebPropertyId'),
                           ('Website URL', 'websiteUrl'),
                           ('Created', 'created'),
                           ('Updated', 'updated'),
                           ('Self Link', 'selfLink')):
            print('%s = %s' % (label, webproperty.get(key)))
        for prefix, link in (('Parent', webproperty.get('parentLink')),
                             ('Child', webproperty.get('childLink'))):
            print('%s link href = %s' % (prefix, link.get('href')))
            print('%s link type = %s' % (prefix, link.get('type')))
        print()
    if not items:
        print('No webproperties found.\n')
def print_profiles(profiles_response):
    """Print every profile in a Profiles collection response."""
    print('------ Profiles Collection -------')
    print_pagination_info(profiles_response)
    print()
    items = profiles_response.get('items', [])
    for profile in items:
        # (label, key) pairs, printed in the original order.
        for label, key in (('Kind', 'kind'),
                           ('Account ID', 'accountId'),
                           ('Web Property ID', 'webPropertyId'),
                           ('Internal Web Property ID', 'internalWebPropertyId'),
                           ('Profile ID', 'id'),
                           ('Profile Name', 'name'),
                           ('Currency', 'currency'),
                           ('Timezone', 'timezone'),
                           ('Default Page', 'defaultPage'),
                           ('Exclude Query Parameters', 'excludeQueryParameters'),
                           ('Site Search Category Parameters',
                            'siteSearchCategoryParameters'),
                           ('Site Search Query Parameters',
                            'siteSearchQueryParameters'),
                           ('Created', 'created'),
                           ('Updated', 'updated'),
                           ('Self Link', 'selfLink')):
            print('%s = %s' % (label, profile.get(key)))
        for prefix, link in (('Parent', profile.get('parentLink')),
                             ('Child', profile.get('childLink'))):
            print('%s link href = %s' % (prefix, link.get('href')))
            print('%s link type = %s' % (prefix, link.get('type')))
        print()
    if not items:
        print('No profiles found.\n')
def print_goals(goals_response):
    """Prints all the goal info in the Goals collection.

    Args:
      goals_response: The response object returned from querying the Goals
          collection.
    """
    print('------ Goals Collection -------')
    print_pagination_info(goals_response)
    print()
    for goal in goals_response.get('items', []):
        print('Goal ID = %s' % goal.get('id'))
        print('Kind = %s' % goal.get('kind'))
        print('Self Link = %s' % goal.get('selfLink'))
        print('Account ID = %s' % goal.get('accountId'))
        print('Web Property ID = %s' % goal.get('webPropertyId'))
        print(('Internal Web Property ID = %s' %
               goal.get('internalWebPropertyId')))
        print('Profile ID = %s' % goal.get('profileId'))
        print('Goal Name = %s' % goal.get('name'))
        print('Goal Value = %s' % goal.get('value'))
        print('Goal Active = %s' % goal.get('active'))
        print('Goal Type = %s' % goal.get('type'))
        print('Created = %s' % goal.get('created'))
        print('Updated = %s' % goal.get('updated'))
        parent_link = goal.get('parentLink')
        print('Parent link href = %s' % parent_link.get('href'))
        print('Parent link type = %s' % parent_link.get('type'))
        # Print the goal details depending on the type of goal. Only the
        # first matching detail key is printed (elif chain).
        if goal.get('urlDestinationDetails'):
            print_url_destination_goal_details(
                goal.get('urlDestinationDetails'))
        elif goal.get('visitTimeOnSiteDetails'):
            print_visit_time_on_site_goal_details(
                goal.get('visitTimeOnSiteDetails'))
        elif goal.get('visitNumPagesDetails'):
            print_visit_num_pages_goal_details(
                goal.get('visitNumPagesDetails'))
        elif goal.get('eventDetails'):
            print_event_goal_details(goal.get('eventDetails'))
        print()
    if not goals_response.get('items'):
        print('No goals found.\n')
def print_url_destination_goal_details(goal_details):
    """Print the details of a URL Destination goal, including its steps."""
    print('------ Url Destination Goal -------')
    for label, key in (('Goal URL', 'url'),
                       ('Case Sensitive', 'caseSensitive'),
                       ('Match Type', 'matchType'),
                       ('First Step Required', 'firstStepRequired')):
        print('%s = %s' % (label, goal_details.get(key)))
    print('------ Url Destination Goal Steps -------')
    steps = goal_details.get('steps')
    for goal_step in steps or []:
        print('Step Number = %s' % goal_step.get('number'))
        print('Step Name = %s' % goal_step.get('name'))
        print('Step URL = %s' % goal_step.get('url'))
    if not steps:
        print('No Steps Configured')
def print_visit_time_on_site_goal_details(goal_details):
    """Print the comparison settings of a Visit Time On Site goal."""
    comparison_type = goal_details.get('comparisonType')
    comparison_value = goal_details.get('comparisonValue')
    print('------ Visit Time On Site Goal -------')
    # Second label is lowercase "comparison" in the original output.
    print('Comparison Type = %s' % comparison_type)
    print('comparison Value = %s' % comparison_value)
def print_visit_num_pages_goal_details(goal_details):
    """Print the comparison settings of a Visit Num Pages goal."""
    print('------ Visit Num Pages Goal -------')
    # Second label is lowercase "comparison" in the original output.
    for label, key in (('Comparison Type', 'comparisonType'),
                       ('comparison Value', 'comparisonValue')):
        print('%s = %s' % (label, goal_details.get(key)))
def print_event_goal_details(goal_details):
    """Print the details of an Event goal and each of its conditions."""
    print('------ Event Goal -------')
    print('Use Event Value = %s' % goal_details.get('useEventValue'))
    for event_condition in goal_details.get('eventConditions', []):
        event_type = event_condition.get('type')
        print('Type = %s' % event_type)
        if event_type not in ('CATEGORY', 'ACTION', 'LABEL'):
            # VALUE type condition: numeric comparison.
            print('Comparison Type = %s' % event_condition.get('comparisonType'))
            print('Comparison Value = %s' % event_condition.get('comparisonValue'))
        else:
            # String-matching condition.
            print('Match Type = %s' % event_condition.get('matchType'))
            print('Expression = %s' % event_condition.get('expression'))
def print_segments(segments_response):
    """Print every segment in a Segments collection response."""
    print('------ Segments Collection -------')
    print_pagination_info(segments_response)
    print()
    for segment in segments_response.get('items', []):
        # (label, key) pairs, printed in the original order.
        for label, key in (('Segment ID', 'id'),
                           ('Kind', 'kind'),
                           ('Self Link', 'selfLink'),
                           ('Name', 'name'),
                           ('Definition', 'definition'),
                           ('Created', 'created'),
                           ('Updated', 'updated')):
            print('%s = %s' % (label, segment.get(key)))
        print()
def print_pagination_info(management_response):
    """Print the common pagination fields of a Management API response."""
    for label, key in (('Items per page', 'itemsPerPage'),
                       ('Total Results', 'totalResults'),
                       ('Start Index', 'startIndex')):
        print('%s = %s' % (label, management_response.get(key)))
    # These only have values if other result pages exist.
    for label, key in (('Previous Link', 'previousLink'),
                       ('Next Link', 'nextLink')):
        link = management_response.get(key)
        if link:
            print('%s = %s' % (label, link))
if __name__ == '__main__':
    # Pass the full argv so sample_tools can parse its own flags.
    main(sys.argv)
client_secrets.json is Oauth2 authentication that being a user is first prompted for access to their account.
P12 file is service account authentication. Service accounts are like dummy users they are pre authorized which means there will be no prompt for user consent. To pre authorize a service account to Google Analytics the service account email address must be added as a user at the ACCOUNT level in the admin section of the google analytics website. Yes service accounts work with the management API.
Note: Oauth2 credentials and service account credentials are created on the google developer console when registering your application. It is now possible to get a .json file for service accounts instead of only the p12. However the code used to authenticate a service account and Oauth2 are different and these two .json files can not be interchanged. (hope that makes sense)
Further reading:
My tutorial on Oauth2 Google Developer Console Oauth2 credentials
My tutorial on service accounts Google Developer console service account
I'm playing around the Twitter API and am in the process of developing a script to pull all Tweets with a certain hashtag down to a local mongoDB. I have it working fine when I'm downloading tweets from users, but when downloading tweets from a hashtag I get:
return loads(fp.read(),
AttributeError: 'int' object has no attribute 'read'
Can anyone offer their infinite wisdom into how I could get this script to work?
To run, save it as a .py file, cd to the folder and run:
python twitter.py
Code:
__author__ = 'Tom Cusack'
import pymongo
import oauth2 as oauth
import urllib2, json
import sys, argparse, time
def oauth_header(url, consumer, token):
    """Build a signed OAuth 1.0 Authorization header value for a GET request."""
    params = {'oauth_version': '1.0',
              'oauth_nonce': oauth.generate_nonce(),
              'oauth_timestamp': int(time.time()),
              }
    req = oauth.Request(method = 'GET',url = url, parameters = params)
    req.sign_request(oauth.SignatureMethod_HMAC_SHA1(),consumer, token)
    # Return only the Authorization header value, UTF-8 encoded.
    return req.to_header()['Authorization'].encode('utf-8')
def main():
### Twitter Settings
numtweets = '32000'
verbose = 'store_true'
retweet = 'store_false'
CONSUMER_KEY = 'M7Xu9Wte0eIZvqhb4G9HnIn3G'
CONSUMER_SECRET = 'c8hB4Qwps2aODQUx7UsyzQuCRifEp3PKu6hPQll8wnJGIhbKgZ'
ACCESS_TOKEN = '3213221313-APuXuNjVMbRbZpu6sVbETbgqkponGsZJVT53QmG'
ACCESS_SECRET = 'BJHrqWC9ed3pA5oDstSMCYcUcz2pYF3DmJ7jcuDe7yxvi'
base_url = url = 'https://api.twitter.com/1.1/search/tweets.json?include_entities=true&count=200&q=#mongodb&include_rts=%s' % (retweet)
oauth_consumer = oauth.Consumer(key = CONSUMER_KEY, secret = CONSUMER_SECRET)
oauth_token = oauth.Token(key = ACCESS_TOKEN, secret = ACCESS_SECRET)
### Mongodb Settings
uri = 'mongodb://127.0.0.1:27017/SARKY'
if uri != None:
try:
conn = pymongo.MongoClient(uri)
print 'Pulling Tweets..'
except:
print 'Error: Unable to connect to DB. Check uri variable.'
return
uri_parts = pymongo.uri_parser.parse_uri(uri)
db = conn[uri_parts['database']]
db['twitter-harvest'].ensure_index('id_str')
### Helper Variables for Harvest
max_id = -1
tweet_count = 0
stream = 0
### Begin Harvesting
while True:
auth = oauth_header(url, oauth_consumer, oauth_token)
headers = {"Authorization": auth}
request = urllib2.Request(url, headers = headers)
try:
stream = urllib2.urlopen(request)
except urllib2.HTTPError, err:
if err.code == 404:
print 'Error: Unknown user. Check --user arg'
return
if err.code == 401:
print 'Error: Unauthorized. Check Twitter credentials'
return
tweet_list = json.load(stream)
if len(tweet_list) == 0:
print 'No tweets to harvest!'
return
if 'errors' in tweet_list:
print 'Hit rate limit, code: %s, message: %s' % (tweets['errors']['code'], tweets['errors']['message'])
return
if max_id == -1:
tweets = tweet_list
else:
tweets = tweet_list[1:]
if len(tweets) == 0:
print 'Finished Harvest!'
return
for tweet in tweets:
max_id = id_str = tweet['id_str']
try:
if tweet_count == numtweets:
print 'Finished Harvest- hit numtweets!'
return
if uri != None:
db[user].update({'id_str':id_str},tweet,upsert = True)
else:
print tweet['text']
tweet_count+=1
if verbose == True and uri != None:
print tweet['text']
except Exception, err:
print 'Unexpected error encountered: %s' %(err)
return
url = base_url + '&max_id=' + max_id
if __name__ == '__main__':
    try:
        main()
    except SystemExit as e:
        # Exit code 0 is a normal termination; swallow it silently.
        if e.code == 0:
            pass
You initially set stream = 0. When your try...except block catches a HTTP response with a code that isn't 404 or 401, stream is still equal to 0, but your except block doesn't break out of the function.
I'd look more closely at what this response says.
I'm trying to get the follower count of companies and track it over time. I have over 200 000 companies so the code I currently have would literally take years to run with current api limit.
# Page through the follower IDs of account `a` with a tweepy Cursor,
# sleeping briefly between items to ease off the rate limit.
c = tweepy.Cursor(api.followers_ids, id = a)
ids = []
for follower_id in c.items():
    time.sleep(0.01)
    # Bug fix: removed the stray trailing quote that made this line a syntax
    # error, and renamed the loop variable so it no longer shadows the
    # builtin id().
    ids.append(follower_id)
In this code its one api hit for every follower. I was wondering if there is a function that just gives a follower count as a number? Also what is the twitter api limit?
Each API requests returns at most 5000 followers IDs at a time, to retrieve all the followers of the 200 000 companies, here is a very useful script from the book Mining the social web by Matthew A. Russell to solve the twitter api limit
to make robust twitter request and to access twitter's API Matthew defined these methods :
import sys
import time
from urllib2 import URLError
from httplib import BadStatusLine
import json
import twitter
def oauth_login():
    """Create an authenticated twitter.Twitter API client.

    Fill in the four credential constants from your Twitter app settings
    before use.
    """
    CONSUMER_KEY = ''
    CONSUMER_SECRET = ''
    OAUTH_TOKEN = ''
    OAUTH_TOKEN_SECRET = ''
    auth = twitter.oauth.OAuth(OAUTH_TOKEN, OAUTH_TOKEN_SECRET,
                               CONSUMER_KEY, CONSUMER_SECRET)
    twitter_api = twitter.Twitter(auth=auth)
    return twitter_api
def make_twitter_request(twitter_api_func, max_errors=10, *args, **kw):
    """Call a Twitter API function, retrying on transient HTTP errors.

    Returns the API call's result, or None after a 401/404 error. Gives up
    (re-raises) after max_errors consecutive connection-level failures or
    once the backoff period exceeds an hour.
    """
    # A nested helper function that handles common HTTPErrors. Return an updated
    # value for wait_period if the problem is a 500 level error. Block until the
    # rate limit is reset if it's a rate limiting issue (429 error). Returns None
    # for 401 and 404 errors, which requires special handling by the caller.
    def handle_twitter_http_error(e, wait_period=2, sleep_when_rate_limited=True):
        if wait_period > 3600: # Seconds
            print >> sys.stderr, 'Too many retries. Quitting.'
            raise e
        # See https://dev.twitter.com/docs/error-codes-responses for common codes
        if e.e.code == 401:
            print >> sys.stderr, 'Encountered 401 Error (Not Authorized)'
            return None
        elif e.e.code == 404:
            print >> sys.stderr, 'Encountered 404 Error (Not Found)'
            return None
        elif e.e.code == 429:
            print >> sys.stderr, 'Encountered 429 Error (Rate Limit Exceeded)'
            if sleep_when_rate_limited:
                # Sleep out the 15-minute rate-limit window, then reset the
                # backoff to its initial value.
                print >> sys.stderr, "Retrying in 15 minutes...ZzZ..."
                sys.stderr.flush()
                time.sleep(60*15 + 5)
                print >> sys.stderr, '...ZzZ...Awake now and trying again.'
                return 2
            else:
                raise e # Caller must handle the rate limiting issue
        elif e.e.code in (500, 502, 503, 504):
            # Server-side error: back off exponentially (factor 1.5).
            print >> sys.stderr, 'Encountered %iError. Retrying in %iseconds' %\
                (e.e.code, wait_period)
            time.sleep(wait_period)
            wait_period *= 1.5
            return wait_period
        else:
            raise e
    # End of nested helper function
    wait_period = 2
    error_count = 0
    while True:
        try:
            return twitter_api_func(*args, **kw)
        except twitter.api.TwitterHTTPError, e:
            # An HTTP-level error resets the connection-error counter.
            error_count = 0
            wait_period = handle_twitter_http_error(e, wait_period)
            if wait_period is None:
                return
        except URLError, e:
            error_count += 1
            print >> sys.stderr, "URLError encountered. Continuing."
            if error_count > max_errors:
                print >> sys.stderr, "Too many consecutive errors...bailing out."
                raise
        except BadStatusLine, e:
            error_count += 1
            print >> sys.stderr, "BadStatusLine encountered. Continuing."
            if error_count > max_errors:
                print >> sys.stderr, "Too many consecutive errors...bailing out."
                raise
here is the methods to retrieve the friends and the followers :
from functools import partial
from sys import maxint
def get_friends_followers_ids(twitter_api, screen_name=None, user_id=None,
                              friends_limit=maxint, followers_limit=maxint):
    """Fetch friend and follower ID lists for one user, 5000 IDs per request.

    Exactly one of screen_name or user_id must be given. Returns the tuple
    (friends_ids, followers_ids), each truncated to its limit.
    """
    # Must have either screen_name or user_id (logical xor)
    assert (screen_name != None) != (user_id != None),\
        "Must have screen_name or user_id, but not both"
    # See https://dev.twitter.com/docs/api/1.1/get/friends/ids and
    # https://dev.twitter.com/docs/api/1.1/get/followers/ids for details
    # on API parameters
    get_friends_ids = partial(make_twitter_request, twitter_api.friends.ids,
                              count=5000)
    get_followers_ids = partial(make_twitter_request,twitter_api.followers.ids,
                                count=5000)
    friends_ids, followers_ids = [], []
    for twitter_api_func, limit, ids, label in [
                [get_friends_ids, friends_limit, friends_ids, "friends"],
                [get_followers_ids, followers_limit, followers_ids, "followers"]
            ]:
        if limit == 0: continue
        # cursor == 0 signals the last page of results.
        cursor = -1
        while cursor != 0:
            # Use make_twitter_request via the partially bound callable...
            if screen_name:
                response = twitter_api_func(screen_name=screen_name, cursor=cursor)
            else: # user_id
                response = twitter_api_func(user_id=user_id, cursor=cursor)
            if response is not None:
                ids += response['ids']
                cursor = response['next_cursor']
            print >> sys.stderr, 'Fetched {0} total {1} ids for{2}'.format(len(ids),
                                                label, (user_id or screen_name))
            # XXX: You may want to store data during each iteration to provide
            # an additional layer of protection from exceptional circumstances
            if len(ids) >= limit or response is None:
                break
    # Do something useful with the IDs, like store them to disk...
    return friends_ids[:friends_limit], followers_ids[:followers_limit]
# Sample usage
twitter_api = oauth_login()
# Fetch (up to) the first 10 friend IDs and 10 follower IDs of the account.
friends_ids, followers_ids =get_friends_followers_ids(twitter_api,
                                                      screen_name="SocialWebMining",
                                                      friends_limit=10,
                                                      followers_limit=10)
print friends_ids
print followers_ids