I am trying to read customers' feeds through Google API. I got an access token.
This is the documentation I followed: https://developers.google.com/shopping-content/developers-guide-python#authentication
The example there shows an ACCOUNT_ID to use, but I don't understand where to get this value.
import gdata.contentforshopping.client
ACCOUNT_ID = '1234567'
shopping_client = gdata.contentforshopping.client.ContentForShoppingClient(account_id=ACCOUNT_ID)
Here is what I did so far:
import gdata.contentforshopping.client
import gdata.gauth
part = 2
auth_token = gdata.gauth.OAuth2Token(client_id=CLIENT_ID, client_secret=CLIENT_SECRET, scope=SCOPE, user_agent=USER_AGENT)
shopping_client = gdata.contentforshopping.client.ContentForShoppingClient()
authorize_url = auth_token.generate_authorize_url(redirect_uri=APPLICATION_REDIRECT_URI)
if part == 1:
    print('Please visit: %s' % authorize_url)
elif part == 2:
    query = {'code': 'xxxx'}  # received from result of part == 1
    auth_token.get_access_token(query)
    auth_token.authorize(shopping_client)
    accounts = shopping_client.GetClientAccounts()
    print(accounts)
It turned out there is no proper way to do this. You have to trigger an error from the old API and parse it to find out your merchant ID.
Source: https://groups.google.com/forum/#!topic/google-content-api-for-shopping/3iLEm9puJis
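For anyone landing here later, this is a rough sketch of that workaround (untested; GetProducts and the exact error format are assumptions about the gdata contentforshopping client, so adjust for your version):

import re
import gdata.client
import gdata.contentforshopping.client

# Deliberately use a bogus account ID; the error that comes back from the
# old API is reported to contain your real merchant ID.
client = gdata.contentforshopping.client.ContentForShoppingClient(account_id='0')
auth_token.authorize(client)  # auth_token obtained as in the question above

try:
    client.GetProducts()
except gdata.client.RequestError as error:
    # Assumption: the first number in the error body is the merchant ID.
    match = re.search(r'\d+', str(error))
    if match:
        print('Merchant ID appears to be: %s' % match.group(0))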
I have a Google spreadsheet with around 3000 rows and I am trying to extract comments from it using the following code:
import requests
from apiclient import errors
from apiclient import discovery
from apiclient.discovery import build
from oauth2client.client import OAuth2WebServerFlow
import httplib2
CLIENT_ID = "xxxxxyyyy"
CLIENT_SECRET = "xxxxxxx"
OAUTH_SCOPE = "https://www.googleapis.com/auth/drive"
REDIRECT_URI = 'urn:ietf:wg:oauth:2.0:oob'
file_id = "zzzzzz"
def retrieve_comments(service, file_id):
    """Retrieve a list of comments.

    Args:
        service: Drive API service instance.
        file_id: ID of the file to retrieve comments for.
    Returns:
        List of comments.
    """
    try:
        comments = service.comments().list(fileId=file_id).execute()
        return comments.get('items', [])
    except errors.HttpError as error:
        print(f'An error occurred: {error}')
        return None
# ...
flow = OAuth2WebServerFlow(CLIENT_ID,CLIENT_SECRET,OAUTH_SCOPE)
flow.redirect_uri = REDIRECT_URI
authorize_url = flow.step1_get_authorize_url()
print("Go to the following link in your web browser "+ authorize_url)
code = input("Enter verfication code : ").strip()
credentials = flow.step2_exchange(code)
http = httplib2.Http()
http = credentials.authorize(http)
service = build('drive', 'v2', http=http)
comments = retrieve_comments(service, file_id)
However, the length of the comments list is only 20, whereas the spreadsheet surely contains more comments. Could someone explain which parameter I would need to tweak to retrieve all the comments in the spreadsheet? Thanks!
Currently, the default value of maxResults (Drive API v2) or pageSize (Drive API v3) for "Comments: list" is 20. I suspect this is the reason the length of your comments list is only 20 even though the spreadsheet contains more comments. In this case, how about the following modification?
From:
comments = service.comments().list(fileId=file_id).execute()
return comments.get('items', [])
To:
When you want to use Drive API v2, please modify your script as follows.
file_id = "###" # Please set your file ID.
res = []
page_token = None
while True:
obj = service.comments().list(fileId=file_id, pageToken=page_token, maxResults=100, fields="*").execute()
if len(obj.get("items", [])) > 0:
res = [*res, *obj.get("items", [])]
page_token = obj.get("nextPageToken")
if not page_token:
break
return res
When you want to use Drive API v3, please modify it as follows.
file_id = "###" # Please set your file ID.
res = []
page_token = None
while True:
obj = service.comments().list(fileId=file_id, pageToken=page_token, pageSize=100, fields="*").execute()
if len(obj.get("comments", [])) > 0:
res = [*res, *obj.get("comments", [])]
page_token = obj.get("nextPageToken")
if not page_token:
break
return res
With this modification, all of the comments in the spreadsheet are returned as an array.
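For reference, here is the Drive API v3 variant wrapped into one complete function (a sketch; it assumes service was built with build('drive', 'v3', http=http)):

def retrieve_all_comments(service, file_id):
    """Return every comment on the file by walking all pages."""
    comments = []
    page_token = None
    while True:
        res = service.comments().list(
            fileId=file_id, pageToken=page_token,
            pageSize=100, fields="*").execute()
        comments.extend(res.get("comments", []))
        page_token = res.get("nextPageToken")
        if not page_token:
            return comments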
References:
Comments: list of Drive API v2
Comments: list of Drive API v3
I'm trying to perform some more in-depth PII detection, as the standard code found here: https://learn.microsoft.com/en-us/azure/cognitive-services/language-service/personally-identifiable-information/quickstart?pivots=programming-language-python fails to find some more detailed entities (like French registration plate numbers, for example).
Everything works fine when I use the standard endpoint: 'https://whatever.cognitiveservices.azure.com/'
However, when I switch to 'https://whatever.cognitiveservices.azure.com/text/analytics/v3.1/entities/recognition/pii?piiCategories=default,FRDriversLicenseNumber' (an example found here: https://learn.microsoft.com/en-us/azure/cognitive-services/language-service/personally-identifiable-information/how-to-call), I get a 404 error.
I believe it might be a Python SDK issue, as it works just fine when I try the API console: https://westus2.dev.cognitive.microsoft.com/docs/services/TextAnalytics-v3-1/operations/EntitiesRecognitionPii
The code:
key = "key"
endpoint = "https://whatever.cognitiveservices.azure.com/text/analytics/v3.1/entities/recognition/pii?piiCategories=default,FRDriversLicenseNumber/"
from azure.ai.textanalytics import TextAnalyticsClient
from azure.core.credentials import AzureKeyCredential
# Authenticate the client using your key and endpoint
def authenticate_client():
    ta_credential = AzureKeyCredential(key)
    text_analytics_client = TextAnalyticsClient(
        endpoint=endpoint,
        credential=ta_credential)
    return text_analytics_client
client = authenticate_client()
# Example method for detecting sensitive information (PII) from text
def pii_recognition_example(client):
    documents = [
        "The employee's SSN is 859-98-0987.",
        "The employee's phone number is 555-555-5555."
    ]
    response = client.recognize_pii_entities(documents, language="en")
    result = [doc for doc in response if not doc.is_error]
    for doc in result:
        print("Redacted Text: {}".format(doc.redacted_text))
        for entity in doc.entities:
            print("Entity: {}".format(entity.text))
            print("\tCategory: {}".format(entity.category))
            print("\tConfidence Score: {}".format(entity.confidence_score))
            print("\tOffset: {}".format(entity.offset))
            print("\tLength: {}".format(entity.length))
pii_recognition_example(client)
As it is not yet stated in the MS docs, the endpoint should be kept simple:
endpoint = "https://<name>.cognitiveservices.azure.com"
and the details should be passed to response = client.recognize_pii_entities().
The below code works just fine:
key = "key"
endpoint = "https://<name>.cognitiveservices.azure.com"
from azure.ai.textanalytics import TextAnalyticsClient
from azure.core.credentials import AzureKeyCredential
# Authenticate the client using your key and endpoint
def authenticate_client():
    ta_credential = AzureKeyCredential(key)
    text_analytics_client = TextAnalyticsClient(
        endpoint=endpoint,
        credential=ta_credential)
    return text_analytics_client
client = authenticate_client()
# Example method for detecting sensitive information (PII) from text
def pii_recognition_example(client):
    documents = [
        "The employee's SSN is 859-98-0987.",
        "The employee's phone number is 555-555-5555."
    ]
    response = client.recognize_pii_entities(documents, language="en", categories_filter=["default", "FRDriversLicenseNumber"])
    result = [doc for doc in response if not doc.is_error]
    for doc in result:
        print("Redacted Text: {}".format(doc.redacted_text))
        for entity in doc.entities:
            print("Entity: {}".format(entity.text))
            print("\tCategory: {}".format(entity.category))
            print("\tConfidence Score: {}".format(entity.confidence_score))
            print("\tOffset: {}".format(entity.offset))
            print("\tLength: {}".format(entity.length))
pii_recognition_example(client)
I'm fetching Azure AD data in a Python script. What I'm interested in is specifically name, email and job title from the Users page in Azure.
However, the GET request is limited to 100 rows, which I assume has something to do with pagination. Additionally, the job title is empty, which has something to do with the app registration.
QUESTION
How do I return more than 100 rows? I'm struggling with the documentation, and I can't find any Python examples of this.
My code is:
import logging
import json
import msal
import requests
import pandas
# Globals
token = None
graphApiVersion = "v1.0"
uri = "https://graph.microsoft.com/{v}/{r}"
headers = None
# Functions
def authenticate():
    global token
    global headers
    authority = "https://login.microsoftonline.com/<tenant id>"
    appID = "<app id>"
    appSecret = "<app secret>"
    scope = ["https://graph.microsoft.com/.default"]
    app = msal.ConfidentialClientApplication(
        appID, authority=authority, client_credential=appSecret)
    token = app.acquire_token_silent(scope, account=None)
    if not token:
        token = app.acquire_token_for_client(scopes=scope)
    headers = {'Authorization': 'Bearer ' + token['access_token']}
    return

def users(Format=True):
    return query(graphApiVersion, "/users?$select=displayName,givenName,jobTitle,email,department", Format)

def query(v, r, Format=True):
    dest = uri.format(v=v, r=r)
    result = requests.get(dest, headers=headers).json()
    if Format:
        print(pandas.json_normalize(result["value"]))
    else:
        return result["value"]
You can just use the $top param to page Microsoft Graph data. If you use this param, you will get an additional link named @odata.nextLink for fetching the next page of data. For details, see this official doc.
I also did a quick test on my side with your code as below:
import logging
import json
import msal
import requests
import pandas
# Globals
token = '<access token>'
graphApiVersion = "v1.0"
uri = "https://graph.microsoft.com/{v}/{r}"
headers = None
# Functions
def users(Format=True):
    return query(graphApiVersion, "/users?$select=displayName,givenName,jobTitle,email,department&$top=200", Format)

def query(v, r, Format=True):
    dest = uri.format(v=v, r=r)
    result = requests.get(dest, headers={"Authorization": "Bearer " + token}).json()
    if Format:
        print(pandas.json_normalize(result["value"]))
        print("next page URL: " + result["@odata.nextLink"])
    else:
        return result["value"]

users()
It works for me.
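And if you want every row rather than just a larger first page, a minimal sketch (my addition, not part of the test above) keeps following @odata.nextLink until it disappears; the $select list here is trimmed to properties I'm sure exist on the user resource:

import requests

def fetch_all_users(token):
    url = ("https://graph.microsoft.com/v1.0/users"
           "?$select=displayName,givenName,jobTitle,department&$top=100")
    headers = {"Authorization": "Bearer " + token}
    rows = []
    while url:
        page = requests.get(url, headers=headers).json()
        rows.extend(page.get("value", []))
        url = page.get("@odata.nextLink")  # absent on the last page
    return rows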
I'm trying to write a script that does the following:
obtains a list of album (photoset) IDs from my Flickr account
lists the image titles from each album (photoset) into a text file named after the album title
Here's what I have so far:
import flickrapi
from xml.etree import ElementTree
api_key = 'xxxx'
api_secret = 'xxxx'
flickr = flickrapi.FlickrAPI(api_key, api_secret)
(token, frob) = flickr.get_token_part_one(perms='write')
if not token: raw_input("Press ENTER after you authorized this program")
flickr.get_token_part_two((token, frob))
sets = flickr.photosets_getList(user_id='xxxx')
for elm in sets.getchildren()[0]:
    title = elm.getchildren()[0].text
    print("id: %s setname: %s photos: %s" % (elm.get('id'), title, elm.get('photos')))
The above simply outputs the result to the screen like this:
id: 12453463463252553 setname: 2006-08 photos: 371
id: 23523523523532523 setname: 2006-07 photos: 507
id: 53253253253255532 setname: 2006-06 photos: 20
... etc ...
From there, I've got the following which I assumed would list all the image titles in the above album:
import flickrapi
from xml.etree import ElementTree
api_key = 'xxxx'
api_secret = 'xxxx'
flickr = flickrapi.FlickrAPI(api_key, api_secret)
(token, frob) = flickr.get_token_part_one(perms='write')
if not token: raw_input("Press ENTER after you authorized this program")
flickr.get_token_part_two((token, frob))
photos = flickr.photosets_getPhotos(photoset_id='12453463463252553')
for elm in photos.getchildren()[0]:
    title = elm.getchildren()[0].text
    print("%s" % elm.get('title'))
Unfortunately it just spits out an 'index out of range' error.
I stuck with it and, with a hand from a friend, came up with the following, which works as planned:
import flickrapi
import os
from xml.etree import ElementTree
api_key = 'xxxx'
api_secret = 'xxxx'
flickr = flickrapi.FlickrAPI(api_key, api_secret)
(token, frob) = flickr.get_token_part_one(perms='write')
if not token: raw_input("Press ENTER after you authorized this program")
flickr.get_token_part_two((token, frob))
sets = flickr.photosets_getList(user_id='xxxx')
for set in sets.getchildren()[0]:
    title = set.getchildren()[0].text
    filename = "%s.txt" % title
    f = open(filename, 'w')
    print("Getting Photos from set: %s" % title)
    for photo in flickr.walk_set(set.get('id')):
        f.write("%s\n" % photo.get('title'))
    f.close()
It's quite easy if you use python-flickr-api. The complicated part is getting authorization from Flickr to access private information.
Here is some (untested) code you can use:
import os
import flickr_api as flickr
# If all you want to do is get public information,
# then you need to set the api key and secret
flickr.set_keys(api_key='key', api_secret='sekret')
# If you want to fetch private/hidden information
# then in addition to the api key and secret,
# you also need to authorize your application.
# To do that, we request the authorization URL
# to get the value of `oauth_verifier`, which
# is what we need.
# This step is done only once, and we save
# the token. So naturally, we first check
# if the token exists or not:
if os.path.isfile('token.key'):
    flickr.set_auth_handler('token.key')
else:
    # This is the first time we are running,
    # so get the token and save it
    auth = flickr.auth.AuthHandler()
    url = auth.get_authorization_url('read')  # Get read permissions
    session_key = raw_input('''
Please visit {} and then copy the value of oauth_verifier: '''.format(url))
    if len(session_key.strip()):
        auth.set_verifier(session_key.strip())
        flickr.set_auth_handler(auth)
        # Save this token for next time
        auth.save('token.key')
    else:
        raise Exception("No authorization token provided, quitting.")
# If we reached this point, we are good to go!
# First thing we want to do is enable the cache, so
# we don't hit the API when not needed
flickr.enable_cache()
# Fetching a user, by their username
user = flickr.Person.findByUserName('username')
# Or, we don't know the username:
user = flickr.Person.findByEmail('some@user.com')
# Or, if we want to use the authenticated user
user = flickr.test.login()
# Next, fetch the photosets and their corresponding photos:
photo_sets = user.getPhotosets()
for pset in photo_sets:
    print("Getting pictures for {}".format(pset.title))
    photos = pset.getPhotos()
    for photo in photos:
        print('{}'.format(photo.info.title))
# Or, just get me _all_ the photos:
photos = user.getPhotos()
# If you haven't logged in,
# photos = user.getPublicPhotos()
for photo in photos:
    print('{}'.format(photo.info.title))
I'm just wondering if there is any way to write a Python script to check whether a twitch.tv stream is live?
I'm not sure why my app engine tag was removed, but this would be using app engine.
Since all the answers are actually outdated as of 2020-05-02, I'll give it a shot. You are now required to register a developer application (I believe), and you must now use an endpoint that requires a user ID instead of a username (since usernames can change).
See https://dev.twitch.tv/docs/v5/reference/users
and https://dev.twitch.tv/docs/v5/reference/streams
First you'll need to Register an application
From that you'll need to get your Client-ID.
The Client-ID in this example is not a real one.
import requests

TWITCH_STREAM_API_ENDPOINT_V5 = "https://api.twitch.tv/kraken/streams/{}"

API_HEADERS = {
    'Client-ID': 'tqanfnani3tygk9a9esl8conhnaz6wj',
    'Accept': 'application/vnd.twitchtv.v5+json',
}

reqSession = requests.Session()

def checkUser(userID):  # returns true if online, false if not
    url = TWITCH_STREAM_API_ENDPOINT_V5.format(userID)
    try:
        req = reqSession.get(url, headers=API_HEADERS)
        jsondata = req.json()
        if 'stream' in jsondata:
            if jsondata['stream'] is not None:  # stream is online
                return True
            else:
                return False
    except Exception as e:
        print("Error checking user: ", e)
        return False
I hated having to go through the process of making an API key and all those things just to check if a channel was live, so I tried to find a workaround:
As of June 2021, if you send an HTTP GET request to a URL like https://www.twitch.tv/CHANNEL_NAME, the response will contain "isLiveBroadcast": true if the stream is live; if the stream is not live, there will be nothing like that.
So I wrote this code as an example in Node.js:
const fetch = require('node-fetch');
const channelName = '39daph';
async function main(){
    let a = await fetch(`https://www.twitch.tv/${channelName}`);
    if ((await a.text()).includes('isLiveBroadcast'))
        console.log(`${channelName} is live`);
    else
        console.log(`${channelName} is not live`);
}
main();
main();
Here is also an example in Python:
import requests

channelName = '39daph'
contents = requests.get('https://www.twitch.tv/' + channelName).content.decode('utf-8')

if 'isLiveBroadcast' in contents:
    print(channelName + ' is live')
else:
    print(channelName + ' is not live')
It looks like Twitch provides an API (documentation here) that provides a way to get that info. A very simple example of getting the feed would be:
import urllib2
url = 'http://api.justin.tv/api/stream/list.json?channel=FollowGrubby'
contents = urllib2.urlopen(url)
print contents.read()
This will dump all of the info, which you can then parse with a JSON library (XML looks to be available too). It looks like the value returned is empty if the stream isn't live (I haven't tested this much, nor have I read much :) ). Hope this helps!
RocketDonkey's fine answer seems to be outdated by now, so I'm posting an updated answer for people like me who stumble across this SO question with Google.
You can check the status of the user EXAMPLEUSER by parsing
https://api.twitch.tv/kraken/streams/EXAMPLEUSER
The entry "stream":null will tell you that the user if offline, if that user exists.
Here is a small Python script which you can use on the commandline that will print 0 for user online, 1 for user offline and 2 for user not found.
#!/usr/bin/env python3
# checks whether a twitch.tv userstream is live
import argparse
from urllib.request import urlopen
from urllib.error import URLError
import json

def parse_args():
    """ parses commandline, returns args namespace object """
    desc = ('Check online status of twitch.tv user.\n'
            'Exit prints are 0: online, 1: offline, 2: not found, 3: error.')
    parser = argparse.ArgumentParser(description=desc,
                                     formatter_class=argparse.RawTextHelpFormatter)
    parser.add_argument('USER', nargs=1, help='twitch.tv username')
    args = parser.parse_args()
    return args

def check_user(user):
    """ returns 0: online, 1: offline, 2: not found, 3: error """
    url = 'https://api.twitch.tv/kraken/streams/' + user
    try:
        info = json.loads(urlopen(url, timeout=15).read().decode('utf-8'))
        if info['stream'] is None:
            status = 1
        else:
            status = 0
    except URLError as e:
        if e.reason == 'Not Found' or e.reason == 'Unprocessable Entity':
            status = 2
        else:
            status = 3
    return status

# main
try:
    user = parse_args().USER[0]
    print(check_user(user))
except KeyboardInterrupt:
    pass
Here is a more up-to-date answer using the latest version of the Twitch API (Helix). (Kraken is deprecated, and you shouldn't use GQL since it's not documented for third-party use.)
It works, but you should store the token and reuse it rather than generate a new one every time you run the script.
import requests

client_id = ''
client_secret = ''
streamer_name = ''

body = {
    'client_id': client_id,
    'client_secret': client_secret,
    'grant_type': 'client_credentials'
}
r = requests.post('https://id.twitch.tv/oauth2/token', body)

# data output
keys = r.json()
print(keys)

headers = {
    'Client-ID': client_id,
    'Authorization': 'Bearer ' + keys['access_token']
}
print(headers)

stream = requests.get('https://api.twitch.tv/helix/streams?user_login=' + streamer_name, headers=headers)
stream_data = stream.json()
print(stream_data)

if len(stream_data['data']) == 1:
    print(streamer_name + ' is live: ' + stream_data['data'][0]['title'] + ' playing ' + stream_data['data'][0]['game_name'])
else:
    print(streamer_name + ' is not live')
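On storing and reusing the token: a minimal caching sketch could look like the following (my own addition; the file name twitch_token.json and the 60-second safety margin are arbitrary choices):

import json
import os
import time

import requests

TOKEN_FILE = "twitch_token.json"  # hypothetical cache location

def get_app_token(client_id, client_secret):
    # Reuse the cached token while it is still valid.
    if os.path.isfile(TOKEN_FILE):
        with open(TOKEN_FILE) as f:
            cached = json.load(f)
        if cached.get("expires_at", 0) > time.time() + 60:
            return cached["access_token"]
    # Otherwise request a fresh app access token and cache it with its expiry.
    r = requests.post('https://id.twitch.tv/oauth2/token', {
        'client_id': client_id,
        'client_secret': client_secret,
        'grant_type': 'client_credentials'
    })
    keys = r.json()
    keys["expires_at"] = time.time() + keys.get("expires_in", 0)
    with open(TOKEN_FILE, "w") as f:
        json.dump(keys, f)
    return keys["access_token"]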
📚 Explanation
Now, the Twitch API v5 is deprecated. The Helix API is in its place, where an OAuth Authorization Bearer token AND a Client-ID are needed. This is pretty annoying, so I went on a search for a viable workaround, and found one.
🌎 GraphQL
When inspecting Twitch's network requests while not being logged in, I found out the anonymous API relies on GraphQL. GraphQL is a query language for APIs.
query {
    user(login: "USERNAME") {
        stream {
            id
        }
    }
}
In the GraphQL query above, we are querying a user by their login name. If they are streaming, the stream's id will be given. If not, stream will be null.
🐍 The Final Code
The finished Python code, in a function, is below. The client-id is taken from Twitch's website; Twitch uses it to fetch information for anonymous users, so it will always work without you having to get your own client-id.
import requests
# ...
def checkIfUserIsStreaming(username):
    url = "https://gql.twitch.tv/gql"
    query = "query {\n user(login: \"" + username + "\") {\n stream {\n id\n }\n }\n}"
    response = requests.post(url, json={"query": query, "variables": {}},
                             headers={"client-id": "kimne78kx3ncx6brgo4mv6wki5h1ko"})
    return bool(response.json()["data"]["user"]["stream"])
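Usage, with a hypothetical channel name:

print(checkIfUserIsStreaming("some_channel"))  # True while that channel is live, False otherwise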
I've created a website where you can play with Twitch's GraphQL API. Refer to the GraphQL Docs for help on GraphQL syntax! There's also Twitch GraphQL API documentation on my playground.
Use the Twitch API with your client_id as a parameter, then parse the JSON:
https://api.twitch.tv/kraken/streams/massansc?client_id=XXXXXXX
Twitch Client Id is explained here: https://dev.twitch.tv/docs#client-id,
you need to register a developer application: https://www.twitch.tv/kraken/oauth2/clients/new
Example:
import requests
import json

def is_live_stream(streamer_name, client_id):
    twitch_api_stream_url = "https://api.twitch.tv/kraken/streams/" \
        + streamer_name + "?client_id=" + client_id
    streamer_html = requests.get(twitch_api_stream_url)
    streamer = json.loads(streamer_html.content)
    return streamer["stream"] is not None
I'll try to shoot my shot, just in case someone still needs an answer to this, so here it goes:
import requests
from twitchAPI.twitch import Twitch

client_id = ""
client_secret = ""

twitch = Twitch(client_id, client_secret)
twitch.authenticate_app([])

TWITCH_STREAM_API_ENDPOINT_V5 = "https://api.twitch.tv/kraken/streams/{}"

API_HEADERS = {
    'Client-ID': client_id,
    'Accept': 'application/vnd.twitchtv.v5+json',
}

def checkUser(user):  # returns true if online, false if not
    userid = twitch.get_users(logins=[user])['data'][0]['id']
    url = TWITCH_STREAM_API_ENDPOINT_V5.format(userid)
    try:
        req = requests.Session().get(url, headers=API_HEADERS)
        jsondata = req.json()
        if 'stream' in jsondata:
            if jsondata['stream'] is not None:
                return True
            else:
                return False
    except Exception as e:
        print("Error checking user: ", e)
        return False

print(checkUser('michaelreeves'))
https://dev.twitch.tv/docs/api/reference#get-streams
import requests
# ================================================================
# your twitch client id
client_id = ''
# your twitch secret
client_secret = ''
# twitch username you want to check if it is streaming online
twitch_user = ''
# ================================================================
# getting auth token
url = 'https://id.twitch.tv/oauth2/token'
params = {
    'client_id': client_id,
    'client_secret': client_secret,
    'grant_type': 'client_credentials'}
req = requests.post(url=url, params=params)
token = req.json()['access_token']
print(f'{token=}')
# ================================================================
# getting user data (user id for example)
url = f'https://api.twitch.tv/helix/users?login={twitch_user}'
headers = {
    'Authorization': f'Bearer {token}',
    'Client-Id': f'{client_id}'}
req = requests.get(url=url, headers=headers)
userdata = req.json()
userid = userdata['data'][0]['id']
print(f'{userid=}')
# ================================================================
# getting stream info (by user id for example)
url = f'https://api.twitch.tv/helix/streams?user_id={userid}'
headers = {
    'Authorization': f'Bearer {token}',
    'Client-Id': f'{client_id}'}
req = requests.get(url=url, headers=headers)
streaminfo = req.json()
print(f'{streaminfo=}')
# ================================================================
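To turn that last response into a live/offline answer: per the Get Streams reference linked above, the data array is empty when the user is not streaming, so a final check (my addition) could be:

# 'data' holds at most one entry for a single user_id query; empty means offline.
is_live = len(streaminfo['data']) > 0
print(f'{twitch_user} is live' if is_live else f'{twitch_user} is not live')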
This solution doesn't require registering an application
import requests
HEADERS = {'client-id': 'kimne78kx3ncx6brgo4mv6wki5h1ko'}

GQL_QUERY = """
query($login: String) {
    user(login: $login) {
        stream {
            id
        }
    }
}
"""

def isLive(username):
    QUERY = {
        'query': GQL_QUERY,
        'variables': {
            'login': username
        }
    }
    response = requests.post('https://gql.twitch.tv/gql',
                             json=QUERY, headers=HEADERS)
    dict_response = response.json()
    return dict_response['data']['user']['stream'] is not None

if __name__ == '__main__':
    USERS = ['forsen', 'offineandy', 'dyrus']
    for user in USERS:
        IS_LIVE = isLive(user)
        print(f'User {user} live: {IS_LIVE}')
Yes.
You can use the Twitch API call https://api.twitch.tv/kraken/streams/YOUR_CHANNEL_NAME and parse the result to check whether the channel is live.
The function below returns a stream ID if the channel is live, and -1 otherwise.
import urllib2, json

TwitchChannel = 'A_Channel_Name'

def IsTwitchLive():  # returns the stream ID if streaming, else -1
    url = 'https://api.twitch.tv/kraken/streams/' + TwitchChannel
    streamID = -1
    response = urllib2.urlopen(url)
    html = response.read()
    data = json.loads(html)
    try:
        streamID = data['stream']['_id']
    except (KeyError, TypeError):
        streamID = -1
    return int(streamID)