Updating access token (global variable) inside a function/loop problem - python

I'm extracting data using the Spotify API wrapper. The access token (which is a global variable) is only valid for 1 hour, so I need to refresh it during the for loop inside a defined function. I tried to update it using try/except, but I got the following error:
UnboundLocalError: local variable 'spotify' referenced before assignment.
Here is the relevant code:
token = credentials.get_access_token()
spotify = spotipy.Spotify(auth=token)
...

def main():
    ...
    df_af = generate_audio_features_df(track_ids)
    ...

def generate_audio_features_df(track_ids):
    col_list = ['id', 'danceability']
    result = []
    count = 0
    for j in track_ids:
        try:
            r = spotify.audio_features(j)[0]
            features_list = [r['id'], r['danceability']]
            result.append(features_list)
            # display progress
            count += 1
            print("Added ", count, " track")
        except spotipy.client.SpotifyException:
            token = credentials.get_access_token()
            spotify = spotipy.Spotify(auth=token)
    df = pd.DataFrame(data=result, columns=col_list)
    return df

if __name__ == "__init__":
    main()
I would like the code to update the token and get back to the loop.
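A minimal sketch of one possible fix (my assumption about the intent, not code from the question): declare spotify as global inside the function so the except branch rebinds the module-level client instead of creating a local variable, which is what triggers the UnboundLocalError; retrying the call after refreshing the token is also an assumption. Note too that the guard at the bottom is probably meant to read if __name__ == "__main__":, otherwise main() never runs.

import spotipy
import pandas as pd

def generate_audio_features_df(track_ids):
    global spotify  # rebind the module-level client rather than shadowing it with a local
    col_list = ['id', 'danceability']
    result = []
    for count, j in enumerate(track_ids, start=1):
        try:
            r = spotify.audio_features(j)[0]
        except spotipy.client.SpotifyException:
            # token expired: refresh it, rebuild the client, and retry this track
            token = credentials.get_access_token()
            spotify = spotipy.Spotify(auth=token)
            r = spotify.audio_features(j)[0]
        result.append([r['id'], r['danceability']])
        print("Added", count, "track")  # display progress
    return pd.DataFrame(data=result, columns=col_list)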

Related

Python while loop token not updating

I am exploring the Azure management APIs. The ADF monitor pipeline API returns only 100 records at a time, so I created a while loop, but for some reason I am not able to get the next continuation token.
ct = d.get('continuationToken', '')
c = 1
while ct != '':
    req_body = self.getDataBody(ct)
    data = self.getResponse(data_url, data_headers, req_body)
    nct = self.getContinuationToken(data, c)
    c = c + 1
    print(c)
    if ct == nct:
        print(ct)
        print(nct)
        print('duplicate token')
        break
    ct = nct
    if ct == '':
        break
Here, in the next iteration, the next token is not getting updated.
Update:
Following are the functions that the above code uses:
def getDataBody(self, ct):
    start_date = datetime.now().strftime("%Y-%m-%d")
    end_date = (datetime.now() + timedelta(days=1)).strftime("%Y-%m-%d")
    data_body = {'lastUpdatedAfter': start_date, 'lastUpdatedBefore': end_date}
    if ct != '':
        data_body['continuationToken'] = ct
    return data_body

def getResponse(self, url, headers, body):
    data = requests.post(url, headers=headers, data=body)
    return data.text

def getContinuationToken(self, data, c):
    d = json.loads(data)
    with open(f'data/{c}.json', 'w') as f:
        json.dump(d, f)
    return d.get('continuationToken', '')
You can try increasing the timeout in the ADF activity; it may be that the timeout setting in your current ADF activity is less than the actual time the API takes to execute.
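Another thing worth checking (an assumption on my part, not something confirmed in the post): requests.post(..., data=body) form-encodes the dict, whereas the ADF pipeline-runs query endpoint expects a JSON body, so the service may never see the continuationToken field and keeps returning the first page. Passing the body with json= rules that out:

import requests

def getResponse(self, url, headers, body):
    # json= serializes the dict and sets Content-Type: application/json,
    # so lastUpdatedAfter/Before and continuationToken actually reach the service
    response = requests.post(url, headers=headers, json=body)
    response.raise_for_status()  # surface auth/request errors instead of silently parsing an error page
    return response.text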

Adding A Sentiment Analysis Loop When Collecting Twitter data

I am currently trying to add a sentiment analysis loop to a Python script that collects tweets. When I run the script without the loop, it generates the tweets just fine; however, when I add the for loop (starting at "for tweet in tweets_returned"), the tweets are no longer generated and the CSV I created does not appear either. I was wondering if this has to do with where I have placed the for loop within the script or if there is some error in the loop itself. Any help would be greatly appreciated, thanks!
sentiments = []
sentiment_means = []

# Create URL Structure
class RequestWithMethod(urllib.request.Request):
    def __init__(self, base_url, method, headers={}):
        self._method = method
        urllib.request.Request.__init__(self, base_url, headers)

    def get_method(self):
        if self._method:
            return self._method
        else:
            return urllib.request.Request.get_method(self)

# Create Endpoint & Add Credentials
def create_rules_endpoint(query):
    new_url = base_url + query
    base64string = ('%s:%s' % (UN, PWD)).replace('\n', '')
    base = base64.b64encode(base64string.encode('ascii'))
    final_final_url = urllib.request.Request(new_url)
    final_final_url.add_header('Authorization', 'Basic %s' % base.decode('ascii'))
    return final_final_url

# Take in the Endpoint and Make the Request
def make_request(search_endpoint):
    try:
        response = urllib.request.urlopen(search_endpoint)
        response_data = response.read()
        handle_response(response_data)
    except urllib.request.HTTPError as error:
        print("ERROR: %s" % error)

# Handle the Returned Data
def handle_response(data):
    tweets_returned = json.loads(data.decode('utf-8'))
    print(tweets_returned)
    for tweet in tweets_returned['results']:
        counter = 1
        compound_list = []
        positive_list = []
        negative_list = []
        neutral_list = []
        geo_list = []
        compound = analyzer.polarity_scores(tweet["text"])["compound"]
        pos = analyzer.polarity_scores(tweet["text"])["pos"]
        neu = analyzer.polarity_scores(tweet["text"])["neu"]
        neg = analyzer.polarity_scores(tweet["text"])["neg"]
        compound_list.append(compound)
        positive_list.append(pos)
        negative_list.append(neg)
        neutral_list.append(neu)
        sentiments.append({"Location": tweet["geo"],
                           "Date": tweet["created_at"],
                           "Tweet": tweet["text"],
                           "Compound": compound,
                           "Positive": pos,
                           "Neutral": neu,
                           "Negative": neg,
                           "Tweets_Ago": counter
                           })
        counter += 1
        sentiment_means.append({
            "Compound_Mean": np.mean(compound_list),
            "Positive": np.mean(positive_list),
            "Neutral": np.mean(negative_list),
            "Negative": np.mean(neutral_list),
            "Count": len(compound_list)
        })

# Create the Endpoint Variable w/ Sample Query Keyword
search_endpoint = create_rules_endpoint('Wilson%20Rackets%20has%3Ageo%20lang%3Aen')

# Make the Request by Passing in Search Endpoint
make_request(search_endpoint)

# Convert all_sentiments to DataFrame
all_sentiments_pd = pd.DataFrame.from_dict(sentiments)
all_sentiments_pd.to_csv("sentiments_array_pd.csv")
display(all_sentiments_pd)
#print(all_sentiments_pd.dtypes)

# Convert sentiment_means to DataFrame
sentiment_means_pd = pd.DataFrame.from_dict(sentiment_means)
display(sentiment_means_pd)
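One thing to check (my guess, not confirmed by the post): any exception raised inside handle_response, for example a KeyError if a tweet is missing one of the fields being read, will stop the script before the DataFrame/CSV lines at the bottom ever run, which would match both symptoms. A small sketch for surfacing such an error while keeping the rest of the structure as-is:

import json
import traceback

def handle_response(data):
    tweets_returned = json.loads(data.decode('utf-8'))
    print("results returned:", len(tweets_returned.get('results', [])))
    for tweet in tweets_returned.get('results', []):
        try:
            compound = analyzer.polarity_scores(tweet["text"])["compound"]
            sentiments.append({"Tweet": tweet["text"], "Compound": compound})
        except Exception:
            # print the offending tweet and the full traceback instead of letting the script die silently
            traceback.print_exc()
            print("problem tweet:", tweet)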

get all user id's Gmail API

My task is to count all the top senders and top receivers of a user's email.
So the plan is to get all the sender/receiver addresses, put them in a dictionary, count their occurrences, and print the result.
I tried this, but it doesn't work very well with the INBOX label (10,000+ messages):
import base64
import email
import re
import operator
from googleapiclient import errors
from quickstart import service

def find(st):
    for i in range(0, len(st)):
        tmp = str(st[i])
        for j in range(0, len(tmp)):
            if tmp[j] == 'T' and tmp[j+1] == 'o' and tmp[j-1] == "'" and tmp[j+2] == "'":
                return i
    pass

def getTop(n):
    try:
        if n == 1:
            label_ids = "INBOX"
        else:
            label_ids = "SENT"
        user_id = "me"
        topers = service.users().labels().get(userId=user_id, id=label_ids).execute()
        count = topers['messagesTotal']
        print(count)
        topers = service.users().messages().list(userId=user_id, labelIds=label_ids).execute()
        arrId = []
        for i in range(0, count):
            arrId.append(topers['messages'][i]['id'])
        st = []
        for i in range(0, count):
            message = service.users().messages().get(userId=user_id,
                                                     id=arrId[i],
                                                     format='metadata').execute()
            head = message['payload']['headers']
            index = find(head)
            obval = head[index]['value']
            tmp = str(obval)
            tmp = tmp.split('<', 1)[-1]
            tmp = tmp.replace('>', "")
            st.append(tmp)
        cnt = 0
        mvalues = {}
        for mail in st:
            if not mail in mvalues:
                mvalues[mail] = 1
            else:
                mvalues[mail] += 1
        sorted_values = sorted(mvalues.items(), key=operator.itemgetter(1))
        ln = len(sorted_values)
        for j in range(1, 6):
            print(sorted_values[-j])
        pass
    except errors.HttpError as error:
        print('An error occurred: %s' % error)
My question is: what is the fastest and most correct way to get all these user emails?
If I have a lot of messages, making a separate request every time in a loop is probably not the best way. I have been trying to figure this out for about 4 days. Help!
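A sketch of one way to do this (assumptions on my part: the INBOX/SENT label names and the choice of the From/To header are carried over from the question, the rest is the standard Gmail API client): page through messages.list with pageToken instead of relying on messagesTotal, ask messages.get for only the header you need via format='metadata' and metadataHeaders, then tally with collections.Counter. Batching the get calls with service.new_batch_http_request() would cut the round trips further.

from collections import Counter

def top_addresses(service, label_id="INBOX", header_name="From", top_n=5):
    counts = Counter()
    page_token = None
    while True:
        # list() returns at most 500 message ids per page; follow nextPageToken for the rest
        resp = service.users().messages().list(
            userId="me", labelIds=[label_id],
            maxResults=500, pageToken=page_token).execute()
        for ref in resp.get("messages", []):
            # fetch only the one header instead of the whole message
            msg = service.users().messages().get(
                userId="me", id=ref["id"],
                format="metadata", metadataHeaders=[header_name]).execute()
            for h in msg["payload"]["headers"]:
                if h["name"].lower() == header_name.lower():
                    counts[h["value"]] += 1
        page_token = resp.get("nextPageToken")
        if not page_token:
            break
    return counts.most_common(top_n)

# usage: top 5 senders in the inbox
print(top_addresses(service, "INBOX", "From"))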

Flickr API Function Issue

I am having issues with my API request to Flickr below. My function takes as input a list of 10 photo IDs. However, when I print the data from my function I am only getting information for one photo ID. Looking at my function below, any ideas on what may be causing the contents of only one photo ID to print? Any help would be great.
for item in get_flickr_data(word)["photos"]["photo"]:
    photo_ids = item["id"].encode('utf-8')
    lst_photo_ids.append(photo_ids)

print lst_photo_ids

lst_photo_ids = ['34117701526', '33347528313', '34158745075', '33315997274', '33315996984', '34028007021', '33315995844', '33347512113', '33315784134', '34024299271']

def get_photo_data(lst_photo_ids):
    baseurl = "https://api.flickr.com/services/rest/"
    params_d = {}
    params_d["method"] = "flickr.photos.getInfo"
    params_d["format"] = "json"
    params_d["photo_id"] = photo_ids
    params_d["api_key"] = FLICKR_KEY
    unique_identifier = params_unique_combination(baseurl, params_d)
    if unique_identifier in CACHE_DICTION:
        flickr_data_diction = CACHE_DICTION[unique_identifier]
    else:
        resp = requests.get(baseurl, params_d)
        json_result_text = resp.text[14:-1]
        flickr_data_diction = json.loads(json_result_text)
        CACHE_DICTION[unique_identifier] = flickr_data_diction
        fileref = open(CACHE_FNAME, "w")
        fileref.write(json.dumps(CACHE_DICTION))
        fileref.close()
    return flickr_data_diction

print get_photo_data(photo_ids)
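What is most likely happening (my reading of the code, not confirmed by the poster): flickr.photos.getInfo takes a single photo_id per call, and get_photo_data never actually uses its lst_photo_ids parameter; it reads the leftover photo_ids variable from the earlier loop, which holds only the last ID appended. A minimal sketch of looping over the list, with the caching logic left out for brevity (nojsoncallback=1 just asks Flickr for plain JSON, so the response does not need slicing):

import requests

def get_photo_data(lst_photo_ids):
    baseurl = "https://api.flickr.com/services/rest/"
    results = []
    for photo_id in lst_photo_ids:
        params_d = {
            "method": "flickr.photos.getInfo",  # getInfo returns info for one photo_id
            "format": "json",
            "nojsoncallback": 1,
            "photo_id": photo_id,
            "api_key": FLICKR_KEY,
        }
        resp = requests.get(baseurl, params=params_d)
        results.append(resp.json())
    return results

all_photo_data = get_photo_data(lst_photo_ids)  # one dict per ID in the list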

Trying to Optimize Python API read script

So I am creating a script to communicate with our API server for asset management and retrieve some information. I've found that the portion of the script with the longest total time is:
{method 'read' of '_ssl._SSLSocket' objects}
Currently we're pulling information about 25 assets or so and that specific portion is taking 18.89 seconds.
Is there any way to optimize this so it doesn't take 45 minutes to do all 2,700 computers we have?
I can provide a copy of the actual code if that would be helpful.
import urllib2
import base64
import json
import csv

# Count Number so that process only runs for 25 assets at a time will be
# replaced with a variable that is determined by the number of computers added
# to the list
Count_Stop = 25

final_output_list = []

def get_creds():
    # Credentials Function that retrieves username:pw from .file
    with open('.cred') as cred_file:
        cred_string = cred_file.read().rstrip()
        return cred_string
        print(cred_string)

def get_all_assets():
    # Function to retrieve computer ID + computer names and store the ID in a
    # new list called computers_parsed
    request = urllib2.Request('jss'
                              'JSSResource/computers')
    creds = get_creds()
    request.add_header('Authorization', 'Basic ' + base64.b64encode(creds))
    response = urllib2.urlopen(request).read()
    # At this point the request for ID + name has been retrieved and now to be
    # formatted in json
    parsed_ids_json = json.loads(response)
    # Then assign the parsed list (which has nested lists) at key 'computers'
    # to a new list variable called computer_set
    computer_set = parsed_ids_json['computers']
    # New list to store just the computer ID's obtained in Loop below
    computer_ids = []
    # Count variable, when equal to max # of computers in Count_stop it stops.
    count = 0
    # This for loop iterates over ID + name in computer_set and returns the ID
    # to the list computer_ids
    for computers in computer_set:
        count += 1
        computer_ids.append(computers['id'])
        # This IF condition allows for the script to be tested at 25 assets
        # instead of all 2,000+ (comment out other announce_all_assets call)
        if count == Count_Stop:
            announce_all_assets(computer_ids, count)
    # announce_all_assets(computer_ids, count)

def announce_all_assets(computer_ids, count):
    print('Final list of ID\'s for review: ' + str(computer_ids))
    print('Total number of computers to check against JSS: ' +
          str(count))
    extension_attribute_request(computer_ids, count)

def extension_attribute_request(computer_ids, count):
    # Creating new variable, first half of new URL used in loop to get
    # extension attributes using the computer ID's in computers_ids
    base_url = 'jss'
    what_we_want = '/subset/extensionattributes'
    creds = get_creds()
    print('Extension attribute function starts now:')
    for ids in computer_ids:
        request_url = base_url + str(ids) + what_we_want
        request = urllib2.Request(request_url)
        request.add_header('Authorization', 'Basic ' + base64.b64encode(creds))
        response = urllib2.urlopen(request).read()
        parsed_ext_json = json.loads(response)
        ext_att_json = parsed_ext_json['computer']['extension_attributes']
        retrieve_all_ext(ext_att_json)

def retrieve_all_ext(ext_att_json):
    new_computer = {}
    # new_computer['original_id'] = ids['id']
    # new_computer['original_name'] = ids['name']
    for computer in ext_att_json:
        new_computer[str(computer['name'])] = computer['value']
    add_to_master_list(new_computer)

def add_to_master_list(new_computer):
    final_output_list.append(new_computer)
    print(final_output_list)

def main():
    # Function to run the get all assets function
    get_all_assets()

if __name__ == '__main__':
    # Function to run the functions in order: main > get all assets >
    main()
I'd highly recommend using the 'requests' module over 'urllib2'. It handles a lot of stuff for you and will save you many a headache.
I believe it will also give you better performance, but I'd love to hear your feedback.
Here's your code using requests. (I've added newlines to highlight my changes. Note the built-in .json() decoder.):
# Requires requests module be installed.:
# `pip install requests` or `pip3 install requests`
# https://pypi.python.org/pypi/requests/
import requests
import base64
import json
import csv

# Count Number so that process only runs for 25 assets at a time will be
# replaced with a variable that is determined by the number of computers added
# to the list
Count_Stop = 25

final_output_list = []

def get_creds():
    # Credentials Function that retrieves username:pw from .file
    with open('.cred') as cred_file:
        cred_string = cred_file.read().rstrip()
        return cred_string
        print(cred_string)

def get_all_assets():
    # Function to retrieve computer ID + computer names and store the ID in a
    # new list called computers_parsed
    base_url = 'jss'
    what_we_want = 'JSSResource/computers'
    request_url = base_url + what_we_want
    # NOTE the request_url is constructed based on your request assignment just below.
    # As such, it is malformed as a URL, and I assume anonymized for your posting on SO.
    # request = urllib2.Request('jss'
    #                           'JSSResource/computers')
    #
    creds = get_creds()
    headers = {
        'Authorization': 'Basic ' + base64.b64encode(creds),
    }
    response = requests.get(request_url, headers=headers)
    parsed_ids_json = response.json()
    # [NO NEED FOR THE FOLLOWING. 'requests' HANDLES DECODING JSON. SEE ABOVE ASSIGNMENT.]
    # At this point the request for ID + name has been retrieved and now to be
    # formatted in json
    # parsed_ids_json = json.loads(response)

    # Then assign the parsed list (which has nested lists) at key 'computers'
    # to a new list variable called computer_set
    computer_set = parsed_ids_json['computers']
    # New list to store just the computer ID's obtained in Loop below
    computer_ids = []
    # Count variable, when equal to max # of computers in Count_stop it stops.
    count = 0
    # This for loop iterates over ID + name in computer_set and returns the ID
    # to the list computer_ids
    for computers in computer_set:
        count += 1
        computer_ids.append(computers['id'])
        # This IF condition allows for the script to be tested at 25 assets
        # instead of all 2,000+ (comment out other announce_all_assets call)
        if count == Count_Stop:
            announce_all_assets(computer_ids, count)
    # announce_all_assets(computer_ids, count)

def announce_all_assets(computer_ids, count):
    print('Final list of ID\'s for review: ' + str(computer_ids))
    print('Total number of computers to check against JSS: ' +
          str(count))
    extension_attribute_request(computer_ids, count)

def extension_attribute_request(computer_ids, count):
    # Creating new variable, first half of new URL used in loop to get
    # extension attributes using the computer ID's in computers_ids
    base_url = 'jss'
    what_we_want = '/subset/extensionattributes'
    creds = get_creds()
    print('Extension attribute function starts now:')
    for ids in computer_ids:
        request_url = base_url + str(ids) + what_we_want
        headers = {
            'Authorization': 'Basic ' + base64.b64encode(creds),
        }
        response = requests.get(request_url, headers=headers)
        parsed_ext_json = response.json()
        ext_att_json = parsed_ext_json['computer']['extension_attributes']
        retrieve_all_ext(ext_att_json)

def retrieve_all_ext(ext_att_json):
    new_computer = {}
    # new_computer['original_id'] = ids['id']
    # new_computer['original_name'] = ids['name']
    for computer in ext_att_json:
        new_computer[str(computer['name'])] = computer['value']
    add_to_master_list(new_computer)

def add_to_master_list(new_computer):
    final_output_list.append(new_computer)
    print(final_output_list)

def main():
    # Function to run the get all assets function
    get_all_assets()

if __name__ == '__main__':
    # Function to run the functions in order: main > get all assets >
    main()
Please do let me know the relative performance time with your 25 assets in 18.89 seconds! I'm very curious.
I'd still recommend my other answer below(?) regarding the use of the requests module from a pure cleanliness perspective (requests is very clean to work with), but I recognize it may or may not address your original question.
If you want to try PyCurl, which likely will impact your original question, here's the same code implemented with that approach:
# Requires pycurl module be installed.:
# `pip install pycurl` or `pip3 install pycurl`
# https://pypi.python.org/pypi/pycurl/7.43.0
# NOTE: The syntax used herein for pycurl is python 3 compliant.
# Not python 2 compliant.
import pycurl
import base64
import json
import csv
from io import BytesIO

def pycurl_data(url, headers):
    buffer = BytesIO()
    connection = pycurl.Curl()
    connection.setopt(connection.URL, url)
    connection.setopt(pycurl.HTTPHEADER, headers)
    connection.setopt(connection.WRITEDATA, buffer)
    connection.perform()
    connection.close()
    body = buffer.getvalue()
    # NOTE: The following assumes a byte string and a utf8 format. Change as desired.
    return json.loads(body.decode('utf8'))

# Count Number so that process only runs for 25 assets at a time will be
# replaced with a variable that is determined by the number of computers added
# to the list
Count_Stop = 25

final_output_list = []

def get_creds():
    # Credentials Function that retrieves username:pw from .file
    with open('.cred') as cred_file:
        cred_string = cred_file.read().rstrip()
        return cred_string
        print(cred_string)

def get_all_assets():
    # Function to retrieve computer ID + computer names and store the ID in a
    # new list called computers_parsed
    base_url = 'jss'
    what_we_want = 'JSSResource/computers'
    request_url = base_url + what_we_want
    # NOTE the request_url is constructed based on your request assignment just below.
    # As such, it is malformed as a URL, and I assume anonymized for your posting on SO.
    # request = urllib2.Request('jss'
    #                           'JSSResource/computers')
    #
    creds = get_creds()
    # b64encode needs bytes in python 3, and the header value must be str
    headers = ['Authorization: Basic ' + base64.b64encode(creds.encode()).decode()]
    response = pycurl_data(request_url, headers)
    # At this point the request for ID + name has been retrieved and already
    # decoded from json by pycurl_data
    parsed_ids_json = response
    # Then assign the parsed list (which has nested lists) at key 'computers'
    # to a new list variable called computer_set
    computer_set = parsed_ids_json['computers']
    # New list to store just the computer ID's obtained in Loop below
    computer_ids = []
    # Count variable, when equal to max # of computers in Count_stop it stops.
    count = 0
    # This for loop iterates over ID + name in computer_set and returns the ID
    # to the list computer_ids
    for computers in computer_set:
        count += 1
        computer_ids.append(computers['id'])
        # This IF condition allows for the script to be tested at 25 assets
        # instead of all 2,000+ (comment out other announce_all_assets call)
        if count == Count_Stop:
            announce_all_assets(computer_ids, count)
    # announce_all_assets(computer_ids, count)

def announce_all_assets(computer_ids, count):
    print('Final list of ID\'s for review: ' + str(computer_ids))
    print('Total number of computers to check against JSS: ' +
          str(count))
    extension_attribute_request(computer_ids, count)

def extension_attribute_request(computer_ids, count):
    # Creating new variable, first half of new URL used in loop to get
    # extension attributes using the computer ID's in computers_ids
    base_url = 'jss'
    what_we_want = '/subset/extensionattributes'
    creds = get_creds()
    print('Extension attribute function starts now:')
    for ids in computer_ids:
        request_url = base_url + str(ids) + what_we_want
        headers = ['Authorization: Basic ' + base64.b64encode(creds.encode()).decode()]
        response = pycurl_data(request_url, headers)
        parsed_ext_json = response
        ext_att_json = parsed_ext_json['computer']['extension_attributes']
        retrieve_all_ext(ext_att_json)

def retrieve_all_ext(ext_att_json):
    new_computer = {}
    # new_computer['original_id'] = ids['id']
    # new_computer['original_name'] = ids['name']
    for computer in ext_att_json:
        new_computer[str(computer['name'])] = computer['value']
    add_to_master_list(new_computer)

def add_to_master_list(new_computer):
    final_output_list.append(new_computer)
    print(final_output_list)

def main():
    # Function to run the get all assets function
    get_all_assets()

if __name__ == '__main__':
    # Function to run the functions in order: main > get all assets >
    main()
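One further thought, whichever HTTP library ends up being used (my own suggestion, not part of either rewrite above): with 2,700 assets the dominant cost is usually opening a fresh TLS connection per request and waiting on each response serially. Reusing a single requests.Session (HTTP keep-alive) and issuing a handful of requests concurrently typically helps far more than swapping libraries. A sketch using the same anonymized 'jss' base URL and the get_creds()/computer_ids names from the code above:

import base64
import requests
from concurrent.futures import ThreadPoolExecutor

session = requests.Session()  # reuses one TLS connection across calls (keep-alive)
session.headers['Authorization'] = 'Basic ' + base64.b64encode(get_creds().encode()).decode()

def fetch_extension_attributes(computer_id):
    # 'jss' is the anonymized base URL carried over from the question
    url = 'jss' + str(computer_id) + '/subset/extensionattributes'
    resp = session.get(url)
    resp.raise_for_status()
    return resp.json()['computer']['extension_attributes']

# a small worker pool; tune max_workers to what the JSS server will tolerate
with ThreadPoolExecutor(max_workers=8) as pool:
    results = list(pool.map(fetch_extension_attributes, computer_ids))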
