I'm trying to build a simple Python script to count how many notes each user has entered in a Highrise CRM system, in the last 365 days. I have created a script that works for a tiny data set (a Highrise system with only 10 notes), but it times out on larger data sets (I assume because my script is horribly inefficient due to my lack of Python skills).
I am working on this, using Nitrous.io for the environment, using Python 3.3.
I'm using the Highton wrapper for the Highrise API calls (I haven't figured out how to read the API key in from a file successfully, but I can get it to work by typing the API key and username in directly -- tips here would be useful, but my big focus is getting the script to run on a production-size Highrise environment.)
Can anyone offer recommendations on how to do this more elegantly/correctly?
My Python script is:
# Using https://github.com/seibert-media/Highton to integrate with Highrise CRM
# Change to Python 3.3 with this command: source py3env/bin/activate
# Purpose: Count activity by Highrise CRM user in the last 365 days
from highton import Highton
from datetime import date, datetime, timedelta

# Initialize the Highrise connection.
# NOTE(review): reading the credentials from a file is still unsolved here;
# the key and user are typed in directly for now.
high = Highton(
    api_key='THIS_IS_A_SECRET',
    user='SECRET'
)

users = high.get_users()
people = high.get_people()

# Collect every note exactly once.  The original code called
# get_person_notes() twice per person, doubling the API traffic --
# Highrise rate-limits to 500 requests per 10 seconds per IP/account,
# which is what made the script die on production-sized data.
notes = []
for person in people:
    tmp_notes = high.get_person_notes(person.highrise_id)
    if isinstance(tmp_notes, list):
        notes.extend(tmp_notes)  # reuse the list we already fetched

# Compute the cutoff once instead of re-evaluating utcnow() for every note.
cutoff = datetime.utcnow() - timedelta(days=365)

# Tally notes per author in a single pass (O(notes)), then report per user.
# The original rescanned the whole notes list for every user (O(users*notes)).
activity_count = {}
for note in notes:
    if note.created_at > cutoff:
        activity_count[note.author_id] = activity_count.get(note.author_id, 0) + 1

for user in users:
    print(user.name, ' has performed ', activity_count.get(user.highrise_id, 0), ' activities')
The error message I got was:
Traceback (most recent call last): File "/home/action/workspace/highrise-analysis/py3env/lib/python3.3/site-packages/requests/packages/urllib3/connectionpool.py", line 544, in urlopen
body=body, headers=headers) File "/home/action/workspace/highrise-analysis/py3env/lib/python3.3/site-packages/requests/packages/urllib3/connectionpool.py", line 341, in _make_request
self._validate_conn(conn) File "/home/action/workspace/highrise-analysis/py3env/lib/python3.3/site-packages/requests/packages/urllib3/connectionpool.py", line 761, in _validate_conn
conn.connect() File "/home/action/workspace/highrise-analysis/py3env/lib/python3.3/site-packages/requests/packages/urllib3/connection.py", line 204, in connect
conn = self._new_conn() File "/home/action/workspace/highrise-analysis/py3env/lib/python3.3/site-packages/requests/packages/urllib3/connection.py", line 134, in _new_conn
(self.host, self.port), self.timeout, **extra_kw) File "/home/action/workspace/highrise-analysis/py3env/lib/python3.3/site-packages/requests/packages/urllib3/util/connection.py", line 64, in create_connection
for res in socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM): socket.gaierror: [Errno -2] Name or service not known
During handling of the above exception, another exception occurred:
Traceback (most recent call last): File "/home/action/workspace/highrise-analysis/py3env/lib/python3.3/site-packages/requests/adapters.py", line 370, in send
timeout=timeout File "/home/action/workspace/highrise-analysis/py3env/lib/python3.3/site-packages/requests/packages/urllib3/connectionpool.py", line 597, in urlopen
_stacktrace=sys.exc_info()[2]) File "/home/action/workspace/highrise-analysis/py3env/lib/python3.3/site-packages/requests/packages/urllib3/util/retry.py", line 245, in increment
raise six.reraise(type(error), error, _stacktrace) File "/home/action/workspace/highrise-analysis/py3env/lib/python3.3/site-packages/requests/packages/urllib3/packages/six.py", line 309, in reraise
raise value.with_traceback(tb) File "/home/action/workspace/highrise-analysis/py3env/lib/python3.3/site-packages/requests/packages/urllib3/connectionpool.py", line 544, in urlopen
body=body, headers=headers) File "/home/action/workspace/highrise-analysis/py3env/lib/python3.3/site-packages/requests/packages/urllib3/connectionpool.py", line 341, in _make_request
self._validate_conn(conn) File "/home/action/workspace/highrise-analysis/py3env/lib/python3.3/site-packages/requests/packages/urllib3/connectionpool.py", line 761, in _validate_conn
conn.connect() File "/home/action/workspace/highrise-analysis/py3env/lib/python3.3/site-packages/requests/packages/urllib3/connection.py", line 204, in connect
conn = self._new_conn() File "/home/action/workspace/highrise-analysis/py3env/lib/python3.3/site-packages/requests/packages/urllib3/connection.py", line 134, in _new_conn
(self.host, self.port), self.timeout, **extra_kw) File "/home/action/workspace/highrise-analysis/py3env/lib/python3.3/site-packages/requests/packages/urllib3/util/connection.py", line 64, in create_connection
for res in socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM): requests.packages.urllib3.exceptions.ProtocolError: ('Connection aborted.', gaierror(-2, 'Name or service not known'))
During handling of the above exception, another exception occurred:
Traceback (most recent call last): File "highrise-analysis.py", line 35, in <module>
tmp_notes = high.get_person_notes(person.highrise_id) File "/home/action/workspace/highrise-analysis/py3env/lib/python3.3/site-packages/highton/highton.py", line 436, in get_person_notes
return self._get_notes(subject_id, 'people') File "/home/action/workspace/highrise-analysis/py3env/lib/python3.3/site-packages/highton/highton.py", line 433, in _get_notes
highrise_type, subject_id)), Note) File "/home/action/workspace/highrise-analysis/py3env/lib/python3.3/site-packages/highton/highton.py", line 115, in _get_data
content = self._get_request(endpoint, params).content File "/home/action/workspace/highrise-analysis/py3env/lib/python3.3/site-packages/highton/highton.py", line 44, in _get_request
params=params, File "/home/action/workspace/highrise-analysis/py3env/lib/python3.3/site-packages/requests/api.py", line 69, in get
return request('get', url, params=params, **kwargs) File "/home/action/workspace/highrise-analysis/py3env/lib/python3.3/site-packages/requests/api.py", line 50, in request
response = session.request(method=method, url=url, **kwargs) File "/home/action/workspace/highrise-analysis/py3env/lib/python3.3/site-packages/requests/sessions.py", line 465, in request
resp = self.send(prep, **send_kwargs) File "/home/action/workspace/highrise-analysis/py3env/lib/python3.3/site-packages/requests/sessions.py", line 573, in send
r = adapter.send(request, **kwargs) File "/home/action/workspace/highrise-analysis/py3env/lib/python3.3/site-packages/requests/adapters.py", line 415, in send
raise ConnectionError(err, request=request) requests.exceptions.ConnectionError: ('Connection aborted.', gaierror(-2, 'Name or service not known'))
Problem Solved: The Highrise API is rate limited to 500 requests per 10 second period from the same IP address for the same account, which I was exceeding when extracting the data. To resolve this, I added a time.sleep(.5) command to pause between each note data-pull per person, to avoid crossing that rate limit threshold.
In addition, I broke the code into 2 separate functions:
1. Extract the users, people, and notes data and store them as local files with pickle, so I didn't need to pull the data each time I wanted to do some analysis
2. Perform analysis on the extracted pickle files
I also needed to add a try / except KeyError block, because some notes were created by Highrise users who are no longer active (people who left the company).
Here's revised code:
# Using https://github.com/seibert-media/Highton to integrate with Highrise CRM
# Change to Python 3.3 with this command: source py3env/bin/activate
# Purpose: Count activity by Highrise CRM user in the last 365 days
from highton import Highton
from datetime import date, datetime, timedelta
import time
import pickle
# ===================================================================
def Create_Notes_Backup(highrise_key, highrise_user, notesfile, userfile, peoplefile, trailing_days = 365):
    # Create a fresh backup of Highrise users / people / notes as pickle files.
    # This can take a while on a production-sized instance because of the API
    # rate limit (500 requests per 10 seconds per IP/account).
    #
    # Parameters:
    #   highrise_key / highrise_user - Highrise API credentials
    #   notesfile / userfile / peoplefile - paths for the pickle output files
    #   trailing_days - only keep notes newer than this many days
    print('Entered Create_Notes_Backup function')
    high = Highton(api_key = highrise_key, user = highrise_user)  # Connect to API
    print('Connected to Highrise')
    users = high.get_users()
    print('Pulled ', len(users), ' users')
    people = high.get_people()
    print('Pulled ', len(people), ' people')

    # Compute the cutoff once rather than per person.
    cutoff = datetime.utcnow() - timedelta(days = trailing_days)
    notes = []
    print('Started creating notes array')
    for person in people:
        tmp_notes = high.get_person_notes(person.highrise_id)
        time.sleep(.5)  # Pause per API limits https://github.com/basecamp/highrise-api
        # Guard against an empty list before touching tmp_notes[0]
        # (the original raised IndexError for people with no notes).
        if isinstance(tmp_notes, list) and tmp_notes:
            print('Pulled ', len(tmp_notes), ' notes for ', person.first_name, ' ', person.last_name)
            if tmp_notes[0].created_at > cutoff:
                # Reuse the list we already fetched instead of calling the API
                # a second time -- the second call doubled the request count
                # and bypassed the sleep-based throttling above.
                notes.extend(tmp_notes)
    print('Finished creating notes array')

    # Final Step: Export lists into pickle files
    with open(notesfile, 'wb') as f:
        pickle.dump(notes, f)
    with open(userfile, 'wb') as g:
        pickle.dump(users, g)
    with open(peoplefile, 'wb') as h:
        pickle.dump(people, h)
    print('Exported lists to *.bak files')
# ===================================================================
def Analyze_Notes_Backup(notesfile, userfile, peoplefile, trailing_days = 365):
    # Function to analyze a notes backup made by Create_Notes_Backup:
    #   1. Count number of activities per user in the last trailing_days days
    #   2. Identify the date of each user's last note update
    # Writes a summary report to highrise-analysis-output.txt (in the current
    # directory) and prints progress to stdout.
    print('Entered Analyze_Notes_Backup function')

    # Load the pickled lists.
    with open(notesfile, 'rb') as a:
        notes = pickle.load(a)
    with open(userfile, 'rb') as b:
        users = pickle.load(b)
    with open(peoplefile, 'rb') as c:
        people = pickle.load(c)

    # Start counting.
    user_activity_count = {}
    last_user_update = {}
    for user in users:
        user_activity_count[user.highrise_id] = 0
        # Sentinel date meaning "no update seen inside the window".
        last_user_update[user.highrise_id] = date(1901, 1, 1)

    cutoff = datetime.utcnow() - timedelta(days = trailing_days)
    print('Started counting user activity by note')
    for note in notes:
        if note.created_at > cutoff:
            # Some notes were authored by users who have since been removed
            # from Highrise, so author_id may not be in the dictionaries.
            try:
                user_activity_count[note.author_id] += 1
            except KeyError:
                print('User no longer exists')
            try:
                if note.created_at.date() > last_user_update[note.author_id]:
                    last_user_update[note.author_id] = note.created_at.date()
            except KeyError:
                print('...')
    print('Finished counting user activity by note')
    print('=======================================')

    # Context manager so the report file is actually closed.  The original
    # ended with "f.close" (missing parentheses), which is a no-op attribute
    # access -- the file was never explicitly closed.
    with open('highrise-analysis-output.txt', 'w') as f:
        f.write('Report run on ')
        f.write(str(date.today()))
        f.write('\n Highrise People Count: ')
        f.write(str(len(people)))
        f.write('\n ============================ \n')
        for user in users:
            print(user.name, ' has performed ', user_activity_count[user.highrise_id], ' activities')
            f.write(str.join(' ', (user.name, ', ', str(user_activity_count[user.highrise_id]))))
            if last_user_update[user.highrise_id] == date(1901, 1, 1):
                print(user.name, ' has not updated Highrise in the last 365 days')
                # The original str.join(' ', (', NO_UPDATES\n')) was passed a
                # plain string, not a tuple, so it space-separated every
                # character.  Write the literal text instead.
                f.write(', NO_UPDATES\n')
            else:
                print(user.name, ' last updated Highrise ', last_user_update[user.highrise_id])
                f.write(str.join(' ', (', ', str(last_user_update[user.highrise_id]), '\n')))
# ===================================================================
if __name__ == "__main__":
    # Number of days back to monitor.
    trailing_days = 365
    # Read the credentials from the environment.  The original referenced
    # MY_API_KEY / MY_HIGHRISE_USERID, which were never defined anywhere in
    # the file and raised NameError at runtime.
    import os
    highrise_key = os.environ.get('HIGHRISE_API_KEY')
    highrise_user = os.environ.get('HIGHRISE_USER')
    if not highrise_key or not highrise_user:
        raise SystemExit('Set HIGHRISE_API_KEY and HIGHRISE_USER in the environment')
    # Production Environment Analysis
    Create_Notes_Backup(highrise_key, highrise_user,
                        'highrise-production-notes.bak',
                        'highrise-production-users.bak',
                        'highrise-production-people.bak',
                        trailing_days = trailing_days)
    Analyze_Notes_Backup('highrise-production-notes.bak',
                         'highrise-production-users.bak',
                         'highrise-production-people.bak',
                         trailing_days = trailing_days)
Mike,
What you are doing is going through all the users, and for each one then going through all of the notes. Once you have the user there should be a way to query for just the notes that belong to that user. You probably can include the date range in the query and just do a .count to see how many records match.
If you can't search notes by user, then go through the notes once and store the userId and the sum of that users notes that match your criteria in a dictionary. Then you can match up the userid's with the users table.
Good luck
Related
The purpose of this script is to shut down GCP VMs based on some logic. Currently we are trying to shut them down at night, but the script fails before it can shut down the VMs. We shut down based on time: in the evenings and at night the VMs are shut down using these scripts.
Script to turn off instances overnight
Environment: Runs in Google Cloud Functions (Python3.7)
import datetime
import json
from pprint import pformat
import pytz
import re
import modules.common.cfcommon as cfcommon
import modules.utilities.dateutilities as dateutilities
from modules.compute.instances import InstanceList, Instance
from modules.compute.compute_service import ComputeServiceContext
from modules.utilities.printutilities import print_message, debug_message
from modules.pubsub.topic import PublishMessage
from modules.common.labels import VMAUTOSHUTDOWN_LABEL, VMAUTOSHUTDOWN_DEFER_LABEL, ShutdownDeferLabelValueIsValid, ShutdownLabelValueIsValid
from templates.renderer import render_template
# Checks whether the given 'Instance' object appears in a list of
# dictionaries of the form {"name": "instancename", "zone": "zonename"},
# e.g. {"name": "test-01", "zone": "us-east4-c"}.  Name and short zone are
# compared case-insensitively.
#
# Parameters:
# inputList - list of dictionary objects
# instance - Instance object
def isInstanceInList(inputList, instance):
    if not isinstance(inputList, list):
        raise TypeError("Provided inputList is not a list")
    if not isinstance(instance, Instance):
        raise TypeError("Provided instance is not of type 'Instance'")
    # True as soon as any entry matches both name and zone; False otherwise.
    return any(
        entry["name"].lower() == instance.properties["name"].lower()
        and entry["zone"].lower() == instance.GetShortZoneName().lower()
        for entry in inputList
    )
# Takes a list of Instance objects and returns the ones that should be stopped:
# instances whose shutdown label is missing or malformed, or whose labelled
# shutdown hour falls within the grace window around "now" (and whose defer
# label, if any, is not still in the future).
#
# Example: if the shutdown hour is 23 and gracePeriodMin is 15, then when this
# function is called at 23:12 the instance will be included in the shutdown list.
#
# Parameters:
# instanceList - List of Instance objects
# gracePeriodMin - number of minutes past the shutdown hour still eligible
def getInstancesToStop(instanceList, gracePeriodMin):
    instancesToStop = []
    debug_message("Entering getInstancesToStop")
    for cInstance in instanceList:
        debug_message("Instance: %s (ID: %s, Zone: %s, Project: %s)" % (cInstance.GetName(), cInstance.GetId(), cInstance.GetShortZoneName(), cInstance.project))
        labels = cInstance.GetLabels()
        if VMAUTOSHUTDOWN_LABEL in labels.keys():
            labelValue = labels.get(VMAUTOSHUTDOWN_LABEL, '')
            # Raw string: '\d' in a non-raw literal is an invalid escape
            # sequence (DeprecationWarning now, SyntaxError in future Pythons).
            pattern = r'\d\d-\d\d-\d\d'
            match = re.match(pattern, labelValue)
            if not match or not ShutdownLabelValueIsValid(labelValue):
                # Malformed shutdown label -> treat as eligible for shutdown.
                debug_message(f'Label {labelValue} does not match the correct format')
                instancesToStop.append(cInstance)
                continue
        else:
            # No shutdown label at all -> default to shutting the VM down.
            debug_message(f'Label {VMAUTOSHUTDOWN_LABEL} not found. Adding to shutdown list')
            instancesToStop.append(cInstance)
            continue
        shutdown_deferred_utc_datetime = None
        if VMAUTOSHUTDOWN_DEFER_LABEL in labels.keys():
            labelValue = labels.get(VMAUTOSHUTDOWN_DEFER_LABEL, '')
            # Defer label format: YYYY-MM-DDtHH-MM-SS (UTC).
            pattern = r'\d\d\d\d-\d\d-\d\dt\d\d-\d\d-\d\d'
            match = re.match(pattern, labelValue)
            if match and ShutdownDeferLabelValueIsValid(labelValue):
                shutdown_deferred_utc_date, shutdown_deferred_utc_time = labelValue.split('t')
                year, month, day = shutdown_deferred_utc_date.split('-')
                hour, minute, second = shutdown_deferred_utc_time.split('-')
                shutdown_deferred_utc_datetime = datetime.datetime.now(pytz.timezone('GMT')).replace(
                    year=int(year), month=int(month), day=int(day), hour=int(hour), minute=int(minute), second=int(second)
                )
            else:
                # Malformed defer label -> eligible for shutdown.
                debug_message(f'Label {labels[VMAUTOSHUTDOWN_DEFER_LABEL]} does not match the correct format')
                instancesToStop.append(cInstance)
                continue
        current_utc_time = dateutilities.get_current_datetime()
        # If defer date is in the future, and not in grace window time, skip shutting down
        if shutdown_deferred_utc_datetime is not None and shutdown_deferred_utc_datetime > current_utc_time:
            debug_message(f'Instance {cInstance.GetName()} shutdown deferred until after {labels[VMAUTOSHUTDOWN_DEFER_LABEL]}')
            continue
        # If defer time is in past, continue with the vm hour shutdown
        shutdown_utc_hour = labels[VMAUTOSHUTDOWN_LABEL].split('-')[0]
        # Convert shutdown UTC hour into datetime object (today at that hour)
        shutdown_utc_time = datetime.datetime.now(pytz.timezone('GMT')).replace(hour=int(shutdown_utc_hour), minute=0, second=0)
        shutdown_utc_grace_time = shutdown_utc_time + datetime.timedelta(minutes=gracePeriodMin)
        debug_message(f"Shutdown UTC time {shutdown_utc_time}")
        debug_message(f"Shutdown UTC grace time {shutdown_utc_grace_time}")
        # Stop only when "now" falls inside [shutdown time, shutdown time + grace].
        if current_utc_time >= shutdown_utc_time and current_utc_time <= shutdown_utc_grace_time:
            debug_message("We're in the time window")
            instancesToStop.append(cInstance)
        else:
            debug_message("We're outside the time window. Not adding to stop list")
    return instancesToStop
# This is the main entry point that cloud functions calls
def AutoStopVMInstances(config, policy=None, payload=None, generate_local_report=False):
    # Scans every configured project for running, non-whitelisted VM instances,
    # stops the ones whose shutdown labels say they are due (unless in preview
    # mode), optionally emails / writes an HTML summary report, and logs a
    # structured summary line for Stackdriver.
    #
    # Parameters:
    #   config - dict of settings; see the config.get() calls below for keys
    #   policy, payload - accepted but not used in this function body
    #   generate_local_report - when True, write the HTML report to a local
    #       file instead of publishing it to the email pub/sub topic
    FUNCTION_NAME = "AutoStopVMInstances"
    # Populated by config later...
    QUERY_PROJECT_IDS = None  # List of project IDs
    INSTANCE_WHITELIST = None  # List of dictionaries in format {"name": "instancename", "zone": "zonename", "project": "projectid"}
    PREVIEW_MODE = True
    SHUTDOWN_GRACEPERIOD_MIN = 30
    # Start
    startTime = datetime.datetime.now()
    print_message("Started %s within Cloud Function %s [%s]" % (FUNCTION_NAME, cfcommon.CLOUD_FUNCTION_NAME, startTime))
    debug_message("")
    # For ease of access, assign from config values
    debug_message("Processing Configuration...")
    QUERY_PROJECT_IDS = config.get("QueryProjectIDs", [])  # Required field
    INSTANCE_WHITELIST = config.get("InstanceWhiteList", [])  # Optional Field
    PREVIEW_MODE = config.get("PreviewMode", True)  # Required field
    SHUTDOWN_GRACEPERIOD_MIN = config.get("ShutdownGracePeriodMin", None)  # Required field
    SKIP_INSTANCE_GROUPS = config.get("SkipInstanceGroups", False)  # Optional
    EMAIL_PUB_SUB_PROJECT = config.get("EmailPubSubProject", None)  # Optional
    EMAIL_PUB_SUB_TOPIC = config.get("EmailPubSubTopic", None)  # Optional
    EMAIL_TO = config.get("EmailTo", [])  # Optional
    EMAIL_CC = config.get("EmailCC", None)  # Optional
    EMAIL_BCC = config.get("EmailBCC", None)  # Optional
    EMAIL_FROM = config.get("EmailFrom", "noreply-ei-cs-cloudops-resource-administration#ei-cs-cloudops.local")  # Optional
    EMAIL_SUBJECT = config.get("EmailSubject", "Nightly VM Instance Shutdown Summary")  # Optional
    cfLogger = cfcommon.CloudFunctionLog()
    # Validate whitelist
    if INSTANCE_WHITELIST is None:
        raise Exception("Unable to get whitelist")
    debug_message("Whitelist loaded:")
    debug_message(pformat((INSTANCE_WHITELIST)))
    # Re-init Compute service - execution environment in cloud functions can be shared among each other. Let's re-init our connection every execution.
    ComputeServiceContext.InitComputeService()
    # Build the service object.
    allRunningInstances = []
    for cProjectId in QUERY_PROJECT_IDS:
        debug_message("Checking Project: %s" % (cProjectId))
        # Main Loop - Let's get and analyze all instances from our project
        # Paginated within the 'request' object
        runningInstances = []
        allInstances = []
        debug_message("Building Instance List...", end="")
        instances = InstanceList(cProjectId)
        instances.PopulateInstances()
        debug_message("Done")
        for cInstance in instances.GetAllInstances():
            debug_message("Found Instance %s in %s [%s - %s]" % (cInstance.GetName(), cInstance.GetZone(), cInstance.GetId(), cInstance.GetStatus()))
            # Check if whitelisted. If it is, skip it
            if isInstanceInList(INSTANCE_WHITELIST, cInstance):
                debug_message(" Instance is whitelisted. Skipping.")
                continue
            # Check if we should skip instance groups
            if SKIP_INSTANCE_GROUPS and cInstance.IsWithinInstanceGroup():
                debug_message(" Instance is within an instance group. Skipping.")
                continue
            debug_message(" Is Running: %s" % (cInstance.IsRunning()))
            # Instances owned by the devops team are never auto-stopped.
            owner = cInstance.GetOwner()
            if owner in ("devops", "ei devops", "eicsdevopseng"):
                debug_message("Skipping instance owned by devops")
                continue
            # # TODO: FOR USE WHEN TESTING
            # if VMAUTOSHUTDOWN_LABEL not in labels.keys():
            # continue
            # Keep track of this instance
            allInstances.append(cInstance)
            # If it's running, it's a candidate to stop
            if cInstance.IsRunning():
                runningInstances.append(cInstance)
        # Handle no instances found
        if len(allInstances) == 0:
            debug_message("INFO: No Instances found.")
        # Summarize for user
        debug_message("")
        if len(runningInstances) > 0:
            debug_message("Found %s/%s non-whitelisted instances are running (project: %s)" % (len(runningInstances), len(allInstances), cProjectId))
        else:
            debug_message("All %s non-whitelisted instances are good (project: %s)" % (len(allInstances), cProjectId))
        # Main loop to stop
        debug_message("")
        # Accumulate running instances across all projects for one stop pass.
        allRunningInstances = allRunningInstances + runningInstances
    instancesToBeStopped = getInstancesToStop(allRunningInstances, SHUTDOWN_GRACEPERIOD_MIN)
    stoppedCount = 0
    instanceSummary = []
    if len(instancesToBeStopped) == 0:
        print_message("No instances are due to be stopped")
    else:
        for cInstance in instancesToBeStopped:
            # Per-instance record used both in the email report and the
            # structured log line at the end.
            summaryEntry = {
                "Name": cInstance.GetName(),
                "ID": cInstance.GetId(),
                "Zone": cInstance.GetShortZoneName(),
                "Project": cInstance.GetProject(),
                "Preview": PREVIEW_MODE,
                "Stopped": False,
                "InstanceLink": cInstance.GetSelfLinkToConsole()
            }
            logMessage = "Stopping Instance: {name} (ID: {id}, Zone: {zone}, Project: {project})".format(
                name=summaryEntry.get("Name"),
                id=summaryEntry.get("ID"),
                zone=summaryEntry.get("Zone"),
                project=summaryEntry.get("Project")
            )
            if PREVIEW_MODE:
                # Preview mode: log what would happen, but do not stop the VM.
                print_message("(PREVIEW) " + logMessage )
            else:
                print_message(logMessage)
                cInstance.Stop()
                summaryEntry["Stopped"] = True
                stoppedCount += 1
            instanceSummary.append(summaryEntry)
    if EMAIL_PUB_SUB_PROJECT is not None and EMAIL_PUB_SUB_TOPIC is not None:
        debug_message("It looks like we have an email config. Attempting to send email")
        emailBody = render_template(
            'shutdown_report',
            instance_summary=instanceSummary,
            config=json.dumps(config, indent=4, sort_keys=True),
            preview_mode=PREVIEW_MODE,
            generation_time=datetime.datetime.now().astimezone(pytz.utc)
        )
        emailPayload = {
            "To": EMAIL_TO,
            "From": EMAIL_FROM,
            "Subject": EMAIL_SUBJECT,
            "BodyHtml": emailBody
        }
        if EMAIL_CC is not None:
            emailPayload["CC"] = EMAIL_CC
        if EMAIL_BCC is not None:
            emailPayload["BCC"] = EMAIL_BCC
        if not generate_local_report:
            print_message("Sending email...", end="")
            PublishMessage(EMAIL_PUB_SUB_PROJECT, EMAIL_PUB_SUB_TOPIC, json.dumps(emailPayload))
        else:
            print_message('Generating local HTML report')
            with open('./html_reports/shutdown_report.html', 'w') as r:
                r.write(emailBody)
        print_message("Done")
    # We want to log a nice structured json line to stackdriver for easy reporting.
    cfLogger.log({
        "StartTime": startTime.isoformat(),
        "InstancesStoppedCount": stoppedCount,
        "PreviewMode": PREVIEW_MODE,
        "Instances": instanceSummary,
        "Whitelist": INSTANCE_WHITELIST,
        "EndTime": datetime.datetime.now().isoformat(),
        "LogLine": "Summary"
    })
    print_message("DONE [%s]" % (datetime.datetime.now()))
The error I'm getting when trying to run a VM shutdown script:
Caught exception while running VMNightlyShutdown. Exception Text: Traceback (most recent call last):
File "/workspace/main.py", line 248, in StartCloudFunction
policy=policy_config)
File "/workspace/vm_nightly_shutdown.py", line 256, in AutoStopVMInstances
cInstance.Stop()
File "/workspace/modules/compute/instances.py", line 729, in Stop
stop_instance(self.project, self.GetShortZoneName(), self.GetName(), waitForCompletion=waitForCompletion)
File "/workspace/modules/compute/instances.py", line 45, in stop_instance
return wait_for_operation(project, zone, response["name"])
File "/workspace/modules/compute/instances.py", line 27, in wait_for_operation
operation=operation
File "/layers/google.python.pip/pip/lib/python3.7/site-packages/googleapiclient/_helpers.py", line 130, in positional_wrapper
return wrapped(*args, **kwargs)
File "/layers/google.python.pip/pip/lib/python3.7/site-packages/googleapiclient/http.py", line 932, in execute
headers=self.headers,
File "/layers/google.python.pip/pip/lib/python3.7/site-packages/googleapiclient/http.py", line 222, in _retry_request
raise exception
File "/layers/google.python.pip/pip/lib/python3.7/site-packages/googleapiclient/http.py", line 191, in _retry_request
resp, content = http.request(uri, method, *args, **kwargs)
File "/layers/google.python.pip/pip/lib/python3.7/site-packages/google_auth_httplib2.py", line 225, in request
**kwargs
File "/layers/google.python.pip/pip/lib/python3.7/site-packages/httplib2/__init__.py", line 1721, in request
conn, authority, uri, request_uri, method, body, headers, redirections, cachekey,
File "/layers/google.python.pip/pip/lib/python3.7/site-packages/httplib2/__init__.py", line 1440, in _request
(response, content) = self._conn_request(conn, request_uri, method, body, headers)
File "/layers/google.python.pip/pip/lib/python3.7/site-packages/httplib2/__init__.py", line 1392, in _conn_request
response = conn.getresponse()
File "/layers/google.python.runtime/python/lib/python3.7/http/client.py", line 1373, in getresponse
response.begin()
File "/layers/google.python.runtime/python/lib/python3.7/http/client.py", line 319, in begin
version, status, reason = self._read_status()
File "/layers/google.python.runtime/python/lib/python3.7/http/client.py", line 280, in _read_status
line = str(self.fp.readline(_MAXLINE + 1), "iso-8859-1")
File "/layers/google.python.runtime/python/lib/python3.7/socket.py", line 589, in readinto
return self._sock.recv_into(b)
File "/layers/google.python.runtime/python/lib/python3.7/ssl.py", line 1071, in recv_into
return self.read(nbytes, buffer)
File "/layers/google.python.runtime/python/lib/python3.7/ssl.py", line 929, in read
return self._sslobj.read(len, buffer)
ssl.SSLError: [SSL: DECRYPTION_FAILED_OR_BAD_RECORD_MAC] decryption failed or bad record mac (_ssl.c:2570)",
}
I am running all the SQL scripts under the scripts path in a for loop and copying the data into the #priya_stage area in Snowflake, and then using the GET command I am unloading data from the stage area to my Unix path in CSV format. But I am getting an error.
Note: this same code works on my MAC but not on unix server.
# Run every SQL script in the scripts directory, unload each result set into
# the #priya_stage Snowflake stage, then GET the staged files down to the
# local inbound directory as CSVs.
import logging
import os
from os import walk

import snowflake.connector
# Do NOT alias DictCursor as "dict": the original import shadowed the builtin
# dict() for the whole module, which is confusing and breaks any later
# dict(...) call in this file.
from snowflake.connector import DictCursor

conn = None  # so the finally block is safe even if connect() raises
try:
    conn = snowflake.connector.connect(
        account = 'xxx',
        user = 'xxx',
        password = 'xxx',
        database = 'xxx',
        schema = 'xxx',
        warehouse = 'xxx',
        role = 'xxx',
    )
    conn.cursor().execute('USE WAREHOUSE xxx')
    conn.cursor().execute('USE DATABASE xxx')
    conn.cursor().execute('USE SCHEMA xxx')
    scripts = '/xxx/apps/xxx/xxx/scripts/snow/scripts/'
    os.chdir(scripts)
    for root, dirs, files in walk(scripts):
        for file in files:
            inbound = file[0:-4]  # script name without its ".sql" extension
            # Context manager so the script file handle is closed promptly
            # (the original open(file).read() leaked a handle per script).
            with open(file, 'r') as sql_file:
                sql = sql_file.read()
            file_name = 'bridg_' + inbound
            result_query = conn.cursor(DictCursor).execute(sql)
            query_id = result_query.sfqid
            # Unload the result of the query we just ran into the stage.
            sql_copy_into = f'''
copy into #priya_stage/{file_name}
from (SELECT * FROM TABLE(RESULT_SCAN('{query_id}')))
DETAILED_OUTPUT = TRUE
HEADER = TRUE
SINGLE = FALSE
OVERWRITE = TRUE
max_file_size=4900000000'''
            rs_copy_into = conn.cursor(DictCursor).execute(sql_copy_into)
            for row_copy in rs_copy_into:
                file_name_in_stage = row_copy["FILE_NAME"]
                sql_get_to_local = f"""
GET #priya_stage/{file_name_in_stage} file:///xxx/apps/xxx/xxx/inbound/zip_files/{inbound}/"""
                rs_get_to_local = conn.cursor(DictCursor).execute(sql_get_to_local)
except snowflake.connector.errors.ProgrammingError as e:
    print('Error {0} ({1}): {2} ({3})'.format(e.errno, e.sqlstate, e.msg, e.sfqid))
finally:
    # The original created a brand-new cursor just to close it, and raised
    # NameError here whenever connect() itself had failed.
    if conn is not None:
        conn.close()
Error
Traceback (most recent call last):
File "Generic_local.py", line 52, in <module>
rs_get_to_local = conn.cursor(dict).execute(sql_get_to_local)
File "/usr/local/lib64/python3.6/site-packages/snowflake/connector/cursor.py", line
746, in execute
sf_file_transfer_agent.execute()
File "/usr/local/lib64/python3.6/site-
packages/snowflake/connector/file_transfer_agent.py", line 379, in execute
self._transfer_accelerate_config()
File "/usr/local/lib64/python3.6/site-
packages/snowflake/connector/file_transfer_agent.py", line 671, in
_transfer_accelerate_config
self._use_accelerate_endpoint = client.transfer_accelerate_config()
File "/usr/local/lib64/python3.6/site-
packages/snowflake/connector/s3_storage_client.py", line 572, in
transfer_accelerate_config
url=url, verb="GET", retry_id=retry_id, query_parts=dict(query_parts)
File "/usr/local/lib64/python3.6/site-
packages/snowflake/connector/s3_storage_client.py", line 353, in _.
send_request_with_authentication_and_retry
verb, generate_authenticated_url_and_args_v4, retry_id
File "/usr/local/lib64/python3.6/site-
packages/snowflake/connector/storage_client.py", line 313, in
_send_request_with_retry
f"{verb} with url {url} failed for exceeding maximum retries."
snowflake.connector.errors.RequestExceedMaxRetryError: GET with url b'https://xxx-
xxxxx-xxx-x-customer-stage.xx.amazonaws.com/https://xxx-xxxxx-xxx-x-customer-
stage.xx.amazonaws.com/?accelerate' failed for exceeding maximum retries.
This link redirects me to an error message.
https://xxx-
xxxxx-xxx-x-customer-stage.xx.amazonaws.com/https://xxx-xxxxx-xxx-x-customer-
stage.xx.amazonaws.com/?accelerate
Access Denied error :
<Error>
<Code>AccessDenied</Code>
<Message>Access Denied</Message>
<RequestId>1X1Z8G0BTX8BAHXK</RequestId>
<HostId>QqdCqaSK7ogAEq3sNWaQVZVXUGaqZnPv78FiflvVzkF6nSYXTSKu3iSiYlUOU0ka+0IMzErwGC4=</HostId>
</Error>
I am iterating through a list of urls from a csv file trying to locate their sitemaps, however, I am getting a weird leading space issue that's causing an error to occur when requests processes each url. I'm trying to figure out what's causing this space to be generated and what type of space it is. I believe something funky is happening with strip() because I can get this to run fine when copying and pasting a url into requests. I am just not sure what type of space this is and what's causing it to occur.
Wondering if anyone else is having or had this issue?
So far I have tried to solve using the following methods:
replace()
"".join(split())
regex
Here is my code:
# 'utf-8-sig' transparently drops a UTF-8 BOM (\ufeff) at the start of the
# file -- the classic source of an "invisible leading space" that str.strip()
# does NOT remove and that makes requests reject the URL with InvalidSchema.
with open('links.csv', encoding='utf-8-sig') as f:
    for line in f:
        # Strip ordinary whitespace plus the invisible characters that
        # survive a plain strip(): BOM and zero-width space.
        strdomain = line.strip().strip('\ufeff\u200b')
        if strdomain:
            domain = strdomain
            # Build a bare filename from the domain,
            # e.g. https://site1.com/ -> site1com.txt
            fix_domain = domain.replace('https://', '').replace('www', '').replace('/', '').replace('.', '').replace(' ', '')
            ofile = fix_domain + '.txt'  # args.ofile
            domain_rem = domain
            map = find_sitemap.get_sitemap(domain_rem + "sitemap.xml")
            url_info = find_sitemap.parse_sitemap(map)
            print("Found {0} urls".format(len(url_info)))
            new_urls = []
            for u in url_info:
                new_urls.append(u)
                print(u)
links.csv look like the following with just one column:
https://site1.com/
https://site2.com/
https://site3.com/
I printed domain and strdomain and even added the word "this" next to the variable domain so you can see the space being produced clearly:
Here is the error I receive in full when running (you will notice there is no leading space within the url after I've copied and pasted from the terminal into here however I provide an image of my terminal below so you can see it):
Traceback (most recent call last):
File "/Users/natehurwitz/PROJECTS/axis/axis/apps/axisDataFinder/map_website.py", line 358, in <module>
main()
File "/Users/natehurwitz/PROJECTS/axis/axis/apps/axisDataFinder/map_website.py", line 318, in main
map = find_sitemap.get_sitemap(domain_rem+"sitemap.xml")
File "/Users/natehurwitz/PROJECTS/axis/axis/apps/axisDataFinder/find_sitemap.py", line 5, in get_sitemap
get_url = requests.get(url)
File "/Users/natehurwitz/Library/Caches/pypoetry/virtualenvs/axis-eSvach19-py3.9/lib/python3.9/site-packages/requests/api.py", line 72, in get
return request('get', url, params=params, **kwargs)
File "/Users/natehurwitz/Library/Caches/pypoetry/virtualenvs/axis-eSvach19-py3.9/lib/python3.9/site-packages/requests/api.py", line 58, in request
return session.request(method=method, url=url, **kwargs)
File "/Users/natehurwitz/Library/Caches/pypoetry/virtualenvs/axis-eSvach19-py3.9/lib/python3.9/site-packages/requests/sessions.py", line 522, in request
resp = self.send(prep, **send_kwargs)
File "/Users/natehurwitz/Library/Caches/pypoetry/virtualenvs/axis-eSvach19-py3.9/lib/python3.9/site-packages/requests/sessions.py", line 636, in send
adapter = self.get_adapter(url=request.url)
File "/Users/natehurwitz/Library/Caches/pypoetry/virtualenvs/axis-eSvach19-py3.9/lib/python3.9/site-packages/requests/sessions.py", line 727, in get_adapter
raise InvalidSchema("No connection adapters were found for '%s'" % url)
requests.exceptions.InvalidSchema: No connection adapters were found for 'https://blkgrn.com/sitemap.xml'
Here is where you can see the leading space that occurs
Here is the code for "find_sitemap.py":
from bs4 import BeautifulSoup
import requests
def get_sitemap(url):
    """Fetch the sitemap at *url* and return its body text.

    Returns None (after printing a notice) when the server does not
    answer with HTTP 200.

    The URL is stripped of surrounding whitespace first: a stray
    leading space (e.g. left over from string concatenation) makes
    requests raise InvalidSchema ("No connection adapters were found"),
    which is exactly the failure seen in the traceback above.
    """
    get_url = requests.get(url.strip())
    if get_url.status_code == 200:
        return get_url.text
    else:
        print ('Unable to fetch sitemap: %s.' % url)
def process_sitemap(s):
    """Parse sitemap XML text *s* into a list of entry dicts.

    Each dict carries 'loc' and 'tag' (the parent element's name,
    'url' or 'sitemap'), plus 'lastmod' / 'changeFreq' / 'priority'
    when the corresponding child element is present.

    Bug fix: BeautifulSoup's "lxml" parser is an HTML parser and
    lowercases every tag name, and the sitemap spec's element is
    lowercase `<changefreq>` anyway, so the original attribute lookup
    `loc.parent.changeFreq` could never match and that field was
    silently dropped. The lookup must be `changefreq`; the output
    dict key is kept as 'changeFreq' for existing consumers.
    """
    soup = BeautifulSoup(s, "lxml")
    result = []
    for loc in soup.findAll('loc'):
        item = {}
        item['loc'] = loc.text
        item['tag'] = loc.parent.name
        if loc.parent.lastmod is not None:
            item['lastmod'] = loc.parent.lastmod.text
        if loc.parent.changefreq is not None:
            item['changeFreq'] = loc.parent.changefreq.text
        if loc.parent.priority is not None:
            item['priority'] = loc.parent.priority.text
        result.append(item)
    return result
def is_sub_sitemap(entry):
    """Return True when *entry* points at a nested sitemap file.

    An entry is a nested sitemap when its location ends in '.xml' and
    its parent tag was '<sitemap>' rather than '<url>'.
    """
    return entry['loc'].endswith('.xml') and entry['tag'] == 'sitemap'
def parse_sitemap(s):
    """Flatten sitemap text *s* into a list of page entries.

    Treats the pending entries as a stack: nested sitemap entries are
    fetched and their contents pushed back for processing, while plain
    page entries go straight into the result.
    """
    pending = process_sitemap(s)
    pages = []
    while pending:
        entry = pending.pop()
        if not is_sub_sitemap(entry):
            pages.append(entry)
            continue
        # Expand a nested sitemap in place of its index entry.
        sub_text = get_sitemap(entry['loc'])
        pending.extend(process_sitemap(sub_text))
    return pages
I am trying to get the email addresses of AD group members of a particular LDAP group using python.
I have the following code. The `print m` statement outputs something like the lines below.
Output:
CN=Admin_abc20,OU=Admin ID's,OU=TEST1,DC=other_example,DC=example,DC=com
CN=leterd,OU=Employees,OU=BACD,DC=na,DC=example,DC=com
CN=mytest37,OU=Employees,OU=SUNPH,DC=na,DC=example,DC=com
CN=Doe Mestre\, John,OU=Partners & Contractors,OU=TEST1,DC=other_example,DC=example,DC=com
CN=Robin\, Mark [ABCD],OU=Partners & Contractors,OU=JJCUS,DC=na,DC=example,DC=com
CN=San Irdondo\, Paul [TEST1 Non-ABC],OU=Partners & Contractors,OU=TEST1,DC=other_example,DC=example,DC=com
My Code:
def get_group_members(group_name, ad_conn, basedn=AD_USER_BASEDN):
members = []
ad_filter = AD_GROUP_FILTER.replace('My_Group_Name', group_name)
result = ad_conn.search_s(basedn, ldap.SCOPE_SUBTREE, ad_filter)
if result:
if len(result[0]) >= 2 and 'member' in result[0][1]:
members_tmp = result[0][1]['member']
for m in members_tmp:
print m
#email = ad_conn.search_s(m, ldap.SCOPE_SUBTREE,'(objectClass=*)',['mail'])
#print email
Now, when I uncomment the last two lines of my code to get each person's email address, I get the following error. Please note that I have changed my company's LDAP identifiers to example/test.
Can you please help me with this? I am a newbie to python.
Traceback (most recent call last):
File "/app/abc/python/Test_new.py", line 81, in <module>
group_members = get_group_members(group_name, ad_conn)
File "/app/abc/python/Test_new.py", line 58, in get_group_members
email = ad_conn.search_s(m, ldap.SCOPE_SUBTREE,'(objectClass=*)', ['mail'])
File "/usr/lib64/python2.6/site-packages/ldap/ldapobject.py", line 516, in search_s
return self.search_ext_s(base,scope,filterstr,attrlist,attrsonly,None,None,timeout=self.timeout)
File "/usr/lib64/python2.6/site-packages/ldap/ldapobject.py", line 510, in search_ext_s
return self.result(msgid,all=1,timeout=timeout)[1]
File "/usr/lib64/python2.6/site-packages/ldap/ldapobject.py", line 436, in result
res_type,res_data,res_msgid = self.result2(msgid,all,timeout)
File "/usr/lib64/python2.6/site-packages/ldap/ldapobject.py", line 440, in result2
res_type, res_data, res_msgid, srv_ctrls = self.result3(msgid,all,timeout)
File "/usr/lib64/python2.6/site-packages/ldap/ldapobject.py", line 446, in result3
ldap_result = self._ldap_call(self._l.result3,msgid,all,timeout)
File "/usr/lib64/python2.6/site-packages/ldap/ldapobject.py", line 96, in _ldap_call
result = func(*args,**kwargs)
ldap.REFERRAL: {'info': 'Referral:\nldap://ab.example.com/CN=Radfde3,OU=Partners%20&%20Contractors,OU=JANBE,DC=eu,DC=example,DC=com', 'desc': 'Referral'}
I don't know much about Python but I think your problem is with the LDAP filter. Try this for the last 2 lines of code:
email = ad_conn.search_s(m, ldap.SCOPE_SUBTREE,'(&(objectClass=person)(mail=*))')
print email
I hope this helps!
So I'm attempting to create a program that takes the genre and location of recently uploaded Soundcloud tracks and displays them on a map. I'm trying to gather the data using the Soundcloud API; however, at random points while the program is executing, an HTTPError gets thrown. Here's my code:
import urllib2

import requests
import soundcloud
# Authenticated Soundcloud API client (credential values redacted by the poster).
# NOTE(review): credentials are hard-coded; load them from a config file or
# environment variables instead of committing them to source.
client = soundcloud.Client(client_id="xxxxx",
client_secret="xxxxx",
username="xxxx",
password="xxxx")
def main():
for trackId in getAllTrackId('tracks/'):
try:
trackId = str(trackId)
userId = str(client.get('tracks/' + trackId).user_id)
genre = client.get('tracks/' + trackId).genre
country = client.get('users/' + userId).country
city = client.get('users/' + userId).city
user = client.get('tracks/' + trackId).user
except urllib2.HTTPError, e:
print e.geturl()
if (type(city) != None) & (type(country) != None) & (type(genre) != None):
try:
print 'User: ' + userId + '\t\tLocation: ' + city + ', ' + country + '\t\tGenre: ' + genre
except:
print trackId
def getAllTrackId(path):
    """Collect track ids from *path*, paging 200 at a time.

    Walks page offsets 0, 200, ... while the offset is still <= 1000,
    gathering each item's id, and returns them as one list.
    """
    page_size = 200
    cursor = 0
    collected = []
    current_page = client.get(path, limit=page_size, offset=cursor)
    #while (len(current_page) != 0):
    while cursor <= 1000:
        collected.extend(item.id for item in current_page)
        cursor += page_size
        current_page = client.get(path, limit=page_size, offset=cursor)
    return collected
# Only run the crawl when executed as a script, not when imported.
if __name__ == '__main__':
    main()
I'm trying to figure out how to catch the error, but also why it is thrown in the first place. I'm new to programming, so I'm sure there are many errors in my code, but if anyone could help me through this it would be awesome :)
(env)xxxxxxx-MacBook-Pro-2:soundcloudmap xxxxxxxxxx$ python soundcloudmap.py
User: 1278531 Location: Detroit, United States Genre: HSB
User: 1139662 Location: Brasilia, Brazil Genre: Beat
159333532
User: 23129248 Location: Cadiz, Spain Genre: Breaks
159333523
User: 10761166 Location: Wadsworth, United States Genre: Say
Traceback (most recent call last):
File "soundcloudmap.py", line 43, in <module>
main()
File "soundcloudmap.py", line 28, in main
userId = str(client.get('tracks/' + trackId).user_id)
File "/Users/michaelspearman/code/soundcloudmap/env/lib/python2.7/site-packages/soundcloud/client.py", line 130, in _request
return wrapped_resource(make_request(method, url, kwargs))
File "/Users/michaelspearman/code/soundcloudmap/env/lib/python2.7/site-packages/soundcloud/request.py", line 134, in make_request
result.raise_for_status()
File "/Users/michaelspearman/code/soundcloudmap/env/lib/python2.7/site-packages/requests/models.py", line 795, in raise_for_status
raise HTTPError(http_error_msg, response=self)
requests.exceptions.HTTPError: 404 Client Error: Not Found
EDIT:
To catch the error I just need to follow the path of the error correctly.. duhh
except urllib2.HTTPError, e:
should be
except requests.exceptions.HTTPError as e:
However I'm still not sure why it's being thrown..
From the Soundclould Python documentation:
tracks = client.get('/tracks', limit=10)
compare this to yours:
client.get('tracks/' + trackId).user_id
More specifically, you are missing the leading /. Adding this should work.