Im making a backend script to manage some stuff for a front end. I try to send multiple pictures back as a response to a HTTP request but i get an error.
Python code on the server:
#app.route('/get_place_content/<locid>', methods=['GET']) # type: ignore
def placecontent(locid):
mydb = fun.connectDB()
cursor = mydb.cursor() #type: ignore
token = request.headers.get('token')
if fun.authenticate(token) or True:
query = "select photos,videos,mapscreen from locdic where locid = " + str(locid)
cursor.execute(query)
for entry in cursor:
pic = entry[0]
print(pic)
vid = entry[1]
print(vid)
pics = pic.split(',')
vids = vid.split(',')
map = entry[2]
print(str(pics) + str(vids) + str(map))
return (send_file(os.path.join(app.config['UPLOAD_FOLDER2'], pics[0]), mimetype='image/gif'),send_file(os.path.join(app.config['UPLOAD_FOLDER2'], pics[1]), mimetype='image/gif'),send_file(os.path.join(app.config['UPLOAD_FOLDER2'], pics[2]), mimetype='image/gif'),send_file(os.path.join(app.config['UPLOAD_FOLDER2'], vids[0]), mimetype='video/mp4'),send_file(os.path.join(app.config['UPLOAD_FOLDER2'], map), mimetype='image/gif')),202
and i get this back:
connect to database ---- OK connect to database ---- OK disconnect from database ---- OK 9d3e358f8770edde764603a2ffa1726bdfc3a8e0_2490.jpg,202426be23c5e3d5photo_2023-01-17_16-50-13.jpg,4f70fc06barcodes.jpg 4a0ba73fredditsave.com_this_remote_surgery_was_made_by_a_london_surgeon-iymq8cxnanba1.mp4 ['9d3e358f8770edde764603a2ffa1726bdfc3a8e0_2490.jpg', '202426be23c5e3d5photo_2023-01-17_16-50-13.jpg', '4f70fc06barcodes.jpg']['4a0ba73fredditsave.com_this_remote_surgery_was_made_by_a_london_surgeon-iymq8cxnanba1.mp4']5622e8eaScreenshot 2023-02-02 103254.png [2023-02-02 10:49:29,060] ERROR in app: Exception on /get_place_content/10 [GET] Traceback (most recent call last): File "/home/blank/.local/lib/python3.10/site-packages/flask/app.py", line 2525, in wsgi_app response = self.full_dispatch_request() File "/home/blank/.local/lib/python3.10/site-packages/flask/app.py", line 1823, in full_dispatch_request return self.finalize_request(rv) File "/home/blank/.local/lib/python3.10/site-packages/flask/app.py", line 1842, in finalize_request response = self.make_response(rv) File "/home/blank/.local/lib/python3.10/site-packages/flask/app.py", line 2170, in make_response raise TypeError( TypeError: The view function did not return a valid response. The return type must be a string, dict, list, tuple with headers or status, Response instance, or WSGI callable, but it was a tuple. 80.106.135.81 - - [02/Feb/2023 10:49:29] "GET /get_place_content/10 HTTP/1.1" 500 -
the "fun" thing in the code is an external script i made so i dont have to type the same stuff over and over again.
Thank you in advance
EDIT:
tried to change the formating to this but still no luck
#app.route('/get_place_content/<locid>', methods=['GET']) # type: ignore
def placecontent(locid):
mydb = fun.connectDB()
cursor = mydb.cursor() #type: ignore
token = request.headers.get('token')
if fun.authenticate(token) or True:
query = "select photos,videos,mapscreen from locdic where locid = " + str(locid)
cursor.execute(query)
for entry in cursor:
pic = entry[0]
vid = entry[1]
pics = pic.split(',')
vids = vid.split(',')
map = entry[2]
print(str(pics) + str(vids) + str(map))
pic1 = pics[0]
pic2 = pics[1]
pic3 = pics[2]
vid1 = vids[0]
fun.disconnectDB(False,mydb)
return (send_file(os.path.join(app.config['UPLOAD_FOLDER2'], pic1), mimetype='image/gif'),send_file(os.path.join(app.config['UPLOAD_FOLDER2'], pic2), mimetype='image/gif'),send_file(os.path.join(app.config['UPLOAD_FOLDER2'], pic3), mimetype='image/gif'),send_file(os.path.join(app.config['UPLOAD_FOLDER2'], vid1), mimetype='video/mp4'),send_file(os.path.join(app.config['UPLOAD_FOLDER2'], map), mimetype='image/gif')),202
The functionality of this script to shutdown GCP VM's based on some logic. Currently we are trying shut it down at night, but the scripts are failing before it could shutdown the VM. We shutdown based on time. At evenings and night the VM are shutdown using these scripts
Script to turn off instances overnight
Environment: Runs in Google Cloud Functions (Python3.7)
import datetime
import json
from pprint import pformat
import pytz
import re
import modules.common.cfcommon as cfcommon
import modules.utilities.dateutilities as dateutilities
from modules.compute.instances import InstanceList, Instance
from modules.compute.compute_service import ComputeServiceContext
from modules.utilities.printutilities import print_message, debug_message
from modules.pubsub.topic import PublishMessage
from modules.common.labels import VMAUTOSHUTDOWN_LABEL, VMAUTOSHUTDOWN_DEFER_LABEL, ShutdownDeferLabelValueIsValid, ShutdownLabelValueIsValid
from templates.renderer import render_template
# Takes a list in the following format and checks if the 'Instance' object is within it
# list must contain dictionaries in the following format:
# {"name": "instancename", "zone": "zonename"}
# Example: {"name": "test-01", "zone": "us-east4-c"}
#
# Parameters:
# inputList - list of dictionary objects
# instance - Instance object
def isInstanceInList(inputList, instance):
if not isinstance(inputList, list):
raise TypeError("Provided inputList is not a list")
if not isinstance(instance, Instance):
raise TypeError("Provided instance is not of type 'Instance'")
# Iterate over every item in inputList and check if the name and zone match
for cItem in inputList:
if cItem["name"].lower() == instance.properties["name"].lower() and cItem["zone"].lower() == instance.GetShortZoneName().lower():
return True
# No match found
return False
# Takes a list of Instance objects and sees if their shutdown timezone is within the graceperiod of the shutdownHour
#
# Example: is shutdown hour is 23 and the gracePeriodMin is 15 then if the function is called at 23:12, the instance will be included in the shutdown list
#
# Parameters:
# instanceList - List of Instance objects
# shutdownHour - number (0-23) 0 = Midnight, 23 = 11PM
# gracePeriodMin - number
def getInstancesToStop(instanceList, gracePeriodMin):
instancesToStop = []
debug_message("Entering getInstancesToStop")
for cInstance in instanceList:
debug_message("Instance: %s (ID: %s, Zone: %s, Project: %s)" % (cInstance.GetName(), cInstance.GetId(), cInstance.GetShortZoneName(), cInstance.project))
labels = cInstance.GetLabels()
if VMAUTOSHUTDOWN_LABEL in labels.keys():
labelValue = labels.get(VMAUTOSHUTDOWN_LABEL, '')
pattern = '\d\d-\d\d-\d\d'
match = re.match(pattern, labelValue)
if not match or not ShutdownLabelValueIsValid(labelValue):
debug_message(f'Label {labelValue} does not match the correct format')
instancesToStop.append(cInstance)
continue
else:
debug_message(f'Label {VMAUTOSHUTDOWN_LABEL} not found. Adding to shutdown list')
instancesToStop.append(cInstance)
continue
shutdown_deferred_utc_datetime = None
if VMAUTOSHUTDOWN_DEFER_LABEL in labels.keys():
labelValue = labels.get(VMAUTOSHUTDOWN_DEFER_LABEL, '')
pattern = '\d\d\d\d-\d\d-\d\dt\d\d-\d\d-\d\d'
match = re.match(pattern, labelValue)
if match and ShutdownDeferLabelValueIsValid(labelValue):
shutdown_deferred_utc_date, shutdown_deferred_utc_time = labelValue.split('t')
year, month, day = shutdown_deferred_utc_date.split('-')
hour, minute, second = shutdown_deferred_utc_time.split('-')
shutdown_deferred_utc_datetime = datetime.datetime.now(pytz.timezone('GMT')).replace(
year=int(year), month=int(month), day=int(day), hour=int(hour), minute=int(minute), second=int(second)
)
else:
debug_message(f'Label {labels[VMAUTOSHUTDOWN_DEFER_LABEL]} does not match the correct format')
instancesToStop.append(cInstance)
continue
current_utc_time = dateutilities.get_current_datetime()
# If defer date is in the future, and not in grace window time, skip shutting down
if shutdown_deferred_utc_datetime is not None and shutdown_deferred_utc_datetime > current_utc_time:
debug_message(f'Instance {cInstance.GetName()} shutdown deferred until after {labels[VMAUTOSHUTDOWN_DEFER_LABEL]}')
continue
# If defer time is in past, continue with the vm hour shutdown
shutdown_utc_hour = labels[VMAUTOSHUTDOWN_LABEL].split('-')[0]
# Convert shutdown UTC hour into datetime object
shutdown_utc_time = datetime.datetime.now(pytz.timezone('GMT')).replace(hour=int(shutdown_utc_hour), minute=0, second=0)
shutdown_utc_grace_time = shutdown_utc_time + datetime.timedelta(minutes=gracePeriodMin)
debug_message(f"Shutdown UTC time {shutdown_utc_time}")
debug_message(f"Shutdown UTC grace time {shutdown_utc_grace_time}")
# Check if shutdown is within time window
if current_utc_time >= shutdown_utc_time and current_utc_time <= shutdown_utc_grace_time:
debug_message("We're in the time window")
instancesToStop.append(cInstance)
else:
debug_message("We're outside the time window. Not adding to stop list")
return instancesToStop
# This is the main entry point that cloud functions calls
def AutoStopVMInstances(config, policy=None, payload=None, generate_local_report=False):
FUNCTION_NAME = "AutoStopVMInstances"
# Populated by config later...
QUERY_PROJECT_IDS = None # List of project IDs
INSTANCE_WHITELIST = None # List of dictionaries in format {"name": "instancename", "zone": "zonename", "project": "projectid"}
PREVIEW_MODE = True
SHUTDOWN_GRACEPERIOD_MIN = 30
# Start
startTime = datetime.datetime.now()
print_message("Started %s within Cloud Function %s [%s]" % (FUNCTION_NAME, cfcommon.CLOUD_FUNCTION_NAME, startTime))
debug_message("")
# For ease of access, assign from config values
debug_message("Processing Configuration...")
QUERY_PROJECT_IDS = config.get("QueryProjectIDs", []) # Required field
INSTANCE_WHITELIST = config.get("InstanceWhiteList", []) # Optional Field
PREVIEW_MODE = config.get("PreviewMode", True) # Required field
SHUTDOWN_GRACEPERIOD_MIN = config.get("ShutdownGracePeriodMin", None) # Required field
SKIP_INSTANCE_GROUPS = config.get("SkipInstanceGroups", False) # Optional
EMAIL_PUB_SUB_PROJECT = config.get("EmailPubSubProject", None) # Optional
EMAIL_PUB_SUB_TOPIC = config.get("EmailPubSubTopic", None) # Optional
EMAIL_TO = config.get("EmailTo", []) # Optional
EMAIL_CC = config.get("EmailCC", None) # Optional
EMAIL_BCC = config.get("EmailBCC", None) # Optional
EMAIL_FROM = config.get("EmailFrom", "noreply-ei-cs-cloudops-resource-administration#ei-cs-cloudops.local") # Optional
EMAIL_SUBJECT = config.get("EmailSubject", "Nightly VM Instance Shutdown Summary") # Optional
cfLogger = cfcommon.CloudFunctionLog()
# Validate whitelist
if INSTANCE_WHITELIST is None:
raise Exception("Unable to get whitelist")
debug_message("Whitelist loaded:")
debug_message(pformat((INSTANCE_WHITELIST)))
# Re-init Compute service - execution environment in cloud functions can be shared among each other. Let's re-init our connection every execution.
ComputeServiceContext.InitComputeService()
# Build the service object.
allRunningInstances = []
for cProjectId in QUERY_PROJECT_IDS:
debug_message("Checking Project: %s" % (cProjectId))
# Main Loop - Let's get and analyze all instances from our project
# Paginated within the 'request' object
runningInstances = []
allInstances = []
debug_message("Building Instance List...", end="")
instances = InstanceList(cProjectId)
instances.PopulateInstances()
debug_message("Done")
for cInstance in instances.GetAllInstances():
debug_message("Found Instance %s in %s [%s - %s]" % (cInstance.GetName(), cInstance.GetZone(), cInstance.GetId(), cInstance.GetStatus()))
# Check if whitelisted. If it is, skip it
if isInstanceInList(INSTANCE_WHITELIST, cInstance):
debug_message(" Instance is whitelisted. Skipping.")
continue
# Check if we should skip instance groups
if SKIP_INSTANCE_GROUPS and cInstance.IsWithinInstanceGroup():
debug_message(" Instance is within an instance group. Skipping.")
continue
debug_message(" Is Running: %s" % (cInstance.IsRunning()))
owner = cInstance.GetOwner()
if owner in ("devops", "ei devops", "eicsdevopseng"):
debug_message("Skipping instance owned by devops")
continue
# # TODO: FOR USE WHEN TESTING
# if VMAUTOSHUTDOWN_LABEL not in labels.keys():
# continue
# Keep track of this instance
allInstances.append(cInstance)
# If it's running, it's a candidate to stop
if cInstance.IsRunning():
runningInstances.append(cInstance)
# Handle no instances found
if len(allInstances) == 0:
debug_message("INFO: No Instances found.")
# Summarize for user
debug_message("")
if len(runningInstances) > 0:
debug_message("Found %s/%s non-whitelisted instances are running (project: %s)" % (len(runningInstances), len(allInstances), cProjectId))
else:
debug_message("All %s non-whitelisted instances are good (project: %s)" % (len(allInstances), cProjectId))
# Main loop to stop
debug_message("")
allRunningInstances = allRunningInstances + runningInstances
instancesToBeStopped = getInstancesToStop(allRunningInstances, SHUTDOWN_GRACEPERIOD_MIN)
stoppedCount = 0
instanceSummary = []
if len(instancesToBeStopped) == 0:
print_message("No instances are due to be stopped")
else:
for cInstance in instancesToBeStopped:
summaryEntry = {
"Name": cInstance.GetName(),
"ID": cInstance.GetId(),
"Zone": cInstance.GetShortZoneName(),
"Project": cInstance.GetProject(),
"Preview": PREVIEW_MODE,
"Stopped": False,
"InstanceLink": cInstance.GetSelfLinkToConsole()
}
logMessage = "Stopping Instance: {name} (ID: {id}, Zone: {zone}, Project: {project})".format(
name=summaryEntry.get("Name"),
id=summaryEntry.get("ID"),
zone=summaryEntry.get("Zone"),
project=summaryEntry.get("Project")
)
if PREVIEW_MODE:
print_message("(PREVIEW) " + logMessage )
else:
print_message(logMessage)
cInstance.Stop()
summaryEntry["Stopped"] = True
stoppedCount += 1
instanceSummary.append(summaryEntry)
if EMAIL_PUB_SUB_PROJECT is not None and EMAIL_PUB_SUB_TOPIC is not None:
debug_message("It looks like we have an email config. Attempting to send email")
emailBody = render_template(
'shutdown_report',
instance_summary=instanceSummary,
config=json.dumps(config, indent=4, sort_keys=True),
preview_mode=PREVIEW_MODE,
generation_time=datetime.datetime.now().astimezone(pytz.utc)
)
emailPayload = {
"To": EMAIL_TO,
"From": EMAIL_FROM,
"Subject": EMAIL_SUBJECT,
"BodyHtml": emailBody
}
if EMAIL_CC is not None:
emailPayload["CC"] = EMAIL_CC
if EMAIL_BCC is not None:
emailPayload["BCC"] = EMAIL_BCC
if not generate_local_report:
print_message("Sending email...", end="")
PublishMessage(EMAIL_PUB_SUB_PROJECT, EMAIL_PUB_SUB_TOPIC, json.dumps(emailPayload))
else:
print_message('Generating local HTML report')
with open('./html_reports/shutdown_report.html', 'w') as r:
r.write(emailBody)
print_message("Done")
# We want to log a nice structured json line to stackdriver for easy reporting.
cfLogger.log({
"StartTime": startTime.isoformat(),
"InstancesStoppedCount": stoppedCount,
"PreviewMode": PREVIEW_MODE,
"Instances": instanceSummary,
"Whitelist": INSTANCE_WHITELIST,
"EndTime": datetime.datetime.now().isoformat(),
"LogLine": "Summary"
})
print_message("DONE [%s]" % (datetime.datetime.now()))
The error I'm getting when trying to run a VM shutdown script:
Caught exception while running VMNightlyShutdown. Exception Text: Traceback (most recent call last):
File "/workspace/main.py", line 248, in StartCloudFunction
policy=policy_config)
File "/workspace/vm_nightly_shutdown.py", line 256, in AutoStopVMInstances
cInstance.Stop()
File "/workspace/modules/compute/instances.py", line 729, in Stop
stop_instance(self.project, self.GetShortZoneName(), self.GetName(), waitForCompletion=waitForCompletion)
File "/workspace/modules/compute/instances.py", line 45, in stop_instance
return wait_for_operation(project, zone, response["name"])
File "/workspace/modules/compute/instances.py", line 27, in wait_for_operation
operation=operation
File "/layers/google.python.pip/pip/lib/python3.7/site-packages/googleapiclient/_helpers.py", line 130, in positional_wrapper
return wrapped(*args, **kwargs)
File "/layers/google.python.pip/pip/lib/python3.7/site-packages/googleapiclient/http.py", line 932, in execute
headers=self.headers,
File "/layers/google.python.pip/pip/lib/python3.7/site-packages/googleapiclient/http.py", line 222, in _retry_request
raise exception
File "/layers/google.python.pip/pip/lib/python3.7/site-packages/googleapiclient/http.py", line 191, in _retry_request
resp, content = http.request(uri, method, *args, **kwargs)
File "/layers/google.python.pip/pip/lib/python3.7/site-packages/google_auth_httplib2.py", line 225, in request
**kwargs
File "/layers/google.python.pip/pip/lib/python3.7/site-packages/httplib2/__init__.py", line 1721, in request
conn, authority, uri, request_uri, method, body, headers, redirections, cachekey,
File "/layers/google.python.pip/pip/lib/python3.7/site-packages/httplib2/__init__.py", line 1440, in _request
(response, content) = self._conn_request(conn, request_uri, method, body, headers)
File "/layers/google.python.pip/pip/lib/python3.7/site-packages/httplib2/__init__.py", line 1392, in _conn_request
response = conn.getresponse()
File "/layers/google.python.runtime/python/lib/python3.7/http/client.py", line 1373, in getresponse
response.begin()
File "/layers/google.python.runtime/python/lib/python3.7/http/client.py", line 319, in begin
version, status, reason = self._read_status()
File "/layers/google.python.runtime/python/lib/python3.7/http/client.py", line 280, in _read_status
line = str(self.fp.readline(_MAXLINE + 1), "iso-8859-1")
File "/layers/google.python.runtime/python/lib/python3.7/socket.py", line 589, in readinto
return self._sock.recv_into(b)
File "/layers/google.python.runtime/python/lib/python3.7/ssl.py", line 1071, in recv_into
return self.read(nbytes, buffer)
File "/layers/google.python.runtime/python/lib/python3.7/ssl.py", line 929, in read
return self._sslobj.read(len, buffer)
ssl.SSLError: [SSL: DECRYPTION_FAILED_OR_BAD_RECORD_MAC] decryption failed or bad record mac (_ssl.c:2570)",
}
I am running all the sql scripts under the scripts path in a for loop and copying the data into #priya_stage area in snowflake and then using GET command , i am unloading data from stage area to my Unix path in csv format. But I am getting error.
Note: this same code works on my MAC but not on unix server.
import logging
import os
import snowflake.connector
from snowflake.connector import DictCursor as dict
from os import walk
try:
conn = snowflake.connector.connect(
account = 'xxx' ,
user = 'xxx' ,
password = 'xxx' ,
database = 'xxx' ,
schema = 'xxx' ,
warehouse = 'xxx' ,
role = 'xxx' ,
)
conn.cursor().execute('USE WAREHOUSE xxx')
conn.cursor().execute('USE DATABASE xxx')
conn.cursor().execute('USE SCHEMA xxx')
take = []
scripts = '/xxx/apps/xxx/xxx/scripts/snow/scripts/'
os.chdir('/xxx/apps/xxx/xxx/scripts/snow/scripts/')
for root , dirs , files in walk(scripts):
for file in files:
inbound = file[0:-4]
sql = open(file , 'r').read()
# file_number = 0
# file_number += 1
file_prefix = 'bridg_' + inbound
file_name = file_prefix
result_query = conn.cursor(dict).execute(sql)
query_id = result_query.sfqid
sql_copy_into = f'''
copy into #priya_stage/{file_name}
from (SELECT * FROM TABLE(RESULT_SCAN('{query_id}')))
DETAILED_OUTPUT = TRUE
HEADER = TRUE
SINGLE = FALSE
OVERWRITE = TRUE
max_file_size=4900000000'''
rs_copy_into = conn.cursor(dict).execute(sql_copy_into)
for row_copy in rs_copy_into:
file_name_in_stage = row_copy["FILE_NAME"]
sql_get_to_local = f"""
GET #priya_stage/{file_name_in_stage} file:///xxx/apps/xxx/xxx/inbound/zip_files/{inbound}/"""
rs_get_to_local = conn.cursor(dict).execute(sql_get_to_local)
except snowflake.connector.errors.ProgrammingError as e:
print('Error {0} ({1}): {2} ({3})'.format(e.errno , e.sqlstate , e.msg , e.sfqid))
finally:
conn.cursor().close()
conn.close()
Error
Traceback (most recent call last):
File "Generic_local.py", line 52, in <module>
rs_get_to_local = conn.cursor(dict).execute(sql_get_to_local)
File "/usr/local/lib64/python3.6/site-packages/snowflake/connector/cursor.py", line
746, in execute
sf_file_transfer_agent.execute()
File "/usr/local/lib64/python3.6/site-
packages/snowflake/connector/file_transfer_agent.py", line 379, in execute
self._transfer_accelerate_config()
File "/usr/local/lib64/python3.6/site-
packages/snowflake/connector/file_transfer_agent.py", line 671, in
_transfer_accelerate_config
self._use_accelerate_endpoint = client.transfer_accelerate_config()
File "/usr/local/lib64/python3.6/site-
packages/snowflake/connector/s3_storage_client.py", line 572, in
transfer_accelerate_config
url=url, verb="GET", retry_id=retry_id, query_parts=dict(query_parts)
File "/usr/local/lib64/python3.6/site-
packages/snowflake/connector/s3_storage_client.py", line 353, in _.
send_request_with_authentication_and_retry
verb, generate_authenticated_url_and_args_v4, retry_id
File "/usr/local/lib64/python3.6/site-
packages/snowflake/connector/storage_client.py", line 313, in
_send_request_with_retry
f"{verb} with url {url} failed for exceeding maximum retries."
snowflake.connector.errors.RequestExceedMaxRetryError: GET with url b'https://xxx-
xxxxx-xxx-x-customer-stage.xx.amazonaws.com/https://xxx-xxxxx-xxx-x-customer-
stage.xx.amazonaws.com/?accelerate' failed for exceeding maximum retries.
This link redirects me to a error message .
https://xxx-
xxxxx-xxx-x-customer-stage.xx.amazonaws.com/https://xxx-xxxxx-xxx-x-customer-
stage.xx.amazonaws.com/?accelerate
Access Denied error :
<Error>
<Code>AccessDenied</Code>
<Message>Access Denied</Message>
<RequestId>1X1Z8G0BTX8BAHXK</RequestId>
<HostId>QqdCqaSK7ogAEq3sNWaQVZVXUGaqZnPv78FiflvVzkF6nSYXTSKu3iSiYlUOU0ka+0IMzErwGC4=</HostId>
</Error>
I am iterating through a list of urls from a csv file trying to locate their sitemaps, however, I am getting a weird leading space issue that's causing an error to occur when requests processes each url. I'm trying to figure out what's causing this space to be generated and what type of space it is. I believe something funky is happening with strip() because I can get this to run fine when copying and pasting a url into requests. I am just not sure what type of space this is and what's causing it to occur.
Wondering if anyone else is having or had this issue?
So far I have tried to solve using the following methods:
replace()
"".join(split())
regex
Here is my code:
with open('links.csv') as f:
for line in f:
strdomain = line.strip()
if strdomain:
domain = strdomain
fix_domain = domain.replace('https://', '').replace('www', '').replace('/', '').replace('.', '').replace(' ', '')
ofile = fix_domain + '.txt' # args.ofile
domain_rem = domain
map = find_sitemap.get_sitemap(domain_rem+"sitemap.xml")
url_info = find_sitemap.parse_sitemap(map)
print("Found {0} urls".format(len(url_info)))
new_urls = []
for u in url_info:
new_urls.append(u)
print(u)
links.csv look like the following with just one column:
https://site1.com/
https://site2.com/
https://site3.com/
I printed domain and strdomain and even added the word "this" next to the variable domain so you can see the space being produced clearly:
Here is the error I receive in full when running (you will notice there is no leading space within the url after I've copied and pasted from the terminal into here however I provide an image of my terminal below so you can see it):
Traceback (most recent call last):
File "/Users/natehurwitz/PROJECTS/axis/axis/apps/axisDataFinder/map_website.py", line 358, in <module>
main()
File "/Users/natehurwitz/PROJECTS/axis/axis/apps/axisDataFinder/map_website.py", line 318, in main
map = find_sitemap.get_sitemap(domain_rem+"sitemap.xml")
File "/Users/natehurwitz/PROJECTS/axis/axis/apps/axisDataFinder/find_sitemap.py", line 5, in get_sitemap
get_url = requests.get(url)
File "/Users/natehurwitz/Library/Caches/pypoetry/virtualenvs/axis-eSvach19-py3.9/lib/python3.9/site-packages/requests/api.py", line 72, in get
return request('get', url, params=params, **kwargs)
File "/Users/natehurwitz/Library/Caches/pypoetry/virtualenvs/axis-eSvach19-py3.9/lib/python3.9/site-packages/requests/api.py", line 58, in request
return session.request(method=method, url=url, **kwargs)
File "/Users/natehurwitz/Library/Caches/pypoetry/virtualenvs/axis-eSvach19-py3.9/lib/python3.9/site-packages/requests/sessions.py", line 522, in request
resp = self.send(prep, **send_kwargs)
File "/Users/natehurwitz/Library/Caches/pypoetry/virtualenvs/axis-eSvach19-py3.9/lib/python3.9/site-packages/requests/sessions.py", line 636, in send
adapter = self.get_adapter(url=request.url)
File "/Users/natehurwitz/Library/Caches/pypoetry/virtualenvs/axis-eSvach19-py3.9/lib/python3.9/site-packages/requests/sessions.py", line 727, in get_adapter
raise InvalidSchema("No connection adapters were found for '%s'" % url)
requests.exceptions.InvalidSchema: No connection adapters were found for 'https://blkgrn.com/sitemap.xml'
Here is where you can see the leading space that occurs
Here is the code for "find_sitemap.py":
from bs4 import BeautifulSoup
import requests
def get_sitemap(url):
get_url = requests.get(url)
if get_url.status_code == 200:
return get_url.text
else:
print ('Unable to fetch sitemap: %s.' % url)
def process_sitemap(s):
soup = BeautifulSoup(s, "lxml")
result = []
for loc in soup.findAll('loc'):
item = {}
item['loc'] = loc.text
item['tag'] = loc.parent.name
if loc.parent.lastmod is not None:
item['lastmod'] = loc.parent.lastmod.text
if loc.parent.changeFreq is not None:
item['changeFreq'] = loc.parent.changeFreq.text
if loc.parent.priority is not None:
item['priority'] = loc.parent.priority.text
result.append(item)
return result
def is_sub_sitemap(s):
if s['loc'].endswith('.xml') and s['tag'] == 'sitemap':
return True
else:
return False
def parse_sitemap(s):
sitemap = process_sitemap(s)
result = []
while sitemap:
candidate = sitemap.pop()
if is_sub_sitemap(candidate):
sub_sitemap = get_sitemap(candidate['loc'])
for i in process_sitemap(sub_sitemap):
sitemap.append(i)
else:
result.append(candidate)
return result
I am trying to get the profiles feed from my Google Apps domain using the gdata library supplied my Google for Python. This is my code
import atom
import gdata.auth
import gdata.contacts
import gdata.contacts.service
gd_client = gdata.contacts.service.ContactsService()
gd_client.email = 'name#domain.com'
gd_client.password = 'password'
gd_client.source = 'madeupgibberish'
gd_client.account_type = 'HOSTED'
gd_client.contact_list = 'domain.com'
gd_client.ProgrammaticLogin()
def PrintFeed(feed):
for i, entry in enumerate(feed.entry):
print '\n%s %s' % (i+1, entry.title.text)
max_results = raw_input(
'Enter max return: ')
feed_uri = gd_client.GetProfilesFeed()
query = gdata.contacts.service.ContactsQuery(feed_uri)
print(feed_uri)
query.max_results = max_results
#query.orderby='title'
feed = gd_client.GetContactsFeed(query.ToUri())
# Use the print feed method defined above.
PrintFeed(feed)
print(feed_uri)
#print feed
f = open('c:\\python27\\junk.xml', 'w')
f.write(str(feed))
f.close()
When I run this it returns:
C:\Python27\Lib\gdata-2.0.16>python contactAPI.py
Enter max return: 300
Traceback (most recent call last):
File "contactAPI.py", line 27, in <module>
feed_uri = gd_client.GetProfilesFeed()
File "build\bdist.win-amd64\egg\gdata\contacts\service.py", line 294, in GetProfilesFeed
File "build\bdist.win-amd64\egg\gdata\service.py", line 1108, in Get
gdata.service.RequestError: {'status': 403, 'body': 'Version 1.0 is not supported.', 'reason': 'Forbidden'}
I am able to use GetContactsFeed and other feeds, but I cannot get profiles. Any idea whats happening here or what I need to fix? Thank you in advance for your help.
The gdata.contacts.service uses the deprecated version of the API. You should use gdata.contacts.{client, data} instead}
Here is a sample getting users profiles.
import atom
import gdata.auth
import gdata.contacts
import gdata.contacts.client
email = 'admin#domain.com'
password = 'password'
domain = 'domain.com'
gd_client = gdata.contacts.client.ContactsClient(domain=domain)
gd_client.ClientLogin(email, password, 'madeupgibberish')
def PrintFeed(feed):
for i, entry in enumerate(feed.entry):
print '\n%s %s' % (i+1, entry.title.text)
feed_link = atom.data.Link(gd_client.GetFeedUri(kind='profiles'))
while feed_link:
profiles_feed = gd_client.GetProfilesFeed(uri=feed_link.href)
PrintFeed(profiles_feed)
feed_link = profiles_feed.GetNextLink()
The library's contact_sample.py and unshare_profiles.py work with the client, data files.