Python - Problem using loops to get data day by day

I'm trying to make a Google API downloader that will work around API restrictions.
Unfortunately, what I have does not work. Since I have not introduced loops, I have to change the dates by hand when downloading historical data, which is tedious. For new data there is no problem: the script generates data daily.
Below is the code where I try to download data from a specific range, day by day.
The problem occurs in this part of the code
def get_top_keywords(service, profile_id):
    for day_number in range(total_days):
        return service.data().ga().get(
            ids='ga:' + profile_id,
            start_date='1daysAgo',
            end_date=(start_date + dt.timedelta(days=day_number)).date(),
            metrics='ga:sessions, ga:newUsers, ga:users, ga:organicSearches, ga:pageviews, ga:bounceRate',
            dimensions='ga:date, ga:source, ga:medium',
            max_results='10000').execute()
Below is the whole code:
import argparse
import sys
reload(sys)
sys.setdefaultencoding('utf-8')
import csv
from __future__ import print_function
from googleapiclient.errors import HttpError
from googleapiclient import sample_tools
from oauth2client.client import AccessTokenRefreshError
import datetime as dt

start_date = dt.datetime(2019, 01, 1)
end_date = dt.datetime(2019, 01, 5)
total_days = (end_date - start_date).days + 1

def main(argv):
    # Authenticate and construct service.
    service, flags = sample_tools.init(
        argv, 'analytics', 'v3', __doc__, __file__,
        scope='https://www.googleapis.com/auth/analytics.readonly')
    # Try to make a request to the API. Print the results or handle errors.
    try:
        first_profile_id = '11111111111'  # Hard Code View Profile ID Here
        if not first_profile_id:
            print('Could not find a valid profile for this user.')
        else:
            results = get_top_keywords(service, first_profile_id)
            print_results(results)
    except TypeError as error:
        # Handle errors in constructing a query.
        print(('There was an error in constructing your query : %s' % error))
    except HttpError as error:
        # Handle API errors.
        print(('Arg, there was an API error : %s : %s' %
               (error.resp.status, error._get_reason())))
    except AccessTokenRefreshError:
        # Handle Auth errors.
        print('The credentials have been revoked or expired, please re-run '
              'the application to re-authorize')

def get_first_profile_id(service):
    accounts = service.management().accounts().list().execute()
    if accounts.get('items'):
        firstAccountId = accounts.get('items')[0].get('id')
        webproperties = service.management().webproperties().list(
            accountId=firstAccountId).execute()
        if webproperties.get('items'):
            firstWebpropertyId = webproperties.get('items')[0].get('id')
            profiles = service.management().profiles().list(
                accountId=firstAccountId,
                webPropertyId=firstWebpropertyId).execute()
            if profiles.get('items'):
                return profiles.get('items')[0].get('id')
    return None

def get_top_keywords(service, profile_id):
    for day_number in range(total_days):
        return service.data().ga().get(
            ids='ga:' + profile_id,
            start_date='1daysAgo',
            end_date=(start_date + dt.timedelta(days=day_number)).date(),
            metrics='ga:sessions, ga:newUsers, ga:users, ga:organicSearches, ga:pageviews, ga:bounceRate',
            dimensions='ga:date, ga:source, ga:medium',
            max_results='10000').execute()

def print_results(results):
    print()
    print('Profile Name: %s' % results.get('profileInfo').get('profileName'))
    print()
    # Open a file.
    filepath = '/temp/'  # change this to your actual file path
    filename = 'temp.csv'  # change this to your actual file name
    f = open(filepath + filename, 'wt')
    # Wrap file with a csv.writer
    writer = csv.writer(f, lineterminator='\n')
    # Write header.
    header = [h['name'][3:] for h in results.get('columnHeaders')]  # this takes the column headers and gets rid of the ga: prefix
    writer.writerow(header)
    print(''.join('%30s' % h for h in header))
    # Write data table.
    if results.get('rows', []):
        for row in results.get('rows'):
            writer.writerow(row)
            print(''.join('%30s' % r for r in row))
        print('\n')
        print('Success Data Written to CSV File')
        print('filepath = ' + filepath)
        print('filename = ' + filename)
    else:
        print('No Rows Found')
    # Close the file.
    f.close()

if __name__ == '__main__':
    main(sys.argv)
What should I improve so that the date automatically advances to the end of the range?
Why doesn't the loop work? I'm just starting with Python, but I'm trying.

Yes, I added a loop; I just did something wrong, because when I test the loop by itself it works, but when I apply it to the code connecting to the API it doesn't work anymore:
for day_number in range(total_days):
    return service.data().ga().get(
        ids='ga:' + profile_id,
        start_date='1daysAgo',
        end_date=(start_date + dt.timedelta(days=day_number)).date(),
        metrics='ga:sessions, ga:newUsers, ga:users, ga:organicSearches, ga:pageviews, ga:bounceRate',
        dimensions='ga:date, ga:source, ga:medium',
        max_results='10000').execute()
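The core problem is that return ends the function on the very first pass through the loop, so only one request is ever sent. Below is a minimal sketch of a fix, assuming the intent is one request per day with all responses collected in a list (note that the Core Reporting API takes dates as 'YYYY-MM-DD' strings, so it is safer to format them explicitly than to pass a date object):

def get_top_keywords(service, profile_id):
    all_results = []
    for day_number in range(total_days):
        # Format the day as the 'YYYY-MM-DD' string the API expects.
        day = (start_date + dt.timedelta(days=day_number)).strftime('%Y-%m-%d')
        results = service.data().ga().get(
            ids='ga:' + profile_id,
            start_date=day,  # query a single day at a time...
            end_date=day,    # ...so start and end are the same
            metrics='ga:sessions,ga:newUsers,ga:users,ga:organicSearches,ga:pageviews,ga:bounceRate',
            dimensions='ga:date,ga:source,ga:medium',
            max_results='10000').execute()
        all_results.append(results)
    return all_results

print_results() would then need to loop over the returned list, since it now receives one result set per day instead of a single response.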

Related

How to fix AWS Lambda function logs error?

I'm trying to extract AWS Trusted Advisor data through a Lambda function (triggered by an event scheduler) and upload it to S3. However, some part of the function throws an error. Below is my code:
##libraries
import boto3
import os
import csv
from csv import DictWriter
import time
import traceback

## bucket_name is set as env variable
bucket_name = "test-ta-reports"
fail_msg = 'Pulling Trusted Advisor data failed'
Filename = "/tmp/checks_list.csv"
obj_name = time.strftime("%Y-%m-%d-%H-%M-%S") + '/' + '.csv'

##upload to s3
def s3_upload(bucket_name, Filename, obj_name):
    if obj_name is None:
        obj_name = os.path.basename(Filename)
    try:
        s3 = boto3.client("s3", region_name="eu-west-1")
        response = s3.upload_file(Filename, bucket_name, obj_name)
        return True
    except:
        print('Data failed to upload to bucket')
        traceback.print_exc()
        return False

def lambda_handler(event, context):
    try:
        support_client = boto3.client('support', region_name='us-east-1')
        ta_checks = support_client.describe_trusted_advisor_checks(language='en')
        checks_list = {ctgs: [] for ctgs in list(set([checks['category'] for checks in ta_checks['checks']]))}
        for checks in ta_checks['checks']:
            print('Getting check:' + checks['name'] + checks['category'])
            try:
                check_summary = support_client.describe_trusted_advisor_check_summaries(
                    checkIds=[checks['id']])['summaries'][0]
                if check_summary['status'] != 'not_available':
                    checks_list[checks['category']].append(
                        [checks['name'], check_summary['status'],
                         str(check_summary['resourcesSummary']['resourcesProcessed']),
                         str(check_summary['resourcesSummary']['resourcesFlagged']),
                         str(check_summary['resourcesSummary']['resourcesSuppressed']),
                         str(check_summary['resourcesSummary']['resourcesIgnored'])
                         ])
                else:
                    print("unable to append checks")
            except:
                print('Failed to get check: ' + checks['name'])
                traceback.print_exc()
    except:
        print('Failed! Debug further.')
        traceback.print_exc()
    ##rewrite dict to csv
    with open('/tmp/checks_list.csv', 'w', newline='') as csvfile:
        csv_writer = DictWriter(csvfile, fieldnames=['status', 'hasFlaggedResources', 'timestamp', 'resourcesSummary', 'categorySpecificSummary', 'checkId'])
        csv_writer.writeheader()
        csv_writer.writerow(check_summary)
    return checks_list
    if s3_upload(bucket_name, Filename, obj_name):
        print("Successfully uploaded")

if __name__ == '__main__':
    lambda_handler(event, context)
The error log is:
unable to append checks
I'm new to Python, so I'm unsure how to check traceback stacks under the else: statement. Is there any way to modify this code to get traceback logs for the append block? Also, have I made any error in the above code? I'm unable to figure it out. Please help.
response = client.describe_trusted_advisor_check_summaries(
    checkIds=[
        'string',
    ]
)
describe_trusted_advisor_check_summaries() returns summarized results for one or more Trusted Advisor checks. Here you are checking that check_summary['status'] is not equal to not_available, i.e. that the alert status of the check is "ok" (green), "warning" (yellow), or "error" (red); in that case you append resourcesProcessed, resourcesFlagged, resourcesSuppressed, and resourcesIgnored to checks_list for further processing.
It's printing
unable to append checks
simply because the status of the check is not_available. It is not an error log. Just decide what you should do when a check's status is not_available.
See the documentation of describe_trusted_advisor_check_summaries: https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/support.html#Support.Client.describe_trusted_advisor_check_summaries
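For example, if you want the skipped checks recorded instead of the bare print, one option (a sketch, not required behavior; the dict shapes are taken from the question's own code) is to pull the row-building into a helper that logs and returns None for unavailable checks:

def summarize_check(check, check_summary):
    # Returns a CSV-ready row for an available check, or None when the
    # check has not produced data yet (status == 'not_available').
    if check_summary['status'] == 'not_available':
        print('Skipping ' + check['name'] + ': summary not available yet')
        return None
    rs = check_summary['resourcesSummary']
    return [check['name'], check_summary['status'],
            str(rs['resourcesProcessed']), str(rs['resourcesFlagged']),
            str(rs['resourcesSuppressed']), str(rs['resourcesIgnored'])]

Then in the loop, set row = summarize_check(checks, check_summary) and append it to checks_list[checks['category']] only when it is not None.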

How do I set the python docusign_esign ApiClient to use a proxy?

I am using the following examples from the DocuSign site.
I have a set of Python scripts that works well on my PC.
I had to move the code to a server behind a proxy.
I could not find any example or settings to configure a proxy.
I tried setting it in the underlying urllib3 code, but it is being overwritten each time the API creates an instance of ApiClient().
How do I set the python docusign_esign ApiClient to use a proxy?
Below is the relevant portion of the code:
from docusign_esign import ApiClient
from docusign_esign import EnvelopesApi
from jwt_helper import get_jwt_token, get_private_key
# this one has all the connection parameters
from jwt_config import DS_JWT
import urllib3

proxy = urllib3.ProxyManager('http://<id>:<pwd>#<proxy_server>:3128/', maxsize=10)

# used by docusign to decide what you have access to
SCOPES = ["signature", "impersonation"]

# Call the envelope status change method to list the envelopes changed in the last 10 days
def worker(args):
    api_client = ApiClient()
    api_client.host = args['base_path']
    api_client.set_default_header("Authorization", "Bearer " + args['access_token'])
    envelope_api = EnvelopesApi(api_client)
    # The Envelopes::listStatusChanges method has many options
    # The list status changes call requires at least a from_date OR
    # a set of envelopeIds. Here we filter using a from_date.
    # Here we set the from_date to filter envelopes for the last month
    # Use ISO 8601 date format
    from_date = (datetime.datetime.utcnow() - timedelta(days=120)).isoformat()
    results = envelope_api.list_status_changes(args['account_id'], from_date=from_date)
    return results, envelope_api

# Call request_jwt_user_token method
def get_token(private_key, api_client):
    token_response = get_jwt_token(private_key, SCOPES, DS_JWT["authorization_server"],
                                   DS_JWT["ds_client_id"], DS_JWT["ds_impersonated_user_id"])
    access_token = token_response.access_token
    # Save API account ID
    user_info = api_client.get_user_info(access_token)
    accounts = user_info.get_accounts()
    api_account_id = accounts[0].account_id
    base_path = accounts[0].base_uri + "/restapi"
    return {"access_token": access_token, "api_account_id": api_account_id,
            "base_path": base_path}

# bucket to keep track of token info
def get_args(api_account_id, access_token, base_path):
    args = {
        "account_id": api_account_id,
        "base_path": base_path,
        "access_token": access_token
    }
    return args

# start the actual code here: create and then set up the object
api_client = ApiClient()
api_client.set_base_path(DS_JWT["authorization_server"])
api_client.set_oauth_host_name(DS_JWT["authorization_server"])
api_client.rest_client.pool_manager.proxy = proxy
api_client.rest_client.pool_manager.proxy.scheme = "http"

private_key = get_private_key(DS_JWT["private_key_file"]).encode("ascii").decode("utf-8")
jwt_values = get_token(private_key, api_client)
args = get_args(jwt_values["api_account_id"], jwt_values["access_token"], jwt_values["base_path"])
account_id = args["account_id"]

# return the envelope list and the api_client object created to get it
results, envelope_api = worker(args)
print("We found " + str(results.result_set_size) + " sets of files")

for envelope in results.envelopes:
    envelope_id = envelope.envelope_id
    print("Extracting " + envelope_id)
    # The SDK always stores the received file as a temp file; you can not set the path for this
    # Call the envelope get method
    temp_file = envelope_api.get_document(account_id=account_id, document_id="archive",
                                          envelope_id=envelope_id)
    if temp_file:
        print("File is here " + temp_file)
        with zipfile.ZipFile(temp_file, 'r') as zip_ref:
            zip_ref.extractall(extract_dir + envelope_id + "\\")
            zip_ref.close()
        print("Done extracting " + envelope_id + " deleting zip file")
        os.remove(temp_file)
        print("Deleted file here " + temp_file)
    else:
        print("Failed to get data for " + envelope_id)

InvalidSchema("No connection adapters were found for '%s'" % url) - Have tried a lot

I understand that the topic already exists, but I cannot figure out what the issue is.
This is the code (I believe it is straightforward):
import requests
import config
import time

dataID = "111111"

# If report is not generated, will return 'None'
def get_data():
    dataChecking = None
    checking_url = 'https://api.example.com/v1.1/reports/{0}'.format(dataID)
    responseCheck = requests.get(checking_url,
                                 params=(('fields', 'generated_date'),),
                                 auth=(config.authUsername, config.authPassword))
    report_url = responseCheck.json()['report']['generated_date']
    dataChecking = requests.get(report_url).content
    return dataChecking

def download_report(dataChecking):
    urlDownload = 'https://api.example.com/v1.1/reports/{0}'.format(dataID)
    responseDownload = requests.get(urlDownload,
                                    params=(('fields', 'download'),),
                                    auth=(config.authUsername, config.authPassword))
    report_url = responseDownload.json()['report']['download']
    dataDownload = requests.get(report_url).content
    with open('' + config.fileDest + '\exportReport.json', 'w') as f:
        f.write(dataDownload)
    pass

# Checking report is generated
generatedData = get_data()

# Waiting for report to be generated
while generatedData == None:
    # Check again if report is generated
    print("Report is generating, Please wait")
    generatedData = get_data()
    # Wait 0.25 seconds between each check
    time.sleep(0.25)

# Report generated, need to download it
download_report(dataChecking)
The error is:
raise InvalidSchema("No connection adapters were found for '%s'" % url)
InvalidSchema: No connection adapters were found for '2018-06-15T10:37:50'
I have tried to change the URL part, following different tutorials, with no success.
'2018-06-15T10:37:50' is the date when the report was generated. What I currently do is check whether the data is empty, keep checking until it is filled in (with the date, as demonstrated in the example), and then run the download part.
Your error seems to be in these lines in the get_data function:
report_url = responseCheck.json()['report']['generated_date']
dataChecking = requests.get(report_url).content
The data structure implies that what you're storing in the variable report_url is actually a date, and then you're trying to retrieve it as a URL, which throws the error. Figure out where the actual report URL is stored and fetch that instead.
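Judging from download_report above, the actual file link lives under ['report']['download'], while ['report']['generated_date'] is only a timestamp that can serve as a readiness flag. A sketch of get_data along those lines (field names assumed from the question's own code; config and dataID as defined there):

def get_data():
    checking_url = 'https://api.example.com/v1.1/reports/{0}'.format(dataID)
    responseCheck = requests.get(checking_url,
                                 params=(('fields', 'generated_date'),),
                                 auth=(config.authUsername, config.authPassword))
    # Use the timestamp only to decide whether the report is ready;
    # it is not a URL, so don't pass it to requests.get().
    return responseCheck.json()['report'].get('generated_date')

The polling loop can then keep calling get_data() until it returns a non-None value, and download_report() already fetches the real URL from the 'download' field.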

Python MySQL TypeError: must be str, not tuple

I have the following code, which connects to a MySQL DB and checks which records are marked as 'active' by a field that contains '1'.
The code then downloads some files, using the contents of the vulntype field in the download URL.
I think the issue lies with the MySQL query or with the for loop that does the downloads.
The code is:
import requests
import os
import MySQLdb
from hurry.filesize import size, si
import logging
import logging.handlers
from logging.config import fileConfig

logging.handlers = logging.handlers
fileConfig('data/logging_config.ini')
logger = logging.getLogger("downloader")

active_vuln_type = None

def get_active_vuln_sets():
    global active_vuln_type
    try:
        logging.info('Connecting to the database...')
        active_vuln_type = con = MySQLdb.connect(*******)
        logging.info('Database connected!')
    except FileNotFoundError as fnf:
        logging.error(fnf)
    except MySQLdb.Error as e:
        logging.error(e)
    try:
        logging.info('Getting active vulnerability sets...')
        cur = con.cursor()
        active = "1"
        cur.execute("""SELECT vulntype FROM vuln_sets WHERE active = %s""", (active))
        active_vuln_type = cur.fetchall()
    except MySQLdb.Error as e:
        logging.exception(e)

def download():
    try:
        logging.info('Downloading vulnerability set files...')
        for x in active_vuln_type:
            basepath = os.path.dirname(__file__)
            filepath = os.path.abspath(os.path.join(basepath, ".."))
            response = requests.get('https://vulners.com/api/'
                                    'v3/archive/collection/?type=' + x)
            with open(filepath + '/vuln_files/' + x + '.zip', 'wb') as f:
                f.write(response.content)
            filesize = size(os.path.getsize(filepath + '/vuln_files/' + x + '.zip'), system=si)
            files = x + ".zip - " + str(filesize)
            logging.info('Downloaded ' + x + '.zip Successfully')
            logging.info('File details: ' + files)
    except Exception as e:
        logging.exception(e)
The traceback for this is:
Traceback (most recent call last):
  File "/home/luke/projects/vuln_backend/vuln_backend/download.py", line 61, in download
    'v3/archive/collection/?type=' + x)
TypeError: must be str, not tuple
active_vuln_type = cur.fetchall()
This line returns a list of rows from the database. Each row is a tuple. Of course you're only selecting one column from the table, but the interface is the same: each row is a tuple, one value per column.
for x in active_vuln_type:
Here x is a tuple like ("vulnerability of some kind",); note the trailing comma. You need to unpack it:
for db_row in active_vuln_type:
    x = db_row[0]  # The first and only column in the row.
Besides that, please consider naming x descriptively, returning active_vuln_type from one procedure and passing it to the other as a parameter. This will make your code less brittle and easier to test.
def get_active_vuln_sets(db_host, db_user, db_password):
    # Do your stuff, using the db credentials....
    return active_vuln_type

def download(active_vuln_type):
    # Same source as now.
    ...

Now you can download(get_active_vuln_sets('192.168.100.1', 'joe', 'secret')).
Or you can test / retry the same thing without touching the DB: download([("CVE-xyxxy",), ("CVE-asdfghjkl",)]).
One more thing you can do is return a clean list of vuln names, not raw DB tuples:
def get_active_vuln_sets(...):
    # .....
    return [x[0] for x in cur.fetchall()]
Now the value returned will be a list of single, directly usable values, so your original download(...) code would work with it.
What you get is a tuple, so you need to take the first element with x[0]:
def download():
    try:
        logging.info('Downloading vulnerability set files...')
        for x in active_vuln_type:
            basepath = os.path.dirname(__file__)
            filepath = os.path.abspath(os.path.join(basepath, ".."))
            response = requests.get('https://vulners.com/api/'
                                    'v3/archive/collection/?type=' + x[0])
            with open(filepath + '/vuln_files/' + x[0] + '.zip', 'wb') as f:
                f.write(response.content)
            filesize = size(os.path.getsize(filepath + '/vuln_files/' + x[0] + '.zip'), system=si)
            files = x[0] + ".zip - " + str(filesize)
            logging.info('Downloaded ' + x[0] + '.zip Successfully')
            logging.info('File details: ' + files)
    except Exception as e:
        logging.exception(e)
A few general points that may help avoid this type of error: wrap values in str() before concatenating them, e.g. str(function(argument)), and use try/except to surface the problem instead of letting it propagate:
try:
    print(x)
except TypeError:
    print("Variable x is not the expected type")
except:
    print("Something else went wrong")
Beyond that, you have to understand the whole code: check which records are marked as 'active' by filtering on the field that contains '1', and make sure the if/else branches pass out the results you expect.
Thank you.

Grab Bing Wallpaper with python3?

I want to write a Python script that grabs the bing.com wallpaper and saves it.
The urls of these wallpapers look like:
http://www.bing.com/az/hprichbg/rb/EuropeESA_DE-DE7849418832_1920x1080.jpg
http://www.bing.com/az/hprichbg/rb/CanisLupus_DE-DE11366975292_1920x1080.jpg
http://www.bing.com/az/hprichbg/rb/HouseBoats_DE-DE8695714746_1920x1080.jpg
Is there a way to find the image URL of today's wallpaper automatically?
Based on a few of the useful answers in this related SO question, here's a simple Python script to fetch the Bing photo of the day:
import requests
import json

BING_URI_BASE = "http://www.bing.com"
BING_WALLPAPER_PATH = "/HPImageArchive.aspx?format=js&idx=0&n=1&mkt=en-US"

# open the Bing HPImageArchive URI and ask for a JSON response
resp = requests.get(BING_URI_BASE + BING_WALLPAPER_PATH)

if resp.status_code == 200:
    json_response = json.loads(resp.content)
    wallpaper_path = json_response['images'][0]['url']
    filename = wallpaper_path.split('/')[-1]
    wallpaper_uri = BING_URI_BASE + wallpaper_path

    # open the actual wallpaper uri, and write the response as an image on the filesystem
    response = requests.get(wallpaper_uri)
    if response.status_code == 200:
        with open(filename, 'wb') as f:
            f.write(response.content)
    else:
        raise ValueError("[ERROR] non-200 response from Bing server for '{}'".format(wallpaper_uri))
else:
    raise ValueError("[ERROR] non-200 response from Bing server for '{}'".format(BING_URI_BASE + BING_WALLPAPER_PATH))
This will write a file such as TurtleTears_EN-US7942276596_1920x1080.jpg to the directory where the script is executed. Of course, you can tweak a whole bunch of things here, but it gets the job done reasonably easily.
Grab it and save it in a folder by using this code:
import datetime
from urllib.request import urlopen, urlretrieve
from xml.dom import minidom
import os
import sys

def join_path(*args):
    # Takes a list of values or multiple values and returns a valid path.
    if isinstance(args[0], list):
        path_list = args[0]
    else:
        path_list = args
    val = [str(v).strip(' ') for v in path_list]
    return os.path.normpath('/'.join(val))

dir_path = os.path.dirname(os.path.realpath(__file__))
save_dir = join_path(dir_path, 'images')

if not os.path.exists(save_dir):
    os.makedirs(save_dir)

def set_wallpaper(pic_path):
    if sys.platform.startswith('win32'):
        cmd = 'REG ADD \"HKCU\Control Panel\Desktop\" /v Wallpaper /t REG_SZ /d \"%s\" /f' % pic_path
        os.system(cmd)
        os.system('rundll32.exe user32.dll, UpdatePerUserSystemParameters')
        print('Wallpaper is set.')
    elif sys.platform.startswith('linux'):
        os.system(''.join(['gsettings set org.gnome.desktop.background picture-uri file://', pic_path]))
        print('Wallpaper is set.')
    else:
        print('OS not supported.')
        return
    return

def download_old_wallpapers(minus_days=False):
    """Uses download_wallpaper(use_wallpaper=False) to download the last 20 wallpapers.
    If minus_days is given an integer, a specific day in the past will be downloaded.
    """
    if minus_days:
        download_wallpaper(idx=minus_days, use_wallpaper=False)
        return
    for i in range(0, 20):  # max 20
        download_wallpaper(idx=i, use_wallpaper=False)

def download_wallpaper(idx=0, use_wallpaper=True):
    # Getting the XML File
    try:
        usock = urlopen(''.join(['http://www.bing.com/HPImageArchive.aspx?format=xml&idx=',
                                 str(idx), '&n=1&mkt=ru-RU']))  # ru-RU, because they always have 1920x1200 resolution
    except Exception as e:
        print('Error while downloading #', idx, e)
        return
    try:
        xmldoc = minidom.parse(usock)
    # This is raised when there is trouble finding the image url.
    except Exception as e:
        print('Error while processing XML index #', idx, e)
        return
    # Parsing the XML File
    for element in xmldoc.getElementsByTagName('url'):
        url = 'http://www.bing.com' + element.firstChild.nodeValue
        # Get Current Date as fileName for the downloaded Picture
        now = datetime.datetime.now()
        date = now - datetime.timedelta(days=int(idx))
        pic_path = join_path(save_dir, ''.join([date.strftime('bing_wp_%d-%m-%Y'), '.jpg']))
        if os.path.isfile(pic_path):
            print('Image of', date.strftime('%d-%m-%Y'), 'already downloaded.')
            if use_wallpaper:
                set_wallpaper(pic_path)
            return
        print('Downloading: ', date.strftime('%d-%m-%Y'), 'index #', idx)
        # Download and Save the Picture
        # Get a higher resolution by replacing the file name
        urlretrieve(url.replace('_1366x768', '_1920x1200'), pic_path)
        # Set Wallpaper if wanted by user
        if use_wallpaper:
            set_wallpaper(pic_path)

if __name__ == "__main__":
    download_wallpaper()
If you already have a list of image URLs, downloading them all is a short loop with urllib (Python 2 shown here; in Python 3 use urllib.request.urlretrieve):
for number, url in enumerate(list_of_urls):
    urllib.urlretrieve(url, 'Image {}.jpg'.format(number + 1))
