Deploy AZURE custom labeled form model though functions on logic app - python

I used labeling tool to train a custom model. The tool provides a python script for using it. It runs well on terminal. Now I want to put that in an Azure function inside a logic app. A .pdf email attachment would trigger the form analizer model and then I would parse the JSON response. So far I haven't been able to:
Successfully deploying the provided script inside an Azure function.
Using that inside a logic app to recognize every pdf that I would deposit in a blob or send via email.
This is the provided script from the Azure Form recognizer tool:
########### Python Form Recognizer Async Analyze #############
import json
import time
import getopt
import sys
import os
from requests import get, post
def main(argv):
input_file, output_file, file_type = getArguments(argv)
runAnalysis(input_file, output_file, file_type)
def runAnalysis(input_file, output_file, file_type):
# Endpoint URL
endpoint = r"##################################/"
# Subscription Key
apim_key = "####################################"
# Model ID
model_id = "######################################"
# API version
API_version = "v2.1-preview.3"
post_url = endpoint + "/formrecognizer/%s/custom/models/%s/analyze" % (API_version, model_id)
params = {
"includeTextDetails": True
}
headers = {
# Request headers
'Content-Type': file_type,
'Ocp-Apim-Subscription-Key': apim_key,
}
try:
with open(input_file, "rb") as f:
data_bytes = f.read()
except IOError:
print("Inputfile not accessible.")
sys.exit(2)
try:
print('Initiating analysis...')
resp = post(url = post_url, data = data_bytes, headers = headers, params = params)
if resp.status_code != 202:
print("POST analyze failed:\n%s" % json.dumps(resp.json()))
quit()
print("POST analyze succeeded:\n%s" % resp.headers)
print
get_url = resp.headers["operation-location"]
except Exception as e:
print("POST analyze failed:\n%s" % str(e))
quit()
n_tries = 15
n_try = 0
wait_sec = 5
max_wait_sec = 60
print()
print('Getting analysis results...')
while n_try < n_tries:
try:
resp = get(url = get_url, headers = {"Ocp-Apim-Subscription-Key": apim_key})
resp_json = resp.json()
if resp.status_code != 200:
print("GET analyze results failed:\n%s" % json.dumps(resp_json))
quit()
status = resp_json["status"]
if status == "succeeded":
if output_file:
with open(output_file, 'w') as outfile:
json.dump(resp_json, outfile, indent=2, sort_keys=True)
print("Analysis succeeded:\n%s" % json.dumps(resp_json, indent=2, sort_keys=True))
quit()
if status == "failed":
print("Analysis failed:\n%s" % json.dumps(resp_json))
quit()
# Analysis still running. Wait and retry.
time.sleep(wait_sec)
n_try += 1
wait_sec = min(2*wait_sec, max_wait_sec)
except Exception as e:
msg = "GET analyze results failed:\n%s" % str(e)
print(msg)
quit()
print("Analyze operation did not complete within the allocated time.")
def getArguments(argv):
input_file = ''
file_type = ''
output_file = ''
try:
opts, args = getopt.gnu_getopt(argv, "ht:o:", [])
except getopt.GetoptError:
printCommandDescription(2)
for opt, arg in opts:
if opt == '-h':
printCommandDescription()
if len(args) != 1:
printCommandDescription()
else:
input_file = args[0]
for opt, arg in opts:
if opt == '-t':
if arg not in ('application/pdf', 'image/jpeg', 'image/png', 'image/tiff', 'image/bmp'):
print('Type ' + file_type + ' not supported')
sys.exit()
else:
file_type = arg
if opt == '-o':
output_file = arg
try:
open(output_file, 'a')
except IOError:
print("Output file not creatable")
sys.exit(2)
if not file_type:
file_type = inferrType(input_file)
return (input_file, output_file, file_type)
def inferrType(input_file):
filename, file_extension = os.path.splitext(input_file)
if file_extension == '':
print('File extension could not be inferred from inputfile. Provide type as an argument.')
sys.exit()
elif file_extension == '.pdf':
return 'application/pdf'
elif file_extension == '.jpeg':
return 'image/jpeg'
elif file_extension == '.bmp':
return 'image/bmp'
elif file_extension == '.png':
return 'image/png'
elif file_extension == '.tiff':
return 'image/tiff'
else:
print('File extension ' + file_extension + ' not supported')
sys.exit()
def printCommandDescription(exit_status=0):
print('analyze.py <inputfile> [-t <type>] [-o <outputfile>]')
print
print('If type option is not provided, type will be inferred from file extension.')
sys.exit(exit_status)
if __name__ == '__main__':
main(sys.argv[1:])

For your requirement, you need to create azure function and write the python code in function with some modification.
First create azure function with python in VS code. The python function you created should be like:
def main(req: func.HttpRequest) -> func.HttpResponse:
logging.info('Python HTTP trigger function processed a request.')
...............
Then you need to write the python code you provided above to the function with some modification. Fuse the code into function HttpRequest code body.
After that, deploy the function from local to azure.
Then you can call the function in your logic app.

Related

How to read ZIP file in an S3 bucket with a Python Lambda function?

I'm creating a backend function so that users of App A can import their details to App B. The flow is like this:
User uploads a zip file on the website. This zip contains csv files.
This zip file flows into S3.
Once in S3, it triggers a Lambda function.
The Lambda function then picks the zip file and starts processing the data inside the csv files.
I've completed Step 1,2 and 3. But in 4, the Lambda function is not able to read/process the file.
The python file works fine on my local device, so I think the issue is that it is not able to "get" the object from S3 correctly and so the read_zip doesn't work.
Relevant code below:
import <relevant libs>
s3Client = boto3.client('s3')
def lambda_handler(event,context):
bucket = event['Records'][0]['s3']['bucket']['name']
filename = event['Records'][0]['s3']['object']['key']
#tried printing these, displaying correctly
#response = s3Client.get_object(Bucket=bucket, Key=filename)
usefile = 'https://' + bucket + '.s3.ap-south-1.amazonaws.com/' + filename
print(usefile)
#printing correct filename
def read_csv(file):
to_return = []
reader = csv.DictReader(TextIOWrapper(file, 'utf-8'))
for row in reader:
to_return.append(row)
return to_return
def read_zip(usefile):
with ZipFile(usefile, 'r') as APPA_file:
with APPA_file.open("file1.csv", mode='r') as f:
file1 = read_csv(f)
with APPA_file.open("file2.csv", mode='r') as f:
file2 = read_csv(f)
return file1, file2
def get_APPB_url(APPA_uri):
resp = requests.get(APPA_uri)
if resp.status_code != 200:
return None
# extract the APPB url
re_match = re.findall('href="(https://www.X.org/.+/)"', resp.text)
if not re_match:
return None
print(resp.text)
return re_match[0]
def rate_on_APPB(APPB_url, rating):
re_match = re.findall('X.org/(.+)/', APPB_url)
if not re_match:
return None
APPB_id = re_match[0]
req_body = {
"query": <query used>,
"operationName": "<xyz>",
"variables": <variables>
}
headers = {
"content-type": "application/json",
"x-hasura-admin-secret": "XXX"
}
resp = requests.post("XXX", json=req_body, headers=headers)
if resp.status_code != 200:
raise ValueError(f"Hasura query failed. Code: {resp.status_code}")
else: print(APPB_id)
json_resp = resp.json()
if 'errors' in json_resp and len(json_resp['errors']) > 0:
first_error_msg = json_resp['errors'][0]['message']
if 'Authentication' in first_error_msg:
print(f"Failed to authenticate with cookie")
exit(1)
else:
raise ValueError(first_error_msg)
def APPA_to_APPB(APPA_dict):
APPB_url = get_APPB_url(APPA_dict['APPA URI'])
if APPB_url is None:
raise ValueError("Cannot find APPB title")
rate_on_APPB(APPB_url, int(float(APPA_dict['Rating']) * 2))
def main():
file1, file2 = read_zip(usefile)
success = []
errors = []
with tqdm(total=len(file1)) as pbar:
with concurrent.futures.ThreadPoolExecutor(max_workers=5) as executor:
future_to_url = {
executor.submit(APPA_to_APPB, APPA_dict): APPA_dict for APPA_dict in file1
}
try:
for future in concurrent.futures.as_completed(future_to_url):
APPA_dict = future_to_url[future]
pbar.update(1)
try:
success.append(future.result())
except Exception as e:
errors.append({"APPA_dict": APPA_dict, "error": e})
except KeyboardInterrupt:
executor._threads.clear()
concurrent.futures.thread._threads_queues.clear()
print(f"Successfully rated: {len(success)} ")
print(f"{len(errors)} Errors")
for error in errors:
print(f"\t{error['APPA_dict']['Name']} ({error['APPA_dict']['Year']}): {error['error']}")
if __name__ == '__main__':
main()
CloudWatch log basically says Event started and ended successfully, nothing else. I was able to verify that the trigger was working fine by printing the file name and it appears in the log. But that's about it.

Python SIEM log collector hangs randomly

I am trying to troubleshoot a script for Mimecast's API. The script runs fine for the most part, but a few times, I have noticed that it stops pulling logs and generally appears to be a hung process. After restarting the script and manually pushing logs to the syslog server, it starts working again without issue. I am not able to reproduce this issue at will.
The script is supposed to do the following:
Authenticate against Mimecast's API
Sign responses
download, extract and save log files to log dir
utilize a tokenized header to determine which file was downloaded in the last request. Should save the token ID within a file in the checkpoint directory
Push files to remote syslog server
Output any errors and info to console
Below is the sample code from Mimecast.
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import logging.handlers
import json
import os
import requests
import base64
import uuid
import datetime
import hashlib
import shutil
import hmac
import time
from zipfile import ZipFile
import io
# Set up variables
APP_ID = "YOUR DEVELOPER APPLICATION ID"
APP_KEY = "YOUR DEVELOPER APPLICATION KEY"
URI = "/api/audit/get-siem-logs"
EMAIL_ADDRESS = 'EMAIL ADDRESS OF YOUR ADMINISTRATOR'
ACCESS_KEY = 'ACCESS KEY FOR YOUR ADMINISTRATOR'
SECRET_KEY = 'SECRET KEY FOR YOUR ADMINISTRATOR'
LOG_FILE_PATH = "FULLY QUALIFIED PATH TO FOLDER TO WRITE LOGS"
CHK_POINT_DIR = 'FULLY QUALIFIED PATH TO FOLDER TO WRITE PAGE TOKEN'
# Set True to output to syslog, false to only save to file
syslog_output = False
# Enter the IP address or hostname of your syslog server
syslog_server = 'localhost'
# Change this to override default port
syslog_port = 514
# delete files after fetching
delete_files = True
# Set threshold in number of files in log file directory
log_file_threshold = 10000
# Set up logging (in this case to terminal)
log = logging.getLogger(__name__)
log.root.setLevel(logging.DEBUG)
log_formatter = logging.Formatter('%(levelname)s %(message)s')
log_handler = logging.StreamHandler()
log_handler.setFormatter(log_formatter)
log.addHandler(log_handler)
# Set up syslog output
syslog_handler = logging.handlers.SysLogHandler(address=(syslog_server, syslog_port))
syslog_formatter = logging.Formatter('%(message)s')
syslog_handler.setFormatter(syslog_formatter)
syslogger = logging.getLogger(__name__)
syslogger = logging.getLogger('SysLogger')
syslogger.addHandler(syslog_handler)
# Supporting methods
def get_hdr_date():
return datetime.datetime.utcnow().strftime("%a, %d %b %Y %H:%M:%S UTC")
def read_file(file_name):
try:
with open(file_name, 'r') as f:
data = f.read()
return data
except Exception as e:
log.error('Error reading file ' + file_name + '. Cannot continue. Exception: ' + str(e))
quit()
def write_file(file_name, data_to_write):
if '.zip' in file_name:
try:
byte_content = io.BytesIO(data_to_write)
zip_file = ZipFile(byte_content)
zip_file.extractall(LOG_FILE_PATH)
except Exception as e:
log.error('Error writing file ' + file_name + '. Cannot continue. Exception: ' + str(e))
quit()
else:
try:
with open(file_name, 'w') as f:
f.write(data_to_write)
except Exception as e:
log.error('Error writing file ' + file_name + '. Cannot continue. Exception: ' + str(e))
quit()
def get_base_url(email_address):
# Create post body for request
post_body = dict()
post_body['data'] = [{}]
post_body['data'][0]['emailAddress'] = email_address
# Create variables required for request headers
request_id = str(uuid.uuid4())
request_date = get_hdr_date()
headers = {'x-mc-app-id': APP_ID, 'x-mc-req-id': request_id, 'x-mc-date': request_date}
# Send request to API
log.debug('Sending request to https://api.mimecast.com/api/discover-authentication with request Id: ' +
request_id)
try:
r = requests.post(url='https://api.mimecast.com/api/login/discover-authentication',
data=json.dumps(post_body), headers=headers)
# Handle Rate Limiting
if r.status_code == 429:
log.warning('Rate limit hit. sleeping for ' + str(r.headers['X-RateLimit-Reset'] * 1000))
time.sleep(r.headers['X-RateLimit-Reset'] * 1000)
except Exception as e:
log.error('Unexpected error getting base url. Cannot continue.' + str(e))
quit()
# Handle error from API
if r.status_code != 200:
log.error('Request returned with status code: ' + str(r.status_code) + ', response body: ' +
r.text + '. Cannot continue.')
quit()
# Load response body as JSON
resp_data = json.loads(r.text)
# Look for api key in region region object to get base url
if 'region' in resp_data["data"][0]:
base_url = resp_data["data"][0]["region"]["api"].split('//')
base_url = base_url[1]
else:
# Handle no region found, likely the email address was entered incorrectly
log.error(
'No region information returned from API, please check the email address.'
'Cannot continue')
quit()
return base_url
def post_request(base_url, uri, post_body, access_key, secret_key):
# Create variables required for request headers
request_id = str(uuid.uuid4())
request_date = get_hdr_date()
unsigned_auth_header = '{date}:{req_id}:{uri}:{app_key}'.format(
date=request_date,
req_id=request_id,
uri=uri,
app_key=APP_KEY
)
hmac_sha1 = hmac.new(
base64.b64decode(secret_key),
unsigned_auth_header.encode(),
digestmod=hashlib.sha1).digest()
sig = base64.encodebytes(hmac_sha1).rstrip()
headers = {
'Authorization': 'MC ' + access_key + ':' + sig.decode(),
'x-mc-app-id': APP_ID,
'x-mc-date': request_date,
'x-mc-req-id': request_id,
'Content-Type': 'application/json'
}
try:
# Send request to API
log.debug('Sending request to https://' + base_url + uri + ' with request Id: ' + request_id)
r = requests.post(url='https://' + base_url + uri, data=json.dumps(post_body), headers=headers)
# Handle Rate Limiting
if r.status_code == 429:
log.warning('Rate limit hit. sleeping for ' + str(r.headers['X-RateLimit-Reset'] * 1000))
time.sleep(r.headers['X-RateLimit-Reset'] * 1000)
r = requests.post(url='https://' + base_url + uri, data=json.dumps(post_body), headers=headers)
# Handle errors
except Exception as e:
log.error('Unexpected error connecting to API. Exception: ' + str(e))
return 'error'
# Handle errors from API
if r.status_code != 200:
log.error('Request to ' + uri + ' with , request id: ' + request_id + ' returned with status code: ' +
str(r.status_code) + ', response body: ' + r.text)
return 'error'
# Return response body and response headers
return r.content, r.headers
def get_mta_siem_logs(checkpoint_dir, base_url, access_key, secret_key):
uri = "/api/audit/get-siem-logs"
# Set checkpoint file name to store page token
checkpoint_filename = os.path.join(checkpoint_dir, 'get_mta_siem_logs_checkpoint')
# Build post body for request
post_body = dict()
post_body['data'] = [{}]
post_body['data'][0]['type'] = 'MTA'
post_body['data'][0]['compress'] = True
if os.path.exists(checkpoint_filename):
post_body['data'][0]['token'] = read_file(checkpoint_filename)
# Send request to API
resp = post_request(base_url, uri, post_body, access_key, secret_key)
now = datetime.datetime.now().strftime("%a %b %d %H:%M:%S %Y")
# Process response
if resp != 'error':
resp_body = resp[0]
resp_headers = resp[1]
content_type = resp_headers['Content-Type']
# End if response is JSON as there is no log file to download
if content_type == 'application/json':
log.info('No more logs available')
return False
# Process log file
elif content_type == 'application/octet-stream':
file_name = resp_headers['Content-Disposition'].split('=\"')
file_name = file_name[1][:-1]
# Save files to LOG_FILE_PATH
write_file(os.path.join(LOG_FILE_PATH, file_name), resp_body)
# Save mc-siem-token page token to check point directory
write_file(checkpoint_filename, resp_headers['mc-siem-token'])
try:
if syslog_output is True:
for filename in os.listdir(LOG_FILE_PATH):
file_creation_time = time.ctime(os.path.getctime(LOG_FILE_PATH + "/" + filename))
if now < file_creation_time or now == file_creation_time:
log.info('Loading file: ' + filename + ' to output to ' + syslog_server + ':' + str(syslog_port))
with open(file=os.path.join(LOG_FILE_PATH, filename), mode='r', encoding='utf-8') as log_file:
lines = log_file.read().splitlines()
for line in lines:
syslogger.info(line)
log.info('Syslog output completed for file ' + filename)
except Exception as e:
log.error('Unexpected error writing to syslog. Exception: ' + str(e))
# return true to continue loop
return True
else:
# Handle errors
log.error('Unexpected response')
for header in resp_headers:
log.error(header)
return False
def run_script():
# discover base URL
try:
base_url = get_base_url(email_address=EMAIL_ADDRESS)
except Exception as e:
log.error('Error discovering base url for ' + EMAIL_ADDRESS + ' . Exception: ' + str(e))
quit()
# Request log data in a loop until there are no more logs to collect
try:
log.info('Getting MTA log data')
while get_mta_siem_logs(checkpoint_dir=CHK_POINT_DIR, base_url=base_url, access_key=ACCESS_KEY,
secret_key=SECRET_KEY) is True:
log.info('Getting more MTA log files')
except Exception as e:
log.error('Unexpected error getting MTA logs ' + (str(e)))
file_number = len([name for name in os.listdir(LOG_FILE_PATH) if os.path.isfile(name)])
if delete_files or file_number >= log_file_threshold:
for filename in os.listdir(LOG_FILE_PATH):
file_path = os.path.join(LOG_FILE_PATH, filename)
try:
if os.path.isfile(file_path) or os.path.islink(file_path):
os.unlink(file_path)
elif os.path.isdir(file_path):
shutil.rmtree(file_path)
except Exception as e:
print('Failed to delete %s. Reason: %s' % (file_path, e))
quit()
# Run script
run_script()
It seems like it may potentially be a race condition but I am not sure how to confirm since I can't reproduce it. I notice that SumoLogic has a modified version of this script as well with a different methodology for managing the files/paths. If this script works better than the main sample script above, would anybody be able to explain WHY? I haven't had any issues with it yet.
https://github.com/SumoLogic/sumologic-content/blob/master/MimeCast/SumoLogic-Mimecast-Data-Collection/siem_collection.py

Don't know how to execute function after else command

I'm trying to take an image via webcam then send the image through gmail. That function suppose to happen if my fingerprint is known at the system. If it does it needs to execute the send email function. I added after the send email function to send 'HIGH' to a gpio. I don't think it suppose to interfere.
When there is a known template it is followed by an "else:" statement. then I put my "def find_newest_file(dir):" function. What am I doing wrong?
might note I'm terrible at programming. Thank you!
Pyfingerprint
import os
import glob
import time
import smtplib
import imghdr
from email.message import EmailMessage
import RPi.GPIO as GPIO
from time import sleep
GPIO.setmode(GPIO.BCM)
GPIO.setup(26, GPIO.OUT)
import hashlib
from pyfingerprint.pyfingerprint import PyFingerprint
GPIO.output(26, 0)
## Search for a finger
##
## Tries to initialize the sensor
try:
f = PyFingerprint('/dev/ttyUSB0', 57600, 0xFFFFFFFF, 0x00000000)
if ( f.verifyPassword() == False ):
raise ValueError('The given fingerprint sensor password is wrong!')
except Exception as e:
print('The fingerprint sensor could not be initialized!')
print('Exception message: ' + str(e))
exit(1)
## Gets some sensor information
print('Currently used templates: ' + str(f.getTemplateCount()) +'/'+ str(f.getStorageCapacity()))
## Tries to search the finger and calculate hash
try:
print('Waiting for finger...')
## Wait that finger is read
while ( f.readImage() == False ):
pass
## Converts read image to characteristics and stores it in charbuffer 1
f.convertImage(0x01)
## Searchs template
result = f.searchTemplate()
positionNumber = result[0]
accuracyScore = result[1]
if ( positionNumber == -1 ):
print('No match found!')
exit(0)
else:
print('Found template at position #' + str(positionNumber))
print('The accuracy score is: ' + str(accuracyScore))
def find_newest_file(dir):
os.system('fswebcam -r 1280x720 -S 3 --jpeg 90 --save /home/pi/Pictures/%H%M%S.jpg')
types = ['*.png', '*.jpg']
files = []
if not os.path.isdir(dir):
print(f'ERROR: {dir} does not exist. or it is not a valid folder')
exit()
for ext in types:
scan_path = os.path.abspath(os.path.join(dir, ext))
files.extend(glob.glob(scan_path))
if len(files) == 0:
print(f'ERROR: file not found while scanning folder: {dir} for: {types}')
exit()
newest = None
n_time = 0
for file in files:
# print(file)
c_time = os.path.getctime(file)
if c_time > n_time:
n_time = c_time
newest = file
if newest is None:
print(f'-----------\nUnexpected error: None was return while the list was not empty:\n{files}')
exit()
if os.path.exists(newest):
return newest # return as a list since this is what Yehonata expect
else:
print(f'ERROR: File {newest} not found')
exit()
# Yehontan Code
Sender_Email = "Sender#gmail.com"
Reciever_Email = "reciver#gmailcom"
Password = input('Enter your email account password: ')
newMessage = EmailMessage()
newMessage['Subject'] = "Check out the new logo"
newMessage['From'] = Sender_Email
newMessage['To'] = Reciever_Email
newMessage.set_content('Let me know what you think. Image attached!')
# Tomerl: Replace static name with seach function
files = [ find_newest_file('/home/pi/Pictures/') ]
for file in files:
with open(file, 'rb') as f:
image_data = f.read()
image_type = imghdr.what(f.name)
image_name = f.name
newMessage.add_attachment(image_data, maintype='image', subtype=image_type, filename=image_name)
with smtplib.SMTP_SSL('smtp.gmail.com', 465) as smtp:
smtp.login(Sender_Email, Password)
smtp.send_message(newMessage)
try:
for x in range(1):
GPIO.output(26, 1)
sleep(2.5)
GPIO.output(26, 0)
sleep(2.5)
except KeyboardInterrupt:
GPIO.cleanup()
GPIO.output(26, 0)
## Loads the found template to charbuffer 1
f.loadTemplate(positionNumber, 0x01)
## Downloads the characteristics of template loaded in charbuffer 1
characterics = str(f.downloadCharacteristics(0x01)).encode('utf-8')
## Hashes characteristics of template
print('SHA-2 hash of template: ' + hashlib.sha256(characterics).hexdigest())
except Exception as e:
print('Operation failed!')
print('Exception message: ' + str(e))
exit(1)
You need to define the function at the top of the program. You only need to call it after the else statement. Basically, put
def find_newest_file(dir):
os.system('fswebcam -r 1280x720 -S 3 --jpeg 90 --save /home/pi/Pictures/%H%M%S.jpg')
types = ['*.png', '*.jpg']
files = []
if not os.path.isdir(dir):
print(f'ERROR: {dir} does not exist. or it is not a valid folder')
exit()
at the top and when you want to use it do
find_newest_file(foo)
Good luck on your journey!

import python with __main__ method

I have a python script that have __main__ statement and took all values parametric.
I want to import and use it in my own script.
Actually I can import but don't know how to use it.
As you see below, __main__ is a bit complicated and rewriting it will take time because I even don't know what does most of code mean.
Want to know is there any way to import and use the code as a function?
import os
import sys
import time
import base64
from urllib2 import urlopen
from urllib2 import Request
from urllib2 import HTTPError
from urllib import urlencode
from urllib import quote
from exceptions import Exception
from email.mime.multipart import MIMEMultipart
from email.mime.base import MIMEBase
from email.mime.application import MIMEApplication
from email.encoders import encode_noop
from api_util import json2python, python2json
class MalformedResponse(Exception):
pass
class RequestError(Exception):
pass
class Client(object):
default_url = 'http://nova.astrometry.net/api/'
def __init__(self,
apiurl = default_url):
self.session = None
self.apiurl = apiurl
def get_url(self, service):
return self.apiurl + service
def send_request(self, service, args={}, file_args=None):
'''
service: string
args: dict
'''
if self.session is not None:
args.update({ 'session' : self.session })
print 'Python:', args
json = python2json(args)
print 'Sending json:', json
url = self.get_url(service)
print 'Sending to URL:', url
# If we're sending a file, format a multipart/form-data
if file_args is not None:
m1 = MIMEBase('text', 'plain')
m1.add_header('Content-disposition', 'form-data; name="request-json"')
m1.set_payload(json)
m2 = MIMEApplication(file_args[1],'octet-stream',encode_noop)
m2.add_header('Content-disposition',
'form-data; name="file"; filename="%s"' % file_args[0])
#msg.add_header('Content-Disposition', 'attachment',
# filename='bud.gif')
#msg.add_header('Content-Disposition', 'attachment',
# filename=('iso-8859-1', '', 'FuSballer.ppt'))
mp = MIMEMultipart('form-data', None, [m1, m2])
# Makie a custom generator to format it the way we need.
from cStringIO import StringIO
from email.generator import Generator
class MyGenerator(Generator):
def __init__(self, fp, root=True):
Generator.__init__(self, fp, mangle_from_=False,
maxheaderlen=0)
self.root = root
def _write_headers(self, msg):
# We don't want to write the top-level headers;
# they go into Request(headers) instead.
if self.root:
return
# We need to use \r\n line-terminator, but Generator
# doesn't provide the flexibility to override, so we
# have to copy-n-paste-n-modify.
for h, v in msg.items():
print >> self._fp, ('%s: %s\r\n' % (h,v)),
# A blank line always separates headers from body
print >> self._fp, '\r\n',
# The _write_multipart method calls "clone" for the
# subparts. We hijack that, setting root=False
def clone(self, fp):
return MyGenerator(fp, root=False)
fp = StringIO()
g = MyGenerator(fp)
g.flatten(mp)
data = fp.getvalue()
headers = {'Content-type': mp.get('Content-type')}
if False:
print 'Sending headers:'
print ' ', headers
print 'Sending data:'
print data[:1024].replace('\n', '\\n\n').replace('\r', '\\r')
if len(data) > 1024:
print '...'
print data[-256:].replace('\n', '\\n\n').replace('\r', '\\r')
print
else:
# Else send x-www-form-encoded
data = {'request-json': json}
print 'Sending form data:', data
data = urlencode(data)
print 'Sending data:', data
headers = {}
request = Request(url=url, headers=headers, data=data)
try:
f = urlopen(request)
txt = f.read()
print 'Got json:', txt
result = json2python(txt)
print 'Got result:', result
stat = result.get('status')
print 'Got status:', stat
if stat == 'error':
errstr = result.get('errormessage', '(none)')
raise RequestError('server error message: ' + errstr)
return result
except HTTPError, e:
print 'HTTPError', e
txt = e.read()
open('err.html', 'wb').write(txt)
print 'Wrote error text to err.html'
def login(self, apikey):
args = { 'apikey' : apikey }
result = self.send_request('login', args)
sess = result.get('session')
print 'Got session:', sess
if not sess:
raise RequestError('no session in result')
self.session = sess
def _get_upload_args(self, **kwargs):
args = {}
for key,default,typ in [('allow_commercial_use', 'd', str),
('allow_modifications', 'd', str),
('publicly_visible', 'y', str),
('scale_units', None, str),
('scale_type', None, str),
('scale_lower', None, float),
('scale_upper', None, float),
('scale_est', None, float),
('scale_err', None, float),
('center_ra', None, float),
('center_dec', None, float),
('radius', None, float),
('downsample_factor', None, int),
('tweak_order', None, int),
('crpix_center', None, bool),
# image_width, image_height
]:
if key in kwargs:
val = kwargs.pop(key)
val = typ(val)
args.update({key: val})
elif default is not None:
args.update({key: default})
print 'Upload args:', args
return args
def url_upload(self, url, **kwargs):
args = dict(url=url)
args.update(self._get_upload_args(**kwargs))
result = self.send_request('url_upload', args)
return result
def upload(self, fn, **kwargs):
args = self._get_upload_args(**kwargs)
try:
f = open(fn, 'rb')
result = self.send_request('upload', args, (fn, f.read()))
return result
except IOError:
print 'File %s does not exist' % fn
raise
def submission_images(self, subid):
result = self.send_request('submission_images', {'subid':subid})
return result.get('image_ids')
def overlay_plot(self, service, outfn, wcsfn, wcsext=0):
from astrometry.util import util as anutil
wcs = anutil.Tan(wcsfn, wcsext)
params = dict(crval1 = wcs.crval[0], crval2 = wcs.crval[1],
crpix1 = wcs.crpix[0], crpix2 = wcs.crpix[1],
cd11 = wcs.cd[0], cd12 = wcs.cd[1],
cd21 = wcs.cd[2], cd22 = wcs.cd[3],
imagew = wcs.imagew, imageh = wcs.imageh)
result = self.send_request(service, {'wcs':params})
print 'Result status:', result['status']
plotdata = result['plot']
plotdata = base64.b64decode(plotdata)
open(outfn, 'wb').write(plotdata)
print 'Wrote', outfn
def sdss_plot(self, outfn, wcsfn, wcsext=0):
return self.overlay_plot('sdss_image_for_wcs', outfn,
wcsfn, wcsext)
def galex_plot(self, outfn, wcsfn, wcsext=0):
return self.overlay_plot('galex_image_for_wcs', outfn,
wcsfn, wcsext)
def myjobs(self):
result = self.send_request('myjobs/')
return result['jobs']
def job_status(self, job_id, justdict=False):
result = self.send_request('jobs/%s' % job_id)
if justdict:
return result
stat = result.get('status')
if stat == 'success':
result = self.send_request('jobs/%s/calibration' % job_id)
print 'Calibration:', result
result = self.send_request('jobs/%s/tags' % job_id)
print 'Tags:', result
result = self.send_request('jobs/%s/machine_tags' % job_id)
print 'Machine Tags:', result
result = self.send_request('jobs/%s/objects_in_field' % job_id)
print 'Objects in field:', result
result = self.send_request('jobs/%s/annotations' % job_id)
print 'Annotations:', result
result = self.send_request('jobs/%s/info' % job_id)
print 'Calibration:', result
return stat
def sub_status(self, sub_id, justdict=False):
result = self.send_request('submissions/%s' % sub_id)
if justdict:
return result
return result.get('status')
def jobs_by_tag(self, tag, exact):
exact_option = 'exact=yes' if exact else ''
result = self.send_request(
'jobs_by_tag?query=%s&%s' % (quote(tag.strip()), exact_option),
{},
)
return result
if __name__ == '__main__':
import optparse
parser = optparse.OptionParser()
parser.add_option('--server', dest='server', default=Client.default_url,
help='Set server base URL (eg, %default)')
parser.add_option('--apikey', '-k', dest='apikey',
help='API key for Astrometry.net web service; if not given will check AN_API_KEY environment variable')
parser.add_option('--upload', '-u', dest='upload', help='Upload a file')
parser.add_option('--wait', '-w', dest='wait', action='store_true', help='After submitting, monitor job status')
parser.add_option('--wcs', dest='wcs', help='Download resulting wcs.fits file, saving to given filename; implies --wait if --urlupload or --upload')
parser.add_option('--kmz', dest='kmz', help='Download resulting kmz file, saving to given filename; implies --wait if --urlupload or --upload')
parser.add_option('--urlupload', '-U', dest='upload_url', help='Upload a file at specified url')
parser.add_option('--scale-units', dest='scale_units',
choices=('arcsecperpix', 'arcminwidth', 'degwidth', 'focalmm'), help='Units for scale estimate')
#parser.add_option('--scale-type', dest='scale_type',
# choices=('ul', 'ev'), help='Scale bounds: lower/upper or estimate/error')
parser.add_option('--scale-lower', dest='scale_lower', type=float, help='Scale lower-bound')
parser.add_option('--scale-upper', dest='scale_upper', type=float, help='Scale upper-bound')
parser.add_option('--scale-est', dest='scale_est', type=float, help='Scale estimate')
parser.add_option('--scale-err', dest='scale_err', type=float, help='Scale estimate error (in PERCENT), eg "10" if you estimate can be off by 10%')
parser.add_option('--ra', dest='center_ra', type=float, help='RA center')
parser.add_option('--dec', dest='center_dec', type=float, help='Dec center')
parser.add_option('--radius', dest='radius', type=float, help='Search radius around RA,Dec center')
parser.add_option('--downsample', dest='downsample_factor', type=int, help='Downsample image by this factor')
parser.add_option('--parity', dest='parity', choices=('0','1'), help='Parity (flip) of image')
parser.add_option('--tweak-order', dest='tweak_order', type=int, help='SIP distortion order (default: 2)')
parser.add_option('--crpix-center', dest='crpix_center', action='store_true', default=None, help='Set reference point to center of image?')
parser.add_option('--sdss', dest='sdss_wcs', nargs=2, help='Plot SDSS image for the given WCS file; write plot to given PNG filename')
parser.add_option('--galex', dest='galex_wcs', nargs=2, help='Plot GALEX image for the given WCS file; write plot to given PNG filename')
parser.add_option('--substatus', '-s', dest='sub_id', help='Get status of a submission')
parser.add_option('--jobstatus', '-j', dest='job_id', help='Get status of a job')
parser.add_option('--jobs', '-J', dest='myjobs', action='store_true', help='Get all my jobs')
parser.add_option('--jobsbyexacttag', '-T', dest='jobs_by_exact_tag', help='Get a list of jobs associated with a given tag--exact match')
parser.add_option('--jobsbytag', '-t', dest='jobs_by_tag', help='Get a list of jobs associated with a given tag')
parser.add_option( '--private', '-p',
dest='public',
action='store_const',
const='n',
default='y',
help='Hide this submission from other users')
parser.add_option('--allow_mod_sa','-m',
dest='allow_mod',
action='store_const',
const='sa',
default='d',
help='Select license to allow derivative works of submission, but only if shared under same conditions of original license')
parser.add_option('--no_mod','-M',
dest='allow_mod',
action='store_const',
const='n',
default='d',
help='Select license to disallow derivative works of submission')
parser.add_option('--no_commercial','-c',
dest='allow_commercial',
action='store_const',
const='n',
default='d',
help='Select license to disallow commercial use of submission')
opt,args = parser.parse_args()
if opt.apikey is None:
# try the environment
opt.apikey = os.environ.get('AN_API_KEY', None)
if opt.apikey is None:
parser.print_help()
print
print 'You must either specify --apikey or set AN_API_KEY'
sys.exit(-1)
args = {}
args['apiurl'] = opt.server
c = Client(**args)
c.login(opt.apikey)
if opt.upload or opt.upload_url:
if opt.wcs or opt.kmz:
opt.wait = True
kwargs = dict(
allow_commercial_use=opt.allow_commercial,
allow_modifications=opt.allow_mod,
publicly_visible=opt.public)
if opt.scale_lower and opt.scale_upper:
kwargs.update(scale_lower=opt.scale_lower,
scale_upper=opt.scale_upper,
scale_type='ul')
elif opt.scale_est and opt.scale_err:
kwargs.update(scale_est=opt.scale_est,
scale_err=opt.scale_err,
scale_type='ev')
elif opt.scale_lower or opt.scale_upper:
kwargs.update(scale_type='ul')
if opt.scale_lower:
kwargs.update(scale_lower=opt.scale_lower)
if opt.scale_upper:
kwargs.update(scale_upper=opt.scale_upper)
for key in ['scale_units', 'center_ra', 'center_dec', 'radius',
'downsample_factor', 'tweak_order', 'crpix_center',]:
if getattr(opt, key) is not None:
kwargs[key] = getattr(opt, key)
if opt.parity is not None:
kwargs.update(parity=int(opt.parity))
if opt.upload:
upres = c.upload(opt.upload, **kwargs)
if opt.upload_url:
upres = c.url_upload(opt.upload_url, **kwargs)
stat = upres['status']
if stat != 'success':
print 'Upload failed: status', stat
print upres
sys.exit(-1)
opt.sub_id = upres['subid']
if opt.wait:
if opt.job_id is None:
if opt.sub_id is None:
print "Can't --wait without a submission id or job id!"
sys.exit(-1)
while True:
stat = c.sub_status(opt.sub_id, justdict=True)
print 'Got status:', stat
jobs = stat.get('jobs', [])
if len(jobs):
for j in jobs:
if j is not None:
break
if j is not None:
print 'Selecting job id', j
opt.job_id = j
break
time.sleep(5)
success = False
while True:
stat = c.job_status(opt.job_id, justdict=True)
print 'Got job status:', stat
if stat.get('status','') in ['success']:
success = (stat['status'] == 'success')
break
time.sleep(5)
if success:
c.job_status(opt.job_id)
# result = c.send_request('jobs/%s/calibration' % opt.job_id)
# print 'Calibration:', result
# result = c.send_request('jobs/%s/tags' % opt.job_id)
# print 'Tags:', result
# result = c.send_request('jobs/%s/machine_tags' % opt.job_id)
# print 'Machine Tags:', result
# result = c.send_request('jobs/%s/objects_in_field' % opt.job_id)
# print 'Objects in field:', result
#result = c.send_request('jobs/%s/annotations' % opt.job_id)
#print 'Annotations:', result
retrieveurls = []
if opt.wcs:
# We don't need the API for this, just construct URL
url = opt.server.replace('/api/', '/wcs_file/%i' % opt.job_id)
retrieveurls.append((url, opt.wcs))
if opt.kmz:
url = opt.server.replace('/api/', '/kml_file/%i/' % opt.job_id)
retrieveurls.append((url, opt.kmz))
for url,fn in retrieveurls:
print 'Retrieving file from', url, 'to', fn
f = urlopen(url)
txt = f.read()
w = open(fn, 'wb')
w.write(txt)
w.close()
print 'Wrote to', fn
opt.job_id = None
opt.sub_id = None
if opt.sdss_wcs:
(wcsfn, outfn) = opt.sdss_wcs
c.sdss_plot(outfn, wcsfn)
if opt.galex_wcs:
(wcsfn, outfn) = opt.galex_wcs
c.galex_plot(outfn, wcsfn)
if opt.sub_id:
print c.sub_status(opt.sub_id)
if opt.job_id:
print c.job_status(opt.job_id)
#result = c.send_request('jobs/%s/annotations' % opt.job_id)
#print 'Annotations:', result
if opt.jobs_by_tag:
tag = opt.jobs_by_tag
print c.jobs_by_tag(tag, None)
if opt.jobs_by_exact_tag:
tag = opt.jobs_by_exact_tag
print c.jobs_by_tag(tag, 'yes')
if opt.myjobs:
jobs = c.myjobs()
print jobs
#print c.submission_images(1)
No, there is no clean way to do so. When the module is being imported, it's code is executed and all global variables are set as attributes to the module object. So if part of the code is not executed at all (is guarded by __main__ condition) there is no clean way to get access to that code. You can however run code of this module with substituted __name__ but that's very hackish.
You should refactor this module and move whole __main__ part into a method and call it like this:
def main():
do_everything()
if __name__ == '__main__':
main()
This way consumer apps will be able to run code without having to run it in a separate process.
Use the runpy module in the Python 3 Standard Library
See that data can be passed to and from the called script
# top.py
import runpy
import sys
sys.argv += ["another parameter"]
module_globals_dict = runpy.run_path("other_script.py",
init_globals = globals(), run_name="__main__")
print(module_globals_dict["return_value"])
# other_script.py
# Note we did not load sys module, it gets passed to this script
script_name = sys.argv[0]
print(f"Script {script_name} loaded")
if __name__ == "__main__":
params = sys.argv[1:]
print(f"Script {script_name} run with params: {params}")
return_value = f"{script_name} Done"
by what your saying you want to call a function in the script that is importing the module so try:
import __main__
__main__.myfunc()

Check server status on Twisted

While I was writing simple message-based fileserver and client, I got the idea about checking fileserver status, but don't know how to realize this: just try to connect and disconnect from server (and how disconnect immediately, when server is not running, if using this way?) or maybe twisted/autobahn have some things, which help to get server status without creating "full connection"?
a) fileserver.py
import os
import sys
import json
from twisted.internet import reactor
from autobahn.twisted.websocket import WebSocketServerFactory, WebSocketServerProtocol, listenWS
CONFIG_TEMPLATE = ''
CONFIG_DATA = {}
class MessageBasedServerProtocol(WebSocketServerProtocol):
"""
Message-based WebSockets server
Template contains some parts as string:
[USER_ID:OPERATION_NAME:FILE_ID] - 15 symbols for USER_ID,
10 symbols for OPERATION_NAME,
25 symbols for FILE_ID
other - some data
"""
def __init__(self):
path = CONFIG_DATA['path']
base_dir = CONFIG_DATA['base_dir']
# prepare to working with files...
if os.path.exists(path) and os.path.isdir(path):
os.chdir(path)
if not os.path.exists(base_dir) or not os.path.isdir(base_dir):
os.mkdir(base_dir)
os.chdir(base_dir)
else:
os.makedir(path)
os.chdir(path)
os.mkdir(base_dir)
os.chdir(base_dir)
# init some things
self.fullpath = path + '/' + base_dir
def __checkUserCatalog(self, user_id):
# prepare to working with files...
os.chdir(self.fullpath)
if not os.path.exists(user_id) or not os.path.isdir(user_id):
os.mkdir(user_id)
os.chdir(user_id)
else:
os.chdir(self.fullpath + '/' + user_id)
def onOpen(self):
print "[USER] User with %s connected" % (self.transport.getPeer())
def connectionLost(self, reason):
print '[USER] Lost connection from %s' % (self.transport.getPeer())
def onMessage(self, payload, isBinary):
"""
Processing request from user and send response
"""
user_id, cmd, file_id = payload[:54].replace('[', '').replace(']','').split(':')
data = payload[54:]
operation = "UNK" # WRT - Write, REA -> Read, DEL -> Delete, UNK -> Unknown
status = "C" # C -> Complete, E -> Error in operation
commentary = 'Succesfull!'
# write file into user storage
if cmd == 'WRITE_FILE':
self.__checkUserCatalog(user_id)
operation = "WRT"
try:
f = open(file_id, "wb")
f.write(data)
except IOError, argument:
status = "E"
commentary = argument
except Exception, argument:
status = "E"
commentary = argument
raise Exception(argument)
finally:
f.close()
# read some file
elif cmd == 'READU_FILE':
self.__checkUserCatalog(user_id)
operation = "REA"
try:
f = open(file_id, "rb")
commentary = f.read()
except IOError, argument:
status = "E"
commentary = argument
except Exception, argument:
status = "E"
commentary = argument
raise Exception(argument)
finally:
f.close()
# delete file from storage (and in main server, in parallel delete from DB)
elif cmd == 'DELET_FILE':
self.__checkUserCatalog(user_id)
operation = "DEL"
try:
os.remove(file_id)
except IOError, argument:
status = "E"
commentary = argument
except Exception, argument:
status = "E"
commentary = argument
raise Exception(argument)
self.sendMessage('[%s][%s]%s' % (operation, status, commentary), isBinary=True)
if __name__ == '__main__':
if len(sys.argv) < 2:
print "using python fileserver_client.py [PATH_TO_config.json_FILE]"
else:
# read config file
CONFIG_TEMPLATE = sys.argv[1]
with open(CONFIG_TEMPLATE, "r") as f:
CONFIG_DATA = json.load(f)
# create server
factory = WebSocketServerFactory("ws://localhost:9000")
factory.protocol = MessageBasedServerProtocol
listenWS(factory)
reactor.run()
b) client.py
import json
import sys
import commands
from twisted.internet import reactor
from autobahn.twisted.websocket import WebSocketClientFactory, WebSocketClientProtocol, connectWS
CONFIG_TEMPLATE = ''
CONFIG_DATA = {}
class MessageBasedClientProtocol(WebSocketClientProtocol):
"""
Message-based WebSockets client
Template contains some parts as string:
[USER_ID:OPERATION_NAME:FILE_ID] - 15 symbols for USER_ID,
10 symbols for OPERATION_NAME,
25 symbols for FILE_ID
other - some data
"""
def onOpen(self):
user_id = CONFIG_DATA['user']
operation_name = CONFIG_DATA['cmd']
file_id = CONFIG_DATA['file_id']
src_file = CONFIG_DATA['src_file']
data = '[' + str(user_id) + ':' + str(operation_name) + ':' + str(file_id) + ']'
if operation_name == 'WRITE_FILE':
with open(src_file, "r") as f:
info = f.read()
data += str(info)
self.sendMessage(data, isBinary=True)
def onMessage(self, payload, isBinary):
cmd = payload[1:4]
result_cmd = payload[6]
if cmd in ('WRT', 'DEL'):
print payload
elif cmd == 'REA':
if result_cmd == 'C':
try:
data = payload[8:]
f = open(CONFIG_DATA['src_file'], "wb")
f.write(data)
except IOError, e:
print e
except Exception, e:
raise Exception(e)
finally:
print payload[:8] + "Successfully!"
f.close()
else:
print payload
reactor.stop()
if __name__ == '__main__':
if len(sys.argv) < 2:
print "using python fileserver_client.py [PATH_TO_config.json_FILE]"
else:
# read config file
CONFIG_TEMPLATE = sys.argv[1]
with open(CONFIG_TEMPLATE, "r") as f:
CONFIG_DATA = json.load(f)
# connection to server
factory = WebSocketClientFactory("ws://localhost:9000")
factory.protocol = MessageBasedClientProtocol
connectWS(factory)
reactor.run()
Find solution this issue: using callLater or deferLater for disconnect from server, if can't connect, but when all was 'OK', just take server status, which he says.
import sys
from twisted.internet.task import deferLater
from twisted.internet import reactor
from autobahn.twisted.websocket import WebSocketClientFactory, WebSocketClientProtocol, connectWS
CONFIG_IP = ''
CONFIG_PORT = 9000
def isOffline(status):
print status
class StatusCheckerProtocol(WebSocketClientProtocol):
def __init__(self):
self.operation_name = "STATUS_SRV"
self.user_id = 'u00000000000000'
self.file_id = "000000000000000000000.log"
def onOpen(self):
data = '[' + str(self.user_id) + ':' + str(self.operation_name) + ':' + str(self.file_id) + ']'
self.sendMessage(data, isBinary=True)
def onMessage(self, payload, isBinary):
cmd = payload[1:4]
result_cmd = payload[6]
data = payload[8:]
print data
reactor.stop()
if __name__ == '__main__':
if len(sys.argv) < 3:
print "using python statuschecker.py [IP] [PORT]"
else:
# read preferences
CONFIG_IP = sys.argv[1]
CONFIG_PORT = int(sys.argv[2])
server_addr = "ws://%s:%d" % (CONFIG_IP, CONFIG_PORT)
# connection to server
factory = WebSocketClientFactory(server_addr)
factory.protocol = StatusCheckerProtocol
connectWS(factory)
# create special Deffered, which disconnect us from some server, if can't connect within 3 seconds
d = deferLater(reactor, 3, isOffline, 'OFFLINE')
d.addCallback(lambda ignored: reactor.stop())
# run all system...
reactor.run()

Categories

Resources