Im trying to change play back speed in azure amp.
The following is the url generated from azure apis: https://ampdemo.azureedge.net/?url=https://testingmedia-usea.streaming.media.azure.net/bbd51d47-cc1a-4515-bac8-4053040f8c58/ignite.ism/manifest(format=mpd-time-cmaf,filter=filter1)&heuristicprofile=lowlatency
if you check that link there is no playback speed.
I saw the below link but dont know where to apply in python code
https://amp.azure.net/libs/amp/latest/docs/index.html#amp.player.options.playbackspeed
below is my code:
from dotenv import load_dotenv
from azure.identity import DefaultAzureCredential
from azure.mgmt.media import AzureMediaServices
from azure.storage.blob import BlobServiceClient
from azure.mgmt.media.models import (
Asset,
Transform,
TransformOutput,
BuiltInStandardEncoderPreset,
Job,
JobInputAsset,
JobOutputAsset,
OnErrorType,
Priority,
StreamingLocator,
AssetFilter,
PresentationTimeRange,
)
import os
import random
#Timer for checking job progress
import time
import requests
#Get environment variables
load_dotenv()
default_credential = DefaultAzureCredential(exclude_shared_token_cache_credential=True)
# Get the environment variables SUBSCRIPTIONID, RESOURCEGROUP and ACCOUNTNAME
subscription_id = os.getenv('SUBSCRIPTIONID')
resource_group = os.getenv('RESOURCEGROUP')
account_name = os.getenv('ACCOUNTNAME')
# The file you want to upload. For this example, put the file in the same folder as this script.
# The file ignite.mp4 has been provided for you.
source_file = "https://testingmedia.blob.core.windows.net/data/ignite.mp4"
#url = requests.get(source_file)
# This is a random string that will be added to the naming of things so that you don't have to keep doing this during testing
uniqueness = "streamAssetFilters-" + str(random.randint(0,9999))
# Change this to your specific streaming endpoint name if not using "default"
streaming_endpoint_name = "default"
# Set the attributes of the input Asset using the random number
in_asset_name = 'inputassetName' + uniqueness
in_alternate_id = 'inputALTid' + uniqueness
in_description = 'inputdescription' + uniqueness
# Create an Asset object
# The asset_id will be used for the container parameter for the storage SDK after the asset is created by the AMS client.
in_asset = Asset(alternate_id=in_alternate_id, description=in_description)
# Set the attributes of the output Asset using the random number
out_asset_name = 'outputassetName' + uniqueness
out_alternate_id = 'outputALTid' + uniqueness
out_description = 'outputdescription' + uniqueness
# Create an output asset object
out_asset = Asset(alternate_id=out_alternate_id, description=out_description)
# The AMS Client
print("Creating AMS Client")
client = AzureMediaServices(default_credential, subscription_id)
# Create an input Asset
print(f"Creating input asset {in_asset_name}")
input_asset = client.assets.create_or_update(resource_group, account_name, in_asset_name, in_asset)
# An AMS asset is a container with a specific id that has "asset-" prepended to the GUID.
# So, you need to create the asset id to identify it as the container
# where Storage is to upload the video (as a block blob)
in_container = 'asset-' + input_asset.asset_id
# create an output Asset
print(f"Creating output asset {out_asset_name}")
output_asset = client.assets.create_or_update(resource_group, account_name, out_asset_name, out_asset)
### Use the Storage SDK to upload the video ###
print(f"Uploading the file {source_file}")
blob_service_client = BlobServiceClient.from_connection_string(os.getenv('STORAGEACCOUNTCONNECTION'))
blob_client = blob_service_client.get_blob_client(in_container, "ignite.mp4")
# working_dir = os.getcwd() + "\Media"
# print(working_dir)
# print(f"Current working directory: {working_dir}")
# upload_file_path = os.path.join(working_dir, source_file)
# print(upload_file_path,"####")
# WARNING: Depending on where you are launching the sample from, the path here could be off, and not include the BasicEncoding folder.
# Adjust the path as needed depending on how you are launching this python sample file.
# Upload the video to storage as a block blob
#with open(url, "rb") as data:
blob_client.upload_blob_from_url(source_file)
transform_name = 'ContentAwareEncodingAssetFilters'
# Create a new Standard encoding Transform for Built-in Copy Codec
print(f"Creating Encoding transform named: {transform_name}")
# For this snippet, we are using 'BuiltInStandardEncoderPreset'
transform_output = TransformOutput(
preset=BuiltInStandardEncoderPreset(
preset_name="ContentAwareEncoding"
),
# What should we do with the job if there is an error?
on_error=OnErrorType.STOP_PROCESSING_JOB,
# What is the relative priority of this job to others? Normal, high or low?
relative_priority=Priority.NORMAL
)
print("Creating encoding transform...")
# Adding transform details
my_transform = Transform()
my_transform.description="Transform with Asset filters"
my_transform.outputs = [transform_output]
print(f"Creating transform {transform_name}")
transform = client.transforms.create_or_update(
resource_group_name=resource_group,
account_name=account_name,
transform_name=transform_name,
parameters=my_transform)
print(f"{transform_name} created (or updated if it existed already). ")
job_name = 'ContentAwareEncodingAssetFilters'+ uniqueness
print(f"Creating custom encoding job {job_name}")
files = (source_file)
# Create Job Input and Ouput Assets
input = JobInputAsset(asset_name=in_asset_name)
outputs = JobOutputAsset(asset_name=out_asset_name)
# Create the job object and then create transform job
the_job = Job(input=input, outputs=[outputs])
job: Job = client.jobs.create(resource_group, account_name, transform_name, job_name, parameters=the_job)
# Check job state
job_state = client.jobs.get(resource_group, account_name, transform_name, job_name)
# First check
print("First job check")
print(job_state.state)
# Check the state of the job every 10 seconds. Adjust time_in_seconds = <how often you want to check for job state>
def countdown(t):
while t:
mins, secs = divmod(t, 60)
timer = '{:02d}:{:02d}'.format(mins, secs)
print(timer, end="\r")
time.sleep(1)
t -= 1
job_current = client.jobs.get(resource_group, account_name, transform_name, job_name)
if(job_current.state == "Finished"):
print(job_current.state)
# TODO: Download the output file using blob storage SDK
return
if(job_current.state == "Error"):
print(job_current.state)
# TODO: Provide Error details from Job through API
return
else:
print(job_current.state)
countdown(int(time_in_seconds))
time_in_seconds = 10
countdown(int(time_in_seconds))
print(f"Creating locator for streaming...")
# Publish the output asset for streaming via HLS or DASH
locator_name = f"locator-{uniqueness}"
# Create the Asset filters
print("Creating an asset filter...")
asset_filter_name = 'filter1'
# Create the asset filter
asset_filter = client.asset_filters.create_or_update(
resource_group_name=resource_group,
account_name=account_name,
asset_name=out_asset_name,
filter_name=asset_filter_name,
parameters=AssetFilter(
# In this sample, we are going to filter the manifest by the time range of the presentation using the default timescale.
# You can adjust these settings for your own needs. Not that you can also control output tracks, and quality levels with a filter.
tracks=[],
# start_timestamp = 100000000 and end_timestamp = 300000000 using the default timescale will generate
# a play-list that contains fragments from between 10 seconds and 30 seconds of the VoD presentation.
# If a fragment straddles the boundary, the entire fragment will be included in the manifest.
presentation_time_range=PresentationTimeRange(start_timestamp=100000000, end_timestamp=300000000)
)
)
if asset_filter:
print(f"The asset filter ({asset_filter_name}) was successfully created.")
print()
else:
raise ValueError("There was an issue creating the asset filter.")
if output_asset:
streaming_locator = StreamingLocator(asset_name=out_asset_name, streaming_policy_name="Predefined_DownloadAndClearStreaming",filters=list(asset_filter_name.split(" ")))
locator = client.streaming_locators.create(
resource_group_name=resource_group,
account_name=account_name,
streaming_locator_name=locator_name,
parameters=streaming_locator
)
if locator:
print(f"The streaming locator {locator_name} was successfully created!")
else:
raise Exception(f"Error while creating streaming locator {locator_name}")
if locator.name:
hls_format = "format=m3u8-cmaf"
dash_format = "format=mpd-time-cmaf"
# Get the default streaming endpoint on the account
streaming_endpoint = client.streaming_endpoints.get(
resource_group_name=resource_group,
account_name=account_name,
streaming_endpoint_name=streaming_endpoint_name
)
if streaming_endpoint.resource_state != "Running":
print(f"Streaming endpoint is stopped. Starting endpoint named {streaming_endpoint_name}")
client.streaming_endpoints.begin_start(resource_group, account_name, streaming_endpoint_name)
basename_tup = os.path.splitext(source_file) # Extracting the filename and extension
path_extension = basename_tup[1] # Setting extension of the path
manifest_name = os.path.basename(source_file).replace(path_extension, "")
print(f"The manifest name is: {manifest_name}")
manifest_base = f"https://{streaming_endpoint.host_name}/{locator.streaming_locator_id}/{manifest_name}.ism/manifest"
hls_manifest = ""
if asset_filter_name is None:
hls_manifest = f'{manifest_base}({hls_format})'
else:
hls_manifest = f'{manifest_base}({hls_format},filter={asset_filter_name})'
print(f"The HLS (MP4) manifest URL is: {hls_manifest}")
print("Open the following URL to playback the live stream in an HLS compliant player (HLS.js, Shaka, ExoPlayer) or directly in an iOS device")
print({hls_manifest})
print()
dash_manifest = ""
if asset_filter_name is None:
dash_manifest = f'{manifest_base}({dash_format})'
else:
dash_manifest = f'{manifest_base}({dash_format},filter={asset_filter_name})'
print(f"The DASH manifest URL is: {dash_manifest}")
print("Open the following URL to playback the live stream from the LiveOutput in the Azure Media Player")
print(f"https://ampdemo.azureedge.net/?url={dash_manifest}&heuristicprofile=lowlatency")
print()
else:
raise ValueError("Locator was not created or Locator name is undefined.")
There's an example on https://amp.azure.net/libs/amp/latest/samples/dynamic_playback_speed.html for how to use playback speed. This is also available at https://github.com/Azure-Samples/azure-media-player-samples/blob/master/html/dynamic_playback_speed.html.
Related
I am using the following examples from the docusign site.
I have a set of python scripts that works well on my PC.
I have the move the code to a server behind a proxy.
I could not find any example or settings to configure a proxy.
I tired setting it in the underlining URLLIB3 code but it is being overwritten each time the AP creates class of the APIClient().
How do I set the python docusign_esign ApiClient to use a proxy?
Below is the portion of the code.
from docusign_esign import ApiClient
from docusign_esign import EnvelopesApi
from jwt_helper import get_jwt_token, get_private_key
# this one has all the connection parameters
from jwt_config import DS_JWT
import urllib3
proxy = urllib3.ProxyManager('http://<id>:<pwd>#<proxy_server>:3128/', maxsize=10)
# used by docusign to decide what you have access to
SCOPES = ["signature", "impersonation"]
# Call the envelope status change method to list the envelopes changed in the last 10 days
def worker(args):
api_client = ApiClient()
api_client.host = args['base_path']
api_client.set_default_header("Authorization", "Bearer " + args['access_token'])
envelope_api = EnvelopesApi(api_client)
# The Envelopes::listStatusChanges method has many options
# The list status changes call requires at least a from_date OR
# a set of envelopeIds. Here we filter using a from_date.
# Here we set the from_date to filter envelopes for the last month
# Use ISO 8601 date format
from_date = (datetime.datetime.utcnow() - timedelta(days=120)).isoformat()
results = envelope_api.list_status_changes(args['account_id'], from_date=from_date)
return results, envelope_api
# Call request_jwt_user_token method
def get_token(private_key, api_client):
token_response = get_jwt_token(private_key, SCOPES, DS_JWT["authorization_server"],
DS_JWT["ds_client_id"], DS_JWT["ds_impersonated_user_id"])
access_token = token_response.access_token
# Save API account ID
user_info = api_client.get_user_info(access_token)
accounts = user_info.get_accounts()
api_account_id = accounts[0].account_id
base_path = accounts[0].base_uri + "/restapi"
return {"access_token": access_token, "api_account_id": api_account_id, "base_path":
base_path}
# bucket to keep track of token info
def get_args(api_account_id, access_token, base_path):
args = {
"account_id": api_account_id,
"base_path": base_path,
"access_token": access_token
}
return args
# start the actual code here create and then setup the object
api_client = ApiClient()
api_client.set_base_path(DS_JWT["authorization_server"])
api_client.set_oauth_host_name(DS_JWT["authorization_server"])
api_client.rest_client.pool_manager.proxy = proxy
api_client.rest_client.pool_manager.proxy.scheme = "http"
private_key = get_private_key(DS_JWT["private_key_file"]).encode("ascii").decode("utf-8")
jwt_values = get_token(private_key, api_client)
args = get_args(jwt_values["api_account_id"], jwt_values["access_token"], jwt_values["base_path"])
account_id = args["account_id"]
# return the envelope list and api_client object created to get it
results, envelope_api = worker(args)
print("We found " + str(results.result_set_size) + " sets of files")
for envelope in results.envelopes:
envelope_id = envelope.envelope_id
print("Extracting " + envelope_id)
# The SDK always stores the received file as a temp file you can not set the path for this
# Call the envelope get method
temp_file = envelope_api.get_document(account_id=account_id, document_id="archive",
envelope_id=envelope_id)
if temp_file:
print("File is here " + temp_file)
with zipfile.ZipFile(temp_file, 'r') as zip_ref:
zip_ref.extractall(extract_dir + envelope_id + "\\")
zip_ref.close()
print("Done extracting " + envelope_id + " deleting zip file")
os.remove(temp_file)
print("Deleted file here " + temp_file)
else:
print("Failed to get data for " + envelope_id)
I am trying the example from the Google repo:
https://github.com/googleapis/python-documentai/blob/HEAD/samples/snippets/quickstart_sample.py
I have an error:
metadata=[('x-goog-request-params', 'name=projects/my_proj_id/locations/us/processors/my_processor_id'), ('x-goog-api-client', 'gl-python/3.8.10 grpc/1.38.1 gax/1.30.0 gapic/1.0.0')]), last exception: 503 DNS resolution failed for service: https://us-documentai.googleapis.com/v1/
My full code:
from google.cloud import documentai_v1 as documentai
import os
# TODO(developer): Uncomment these variables before running the sample.
project_id= '123456789'
location = 'us' # Format is 'us' or 'eu'
processor_id = '1a23345gh823892' # Create processor in Cloud Console
file_path = 'document.jpg'
os.environ['GRPC_DNS_RESOLVER'] = 'native'
def quickstart(project_id: str, location: str, processor_id: str, file_path: str):
# You must set the api_endpoint if you use a location other than 'us', e.g.:
opts = {}
if location == "eu":
opts = {"api_endpoint": "eu-documentai.googleapis.com"}
client = documentai.DocumentProcessorServiceClient(client_options=opts)
# The full resource name of the processor, e.g.:
# projects/project-id/locations/location/processor/processor-id
# You must create new processors in the Cloud Console first
name = f"projects/{project_id}/locations/{location}/processors/{processor_id}:process"
# Read the file into memory
with open(file_path, "rb") as image:
image_content = image.read()
document = {"content": image_content, "mime_type": "image/jpeg"}
# Configure the process request
request = {"name": name, "raw_document": document}
result = client.process_document(request=request)
document = result.document
document_pages = document.pages
# For a full list of Document object attributes, please reference this page: https://googleapis.dev/python/documentai/latest/_modules/google/cloud/documentai_v1beta3/types/document.html#Document
# Read the text recognition output from the processor
print("The document contains the following paragraphs:")
for page in document_pages:
paragraphs = page.paragraphs
for paragraph in paragraphs:
print(paragraph)
paragraph_text = get_text(paragraph.layout, document)
print(f"Paragraph text: {paragraph_text}")
def get_text(doc_element: dict, document: dict):
"""
Document AI identifies form fields by their offsets
in document text. This function converts offsets
to text snippets.
"""
response = ""
# If a text segment spans several lines, it will
# be stored in different text segments.
for segment in doc_element.text_anchor.text_segments:
start_index = (
int(segment.start_index)
if segment in doc_element.text_anchor.text_segments
else 0
)
end_index = int(segment.end_index)
response += document.text[start_index:end_index]
return response
def main ():
quickstart (project_id = project_id, location = location, processor_id = processor_id, file_path = file_path)
if __name__ == '__main__':
main ()
FYI, on the Google Cloud website it stated that the endpoint is:
https://us-documentai.googleapis.com/v1/projects/123456789/locations/us/processors/1a23345gh823892:process
I can use the web interface to run DocumentAI so it is working. I just have the problem with Python code.
Any suggestion is appreciated.
I would suspect the GRPC_DNS_RESOLVER environment variable to be the root cause. Did you try with the corresponding line commented out? Why was it added in your code?
I have a setup where I need to extract data from Elasticsearch and store it on an Azure Blob. Now to get the data I am using Elasticsearch's _search and _scroll API. The indexes are pretty well designed and are formatted something like game1.*, game2.*, game3.* etc.
I've created a worker.py file which I stored in a folder called shared_code as Microsoft suggests and I have several Timer Trigger Functions which import and call worker.py. Due to the way ES was setup on our side I had to create a VNET and a static Outbound IP address which we've then whitelisted on ES. Conversely, the data is only available to be extracted from ES only on port 9200. So I've created an Azure Function App which has the connection setup and I am trying to create multiple Functions (game1-worker, game2-worker, game3-worker) to pull the data from ES running in parallel on minute 5. I've noticed if I add the FUNCTIONS_WORKER_PROCESS_COUNT = 1 setting then the functions will wait until the first triggered one finishes its task and then the second one triggers. If I don't add this app setting or increase the number, then once a function stopped because it finished working, it will try to start it again and then I get a OSError: [WinError 10048] Only one usage of each socket address (protocol/network address/port) is normally permitted error. Is there a way I can make these run in parallel but not have the mentioned error?
Here is the code for the worker.py:
#!/usr/bin/env python
# coding: utf-8
# # Elasticsearch to Azure Microservice
import json, datetime, gzip, importlib, os, re, logging
from elasticsearch import Elasticsearch
import azure.storage.blob as azsb
import azure.identity as azi
import os
import tempfile
def batch(game_name, env='prod'):
# #### Global Variables
env = env.lower()
connection_string = os.getenv('conn_storage')
lowerFormat = game_name.lower().replace(" ","_")
azFormat = re.sub(r'[^0-9a-zA-Z]+', '-', game_name).lower()
storageContainerName = azFormat
stateStorageContainerName = "azure-webjobs-state"
minutesOffset = 5
tempFilePath = tempfile.gettempdir()
curFileName = f"{lowerFormat}_cursor.py"
curTempFilePath = os.path.join(tempFilePath,curFileName)
curBlobFilePath = f"cursors/{curFileName}"
esUrl = os.getenv('esUrl')
# #### Connections
es = Elasticsearch(
esUrl,
port=9200,
timeout=300)
def uploadJsonGzipBlob(filePathAndName, jsonBody):
blob = azsb.BlobClient.from_connection_string(
conn_str=connection_string,
container_name=storageContainerName,
blob_name=filePathAndName
)
blob.upload_blob(gzip.compress(bytes(json.dumps(jsonBody), encoding='utf-8')))
def getAndLoadCursor(filePathAndName):
# Get cursor from blob
blob = azsb.BlobClient.from_connection_string(
conn_str=os.getenv('AzureWebJobsStorage'),
container_name=stateStorageContainerName,
blob_name=filePathAndName
)
# Stream it to Temp file
with open(curTempFilePath, "wb") as f:
data = blob.download_blob()
data.readinto(f)
# Load it by path
spec = importlib.util.spec_from_file_location("cursor", curTempFilePath)
cur = importlib.util.module_from_spec(spec)
spec.loader.exec_module(cur)
return cur
def writeCursor(filePathAndName, body):
blob = azsb.BlobClient.from_connection_string(
conn_str=os.getenv('AzureWebJobsStorage'),
container_name=stateStorageContainerName,
blob_name=filePathAndName
)
blob.upload_blob(body, overwrite=True)
# Parameter and state settings
if os.getenv(f"{lowerFormat}_maxSizeMB") is None:
maxSizeMB = 10 # Default to 10 MB
else:
maxSizeMB = int(os.getenv(f"{lowerFormat}_maxSizeMB"))
if os.getenv(f"{lowerFormat}_maxProcessTimeSeconds") is None:
maxProcessTimeSeconds = 300 # Default to 300 seconds
else:
maxProcessTimeSeconds = int(os.getenv(f"{lowerFormat}_maxProcessTimeSeconds"))
try:
cur = getAndLoadCursor(curBlobFilePath)
except Exception as e:
dtStr = f"{datetime.datetime.utcnow():%Y/%m/%d %H:%M:00}"
writeCursor(curBlobFilePath, f"# Please use format YYYY/MM/DD HH24:MI:SS\nlastPolled = '{dtStr}'")
logging.info(f"No cursor file. Generated {curFileName} file with date {dtStr}")
return 0
# # Scrolling and Batching Engine
lastRowDateOffset = cur.lastPolled
nrFilesThisInstance = 0
while 1:
# Offset the current time by -5 minutes to account for the 2-3 min delay in Elasticsearch
initTime = datetime.datetime.utcnow()
## Filter lt (less than) endDate to avoid infinite loops.
## Filter lt manually when compiling historical based on
endDate = initTime-datetime.timedelta(minutes=minutesOffset)
endDate = f"{endDate:%Y/%m/%d %H:%M:%S}"
doc = {
"query": {
"range": {
"baseCtx.date": {
"gt": lastRowDateOffset,
"lt": endDate
}
}
}
}
Index = lowerFormat + ".*"
if env == 'dev': Index = 'dev.' + Index
if nrFilesThisInstance == 0:
page = es.search(
index = Index,
sort = "baseCtx.date:asc",
scroll = "2m",
size = 10000,
body = doc
)
else:
page = es.scroll(scroll_id = sid, scroll = "10m")
pageSize = len(page["hits"]["hits"])
data = page["hits"]["hits"]
sid = page["_scroll_id"]
totalSize = page["hits"]["total"]
print(f"Total Size: {totalSize}")
cnt = 0
# totalSize might be flawed as it returns at times an integer > 0 but array is empty
# To overcome this, I've added the below check for the array size instead
if pageSize == 0: break
while 1:
cnt += 1
page = es.scroll(scroll_id = sid, scroll = "10m")
pageSize = len(page["hits"]["hits"])
sid = page["_scroll_id"]
data += page["hits"]["hits"]
sizeMB = len(gzip.compress(bytes(json.dumps(data), encoding='utf-8'))) / (1024**2)
loopTime = datetime.datetime.utcnow()
processTimeSeconds = (loopTime-initTime).seconds
print(f"{cnt} Results pulled: {pageSize} -- Cumulative Results: {len(data)} -- Gzip Size MB: {sizeMB} -- processTimeSeconds: {processTimeSeconds} -- pageSize: {pageSize} -- startDate: {lastRowDateOffset} -- endDate: {endDate}")
if sizeMB > maxSizeMB: break
if processTimeSeconds > maxProcessTimeSeconds: break
if pageSize < 10000: break
lastRowDateOffset = max([x['_source']['baseCtx']['date'] for x in data])
lastRowDateOffsetDT = datetime.datetime.strptime(lastRowDateOffset, '%Y/%m/%d %H:%M:%S')
outFile = f"elasticsearch/live/{lastRowDateOffsetDT:%Y/%m/%d/%H}/{lowerFormat}_live_{lastRowDateOffsetDT:%Y%m%d%H%M%S}.json.gz"
uploadJsonGzipBlob(outFile, data)
writeCursor(curBlobFilePath, f"# Please use format YYYY/MM/DD HH24:MI:SS\nlastPolled = '{lastRowDateOffset}'")
nrFilesThisInstance += 1
logging.info(f"File compiled: {outFile} -- {sizeMB} MB\n")
# If the while loop ran for more than maxProcessTimeSeconds then end it
if processTimeSeconds > maxProcessTimeSeconds: break
if pageSize < 10000: break
logging.info(f"Closing Connection to {esUrl}")
es.close()
return 0
And these are 2 of the timing triggers I am calling:
game1-worker
import logging
import datetime
import azure.functions as func
#from shared_code import worker
import importlib
def main(mytimer: func.TimerRequest) -> None:
utc_timestamp = datetime.datetime.utcnow().replace(
tzinfo=datetime.timezone.utc).isoformat()
if mytimer.past_due:
logging.info('The timer is past due!')
# Load a new instance of worker.py
spec = importlib.util.spec_from_file_location("worker", "shared_code/worker.py")
worker = importlib.util.module_from_spec(spec)
spec.loader.exec_module(worker)
worker.batch('game1name')
logging.info('Python timer trigger function ran at %s', utc_timestamp)
game2-worker
import logging
import datetime
import azure.functions as func
#from shared_code import worker
import importlib
def main(mytimer: func.TimerRequest) -> None:
utc_timestamp = datetime.datetime.utcnow().replace(
tzinfo=datetime.timezone.utc).isoformat()
if mytimer.past_due:
logging.info('The timer is past due!')
# Load a new instance of worker.py
spec = importlib.util.spec_from_file_location("worker", "shared_code/worker.py")
worker = importlib.util.module_from_spec(spec)
spec.loader.exec_module(worker)
worker.batch('game2name')
logging.info('Python timer trigger function ran at %s', utc_timestamp)
TL;DR
Based on what you described, multiple worker-processes share underlying runtime's resources (sockets).
For your usecase you just need to leave FUNCTIONS_WORKER_PROCESS_COUNT at 1. Default value is supposed to be 1, so not specifying it should mean the same as setting it to 1.
You need to understand how Azure Functions scale. It is very unnatural/confusing.
Assumes Consumption Plan.
Coding: You write Functions. Say F1 an F2. How you organize is up to you.
Provisioning:
You create a Function App.
You deploy F1 and F2 to this App.
You start the App. (not function).
Runtime:
At start
Azure spawns one Function Host. Think of this as a container/OS.
Inside the Host, one worker-process is created. This worker-process will host one instance of App.
If you change FUNCTIONS_WORKER_PROCESS_COUNT to say 10 then Host will spawn 10 processes and run your App inside each of them.
When a Function is triggered (function could be triggered due to timer, or REST calls or message in Q, ...)
Each worker-process is capable of servicing one request at a time. Be it a request for F1 or F2. One at a time.
Each Host is capable servicing one request per worker-process in it.
If backlog of requests grows, then Azure load balancer would trigger scale-out and create new Function Hosts.
Based on limited info, it seems like bad design to create 3 functions. You could instead create a single timer-triggered function, which sends out 3 messages to a Q (Storage Q should be more than plenty for such minuscule traffic), which in turn triggers your actual Function/implementation (which is storage Q triggered Function). Message would be something like {"game_name": "game1"}.
I have this script:
#!/usr/bin/env python3
import boto3
import argparse
import time
ec2 = boto3.resource('ec2')
dstamp = time.strftime("_%m-%d-%Y-0")
parser = argparse.ArgumentParser(description='Create Image(AMI) from Instance tag:Name Value')
parser.add_argument('names', nargs='+', type=str.upper, help='instance name or list of names to create images from')
args = parser.parse_args()
# List Source Instances for Image/Tag Creation
for instance in ec2.instances.all():
# Pull Name tags from source instances
for name in instance.tags:
if name["Key"] == 'Name':
instancename = name["Value"]
# Check for Name Tag Match with args
for iname in args.names:
if iname == instancename:
# Create an image if we have a match
image = instance.create_image(
Description=f"Created from Source: {instance.id} Name: {instancename}",
Name=instancename + dstamp,
NoReboot=True)
print('New: {} from instance.id: {} {}'.format(image.id, instance.id, instancename))
# ----------------------------------------------
# Can't copy tags from src instance - cause of auto-generated by Cloudformation Tags
# error I got: "Tag keys starting with 'aws:' are reserved for internal use"
# So we skip any tag [Key] named 'aws:'
# ----------------------------------------------
for tag in instance.tags:
dst_tags = []
if tag['Key'].startswith('aws:'):
print("Skip tag that starts with 'aws:' " + tag['Key'])
else:
dst_tags.append(tag)
print(' Tags:', dst_tags)
image.create_tags(Tags=dst_tags)
This is working perfectly, but the final function I am missing is to apply the tags to the underlying volume snapshots within the newly created image. Do I have to totally switch to client = boto3.client('ec2') in order to tag my volume snapshots?
To put it another way - how are people who are using images for backup tagging their volume snapshots?
I have been working with boto3 and python 3 for all of 3 weeks along with my regular duties, any help would be appreciated.
So I am trying my best to navigate my way through the Facebook API. I need to crate a script that will download my business' campaign information daily as a csv file so I can use another script to upload the information to our database easily.
I finally have code that works to print the information to the log, but I am reaching the user request limit because I have to call get_insights() for every single campaign individually. I am wondering if anyone knows how to help me make it so I don't have to call the facebook API as often.
What I would like to do if find a field where I can get the daily spend so I don't have to call the API in every iteration of my for campaign loop, but I cannot for the life of me find a way to do so.
#Import all the facebook mumbo jumbo
from facebookads.api import FacebookAdsApi
from facebookads.adobjects.adset import AdSet
from facebookads.adobjects.campaign import Campaign
from facebookads.adobjects.adsinsights import AdsInsights
from facebookads.adobjects.adreportrun import AdReportRun
from facebookads.adobjects.adaccount import AdAccount
from facebookads.adobjects.business import Business
import time
#Set the login info
my_app_id = '****'
my_app_secret = '****'
my_access_token = '****'
#Start the connection to the facebook API
FacebookAdsApi.init(my_app_id, my_app_secret, my_access_token)
business = Business('****')
#Get all ad accounts on the business account
accounts = business.get_owned_ad_accounts(fields=[AdAccount.Field.id])
#iterate through all accounts in the business account
for account in accounts:
tempaccount = AdAccount(account[AdAccount.Field.id])
#get all campaigns in the adaccount
campaigns = tempaccount.get_campaigns(fields=[Campaign.Field.name,Campaign.Field])
#iterate trough all the campaigns in the adaccount
for campaign in campaigns:
print(campaign[Campaign.Field.name])
#get the insight info (spend) from each campaign
campaignsights = campaign.get_insights(params={'date_preset':'yesterday'},fields=[AdsInsights.Field.spend])
print (campaignsights)
It took a while of digging through the API and guessing but I got it! Here is my final script:
# This program downloads all relevent Facebook traffic info as a csv file
# This program requires info from the Facebook Ads API: https://github.com/facebook/facebook-python-ads-sdk
# Import all the facebook mumbo jumbo
from facebookads.api import FacebookAdsApi
from facebookads.adobjects.adsinsights import AdsInsights
from facebookads.adobjects.adaccount import AdAccount
from facebookads.adobjects.business import Business
# Import th csv writer and the date/time function
import datetime
import csv
# Set the info to get connected to the API. Do NOT share this info
my_app_id = '****'
my_app_secret = '****'
my_access_token = '****'
# Start the connection to the facebook API
FacebookAdsApi.init(my_app_id, my_app_secret, my_access_token)
# Create a business object for the business account
business = Business('****')
# Get yesterday's date for the filename, and the csv data
yesterdaybad = datetime.datetime.now() - datetime.timedelta(days=1)
yesterdayslash = yesterdaybad.strftime('%m/%d/%Y')
yesterdayhyphen = yesterdaybad.strftime('%m-%d-%Y')
# Define the destination filename
filename = yesterdayhyphen + '_fb.csv'
filelocation = "/cron/downloads/"+ filename
# Get all ad accounts on the business account
accounts = business.get_owned_ad_accounts(fields=[AdAccount.Field.id])
# Open or create new file
try:
csvfile = open(filelocation , 'w+', 0777)
except:
print ("Cannot open file.")
# To keep track of rows added to file
rows = 0
try:
# Create file writer
filewriter = csv.writer(csvfile, delimiter=',')
except Exception as err:
print(err)
# Iterate through the adaccounts
for account in accounts:
# Create an addaccount object from the adaccount id to make it possible to get insights
tempaccount = AdAccount(account[AdAccount.Field.id])
# Grab insight info for all ads in the adaccount
ads = tempaccount.get_insights(params={'date_preset':'yesterday',
'level':'ad'
},
fields=[AdsInsights.Field.account_id,
AdsInsights.Field.account_name,
AdsInsights.Field.ad_id,
AdsInsights.Field.ad_name,
AdsInsights.Field.adset_id,
AdsInsights.Field.adset_name,
AdsInsights.Field.campaign_id,
AdsInsights.Field.campaign_name,
AdsInsights.Field.cost_per_outbound_click,
AdsInsights.Field.outbound_clicks,
AdsInsights.Field.spend
]
);
# Iterate through all accounts in the business account
for ad in ads:
# Set default values in case the insight info is empty
date = yesterdayslash
accountid = ad[AdsInsights.Field.account_id]
accountname = ""
adid = ""
adname = ""
adsetid = ""
adsetname = ""
campaignid = ""
campaignname = ""
costperoutboundclick = ""
outboundclicks = ""
spend = ""
# Set values from insight data
if ('account_id' in ad) :
accountid = ad[AdsInsights.Field.account_id]
if ('account_name' in ad) :
accountname = ad[AdsInsights.Field.account_name]
if ('ad_id' in ad) :
adid = ad[AdsInsights.Field.ad_id]
if ('ad_name' in ad) :
adname = ad[AdsInsights.Field.ad_name]
if ('adset_id' in ad) :
adsetid = ad[AdsInsights.Field.adset_id]
if ('adset_name' in ad) :
adsetname = ad[AdsInsights.Field.adset_name]
if ('campaign_id' in ad) :
campaignid = ad[AdsInsights.Field.campaign_id]
if ('campaign_name' in ad) :
campaignname = ad[AdsInsights.Field.campaign_name]
if ('cost_per_outbound_click' in ad) : # This is stored strangely, takes a few steps to break through the layers
costperoutboundclicklist = ad[AdsInsights.Field.cost_per_outbound_click]
costperoutboundclickdict = costperoutboundclicklist[0]
costperoutboundclick = costperoutboundclickdict.get('value')
if ('outbound_clicks' in ad) : # This is stored strangely, takes a few steps to break through the layers
outboundclickslist = ad[AdsInsights.Field.outbound_clicks]
outboundclicksdict = outboundclickslist[0]
outboundclicks = outboundclicksdict.get('value')
if ('spend' in ad) :
spend = ad[AdsInsights.Field.spend]
# Write all ad info to the file, and increment the number of rows that will display
filewriter.writerow([date, accountid, accountname, adid, adname, adsetid, adsetname, campaignid, campaignname, costperoutboundclick, outboundclicks, spend])
rows += 1
csvfile.close()
# Print report
print (str(rows) + " rows added to the file " + filename)
I then have a php script that takes the csv file and uploads it to my database. The key is pulling all the insight data in one big yank. You can then break it up however you want because each ad has information about its adset, adaccount, and campaign.
Adding a couple of small functions to improve on LucyTurtle's answer as it is still susceptible to Facebook's Rate Limiting
import logging
import requests as rq
#Function to find the string between two strings or characters
def find_between( s, first, last ):
try:
start = s.index( first ) + len( first )
end = s.index( last, start )
return s[start:end]
except ValueError:
return ""
#Function to check how close you are to the FB Rate Limit
def check_limit():
def check_limit():
check=rq.get('https://graph.facebook.com/v3.3/act_'+account_number+'/insights?access_token='+my_access_token)
call=float(find_between(check.headers['x-business-use-case-usage'],'call_count":','}'))
cpu=float(find_between(check.headers['x-business-use-case-usage'],'total_cputime":','}'))
total=float(find_between(check.headers['x-business-use-case-usage'],'total_time":',','))
usage=max(call,cpu,total)
return usage
#Check if you reached 75% of the limit, if yes then back-off for 5 minutes (put this chunk in your 'for ad is ads' loop, every 100-200 iterations)
if (check_limit()>75):
print('75% Rate Limit Reached. Cooling Time 5 Minutes.')
logging.debug('75% Rate Limit Reached. Cooling Time 5 Minutes.')
time.sleep(300)
I'd just like to say
Thank you.
As Marks Andre said - you made my day!
The FB SDK documentation is exhaustive, but it completely lacks the practical implementation examples for day-to-day-tasks like this one. Bookmark is set - page will be revisited soon.
So the only thing I can actually contribute for fellow sufferers: it seems that with the newer facebook_business SDK you can simply completely replace "facebookads" in the import statements with "facebook_business".