NameError for Monitor function using SimPy - Python

Very new to Python and trying to use a SimPy script I found online to model queue times. I am getting a NameError when I use "Monitor", which I thought was part of SimPy. Is there somewhere else I should be importing Monitor from?
Thanks in advance for the help!
See below:
#!/usr/bin/env python
from __future__ import generators
import simpy
from multiprocessing import Queue, Process
from random import Random,expovariate,uniform
# MMC.py simulation of an M/M/c/FCFS/inft/infty queue
# 2004 Dec updated and simplified
# $Revision: 1.1.1.5 $ $Author: kgmuller $ $Date: 2006/02/02 13:35:45 $
"""Simulation of an M/M/c queue
Jobs arrive at random into a c-server queue with
exponential service-time distribution. Simulate to
determine the average number and the average time
in the system.
- c = Number of servers = 3
- rate = Arrival rate = 2.0
- stime = mean service time = 1.0
"""
__version__='\nModel: MMC queue'
class Generator(Process):
    """ generates Jobs at random """
    def execute(self, maxNumber, rate, stime):
        ##print "%7.4f %s starts"%(now(), self.name)
        for i in range(maxNumber):
            L = Job("Job "+`i`)
            activate(L, L.execute(stime), delay=0)
            yield hold, self, grv.expovariate(rate)

class Job(Process):
    """ Jobs request a gatekeeper and hold it for an exponential time """
    def execute(self, stime):
        global NoInSystem
        arrTime = now()
        self.trace("Hello World")
        NoInSystem += 1
        m.accum(NoInSystem)
        yield request, self, server
        self.trace("At last ")
        t = jrv.expovariate(1.0/stime)
        msT.tally(t)
        yield hold, self, t
        yield release, self, server
        NoInSystem -= 1
        m.accum(NoInSystem)
        mT.tally(now()-arrTime)
        self.trace("Geronimo ")

    def trace(self, message):
        if TRACING:
            print "%7.4f %6s %10s (%2d)"%(now(), self.name, message, NoInSystem)
TRACING = 0
print __version__
c = 3
stime = 1.0
rate = 2.0
print "%2d servers, %6.4f arrival rate,%6.4f mean service time"%(c,rate,stime)
grv = Random(333555) # RV for Source
jrv = Random(777999) # RV for Job
NoInSystem = 0
m=Monitor()
mT=Monitor()
msT=Monitor()
server=Resource(c,name='Gatekeeper')
initialize()
g = Generator('gen')
activate(g,g.execute(maxNumber=10,rate=rate,stime=stime),delay=0)
simulate(until=3000.0)
print "Average number in the system is %6.4f"%(m.timeAverage(),)
print "Average time in the system is %6.4f"%(mT.mean(),)
print "Actual average service-time is %6.4f"%(msT.mean(),)

You are getting a NameError because Monitor hasn't been defined within your script. To use Monitor from SimPy, either change import simpy to from simpy import Monitor, or qualify the calls as simpy.Monitor wherever you are currently using the Monitor function.
Ex:
#!/usr/bin/env python
from __future__ import generators
from simpy import Monitor
Or (where the Monitor objects are created):
m=simpy.Monitor()
mT=simpy.Monitor()
msT=simpy.Monitor()

Related

How to run multiple Azure Functions in parallel which scroll through Elasticsearch?

I have a setup where I need to extract data from Elasticsearch and store it on an Azure Blob. Now to get the data I am using Elasticsearch's _search and _scroll API. The indexes are pretty well designed and are formatted something like game1.*, game2.*, game3.* etc.
I've created a worker.py file, stored in a folder called shared_code as Microsoft suggests, and I have several Timer Trigger Functions which import and call worker.py. Due to the way ES was set up on our side, I had to create a VNET and a static outbound IP address, which we've whitelisted on ES; the data can only be extracted from ES on port 9200. So I've created an Azure Function App with that connection set up, and I am trying to create multiple Functions (game1-worker, game2-worker, game3-worker) to pull the data from ES in parallel, triggering at minute 5.
I've noticed that if I add the FUNCTIONS_WORKER_PROCESS_COUNT = 1 setting, the functions wait until the first triggered one finishes its task before the second one triggers. If I don't add this app setting, or increase the number, then once a function has stopped because it finished its work and tries to start again, I get an OSError: [WinError 10048] Only one usage of each socket address (protocol/network address/port) is normally permitted error. Is there a way I can make these run in parallel without the mentioned error?
Here is the code for the worker.py:
#!/usr/bin/env python
# coding: utf-8
# # Elasticsearch to Azure Microservice
import json, datetime, gzip, importlib, os, re, logging
from elasticsearch import Elasticsearch
import azure.storage.blob as azsb
import azure.identity as azi
import os
import tempfile
def batch(game_name, env='prod'):
    # #### Global Variables
    env = env.lower()
    connection_string = os.getenv('conn_storage')
    lowerFormat = game_name.lower().replace(" ","_")
    azFormat = re.sub(r'[^0-9a-zA-Z]+', '-', game_name).lower()
    storageContainerName = azFormat
    stateStorageContainerName = "azure-webjobs-state"
    minutesOffset = 5
    tempFilePath = tempfile.gettempdir()
    curFileName = f"{lowerFormat}_cursor.py"
    curTempFilePath = os.path.join(tempFilePath, curFileName)
    curBlobFilePath = f"cursors/{curFileName}"
    esUrl = os.getenv('esUrl')

    # #### Connections
    es = Elasticsearch(
        esUrl,
        port=9200,
        timeout=300)

    def uploadJsonGzipBlob(filePathAndName, jsonBody):
        blob = azsb.BlobClient.from_connection_string(
            conn_str=connection_string,
            container_name=storageContainerName,
            blob_name=filePathAndName
        )
        blob.upload_blob(gzip.compress(bytes(json.dumps(jsonBody), encoding='utf-8')))

    def getAndLoadCursor(filePathAndName):
        # Get cursor from blob
        blob = azsb.BlobClient.from_connection_string(
            conn_str=os.getenv('AzureWebJobsStorage'),
            container_name=stateStorageContainerName,
            blob_name=filePathAndName
        )
        # Stream it to Temp file
        with open(curTempFilePath, "wb") as f:
            data = blob.download_blob()
            data.readinto(f)
        # Load it by path
        spec = importlib.util.spec_from_file_location("cursor", curTempFilePath)
        cur = importlib.util.module_from_spec(spec)
        spec.loader.exec_module(cur)
        return cur

    def writeCursor(filePathAndName, body):
        blob = azsb.BlobClient.from_connection_string(
            conn_str=os.getenv('AzureWebJobsStorage'),
            container_name=stateStorageContainerName,
            blob_name=filePathAndName
        )
        blob.upload_blob(body, overwrite=True)

    # Parameter and state settings
    if os.getenv(f"{lowerFormat}_maxSizeMB") is None:
        maxSizeMB = 10  # Default to 10 MB
    else:
        maxSizeMB = int(os.getenv(f"{lowerFormat}_maxSizeMB"))

    if os.getenv(f"{lowerFormat}_maxProcessTimeSeconds") is None:
        maxProcessTimeSeconds = 300  # Default to 300 seconds
    else:
        maxProcessTimeSeconds = int(os.getenv(f"{lowerFormat}_maxProcessTimeSeconds"))

    try:
        cur = getAndLoadCursor(curBlobFilePath)
    except Exception as e:
        dtStr = f"{datetime.datetime.utcnow():%Y/%m/%d %H:%M:00}"
        writeCursor(curBlobFilePath, f"# Please use format YYYY/MM/DD HH24:MI:SS\nlastPolled = '{dtStr}'")
        logging.info(f"No cursor file. Generated {curFileName} file with date {dtStr}")
        return 0

    # # Scrolling and Batching Engine
    lastRowDateOffset = cur.lastPolled
    nrFilesThisInstance = 0

    while 1:
        # Offset the current time by -5 minutes to account for the 2-3 min delay in Elasticsearch
        initTime = datetime.datetime.utcnow()
        ## Filter lt (less than) endDate to avoid infinite loops.
        ## Filter lt manually when compiling historical based on
        endDate = initTime - datetime.timedelta(minutes=minutesOffset)
        endDate = f"{endDate:%Y/%m/%d %H:%M:%S}"

        doc = {
            "query": {
                "range": {
                    "baseCtx.date": {
                        "gt": lastRowDateOffset,
                        "lt": endDate
                    }
                }
            }
        }

        Index = lowerFormat + ".*"
        if env == 'dev': Index = 'dev.' + Index

        if nrFilesThisInstance == 0:
            page = es.search(
                index = Index,
                sort = "baseCtx.date:asc",
                scroll = "2m",
                size = 10000,
                body = doc
            )
        else:
            page = es.scroll(scroll_id = sid, scroll = "10m")

        pageSize = len(page["hits"]["hits"])
        data = page["hits"]["hits"]
        sid = page["_scroll_id"]
        totalSize = page["hits"]["total"]
        print(f"Total Size: {totalSize}")
        cnt = 0

        # totalSize might be flawed as it returns at times an integer > 0 but array is empty
        # To overcome this, I've added the below check for the array size instead
        if pageSize == 0: break

        while 1:
            cnt += 1
            page = es.scroll(scroll_id = sid, scroll = "10m")
            pageSize = len(page["hits"]["hits"])
            sid = page["_scroll_id"]
            data += page["hits"]["hits"]
            sizeMB = len(gzip.compress(bytes(json.dumps(data), encoding='utf-8'))) / (1024**2)
            loopTime = datetime.datetime.utcnow()
            processTimeSeconds = (loopTime - initTime).seconds
            print(f"{cnt} Results pulled: {pageSize} -- Cumulative Results: {len(data)} -- Gzip Size MB: {sizeMB} -- processTimeSeconds: {processTimeSeconds} -- pageSize: {pageSize} -- startDate: {lastRowDateOffset} -- endDate: {endDate}")
            if sizeMB > maxSizeMB: break
            if processTimeSeconds > maxProcessTimeSeconds: break
            if pageSize < 10000: break

        lastRowDateOffset = max([x['_source']['baseCtx']['date'] for x in data])
        lastRowDateOffsetDT = datetime.datetime.strptime(lastRowDateOffset, '%Y/%m/%d %H:%M:%S')
        outFile = f"elasticsearch/live/{lastRowDateOffsetDT:%Y/%m/%d/%H}/{lowerFormat}_live_{lastRowDateOffsetDT:%Y%m%d%H%M%S}.json.gz"
        uploadJsonGzipBlob(outFile, data)
        writeCursor(curBlobFilePath, f"# Please use format YYYY/MM/DD HH24:MI:SS\nlastPolled = '{lastRowDateOffset}'")
        nrFilesThisInstance += 1
        logging.info(f"File compiled: {outFile} -- {sizeMB} MB\n")

        # If the while loop ran for more than maxProcessTimeSeconds then end it
        if processTimeSeconds > maxProcessTimeSeconds: break
        if pageSize < 10000: break

    logging.info(f"Closing Connection to {esUrl}")
    es.close()
    return 0
And these are 2 of the timer triggers I am calling:
game1-worker
import logging
import datetime
import azure.functions as func
#from shared_code import worker
import importlib

def main(mytimer: func.TimerRequest) -> None:
    utc_timestamp = datetime.datetime.utcnow().replace(
        tzinfo=datetime.timezone.utc).isoformat()

    if mytimer.past_due:
        logging.info('The timer is past due!')

    # Load a new instance of worker.py
    spec = importlib.util.spec_from_file_location("worker", "shared_code/worker.py")
    worker = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(worker)
    worker.batch('game1name')

    logging.info('Python timer trigger function ran at %s', utc_timestamp)
game2-worker
import logging
import datetime
import azure.functions as func
#from shared_code import worker
import importlib

def main(mytimer: func.TimerRequest) -> None:
    utc_timestamp = datetime.datetime.utcnow().replace(
        tzinfo=datetime.timezone.utc).isoformat()

    if mytimer.past_due:
        logging.info('The timer is past due!')

    # Load a new instance of worker.py
    spec = importlib.util.spec_from_file_location("worker", "shared_code/worker.py")
    worker = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(worker)
    worker.batch('game2name')

    logging.info('Python timer trigger function ran at %s', utc_timestamp)
TL;DR
Based on what you described, multiple worker-processes share the underlying runtime's resources (sockets).
For your use case you just need to leave FUNCTIONS_WORKER_PROCESS_COUNT at 1. The default value is supposed to be 1, so not specifying it should mean the same as setting it to 1.
You need to understand how Azure Functions scales. It is quite unintuitive/confusing.
This assumes the Consumption Plan.
Coding: You write Functions. Say F1 and F2. How you organize them is up to you.
Provisioning:
You create a Function App.
You deploy F1 and F2 to this App.
You start the App (not the individual functions).
Runtime:
At start
Azure spawns one Function Host. Think of this as a container/OS.
Inside the Host, one worker-process is created. This worker-process hosts one instance of the App.
If you change FUNCTIONS_WORKER_PROCESS_COUNT to, say, 10, then the Host will spawn 10 worker-processes and run your App inside each of them.
When a Function is triggered (a function can be triggered by a timer, a REST call, a message in a queue, ...):
Each worker-process is capable of servicing one request at a time, be it a request for F1 or F2. One at a time.
Each Host is capable of servicing one request per worker-process in it.
If the backlog of requests grows, the Azure load balancer triggers scale-out and creates new Function Hosts.
Based on the limited info, it seems like bad design to create 3 near-identical functions. You could instead create a single timer-triggered function which sends out 3 messages to a queue (a Storage Queue is more than plenty for such minuscule traffic), which in turn triggers your actual implementation (a Storage Queue triggered Function). Each message would be something like {"game_name": "game1"}; a rough sketch of that layout follows.
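A minimal sketch of that fan-out, assuming a queue output binding named msg is declared in the dispatcher's function.json and a queue trigger binding named msg in the worker's function.json (the function names, queue binding names, and the shared_code import path are illustrative assumptions, not part of the original setup):
# dispatcher/__init__.py -- timer-triggered fan-out (sketch only)
import json
import typing
import azure.functions as func

def main(mytimer: func.TimerRequest, msg: func.Out[typing.List[str]]) -> None:
    # One queue message per game; the queue-triggered worker picks them up.
    games = ["game1name", "game2name", "game3name"]
    msg.set([json.dumps({"game_name": g}) for g in games])
And a single queue-triggered worker that would replace game1-worker, game2-worker, and game3-worker:
# game-worker/__init__.py -- queue-triggered worker (sketch only)
import json
import logging
import azure.functions as func
from shared_code import worker  # assumes worker.py is importable as a package module

def main(msg: func.QueueMessage) -> None:
    payload = json.loads(msg.get_body().decode("utf-8"))
    logging.info("Starting batch for %s", payload["game_name"])
    worker.batch(payload["game_name"])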

Trying to add throttle control to paralleled API calls in python

I am using the Google Places API, which has a query-per-second limit of 10, meaning I cannot make more than 10 requests within a second. If I were using serial execution this wouldn't be an issue, as the API's average response time is 250 ms, so I would only be able to make about 4 calls in a second.
To utilize the entire 10 QPS limit, I used multithreading and made parallel API calls. But now I need to control the number of calls that can happen in a second; it should not go beyond 10 (the Google API starts throwing errors if I cross the limit).
Below is the code that I have so far. I am not able to figure out why the program sometimes just gets stuck, or takes a lot longer than required.
import time
from datetime import datetime
import random
from threading import Lock
from concurrent.futures import ThreadPoolExecutor as pool
import concurrent.futures
import requests
import matplotlib.pyplot as plt
from statistics import mean
from ratelimiter import RateLimiter
def make_parallel(func, qps=10):
    lock = Lock()
    threads_execution_que = []
    limit_hit = False

    def qps_manager(arg):
        current_second = time.time()
        lock.acquire()
        if len(threads_execution_que) >= qps or limit_hit:
            limit_hit = True
            if current_second - threads_execution_que[0] <= 1:
                time.sleep(current_second - threads_execution_que[0])
        current_time = time.time()
        threads_execution_que.append(current_time)
        lock.release()

        res = func(arg)

        lock.acquire()
        threads_execution_que.remove(current_time)
        lock.release()
        return res

    def wrapper(iterable, number_of_workers=12):
        result = []
        with pool(max_workers=number_of_workers) as executer:
            bag = {executer.submit(func, i): i for i in iterable}
            for future in concurrent.futures.as_completed(bag):
                result.append(future.result())
        return result
    return wrapper
@make_parallel
def api_call(i):
    min_func_time = random.uniform(.25, .3)
    start_time = time.time()
    try:
        response = requests.get('https://jsonplaceholder.typicode.com/posts', timeout=1)
    except Exception as e:
        response = e
    if (time.time() - start_time) - min_func_time < 0:
        time.sleep(min_func_time - (time.time() - start_time))
    return response

api_call([1]*50)
Ideally the code should take no more than about 1.5 seconds, but currently it is taking about 12-14 seconds.
The script speeds up to its expected speed as soon as I remove the QPS manager logic.
Please suggest what I am doing wrong, and also whether there is any package already available which provides this mechanism out of the box.
Looks like ratelimit does just that:
from ratelimit import limits, sleep_and_retry

@make_parallel
@sleep_and_retry
@limits(calls=10, period=1)
def api_call(i):
    try:
        response = requests.get("https://jsonplaceholder.typicode.com/posts", timeout=1)
    except Exception as e:
        response = e
    return response
EDIT: I did some testing and it looks like @sleep_and_retry is a little too optimistic, so just increase the period a little, to 1.2 seconds:
from datetime import datetime, timedelta

s = datetime.now()
api_call([1] * 50)
elapsed_time = datetime.now() - s
print(elapsed_time > timedelta(seconds=50 / 10))
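For reference, the adjusted decorator stack from the snippet above would then look like this (only the period changes):
@make_parallel
@sleep_and_retry
@limits(calls=10, period=1.2)  # slightly longer period, as suggested above
def api_call(i):
    try:
        response = requests.get("https://jsonplaceholder.typicode.com/posts", timeout=1)
    except Exception as e:
        response = e
    return response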

nidaqmx: Access an existing task

I am using the nidaqmx-python library for acquiring data. Is it possible to access an existing task, which is already defined in NI MAX?
My solution, thanks to the tip from @nekomatic, is:
import nidaqmx

system = nidaqmx.system.System.local()  # load local system
task_names = system.tasks.task_names    # returns a list of task names
task = system.tasks[0]                  # select the first task
loaded_task = task.load()               # load the task
sent_samples = []                       # list for saving acquired data

with loaded_task:
    loaded_task.timing.cfg_samp_clk_timing(
        rate=2560,
        sample_mode=nidaqmx.constants.AcquisitionType.CONTINUOUS,
        samps_per_chan=1000)

    def callback(task_handle, every_n_samples_event_type,
                 number_of_samples, callback_data):
        """Callback function."""
        print('Every N Samples callback invoked.')
        samples = loaded_task.read(number_of_samples_per_channel=2560)
        sent_samples.extend(samples)
        return 0

    loaded_task.register_every_n_samples_acquired_into_buffer_event(
        200, callback)
    loaded_task.start()

    input('Running task. Press Enter to stop.\n')

Number of vehicles each T seconds

I wrote the script below in Python and ran it with SUMO, in order to obtain the number of vehicles between two induction loops in a lane, every 60 seconds.
But this one gives a value every second.
#!/usr/bin/env python
# -*- coding: Latin-1 -*-
import os, sys
import optparse
import subprocess
import random
import threading
import time

SUMO_HOME = "/home/khadija/Téléchargements/sumo-0.25.0"

try:
    sys.path.append(os.path.join(SUMO_HOME, "tools"))
    from sumolib import checkBinary
except ImportError:
    sys.exit("please declare environment variable 'SUMO_HOME' as the root directory of your sumo installation (it should contain folders 'bin', 'tools' and 'docs')")

import traci

routeFile = "data2/cross.rou.xml"
PORT = 8873

# SIGN CONFIGURATIONS : NESW
NSgreen = "GrGr"
EWgreen = "rGrG"
PROGRAM = (NSgreen, EWgreen)

def nbr_veh_entr_indloop(i, o):
    # i and o are the input and output induction loops
    threading.Timer(60.0, nbr_veh_entr_indloop).start()
    x = traci.inductionloop.getLastStepMeanLength(i) - traci.inductionloop.getLastStepMeanLength(o)
    return x

def run():
    steps = open("data2/step.txt", "w")
    traci.init(int(PORT))
    step = 0
    while step < 7200:
        a = nbr_veh_entr_indloop("4i", "40")
        k = str(a)
        print >> steps, "nombre des veh : " + k  # concatenation
        traci.simulationStep()
        step += 1
    steps.close()
    traci.close()
    sys.stdout.flush()

def get_options():
    optParser = optparse.OptionParser()
    optParser.add_option("--nogui", action="store_true",
                         default=False, help="run the commandline version of sumo")
    options, args = optParser.parse_args()
    return options

# this is the main entry point of this script
if __name__ == "__main__":
    options = get_options()
    # this script has been called from the command line. It will start sumo as a
    # server, then connect and run
    if options.nogui:
        sumoBinary = checkBinary('sumo')
    else:
        sumoBinary = checkBinary('sumo-gui')
    # this is the normal way of using traci. sumo is started as a
    # subprocess and then the python script connects and runs
    sumoProcess = subprocess.Popen([sumoBinary, "-c", "data2/cross.sumocfg", "--tripinfo-output", "tripinfo.xml", "--remote-port", str(PORT)], stdout=sys.stdout, stderr=sys.stderr)
    run()
    sumoProcess.wait()
Thanks for help in advance.
Regards,
You probably want to have the value every 60 simulation seconds, not every 60 wallclock seconds, so a timer is pointless here. Simply ask for the value after 60 simulation steps (assuming you use sumo's default step length of one second). So you could write something like:
if step % 60 == 0:
    print >> steps, "nombre des veh : " + k
This will print the value for the last step every 60 steps. If you want the value for the last minute you need to aggregate (sum up) yourself, as in the sketch below.
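A sketch of that aggregation inside run(), assuming sumo's default one-second step length and keeping the question's per-step metric (the difference in getLastStepMeanLength between the two loops); the threading.Timer is dropped entirely:
def run():
    steps = open("data2/step.txt", "w")
    traci.init(int(PORT))
    step = 0
    total = 0
    while step < 7200:
        # per-step value, summed up over the last minute
        total += (traci.inductionloop.getLastStepMeanLength("4i")
                  - traci.inductionloop.getLastStepMeanLength("40"))
        traci.simulationStep()
        step += 1
        if step % 60 == 0:
            print >> steps, "nombre des veh (last minute): " + str(total)
            total = 0
    steps.close()
    traci.close()
    sys.stdout.flush()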

python: pausing a simulation in simpy

I was hoping someone could point me in the right direction as I work through building a queue model in SimPy. The documentation and examples on the SimPy site are amazing, and I wanted to see if I could build upon an example they provided.
Essentially, I am simulating a process where a clerk has to review paperwork. The paperwork both arrives and exits his queue (i.e. is serviced) at random time intervals (much like an M/M/1 queue) with a first-in-first-out service discipline.
The tricky part I am finding is that I would like to model the process such that the clerk has to sleep for some period of time before returning to work. For example, if the simulation runs for 4 weeks, the clerk should sleep for 12 hours per day during that period. I can't seem to get this last caveat to function. Please see my code below, thanks!
import random
import simpy
import matplotlib.pyplot as plt

RANDOM_SEED = 123456        # Random seed
INTERVAL_RECORDS = 30.0     # review records roughly every xx hours?
T_INTER = 28                # generate records roughly every xx hours?
WEEKS = 4
SIM_TIME = WEEKS*7*24*60    # simulation time (hours?)

class record_review:
    def __init__(self, env):
        self.env = env
        self.shift_exchange = env.event()

def record(env, interval, counter, data, t_inter):
    """Source generates records randomly"""
    for i in range(1):
        """start off with x=i records in queue"""
        c = review(env, 'Record%02d' % i, counter, time_in_queue=30.0)
        env.process(c)
        t = random.expovariate(1.0 / interval)  # set up random generation rate
        yield env.timeout(t)

    while True:
        """continually generate records throughout simulation"""
        yield env.timeout(random.randint(t_inter-2, t_inter+2))  # generate record between +/- 2 of interval
        i += 1
        c = review(env, 'Record%02d' % i, counter, time_in_queue=30.0)
        env.process(c)
        t = random.expovariate(1.0 / interval)  # random generation rate
        yield env.timeout(t)

def review(env, name, counter, time_in_queue):
    """Record arrives, is reviewed and exits."""
    arrive = env.now
    print('%7.4f %s: Record has entered queue' % (arrive, name))

    with counter.request() as req:
        yield req
        wait = env.now - arrive                        # total wait time / record
        tib = random.expovariate(1.0 / time_in_queue)  # time in queue / review rate
        yield env.timeout(tib)
        data.append(wait)                              # monitor
        print('%7.4f %s: Waited %6.3f' % (env.now, name, wait))
        print('%7.4f %s: Finished' % (env.now, name))

def shift_exchange(self, env):
    while True:
        for i in range(SIM_TIME):
            yield env.timeout(60)
            self.shift_exchange = env.event()

# Setup and start the simulation
print('Batch Record Review Simulation')
random.seed(RANDOM_SEED)
env = simpy.Environment()
data = []

# Start processes and run
counter = simpy.Resource(env, capacity=1)
env.process(record(env, INTERVAL_RECORDS, counter, data, T_INTER))
env.run(until=SIM_TIME)
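A minimal sketch of one way to get the 12-hours-on / 12-hours-off behaviour (an illustrative assumption, not the shift_exchange design above): let a separate process seize the counter with a higher-priority request for the off-shift hours, so waiting records simply queue up until the clerk returns. This assumes counter is created as a simpy.PriorityResource rather than a plain Resource; the existing counter.request() calls in review() keep their default priority of 0.
def off_shift(env, counter, on_hours=12, off_hours=12):
    # Sketch only: takes the clerk off duty for 12 hours out of every 24,
    # treating the model's time unit as minutes (SIM_TIME = WEEKS*7*24*60).
    while True:
        yield env.timeout(on_hours * 60)             # clerk works 12 hours
        with counter.request(priority=-1) as req:    # jumps ahead of queued records
            yield req                                # waits for the in-progress review to finish
            yield env.timeout(off_hours * 60)        # clerk sleeps 12 hours

# counter = simpy.PriorityResource(env, capacity=1)
# env.process(off_shift(env, counter))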
