I am trying to implement a ZMQ client wrapper class. I hope the comments make it easy to follow. Please see __main__ for the different approaches I tried.
import zmq


class Client(object):
    """
    A class for passing messages using the pipeline (push-pull) pattern of ZMQ.
    """
    def __init__(self, pull_addr="tcp://127.0.0.1:5757",
                 push_addr="tcp://127.0.0.1:5858"):
        """
        The constructor.

        Parameters:
            pull_addr (string) : Local endpoint for pushing messages to and pulling from.
            push_addr (string) : Remote endpoint for pushing messages.
        """
        super(Client, self).__init__()
        print("Initialized...")
        self.context = zmq.Context()
        self.push_addr = push_addr
        self.pull_addr = pull_addr

    def producer(self, msg):
        """
        Produces messages.

        A wrapper that receives a message and queues it locally by
        pushing it to self.pull_addr.

        Parameters:
            msg (json) : Message to be queued.

        Returns:
            None
        """
        print("Production started.... ")
        zmq_socket = self.context.socket(zmq.PUSH)
        zmq_socket.bind(self.pull_addr)
        zmq_socket.send_json(msg)

    def consumer(self):
        """
        Consumes messages produced by self.producer.

        Reads messages queued by self.producer and pushes them to self.push_addr,
        where it assumes a collector is expecting them.

        Parameters:
            None

        Returns:
            None
        """
        print("consumption is awaiting...")
        receiver = self.context.socket(zmq.PULL)
        receiver.connect(self.pull_addr)
        sender = self.context.socket(zmq.PUSH)
        sender.connect(self.push_addr)
        while True:
            msg = receiver.recv_json()
            sender.send_json(msg)
if __name__ == '__main__':
    import multiprocessing
    # con_pool = multiprocessing.Pool()
    # # pro_pool = multiprocessing.Pool()
    zmq_client = Client()
    # consumer = con_pool.apply_async(zmq_client.consumer, ())
    # # producer = pro_pool.apply_async(zmq_client.producer, ({"msg" : "Hello World!"}))
    # print("DONE>>>>")
    # from multiprocessing import Process
    # consumer = Process(target=zmq_client.consumer)
    # consumer.start()
    # producer = Process(target=zmq_client.producer, args=({'msg' : "Hello World!"}))
    # producer.start()
    # producer.join()
    # consumer.join()
    pool = multiprocessing.Pool()
    p = pool.apply_async(zmq_client.consumer)
    p2 = pool.apply_async(zmq_client.producer, ({"msg" : "Hello World!"}))
Try 1
pool = multiprocessing.Pool()
p = pool.apply_async(zmq_client.consumer)
p2 = pool.apply_async(zmq_client.producer, ({"msg" : "Hello World!"}))
Output 1
calls the constructor and terminates.
Try 2
con_pool = multiprocessing.Pool()
pro_pool = multiprocessing.Pool()
zmq_client = Client()
consumer = con_pool.apply_async(zmq_client.consumer, ())
producer = pro_pool.apply_async(zmq_client.producer,({"msg" : "Hello World!"}))
Output 2
calls the constructor and terminates.
Try 3
from multiprocessing import Process
zmq_client = Client()
con = Process(target=zmq_client.consumer)
con.start()
pro = Process(target=zmq_client.producer, args=({'msg' : "Hello World!"}))
pro.start()
pro.join()
con.join()
Output 3
calls the constructor
calls the consumer
calls the producer
keeps hanging
Here I was expecting the ZMQ server to receive the message. Any suggestions for getting this class to work as expected?
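A note on what I think is going on, plus a sketch (an illustrative guess, not a confirmed fix). In Try 1 and Try 2 nothing ever waits on the AsyncResult objects, so the main process exits immediately and the daemonic pool workers are killed before they do any work; calling .get() on the results would also have surfaced any worker-side errors. Separately, in all three tries the args tuple is missing its trailing comma: ({"msg": "Hello World!"}) is just a parenthesized dict, which gets unpacked so that producer() actually receives the string "msg" rather than the dict. Here is a minimal reworking of Try 3, assuming a collector is already pulling at push_addr (tcp://127.0.0.1:5858) and creating the zmq.Context inside each child process (a context created before fork should not be reused after it):

from multiprocessing import Process
import time

def run_consumer():
    Client().consumer()   # fresh Client, and thus a fresh zmq.Context, in this child

def run_producer():
    Client().producer({'msg': "Hello World!"})

if __name__ == '__main__':
    con = Process(target=run_consumer, daemon=True)  # loops forever, so don't join it
    con.start()
    time.sleep(0.5)  # give the consumer a moment to connect
    pro = Process(target=run_producer)
    pro.start()
    pro.join()       # returns once send_json has queued the message
    time.sleep(1)    # let the consumer relay the message before the daemon is killed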
Related
I am receiving None when calling poll() in this program, but I do get the messages when running kafka-console-consumer.bat from cmd, so I can't figure out what exactly the problem is.
The execution starts from main.py:

from queue import Queue
from concurrent.futures import ThreadPoolExecutor
import time
import json
from kafka_message_consumer import KafkaMessageConsumer
from kafka_discovery_executor import KafkaDiscoveryExecutor

with open('kafka_properties.json') as f:
    kafka_properties = json.loads(f.read())

message_queue = Queue()
kafka_message_consumer = KafkaMessageConsumer(kafka_properties, message_queue)
kafka_discovery_executor = KafkaDiscoveryExecutor(message_queue, kafka_properties)

with ThreadPoolExecutor(max_workers=5) as executor:
    executor.submit(kafka_message_consumer.run())
    time.sleep(1)
    executor.submit(kafka_discovery_executor.run())
    time.sleep(1)
The KafkaDiscoveryExecutor class consumes messages from the shared queue and processes them.
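That class isn't shown in the post; purely for context, a minimal hypothetical stand-in (the names and behavior here are my assumptions, not the poster's code) could look like:

# kafka_discovery_executor.py -- hypothetical stand-in, not the original code
import logging

class KafkaDiscoveryExecutor:
    def __init__(self, message_queue, kafka_properties):
        self.message_queue = message_queue
        self.kafka_properties = kafka_properties
        self.logger = logging.getLogger('KafkaDiscoveryExecutor')

    def run(self):
        while True:
            msg = self.message_queue.get()  # blocks until the consumer enqueues a record
            self.logger.info('Processing record: %s', msg.value())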
This is kafka_message_consumer.py:

import logging
from confluent_kafka import Consumer


class KafkaMessageConsumer:
    def __init__(self, kafka_properties, message_queue):
        self.message_queue = message_queue
        self.logger = logging.getLogger('KafkaMessageConsumer')
        self.kafka_stream_consumer = None
        self.create_consumer(kafka_properties)

    def create_consumer(self, kafka_properties):
        """
        Creates an instance of Kafka Consumer with the consumer configuration
        properties and subscribes to the defined topic(s).
        """
        consumer_config = dict()
        # Consumer configuration properties.
        consumer_config['bootstrap.servers'] = kafka_properties.get('bootstrap.servers')
        consumer_config['group.id'] = kafka_properties.get('group.id')
        consumer_config['enable.auto.commit'] = True
        consumer_config['auto.offset.reset'] = 'earliest'
        # For SSL security
        # consumer_config['security.protocol'] = 'SASL_SSL'
        # consumer_config['sasl.mechanisms'] = 'PLAIN'
        # consumer_config['sasl.username'] = ''
        # consumer_config['sasl.password'] = ''
        # Create the consumer using consumer_config.
        self.kafka_stream_consumer = Consumer(consumer_config)
        # Subscribe to the specified topic(s).
        self.kafka_stream_consumer.subscribe(['mytopic'])

    def run(self):
        while True:
            msg = self.kafka_stream_consumer.poll(1.0)
            if msg is None:
                # No message available within the timeout.
                print("Waiting for message or event/error in poll()")
                continue
            elif msg.error():
                print("Error: {}".format(msg.error()))
            else:
                # Consume the record and push it onto message_queue.
                try:
                    self.message_queue.put(msg)
                except Exception as e:
                    self.logger.critical("Error occurred in Kafka consumer: {}".format(e))
The specified topic has events, but I am getting None here, and the print statement inside 'if msg is None:' keeps executing.
I am still not sure why the above code does not work as it should.
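One detail worth flagging in the original main.py (my observation, not something stated in the post): executor.submit(kafka_message_consumer.run()) calls run() immediately on the main thread, where it blocks in its while True loop, instead of handing the method to a worker thread. submit takes a callable, so passing the method itself would look like this:

with ThreadPoolExecutor(max_workers=5) as executor:
    # Pass the method itself; submit(f()) would invoke f() right here
    # on the main thread and block before the executor ever saw it.
    executor.submit(kafka_message_consumer.run)
    time.sleep(1)
    executor.submit(kafka_discovery_executor.run)
    time.sleep(1)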
Here's what I changed to make this code work:
I used the threading module instead of concurrent.futures.
I used daemon threads.
I call threading.Thread.__init__() inside the constructors of the classes [KafkaMessageConsumer, KafkaDiscoveryExecutor].
Here's main.py:

from queue import Queue
import threading
import time
import json
from kafka_message_consumer import KafkaMessageConsumer
from kafka_discovery_executor import KafkaDiscoveryExecutor


def main():
    with open('kafka_properties.json') as f:
        kafka_properties = json.loads(f.read())
    message_queue = Queue()
    threads = [
        KafkaMessageConsumer(kafka_properties, message_queue),
        KafkaDiscoveryExecutor(message_queue, kafka_properties)
    ]
    for thread in threads:
        thread.start()
        time.sleep(1)
    for thread in threads:
        thread.join()
        time.sleep(1)


if __name__ == "__main__":
    main()
and kafka_message_consumer.py:

import logging
from confluent_kafka import Consumer
import threading


class KafkaMessageConsumer(threading.Thread):
    daemon = True

    def __init__(self, kafka_properties, message_queue):
        threading.Thread.__init__(self)
        self.message_queue = message_queue
        self.logger = logging.getLogger('KafkaMessageConsumer')
        self.kafka_stream_consumer = None
        self.create_consumer(kafka_properties)

    def create_consumer(self, kafka_properties):
        """
        Creates an instance of Kafka Consumer with the consumer configuration
        properties and subscribes to the defined topic(s).
        """
        consumer_config = dict()
        # Consumer configuration properties.
        consumer_config['bootstrap.servers'] = kafka_properties.get('bootstrap.servers')
        consumer_config['group.id'] = kafka_properties.get('group.id')
        consumer_config['enable.auto.commit'] = True
        consumer_config['auto.offset.reset'] = 'earliest'
        # Create the consumer using consumer_config.
        self.kafka_stream_consumer = Consumer(consumer_config)
        # Subscribe to the specified topic(s).
        self.kafka_stream_consumer.subscribe(['mytopic'])

    def run(self):
        while True:
            msg = self.kafka_stream_consumer.poll(1.0)
            if msg is None:
                # No message available within the timeout.
                print("Waiting for message or event/error in poll()")
                continue
            elif msg.error():
                print("Error: {}".format(msg.error()))
            else:
                # Consume the record and push it onto message_queue.
                try:
                    self.message_queue.put(msg)
                except Exception as e:
                    self.logger.critical("Error occurred in Kafka consumer: {}".format(e))
        self.kafka_stream_consumer.close()
I have a RabbitMQ server running in Docker and two Python clients that connect to the server and send messages to each other using a headers exchange. The message rate is about 10/s. After some amount of time (usually after 300-500 messages have been exchanged), one of the exchanges becomes unresponsive: the channel.basic_publish call completes without any exception, but the receiver doesn't receive any messages, and the RabbitMQ dashboard shows no activity on that exchange (see the RabbitMQ dashboard screenshot).
Here is the code example:
import pika
import threading
import time
import sys


class Test:
    def __init__(
        self,
        p_username,
        p_password,
        p_host,
        p_port,
        p_virtualHost,
        p_outgoingExchange,
        p_incomingExchange
    ):
        self.__outgoingExch = p_outgoingExchange
        self.__incomingExch = p_incomingExchange
        self.__headers = {'topic': 'test'}
        self.__queueName = ''
        self.__channelConsumer = None
        self.__channelProducer = None
        self.__isRun = False
        l_credentials = pika.PlainCredentials(p_username, p_password)
        l_parameters = pika.ConnectionParameters(
            host=p_host,
            port=p_port,
            virtual_host=p_virtualHost,
            credentials=l_credentials,
            socket_timeout=30,
            connection_attempts=5,
        )
        self.__connection = pika.SelectConnection(
            parameters=l_parameters,
            on_open_callback=self.__on_connection_open,
            on_open_error_callback=self.__on_connection_open_error,
            on_close_callback=self.__on_connection_closed
        )

    def __on_connection_open(self, _conn):
        print("Connection opened")
        self.__connection.channel(on_open_callback=self.__on_consume_channel_open)
        self.__connection.channel(on_open_callback=self.__on_produce_channel_open)

    def __on_connection_open_error(self, _conn, _exception):
        print("Failed to open connection")

    def __on_connection_closed(self, _conn, p_exception):
        print("Connection closed: {}".format(p_exception))

    def __on_consume_channel_open(self, p_ch):
        print("Consumer channel opened")
        self.__channelConsumer = p_ch
        self.__channelConsumer.exchange_declare(
            exchange=self.__incomingExch,
            exchange_type="headers",
            callback=self.__on_consume_exchange_declared
        )

    def __on_consume_exchange_declared(self, p_method):
        print("Consumer exchange declared")
        self.__channelConsumer.queue_declare(
            queue='',
            callback=self.__on_queue_declare
        )

    def __on_queue_declare(self, p_method):
        print("Consumer queue declared")
        self.__queueName = p_method.method.queue
        self.__channelConsumer.queue_bind(
            queue=self.__queueName,
            exchange=self.__incomingExch,
            arguments=self.__headers,
        )
        self.__channelConsumer.basic_consume(self.__queueName, self.__onMessageReceived)

    def __on_produce_channel_open(self, p_ch):
        print("Producer channel opened")
        self.__channelProducer = p_ch
        self.__channelProducer.exchange_declare(
            exchange=self.__outgoingExch,
            exchange_type="headers",
            callback=self.__on_produce_exchange_declared
        )

    def __on_produce_exchange_declared(self, p_method):
        print("Producer exchange declared")
        l_publisher = threading.Thread(target=self.__publishProcedure)
        l_publisher.start()

    def __onMessageReceived(self, p_channel, p_method, p_properties, p_body):
        p_channel.basic_ack(p_method.delivery_tag)
        print("Message received: {}".format(p_body))

    def __publishProcedure(self):
        print("Start publishing")
        l_msgCounter = 0
        while self.__isRun:
            l_msgCounter += 1
            self.__publish(l_msgCounter)
            time.sleep(0.1)

    def __publish(self, p_msgCounter):
        self.__channelProducer.basic_publish(
            exchange=self.__outgoingExch,
            routing_key="#",
            body=str(p_msgCounter),
            properties=pika.BasicProperties(headers=self.__headers)
        )

    def run(self):
        self.__isRun = True
        try:
            self.__connection.ioloop.start()
        except KeyboardInterrupt:
            self.__isRun = False
            self.__connection.close()
            print("Exit...")


if __name__ == '__main__':
    if len(sys.argv) < 2:
        print("Provide node name [node1 | node2]")
        exit(-1)

    l_outgoingExch = ''
    l_incomingExch = ''
    if sys.argv[1] == 'node1':
        l_outgoingExch = 'node2.headers'
        l_incomingExch = 'node1.headers'
    elif sys.argv[1] == 'node2':
        l_outgoingExch = 'node1.headers'
        l_incomingExch = 'node2.headers'
    else:
        print("Wrong node name")
        exit(-1)

    l_testInstance = Test(
        p_username='admin',
        p_password='admin',
        p_host='localhost',
        p_port=5672,
        p_virtualHost='/',
        p_incomingExchange=l_incomingExch,
        p_outgoingExchange=l_outgoingExch
    )
    l_testInstance.run()
I run two instances as two nodes (node1 and node2) so they should communicate with each other.
Also sometimes I have the issue described here:
Stream connection lost: AssertionError(('_AsyncTransportBase._produce() tx buffer size underflow', -275, 1),)
I found that I had misused pika. As the pika documentation states, it is not safe to share a connection across multiple threads; the only way to interact with the connection from another thread is the add_callback_threadsafe function, which schedules the callback to run on the connection's ioloop thread. In my example it should look like this (note that functools must be imported):
import functools

def __publishProcedure(self):
    print("Start publishing")
    l_msgCounter = 0
    while self.__isRun:
        l_msgCounter += 1
        l_cb = functools.partial(self.__publish, l_msgCounter)
        self.__connection.ioloop.add_callback_threadsafe(l_cb)
        time.sleep(0.1)

def __publish(self, p_msgCounter):
    self.__channelProducer.basic_publish(
        exchange=self.__outgoingExch,
        routing_key="#",
        body=str(p_msgCounter),
        properties=pika.BasicProperties(headers=self.__headers)
    )
I have two functions which need to run at the same time. read_card runs in an infinite loop, waits for new cards (it is actually an Nrf reader), and adds a string to a queue; send_data is supposed to get values from the queue and send them to the server via the requests library. Everything works when I do not use multiprocessing, but I need concurrency, I guess.
Here are my two functions:
def read_card(reader, configs):
    print("First started")
    while True:
        authorized_uid = reader.is_granted(reader.read())
        print("Waiting for card")
        # TODO: If not authorized in AccessList.txt, look to the server
        if authorized_uid is not None:
            print(authorized_uid)
            open_door()
            check_model = CheckModel(configs.DeviceSerialNumber, authorized_uid)
            message_helper.put_message(check_model)


def send_data(sender):
    print("Second started")
    while True:
        message_model = message_helper.get_message()
        if message_model is not None:
            sender.send_message(message_model)
Here is how I call main:
def main():
    download_settings()
    create_folders()
    settings = read_settings()
    accessList = get_user_list(settings)
    configure_scheduler(settings)
    message_sender = MessageSender(client.check, client.bulk)
    reader_process = multiprocessing.Process(name="reader_loop", target=read_card,
                                             args=(Reader(accessList, entryLogger), configs,))
    message_process = multiprocessing.Process(name="message_loop", target=send_data,
                                              args=(message_sender,))
    reader_process.start()
    message_process.start()


if __name__ == '__main__':
    main()
And these are for debugging; I added prints to put_message and send_message, which live in different classes.
def send_message(self, model):
    print(model)
    return self.checkClient.check(model)


def put_message(self, message):
    print(message)
    self.put_to_queue(self.queue, message)
    self.put_to_db(message)
I expect to see some object names in the terminal, but I only see the output below. Also, the reader does not work.
First started
Second started
What am I doing wrong?
Use a Queue to communicate between processes. When you read a card inside the reader process, create a new job and push it onto the queue; then pop the job inside the client process and send the request.
Here's a proof of concept:
from datetime import datetime
from multiprocessing import Process, Queue
from random import random
from time import sleep

import requests


def reader(q: Queue):
    while True:
        # create a job
        job = {'date': datetime.now().isoformat(), 'number': random()}
        q.put(job)
        # use a proper logger instead of printing,
        # otherwise you'll get mangled output!
        print('Enqueued new job', job)
        sleep(5)


def client(q: Queue):
    while True:
        # wait for a new job
        job = q.get()
        res = requests.post(url='https://httpbin.org/post',
                            data=job)
        res.raise_for_status()
        json = res.json()
        print(json['form'])


if __name__ == '__main__':
    q = Queue()
    reader_proc = Process(name='reader', target=reader, args=(q,))
    client_proc = Process(name='client', target=client, args=(q,))
    procs = [reader_proc, client_proc]
    for p in procs:
        print(f'{p.name} started')
        p.start()
    for p in procs:
        p.join()
which prints:
reader started
client started
Enqueued new job {'date': '2019-07-01T15:51:53.100395', 'number': 0.7659293922700549}
{'date': '2019-07-01T15:51:53.100395', 'number': '0.7659293922700549'}
Enqueued new job {'date': '2019-07-01T15:51:58.116020', 'number': 0.14306347124900576}
{'date': '2019-07-01T15:51:58.116020', 'number': '0.14306347124900576'}
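To carry that back to the question's code, the key change is handing the Queue to both processes explicitly; module-level state such as message_helper is copied into each child when the processes start, so a put in one process is never seen by a get in the other. A rough sketch reusing the question's names (which are assumptions here, since not all of that code is shown):

def main():
    q = multiprocessing.Queue()
    # One shared Queue passed to both processes; each process otherwise
    # gets its own private copy of module-level objects like message_helper.
    reader_process = multiprocessing.Process(
        name="reader_loop", target=read_card,
        args=(Reader(accessList, entryLogger), configs, q))
    message_process = multiprocessing.Process(
        name="message_loop", target=send_data, args=(message_sender, q))
    reader_process.start()
    message_process.start()

read_card and send_data would then take q as an extra parameter and call q.put(check_model) / q.get() in place of message_helper.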
I have a Tinkerforge IMU and I'm using their library, which uses a callback to output data. I implemented a multithreaded approach that works fine, but now I want to make it multiprocess instead for better concurrency. The multiprocessing version starts the startIMUData function fine, but it never calls myIMUCallback.
import multiprocessing
from multiprocessing import Queue
from threading import Thread

from tinkerforge.ip_connection import IPConnection
from tinkerforge.brick_imu_v2 import BrickIMUV2


def myIMUCallback():
    print("callback called!")


# Function to start recording IMU data via the callback function above
def startIMUData(q):
    print("Starting IMU!")
    HOST = "localhost"
    PORT = 4223
    UID = "6Dcx3Y"  # Change XXYYZZ to the UID of your IMU Brick 2.0

    ipcon = IPConnection()        # Create IP connection
    imu = BrickIMUV2(UID, ipcon)  # Create device object
    ipcon.connect(HOST, PORT)

    # myIMUCallback is NOT being called in the process version
    imu.register_callback(imu.CALLBACK_ALL_DATA, myIMUCallback)
    imu.set_all_data_period(100)


if __name__ == "__main__":
    q = Queue()

    # This works
    threadIMU = Thread(target=startIMUData, args=(q,))
    threadIMU.start()
    threadIMU.join()

    # This doesn't work; it calls startIMUData fine but ignores myIMUCallback
    processIMU = multiprocessing.Process(target=startIMUData, args=(q,))
    processIMU.start()
    processIMU.join()
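No fix is recorded for this one, but one pattern worth trying (an assumption on my part, not a confirmed solution) is to keep the child process alive after registering the callback and to ship the callback data back through the Queue. The callback fires on the IPConnection receiver thread inside the child process, so anything it prints or stores stays in that process, and if the child's main function simply returns, the child may exit before any data arrives. A sketch:

import multiprocessing
import time
from multiprocessing import Queue

from tinkerforge.ip_connection import IPConnection
from tinkerforge.brick_imu_v2 import BrickIMUV2


def startIMUData(q):
    ipcon = IPConnection()
    imu = BrickIMUV2("6Dcx3Y", ipcon)  # UID from the question
    ipcon.connect("localhost", 4223)
    # CALLBACK_ALL_DATA delivers several positional arguments; *args
    # avoids hard-coding the exact signature in this sketch.
    imu.register_callback(imu.CALLBACK_ALL_DATA, lambda *args: q.put(args))
    imu.set_all_data_period(100)
    while True:          # keep the child alive so callbacks keep arriving
        time.sleep(1)


if __name__ == "__main__":
    q = Queue()
    p = multiprocessing.Process(target=startIMUData, args=(q,), daemon=True)
    p.start()
    for _ in range(10):  # consume a few samples in the parent
        print(q.get())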
I am writing an AWS Lambda function that deletes 100,000 objects from an S3 bucket per function call. I am trying to see if I can create and run the deletions on background threads. I have the following code.
import boto3
import boto3.session
from threading import Thread

http_response = []
MAX = 999
threads = []


class myThread(Thread):
    def __init__(self, objects_to_delete, bucket_name):
        Thread.__init__(self)
        self.objects_to_delete = objects_to_delete
        self.bucket_name = bucket_name

    def run(self):
        session = boto3.session.Session().client('s3')
        s3 = session.client('s3')

        ####
        # COMES HERE AND PRINTS THE NAME OF THE BUCKET.
        ####
        print(self.bucket_name)

        response = s3.delete_objects(Bucket=bucket_name, Delete={'Objects': objects_to_delete[0:MAX]})

        ####
        # THIS IS NOT GETTING PRINTED. MEANING, delete_objects IS BREAKING / NOT EXECUTING.
        ####
        print(response)


def handler(event, context):
    keys = event['keys']
    bucket_name = event["bucket"]
    if (len(keys) == 0 or len(bucket_name) == 0):
        return {
            "message": http_response
        }

    try:
        t = myThread(objects_to_delete[0:MAX], bucket_name)
        t.start()
        threads.append(t)
    except:
        print("Something went wrong!!! " + str(objects_to_delete))

    del keys[0:MAX]
    for i in range(len(threads)):
        threads[i].start()
    handler({'keys': keys, 'bucket': bucket_name}, context)
Is there anything wrong I am doing here? It seems like the thread is starting, but it never makes the delete_objects call, and it doesn't even return any error messages to learn from. Any thoughts or ideas?
One more thing: when I run this function locally on my computer, it runs just fine without any problem.
It turns out that after starting the threads you should join them, because once the process quits, the threads die as well; in Lambda the execution environment is frozen as soon as the handler returns, whereas a local Python process keeps running until its non-daemon threads finish, which is why the same code worked on my computer. So I did the following:
import boto3
from threading import Thread

MAX = 999  # S3 delete_objects accepts at most 1,000 keys per call
threads = []


class myThread(Thread):
    def __init__(self, bucket_name, objects):
        Thread.__init__(self)
        self.bucket_name = bucket_name
        self.objects = objects

    def run(self):
        s3 = boto3.client('s3', region_name="us-east-1")
        response = s3.delete_objects(Bucket=self.bucket_name, Delete={'Objects': self.objects})
        print(response)


def handler(event, context):
    keys = event["keys"]
    bucket_name = event["bucket"]
    objects_to_delete = [1...100,000]  # pseudocode: the full list of keys to delete

    while (len(objects_to_delete) != 0):
        t = myThread(bucket_name, objects_to_delete[0:MAX])
        threads.append(t)
        del objects_to_delete[0:MAX]

    for thread in threads:
        thread.start()

    for thread in threads:
        thread.join()

    return {
        "message": "Success Message."
    }