Unable to see Data in Kafka Topic - python

I have a JSON file in an S3 bucket and I am pushing that file into a Kafka topic. Can someone guide me as to why I am not able to see this data in the Kafka topic? My program has no errors and runs correctly.
from kafka import KafkaProducer
import json
import time
import boto3
def json_serializer(data):
    """Serialize *data* to UTF-8 encoded JSON bytes.

    Used as the producer's ``value_serializer``, so it is applied to every
    value passed to ``producer.send``.
    """
    return json.dumps(data).encode("utf-8")
# Module-level producer; every value handed to send() goes through
# json_serializer before it is put on the wire.
producer = KafkaProducer(
    bootstrap_servers=['localhost:9092'],
    value_serializer=json_serializer,
)
def read_s3():
    """Return the UTF-8 decoded body of an object in the 's3sparkbucket' bucket.

    Every object in the bucket is read in listing order and the body of the
    last one is returned, so with a single file in the bucket this is simply
    that file's content.

    Returns:
        The decoded object body as a string, or None if the bucket is empty.
    """
    s3 = boto3.resource('s3')
    bucket = s3.Bucket('s3sparkbucket')
    # Start from None so an empty bucket yields None instead of a NameError.
    body = None
    for obj in bucket.objects.all():
        body = obj.get()['Body'].read().decode('utf-8')
    return body
if __name__ == "__main__":
    # Read the S3 object and publish its content to the "Uber_Eats" topic.
    body = read_s3()
    producer.send("Uber_Eats", body)
    print("Done")

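KafkaProducer is normally configured with three parameters:
bootstrap.servers, key.serializer, and value.serializer.
The missing key serializer may be the cause. Also note that send() is asynchronous: call producer.flush() before the script exits, otherwise the buffered message may never be delivered.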

Related

I am trying to read an SSM parameter, which works fine, but writing it to a text file and uploading that file to my bucket is not happening. Please find the code below.

import boto3
import os
client = boto3.client('ssm')
s3 = boto3.client("s3")
def lambda_handler(event, context):
    """Fetch the decrypted 'otherparam' SSM parameter and return its value.

    As posted, the function returns right after the lookup, so the file
    write and S3 upload further down are never reached.
    """
    parameter = client.get_parameter(Name='otherparam', WithDecryption=True)
    #print(parameter)
    return parameter['Parameter']['Value']
    #file = open("/sample.txt", "w")
    #file.write(parameter)
    #file.close
    with open("/tmp/log.txt", "w") as f:
        file.write(parameter)
    s3.upload_file("/tmp/log.txt", "copys3toecsbucket-117", "logs.txt")
    #bucket = "copys3toecsbucket-117"
    #file = "/sample.txt"
    #response = s3_client.put_object(Body=file,Bucket='bucket',key='file')
    print(response)
I am trying this in AWS Lambda only.
How can I convert the SSM parameter into a text file, which will be the trigger file for the next step, and upload it to an S3 bucket?
The upload to the bucket is not happening because you are returning a value before the upload happens. When you return a value in the handler, the Lambda function completes.
Removing the early return will fix it.
import boto3
import os
client = boto3.client('ssm')
s3 = boto3.client("s3")
def lambda_handler(event, context):
    """Write the decrypted 'otherparam' SSM parameter to S3 as 'logs.txt'.

    The value is first written to /tmp/log.txt and that file is then
    uploaded to the 'copys3toecsbucket-117' bucket.

    Returns:
        True once the upload call has returned.
    """
    parameter = client.get_parameter(Name='otherparam', WithDecryption=True)
    # get_parameter returns a nested dict; the text itself is under
    # Parameter -> Value. It is decrypted, so it is deliberately not printed.
    value = parameter['Parameter']['Value']
    with open("/tmp/log.txt", "w") as f:
        f.write(value)
    # Upload only after the with-block has closed (and flushed) the file.
    s3.upload_file("/tmp/log.txt", "copys3toecsbucket-117", "logs.txt")
    return True

Append String in a file Using Python Boto 3

I am writing a Lambda function in Python. I need to collect a list of AMIs that have a specified tag key-value pair and write it to an S3 bucket as a JSON file. My code is below:
import boto3
import json
client = boto3.client('ec2')
def lambda_handler(event, context):
    """Print each self-owned AMI tagged product=code and write its ID to S3.

    The ID is written to 'hello.json' in the 'my-ami-bucketforelk' bucket
    once per matching image, always under the same object key.
    """
    response = client.describe_images(Owners=['self'])
    versions = response['Images']
    for x in range(len(versions)):
        if {'Key': 'product', 'Value': 'code'} in response['Images'][x]['Tags']:
            ImageId = versions[x]['ImageId']
            print(ImageId)
            s3 = boto3.resource('s3')
            obj = s3.Object('my-ami-bucketforelk', 'hello.json')
            obj.put(Body=json.dumps(ImageId))
My Lambda is working as expected except for one thing: my output is being overwritten, so I am only able to write one AMI ID at a time.
Can somebody help me resolve this issue?
You're writing the object to S3 for each and every image ID. Instead, accumulate the image IDs in a list, and then upload that to S3 at the end. For example:
import json
import boto3
ec2 = boto3.client('ec2')
s3 = boto3.resource('s3')
def lambda_handler(event, context):
    """Store the IDs of all self-owned AMIs tagged product=code as one JSON list.

    The list is written once to 'hello.json' in the 'my-ami-bucketforelk'
    bucket, so every matching image ID ends up in the same object.
    """
    wanted_tag = {'Key': 'product', 'Value': 'code'}
    response = ec2.describe_images(Owners=['self'])
    # .get() guards against images whose description has no 'Tags' entry.
    images = [
        image['ImageId']
        for image in response['Images']
        if wanted_tag in image.get('Tags', [])
    ]
    # Single upload after all IDs are collected, so nothing is overwritten.
    obj = s3.Object('my-ami-bucketforelk', 'hello.json')
    obj.put(Body=json.dumps(images))

How to write parquet file to ECS in Flask python using boto or boto3

I have a Flask Python REST API which is called by another Flask REST API.
The input for my API is one parquet file (a FileStorage object) plus the ECS connection and bucket details.
I want to save the parquet file to ECS in a specific folder using boto or boto3.
The code I have tried:
def uploadFileToGivenBucket(self, inputData, file):
    """Save *file* to a temporary .parquet file and send it to the ECS bucket.

    Connection settings (keys, endpoint, port, bucket name, target filename)
    are all read from attributes of *inputData*.
    """
    BucketName = inputData.ecsbucketname
    calling_format = OrdinaryCallingFormat()
    client = S3Connection(inputData.access_key_id, inputData.secret_key, port=inputData.ecsport,
                          host=inputData.ecsEndpoint, debug=2,
                          calling_format=calling_format)
    #client.upload_file(BucketName, inputData.filename, inputData.folderpath)
    bucket = client.get_bucket(BucketName, validate=False)
    key = boto.s3.key.Key(bucket, inputData.filename)
    fileName = NamedTemporaryFile(delete=False, suffix=".parquet")
    file.save(fileName)
    with open(fileName.name) as f:
        key.send_file(f)
but it is not working and gives me an error like:
signature_host = '%s:%d' % (self.host, port)
TypeError: %d format: a number is required, not str
I tried Google but had no luck. Can anyone help me with this or share some sample code for the same?
After a lot of trial and error, I finally got the solution. I am posting it for everyone else who is facing the same issue.
You need to use Boto3, and here is the code:
def uploadFileToGivenBucket(self, inputData, file):
    """Upload *file* to the ECS bucket through the boto3 S3 client.

    The incoming file is saved to a temporary .parquet file, uploaded under
    'yourfolderpath/anotherfolder/<filename>', and the temporary file is
    removed afterwards.

    Args:
        inputData: object providing ecsbucketname, access_key_id, secret_key
            and filename attributes.
        file: uploaded file object exposing ``save(dst)``.

    Returns:
        True if the upload succeeded, False if it raised ClientError.
    """
    import os  # local import: only needed for temp-file cleanup

    BucketName = inputData.ecsbucketname
    endpointurl = "<your endpoints>"
    s3_client = boto3.client('s3', endpoint_url=endpointurl,
                             aws_access_key_id=inputData.access_key_id,
                             aws_secret_access_key=inputData.secret_key)
    # Leaving the with-block closes the temp file, so everything written by
    # file.save() is flushed to disk before the upload reads it by name.
    with NamedTemporaryFile(delete=False, suffix=".parquet") as f:
        file.save(f)
    try:
        newkey = 'yourfolderpath/anotherfolder/' + inputData.filename
        s3_client.upload_file(f.name, BucketName, newkey)
    except ClientError as e:
        logging.error(e)
        return False
    finally:
        # delete=False means nobody else will clean this file up.
        os.remove(f.name)
    return True

AWS: empty SQS queue when subscribed to SNS via boto3

I am not receiving any messages in my SQS queue when subscribing to an SNS topic via boto3.
Is this an issue with the code or the API credentials I am using? The IAM policy associated with this account has AWS PowerUser privileges, which should mean it has unrestricted access to manage SNS topics and SQS queues.
When I create the equivalent structure through the AWS console (create topic, create queue, subscribe queue to topic) and send a message using either boto3, the AWS CLI, or the AWS console, the message comes through correctly.
I don't think it is an issue with the code because the SubscriptionArn is being returned correctly?
I have tried this with both the US-EAST-1 and AP-SE-1 regions, same result.
Sample code:
#!/usr/bin/env python3
import boto3
import json
def get_sqs_msgs_from_sns():
    """Publish a test message through SNS and assert it arrives on an SQS queue.

    Creates (or fetches) 'queue1' and 'topic1' in us-east-1, subscribes the
    queue to the topic, publishes one JSON-structured message, then polls
    the queue for up to 20 seconds and asserts exactly one message arrived.
    """
    sqs_client = boto3.client('sqs', region_name='us-east-1')
    sqs_obj = boto3.resource('sqs', region_name='us-east-1')
    sns_client = boto3.client('sns', region_name='us-east-1')
    sqs_queue_name = 'queue1'
    topic_name = 'topic1'

    # Create/Get Queue
    sqs_client.create_queue(QueueName=sqs_queue_name)
    sqs_queue = sqs_obj.get_queue_by_name(QueueName=sqs_queue_name)
    queue_url = sqs_client.get_queue_url(QueueName=sqs_queue_name)['QueueUrl']
    sqs_queue_attrs = sqs_client.get_queue_attributes(
        QueueUrl=queue_url,
        AttributeNames=['All'])['Attributes']
    sqs_queue_arn = sqs_queue_attrs['QueueArn']
    if ':sqs.' in sqs_queue_arn:
        sqs_queue_arn = sqs_queue_arn.replace(':sqs.', ':')

    # Create SNS Topic
    topic_res = sns_client.create_topic(Name=topic_name)
    sns_topic_arn = topic_res['TopicArn']

    # Subscribe SQS queue to SNS
    sns_client.subscribe(
        TopicArn=sns_topic_arn,
        Protocol='sqs',
        Endpoint=sqs_queue_arn
    )

    # Publish SNS Messages
    test_msg = {'default': {"x":"foo","y":"bar"}}
    test_msg_body = json.dumps(test_msg)
    sns_client.publish(
        TopicArn=sns_topic_arn,
        Message=json.dumps({'default': test_msg_body}),
        MessageStructure='json')

    # Validate Message
    sqs_msgs = sqs_queue.receive_messages(
        AttributeNames=['All'],
        MessageAttributeNames=['All'],
        VisibilityTimeout=15,
        WaitTimeSeconds=20,
        MaxNumberOfMessages=5
    )
    assert len(sqs_msgs) == 1
    assert sqs_msgs[0].body == test_msg_body
    print(sqs_msgs[0].body)  # This should output dict with keys Message, Type, Timestamp, etc., but only returns the test_msg
if __name__ == "__main__":
    # Call the function under its defined name (the posted traceback shows
    # get_sqs_msgs_from_sns being invoked).
    get_sqs_msgs_from_sns()
I receive this output:
$ python .\sns-test.py
Traceback (most recent call last):
File ".\sns-test.py", line 55, in <module>
get_sqs_msgs_from_sns()
File ".\sns-test.py", line 50, in get_sqs_msgs_from_sns
assert len(sqs_msgs) == 1
AssertionError
The URL above for the similar question posed for the C# AWS SDK put me in the correct direction for this: I needed to attach a policy to the SQS queue to allow the SNS topic to write to it.
def allow_sns_to_write_to_sqs(topicarn, queuearn):
    """Return an SQS access policy that lets one SNS topic send to one queue.

    Args:
        topicarn: ARN of the SNS topic allowed to deliver messages.
        queuearn: ARN of the SQS queue the policy is attached to.

    Returns:
        The policy document as a JSON string, suitable for the 'Policy'
        queue attribute.
    """
    import json  # local import keeps this snippet self-contained

    policy = {
        "Version": "2012-10-17",
        "Statement": [
            {
                "Sid": "MyPolicy",
                "Effect": "Allow",
                "Principal": {"AWS": "*"},
                "Action": "SQS:SendMessage",
                "Resource": queuearn,
                # The wildcard principal is narrowed by this condition:
                # only requests whose source ARN is the topic match.
                "Condition": {"ArnEquals": {"aws:SourceArn": topicarn}},
            }
        ],
    }
    # json.dumps handles quoting/escaping of the ARNs, unlike str.format.
    return json.dumps(policy)
and
# Attach the policy to the queue; the variable names match those used in
# the question's get_sqs_msgs_from_sns function.
policy_json = allow_sns_to_write_to_sqs(sns_topic_arn, sqs_queue_arn)
response = sqs_client.set_queue_attributes(
    QueueUrl=queue_url,
    Attributes={'Policy': policy_json},
)
print(response)

"Text file busy" error on multithreading python

I have a python script which downloads shell scripts from amazon S3 server and then executes them (each script is about 3GB in size). The function that downloads and executes the file looks like this:
import boto3
def parse_object_key(key):
    """Return the second ':::'-separated field of *key*.

    Raises:
        IndexError: if *key* contains no ':::' separator.
    """
    key_parts = key.split(':::')
    return key_parts[1]
def process_file(file):
    """Download S3 object *file* from the 'category' bucket and execute it.

    The object is saved as /tmp/<node>/tmp.sh, where <node> is taken from
    the object key by parse_object_key, marked executable, and run through
    os.system.
    """
    import os
    import stat

    client = boto3.client('s3')
    node = parse_object_key(file)
    file_path = "/tmp/" + node + "/tmp.sh"
    # Create only the parent directory; creating file_path itself as a
    # directory would leave no place for the downloaded file.
    os.makedirs(os.path.dirname(file_path), exist_ok=True)
    client.download_file('category', file, file_path)
    os.chmod(file_path, stat.S_IXUSR)
    os.system(file_path)
The node is unique for each file.
I created a for loop to execute this:
# Process every object in the bucket one after another.
s3 = boto3.resource('s3')
bucket = s3.Bucket('category')
for obj in bucket.objects.page_size(count=50):
    # process_file takes only the object key and creates its own client.
    process_file(obj.key)
This works perfectly, but when I try to create a separate thread for each file, I get the error:
sh: 1: /path/to/file: Text file busy
The script with threading looks like:
import threading

# Start one thread per object in the bucket, then wait for all of them.
s3 = boto3.resource('s3')
bucket = s3.Bucket('category')
threads = []
for obj in bucket.objects.page_size(count=50):
    # process_file takes only the object key and creates its own client.
    t = threading.Thread(target=process_file, args=(obj.key,))
    threads.append(t)
    t.start()
for t in threads:
    t.join()
Out of all the threads, exactly one thread succeeds and all the others fail with the "Text file busy" error. Can someone help me figure out what I am doing incorrectly?
Boto3 sessions and resources are not thread-safe, so you cannot share one S3 connection across the download threads; create a separate session and client inside each thread instead. See here for details of a workaround.

Categories

Resources