I have a django app, I received binary audio data from a javascript client, and I'm trying to send it to the google cloud speech to text API. The problem is that, python is not writing the binary audio data to a file. So I'm getting
with io.open(file_name, "rb") as f:
FileNotFoundError: [Errno 2] No such file or directory: '.........\\gcp_cloud\\blog\\audio_file.wav'
I replaced the first part of the path with ...........
Here is the client side code
rec.ondataavailable = e => {
audioChunks.push(e.data);
if (rec.state == "inactive"){
let blob = new Blob(audioChunks,{type:'audio/wav; codecs=MS_PCM'});
recordedAudio.src = URL.createObjectURL(blob);
recordedAudio.controls=true;
recordedAudio.autoplay=true;
sendData(blob)
}
}
and here is my sendData function
function sendData(data) {
let csrftoken = getCookie('csrftoken');
let response=fetch("/voice_request", {
method: "post",
body: data,
headers: { "X-CSRFToken": csrftoken },
})
console.log('got a response from the server')
console.log(response)
}
and here is the DJango view that handles the binary audio data from the client
def voice_request(request):
#print(request.body)
fw = open('audio_file.wav', 'wb')
fw.write(request.body)
file_name = os.path.join(current_folder, 'audio_file.wav')
#file_name = os.path.join(current_folder, 'Recording.m4a')
client = speech.SpeechClient()
# The language of the supplied audio
language_code = "en-US"
# Sample rate in Hertz of the audio data sent
sample_rate_hertz = 16000
encoding = enums.RecognitionConfig.AudioEncoding.LINEAR16
config = {
"language_code": language_code,
#"sample_rate_hertz": sample_rate_hertz,
#"encoding": 'm4a',
}
with io.open(file_name, "rb") as f:
content = f.read()
audio = {"content": content}
fw.close()
response = client.recognize(config, audio)
print('response')
print(response)
for result in response.results:
# First alternative is the most probable result
alternative = result.alternatives[0]
print(u"Transcript: {}".format(alternative.transcript))
return HttpResponse(response)
you should use with io.open(file_name, "wb") as f: or with io.open(file_name, "ab") as f:
because r mean read-only, w mean "rewrite"
you can refer: https://docs.python.org/3.7/library/functions.html?highlight=open#open
Related
I have html document embeded with pdf document in base64 encoded format. I like to extract the string and save it as pdf file. using below code to save it as pdf file.
but its on opening in adobe reader, saying invalid format. looking to fix this issue.
I think pdf file encoded using Javascript encodeURIComponent function. need to convert using Python.
sample embed tag
<embed type="application/pdf" src="data:application/pdf;base64,JVBERi0xLjQKJeLjz9MKMSAwIG9iago8PC9D">
Code
import base64
def decode_b64():
b64 = "JVBERi0xLjQKJeLjz9MKMSAwIG9iago8PC9D"
buffer = BytesIO.BytesIO()
content = base64.b64decode(b64)
buffer.write(content)
with open(Path(Path.home(), 'Downloads', 'mytest.pdf'), "wb") as f:
f.write(buffer.getvalue())
if __name__ == "__main__":
decode_b64()
=== Update 1:
found the way to convert using JavaScript: It will be nice if we can port this code to Python.
const {readFileSync, writeFile, promises: fsPromises} = require('fs');
var data=readFileSync("pdf-file.html", 'utf-8')
var DOMParser = require('xmldom').DOMParser;
var parser = new DOMParser();
const virtualDoc = parser.parseFromString(data, 'text/html');
var elem = virtualDoc.getElementsByTagName('embed')[0];
for (var i = 0; i < elem.attributes.length; i++) {
var attrib = elem.attributes[i];
if (attrib.specified) {
if( attrib.name == "src") {
var result =attrib.value
result=result.replace('data:application/pdf;base64,','');
let buff = Buffer.from(decodeURIComponent(result), 'base64');
writeFile('pdf-file.pdf', buff, err => {
if (err) {
console.error(err);
}
});
}
}
}
This is a situation that you should have been able to chase down yourself. I wasn't 100% sure how Javascript encoded those two characters, so I wrote up a simple HTML page:
<script>
var s = "abcde++defgh//";
alert(encodeURIComponent(s));
</script>
When I ran that page, the result was "abcde%2B%2Bdefgh%2F%2F", and that is all the information you need to fix up those strings.
def decode_b64():
b64 = "JVBERi0xLjQKJeLjz9MKMSAwIG9iago8PC9D......"
b64 = b64.replace('%2B','+').replace('%2F','/')
content = base64.b64decode(b64)
with open(Path(Path.home(), 'Downloads', 'mytest.pdf'), "wb") as f:
f.write(content)
Hello im trying to generate a CSV from dynamoDB to S3 using lambda function. the thing is I just get an empty file on s3. Please your help!
import csv
import boto3
import json
dynamodb = boto3.resource('dynamodb')
db = dynamodb.Table('ReporteTelefonica')
def lambda_handler(event, context):
AWS_BUCKET_NAME = 'reportetelefonica'
s3 = boto3.resource('s3')
bucket = s3.Bucket(AWS_BUCKET_NAME)
path = 'test.csv'
try:
response = db.scan()
myFile = open(path, 'w')
for i in response['Items']:
csv.register_dialect('myDialect', delimiter=',', quoting=csv.QUOTE_NONE)
with myFile:
writer = csv.writer(myFile, dialect='myDialect')
writer.writerows(i)
print(i)
except :
print("error")
bucket.put_object(
ACL='public-read-write',
ContentType='application/csv',
Key=path,
# Body=json.dumps(i),
)
# print("here")
body = {
"uploaded": "true",
"bucket": AWS_BUCKET_NAME,
"path": path,
}
# print("then here")
return {
"statusCode": 200,
"body": json.dumps(body)
}
I'm kind of noob on this, so I was wondering what should I modify to successfully make a complete scan of the table and write the values on the CSV on S3???
Here's a working lambda that will do the job.
import boto3
import json
import os
import pandas as pd
TABLE_NAME = os.environ.get("DDB_TABLE_NAME")
OUTPUT_BUCKET = os.environ.get("BUCKET_NAME")
TEMP_FILENAME = '/tmp/export.csv'
OUTPUT_KEY = 'export.csv'
s3_resource = boto3.resource('s3')
dynamodb_resource = boto3.resource('dynamodb')
table = dynamodb_resource.Table(TABLE_NAME)
def lambda_handler(event, context):
response = table.scan()
df = pd.DataFrame(response['Items'])
df.to_csv(TEMP_FILENAME, index=False, header=True)
# Upload temp file to S3
s3_resource.Bucket(OUTPUT_BUCKET).upload_file(TEMP_FILENAME, OUTPUT_KEY)
return {
'statusCode': 200,
'headers': {
"Access-Control-Allow-Origin": "*",
"Access-Control-Allow-Credentials": True,
"content-type": "application/json"
},
'body': json.dumps('OK')
}
You either have to close the file after you finished writing the cvs records and then reopen for reading and pass to the put_obkect method.
Alternatively you open the file for reading and writing and after writing you seek to position 0 so that the put_object method reads from the start.
The problem is I'm unable to access the information from config.json file to my python file
I have provided the JSON data and python code bellow
I have tried everything in the request module
but I can access the response without the config file but,
I need with config file
The following is a json file
{
"api_data": {
"request_url": "https://newapi.zivame.com/api/v1/catalog/list",
"post_data" : {"category_ids" : "948",
"limit" : "10000"},
"my_headers":{"Content-Type": "application/json"}
},
"redshift":{
"host":"XXX.XXXX.XXX",
"user":"XXXX",
"password":"XXXXXXXX",
"port": 8080,
"db":"XXXX"
},
"s3":{
"access_key":"XXXXXXXXX",
"secret_key":"XXXXXXXXXX",
"region":"XX-XXXXX-1",
"path":"XXXXXXXXXXXX/XXX",
"table":"XXXXXX",
"bucket":"XXXX",
"file": "XXXXXX",
"copy_column": "XXX",
"local_path": "XXXXX"
},
"csv_file": {
"promo_zivame": ""
}
}
and this is the program
#!/usr/bin/python
import json
import psycopg2
import requests
import os
BASE_PATH = os.path.dirname(os.path.realpath(__file__))
with open(BASE_PATH+'/config.json') as json_data_file:
data = json.load(json_data_file)
#api_config = data['api_data']
#redshift = data['redshift']
s3_config = data['s3']
#x = print(api_config.get('request_url'))
class ApiResponse:
#api response
def api_data(self, api_config):
print("starting api_data")
try:
self.ApiResponse = requests.post(api_config['request_url'], api_config['post_data'], api_config['my_headers'])
data_1 = self.ApiResponse
#data = json.dump(self.ApiResponse)
print("API Result Response")
print(())
print(self.ApiResponse)
return (self.ApiResponse)
except Exception:
print("response not found")
return False
def redshift_connect(self, redshift):
try:
# Amazon Redshift connect string
self.con = psycopg2.connect(
host=redshift['host'],
user=redshift['user'],
port=redshift['port'],
password=redshift['password'],
dbname=redshift['db'])
print(self.con)
return self.con
except Exception:
print("Error in Redshift connection")
return False
def main():
c1 = ApiResponse()
api_config = data['api_data']
redshift = data['redshift']
c1.api_data(api_config)
c1.api_data(data)
c1.redshift_connect(redshift)
if __name__=='__main__':
main()
Third argument to requests.post() is json. To provide headers, you need to use the name of the argument explicitly as #JustinEzequiel suggested. See the requests doc here: 2.python-requests.org/en/v1.1.0/user/quickstart/#custom-headers
requests.post(api_config['request_url'], json=api_config['post_data'], headers=api_config['my_headers'])
Borrowing code from https://stackoverflow.com/a/16696317/5386938
import requests
api_config = {
"request_url": "https://newapi.zivame.com/api/v1/catalog/list",
"post_data" : {"category_ids" : "948", "limit" : "10000"},
"my_headers":{"Content-Type": "application/json"}
}
local_filename = 'the_response.json'
with requests.post(api_config['request_url'], json=api_config['post_data'], headers=api_config['my_headers'], stream=True) as r:
r.raise_for_status()
with open(local_filename, 'wb') as f:
for chunk in r.iter_content(chunk_size=8192):
if chunk: # filter out keep-alive new chunks
f.write(chunk)
saves the response into a file ('the_response.json') you can then pass around. Note the stream=True passed to requests.post
I've been having some trouble sending files via python's rest module. I can send emails without attachments just fine but as soon as I try and add a files parameter, the call fails and I get a 415 error.
I've looked through the site and found out it was maybe because I wasn't sending the content type of the files when building that array of data so altered it to query the content type with mimetypes; still 415.
This thread: python requests file upload made a couple of more edits but still 415.
The error message says:
"A supported MIME type could not be found that matches the content type of the response. None of the supported type(s)"
Then lists a bunch of json types e.g: "'application/json;odata.metadata=minimal;odata.streaming=true;IEEE754Compatible=false"
then says:
"matches the content type 'multipart/form-data; boundary=0e5485079df745cf0d07777a88aeb8fd'"
Which of course makes me think I'm still not handling the content type correctly somewhere.
Can anyone see where I'm going wrong in my code?
Thanks!
Here's the function:
def send_email(access_token):
import requests
import json
import pandas as pd
import mimetypes
url = "https://outlook.office.com/api/v2.0/me/sendmail"
headers = {
'Authorization': 'Bearer '+access_token,
}
data = {}
data['Message'] = {
'Subject': "Test",
'Body': {
'ContentType': 'Text',
'Content': 'This is a test'
},
'ToRecipients': [
{
'EmailAddress':{
'Address': 'MY TEST EMAIL ADDRESS'
}
}
]
}
data['SaveToSentItems'] = "true"
json_data = json.dumps(data)
#need to convert the above json_data to dict, otherwise it won't work
json_data = json.loads(json_data)
###ATTACHMENT WORK
file_list = ['test_files/test.xlsx', 'test_files/test.docx']
files = {}
pos = 1
for file in file_list:
x = file.split('/') #seperate file name from file path
files['file'+str(pos)] = ( #give the file a unique name
x[1], #actual filename
open(file,'rb'), #open the file
mimetypes.MimeTypes().guess_type(file)[0] #add in the contents type
)
pos += 1 #increase the naming iteration
#print(files)
r = requests.post(url, headers=headers, json=json_data, files=files)
print("")
print(r)
print("")
print(r.text)
I've figured it out! Took a look at the outlook API documentation and realised I should be adding attachments as encoded lists within the message Json, not within the request.post function. Here's my working example:
import requests
import json
import pandas as pd
import mimetypes
import base64
url = "https://outlook.office.com/api/v2.0/me/sendmail"
headers = {
'Authorization': 'Bearer '+access_token,
}
Attachments = []
file_list = ['test_files/image.png', 'test_files/test.xlsx']
for file in file_list:
x = file.split('/') #file the file path so we can get it's na,e
filename = x[1] #get the filename
content = open(file,'rb') #load the content
#encode the file into bytes then turn those bytes into a string
encoded_string = ''
with open(file, "rb") as image_file:
encoded_string = base64.b64encode(image_file.read())
encoded_string = encoded_string.decode("utf-8")
#append the file to the attachments list
Attachments.append({
"#odata.type": "#Microsoft.OutlookServices.FileAttachment",
"Name": filename,
"ContentBytes": encoded_string
})
data = {}
data['Message'] = {
'Subject': "Test",
'Body': {
'ContentType': 'Text',
'Content': 'This is a test'
},
'ToRecipients': [
{
'EmailAddress':{
'Address': 'EMAIL_ADDRESS'
}
}
],
"Attachments": Attachments
}
data['SaveToSentItems'] = "true"
json_data = json.dumps(data)
json_data = json.loads(json_data)
r = requests.post(url, headers=headers, json=json_data)
print(r)
I'm using Tornado Webserver and the jQuery Webcam Plugin.
Everything is going fine except that I don't think i'm getting the raw data properly. I'm getting "FFD8FFE000104A46494600010100000100010000FFDB0084000503040404030504040405050506070C08070707070F0B0B090C110F1212110F111113161C1713141A1511111821181A1D1D1F1F1F13172224221E241C1E1F1E010505050706070E08080E1E1411141E1E1E1E1E1E1E1E1E1E1E1E1E1E1E1E1E1E1E1E1E1E1E1E1E1E1E1E1E1E1E1E1E1E1" for my data.
frontend:
$("#camera").webcam({width: 320,
height: 240,
mode: "save",
swffile: "/static/js/jscam.swf",
onTick: function() {
alert('OnTick');},
onCapture: function() {
webcam.capture();
var x = webcam.save('/saveimage');
},
onDebug: function(type, string) {
alert('error');
alert(type + ": " + string);},
});
backend:
filecontent = self.request.body
f = open('static/studentphotos/'+ filename +'.jpg','w')
f.write(filecontent)
f.close()"
Using your data as x, notice the JFIF in the output from unhexlify:
In [88]: binascii.unhexlify(x[:-1])
Out[88]: '\xff\xd8\xff\xe0\x00\x10JFIF...'
So it appears the data is a JPEG that needs to be unhexlified. Therefore try:
import binascii
filecontent = self.request.body
with open('static/studentphotos/'+ filename +'.jpg','w') as f:
f.write(binascii.unhexlify(filecontent))