I'm trying to use the Vosk library in Python to extract text from audio files. To do this, I'm using the wave module to read a temporary WAV file created from an MP3 or video file using the pydub and ffmpeg libraries.
But it keeps giving me the error permmision denied even after giving the privileges
Here is my code:
from flask import Flask, render_template, request, send_file
from werkzeug.utils import secure_filename
import os
import wave
import json
import tempfile
import subprocess
from pydub import AudioSegment
import stat
import vosk
app = Flask(__name__,template_folder='template')
# Set up Vosk model and recognizer
model = vosk.Model("\SpeechRecongnition\model\EnglishIN")
rec = vosk.KaldiRecognizer(model, 16000)
UPLOAD_FOLDER = 'uploads'
# Set up app config
app.config['MAX_CONTENT_LENGTH'] = 70 * 1024 * 1024 # 70 MB max for audio files
app.config['UPLOAD_EXTENSIONS'] = ['.wav', '.mp3', '.mp4', '.avi', '.mkv']
app.config['UPLOAD_PATH'] = UPLOAD_FOLDER
#app.route('/')
def index():
return render_template('index.html')
#app.route('/transcribe', methods=['POST'])
def upload_file():
# Check if file was submitted
text=""
if 'file' not in request.files:
return 'No file uploaded'
# Get file from request and check if it has a valid extension
file = request.files['file']
if not file.filename.lower().endswith(tuple(app.config['UPLOAD_EXTENSIONS'])):
return 'Unsupported file format'
# Save file to disk
filename = secure_filename(file.filename)
file.save(os.path.join(app.config['UPLOAD_PATH'], filename))
text = transcribe_audio(os.path.join(app.config['UPLOAD_PATH'], filename))
# Render download page
return render_template('download.html', filename=filename, text=text)
#app.route('/download/<format>/<filename>/<text>')
def download(format, filename, text):
# Create file object with transcription text
if format == 'txt':
file = open(f'{filename}.txt', 'w')
file.write(text)
file.close()
elif format == 'srt':
file = open(f'{filename}.srt', 'w')
file.write('1\n00:00:00,000 --> 00:00:10,000\n')
file.write(text.replace('\n', '\n\n'))
file.close()
else:
return 'Unsupported file format'
# Return file for download
return send_file(file.name, as_attachment=True)
def transcribe_audio(file):
# Create temporary file for processing
with tempfile.NamedTemporaryFile(suffix='.wav' ,dir=app.config['UPLOAD_PATH']) as tf:
# Convert MP3 and video files to WAV using pydub and ffmpeg
if file.endswith('.mp3'):
audio = AudioSegment.from_file(file, format='mp3')
audio.export(tf.name, format='wav')
elif file.endswith('.mp4') or file.endswith('.avi') or file.endswith('.mkv'):
subprocess.call(['ffmpeg', '-i', file, tf.name])
# Read WAV file directly
elif file.endswith('.wav'):
tf.write(open(file,'rb').read())
tf.flush()
else:
raise ValueError('Unsupported file format')
# Open WAV file and transcribe with Vosk
with open(tf.name, 'rb') as f:
with wave.open(tf.name, 'rb') as wf:
# Set parameters for recognizer
rec.SetChannel(wf.getnchannels())
rec.SetSampleRate(wf.getframerate())
# Process audio in chunks
while True:
data = wf.readframes(4000)
if len(data) == 0:
break
if rec.AcceptWaveform(data):
# Get intermediate transcription and clear the recognizer
result = json.loads(rec.Result())
text = result['text']
rec.ResetFinal()
# Get final transcription
result = json.loads(rec.FinalResult())
text += result['text']
return text
if __name__ == '__main__':
app.run(debug=True)
Related
Im writting a simple python application where the user selects a file from their local file manager and tries to upload using strealit
Im able to succesfully take the file the user had given using streamlit.uploader and stored the file in a temp directory from the stramlit app folder but the issue is i cant give the path of the file of the file stored in the newly created directory in order to send the application into my gcp clouds bucket
Adding my snippet below any help is appreciated :)
import streamlit as st
from google.oauth2 import service_account
from google.cloud import storage
import os
from os import listdir
from os.path import isfile, join
from pathlib import Path
from PIL import Image, ImageOps
bucketName=('survey-appl-dev-public')
# Create API client.
credentials = service_account.Credentials.from_service_account_info(
st.secrets["gcp_service_account"]
)
client = storage.Client(credentials=credentials)
#create a bucket object to get bucket details
bucket = client.get_bucket(bucketName)
file = st.file_uploader("Upload An file")
def main():
if file is not None:
file_details = {"FileName":file.name,"FileType":file.type}
st.write(file_details)
#img = load_image(image_file)
#st.image(img, caption='Sunrise by the mountains')
with open(os.path.join("tempDir",file.name),"wb") as f:
f.write(file.getbuffer())
st.success("Saved File")
object_name_in_gcs_bucket = bucket.blob(".",file.name)
object_name_in_gcs_bucket.upload_from_filename("tempDir",file.name)
if __name__ == "__main__":
main()
ive tried importing the path of the file using cwd command and also tried os library for file path but nothing worked
edited:
All i wanted to implement is make a file upload that is selected by customer using the dropbox of file_uploader option im able to save the file into a temporary directory after the file is selected using the file.getbuffer as shown in the code but i couldnt amke the code uploaded into the gcs bucket since its refering as str cannnot be converted into int while i press the upload button
may be its the path issue "the code is unable to find the path of the file stored in the temp directory " but im unable to figure iut how to give the path to the upload function
error coding im facing
TypeError: '>' not supported between instances of 'str' and 'int'
Traceback:
File "/home/raviteja/.local/lib/python3.10/site-packages/streamlit/runtime/scriptrunner/script_runner.py", line 564, in _run_script
exec(code, module.__dict__)
File "/home/raviteja/test/streamlit/test.py", line 43, in <module>
main()
File "/home/raviteja/test/streamlit/test.py", line 29, in main
object_name_in_gcs_bucket = bucket.blob(".",file.name)
File "/home/raviteja/.local/lib/python3.10/site-packages/google/cloud/storage/bucket.py", line 795, in blob
return Blob(
File "/home/raviteja/.local/lib/python3.10/site-packages/google/cloud/storage/blob.py", line 219, in __init__
self.chunk_size = chunk_size # Check that setter accepts value.
File "/home/raviteja/.local/lib/python3.10/site-packages/google/cloud/storage/blob.py", line 262, in chunk_size
if value is not None and value > 0 and value % self._CHUNK_SIZE_MULTIPLE != 0:
Thanks all for response after days of struggle at last I've figured out the mistake im making.
I dont know if I'm right or wrong correct me if I'm wrong but this worked for me:
object_name_in_gcs_bucket = bucket.blob("path-to-upload"+file.name)
Changing the , to + between the filepath and filename made my issue solve.
Sorry for the small issue.
Happy that I could solve it.
You have some variables in your code and I guess you know what they represent. Try this out else make sure you add every relevant information to the question and the code snippet.
def main():
file = st.file_uploader("Upload file")
if file is not None:
file_details = {"FileName":file.name,"FileType":file.type}
st.write(file_details)
file_path = os.path.join("tempDir/", file.name)
with open(file_path,"wb") as f:
f.write(file.getbuffer())
st.success("Saved File")
print(file_path)
def upload():
file_name = file_path
read_file(file_name)
st.write(file_name)
st.session_state["upload_state"] = "Saved successfully!"
object_name_in_gcs_bucket = bucket.blob("gcp-bucket-destination-path"+ file.name)
object_name_in_gcs_bucket.upload_from_filename(file_path)
st.write("Youre uploading to bucket", bucketName)
st.button("Upload file to GoogleCloud", on_click=upload)
if __name__ == "__main__":
main()
This one works for me.
Solution 1
import streamlit as st
from google.oauth2 import service_account
from google.cloud import storage
import os
STREAMLIT_SCRIPT_FILE_PATH = os.path.dirname(os.path.abspath(__file__))
credentials = service_account.Credentials.from_service_account_info(
st.secrets["gcp_service_account"]
)
client = storage.Client(credentials=credentials)
def main():
bucketName = 'survey-appl-dev-public'
file = st.file_uploader("Upload file")
if file is not None:
file_details = {"FileName":file.name,"FileType":file.type}
st.write(file_details)
with open(os.path.join("tempDir", file.name), "wb") as f:
f.write(file.getbuffer())
st.success("Saved File")
bucket = client.bucket(bucketName)
object_name_in_gcs_bucket = bucket.blob(file.name)
# src_relative = f'./tempDir/{file.name}' # also works
src_absolute = f'{STREAMLIT_SCRIPT_FILE_PATH}/tempDir/{file.name}'
object_name_in_gcs_bucket.upload_from_filename(src_absolute)
if __name__ == '__main__':
main()
Solution 2
Instead of saving the file to disk, use the file bytes directly using upload_from_string().
References:
Google Cloud upload_from_string
Streamlit file uploader
credentials = service_account.Credentials.from_service_account_info(
st.secrets["gcp_service_account"]
)
client = storage.Client(credentials=credentials)
def gcs_upload_data():
bucket_name = 'your_gcs_bucket_name'
file = st.file_uploader("Upload file")
if file is not None:
fname = file.name
ftype = file.type
file_details = {"FileName":fname,"FileType":ftype}
st.write(file_details)
# Define gcs bucket.
bucket = client.bucket(bucket_name)
bblob = bucket.blob(fname)
# Upload the bytes directly instead of a disk file.
bblob.upload_from_string(file.getvalue(), ftype)
if __name__ == '__main__':
gcs_upload_data()
While uploading files to box it gives an error says File size is less than required. Here is my code:
import os
from boxsdk import JWTAuth, Client
import schedule
import time
directory = '/videos'
def save_video():
retval = os.getcwd()
help = (retval)
os.chdir(help + directory)
retval = os.getcwd()
config = JWTAuth.from_settings_file('box_config.json')
client = Client(config)
for file_name in os.listdir(retval):
print(file_name)
if file_name == 'box_config.json':
continue
file_size = os.path.getsize(file_name)
print(file_name)
folder_id = '144613233618'
upload_session = client.folder(folder_id=folder_id).create_upload_session(file_size, file_name)
print('Created upload session {0} with chunk size of {1} bytes'.format(upload_session.id, upload_session.part_size))
chunked_upload = upload_session.get_chunked_uploader(file_name)
uploaded_file = chunked_upload.start()
print('File "{0}" uploaded to Box with file ID {1}'.format(uploaded_file.name, uploaded_file.id))
os.remove(file_name)
schedule.every().day.at("11:34").do(save_video)
while True:
schedule.run_pending()
time.sleep(1)
I will upload more than one file as sequence and some file's size could be less than 20000000.
Chunked uploads have more overhead, so they only allow it for files larger than 20MB. For smaller files, use the normal upload API. In fact, they recommend the normal upload up to 50MB.
I am having an issue trying to download download in-memory ZIP-FILE object using Flask send_file. my zip exists in memory and is full of text documents but when I try with this code
the result I get is: it downloads like it is supposed to but it downloads an empty zip file! it's like it is copying nothing ... I have no idea how to solve this problem.
#app.route('/downloads/', methods=['GET'])
def download():
from flask import send_file
import io
import zipfile
import time
FILEPATH = r"C:\Users\JD\Downloads\trydownload.zip"
fileobj = io.BytesIO()
with zipfile.ZipFile(fileobj, 'w') as zip_file:
zip_info = zipfile.ZipInfo(FILEPATH)
zip_info.date_time = time.localtime(time.time())[:6]
zip_info.compress_type = zipfile.ZIP_DEFLATED
with open(FILEPATH, 'rb') as fd:
zip_file.writestr(zip_info, fd.read())
fileobj.seek(0)
return send_file(fileobj, mimetype='zip', as_attachment=True,
attachment_filename='%s.zip' % os.path.basename(FILEPATH))
I had the exact same issue with the Flask send_file method.
Details:
Flask version 2.0.1
OS: Windows 10
Solution
I figured out a workaround to this i.e. instead of the send_file method, this can be done by returning a Response object with the data. Replace the return statement in your code with the following and this should work.
#app.route('/downloads/', methods=['GET'])
def download():
from flask import Response # Changed line
import io
import zipfile
import time
FILEPATH = r"C:\Users\JD\Downloads\trydownload.zip"
fileobj = io.BytesIO()
with zipfile.ZipFile(fileobj, 'w') as zip_file:
zip_info = zipfile.ZipInfo(FILEPATH)
zip_info.date_time = time.localtime(time.time())[:6]
zip_info.compress_type = zipfile.ZIP_DEFLATED
with open(FILEPATH, 'rb') as fd:
zip_file.writestr(zip_info, fd.read())
fileobj.seek(0)
# Changed line below
return Response(fileobj.getvalue(),
mimetype='application/zip',
headers={'Content-Disposition': 'attachment;filename=your_filename.zip'})
I am developing API using Flask-restplus. One of the endpoints handles audio file uploads which can be either mp3 or wav format. According to PUT request to upload a file not working in Flask, file uploaded by put is in either request.data or request.stream. So this is what I did:
#ns.route('/upload')
class AudioUpload(Resource):
def put(self):
now = datetime.now()
filename = now.strftime("%Y%m%d_%H%M%S") + ".mp3"
cwd = os.getcwd()
filepath = os.path.join(cwd, filename)
with open(filepath, 'wb') as f:
f.write(request.stream.read())
return filepath
I am saving the file as mp3. However sometime the file comes in as wav. Is there a way to get the original file name from put request in the similar way as post request:
file = request.files['file']
filename = file.filename
i'm trying to upload an image to a server (pythonanywhere.com) with a python script using web2py, so i can make some changes to the image and save it...i will use the script in a terminal and upload the image via curl like that:
curl -i -F filedata=#image.jpg http://my_username.pythonanywhere.com/DocScanner/default/upload
That's build in to the web2py SQLFORM. Add an upload field and web2py will stream the file to disk with a safe name and it will return the name to your code. Have a look at the web2py book which documents SQLFORM and upload fields.
import os
def decode_image3(src):
import base64
import re
import uuid
result = re.search("data:image/(?P<ext>.*?);base64,(?P<data>.*)", src, re.DOTALL)
if result:
ext = result.groupdict().get("ext")
data = result.groupdict().get("data")
else:
raise Exception("Do not parse!")
# 2, base64 decoding
img = base64.urlsafe_b64decode(data)
# 3, the binary file is saved
filename = "{}.{}".format(uuid.uuid4(), ext)
completeName = os.path.join(request.folder,'uploads', filename)
with open(completeName, "wb") as f:
f.write(img)
return filename
#request.restful()
def Image2():
response.view = 'generic.json'
def POST(**vars):
image = decode_image3(request.vars.image)
completeName = os.path.join(request.folder,'uploads', image)
stream = open(completeName, 'rb')
if os.path.exists(completeName):
rv = os.remove(completeName)
save = db.test.insert(image=stream, age=34)
return dict(msg=save)
return locals()