Download audio only youtube raises regexmatcherror - python

I am trying to run the following code, which is supposed to download the audio track of a YouTube video from its URL:
from pathlib import Path
from pytube import YouTube
def download_youtube_video(youtube_url, output_path):
    """Download the audio-only stream of a YouTube video and rename it to ``.mp3``.

    Args:
        youtube_url: URL of the video, passed to ``YouTube``.
        output_path: Directory handed to ``download`` as ``output_path``.

    Returns:
        The value returned by ``Path.replace`` for the renamed file (a
        ``Path`` on Python 3.8+, the version shown in the traceback).
    """
    # Accessing ``.streams`` is where the RegexMatchError in the traceback is raised.
    audio_file = YouTube(youtube_url).streams.get_audio_only().download(output_path=output_path)
    audio_file = Path(audio_file)
    # Only the file suffix is changed here; the downloaded data is not re-encoded.
    audio_file = audio_file.replace(audio_file.with_suffix(".mp3"))
    return audio_file
youtube_url = 'https://youtu.be/_H5hsUwv8lE'
output_path = Path(__file__).parent
audio_file = download_youtube_video(youtube_url, output_path)
But I got the following traceback:
Traceback (most recent call last):
File "C:\Users\Future\AppData\Local\Programs\Python\Python38\lib\site-packages\pytube\__main__.py", line 181, in fmt_streams
extract.apply_signature(stream_manifest, self.vid_info, self.js)
File "C:\Users\Future\AppData\Local\Programs\Python\Python38\lib\site-packages\pytube\extract.py", line 409, in apply_signature
cipher = Cipher(js=js)
File "C:\Users\Future\AppData\Local\Programs\Python\Python38\lib\site-packages\pytube\cipher.py", line 43, in __init__
self.throttling_plan = get_throttling_plan(js)
File "C:\Users\Future\AppData\Local\Programs\Python\Python38\lib\site-packages\pytube\cipher.py", line 405, in get_throttling_plan
raw_code = get_throttling_function_code(js)
File "C:\Users\Future\AppData\Local\Programs\Python\Python38\lib\site-packages\pytube\cipher.py", line 311, in get_throttling_function_code
name = re.escape(get_throttling_function_name(js))
File "C:\Users\Future\AppData\Local\Programs\Python\Python38\lib\site-packages\pytube\cipher.py", line 296, in get_throttling_function_name
raise RegexMatchError(
pytube.exceptions.RegexMatchError: get_throttling_function_name: could not find match for multiple
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "demo.py", line 18, in <module>
audio_file = download_youtube_video(youtube_url, output_path)
File "demo.py", line 6, in download_youtube_video
audio_file = YouTube(youtube_url).streams.get_audio_only().download(output_path=output_path)
File "C:\Users\Future\AppData\Local\Programs\Python\Python38\lib\site-packages\pytube\__main__.py", line 296, in streams
return StreamQuery(self.fmt_streams)
File "C:\Users\Future\AppData\Local\Programs\Python\Python38\lib\site-packages\pytube\__main__.py", line 188, in fmt_streams
extract.apply_signature(stream_manifest, self.vid_info, self.js)
File "C:\Users\Future\AppData\Local\Programs\Python\Python38\lib\site-packages\pytube\extract.py", line 409, in apply_signature
cipher = Cipher(js=js)
File "C:\Users\Future\AppData\Local\Programs\Python\Python38\lib\site-packages\pytube\cipher.py", line 43, in __init__
self.throttling_plan = get_throttling_plan(js)
File "C:\Users\Future\AppData\Local\Programs\Python\Python38\lib\site-packages\pytube\cipher.py", line 405, in get_throttling_plan
raw_code = get_throttling_function_code(js)
File "C:\Users\Future\AppData\Local\Programs\Python\Python38\lib\site-packages\pytube\cipher.py", line 311, in get_throttling_function_code
name = re.escape(get_throttling_function_name(js))
File "C:\Users\Future\AppData\Local\Programs\Python\Python38\lib\site-packages\pytube\cipher.py", line 296, in get_throttling_function_name
raise RegexMatchError(
pytube.exceptions.RegexMatchError: get_throttling_function_name: could not find match for multiple
Any idea how to fix such a problem?

My steps to solve the problem:
First, find the installation path of the pytube package with this code:
import pytube
import os
# Show where the installed pytube package lives: first the path of the
# package's module file, then the directory that contains it.
package_file = pytube.__file__
print(package_file)
print(os.path.dirname(package_file))
Then navigate to the pytube directory and edit cipher.py:
On line 273, change r'\([a-z]\s*=\s*([a-zA-Z0-9$]{3})(\[\d+\])?\([a-z]\)', to r'\([a-z]\s*=\s*([a-zA-Z0-9$]+)(\[\d+\])?\([a-z]\)',
On line 288, change nfunc=function_match.group(1)), to nfunc=re.escape(function_match.group(1))),

Related

KeyError: 'SECRETS_PATH' Error: '' is not a valid port number

I am trying to run the image as a specific user with docker run --user 1000:1000 connectors, but I got this error:
docker run --user 1000:1000 connectors
* Tip: There are .env or .flaskenv files present. Do "pip install python-dotenv" to use them.
Traceback (most recent call last):
File "/home/connectors/.local/bin/flask", line 8, in <module>
sys.exit(main())
File "/home/connectors/.local/lib/python3.10/site-packages/flask/cli.py", line 988, in main
cli.main()
File "/home/connectors/.local/lib/python3.10/site-packages/flask/cli.py", line 579, in main
return super().main(*args, **kwargs)
File "/home/connectors/.local/lib/python3.10/site-packages/click/core.py", line 1053, in main
rv = self.invoke(ctx)
File "/home/connectors/.local/lib/python3.10/site-packages/click/core.py", line 1653, in invoke
cmd_name, cmd, args = self.resolve_command(ctx, args)
File "/home/connectors/.local/lib/python3.10/site-packages/click/core.py", line 1700, in resolve_command
cmd = self.get_command(ctx, cmd_name)
File "/home/connectors/.local/lib/python3.10/site-packages/flask/cli.py", line 535, in get_command
return info.load_app().cli.get_command(ctx, name)
File "/home/connectors/.local/lib/python3.10/site-packages/flask/cli.py", line 389, in load_app
app = locate_app(import_name, name)
File "/home/connectors/.local/lib/python3.10/site-packages/flask/cli.py", line 234, in locate_app
__import__(module_name)
File "/app/api/app.py", line 15, in <module>
from api.fields import fields_blueprint
File "/app/api/fields.py", line 13, in <module>
from api.oauth import validate_auth_if_provided
File "/app/api/oauth.py", line 13, in <module>
from base import api_helpers, env
File "/app/base/api_helpers.py", line 11, in <module>
from base import env
File "/app/base/env.py", line 32, in <module>
SECRETS_PATH: str = os.environ["SECRETS_PATH"]
File "/usr/local/lib/python3.10/os.py", line 679, in __getitem__
raise KeyError(key) from None
KeyError: 'SECRETS_PATH'
Error: '' is not a valid port number.

google.auth.exceptions.DefaultCredentialsError

I am trying to connect to Google Speech-to-Text API and keep bumping into this error. I'm a noob python user, so thanks in advance for your help and patience!
PS C:\Users\LUVU\Desktop\TD\twistcube\td_speech_to_text> python 2_speech_fromaudio.py
Traceback (most recent call last):
File "C:\Users\LUVU\Desktop\TD\twistcube\td_speech_to_text\2_speech_fromaudio.py", line 11, in <module>
client = speech.SpeechClient()
File "C:\Users\LUVU\AppData\Local\Programs\Python\Python310\lib\site-packages\google\cloud\speech_v1\services\speech\client.py", line 408, in __init__
self._transport = Transport(
File "C:\Users\LUVU\AppData\Local\Programs\Python\Python310\lib\site-packages\google\cloud\speech_v1\services\speech\transports\grpc.py", line 151, in __init__
super().__init__(
File "C:\Users\LUVU\AppData\Local\Programs\Python\Python310\lib\site-packages\google\cloud\speech_v1\services\speech\transports\base.py", line 105, in __init__
credentials, _ = google.auth.default(
File "C:\Users\LUVU\AppData\Local\Programs\Python\Python310\lib\site-packages\google\auth\_default.py", line 544, in default
credentials, project_id = checker()
File "C:\Users\LUVU\AppData\Local\Programs\Python\Python310\lib\site-packages\google\auth\_default.py", line 537, in <lambda>
lambda: _get_explicit_environ_credentials(quota_project_id=quota_project_id),
File "C:\Users\LUVU\AppData\Local\Programs\Python\Python310\lib\site-packages\google\auth\_default.py", line 218, in _get_explicit_environ_credentials
credentials, project_id = load_credentials_from_file(
File "C:\Users\LUVU\AppData\Local\Programs\Python\Python310\lib\site-packages\google\auth\_default.py", line 117, in load_credentials_from_file
raise exceptions.DefaultCredentialsError(
google.auth.exceptions.DefaultCredentialsError: File [td-cc-342823-bda3eb899dc0] was not found.
PS C:\Users\LUVU\Desktop\TD\twistcube\td_speech_to_text>
Screenshot

Python-docx in Google cloud functions

Has anyone tried using python-docx on Google Cloud Functions?
I am just getting started and can't get the simple code below to work:
from docx import Document
# Load the existing file 'blank_doc.docx'.
document = Document('blank_doc.docx')
# Saving to a relative path: the log below shows the equivalent save call
# failing with "OSError: [Errno 30] Read-only file system".
document.save('test.docx');
Edit 1:
Here's the log for above snippet:
2021-02-05T15:39:56.658Ztest1w0yivt7dw3gw Traceback (most recent call last):
File "/env/local/lib/python3.7/site-packages/google/cloud/functions/worker_v2.py",
line 402, in run_http_function result = _function_handler.invoke_user_function(flask.request)
File "/env/local/lib/python3.7/site-packages/google/cloud/functions/worker_v2.py",
line 268, in invoke_user_function return call_user_function(request_or_event)
File "/env/local/lib/python3.7/site-packages/google/cloud/functions/worker_v2.py",
line 261, in call_user_function return self._user_function(request_or_event)
File "/user_code/main.py",
line 6, in test1 document.save('document.docx')
File "/env/local/lib/python3.7/site-packages/docx/document.py",
line 135, in save self._part.save(path_or_stream)
File "/env/local/lib/python3.7/site-packages/docx/parts/document.py",
line 111, in save self.package.save(path_or_stream)
File "/env/local/lib/python3.7/site-packages/docx/opc/package.py",
line 172, in save PackageWriter.write(pkg_file, self.rels, self.parts)
File "/env/local/lib/python3.7/site-packages/docx/opc/pkgwriter.py",
line 32, in write phys_writer = PhysPkgWriter(pkg_file)
File "/env/local/lib/python3.7/site-packages/docx/opc/phys_pkg.py",
line 141, in __init__ self._zipf = ZipFile(pkg_file, 'w', compression=ZIP_DEFLATED)
File "/opt/python3.7/lib/python3.7/zipfile.py",
line 1240, in __init__ self.fp = io.open(file, filemode) OSError: [Errno 30] Read-only file system: 'document.docx'
Traceback (most recent call last): File "/env/local/lib/python3.7/site-packages/google/cloud/functions/worker_v2.py",
line 402, in run_http_function result = _function_handler.invoke_user_function(flask.request)
File "/env/local/lib/python3.7/site-packages/google/cloud/functions/worker_v2.py",
line 268, in invoke_user_function return call_user_function(request_or_event)
File "/env/local/lib/python3.7/site-packages/google/cloud/functions/worker_v2.py",
line 261, in call_user_function return self._user_function(request_or_event)
File "/user_code/main.py", line 6, in test1 document.save('document.docx')
File "/env/local/lib/python3.7/site-packages/docx/document.py",
line 135, in save self._part.save(path_or_stream)
File "/env/local/lib/python3.7/site-packages/docx/parts/document.py",
line 111, in save self.package.save(path_or_stream)
File "/env/local/lib/python3.7/site-packages/docx/opc/package.py",
line 172, in save PackageWriter.write(pkg_file, self.rels, self.parts)
File "/env/local/lib/python3.7/site-packages/docx/opc/pkgwriter.py",
line 32, in write phys_writer = PhysPkgWriter(pkg_file)
File "/env/local/lib/python3.7/site-packages/docx/opc/phys_pkg.py",
line 141, in __init__ self._zipf = ZipFile(pkg_file, 'w', compression=ZIP_DEFLATED)
File "/opt/python3.7/lib/python3.7/zipfile.py",
line 1240, in __init__ self.fp = io.open(file, filemode) OSError: [Errno 30] Read-only file system: 'document.docx'
I thought this had something to do with bucket storage, so I tried the code below, but I can't get it to work either. I would appreciate any help.
from google.cloud import storage
from docx import Document
client = storage.Client()
import io
def test(request):
    """HTTP function that tries to save a .docx and upload it to a bucket.

    As posted, this raises AttributeError at the ``encode`` call below
    (see the log under "Edit 2").
    """
    file_stream = io.BytesIO()
    BUCKET = 'out_bucket'
    document = Document('blank_doc.docx')
    bucket = client.get_bucket(BUCKET)
    # Write the document into the in-memory buffer rather than to disk.
    document.save(file_stream)
    # This is the failing line in the log: a BytesIO object has no ``encode``.
    file_stream = file_stream.encode('utf-8')
    # NOTE(review): the buffer is passed where a blob name looks expected, and a
    # file name is passed to upload_from_file -- confirm against the
    # google-cloud-storage documentation.
    newblob = bucket.blob(file_stream)
    newblob.upload_from_file('document.docx')
Edit 2:
Sorry, here is the log for snippet above:
2021-02-05T15:30:53.665Ztest18s7xo9gksz2 Traceback (most recent call last):
File "/env/local/lib/python3.7/site-packages/google/cloud/functions/worker_v2.py",
line 402, in run_http_function result = _function_handler.invoke_user_function(flask.request)
File "/env/local/lib/python3.7/site-packages/google/cloud/functions/worker_v2.py",
line 268, in invoke_user_function return call_user_function(request_or_event)
File "/env/local/lib/python3.7/site-packages/google/cloud/functions/worker_v2.py",
line 261, in call_user_function return self._user_function(request_or_event)
File "/user_code/main.py",
line 13, in test file_stream = file_stream.encode('utf-8') AttributeError:
'_io.BytesIO' object has no attribute 'encode'
Traceback (most recent call last):
File "/env/local/lib/python3.7/site-packages/google/cloud/functions/worker_v2.py",
line 402, in run_http_function result = _function_handler.invoke_user_function(flask.request)
File "/env/local/lib/python3.7/site-packages/google/cloud/functions/worker_v2.py",
line 268, in invoke_user_function return call_user_function(request_or_event)
File "/env/local/lib/python3.7/site-packages/google/cloud/functions/worker_v2.py",
line 261, in call_user_function return self._user_function(request_or_event)
File "/user_code/main.py",
line 13, in test file_stream = file_stream.encode('utf-8') AttributeError:
'_io.BytesIO' object has no attribute 'encode'
On Cloud Functions, the only path you can write files to is /tmp (as mentioned here), so I rewrote the Cloud Function hello_world as:
from docx import Document
def hello_world(request):
    """HTTP Cloud Function that writes an empty .docx to /tmp and echoes a message.

    Args:
        request: The incoming Flask request object.

    Returns:
        The ``message`` query parameter if present, otherwise the ``message``
        field of the JSON body if present, otherwise ``'Hello World!'``.
    """
    # /tmp is the writable location on Cloud Functions; saving elsewhere fails
    # with "OSError: [Errno 30] Read-only file system".
    document = Document()
    document.save('/tmp/test.docx')

    if request.args and 'message' in request.args:
        return request.args.get('message')

    # silent=True yields None instead of an error when the body is missing or
    # is not valid JSON, so requests without a JSON payload still get a reply.
    request_json = request.get_json(silent=True)
    if request_json and 'message' in request_json:
        return request_json['message']
    return 'Hello World!'
And I added the following line to the requirements.txt file:
python-docx

PyCharm's code coverage not working due to Windows drive letters

I have a computer that has a C and a D drive, where PyCharm, Python and the source code are installed on the D drive. I'm using Pipenv with PIPENV_VENV_IN_PROJECT enabled, so the virtual environment also ends up on the D drive. Despite all this, when running with code coverage enabled, I get this error:
Destroying test database for alias 'default'...
Traceback (most recent call last):
File "D:\Development\PyCharm\PyCharm 2019.1.1\helpers\coverage_runner\run_coverage.py", line 54, in <module>
main()
File "D:\Business\projectx\.venv\lib\site-packages\coverage\cmdline.py", line 770, in main
status = CoverageScript().command_line(argv)
File "D:\Business\projectx\.venv\lib\site-packages\coverage\cmdline.py", line 489, in command_line
return self.do_run(options, args)
File "D:\Business\projectx\.venv\lib\site-packages\coverage\cmdline.py", line 657, in do_run
self.coverage.save()
File "D:\Business\projectx\.venv\lib\site-packages\coverage\control.py", line 529, in save
data = self.get_data()
File "D:\Business\projectx\.venv\lib\site-packages\coverage\control.py", line 583, in get_data
if self._collector and self._collector.flush_data():
File "D:\Business\projectx\.venv\lib\site-packages\coverage\collector.py", line 425, in flush_data
self.covdata.add_lines(abs_file_dict(self.data))
File "D:\Business\projectx\.venv\lib\site-packages\coverage\sqldata.py", line 236, in add_lines
self._choose_lines_or_arcs(lines=True)
File "D:\Business\projectx\.venv\lib\site-packages\coverage\sqldata.py", line 279, in _choose_lines_or_arcs
with self._connect() as con:
File "D:\Business\projectx\.venv\lib\site-packages\coverage\sqldata.py", line 160, in _connect
self._create_db()
File "D:\Business\projectx\.venv\lib\site-packages\coverage\sqldata.py", line 117, in _create_db
with self._db:
File "D:\Business\projectx\.venv\lib\site-packages\coverage\sqldata.py", line 525, in __enter__
self.connect()
File "D:\Business\projectx\.venv\lib\site-packages\coverage\sqldata.py", line 510, in connect
filename = os.path.relpath(self.filename)
File "D:\Business\projectx\.venv\lib\ntpath.py", line 562, in relpath
path_drive, start_drive))
ValueError: path is on mount 'C:', start on mount 'D:'
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "D:\Development\PyCharm\PyCharm 2019.1.1\helpers\coverage_runner\run_coverage.py", line 58, in <module>
main(["xml", "-o", coverage_file + ".xml", "--ignore-errors"])
File "D:\Business\projectx\.venv\lib\site-packages\coverage\cmdline.py", line 770, in main
status = CoverageScript().command_line(argv)
File "D:\Business\projectx\.venv\lib\site-packages\coverage\cmdline.py", line 511, in command_line
self.coverage.load()
File "D:\Business\projectx\.venv\lib\site-packages\coverage\control.py", line 336, in load
self._data.read()
File "D:\Business\projectx\.venv\lib\site-packages\coverage\sqldata.py", line 409, in read
with self._connect(): # TODO: doesn't look right
File "D:\Business\projectx\.venv\lib\site-packages\coverage\sqldata.py", line 160, in _connect
self._create_db()
File "D:\Business\projectx\.venv\lib\site-packages\coverage\sqldata.py", line 117, in _create_db
with self._db:
File "D:\Business\projectx\.venv\lib\site-packages\coverage\sqldata.py", line 525, in __enter__
self.connect()
File "D:\Business\projectx\.venv\lib\site-packages\coverage\sqldata.py", line 510, in connect
filename = os.path.relpath(self.filename)
File "D:\Business\projectx\.venv\lib\ntpath.py", line 562, in relpath
path_drive, start_drive))
ValueError: path is on mount 'C:', start on mount 'D:'
Any ideas why?

Unable to download file with pysftp get

I am able to connect to the SFTP server with pysftp successfully, but downloading a file fails with FileNotFoundError: [Errno 2] No such file. I also noticed that an empty file is created at the local path with a '?' appended after the extension. More details are below.
File on the server: Test_03132018080105.csv
File created at the local path: Test_03132018080105.csv? (zero bytes)
Code:
def get_move_on_ftp(ftpsource,localsource):
    """Download every file listed in ``envvar.validfiles`` from the SFTP directory ``ftpsource``.

    ``localsource`` is accepted but not used inside this function.
    """
    # Both list files being empty is treated as "nothing to download".
    if (os.stat(envvar.validfiles).st_size == 0) and (os.stat(envvar.invalidfiles).st_size == 0):
        print("There are no Source files on FTP.")
    else:
        srcinfo={'host':envvar.src_ftphost,'port':envvar.src_ftpport,'username':envvar.src_uname,'password':envvar.src_passwd}
        sftp = pysftp.Connection(**srcinfo)
        sftp.cwd(ftpsource)
        ''' Downloading Files '''
        # NOTE(review): this file handle is never closed in the code shown.
        avail_files=open(envvar.validfiles,'r')
        for filename in avail_files:
            print(sftp.getcwd())
            #sftp.get(filename, preserve_mtime=True)
            print(filename) # for debug
            # NOTE(review): iterating a text file yields each line with its
            # trailing newline, so ``filename`` reaches sftp.get() with '\n'
            # attached -- consistent with the local "...csv?" file and the
            # "No such file" error reported; stripping it (filename.strip())
            # before the call is the likely fix. Confirm.
            sftp.get(filename)
        sftp.close()
Error:
Traceback (most recent call last):
File "my.py", line 96, in <module>
main()
File "my.py", line 92, in main
config_file_read(config_file)
File "my.py", line 85, in config_file_read
get_move_on_ftp(ftpsource,localsource)
File "my.py", line 61, in get_move_on_ftp
sftp.get(filename)
File "/home/username/miniconda3/lib/python3.6/site-packages/pysftp/__init__.py", line 249, in get
self._sftp.get(remotepath, localpath, callback=callback)
File "/home/username/miniconda3/lib/python3.6/site-packages/paramiko/sftp_client.py", line 770, in get
size = self.getfo(remotepath, fl, callback)
File "/home/username/miniconda3/lib/python3.6/site-packages/paramiko/sftp_client.py", line 746, in getfo
file_size = self.stat(remotepath).st_size
File "/home/username/miniconda3/lib/python3.6/site-packages/paramiko/sftp_client.py", line 460, in stat
t, msg = self._request(CMD_STAT, path)
File "/home/username/miniconda3/lib/python3.6/site-packages/paramiko/sftp_client.py", line 780, in _request
return self._read_response(num)
File "/home/username/miniconda3/lib/python3.6/site-packages/paramiko/sftp_client.py", line 832, in _read_response
self._convert_status(msg)
File "/home/username/miniconda3/lib/python3.6/site-packages/paramiko/sftp_client.py", line 861, in _convert_status
raise IOError(errno.ENOENT, text)
FileNotFoundError: [Errno 2] No such file
Just add "/" in front of ftpsource, i.e. "/2020/Jan/10/". It should work.

Categories

Resources