python-telegram-bot: The last sent message is not a document - python

I have problem to download files from my private chat.
I've added a bot as administrator to my private chat. To upload on telegram with a python script I have no problems, for example I use successfully this code
import os
import telegram
# Enter your bot's token here
TOKEN = "58819**********************....."
# Enter your private chat ID here
CHAT_ID = -100****...
# Enter the path to the document here
DOCUMENT_PATH = "C:\\temp\\telegram\\snow.epub"
# Create a Bot object using the token
bot = telegram.Bot(token=TOKEN)
# Open the document in binary mode
with open(DOCUMENT_PATH, "rb") as f:
# Send the document to the specified chat
bot.send_document(chat_id=CHAT_ID, document=f)
print("Document sent successfully!")
Problem is download this file (an epub file) from my private chat
i try with this code but nothing happens, powershell returns me this message
PS C:\temp\telegram> python scaricare2.py
The last sent message is not a document.
I test with this code
import os
import os.path
import telegram
# Enter your bot's token here
TOKEN = "588199*****************..."
# Enter your private chat ID here
CHAT_ID = -1001******...
# Create a Bot object using the token
bot = telegram.Bot(token=TOKEN)
# Download the latest file sent in the chat
updates = bot.get_updates(offset=-1, limit=1)
if updates:
# Checks if the Message object contains a document attribute
if hasattr(updates[0].message, "document"):
file_id = updates[0].message.document.file_id
else:
print("The last sent message is not a document.")
exit()
file_info = bot.get_file(file_id)
file = bot.download_file(file_info.file_path)
# Create the path to the directory where to save the file
download_path = "C:/temp/telegram/filexx/"
# Create the full path to the file using the file name
file_path = os.path.join(download_path, file_info.file_name)
# Open the file in binary write mode
with open(file_path, "wb") as f:
# Write the contents of the file to the specified path
f.write(file)
print("File downloaded successfully!")
else:
print("The last sent message is not a document.")
Any idea to solve ?
Download from bot returns this error
PS C:\temp\telegram> python scaricare2.py
Traceback (most recent call last):
File "C:\Program Files\Python310\lib\site-packages\telegram\vendor\ptb_urllib3\urllib3\connectionpool.py", line 402, in _make_request
six.raise_from(e, None)
File "<string>", line 2, in raise_from
File "C:\Program Files\Python310\lib\site-packages\telegram\vendor\ptb_urllib3\urllib3\connectionpool.py", line 398, in _make_request
httplib_response = conn.getresponse()
File "C:\Program Files\Python310\lib\http\client.py", line 1374, in getresponse
response.begin()
File "C:\Program Files\Python310\lib\http\client.py", line 318, in begin
version, status, reason = self._read_status()
File "C:\Program Files\Python310\lib\http\client.py", line 279, in _read_status
line = str(self.fp.readline(_MAXLINE + 1), "iso-8859-1")
File "C:\Program Files\Python310\lib\socket.py", line 705, in readinto
return self._sock.recv_into(b)
File "C:\Program Files\Python310\lib\ssl.py", line 1273, in recv_into
return self.read(nbytes, buffer)
File "C:\Program Files\Python310\lib\ssl.py", line 1129, in read
return self._sslobj.read(len, buffer)
TimeoutError: The read operation timed out
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "C:\Program Files\Python310\lib\site-packages\telegram\utils\request.py", line 259, in _request_wrapper
resp = self._con_pool.request(*args, **kwargs)
File "C:\Program Files\Python310\lib\site-packages\telegram\vendor\ptb_urllib3\urllib3\request.py", line 68, in request
return self.request_encode_body(method, url, fields=fields,
File "C:\Program Files\Python310\lib\site-packages\telegram\vendor\ptb_urllib3\urllib3\request.py", line 148, in request_encode_body
return self.urlopen(method, url, **extra_kw)
File "C:\Program Files\Python310\lib\site-packages\telegram\vendor\ptb_urllib3\urllib3\poolmanager.py", line 244, in urlopen
response = conn.urlopen(method, u.request_uri, **kw)
File "C:\Program Files\Python310\lib\site-packages\telegram\vendor\ptb_urllib3\urllib3\connectionpool.py", line 665, in urlopen
retries = retries.increment(method, url, error=e, _pool=self,
File "C:\Program Files\Python310\lib\site-packages\telegram\vendor\ptb_urllib3\urllib3\util\retry.py", line 347, in increment
raise six.reraise(type(error), error, _stacktrace)
File "C:\Program Files\Python310\lib\site-packages\telegram\vendor\ptb_urllib3\urllib3\packages\six.py", line 686, in reraise
raise value
File "C:\Program Files\Python310\lib\site-packages\telegram\vendor\ptb_urllib3\urllib3\connectionpool.py", line 614, in urlopen
httplib_response = self._make_request(conn, method, url,
File "C:\Program Files\Python310\lib\site-packages\telegram\vendor\ptb_urllib3\urllib3\connectionpool.py", line 404, in _make_request
self._raise_timeout(err=e, url=url, timeout_value=read_timeout,
File "C:\Program Files\Python310\lib\site-packages\telegram\vendor\ptb_urllib3\urllib3\connectionpool.py", line 321, in _raise_timeout
raise exc_cls(*args)
telegram.vendor.ptb_urllib3.urllib3.exceptions.ReadTimeoutError: HTTPSConnectionPool(host='api.telegram.org', port=443): Read timed out. (read timeout=5.0)
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "C:\temp\telegram\scaricare2.py", line 24, in <module>
file_info = bot.get_file(file_id)
File "C:\Program Files\Python310\lib\site-packages\telegram\bot.py", line 134, in decorator
result = func(*args, **kwargs)
File "C:\Program Files\Python310\lib\site-packages\telegram\bot.py", line 2510, in get_file
result = self._post('getFile', data, timeout=timeout, api_kwargs=api_kwargs)
File "C:\Program Files\Python310\lib\site-packages\telegram\bot.py", line 299, in _post
return self.request.post(
File "C:\Program Files\Python310\lib\site-packages\telegram\utils\request.py", line 361, in post
result = self._request_wrapper(
File "C:\Program Files\Python310\lib\site-packages\telegram\utils\request.py", line 261, in _request_wrapper
raise TimedOut() from error
telegram.error.TimedOut: Timed out

Related

Getting error: An established connection was aborted by the software in your host machine when running a python script to extract news published dates

I wrote a script to extract published dates from news articles. I have all the urls to these articles in a text file (one url per line). The goal is to group the articles by date (one file for each day and it has all news stories published in that day). The script runs fine but it takes a very long time (sometimes weeks). There are about 300k news articles' urls in each text file. the script eventually stops and gives this error
Traceback (most recent call last):
File "C:\Users\hhallak\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\LocalCache\local-packages\Python310\site-packages\urllib3\connectionpool.py", line 703, in urlopen
httplib_response = self._make_request(
File "C:\Users\hhallak\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\LocalCache\local-packages\Python310\site-packages\urllib3\connectionpool.py", line 386, in _make_request
self._validate_conn(conn)
File "C:\Users\hhallak\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\LocalCache\local-packages\Python310\site-packages\urllib3\connectionpool.py", line 1042, in _validate_conn
conn.connect()
File "C:\Users\hhallak\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\LocalCache\local-packages\Python310\site-packages\urllib3\connection.py", line 414, in connect
self.sock = ssl_wrap_socket(
File "C:\Users\hhallak\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\LocalCache\local-packages\Python310\site-packages\urllib3\util\ssl_.py", line 449, in ssl_wrap_socket
ssl_sock = _ssl_wrap_socket_impl(
File "C:\Users\hhallak\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\LocalCache\local-packages\Python310\site-packages\urllib3\util\ssl_.py", line 493, in _ssl_wrap_socket_impl
return ssl_context.wrap_socket(sock, server_hostname=server_hostname)
File "C:\Program Files\WindowsApps\PythonSoftwareFoundation.Python.3.10_3.10.2288.0_x64__qbz5n2kfra8p0\lib\ssl.py", line 513, in wrap_socket
return self.sslsocket_class._create(
File "C:\Program Files\WindowsApps\PythonSoftwareFoundation.Python.3.10_3.10.2288.0_x64__qbz5n2kfra8p0\lib\ssl.py", line 1071, in _create
self.do_handshake()
File "C:\Program Files\WindowsApps\PythonSoftwareFoundation.Python.3.10_3.10.2288.0_x64__qbz5n2kfra8p0\lib\ssl.py", line 1342, in do_handshake
self._sslobj.do_handshake()
ConnectionAbortedError: [WinError 10053] An established connection was aborted by the software in your host machine
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "C:\Users\hhallak\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\LocalCache\local-packages\Python310\site-packages\requests\adapters.py", line 489, in send
resp = conn.urlopen(
File "C:\Users\hhallak\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\LocalCache\local-packages\Python310\site-packages\urllib3\connectionpool.py", line 787, in urlopen
retries = retries.increment(
File "C:\Users\hhallak\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\LocalCache\local-packages\Python310\site-packages\urllib3\util\retry.py", line 550, in increment
raise six.reraise(type(error), error, _stacktrace)
File "C:\Users\hhallak\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\LocalCache\local-packages\Python310\site-packages\urllib3\packages\six.py", line 769, in reraise
raise value.with_traceback(tb)
File "C:\Users\hhallak\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\LocalCache\local-packages\Python310\site-packages\urllib3\connectionpool.py", line 703, in urlopen
httplib_response = self._make_request(
File "C:\Users\hhallak\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\LocalCache\local-packages\Python310\site-packages\urllib3\connectionpool.py", line 386, in _make_request
self._validate_conn(conn)
File "C:\Users\hhallak\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\LocalCache\local-packages\Python310\site-packages\urllib3\connectionpool.py", line 1042, in _validate_conn
conn.connect()
File "C:\Users\hhallak\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\LocalCache\local-packages\Python310\site-packages\urllib3\connection.py", line 414, in connect
self.sock = ssl_wrap_socket(
File "C:\Users\hhallak\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\LocalCache\local-packages\Python310\site-packages\urllib3\util\ssl_.py", line 449, in ssl_wrap_socket
ssl_sock = _ssl_wrap_socket_impl(
File "C:\Users\hhallak\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\LocalCache\local-packages\Python310\site-packages\urllib3\util\ssl_.py", line 493, in _ssl_wrap_socket_impl
return ssl_context.wrap_socket(sock, server_hostname=server_hostname)
File "C:\Program Files\WindowsApps\PythonSoftwareFoundation.Python.3.10_3.10.2288.0_x64__qbz5n2kfra8p0\lib\ssl.py", line 513, in wrap_socket
return self.sslsocket_class._create(
File "C:\Program Files\WindowsApps\PythonSoftwareFoundation.Python.3.10_3.10.2288.0_x64__qbz5n2kfra8p0\lib\ssl.py", line 1071, in _create
self.do_handshake()
File "C:\Program Files\WindowsApps\PythonSoftwareFoundation.Python.3.10_3.10.2288.0_x64__qbz5n2kfra8p0\lib\ssl.py", line 1342, in do_handshake
self._sslobj.do_handshake()
urllib3.exceptions.ProtocolError: ('Connection aborted.', ConnectionAbortedError(10053, 'An established connection was aborted by the software in your host machine', None, 10053, None))
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "C:\Users\hhallak\Desktop\split_by_date.py", line 65, in <module>
split(links)
File "C:\Users\hhallak\Desktop\split_by_date.py", line 25, in split
with requests.get(link, stream=True) as response:
File "C:\Users\hhallak\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\LocalCache\local-packages\Python310\site-packages\requests\api.py", line 73, in get
return request("get", url, params=params, **kwargs)
File "C:\Users\hhallak\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\LocalCache\local-packages\Python310\site-packages\requests\api.py", line 59, in request
return session.request(method=method, url=url, **kwargs)
File "C:\Users\hhallak\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\LocalCache\local-packages\Python310\site-packages\requests\sessions.py", line 587, in request
resp = self.send(prep, **send_kwargs)
File "C:\Users\hhallak\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\LocalCache\local-packages\Python310\site-packages\requests\sessions.py", line 701, in send
r = adapter.send(request, **kwargs)
File "C:\Users\hhallak\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\LocalCache\local-packages\Python310\site-packages\requests\adapters.py", line 547, in send
raise ConnectionError(err, request=request)
requests.exceptions.ConnectionError: ('Connection aborted.', ConnectionAbortedError(10053, 'An established connection was aborted by the software in your host machine', None, 10053, None))
This is the code I am running:
import os
import sys
from newspaper import Article
import requests
import json
def split(links):
exists = os.path.exists("output")
if not exists:
# Create a new directory because it does not exist
os.makedirs("output")
exists = os.path.exists("output_redo")
if not exists:
# Create a new directory because it does not exist
os.makedirs("output_redo")
for link in links:
with requests.get(link, stream=True) as response:
response = requests.get(link)
if response.status_code == 200:
final_link = link
else:
final_link = response.url
try:
story = Article(final_link)
story.download()
story.parse()
date_time = str(story.publish_date)
split_date = date_time.split()
date = split_date[0]
with open("output/" + date + ".txt", "a", encoding = 'utf-8') as output_file:
output_file.write(link + "\n")
except:
print("The script was not able to extract published date. Moving the url to be crawled later.")
print("link: ", link)
print("status code: ", response.status_code)
print("final link: ", final_link)
with open("output_redo/" + "links_to_redo" + ".txt", "a", encoding = 'utf-8') as output_redo:
output_redo.write(link + "\n")
continue
if __name__ == "__main__":
if len(sys.argv) != 2:
print ("Usage: Python split_by_date.py <file_name>")
print ("e.g: python split_by_date.py input_file.txt")
sys.exit()
else:
file_name = sys.argv[1]
with open(file_name, "r", encoding = 'utf-8') as input_file:
input_data = input_file.read()
links = input_data.split("\n")
del links[-1]
split(links)

Telegram bot parser (TimeoutError: The read operation timed out; ReadTimeoutError)

I'm trying to write a Telegram Messenger bot using telebot that parses the ad page every five seconds and checks if a new ad has appeared.
I change IP address every 10 seconds using Tor. The bot works for a while, but then it crashes. Without a bot, the program works fine. In the same program, but without the bot, exceptions work, but it crashes with the bot.
last_check = ''
have_url = False
#bot.message_handler(content_types=['text'])
def send_text(message):
global have_url
if message.text.lower() == 'create request':
bot.send_message(message.chat.id, 'insert URL')
elif domain in message.text: # Define URL
bot.send_message(message.chat.id, 'Search...')
have_url = True
start_parsing(message)
#bot.message_handler()
def parse(message):
global last_check
olx_url = message.text # Get URL from user
r = get(olx_url, timeout=10, headers=headers, stream=False)
if r.status_code == 200:
'''Parse page and send link to user'''
def start_parsing(url):
while have_url:
try:
parse(url)
except Exception as e:
print(e)
time.sleep(5)
bot.polling(none_stop=True)
After about 20 minutes I get this:
Traceback (most recent call last):
Fileenter code here "C:\Users\evgen\AppData\Local\Programs\Python\Python310\lib\site-packages\urllib3\connectionpool.py", line 449, in _make_request
six.raise_from(e, None)
File "<string>", line 3, in raise_from
File "C:\Users\evgen\AppData\Local\Programs\Python\Python310\lib\site-packages\urllib3\connectionpool.py", line 444, in _make_request
httplib_response = conn.getresponse()
File "C:\Users\evgen\AppData\Local\Programs\Python\Python310\lib\http\client.py", line 1374, in getresponse
response.begin()
File "C:\Users\evgen\AppData\Local\Programs\Python\Python310\lib\http\client.py", line 318, in begin
version, status, reason = self._read_status()
File "C:\Users\evgen\AppData\Local\Programs\Python\Python310\lib\http\client.py", line 279, in _read_status
line = str(self.fp.readline(_MAXLINE + 1), "iso-8859-1")
File "C:\Users\evgen\AppData\Local\Programs\Python\Python310\lib\socket.py", line 705, in readinto
return self._sock.recv_into(b)
File "C:\Users\evgen\AppData\Local\Programs\Python\Python310\lib\ssl.py", line 1273, in recv_into
return self.read(nbytes, buffer)
File "C:\Users\evgen\AppData\Local\Programs\Python\Python310\lib\ssl.py", line 1129, in read
return self._sslobj.read(len, buffer)
TimeoutError: The read operation timed out
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "C:\Users\evgen\AppData\Local\Programs\Python\Python310\lib\site-packages\requests\adapters.py", line 440, in send
resp = conn.urlopen(
File "C:\Users\evgen\AppData\Local\Programs\Python\Python310\lib\site-packages\urllib3\connectionpool.py", line 785, in urlopen
retries = retries.increment(
File "C:\Users\evgen\AppData\Local\Programs\Python\Python310\lib\site-packages\urllib3\util\retry.py", line 550, in increment
raise six.reraise(type(error), error, _stacktrace)
File "C:\Users\evgen\AppData\Local\Programs\Python\Python310\lib\site-packages\urllib3\packages\six.py", line 770, in reraise
raise value
File "C:\Users\evgen\AppData\Local\Programs\Python\Python310\lib\site-packages\urllib3\connectionpool.py", line 703, in urlopen
httplib_response = self._make_request(
File "C:\Users\evgen\AppData\Local\Programs\Python\Python310\lib\site-packages\urllib3\connectionpool.py", line 451, in _make_request
self._raise_timeout(err=e, url=url, timeout_value=read_timeout)
File "C:\Users\evgen\AppData\Local\Programs\Python\Python310\lib\site-packages\urllib3\connectionpool.py", line 340, in _raise_timeout
raise ReadTimeoutError(
urllib3.exceptions.ReadTimeoutError: HTTPSConnectionPool(host='api.telegram.org', port=443): Read timed out. (read timeout=25)
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "D:\PycharmProjects\notifier\notifier\olx_bot.py", line 95, in <module>
bot.polling(none_stop=True)
File "C:\Users\evgen\AppData\Local\Programs\Python\Python310\lib\site-packages\telebot\__init__.py", line 946, in polling
self.__threaded_polling(non_stop=non_stop, interval=interval, timeout=timeout, long_polling_timeout=long_polling_timeout,
File "C:\Users\evgen\AppData\Local\Programs\Python\Python310\lib\site-packages\telebot\__init__.py", line 1021, in __threaded_polling
raise e
File "C:\Users\evgen\AppData\Local\Programs\Python\Python310\lib\site-packages\telebot\__init__.py", line 976, in __threaded_polling
polling_thread.raise_exceptions()
File "C:\Users\evgen\AppData\Local\Programs\Python\Python310\lib\site-packages\telebot\util.py", line 116, in raise_exceptions
raise self.exception_info
File "C:\Users\evgen\AppData\Local\Programs\Python\Python310\lib\site-packages\telebot\util.py", line 98, in run
task(*args, **kwargs)
File "C:\Users\evgen\AppData\Local\Programs\Python\Python310\lib\site-packages\telebot\__init__.py", line 601, in __retrieve_updates
updates = self.get_updates(offset=(self.last_update_id + 1),
File "C:\Users\evgen\AppData\Local\Programs\Python\Python310\lib\site-packages\telebot\__init__.py", line 575, in get_updates
json_updates = apihelper.get_updates(self.token, offset, limit, timeout, allowed_updates, long_polling_timeout)
File "C:\Users\evgen\AppData\Local\Programs\Python\Python310\lib\site-packages\telebot\apihelper.py", line 324, in get_updates
return _make_request(token, method_url, params=payload)
File "C:\Users\evgen\AppData\Local\Programs\Python\Python310\lib\site-packages\telebot\apihelper.py", line 146, in _make_request
result = _get_req_session().request(
File "C:\Users\evgen\AppData\Local\Programs\Python\Python310\lib\site-packages\requests\sessions.py", line 529, in request
resp = self.send(prep, **send_kwargs)
File "C:\Users\evgen\AppData\Local\Programs\Python\Python310\lib\site-packages\requests\sessions.py", line 645, in send
r = adapter.send(request, **kwargs)
File "C:\Users\evgen\AppData\Local\Programs\Python\Python310\lib\site-packages\requests\adapters.py", line 532, in send
raise ReadTimeout(e, request=request)
requests.exceptions.ReadTimeout: HTTPSConnectionPool(host='api.telegram.org', port=443): Read timed out. (read timeout=25)
Process finished with exit code 1
**

How can I extract file from the public AWS S3 bucket with Python

I have bucket URL, name and files list object key, and need to download the file.
What have I tried:
import boto3
import botocore
BUCKET_NAME = 'my-bucket'
KEY = 'my_file'
s3 = boto3.resource('s3')
try:
s3.Bucket(BUCKET_NAME).download_file(KEY, 'my_local_file')
except botocore.exceptions.ClientError as e:
if e.response['Error']['Code'] == "404":
print("The object does not exist.")
else:
raise
And caught errors:
Traceback (most recent call last):
File "D:\me\work\Mackpaw Data Engeneering test tsk\main.py", line 10, in <module>
s3.Bucket(BUCKET_NAME).download_file(KEY, 'data.json')
File "C:\Users\Katherine\AppData\Local\Programs\Python\Python39\lib\site-packages\boto3\s3\inject.py", line 244, in bucket_download_file
return self.meta.client.download_file(
File "C:\Users\Katherine\AppData\Local\Programs\Python\Python39\lib\site-packages\boto3\s3\inject.py", line 170, in download_file
return transfer.download_file(
File "C:\Users\Katherine\AppData\Local\Programs\Python\Python39\lib\site-packages\boto3\s3\transfer.py", line 307, in download_file
future.result()
File "C:\Users\Katherine\AppData\Local\Programs\Python\Python39\lib\site-packages\s3transfer\futures.py", line 106, in result
return self._coordinator.result()
File "C:\Users\Katherine\AppData\Local\Programs\Python\Python39\lib\site-packages\s3transfer\futures.py", line 265, in result
raise self._exception
File "C:\Users\Katherine\AppData\Local\Programs\Python\Python39\lib\site-packages\s3transfer\tasks.py", line 255, in _main
self._submit(transfer_future=transfer_future, **kwargs)
File "C:\Users\Katherine\AppData\Local\Programs\Python\Python39\lib\site-packages\s3transfer\download.py", line 340, in _submit
response = client.head_object(
File "C:\Users\Katherine\AppData\Local\Programs\Python\Python39\lib\site-packages\botocore\client.py", line 357, in _api_call
return self._make_api_call(operation_name, kwargs)
File "C:\Users\Katherine\AppData\Local\Programs\Python\Python39\lib\site-packages\botocore\client.py", line 662, in _make_api_call
http, parsed_response = self._make_request(
File "C:\Users\Katherine\AppData\Local\Programs\Python\Python39\lib\site-packages\botocore\client.py", line 682, in _make_request
return self._endpoint.make_request(operation_model, request_dict)
File "C:\Users\Katherine\AppData\Local\Programs\Python\Python39\lib\site-packages\botocore\endpoint.py", line 102, in make_request
return self._send_request(request_dict, operation_model)
File "C:\Users\Katherine\AppData\Local\Programs\Python\Python39\lib\site-packages\botocore\endpoint.py", line 132, in _send_request
request = self.create_request(request_dict, operation_model)
File "C:\Users\Katherine\AppData\Local\Programs\Python\Python39\lib\site-packages\botocore\endpoint.py", line 115, in create_request
self._event_emitter.emit(event_name, request=request,
File "C:\Users\Katherine\AppData\Local\Programs\Python\Python39\lib\site-packages\botocore\hooks.py", line 356, in emit
return self._emitter.emit(aliased_event_name, **kwargs)
File "C:\Users\Katherine\AppData\Local\Programs\Python\Python39\lib\site-packages\botocore\hooks.py", line 228, in emit
return self._emit(event_name, kwargs)
File "C:\Users\Katherine\AppData\Local\Programs\Python\Python39\lib\site-packages\botocore\hooks.py", line 211, in _emit
response = handler(**kwargs)
File "C:\Users\Katherine\AppData\Local\Programs\Python\Python39\lib\site-packages\botocore\signers.py", line 90, in handler
return self.sign(operation_name, request)
File "C:\Users\Katherine\AppData\Local\Programs\Python\Python39\lib\site-packages\botocore\signers.py", line 162, in sign
auth.add_auth(request)
File "C:\Users\Katherine\AppData\Local\Programs\Python\Python39\lib\site-packages\botocore\auth.py", line 373, in add_auth
raise NoCredentialsError()
botocore.exceptions.NoCredentialsError: Unable to locate credentials
Seems that I'm trying to download the file as if the bucket is private and it needs some configs. But this bucket is public and I haven't found info how to get access to the file in that situation.
If the bucket and its objects are public, you can just use the object's public URL to retrieve the required object. The public URL is in the following format:
https://<image_name>.s3-.amazonaws.com/<file_path_in_s3>
I guess you have not setup the aws credentials - secret and access key. You can follow the below link for the same:
https://boto3.amazonaws.com/v1/documentation/api/latest/guide/credentials.html

How do you solve a timeout error in a python dropbox upload script?

I'm currently working on a project to do a timelapse with a raspberry pi3 (which works well, capturing the photos ain't a problem).
However, I used another script to upload those photos to a dropbox account and it work all well, until I checked it out today, seeing a timeout error.
Does anyone know how this could solved?
This is the error:
Traceback (most recent call last):
File "/home/pi/.local/lib/python3.5/site-packages/urllib3/contrib/pyopenssl.py", line 453, in wrap_socket
cnx.do_handshake()
File "/usr/lib/python3/dist-packages/OpenSSL/SSL.py", line 1426, in do_handshake
self._raise_ssl_error(self._ssl, result)
File "/usr/lib/python3/dist-packages/OpenSSL/SSL.py", line 1149, in _raise_ssl_error
raise WantReadError()
OpenSSL.SSL.WantReadError
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/home/pi/.local/lib/python3.5/site-packages/urllib3/connectionpool.py", line 343, in _make_request
self._validate_conn(conn)
File "/home/pi/.local/lib/python3.5/site-packages/urllib3/connectionpool.py", line 839, in _validate_conn
conn.connect()
File "/home/pi/.local/lib/python3.5/site-packages/urllib3/connection.py", line 344, in connect
ssl_context=context)
File "/home/pi/.local/lib/python3.5/site-packages/urllib3/util/ssl_.py", line 344, in ssl_wrap_socket
return context.wrap_socket(sock, server_hostname=server_hostname)
File "/home/pi/.local/lib/python3.5/site-packages/urllib3/contrib/pyopenssl.py", line 456, in wrap_socket
raise timeout('select timed out')
socket.timeout: select timed out
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/home/pi/.local/lib/python3.5/site-packages/requests/adapters.py", line 449, in send
timeout=timeout
File "/home/pi/.local/lib/python3.5/site-packages/urllib3/connectionpool.py", line 638, in urlopen
_stacktrace=sys.exc_info()[2])
File "/home/pi/.local/lib/python3.5/site-packages/urllib3/util/retry.py", line 367, in increment
raise six.reraise(type(error), error, _stacktrace)
File "/home/pi/.local/lib/python3.5/site-packages/urllib3/packages/six.py", line 686, in reraise
raise value
File "/home/pi/.local/lib/python3.5/site-packages/urllib3/connectionpool.py", line 600, in urlopen
chunked=chunked)
File "/home/pi/.local/lib/python3.5/site-packages/urllib3/connectionpool.py", line 346, in _make_request
self._raise_timeout(err=e, url=url, timeout_value=conn.timeout)
File "/home/pi/.local/lib/python3.5/site-packages/urllib3/connectionpool.py", line 306, in _raise_timeout
raise ReadTimeoutError(self, url, "Read timed out. (read timeout=%s)" % timeout_value)
urllib3.exceptions.ReadTimeoutError: HTTPSConnectionPool(host='content.dropboxapi.com', port=443): Read timed out. (read timeout=30)
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/home/pi/Desktop/time/picture/drop.py", line 16, in <module>
response = db.files_upload(f.read(), dname)
File "/home/pi/.local/lib/python3.5/site-packages/dropbox/base.py", line 2293, in files_upload
f,
File "/home/pi/.local/lib/python3.5/site-packages/dropbox/dropbox.py", line 274, in request
timeout=timeout)
File "/home/pi/.local/lib/python3.5/site-packages/dropbox/dropbox.py", line 365, in request_json_string_with_retry
timeout=timeout)
File "/home/pi/.local/lib/python3.5/site-packages/dropbox/dropbox.py", line 449, in request_json_string
timeout=timeout,
File "/home/pi/.local/lib/python3.5/site-packages/requests/sessions.py", line 581, in post
return self.request('POST', url, data=data, json=json, **kwargs)
File "/home/pi/.local/lib/python3.5/site-packages/requests/sessions.py", line 533, in request
resp = self.send(prep, **send_kwargs)
File "/home/pi/.local/lib/python3.5/site-packages/requests/sessions.py", line 646, in send
r = adapter.send(request, **kwargs)
File "/home/pi/.local/lib/python3.5/site-packages/requests/adapters.py", line 529, in send
raise ReadTimeout(e, request=request)
requests.exceptions.ReadTimeout: HTTPSConnectionPool(host='content.dropboxapi.com', port=443): Read timed out. (read timeout=30)
>>>
This is how the code to upload on dropbox looks:
import dropbox
from time import sleep
db = dropbox.Dropbox('apikeyplaceholder')
for i in range(4500): ##4500
name = 'image{0:04d}.jpg'
fname = '/home/pi/Desktop/time/picture/'+name.format(i)
dname = '/'+name.format(i)
f = open(fname, 'rb')
response = db.files_upload(f.read(), dname)
print ('uploaded:', response)
sleep(585)
f.close()
NEW ERROR:
Traceback (most recent call last):
File "/home/pi/.local/lib/python3.5/site-packages/urllib3/contrib/pyopenssl.py", line 317, in _send_until_done
return self.connection.send(data)
File "/usr/lib/python3/dist-packages/OpenSSL/SSL.py", line 1256, in send
self._raise_ssl_error(self._ssl, result)
File "/usr/lib/python3/dist-packages/OpenSSL/SSL.py", line 1151, in _raise_ssl_error
raise WantWriteError()
OpenSSL.SSL.WantWriteError
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/home/pi/.local/lib/python3.5/site-packages/urllib3/connectionpool.py", line 600, in urlopen
chunked=chunked)
File "/home/pi/.local/lib/python3.5/site-packages/urllib3/connectionpool.py", line 354, in _make_request
conn.request(method, url, **httplib_request_kw)
File "/usr/lib/python3.5/http/client.py", line 1107, in request
self._send_request(method, url, body, headers)
File "/usr/lib/python3.5/http/client.py", line 1152, in _send_request
self.endheaders(body)
File "/usr/lib/python3.5/http/client.py", line 1103, in endheaders
self._send_output(message_body)
File "/usr/lib/python3.5/http/client.py", line 936, in _send_output
self.send(message_body)
File "/usr/lib/python3.5/http/client.py", line 908, in send
self.sock.sendall(data)
File "/home/pi/.local/lib/python3.5/site-packages/urllib3/contrib/pyopenssl.py", line 328, in sendall
sent = self._send_until_done(data[total_sent:total_sent + SSL_WRITE_BLOCKSIZE])
File "/home/pi/.local/lib/python3.5/site-packages/urllib3/contrib/pyopenssl.py", line 320, in _send_until_done
raise timeout()
socket.timeout
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/home/pi/.local/lib/python3.5/site-packages/requests/adapters.py", line 449, in send
timeout=timeout
File "/home/pi/.local/lib/python3.5/site-packages/urllib3/connectionpool.py", line 638, in urlopen
_stacktrace=sys.exc_info()[2])
File "/home/pi/.local/lib/python3.5/site-packages/urllib3/util/retry.py", line 367, in increment
raise six.reraise(type(error), error, _stacktrace)
File "/home/pi/.local/lib/python3.5/site-packages/urllib3/packages/six.py", line 685, in reraise
raise value.with_traceback(tb)
File "/home/pi/.local/lib/python3.5/site-packages/urllib3/connectionpool.py", line 600, in urlopen
chunked=chunked)
File "/home/pi/.local/lib/python3.5/site-packages/urllib3/connectionpool.py", line 354, in _make_request
conn.request(method, url, **httplib_request_kw)
File "/usr/lib/python3.5/http/client.py", line 1107, in request
self._send_request(method, url, body, headers)
File "/usr/lib/python3.5/http/client.py", line 1152, in _send_request
self.endheaders(body)
File "/usr/lib/python3.5/http/client.py", line 1103, in endheaders
self._send_output(message_body)
File "/usr/lib/python3.5/http/client.py", line 936, in _send_output
self.send(message_body)
File "/usr/lib/python3.5/http/client.py", line 908, in send
self.sock.sendall(data)
File "/home/pi/.local/lib/python3.5/site-packages/urllib3/contrib/pyopenssl.py", line 328, in sendall
sent = self._send_until_done(data[total_sent:total_sent + SSL_WRITE_BLOCKSIZE])
File "/home/pi/.local/lib/python3.5/site-packages/urllib3/contrib/pyopenssl.py", line 320, in _send_until_done
raise timeout()
urllib3.exceptions.ProtocolError: ('Connection aborted.', timeout())
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/home/pi/Desktop/time/bild/drop.py", line 20, in <module>
response = upload(f, dname)
File "/usr/local/lib/python3.5/dist-packages/backoff/_sync.py", line 94, in retry
ret = target(*args, **kwargs)
File "/home/pi/Desktop/time/bild/drop.py", line 11, in upload
return db.files_upload(f.read(), dname)
File "/home/pi/.local/lib/python3.5/site-packages/dropbox/base.py", line 2293, in files_upload
f,
File "/home/pi/.local/lib/python3.5/site-packages/dropbox/dropbox.py", line 274, in request
timeout=timeout)
File "/home/pi/.local/lib/python3.5/site-packages/dropbox/dropbox.py", line 365, in request_json_string_with_retry
timeout=timeout)
File "/home/pi/.local/lib/python3.5/site-packages/dropbox/dropbox.py", line 449, in request_json_string
timeout=timeout,
File "/home/pi/.local/lib/python3.5/site-packages/requests/sessions.py", line 581, in post
return self.request('POST', url, data=data, json=json, **kwargs)
File "/home/pi/.local/lib/python3.5/site-packages/requests/sessions.py", line 533, in request
resp = self.send(prep, **send_kwargs)
File "/home/pi/.local/lib/python3.5/site-packages/requests/sessions.py", line 646, in send
r = adapter.send(request, **kwargs)
File "/home/pi/.local/lib/python3.5/site-packages/requests/adapters.py", line 498, in send
raise ConnectionError(err, request=request)
requests.exceptions.ConnectionError: ('Connection aborted.', timeout())
Before you implement code to handle the timeout you first need to think about how you want to handle it. A common strategy is the following:
Try re-uploading a certain number of times, potentially coupled with exponential backoff (e.g. using this library)
If that fails either skip the current image or abort the process completely.
Depending on your setup, you might want to put the uploading code into a separate thread so that delays during uploads don't interfere with capturing the photos.
A simple example using the backoff library I've linked to above (untested):
from time import sleep
import backoff
import dropbox
import requests
db = dropbox.Dropbox('apikeyplaceholder')
#backoff.on_exception(backoff.expo, requests.exceptions.ReadTimeout)
def upload(f, name):
return db.files_upload(f.read(), dname)
NAME_PATTERN = '/image{0:04d}.jpg'
for i in range(4500):
name = NAME_PATTERN.format(i)
fname = '/home/pi/Desktop/time/picture' + name
with open(fname, 'rb') as f:
response = upload(f, name)
print ('uploaded:', response)
sleep(585)

How do I access files on a hadoop file system present on a different server from my local machine?

I have a local machine (local_user#local_machine). And a hadoop file system is present on a different server (some_user#another_server). One of the users in the hadoop server is named target_user. How do I access files present in target_user from local_user#local_machine? More precisely, say there's a file /user/target_user/test.txt present in the HDFS on some_user#another_server. What is the correct file path I should use when accessing /user/target_user/test.txt from local_user#local_machine?
I can access the file in the hdfs itself with hdfs dfs -cat /user/target_user/test.txt. But I can't access the file from my local machine using a python script I have written to read & write from the HDFS (that takes 3 arguments - local file path, remote file path, and read or write), most probably because I am not giving the correct path.
I have tried the following, but none of them work:
$ #local_user#local_machine
$ python3 rw_hdfs.py ./to_local_test.txt /user/target_user/test.txt read
$ python3 rw_hdfs.py ./to_local_test.txt some_user#another_server/user/target_user/test.txt read
The all give the exact same error:
Traceback (most recent call last):
File "/usr/lib/python3/dist-packages/urllib3/connectionpool.py", line 377, in _make_request
httplib_response = conn.getresponse(buffering=True)
TypeError: getresponse() got an unexpected keyword argument 'buffering'
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/usr/lib/python3/dist-packages/urllib3/connectionpool.py", line 560, in urlopen
body=body, headers=headers)
File "/usr/lib/python3/dist-packages/urllib3/connectionpool.py", line 379, in _make_request
httplib_response = conn.getresponse()
File "/usr/lib/python3.5/http/client.py", line 1197, in getresponse
response.begin()
File "/usr/lib/python3.5/http/client.py", line 297, in begin
version, status, reason = self._read_status()
File "/usr/lib/python3.5/http/client.py", line 279, in _read_status
raise BadStatusLine(line)
http.client.BadStatusLine:
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/usr/lib/python3/dist-packages/requests/adapters.py", line 376, in send
timeout=timeout
File "/usr/lib/python3/dist-packages/urllib3/connectionpool.py", line 610, in urlopen
_stacktrace=sys.exc_info()[2])
File "/usr/lib/python3/dist-packages/urllib3/util/retry.py", line 247, in increment
raise six.reraise(type(error), error, _stacktrace)
File "/usr/lib/python3/dist-packages/six.py", line 685, in reraise
raise value.with_traceback(tb)
File "/usr/lib/python3/dist-packages/urllib3/connectionpool.py", line 560, in urlopen
body=body, headers=headers)
File "/usr/lib/python3/dist-packages/urllib3/connectionpool.py", line 379, in _make_request
httplib_response = conn.getresponse()
File "/usr/lib/python3.5/http/client.py", line 1197, in getresponse
response.begin()
File "/usr/lib/python3.5/http/client.py", line 297, in begin
version, status, reason = self._read_status()
File "/usr/lib/python3.5/http/client.py", line 279, in _read_status
raise BadStatusLine(line)
requests.packages.urllib3.exceptions.ProtocolError: ('Connection aborted.', BadStatusLine('\x15\x03\x03\x00\x02\x02\n',))
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "python_hdfs.py", line 63, in <module>
status, name, nnaddress= check_node_status(node)
File "python_hdfs.py", line 18, in check_node_status
request = requests.get("%s/jmx?qry=Hadoop:service=NameNode,name=NameNodeStatus"%name,verify=False).json()
File "/usr/lib/python3/dist-packages/requests/api.py", line 67, in get
return request('get', url, params=params, **kwargs)
File "/usr/lib/python3/dist-packages/requests/api.py", line 53, in request
return session.request(method=method, url=url, **kwargs)
File "/usr/lib/python3/dist-packages/requests/sessions.py", line 468, in request
resp = self.send(prep, **send_kwargs)
File "/usr/lib/python3/dist-packages/requests/sessions.py", line 576, in send
r = adapter.send(request, **kwargs)
File "/usr/lib/python3/dist-packages/requests/adapters.py", line 426, in send
raise ConnectionError(err, request=request)
requests.exceptions.ConnectionError: ('Connection aborted.', BadStatusLine('\x15\x03\x03\x00\x02\x02\n',))
More precisely, say there's a file /user/target_user/test.txt present in the HDFS on some_user#another_server
First, HDFS isn't a single directory on one machine. Therefore trying to access it like that doesn't make sense.
Secondly, whatever Python library you're using is trying to communicate over WebHDFS, which you must specifically enable for the cluster.
https://hadoop.apache.org/docs/r2.7.3/hadoop-project-dist/hadoop-hdfs/WebHDFS.html
BadStatusLine in the error might indicate that you're dealing with a Kerberized, secure cluster, so you might need a different way to read files
For example, PySpark or the Ibis project

Categories

Resources