App Engine Backend Instance Timeout (Python)

Currently I am using a dynamic backend instance for my site, but I still get a timeout issue. The background of my site is:
GAE is used as the frontend, which handles user requests.
Requests are sent to an EC2 REST server (using Bottle) to process.
Below is the error information from the GAE log. It seems like GAE stopped my backend thread after 60 seconds. I appreciate any suggestions.
ERROR 2014-02-20 14:42:18,365 module.py:660] Request to '/_ah/background' failed
Traceback (most recent call last):
File "C:\Program Files (x86)\Google\google_appengine\google\appengine\tools\devappserver2\module.py", line 637, in _handle_request
environ, wrapped_start_response)
File "C:\Program Files (x86)\Google\google_appengine\google\appengine\tools\devappserver2\request_rewriter.py", line 311, in _rewriter_middleware
response_body = iter(application(environ, wrapped_start_response))
File "C:\Program Files (x86)\Google\google_appengine\google\appengine\tools\devappserver2\module.py", line 1524, in _handle_script_request
request_type)
File "C:\Program Files (x86)\Google\google_appengine\google\appengine\tools\devappserver2\module.py", line 1476, in _handle_instance_request
request_id, request_type)
File "C:\Program Files (x86)\Google\google_appengine\google\appengine\tools\devappserver2\instance.py", line 382, in handle
request_type))
File "C:\Program Files (x86)\Google\google_appengine\google\appengine\tools\devappserver2\http_runtime.py", line 244, in handle
response = connection.getresponse()
File "C:\Python27\Lib\httplib.py", line 1045, in getresponse
response.begin()
File "C:\Python27\Lib\httplib.py", line 409, in begin
version, status, reason = self._read_status()
File "C:\Python27\Lib\httplib.py", line 365, in _read_status
line = self.fp.readline(_MAXLINE + 1)
File "C:\Python27\Lib\socket.py", line 476, in readline
data = self._sock.recv(self._rbufsize)
error: [Errno 10054] An existing connection was forcibly closed by the remote host
app.yaml
- url: /backend.html
  script: przm_batchmodel.py
backends.yaml
backends:
- name: mybackend
  class: B1
  instances: 1
  options: dynamic
Output.py
def loop_html(thefile):
    #### do some thing ###
    response = urlfetch.fetch(url=my_REST_server_URL, payload=data, method=urlfetch.POST, headers=http_headers, deadline=6000)
    return response

class przmBatchOutputPage(webapp.RequestHandler):
    def post(self):
        form = cgi.FieldStorage()
        thefile = form['file-0']
        html = background_thread.BackgroundThread(target=loop_html, args=[thefile, generate_batch_jid()])
        html.start()
        self.response.out.write(html)

app = webapp.WSGIApplication([('/.*', przmBatchOutputPage)], debug=True)

Related

httplib2.socks.HTTPError: (403, b'Forbidden')

I work in a Google Cloud environment where I don't have internet access. I'm trying to launch a Dataflow job, using a proxy to access the internet.
When I run a simple wordcount.py with Dataflow, I get this error:
WARNING:apache_beam.utils.retry:Retry with exponential backoff: waiting for 4.750968074377858 seconds before retrying _uncached_gcs_file_copy because we caught exception: httplib2.socks.HTTPError: (403, b'Forbidden')
Traceback for above exception (most recent call last):
File "/opt/py38/lib64/python3.8/site-packages/apache_beam/utils/retry.py", line 275, in wrapper
return fun(*args, **kwargs)
File "/opt/py38/lib64/python3.8/site-packages/apache_beam/runners/dataflow/internal/apiclient.py", line 631, in _uncached_gcs_file_copy
self.stage_file(to_folder, to_name, f, total_size=total_size)
File "/opt/py38/lib64/python3.8/site-packages/apache_beam/runners/dataflow/internal/apiclient.py", line 735, in stage_file
response = self._storage_client.objects.Insert(request, upload=upload)
File "/opt/py38/lib64/python3.8/site-packages/apache_beam/io/gcp/internal/clients/storage/storage_v1_client.py", line 1152, in Insert
return self._RunMethod(
File "/opt/py38/lib64/python3.8/site-packages/apitools/base/py/base_api.py", line 728, in _RunMethod
http_response = http_wrapper.MakeRequest(
File "/opt/py38/lib64/python3.8/site-packages/apitools/base/py/http_wrapper.py", line 359, in MakeRequest
retry_func(ExceptionRetryArgs(http, http_request, e, retry,
File "/opt/py38/lib64/python3.8/site-packages/apache_beam/io/gcp/gcsio_overrides.py", line 45, in retry_func
return http_wrapper.HandleExceptionsAndRebuildHttpConnections(retry_args)
File "/opt/py38/lib64/python3.8/site-packages/apitools/base/py/http_wrapper.py", line 304, in HandleExceptionsAndRebuildHttpConnections
raise retry_args.exc
File "/opt/py38/lib64/python3.8/site-packages/apitools/base/py/http_wrapper.py", line 348, in MakeRequest
return _MakeRequestNoRetry(
File "/opt/py38/lib64/python3.8/site-packages/apitools/base/py/http_wrapper.py", line 397, in _MakeRequestNoRetry
info, content = http.request(
File "/opt/py38/lib64/python3.8/site-packages/google_auth_httplib2.py", line 209, in request
self.credentials.before_request(self._request, method, uri, request_headers)
File "/opt/py38/lib64/python3.8/site-packages/google/auth/credentials.py", line 134, in before_request
self.refresh(request)
File "/opt/py38/lib64/python3.8/site-packages/google/auth/compute_engine/credentials.py", line 111, in refresh
self._retrieve_info(request)
File "/opt/py38/lib64/python3.8/site-packages/google/auth/compute_engine/credentials.py", line 87, in _retrieve_info
info = _metadata.get_service_account_info(
File "/opt/py38/lib64/python3.8/site-packages/google/auth/compute_engine/_metadata.py", line 234, in get_service_account_info
return get(request, path, params={"recursive": "true"})
File "/opt/py38/lib64/python3.8/site-packages/google/auth/compute_engine/_metadata.py", line 150, in get
response = request(url=url, method="GET", headers=_METADATA_HEADERS)
File "/opt/py38/lib64/python3.8/site-packages/google_auth_httplib2.py", line 119, in __call__
response, data = self.http.request(
File "/opt/py38/lib64/python3.8/site-packages/httplib2/__init__.py", line 1701, in request
(response, content) = self._request(
File "/opt/py38/lib64/python3.8/site-packages/httplib2/__init__.py", line 1421, in _request
(response, content) = self._conn_request(conn, request_uri, method, body, headers)
File "/opt/py38/lib64/python3.8/site-packages/httplib2/__init__.py", line 1343, in _conn_request
conn.connect()
File "/opt/py38/lib64/python3.8/site-packages/httplib2/__init__.py", line 1026, in connect
self.sock.connect((self.host, self.port) + sa[2:])
File "/opt/py38/lib64/python3.8/site-packages/httplib2/socks.py", line 504, in connect
self.__negotiatehttp(destpair[0], destpair[1])
File "/opt/py38/lib64/python3.8/site-packages/httplib2/socks.py", line 465, in __negotiatehttp
raise HTTPError((statuscode, statusline[2]))
My service account has these roles:
BigQuery Data Editor
BigQuery User
Dataflow Developer
Dataflow Worker
Service Account User
Storage Admin
The instance has the Cloud API access scope: Allow full access to all Cloud APIs.
What is the problem?
Based on the comment from @luca, the above error was solved by using an internal proxy that allows access to the internet. Add --no_use_public_ip to the command and set no_proxy="metadata.google.internal,www.googleapis.com,dataflow.googleapis.com,bigquery.googleapis.com".
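For illustration, a minimal sketch of launching the wordcount example with those settings; the flag spelling is taken from the comment above (--no_use_public_ip), and the project, region, and bucket names are hypothetical:
import os
import subprocess

# Keep the Google endpoints off the proxy so metadata and API calls go direct.
os.environ["no_proxy"] = "metadata.google.internal,www.googleapis.com,dataflow.googleapis.com,bigquery.googleapis.com"

subprocess.check_call([
    "python", "-m", "apache_beam.examples.wordcount",
    "--runner", "DataflowRunner",
    "--project", "my-project",                # hypothetical project id
    "--region", "us-central1",                # hypothetical region
    "--temp_location", "gs://my-bucket/tmp",  # hypothetical staging bucket
    "--output", "gs://my-bucket/output",      # hypothetical output path
    "--no_use_public_ip",                     # flag as given in the comment above
])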

google.auth.exceptions.RefreshError: ('invalid_grant: Token has been expired or revoked.')

In one of my application files, I am running a query on BQ public tables.
from google.cloud import bigquery
import pyarrow
import os
client = bigquery.Client(project = "project-name")
os.environ.setdefault("GCLOUD_PROJECT", "project-name")
sql = """
SELECT *
FROM `bigquery-public-data.geo_us_boundaries.cbsa`
WHERE name IN UNNEST(%s)
""" %(metros)
df_geom = client.query(sql).to_dataframe()
When I run the application, I get a google.auth exception out of nowhere.
File "/Users/...tab1.py", line 72, in <module>
df_geom = client.query(sql).to_dataframe()
File "/Applications/Anaconda/anaconda3/lib/python3.9/site-packages/google/cloud/bigquery/client.py", line 3391, in query
future = do_query()
File "/Applications/Anaconda/anaconda3/lib/python3.9/site-packages/google/cloud/bigquery/client.py", line 3368, in do_query
query_job._begin(retry=retry, timeout=timeout)
File "/Applications/Anaconda/anaconda3/lib/python3.9/site-packages/google/cloud/bigquery/job/query.py", line 1249, in _begin
super(QueryJob, self)._begin(client=client, retry=retry, timeout=timeout)
File "/Applications/Anaconda/anaconda3/lib/python3.9/site-packages/google/cloud/bigquery/job/base.py", line 509, in _begin
api_response = client._call_api(
File "/Applications/Anaconda/anaconda3/lib/python3.9/site-packages/google/cloud/bigquery/client.py", line 782, in _call_api
return call()
File "/Applications/Anaconda/anaconda3/lib/python3.9/site-packages/google/api_core/retry.py", line 283, in retry_wrapped_func
return retry_target(
File "/Applications/Anaconda/anaconda3/lib/python3.9/site-packages/google/api_core/retry.py", line 190, in retry_target
return target()
File "/Applications/Anaconda/anaconda3/lib/python3.9/site-packages/google/cloud/_http/__init__.py", line 469, in api_request
response = self._make_request(
File "/Applications/Anaconda/anaconda3/lib/python3.9/site-packages/google/cloud/_http/__init__.py", line 333, in _make_request
return self._do_request(
File "/Applications/Anaconda/anaconda3/lib/python3.9/site-packages/google/cloud/_http/__init__.py", line 371, in _do_request
return self.http.request(
File "/Applications/Anaconda/anaconda3/lib/python3.9/site-packages/google/auth/transport/requests.py", line 476, in request
self.credentials.before_request(auth_request, method, url, request_headers)
File "/Applications/Anaconda/anaconda3/lib/python3.9/site-packages/google/auth/credentials.py", line 133, in before_request
self.refresh(request)
File "/Applications/Anaconda/anaconda3/lib/python3.9/site-packages/google/oauth2/credentials.py", line 302, in refresh
) = reauth.refresh_grant(
File "/Applications/Anaconda/anaconda3/lib/python3.9/site-packages/google/oauth2/reauth.py", line 347, in refresh_grant
_client._handle_error_response(response_data)
File "/Applications/Anaconda/anaconda3/lib/python3.9/site-packages/google/oauth2/_client.py", line 60, in _handle_error_response
raise exceptions.RefreshError(error_details, response_data)
google.auth.exceptions.RefreshError: ('invalid_grant: Token has been expired or revoked.', {'error': 'invalid_grant', 'error_description': 'Token has been expired or revoked.'})
I had no issues whatsoever with this for months. My application bundle includes a project-name-xxxx.json file from Google Cloud Console. Every time I attempt to run the application, it fails.
I have looked through some of the other answers, which suggest tokens expire after 7 days when the publishing mode is Testing. The publishing mode is Production in our case.
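For reference, a minimal sketch of making the BigQuery client use the bundled service-account key explicitly instead of any cached user credentials; the filename is the placeholder from the question, not a real path:
from google.cloud import bigquery
from google.oauth2 import service_account

# Hypothetical path: the placeholder key file named above.
credentials = service_account.Credentials.from_service_account_file("project-name-xxxx.json")
client = bigquery.Client(project="project-name", credentials=credentials)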

GCS broken pipe error when trying to upload large files

I'm trying to upload a .csv.gz file to GCS after extracting it to .csv; the file size grows from 500 MB to around 5 GB. I'm able to extract the .csv.gz file to a temporary path, but it fails when I try to upload that file to GCS. I get the following error:
[2019-11-11 13:59:58,180] {models.py:1796} ERROR - [Errno 32] Broken pipe
Traceback (most recent call last):
File "/usr/local/lib/airflow/airflow/models.py", line 1664, in _run_raw_task
result = task_copy.execute(context=context)
File "/home/airflow/gcs/dags/operators/s3_to_gcs_transform_operator.py", line 220, in execute
gcs_hook.upload(dest_gcs_bucket, dest_gcs_object, target_file, gzip=True)
File "/home/airflow/gcs/dags/hooks/gcs_hook_conn.py", line 208, in upload
.insert(bucket=bucket, name=object, media_body=media)
File "/opt/python3.6/lib/python3.6/site-packages/googleapiclient/_helpers.py", line 130, in positional_wrapper
return wrapped(*args, **kwargs)
File "/opt/python3.6/lib/python3.6/site-packages/googleapiclient/http.py", line 835, in execute
method=str(self.method), body=self.body, headers=self.headers)
File "/opt/python3.6/lib/python3.6/site-packages/googleapiclient/http.py", line 179, in _retry_request
raise exception
File "/opt/python3.6/lib/python3.6/site-packages/googleapiclient/http.py", line 162, in _retry_request
resp, content = http.request(uri, method, *args, **kwargs)
File "/opt/python3.6/lib/python3.6/site-packages/google_auth_httplib2.py", line 198, in request
uri, method, body=body, headers=request_headers, **kwargs)
File "/usr/local/lib/airflow/airflow/contrib/hooks/gcp_api_base_hook.py", line 155, in new_request
redirections, connection_type)
File "/opt/python3.6/lib/python3.6/site-packages/httplib2/__init__.py", line 1924, in request
cachekey,
File "/opt/python3.6/lib/python3.6/site-packages/httplib2/__init__.py", line 1595, in _request
conn, request_uri, method, body, headers
File "/opt/python3.6/lib/python3.6/site-packages/httplib2/__init__.py", line 1502, in _conn_request
conn.request(method, request_uri, body, headers)
File "/opt/python3.6/lib/python3.6/http/client.py", line 1239, in request
self._send_request(method, url, body, headers, encode_chunked)
File "/opt/python3.6/lib/python3.6/http/client.py", line 1285, in _send_request
self.endheaders(body, encode_chunked=encode_chunked)
File "/opt/python3.6/lib/python3.6/http/client.py", line 1234, in endheaders
self._send_output(message_body, encode_chunked=encode_chunked)
File "/opt/python3.6/lib/python3.6/http/client.py", line 1065, in _send_output
self.send(chunk)
File "/opt/python3.6/lib/python3.6/http/client.py", line 986, in send
self.sock.sendall(data)
File "/opt/python3.6/lib/python3.6/ssl.py", line 975, in sendall
v = self.send(byte_view[count:])
File "/opt/python3.6/lib/python3.6/ssl.py", line 944, in send
return self._sslobj.write(data)
File "/opt/python3.6/lib/python3.6/ssl.py", line 642, in write
return self._sslobj.write(data)
BrokenPipeError: [Errno 32] Broken pipe
From what I understood, the error could be due to the following:
Your server process has received a SIGPIPE writing to a socket. This
usually happens when you write to a socket fully closed on the other
(client) side. This might be happening when a client program doesn't
wait till all the data from the server is received and simply closes a
socket (using close function).
But I have no idea whether this is the issue or how I can fix this. Can someone help?
You should try to upload big files in chunks.
from google.cloud import storage
CHUNK_SIZE = 128 * 1024 * 1024
client = storage.Client()
bucket = client.bucket('destination')
blob = bucket.blob('really-big-blob', chunk_size=CHUNK_SIZE)
blob.upload_from_filename('/path/to/really-big-file')
You can also check Parallel Composite Uploads.
Similar SO question link.
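If you want to try the parallel-chunked approach from Python, a hedged sketch, assuming a recent google-cloud-storage release that ships transfer_manager.upload_chunks_concurrently; the bucket, blob, and file names are the placeholders from the snippet above:
from google.cloud import storage
from google.cloud.storage import transfer_manager

client = storage.Client()
bucket = client.bucket('destination')
blob = bucket.blob('really-big-blob')
# Uploads the file in parallel chunks and assembles them into a single object.
transfer_manager.upload_chunks_concurrently('/path/to/really-big-file', blob, chunk_size=128 * 1024 * 1024, max_workers=8)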

s3 socket.error: [Errno 104] Connection reset by peer

I have a script that takes a Postgres backup at set intervals and uploads the backup to S3. The script was written more than two years ago and had been working perfectly, deployed on a Linux server (Ubuntu 14.04 LTS (GNU/Linux 3.13.0-24-generic x86_64)). But for some days now the backup has not been uploaded to S3 and has been hitting errors, so I had a look at the log, which is shown below.
uploading test_db.backup.tar.gz to Amazon S3...............
Traceback (most recent call last):
File "/user/projects/test_folder/test/manage.py", line 9, in <module>
execute_from_command_line(sys.argv)
File "/user/projects_envs/testlocal/lib/python2.7/site-packages/django/core/management/__init__.py", line 399, in execute_from_command_line
utility.execute()
File "/user/projects_envs/testlocal/lib/python2.7/site-packages/django/core/management/__init__.py", line 392, in execute
self.fetch_command(subcommand).run_from_argv(self.argv)
File "/user/projects_envs/testlocal/lib/python2.7/site-packages/django/core/management/base.py", line 242, in run_from_argv
self.execute(*args, **options.__dict__)
File "/user/projects_envs/testlocal/lib/python2.7/site-packages/django/core/management/base.py", line 285, in execute
output = self.handle(*args, **options)
File "/user/projects_envs/testlocal/lib/python2.7/site-packages/django/core/management/base.py", line 415, in handle
return self.handle_noargs(**options)
File "/user/projects/test_folder/test/core/management/commands/db_backup.py", line 41, in handle_noargs
self.upload_to_s3(file_name, file_path)
File "/user/projects/test_folder/test/core/management/commands/db_backup.py", line 64, in upload_to_s3
response = conn.put(settings.BACKUP_BUCKET_NAME, file_name, S3.S3Object(tardata))
File "/user/projects/test_folder/test/storage/S3.py", line 192, in put
object.metadata))
File "/user/projects/test_folder/test/storage/S3.py", line 276, in _make_request
connection.request(method, path, data, final_headers)
File "/usr/lib/python2.7/httplib.py", line 973, in request
self._send_request(method, url, body, headers)
File "/usr/lib/python2.7/httplib.py", line 1007, in _send_request
self.endheaders(body)
File "/usr/lib/python2.7/httplib.py", line 969, in endheaders
self._send_output(message_body)
File "/usr/lib/python2.7/httplib.py", line 829, in _send_output
self.send(msg)
File "/usr/lib/python2.7/httplib.py", line 805, in send
self.sock.sendall(data)
File "/usr/lib/python2.7/ssl.py", line 329, in sendall
v = self.send(data[count:])
File "/usr/lib/python2.7/ssl.py", line 298, in send
v = self._sslobj.write(data)
socket.error: [Errno 104] Connection reset by peer
Code
from storage import S3

def upload_to_s3(self, file_name, file_path):
    print "uploading " + file_name + '.tar.gz' + " to Amazon S3..............."
    conn = S3.AWSAuthConnection(settings.AWS_ACCESS_KEY_ID, settings.AWS_SECRET_ACCESS_KEY)
    # get all buckets from Amazon S3
    response = conn.list_all_my_buckets()
    buckets = response.entries
    # is the bucket which you have specified already there?
    flag = False
    for bucket in buckets:
        if bucket.name == settings.BACKUP_BUCKET_NAME:
            flag = True
    # if there is no bucket with that name
    if flag == False:
        print "There is no bucket with name " + BUCKET_NAME + " in your Amazon S3 account"
        print "Error : Please enter an appropriate bucket name and re-run the script"
        return
    # upload file to Amazon S3
    tardata = open(file_path + '.tar.gz', "rb").read()
    response = conn.put(settings.BACKUP_BUCKET_NAME, file_name, S3.S3Object(tardata))
    ...............
    ...............
So what's wrong with the S3 package? Why did it suddenly stop working? Is this really related to S3, or is it something about the Linux packages?

urlfetch DeadlineExceededError

I created a REST API using the Bottle framework, which receives calls from GAE. Once this REST API is invoked, it does some calculations, sends the outputs as a zip file to the Amazon S3 server, and returns the link to GAE. Everything works fine except the timeout issue. I tried to adjust the deadline of urlfetch to 60 seconds, which did not solve the problem. I appreciate any suggestions.
GAE side:
response = urlfetch.fetch(url=url, payload=data, method=urlfetch.POST, headers=http_headers, deadline=60)
Browser error info:
Traceback (most recent call last):
File "C:\Program Files (x86)\Google\google_appengine\lib\webapp2-2.5.2\webapp2.py", line 1535, in __call__
rv = self.handle_exception(request, response, e)
File "C:\Program Files (x86)\Google\google_appengine\lib\webapp2-2.5.2\webapp2.py", line 1529, in __call__
rv = self.router.dispatch(request, response)
File "C:\Program Files (x86)\Google\google_appengine\lib\webapp2-2.5.2\webapp2.py", line 1278, in default_dispatcher
return route.handler_adapter(request, response)
File "C:\Program Files (x86)\Google\google_appengine\lib\webapp2-2.5.2\webapp2.py", line 1102, in __call__
return handler.dispatch()
File "C:\Program Files (x86)\Google\google_appengine\lib\webapp2-2.5.2\webapp2.py", line 572, in dispatch
return self.handle_exception(e, self.app.debug)
File "C:\Program Files (x86)\Google\google_appengine\lib\webapp2-2.5.2\webapp2.py", line 570, in dispatch
return method(*args, **kwargs)
File "D:\Dropbox\ubertool_src\przm5/przm5_output.py", line 22, in post
przm5_obj = przm5_rest_model.przm5(args)
File "D:\Dropbox\ubertool_src\przm5\przm5_rest_model.py", line 351, in __init__
self.convertSoil1, self.convert1to3, self.convert2to3)
File "D:\Dropbox\ubertool_src\przm5\przm5_rest_model.py", line 135, in get_jid
response = urlfetch.fetch(url=url, payload=data, method=urlfetch.POST, headers=http_headers)
File "C:\Program Files (x86)\Google\google_appengine\google\appengine\api\urlfetch.py", line 270, in fetch
return rpc.get_result()
File "C:\Program Files (x86)\Google\google_appengine\google\appengine\api\apiproxy_stub_map.py", line 612, in get_result
return self.__get_result_hook(self)
File "C:\Program Files (x86)\Google\google_appengine\google\appengine\api\urlfetch.py", line 410, in _get_fetch_result
'Deadline exceeded while waiting for HTTP response from URL: ' + url)
DeadlineExceededError: Deadline exceeded while waiting for HTTP response from URL: http://localhost:7777/my_model
REST server:
@route('/my_model', method='POST')
@auth_basic(check)
def my_model():
    # run the model
    run_my_model()
    # zip output files
    zout = zipfile.ZipFile("test.zip", "w")
    for name in os.listdir(src1):
        zout.write(name)
    zout.close()
    ## upload file to S3
    conn = S3Connection(key, secretkey)
    bucket = Bucket(conn, 'przm5')
    k = Key(bucket)
    name1 = 'PRZM5_' + name_temp + '.zip'
    k.key = name1
    ### All the above steps are fine ####
    k.set_contents_from_filename('test.zip')
    link = 'https://s3.amazonaws.com/' + my_file_path
    return {'ff': ff}

run(host='localhost', port=7777, debug=True)
Errors from the REST server:
127.0.0.1 - - [07/Jan/2014 16:16:36] "POST /my_model HTTP/1.1" 200 1663
Traceback (most recent call last):
File "C:\Python27\Lib\wsgiref\handlers.py", line 86, in run
self.finish_response()
File "C:\Python27\Lib\wsgiref\handlers.py", line 128, in finish_response
self.write(data)
File "C:\Python27\Lib\wsgiref\handlers.py", line 212, in write
self.send_headers()
File "C:\Python27\Lib\wsgiref\handlers.py", line 270, in send_headers
self.send_preamble()
File "C:\Python27\Lib\wsgiref\handlers.py", line 194, in send_preamble
'Date: %s\r\n' % format_date_time(time.time())
File "C:\Python27\Lib\socket.py", line 324, in write
self.flush()
File "C:\Python27\Lib\socket.py", line 303, in flush
self._sock.sendall(view[write_offset:write_offset+buffer_size])
error: [Errno 10053] An established connection was aborted by the software in your host machine
127.0.0.1 - - [07/Jan/2014 16:16:36] "POST /my_model HTTP/1.1" 500 59
----------------------------------------
Exception happened during processing of request from ('127.0.0.1', 50953)
Traceback (most recent call last):
File "C:\Python27\Lib\SocketServer.py", line 295, in _handle_request_noblock
self.process_request(request, client_address)
File "C:\Python27\Lib\SocketServer.py", line 321, in process_request
self.finish_request(request, client_address)
File "C:\Python27\Lib\SocketServer.py", line 334, in finish_request
self.RequestHandlerClass(request, client_address, self)
File "C:\Python27\Lib\SocketServer.py", line 651, in __init__
self.finish()
File "C:\Python27\Lib\SocketServer.py", line 710, in finish
self.wfile.close()
File "C:\Python27\Lib\socket.py", line 279, in close
self.flush()
File "C:\Python27\Lib\socket.py", line 303, in flush
self._sock.sendall(view[write_offset:write_offset+buffer_size])
error: [Errno 10053] An established connection was aborted by the software in your host machine
The deadline is a maximum value; once it is reached, the request fails. And it's failing with
Deadline exceeded while waiting for HTTP response
So you should catch that exception and try again.
If the entire operation can't be done in under 60 seconds, there is nothing else to be done: it's a hard limit in GAE that HTTP requests can't exceed 60 seconds.
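For illustration, a minimal sketch of that catch-and-retry suggestion on the GAE side, assuming the Python 2.7 runtime; the helper name and MAX_ATTEMPTS are illustrative:
from google.appengine.api import urlfetch
from google.appengine.api import urlfetch_errors

MAX_ATTEMPTS = 3  # illustrative retry budget

def fetch_with_retry(url, data, http_headers):
    for attempt in range(MAX_ATTEMPTS):
        try:
            # 60 seconds is the hard ceiling mentioned above.
            return urlfetch.fetch(url=url, payload=data, method=urlfetch.POST,
                                  headers=http_headers, deadline=60)
        except urlfetch_errors.DeadlineExceededError:
            if attempt == MAX_ATTEMPTS - 1:
                raise  # give up after the last attempt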
