Web scraping data from Transfermarkt - python

I wrote a web-scraping procedure to scrape data from Transfermarkt.de. First, I get the data for the 20 biggest transfers from the last 10 years:
import requests
import pandas as pd
from bs4 import BeautifulSoup

headers = {'User-Agent':
           'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/47.0.2526.106 Safari/537.36'}
df_consolidado = pd.DataFrame()
df = {}
temporadas = list(range(2009, 2020))
# Create the lists to store the information
Jogadores = []
Valores_Transf = []
Href = []
for t in temporadas:
    print(t)
    for p in range(1, 5):
        # Load page p of season t
        page = "https://www.transfermarkt.de/transfers/transferrekorde/statistik/top/saison_id/" + str(t) + "/land_id//ausrichtung//spielerposition_id//altersklasse/u23/leihe//w_s//plus/0/galerie/0/page/" + str(p)
        print(page)
        pageTree = requests.get(page, headers=headers)
        pageSoup = BeautifulSoup(pageTree.content, 'html.parser')
        # Grab the transfer data
        jogador = pageSoup.find_all("a", {"class": "spielprofil_tooltip"})
        valor_transf = pageSoup.find_all("td", {"class": "rechts hauptlink"})
        # Add the information to the lists
        for i in range(0, 25):
            Jogadores.append(jogador[i].text)
            Valores_Transf.append(float(valor_transf[i].text.replace('Mio.', '').replace('€', '').replace(',', '.').replace('Leihgebühr:', '').replace('Leih-Ende', '0')))
            Href.append(jogador[i]['href'])
    df[t] = pd.DataFrame({"Temporada": int(t), "Jogador": Jogadores, "Valor Transferência": Valores_Transf, "Ref": Href})
Then I combine all those dfs:
# Combine the various generated dfs
df = pd.concat([df[2009], df[2010], df[2011], df[2012], df[2013], df[2014], df[2015], df[2016], df[2017], df[2018], df[2019]])
But at the last and most important step, I'm running into trouble. Through the following code, I tried to get more detailed info:
headers = {'User-Agent':
           'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/47.0.2526.106 Safari/537.36'}
Altura = []
Idade_Atual = []
Idade_Transf = []
Pe_Dominante = []  # used below but missing from the original listing
Maior_Valor = []
Data_Max_Valor = []
for index, row in df.iterrows():
    page = "https://www.transfermarkt.de" + row['Ref']
    pageTree = requests.get(page, headers=headers, timeout=1000)
    pageSoup = BeautifulSoup(pageTree.content, 'html.parser')
    # Load the object containing the data
    dados_agrupados = pageSoup.find_all("table", {"class": "auflistung"})
    dados = []
    tabela = []
    print(page)
    for d in dados_agrupados:
        dados.extend(d.find_all("td"))
        tabela.extend(d.find_all("th"))
    # Check the table structure to copy the data
    for t in range(len(tabela)):
        if tabela[t].text == "Height:":
            if dados[t].text != "N/A":
                Altura.append(float(dados[t].text.rstrip(' m').replace(',', '.')))
            else:
                Altura.append(0)
        if tabela[t].text == "Age:":
            Idade_Atual.append(int(dados[t].text))
            Idade_Transf.append(int((row['Temporada'] - 2020) + int(dados[t].text)))
        if tabela[t].text == "Foot:":
            Pe_Dominante.append(dados[t].text)
    # Load the object containing the player's highest market value
    dados_agrupados_val = pageSoup.find_all("div", {"class": "right-td"})
    Data_Max_Valor.append(int(dados_agrupados_val[2].text.replace(' ', '')[-5:-1]))
    if "k" in str(dados_agrupados_val[2].text.replace('Mio.', '').replace('€', '').replace(',', '.').replace(' ', '')[:-12]):
        Maior_Valor.append(float(dados_agrupados_val[2].text.replace('Mio.', '').replace('Â', '').replace('â', '').replace('¬', '').replace('k', '').replace('€', '').replace(',', '.').replace(' ', '')[:-12]) / 1000)
    else:
        Maior_Valor.append(float(dados_agrupados_val[2].text.replace('Mio.', '').replace('Â', '').replace('â', '').replace('¬', '').replace('€', '').replace(',', '.').replace(' ', '')[:-13]))
df["Altura"] = Altura
df["Idade_Atual"] = Idade_Atual
df["Idade_Transf"] = Idade_Transf
df["Max_Valor"] = Maior_Valor
df["Data_Max_Valor"] = Data_Max_Valor
# Age when the maximum market value was reached
df["Idade_Max_Valor"] = df["Data_Max_Valor"] - (df["Temporada"] - df["Idade_Transf"])
df
But I ended up with the following error:
---------------------------------------------------------------------------
SysCallError Traceback (most recent call last)
~\Anaconda3\lib\site-packages\urllib3\contrib\pyopenssl.py in wrap_socket(self, sock, server_side, do_handshake_on_connect, suppress_ragged_eofs, server_hostname)
455 try:
--> 456 cnx.do_handshake()
457 except OpenSSL.SSL.WantReadError:
~\Anaconda3\lib\site-packages\OpenSSL\SSL.py in do_handshake(self)
1914 result = _lib.SSL_do_handshake(self._ssl)
-> 1915 self._raise_ssl_error(self._ssl, result)
1916
~\Anaconda3\lib\site-packages\OpenSSL\SSL.py in _raise_ssl_error(self, ssl, result)
1638 if errno != 0:
-> 1639 raise SysCallError(errno, errorcode.get(errno))
1640 raise SysCallError(-1, "Unexpected EOF")
SysCallError: (10054, 'WSAECONNRESET')
During handling of the above exception, another exception occurred:
SSLError Traceback (most recent call last)
~\Anaconda3\lib\site-packages\urllib3\connectionpool.py in urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)
599 body=body, headers=headers,
--> 600 chunked=chunked)
601
~\Anaconda3\lib\site-packages\urllib3\connectionpool.py in _make_request(self, conn, method, url, timeout, chunked, **httplib_request_kw)
342 try:
--> 343 self._validate_conn(conn)
344 except (SocketTimeout, BaseSSLError) as e:
~\Anaconda3\lib\site-packages\urllib3\connectionpool.py in _validate_conn(self, conn)
838 if not getattr(conn, 'sock', None): # AppEngine might not have `.sock`
--> 839 conn.connect()
840
~\Anaconda3\lib\site-packages\urllib3\connection.py in connect(self)
343 server_hostname=server_hostname,
--> 344 ssl_context=context)
345
~\Anaconda3\lib\site-packages\urllib3\util\ssl_.py in ssl_wrap_socket(sock, keyfile, certfile, cert_reqs, ca_certs, server_hostname, ssl_version, ciphers, ssl_context, ca_cert_dir)
346 if HAS_SNI and server_hostname is not None:
--> 347 return context.wrap_socket(sock, server_hostname=server_hostname)
348
~\Anaconda3\lib\site-packages\urllib3\contrib\pyopenssl.py in wrap_socket(self, sock, server_side, do_handshake_on_connect, suppress_ragged_eofs, server_hostname)
461 except OpenSSL.SSL.Error as e:
--> 462 raise ssl.SSLError('bad handshake: %r' % e)
463 break
SSLError: ("bad handshake: SysCallError(10054, 'WSAECONNRESET')",)
During handling of the above exception, another exception occurred:
MaxRetryError Traceback (most recent call last)
~\Anaconda3\lib\site-packages\requests\adapters.py in send(self, request, stream, timeout, verify, cert, proxies)
448 retries=self.max_retries,
--> 449 timeout=timeout
450 )
~\Anaconda3\lib\site-packages\urllib3\connectionpool.py in urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)
637 retries = retries.increment(method, url, error=e, _pool=self,
--> 638 _stacktrace=sys.exc_info()[2])
639 retries.sleep()
~\Anaconda3\lib\site-packages\urllib3\util\retry.py in increment(self, method, url, response, error, _pool, _stacktrace)
398 if new_retry.is_exhausted():
--> 399 raise MaxRetryError(_pool, url, error or ResponseError(cause))
400
MaxRetryError: HTTPSConnectionPool(host='www.transfermarkt.de', port=443): Max retries exceeded with url: /bojan-krkic/profil/spieler/44675 (Caused by SSLError(SSLError("bad handshake: SysCallError(10054, 'WSAECONNRESET')")))
During handling of the above exception, another exception occurred:
SSLError Traceback (most recent call last)
<ipython-input-5-7f98723c208e> in <module>
11 for index, row in df.iterrows():
12 page = "https://www.transfermarkt.de" + row['Ref']
---> 13 pageTree = requests.get(page, headers=headers, timeout=1000)
14 pageSoup = BeautifulSoup(pageTree.content, 'html.parser')
15
~\Anaconda3\lib\site-packages\requests\api.py in get(url, params, **kwargs)
73
74 kwargs.setdefault('allow_redirects', True)
---> 75 return request('get', url, params=params, **kwargs)
76
77
~\Anaconda3\lib\site-packages\requests\api.py in request(method, url, **kwargs)
58 # cases, and look like a memory leak in others.
59 with sessions.Session() as session:
---> 60 return session.request(method=method, url=url, **kwargs)
61
62
~\Anaconda3\lib\site-packages\requests\sessions.py in request(self, method, url, params, data, headers, cookies, files, auth, timeout, allow_redirects, proxies, hooks, stream, verify, cert, json)
531 }
532 send_kwargs.update(settings)
--> 533 resp = self.send(prep, **send_kwargs)
534
535 return resp
~\Anaconda3\lib\site-packages\requests\sessions.py in send(self, request, **kwargs)
644
645 # Send the request
--> 646 r = adapter.send(request, **kwargs)
647
648 # Total elapsed time of the request (approximately)
~\Anaconda3\lib\site-packages\requests\adapters.py in send(self, request, stream, timeout, verify, cert, proxies)
512 if isinstance(e.reason, _SSLError):
513 # This branch is for urllib3 v1.22 and later.
--> 514 raise SSLError(e, request=request)
515
516 raise ConnectionError(e, request=request)
SSLError: HTTPSConnectionPool(host='www.transfermarkt.de', port=443): Max retries exceeded with url: /bojan-krkic/profil/spieler/44675 (Caused by SSLError(SSLError("bad handshake: SysCallError(10054, 'WSAECONNRESET')")))
Does anyone understand what's happening?

The problem may be because of the pyOpenSSL version you use; try updating it with the help of this link.

It looks like this might be a problem on the server side, not with your code. The SysCallError code you're getting is a Windows Sockets error code. Citing from the Microsoft Docs:
WSAECONNRESET
10054
Connection reset by peer.
An existing connection was forcibly closed by the remote host. This normally results if the peer application on the remote host is suddenly stopped, the host is rebooted, the host or remote network interface is disabled, or the remote host uses a hard close (see setsockopt for more information on the SO_LINGER option on the remote socket). This error may also result if a connection was broken due to keep-alive activity detecting a failure while one or more operations are in progress. Operations that were in progress fail with WSAENETRESET. Subsequent operations fail with WSAECONNRESET.
It might be that this was a temporary issue. I was able to run your script and send requests for quite a while without issues. If this keeps happening when sending requests to that server, consider catching these exceptions using something like this:
try:
    pageTree = requests.get(page, headers=headers, timeout=1000)
except requests.exceptions.SSLError as e:
    print(f'request to {page} failed: {e}')
    # or retry the request until it succeeds
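If you would rather retry than skip the page, here is a minimal sketch of such a retry loop (the helper name, attempt count, and delay are arbitrary choices, not from the original code):

import time

import requests

def get_with_retries(url, headers, attempts=3, delay=5):
    # Retry the request a few times on SSL/connection errors before giving up
    for attempt in range(attempts):
        try:
            return requests.get(url, headers=headers, timeout=30)
        except (requests.exceptions.SSLError, requests.exceptions.ConnectionError) as e:
            print(f'attempt {attempt + 1} for {url} failed: {e}')
            time.sleep(delay)  # back off before retrying
    return None  # caller must handle pages that never succeeded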

Related

gaierror, NewConnectionError, MaxRetryError, ConnectionError with URL in requests

I am trying to check whether the response of the URL is the same as the domain record from the WHOIS database.
import requests
import whois
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry
from urllib.parse import urlparse
The code:
def abnormal_url(url):
    response = requests.get(url, verify=False)
    domainname = urlparse(url).netloc
    domain = whois.whois(domainname)
    try:
        if response.text == domain:
            return 0  # legitimate
        else:
            return 1  # phishing
    except:
        return 1  # phishing
Append to dataframe:
df['abnormal url'] = df['url'].apply(lambda i: abnormal_url(i))
Error found:
gaierror Traceback (most recent call last)
File D:\anaconda3\lib\site-packages\urllib3\connection.py:174, in HTTPConnection._new_conn(self)
173 try:
--> 174 conn = connection.create_connection(
175 (self._dns_host, self.port), self.timeout, **extra_kw
176 )
178 except SocketTimeout:
File D:\anaconda3\lib\site-packages\urllib3\util\connection.py:72, in create_connection(address, timeout, source_address, socket_options)
68 return six.raise_from(
69 LocationParseError(u"'%s', label empty or too long" % host), None
70 )
---> 72 for res in socket.getaddrinfo(host, port, family, socket.SOCK_STREAM):
73 af, socktype, proto, canonname, sa = res
File D:\anaconda3\lib\socket.py:954, in getaddrinfo(host, port, family, type, proto, flags)
953 addrlist = []
--> 954 for res in _socket.getaddrinfo(host, port, family, type, proto, flags):
955 af, socktype, proto, canonname, sa = res
gaierror: [Errno 11001] getaddrinfo failed
During handling of the above exception, another exception occurred:
NewConnectionError Traceback (most recent call last)
File D:\anaconda3\lib\site-packages\urllib3\connectionpool.py:703, in HTTPConnectionPool.urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)
702 # Make the request on the httplib connection object.
--> 703 httplib_response = self._make_request(
704 conn,
705 method,
706 url,
707 timeout=timeout_obj,
708 body=body,
709 headers=headers,
710 chunked=chunked,
711 )
713 # If we're going to release the connection in ``finally:``, then
714 # the response doesn't need to know about the connection. Otherwise
715 # it will also try to release it and we'll have a double-release
716 # mess.
File D:\anaconda3\lib\site-packages\urllib3\connectionpool.py:386, in HTTPConnectionPool._make_request(self, conn, method, url, timeout, chunked, **httplib_request_kw)
385 try:
--> 386 self._validate_conn(conn)
387 except (SocketTimeout, BaseSSLError) as e:
388 # Py2 raises this as a BaseSSLError, Py3 raises it as socket timeout.
File D:\anaconda3\lib\site-packages\urllib3\connectionpool.py:1040, in HTTPSConnectionPool._validate_conn(self, conn)
1039 if not getattr(conn, "sock", None): # AppEngine might not have `.sock`
-> 1040 conn.connect()
1042 if not conn.is_verified:
File D:\anaconda3\lib\site-packages\urllib3\connection.py:358, in HTTPSConnection.connect(self)
356 def connect(self):
357 # Add certificate verification
--> 358 self.sock = conn = self._new_conn()
359 hostname = self.host
File D:\anaconda3\lib\site-packages\urllib3\connection.py:186, in HTTPConnection._new_conn(self)
185 except SocketError as e:
--> 186 raise NewConnectionError(
187 self, "Failed to establish a new connection: %s" % e
188 )
190 return conn
NewConnectionError: <urllib3.connection.HTTPSConnection object at 0x0000023F7EA21520>: Failed to establish a new connection: [Errno 11001] getaddrinfo failed
During handling of the above exception, another exception occurred:
MaxRetryError Traceback (most recent call last)
File D:\anaconda3\lib\site-packages\requests\adapters.py:440, in HTTPAdapter.send(self, request, stream, timeout, verify, cert, proxies)
439 if not chunked:
--> 440 resp = conn.urlopen(
441 method=request.method,
442 url=url,
443 body=request.body,
444 headers=request.headers,
445 redirect=False,
446 assert_same_host=False,
447 preload_content=False,
448 decode_content=False,
449 retries=self.max_retries,
450 timeout=timeout
451 )
453 # Send the request.
454 else:
File D:\anaconda3\lib\site-packages\urllib3\connectionpool.py:785, in HTTPConnectionPool.urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)
783 e = ProtocolError("Connection aborted.", e)
--> 785 retries = retries.increment(
786 method, url, error=e, _pool=self, _stacktrace=sys.exc_info()[2]
787 )
788 retries.sleep()
File D:\anaconda3\lib\site-packages\urllib3\util\retry.py:592, in Retry.increment(self, method, url, response, error, _pool, _stacktrace)
591 if new_retry.is_exhausted():
--> 592 raise MaxRetryError(_pool, url, error or ResponseError(cause))
594 log.debug("Incremented Retry for (url='%s'): %r", url, new_retry)
MaxRetryError: HTTPSConnectionPool(host='www.list.tmall.com', port=443): Max retries exceeded with url: / (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x0000023F7EA21520>: Failed to establish a new connection: [Errno 11001] getaddrinfo failed'))
During handling of the above exception, another exception occurred:
ConnectionError Traceback (most recent call last)
Input In [16], in <cell line: 1>()
----> 1 df['abnormal url'] = df['url'].apply(lambda i: abnormal_url(i))
File D:\anaconda3\lib\site-packages\pandas\core\series.py:4433, in Series.apply(self, func, convert_dtype, args, **kwargs)
4323 def apply(
4324 self,
4325 func: AggFuncType,
(...)
4328 **kwargs,
4329 ) -> DataFrame | Series:
4330 """
4331 Invoke function on values of Series.
4332
(...)
4431 dtype: float64
4432 """
-> 4433 return SeriesApply(self, func, convert_dtype, args, kwargs).apply()
File D:\anaconda3\lib\site-packages\pandas\core\apply.py:1082, in SeriesApply.apply(self)
1078 if isinstance(self.f, str):
1079 # if we are a string, try to dispatch
1080 return self.apply_str()
-> 1082 return self.apply_standard()
File D:\anaconda3\lib\site-packages\pandas\core\apply.py:1137, in SeriesApply.apply_standard(self)
1131 values = obj.astype(object)._values
1132 # error: Argument 2 to "map_infer" has incompatible type
1133 # "Union[Callable[..., Any], str, List[Union[Callable[..., Any], str]],
1134 # Dict[Hashable, Union[Union[Callable[..., Any], str],
1135 # List[Union[Callable[..., Any], str]]]]]"; expected
1136 # "Callable[[Any], Any]"
-> 1137 mapped = lib.map_infer(
1138 values,
1139 f, # type: ignore[arg-type]
1140 convert=self.convert_dtype,
1141 )
1143 if len(mapped) and isinstance(mapped[0], ABCSeries):
1144 # GH#43986 Need to do list(mapped) in order to get treated as nested
1145 # See also GH#25959 regarding EA support
1146 return obj._constructor_expanddim(list(mapped), index=obj.index)
File D:\anaconda3\lib\site-packages\pandas\_libs\lib.pyx:2870, in pandas._libs.lib.map_infer()
Input In [16], in <lambda>(i)
----> 1 df['abnormal url'] = df['url'].apply(lambda i: abnormal_url(i))
Input In [15], in abnormal_url(url)
1 def abnormal_url(url):
----> 2 response = requests.get(url,verify=False)
3 domainname = urlparse(url).netloc
4 domain = whois.whois(domainname)
File D:\anaconda3\lib\site-packages\requests\api.py:75, in get(url, params, **kwargs)
64 def get(url, params=None, **kwargs):
65 r"""Sends a GET request.
66
67 :param url: URL for the new :class:`Request` object.
(...)
72 :rtype: requests.Response
73 """
---> 75 return request('get', url, params=params, **kwargs)
File D:\anaconda3\lib\site-packages\requests\api.py:61, in request(method, url, **kwargs)
57 # By using the 'with' statement we are sure the session is closed, thus we
58 # avoid leaving sockets open which can trigger a ResourceWarning in some
59 # cases, and look like a memory leak in others.
60 with sessions.Session() as session:
---> 61 return session.request(method=method, url=url, **kwargs)
File D:\anaconda3\lib\site-packages\requests\sessions.py:529, in Session.request(self, method, url, params, data, headers, cookies, files, auth, timeout, allow_redirects, proxies, hooks, stream, verify, cert, json)
524 send_kwargs = {
525 'timeout': timeout,
526 'allow_redirects': allow_redirects,
527 }
528 send_kwargs.update(settings)
--> 529 resp = self.send(prep, **send_kwargs)
531 return resp
File D:\anaconda3\lib\site-packages\requests\sessions.py:645, in Session.send(self, request, **kwargs)
642 start = preferred_clock()
644 # Send the request
--> 645 r = adapter.send(request, **kwargs)
647 # Total elapsed time of the request (approximately)
648 elapsed = preferred_clock() - start
File D:\anaconda3\lib\site-packages\requests\adapters.py:519, in HTTPAdapter.send(self, request, stream, timeout, verify, cert, proxies)
515 if isinstance(e.reason, _SSLError):
516 # This branch is for urllib3 v1.22 and later.
517 raise SSLError(e, request=request)
--> 519 raise ConnectionError(e, request=request)
521 except ClosedPoolError as e:
522 raise ConnectionError(e, request=request)
ConnectionError: HTTPSConnectionPool(host='www.list.tmall.com', port=443): Max retries exceeded with url: / (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x0000023F7EA21520>: Failed to establish a new connection: [Errno 11001] getaddrinfo failed'))
The connection could not be established because the site can't be reached.
Just execute the get request inside your try/except and it will work.
def abnormal_url(url):
    domainname = urlparse(url).netloc
    domain = whois.whois(domainname)
    try:
        response = requests.get(url, verify=False)
        if response.text == domain:
            return 0  # legitimate
        else:
            return 1  # phishing
    except:
        return 1  # phishing
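Since the question already imports HTTPAdapter and Retry, another option (a sketch; the retry counts are arbitrary) is to mount a retrying adapter on a Session, so transient connection failures are retried automatically before the except clause is ever reached:

import requests
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry

session = requests.Session()
# Retry up to 3 times with exponential backoff on connection-level failures
retries = Retry(total=3, backoff_factor=1)
session.mount('https://', HTTPAdapter(max_retries=retries))
session.mount('http://', HTTPAdapter(max_retries=retries))

# then call session.get(url, verify=False) inside abnormal_url instead of requests.get

Note that no amount of retrying helps for hosts whose DNS lookup fails outright, as with www.list.tmall.com above, so the try/except is still needed.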

Is my code crashing because of an internet connection issue? If so, is there a way I can make a snippet of code to check the connection before trying the API?

I'm using Jupyter Notebook to run a fairly basic crypto trading bot with an API on Coinbase, and after an arbitrary amount of time, sometimes 6 hours, other times 18 hours, the code will error and stop working. Has anyone had this problem, or is anyone able to determine the problem from the error output produced?
The error output is given below.
OSError Traceback (most recent call last)
~\anaconda3\lib\site-packages\requests\packages\urllib3\connectionpool.py in urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)
596 # Make the request on the httplib connection object.
--> 597 httplib_response = self._make_request(conn, method, url,
598 timeout=timeout_obj,
~\anaconda3\lib\site-packages\requests\packages\urllib3\connectionpool.py in _make_request(self, conn, method, url, timeout, chunked, **httplib_request_kw)
385 # otherwise it looks like a programming error was the cause.
--> 386 six.raise_from(e, None)
387 except (SocketTimeout, BaseSSLError, SocketError) as e:
~\anaconda3\lib\site-packages\requests\packages\urllib3\packages\six.py in raise_from(value, from_value)
~\anaconda3\lib\site-packages\requests\packages\urllib3\connectionpool.py in _make_request(self, conn, method, url, timeout, chunked, **httplib_request_kw)
381 try:
--> 382 httplib_response = conn.getresponse()
383 except Exception as e:
~\anaconda3\lib\http\client.py in getresponse(self)
1370 try:
-> 1371 response.begin()
1372 except ConnectionError:
~\anaconda3\lib\http\client.py in begin(self)
318 while True:
--> 319 version, status, reason = self._read_status()
320 if status != CONTINUE:
~\anaconda3\lib\http\client.py in _read_status(self)
279 def _read_status(self):
--> 280 line = str(self.fp.readline(_MAXLINE + 1), "iso-8859-1")
281 if len(line) > _MAXLINE:
~\anaconda3\lib\socket.py in readinto(self, b)
703 try:
--> 704 return self._sock.recv_into(b)
705 except timeout:
~\anaconda3\lib\site-packages\requests\packages\urllib3\contrib\pyopenssl.py in recv_into(self, *args, **kwargs)
292 else:
--> 293 return self.recv_into(*args, **kwargs)
294
~\anaconda3\lib\site-packages\requests\packages\urllib3\contrib\pyopenssl.py in recv_into(self, *args, **kwargs)
281 else:
--> 282 raise SocketError(str(e))
283 except OpenSSL.SSL.ZeroReturnError as e:
OSError: (10054, 'WSAECONNRESET')
During handling of the above exception, another exception occurred:
ProtocolError Traceback (most recent call last)
~\anaconda3\lib\site-packages\requests\adapters.py in send(self, request, stream, timeout, verify, cert, proxies)
412 if not chunked:
--> 413 resp = conn.urlopen(
414 method=request.method,
~\anaconda3\lib\site-packages\requests\packages\urllib3\connectionpool.py in urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)
647
--> 648 retries = retries.increment(method, url, error=e, _pool=self,
649 _stacktrace=sys.exc_info()[2])
~\anaconda3\lib\site-packages\requests\packages\urllib3\util\retry.py in increment(self, method, url, response, error, _pool, _stacktrace)
346 if read is False or not self._is_method_retryable(method):
--> 347 raise six.reraise(type(error), error, _stacktrace)
348 elif read is not None:
~\anaconda3\lib\site-packages\requests\packages\urllib3\packages\six.py in reraise(tp, value, tb)
684 if value.__traceback__ is not tb:
--> 685 raise value.with_traceback(tb)
686 raise value
~\anaconda3\lib\site-packages\requests\packages\urllib3\connectionpool.py in urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)
596 # Make the request on the httplib connection object.
--> 597 httplib_response = self._make_request(conn, method, url,
598 timeout=timeout_obj,
~\anaconda3\lib\site-packages\requests\packages\urllib3\connectionpool.py in _make_request(self, conn, method, url, timeout, chunked, **httplib_request_kw)
385 # otherwise it looks like a programming error was the cause.
--> 386 six.raise_from(e, None)
387 except (SocketTimeout, BaseSSLError, SocketError) as e:
~\anaconda3\lib\site-packages\requests\packages\urllib3\packages\six.py in raise_from(value, from_value)
~\anaconda3\lib\site-packages\requests\packages\urllib3\connectionpool.py in _make_request(self, conn, method, url, timeout, chunked, **httplib_request_kw)
381 try:
--> 382 httplib_response = conn.getresponse()
383 except Exception as e:
~\anaconda3\lib\http\client.py in getresponse(self)
1370 try:
-> 1371 response.begin()
1372 except ConnectionError:
~\anaconda3\lib\http\client.py in begin(self)
318 while True:
--> 319 version, status, reason = self._read_status()
320 if status != CONTINUE:
~\anaconda3\lib\http\client.py in _read_status(self)
279 def _read_status(self):
--> 280 line = str(self.fp.readline(_MAXLINE + 1), "iso-8859-1")
281 if len(line) > _MAXLINE:
~\anaconda3\lib\socket.py in readinto(self, b)
703 try:
--> 704 return self._sock.recv_into(b)
705 except timeout:
~\anaconda3\lib\site-packages\requests\packages\urllib3\contrib\pyopenssl.py in recv_into(self, *args, **kwargs)
292 else:
--> 293 return self.recv_into(*args, **kwargs)
294
~\anaconda3\lib\site-packages\requests\packages\urllib3\contrib\pyopenssl.py in recv_into(self, *args, **kwargs)
281 else:
--> 282 raise SocketError(str(e))
283 except OpenSSL.SSL.ZeroReturnError as e:
ProtocolError: ('Connection aborted.', OSError("(10054, 'WSAECONNRESET')"))
During handling of the above exception, another exception occurred:
ConnectionError Traceback (most recent call last)
~\AppData\Local\Temp/ipykernel_26992/2760925491.py in <module>
36 # dead zone
37 if (price_diff < buy_diff) and (price_diff > sell_diff) and (wait_time<run_time):
---> 38 price_difference()
39 wait_time = (run_time+180)
40 run_log.logger.info("price difference in dead zone")
~\AppData\Local\Temp/ipykernel_26992/3172399590.py in price_difference()
46 global difference
47 global lastDiff
---> 48 price()
49 historic_price()
50 if current_price < hist_avg:
~\AppData\Local\Temp/ipykernel_26992/3172399590.py in price()
23 global current_ask
24 global last_price
---> 25 var=auth_client.get_product_order_book(trade_pair)
26 bids=var.get('bids')
27 asks=var.get('asks')
~\anaconda3\lib\site-packages\cbpro\public_client.py in get_product_order_book(self, product_id, level)
86 """
87 params = {'level': level}
---> 88 return self._send_message('get',
89 '/products/{}/book'.format(product_id),
90 params=params)
~\anaconda3\lib\site-packages\cbpro\public_client.py in _send_message(self, method, endpoint, params, data)
266 """
267 url = self.url + endpoint
--> 268 r = self.session.request(method, url, params=params, data=data,
269 auth=self.auth, timeout=30)
270 return r.json()
~\anaconda3\lib\site-packages\requests\sessions.py in request(self, method, url, params, data, headers, cookies, files, auth, timeout, allow_redirects, proxies, hooks, stream, verify, cert, json)
486 }
487 send_kwargs.update(settings)
--> 488 resp = self.send(prep, **send_kwargs)
489
490 return resp
~\anaconda3\lib\site-packages\requests\sessions.py in send(self, request, **kwargs)
607
608 # Send the request
--> 609 r = adapter.send(request, **kwargs)
610
611 # Total elapsed time of the request (approximately)
~\anaconda3\lib\site-packages\requests\adapters.py in send(self, request, stream, timeout, verify, cert, proxies)
471
472 except (ProtocolError, socket.error) as err:
--> 473 raise ConnectionError(err, request=request)
474
475 except MaxRetryError as e:
ConnectionError: ('Connection aborted.', OSError("(10054, 'WSAECONNRESET')"))
A “connection reset” error usually means that the network is ok but the thing at the other end isn’t, or doesn’t want to talk to you, though with today’s “unclean” networks with proxies and firewalls and layering violations everywhere, there are probably more exotic causes as well.
In any case, I would not try to predict it (which is going to be hopelessly brittle) but deal with it when it happens by catching the exception.
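For instance (a sketch only; price_difference is the bot function from the traceback above, and the 60-second pause is an arbitrary choice), catching the exception around the call lets the bot pause and carry on instead of dying:

import time

import requests

def safe_price_difference():
    # Retry the bot's price check after a pause whenever the connection drops
    while True:
        try:
            price_difference()  # the bot function referenced in the traceback above
            return
        except requests.exceptions.ConnectionError as e:
            print(f'connection dropped: {e}; retrying in 60 seconds')
            time.sleep(60)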
So it might not be the perfect solution, but it's going to work for me in this case: have a script simply restart the main script if/when it crashes.
from subprocess import run
from time import sleep

# Path and name of the script you are trying to start
file_path = "CBPRO_BTC_USDC_V3.py"
print('program started')
restart_timer = 60

def start_script():
    try:
        # Make sure the 'python' command is available
        run("python " + file_path, check=True)
    except:
        # Script crashed, let's restart it!
        handle_crash()

def handle_crash():
    print('Restarting in 60 seconds')
    sleep(restart_timer)  # Restarts the script after 60 seconds
    start_script()

start_script()
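One note on this approach: passing the command to run() as a single string works on Windows (where this bot appears to run, judging by the traceback paths); on POSIX systems you would pass a list instead, e.g. run(["python", file_path], check=True).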
I don't know if it crashes because of the internet connection. But if you want to check whether you have an internet connection, you could ping a server, for example 8.8.8.8 (Google's DNS server). If the command returns 0 it was successful; if it returns 1 it failed.
import os

# '-n 1' sends a single echo request (Windows ping syntax; use '-c 1' on Linux)
response_code = os.system("ping 8.8.8.8 -n 1")
print(response_code)

Python - RemoteDisconnected: Remote end closed connection without response

I'm using Python to make requests to the smashgg API (the queries are in GraphQL), and I got an error I've never had before while running the code below:
headers = {"Authorization": "Bearer my token"}
def run_query(query, variables): # A simple function to use requests.post to make the API call. Note the json= section.
request = requests.post('https://api.smash.gg/gql/alpha', json={'query': query, 'variables': variables}, headers=headers)
if request.status_code == 200:
return request.json()
else:
raise Exception("Query failed to run by returning code of {}. {}".format(request.status_code, query))
nbRequestsPerMinute = 75
dataFrameAllSets = pd.read_csv('dataFrameAllSets.csv') #sets of players I already fetched
countryByPlayer = pd.DataFrame(columns = [
"playerId",
"playerCountry"
])
uniqueValues1 = dataFrameAllSets["setWinnerId"].unique()
uniqueValues2 = dataFrameAllSets["setLoserId"].unique()
concate = np.concatenate((uniqueValues1, uniqueValues2))
countryByPlayer["playerId"] = concate
countryByPlayer.drop_duplicates(subset = "playerId", inplace = True)
#countryByPlayer has more than 200 000 rows
def queryCountry(playerId) :
query = """
query player ($playerId: ID!){
player(id: $playerId){
user{
location {
country
}
}
}
}
"""
variables = {
"playerId": playerId
}
return query, variables
for index, x in countryByPlayer.iterrows() :
t0 = time.time()
query, variables = queryCountry(int(x['playerId']))
result = run_query(query, variables) # Execute the query
if result["data"]["player"] != None :
if result["data"]["player"]["user"] != None :
if result["data"]["player"]["user"]["location"] != None :
countryByPlayer.at[index, "playerCountry"] = result["data"]["player"]["user"]["location"]["country"]
else :
countryByPlayer.at[index, "playerCountry"] = "noneLocation"
else :
countryByPlayer.at[index, "playerCountry"] = "nonePlayer"
t1 = time.time()
if t1 - t0 < 60/nbRequestsPerMinute :
time.sleep(60/nbRequestsPerMinute - t1 + t0)
The error message is the following:
RemoteDisconnected Traceback (most recent call last)
~\Anaconda3\lib\site-packages\urllib3\connectionpool.py in urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)
599 body=body, headers=headers,
--> 600 chunked=chunked)
601
~\Anaconda3\lib\site-packages\urllib3\connectionpool.py in _make_request(self, conn, method, url, timeout, chunked, **httplib_request_kw)
383 # otherwise it looks like a programming error was the cause.
--> 384 six.raise_from(e, None)
385 except (SocketTimeout, BaseSSLError, SocketError) as e:
~\Anaconda3\lib\site-packages\urllib3\packages\six.py in raise_from(value, from_value)
~\Anaconda3\lib\site-packages\urllib3\connectionpool.py in _make_request(self, conn, method, url, timeout, chunked, **httplib_request_kw)
379 try:
--> 380 httplib_response = conn.getresponse()
381 except Exception as e:
~\Anaconda3\lib\http\client.py in getresponse(self)
1320 try:
-> 1321 response.begin()
1322 except ConnectionError:
~\Anaconda3\lib\http\client.py in begin(self)
295 while True:
--> 296 version, status, reason = self._read_status()
297 if status != CONTINUE:
~\Anaconda3\lib\http\client.py in _read_status(self)
264 # sending a valid response.
--> 265 raise RemoteDisconnected("Remote end closed connection without"
266 " response")
RemoteDisconnected: Remote end closed connection without response
During handling of the above exception, another exception occurred:
ProtocolError Traceback (most recent call last)
~\Anaconda3\lib\site-packages\requests\adapters.py in send(self, request, stream, timeout, verify, cert, proxies)
448 retries=self.max_retries,
--> 449 timeout=timeout
450 )
~\Anaconda3\lib\site-packages\urllib3\connectionpool.py in urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)
637 retries = retries.increment(method, url, error=e, _pool=self,
--> 638 _stacktrace=sys.exc_info()[2])
639 retries.sleep()
~\Anaconda3\lib\site-packages\urllib3\util\retry.py in increment(self, method, url, response, error, _pool, _stacktrace)
366 if read is False or not self._is_method_retryable(method):
--> 367 raise six.reraise(type(error), error, _stacktrace)
368 elif read is not None:
~\Anaconda3\lib\site-packages\urllib3\packages\six.py in reraise(tp, value, tb)
684 if value.__traceback__ is not tb:
--> 685 raise value.with_traceback(tb)
686 raise value
~\Anaconda3\lib\site-packages\urllib3\connectionpool.py in urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)
599 body=body, headers=headers,
--> 600 chunked=chunked)
601
~\Anaconda3\lib\site-packages\urllib3\connectionpool.py in _make_request(self, conn, method, url, timeout, chunked, **httplib_request_kw)
383 # otherwise it looks like a programming error was the cause.
--> 384 six.raise_from(e, None)
385 except (SocketTimeout, BaseSSLError, SocketError) as e:
~\Anaconda3\lib\site-packages\urllib3\packages\six.py in raise_from(value, from_value)
~\Anaconda3\lib\site-packages\urllib3\connectionpool.py in _make_request(self, conn, method, url, timeout, chunked, **httplib_request_kw)
379 try:
--> 380 httplib_response = conn.getresponse()
381 except Exception as e:
~\Anaconda3\lib\http\client.py in getresponse(self)
1320 try:
-> 1321 response.begin()
1322 except ConnectionError:
~\Anaconda3\lib\http\client.py in begin(self)
295 while True:
--> 296 version, status, reason = self._read_status()
297 if status != CONTINUE:
~\Anaconda3\lib\http\client.py in _read_status(self)
264 # sending a valid response.
--> 265 raise RemoteDisconnected("Remote end closed connection without"
266 " response")
ProtocolError: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response'))
During handling of the above exception, another exception occurred:
ConnectionError Traceback (most recent call last)
<ipython-input-5-f3b194f946df> in <module>
4 compteurPlayers += 1
5 query, variables = queryCountry(int(x['playerId']))
----> 6 result = run_query(query, variables) # Execute the query
7 if result["data"]["player"] != None :
8 if result["data"]["player"]["user"] != None :
<ipython-input-2-2a5c0920ef76> in run_query(query, variables)
3
4 def run_query(query, variables): # A simple function to use requests.post to make the API call. Note the json= section.
----> 5 request = requests.post('https://api.smash.gg/gql/alpha', json={'query': query, 'variables': variables}, headers=headers)
6 if request.status_code == 200:
7 return request.json()
~\Anaconda3\lib\site-packages\requests\api.py in post(url, data, json, **kwargs)
114 """
115
--> 116 return request('post', url, data=data, json=json, **kwargs)
117
118
~\Anaconda3\lib\site-packages\requests\api.py in request(method, url, **kwargs)
58 # cases, and look like a memory leak in others.
59 with sessions.Session() as session:
---> 60 return session.request(method=method, url=url, **kwargs)
61
62
~\Anaconda3\lib\site-packages\requests\sessions.py in request(self, method, url, params, data, headers, cookies, files, auth, timeout, allow_redirects, proxies, hooks, stream, verify, cert, json)
531 }
532 send_kwargs.update(settings)
--> 533 resp = self.send(prep, **send_kwargs)
534
535 return resp
~\Anaconda3\lib\site-packages\requests\sessions.py in send(self, request, **kwargs)
644
645 # Send the request
--> 646 r = adapter.send(request, **kwargs)
647
648 # Total elapsed time of the request (approximately)
~\Anaconda3\lib\site-packages\requests\adapters.py in send(self, request, stream, timeout, verify, cert, proxies)
496
497 except (ProtocolError, socket.error) as err:
--> 498 raise ConnectionError(err, request=request)
499
500 except MaxRetryError as e:
ConnectionError: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response'))
I first thought it had something to do with the rate limit of the API, which is set to 80 requests per minute; however, I delay the loop to always stay under this rate limit.
Moreover, when retesting my code multiple times, the error appeared randomly during the loop.
So I come to you guys because I really need your help.
Thanks in advance.
Problem
A remote service is unreliable.
Solution
Program defensively by handling anticipated errors in your code. Consider implementing an exponential backoff with maximum retries. Also, add logging to track which requests were successful, retried, or completely failed. If necessary, you may want to implement an application monitoring or paging system to alert you if a certain condition is met (e.g., 100 errors in a row), if this is considered a process critical to your application.
Also, the service may have a bulk API that you can use, so instead of submitting n requests (where n is the number of player ids) you can submit n / bulk_limit (where bulk_limit is the max number of ids their bulk api accepts to process in a single request).
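A minimal sketch of such an exponential backoff around the question's run_query (the retry cap and base delay are arbitrary choices, not from the answer):

import time

import requests

def run_query_with_backoff(query, variables, max_retries=5):
    # Double the wait after each connection failure: 1s, 2s, 4s, 8s, 16s
    for attempt in range(max_retries):
        try:
            return run_query(query, variables)  # the function defined in the question
        except requests.exceptions.ConnectionError as e:
            wait = 2 ** attempt
            print(f'attempt {attempt + 1} failed ({e}); retrying in {wait}s')
            time.sleep(wait)
    raise RuntimeError(f'query still failing after {max_retries} retries')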

How to make Python requests persist in getting head

I have a problem where Python requests is throwing an exception after a few seconds. The website is being very slow, but only intermittently.
Chrome and Safari both fail to load the page. (E.g. Chrome displays "This site can't be reached", ERR_CONNECTION_RESET). However, Firefox is consistently able to access the page, although it takes around 20 secs to load. This behaviour is repeatable from several different machines, located in different countries. It seems like Firefox is "trying harder", and not timing out.
I'd like to get Python's requests to behave more like Firefox in this case. I have set the timeout argument to a large number (60 seconds), but the exception is thrown long before that. It seems like there is some kind of handshake timeout, whereas maybe the timeout parameter controls the wait time for the response, post-handshake?
import requests
target='https://nomads.ncep.noaa.gov/pub/data/nccf/com/gens/prod/gefs.20191113/00/pgrb2a/'
request = requests.head(target, timeout=60)
print(request.status_code)
^^ replace 20191113 with yesterday's date, as these links expire after 7 days.
The exception arrives after around 5 seconds, and is the "standard" requests exception when it can't access a page:
---------------------------------------------------------------------------
OSError Traceback (most recent call last)
~/miniconda/envs/basics/lib/python3.6/site-packages/urllib3/connectionpool.py in urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)
599 body=body, headers=headers,
--> 600 chunked=chunked)
601
~/miniconda/envs/basics/lib/python3.6/site-packages/urllib3/connectionpool.py in _make_request(self, conn, method, url, timeout, chunked, **httplib_request_kw)
383 # otherwise it looks like a programming error was the cause.
--> 384 six.raise_from(e, None)
385 except (SocketTimeout, BaseSSLError, SocketError) as e:
~/miniconda/envs/basics/lib/python3.6/site-packages/urllib3/packages/six.py in raise_from(value, from_value)
~/miniconda/envs/basics/lib/python3.6/site-packages/urllib3/connectionpool.py in _make_request(self, conn, method, url, timeout, chunked, **httplib_request_kw)
379 try:
--> 380 httplib_response = conn.getresponse()
381 except Exception as e:
~/miniconda/envs/basics/lib/python3.6/http/client.py in getresponse(self)
1330 try:
-> 1331 response.begin()
1332 except ConnectionError:
~/miniconda/envs/basics/lib/python3.6/http/client.py in begin(self)
296 while True:
--> 297 version, status, reason = self._read_status()
298 if status != CONTINUE:
~/miniconda/envs/basics/lib/python3.6/http/client.py in _read_status(self)
257 def _read_status(self):
--> 258 line = str(self.fp.readline(_MAXLINE + 1), "iso-8859-1")
259 if len(line) > _MAXLINE:
~/miniconda/envs/basics/lib/python3.6/socket.py in readinto(self, b)
585 try:
--> 586 return self._sock.recv_into(b)
587 except timeout:
~/miniconda/envs/basics/lib/python3.6/site-packages/urllib3/contrib/pyopenssl.py in recv_into(self, *args, **kwargs)
299 else:
--> 300 return self.recv_into(*args, **kwargs)
301
~/miniconda/envs/basics/lib/python3.6/site-packages/urllib3/contrib/pyopenssl.py in recv_into(self, *args, **kwargs)
289 else:
--> 290 raise SocketError(str(e))
291 except OpenSSL.SSL.ZeroReturnError as e:
OSError: (54, 'ECONNRESET')
During handling of the above exception, another exception occurred:
ProtocolError Traceback (most recent call last)
~/miniconda/envs/basics/lib/python3.6/site-packages/requests/adapters.py in send(self, request, stream, timeout, verify, cert, proxies)
444 retries=self.max_retries,
--> 445 timeout=timeout
446 )
~/miniconda/envs/basics/lib/python3.6/site-packages/urllib3/connectionpool.py in urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)
637 retries = retries.increment(method, url, error=e, _pool=self,
--> 638 _stacktrace=sys.exc_info()[2])
639 retries.sleep()
~/miniconda/envs/basics/lib/python3.6/site-packages/urllib3/util/retry.py in increment(self, method, url, response, error, _pool, _stacktrace)
366 if read is False or not self._is_method_retryable(method):
--> 367 raise six.reraise(type(error), error, _stacktrace)
368 elif read is not None:
~/miniconda/envs/basics/lib/python3.6/site-packages/urllib3/packages/six.py in reraise(tp, value, tb)
684 if value.__traceback__ is not tb:
--> 685 raise value.with_traceback(tb)
686 raise value
~/miniconda/envs/basics/lib/python3.6/site-packages/urllib3/connectionpool.py in urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)
599 body=body, headers=headers,
--> 600 chunked=chunked)
601
~/miniconda/envs/basics/lib/python3.6/site-packages/urllib3/connectionpool.py in _make_request(self, conn, method, url, timeout, chunked, **httplib_request_kw)
383 # otherwise it looks like a programming error was the cause.
--> 384 six.raise_from(e, None)
385 except (SocketTimeout, BaseSSLError, SocketError) as e:
~/miniconda/envs/basics/lib/python3.6/site-packages/urllib3/packages/six.py in raise_from(value, from_value)
~/miniconda/envs/basics/lib/python3.6/site-packages/urllib3/connectionpool.py in _make_request(self, conn, method, url, timeout, chunked, **httplib_request_kw)
379 try:
--> 380 httplib_response = conn.getresponse()
381 except Exception as e:
~/miniconda/envs/basics/lib/python3.6/http/client.py in getresponse(self)
1330 try:
-> 1331 response.begin()
1332 except ConnectionError:
~/miniconda/envs/basics/lib/python3.6/http/client.py in begin(self)
296 while True:
--> 297 version, status, reason = self._read_status()
298 if status != CONTINUE:
~/miniconda/envs/basics/lib/python3.6/http/client.py in _read_status(self)
257 def _read_status(self):
--> 258 line = str(self.fp.readline(_MAXLINE + 1), "iso-8859-1")
259 if len(line) > _MAXLINE:
~/miniconda/envs/basics/lib/python3.6/socket.py in readinto(self, b)
585 try:
--> 586 return self._sock.recv_into(b)
587 except timeout:
~/miniconda/envs/basics/lib/python3.6/site-packages/urllib3/contrib/pyopenssl.py in recv_into(self, *args, **kwargs)
299 else:
--> 300 return self.recv_into(*args, **kwargs)
301
~/miniconda/envs/basics/lib/python3.6/site-packages/urllib3/contrib/pyopenssl.py in recv_into(self, *args, **kwargs)
289 else:
--> 290 raise SocketError(str(e))
291 except OpenSSL.SSL.ZeroReturnError as e:
ProtocolError: ('Connection aborted.', OSError("(54, 'ECONNRESET')",))
During handling of the above exception, another exception occurred:
ConnectionError Traceback (most recent call last)
<ipython-input-2-e4852eeb80e3> in <module>()
2 import requests
3 target='https://nomads.ncep.noaa.gov/pub/data/nccf/com/gens/prod/gefs.20191113/00/pgrb2a/'
----> 4 request = requests.head(target, timeout=60)
5 print(request.status_code)
~/miniconda/envs/basics/lib/python3.6/site-packages/requests/api.py in head(url, **kwargs)
96
97 kwargs.setdefault('allow_redirects', False)
---> 98 return request('head', url, **kwargs)
99
100
~/miniconda/envs/basics/lib/python3.6/site-packages/requests/api.py in request(method, url, **kwargs)
56 # cases, and look like a memory leak in others.
57 with sessions.Session() as session:
---> 58 return session.request(method=method, url=url, **kwargs)
59
60
~/miniconda/envs/basics/lib/python3.6/site-packages/requests/sessions.py in request(self, method, url, params, data, headers, cookies, files, auth, timeout, allow_redirects, proxies, hooks, stream, verify, cert, json)
510 }
511 send_kwargs.update(settings)
--> 512 resp = self.send(prep, **send_kwargs)
513
514 return resp
~/miniconda/envs/basics/lib/python3.6/site-packages/requests/sessions.py in send(self, request, **kwargs)
620
621 # Send the request
--> 622 r = adapter.send(request, **kwargs)
623
624 # Total elapsed time of the request (approximately)
~/miniconda/envs/basics/lib/python3.6/site-packages/requests/adapters.py in send(self, request, stream, timeout, verify, cert, proxies)
493
494 except (ProtocolError, socket.error) as err:
--> 495 raise ConnectionError(err, request=request)
496
497 except MaxRetryError as e:
ConnectionError: ('Connection aborted.', OSError("(54, 'ECONNRESET')",))
Is there a way to get Requests to "try harder" for slow pages?
This is Python 3.6 and requests 2.19.1.
Based on the stack trace, the connection does not time out but is reset by the host ('ECONNRESET'). For more information about the error, see the question "What does “connection reset by peer” mean?"
Instead of increasing the timeout you need to retry the request. To avoid spamming the host there should be some time between the retries. You could write your own retry logic or use a library like backoff.
Below is an example from backoff's documentation which will retry on any requests exception for up to 60 seconds using an exponential backoff strategy.
import backoff
import requests

@backoff.on_exception(backoff.expo,
                      requests.exceptions.RequestException,
                      max_time=60)
def get_url(url):
    return requests.get(url)
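Applied to this question's code, the same decorator could wrap the HEAD request (a sketch; head_url is a hypothetical name):

@backoff.on_exception(backoff.expo,
                      requests.exceptions.RequestException,
                      max_time=60)
def head_url(url):
    return requests.head(url, timeout=60)

print(head_url(target).status_code)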

python 3 urllib.request.urlopen not working in place of python 2 urllib2.urlopen

I inherited some python 2 code that uses urllib2.urlopen to post some data to a local server, and I'd like to convert it to python 3. I initially tried the requests module, and when that didn't work I fell back to urllib.request.urlopen, since the documentation says
urllib.request.urlopen() corresponds to the old urllib2.urlopen.
Both python 3 approaches are throwing the same exception, while the python 2 code works fine.
working python 2 code:
>>> request = urllib2.Request(url)
>>> request.add_data(data)
>>> response = urllib2.urlopen(request)
>>> response.read()
''
>>> response.close()
python 3 equivalent:
>>> request = urllib.request.Request(url, data)
>>> response = urllib.request.urlopen(request)
generates the following traceback:
---------------------------------------------------------------------------
RemoteDisconnected Traceback (most recent call last)
<ipython-input-56-d5c6f6ac8b5c> in <module>()
----> 1 response = urllib.request.urlopen(request)
/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/urllib/request.py in urlopen(url, data, timeout, cafile, capath, cadefault, context)
221 else:
222 opener = _opener
--> 223 return opener.open(url, data, timeout)
224
225 def install_opener(opener):
/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/urllib/request.py in open(self, fullurl, data, timeout)
524 req = meth(req)
525
--> 526 response = self._open(req, data)
527
528 # post-process response
/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/urllib/request.py in _open(self, req, data)
542 protocol = req.type
543 result = self._call_chain(self.handle_open, protocol, protocol +
--> 544 '_open', req)
545 if result:
546 return result
/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/urllib/request.py in _call_chain(self, chain, kind, meth_name, *args)
502 for handler in handlers:
503 func = getattr(handler, meth_name)
--> 504 result = func(*args)
505 if result is not None:
506 return result
/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/urllib/request.py in http_open(self, req)
1344
1345 def http_open(self, req):
-> 1346 return self.do_open(http.client.HTTPConnection, req)
1347
1348 http_request = AbstractHTTPHandler.do_request_
/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/urllib/request.py in do_open(self, http_class, req, **http_conn_args)
1319 except OSError as err: # timeout error
1320 raise URLError(err)
-> 1321 r = h.getresponse()
1322 except:
1323 h.close()
/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/http/client.py in getresponse(self)
1329 try:
1330 try:
-> 1331 response.begin()
1332 except ConnectionError:
1333 self.close()
/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/http/client.py in begin(self)
295 # read until we get a non-100 response
296 while True:
--> 297 version, status, reason = self._read_status()
298 if status != CONTINUE:
299 break
/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/http/client.py in _read_status(self)
264 # Presumably, the server closed the connection before
265 # sending a valid response.
--> 266 raise RemoteDisconnected("Remote end closed connection without"
267 " response")
268 try:
RemoteDisconnected: Remote end closed connection without response
I'd really like to avoid converting my existing code to python 2 if at all possible. I don't have any control over how the server handles requests.
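One thing worth checking when porting (an aside, not a confirmed diagnosis of the RemoteDisconnected above): in Python 3, urllib.request requires the POST body to be bytes rather than str, so the data passed to Request usually needs an explicit encode. A minimal sketch with hypothetical url and data values:

import urllib.request

# Hypothetical endpoint and payload; in Python 3 the POST body must be bytes
url = 'http://localhost:8000/endpoint'
data = 'key=value'.encode('ascii')

request = urllib.request.Request(url, data=data)
with urllib.request.urlopen(request) as response:
    print(response.read())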
