python url request to finance website hangs [duplicate]

This question already has answers here:
Sending "User-agent" using Requests library in Python
(3 answers)
Closed 6 days ago.
I am struggling to get reliable dividend data for my website, so I have the following script to hit nasdaq.com:
import requests
dividends = "https://www.nasdaq.com/market-activity/stocks/hd/dividend-history"
response = requests.get(dividends)
The script just hangs and does nothing. The traceback when I cancel it looks like this:
^CTraceback (most recent call last):
File "/home/cchilders/projects/stocks_backend/scripts/get_dividends_nasdaq_dot_com.py", line 5, in <module>
response = requests.get(dividends)
File "/home/cchilders/.local/lib/python3.10/site-packages/requests/api.py", line 73, in get
return request("get", url, params=params, **kwargs)
File "/home/cchilders/.local/lib/python3.10/site-packages/requests/api.py", line 59, in request
return session.request(method=method, url=url, **kwargs)
File "/home/cchilders/.local/lib/python3.10/site-packages/requests/sessions.py", line 587, in request
resp = self.send(prep, **send_kwargs)
File "/home/cchilders/.local/lib/python3.10/site-packages/requests/sessions.py", line 701, in send
r = adapter.send(request, **kwargs)
File "/home/cchilders/.local/lib/python3.10/site-packages/requests/adapters.py", line 489, in send
resp = conn.urlopen(
File "/usr/lib/python3/dist-packages/urllib3/connectionpool.py", line 699, in urlopen
httplib_response = self._make_request(
File "/usr/lib/python3/dist-packages/urllib3/connectionpool.py", line 445, in _make_request
six.raise_from(e, None)
File "<string>", line 3, in raise_from
File "/usr/lib/python3/dist-packages/urllib3/connectionpool.py", line 440, in _make_request
httplib_response = conn.getresponse()
File "/usr/lib/python3.10/http/client.py", line 1374, in getresponse
response.begin()
File "/usr/lib/python3.10/http/client.py", line 318, in begin
version, status, reason = self._read_status()
File "/usr/lib/python3.10/http/client.py", line 279, in _read_status
line = str(self.fp.readline(_MAXLINE + 1), "iso-8859-1")
File "/usr/lib/python3.10/socket.py", line 705, in readinto
return self._sock.recv_into(b)
File "/usr/lib/python3.10/ssl.py", line 1273, in recv_into
return self.read(nbytes, buffer)
File "/usr/lib/python3.10/ssl.py", line 1129, in read
return self._sslobj.read(len, buffer)
KeyboardInterrupt
This test script works and shows a 200 response:
response = requests.get("https://www.google.com")
print(response)
Does this mean that the site has blocked the requests module and other libraries from connecting, or is there something else I can do? I cannot find dividend data going back more than a few years from any site except this one.

The site has blocked some user-agents. Try using the user-agent of a browser:
import requests
headers = {
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:104.0) Gecko/20100101 Firefox/104.0'
}
dividends = "https://www.nasdaq.com/market-activity/stocks/hd/dividend-history"
response = requests.get(dividends, headers=headers)
print(response)
Result:
<Response [200]>
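Separately from the user-agent, note that requests has no default timeout, which is why the original call can hang forever instead of failing. Here is a minimal sketch of the same request with a timeout and basic error handling added; the 10-second value is an arbitrary assumption:
import requests

headers = {
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:104.0) Gecko/20100101 Firefox/104.0'
}
dividends = "https://www.nasdaq.com/market-activity/stocks/hd/dividend-history"

try:
    # timeout=10 is an arbitrary choice; without a timeout, requests waits indefinitely
    response = requests.get(dividends, headers=headers, timeout=10)
    response.raise_for_status()  # raise on 4xx/5xx instead of silently continuing
    print(response.status_code)
except requests.exceptions.RequestException as exc:
    print("request failed:", exc)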

Related

Python response = request.urlopen(url) not working with specific website

I am trying to get some news from the website https://www.onvista.de, using code I previously used on other websites, but in this case it doesn't work. Can anyone please tell me why?
import urllib.request, urllib.error, urllib.parse, datetime, os
url = 'https://www.onvista.de'
response = urllib.request.urlopen(url)
webContent = response.read()
print(type(webContent))
Here is what I get back:
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "C:\Program Files\WindowsApps\PythonSoftwareFoundation.Python.3.8_3.8.2800.0_x64__qbz5n2kfra8p0\lib\urllib\request.py", line 222, in urlopen
return opener.open(url, data, timeout)
File "C:\Program Files\WindowsApps\PythonSoftwareFoundation.Python.3.8_3.8.2800.0_x64__qbz5n2kfra8p0\lib\urllib\request.py", line 525, in open
response = self._open(req, data)
File "C:\Program Files\WindowsApps\PythonSoftwareFoundation.Python.3.8_3.8.2800.0_x64__qbz5n2kfra8p0\lib\urllib\request.py", line 542, in _open
result = self._call_chain(self.handle_open, protocol, protocol +
File "C:\Program Files\WindowsApps\PythonSoftwareFoundation.Python.3.8_3.8.2800.0_x64__qbz5n2kfra8p0\lib\urllib\request.py", line 502, in _call_chain
result = func(*args)
File "C:\Program Files\WindowsApps\PythonSoftwareFoundation.Python.3.8_3.8.2800.0_x64__qbz5n2kfra8p0\lib\urllib\request.py", line 1397, in https_open
return self.do_open(http.client.HTTPSConnection, req,
File "C:\Program Files\WindowsApps\PythonSoftwareFoundation.Python.3.8_3.8.2800.0_x64__qbz5n2kfra8p0\lib\urllib\request.py", line 1358, in do_open
r = h.getresponse()
File "C:\Program Files\WindowsApps\PythonSoftwareFoundation.Python.3.8_3.8.2800.0_x64__qbz5n2kfra8p0\lib\http\client.py", line 1344, in getresponse
response.begin()
File "C:\Program Files\WindowsApps\PythonSoftwareFoundation.Python.3.8_3.8.2800.0_x64__qbz5n2kfra8p0\lib\http\client.py", line 307, in begin
version, status, reason = self._read_status()
File "C:\Program Files\WindowsApps\PythonSoftwareFoundation.Python.3.8_3.8.2800.0_x64__qbz5n2kfra8p0\lib\http\client.py", line 276, in _read_status
raise RemoteDisconnected("Remote end closed connection without"
http.client.RemoteDisconnected: Remote end closed connection without response
>>> response = urllib.request.urlopen('https://www.onvista.de')
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "C:\Program Files\WindowsApps\PythonSoftwareFoundation.Python.3.8_3.8.2800.0_x64__qbz5n2kfra8p0\lib\urllib\request.py", line 222, in urlopen
return opener.open(url, data, timeout)
File "C:\Program Files\WindowsApps\PythonSoftwareFoundation.Python.3.8_3.8.2800.0_x64__qbz5n2kfra8p0\lib\urllib\request.py", line 525, in open
response = self._open(req, data)
File "C:\Program Files\WindowsApps\PythonSoftwareFoundation.Python.3.8_3.8.2800.0_x64__qbz5n2kfra8p0\lib\urllib\request.py", line 542, in _open
result = self._call_chain(self.handle_open, protocol, protocol +
File "C:\Program Files\WindowsApps\PythonSoftwareFoundation.Python.3.8_3.8.2800.0_x64__qbz5n2kfra8p0\lib\urllib\request.py", line 502, in _call_chain
result = func(*args)
File "C:\Program Files\WindowsApps\PythonSoftwareFoundation.Python.3.8_3.8.2800.0_x64__qbz5n2kfra8p0\lib\urllib\request.py", line 1397, in https_open
return self.do_open(http.client.HTTPSConnection, req,
File "C:\Program Files\WindowsApps\PythonSoftwareFoundation.Python.3.8_3.8.2800.0_x64__qbz5n2kfra8p0\lib\urllib\request.py", line 1358, in do_open
r = h.getresponse()
File "C:\Program Files\WindowsApps\PythonSoftwareFoundation.Python.3.8_3.8.2800.0_x64__qbz5n2kfra8p0\lib\http\client.py", line 1344, in getresponse
response.begin()
File "C:\Program Files\WindowsApps\PythonSoftwareFoundation.Python.3.8_3.8.2800.0_x64__qbz5n2kfra8p0\lib\http\client.py", line 307, in begin
version, status, reason = self._read_status()
File "C:\Program Files\WindowsApps\PythonSoftwareFoundation.Python.3.8_3.8.2800.0_x64__qbz5n2kfra8p0\lib\http\client.py", line 276, in _read_status
raise RemoteDisconnected("Remote end closed connection without"
http.client.RemoteDisconnected: Remote end closed connection without response
Thank you for any advice. I am using Python 3.8 on Windows 10.
You should include headers in the request.
import requests
url = 'https://www.onvista.de'
headers = {'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:101.0) Gecko/20100101 Firefox/101.0'}
response = requests.get(url, headers=headers) # <Response [200]>
The website seems to block user agents that are identified as bots. You can set a custom user agent so the server will accept your request.
req = urllib.request.Request(
    url,
    data=None,
    headers={
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.47 Safari/537.36'
    }
)
...
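For completeness, a self-contained sketch of how that Request object would then be fetched and read with the standard library only; the timeout and the UTF-8 decode are assumptions, not something the answer specified:
import urllib.request

url = 'https://www.onvista.de'
req = urllib.request.Request(
    url,
    headers={'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:101.0) Gecko/20100101 Firefox/101.0'}
)

# timeout=10 is an arbitrary safeguard so the call cannot hang indefinitely
with urllib.request.urlopen(req, timeout=10) as response:
    # decoding as UTF-8 with errors='replace' is an assumption about the page encoding
    html = response.read().decode('utf-8', errors='replace')
    print(response.status, len(html))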

Having problems with python requests

I can't request any page. Am I doing something wrong? I've used this code before, so I don't know what's happening. I've tried other ways as well; it seems to just not get a response. Did something change with requests, or could it be an antivirus/firewall thing?
Any help?
>>> import requests, json
>>> url = 'https://api.chess.com/pub/player/Hikaru/games/archives'
>>> resp= requests.get(url)
Traceback (most recent call last):
File "<pyshell#4>", line 1, in <module>
resp= requests.get(url)
File "C:\Users\\AppData\Local\Programs\Python\Python37\lib\site-packages\requests\api.py", line 75, in get
return request('get', url, params=params, **kwargs)
File "C:\Users\\AppData\Local\Programs\Python\Python37\lib\site-packages\requests\api.py", line 60, in request
return session.request(method=method, url=url, **kwargs)
File "C:\Users\\AppData\Local\Programs\Python\Python37\lib\site-packages\requests\sessions.py", line 533, in request
resp = self.send(prep, **send_kwargs)
File "C:\Users\\AppData\Local\Programs\Python\Python37\lib\site-packages\requests\sessions.py", line 646, in send
r = adapter.send(request, **kwargs)
File "C:\Users\\AppData\Local\Programs\Python\Python37\lib\site-packages\requests\adapters.py", line 449, in send
timeout=timeout
File "C:\Users\\AppData\Local\Programs\Python\Python37\lib\site-packages\urllib3\connectionpool.py", line 600, in urlopen
chunked=chunked)
File "C:\Users\\AppData\Local\Programs\Python\Python37\lib\site-packages\urllib3\connectionpool.py", line 354, in _make_request
conn.request(method, url, **httplib_request_kw)
File "C:\Users\\AppData\Local\Programs\Python\Python37\lib\http\client.py", line 1229, in request
self._send_request(method, url, body, headers, encode_chunked)
File "C:\Users\\AppData\Local\Programs\Python\Python37\lib\http\client.py", line 1275, in _send_request
self.endheaders(body, encode_chunked=encode_chunked)
File "C:\Users\\AppData\Local\Programs\Python\Python37\lib\http\client.py", line 1224, in endheaders
self._send_output(message_body, encode_chunked=encode_chunked)
File "C:\Users\\AppData\Local\Programs\Python\Python37\lib\http\client.py", line 1016, in _send_output
self.send(msg)
File "C:\Users\\AppData\Local\Programs\Python\Python37\lib\http\client.py", line 977, in send
self.sock.sendall(data)
File "C:\Users\\AppData\Local\Programs\Python\Python37\lib\site-packages\urllib3\contrib\pyopenssl.py", line 328, in sendall
sent = self._send_until_done(data[total_sent:total_sent + SSL_WRITE_BLOCKSIZE])
File "C:\Users\\AppData\Local\Programs\Python\Python37\lib\site-packages\urllib3\contrib\pyopenssl.py", line 317, in _send_until_done
return self.connection.send(data)
File "C:\Users\\AppData\Local\Programs\Python\Python37\lib\site-packages\OpenSSL\SSL.py", line 1644, in send
with _from_buffer(buf) as data:
AttributeError: __enter__
import requests, json
url = 'https://api.chess.com/pub/player/Hikaru/games/archives'
resp= requests.get(url)
print(resp)

request.get() is getting stuck

Hello, I am trying to scrape some data from a website and requests.get() is getting caught up on something.
Here is my code:
page_url = front_end+str(i)+'/'
page = requests.get(page_url)
I cast it to a string because I am just building a URL. If I stop the code or it runs too long, I get something like:
File "/usr/local/lib/python3.6/site-packages/urllib3/connectionpool.py",
line 377, in _make_request
httplib_response = conn.getresponse(buffering=True)
TypeError: getresponse() got an unexpected keyword argument 'buffering'
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "main.py", line 24, in <module>
page = requests.get(page_url)
File "/usr/local/lib/python3.6/site-packages/requests/api.py", line 75, in get
return request('get', url, params=params, **kwargs)
File "/usr/local/lib/python3.6/site-packages/requests/api.py", line 60, in request
return session.request(method=method, url=url, **kwargs)
File "/usr/local/lib/python3.6/site-packages/requests/sessions.py", line 533, in request
resp = self.send(prep, **send_kwargs)
File "/usr/local/lib/python3.6/site-packages/requests/sessions.py", line 646, in send
r = adapter.send(request, **kwargs)
File "/usr/local/lib/python3.6/site-packages/requests/adapters.py", line 449, in send
timeout=timeout
File "/usr/local/lib/python3.6/site-packages/urllib3/connectionpool.py", line 600, in urlopen
chunked=chunked)
File "/usr/local/lib/python3.6/site-packages/urllib3/connectionpool.py", line 380, in _make_request
httplib_response = conn.getresponse()
File "/usr/local/lib/python3.6/http/client.py", line 1331, in getresponse
response.begin()
File "/usr/local/lib/python3.6/http/client.py", line 297, inbegin
version, status, reason = self._read_status()
File "/usr/local/lib/python3.6/http/client.py", line 258, in_read_status
line = str(self.fp.readline(_MAXLINE + 1), "iso-8859-1")
File "/usr/local/lib/python3.6/socket.py", line 586, in readinto
return self._sock.recv_into(b)
File "/usr/local/lib/python3.6/ssl.py", line 1002, in recv_into
return self.read(nbytes, buffer)
File "/usr/local/lib/python3.6/ssl.py", line 865, in read
return self._sslobj.read(len, buffer)
File "/usr/local/lib/python3.6/ssl.py", line 625, in read
v = self._sslobj.read(len, buffer)
I do not understand what the TypeError: getresponse() got an unexpected keyword argument 'buffering' means or how to fix it.
Answer:
Sometimes requests made with requests.get() get blocked by the server, so the solution is to make the server think the request is coming from a web browser.
Example:
headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36',}
page = requests.get("https://example.com", headers=headers)
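If you make several requests to the same site, a requests.Session lets you set the browser-style headers once and also keeps any cookies the server sets between calls. A sketch of that pattern, with example.com standing in for the real URL and the timeout chosen arbitrarily:
import requests

session = requests.Session()
# headers set on the session are sent with every request made through it
session.headers.update({
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36',
})

# cookies returned by earlier responses are reused automatically on later calls
page = session.get("https://example.com", timeout=10)
print(page.status_code)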

Python Requests Get not Working (GCloud Debian 4.9.110)

I have a simple GET request I'd like to make using Python's Requests library.
import requests
u_a = "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.87 Safari/537.36"
url = 'http://stats.nba.com/stats/playergamelogs?DateFrom=&DateTo=&GameSegment=&LastNGames=0&LeagueID=00&Location=&MeasureType=Base&Month=0&OpponentTeamID=0&Outcome=&PORound=0&PaceAdjust=N&PerMode=Totals&Period=0&PlayerID=202391&PlusMinus=N&Rank=N&Season=2014-15&SeasonSegment=&SeasonType=Regular+Season&ShotClockRange=&VsConference=&VsDivision='
response = requests.get(url, headers={"USER-AGENT":u_a})
The requests.get call hangs and I have to force the script to exit. However, I am able to make the same call on my local macOS and Ubuntu machines. I can also copy/paste the URL into my local computer's browser and view the resulting JSON. Is the problem with the user agent?
Edit (Added stacktrace):
Traceback (most recent call last):
File "prune_simulation.py", line 336, in <module>
main()
File "prune_simulation.py", line 44, in main
response = requests.get(url, headers={"USER-AGENT":u_a})
File "/home/nole/.local/lib/python2.7/site-packages/requests/api.py", line 72, in get
return request('get', url, params=params, **kwargs)
File "/home/nole/.local/lib/python2.7/site-packages/requests/api.py", line 58, in request
return session.request(method=method, url=url, **kwargs)
File "/home/nole/.local/lib/python2.7/site-packages/requests/sessions.py", line 512, in request
resp = self.send(prep, **send_kwargs)
File "/home/nole/.local/lib/python2.7/site-packages/requests/sessions.py", line 622, in send
r = adapter.send(request, **kwargs)
File "/home/nole/.local/lib/python2.7/site-packages/requests/adapters.py", line 445, in send
timeout=timeout
File "/home/nole/.local/lib/python2.7/site-packages/urllib3/connectionpool.py", line 600, in urlopen
chunked=chunked)
File "/home/nole/.local/lib/python2.7/site-packages/urllib3/connectionpool.py", line 377, in _make_request
httplib_response = conn.getresponse(buffering=True)
File "/usr/lib/python2.7/httplib.py", line 1121, in getresponse
response.begin()
File "/usr/lib/python2.7/httplib.py", line 438, in begin
version, status, reason = self._read_status()
File "/usr/lib/python2.7/httplib.py", line 394, in _read_status
line = self.fp.readline(_MAXLINE + 1)
File "/usr/lib/python2.7/socket.py", line 480, in readline
data = self._sock.recv(self._rbufsize)
KeyboardInterrupt

send file with python-requests

I'm doing a pretty simple thing: getting a file from the client and sending it to another server for processing:
import requests
import logging
logger = logging.getLogger(__name__)
data = request.environ['wsgi.input'].read()
url = request.cfg.DOC_PARSE_URL
params = {'AddedPath': str(request.cfg.FORM_TEMP_URL+uid+'/'), 'Button': 'Generate'}
logger.warning('url:'+repr(url))
for k in params:
    logger.warning('params:'+repr(k)+' '+repr(params[k]))
logger.warning('data:'+repr(type(data))+str(len(data)))
#logger.warning('data:'+data)
files = {'WordFile': ('process.doc', data)}
r = requests.post(url, files=files, data=params, stream=False)
Everything works fine on my own PC and on the dev server, but on the production server this code crashes with a UnicodeDecodeError:
Traceback (most recent call last):
File "/sites/mo/admin/utils/dispatcher.py", line 492, in __call__
response = self.view_map[endpoint](request, **params)
File "/sites/mo/admin/utils/admin_views.py", line 231, in parse_doc
r = requests.post(url, files=files, data=params, stream=False)
File "/sites/mo/admin/../third_party/requests/api.py", line 88, in post
return request('post', url, data=data, **kwargs)
File "/sites/mo/admin/../third_party/requests/api.py", line 44, in request
return session.request(method=method, url=url, **kwargs)
File "/sites/mo/admin/../third_party/requests/sessions.py", line 360, in request
resp = self.send(prep, **send_kwargs)
File "/sites/mo/admin/../third_party/requests/sessions.py", line 463, in send
r = adapter.send(request, **kwargs)
File "/sites/mo/admin/../third_party/requests/adapters.py", line 292, in send
timeout=timeout
File "/sites/mo/admin/../third_party/requests/packages/urllib3/connectionpool.py", line 428, in urlopen
body=body, headers=headers)
File "/sites/mo/admin/../third_party/requests/packages/urllib3/connectionpool.py", line 283, in _make_request
conn.request(method, url, **httplib_request_kw)
File "/usr/lib64/python2.7/httplib.py", line 946, in request
self._send_request(method, url, body, headers)
File "/usr/lib64/python2.7/httplib.py", line 987, in _send_request
self.endheaders(body)
File "/usr/lib64/python2.7/httplib.py", line 940, in endheaders
self._send_output(message_body)
File "/usr/lib64/python2.7/httplib.py", line 801, in _send_output
msg += message_body
UnicodeDecodeError: 'ascii' codec can't decode byte 0xd0 in position 739: ordinal not in range(128)
All values are byte strings (I added logging to make sure), so there shouldn't be any Unicode issues. It seems there is some environment influence, but I can't figure it out. Does anybody know what could cause such a problem?
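For reference, a stripped-down, self-contained sketch of the same multipart upload outside the WSGI plumbing; the URL, the path values, and reading the document from disk are placeholders for what the snippet above pulls from the incoming request:
import requests

# placeholder URL and form fields; in the real code these come from request.cfg
url = 'https://example.com/doc-parse'
params = {'AddedPath': '/tmp/forms/some-uid/', 'Button': 'Generate'}

# read the document as raw bytes, mirroring wsgi.input.read() in the original
with open('process.doc', 'rb') as f:
    data = f.read()

files = {'WordFile': ('process.doc', data)}
r = requests.post(url, files=files, data=params, timeout=30)
print(r.status_code)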
