I am writing a Python script where I right-click a file and click a context menu item; clicking it opens a webpage. I have to validate the URL of that webpage. How do I shift control from the OS to the browser and get the current URL?
Why don't you call requests.get(url) and check the response code? Another option is to call requests.head(url):
>>> import requests
>>> url1 = 'http://example.com'
>>> url2 = 'http://sdsdsdsdsdss.com'
>>> r = requests.head(url1)
>>> r.status_code
200
>>> r = requests.head(url2, timeout=5)
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
  File "/usr/lib/python2.7/dist-packages/requests/api.py", line 77, in head
    return request('head', url, **kwargs)
  File "/usr/lib/python2.7/dist-packages/requests/api.py", line 44, in request
    return session.request(method=method, url=url, **kwargs)
  File "/usr/lib/python2.7/dist-packages/requests/sessions.py", line 383, in request
    resp = self.send(prep, **send_kwargs)
  File "/usr/lib/python2.7/dist-packages/requests/sessions.py", line 486, in send
    r = adapter.send(request, **kwargs)
  File "/usr/lib/python2.7/dist-packages/requests/adapters.py", line 387, in send
    raise Timeout(e)
requests.exceptions.Timeout: (<urllib3.connectionpool.HTTPConnectionPool object at 0x7f4e77635950>, 'Connection to sdsdsdsdsdss.com timed out. (connect timeout=5)')
>>>
You need to handle the exception. Details about the requests module: http://docs.python-requests.org/en/latest/
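A minimal sketch of that handling (the url_is_reachable helper and the 5-second timeout are my own illustrative choices, not from the question):

import requests

def url_is_reachable(url, timeout=5):
    # Returns True when a HEAD request succeeds with a non-error status.
    try:
        r = requests.head(url, timeout=timeout, allow_redirects=True)
        return r.status_code < 400
    except requests.exceptions.RequestException:
        # Covers Timeout, ConnectionError, InvalidURL, and the rest.
        return False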
And if you really need to open the web browser, you can use this library: https://docs.python.org/2/library/webbrowser.html
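For example, with a placeholder URL:

import webbrowser
webbrowser.open('http://example.com')  # opens in the system default browser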
Hi, I am trying to hit an API using the requests module of Python. The API has to be hit 20,000 times, as there are around 20,000 pages. Each hit returns around 10 MB of data, and by the end of the process it creates a JSON file of around 100 GB. Here is the code I have written:
import json
import requests

with open('file.json', 'wb', buffering=100*1048567) as f:
    while next_page_cursor != "":
        with requests.get(url, headers=headers) as response:
            json_response = json.loads(response.content.decode('utf-8'))
            """
            json response looks something like this
            {
                content: [{}, {}, {} ........ 50 dictionaries]
                next_page_cursor: "abcd"
            }
            """
            next_page_cursor = json_response['next_page_cursor']
            for data in json_response['content']:
                f.write((json.dumps(data) + "\n").encode())
But after running successfully for a few pages, the code fails with the error below:
Traceback (most recent call last):
  File "<command-1206920060120926>", line 65, in <module>
    with requests.get(data_url, headers = headers) as response:
  File "/databricks/python/lib/python3.7/site-packages/requests/api.py", line 75, in get
    return request('get', url, params=params, **kwargs)
  File "/databricks/python/lib/python3.7/site-packages/requests/api.py", line 60, in request
    return session.request(method=method, url=url, **kwargs)
  File "/databricks/python/lib/python3.7/site-packages/requests/sessions.py", line 533, in request
    resp = self.send(prep, **send_kwargs)
  File "/databricks/python/lib/python3.7/site-packages/requests/sessions.py", line 686, in send
    r.content
  File "/databricks/python/lib/python3.7/site-packages/requests/models.py", line 828, in content
    self._content = b''.join(self.iter_content(CONTENT_CHUNK_SIZE)) or b''
  File "/databricks/python/lib/python3.7/site-packages/requests/models.py", line 753, in generate
    raise ChunkedEncodingError(e)
requests.exceptions.ChunkedEncodingError: ('Connection broken: OSError("(104, \'ECONNRESET\')")', OSError("(104, 'ECONNRESET')"))
You need to use response.iter_content (with stream=True on the request) so the body is read in chunks rather than in one large read:
https://2.python-requests.org/en/master/api/#requests.Response.iter_content
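A sketch of one way to apply that suggestion to a single page of the question's loop (the fetch_page helper name and the one-megabyte chunk size are illustrative choices, not from the original):

import json
import requests

def fetch_page(url, headers):
    # Stream the body in chunks instead of one large read, then decode it.
    with requests.get(url, headers=headers, stream=True) as response:
        response.raise_for_status()
        body = b''.join(response.iter_content(chunk_size=1024 * 1024))
    return json.loads(body.decode('utf-8'))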
I am trying to make an app that downloads comics, but whenever I try to download an image, it says no host was supplied.
I really searched, and there was nothing.
This is the code:
import requests, bs4

url = 'https://www.marvel.com/comics/issue/71314/edge_of_spider-geddon_2018_1'
res = requests.get(url, stream=True)
res.raise_for_status()
soup = bs4.BeautifulSoup(res.text)
elem = soup.select('div[class="row-item-image"] img')  # .viewer-cnt .row .col-xs-12 #ppp img')
#print(elem)
comicurl = 'https:' + elem[0].get('src')
res = requests.get(comicurl, stream=True, allow_redirects=True)
res.raise_for_status()
with open(comicurl[comicurl.rfind('/')+1:], 'wb') as i:
    for chunk in res.iter_content(100000):
        i.write(chunk)
I expect it to download the image but it gives me this error:
Traceback (most recent call last):
  File "C:\Users\Islam\AppData\Local\Programs\Python\Python36\comicdownloader.py", line 10, in <module>
    res=requests.get(comicurl,stream=True,allow_redirects=True)
  File "C:\Users\Islam\AppData\Local\Programs\Python\Python36\lib\site-packages\requests\api.py", line 75, in get
    return request('get', url, params=params, **kwargs)
  File "C:\Users\Islam\AppData\Local\Programs\Python\Python36\lib\site-packages\requests\api.py", line 60, in request
    return session.request(method=method, url=url, **kwargs)
  File "C:\Users\Islam\AppData\Local\Programs\Python\Python36\lib\site-packages\requests\sessions.py", line 519, in request
    prep = self.prepare_request(req)
  File "C:\Users\Islam\AppData\Local\Programs\Python\Python36\lib\site-packages\requests\sessions.py", line 462, in prepare_request
    hooks=merge_hooks(request.hooks, self.hooks),
  File "C:\Users\Islam\AppData\Local\Programs\Python\Python36\lib\site-packages\requests\models.py", line 313, in prepare
    self.prepare_url(url, params)
  File "C:\Users\Islam\AppData\Local\Programs\Python\Python36\lib\site-packages\requests\models.py", line 390, in prepare_url
    raise InvalidURL("Invalid URL %r: No host supplied" % url)
requests.exceptions.InvalidURL: Invalid URL 'https:https://i.annihil.us/u/prod/marvel/i/mg/6/b0/5b6c5e4154f75/portrait_uncanny.jpg': No host supplied
And I get it whenever I try this on any website.
It looks like elem[0].get('src') already evaluates to https://i.annihil.us/u/prod/marvel/i/mg/6/b0/5b6c5e4154f75/portrait_uncanny.jpg.
So on the line comicurl='https:'+elem[0].get('src') you add https: in front of an already well-formed URL, making it invalid.
Can't argue with this: Invalid URL 'https:https://i.annihil.us/u/prod -- the URL really is invalid; you should probably get rid of the 'https:' prefix in the following statement:
comicurl='https:'+elem[0].get('src')
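A minimal sketch of a fix, assuming you only want to prepend a scheme when the src is scheme-relative (i.e. starts with //):

src = elem[0].get('src')
if src.startswith('//'):
    comicurl = 'https:' + src  # scheme-relative, e.g. //i.annihil.us/...
else:
    comicurl = src             # already absolute, e.g. https://i.annihil.us/...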
macOS 10.12.3, Python 2.7.13, requests 2.13.0
I use the requests package to send a POST request. The request needs a login before posting data, so I use requests.Session() and load a logged-in cookie.
Then I use this session to send the POST data in a loop.
This code used to run with no errors on Windows and Linux.
Simplified code:
import requests
import cookielib  # http.cookiejar on Python 3

s = requests.Session()
s.cookies = cookielib.LWPCookieJar('cookise')
s.cookies.load(ignore_discard=True)

for user_id in range(100, 200):
    url = 'http://xxxx'
    data = {'user': user_id, 'content': '123'}
    r = s.post(url, data)
    ...
But the program frequently crashes (at roughly regular intervals); the error is AttributeError: 'module' object has no attribute 'kqueue'.
Traceback (most recent call last):
  File "/Users/gasxia/Dev/Projects/TgbookSpider/kfz_send_msg.py", line 90, in send_msg
    r = requests.post(url, data) # catch error if user isn't exist
  File "/usr/local/lib/python2.7/site-packages/requests/sessions.py", line 535, in post
    return self.request('POST', url, data=data, json=json, **kwargs)
  File "/usr/local/lib/python2.7/site-packages/requests/sessions.py", line 488, in request
    resp = self.send(prep, **send_kwargs)
  File "/usr/local/lib/python2.7/site-packages/requests/sessions.py", line 609, in send
    r = adapter.send(request, **kwargs)
  File "/usr/local/lib/python2.7/site-packages/requests/adapters.py", line 423, in send
    timeout=timeout
  File "/usr/local/lib/python2.7/site-packages/requests/packages/urllib3/connectionpool.py", line 588, in urlopen
    conn = self._get_conn(timeout=pool_timeout)
  File "/usr/local/lib/python2.7/site-packages/requests/packages/urllib3/connectionpool.py", line 241, in _get_conn
    if conn and is_connection_dropped(conn):
  File "/usr/local/lib/python2.7/site-packages/requests/packages/urllib3/util/connection.py", line 27, in is_connection_dropped
    return bool(wait_for_read(sock, timeout=0.0))
  File "/usr/local/lib/python2.7/site-packages/requests/packages/urllib3/util/wait.py", line 33, in wait_for_read
    return _wait_for_io_events(socks, EVENT_READ, timeout)
  File "/usr/local/lib/python2.7/site-packages/requests/packages/urllib3/util/wait.py", line 22, in _wait_for_io_events
    with DefaultSelector() as selector:
  File "/usr/local/lib/python2.7/site-packages/requests/packages/urllib3/util/selectors.py", line 431, in __init__
    self._kqueue = select.kqueue()
AttributeError: 'module' object has no attribute 'kqueue'
This looks like a problem that commonly arises if you're using something like eventlet or gevent, both of which monkeypatch the select module. If you're using those to achieve asynchrony, you will need to ensure that those monkeypatches are applied before importing requests. This is a known bug, being tracked in this issue.
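For example, if gevent is the library in play (an assumption, since the question doesn't say which one is involved), a minimal sketch of the required import ordering:

from gevent import monkey
monkey.patch_all()  # patch select/socket before requests ever sees them

import requests  # now safe to import and use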
I have the code below, which works fine and brings back what I need:
import requests
from requests.auth import HTTPBasicAuth
response = requests.get('https://example/answers/331', auth=HTTPBasicAuth('username', 'password'),json={"solution": "12345"})
print response.content
However, when I change it to a PATCH request, which the server accepts, I get the following error. Any idea why?
Traceback (most recent call last):
  File "auth.py", line 8, in <module>
    response = requests.patch('https://example/answers/331', auth=HTTPBasicAuth('username', 'password'),json={"solution": "12345"})
  File "C:\Python27\lib\site-packages\requests-2.12.0-py2.7.egg\requests\api.py", line 138, in patch
    return request('patch', url, data=data, **kwargs)
  File "C:\Python27\lib\site-packages\requests-2.12.0-py2.7.egg\requests\api.py", line 56, in request
    return session.request(method=method, url=url, **kwargs)
  File "C:\Python27\lib\site-packages\requests-2.12.0-py2.7.egg\requests\sessions.py", line 488, in request
    resp = self.send(prep, **send_kwargs)
  File "C:\Python27\lib\site-packages\requests-2.12.0-py2.7.egg\requests\sessions.py", line 609, in send
    r = adapter.send(request, **kwargs)
  File "C:\Python27\lib\site-packages\requests-2.12.0-py2.7.egg\requests\adapters.py", line 473, in send
    raise ConnectionError(err, request=request)
requests.exceptions.ConnectionError: ('Connection aborted.', BadStatusLine("''",))
Thanks
Try using a POST request with the following header: X-HTTP-Method-Override: PATCH
This is unique to the Oracle Service Cloud REST API implementation and is documented:
In cases where the browser or client application does not support PATCH requests, or network intermediaries block PATCH requests, HTTP tunneling can be used with a POST request by supplying an X-HTTP-Method-Override header.
Example:
import requests

restURL = <Your REST URL>
params = {'field': 'val'}
headers = {'X-HTTP-Method-Override': 'PATCH'}

try:
    resp = requests.post(restURL, json=params, auth=('<uname>', '<pwd>'), headers=headers)
    print resp
except requests.exceptions.RequestException as err:
    errMsg = "Error: %s" % err
    print errMsg
I am working with a local HTML file in Python, and I am trying to use lxml to parse the file. For some reason I can't get the file to load properly, and I'm not sure whether this has to do with not having an HTTP server set up on my local machine, with my etree usage, or with something else.
My reference for this code was this: http://docs.python-guide.org/en/latest/scenarios/scrape/
This could be a related problem: Requests : No connection adapters were found for, error in Python3
Here is my code:
from lxml import html
import requests
page = requests.get('C:\Users\...\sites\site_1.html')
tree = html.fromstring(page.text)
test = tree.xpath('//html/body/form/div[3]/div[3]/div[2]/div[2]/div/div[2]/div[2]/p[1]/strong/text()')
print test
The traceback that I'm getting reads:
C:\Python27\python.exe "C:/Users/.../extract_html/extract.py"
Traceback (most recent call last):
  File "C:/Users/.../extract_html/extract.py", line 4, in <module>
    page = requests.get('C:\Users\...\sites\site_1.html')
  File "C:\Python27\lib\site-packages\requests\api.py", line 69, in get
    return request('get', url, params=params, **kwargs)
  File "C:\Python27\lib\site-packages\requests\api.py", line 50, in request
    response = session.request(method=method, url=url, **kwargs)
  File "C:\Python27\lib\site-packages\requests\sessions.py", line 465, in request
    resp = self.send(prep, **send_kwargs)
  File "C:\Python27\lib\site-packages\requests\sessions.py", line 567, in send
    adapter = self.get_adapter(url=request.url)
  File "C:\Python27\lib\site-packages\requests\sessions.py", line 641, in get_adapter
    raise InvalidSchema("No connection adapters were found for '%s'" % url)
requests.exceptions.InvalidSchema: No connection adapters were found for 'C:\Users\...\sites\site_1.html'
Process finished with exit code 1
You can see that it has something to do with a "connection adapter" but I'm not sure what that means.
If the file is local, you shouldn't be using requests -- just open the file and read it in. requests expects to be talking to a web server.
with open(r'C:\Users\...site_1.html', "r") as f:
    page = f.read()
tree = html.fromstring(page)
There is a better way to do it: use the parse function instead of fromstring:
tree = html.parse(r"C:\Users\...site_1.html")
print(html.tostring(tree))
You can also try using Beautiful Soup:
from bs4 import BeautifulSoup

f = open("filepath", encoding="utf8")
soup = BeautifulSoup(f, "html.parser")
f.close()