wget.download() only works for some links

wget.download() only works for some links - python

I have a list of some links (which are pdf files). I'm running wget.download() for each link in the list. However, only some of them get downloaded and then I get:
File "/home/.local/lib/python3.6/site-packages/wget.py", line 526, in download
(tmpfile, headers) = ulib.urlretrieve(binurl, tmpfile, callback)
File "/usr/lib/python3.6/urllib/request.py", line 248, in urlretrieve
with contextlib.closing(urlopen(url, data)) as fp:
File "/usr/lib/python3.6/urllib/request.py", line 223, in urlopen
return opener.open(url, data, timeout)
File "/usr/lib/python3.6/urllib/request.py", line 532, in open
response = meth(req, response)
File "/usr/lib/python3.6/urllib/request.py", line 642, in http_response
'http', request, response, code, msg, hdrs)
File "/usr/lib/python3.6/urllib/request.py", line 570, in error
return self._call_chain(*args)
File "/usr/lib/python3.6/urllib/request.py", line 504, in _call_chain
result = func(*args)
File "/usr/lib/python3.6/urllib/request.py", line 650, in http_error_default
raise HTTPError(req.full_url, code, msg, hdrs, fp)
urllib.error.HTTPError: HTTP Error 404: Not Found
I also tried to use r = requests.get(link) but the issue is still the same and additionally, I get this error:
File "/usr/lib/python3.6/urllib/request.py", line 532, in open
response = meth(req, response)
File "/usr/lib/python3.6/urllib/request.py", line 642, in http_response
'http', request, response, code, msg, hdrs)
File "/usr/lib/python3.6/urllib/request.py", line 570, in error
return self._call_chain(*args)
File "/usr/lib/python3.6/urllib/request.py", line 504, in _call_chain
result = func(*args)
File "/usr/lib/python3.6/urllib/request.py", line 650, in http_error_default
raise HTTPError(req.full_url, code, msg, hdrs, fp
Example of link that does not get downloaded:
https://cnds.jacobs-university.de/courses/sads-2020/p6.pdf
If I open the link on my browser, I get the download. Also, this method was working until a few months back. Idk why it's not working anymore

I can't comment as of yet, but have you tried downloading the files with a browser manually? If that works then the issue must be that wget can't reach the target for some reason.

Related

Raise HTTP Error 406: Not Acceptable when import SeqVecEmbedder from bio_embeddings.embed

I am trying to process protein sequence by using bio_embeddings, but it raise "HTTP Error 406: Not Acceptable" when import SeqVecEmbedder from bio_embeddings.embed.
File "C:\ProgramData\Miniconda3\envs\bio_e\lib\site-packages\bio_embeddings\embed\__init__.py", line 31, in __init__
get_model_file(path=f.name, model='seqvecv{}'.format(str(kwargs.get('seqvec_version', 1))), file=file)
File "C:\ProgramData\Miniconda3\envs\bio_e\lib\site-packages\bio_embeddings\utilities\remote_file_retriever.py", line 72, in get_model_file
request.urlretrieve(url, filename=path, reporthook=t.update_to)
File "C:\ProgramData\Miniconda3\envs\bio_e\lib\urllib\request.py", line 247, in urlretrieve
with contextlib.closing(urlopen(url, data)) as fp:
File "C:\ProgramData\Miniconda3\envs\bio_e\lib\urllib\request.py", line 222, in urlopen
return opener.open(url, data, timeout)
File "C:\ProgramData\Miniconda3\envs\bio_e\lib\urllib\request.py", line 531, in open
response = meth(req, response)
File "C:\ProgramData\Miniconda3\envs\bio_e\lib\urllib\request.py", line 641, in http_response
'http', request, response, code, msg, hdrs)
File "C:\ProgramData\Miniconda3\envs\bio_e\lib\urllib\request.py", line 569, in error
return self._call_chain(*args)
File "C:\ProgramData\Miniconda3\envs\bio_e\lib\urllib\request.py", line 503, in _call_chain
result = func(*args)
File "C:\ProgramData\Miniconda3\envs\bio_e\lib\urllib\request.py", line 649, in http_error_default
raise HTTPError(req.full_url, code, msg, hdrs, fp)
urllib.error.HTTPError: HTTP Error 406: Not Acceptable
That is my code
from bio_embeddings.embed import SeqVecEmbedder
import torch
seq = 'MVTYDFGSDEMHD'
embedder = SeqVecEmbedder()
embedding = embedder.embed(seq)
I am trying to use Bio_embeddings to process protein sequence.

Python3 AmazonAPI urllib.error.HTTPError: HTTP Error 403: Forbidden

I’m facing an issue using Python3 Amazon API. I noticed that a lot of people have the same problem, but I looked for the solution everywhere without results.
I’m trying this simple piece of code:
from amazon.api import AmazonAPI
amazon = AmazonAPI (my_AMAZON_ACCESS_KEY, my_AMAZON_SECRET_KEY, my_AMAZON_ASSOC_TAG, region=’IT’)
products = amazon.search(Keywords = "book name", SearchIndex = "Books")
print(products)
#<amazon.api.AmazonSearch object at 0x7fcc238e7a20>
Until here it works.
for item in products:
print (item.title, item.isbn, item.price_and_currency)
here I get the following error:
File "amazon_loader.py", line 78, in getItemFromAmazon
for item in products:
File "/home/valentinim/.conda/envs/telepot/lib/python3.6/site-packages/amazon/api.py", line 544, in __iter__
for page in self.iterate_pages():
File "/home/valentinim/.conda/envs/telepot/lib/python3.6/site-packages/amazon/api.py", line 561, in iterate_pages
yield self._query(ItemPage=self.current_page, **self.kwargs)
File "/home/valentinim/.conda/envs/telepot/lib/python3.6/site-packages/amazon/api.py", line 573, in _query
response = self.api.ItemSearch(ResponseGroup=ResponseGroup, **kwargs)
File "/home/valentinim/.conda/envs/telepot/lib/python3.6/site-packages/bottlenose/api.py", line 274, in __call__
{'api_url': api_url, 'cache_url': cache_url})
File "/home/valentinim/.conda/envs/telepot/lib/python3.6/site-packages/bottlenose/api.py", line 235, in _call_api
return urllib2.urlopen(api_request, timeout=self.Timeout)
File "/home/valentinim/.conda/envs/telepot/lib/python3.6/urllib/request.py", line 223, in urlopen
return opener.open(url, data, timeout)
File "/home/valentinim/.conda/envs/telepot/lib/python3.6/urllib/request.py", line 532, in open
response = meth(req, response)
File "/home/valentinim/.conda/envs/telepot/lib/python3.6/urllib/request.py", line 642, in http_response
'http', request, response, code, msg, hdrs)
File "/home/valentinim/.conda/envs/telepot/lib/python3.6/urllib/request.py", line 570, in error
return self._call_chain(*args)
File "/home/valentinim/.conda/envs/telepot/lib/python3.6/urllib/request.py", line 504, in _call_chain
result = func(*args)
File "/home/valentinim/.conda/envs/telepot/lib/python3.6/urllib/request.py", line 650, in http_error_default
raise HTTPError(req.full_url, code, msg, hdrs, fp)
urllib.error.HTTPError: HTTP Error 403: Forbidden
I suppose it is an issue with python urrlib library and not with AmazonAPI. Moreover, I assume it is something about the identification when trying to request information from the downloaded AmazonSearch object, but I don’t know how to fix the problem.
I tried also Python2, no differences.

HTTP Error 404: Not Found python urllib

My code is this:
import urllib.request
import re
http://www.weather-forecast.com/locations/Paris/forcasts/latest
city = input('Please enter a place: ')
url = 'http://www.weather-forecast.com/locations/'+city+'forcasts/latest'
data = urllib.request.urlopen(url).read()
data1 = data.decode('utf-8')
I'm having trouble with the url this is my output:
Traceback (most recent call last):
File "C:/Users/alext/AppData/Local/Programs/Python/Python36/Weather forecast.py", line 9, in
data = urllib.request.urlopen(url).read()
File "C:\Users\alext\AppData\Local\Programs\Python\Python36\lib\urllib\request.py", line 223, in urlopen
return opener.open(url, data, timeout)
File "C:\Users\alext\AppData\Local\Programs\Python\Python36\lib\urllib\request.py", line 532, in open
response = meth(req, response)
File "C:\Users\alext\AppData\Local\Programs\Python\Python36\lib\urllib\request.py", line 642, in http_response
'http', request, response, code, msg, hdrs)
File "C:\Users\alext\AppData\Local\Programs\Python\Python36\lib\urllib\request.py", line 564, in error
result = self._call_chain(*args)
File "C:\Users\alext\AppData\Local\Programs\Python\Python36\lib\urllib\request.py", line 504, in _call_chain
result = func(*args)
File "C:\Users\alext\AppData\Local\Programs\Python\Python36\lib\urllib\request.py", line 756, in http_error_302
return self.parent.open(new, timeout=req.timeout)
File "C:\Users\alext\AppData\Local\Programs\Python\Python36\lib\urllib\request.py", line 532, in open
response = meth(req, response)
File "C:\Users\alext\AppData\Local\Programs\Python\Python36\lib\urllib\request.py", line 642, in http_response
'http', request, response, code, msg, hdrs)
File "C:\Users\alext\AppData\Local\Programs\Python\Python36\lib\urllib\request.py", line 570, in error
return self._call_chain(*args)
File "C:\Users\alext\AppData\Local\Programs\Python\Python36\lib\urllib\request.py", line 504, in _call_chain
result = func(*args)
File "C:\Users\alext\AppData\Local\Programs\Python\Python36\lib\urllib \request.py", line 650, in http_error_default
raise HTTPError(req.full_url, code, msg, hdrs, fp)
urllib.error.HTTPError: HTTP Error 404: Not Found
I have checked the url and it is definitely correct. I have seen others with problems like this but am still unsure as to the solution.

you are missing a / after the city and a e in forecast. It should be
url = 'http://www.weather-forecast.com/locations/'+city+'/forecasts/latest'

urllib.error.HTTPError: HTTP Error 404: NOT FOUND

I'm trying to run a script to grab dada from an API and feed to a mongodb database. And I keep getting this error but if I test one sing url it works.
Any suggestions?
full script http://pastebin.com/Xap5vYYC
Traceback (most recent call last):
File "hmsParser.py", line 112, in <module>
smReply = urlopen(smUrl).read().decode("utf8")
File "/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/urllib/request.py", line 162, in urlopen
return opener.open(url, data, timeout)
File "/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/urllib/request.py", line 471, in open
response = meth(req, response)
File "/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/urllib/request.py", line 581, in http_response
'http', request, response, code, msg, hdrs)
File "/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/urllib/request.py", line 509, in error
return self._call_chain(*args)
File "/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/urllib/request.py", line 443, in _call_chain
result = func(*args)
File "/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/urllib/request.py", line 589, in http_error_default
raise HTTPError(req.full_url, code, msg, hdrs, fp)
urllib.error.HTTPError: HTTP Error 404: NOT FOUND

Python URL redirection - Handle 302 which moves to 404

I know we can handle redirection with requests and urllib2 packages. I do not want to use requests as it is not a prebuilt package. Please help me with urllib2 to handle a URL which takes a 302 and then moves to 404. I am not concerned about the 404 and I would like to track whether it is a 301 or 302.
I referred this doc but it still throws the 404.
Here is my code
import urllib2
class My_HTTPRedirectHandler(urllib2.HTTPRedirectHandler):
def http_error_302(self, req, fp, code, msg, headers):
return urllib2.HTTPRedirectHandler.http_error_302(self, req, fp, code, msg, headers)
my_opener = urllib2.build_opener(My_HTTPRedirectHandler)
urllib2.install_opener(my_opener)
response =urllib2.urlopen("MY URL")
print response.read()
Here is my response
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "/usr/lib/python2.7/urllib2.py", line 126, in urlopen
return _opener.open(url, data, timeout)
File "/usr/lib/python2.7/urllib2.py", line 406, in open
response = meth(req, response)
File "/usr/lib/python2.7/urllib2.py", line 519, in http_response
'http', request, response, code, msg, hdrs)
File "/usr/lib/python2.7/urllib2.py", line 438, in error
result = self._call_chain(*args)
File "/usr/lib/python2.7/urllib2.py", line 378, in _call_chain
result = func(*args)
File "<stdin>", line 3, in http_error_302
File "/usr/lib/python2.7/urllib2.py", line 625, in http_error_302
return self.parent.open(new, timeout=req.timeout)
File "/usr/lib/python2.7/urllib2.py", line 406, in open
response = meth(req, response)
File "/usr/lib/python2.7/urllib2.py", line 519, in http_response
'http', request, response, code, msg, hdrs)
File "/usr/lib/python2.7/urllib2.py", line 444, in error
return self._call_chain(*args)
File "/usr/lib/python2.7/urllib2.py", line 378, in _call_chain
result = func(*args)
File "/usr/lib/python2.7/urllib2.py", line 527, in http_error_default
raise HTTPError(req.get_full_url(), code, msg, hdrs, fp)
urllib2.HTTPError: HTTP Error 404: Not Found

It's better to use httplib and HTTP/1.1 HEAD method. That way no response body is received.
What’s the best way to get an HTTP response code from a URL?

Develop Reference

Python is a programming language that lets you work quickly and integrate systems more effectively.

wget.download() only works for some links - python

I can't comment as of yet, but have you tried downloading the files with a browser manually? If that works then the issue must be that wget can't reach the target for some reason.

Related

Raise HTTP Error 406: Not Acceptable when import SeqVecEmbedder from bio_embeddings.embed

Python3 AmazonAPI urllib.error.HTTPError: HTTP Error 403: Forbidden

HTTP Error 404: Not Found python urllib

urllib.error.HTTPError: HTTP Error 404: NOT FOUND

Python URL redirection - Handle 302 which moves to 404

Categories

Resources