urllib.error.HTTPError: HTTP Error 404: NOT FOUND - python

I'm trying to run a script to grab dada from an API and feed to a mongodb database. And I keep getting this error but if I test one sing url it works.
Any suggestions?
full script http://pastebin.com/Xap5vYYC
Traceback (most recent call last):
File "hmsParser.py", line 112, in <module>
smReply = urlopen(smUrl).read().decode("utf8")
File "/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/urllib/request.py", line 162, in urlopen
return opener.open(url, data, timeout)
File "/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/urllib/request.py", line 471, in open
response = meth(req, response)
File "/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/urllib/request.py", line 581, in http_response
'http', request, response, code, msg, hdrs)
File "/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/urllib/request.py", line 509, in error
return self._call_chain(*args)
File "/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/urllib/request.py", line 443, in _call_chain
result = func(*args)
File "/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/urllib/request.py", line 589, in http_error_default
raise HTTPError(req.full_url, code, msg, hdrs, fp)
urllib.error.HTTPError: HTTP Error 404: NOT FOUND

Related

Read pdf and find next word after a key word

I am trying to run the following code in order to search for the next words of a text that come after "examination".
Input is a pdf that i try to convert to a text using tinka.
Somehow the code throws an error referred to tinka that i do not understand.
Does anyone know how to fix it or knows another way to implement my problem?
import re
from tika import parser
raw = parser.from_file('application0001.pdf')
print(raw['content'])
list_of_words = raw.split()
search="examination"
next_word = list_of_words[list_of_words.index(search) + 1]
print(next_word)
This is the error I get when running it and I do not get what it means.
2019-05-24 09:53:53,217 [MainThread ] [INFO ] Retrieving http://search.maven.org/remotecontent?filepath=org/apache/tika/tika-server/1.19/tika-server-1.19.jar to /var/folders/xn/p33pzhs179n33z55z66lqcn00000gn/T/tika-server.jar.
Traceback (most recent call last):
File "/Users/Mauritius/anaconda3/lib/python3.6/site-packages/tika/tika.py", line 716, in getRemoteJar
urlretrieve(urlOrPath, destPath)
File "/Users/Mauritius/anaconda3/lib/python3.6/urllib/request.py", line 248, in urlretrieve
with contextlib.closing(urlopen(url, data)) as fp:
File "/Users/Mauritius/anaconda3/lib/python3.6/urllib/request.py", line 223, in urlopen
return opener.open(url, data, timeout)
File "/Users/Mauritius/anaconda3/lib/python3.6/urllib/request.py", line 532, in open
response = meth(req, response)
File "/Users/Mauritius/anaconda3/lib/python3.6/urllib/request.py", line 642, in http_response
'http', request, response, code, msg, hdrs)
File "/Users/Mauritius/anaconda3/lib/python3.6/urllib/request.py", line 570, in error
return self._call_chain(*args)
File "/Users/Mauritius/anaconda3/lib/python3.6/urllib/request.py", line 504, in _call_chain
result = func(*args)
File "/Users/Mauritius/anaconda3/lib/python3.6/urllib/request.py", line 650, in http_error_default
raise HTTPError(req.full_url, code, msg, hdrs, fp)
urllib.error.HTTPError: HTTP Error 503: Service Unavailable
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/Users/Mauritius/Desktop/text_search.py", line 7, in <module>
raw = parser.from_file('application0001.pdf')
File "/Users/Mauritius/anaconda3/lib/python3.6/site-packages/tika/parser.py", line 36, in from_file
jsonOutput = parse1('all', filename, serverEndpoint, headers=headers, config_path=config_path)
File "/Users/Mauritius/anaconda3/lib/python3.6/site-packages/tika/tika.py", line 328, in parse1
headers, verbose, tikaServerJar, config_path=config_path, rawResponse=rawResponse)
File "/Users/Mauritius/anaconda3/lib/python3.6/site-packages/tika/tika.py", line 522, in callServer
serverEndpoint = checkTikaServer(scheme, serverHost, port, tikaServerJar, classpath, config_path)
File "/Users/Mauritius/anaconda3/lib/python3.6/site-packages/tika/tika.py", line 571, in checkTikaServer
getRemoteJar(tikaServerJar, jarPath)
File "/Users/Mauritius/anaconda3/lib/python3.6/site-packages/tika/tika.py", line 726, in getRemoteJar
urlretrieve(urlOrPath, destPath)
File "/Users/Mauritius/anaconda3/lib/python3.6/urllib/request.py", line 248, in urlretrieve
with contextlib.closing(urlopen(url, data)) as fp:
File "/Users/Mauritius/anaconda3/lib/python3.6/urllib/request.py", line 223, in urlopen
return opener.open(url, data, timeout)
File "/Users/Mauritius/anaconda3/lib/python3.6/urllib/request.py", line 532, in open
response = meth(req, response)
File "/Users/Mauritius/anaconda3/lib/python3.6/urllib/request.py", line 642, in http_response
'http', request, response, code, msg, hdrs)
File "/Users/Mauritius/anaconda3/lib/python3.6/urllib/request.py", line 570, in error
return self._call_chain(*args)
File "/Users/Mauritius/anaconda3/lib/python3.6/urllib/request.py", line 504, in _call_chain
result = func(*args)
File "/Users/Mauritius/anaconda3/lib/python3.6/urllib/request.py", line 650, in http_error_default
raise HTTPError(req.full_url, code, msg, hdrs, fp)
urllib.error.HTTPError: HTTP Error 503: Service Unavailable
[Finished in 2.7s with exit code 1]
[shell_cmd: python -u "/Users/Mauritius/Desktop/text_search.py"]
[dir: /Users/Mauritius/Desktop]
[path: /Users/Mauritius/miniconda3/bin:/opt/local/bin:/opt/local/sbin:/Users/Mauritius/anaconda3/bin:/Library/Frameworks/Python.framework/Versions/3.5/bin://anaconda/bin:/usr/local/bin:/usr/bin:/bin:/usr/sbin:/sbin:/Library/TeX/texbin]

python urlopen returns error

I am trying to parse some data from 'https://datausa.io/profile/geo/jacksonville-fl/#intro', but I am not sure how to access it from python. My code is:
adress, headers = urllib.request.urlretrieve(' https://datausa.io/profile/geo/jacksonville-fl/#intro')
handle = open(adress)
and it returns the error:
Traceback (most recent call last):
File "C:/Users/Jared/AppData/Local/Programs/Python/Python36-32/capstone1.py", line 16, in <module>
adress, headers = urllib.request.urlretrieve(' https://datausa.io/profile/geo/jacksonville-fl/#intro')
File "C:\Users\Jared\AppData\Local\Programs\Python\Python36-32\lib\urllib\request.py", line 248, in urlretrieve
with contextlib.closing(urlopen(url, data)) as fp:
File "C:\Users\Jared\AppData\Local\Programs\Python\Python36-32\lib\urllib\request.py", line 223, in urlopen
return opener.open(url, data, timeout)
File "C:\Users\Jared\AppData\Local\Programs\Python\Python36-32\lib\urllib\request.py", line 532, in open
response = meth(req, response)
File "C:\Users\Jared\AppData\Local\Programs\Python\Python36-32\lib\urllib\request.py", line 642, in http_response
'http', request, response, code, msg, hdrs)
File "C:\Users\Jared\AppData\Local\Programs\Python\Python36-32\lib\urllib\request.py", line 570, in error
return self._call_chain(*args)
File "C:\Users\Jared\AppData\Local\Programs\Python\Python36-32\lib\urllib\request.py", line 504, in _call_chain
result = func(*args)
File "C:\Users\Jared\AppData\Local\Programs\Python\Python36-32\lib\urllib\request.py", line 650, in http_error_default
raise HTTPError(req.full_url, code, msg, hdrs, fp)
urllib.error.HTTPError: HTTP Error 403: Forbidden
Please explain what is wrong or tell me a better way to access the page. Also, does the ' .io ' suffix affecthow python handles it?
Thanks.
This worked for me:
import requests
url = "https://datausa.io/profile/geo/jacksonville-fl/#intro"
req = requests.request("GET",url)

HTTP Error 404: Not Found python urllib

My code is this:
import urllib.request
import re
http://www.weather-forecast.com/locations/Paris/forcasts/latest
city = input('Please enter a place: ')
url = 'http://www.weather-forecast.com/locations/'+city+'forcasts/latest'
data = urllib.request.urlopen(url).read()
data1 = data.decode('utf-8')
I'm having trouble with the url this is my output:
Traceback (most recent call last):
File "C:/Users/alext/AppData/Local/Programs/Python/Python36/Weather forecast.py", line 9, in
data = urllib.request.urlopen(url).read()
File "C:\Users\alext\AppData\Local\Programs\Python\Python36\lib\urllib\request.py", line 223, in urlopen
return opener.open(url, data, timeout)
File "C:\Users\alext\AppData\Local\Programs\Python\Python36\lib\urllib\request.py", line 532, in open
response = meth(req, response)
File "C:\Users\alext\AppData\Local\Programs\Python\Python36\lib\urllib\request.py", line 642, in http_response
'http', request, response, code, msg, hdrs)
File "C:\Users\alext\AppData\Local\Programs\Python\Python36\lib\urllib\request.py", line 564, in error
result = self._call_chain(*args)
File "C:\Users\alext\AppData\Local\Programs\Python\Python36\lib\urllib\request.py", line 504, in _call_chain
result = func(*args)
File "C:\Users\alext\AppData\Local\Programs\Python\Python36\lib\urllib\request.py", line 756, in http_error_302
return self.parent.open(new, timeout=req.timeout)
File "C:\Users\alext\AppData\Local\Programs\Python\Python36\lib\urllib\request.py", line 532, in open
response = meth(req, response)
File "C:\Users\alext\AppData\Local\Programs\Python\Python36\lib\urllib\request.py", line 642, in http_response
'http', request, response, code, msg, hdrs)
File "C:\Users\alext\AppData\Local\Programs\Python\Python36\lib\urllib\request.py", line 570, in error
return self._call_chain(*args)
File "C:\Users\alext\AppData\Local\Programs\Python\Python36\lib\urllib\request.py", line 504, in _call_chain
result = func(*args)
File "C:\Users\alext\AppData\Local\Programs\Python\Python36\lib\urllib \request.py", line 650, in http_error_default
raise HTTPError(req.full_url, code, msg, hdrs, fp)
urllib.error.HTTPError: HTTP Error 404: Not Found
I have checked the url and it is definitely correct. I have seen others with problems like this but am still unsure as to the solution.
you are missing a / after the city and a e in forecast. It should be
url = 'http://www.weather-forecast.com/locations/'+city+'/forecasts/latest'

urllib.error.HTTPError: HTTP Error 403: Forbidden when using untangle python 3.6

This is my first attempt with python, Im trying to use an external library for xml parsing for python 3.6.
I'm getting an error which doesn't seem to have anything to do with my code, and I can't figure out what the problem is from the error output
my code:
import untangle
x = untangle.parse(r"C:\file.xml")
error:
Traceback (most recent call last):
File "C:/Project/Main.py", line 2, in <module>
x = untangle.parse(r"C:\file.xml")
File "C:\Users\user\AppData\Local\Programs\Python\Python36\lib\site-packages\untangle.py", line 177, in parse
parser.parse(filename)
File "C:\Users\user\AppData\Local\Programs\Python\Python36\lib\xml\sax\expatreader.py", line 111, in parse
xmlreader.IncrementalParser.parse(self, source)
File "C:\Users\user\AppData\Local\Programs\Python\Python36\lib\xml\sax\xmlreader.py", line 125, in parse
self.feed(buffer)
File "C:\Users\user\AppData\Local\Programs\Python\Python36\lib\xml\sax\expatreader.py", line 217, in feed
self._parser.Parse(data, isFinal)
File "..\Modules\pyexpat.c", line 668, in ExternalEntityRef
File "C:\Users\user\AppData\Local\Programs\Python\Python36\lib\xml\sax\expatreader.py", line 413, in external_entity_ref
"")
File "C:\Users\user\AppData\Local\Programs\Python\Python36\lib\xml\sax\saxutils.py", line 364, in prepare_input_source
f = urllib.request.urlopen(source.getSystemId())
File "C:\Users\user\AppData\Local\Programs\Python\Python36\lib\urllib\request.py", line 223, in urlopen
return opener.open(url, data, timeout)
File "C:\Users\user\AppData\Local\Programs\Python\Python36\lib\urllib\request.py", line 532, in open
response = meth(req, response)
File "C:\Users\user\AppData\Local\Programs\Python\Python36\lib\urllib\request.py", line 642, in http_response
'http', request, response, code, msg, hdrs)
File "C:\Users\user\AppData\Local\Programs\Python\Python36\lib\urllib\request.py", line 570, in error
return self._call_chain(*args)
File "C:\Users\user\AppData\Local\Programs\Python\Python36\lib\urllib\request.py", line 504, in _call_chain
result = func(*args)
File "C:\Users\user\AppData\Local\Programs\Python\Python36\lib\urllib\request.py", line 650, in http_error_default
raise HTTPError(req.full_url, code, msg, hdrs, fp)
urllib.error.HTTPError: HTTP Error 403: Forbidden

Python URL redirection - Handle 302 which moves to 404

I know we can handle redirection with requests and urllib2 packages. I do not want to use requests as it is not a prebuilt package. Please help me with urllib2 to handle a URL which takes a 302 and then moves to 404. I am not concerned about the 404 and I would like to track whether it is a 301 or 302.
I referred this doc but it still throws the 404.
Here is my code
import urllib2
class My_HTTPRedirectHandler(urllib2.HTTPRedirectHandler):
def http_error_302(self, req, fp, code, msg, headers):
return urllib2.HTTPRedirectHandler.http_error_302(self, req, fp, code, msg, headers)
my_opener = urllib2.build_opener(My_HTTPRedirectHandler)
urllib2.install_opener(my_opener)
response =urllib2.urlopen("MY URL")
print response.read()
Here is my response
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "/usr/lib/python2.7/urllib2.py", line 126, in urlopen
return _opener.open(url, data, timeout)
File "/usr/lib/python2.7/urllib2.py", line 406, in open
response = meth(req, response)
File "/usr/lib/python2.7/urllib2.py", line 519, in http_response
'http', request, response, code, msg, hdrs)
File "/usr/lib/python2.7/urllib2.py", line 438, in error
result = self._call_chain(*args)
File "/usr/lib/python2.7/urllib2.py", line 378, in _call_chain
result = func(*args)
File "<stdin>", line 3, in http_error_302
File "/usr/lib/python2.7/urllib2.py", line 625, in http_error_302
return self.parent.open(new, timeout=req.timeout)
File "/usr/lib/python2.7/urllib2.py", line 406, in open
response = meth(req, response)
File "/usr/lib/python2.7/urllib2.py", line 519, in http_response
'http', request, response, code, msg, hdrs)
File "/usr/lib/python2.7/urllib2.py", line 444, in error
return self._call_chain(*args)
File "/usr/lib/python2.7/urllib2.py", line 378, in _call_chain
result = func(*args)
File "/usr/lib/python2.7/urllib2.py", line 527, in http_error_default
raise HTTPError(req.get_full_url(), code, msg, hdrs, fp)
urllib2.HTTPError: HTTP Error 404: Not Found
It's better to use httplib and HTTP/1.1 HEAD method. That way no response body is received.
What’s the best way to get an HTTP response code from a URL?

Categories

Resources