I am trying to parse some data from 'https://datausa.io/profile/geo/jacksonville-fl/#intro', but I am not sure how to access it from python. My code is:
adress, headers = urllib.request.urlretrieve(' https://datausa.io/profile/geo/jacksonville-fl/#intro')
handle = open(adress)
and it returns the error:
Traceback (most recent call last):
File "C:/Users/Jared/AppData/Local/Programs/Python/Python36-32/capstone1.py", line 16, in <module>
adress, headers = urllib.request.urlretrieve(' https://datausa.io/profile/geo/jacksonville-fl/#intro')
File "C:\Users\Jared\AppData\Local\Programs\Python\Python36-32\lib\urllib\request.py", line 248, in urlretrieve
with contextlib.closing(urlopen(url, data)) as fp:
File "C:\Users\Jared\AppData\Local\Programs\Python\Python36-32\lib\urllib\request.py", line 223, in urlopen
return opener.open(url, data, timeout)
File "C:\Users\Jared\AppData\Local\Programs\Python\Python36-32\lib\urllib\request.py", line 532, in open
response = meth(req, response)
File "C:\Users\Jared\AppData\Local\Programs\Python\Python36-32\lib\urllib\request.py", line 642, in http_response
'http', request, response, code, msg, hdrs)
File "C:\Users\Jared\AppData\Local\Programs\Python\Python36-32\lib\urllib\request.py", line 570, in error
return self._call_chain(*args)
File "C:\Users\Jared\AppData\Local\Programs\Python\Python36-32\lib\urllib\request.py", line 504, in _call_chain
result = func(*args)
File "C:\Users\Jared\AppData\Local\Programs\Python\Python36-32\lib\urllib\request.py", line 650, in http_error_default
raise HTTPError(req.full_url, code, msg, hdrs, fp)
urllib.error.HTTPError: HTTP Error 403: Forbidden
Please explain what is wrong or tell me a better way to access the page. Also, does the '.io' suffix affect how Python handles it?
Thanks.
This worked for me:
import requests
url = "https://datausa.io/profile/geo/jacksonville-fl/#intro"
req = requests.request("GET",url)
Related
I am trying to run the following code in order to search for the next words of a text that come after "examination".
The input is a PDF that I try to convert to text using tika.
Somehow the code throws an error related to tika that I do not understand.
Does anyone know how to fix it, or know another way to solve my problem?
import re
from tika import parser
# Parse the PDF with Tika; the result is indexed by key, and 'content' holds
# the extracted text.
raw = parser.from_file('application0001.pdf')
content = raw['content']
print(content)
# Split the extracted text itself into words -- the parser result as a whole
# is not a string and has no split() method.
list_of_words = content.split()
search = "examination"
# The word immediately after the first occurrence of the search term.
next_word = list_of_words[list_of_words.index(search) + 1]
print(next_word)
This is the error I get when running it and I do not get what it means.
2019-05-24 09:53:53,217 [MainThread ] [INFO ] Retrieving http://search.maven.org/remotecontent?filepath=org/apache/tika/tika-server/1.19/tika-server-1.19.jar to /var/folders/xn/p33pzhs179n33z55z66lqcn00000gn/T/tika-server.jar.
Traceback (most recent call last):
File "/Users/Mauritius/anaconda3/lib/python3.6/site-packages/tika/tika.py", line 716, in getRemoteJar
urlretrieve(urlOrPath, destPath)
File "/Users/Mauritius/anaconda3/lib/python3.6/urllib/request.py", line 248, in urlretrieve
with contextlib.closing(urlopen(url, data)) as fp:
File "/Users/Mauritius/anaconda3/lib/python3.6/urllib/request.py", line 223, in urlopen
return opener.open(url, data, timeout)
File "/Users/Mauritius/anaconda3/lib/python3.6/urllib/request.py", line 532, in open
response = meth(req, response)
File "/Users/Mauritius/anaconda3/lib/python3.6/urllib/request.py", line 642, in http_response
'http', request, response, code, msg, hdrs)
File "/Users/Mauritius/anaconda3/lib/python3.6/urllib/request.py", line 570, in error
return self._call_chain(*args)
File "/Users/Mauritius/anaconda3/lib/python3.6/urllib/request.py", line 504, in _call_chain
result = func(*args)
File "/Users/Mauritius/anaconda3/lib/python3.6/urllib/request.py", line 650, in http_error_default
raise HTTPError(req.full_url, code, msg, hdrs, fp)
urllib.error.HTTPError: HTTP Error 503: Service Unavailable
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/Users/Mauritius/Desktop/text_search.py", line 7, in <module>
raw = parser.from_file('application0001.pdf')
File "/Users/Mauritius/anaconda3/lib/python3.6/site-packages/tika/parser.py", line 36, in from_file
jsonOutput = parse1('all', filename, serverEndpoint, headers=headers, config_path=config_path)
File "/Users/Mauritius/anaconda3/lib/python3.6/site-packages/tika/tika.py", line 328, in parse1
headers, verbose, tikaServerJar, config_path=config_path, rawResponse=rawResponse)
File "/Users/Mauritius/anaconda3/lib/python3.6/site-packages/tika/tika.py", line 522, in callServer
serverEndpoint = checkTikaServer(scheme, serverHost, port, tikaServerJar, classpath, config_path)
File "/Users/Mauritius/anaconda3/lib/python3.6/site-packages/tika/tika.py", line 571, in checkTikaServer
getRemoteJar(tikaServerJar, jarPath)
File "/Users/Mauritius/anaconda3/lib/python3.6/site-packages/tika/tika.py", line 726, in getRemoteJar
urlretrieve(urlOrPath, destPath)
File "/Users/Mauritius/anaconda3/lib/python3.6/urllib/request.py", line 248, in urlretrieve
with contextlib.closing(urlopen(url, data)) as fp:
File "/Users/Mauritius/anaconda3/lib/python3.6/urllib/request.py", line 223, in urlopen
return opener.open(url, data, timeout)
File "/Users/Mauritius/anaconda3/lib/python3.6/urllib/request.py", line 532, in open
response = meth(req, response)
File "/Users/Mauritius/anaconda3/lib/python3.6/urllib/request.py", line 642, in http_response
'http', request, response, code, msg, hdrs)
File "/Users/Mauritius/anaconda3/lib/python3.6/urllib/request.py", line 570, in error
return self._call_chain(*args)
File "/Users/Mauritius/anaconda3/lib/python3.6/urllib/request.py", line 504, in _call_chain
result = func(*args)
File "/Users/Mauritius/anaconda3/lib/python3.6/urllib/request.py", line 650, in http_error_default
raise HTTPError(req.full_url, code, msg, hdrs, fp)
urllib.error.HTTPError: HTTP Error 503: Service Unavailable
[Finished in 2.7s with exit code 1]
[shell_cmd: python -u "/Users/Mauritius/Desktop/text_search.py"]
[dir: /Users/Mauritius/Desktop]
[path: /Users/Mauritius/miniconda3/bin:/opt/local/bin:/opt/local/sbin:/Users/Mauritius/anaconda3/bin:/Library/Frameworks/Python.framework/Versions/3.5/bin://anaconda/bin:/usr/local/bin:/usr/bin:/bin:/usr/sbin:/sbin:/Library/TeX/texbin]
My code is this:
import urllib.request
import re
# http://www.weather-forecast.com/locations/Paris/forcasts/latest
city = input('Please enter a place: ')
url = 'http://www.weather-forecast.com/locations/'+city+'forcasts/latest'
data = urllib.request.urlopen(url).read()
data1 = data.decode('utf-8')
I'm having trouble with the URL; this is my output:
Traceback (most recent call last):
File "C:/Users/alext/AppData/Local/Programs/Python/Python36/Weather forecast.py", line 9, in <module>
data = urllib.request.urlopen(url).read()
File "C:\Users\alext\AppData\Local\Programs\Python\Python36\lib\urllib\request.py", line 223, in urlopen
return opener.open(url, data, timeout)
File "C:\Users\alext\AppData\Local\Programs\Python\Python36\lib\urllib\request.py", line 532, in open
response = meth(req, response)
File "C:\Users\alext\AppData\Local\Programs\Python\Python36\lib\urllib\request.py", line 642, in http_response
'http', request, response, code, msg, hdrs)
File "C:\Users\alext\AppData\Local\Programs\Python\Python36\lib\urllib\request.py", line 564, in error
result = self._call_chain(*args)
File "C:\Users\alext\AppData\Local\Programs\Python\Python36\lib\urllib\request.py", line 504, in _call_chain
result = func(*args)
File "C:\Users\alext\AppData\Local\Programs\Python\Python36\lib\urllib\request.py", line 756, in http_error_302
return self.parent.open(new, timeout=req.timeout)
File "C:\Users\alext\AppData\Local\Programs\Python\Python36\lib\urllib\request.py", line 532, in open
response = meth(req, response)
File "C:\Users\alext\AppData\Local\Programs\Python\Python36\lib\urllib\request.py", line 642, in http_response
'http', request, response, code, msg, hdrs)
File "C:\Users\alext\AppData\Local\Programs\Python\Python36\lib\urllib\request.py", line 570, in error
return self._call_chain(*args)
File "C:\Users\alext\AppData\Local\Programs\Python\Python36\lib\urllib\request.py", line 504, in _call_chain
result = func(*args)
File "C:\Users\alext\AppData\Local\Programs\Python\Python36\lib\urllib\request.py", line 650, in http_error_default
raise HTTPError(req.full_url, code, msg, hdrs, fp)
urllib.error.HTTPError: HTTP Error 404: Not Found
I have checked the url and it is definitely correct. I have seen others with problems like this but am still unsure as to the solution.
You are missing a '/' after the city and an 'e' in 'forecasts'. It should be:
url = 'http://www.weather-forecast.com/locations/'+city+'/forecasts/latest'
I'm trying to run a script to grab data from an API and feed it to a MongoDB database. I keep getting this error, but if I test a single URL it works.
Any suggestions?
full script http://pastebin.com/Xap5vYYC
Traceback (most recent call last):
File "hmsParser.py", line 112, in <module>
smReply = urlopen(smUrl).read().decode("utf8")
File "/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/urllib/request.py", line 162, in urlopen
return opener.open(url, data, timeout)
File "/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/urllib/request.py", line 471, in open
response = meth(req, response)
File "/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/urllib/request.py", line 581, in http_response
'http', request, response, code, msg, hdrs)
File "/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/urllib/request.py", line 509, in error
return self._call_chain(*args)
File "/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/urllib/request.py", line 443, in _call_chain
result = func(*args)
File "/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/urllib/request.py", line 589, in http_error_default
raise HTTPError(req.full_url, code, msg, hdrs, fp)
urllib.error.HTTPError: HTTP Error 404: NOT FOUND
I seem to be getting this error with urllib.request; it gives me this URL error that I can't seem to fix.
Traceback (most recent call last):
File "C:\Users\Jarvis\Documents\Python Scripts\MultiCheck by Koala.py", line 133, in <module>
Migration()
File "C:\Users\Jarvis\Documents\Python Scripts\MultiCheck by Koala.py", line 116, in Migration
rawdata_uuid = urllib.request.urlopen(url)
File "C:\Python34\lib\urllib\request.py", line 161, in urlopen
return opener.open(url, data, timeout)
File "C:\Python34\lib\urllib\request.py", line 469, in open
response = meth(req, response)
File "C:\Python34\lib\urllib\request.py", line 579, in http_response
'http', request, response, code, msg, hdrs)
File "C:\Python34\lib\urllib\request.py", line 507, in error
return self._call_chain(*args)
File "C:\Python34\lib\urllib\request.py", line 441, in _call_chain
result = func(*args)
File "C:\Python34\lib\urllib\request.py", line 587, in http_error_default
raise HTTPError(req.full_url, code, msg, hdrs, fp)
urllib.error.HTTPError: HTTP Error 429: 42
The code I'm using is below; it is a migration checker for a game:
def Migration():
    """Print whether the Minecraft account named by ``einfos`` is migrated.

    Looks up the account id for the module-level name ``einfos`` through the
    Mojang profile API, fetches the matching session-server profile, and
    prints "Unmigrated" when that profile has a 'legacy' key, otherwise
    "Migrated".

    Raises:
        urllib.error.HTTPError: if either request is answered with an HTTP
            error status (for example 429 when too many requests are made).
        KeyError: if the first response has no 'id' field.
    """
    # First request: resolve the account name to its id.
    url = "https://api.mojang.com/users/profiles/minecraft/" + einfos
    rawdata = urllib.request.urlopen(url)
    newrawdata = rawdata.read()
    jsondata = json.loads(newrawdata.decode('utf-8'))
    results = jsondata['id']

    # Second request: fetch the profile that belongs to that id.
    url = "https://sessionserver.mojang.com/session/minecraft/profile/" + results
    rawdata_uuid = urllib.request.urlopen(url)
    newrawdata_uuid = rawdata_uuid.read()
    jsondata_uuid = json.loads(newrawdata_uuid.decode('utf-8'))

    # Only the presence of the 'legacy' key matters. Catch KeyError alone so
    # unrelated failures are not silently reported as "Migrated".
    try:
        jsondata_uuid['legacy']
    except KeyError:
        print("Migrated")
    else:
        print("Unmigrated")
Error 429 means: Too many requests. You seem to have hit a rate limit. The additional number gives the seconds you have to wait for the limitation to be dropped. So, try again in 42s, or later.
callurl = "http://vgintnh116:8001/master_data/"
params = urllib.urlencode({'res': 'arovit', 'qfields': 'prod' })
f = urllib2.urlopen(callurl, params)
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "/u/vgtools2/python-2.6.5/lib/python2.6/urllib2.py", line 126, in urlopen
return _opener.open(url, data, timeout)
File "/u/vgtools2/python-2.6.5/lib/python2.6/urllib2.py", line 397, in open
response = meth(req, response)
File "/u/vgtools2/python-2.6.5/lib/python2.6/urllib2.py", line 510, in http_response
'http', request, response, code, msg, hdrs)
File "/u/vgtools2/python-2.6.5/lib/python2.6/urllib2.py", line 435, in error
return self._call_chain(*args)
File "/u/vgtools2/python-2.6.5/lib/python2.6/urllib2.py", line 369, in _call_chain
result = func(*args)
File "/u/vgtools2/python-2.6.5/lib/python2.6/urllib2.py", line 518, in http_error_default
raise HTTPError(req.get_full_url(), code, msg, hdrs, fp)
urllib2.HTTPError: HTTP Error 403: FORBIDDEN
But it works with -
callurl = "http://vgintnh116:8001/master_data/res=arovit&qfields=prod"
f = urllib2.urlopen(callurl)
Please help. I want to use urlencode to avoid handling spaces and extra characters.
If you pass the second argument (data), the request will be a POST instead of a GET.
Also, dictionaries in Python do not have order. To guarantee the order, you should use a sequence.
callurl = "http://vgintnh116:8001/master_data/"
params = urllib.urlencode([('res', 'arovit'), ('qfields', 'prod')])
f = urllib2.urlopen(callurl + params)
From urllib2 documentation:
the HTTP request will be a POST instead of a GET when the data
parameter is provided
In your working example, you are making a GET request.