I am trying to build a website crawler behind a company proxy.
So far I am running the following code:
import requests

url = 'https://www.bundesliga.com/de/bundesliga/spieltag/2022-2023/19/fc-augsburg-vs-bayer-04-leverkusen/lineup'

def get_url_content(url):
    return requests.get(url).text

get_url_content(url)
And I am running into the following errors:
PermissionError Traceback (most recent call last)
File C:\Program Files\Anaconda3\envs\Test\lib\site-packages\urllib3\connection.py:174, in HTTPConnection._new_conn(self)
173 try:
--> 174 conn = connection.create_connection(
175 (self._dns_host, self.port), self.timeout, **extra_kw
176 )
178 except SocketTimeout:
File C:\Program Files\Anaconda3\envs\Test\lib\site-packages\urllib3\util\connection.py:95, in create_connection(address, timeout, source_address, socket_options)
94 if err is not None:
---> 95 raise err
97 raise socket.error("getaddrinfo returns an empty list")
File C:\Program Files\Anaconda3\envs\Test\lib\site-packages\urllib3\util\connection.py:85, in create_connection(address, timeout, source_address, socket_options)
84 sock.bind(source_address)
---> 85 sock.connect(sa)
PermissionError: [WinError 10013] An attempt was made to access a socket in a way forbidden by its access permissions
During handling of the above exception, another exception occurred:
NewConnectionError Traceback (most recent call last)
File C:\Program Files\Anaconda3\envs\Test\lib\site-packages\urllib3\connectionpool.py:703, in HTTPConnectionPool.urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)
702 # Make the request on the httplib connection object.
--> 703 httplib_response = self._make_request(
704 conn,
705 method,
706 url,
707 timeout=timeout_obj,
708 body=body,
709 headers=headers,
710 chunked=chunked,
It seems like the request is blocked by the company proxy. How can I handle this error? I get the same error for other URLs as well.
There is the option to change the proxy with:
requests.get(url, proxies={"https": "https://1.1.0.1:80"})
but I have no idea which proxy I have to use.
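The usual approach, assuming your machine normally reaches the web through a corporate proxy configured in the Windows/browser settings, is to pass that proxy address to requests explicitly, either per call or via the standard HTTP_PROXY/HTTPS_PROXY environment variables. You can get the address from your IT department or from the system proxy settings. A minimal sketch with placeholder values for the proxy host, port and credentials:

import requests

# Placeholder proxy address and credentials; replace with your company's
# actual values (ask IT or check the Windows proxy settings).
proxies = {
    "http": "http://user:password@proxy.mycompany.example:8080",
    "https": "http://user:password@proxy.mycompany.example:8080",
}

url = 'https://www.bundesliga.com/de/bundesliga/spieltag/2022-2023/19/fc-augsburg-vs-bayer-04-leverkusen/lineup'

def get_url_content(url):
    # requests also honours the HTTP_PROXY / HTTPS_PROXY environment
    # variables, so setting those instead of passing proxies= works too.
    return requests.get(url, proxies=proxies, timeout=30).text

print(get_url_content(url)[:200])

Note that the proxy URL for "https" traffic usually still starts with http://, because most corporate proxies accept a plain-HTTP CONNECT tunnel.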
I am trying to import and use the texthero module in Python and keep getting an error message.
The code is simply: import texthero as hero
There is a really long error message:
[nltk_data] Error loading stopwords: <urlopen error [WinError 10060] A
[nltk_data] connection attempt failed because the connected party
[nltk_data] did not properly respond after a period of time, or
[nltk_data] established connection failed because connected host
[nltk_data] has failed to respond>
---------------------------------------------------------------------------
OSError Traceback (most recent call last)
C:\Users\name\AppData\Roaming\Python\Python36\site-packages\texthero\stopwords.py in <module>()
13 # If not present, download 'en_core_web_sm'
---> 14 spacy_model = spacy.load("en_core_web_sm")
15 except OSError:
C:\Users\name\AppData\Roaming\Python\Python36\site-packages\spacy\__init__.py in load(name, **overrides)
29 warnings.warn(Warnings.W001.format(path=depr_path), DeprecationWarning)
---> 30 return util.load_model(name, **overrides)
31
C:\Users\name\AppData\Roaming\Python\Python36\site-packages\spacy\util.py in load_model(name, **overrides)
174 return load_model_from_path(name, **overrides)
--> 175 raise IOError(Errors.E050.format(name=name))
176
OSError: [E050] Can't find model 'en_core_web_sm'. It doesn't seem to be a shortcut link, a Python package or a valid path to a data directory.
During handling of the above exception, another exception occurred:
TimeoutError Traceback (most recent call last)
C:\Users\name\AppData\Roaming\Python\Python36\site-packages\urllib3\connection.py in _new_conn(self)
159 conn = connection.create_connection(
--> 160 (self._dns_host, self.port), self.timeout, **extra_kw
161 )
C:\Users\name\AppData\Roaming\Python\Python36\site-packages\urllib3\util\connection.py in create_connection(address, timeout, source_address, socket_options)
83 if err is not None:
---> 84 raise err
85
C:\Users\name\AppData\Roaming\Python\Python36\site-packages\urllib3\util\connection.py in create_connection(address, timeout, source_address, socket_options)
73 sock.bind(source_address)
---> 74 sock.connect(sa)
75 return sock
TimeoutError: [WinError 10060] A connection attempt failed because the connected party did not properly respond after a period of time, or established connection failed because connected host has failed to respond
During handling of the above exception, another exception occurred:
NewConnectionError Traceback (most recent call last)
C:\Users\name\AppData\Roaming\Python\Python36\site-packages\urllib3\connectionpool.py in urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)
676 headers=headers,
--> 677 chunked=chunked,
678 )
C:\Users\name\AppData\Roaming\Python\Python36\site-packages\urllib3\connectionpool.py in _make_request(self, conn, method, url, timeout, chunked, **httplib_request_kw)
380 try:
--> 381 self._validate_conn(conn)
382 except (SocketTimeout, BaseSSLError) as e:
C:\Users\name\AppData\Roaming\Python\Python36\site-packages\urllib3\connectionpool.py in _validate_conn(self, conn)
977 if not getattr(conn, "sock", None): # AppEngine might not have `.sock`
--> 978 conn.connect()
979
C:\Users\name\AppData\Roaming\Python\Python36\site-packages\urllib3\connection.py in connect(self)
308 # Add certificate verification
--> 309 conn = self._new_conn()
310 hostname = self.host
C:\Users\name\AppData\Roaming\Python\Python36\site-packages\urllib3\connection.py in _new_conn(self)
171 raise NewConnectionError(
--> 172 self, "Failed to establish a new connection: %s" % e
173 )
NewConnectionError: <urllib3.connection.HTTPSConnection object at 0x0000000019860160>: Failed to establish a new connection: [WinError 10060] A connection attempt failed because the connected party did not properly respond after a period of time, or established connection failed because connected host has failed to respond
During handling of the above exception, another exception occurred:
MaxRetryError Traceback (most recent call last)
C:\Users\name\AppData\Roaming\Python\Python36\site-packages\requests\adapters.py in send(self, request, stream, timeout, verify, cert, proxies)
448 retries=self.max_retries,
--> 449 timeout=timeout
450 )
C:\Users\name\AppData\Roaming\Python\Python36\site-packages\urllib3\connectionpool.py in urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)
726 retries = retries.increment(
--> 727 method, url, error=e, _pool=self, _stacktrace=sys.exc_info()[2]
728 )
C:\Users\name\AppData\Roaming\Python\Python36\site-packages\urllib3\util\retry.py in increment(self, method, url, response, error, _pool, _stacktrace)
438 if new_retry.is_exhausted():
--> 439 raise MaxRetryError(_pool, url, error or ResponseError(cause))
440
MaxRetryError: HTTPSConnectionPool(host='raw.githubusercontent.com', port=443): Max retries exceeded with url: /explosion/spacy-models/master/shortcuts-v2.json (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x0000000019860160>: Failed to establish a new connection: [WinError 10060] A connection attempt failed because the connected party did not properly respond after a period of time, or established connection failed because connected host has failed to respond',))
During handling of the above exception, another exception occurred:
ConnectionError Traceback (most recent call last)
<ipython-input-3-a19be4912cd0> in <module>()
----> 1 import texthero as hero
2
C:\Users\name\AppData\Roaming\Python\Python36\site-packages\texthero\__init__.py in <module>()
4
5 """
----> 6 from . import preprocessing
7 from .preprocessing import *
8
C:\Users\name\AppData\Roaming\Python\Python36\site-packages\texthero\preprocessing.py in <module>()
12 from nltk.stem import PorterStemmer, SnowballStemmer
13
---> 14 from texthero import stopwords as _stopwords
15
16 from typing import List, Callable
C:\Users\name\AppData\Roaming\Python\Python36\site-packages\texthero\stopwords.py in <module>()
16 from spacy.cli.download import download as spacy_download
17
---> 18 spacy_download("en_core_web_sm")
19
20 from spacy.lang.en import stop_words as spacy_en_stopwords
C:\Users\name\AppData\Roaming\Python\Python36\site-packages\spacy\cli\download.py in download(model, direct, *pip_args)
42 dl = download_model(dl_tpl.format(m=model_name, v=version), pip_args)
43 else:
---> 44 shortcuts = get_json(about.__shortcuts__, "available shortcuts")
45 model_name = shortcuts.get(model, model)
46 compatibility = get_compatibility()
C:\Users\name\AppData\Roaming\Python\Python36\site-packages\spacy\cli\download.py in get_json(url, desc)
93
94 def get_json(url, desc):
---> 95 r = requests.get(url)
96 if r.status_code != 200:
97 msg.fail(
C:\Users\name\AppData\Roaming\Python\Python36\site-packages\requests\api.py in get(url, params, **kwargs)
74
75 kwargs.setdefault('allow_redirects', True)
---> 76 return request('get', url, params=params, **kwargs)
77
78
C:\Users\name\AppData\Roaming\Python\Python36\site-packages\requests\api.py in request(method, url, **kwargs)
59 # cases, and look like a memory leak in others.
60 with sessions.Session() as session:
---> 61 return session.request(method=method, url=url, **kwargs)
62
63
C:\Users\name\AppData\Roaming\Python\Python36\site-packages\requests\sessions.py in request(self, method, url, params, data, headers, cookies, files, auth, timeout, allow_redirects, proxies, hooks, stream, verify, cert, json)
528 }
529 send_kwargs.update(settings)
--> 530 resp = self.send(prep, **send_kwargs)
531
532 return resp
C:\Users\name\AppData\Roaming\Python\Python36\site-packages\requests\sessions.py in send(self, request, **kwargs)
641
642 # Send the request
--> 643 r = adapter.send(request, **kwargs)
644
645 # Total elapsed time of the request (approximately)
C:\Users\name\AppData\Roaming\Python\Python36\site-packages\requests\adapters.py in send(self, request, stream, timeout, verify, cert, proxies)
514 raise SSLError(e, request=request)
515
--> 516 raise ConnectionError(e, request=request)
517
518 except ClosedPoolError as e:
ConnectionError: HTTPSConnectionPool(host='raw.githubusercontent.com', port=443): Max retries exceeded with url: /explosion/spacy-models/master/shortcuts-v2.json (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x0000000019860160>: Failed to establish a new connection: [WinError 10060] A connection attempt failed because the connected party did not properly respond after a period of time, or established connection failed because connected host has failed to respond',))
So it seems unable to open the stopwords file. I have checked the data folders and it is present.
When it can't find it, it then tries to find the spaCy model 'en_core_web_sm'.
I cannot find this in the folders, although spacy itself is there.
Because it can't find the model it tries to download it, and I then get a timeout.
I have tried uninstalling and reinstalling spacy and texthero and updating both.
I have tried installing en_core_web_sm and the other versions,
e.g. using code:
!pip install https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-2.2.0/en_core_web_sm-2.2.0.tar.gz
and
!python -m spacy download en_core_web_sm
I have tried reinstalling/updating nltk
I have run it in a Jupyter notebook, in Python, and in CMD (as admin), all with the same error.
I've tried both python commands and pip to do the above installs etc.
I really don't know what else to try
any ideas?
thanks
Mizz
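From the traceback, the import fails while spaCy's downloader tries to fetch shortcuts-v2.json from raw.githubusercontent.com and the connection times out, which points at blocked outbound access (a firewall or proxy is one plausible cause, though the traceback alone can't confirm it). A minimal sketch of a workaround under that assumption: install the model from a tarball downloaded manually on a machine with access (or through an explicit pip proxy), then check that spaCy can load it before importing texthero:

# Download the model archive manually (browser, or another machine with
# internet access) and install it from the local file so nothing needs to
# be downloaded at import time:
#   pip install ./en_core_web_sm-2.2.0.tar.gz
#
# Or, if a proxy is available (address is a placeholder):
#   pip install --proxy http://user:password@proxy.mycompany.example:8080 https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-2.2.0/en_core_web_sm-2.2.0.tar.gz

import spacy

# If this succeeds, texthero's stopwords module will find the model and
# will not call spacy_download at import time.
nlp = spacy.load("en_core_web_sm")
print(nlp("model loaded"))

import texthero as hero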
I am new to TensorFlow and do not have much experience. I am now trying out distributed TensorFlow.
Following the official guide, I first create two servers. I run the following code in two separate terminals:
import sys
import tensorflow as tf
task_number = int(sys.argv[1])
cluster = tf.train.ClusterSpec({"local": ["localhost:2222", "localhost:2223"]})
server = tf.train.Server(cluster, job_name="local", task_index=task_number)
print("Starting server #{}".format(task_number))
server.start()
server.join()
The server has been set up successfully:
2018-01-25 20:05:37.651802: I tensorflow/core/distributed_runtime/rpc/grpc_channel.cc:215] Initialize GrpcChannelCache for job local -> {0 -> localhost:2222, 1 -> localhost:2223}
2018-01-25 20:05:37.652881: I tensorflow/core/distributed_runtime/rpc/grpc_server_lib.cc:324] Started server with target: grpc://localhost:2222
Starting server #0
2018-01-25 20:05:37.652938: I tensorflow/core/distributed_runtime/rpc/grpc_server_lib.cc:328] Server already started (target: grpc://localhost:2222)
Then I run the following program:
import tensorflow as tf

x = tf.constant(2)

with tf.device("/job:local/task:1"):
    y2 = x - 66

with tf.device("/job:local/task:0"):
    y1 = x + 300
    y = y1 + y2

with tf.Session("grpc://localhost:2223") as sess:
    result = sess.run(y)
    print(result)
Then it gives me the following error message:
E0125 20:05:49.573488650 10292 ev_epoll1_linux.c:1051] grpc epoll fd: 5
Traceback (most recent call last):
File "/home/****/miniconda3/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1323, in _do_call
return fn(*args)
File "/home/****/miniconda3/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1293, in _run_fn
self._extend_graph()
File "/home/****/miniconda3/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1354, in _extend_graph
self._session, graph_def.SerializeToString(), status)
File "/home/****/miniconda3/lib/python3.6/site-packages/tensorflow/python/framework/errors_impl.py", line 473, in __exit__
c_api.TF_GetCode(self.status.status))
tensorflow.python.framework.errors_impl.UnavailableError: Endpoint read failed
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/home/****/Documents/intern/sample_data/try.py", line 25, in <module>
result = sess.run(y)
File "/home/****/miniconda3/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 889, in run
run_metadata_ptr)
File "/home/****/miniconda3/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1120, in _run
feed_dict_tensor, options, run_metadata)
File "/home/****/miniconda3/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1317, in _do_run
options, run_metadata)
File "/home/****/miniconda3/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1336, in _do_call
raise type(e)(node_def, op, message)
tensorflow.python.framework.errors_impl.UnavailableError: Endpoint read failed
I googled it and some suggest that it might be a problem with the proxy, so I disabled the proxy, but nothing changed.
Does anyone have any idea what the problem might be? Many thanks in advance.
Never mind, problem solved. It was the proxy settings: the proxy needs to be unset on both the servers and the client to make the program work.
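For reference, a minimal sketch of what "unsetting the proxy" can look like from inside Python, assuming the proxy was configured through the usual environment variables (which gRPC picks up and may then try to route even localhost traffic through):

import os

# Clear the proxy variables before TensorFlow/gRPC creates any channels,
# i.e. before the servers and the session are created.
for var in ("http_proxy", "https_proxy", "HTTP_PROXY", "HTTPS_PROXY"):
    os.environ.pop(var, None)

# Alternatively, keep the proxy but exclude local traffic from it:
# os.environ["no_proxy"] = "localhost,127.0.0.1"

import tensorflow as tf  # import after the environment has been cleaned up

The same variables can of course be unset in the shell (unset http_proxy https_proxy) before launching the server and client scripts.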
After storing the data frame in an h5 file, I tried to access the file in another Jupyter notebook, but when loading the h5 file in read mode in pandas I encountered an error.
I read the file this way:
data_frames = pd.HDFStore('data_frames.h5', mode='r')
Error:
HDF5ExtError Traceback (most recent call last)
~/.conda/envs/be_project/lib/python3.6/site-packages/pandas/io/pytables.py in open(self, mode, **kwargs)
586 try:
--> 587 self._handle = tables.open_file(self._path, self._mode, **kwargs)
588 except (IOError) as e: # pragma: no cover
~/.conda/envs/be_project/lib/python3.6/site-packages/tables/file.py in open_file(filename, mode, title, root_uep, filters, **kwargs)
319 # Finally, create the File instance, and return it
--> 320 return File(filename, mode, title, root_uep, filters, **kwargs)
321
~/.conda/envs/be_project/lib/python3.6/site-packages/tables/file.py in __init__(self, filename, mode, title, root_uep, filters, **kwargs)
783 # Now, it is time to initialize the File extension
--> 784 self._g_new(filename, mode, **params)
785
tables/hdf5extension.pyx in tables.hdf5extension.File._g_new (tables/hdf5extension.c:5940)()
HDF5ExtError: HDF5 error back trace
File "H5F.c", line 604, in H5Fopen
unable to open file
File "H5Fint.c", line 1087, in H5F_open
unable to read superblock
File "H5Fsuper.c", line 277, in H5F_super_read
file signature not found
End of HDF5 error back trace
Unable to open/create file 'data_frames.h5'
During handling of the above exception, another exception occurred:
OSError Traceback (most recent call last)
<ipython-input-2-aa601ba65d4f> in <module>()
----> 1 data_frames = pd.HDFStore('data_frames.h5', mode='r')
~/.conda/envs/be_project/lib/python3.6/site-packages/pandas/io/pytables.py in __init__(self, path, mode, complevel, complib, fletcher32, **kwargs)
446 self._fletcher32 = fletcher32
447 self._filters = None
--> 448 self.open(mode=mode, **kwargs)
449
450 #property
~/.conda/envs/be_project/lib/python3.6/site-packages/pandas/io/pytables.py in open(self, mode, **kwargs)
617 # is not part of IOError, make it one
618 if self._mode == 'r' and 'Unable to open/create file' in str(e):
--> 619 raise IOError(str(e))
620 raise
621
OSError: HDF5 error back trace
File "H5F.c", line 604, in H5Fopen
unable to open file
File "H5Fint.c", line 1087, in H5F_open
unable to read superblock
File "H5Fsuper.c", line 277, in H5F_super_read
file signature not found
End of HDF5 error back trace
Unable to open/create file 'data_frames.h5'
Please help if possible.
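A frequent cause of HDF5's "file signature not found" is that data_frames.h5 is not a valid HDF5 file at the moment it is opened, for example because the writing notebook never flushed/closed the store, the write failed partway, or the reader points at a different (empty or non-HDF5) file; the traceback alone doesn't say which. A minimal sketch, under those assumptions, of writing and closing the store cleanly before reading it from another notebook:

import pandas as pd

df = pd.DataFrame({"a": [1, 2, 3], "b": [4.0, 5.0, 6.0]})

# Writing notebook: a context manager (or an explicit .close()) makes sure
# the file is flushed and closed before another process opens it.
with pd.HDFStore("data_frames.h5", mode="w") as store:
    store.put("df", df)

# Reading notebook: open read-only, using the same path that was written.
with pd.HDFStore("data_frames.h5", mode="r") as store:
    df_loaded = store["df"]

print(df_loaded.head())

If the file really was written and closed correctly, it is worth checking the file size and the working directory in both notebooks, since an empty or truncated file produces exactly this error.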
New to Python, but I'm trying to...retrieve data from a site:
import urllib.request
response = urllib.request.urlopen("http://www.python.org")
This is the same code I've seen in the Python 3.1 docs, and on a lot of sites.
However, I get:
Message File Name Line Position
Traceback
<module> G:\My Documents\Python\HTTP.py 14
urlopen E:\Python 3.1\Lib\urllib\request.py 119
open E:\Python 3.1\Lib\urllib\request.py 342
_open E:\Python 3.1\Lib\urllib\request.py 360
_call_chain E:\Python 3.1\Lib\urllib\request.py 320
http_open E:\Python 3.1\Lib\urllib\request.py 1063
do_open E:\Python 3.1\Lib\urllib\request.py 1048
URLError: <urlopen error [Errno 10022] An invalid argument was supplied>
I have no idea what's causing this. Anyone know?
Maybe try turning off the firewall? Since you are on Windows, that might be the problem.
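If the firewall isn't the culprit, another thing worth checking (only an assumption, suggested by the other connection problems in this thread rather than by the traceback itself) is whether the network requires a proxy; urllib.request can be pointed at one explicitly:

import urllib.request

# Placeholder proxy address; replace with the real host and port if your
# network requires a proxy.
proxy_handler = urllib.request.ProxyHandler({
    "http": "http://proxy.mycompany.example:8080",
    "https": "http://proxy.mycompany.example:8080",
})
opener = urllib.request.build_opener(proxy_handler)

response = opener.open("http://www.python.org")
print(response.getcode())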