What am I missing?
Note: I've also tried using the urllib module.
import requests
import sys
import time
import random
headers = {"User-Agent": "Mozilla/5.0 (X11; U; Linux i686) Gecko/20071127 Firefox/25.0"}
url = "HTTP LINK TO YOUTUBE VIDEO"
views = 10
videoMins = 3
videoSec = 33
refreshRate = videoMins * 60 + videoSec
proxy_list = [
{"http":"49.156.37.30:65309"}, {"http":"160.202.42.106:8080"},
{"http":"218.248.73.193:808"}, {"http":"195.246.57.154:8080"},
{"http":"80.161.30.156:80"}, {"http":"122.228.25.97:8101"},
{"http":"165.84.167.54:8080"},{"https":"178.140.216.229:8080"},
{"https":"46.37.193.74:3128"},{"https":"5.1.27.124:53281"},
{"https":"196.202.194.127:62225"},{"https":"194.243.194.51:8080"},
{"https":"206.132.165.246:8080"},{"https":"92.247.127.177:3128"}]
proxies = random.choice(proxy_list)
while True:
    for view in range(views):  # loop over the allocated number of views
        s = requests.Session()
        s.get(url, headers=headers, proxies=proxies, stream=True, timeout=refreshRate)
        s.close()
        time.sleep(60)  # time between loops so we appear real
    sys.exit()
Here's the traceback I got:
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "pytest.py", line 24, in <module>
s.get(url, headers=headers, proxies=proxies, stream=True, timeout=refreshRate)
File "C:\Python\lib\site-packages\requests\sessions.py", line 521, in get
return self.request('GET', url, **kwargs)
File "C:\Python\lib\site-packages\requests\sessions.py", line 508, in request
resp = self.send(prep, **send_kwargs)
File "C:\Python\lib\site-packages\requests\sessions.py", line 640, in send
history = [resp for resp in gen] if allow_redirects else []
File "C:\Python\lib\site-packages\requests\sessions.py", line 640, in <listcomp>
history = [resp for resp in gen] if allow_redirects else []
File "C:\Python\lib\site-packages\requests\sessions.py", line 218, in resolve_redirects
**adapter_kwargs
File "C:\Python\lib\site-packages\requests\sessions.py", line 618, in send
r = adapter.send(request, **kwargs)
File "C:\Python\lib\site-packages\requests\adapters.py", line 506, in send
raise SSLError(e, request=request)
requests.exceptions.SSLError: HTTPSConnectionPool(host='www.youtube.com', port=443): Max retries exceeded with url: /watch?v=dHUP25DkKWo (Caused by SSLError(SSLError("bad handshake: SysCallError(-1, 'Unexpected EOF')",),))
I suspect max retries from YouTube. But it's confusing, because I'm connecting via random proxies. If that's the case, maybe the proxies aren't working... or no HTTPS connection was ever made.
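One detail worth checking before blaming YouTube: random.choice(proxy_list) runs once, before the loop, so every request reuses the same proxy and a single dead proxy fails the whole run. Here is a minimal sketch (reusing url, headers, views, and proxy_list from the snippet above; the proxy entries themselves are unverified placeholders) that picks a fresh proxy per request and skips ones that fail:
import random
import time
import requests
for view in range(views):
    proxies = random.choice(proxy_list)  # fresh proxy each iteration
    try:
        with requests.Session() as s:
            # a short timeout surfaces dead proxies quickly
            s.get(url, headers=headers, proxies=proxies, stream=True, timeout=30)
    except requests.exceptions.RequestException as exc:
        print("proxy {0} failed: {1}".format(proxies, exc))
        continue
    time.sleep(60)  # pause between views
Many free proxies from public lists are dead or mishandle HTTPS CONNECT, which can produce exactly the "bad handshake: Unexpected EOF" seen above.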
Related
I have the following code. When I run it on Windows I can make requests through a specific NIC, as described in this answer, but when I run it on Arch Linux the request times out.
import requests
from requests_toolbelt.adapters import source
source = source.SourceAddressAdapter('10.100.89.75')
with requests.Session() as session:
    session.mount('http://', source)
    r = session.get("http://ifconfig.me")
    print(r.text)
I get the following error:
Traceback (most recent call last):
File "<stdin>", line 3, in <module>
File "/usr/lib/python3.10/site-packages/requests/sessions.py", line 600, in get
return self.request("GET", url, **kwargs)
File "/usr/lib/python3.10/site-packages/requests/sessions.py", line 587, in request
resp = self.send(prep, **send_kwargs)
File "/usr/lib/python3.10/site-packages/requests/sessions.py", line 701, in send
r = adapter.send(request, **kwargs)
File "/usr/lib/python3.10/site-packages/requests/adapters.py", line 553, in send
raise ConnectTimeout(e, request=request)
requests.exceptions.ConnectTimeout: HTTPConnectionPool(host='ifconfig.me', port=80): Max retries exceeded with url: / (Caused by ConnectTimeoutError(<urllib3.connection.HTTPConnection object at 0x7f8e0ab379a0>, 'Connection to ifconfig.me timed out. (connect timeout=None)'))
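Worth noting: the snippet mounts the adapter only for 'http://', so any https:// URL would bypass it entirely, and on Linux bind() happily accepts an address the kernel has no outbound route for, so the timeout usually points at routing rather than at requests itself. A minimal sketch, assuming 10.100.89.75 really is assigned to a local interface on the Arch machine, that mounts both schemes and sets an explicit timeout so failures surface quickly:
import requests
from requests_toolbelt.adapters import source
adapter = source.SourceAddressAdapter('10.100.89.75')
with requests.Session() as session:
    session.mount('http://', adapter)   # bind plain-HTTP traffic
    session.mount('https://', adapter)  # bind HTTPS traffic too
    r = session.get('http://ifconfig.me', timeout=10)
    print(r.text)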
I am trying to run code that fetches data using XML requests. The host uses SSL; I tried adding verify=True, but I get an SSL error: requests.exceptions.SSLError: [Errno 8] _ssl.c:727: EOF occurred in violation of protocol. I'm using Python 2.7.
import argparse
import requests
import string
_xml = " ..."
host = ""
port = 0
_dictionary = string.digits + string.uppercase + string.lowercase
def _get_timeout(_data):
    return requests.post("http://{0}:{1}/testing/test".format(host, port),
                         headers={
                             "User-Agent": "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:57.0) Gecko/20100101 "
                                           "Firefox/57.0",
                             "SOAPAction": "",
                             "Content-Type": "text/xml;charset=UTF-8"
                         }, verify=True,
                         data=_xml.format(_data)).elapsed.total_seconds()
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument('--host')
    parser.add_argument('--port')
    parser.add_argument('-v')
    args = parser.parse_args()
    args_dict = vars(args)
    host = args_dict['host']
    port = args_dict['port']
    _hash = ""  # characters recovered so far
    print "this may take a few minutes"
    for i in range(24):
        for _char in _dictionary:
            if args_dict['v'] is not None:
                print "checking {0}".format(_hash + _char)
            if _get_timeout(_hash + _char) > 1.300:  # timing threshold for the local server
                _hash += _char
                print "Found " + _hash
                break
Error
Traceback (most recent call last):
File "test.py", line 141, in <module>
if _get_timeout(_hash + _char) > 1.300: # timeout for local server
File "test.py", line 119, in _get_timeout
data=_xml.format(_data)).elapsed.total_seconds()
File "C:\Python27\lib\site-packages\requests\api.py", line 117, in post
return request('post', url, data=data, json=json, **kwargs)
File "C:\Python27\lib\site-packages\requests\api.py", line 61, in request
return session.request(method=method, url=url, **kwargs)
File "C:\Python27\lib\site-packages\requests\sessions.py", line 542, in request
resp = self.send(prep, **send_kwargs)
File "C:\Python27\lib\site-packages\requests\sessions.py", line 655, in send
r = adapter.send(request, **kwargs)
File "C:\Python27\lib\site-packages\requests\adapters.py", line 514, in send
raise SSLError(e, request=request)
requests.exceptions.SSLError: HTTPSConnectionPool(host='..', port=..): Max retries exceeded with url: /testing/test (Caused by SSLError(SSLEOFError(8, u'EOF occurred in violation of protocol (_ssl.c:727)'),))
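A frequent cause of "EOF occurred in violation of protocol" on Python 2.7 is a TLS version mismatch: the server drops the handshake when the client offers an old default protocol. A minimal sketch of the usual workaround, pinning TLS 1.2 through a custom transport adapter (assumes Python 2.7.9+, an OpenSSL build with TLS 1.2 support, and a requests version that still bundles urllib3 under requests.packages):
import ssl
import requests
from requests.adapters import HTTPAdapter
from requests.packages.urllib3.poolmanager import PoolManager
class ForcedTLSAdapter(HTTPAdapter):
    # pin the TLS version for every connection made through this adapter
    def init_poolmanager(self, connections, maxsize, block=False, **kwargs):
        self.poolmanager = PoolManager(num_pools=connections, maxsize=maxsize,
                                       block=block,
                                       ssl_version=ssl.PROTOCOL_TLSv1_2,
                                       **kwargs)
session = requests.Session()
session.mount("https://", ForcedTLSAdapter())
# then issue the POST from _get_timeout through `session` instead of requests.post
Note also that the code posts to http:// while the traceback shows HTTPSConnectionPool, which suggests the server redirects to HTTPS before the failure occurs.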
I get a requests.exceptions.ConnectionError when I run the following code:
from requests import *
from bs4 import *
URL = "https://www.ldoceonline.com/dictionary/"
response = get(URL)
But when I test it with another URL, it works. I really want to scrape this website. How can I fix this error?
Complete traceback:
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "e:/amirhossein/Project/Programming/L/Longmandict.py", line 5, in <module>
response = get(URL,verify=False)
File "C:\Users\User\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.7_qbz5n2kfra8p0\LocalCache\local-packages\Python37\site-packages\requests\api.py", line 75, in get
return request('get', url, params=params, **kwargs)
File "C:\Users\User\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.7_qbz5n2kfra8p0\LocalCache\local-packages\Python37\site-packages\requests\api.py", line 61, in request
return session.request(method=method, url=url, **kwargs)
File "C:\Users\User\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.7_qbz5n2kfra8p0\LocalCache\local-packages\Python37\site-packages\requests\sessions.py", line 542, in request
resp = self.send(prep, **send_kwargs)
File "C:\Users\User\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.7_qbz5n2kfra8p0\LocalCache\local-packages\Python37\site-packages\requests\sessions.py", line 655, in send
r = adapter.send(request, **kwargs)
File "C:\Users\User\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.7_qbz5n2kfra8p0\LocalCache\local-packages\Python37\site-packages\requests\adapters.py", line 498, in send
raise ConnectionError(err, request=request)
requests.exceptions.ConnectionError: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response'))
Apparently the server requires a correct User-Agent HTTP header to be set:
import requests
from bs4 import BeautifulSoup
headers = {
"User-Agent": "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:92.0) Gecko/20100101 Firefox/92.0"
}
url = "https://www.ldoceonline.com/dictionary/"
soup = BeautifulSoup(requests.get(url, headers=headers).content, "html.parser")
print(soup.title.text)
Prints:
Longman English Dictionaries | Meanings, thesaurus, collocations and grammar
I am trying to write code that scrapes and downloads specific files from archive.org. When I run the program, I run into this error.
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "D:\ROMS\Gamecube\main.py", line 16, in <module>
response = requests.get(DOMAIN + file_link)
File "C:\Users\cycle\AppData\Local\Programs\Python\Python38-32\lib\site-packages\requests\api.py", line 76, in get
return request('get', url, params=params, **kwargs)
File "C:\Users\cycle\AppData\Local\Programs\Python\Python38-32\lib\site-packages\requests\api.py", line 61, in request
return session.request(method=method, url=url, **kwargs)
File "C:\Users\cycle\AppData\Local\Programs\Python\Python38-32\lib\site-packages\requests\sessions.py", line 530, in request
resp = self.send(prep, **send_kwargs)
File "C:\Users\cycle\AppData\Local\Programs\Python\Python38-32\lib\site-packages\requests\sessions.py", line 643, in send
r = adapter.send(request, **kwargs)
File "C:\Users\cycle\AppData\Local\Programs\Python\Python38-32\lib\site-packages\requests\adapters.py", line 516, in send
raise ConnectionError(e, request=request)
requests.exceptions.ConnectionError: HTTPSConnectionPool(host='archive.org007%20-%20agent%20under%20fire%20%28usa%29.nkit.gcz', port=443): Max retries exceeded with url: / (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x043979B8>: Failed to establish a new connection: [Errno 11001] getaddrinfo failed'))
This is my code:
from bs4 import BeautifulSoup as bs
import requests
DOMAIN = 'https://archive.org'
URL = 'https://archive.org/download/GCRedumpNKitPart1'
FILETYPE = '%28USA%29.nkit.gcz'
def get_soup(url):
    return bs(requests.get(url).text, 'html.parser')
for link in get_soup(URL).find_all('a'):
    file_link = link.get('href')
    if FILETYPE in file_link:
        print(file_link)
        with open(link.text, 'wb') as file:
            response = requests.get(DOMAIN + file_link)
            file.write(response.content)
You simply forgot the / after https://archive.org, so you build incorrect URLs.
Add a / at the end of the domain:
DOMAIN = 'https://archive.org/'
or add the / when concatenating:
response = requests.get(DOMAIN + '/' + file_link)
or use urllib.parse.urljoin() to build the URLs:
import urllib.parse
response = requests.get(urllib.parse.urljoin(DOMAIN, file_link))
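A quick illustration of why urljoin() is the safest of the three: it builds a correct URL whether or not the href carries a leading slash (the file name below is made up):
import urllib.parse
base = 'https://archive.org'
print(urllib.parse.urljoin(base, 'download/x/Example%20%28USA%29.nkit.gcz'))
# https://archive.org/download/x/Example%20%28USA%29.nkit.gcz
print(urllib.parse.urljoin(base, '/download/x/Example%20%28USA%29.nkit.gcz'))
# https://archive.org/download/x/Example%20%28USA%29.nkit.gcz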
My VPN works well, and I can visit Google (from China).
SOCKS5 port of my VPN: 1080.
But when I run the following code, I get an error.
import requests
headers = {'user-agent': ''}
proxies = {"http": "socks5://127.0.0.1:1080",'https': 'socks5://127.0.0.1:1080'}
# url = 'https://www.baidu.com/'
url = 'https://www.google.com/search?q=python' #
res = requests.get(url, headers=headers, proxies=proxies)
print("res.status_code:\n",res.status_code)
If I remove proxies=proxies and change the URL to Baidu, it works:
...
url = 'https://www.baidu.com/'
# url = 'https://www.google.com/search?q=python'
res = requests.get(url, headers=headers)
print("res.status_code:\n",res.status_code)
Here is the error:
Traceback (most recent call last):
File "Try.py", line 17, in <module>
res = requests.get(url, headers=headers, proxies=proxies)
File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/requests-2.22.0-py3.7.egg/requests/api.py", line 75, in get
return request('get', url, params=params, **kwargs)
File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/requests-2.22.0-py3.7.egg/requests/api.py", line 60, in request
return session.request(method=method, url=url, **kwargs)
File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/requests-2.22.0-py3.7.egg/requests/sessions.py", line 533, in request
resp = self.send(prep, **send_kwargs)
File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/requests-2.22.0-py3.7.egg/requests/sessions.py", line 646, in send
r = adapter.send(request, **kwargs)
File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/requests-2.22.0-py3.7.egg/requests/adapters.py", line 498, in send
raise ConnectionError(err, request=request)
requests.exceptions.ConnectionError: ('Connection aborted.', OSError(0, 'Error'))
It seems contradictory: the VPN clearly works in the browser, yet the same proxy fails through requests. I don't really know where the problem is. I'd be extremely grateful if someone could help.
Solved by substituting socks5 with socks5h.
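The difference: with socks5:// the hostname is resolved locally, and local DNS answers for google.com can be blocked or poisoned; socks5h:// delegates DNS resolution to the proxy itself. A sketch of the working configuration (requires the SOCKS extra: pip install requests[socks]):
import requests
headers = {'user-agent': ''}
proxies = {"http": "socks5h://127.0.0.1:1080",   # 'h' = let the proxy resolve hostnames
           "https": "socks5h://127.0.0.1:1080"}
res = requests.get('https://www.google.com/search?q=python', headers=headers, proxies=proxies)
print("res.status_code:\n", res.status_code)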