In the code below I first check the URL's status code and then start the relevant thread, and do the same before adding it to the queue;
however, if there are too many URLs I get a Timeout error.
All the code is added below.
I also just discovered another bug: if I pass an mp3 file along with some jpeg images, the mp3 downloads at its correct size but opens as one of the images from the URLs passed.
_fdUtils
def getParser():
    parser = argparse.ArgumentParser(prog='FileDownloader',
                                     description='Utility to download files from internet')
    parser.add_argument('-v', '--verbose', default=logging.DEBUG,
                        help='by default its on, pass None or False to not spit in shell')
    parser.add_argument('-st', '--saveTo', default=None, action=FullPaths,
                        help='location where you want files to download to')
    parser.add_argument('-urls', nargs='*',
                        help='urls of files you want to download.')
    parser.add_argument('-se', nargs='*', default=[1], help='Split each url passed to urls by the'
                        " respective split order, if a url doesn't have a split default is taken 1 ")
    return parser.parse_args()

def getResponse(url):
    return requests.head(url, allow_redirects=True, timeout=10, headers={'Accept-Encoding': 'identity'})

def isWorkingURL(url):
    response = getResponse(url)
    return response.status_code in [302, 200, 100, 204, 300]

def getUrl(url):
    """ gets the actual url to download file from.
    """
    response = getResponse(url)
    return response.headers.get('location', url)
Error stack trace:
Traceback (most recent call last):
  File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/threading.py", line 810, in __bootstrap_inner
    self.run()
  File "python/file_download.py", line 181, in run
    _grabAndWriteToDisk(self, split, url, self.__saveTo, 0, self.queue)
  File "python/file_download.py", line 70, in _grabAndWriteToDisk
    resp = requests.get(url, headers={'Range': 'bytes=%s' % irange}, stream=True)
  File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/requests-2.1.0-py2.7.egg/requests/api.py", line 55, in get
    return request('get', url, **kwargs)
  File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/requests-2.1.0-py2.7.egg/requests/api.py", line 44, in request
    return session.request(method=method, url=url, **kwargs)
  File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/requests-2.1.0-py2.7.egg/requests/sessions.py", line 382, in request
    resp = self.send(prep, **send_kwargs)
  File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/requests-2.1.0-py2.7.egg/requests/sessions.py", line 505, in send
    history = [resp for resp in gen] if allow_redirects else []
  File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/requests-2.1.0-py2.7.egg/requests/sessions.py", line 167, in resolve_redirects
    allow_redirects=False,
  File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/requests-2.1.0-py2.7.egg/requests/sessions.py", line 485, in send
    r = adapter.send(request, **kwargs)
  File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/requests-2.1.0-py2.7.egg/requests/adapters.py", line 381, in send
    raise Timeout(e)
Timeout: HTTPConnectionPool(host='ia600506.us.archive.org', port=80): Read timed out. (read timeout=<object object at 0x1002b40b0>)
There we go again:
import argparse
import logging
import Queue
import os
import requests
import signal
import socket
import sys
import time
import threading
import utils as _fdUtils
from collections import OrderedDict
from itertools import izip_longest
from socket import error as SocketError, timeout as SocketTimeout
# timeout in seconds
TIMEOUT = 10
socket.setdefaulttimeout(TIMEOUT)
DESKTOP_PATH = os.path.expanduser("~/Desktop")
appName = 'FileDownloader'
logFile = os.path.join(DESKTOP_PATH, '%s.log' % appName)
_log = _fdUtils.fdLogger(appName, logFile, logging.DEBUG, logging.DEBUG, console_level=logging.DEBUG)
queue = Queue.Queue()
STOP_REQUEST = threading.Event()
maxSplits = threading.BoundedSemaphore(3)
threadLimiter = threading.BoundedSemaphore(5)
lock = threading.Lock()
pulledSize = 0
dataDict = {}
def _grabAndWriteToDisk(threadName, url, saveTo, first=None, queue=None, mode='wb', irange=None):
    """ Function to download file..

        Args:
            url(str): url of file to download
            saveTo(str): path where to save file
            first(int): starting byte of the range
            queue(Queue.Queue): queue object to set status for file download
            mode(str): mode of file to be downloaded
            irange(str): range of byte to download
    """
    fileName = _fdUtils.getFileName(url)
    filePath = os.path.join(saveTo, fileName)
    fileSize = _fdUtils.getUrlSizeInBytes(url)
    downloadedFileSize = 0 if not first else first
    block_sz = 8192
    resp = requests.get(url, headers={'Range': 'bytes=%s' % irange}, stream=True)

    for fileBuffer in resp.iter_content(block_sz):
        if not fileBuffer:
            break

        with open(filePath, mode) as fd:
            downloadedFileSize += len(fileBuffer)
            fd.write(fileBuffer)
            mode = 'a'
            status = r"%10d [%3.2f%%]" % (downloadedFileSize, downloadedFileSize * 100. / fileSize)
            status = status + chr(8)*(len(status)+1)
            sys.stdout.write('%s\r' % status)
            time.sleep(.01)
            sys.stdout.flush()

        if downloadedFileSize == fileSize:
            STOP_REQUEST.set()
            queue.task_done()
            _log.debug("Downloaded %s %s%% using %s and saved to %s", fileName,
                       downloadedFileSize * 100. / fileSize, threadName.getName(), saveTo)
def _downloadChunk(url, idx, irange, fileName, sizeInBytes):
    _log.debug("Downloading %s for first chunk %s of %s " % (irange, idx+1, fileName))
    pulledSize = irange[-1]
    try:
        resp = requests.get(url, allow_redirects=False, timeout=TIMEOUT,
                            headers={'Range': 'bytes=%s-%s' % (str(irange[0]), str(irange[-1]))},
                            stream=True)
    except (SocketTimeout, requests.exceptions), e:
        _log.error(e)
        return

    chunk_size = str(irange[-1])
    for chunk in resp.iter_content(chunk_size):
        status = r"%10d [%3.2f%%]" % (pulledSize, pulledSize * 100. / int(chunk_size))
        status = status + chr(8)*(len(status)+1)
        sys.stdout.write('%s\r' % status)
        sys.stdout.flush()
        pulledSize += len(chunk)
        dataDict[idx] = chunk
        time.sleep(.03)
        if pulledSize == sizeInBytes:
            _log.info("%s downloaded %3.0f%%", fileName, pulledSize * 100. / sizeInBytes)
class ThreadedFetch(threading.Thread):
    """ docstring for ThreadedFetch
    """
    def __init__(self, saveTo, queue):
        super(ThreadedFetch, self).__init__()
        self.queue = queue
        self.__saveTo = saveTo

    def run(self):
        threadLimiter.acquire()
        try:
            items = self.queue.get()
            url = items[0]
            split = items[-1]
            fileName = _fdUtils.getFileName(url)

            # grab split chunks in separate thread.
            if split > 1:
                maxSplits.acquire()
                try:
                    sizeInBytes = _fdUtils.getUrlSizeInBytes(url)
                    byteRanges = _fdUtils.getRangeSegements(sizeInBytes, split)
                    filePath = os.path.join(self.__saveTo, fileName)

                    downloaders = [
                        threading.Thread(
                            target=_downloadChunk,
                            args=(url, idx, irange, fileName, sizeInBytes),
                        )
                        for idx, irange in enumerate(byteRanges)
                    ]

                    # start threads, let run in parallel, wait for all to finish
                    for th in downloaders:
                        th.start()

                    # this makes the wait for all thread to finish
                    # which confirms the dataDict is up-to-date
                    for th in downloaders:
                        th.join()

                    downloadedSize = 0
                    with open(filePath, 'wb') as fh:
                        for _idx, chunk in sorted(dataDict.iteritems()):
                            downloadedSize += len(chunk)
                            status = r"%10d [%3.2f%%]" % (downloadedSize, downloadedSize * 100. / sizeInBytes)
                            status = status + chr(8)*(len(status)+1)
                            fh.write(chunk)
                            sys.stdout.write('%s\r' % status)
                            time.sleep(.04)
                            sys.stdout.flush()

                    if downloadedSize == sizeInBytes:
                        _log.info("%s, saved to %s", fileName, self.__saveTo)

                    self.queue.task_done()
                finally:
                    maxSplits.release()
            else:
                while not STOP_REQUEST.isSet():
                    self.setName("primary_%s_thread" % fileName.split(".")[0])
                    # if download whole file in single chunk no need
                    # to start a new thread, so directly download here.
                    _grabAndWriteToDisk(self, url, self.__saveTo, 0, self.queue)
        finally:
            threadLimiter.release()
def main(appName):
    args = _fdUtils.getParser()
    saveTo = args.saveTo if args.saveTo else DESKTOP_PATH

    # spawn a pool of threads, and pass them queue instance
    # each url will be downloaded concurrently
    unOrdUrls = dict(izip_longest(args.urls, args.se, fillvalue=1))
    ordUrls = OrderedDict([(k, unOrdUrls[k]) for k in sorted(unOrdUrls, key=unOrdUrls.get, reverse=False) if _fdUtils.isWorkingURL(k, _log) and _fdUtils.notOnDisk(k, saveTo)])
    print "length: %s " % len(ordUrls)

    for i in xrange(len(ordUrls)):
        t = ThreadedFetch(saveTo, queue)
        t.daemon = True
        t.start()

    try:
        # populate queue with data
        for url, split in ordUrls.iteritems():
            url = _fdUtils.getUrl(url)
            print url
            queue.put((url, int(split)))

        # wait on the queue until everything has been processed
        queue.join()
        _log.info('All tasks completed.')
    except (KeyboardInterrupt, SystemExit):
        _log.critical('! Received keyboard interrupt, quitting threads.')


if __name__ == "__main__":
    # change the name of MainThread.
    threading.currentThread().setName("FileDownloader")
    myapp = threading.currentThread().getName()
    main(myapp)
I see two problems in your code. Since it's incomplete, I'm not sure exactly how it's supposed to work, so I can't promise which one you're running into first, but I'm pretty sure you need to fix both.
First:
queue.put((_fdUtils.getUrl(url), int(split)))
That's going to call _fdUtils.getUrl(url) in the main thread, and put the result on the queue. Your comments clearly imply that you intended the downloading to happen on the background threads.
If you wanted to pass a function to be called, just pass the function and its argument as separate members of the tuple, or wrap it up in a closure or a partial:
queue.put((lambda: _fdUtils.getUrl(url), int(split)))
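or, equivalently, with functools.partial; this is a sketch, and it assumes the worker then calls the first tuple member (for example url = items[0]()) so the lookup actually happens on the background thread:

from functools import partial

# defer the getUrl call until a worker thread unpacks the tuple
queue.put((partial(_fdUtils.getUrl, url), int(split)))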
Second:
t = ThreadedFetch(saveTo, queue)
t.daemon = True
t.start()
This starts a thread for every URL. That's almost never a good idea. Generally, downloaders don't use more than 4-16 threads at a time, and no more than 2-4 to the same site. You could easily be timing out because you're spamming some site too fast and its server or router is making you back off for a while. Or, with a huge number of requests, you could be flooding your own network and blocking ACKs or even rebooting the router (especially if you have either a cheap home WiFi router or ADSL with a crappy provider).
Also, a much simpler way to do this would be to use a smart pool, like a multiprocessing.dummy.Pool (multiprocessing.dummy means it acts like the multiprocessing module but uses threads) or, even better, a concurrent.futures.ThreadPoolExecutor. In fact, if you look at the docs, a parallel downloader is the first example for ThreadPoolExecutor.
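A minimal sketch of the pool approach, assuming the futures backport is installed on Python 2 (pip install futures; on Python 3 concurrent.futures is in the standard library). downloadOne is a hypothetical stand-in for your per-URL logic, and ordUrls/saveTo are the names from your main():

import os
import requests
from concurrent.futures import ThreadPoolExecutor, as_completed

def downloadOne(url, saveTo):
    # hypothetical helper: stream one URL to disk and return the file path
    resp = requests.get(url, stream=True, timeout=10)
    resp.raise_for_status()
    filePath = os.path.join(saveTo, url.rsplit('/', 1)[-1])
    with open(filePath, 'wb') as fh:
        for chunk in resp.iter_content(8192):
            fh.write(chunk)
    return filePath

# at most 4 downloads in flight at once, instead of one thread per URL
with ThreadPoolExecutor(max_workers=4) as pool:
    futures = dict((pool.submit(downloadOne, url, saveTo), url) for url in ordUrls)
    for future in as_completed(futures):
        print futures[future], future.result()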
Related
This is my code; it basically takes a list of 94,000+ URLs and collects the HTTP status codes for them:
#!/usr/bin/python3
import threading
from queue import Queue
import urllib.request
import urllib.parse
from http.client import HTTPConnection
import socket
import http.client
#import httplib
url_input = open("urls_prod_sort.txt", "r").read()
urls = url_input[:url_input.rfind('\n')].split('\n')
#urls = urls[:100]
url_502 = []
url_logs = []
url_502_lock = threading.Lock()
print_lock = threading.Lock()
def sendRequest(url_u, http_method = 'GET', data = None):
    use_proxy = "http://xxxxxxxx:8080"
    proxies = {"http": use_proxy}
    proxy = urllib.request.ProxyHandler(proxies)
    handler = urllib.request.HTTPHandler()
    url = "http://" + url_u
    with print_lock:
        print(url)
    opener = urllib.request.build_opener(proxy,handler)
    urllib.request.install_opener(opener)
    request = urllib.request.Request(url,data)
    request.add_header("User-agent","| MSIE |")
    request.get_method = lambda: http_method
    try:
        response = urllib.request.urlopen(request)
        response_code = response.code
    except urllib.error.HTTPError as error:
        response_code = error.code
    except urllib.error.URLError as e2:
        response_code = 701
    except socket.timeout as e3:
        response_code = 702
    except socket.error as e4:
        response_code = 703
    except http.client.IncompleteRead as e:
        response_code = 700

    if response_code == 502:
        with url_502_lock:
            #url_502.append(url)
            url_502_file = open("url_502_file.txt", "a")
            url_502_file.write(url + "\n")
            url_502_file.close()

    with print_lock:
        #url_logs.append(url + "," + str(response_code))
        url_all_logs_file = open("url_all_logs.csv", "a")
        url_all_logs_file.write(url + "," + str(response_code) + '\n')
        url_all_logs_file.close()
        #print (url + "," + str(response_code))
        #print (response_code)

    return response_code
def worker():
    while True:
        url = q.get()
        if url == ":::::":
            break
        else:
            sendRequest(url)
            q.task_done()
#======================================
q = Queue()
for threads in range(1000):
    t = threading.Thread(target = worker)
    t.daemon = True
    t.start()

for url in urls:
    q.put(url)

q.put(":::::")
q.join()
However, the program never seems to terminate (even though the URLs have all been iterated through), which forces me to Ctrl-C the program, and then I get the following error:
Traceback (most recent call last):
  File "./url_sc_checker.py", line 120, in <module>
    q.join()
  File "/usr/lib/python3.2/queue.py", line 82, in join
    self.all_tasks_done.wait()
  File "/usr/lib/python3.2/threading.py", line 235, in wait
    waiter.acquire()
KeyboardInterrupt
The reason your program doesn't terminate is simple: your worker creates an infinite loop:
def worker():
    while True:
        ...
You need to either throw an exception, break, or have a terminating condition in your while statement. Otherwise your program will keep trying to get the next job from the queue without ever knowing that there will never be a next job.
A common way to do this is to put a sentinel value in the queue; when a worker takes a job off the queue, it checks whether the job is the sentinel value and, if so, breaks out of the loop.
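A minimal sketch of that approach applied to your code, with two assumptions spelled out: you keep ":::::" as the sentinel, and you queue one sentinel per worker (your script starts 1000 threads but only puts a single marker on the queue, and the worker that receives it breaks without calling task_done(), so q.join() can still block):

SENTINEL = ":::::"
NUM_WORKERS = 50              # far fewer than 1000 is usually plenty

def worker():
    while True:
        url = q.get()
        if url == SENTINEL:
            q.task_done()     # the sentinel is a job too, mark it done
            break
        sendRequest(url)
        q.task_done()

for url in urls:
    q.put(url)
for _ in range(NUM_WORKERS):
    q.put(SENTINEL)           # one sentinel per worker so every loop exits
q.join()

The thread-creation loop would use range(NUM_WORKERS) as well.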
Another way is to have a global condition variable that you check in the while condition. When the job producer has pushed all items to the queue, it joins the queue, and when all jobs are done it unblocks and terminates the threads or processes.
Another possible reason your process doesn't terminate is that sendRequest raises an unexpected exception; the thread then terminates and you are left with jobs that are never marked as done.
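A small sketch of one way to guard against that, reusing the SENTINEL name from the sketch above: make sure task_done() runs even when sendRequest raises:

def worker():
    while True:
        url = q.get()
        if url == SENTINEL:
            q.task_done()
            break
        try:
            sendRequest(url)
        finally:
            q.task_done()     # count the job even if sendRequest raised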
I'm trying to download a file from a server using Python. Sometimes the file is very large, and I would like to have a progress bar. One way I can think of is to download it as a stream so that I can print the progress. So far I have tried the standard urlopen, urlretrieve, and the requests module (with streaming on).
Obviously, urlopen cannot download a file as a stream; the requests module does support this, but the server limits how many files I can download at a time (its limit is 1). So every time I try to use requests, it only gets the web page telling me to wait. Is there any other way to do this?
I have very recently downloaded many types of media with this function:
import sys
import requests
import time
def download_resource(domain, url, file_name = None, download = True):
    cookies = {}
    s = requests.Session()
    s.config['keep_alive'] = True
    #add your own cookies here, I have a specific function I call
    #for my application but yours is different
    r = s.get(url, cookies = cookies, stream = True)
    if not r.ok:
        print "error in downloading"
        return -1
    file_size = int(r.headers['content-length'])
    if not file_name:
        try:
            temp = r.headers['content-disposition']
        except Exception as e:
            pass
            #failing download
            return -1
        else:
            if not temp:
                return -1
            else:
                file_name = temp.split("filename=")[-1]
                return_obj["filename"] = file_name
    #print "File size:", file_size
    #print "\n", str(self.entire_size / float(1024*1024*1024)), "\n"
    print "Downloading:", file_name

    if download:
        with open(file_name, "wb") as fh:
            count = 1
            chunk_size = 1048576
            start_time = time.time()
            try:
                for block in r.iter_content(chunk_size):
                    total_time = time.time() - start_time
                    percent = count*chunk_size/float(file_size) * 100.0
                    fraction = int(percent/5)
                    download_speed = 1.0 / total_time
                    sys.stdout.write('\r')
                    sys.stdout.write("[%-20s] %d%% %3.2f MB/s " % ('='* fraction, percent, download_speed))
                    sys.stdout.flush()
                    if not block:
                        break
                    fh.write(block)
                    count += 1
                    start_time = time.time()
            except Exception as e:
                print e
            finally:
                #close up the stream
                r.close()
While testing, I just discovered that this
url = ' http://wi312.rockdizfile.com/d/uclf2kr7fp4r2ge47pcuihdpky2chcsjur5nrds2hx53f26qgxnrktew/Kimbra%20-%20Love%20in%20High%20Places.mp3'
works in a browser and the file download begins, but if I try to fetch the same file using
requests.get(url)
it gives a massive error ...
Any clue why this is happening? Do I need to decode the URL to make it work?
Update
This is the error I keep getting:
Exception in thread Thread-5:
Traceback (most recent call last):
  File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/threading.py", line 810, in __bootstrap_inner
    self.run()
  File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/threading.py", line 763, in run
    self.__target(*self.__args, **self.__kwargs)
  File "python/file_download.py", line 98, in _downloadChunk
    stream=True)
  File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/requests-2.1.0-py2.7.egg/requests/api.py", line 55, in get
    return request('get', url, **kwargs)
  File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/requests-2.1.0-py2.7.egg/requests/api.py", line 44, in request
    return session.request(method=method, url=url, **kwargs)
  File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/requests-2.1.0-py2.7.egg/requests/sessions.py", line 382, in request
    resp = self.send(prep, **send_kwargs)
  File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/requests-2.1.0-py2.7.egg/requests/sessions.py", line 485, in send
    r = adapter.send(request, **kwargs)
  File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/requests-2.1.0-py2.7.egg/requests/adapters.py", line 381, in send
    raise Timeout(e)
Timeout: (<requests.packages.urllib3.connectionpool.HTTPConnectionPool object at 0x10258de90>, 'Connection to wi312.rockdizfile.com timed out. (connect timeout=0.001)')
There was no space when I posted; it was just on a new line because I posted it as inline code.
Here is the code that makes the requests (also try this new URL: http://archive.org/download/LucyIsabelleMarsh/LucyIsabelleMarsh-ItalianStreetSong.mp3):
import requests
import signal
import sys
import time
import threading
import utils as _fdUtils
from socket import error as SocketError, timeout as SocketTimeout
def _downloadChunk(url, idx, irange, fileName, sizeInBytes):
    _log.debug("Downloading %s for first chunk %s " % (irange, idx+1))
    pulledSize = irange[-1]
    try:
        resp = requests.get(url, allow_redirects=False, timeout=0.001,
                            headers={'Range': 'bytes=%s-%s' % (str(irange[0]), str(irange[-1]))},
                            stream=True)
    except (SocketTimeout, requests.exceptions), e:
        _log.error(e)
        return

    chunk_size = str(irange[-1])
    for chunk in resp.iter_content(chunk_size):
        status = r"%10d [%3.2f%%]" % (pulledSize, pulledSize * 100. / int(chunk_size))
        status = status + chr(8)*(len(status)+1)
        sys.stdout.write('%s\r' % status)
        sys.stdout.flush()
        pulledSize += len(chunk)
        dataDict[idx] = chunk
        time.sleep(.03)
        if pulledSize == sizeInBytes:
            _log.info("%s downloaded %3.0f%%", fileName, pulledSize * 100. / sizeInBytes)
class ThreadedFetch(threading.Thread):
    """ docstring for ThreadedFetch
    """
    def __init__(self, saveTo, queue):
        super(ThreadedFetch, self).__init__()
        self.queue = queue
        self.__saveTo = saveTo

    def run(self):
        threadLimiter.acquire()
        try:
            items = self.queue.get()
            url = items[0]
            split = items[-1]
            fileName = _fdUtils.getFileName(url)

            # grab split chunks in separate thread.
            if split > 1:
                maxSplits.acquire()
                try:
                    sizeInBytes = _fdUtils.getUrlSizeInBytes(url)
                    if sizeInBytes:
                        byteRanges = _fdUtils.getRangeSegements(sizeInBytes, split)
                    else:
                        byteRanges = ['0-']

                    filePath = os.path.join(self.__saveTo, fileName)

                    downloaders = [
                        threading.Thread(
                            target=_downloadChunk,
                            args=(url, idx, irange, fileName, sizeInBytes),
                        )
                        for idx, irange in enumerate(byteRanges)
                    ]

                    # start threads, let run in parallel, wait for all to finish
                    for th in downloaders:
                        th.start()

                    # this makes the wait for all thread to finish
                    # which confirms the dataDict is up-to-date
                    for th in downloaders:
                        th.join()

                    downloadedSize = 0
                    with open(filePath, 'wb') as fh:
                        for _idx, chunk in sorted(dataDict.iteritems()):
                            downloadedSize += len(chunk)
                            status = r"%10d [%3.2f%%]" % (downloadedSize, downloadedSize * 100. / sizeInBytes)
                            status = status + chr(8)*(len(status)+1)
                            fh.write(chunk)
                            sys.stdout.write('%s\r' % status)
                            time.sleep(.04)
                            sys.stdout.flush()

                    if downloadedSize == sizeInBytes:
                        _log.info("%s, saved to %s", fileName, self.__saveTo)

                    self.queue.task_done()
                finally:
                    maxSplits.release()
The traceback shows a Timeout exception, and indeed you have a very short timeout set in your code; either remove this limit or increase it:
requests.get(url, allow_redirects=False, timeout=0.001, # <-- this is very short
Even if you were accessing localhost (your own computer), such a timeout will result in a Timeout exception. From the documentation:
Note
timeout is not a time limit on the entire response download; rather,
an exception is raised if the server has not issued a response for
timeout seconds (more precisely, if no bytes have been received on the
underlying socket for timeout seconds).
So it's not doing what you might expect.
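If you do want a hard upper bound on the request, newer versions of requests (2.4.0 and later, not the 2.1.0 egg shown in your traceback) accept a (connect, read) tuple, so you can keep a short connect timeout without cutting slow reads short; a minimal sketch:

# fail fast if the TCP connection cannot be made, but allow up to
# 30 seconds of silence between received bytes
resp = requests.get(url, allow_redirects=False, stream=True, timeout=(3.05, 30))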
You have a space before the start of the URL, which causes a requests.exceptions.InvalidSchema error:
url = ' http://wi312.rockdizfile.com/d/uclf2kr7fp4r2ge47pcuihdpky2chcsjur5nrds2hx53f26qgxnrktew/Kimbra%20-%20Love%20in%20High%20Places.mp3'
Change to:
url = 'http://wi312.rockdizfile.com/d/uclf2kr7fp4r2ge47pcuihdpky2chcsjur5nrds2hx53f26qgxnrktew/Kimbra%20-%20Love%20in%20High%20Places.mp3'
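If the URLs come from user input or a file, it may also be worth stripping stray whitespace defensively before requesting them; a small sketch:

url = url.strip()             # drop leading/trailing spaces and newlines
resp = requests.get(url, stream=True)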
I'm reading XML events with the requests library as shown in the code below. How do I raise a connection-lost error once the request has started? The server is emulating HTTP push / long polling (http://en.wikipedia.org/wiki/Push_technology#Long_polling) and will not end by default.
If there is no new message after 10 minutes, the while loop should be exited.
import requests
from time import time
if __name__ == '__main__':
    #: Set a default content-length
    content_length = 512
    try:
        requests_stream = requests.get('http://agent.mtconnect.org:80/sample?interval=0', stream=True, timeout=2)
        while True:
            start_time = time()
            #: Read three lines to determine the content-length
            for line in requests_stream.iter_lines(3, decode_unicode=None):
                if line.startswith('Content-length'):
                    content_length = int(''.join(x for x in line if x.isdigit()))
                    #: pause the generator
                    break

            #: Continue the generator and read the exact amount of the body.
            for xml in requests_stream.iter_content(content_length):
                print "Received XML document with content length of %s in %s seconds" % (len(xml), time() - start_time)
                break
    except requests.exceptions.RequestException as e:
        print('error: ', e)
The server push can be tested with curl on the command line:
curl http://agent.mtconnect.org:80/sample\?interval\=0
This might not be the best method, but you can use multiprocessing to run the requests in a separate process.
Something like this should work:
import multiprocessing
import requests
import time
class RequestClient(multiprocessing.Process):
    def run(self):
        # Write all your code to process the requests here
        content_length = 512
        try:
            requests_stream = requests.get('http://agent.mtconnect.org:80/sample?interval=0', stream=True, timeout=2)
            start_time = time.time()
            for line in requests_stream.iter_lines(3, decode_unicode=None):
                if line.startswith('Content-length'):
                    content_length = int(''.join(x for x in line if x.isdigit()))
                    break
            for xml in requests_stream.iter_content(content_length):
                print "Received XML document with content length of %s in %s seconds" % (len(xml), time.time() - start_time)
                break
        except requests.exceptions.RequestException as e:
            print('error: ', e)


while True:
    childProcess = RequestClient()
    childProcess.start()
    # Wait for 10mins
    start_time = time.time()
    while time.time() - start_time <= 600:
        # Check if the process is still active
        if not childProcess.is_alive():
            # Request completed
            break
        time.sleep(5)  # Give the system some breathing time

    # Check if the process is still active after 10mins.
    if childProcess.is_alive():
        # Shutdown the process
        childProcess.terminate()
        raise RuntimeError("Connection Timed-out")
Not the perfect code for your problem, but you get the idea.
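An alternative sketch of the same idea that ties the 10-minute limit to inactivity rather than to each request: have the child push a timestamp onto a multiprocessing.Queue whenever data arrives, and let the parent give up when nothing has arrived for 600 seconds. The URL is the one from the question; everything else is an assumption:

import multiprocessing
import requests
import time
from Queue import Empty   # multiprocessing.Queue.get raises this on timeout

class StreamReader(multiprocessing.Process):
    def __init__(self, events):
        super(StreamReader, self).__init__()
        self.events = events

    def run(self):
        stream = requests.get('http://agent.mtconnect.org:80/sample?interval=0',
                              stream=True, timeout=2)
        for line in stream.iter_lines():
            if line:
                self.events.put(time.time())   # any activity resets the parent's countdown

events = multiprocessing.Queue()
reader = StreamReader(events)
reader.start()
try:
    while True:
        try:
            events.get(timeout=600)            # block for at most 10 minutes
        except Empty:
            print 'No new message for 10 minutes, giving up.'
            break
finally:
    reader.terminate()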
Say I want to ping something from different locations, so I wrap the ping command-line tool in Python and use the Pyro4 RPC library to call it.
I have a Python Pyro4 name server:
import Pyro4
Pyro4.config.COMPRESSION = True
Pyro4.naming.startNSloop("ipofnameserver")
And a simple ping server:
import Pyro4
import subprocess

class Pinger(object):
    def ping(self, host):
        return subprocess.check_output(["ping", host, "-c 4"])

pinger = Pinger()
daemon = Pyro4.Daemon()    # make a Pyro daemon
ns = Pyro4.locateNS(host = "ipofnameserver", port=9090)    # find the name server
uri = daemon.register(pinger)    # register the greeting object as a Pyro object
print ns.register("location1", uri)
print "Ready. Object uri =", uri    # print the uri so we can use it in the client later
daemon.requestLoop()
As long as I have only two ping servers everything is OK, but after I add a third one the name server stops responding. Every ping server has a unique name, of course.
For example, I want to check the availability of the servers:
ns = Pyro4.locateNS(host = "nameserverip", port=9090)
names = ns.list().keys()
print names
print ns.list()
for n in names:
    if n == 'Pyro.NameServer': continue
    proxy = Pyro4.Proxy("PYRONAME:"+n)
    try:
        print n, proxy._Proxy__pyroCreateConnection()
    except:
        print "offline"
This works with two ping servers, but with three it just waits for something. Traceback of this script after terminating it with Ctrl+C:
    ns = Pyro4.locateNS(host = "nameserverip", port=9090)
  File "/usr/local/lib/python2.7/dist-packages/Pyro4/naming.py", line 319, in locateNS
    proxy.ping()
  File "/usr/local/lib/python2.7/dist-packages/Pyro4/core.py", line 146, in __call__
    return self.__send(self.__name, args, kwargs)
  File "/usr/local/lib/python2.7/dist-packages/Pyro4/core.py", line 250, in _pyroInvoke
    self.__pyroCreateConnection()
  File "/usr/local/lib/python2.7/dist-packages/Pyro4/core.py", line 312, in __pyroCreateConnection
    msgType, flags, seq, data = MessageFactory.getMessage(conn, None)
  File "/usr/local/lib/python2.7/dist-packages/Pyro4/core.py", line 665, in getMessage
    headerdata = connection.recv(cls.HEADERSIZE)
  File "/usr/local/lib/python2.7/dist-packages/Pyro4/socketutil.py", line 323, in recv
    return receiveData(self.sock, size)
  File "/usr/local/lib/python2.7/dist-packages/Pyro4/socketutil.py", line 104, in receiveData
    data=sock.recv(size, socket.MSG_WAITALL)
strace shows the following:
socket(PF_INET, SOCK_STREAM, IPPROTO_IP) = 3
fcntl(3, F_GETFL) = 0x2 (flags O_RDWR)
fcntl(3, F_SETFL, O_RDWR) = 0
connect(3, {sa_family=AF_INET, sin_port=htons(9090), sin_addr=inet_addr("ipofnameserver")}, 16) = 0
setsockopt(3, SOL_SOCKET, SO_KEEPALIVE, [1], 4) = 0
recvfrom(3,
The following example is not working either, as it is unable to resolve names into a Pyro URI; it just waits for something, like in the previous example. The interesting thing about this example is that it prints fls, which contains the names of all remote ping servers. After adding a fourth ping server I'm unable even to print the names of the registered ping servers.
def _ping((host, firing_location)):
    pinger = Pyro4.Proxy("PYRONAME:" + firing_location)
    return pinger.ping(host)

def ping(host):
    ns = Pyro4.locateNS(host = "178.209.52.240", port=9090)
    names = ns.list().keys()
    fls = []
    for name in names:
        if name == 'Pyro.NameServer': continue
        fls.append(name)
    print fls
    p = Pool(len(fls))
    jobs = p.map(_ping, zip([host]*len(fls), fls))
    for j in jobs:
        print j.split("/")[-3], "±", j.split("/")[-1][:-1]
    return jobs
I've been struggling with this for two days and have no idea what's wrong with my code.