Python product of ascii, wrong start

I wrote a simple script to check the availability of some domains, but I can't understand why it starts with abns, not aaaa.
Here is the code:
import whois
import eventlet
from itertools import product
from string import ascii_lowercase

f = open('4-letter.txt', 'w')
k = (''.join(x) for x in product(ascii_lowercase, repeat=4))

def fetch(url):
    for x in k:
        if whois.whois(x + ".ro").status == "OK":
            print(x + " bad")
        else:
            f.write(x + ".ro\n")

pool = eventlet.GreenPool()
for status in pool.imap(fetch, k):
    print(status)
f.close()

You access the global generator k in this function:
def fetch(url):
    for x in k:
        if whois.whois(x + ".ro").status == "OK":
            print(x + " bad")
        else:
            f.write(x + ".ro\n")
But you also hand k to pool.imap(fetch, k), so imap itself pulls items from k to use as the url arguments. By the time fetch() starts its own loop over the same generator, several items (aaaa, aaab, ...) have already been consumed, which is why the first value you see inside fetch() is abns.
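A minimal sketch of one way to avoid the clash (this reuses the whois/eventlet calls from the question and assumes whois.whois(...).status behaves as shown there): make imap the only consumer of k and have fetch work on the single name it is given.

import whois
import eventlet
from itertools import product
from string import ascii_lowercase

f = open('4-letter.txt', 'w')
k = (''.join(x) for x in product(ascii_lowercase, repeat=4))

def fetch(name):
    # operate only on the name handed over by imap; do not touch k here
    domain = name + ".ro"
    if whois.whois(domain).status == "OK":
        return domain + " taken"
    f.write(domain + "\n")
    return domain + " free"

pool = eventlet.GreenPool()
for status in pool.imap(fetch, k):  # imap is now the sole consumer of k
    print(status)
f.close()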

Related

Multi Threading python problem, not printing

I am currently learning multithreading and learned about concurrent.futures and the ThreadPoolExecutor. I tried to implement an example, but for some reason it is not printing the multiple print statements. Where did I go wrong?
import requests
import random
import string
import concurrent.futures

result = open(r"workingGhosts.txt", "w")
length = 5
url = "https://ghostbin.co/paste/"

def get_random_string(length):
    letters = string.ascii_lowercase
    result_str = ''.join(random.choice(letters) for i in range(length))
    return result_str

times = int(input("how many times do you want to check?"))
list_urls = []
counter = 0
for x in range(times):
    stringA = url + get_random_string(length)
    list_urls.append(stringA)

def lol(finalUrl):
    r = requests.get(finalUrl)
    counter = counter + 1
    print("printing")
    if r.status_code != 404:
        result.write(finalUrl)

with concurrent.futures.ThreadPoolExecutor() as executor:
    executor.map(lol, list_urls)
It's not printing because there is an exception. Exceptions raised inside an executor aren't shown to you unless you check for them yourself.
The exception you are getting is:
UnboundLocalError: local variable 'counter' referenced before assignment
because of this line:
counter = counter + 1
Change lol like this:
def lol(finalUrl):
    global counter  # add this line
    r = requests.get(finalUrl)
    counter = counter + 1
    print("printing")
    if r.status_code != 404:
        result.write(finalUrl)
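If you want the executor to surface such exceptions instead of swallowing them silently, one option (a sketch, not part of the original answer) is to submit the jobs yourself and call result() on each future, which re-raises anything the worker raised:

import concurrent.futures

with concurrent.futures.ThreadPoolExecutor() as executor:
    futures = [executor.submit(lol, u) for u in list_urls]
    for future in concurrent.futures.as_completed(futures):
        try:
            future.result()  # re-raises any exception from inside lol()
        except Exception as exc:
            print("worker failed:", exc)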

python IO isn't fast enough

I'm trying to answer a competitive programming question on Kattis that can be found here. My algorithm is correct, but one of the test cases has a lot of input and my code times out. Is there a more optimized way to do IO in Python?
from sys import stdin, stdout
import atexit, io, sys

buffer = io.BytesIO()
sys.stdout = buffer

@atexit.register
def write():
    sys.__stdout__.write(buffer.getvalue())

def main():
    teque = []
    for i in range(int(stdin.readline())):
        l = stdin.readline().split()
        if l[0] == 'push_back':
            teque.append(int(l[1]))
        if l[0] == 'push_front':
            teque.insert(0, int(l[1]))
        if l[0] == 'push_middle':
            if len(teque) % 2 == 0:
                mid = len(teque) / 2
            else:
                mid = (len(teque) + 1) / 2
            teque.insert(int(mid), int(l[1]))
        if l[0] == 'get':
            stdout.write(str(teque[int(l[1])]) + '\n')

if __name__ == "__main__":
    main()
So, as I learned in the comments, my inserts weren't actually O(1). I fixed that by using two deques. This solution still isn't fast enough for the Python 3 time limit, but it passes under the Python 2 time limit.
from sys import stdin, stdout
from collections import deque

class Teque:
    def __init__(self):
        self._teque1 = deque()
        self._teque2 = deque()

    def push_back(self, x):
        self._teque2.append(x)
        if len(self._teque2) > len(self._teque1):
            self._teque1.append(self._teque2.popleft())

    def push_front(self, x):
        self._teque1.appendleft(x)
        if len(self._teque1) > len(self._teque2):
            self._teque2.appendleft(self._teque1.pop())

    def push_middle(self, x):
        if len(self._teque2) > len(self._teque1):
            self._teque1.append(self._teque2.popleft())
        self._teque2.appendleft(x)

    def get(self, i):
        if i >= len(self._teque1):
            return self._teque2[i - len(self._teque1)]
        return self._teque1[i]

def main():
    teque = Teque()
    for i in range(int(stdin.readline())):
        l = stdin.readline().split()
        if l[0] == 'push_back':
            teque.push_back(int(l[1]))
        elif l[0] == 'push_front':
            teque.push_front(int(l[1]))
        elif l[0] == 'push_middle':
            teque.push_middle(int(l[1]))
        else:
            stdout.write(str(teque.get(int(l[1]))) + '\n')

if __name__ == "__main__":
    main()
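For problems like this, much of the remaining time usually goes into the per-line readline/write calls. Below is a minimal sketch of batched IO (my own addition, not part of the original post) that reuses the Teque class defined above: read all input once with sys.stdin.buffer.read() and emit a single write at the end.

import sys

def main():
    data = sys.stdin.buffer.read().split()
    teque = Teque()
    out = []
    idx = 1
    for _ in range(int(data[0])):
        op, arg = data[idx], int(data[idx + 1])
        idx += 2
        if op == b'push_back':
            teque.push_back(arg)
        elif op == b'push_front':
            teque.push_front(arg)
        elif op == b'push_middle':
            teque.push_middle(arg)
        else:  # get
            out.append(str(teque.get(arg)))
    sys.stdout.write('\n'.join(out) + '\n')

if __name__ == "__main__":
    main()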

Generator returned from function completes prematurely

I have the following piece of code to multiplex blocking generators:
import datetime
import time
import queue
import threading

def blocking1():
    while True:
        time.sleep(1)
        result = "Block1: {}".format(datetime.datetime.now())
        yield result

def blocking2():
    while True:
        time.sleep(2)
        result = "Block2: {}".format(datetime.datetime.now())
        yield result

def multiplex(generators):
    if len(generators) == 1:
        return generators[0]
    elif len(generators) > 1:
        q = queue.Queue()

        def run_one(src):
            for e in src: q.put(e)

        def run_all():
            threads = []
            for src in generators:
                t = threading.Thread(target=run_one, args=(src,))
                t.start()
                threads.append(t)
            for t in threads: t.join()
            q.put(StopIteration)

        threading.Thread(target=run_all).start()
        while True:
            e = q.get()
            if e is StopIteration:
                return
            yield e
    else:
        return []

if __name__ == "__main__":
    # tasks = [("map1: {}".format(e) for e in blocking1()), ("map2: {}".format(e) for e in blocking2())]
    tasks = [("map1: {}".format(e) for e in blocking1())]
    for e in multiplex(tasks):
        print(e)
I wanted to be clever: in case there is only one generator, don't spawn any threads, just return that single generator (after all, the types still match).
However, it doesn't work like that.
The program terminates immediately (as if multiplex returned an empty generator).
What's funny is that the following works (the map1... output is displayed):
import datetime
import time
import queue
import threading

def blocking1():
    while True:
        time.sleep(1)
        result = "Block1: {}".format(datetime.datetime.now())
        yield result

def blocking2():
    while True:
        time.sleep(2)
        result = "Block2: {}".format(datetime.datetime.now())
        yield result

def multiplex(generators):
    if len(generators) == 1:
        return generators[0]
    else:
        return []

if __name__ == "__main__":
    # tasks = [("map1: {}".format(e) for e in blocking1()), ("map2: {}".format(e) for e in blocking2())]
    tasks = [("map1: {}".format(e) for e in blocking1())]
    for e in multiplex(tasks):
        print(e)
The only difference is the removal of the elif part...
Could someone help me understand what is going on, please?
I'm using Python 3.5.3.
You can't (usefully) return a value from a function that also has a yield anywhere in its body, even if the return and the yield occur in separate blocks of code that can never run during the same execution of the function. If there is a yield anywhere in the function, it is a generator function rather than a normal one.
A good fix for this is to yield from your lone generator if you're only given one:
def multiplex(generators):
    if len(generators) == 1:
        yield from generators[0]  # because this is a generator function, we need to yield here
    elif len(generators) > 1:
        ...  # there's a yield in here causing the whole thing to be a generator function!
The problem is that you're returning a generator instead of iterating over it.
Replace
return generators[0]
with
yield from generators[0]
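A tiny illustration of the rule (my own example, not from the post): as soon as a function body contains a yield, any return in it merely ends iteration, and the returned value is never seen by a for loop or by list().

def gen():
    return [1, 2, 3]       # ends the generator immediately; the list is discarded
    yield "never reached"  # this yield is what makes gen a generator function

print(list(gen()))  # prints [], not [1, 2, 3]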

concurrent.futures.ThreadPoolExecutor is slower than a list comprehension

I'm testing a trivial function using list comprehension vs concurrent.futures:
class Test:
    @staticmethod
    def something(times=1):
        return sum([1 for i in range(times)])

    @staticmethod
    def simulate1(function, N):
        l = []
        for i in range(N):
            outcome = function()
            l.append(outcome)
        return sum(l) / N

    @staticmethod
    def simulate2(function, N):
        import concurrent.futures
        with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor:
            l = [outcome for outcome in executor.map(lambda x: function(), range(N))]
        return sum(l) / N

    @staticmethod
    def simulate3(function, N):
        import concurrent.futures
        l = 0
        with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor:
            futures = [executor.submit(function) for i in range(N)]
            for future in concurrent.futures.as_completed(futures):
                l += future.result()
        return l / N

def simulation():
    simulationRate = 100000
    import datetime

    s = datetime.datetime.now()
    o = Test.simulate1(lambda: Test.something(10), simulationRate)
    print((datetime.datetime.now() - s))

    s = datetime.datetime.now()
    o = Test.simulate2(lambda: Test.something(10), simulationRate)
    print((datetime.datetime.now() - s))

    s = datetime.datetime.now()
    o = Test.simulate3(lambda: Test.something(10), simulationRate)
    print((datetime.datetime.now() - s))

simulation()
Measuring the time, I get:
0:00:00.258000
0:00:10.348000
0:00:10.556000
I'm getting started with concurrency, so I don't understand what the bottleneck is that prevents the threads from running faster.
If you change your task function to this, you will see the difference:
import time

def something(n):
    """Simulate doing some IO-bound task."""
    time.sleep(0.001)
    return sum(1 for i in range(n))
On my Mac Pro, this gives:
0:00:13.774700
0:00:01.591226
0:00:01.489159
The concurrent.futures version is obviously faster this time.
The reason is that you were simulating a CPU-bound task; because of Python's GIL, concurrent.futures with threads only makes it slower.
concurrent.futures provides a high-level interface for asynchronously executing callables; you were using it for the wrong kind of workload.
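Put differently: threads help when the task spends its time waiting (IO), while CPU-bound work needs real parallelism. A rough sketch of the same benchmark with a ProcessPoolExecutor (my assumption, not part of the answer; worker functions must live at module level so they can be pickled):

import concurrent.futures
import datetime

def something(times=10):
    return sum(1 for i in range(times))

def simulate_processes(n):
    with concurrent.futures.ProcessPoolExecutor() as executor:
        # chunksize batches the tiny tasks so IPC overhead does not dominate
        results = executor.map(something, [10] * n, chunksize=1000)
        return sum(results) / n

if __name__ == "__main__":
    start = datetime.datetime.now()
    simulate_processes(100000)
    print(datetime.datetime.now() - start)

Whether this actually wins for such a tiny function depends on the per-call work; process pools only start paying off once each task does enough computation to dwarf the pickling and IPC overhead.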

Memoization, Classes, and Multiprocessing in Python

I am trying to do some computations using the multiprocessing module in Python 2.7.2.
My code is like this:
from multiprocessing import Pool
import sys
sys.setrecursionlimit(10000)

partitions = []

class Partitions:
    parts = {}  # My goal is to use this dict to speed
                # up calculations in every process that
                # uses it, without having to build it up
                # from nothing each time

    def __init__(self):
        pass

    def p1(self, k, n):
        if (k, n) in Partitions.parts:
            return Partitions.parts[(k, n)]
        if k > n:
            return 0
        if k == n:
            return 1
        Partitions.parts[(k, n)] = self.p1(k + 1, n) + self.p1(k, n - k)
        return Partitions.parts[(k, n)]

    def P(self, n):
        result = 0
        for k in xrange(1, n / 2 + 1):
            result += self.p1(k, n - k)
        return 1 + result

p = Partitions()

def log(results):
    if results:
        partitions.extend(results)
    return None

def partWorker(start, stop):
    ps = []
    for n in xrange(start, stop):
        ps.append(((1, n), p.P(n)))
    return ps

def main():
    pool = Pool()
    step = 150
    for i in xrange(0, 301, step):
        pool.apply_async(partWorker, (i, i + step), callback=log)
    pool.close()
    pool.join()
    return None

if __name__ == "__main__":
    main()
I am new to this; I basically copied the format of the prime-crunching code on this page:
python prime crunching: processing pool is slower?
Can I get the processes running on each core to all look at the same dictionary to assist their calculations? The way it behaves now, each process creates its own dictionary and it eats up RAM like crazy.
I'm not sure if this is what you want, but take a look at multiprocessing.Manager (http://docs.python.org/library/multiprocessing.html#sharing-state-between-processes). Managers allow you to share a dict between processes.
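A minimal sketch of that idea (my example, not from the answer; written in Python 3 syntax): a Manager-backed dict is a single server-side dict that every worker reads and writes through a proxy. It does share the memo between processes, but each lookup is an IPC round trip, so it trades RAM for speed.

from multiprocessing import Pool, Manager

def p1(parts, k, n):
    # same recurrence as in the question, memoized in the shared dict
    if (k, n) in parts:
        return parts[(k, n)]
    if k > n:
        return 0
    if k == n:
        return 1
    value = p1(parts, k + 1, n) + p1(parts, k, n - k)
    parts[(k, n)] = value
    return value

def worker(args):
    parts, n = args
    return n, 1 + sum(p1(parts, k, n - k) for k in range(1, n // 2 + 1))

if __name__ == "__main__":
    manager = Manager()
    shared_parts = manager.dict()  # one dict, visible to every pool worker
    with Pool() as pool:
        for n, value in pool.map(worker, [(shared_parts, n) for n in range(1, 41)]):
            print(n, value)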
