Local variable referenced before assignment, using multi-threading - Python

The code adds and deletes values in a list, with the two operations performed in different threads. These global variables are shared by the threads:
from threading import Thread
import time
a = []
i = 0
j = 0
Function for thread 1:
def val_in():
    while 1:
        a.append(raw_input())
        print "%s value at %d: %d added" % (time.ctime(time.time()), i, int(a[i]))  # line 14
        i += 1
Function for thread 2:
def val_out():
    while 1:
        time.sleep(5)
        try:
            print "%s value at %d: %d deleted" % (time.ctime(time.time()), j, int(a.pop(j)))
            i -= 1
        except:
            print "no values left"
            time.sleep(2)
Defining and starting the threads:
t = Thread(target=val_in)
t1 = Thread(target=val_out)
t.start()
t1.start()
Now when 1 is given as input, it should be added to the list a, but instead there is an error:
Error: Exception in thread Thread-1:
Traceback (most recent call last):
  File "/System/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/threading.py", line 810, in __bootstrap_inner
    self.run()
  File "/System/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/threading.py", line 763, in run
    self.__target(*self.__args, **self.__kwargs)
  File "/Users/dhiraj.agarwal/Documents/workspace/try3/multithread.py", line 14, in val_in
UnboundLocalError: local variable 'i' referenced before assignment

You should tell Python that i is global:
def val_in():
    global i
    ...

def val_out():
    global i
    ...

This is an issue with the scope of the variable: assigning to i inside a function makes i local to that function. Since both thread functions assign to i, each of them has to declare the variable as global.
def val_in():
    global i  # add this line
    while 1:
        a.append(raw_input())
        print "%s value at %d: %d added" % (time.ctime(time.time()), i, int(a[i]))
        i += 1

def val_out():
    global i  # add this line
    while 1:
        time.sleep(5)
        try:
            print "%s value at %d: %d deleted" % (time.ctime(time.time()), j, int(a.pop(j)))
            i -= 1
        except:
            print "no values left"
            time.sleep(2)
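To see the scoping rule in isolation, here is a minimal single-threaded sketch (the function names broken and fixed are made up for illustration): the same UnboundLocalError appears whenever a function assigns to a module-level name without declaring it global.

i = 0

def broken():
    # The assignment makes i local to broken(), so the read on the
    # right-hand side happens before any local value exists.
    i += 1

def fixed():
    global i  # use the module-level i instead
    i += 1

try:
    broken()
except UnboundLocalError as e:
    print "broken():", e

fixed()
print "fixed(): i =", i  # prints 1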

Related

outside vs inside __main__ variable definition in multiprocessing

I have the following code:
import multiprocessing
import time
import os

# WHEN SEMAPHORE IS DEFINED HERE THEN IT WORKS
semaphore = multiprocessing.Semaphore(1)

def producer(num, output):
    semaphore.acquire()
    time.sleep(1)
    element = "PROCESS: %d PID: %d PPID: %d" % (num, os.getpid(), os.getppid())
    print "WRITE -> " + element
    output.put(element)
    time.sleep(1)
    semaphore.release()

if __name__ == '__main__':
    """
    Reads elements as soon as they are put inside the queue
    """
    output = multiprocessing.Manager().Queue()
    pool = multiprocessing.Pool(4)
    lst = range(40)
    # WHEN SEMAPHORE IS DEFINED HERE THEN IT DOES NOT WORK
    # semaphore = multiprocessing.Semaphore(1)
    for i in lst:
        pool.apply_async(producer, (i, output))
        # print "%d Do not wait!" % i
        # res.get()
    counter = 0
    while True:
        try:
            print "READ <- " + output.get_nowait()
            counter += 1
            if (counter == len(lst)):
                print "Break"
                break
        except:
            print "READ <- NOTHING IN BUFFER"
            pass
        time.sleep(1)
This code works as expected and prints:
READ <- NOTHING IN BUFFER
WRITE -> PROCESS: 0 PID: 15803 PPID: 15798
READ <- NOTHING IN BUFFER
READ <- PROCESS: 0 PID: 15803 PPID: 15798
READ <- NOTHING IN BUFFER
WRITE -> PROCESS: 1 PID: 15806 PPID: 15798
READ <- PROCESS: 1 PID: 15806 PPID: 15798
...
Then I have this version, which is not working (it is basically the same as the first one, except that the semaphore is defined in a different place):
import multiprocessing
import time
import os

# WHEN SEMAPHORE IS DEFINED HERE THEN IT WORKS
# semaphore = multiprocessing.Semaphore(1)

def producer(num, output):
    print hex(id(semaphore))
    semaphore.acquire()
    time.sleep(1)
    element = "PROCESS: %d PID: %d PPID: %d" % (num, os.getpid(), os.getppid())
    print "WRITE -> " + element
    output.put(element)
    time.sleep(1)
    semaphore.release()

if __name__ == '__main__':
    """
    Reads elements as soon as they are put inside the queue
    """
    output = multiprocessing.Manager().Queue()
    pool = multiprocessing.Pool(4)
    lst = range(40)
    # WHEN SEMAPHORE IS DEFINED HERE THEN IT DOES NOT WORK
    semaphore = multiprocessing.Semaphore(1)
    for i in lst:
        pool.apply_async(producer, (i, output))
        # print "%d Do not wait!" % i
        # res.get()
    counter = 0
    while True:
        try:
            print "READ <- " + output.get_nowait()
            counter += 1
            if (counter == len(lst)):
                print "Break"
                break
        except:
            print "READ <- NOTHING IN BUFFER"
            pass
        time.sleep(1)
This version prints:
READ <- NOTHING IN BUFFER
READ <- NOTHING IN BUFFER
READ <- NOTHING IN BUFFER
READ <- NOTHING IN BUFFER
READ <- NOTHING IN BUFFER
READ <- NOTHING IN BUFFER
READ <- NOTHING IN BUFFER
...
It seems as if producer never writes anything to the queue. I've read somewhere that apply_async does not print error messages, so I changed pool.apply_async(producer, (i, output)) to pool.apply(producer, (i, output)) in the second version to see what is going on. It turns out that semaphore is not defined; here is the output:
Traceback (most recent call last):
  File "glob_var_wrong.py", line 31, in <module>
    pool.apply(producer, (i, output))
  File "/usr/lib/python2.7/multiprocessing/pool.py", line 244, in apply
    return self.apply_async(func, args, kwds).get()
  File "/usr/lib/python2.7/multiprocessing/pool.py", line 567, in get
    raise self._value
NameError: global name 'semaphore' is not defined
However, the following code runs correctly and prints 10 (the value defined inside __main__):
global_var = 20

def print_global_var():
    print global_var

if __name__ == '__main__':
    global_var = 10
    print_global_var()
It seems that in this code a global variable can be redefined inside __main__, while in the previous examples it is not possible. At first I assumed that variables defined inside __main__ are not shared between processes, but that only affects semaphore and not output, pool, or lst. Why is this happening?
When you create a new process using multiprocessing.Process (used under the hood by Pool), it copies the current scope, pickles it, and sends it to a new process to evaluate.
Because you did not define the variable semaphore before calling Pool(4), the variable is undefined in those OTHER processes where the code gets evaluated, and the function producer will throw an exception.
To see this, change the definition to:
def producer(num, output):
    print hex(id(semaphore))
    try:
        semaphore.acquire()
    except Exception as e:
        print e
    time.sleep(1)
    element = "PROCESS: %d PID: %d PPID: %d" % (num, os.getpid(), os.getppid())
    print "WRITE -> " + element
    output.put(element)
    time.sleep(1)
    semaphore.release()
and now your failing code will print out a bunch (40) of errors that look like:
global name 'semaphore' is not defined
This is why semaphore has to be defined BEFORE calling Pool.
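If you do want to create the semaphore inside __main__, one common workaround (a sketch, not from the original answer; the init_worker name is made up here) is to hand it to each worker through Pool's initializer/initargs parameters, which pass arguments to the worker processes when they are created:

import multiprocessing
import time

def init_worker(sem):
    # Store the inherited semaphore in a module-level global so that
    # producer() can find it inside each worker process.
    global semaphore
    semaphore = sem

def producer(num):
    semaphore.acquire()
    try:
        time.sleep(0.1)
        return "PROCESS: %d" % num
    finally:
        semaphore.release()

if __name__ == '__main__':
    sem = multiprocessing.Semaphore(1)
    pool = multiprocessing.Pool(4, initializer=init_worker, initargs=(sem,))
    print pool.map(producer, range(8))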
It is because you execute the code on Windows; you will get the expected results on Linux.
That's the difference between fork and spawn: with fork, the child inherits the parent's globals as they existed at fork time, while with spawn, the child re-imports the module and nothing guarded by if __name__ == '__main__' runs in the child.

Twisted race condition

I'm having an issue with a race condition in my script. The goal is to connect to Deluge and gather information using Twisted.
Here is the script:
#!/usr/bin/python
import json
import sys
import os.path
from datetime import datetime
from deluge.ui.client import client
from twisted.internet import reactor, task

class Deluge(object):
    def __init__(self, *args):
        for key, value in enumerate(args):
            self.key = value

    def getDownloadQueue(self):
        print "Started getDownloadQueue()"
        self.connect()
        print "Finished getDownloadQueue()"

    def connect(self):
        print "Started connect()"
        deluge = client.connect()
        #deluge.addCallback(self.onConnect,params).addErrback(self.onConnectFail).addBoth(self.disconnect)
        print "task.react()"
        test = task.react(self.onConnect, [])
        print "deluge.addCallback()"
        test.addCallback(deluge).addErrback(self.onConnectFail).addBoth(self.disconnect)
        #deluge.addErrback(self.onConnectFail)
        print "Finished connect()"

    def disconnect(self):
        client.disconnect()
        print "Finished disconnect()"

    def onConnect(self, reactor):
        print "Started onConnect()"

        def onGetTorrentStatus(torrentInfo):
            print "Started onGetTorrentStatus()"
            print torrentInfo["name"] + " " + torrentInfo["label"]
            if torrent["name"] == torrent_name:
                print "File '%s' already exists" % torrent["name"]
            print "Finished onGetTorrentStatus()"
            return

        def onGetSessionState(torrent_ids):
            print "Started onGetSessionState()"
            print torrent_ids
            print "Got all torrent ids"
            for id in torrent_ids:
                d = client.core.get_torrent_status(id, ["name", "label"]).addCallback(onGetTorrentStatus)
                print defer.gatherResults([d, self.disconnect])
            print "Finished onGetSessionState()"

        client.core.get_session_state().addCallback(self.onGetSessionState)
        print "Finished onConnect()"

    def onConnectFail(self, result):
        print "Error: %s" % result

Deluge().getDownloadQueue()
Here is the error it outputs:
Traceback (most recent call last):
  File "./delugeTest.py", line 64, in <module>
    Deluge().getDownloadQueue()
  File "./delugeTest.py", line 18, in getDownloadQueue
    self.connect()
  File "./delugeTest.py", line 28, in connect
    test = task.react(self.onConnect, [])
  File "/usr/local/lib/python2.7/dist-packages/twisted/internet/task.py", line 867, in react
    finished = main(_reactor, *argv)
  File "./delugeTest.py", line 58, in onConnect
    client.core.get_session_state().addCallback(self.onGetSessionState)
  File "/usr/lib/python2.7/dist-packages/deluge/ui/client.py", line 504, in __call__
    return self.daemon.call(self.base, *args, **kwargs)
  File "/usr/lib/python2.7/dist-packages/deluge/ui/client.py", line 308, in call
    self.protocol.send_request(request)
AttributeError: 'NoneType' object has no attribute 'send_request'
This is in reference to a question I asked a few months ago: How to properly stop Twisted reactor when callback is finished
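For reference, task.react is designed to be the single entry point of a Twisted program: it starts the reactor, calls main(reactor, *argv), and stops the reactor once the Deferred returned by main fires, so it is not meant to be called from inside an already-running callback chain as above. A minimal sketch of that canonical shape (independent of Deluge; the main function and the one-second deferLater delay are just for illustration):

import sys
from twisted.internet import task

def main(reactor):
    # Do the asynchronous work and return a Deferred; react() stops
    # the reactor once this Deferred fires.
    d = task.deferLater(reactor, 1.0, lambda: "done")
    d.addCallback(lambda result: sys.stdout.write(result + "\n"))
    return d

task.react(main, [])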

Error while deciphering a string

Any input on what is wrong with the line phCmd = "ph %s return all".split(' ') % (qgroup)? I am trying to interpolate qgroup into the string at %s.
from subprocess import Popen, PIPE, STDOUT

def main():
    qgroups = ['tech.sw.list', 'tech.sw.amss']
    for qgroup in qgroups:
        print qgroup
        phCmd = "ph %s return all".split(' ') % (qgroup)
        phPipe = Popen(phCmd, stdout=PIPE, stderr=PIPE)
        (output, error) = phPipe.communicate()
        print output
        if phPipe.returncode != 0:
            print output
            raise IOError, "phcommand %s failed" % (phCmd)
        return output

if __name__ == '__main__':
    main()

ERROR:

Traceback (most recent call last):
  File "test.py", line 20, in <module>
    main()
  File "test.py", line 9, in main
    phCmd = "ph %s return all".split(' ') % (qgroup)
The .split(' ') method of a string returns a list. Try something like
phCmd = ("ph %s return all" % (qgroup)).split(' ')
instead.
"ph %s return all".split(' ') % (qgroup)
The split() call returns a list, and % is undefined for the argument types list and tuple. I'm not sure what you mean to do here, but it looks like you want:
("ph %s return all" % (qgroup)).split(' ')
When using "%" with strings, you have to place it right after the string. This line of code
phCmd = "ph %s return all".split(' ') % (qgroup)
is actually telling Python to take the list returned by "ph %s return all".split(' ') and run an operation similar to:
>>> 2 % 2
0
>>>
on it using (qgroup), which blows up.
To fix your problem, do this:
phCmd = ("ph %s return all" % qgroup).split(' ')

Python Pickle Error For Multiprocessing Manager Sharing Lists

I am trying to populate the lists self.images and self.servers with the functions self.refresh_image_list and self.refresh_server_list using multiprocessing, so that when the object is created they kick off asynchronously. I am using shared lists so that the child copies will update the original object's lists.
However, I am getting a pickle error, so I am pretty stuck.
import time
import pyrax
from multiprocessing import Manager, Process

class Account():
    def __init__(self, username, api, pipe_object):
        manager = Manager()
        self.images = manager.list()
        self.servers = manager.list()
        self.images_timestamp = None
        self.servers_timestamp = None
        #needed a dictionary instead of
        #list/tuple. This works best for
        #the generator.
        self.regions = {
            "DFW": pyrax.connect_to_cloudservers("DFW"),
            "ORD": pyrax.connect_to_cloudservers("ORD"),
            "SYD": pyrax.connect_to_cloudservers("SYD")
        }
        p1 = Process(target=self.refresh_server_list, args=())
        p2 = Process(target=self.refresh_image_list, args=())
        p1.start()
        p2.start()
        p1.join()
        p2.join()

    flavors = None
    #multiprocessing shares lists only for __init__
    #after __init__, we want to break the share
    unshare_lists = False

    def refresh_server_list(self):
        if self.unshare_lists:
            self.servers = []
        self.servers_timestamp = time.strftime(
            "%I:%M:%S", time.localtime()
        )
        with Redirect(self.pipe_object):
            print "\nRefreshing server cache...hold on!"
        for region, value in self.regions.iteritems():
            region_servers = value.servers.list()
            for region_servers in generator(region_servers, region):
                self.servers.append(region_servers)
        with Redirect(self.pipe_object):
            print "\nServer cache completed!"

    def server_list(self):
        if not self.servers:
            self.refresh_server_list()
        with Redirect(self.pipe_object):
            print_header("Cached Server List", "-")
            for srv in self.servers:
                print "\nName: %s" % srv.name
                print "Created: %s" % srv.created
                print "Progress: %s" % srv.progress
                print "Status: %s" % srv.status
                print "URL: %s" % srv.links[0]["href"]
                print "Networks: %s" % srv.networks
            print "\nLast Refresh time: %s" % self.servers_timestamp

    def refresh_image_list(self):
        if self.unshare_lists:
            self.images = []
        self.images_timestamp = time.strftime(
            "%I:%M:%S", time.localtime()
        )
        with Redirect(self.pipe_object):
            # print_header("Active Image List", "-")
            print "\nRefreshing image cache...hold on!"
        for region, value in self.regions.iteritems():
            region_images = value.images.list()
            for region_images in generator(region_images, region):
                self.images.append(region_images)
        with Redirect(self.pipe_object):
            print "\nImage cache completed!"

    def image_list(self):
        if not self.images:
            self.refresh_image_list()
        with Redirect(self.pipe_object):
            print_header("List Images", "-")
            for img in self.images:
                print (
                    str(self.images.index(img) + 1) + ") "
                    + "Name: %s\n ID: %s Status: %s" %
                    (img.name, img.id, img.status)
                )
            print "\nLast Refresh time: %s" % self.images_timestamp
The error I get:
Refreshing server cache...hold on!
Traceback (most recent call last):
  File "menu.py", line 162, in <module>
    main()
  File "menu.py", line 156, in main
    menus[value](hash_table, accounts)
  File "menu.py", line 104, in menu
    choices[value]()
  File "/home/work/modules/classes.py", line 87, in server_list
    self.refresh_server_list()
  File "/home/work/modules/classes.py", line 80, in refresh_server_list
    self.servers.append(region_servers)
  File "<string>", line 2, in append
  File "/usr/lib64/python2.7/multiprocessing/managers.py", line 758, in _callmethod
    conn.send((self._id, methodname, args, kwds))
cPickle.PicklingError: Can't pickle <type 'instancemethod'>: attribute lookup __builtin__.instancemethod failed
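For context, everything appended to a Manager().list() travels over a connection to the manager process, so it must be picklable; the traceback above shows cPickle failing on an instancemethod attribute of the appended object. A minimal sketch of the pattern with plain picklable values (the worker function name is made up for illustration):

from multiprocessing import Manager, Process

def worker(shared):
    # Appends cross the manager connection, so values must pickle;
    # plain strings are fine, bound methods are not.
    shared.append("hello from child")

if __name__ == '__main__':
    manager = Manager()
    shared = manager.list()
    p = Process(target=worker, args=(shared,))
    p.start()
    p.join()
    print list(shared)  # prints ['hello from child']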

Can't reference a global dictionary entry

I am aware that global variables are not always the best way to deal with things, but in this case they are fine for what I am doing. I am not going to be doing heavy reads/writes, mostly just reads.
from multiprocessing import Process, Pipe
from threading import Thread
import time

alive = {'subAlive': True, 'testAlive': True};

def sub_listener(conn):  # listens for kill from main
    global alive
    while True:
        data = conn.recv()
        if data == "kill":
            alive['subAlive'] = False;  # value for kill
            break

def subprocess(conn, threadNum):
    t = Thread(target=sub_listener, args=(conn,))
    count = 0
    threadVal = threadNum
    t.start()
    run = alive[subAlive];
    while run:
        print "Thread %d Run number = %d" % (threadVal, count)
        count = count + 1

sub_parent, sub_child = Pipe()
runNum = int(raw_input("Enter a number: "))
threadNum = int(raw_input("Enter number of threads: "))
print "Starting threads"
for i in range(threadNum):
    p = Process(target=subprocess, args=(sub_child, i))
    p.start()
print "Starting run"
time.sleep(runNum)
print "Terminating Subprocess run"
for i in range(threadNum):
    sub_parent.send("kill")  # sends kill to listener
p.join()
I get this error:
NameError: global name 'testAlive' is not defined
Traceback (most recent call last):
  File "/usr/lib/python2.7/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
  File "/usr/lib/python2.7/multiprocessing/process.py", line 114, in run
    self._target(*self._args, **self._kwargs)
  File "multiprocessDemo.py", line 38, in subprocess
    run = alive[subAlive];
NameError: global name 'subAlive' is not defined
I have tried accessing the dictionary a few different ways and I can't seem to find out what is wrong on Google. If I use separate variables it does work, but that won't scale well dynamically.
Put quotes around subAlive:
run = alive['subAlive']
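To spell out the difference: without quotes, Python treats subAlive as a variable name to look up, not as a dictionary key. A minimal sketch:

alive = {'subAlive': True}

print alive['subAlive']  # string key lookup -> True
print alive[subAlive]    # looks for a variable named subAlive -> NameError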
