I could not understand how to use the SpiffWorkflow workflow engine. What exactly does creating a task mean, and how can I do that?
Also, I could not find any examples that use SpiffWorkflow. Who uses this engine? How can I find usage examples?
I've found an example here.
import sys, os
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '../../lib'))
from SpiffWorkflow.specs import *
from SpiffWorkflow import Task, Workflow
from SpiffWorkflow.storage import XmlSerializer
def on_entered_cb(workflow, task, taken_path):
#print "entered:",task.get_name()
return True
def on_ready_cb(workflow, task, taken_path):
#print "ready:",task.get_name()
return True
def on_reached_cb(workflow, task, taken_path):
#print "reached:",task.get_name()
return True
def on_complete_cb(workflow, task, taken_path):
# Record the path.
print "complete:",task.get_name()
#print task.get_description()
indent = ' ' * (task._get_depth() - 1)
taken_path.append('%s%s' % (indent, task.get_name()))
return True
class QuestionError(Exception):
def __init__(self, value):
self.value = value
def __str__(self):
return repr(self.value)
class QuestionWorkflow(object):
def __init__(self):
self.serializer = XmlSerializer()
def set_up(self,filename):
# Test patterns that are defined in XML format.
xml = open(filename).read()
self.wf_spec = WorkflowSpec.deserialize(XmlSerializer(), xml, filename = filename)
self.taken_path = self.track_workflow(self.wf_spec)
self.workflow = Workflow(self.wf_spec)
def run(self, UserSelection, restart=False):
if restart:
self.workflow = Workflow(self.wf_spec)
workflow = self.workflow
condition_keys = []
if UserSelection is None:
UserSelection = {}
task_data_dict = UserSelection.copy()
while not workflow.is_completed():
tasks = workflow.get_tasks(Task.READY)
for t in tasks:
print "Ready:", t.task_spec.name
if hasattr(t.task_spec, "cond_task_specs"):
for cond, name in t.task_spec.cond_task_specs:
for cond_unit in cond.args:
if hasattr(cond_unit, "name"):
condition_keys.append(cond_unit.name)
flag_keys_in_user_select = True
for cond_key in condition_keys:
if not task_data_dict.has_key(cond_key):
print cond_key
flag_keys_in_user_select = False
break
if not flag_keys_in_user_select:
# some task's condition key is not in the input UserSelection dict
return
for t in tasks:
t.set_data(**task_data_dict)
workflow.complete_next()
if not workflow.is_completed():
raise QuestionError('workflow did not complete for input %s' % UserSelection)
def print_trace(self):
path = '\n'.join(self.taken_path) + '\n'
info = ""
info += 'the workflowrun path:\n'
info += '%s\n' % path
print info
def track_task(self, task_spec, taken_path):
#reached event call back
if task_spec.reached_event.is_connected(on_reached_cb):
task_spec.reached_event.disconnect(on_reached_cb)
task_spec.reached_event.connect(on_reached_cb, taken_path)
#completed event call back
if task_spec.completed_event.is_connected(on_complete_cb):
task_spec.completed_event.disconnect(on_complete_cb)
task_spec.completed_event.connect(on_complete_cb, taken_path)
#enter event call back
if task_spec.entered_event.is_connected(on_entered_cb):
task_spec.entered_event.disconnect(on_entered_cb)
task_spec.entered_event.connect(on_entered_cb, taken_path)
#ready event call back
if task_spec.ready_event.is_connected(on_ready_cb):
task_spec.ready_event.disconnect(on_ready_cb)
task_spec.ready_event.connect(on_ready_cb, taken_path)
def track_workflow(self, wf_spec, taken_path = None):
if taken_path is None:
taken_path = []
for name in wf_spec.task_specs:
#print "track_workflow:",name
self.track_task(wf_spec.task_specs[name], taken_path)
return taken_path
if __name__ == '__main__':
qw = QuestionWorkflow()
qw.set_up("./case.xml")
print "==========1st question=========="
user_selct = {'man':'1'}
qw.run(user_selct)
print "==========2nd question=========="
user_selct = {'man':'1', 'house': '2'}
qw.run(user_selct)
print "==========3rd question=========="
user_selct = {'man':'1', 'house': '2', 'why': 'because you are a hero'}
qw.run(user_selct)
'''
print "==========4th question========="
user_selct = {'man':'1', 'house': '2', 'role':'5'}
qw.run(user_selct)
'''
print "==========fix some question=========="
user_selct = {'man':'1', 'house': '1', 'role':'5'}
qw.run(user_selct,True)
print
We have developed Zengine, an extended version of SpiffWorkflow with improved functionality. You can find it here, and you can look here for usage examples.
In short:
Zengine is a BPMN-workflow-based framework with Tornado, RabbitMQ AMQP, advanced permissions, extensible scaffolding features and more.
It is built on top of the following major components:
SpiffWorkflow: A powerful workflow engine with BPMN 2.0 support.
Tornado: A Python web framework and asynchronous networking library.
Pyoko: A Django-esque ORM for the Riak KV store.
RabbitMQ: A fast, robust AMQP server written in Erlang.
Perhaps taking a look at this file will help:
https://github.com/knipknap/SpiffWorkflow/blob/master/SpiffWorkflow/specs/WorkflowSpec.py
and these docs:
https://github.com/knipknap/SpiffWorkflow/wiki
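To make the "creating a task" part more concrete: in SpiffWorkflow you define task specs on a WorkflowSpec, wire them together starting from spec.start, and then instantiate a Workflow from that spec; the engine creates the runtime Task objects itself as it executes. Below is a minimal sketch based on my reading of the classic SpiffWorkflow tutorial API (the spec and task names are made up for illustration):

from SpiffWorkflow.specs import WorkflowSpec, Simple
from SpiffWorkflow import Workflow, Task

# Define the workflow specification: a chain of two simple task specs.
spec = WorkflowSpec()
task_a = Simple(spec, 'task_a')   # "creating a task" = adding a task spec to the spec
task_b = Simple(spec, 'task_b')
spec.start.connect(task_a)        # wire the implicit Start task to task_a
task_a.connect(task_b)            # task_a -> task_b

# Instantiate and run the workflow; SpiffWorkflow derives Task instances
# from the specs and moves them through their states.
workflow = Workflow(spec)
while not workflow.is_completed():
    for task in workflow.get_tasks(Task.READY):
        print "completing:", task.task_spec.name
        task.complete()

This is essentially what the larger example above does, minus the XML deserialization and the event callbacks.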
Situation:
I have 2 google cloud functions, let's call them gcf-composer and gcf-action.
I have a list of 70,000 unique dicts for which I want to execute the gcf-action.
I use the gcf-composer to loop over all dicts and publish a message per dict to the gcf-action topic, containing the dict as payload.
I need the gcf-composer, because running the gcf-action directly for all dicts would take more than the 9 min threshold.
I start off the gcf-composer using Google Cloud Scheduler.
Problem
When firing off the gcf-composer in the cloud, after some number of seconds it stops and returns the following:
'connection error'
These are the results of 4 separate tries.
Why does it give me the "finished with status: 'connection error'" message, and how do I solve it?
When I run this locally, i.e. sending messages to the topic, it works.
Please let me know if you need any more code or information!
Code of gcf-composer
from mlibs.pubsub import PubSubConnection
pubsub = PubSubConnection()
TOPIC_NAME = 'gcf-action-topic'
def gcf_composer(period, list_x, names_y, run_method='local'):
"""Run composer given run method (local or cloud_fn)"""
for k, row in names_y.iterrows():
# get dict of identifiers
y = row.to_dict()
for x in list_x:
parameters = {'x': x, 'y': y}
if run_method == 'local':
c.test_local(x=x, y=y)
elif run_method == 'cloud-fn':
pubsub.publish_to_topic(topic_name=TOPIC_NAME, params={'params': parameters})
else:
print(f'Unknown run method {run_method} used. Please try again.')
PubSubConnection:
"""Interaction with the Pub/Sub Engine"""
from google.oauth2 import service_account
from google.cloud import pubsub_v1
from mlibs.utils import json
from mlibs.utils import decode
from mlibs.utils import files as fs
class PubSubConnection:
def __init__(self):
"""Initiate a PubSub connection"""
self.project_id = None
self.publisher = None
self.count = 0
self.init_connection()
def init_connection(self):
"""Initiates a connection given the service account"""
self.publisher = pubsub_v1.PublisherClient(credentials=*****)
self.project_id = credentials.project_id
def publish_to_topic(self, topic_name, params):
# Define the topic path
topic_path = self.publisher.topic_path(self.project_id, topic_name)
# Convert to ByteString
params_bytes = json.dumps(params).encode('utf-8')
# Publish and handle the Future
cbl = Callable(self.count, params)
message_future = self.publisher.publish(topic_path, data=params_bytes, test='pubsub')
# Done callback
message_future.add_done_callback(cbl.callback)
# https://googleapis.dev/python/pubsub/latest/publisher/index.html#futures
# block result
# message_id = message_future.result()
self.count = self.count + 1
print(f'[pubsub][{self.count}][{topic_name}]')
class Callable:
def __init__(self, count, params):
self.count = count
self.params = params
def callback(self, message_future):
if message_future.exception(timeout=30):
print(f'[pubsub-except] Publishing message threw an Exception {message_future.exception()}')
else:
print(f'[pubsub][{self.count}][{message_future.result()}] {self.params}')
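One detail worth noting about the publishing code above: publisher.publish() only hands back a future, and a Cloud Function is not guaranteed to keep doing background work once its entry point returns, so publishes that are still in flight can be cut off. A minimal sketch of waiting for the futures before returning, assuming publish_to_topic is changed to return message_future (that return value is my addition, not part of the original code):

def gcf_composer(period, list_x, names_y, run_method='cloud-fn'):
    futures = []
    for k, row in names_y.iterrows():
        y = row.to_dict()
        for x in list_x:
            parameters = {'x': x, 'y': y}
            # publish_to_topic would need to `return message_future` for this to work
            futures.append(pubsub.publish_to_topic(topic_name=TOPIC_NAME,
                                                   params={'params': parameters}))
    # Block until every publish has finished (or raised) before the function
    # returns, so the runtime does not tear the instance down mid-publish.
    for f in futures:
        f.result()

Whether this is actually the cause of the 'connection error' would still need to be verified, but it is a common failure mode when publishing many messages from a short-lived function.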
I'm trying to run my code with a multiprocessing function, but Mongo keeps returning
"MongoClient opened before fork. Create MongoClient with
connect=False, or create client after forking."
I really don't understand how I can adapt my code to this.
Basically the structure is:
db = MongoClient().database
db.authenticate('user', 'password', mechanism='SCRAM-SHA-1')
collectionW = db['words']
collectionT = db['sinMemo']
collectionL = db['sinLogic']
def findW(word):
rows = collectionW.find({"word": word})
ind = 0
for row in rows:
ind += 1
id = row["_id"]
if ind == 0:
a = ind
else:
a = id
return a
def trainAI(stri):
...
if findW(word) == 0:
_id = db['words'].insert(
{"_id": getNextSequence(db.counters, "nodeid"), "word": word})
story = _id
else:
story = findW(word)
...
def train(index):
# searching progress
progFile = "./train/progress{0}.txt".format(index)
trainFile = "./train/small_file_{0}".format(index)
if os.path.exists(progFile):
f = open(progFile, "r")
ind = f.read().strip()
if ind != "":
pprint(ind)
i = int(ind)
else:
pprint("No progress saved or progress lost!")
i = 0
f.close()
else:
i = 0
#get the number of line of the file
rangeC = rawbigcount(trainFile)
#fix unicode
non_bmp_map = dict.fromkeys(range(0x10000, sys.maxunicode + 1), 0xfffd)
files = io.open(trainFile, "r", encoding="utf8")
str1 = ""
str2 = ""
filex = open(progFile, "w")
with progressbar.ProgressBar(max_value=rangeC) as bar:
for line in files:
line = line.replace("\n", "")
if i % 2 == 0:
str1 = line.translate(non_bmp_map)
else:
str2 = line.translate(non_bmp_map)
bar.update(i)
trainAI(str1 + " " + str2)
filex.seek(0)
filex.truncate()
filex.write(str(i))
i += 1
#multiprocessing function
maxProcess = 3
def f(l, i):
l.acquire()
train(i + 1)
l.release()
if __name__ == '__main__':
lock = Lock()
for num in range(maxProcess):
pprint("start " + str(num))
Process(target=f, args=(lock, num)).start()
This code is made for reading 4 different files in 4 different processes and inserting the data into the database at the same time.
I copied only part of the code to help you understand its structure.
I've tried to add connect=False to this code, but nothing changed...
db = MongoClient(connect=False).database
db.authenticate('user', 'password', mechanism='SCRAM-SHA-1')
collectionW = db['words']
collectionT = db['sinMemo']
collectionL = db['sinLogic']
Then I've tried to move it into the f function (right before train()), but what I get is that the program doesn't find collectionW, collectionT and collectionL.
I'm not an expert in Python or MongoDB, so I hope this is not a silly question.
The code is running under Ubuntu 16.04.2 with Python 2.7.12.
db.authenticate has to connect to the Mongo server and will try to make a connection. So, even though connect=False is being used, db.authenticate will still require a connection to be open.
Why don't you create the MongoClient instance after the fork? That looks like the easiest solution.
Since db.authenticate must open the MongoClient and connect to the server, it creates connections which won't work in the forked subprocess. Hence, the error message. Try this instead:
db = MongoClient('mongodb://user:password@localhost', connect=False).database
Also, delete the Lock l. Acquiring a lock in one subprocess has no effect on other subprocesses.
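For completeness, here is a minimal sketch of the "create the client after the fork" approach applied to the structure above: each child process opens its own MongoClient inside the function it runs, and the Lock is dropped as suggested (the connection string is a placeholder):

from multiprocessing import Process
from pymongo import MongoClient

def train(index):
    # Each child process creates its own client *after* the fork.
    client = MongoClient('mongodb://user:password@localhost')
    db = client.database          # 'database' mirrors the name used in the question
    collectionW = db['words']
    collectionT = db['sinMemo']
    collectionL = db['sinLogic']
    # ... use the collections exactly as before ...

def f(i):
    train(i + 1)

if __name__ == '__main__':
    for num in range(3):
        Process(target=f, args=(num,)).start()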
Here is how I did it for my problem:
import pathos.pools as pp
import time
import db_access
class MultiprocessingTest(object):
def __init__(self):
pass
def test_mp(self):
data = [[form,'form_number','client_id'] for form in range(5000)]
pool = pp.ProcessPool(4)
pool.map(db_access.insertData, data)
if __name__ == '__main__':
time_i = time.time()
mp = MultiprocessingTest()
mp.test_mp()
time_f = time.time()
print 'Time Taken: ', time_f - time_i
Here is db_access.py:
from pymongo import MongoClient
def insertData(form):
client = MongoClient()
db = client['TEST_001']
db.initialization.insert({
"form": form[0],
"form_number": form[1],
"client_id": form[2]
})
This is happening to your code because you are instantiating MongoClient() once for all the sub-processes. MongoClient is not fork-safe, so instantiating it inside each function works. Let me know if there are other solutions.
(A similar question is available on Stack Overflow, but it didn't help me because it used a different function.)
API Documentation
Hello, I am trying to use the OpenSubtitles API with Python. I prefer to search for subtitle files by hash, because it's accurate.
As I have never used XML-RPC before and am quite new to using APIs, I had to study to make it work. But I am stuck at the final point. My program is returning Status 200 (OK), but the 'data' array is blank. I think I am doing something wrong with the parameter passing. The code is here:
from xmlrpclib import ServerProxy
import hashCheck, os
server = 'http://api.opensubtitles.org/xml-rpc'
class MainEngine(object):
def __init__(self, language="None"):
self.rpc = ServerProxy(server, allow_none=True)
user_agent = 'OSTestUserAgentTemp'
self.Start()
def getToken(self):
self.logindata = self.rpc.LogIn(None, None, "en", "OSTestUserAgentTemp")
self.Token = self.logindata["token"]
return self.Token
def subSearch(self, path):
self.hash = hashCheck.hash(self.path)
token = self.getToken()
self.param = [
token, # token
[
'eng', # sublanguageid
self.hash, #hash
os.path.getsize(path), # byte size
]
]
Obj = self.rpc.SearchSubtitles(token, self.param)
print Obj
def Start(self):
# print server
self.path = "E:\Movies\English\Captain Phillips\Captain Phillips.mp4"
self.subSearch(self.path)
def main():
MainEngine()
if __name__ == '__main__':
main()
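If the blank 'data' array really is down to the parameter passing, one likely culprit is the shape of self.param: as far as I understand the OpenSubtitles XML-RPC API, SearchSubtitles expects the second argument to be a list of search dicts (with keys such as 'sublanguageid', 'moviehash' and 'moviebytesize') rather than a list of bare values. A hedged sketch of what subSearch might look like with that structure (the key names come from the OpenSubtitles docs, not from the original code):

def subSearch(self, path):
    movie_hash = hashCheck.hash(path)
    token = self.getToken()
    # SearchSubtitles takes a list of dicts, one per query.
    self.param = [{
        'sublanguageid': 'eng',
        'moviehash': movie_hash,
        'moviebytesize': str(os.path.getsize(path)),
    }]
    Obj = self.rpc.SearchSubtitles(token, self.param)
    print Obj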
I'm currently doing some work with multithreading and I'm trying to figure out why my program isn't working as intended.
def input_watcher():
while True:
input_file = os.path.abspath(raw_input('Input file name: '))
compiler = raw_input('Choose compiler: ')
if os.path.isfile(input_file):
obj = FileObject(input_file, compiler)
with file_lock:
files.append(obj)
print 'Adding %s with %s as compiler' % (obj.file_name, obj.compiler)
else:
print 'File does not exists'
This is running in one thread, and it works fine until I start adding the second FileObject.
This is the output from the console:
Input file name: C:\Users\Victor\Dropbox\Private\multiFile\main.py
Choose compiler: aImport
Adding main.py with aImport as compiler
Input file name: main.py updated
C:\Users\Victor\Dropbox\Private\multiFile\main.py
Choose compiler: Input file name: Input file name: Input file name: Input file name:
The "Input file name:" prompt keeps popping up the second I add the second file name and it asks for a compiler. The program keeps printing "Input file name:" until it crashes.
I have other code running in a different thread; I don't think it has anything to do with the error, but tell me if you need to see it and I will post it.
The full code:
import multiprocessing
import threading
import os
import time
file_lock = threading.Lock()
update_interval = 0.1
class FileMethods(object):
def a_import(self):
self.mod_check()
class FileObject(FileMethods):
def __init__(self, full_name, compiler):
self.full_name = os.path.abspath(full_name)
self.file_name = os.path.basename(self.full_name)
self.path_name = os.path.dirname(self.full_name)
name, exstention = os.path.splitext(full_name)
self.concat_name = name + '-concat' + exstention
self.compiler = compiler
self.compiler_methods = {'aImport': self.a_import}
self.last_updated = os.path.getatime(self.full_name)
self.subfiles = []
self.last_subfiles_mod = {}
def exists(self):
return os.path.isfile(self.full_name)
def mod_check(self):
if self.last_updated < os.path.getmtime(self.full_name):
self.last_updated = os.path.getmtime(self.full_name)
print '%s updated' % self.file_name
return True
else:
return False
def sub_mod_check(self):
for s in self.subfiles:
if self.last_subfiles_mod.get(s) < os.path.getmtime(s):
self.last_subfiles_mod[s] = os.path.getmtime(s)
return True
return False
files = []
def input_watcher():
while True:
input_file = os.path.abspath(raw_input('Input file name: '))
compiler = raw_input('Choose compiler: ')
if os.path.isfile(input_file):
obj = FileObject(input_file, compiler)
with file_lock:
files.append(obj)
print 'Adding %s with %s as compiler' % (obj.file_name, obj.compiler)
else:
print 'File does not exists'
def file_manipulation():
if __name__ == '__main__':
for f in files:
p = multiprocessing.Process(target=f.compiler_methods.get(f.compiler)())
p.start()
#f.compiler_methods.get(f.compiler)()
def file_watcher():
while True:
with file_lock:
file_manipulation()
time.sleep(update_interval)
iw = threading.Thread(target=input_watcher)
fw = threading.Thread(target=file_watcher)
iw.start()
fw.start()
This is happening because you're not using an if __name__ == "__main__": guard, while also using multiprocessing.Process on Windows. Windows needs to re-import your module in the child processes it spawns, which means it will keep creating new threads to handle inputs and watch files. This, of course, is a recipe for disaster. Do this to fix the issue:
if __name__ == "__main__":
iw = threading.Thread(target=input_watcher)
fw = threading.Thread(target=file_watcher)
iw.start()
fw.start()
See the "Safe importing of the main module" section in the multiprocessing docs for more info.
I also have a feeling file_watcher isn't really doing what you want it to (it will keep re-spawning processes for files you've already processed), but that's not really related to the original question.
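On that last point, a purely illustrative sketch of how file_manipulation could consume the shared list so already-handled files are not picked up again on every pass (note it also passes the compiler method itself as the Process target instead of calling it while building the Process):

def file_manipulation():
    # Pop files off the shared list so each one is handled exactly once.
    while files:
        f = files.pop()
        method = f.compiler_methods.get(f.compiler)
        if method is None:
            print 'Unknown compiler %s for %s' % (f.compiler, f.file_name)
            continue
        p = multiprocessing.Process(target=method)  # pass the callable, don't call it
        p.start()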
In my project, I use the multiprocessing module to run tasks in parallel. I want to use threading instead, as it has better performance (my tasks are TCP/IP bound, not CPU or I/O bound).
multiprocessing has wonderful functions, such as Pool.imap_unordered and Pool.map_async, that do not exist in the threading module.
What is the right way to convert my code to use threading instead? The documentation introduces the multiprocessing.dummy module, which is a wrapper around the threading module. However, that raises lots of errors (at least on Python 2.7.3):
pool = multiprocessing.Pool(processes)
File "C:\python27\lib\multiprocessing\dummy\__init__.py", line 150, in Pool
return ThreadPool(processes, initializer, initargs)
File "C:\python27\lib\multiprocessing\pool.py", line 685, in __init__
Pool.__init__(self, processes, initializer, initargs)
File "C:\python27\lib\multiprocessing\pool.py", line 136, in __init__
self._repopulate_pool()
File "C:\python27\lib\multiprocessing\pool.py", line 199, in _repopulate_pool
w.start()
File "C:\python27\lib\multiprocessing\dummy\__init__.py", line 73, in start
self._parent._children[self] = None
AttributeError: '_DummyThread' object has no attribute '_children'
Edit: What actually happens is that I have a GUI that runs a different thread (to prevent the GUI from getting stuck). That thread runs the specific search function that has the ThreadPool that fails.
Edit 2: The bug was fixed and the fix will be included in future releases.
Great to see a crasher fixed!
import urllib2, htmllib, formatter
import multiprocessing.dummy as multiprocessing
import xml.dom.minidom
import os
import string, random
from urlparse import parse_qs, urlparse
from useful_util import retry
import config
from logger import log
class LinksExtractor(htmllib.HTMLParser):
def __init__(self, formatter):
htmllib.HTMLParser.__init__(self, formatter)
self.links = []
self.ignoredSites = config.WebParser_ignoredSites
def start_a(self, attrs):
for attr in attrs:
if attr[0] == "href" and attr[1].endswith(".mp3"):
if not filter(lambda x: (x in attr[1]), self.ignoredSites):
self.links.append(attr[1])
def get_links(self):
return self.links
def GetLinks(url, returnMetaUrlObj=False):
'''
Function gathers links from a url.
@param url: Url Address.
@param returnMetaUrlObj: If true, returns a MetaUrl Object list.
Else, returns a string list. Default is False.
@return links: Look up.
'''
htmlparser = LinksExtractor(formatter.NullFormatter())
try:
data = urllib2.urlopen(url)
except (urllib2.HTTPError, urllib2.URLError) as e:
log.error(e)
return []
htmlparser.feed(data.read())
htmlparser.close()
links = list(set(htmlparser.get_links()))
if returnMetaUrlObj:
links = map(MetaUrl, links)
return links
def isAscii(s):
"Function checks is the string is ascii."
try:
s.decode('ascii')
except (UnicodeEncodeError, UnicodeDecodeError):
return False
return True
@retry(Exception, logger=log)
def parse(song, source):
'''
Function parses the source search page and returns the .mp3 links in it.
@param song: Search string.
@param source: Search website source. Value can be dilandau, mp3skull, youtube, seekasong.
@return links: .mp3 url links.
'''
source = source.lower()
if source == "dilandau":
return parse_dilandau(song)
elif source == "mp3skull":
return parse_Mp3skull(song)
elif source == "SeekASong":
return parse_SeekASong(song)
elif source == "youtube":
return parse_Youtube(song)
log.error('no source "%s". (from parse function in WebParser)' % source)
return []
def parse_dilandau(song, pages=1):
"Function connects to Dilandau.eu and returns the .mp3 links in it"
if not isAscii(song): # Dilandau doesn't like unicode.
log.warning("Song is not ASCII. Skipping on dilandau")
return []
links = []
song = urllib2.quote(song.encode("utf8"))
for i in range(pages):
url = 'http://en.dilandau.eu/download_music/%s-%d.html' % (song.replace('-','').replace(' ','-').replace('--','-').lower(),i+1)
log.debug("[Dilandau] Parsing %s... " % url)
links.extend(GetLinks(url, returnMetaUrlObj=True))
log.debug("[Dilandau] found %d links" % len(links))
for metaUrl in links:
metaUrl.source = "Dilandau"
return links
def parse_Mp3skull(song, pages=1):
"Function connects to mp3skull.com and returns the .mp3 links in it"
links = []
song = urllib2.quote(song.encode("utf8"))
for i in range(pages):
# http://mp3skull.com/mp3/how_i_met_your_mother.html
url = 'http://mp3skull.com/mp3/%s.html' % (song.replace('-','').replace(' ','_').replace('__','_').lower())
log.debug("[Mp3skull] Parsing %s... " % url)
links.extend(GetLinks(url, returnMetaUrlObj=True))
log.debug("[Mp3skull] found %d links" % len(links))
for metaUrl in links:
metaUrl.source = "Mp3skull"
return links
def parse_SeekASong(song):
"Function connects to seekasong.com and returns the .mp3 links in it"
song = urllib2.quote(song.encode("utf8"))
url = 'http://www.seekasong.com/mp3/%s.html' % (song.replace('-','').replace(' ','_').replace('__','_').lower())
log.debug("[SeekASong] Parsing %s... " % url)
links = GetLinks(url, returnMetaUrlObj=True)
for metaUrl in links:
metaUrl.source = "SeekASong"
log.debug("[SeekASong] found %d links" % len(links))
return links
def parse_Youtube(song, amount=10):
'''
Function searches a song in youtube.com and returns the clips in it using Youtube API.
@param song: The search string.
@param amount: Amount of clips to obtain.
@return links: List of links.
'''
"Function connects to youtube.com and returns the .mp3 links in it"
song = urllib2.quote(song.encode("utf8"))
url = r"http://gdata.youtube.com/feeds/api/videos?q=%s&max-results=%d&v=2" % (song.replace(' ', '+'), amount)
urlObj = urllib2.urlopen(url, timeout=4)
data = urlObj.read()
videos = xml.dom.minidom.parseString(data).getElementsByTagName('feed')[0].getElementsByTagName('entry')
links = []
for video in videos:
youtube_watchurl = video.getElementsByTagName('link')[0].attributes.item(0).value
links.append(get_youtube_hightest_quality_link(youtube_watchurl))
return links
def get_youtube_hightest_quality_link(youtube_watchurl, priority=config.youtube_quality_priority):
'''
Function returns the highest quality link for a specific youtube clip.
@param youtube_watchurl: The Youtube Watch Url.
@param priority: A list representing the qualities priority.
@return MetaUrlObj: MetaUrl Object.
'''
video_id = parse_qs(urlparse(youtube_watchurl).query)['v'][0]
youtube_embedded_watchurl = "http://www.youtube.com/embed/%s?autoplay=1" % video_id
d = get_youtube_dl_links(video_id)
for x in priority:
if x in d.keys():
return MetaUrl(d[x][0], 'youtube', d['VideoName'], x, youtube_embedded_watchurl)
log.error("No Youtube link has been found in get_youtube_hightest_quality_link.")
return ""
@retry(Exception, logger=log)
def get_youtube_dl_links(video_id):
'''
Function gets the download links for a youtube clip.
This function parses the get_video_info format of youtube.
@param video_id: Youtube Video ID.
@return d: A dictionary of qualities as keys and urls as values.
'''
d = {}
url = r"http://www.youtube.com/get_video_info?video_id=%s&el=vevo" % video_id
urlObj = urllib2.urlopen(url, timeout=12)
data = urlObj.read()
data = urllib2.unquote(urllib2.unquote(urllib2.unquote(data)))
data = data.replace(',url', '\nurl')
data = data.split('\n')
for line in data:
if 'timedtext' in line or 'status=fail' in line or '<AdBreaks>' in line:
continue
try:
url = line.split('&quality=')[0].split('url=')[1]
quality = line.split('&quality=')[1].split('&')[0]
except:
continue
if quality in d:
d[quality].append(url)
else:
d[quality] = [url]
try:
videoName = "|".join(data).split('&title=')[1].split('&')[0]
except Exception, e:
log.error("Could not parse VideoName out of get_video_info (%s)" % str(e))
videoName = ""
videoName = unicode(videoName, 'utf-8')
d['VideoName'] = videoName.replace('+',' ').replace('--','-')
return d
class NextList(object):
"A list with a 'next' method."
def __init__(self, l):
self.l = l
self.next_index = 0
def next(self):
if self.next_index < len(self.l):
value = self.l[self.next_index]
self.next_index += 1
return value
else:
return None
def isEOF(self):
" Checks if the list has reached the end "
return (self.next_index >= len(self.l))
class MetaUrl(object):
"a url strecture data with many metadata"
def __init__(self, url, source="", videoName="", quality="", youtube_watchurl=""):
self.url = str(url)
self.source = source
self.videoName = videoName # Youtube Links Only
self.quality = quality # Youtube Links Only
self.youtube_watchurl = youtube_watchurl # Youtube Links Only
def __repr__(self):
return "<MetaUrl '%s' | %s>" % (self.url, self.source)
def search(song, n, processes=config.search_processes):
'''
Function searches song and returns n valid .mp3 links.
@param song: Search string.
@param n: Number of songs.
@param processes: Number of processes to launch in the subprocessing pool.
'''
linksFromSources = []
pool = multiprocessing.Pool(processes)
args = [(song, source) for source in config.search_sources]
imapObj = pool.imap_unordered(_parse_star, args)
for i in range(len(args)):
linksFromSources.append(NextList(imapObj.next(15)))
pool.terminate()
links = []
next_source = 0
while len(links) < n and not all(map(lambda x: x.isEOF(), linksFromSources)):
nextItem = linksFromSources[next_source].next()
if nextItem:
log.debug("added song %.80s from source ID %d (%s)" % (nextItem.url.split('/')[-1], next_source, nextItem.source))
links.append(nextItem)
if len(linksFromSources) == next_source+1:
next_source = 0
else:
next_source += 1
return links
def _parse_star(args):
return parse(*args)
I can't reproduce your problem on my machine. What's in your processes variable? Is it an int?
Python 2.7.3 (default, Apr 10 2012, 23:31:26) [MSC v.1500 32 bit (Intel)] on win32
Type "help", "copyright", "credits" or "license" for more information.
>>> import multiprocessing.dummy as multiprocessing
>>> pool = multiprocessing.Pool(5)
>>> pool
<multiprocessing.pool.ThreadPool object at 0x00C7DF90>
>>>
----Edit----
You probably also want to double-check whether you have messed up your standard library; try a clean install of Python 2.7.3 in a different folder.
----Edit 2----
You can quickly patch it like this:
import multiprocessing.dummy
import weakref
import threading
class Worker(threading.Thread):
def __init__(self):
threading.Thread.__init__(self)
def run(self):
poll = multiprocessing.dummy.Pool(5)
print str(poll)
w = Worker()
w._children = weakref.WeakKeyDictionary()
w.start()