Python multithreading raw_input

I'm currently doing some work with multithreading and I'm trying to figure out why my program isn't working as intended.
def input_watcher():
    while True:
        input_file = os.path.abspath(raw_input('Input file name: '))
        compiler = raw_input('Choose compiler: ')
        if os.path.isfile(input_file):
            obj = FileObject(input_file, compiler)
            with file_lock:
                files.append(obj)
            print 'Adding %s with %s as compiler' % (obj.file_name, obj.compiler)
        else:
            print 'File does not exist'
This is running in one thread and it works fine until I start adding the second FileObject.
This is the output from the console:
Input file name: C:\Users\Victor\Dropbox\Private\multiFile\main.py
Choose compiler: aImport
Adding main.py with aImport as compiler
Input file name: main.py updated
C:\Users\Victor\Dropbox\Private\multiFile\main.py
Choose compiler: Input file name: Input file name: Input file name: Input file name:
The 'Input file name:' prompt keeps popping up the second I add the second file name and it asks for a compiler. The program keeps printing 'Input file name:' until it crashes.
I have other code running in a different thread. I don't think it has anything to do with the error, but tell me if you think you need to see it and I will post it.
The full code:
import multiprocessing
import threading
import os
import time

file_lock = threading.Lock()
update_interval = 0.1

class FileMethods(object):
    def a_import(self):
        self.mod_check()

class FileObject(FileMethods):
    def __init__(self, full_name, compiler):
        self.full_name = os.path.abspath(full_name)
        self.file_name = os.path.basename(self.full_name)
        self.path_name = os.path.dirname(self.full_name)
        name, extension = os.path.splitext(full_name)
        self.concat_name = name + '-concat' + extension
        self.compiler = compiler
        self.compiler_methods = {'aImport': self.a_import}
        self.last_updated = os.path.getatime(self.full_name)
        self.subfiles = []
        self.last_subfiles_mod = {}

    def exists(self):
        return os.path.isfile(self.full_name)

    def mod_check(self):
        if self.last_updated < os.path.getmtime(self.full_name):
            self.last_updated = os.path.getmtime(self.full_name)
            print '%s updated' % self.file_name
            return True
        else:
            return False

    def sub_mod_check(self):
        for s in self.subfiles:
            if self.last_subfiles_mod.get(s) < os.path.getmtime(s):
                self.last_subfiles_mod[s] = os.path.getmtime(s)
                return True
        return False

files = []

def input_watcher():
    while True:
        input_file = os.path.abspath(raw_input('Input file name: '))
        compiler = raw_input('Choose compiler: ')
        if os.path.isfile(input_file):
            obj = FileObject(input_file, compiler)
            with file_lock:
                files.append(obj)
            print 'Adding %s with %s as compiler' % (obj.file_name, obj.compiler)
        else:
            print 'File does not exist'

def file_manipulation():
    if __name__ == '__main__':
        for f in files:
            p = multiprocessing.Process(target=f.compiler_methods.get(f.compiler)())
            p.start()
            #f.compiler_methods.get(f.compiler)()

def file_watcher():
    while True:
        with file_lock:
            file_manipulation()
        time.sleep(update_interval)

iw = threading.Thread(target=input_watcher)
fw = threading.Thread(target=file_watcher)
iw.start()
fw.start()

This is happening because you're not using an if __name__ == "__main__": guard while also using multiprocessing.Process on Windows. Windows has to re-import your module in each child process it spawns, which means every child will keep creating new threads to handle input and watch files. That, of course, is a recipe for disaster. Do this to fix the issue:
if __name__ == "__main__":
    iw = threading.Thread(target=input_watcher)
    fw = threading.Thread(target=file_watcher)
    iw.start()
    fw.start()
See the "Safe importing of the main module" section in the multiprocessing docs for more info.
I also have a feeling file_watcher isn't really doing what you want it to (it will keep re-spawning processes for files you've already processed), but that's not really related to the original question.
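To illustrate that last point, here is one sketch of how file_manipulation could avoid re-dispatching files that were already picked up; the started set (and passing the method instead of calling it) are additions of this example, not part of the original program:

started = set()

def file_manipulation():
    for f in files:
        if f.full_name in started:
            continue                      # already dispatched once, skip it
        started.add(f.full_name)
        # Pass the bound method itself; calling it (as the original code does)
        # runs the compile step in the parent and hands Process a useless target.
        p = multiprocessing.Process(target=f.compiler_methods.get(f.compiler))
        p.start()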

Related

Python OSError: exception: access violation reading 0x0000000000000000 when running external plugin

I have been using Python to run a piece of software via a plug-in which is implemented as a DLL. This is provided through the following wrapper class:
from ctypes import *
import os

ANULL = -999999

gwbdll = None
if os.name == 'posix':
    gwbdll = cdll.LoadLibrary('libgwbplugin.so')
else:
    gwbdll = cdll.LoadLibrary('gwbplugin')

class GWBplugin:
    Name = "GWBplugin"

    def __init__(self):
        self.plugin = c_void_p(None)

    def initialize(self, app_name, file_name=None, cmds=None):
        return gwbdll.c_initialize(byref(self.plugin), c_char_p(app_name.encode()), file_name if file_name == None else c_char_p(file_name.encode()), cmds if cmds == None else c_char_p(cmds.encode()))

    def exec_cmd(self, uline):
        return gwbdll.c_exec_cmd(byref(self.plugin), c_char_p(uline.encode()))

    def results(self, value, units=None, ix=0, jy=0):
        type = c_void_p
        type_result = gwbdll.c_results(byref(self.plugin), c_void_p(None), c_char_p(value.encode()), c_char_p("DATATYPE".encode()), c_int(ix), c_int(jy))
        if type_result == 1:
            type = c_int
        elif type_result == 2:
            type = c_double
        elif type_result == 3:
            type = c_char_p
        else:
            return []
        count = gwbdll.c_results(byref(self.plugin), c_void_p(None), c_char_p(value.encode()), units if units == None else c_char_p(units.encode()), c_int(ix), c_int(jy))
        arr = (type*count)()
        gwbdll.c_results(byref(self.plugin), cast(arr, c_void_p), c_char_p(value.encode()), units if units == None else c_char_p(units.encode()), c_int(ix), c_int(jy))
        if type == c_char_p:
            arr = [x.decode('cp1252') for x in arr]
        return arr

    def destroy(self):
        gwbdll.c_destroy(byref(self.plugin))

    def __del__(self):
        gwbdll.c_destroy(byref(self.plugin))
I am running my program on a Jupyter notebook with Python 3.8.8 with code that looks like:
myPlugin = GWBplugin()
myPlugin.initialize("react", f_out, f_in_flagged)
where "react" is the name of the specific application I am using in this software, f_out looks like 'Output Files/pH 9/Reduced/0.0% Reduced/Output_6.txt', and f_in_flagged looks like "-i 'Input Files/pH 9/Reduced/0.0% Reduced/Input_6.rea'".
This is contained in a loop that runs through many different input files, and was running just fine until a few days ago when I generated more input files (contained within some new subdirectories) to run, and now it spits out the following error:
OSError Traceback (most recent call last)
<ipython-input-6-fdf290a73be1> in <module>
24 # #Initialize: Application, Output file, Input file containing setup
---> 25 myPlugin.initialize("react", f_out, f_in_flagged)
26 #Run
27 myPlugin.exec_cmd("go")
C:\Program Files\Gwb\src\GWBplugin.py in initialize(self, app_name, file_name, cmds)
15
16 def initialize (self,app_name,file_name=None,cmds=None):
---> 17 return gwbdll.c_initialize(byref(self.plugin), c_char_p(app_name.encode()), file_name if file_name == None else c_char_p(file_name.encode()), cmds if cmds == None else c_char_p(cmds.encode()))
18
19 def exec_cmd (self,uline):
OSError: exception: access violation reading 0x0000000000000000
I am not really familiar with C or ctypes, but as far as I can tell this has something to do with the names of the input and output files I'm feeding it. I tried going back and running the files in the original directories that were working before (I even tried completely uninstalling and reinstalling everything, the software and Anaconda, and deleting all of the new files and directories), and I am now getting the same error, so beyond that I am really at a loss as to what is going on. Any help is greatly appreciated!
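One general ctypes point worth checking with errors like this (an aside, not a confirmed diagnosis): without explicit prototypes, ctypes assumes every foreign function returns a C int and converts arguments ad hoc on each call, so declaring argtypes/restype up front makes bad pointer marshalling fail loudly instead of corrupting memory. A minimal sketch against the c_initialize export used in the wrapper above:

from ctypes import POINTER, c_char_p, c_int, c_void_p

# Sketch only: declare the prototype once so ctypes validates the arguments
# (None is still accepted for the two optional c_char_p parameters).
gwbdll.c_initialize.argtypes = [POINTER(c_void_p), c_char_p, c_char_p, c_char_p]
gwbdll.c_initialize.restype = c_int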

Stop input after watchdog refresh

I have a problem with Python input.
I'm creating a Python sneakers bot. I have a CLI set up that, when it opens, shows you the .txt files that are in the directory, and then an input asking you to choose which ones you want to use to start your tasks. [1]
I implemented a watchdog that watches my directory to see if files are added or modified; when files get modified, the watchdog script refreshes the CLI, but the input [1] the user was asked for is still active. I need to stop that input after the screen gets cleared. How can I make this possible?
Here is my code:
def proxieschoice():
    import findfiles
    findfiles.my_observer.start()
    proxiesfile = 0
    proxynamelist = {}
    print('------------------------------')
    for file in glob.glob("*.txt"):
        proxiesfile = proxiesfile + 1
        with open(file) as f:
            count = sum(1 for _ in f)
        proxynamelist[proxiesfile] = file
        print(f"[{Fore.BLUE}{proxiesfile}{Style.RESET_ALL}] {file} [{count} proxies]")
    print('------------------------------')
    try:
        prox = int(input(f"{Fore.BLUE}>> {Style.RESET_ALL} Which proxies you want to use? "))
    except ValueError:
        print('Invalid Input')
        proxieschoice()
And here is findfiles.py:
import time
from watchdog.observers import Observer
from watchdog.events import PatternMatchingEventHandler
import converse

patterns = ["*"]
ignore_patterns = None
ignore_directories = False
case_sensitive = True
my_event_handler = PatternMatchingEventHandler(patterns, ignore_patterns, ignore_directories, case_sensitive)

def on_created(event):
    converse.cleanscreen()
    converse.proxieschoice()

def on_deleted(event):
    converse.cleanscreen()
    converse.proxieschoice()

def on_modified(event):
    converse.cleanscreen()
    converse.proxieschoice()

def on_moved(event):
    converse.cleanscreen()
    converse.proxieschoice()

my_event_handler.on_created = on_created
my_event_handler.on_deleted = on_deleted
my_event_handler.on_modified = on_modified
my_event_handler.on_moved = on_moved

path = "."
go_recursively = True
my_observer = Observer()
my_observer.schedule(my_event_handler, path, recursive=go_recursively)
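For reference, the pattern in the watchdog documentation starts the observer once and keeps the main thread in its own loop, instead of starting it from inside the prompt function; a minimal, self-contained sketch along those lines:

import time
from watchdog.observers import Observer
from watchdog.events import PatternMatchingEventHandler

handler = PatternMatchingEventHandler(["*"], None, False, True)
observer = Observer()
observer.schedule(handler, ".", recursive=True)
observer.start()                  # watcher runs in a background thread
try:
    while True:
        time.sleep(1)             # main thread stays free for console I/O
except KeyboardInterrupt:
    observer.stop()
observer.join()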

Checking if a file exists in Python 2.7

I am using a simple Python script to search for and play songs on my laptop. The code goes as follows:
import os

d_name = raw_input("enter drive name:-")
choice = raw_input("song or video(s/v):-")
if choice == 's':
    s_name = raw_input("enter song name:- ")
    flag = 1
elif choice == 'v':
    s_name = raw_input("enter video name:-")
    flag = 2
if flag == 1:
    f_s_name = "start " + d_name + ":/" + s_name + ".mp3"
elif flag == 2:
    f_s_name = "start " + d_name + ":/" + s_name + ".mp4"
dir_list = os.listdir(d_name + ":/")
i = 0
while 1:
    if not os.system(f_s_name):
        break
    else:
        if flag == 1:
            f_s_name = "start " + d_name + ":/" + dir_list[i] + "/" + s_name + ".mp3"
        elif flag == 2:
            f_s_name = "start " + d_name + ":/" + dir_list[i] + "/" + s_name + ".mp4"
        i = i + 1
The above program works fine, but when one of the calls to os.system() fails, it pops up a dialog box saying the song is not there, and it keeps doing so until the required file is found. How can I prevent that dialog box from popping up?
You'd use os.path.exists to test whether the file you're about to start actually exists; if it is not found, do not try to start that file:
import os
....
filename = '{}:/{}/{}.mp3'.format(d_name, dir_list[i], s_name)
if os.path.exists(filename):
    os.system('start ' + filename)
else:
    print "File {} was not found".format(filename)

How can I create a task with SpiffWorkflow?

I could not understand how to use the SpiffWorkflow workflow engine. What exactly does creating a task mean, and how can I do that?
Also, I could not find any examples that use SpiffWorkflow. Who uses this engine? How can I find usage examples?
I've found an example here.
import sys, os
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '../../lib'))

from SpiffWorkflow.specs import *
from SpiffWorkflow import Task, Workflow
from SpiffWorkflow.storage import XmlSerializer

def on_entered_cb(workflow, task, taken_path):
    #print "entered:", task.get_name()
    return True

def on_ready_cb(workflow, task, taken_path):
    #print "ready:", task.get_name()
    return True

def on_reached_cb(workflow, task, taken_path):
    #print "reached:", task.get_name()
    return True

def on_complete_cb(workflow, task, taken_path):
    # Record the path.
    print "complete:", task.get_name()
    #print task.get_description()
    indent = ' ' * (task._get_depth() - 1)
    taken_path.append('%s%s' % (indent, task.get_name()))
    return True

class QuestionError(Exception):
    def __init__(self, value):
        self.value = value
    def __str__(self):
        return repr(self.value)

class QuestionWorkflow(object):
    def __init__(self):
        self.serializer = XmlSerializer()

    def set_up(self, filename):
        # Test patterns that are defined in XML format.
        xml = open(filename).read()
        self.wf_spec = WorkflowSpec.deserialize(XmlSerializer(), xml, filename=filename)
        self.taken_path = self.track_workflow(self.wf_spec)
        self.workflow = Workflow(self.wf_spec)

    def run(self, UserSelection, restart=False):
        if restart:
            self.workflow = Workflow(self.wf_spec)
        workflow = self.workflow
        condition_keys = []
        if UserSelection is None:
            UserSelection = {}
        task_data_dict = UserSelection.copy()
        while not workflow.is_completed():
            tasks = workflow.get_tasks(Task.READY)
            for t in tasks:
                print "Ready:", t.task_spec.name
                if hasattr(t.task_spec, "cond_task_specs"):
                    for cond, name in t.task_spec.cond_task_specs:
                        for cond_unit in cond.args:
                            if hasattr(cond_unit, "name"):
                                condition_keys.append(cond_unit.name)
            flag_keys_in_user_select = True
            for cond_key in condition_keys:
                if not task_data_dict.has_key(cond_key):
                    print cond_key
                    flag_keys_in_user_select = False
                    break
            if not flag_keys_in_user_select:
                # some task's condition key is not in the input UserSelection dict
                return
            for t in tasks:
                t.set_data(**task_data_dict)
            workflow.complete_next()
        if not workflow.is_completed():
            raise QuestionError('invalid feature[%s]' % filename)

    def print_trace(self):
        path = '\n'.join(self.taken_path) + '\n'
        info = ""
        info += 'the workflow run path:\n'
        info += '%s\n' % path
        print info

    def track_task(self, task_spec, taken_path):
        # reached event callback
        if task_spec.reached_event.is_connected(on_reached_cb):
            task_spec.reached_event.disconnect(on_reached_cb)
        task_spec.reached_event.connect(on_reached_cb, taken_path)
        # completed event callback
        if task_spec.completed_event.is_connected(on_complete_cb):
            task_spec.completed_event.disconnect(on_complete_cb)
        task_spec.completed_event.connect(on_complete_cb, taken_path)
        # entered event callback
        if task_spec.entered_event.is_connected(on_entered_cb):
            task_spec.entered_event.disconnect(on_entered_cb)
        task_spec.entered_event.connect(on_entered_cb, taken_path)
        # ready event callback
        if task_spec.ready_event.is_connected(on_ready_cb):
            task_spec.ready_event.disconnect(on_ready_cb)
        task_spec.ready_event.connect(on_ready_cb, taken_path)

    def track_workflow(self, wf_spec, taken_path=None):
        if taken_path is None:
            taken_path = []
        for name in wf_spec.task_specs:
            #print "track_workflow:", name
            self.track_task(wf_spec.task_specs[name], taken_path)
        return taken_path

if __name__ == '__main__':
    qw = QuestionWorkflow()
    qw.set_up("./case.xml")

    print "==========1st question=========="
    user_selct = {'man': '1'}
    qw.run(user_selct)

    print "==========2nd question=========="
    user_selct = {'man': '1', 'house': '2'}
    qw.run(user_selct)

    print "==========3rd question=========="
    user_selct = {'man': '1', 'house': '2', 'why': 'because you are a hero'}
    qw.run(user_selct)

    '''
    print "==========4th question========="
    user_selct = {'man':'1', 'house': '2', 'role':'5'}
    qw.run(user_selct)
    '''

    print "==========fix some question=========="
    user_selct = {'man': '1', 'house': '1', 'role': '5'}
    qw.run(user_selct, True)
    print
We have developed Zengine, an extended version of SpiffWorkflow with improved functionality. You can find it here, and you can see here for usage examples.
In short:
Zengine is a BPMN workflow based framework with Tornado, Rabbit AMQP, advanced permissions, extensible scaffolding features and more.
It is built on top of the following major components:
SpiffWorkflow: a powerful workflow engine with BPMN 2.0 support.
Tornado: a Python web framework and asynchronous networking library.
Pyoko: a Django-esque ORM for the Riak KV store.
RabbitMQ: a fast, ultrasharp AMQP server written in Erlang.
Perhaps taking a look at this file will help:
https://github.com/knipknap/SpiffWorkflow/blob/master/SpiffWorkflow/specs/WorkflowSpec.py
as will these docs:
https://github.com/knipknap/SpiffWorkflow/wiki
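To make "creating a task" concrete, here is a minimal sketch against the classic SpiffWorkflow API shown in its tutorial (treat the exact names as assumptions, since the API has shifted between versions). You don't instantiate tasks directly: you add task specs to a WorkflowSpec, and the engine builds the task tree when a Workflow is instantiated:

from SpiffWorkflow.specs import WorkflowSpec, Simple
from SpiffWorkflow import Workflow

spec = WorkflowSpec()
# "Creating a task" means adding a task spec to the spec graph:
task1 = Simple(spec, 'task1')
spec.start.connect(task1)      # wire it after the implicit start task
task2 = Simple(spec, 'task2')
task1.connect(task2)

wf = Workflow(spec)            # instantiates the task tree from the spec
wf.complete_all()              # run every ready task to completion
print wf.is_completed()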

Successive multiprocessing

I am filtering huge text files using multiprocessing. The code basically opens the text files, works on them, then closes them.
The thing is, I'd like to be able to launch it successively on multiple text files. Hence, I tried to add a loop, but for some reason it doesn't work (even though the code works on each file individually). I believe this is an issue with:
if __name__ == '__main__':
However, I am looking for something else. I tried to create Launcher and LauncherCount files like this:
LauncherCount.py:
def setLauncherCount(n):
    global LauncherCount
    LauncherCount = n
and,
Launcher.py:
import os
import LauncherCount
LauncherCount.setLauncherCount(0)
os.system("OrientedFilterNoLoop.py")
LauncherCount.setLauncherCount(1)
os.system("OrientedFilterNoLoop.py")
...
I import LauncherCount.py and use LauncherCount.LauncherCount as my loop index.
Of course, this doesn't work either: os.system starts a separate Python process, so setting LauncherCount.LauncherCount in Launcher.py never reaches the copy of the module that OrientedFilterNoLoop.py imports.
Is there any way to edit a variable in an imported file globally? Or is there any way to do this some other way? What I need is to run the code multiple times, changing one value each time, and apparently without using any loop.
Thanks!
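For what it's worth, since os.system launches a fresh Python interpreter, nothing assigned in the parent's LauncherCount module survives into the child; the usual channel for passing one changing value into a script is the command line. A minimal sketch reusing the script names from the post:

# Launcher.py -- sketch: pass the index to each run as an argument
import os
for i in range(2):
    os.system("python OrientedFilterNoLoop.py %d" % i)

# and at the top of OrientedFilterNoLoop.py, read the index back:
#   import sys
#   LauncherCount = int(sys.argv[1])   # stands in for "import LauncherCount"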
Edit: Here is my main code if necessary. Sorry for the bad style ...
import multiprocessing
import config
import time
import LauncherCount

class Filter:
    """ Filtering methods """
    def __init__(self):
        print("launching methods")

    # Return the list: [Latitude, Longitude] (elements are floating point numbers)
    def LatLong(self, line):
        comaCount = []
        comaCount.append(line.find(','))
        comaCount.append(line.find(',', comaCount[0] + 1))
        comaCount.append(line.find(',', comaCount[1] + 1))
        Lat = line[comaCount[0] + 1 : comaCount[1]]
        Long = line[comaCount[1] + 1 : comaCount[2]]
        try:
            return [float(Lat), float(Long)]
        except ValueError:
            return [0, 0]

    # Return a boolean:
    # - True if the Lat/Long is within the Lat/Long rectangle defined by:
    #   tupleFilter = (minLat,maxLat,minLong,maxLong)
    # - False if not
    def LatLongFilter(self, LatLongList, tupleFilter):
        if tupleFilter[0] <= LatLongList[0] <= tupleFilter[1] and \
           tupleFilter[2] <= LatLongList[1] <= tupleFilter[3]:
            return True
        else:
            return False

    def writeLine(self, key, line):
        filterDico[key][1].write(line)

def filteringProcess(dico):
    myFilter = Filter()
    while True:
        try:
            currentLine = readFile.readline()
        except ValueError:
            break
        if len(currentLine) == 0:  # Breaks at the end of the file
            break
        if len(currentLine) < 35:  # Deletes wrong lines (too short)
            continue
        LatLongList = myFilter.LatLong(currentLine)
        for key in dico:
            if myFilter.LatLongFilter(LatLongList, dico[key][0]):
                myFilter.writeLine(key, currentLine)

###########################################################################
# Main
###########################################################################

# Open read files:
readFile = open(config.readFileList[LauncherCount.LauncherCount][1], 'r')

# Generate writing files:
pathDico = {}
filterDico = config.filterDico

# Create outputs
for key in filterDico:
    output_Name = config.readFileList[LauncherCount.LauncherCount][0][:-4] \
                  + '_' + key + '.log'
    pathDico[output_Name] = config.writingFolder + output_Name
    filterDico[key] = [filterDico[key], open(pathDico[output_Name], 'w')]

p = []
CPUCount = multiprocessing.cpu_count()
CPURange = range(CPUCount)
startingTime = time.localtime()

if __name__ == '__main__':
    ### Create and start processes:
    for i in CPURange:
        p.append(multiprocessing.Process(target=filteringProcess,
                                         args=(filterDico,)))
        p[i].start()
    ### Kill processes:
    while True:
        if [p[i].is_alive() for i in CPURange] == [False for i in CPURange]:
            readFile.close()
            for key in config.filterDico:
                config.filterDico[key][1].close()
                print(key, "is Done!")
            endTime = time.localtime()
            break
    print("Process started at:", startingTime)
    print("And ended at:", endTime)
To process groups of files in sequence while working on files within a group in parallel:
#!/usr/bin/env python
from multiprocessing import Pool

def work_on(args):
    """Process a single file."""
    i, filename = args
    print("working on %s" % (filename,))
    return i

def files():
    """Generate input filenames to work on."""
    #NOTE: you could read the file list from a file, get it using glob.glob, etc
    yield "inputfile1"
    yield "inputfile2"

def process_files(pool, filenames):
    """Process filenames using a pool of processes.

    Wait for results.
    """
    for result in pool.imap_unordered(work_on, enumerate(filenames)):
        #NOTE: in general the files won't be processed in the original order
        print(result)

def main():
    p = Pool()
    # to do "successive" multiprocessing
    for filenames in [files(), ['other', 'bunch', 'of', 'files']]:
        process_files(p, filenames)

if __name__ == "__main__":
    main()
Each process_files() call runs in sequence, after the previous one has completed, i.e., the files from different calls to process_files() are not processed in parallel.
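If you do need results back in the original order, Pool.imap is the order-preserving counterpart of imap_unordered (at some cost in latency); a small variation on the process_files() above:

def process_files_in_order(pool, filenames):
    """Like process_files(), but results are yielded in input order."""
    for result in pool.imap(work_on, enumerate(filenames)):
        print(result)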
