Stop input after watchdog refresh - python

I have a problem with python input.
I'm creating a Python sneaker bot. I have a CLI that, when opened, shows the number of .txt files in the directory and then prompts you to choose which one you want to use to start your task. [1]
I implemented watchdog observers that watch my directory to see whether files are added or modified. When a file is modified, the watchdog script refreshes the CLI, but the input prompt the user was shown is still active. I need to cancel that input [1] after the screen is cleared. How can I make this possible?
here is my code:
def proxieschoice():
    """Show the numbered ``*.txt`` proxy files and prompt for one of them.

    Starts the observer exported by ``findfiles``, prints one line per
    ``*.txt`` file in the working directory (menu number, file name and
    line count) between two separator rules, then reads a number from
    standard input. A non-numeric answer prints ``Invalid Input``.
    """
    import findfiles

    findfiles.my_observer.start()

    rule = '------------------------------'
    proxynamelist = {}

    print(rule)
    for proxiesfile, file in enumerate(glob.glob("*.txt"), 1):
        with open(file) as handle:
            # One proxy per line, so the line count is the proxy count.
            count = len(handle.readlines())
        proxynamelist[proxiesfile] = file
        print(f"[{Fore.BLUE}{proxiesfile}{Style.RESET_ALL}] {file} [{count} proxies]")
    print(rule)

    try:
        prox = int(input(f"{Fore.BLUE}>> {Style.RESET_ALL} Which proxies you want to use? "))
    except ValueError:
        print('Invalid Input')
proxieschoice()
here is findfiles.py
import time
from watchdog.observers import Observer
from watchdog.events import PatternMatchingEventHandler
import converse
# Handler configuration: match every path, ignore nothing, include directories.
patterns = ["*"]
ignore_patterns = None
ignore_directories = False
case_sensitive = True
my_event_handler = PatternMatchingEventHandler(
    patterns=patterns,
    ignore_patterns=ignore_patterns,
    ignore_directories=ignore_directories,
    case_sensitive=case_sensitive,
)


def _redraw_menu():
    """Clear the terminal and show the proxy menu again."""
    converse.cleanscreen()
    converse.proxieschoice()


def on_created(event):
    """Redraw the menu when a path is created."""
    _redraw_menu()


def on_deleted(event):
    """Redraw the menu when a path is deleted."""
    _redraw_menu()


def on_modified(event):
    """Redraw the menu when a path is modified."""
    _redraw_menu()


def on_moved(event):
    """Redraw the menu when a path is moved."""
    _redraw_menu()


# Attach the callbacks to the handler instance.
my_event_handler.on_created = on_created
my_event_handler.on_deleted = on_deleted
my_event_handler.on_modified = on_modified
my_event_handler.on_moved = on_moved

# Watch the current directory and everything below it. The observer is only
# scheduled here; the importer is the one that starts it.
path = "."
go_recursively = True
my_observer = Observer()
my_observer.schedule(my_event_handler, path, recursive=go_recursively)

Related

ValueError("empty pattern") in file monitor with Python

I monitor a directory with Python to get an alert when a file is created.
Sometimes I get an exception when a file is created in this folder:
File .....watchdog\utils\patterns.py , line 30 in <genexpr>
return (any(path(match(p) for p in include_patterns)
File"c:\python3\lib\pathlib.py", line 921, in match
raise ValueError("empty pattern")
ValueError: empty pattern
This is the code in python .
import time
from watchdog.observers import Observer
from watchdog.events import PatternMatchingEventHandler
def on_created(event):
    """Print the new file's contents, then announce that it was created."""
    with open(event.src_path,'r') as created_file:
        print(created_file.read())
    print(f"hey, {event.src_path} has been created!")


def on_deleted(event):
    """Announce that ``event.src_path`` was deleted."""
    message = f"what the f**k! Someone deleted {event.src_path}!"
    print(message)


def on_modified(event):
    """Announce that ``event.src_path`` was modified."""
    message = f"hey buddy, {event.src_path} has been modified"
    print(message)


def on_moved(event):
    """Announce that ``event.src_path`` was moved to ``event.dest_path``."""
    message = f"ok ok ok, someone moved {event.src_path} to {event.dest_path}"
    print(message)
if __name__ == "__main__":
    # Handler configuration.
    # NOTE(review): `patterns` is a single string and `ignore_patterns` an
    # empty string; PatternMatchingEventHandler presumably expects lists of
    # patterns (or None) -- confirm against the watchdog documentation.
    patterns = "*.csv"
    ignore_patterns = ""
    ignore_directories = False
    case_sensitive = True
    my_event_handler = PatternMatchingEventHandler(patterns, ignore_patterns, ignore_directories, case_sensitive)
    # Attach the module-level callbacks to the handler instance.
    my_event_handler.on_created = on_created
    my_event_handler.on_deleted = on_deleted
    my_event_handler.on_modified = on_modified
    my_event_handler.on_moved = on_moved
    # Watch the "t/" directory and everything below it.
    path = "t/"
    go_recursively = True
    my_observer = Observer()
    my_observer.schedule(my_event_handler, path, recursive=go_recursively)
    my_observer.start()
    # The main thread sleeps for 10 seconds after starting the observer.
    time.sleep(10)
How do I fix the exception?
You need to pass in lists of patterns, not a single string. And None, not an empty string.
my_event_handler = PatternMatchingEventHandler(
patterns=["*"],
ignore_patterns=None, # pass in `None` in case of no ignores
ignore_directories=False,
case_sensitive=True,
)

Updating Appending List to a txt file

Hello, I am currently studying Python and I want to know how to keep a list that is appended to whenever a value changes and that is constantly written to a text file. It is hard to put into words, so here is the code:
list=[]
random_number=0
file_handler=open("history.txt","w")
file_handler.write(str(list))
lenght_cumulative_data=len(list)
confirmed.append(random_number)
What I want to accomplish is for the list containing the number 0 to be shown in history.txt, but that doesn't happen. Now imagine that random_number is always changing: I want the list to keep updating itself. For example, if random_number changes to 1 and then to 2, I want the list to be updated to [0, 1, 2]. How do you do that? I've been searching on YouTube and all I found is this write function. Could someone point me to a reference or share any ideas?
from os import stat
from _thread import start_new_thread
from time import sleep
List = []


class WatchFileForChanges:
    """Poll a file's modification time and record every change in ``List``.

    Args:
        filename: Path of the file to watch. It must exist when the
            watcher is created because its mtime is read immediately.
        poll_interval: Seconds to wait between two polls in :meth:`watch`.
    """

    def __init__(self, filename, poll_interval=0.1):
        self.file = filename
        self.poll_interval = poll_interval
        # Last modification time seen; a different value means "file changed".
        self.cached_file = stat(self.file).st_mtime
        # Number of changes recorded so far (the next value appended to List).
        self._changes = 0

    def poll_once(self):
        """Check the file once and record a change if its mtime moved.

        Returns:
            True if a change was appended to ``List``, False otherwise.
        """
        status = stat(self.file).st_mtime
        if status == self.cached_file:
            return False
        self.cached_file = status
        # file changed
        List.append(self._changes)
        self._changes += 1
        return True

    def watch(self):
        """Poll forever, pausing ``poll_interval`` seconds between checks."""
        while True:
            self.poll_once()
            # Without this pause the loop calls stat() back to back and keeps
            # a CPU core fully busy.
            sleep(self.poll_interval)
def main():
    """Start the watcher in a background thread and print ``List`` every second."""
    watcher = WatchFileForChanges("file.txt")
    start_new_thread(watcher.watch, ())
    while True:
        print(List)
        sleep(1)
if __name__ == '__main__':
main()
This will do what you want.
If I understood you correctly, you want to append to the list every time a file changes.
Note: this answer will only work on Windows
changes.py:
# Adapted from http://timgolden.me.uk/python/win32_how_do_i/watch_directory_for_changes.html
import threading
import os
import win32file
import win32con
ACTIONS = {
1 : "Created",
2 : "Deleted",
3 : "Updated",
4 : "Renamed from something",
5 : "Renamed to something"
}
# Thanks to Claudio Grondi for the correct set of numbers
FILE_LIST_DIRECTORY = 0x0001
def monitor_changes(callback, path, filenames):
    """Start a background thread that reports changes under ``path``.

    Args:
        callback: Called as ``callback(action, file)`` for each reported
            change.
        path: Directory to watch; a falsy value is replaced by ``""``.
        filenames: A single file name, a collection of file names, or a
            falsy value to report every file.

    Returns:
        The started ``threading.Thread`` running ``_monitor``.
    """
    path = path or ""
    # ``type(x) == "str"`` compares a class with a string and is never true,
    # so a bare name used to reach ``_monitor`` unwrapped and was matched by
    # substring rather than by equality. ``isinstance`` does the intended check.
    if isinstance(filenames, str):
        filenames = (filenames,)
    thread = threading.Thread(target=_monitor, args=(callback, path, filenames))
    thread.start()
    return thread
def _monitor(callback, path, filenames):
    """Report changes under ``path`` to ``callback`` in an endless loop.

    Opens a directory handle on ``path`` and repeatedly waits on
    ``ReadDirectoryChangesW`` (subtree flag set, last-write notifications
    only). Each ``(action, file)`` result is passed to ``callback`` unless
    ``filenames`` is truthy and contains neither the reported path nor its
    basename. The loop has no exit condition.
    """
    hDir = win32file.CreateFile (
        path,
        FILE_LIST_DIRECTORY,
        win32con.FILE_SHARE_READ | win32con.FILE_SHARE_WRITE | win32con.FILE_SHARE_DELETE,
        None,
        win32con.OPEN_EXISTING,
        win32con.FILE_FLAG_BACKUP_SEMANTICS,
        None
    )
    while True:
        #
        # ReadDirectoryChangesW takes a previously-created
        # handle to a directory, a buffer size for results,
        # a flag to indicate whether to watch subtrees and
        # a filter of what changes to notify.
        #
        # NB Tim Juchcinski reports that he needed to up
        # the buffer size to be sure of picking up all
        # events when a large number of files were
        # deleted at once.
        #
        results = win32file.ReadDirectoryChangesW (
            hDir,
            1024,
            True,
            win32con.FILE_NOTIFY_CHANGE_LAST_WRITE,
            None,
            None
        )
        for action, file in results:
            # Skip files the caller did not ask for; an empty/None
            # `filenames` means "report everything".
            if filenames and file not in filenames and os.path.basename(file) not in filenames:
                continue
            callback(action, file)
if __name__ == '__main__':
# monitor by printing
t = monitor_changes(print, ".", None)
And in your main.py:
import changes
import os
my_list = []
def callback(action_id, filename):
    """Print the change and append the file's current contents to ``my_list``.

    ``action_id`` is looked up in ``changes.ACTIONS`` to get a readable
    description; ``filename`` is the path that was reported as changed.
    """
    print(changes.ACTIONS[action_id], filename)
    with open(filename) as changed:
        contents = changed.read()
    my_list.append(contents)
thread = changes.monitor_changes(callback, ".", "my_file_that_I_want_to_monitor.txt")
If you want to monitor all files in the directory, call monitor_changes with None as the third argument.
Note: this will monitor all subdirectories, so files with the same name but in different folders will trigger the callback. If you want to avoid this, then check the filename passed to your callback function is exactly what you want to monitor.

Python multithreading raw_input

I'm currently doing some work with multithreading and i'm trying to figure out why my program isn't working as intended.
def input_watcher():
while True:
input_file = os.path.abspath(raw_input('Input file name: '))
compiler = raw_input('Choose compiler: ')
if os.path.isfile(input_file):
obj = FileObject(input_file, compiler)
with file_lock:
files.append(obj)
print 'Adding %s with %s as compiler' % (obj.file_name, obj.compiler)
else:
print 'File does not exists'
This is running in one thread and it works fine until I start adding the second file object.
This is the output from the console:
Input file name: C:\Users\Victor\Dropbox\Private\multiFile\main.py
Choose compiler: aImport
Adding main.py with aImport as compiler
Input file name: main.py updated
C:\Users\Victor\Dropbox\Private\multiFile\main.py
Choose compiler: Input file name: Input file name: Input file name: Input file name:
The "Input file name:" prompt starts popping up repeatedly the second I add the second file name and it asks for a compiler. The program keeps printing "Input file name:" until it crashes.
I have other code running in a different thread, i don't think it has anything to do with the error, but tell me if you think you need to see it and i will post it.
the full code:
import multiprocessing
import threading
import os
import time
file_lock = threading.Lock()
update_interval = 0.1
class FileMethods(object):
    """Base class providing the compiler actions used by ``FileObject``."""
    def a_import(self):
        # The 'aImport' action only runs the modification check.
        self.mod_check()
class FileObject(FileMethods):
    """A watched file together with the compiler chosen for it."""
    def __init__(self, full_name, compiler):
        # Absolute path, base name and directory of the file.
        self.full_name = os.path.abspath(full_name)
        self.file_name = os.path.basename(self.full_name)
        self.path_name = os.path.dirname(self.full_name)
        name, exstention = os.path.splitext(full_name)
        # Same name with '-concat' inserted before the extension.
        self.concat_name = name + '-concat' + exstention
        self.compiler = compiler
        # Maps a compiler name to the bound method implementing it.
        self.compiler_methods = {'aImport': self.a_import}
        # NOTE(review): seeded with the access time, while mod_check()
        # compares against the modification time -- confirm this is intended.
        self.last_updated = os.path.getatime(self.full_name)
        self.subfiles = []
        # Last seen modification time per sub-file path.
        self.last_subfiles_mod = {}
    def exists(self):
        """Return True if ``full_name`` is an existing regular file."""
        return os.path.isfile(self.full_name)
    def mod_check(self):
        """Return True (and print a notice) if the file's mtime is newer than ``last_updated``."""
        if self.last_updated < os.path.getmtime(self.full_name):
            self.last_updated = os.path.getmtime(self.full_name)
            print '%s updated' % self.file_name
            return True
        else:
            return False
    def sub_mod_check(self):
        """Return True at the first sub-file whose mtime is newer than the recorded one."""
        for s in self.subfiles:
            # A sub-file not yet recorded yields None from .get(); under
            # Python 2 ordering None compares lower than any number.
            if self.last_subfiles_mod.get(s) < os.path.getmtime(s):
                self.last_subfiles_mod[s] = os.path.getmtime(s)
                return True
        return False
files = []
def input_watcher():
    """Prompt forever for a file name and a compiler, adding existing files to ``files``."""
    while True:
        input_file = os.path.abspath(raw_input('Input file name: '))
        compiler = raw_input('Choose compiler: ')
        if os.path.isfile(input_file):
            obj = FileObject(input_file, compiler)
            # The append happens while holding file_lock.
            with file_lock:
                files.append(obj)
            print 'Adding %s with %s as compiler' % (obj.file_name, obj.compiler)
        else:
            print 'File does not exists'
def file_manipulation():
    """Start one ``multiprocessing.Process`` per entry in ``files``."""
    if __name__ == '__main__':
        for f in files:
            # NOTE(review): the trailing ``()`` calls the compiler method here,
            # in the current process, and its return value becomes ``target``.
            p = multiprocessing.Process(target=f.compiler_methods.get(f.compiler)())
            p.start()
            #f.compiler_methods.get(f.compiler)()
def file_watcher():
    """Run ``file_manipulation`` under ``file_lock`` every ``update_interval`` seconds, forever."""
    while True:
        with file_lock:
            file_manipulation()
        # NOTE(review): indentation was lost in the source; the sleep is
        # assumed to happen after the lock is released -- confirm.
        time.sleep(update_interval)
iw = threading.Thread(target=input_watcher)
fw = threading.Thread(target=file_watcher)
iw.start()
fw.start()
This is happening because you're not using an if __name__ == "__main__": guard, while also using multiprocessing.Process on Windows. Windows needs to re-import your module in the child processes it spawns, which means it will keep creating new threads to handle inputs and watch files. This, of course, is a recipe for disaster. Do this to fix the issue:
if __name__ == "__main__":
iw = threading.Thread(target=input_watcher)
fw = threading.Thread(target=file_watcher)
iw.start()
fw.start()
See the "Safe importing of the main module" section in the multiprocessing docs for more info.
I also have a feeling file_watcher isn't really doing what you want it to (it will keep re-spawning processes for files you've already processed), but that's not really related to the original question.

Automatic background changer using Python 2.7.3 not working, though it should

I'm very new to Ubuntu/Python/Bash/Gnome in general, so I still feel like there's a chance I'm doing something wrong, but it's been 3 days now without success...
Here's what the script is supposed to do:
* [✓] Download 1 random image from wallbase.cc
* [✓] Save it to the same directory that the script is running from
* [x] Set it as the wallpaper
There are two attempts made to set the wallpaper using two different commands, and NEITHER works when run in the script. There is a print statement (2nd line from the bottom) that prints the correct terminal command: I can copy and paste the printed result and it works fine; it just doesn't work when it's executed in the script.
#!/usr/bin/env python
import urllib2
import os
from gi.repository import Gio
response = urllib2.urlopen("http://wallbase.cc/random/12/eqeq/1366x768/0.000/100/32")
page_source = response.read()
thlink_pos = page_source.find("ico-X")
address_start = (page_source.find("href=\"", thlink_pos) + 6)
address_end = page_source.find("\"", address_start + 1)
response = urllib2.urlopen(page_source[address_start:address_end])
page_source = response.read()
bigwall_pos = page_source.find("bigwall")
address_start = (page_source.find("src=\"", bigwall_pos) + 5)
address_end = page_source.find("\"", address_start + 1)
address = page_source[address_start:address_end]
slash_pos = address.rfind("/") + 1
pic_name = address[slash_pos:]
bashCommand = "wget " + page_source[address_start:address_end]
os.system(bashCommand)
print "Does my new image exists?", os.path.exists(os.getcwd() + "/" + pic_name)
#attempt 1
settings = Gio.Settings.new("org.gnome.desktop.background")
settings.set_string("picture-uri", "file://" + os.getcwd() + "/" + pic_name)
settings.apply()
#attempt 2
bashCommand = "gsettings set org.gnome.desktop.background picture-uri file://" + os.getcwd() + "/" + pic_name
print bashCommand
os.system(bashCommand)
settings.apply()
You've successfully changed your settings, but they are still unapplied. Try calling:
settings.apply()
after setting "picture-uri" string.
It works for me (Ubuntu 12.04).
I've modified your script (unrelated to your error):
#!/usr/bin/python
"""Set desktop background using random images from http://wallbase.cc
It uses `gi.repository.Gio.Settings` to set the background.
"""
import functools
import itertools
import logging
import os
import posixpath
import random
import re
import sys
import time
import urllib
import urllib2
import urlparse
from collections import namedtuple
from bs4 import BeautifulSoup # $ sudo apt-get install python-bs4
from gi.repository.Gio import Settings # pylint: disable=F0401,E0611
DEFAULT_IMAGE_DIR = os.path.expanduser('~/Pictures/backgrounds')
HTMLPAGE_SIZE_MAX = 1 << 20 # bytes
TIMEOUT_MIN = 300 # seconds
TIMEOUT_DELTA = 30 # jitter
# "Anime/Manga", "Wallpapers/General", "High Resolution Images"
CATEGORY_W, CATEGORY_WG, CATEGORY_HR = range(1, 4)
PURITY_SFW, PURITY_SKETCHY, PURITY_NSFW, PURITY_DEFAULT = 4, 2, 1, 0
DAY_IN_SECONDS = 86400
UrlRetreiveResult = namedtuple('UrlRetreiveResult', "path headers")
def set_background(image_path, check_exist=True):
    """Change desktop background to image pointed by `image_path`.

    If `check_exist` is true, one byte is read from the file first, so an
    unreadable path raises before the setting is touched.
    """
    if check_exist: # make sure we can read it (at this time)
        with open(image_path, 'rb') as f:
            f.read(1)
    # prepare uri
    path = os.path.abspath(image_path)
    if isinstance(path, unicode): # quote() doesn't like unicode
        path = path.encode('utf-8')
    uri = 'file://' + urllib.quote(path)
    # change background
    bg_setting = Settings.new('org.gnome.desktop.background')
    bg_setting.set_string('picture-uri', uri)
    bg_setting.apply()
def url2filename(url):
    """Return basename corresponding to url.

    The query string is ignored and percent-escapes in the path are decoded.
    Raises ValueError if the decoded basename still contains a path
    separator for the local OS.

    >>> url2filename('http://example.com/path/to/file?opt=1')
    'file'
    """
    urlpath = urlparse.urlsplit(url).path # pylint: disable=E1103
    basename = posixpath.basename(urllib.unquote(urlpath))
    if os.path.basename(basename) != basename:
        raise ValueError # refuse 'dir%5Cbasename.ext' on Windows
    return basename
def download(url, dirpath, extensions=True, filename=None):
    """Download url to dirpath.

    Use basename of the url path as a filename.
    Create destination directory if necessary.
    Use `extensions` to require the file to have an extension or any
    of in a given sequence of extensions.
    Return (path, headers) on success.
    Don't retrieve url if path exists (headers are None in this case).
    Return None (after logging) if the filename is rejected or the
    download fails with IOError.
    """
    if not os.path.isdir(dirpath):
        os.makedirs(dirpath)
        logging.info('created directory %s', dirpath)
    # get filename from the url
    filename = url2filename(url) if filename is None else filename
    if os.path.basename(filename) != filename:
        logging.critical('filename must not have path separator in it "%s"',
                         filename)
        return
    if extensions:
        # require the file to have an extension
        root, ext = os.path.splitext(filename)
        if root and len(ext) > 1:
            # require the extension to be in the list
            try:
                it = iter(extensions)
            except TypeError:
                # `extensions` is not iterable (e.g. True): any extension is fine
                pass
            else:
                if ext not in it:
                    logging.warn(("file extension is not in the list"
                                  " url=%s"
                                  " extensions=%s"),
                                 url, extensions)
                    return
        else:
            logging.warn("file has no extension url=%s", url)
            return
    # download file
    path = os.path.join(dirpath, filename)
    logging.info("%s\n%s", url, path)
    if os.path.exists(path): # don't retrieve if path exists
        logging.info('path exists')
        return UrlRetreiveResult(path, None)
    try:
        return UrlRetreiveResult(*urllib.urlretrieve(url, path,
                                                     _print_download_status))
    except IOError:
        logging.warn('failed to download {url} -> {path}'.format(
            url=url, path=path))
def _print_download_status(block_count, block_size, total_size):
logging.debug('%10s bytes of %s', block_count * block_size, total_size)
def min_time_between_calls(min_delay):
    """Return a decorator that enforces a minimum delay between calls.

    The decorated function remembers when it was last invoked. If it is
    called again less than `min_delay` seconds later, the call first sleeps
    (via `_sleep`) for the remaining time.
    """
    def decorator(func):
        lastcall = [None] # emulate nonlocal keyword

        # Preserve the wrapped function's name and docstring on the wrapper.
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            if lastcall[0] is not None:
                delay = time.time() - lastcall[0]
                if delay < min_delay:
                    _sleep(min_delay - delay)
            lastcall[0] = time.time()
            return func(*args, **kwargs)
        return wrapper
    return decorator
@min_time_between_calls(5)
def _makesoup(url):
    """Fetch `url` and return it parsed as a BeautifulSoup document.

    `url` may be a string or a `urllib2.Request`. At most HTMLPAGE_SIZE_MAX
    bytes of the response are read. On IOError/OSError a warning is logged
    and None is returned. The decorator keeps successive requests at least
    five seconds apart.
    """
    try:
        logging.info(vars(url) if isinstance(url, urllib2.Request) else url)
        page = urllib2.urlopen(url)
        soup = BeautifulSoup(page.read(HTMLPAGE_SIZE_MAX))
        return soup
    except (IOError, OSError) as e:
        # `url` may be a Request (has get_full_url) or a plain string.
        logging.warn('failed to return soup for %s, error: %s',
                     getattr(url, 'get_full_url', lambda: url)(), e)
class WallbaseImages:
    """Given parameters it provides image urls to download.

    Iterating an instance yields ``(url, filename)`` pairs.
    """
    def __init__(self,
                 categories=None, # default; sequence of CATEGORY_*
                 resolution_exactly=True, # False means 'at least'
                 resolution=None, # all; (width, height)
                 aspect_ratios=None, # all; sequence eg, [(5,4),(16,9)]
                 purity=PURITY_DEFAULT, # combine with |
                 thumbs_per_page=None, # default; an integer
                 ):
        """See usage below."""
        self.categories = categories
        self.resolution_exactly = resolution_exactly
        self.resolution = resolution
        self.aspect_ratios = aspect_ratios
        self.purity = purity
        self.thumbs_per_page = thumbs_per_page
    def _as_request(self):
        """Create a urllib2.Request() using given parameters."""
        # make url
        if self.categories is not None:
            # category digits are emitted in the fixed order 2, 1, 3
            categories = "".join(str(n) for n in (2, 1, 3)
                                 if n in self.categories)
        else: # default
            categories = "0"
        if self.resolution_exactly:
            at_least_or_exactly_resolution = "eqeq"
        else:
            at_least_or_exactly_resolution = "gteq"
        if self.resolution is not None:
            resolution = "{width:d}x{height:d}".format(
                width=self.resolution[0], height=self.resolution[1])
        else:
            resolution = "0x0"
        if self.aspect_ratios is not None:
            # each ratio is rendered as width/height with two decimals
            aspect_ratios = "+".join("%.2f" % (w / float(h),)
                                     for w, h in self.aspect_ratios)
        else: # default
            aspect_ratios = "0"
        # purity flags rendered as a three-digit binary string
        purity = "{0:03b}".format(self.purity)
        thumbs = 20 if self.thumbs_per_page is None else self.thumbs_per_page
        # the url template is filled from the local variables above
        url = ("http://wallbase.cc/random/"
               "{categories}/"
               "{at_least_or_exactly_resolution}/{resolution}/"
               "{aspect_ratios}/"
               "{purity}/{thumbs:d}").format(**locals())
        logging.info(url)
        # make post data
        data = urllib.urlencode(dict(query='', board=categories, nsfw=purity,
                                     res=resolution,
                                     res_opt=at_least_or_exactly_resolution,
                                     aspect=aspect_ratios,
                                     thpp=thumbs))
        req = urllib2.Request(url, data)
        return req
    def __iter__(self):
        """Yield background image urls."""
        # find links to bigwall pages
        # css-like: #thumbs div[class="thumb"] \
        # a[class~="thlink" and href^="http://"]
        soup = _makesoup(self._as_request())
        if not soup:
            logging.warn("can't retrieve the main page")
            return
        thumbs_soup = soup.find(id="thumbs")
        for thumb in thumbs_soup.find_all('div', {'class': "thumb"}):
            bigwall_a = thumb.find('a', {'class': "thlink",
                                         'href': re.compile(r"^http://")})
            if bigwall_a is None:
                logging.warn("can't find thlink link")
                continue # try the next thumb
            # find image url on the bigwall page
            # css-like: #bigwall > img[alt and src^="http://"]
            bigwall_soup = _makesoup(bigwall_a['href'])
            if bigwall_soup is not None:
                bigwall = bigwall_soup.find(id='bigwall')
                if bigwall is not None:
                    img = bigwall.find('img',
                                       src=re.compile(r"(?i)^http://.*\.jpg$"),
                                       alt=True)
                    if img is not None:
                        url = img['src']
                        filename = url2filename(url)
                        if filename.lower().endswith('.jpg'):
                            yield url, filename # successfully found image url
                        else:
                            logging.warn('suspicious url "%s"', url)
                        continue
            # reached whenever any step above failed to find its element
            logging.warn("can't parse bigwall page")
def main():
    """Download random images forever and set each one as the background.

    Command line: an optional ``-d`` switches logging to DEBUG; an optional
    first positional argument overrides DEFAULT_IMAGE_DIR.
    """
    level = logging.INFO
    if '-d' in sys.argv:
        sys.argv.remove('-d')
        level = logging.DEBUG
    # configure logging
    logging.basicConfig(format='%(levelname)s: %(asctime)s %(message)s',
                        level=level, datefmt='%Y-%m-%d %H:%M:%S %Z')
    if len(sys.argv) > 1:
        backgrounds_dir = sys.argv[1]
    else:
        backgrounds_dir = DEFAULT_IMAGE_DIR
    # infinite loop: Press Ctrl+C to interrupt it
    #NOTE: here's some arbitrary logic: modify for you needs e.g., break
    # after the first image found
    timeout = TIMEOUT_MIN # seconds
    for i in itertools.cycle(xrange(timeout, DAY_IN_SECONDS)):
        found = False
        try:
            for url, filename in WallbaseImages(
                    categories=[CATEGORY_WG, CATEGORY_HR, CATEGORY_W],
                    purity=PURITY_SFW,
                    thumbs_per_page=60):
                res = download(url, backgrounds_dir, extensions=('.jpg',),
                               filename=filename)
                if res and res.path:
                    found = True
                    set_background(res.path)
                # don't hammer the site
                timeout = max(TIMEOUT_MIN, i % DAY_IN_SECONDS)
                _sleep(random.randint(timeout, timeout + TIMEOUT_DELTA))
        except Exception: # pylint: disable=W0703
            # log and keep looping; one failed round must not stop the script
            logging.exception('unexpected error')
            _sleep(timeout)
        else:
            if not found:
                logging.error('failed to retrieve any images')
                _sleep(timeout)
        # double the timeout each round, wrapping around at one day
        timeout = (timeout * 2) % DAY_IN_SECONDS
def _sleep(timeout):
"""Add logging to time.sleep() call."""
logging.debug('sleep for %s seconds', timeout)
time.sleep(timeout)
# Run the endless download loop only when executed as a script, so that
# importing this module has no side effects.
if __name__ == "__main__":
    main()
I tried to implement a Python script that used the PIL library to write text on an image and then updated the GNOME background "picture-uri" to point to that image using the Gio class. The script would ping-pong between two images, always modifying the one not in use and then attempting to "switch" by updating the Settings. I did this to avoid flicker, since modifying the current background directly makes it drop out temporarily. When calling the script directly from the shell I rarely saw any issue, but when run as a cron job it simply wouldn't update on the pong. I used both sync and apply and waited several minutes before trying to switch the images; it didn't work. I tried running cron as the user (su -c "cmd" user) and that didn't work either.
Finally gave up on the ping pong approach when I noticed that Gnome will detect any change in the background file and update. So dropped the ping pong method and went to a temp file that I just copy over the current background using the shutil library. Works like a charm.

Successive multiprocessing

I am filtering huge text files using multiprocessing.py. The code basically opens the text files, works on it, then closes it.
Thing is, I'd like to be able to launch it successively on multiple text files. Hence, I tried to add a loop, but for some reason it doesn't work (while the code works on each file). I believe this is an issue with:
if __name__ == '__main__':
However, I am looking for something else. I tried to create a Launcher and a LauncherCount files like this:
LauncherCount.py:
def setLauncherCount(n):
global LauncherCount
LauncherCount = n
and,
Launcher.py:
import os
import LauncherCount
LauncherCount.setLauncherCount(0)
os.system("OrientedFilterNoLoop.py")
LauncherCount.setLauncherCount(1)
os.system("OrientedFilterNoLoop.py")
...
I import LauncherCount.py, and use LauncherCount.LauncherCount as my loop index.
Of course, this doesn't work too as it edits the variable LauncherCount.LauncherCount locally, so it won't be edited in the imported version of LauncherCount.
Is there any way to edit globally a variable in an imported file? Or, is there any way to do this in any other way? What I need is running a code multiple times, in changing one value, and without using any loop apparently.
Thanks!
Edit: Here is my main code if necessary. Sorry for the bad style ...
import multiprocessing
import config
import time
import LauncherCount
class Filter:
    """ Filtering methods """

    def __init__(self):
        print("launching methods")

    def LatLong(self, line):
        """Return the list ``[Latitude, Longitude]`` parsed from ``line``.

        Latitude is the text between the first and second comma, longitude
        the text between the second and third comma. Both are converted to
        floating point numbers; ``[0, 0]`` is returned if either conversion
        fails.
        """
        first = line.find(',')
        second = line.find(',', first + 1)
        third = line.find(',', second + 1)
        Lat = line[first + 1:second]
        Long = line[second + 1:third]
        try:
            return [float(Lat), float(Long)]
        except ValueError:
            return [0, 0]

    def LatLongFilter(self, LatLongList, tupleFilter):
        """Tell whether a position lies inside a Lat/Long rectangle.

        Args:
            LatLongList: ``[latitude, longitude]``.
            tupleFilter: ``(minLat, maxLat, minLong, maxLong)``; the bounds
                are inclusive.

        Returns:
            True if the position is within the rectangle, False if not.
        """
        # The parentheses keep the two-line condition a single expression;
        # without them the dangling ``and`` is a syntax error.
        return (tupleFilter[0] <= LatLongList[0] <= tupleFilter[1]
                and tupleFilter[2] <= LatLongList[1] <= tupleFilter[3])

    def writeLine(self, key, line):
        """Write ``line`` to the output file stored for ``key`` in ``filterDico``."""
        filterDico[key][1].write(line)
def filteringProcess(dico):
    """Copy lines of the global ``readFile`` to every matching output.

    ``dico`` maps a key to a sequence whose first item is the Lat/Long
    rectangle used for filtering. Reading stops at end of file or when
    ``readline`` raises ValueError; lines shorter than 35 characters are
    skipped.
    """
    line_filter = Filter()
    while True:
        try:
            line = readFile.readline()
        except ValueError:
            break
        if not line: # Breaks at the end of the file
            break
        if len(line) < 35: # Deletes wrong lines (too short)
            continue
        position = line_filter.LatLong(line)
        for key, entry in dico.items():
            if line_filter.LatLongFilter(position, entry[0]):
                line_filter.writeLine(key, line)
###########################################################################
# Main
###########################################################################
# Open read files:
readFile = open(config.readFileList[LauncherCount.LauncherCount][1], 'r')
# Generate writing files:
pathDico = {}
filterDico = config.filterDico
# Create outputs
for key in filterDico:
    # Output name: input name minus its last four characters, plus the
    # filter key. The parentheses keep the two-line expression a single
    # statement; without them the second line is a separate unary-plus
    # expression.
    output_Name = (config.readFileList[LauncherCount.LauncherCount][0][:-4]
                   + '_' + key + '.log')
    pathDico[output_Name] = config.writingFolder + output_Name
    # Replace each entry by [original value, opened output file].
    filterDico[key] = [filterDico[key], open(pathDico[output_Name], 'w')]
p = []
CPUCount = multiprocessing.cpu_count()
CPURange = range(CPUCount)
startingTime = time.localtime()
if __name__ == '__main__':
### Create and start processes:
for i in CPURange:
p.append(multiprocessing.Process(target = filteringProcess ,
args = (filterDico,)))
p[i].start()
### Kill processes:
while True:
if [p[i].is_alive() for i in CPURange] == [False for i in CPURange]:
readFile.close()
for key in config.filterDico:
config.filterDico[key][1].close()
print(key,"is Done!")
endTime = time.localtime()
break
print("Process started at:",startingTime)
print("And ended at:",endTime)
To process groups of files in sequence while working on files within a group in parallel:
#!/usr/bin/env python
from multiprocessing import Pool
def work_on(args):
    """Handle one ``(index, filename)`` job and return the index."""
    index, name = args
    print("working on %s" % (name,))
    return index
def files():
    """Yield the names of the input files to work on."""
    #NOTE: you could read the file list from a file, get it using glob.glob, etc
    for name in ("inputfile1", "inputfile2"):
        yield name
def process_files(pool, filenames):
    """Run ``work_on`` over ``filenames`` in ``pool`` and print each result.

    Returns only after every result has been received.
    """
    jobs = enumerate(filenames)
    for finished in pool.imap_unordered(work_on, jobs):
        #NOTE: in general the files won't be processed in the original order
        print(finished)
def main():
    """Process each group of files in turn using one shared process pool."""
    p = Pool()
    try:
        # to do "successive" multiprocessing
        for filenames in [files(), ['other', 'bunch', 'of', 'files']]:
            process_files(p, filenames)
    finally:
        # Shut the worker processes down once every group is done (or on
        # error) instead of leaving them to interpreter exit.
        p.close()
        p.join()
if __name__=="__main__":
main()
Each process_files() call runs in sequence, after the previous one has completed; i.e., the files from different calls to process_files() are not processed in parallel.

Categories

Resources