Copy parameters into list - python

I am trying to copy parameters passed into a Python script to a file. Here are the parameters:
["0013","1","1","\"john.dow#gmail.com\"","1","P123-ND 10Q","10Q H??C"]
I understand that there is a buffer problem and I am getting bad data into my parameters. However, I do not have control over what is being passed in. I am trying to copy, starting at the 5th parameter, the parameters into a file.
# Write every argument from index 5 onward to the input file, one per line.
with open(in_file_name, 'w') as f:
    for value in arg_list[5:]:
        f.write(value + '\n')
The result of the file is below:
P123-ND 10Q
10Q H??C
Here is what it should be:
P123-ND
10Q
How can I not include the bad data? What is happening to the spaces between the valid information and the bad information?
As requested, here is the full program:
#!/bin/python
class Argument_Indices:
    """Named positions of the leading entries of the argument list."""

    (PRINTER_INDEX,
     AREA_INDEX,
     LABEL_INDEX,
     EMAIL_INDEX,
     RUN_TYPE_INDEX) = range(5)
import argparse
import json
import os
from subprocess import call
import sys
from time import strftime
def _handle_args():
    """Build the command-line parser and return the parsed arguments.

    Two positionals are declared: ``time_to_run`` (either 'NOW' or 'EOP')
    and ``arguments`` (one or more remaining command-line values).
    """
    positional_specs = (
        ('time_to_run', {'default': 'NOW',
                         'choices': ['NOW', 'EOP'],
                         'help': 'when to run the report'}),
        ('arguments', {'nargs': '+',
                       'help': 'the remaining command line arguments'}),
    )
    parser = argparse.ArgumentParser(
        description='Set environment variables for and to call Program')
    for dest, options in positional_specs:
        parser.add_argument(dest, **options)
    return parser.parse_args()
def _process_program(arg_list):
    """Write the trailing arguments to a temp input file and run Program.bin.

    The printer, area and label entries of ``arg_list`` are passed to
    ``./Program.bin`` on its command line.  Every entry after the run-type
    entry is written, one per line, to a time-stamped file under /tmp whose
    path is exported through the INPUT_FILE environment variable.  The file
    is removed once the program has returned, even if it failed.

    Args:
        arg_list: list of strings; indices are described by Argument_Indices.
    """
    time_stamp = strftime("%d_%b_%Y_%H_%M_%S")
    printer = arg_list[Argument_Indices.PRINTER_INDEX]
    area = arg_list[Argument_Indices.AREA_INDEX]
    label = arg_list[Argument_Indices.LABEL_INDEX]
    in_file_name = "/tmp/program{0}.inp".format(time_stamp)
    os.environ['INPUT_FILE'] = in_file_name

    # The payload starts right after the last named index (5 originally).
    first_payload_index = Argument_Indices.RUN_TYPE_INDEX + 1
    try:
        with open(in_file_name, 'w') as f:
            for value in arg_list[first_payload_index:]:
                # One value per line; without the newline the values run together.
                f.write(value + '\n')
        call(['./Program.bin', printer, area, label])
    finally:
        # Never leave the temporary input file behind.
        if os.path.exists(in_file_name):
            os.remove(in_file_name)


# main() calls _process_program; keep the original misspelled name as an alias.
_proces_program = _process_program
def main():
    """Load the JSON argument list named on the command line and process it.

    Returns 0 on success and -1 when no input file name was supplied.
    """
    parsed = _handle_args()
    if not parsed.arguments:
        print('Missing name of input file')
        return -1

    # The first remaining command-line argument names the JSON file.
    json_path = parsed.arguments[0]
    with open(json_path) as input_file:
        arg_list = json.load(input_file)

    _process_program(arg_list)
    return 0
if __name__ == '__main__':
    # Report failure through a non-zero exit status; a bare sys.exit()
    # would tell the calling shell that the run succeeded.
    if main() != 0:
        print('Program run failed')
        sys.exit(1)
    sys.exit(0)

For your exact case (where you're getting duplicated parameters received with some spaces in between) this would work:
received_param_list = ["0013","1","1","\"john.dow#gmail.com\"","1","P123-ND 10Q","10Q H??C"]
# Keep only the text before the first space of every received value.
arg_list = [value.split(" ")[0] for value in received_param_list]
# The last whitespace-separated token of the final value is appended when it
# differs from the final cleaned entry.
last_param = received_param_list[-1].split()[-1]
if arg_list[-1] != last_param:
    arg_list.append(last_param)
for param in arg_list[5:]:
    print(param)
Although there might be another simpler way

Related

Python Script Output Changes depending on Computer I am using

I am encountering some weird problem where the same script is outputting a different result. Same python version, same libraries.
# Latitude: the four bytes at +4..+7, combined most-significant byte first.
Lat=((INPUT_FILE[location[i]+OFFSET+4])<<24|
     (INPUT_FILE[location[i]+OFFSET+5])<<16|
     (INPUT_FILE[location[i]+OFFSET+6])<<8|
     (INPUT_FILE[location[i]+OFFSET+7])<<0)/11930464.71
# Longitude: the four bytes at +0..+3.  All four terms must be shifts (<<);
# a single '<' turns the expression into a comparison.
# NOTE(review): if INPUT_FILE holds NumPy uint8 values, the width of the
# shifted result presumably follows NumPy's platform default integer, which
# could explain signed vs. unsigned results on different machines - confirm.
Long=((INPUT_FILE[location[i]+OFFSET+0])<<24|
      (INPUT_FILE[location[i]+OFFSET+1])<<16|
      (INPUT_FILE[location[i]+OFFSET+2])<<8|
      (INPUT_FILE[location[i]+OFFSET+3])<<0)/11930464.71
print(Long)
Basically all I am doing is reading bytes from a file and converting them into a coordinate using math. On the Windows computer the value is interpreted correctly as a two's complement negative number, while on the Linux computer it is output as if it were just an unsigned number. It seems like a QWORD/DWORD (64-bit vs. 32-bit integer) problem.
import argparse
import os
import stat
import decimal
import sys
import struct
import numpy as np
import re
import binascii
# Module-level state.
# NOTE(review): apart from INPUT_FILE, none of these names is read by the
# code visible in this listing.
in_file = None
log_file = None
logging = False
FS_MAP = None
PAGE_SIZE = 0x20000
count=0
# Placeholder only; the __main__ section rebinds INPUT_FILE to an np.memmap
# of the input file before PreviousDestinatioins() is called.
INPUT_FILE=[]
DirectoryList=""
LIMIT=0
# Scan the global INPUT_FILE for previous-destination records, print each
# decoded address and write a KML <Placemark> to the global file object f for
# every record whose coordinates fall inside the accepted bounding box.
# NOTE(review): the indentation of this listing was lost, so the nesting of
# the loops and if-statements below has to be inferred - confirm against the
# original script before restructuring.
def PreviousDestinatioins ():
print("SCANNING for the Previous Destinations Stored in the pers_NaviControllerLastDestinationsList")
#So lets deal with the first format found in the logs
# Record signature: four 0x00 bytes, one byte in the range 0x05-0x0f, then
# the bytes 00 01 01 02 (nine bytes in total).
RegEx = re.compile(binascii.unhexlify("00000000")+b"[\x05-\x0f]"+binascii.unhexlify("00010102"))
# Start offset of every signature match in the input.
location = [m.start(0) for m in RegEx.finditer(INPUT_FILE)]
# NOTE(review): ARRAY is never used in the visible code.
ARRAY=[]
f.write("<Folder><name>Previous Destinations</name>\n")
for i in range(0,len(location)):
# OFFSET is relative to the match start; 9 is the length of the signature.
OFFSET=9
# Tableoffset is the tag that says which field the next text run belongs to.
Tableoffset=1
print(" ")
STATE=""
CITY=""
ZIPCODE=""
ROAD=""
StreetNumber=""
BusinessName=""
# Read tagged text fields until the tag 0x7c12 is encountered.
while(Tableoffset!=0x7c12):
TEXT=""
# Collect consecutive bytes greater than 31 as characters.
while INPUT_FILE[location[i]+OFFSET]>31:
TEXT=TEXT+chr(INPUT_FILE[location[i]+OFFSET])
OFFSET=OFFSET+1
# Store the text under the field selected by the current tag.
if Tableoffset==1:
STATE=TEXT
# Tag 0x20215 carries no text that is kept; 20 extra bytes are skipped.
if Tableoffset==0x20215:
OFFSET=OFFSET+20
if Tableoffset==0x201:
CITY=TEXT
if Tableoffset==0x601:
ZIPCODE=TEXT
if Tableoffset==0x301:
ROAD=TEXT
if Tableoffset==0x501:
StreetNumber=TEXT
if Tableoffset==0x11501:
BusinessName=TEXT
# Next tag: four bytes combined most-significant byte first; OFFSET then
# advances by five bytes.
Tableoffset=INPUT_FILE[location[i]+OFFSET]<<24|INPUT_FILE[location[i]+OFFSET+1]<<16|INPUT_FILE[location[i]+OFFSET+2]<<8|INPUT_FILE[location[i]+OFFSET+3]
OFFSET=OFFSET+5
address=BusinessName+" "+StreetNumber+" "+ROAD+" "+CITY+" "+STATE+", "+ZIPCODE
print(address)
OFFSET=OFFSET-1
# Coordinates: longitude from bytes +0..+3 and latitude from bytes +4..+7,
# each combined most-significant byte first and divided by 11930464.71.
# NOTE(review): INPUT_FILE comes from np.memmap, so the shifted values are
# NumPy integers; whether they come out signed presumably depends on the
# platform's default integer width - confirm.
Lat=((INPUT_FILE[location[i]+OFFSET+4])<<24|(INPUT_FILE[location[i]+OFFSET+5])<<16|(INPUT_FILE[location[i]+OFFSET+6])<<8|(INPUT_FILE[location[i]+OFFSET+7])<<0)/11930464.71
Long=((INPUT_FILE[location[i]+OFFSET+0])<<24|(INPUT_FILE[location[i]+OFFSET+1])<<16|(INPUT_FILE[location[i]+OFFSET+2])<<8|(INPUT_FILE[location[i]+OFFSET+3])<<0)/11930464.71
# Debug output: the longitude expression again, printed as a string.
print(str(((INPUT_FILE[location[i]+OFFSET+0])<<24|(INPUT_FILE[location[i]+OFFSET+1])<<16|(INPUT_FILE[location[i]+OFFSET+2])<<8|(INPUT_FILE[location[i]+OFFSET+3])<<0)/11930464.71))
# Keep only points with latitude 25..50 and longitude -125..-65.
if ((25 <= Lat <= 50) and (-125 <= Long <= -65)):
print("Lat %f"%Lat+" Long %f"%Long)
f.write("<Placemark><Style><IconStyle><scale>2.0</scale><Icon><href>http://maps.google.com/mapfiles/kml/paddle/ylw-blank.png</href></Icon></IconStyle><LabelStyle><scale>.5</scale></LabelStyle></Style><name><![CDATA["+address+"]]></name><Point><altitudeMode>clampToGround </altitudeMode><extrude>0</extrude><coordinates>"+str(Long)+","+str(Lat)+",0</coordinates></Point></Placemark>\n")
f.write("</Folder>\n")
return
if __name__ == "__main__":
    parser = argparse.ArgumentParser(description='')
    parser.add_argument('-i', '--input', help='Process this input file.', dest='in_file', action='store')
    opts = parser.parse_args()
    if opts.in_file is None:
        print("No input file.")
        parser.print_help()
        sys.exit(-1)
    # Map the input read-only; PreviousDestinatioins() reads it through the
    # module-level name INPUT_FILE.
    INPUT_FILE = np.memmap(opts.in_file, mode='r')
    FILESIZE=len(INPUT_FILE)
    resultstable=[]
    # The with-block binds the module-level name f that
    # PreviousDestinatioins() writes to, and guarantees the KML file is
    # flushed and closed even if scanning raises.
    with open("RecoveredGPSData.kml","w") as f:
        f.write("<kml xmlns=\"http://www.opengis.net/kml/2.2\" xmlns:gx=\"http://www.google.com/kml/ext/2.2\"> <Document><name>Recovered GPS Data</name>\n")
        print("Filesize is %d"%FILESIZE)
        print("Note that this script needs to be modified if looking for coordinates outside of the contential US\n\n\n")
        print("This script can take up to five minutes.")
        PreviousDestinatioins ()
        f.write("</Document></kml>")

How should I Execute this Python Script in powershell

I've solved the problem. The problem was related to my %PATH%.
I have a script which works with an argument. In PowerShell I tried the command below:
.\dsrf2csv.py C:\Python27\a\DSR_testdata.tsv.gz
And also you can see the script below,
# Excerpt of the converter's constructor: derive both output file names from
# the input path given on the command line.
def __init__(self, dsrf2csv_arg):
self.dsrf_filename = dsrf2csv_arg
# Split the input path into its directory and its file name.
dsrf_path, filename = os.path.split(self.dsrf_filename)
# Report output: 'DSR' -> 'Report' and 'tsv' -> 'csv' in the file name
# (a trailing '.gz' is kept).
self.report_outfilename = os.path.join(dsrf_path, filename.replace('DSR', 'Report').replace('tsv', 'csv'))
# Summary output: 'DSR' -> 'Summary' and 'tsv.gz' -> 'csv' in the file name.
self.summary_outfilename = os.path.join(dsrf_path, filename.replace('DSR', 'Summary').replace('tsv.gz', 'csv'))
But when I try to run this script, nothing happens. How should I run this script with a file? (example: testdata.tsv.gz)
Note : Script and file in same location.
Full script:
import argparse
import atexit
import collections
import csv
import gzip
import os
# Record types that parse_blocks drops before grouping rows into blocks.
SKIP_ROWS = ['HEAD', '#HEAD', '#SY02', '#SY03', '#AS01', '#MW01', '#RU01',
             '#SU03', '#LI01', '#FOOT']
# Header row of the report CSV; the lower-cased names are the row-dict keys.
REPORT_HEAD = ['Asset_ID', 'Asset_Title', 'Asset_Artist', 'Asset_ISRC',
               'MW_Asset_ID', 'MW_Title', 'MW_ISWC', 'MW_Custom_ID',
               'MW_Writers', 'Views', 'Owner_name', 'Ownership_Claim',
               'Gross_Revenue', 'Amount_Payable', 'Video_IDs', 'Video_views']
# Header row of the summary CSV.
SUMMARY_HEAD = ['SummaryRecordId', 'DistributionChannel',
                'DistributionChannelDPID', 'CommercialModel', 'UseType',
                'Territory', 'ServiceDescription', 'Usages', 'Users',
                'Currency', 'NetRevenue', 'RightsController',
                'RightsControllerPartyId', 'AllocatedUsages', 'AmountPayable',
                'AllocatedNetRevenue']


def _is_positive(value):
    """Return True when value parses as a number greater than zero."""
    try:
        return float(value) > 0
    except (TypeError, ValueError):
        return False


class DsrfConverter(object):
    """Converts DSRF 3.0 to YouTube CSV."""

    def __init__(self, dsrf2csv_arg):
        """Derive the report and summary output file names from the input path.

        Args:
          dsrf2csv_arg: path of the gzipped DSR input file.
        """
        self.dsrf_filename = dsrf2csv_arg
        dsrf_path, filename = os.path.split(self.dsrf_filename)
        # Debug aid kept from the original; it referenced the bare name
        # dsrf_filename, which does not exist and raised NameError.
        print(self.dsrf_filename)
        input("Press Enter to continue...")
        self.report_outfilename = os.path.join(dsrf_path, filename.replace(
            'DSR', 'Report').replace('tsv', 'csv'))
        self.summary_outfilename = os.path.join(dsrf_path, filename.replace(
            'DSR', 'Summary').replace('tsv.gz', 'csv'))

    def parse_blocks(self, reader):
        """Generator for parsing all the blocks from the file.

        Args:
          reader: the handler of the input file

        Yields:
          block_lines: A full block as a list of rows.
        """
        block_lines = []
        current_block = None
        for line in reader:
            # Blank rows have no record type; skip them like the ignored types.
            if not line or line[0] in SKIP_ROWS:
                continue
            # Exit condition
            if line[0] == 'FOOT':
                yield block_lines
                # A plain return ends the generator; raising StopIteration
                # inside a generator becomes RuntimeError under PEP 479.
                return
            line_block_number = int(line[1])
            if current_block is None:
                # Initialize
                current_block = line_block_number
            if line_block_number > current_block:
                # End of block, yield and build a new one
                yield block_lines
                block_lines = []
                current_block = line_block_number
            block_lines.append(line)
        # Also return last block
        yield block_lines

    def process_single_block(self, block):
        """Handles a single block in the DSR report.

        Args:
          block: Block as a list of lines.

        Returns:
          (summary_rows, report_row) tuple.
        """
        views = 0
        gross_revenue = 0
        summary_rows = []
        owners_data = {}
        # Create an ordered dictionary with a key for every column.
        report_row_dict = collections.OrderedDict(
            [(column_name.lower(), '') for column_name in REPORT_HEAD])
        for line in block:
            if line[0] == 'SY02':  # Save the financial Summary
                summary_rows.append(line[1:])
                continue
            if line[0] == 'AS01':  # Sound Recording information
                report_row_dict['asset_id'] = line[3]
                report_row_dict['asset_title'] = line[5]
                report_row_dict['asset_artist'] = line[7]
                report_row_dict['asset_isrc'] = line[4]
            if line[0] == 'MW01':  # Composition information
                report_row_dict['mw_asset_id'] = line[2]
                report_row_dict['mw_title'] = line[4]
                report_row_dict['mw_iswc'] = line[3]
                report_row_dict['mw_writers'] = line[6]
            if line[0] == 'RU01':  # Video level information
                report_row_dict['video_ids'] = line[3]
                report_row_dict['video_views'] = line[4]
            if line[0] == 'SU03':  # Usage data of Sound Recording Asset
                # Summing up views and revenues for each sub-period
                views += int(line[5])
                gross_revenue += float(line[6])
                report_row_dict['views'] = views
                report_row_dict['gross_revenue'] = gross_revenue
            if line[0] == 'LI01':  # Ownership information
                # if we already have parsed a LI01 line with that owner
                if line[3] in owners_data:
                    # keep only the latest ownership
                    owners_data[line[3]]['ownership'] = line[6]
                    owners_data[line[3]]['amount_payable'] += float(line[9])
                else:
                    # need to create the entry for that owner
                    data_dict = {'custom_id': line[5],
                                 'ownership': line[6],
                                 'amount_payable': float(line[9])}
                    owners_data[line[3]] = data_dict
        # get rid of owners which do not have an ownership or an amount payable
        # (ownership is kept as text, so it is parsed before being compared)
        owners_to_write = [o for o in owners_data
                           if (_is_positive(owners_data[o]['ownership'])
                               and owners_data[o]['amount_payable'] > 0)]
        report_row_dict['owner_name'] = '|'.join(owners_to_write)
        report_row_dict['mw_custom_id'] = '|'.join(
            [owners_data[o]['custom_id'] for o in owners_to_write])
        report_row_dict['ownership_claim'] = '|'.join(
            [owners_data[o]['ownership'] for o in owners_to_write])
        report_row_dict['amount_payable'] = '|'.join(
            [str(owners_data[o]['amount_payable']) for o in owners_to_write])
        # Sanity check. The number of values must match the number of columns.
        assert len(report_row_dict) == len(REPORT_HEAD), 'Row is wrong size :/'
        return summary_rows, report_row_dict

    def run(self):
        """Convert the input file into the report and summary CSV files.

        Partially written outputs are removed at interpreter exit when the
        conversion did not run to completion.
        """
        finished = False

        def removeFiles():
            # Runs at exit; only cleans up after an unfinished conversion.
            if not finished:
                for path in (self.report_outfilename,
                             self.summary_outfilename):
                    # An output may never have been created, e.g. when the
                    # input file could not be opened.
                    if os.path.exists(path):
                        os.unlink(path)

        atexit.register(removeFiles)
        # NOTE(review): binary file modes suit the csv module on Python 2;
        # Python 3 would presumably need text mode with newline='' - confirm
        # the interpreter this script targets.
        with gzip.open(self.dsrf_filename, 'rb') as dsr_file, gzip.open(
            self.report_outfilename, 'wb') as report_file, open(
                self.summary_outfilename, 'wb') as summary_file:
            dsr_reader = csv.reader(dsr_file, delimiter='\t')
            report_writer = csv.writer(report_file)
            summary_writer = csv.writer(summary_file)
            report_writer.writerow(REPORT_HEAD)
            summary_writer.writerow(SUMMARY_HEAD)
            for block in self.parse_blocks(dsr_reader):
                summary_rows, report_row = self.process_single_block(block)
                report_writer.writerow(report_row.values())
                summary_writer.writerows(summary_rows)
        finished = True
if __name__ == '__main__':
    # Command line: one required positional, the path of the DSR file.
    cli = argparse.ArgumentParser(
        description='Converts DDEX DSRF UGC profile reports to Standard CSV.')
    cli.add_argument_group('Required arguments').add_argument(
        'dsrf2csv_arg', type=str)
    DsrfConverter(cli.parse_args().dsrf2csv_arg).run()
In general to execute a python script in powershell like this .\script.py has two requirements:
Add the path to the python binaries to your %path%: $env:Path = $env:Path + ";C:\Path\to\python\binaries\"
Add the extension .py to the PATHEXT environment variable: $env:PATHEXT += ";.PY"
The latter will only be used in the current powershell session. If you want to add it to all future powershell sessions, add this line to your powershell profile (f.e. notepad $profile).
In your case there is also an issue with the Python script you are trying to execute. def __init__(self) is a constructor and must be defined inside a class, like:
class Foo:
    """Minimal example: __init__ is the constructor and lives inside a class."""

    def __init__(self):
        # Call form of print: valid on both Python 2 and Python 3.
        print("foo")
Did you give us your complete script?

Cherrypy and Parsing XML Data from multiple files

So this is sort of a piggy-back post of another question I had. I've successfully pulled data from multiple xml files and am able to get the data to display within the terminal using the print function, but when I try to use the return function to show the data in the browser, I only get the data from the first file. Any ideas on why I only get data from the first file rather than all of them? Thanks!
from xml.dom.minidom import parse, parseString
import os, glob, re
import cherrypy
# CherryPy application object with two handlers: index and exit.
# NOTE(review): the indentation of this listing was lost; the nesting
# described below is inferred from the surrounding discussion.
class Root(object):
# Build one summary string per XML file found under `path`.
def index(self):
# NOTE(review): backslashes in this literal are not escaped; a raw string
# would presumably be safer - confirm.
path = 'C:\Vestigo\XML'
# Number of matching XML files, printed to the terminal.
TOTALXML = len(glob.glob(os.path.join(path, '*.xml')))
print TOTALXML
i = 0
for XMLFile in glob.glob(os.path.join(path, '*.xml')):
xmldoc = parse(XMLFile)
# Text of the first element with each tag name.
order_number = xmldoc.getElementsByTagName('Extrinsic')[0].firstChild.data
order_name = xmldoc.getElementsByTagName('DeliverTo')[0].firstChild.data
street1 = xmldoc.getElementsByTagName('Street1')[0].firstChild.data
state = xmldoc.getElementsByTagName('State')[0].firstChild.data
zip_code = xmldoc.getElementsByTagName('PostalCode')[0].firstChild.data
# OUTPUTi is a single variable; the counter i is not part of its name, so
# each iteration overwrites the previous value.
OUTPUTi = order_number+' '+order_name+' '+street1+' '+state+' '+zip_code
i += 1
print OUTPUTi
# NOTE(review): only one file's data is returned to the browser; whether this
# return sits inside or after the loop cannot be told from this listing.
return (OUTPUTi, """<br><br>Quit""")
# Expose the handler to CherryPy.
index.exposed = True
# Raise SystemExit(0) when this handler is requested.
def exit(self):
raise SystemExit(0)
exit.exposed = True
def start():
    """Mount Root at '/', start the engine and block until it stops.

    webbrowser.open is passed as the start callback together with the
    local URL.
    """
    import webbrowser

    home_url = 'http://localhost:8080/'
    cherrypy.tree.mount(Root(), '/')
    cherrypy.engine.start_with_callback(webbrowser.open, (home_url,))
    cherrypy.engine.block()


if __name__ == '__main__':
    start()
You are not collecting the data anywhere; you store everything in a variable named OUTPUTi, then only return the last iteration of that variable. Python does not magically make that variable use the i counter.
Use a list to collect the strings:
# Replacement for the body of index(): collect one string per XML file.
TOTALXML = len(glob.glob(os.path.join(path, '*.xml')))
print TOTALXML
# One entry per processed file.
OUTPUT = []
for XMLFile in glob.glob(os.path.join(path, '*.xml')):
xmldoc = parse(XMLFile)
order_number = xmldoc.getElementsByTagName('Extrinsic')[0].firstChild.data
order_name = xmldoc.getElementsByTagName('DeliverTo')[0].firstChild.data
street1 = xmldoc.getElementsByTagName('Street1')[0].firstChild.data
state = xmldoc.getElementsByTagName('State')[0].firstChild.data
zip_code = xmldoc.getElementsByTagName('PostalCode')[0].firstChild.data
OUTPUT.append(order_number+' '+order_name+' '+street1+' '+state+' '+zip_code)
# Echo the entry that was just added.
print OUTPUT[-1]
# NOTE(review): joining with '' puts the records back to back with no
# separator; a '<br>' separator would presumably read better - confirm.
OUTPUT = ''.join(OUTPUT)
return (OUTPUT, """<br><br>Quit""")

Editing text file through command line argument in Python

I want to edit a text file by passing an integer via a command line argument in Python. However, my code is not working; can someone point out where I am going wrong?
import sys, argparse
# Same path as the original literal, written as a raw string so that no
# backslash can be misread as an escape sequence.
DEFAULT_INI_PATH = r'C:\Users\Abcd\Desktop\Omnet\omnetpp.ini'


def main(argv=None):
    """Append client/gateway address lines to the ini file.

    Args:
        argv: command-line arguments without the program name; defaults to
            sys.argv[1:].  The positional argument is the number of clients;
            the optional --ini-file selects the file to append to.
    """
    if argv is None:
        argv = sys.argv[1:]
    p = argparse.ArgumentParser(description="Editing omnetpp.ini")
    p.add_argument('arg1', action='store', default=1, type=int, help="number of clients")
    p.add_argument('--ini-file', default=DEFAULT_INI_PATH,
                   help="ini file to append to (default: %(default)s)")
    args = p.parse_args(argv)

    lines = []
    for i in range(args.arg1):
        lines.append('*.voipClient[{0}].udpApp[{0}].destAddresses = "voipGateway"\n'.format(i))
        lines.append('*.voipGateway.udpApp[{0}].destAddresses = "voipClient[{0}]"\n'.format(i))

    # Open once and close once, after every line has been written; closing
    # inside the loop makes the second iteration fail.
    with open(args.ini_file, 'a') as f:
        f.writelines(lines)


# Without this call the function is only defined and the script does nothing.
if __name__ == '__main__':
    main()
If integer number 5 is passed via command line argument then it should add following lines in text file, which is not happening
Output
*.voipClient[0].udpApp[0].destAddresses = "voipGateway"
*.voipGateway.udpApp[0].destAddresses = "voipClient[0]"
*.voipClient[1].udpApp[1].destAddresses = "voipGateway"
*.voipGateway.udpApp[1].destAddresses = "voipClient[1]"
*.voipClient[2].udpApp[2].destAddresses = "voipGateway"
*.voipGateway.udpApp[2].destAddresses = "voipClient[2]"
*.voipClient[3].udpApp[3].destAddresses = "voipGateway"
*.voipGateway.udpApp[3].destAddresses = "voipClient[3]"
*.voipClient[4].udpApp[4].destAddresses = "voipGateway"
*.voipGateway.udpApp[4].destAddresses = "voipClient[4]"
I am following these steps:
Code is saved in test.py
From command line C:\Users\Abcd\Desktop>python test.py 5
Don't close the file in the loop, as soon as it is closed you cannot write to it anymore (in fact, an error should be thrown if you try to write to a closed file object).
Instead, close it after the loop.
Also, to put each sentence on a new line, end the string with the newline symbol \n (sort of pressing "ENTER").
# Open the ini file in append mode once, before the loop.
f = open('C:\\Users\Abcd\Desktop\Omnet\omnetpp.ini', 'a')
for i in range(n):
# Two lines per client index; each ends with a newline.
f.write('*.voipClient['+str(i)+'].udpApp['+str(i)+'].destAddresses = "voipGateway"\n')
f.write('*.voipGateway.udpApp['+str(i)+'].destAddresses = "voipClient['+str(i)+']"\n')
# Close only after the loop has finished (this line belongs outside the loop).
f.close()
EDIT
By the way, as Rostyslav Dzinko said in the comments, the way you defined your code is not how you define a main function. In fact, try something like this (see also this SO question):
# Runs only when the file is executed as a script.
if __name__ == '__main__':
p = argparse.ArgumentParser(description="Editing omnetpp.ini")
p.add_argument('arg1', action='store', default= 1, type=int, help="number of clients")
# With no argument list given, parse_args() reads the command line.
args = p.parse_args()

Successive multiprocessing

I am filtering huge text files using multiprocessing.py. The code basically opens the text files, works on it, then closes it.
Thing is, I'd like to be able to launch it successively on multiple text files. Hence, I tried to add a loop, but for some reason it doesn't work (while the code works on each file). I believe this is an issue with:
if __name__ == '__main__':
However, I am looking for something else. I tried to create a Launcher and a LauncherCount files like this:
LauncherCount.py:
def setLauncherCount(n):
    """Store n in this module's global variable LauncherCount."""
    globals()['LauncherCount'] = n
and,
Launcher.py:
# Launcher script: set the counter, run the filter script, repeat.
import os
import LauncherCount
# This changes LauncherCount only inside the launcher's own process.
LauncherCount.setLauncherCount(0)
# os.system runs the command as a separate process.
# NOTE(review): the launched script presumably imports LauncherCount afresh
# and therefore does not see the value set above - confirm.
os.system("OrientedFilterNoLoop.py")
LauncherCount.setLauncherCount(1)
os.system("OrientedFilterNoLoop.py")
...
I import LauncherCount.py, and use LauncherCount.LauncherCount as my loop index.
Of course, this doesn't work either: it changes LauncherCount.LauncherCount only inside the Launcher.py process, so the copy of LauncherCount imported by each separately launched script is not affected.
Is there any way to edit globally a variable in an imported file? Or, is there any way to do this in any other way? What I need is running a code multiple times, in changing one value, and without using any loop apparently.
Thanks!
Edit: Here is my main code if necessary. Sorry for the bad style ...
import multiprocessing
import config
import time
import LauncherCount
class Filter:
    """ Filtering methods """

    def __init__(self):
        print("launching methods")

    def LatLong(self, line):
        """Return [latitude, longitude] read from a comma-separated line.

        Latitude is the second field and longitude the third.  [0, 0] is
        returned when either field is missing or is not a number.
        """
        # Only the first three commas matter; the rest of the line is untouched.
        fields = line.split(',', 3)
        try:
            return [float(fields[1]), float(fields[2])]
        except (IndexError, ValueError):
            return [0, 0]

    def LatLongFilter(self, LatLongList, tupleFilter):
        """Tell whether a [lat, long] pair lies inside a rectangle.

        Args:
            LatLongList: [latitude, longitude].
            tupleFilter: (minLat, maxLat, minLong, maxLong), bounds inclusive.

        Returns:
            True when both coordinates are within the bounds, else False.
        """
        # Parentheses let the condition span two lines; the original line
        # break after 'and' was a syntax error.
        return (tupleFilter[0] <= LatLongList[0] <= tupleFilter[1]
                and tupleFilter[2] <= LatLongList[1] <= tupleFilter[3])

    def writeLine(self, key, line):
        """Write line to the output file stored for key in the global filterDico."""
        filterDico[key][1].write(line)
def filteringProcess(dico):
    """Copy each line of the global readFile to every matching output.

    Args:
        dico: maps a key to a sequence whose first item is the
            (minLat, maxLat, minLong, maxLong) rectangle for that key.
    """
    line_filter = Filter()
    while True:
        try:
            current = readFile.readline()
        except ValueError:
            # presumably raised once readFile has been closed - confirm
            break
        if not current:
            # An empty string marks the end of the file.
            break
        if len(current) < 35:
            # Lines this short are treated as malformed and dropped.
            continue
        position = line_filter.LatLong(current)
        for key, entry in dico.items():
            if line_filter.LatLongFilter(position, entry[0]):
                line_filter.writeLine(key, current)
###########################################################################
# Main
###########################################################################
# Open read files:
# Input file selected by the launcher counter.
readFile = open(config.readFileList[LauncherCount.LauncherCount][1], 'r')
# Generate writing files:
pathDico = {}
filterDico = config.filterDico
# Create outputs
for key in filterDico:
    # Parentheses keep the concatenation in one expression; without them the
    # second line is a separate statement and the suffix is never appended.
    output_Name = (config.readFileList[LauncherCount.LauncherCount][0][:-4]
                   + '_' + key + '.log')
    pathDico[output_Name] = config.writingFolder + output_Name
    # Each entry becomes [original filter value, open output file].
    filterDico[key] = [filterDico[key], open(pathDico[output_Name], 'w')]
p = []
CPUCount = multiprocessing.cpu_count()
CPURange = range(CPUCount)
startingTime = time.localtime()
if __name__ == '__main__':
    ### Create and start processes:
    for i in CPURange:
        p.append(multiprocessing.Process(target = filteringProcess ,
                                         args = (filterDico,)))
        p[i].start()
    ### Wait for every worker; join() blocks without the CPU-burning
    ### polling loop over is_alive().
    for worker in p:
        worker.join()
    readFile.close()
    for key in config.filterDico:
        config.filterDico[key][1].close()
        print(key,"is Done!")
    endTime = time.localtime()
    print("Process started at:",startingTime)
    print("And ended at:",endTime)
To process groups of files in sequence while working on files within a group in parallel:
#!/usr/bin/env python
from multiprocessing import Pool
def work_on(args):
    """Process a single file.

    args is an (index, filename) pair; the index is returned unchanged.
    """
    index, name = args
    print("working on %s" % (name,))
    return index
def files():
    """Generate input filenames to work on."""
    # NOTE: the names could equally come from a file, glob.glob, etc.
    for name in ("inputfile1", "inputfile2"):
        yield name
def process_files(pool, filenames):
    """Run work_on over filenames on the pool and print every result.

    Returns only after all results have been received.
    """
    results = pool.imap_unordered(work_on, enumerate(filenames))
    # NOTE: results arrive in completion order, not in input order.
    for outcome in results:
        print(outcome)
def main():
    """Process several groups of files one after another on a single pool."""
    p = Pool()
    try:
        # to do "successive" multiprocessing
        for filenames in [files(), ['other', 'bunch', 'of', 'files']]:
            process_files(p, filenames)
    finally:
        # Shut the worker processes down instead of leaving them to the
        # interpreter's exit handling.
        p.close()
        p.join()


if __name__=="__main__":
    main()
Each process_files() call starts only after the previous one has completed, i.e., files from different calls to process_files() are not processed in parallel.

Categories

Resources