How should I Execute this Python Script in powershell - python

I've solved the problem. The problem is related my %PATH%
I have a script which work with an argument. In powershell I've tried the command you can see below;
.\dsrf2csv.py C:\Python27\a\DSR_testdata.tsv.gz
And also you can see the script below,
def __init__(self, dsrf2csv_arg):
self.dsrf_filename = dsrf2csv_arg
dsrf_path, filename = os.path.split(self.dsrf_filename)
self.report_outfilename = os.path.join(dsrf_path, filename.replace('DSR', 'Report').replace('tsv', 'csv'))
self.summary_outfilename = os.path.join(dsrf_path, filename.replace('DSR', 'Summary').replace('tsv.gz', 'csv'))
But when I try to run this script there is no any action. How should I run this script with a file? (example : testdata.tsv.gz)
Note : Script and file in same location.
Full Scritp;
import argparse
import atexit
import collections
import csv
import gzip
import os
SKIP_ROWS = ['HEAD', '#HEAD', '#SY02', '#SY03', '#AS01', '#MW01', '#RU01',
'#SU03', '#LI01', '#FOOT']
REPORT_HEAD = ['Asset_ID', 'Asset_Title', 'Asset_Artist', 'Asset_ISRC',
'MW_Asset_ID', 'MW_Title', 'MW_ISWC', 'MW_Custom_ID',
'MW_Writers', 'Views', 'Owner_name', 'Ownership_Claim',
'Gross_Revenue', 'Amount_Payable', 'Video_IDs', 'Video_views']
SUMMARY_HEAD = ['SummaryRecordId', 'DistributionChannel',
'DistributionChannelDPID', 'CommercialModel', 'UseType',
'Territory', 'ServiceDescription', 'Usages', 'Users',
'Currency', 'NetRevenue', 'RightsController',
'RightsControllerPartyId', 'AllocatedUsages', 'AmountPayable',
'AllocatedNetRevenue']
class DsrfConverter(object):
"""Converts DSRF 3.0 to YouTube CSV."""
def __init__(self, dsrf2csv_arg):
""" Creating output file names """
self.dsrf_filename = dsrf2csv_arg
dsrf_path, filename = os.path.split(self.dsrf_filename)
print(dsrf_filename)
input("Press Enter to continue...")
self.report_outfilename = os.path.join(dsrf_path, filename.replace(
'DSR', 'Report').replace('tsv', 'csv'))
self.summary_outfilename = os.path.join(dsrf_path, filename.replace(
'DSR', 'Summary').replace('tsv.gz', 'csv'))
def parse_blocks(self, reader):
"""Generator for parsing all the blocks from the file.
Args:
reader: the handler of the input file
Yields:
block_lines: A full block as a list of rows.
"""
block_lines = []
current_block = None
for line in reader:
if line[0] in SKIP_ROWS:
continue
# Exit condition
if line[0] == 'FOOT':
yield block_lines
raise StopIteration()
line_block_number = int(line[1])
if current_block is None:
# Initialize
current_block = line_block_number
if line_block_number > current_block:
# End of block, yield and build a new one
yield block_lines
block_lines = []
current_block = line_block_number
block_lines.append(line)
# Also return last block
yield block_lines
def process_single_block(self, block):
"""Handles a single block in the DSR report.
Args:
block: Block as a list of lines.
Returns:
(summary_rows, report_row) tuple.
"""
views = 0
gross_revenue = 0
summary_rows = []
owners_data = {}
# Create an ordered dictionary with a key for every column.
report_row_dict = collections.OrderedDict(
[(column_name.lower(), '') for column_name in REPORT_HEAD])
for line in block:
if line[0] == 'SY02': # Save the financial Summary
summary_rows.append(line[1:])
continue
if line[0] == 'AS01': # Sound Recording information
report_row_dict['asset_id'] = line[3]
report_row_dict['asset_title'] = line[5]
report_row_dict['asset_artist'] = line[7]
report_row_dict['asset_isrc'] = line[4]
if line[0] == 'MW01': # Composition information
report_row_dict['mw_asset_id'] = line[2]
report_row_dict['mw_title'] = line[4]
report_row_dict['mw_iswc'] = line[3]
report_row_dict['mw_writers'] = line[6]
if line[0] == 'RU01': # Video level information
report_row_dict['video_ids'] = line[3]
report_row_dict['video_views'] = line[4]
if line[0] == 'SU03': # Usage data of Sound Recording Asset
# Summing up views and revenues for each sub-period
views += int(line[5])
gross_revenue += float(line[6])
report_row_dict['views'] = views
report_row_dict['gross_revenue'] = gross_revenue
if line[0] == 'LI01': # Ownership information
# if we already have parsed a LI01 line with that owner
if line[3] in owners_data:
# keep only the latest ownership
owners_data[line[3]]['ownership'] = line[6]
owners_data[line[3]]['amount_payable'] += float(line[9])
else:
# need to create the entry for that owner
data_dict = {'custom_id': line[5],
'ownership': line[6],
'amount_payable': float(line[9])}
owners_data[line[3]] = data_dict
# get rid of owners which do not have an ownership or an amount payable
owners_to_write = [o for o in owners_data
if (owners_data[o]['ownership'] > 0
and owners_data[o]['amount_payable'] > 0)]
report_row_dict['owner_name'] = '|'.join(owners_to_write)
report_row_dict['mw_custom_id'] = '|'.join([owners_data[o]
['custom_id']
for o in owners_to_write])
report_row_dict['ownership_claim'] = '|'.join([owners_data[o]
['ownership']
for o in owners_to_write])
report_row_dict['amount_payable'] = '|'.join([str(owners_data[o]
['amount_payable'])
for o in owners_to_write])
# Sanity check. The number of values must match the number of columns.
assert len(report_row_dict) == len(REPORT_HEAD), 'Row is wrong size :/'
return summary_rows, report_row_dict
def run(self):
finished = False
def removeFiles():
if not finished:
os.unlink(self.report_outfilename)
os.unlink(self.summary_outfilename)
atexit.register(removeFiles)
with gzip.open(self.dsrf_filename, 'rb') as dsr_file, gzip.open(
self.report_outfilename, 'wb') as report_file, open(
self.summary_outfilename, 'wb') as summary_file:
dsr_reader = csv.reader(dsr_file, delimiter='\t')
report_writer = csv.writer(report_file)
summary_writer = csv.writer(summary_file)
report_writer.writerow(REPORT_HEAD)
summary_writer.writerow(SUMMARY_HEAD)
for block in self.parse_blocks(dsr_reader):
summary_rows, report_row = self.process_single_block(block)
report_writer.writerow(report_row.values())
summary_writer.writerows(summary_rows)
finished = True
if __name__ == '__main__':
arg_parser = argparse.ArgumentParser(
description='Converts DDEX DSRF UGC profile reports to Standard CSV.')
required_args = arg_parser.add_argument_group('Required arguments')
required_args.add_argument('dsrf2csv_arg', type=str)
args = arg_parser.parse_args()
dsrf_converter = DsrfConverter(args.dsrf2csv_arg)
dsrf_converter.run()

In general to execute a python script in powershell like this .\script.py has two requirements:
Add the path to the python binaries to your %path%: $env:Path = $env:Path + ";C:\Path\to\python\binaries\"
Add the ending .py to the pathtext environment variable: $env:PATHEXT += ";.PY"
The latter will only be used in the current powershell session. If you want to add it to all future powershell sessions, add this line to your powershell profile (f.e. notepad $profile).
In your case there is also an issue with the python script you are trying to excute. def __init__(self) is an constructor for a class, like:
class Foo:
def __init__(self):
print "foo"
Did you give us your complete script?

Related

How to read contents of a LibreOffice writer annotation from a python macro

LibreOffice writer allows the user to insert annotations(notes/comments) within the text.
My issue is I am unable to find a method to access the contents of a line specific annotation.
The following python code looks for selected/highlighted text and then strips out everything except a formatted time code (e.g. 01:10:23 or 11:10) which it converts into seconds.
If no text has been selected, it selects the entire current line and attempts to find the time code. However, the time code could be in an annotation.
I have managed to get a list of all of the annotations within the document, commented out at the start of the code but it is of no use to me.
I have been unable to discover a method of divining
a) whether the current line has an annotation or
b) how to access its contents.
If anyone has managed to achieve this, I'd appreciate any pointers.
def fs2_GoToTimestamp(*args):
#get the doc from the scripting context which is made available to all scripts
desktop = XSCRIPTCONTEXT.getDesktop()
model = desktop.getCurrentComponent()
oSelected = model.getCurrentSelection()
#access annotations for the whole document
# oEnum = model.getTextFields().createEnumeration()
# cursor = desktop.getCurrentComponent().getCurrentController().getViewCursor()
# while oEnum.hasMoreElements():
# oField = oEnum.nextElement()
# cursor.gotoRange(oField,False)
# print (cursor.getPosition())
# if oField.supportsService('com.sun.star.text.TextField.Annotation'):
# print (oField.Content)
# x = oField.getAnchor()
# print (dir(x))
oText = ""
try: #Grab the text selected/highlighted
oSel = oSelected.getByIndex(0)
oText= oSel.getString()
except:pass
try:
if oText == "": # Nothing selected grab the whole line
cursor = desktop.getCurrentComponent().getCurrentController().getViewCursor()
cursor.gotoStartOfLine(False) #move cursor to start without selecting (False)
cursor.gotoEndOfLine(True) #now move cursor to end of line selecting all (True)
oSelected = model.getCurrentSelection()
oSel = oSelected.getByIndex(0)
oText= oSel.getString()
# Deselect line to avoid inadvertently deleting it on next keystroke
cursor.gotoStartOfLine(False)
except:pass
time = str(oText)
valid_chars=('0123456789:')
time = ''.join(char for char in time if char in valid_chars)
if time.count(":") == 1:
oM, oS = time.split(":")
oH = "00"
elif time.count(":") == 2:
oH,oM,oS = time.split(":")
else:
return None
if len(oS) != 2:
oS=oS[:2]
try:
secs = int(oS)
secs = secs + int(oM) * 60
secs = secs + int(oH) *3600
except:
return None
seek_instruction = 'seek'+str(secs)+'\n'
#Now do something with the seek instruction
Enumerate the annotations and use getAnchor() to find out where each is located. This answer is based on https://wiki.openoffice.org/wiki/Documentation/DevGuide/Text/Editing_Text#Text_Contents_Other_Than_Strings.
Your code is close to working.
while oEnum.hasMoreElements():
oField = oEnum.nextElement()
if oField.supportsService('com.sun.star.text.TextField.Annotation'):
xTextRange = oField.getAnchor()
cursor.gotoRange(xTextRange, False)
Instead of print (dir(x)), an introspection tool such as XrayTool or MRI will give better information. It makes the API docs easier to figure out.
With much needed help from Jim K a self answer is posted below. I have commented where I believe it will help most.
#!/usr/bin/python
from com.sun.star.awt.MessageBoxButtons import BUTTONS_OK
from com.sun.star.awt.MessageBoxType import INFOBOX
def fs2_GoToTimestamp(*args):
desktop = XSCRIPTCONTEXT.getDesktop()
model = desktop.getCurrentComponent()
oSelected = model.getCurrentSelection()
doc = XSCRIPTCONTEXT.getDocument()
parentwindow = doc.CurrentController.Frame.ContainerWindow
cursor = desktop.getCurrentComponent().getCurrentController().getViewCursor()
try:
CursorPos = cursor.getText().createTextCursorByRange(cursor)#Store original cursor position
except:# The cursor has been placed in the annotation not the text
mess = "Position cursor in the text\nNot the comment box"
heading = "Positioning Error"
MessageBox(parentwindow, mess, heading, INFOBOX, BUTTONS_OK)
return None
oText = ""
try: #Grab the text selected/highlighted
oSel = oSelected.getByIndex(0)
oText= oSel.getString()
except:pass
try:
if oText == "": # Nothing selected grab the whole line
store_position = 0
cursor.gotoStartOfLine(False) #move cursor to start without selecting (False)
cursor.gotoEndOfLine(True) #now move cursor to end of line selecting all (True)
oSelected = model.getCurrentSelection()
oSel = oSelected.getByIndex(0)
oText= oSel.getString()
y = cursor.getPosition()
store_position = y.value.Y
# Deselect line to avoid inadvertently deleting it on next user keystroke
cursor.gotoStartOfLine(False)
if oText.count(":") == 0:
# Still nothing found check for an annotation at this location
#enumerate through annotations for the whole document
oEnum = model.getTextFields().createEnumeration()
while oEnum.hasMoreElements():
oField = oEnum.nextElement()
if oField.supportsService('com.sun.star.text.TextField.Annotation'):
anno_at = oField.getAnchor()
cursor.gotoRange(anno_at,False)
pos = cursor.getPosition()
if pos.value.Y == store_position: # Found an annotation at this location
oText = oField.Content
break
# Re-set cursor to original position after enumeration & deselect
cursor.gotoRange(CursorPos,False)
except:pass
time = str(oText)
valid_chars=('0123456789:')
time = ''.join(char for char in time if char in valid_chars) #Strip out all invalid characters
if time.count(":") == 1: # time 00:00
oM, oS = time.split(":")
oH = "00"
elif time.count(":") == 2: # time 00:00:00
oH,oM,oS = time.split(":")
else:
return None
if len(oS) != 2: # in case time includes tenths 00:00.0 reduce to whole seconds
oS=oS[:2]
try:
secs = int(oS)
secs = secs + int(oM) * 60
secs = secs + int(oH) *3600
except:
return None
seek_instruction = 'seek'+str(secs)+'\n'
print("Seconds",str(secs))
# Do something with seek_instruction
def MessageBox(ParentWindow, MsgText, MsgTitle, MsgType, MsgButtons):
ctx = XSCRIPTCONTEXT.getComponentContext()
sm = ctx.ServiceManager
si = sm.createInstanceWithContext("com.sun.star.awt.Toolkit", ctx)
mBox = si.createMessageBox(ParentWindow, MsgType, MsgButtons, MsgTitle, MsgText)
mBox.execute()

Python help, reading and writing to a txt file

I have posted the relevant part of my code below. Before that are just load functions, which I am pretty sure have no errors.
I am recieving error
IndexError: list index out of range( "namestaj["Naziv"] = deon[1]")
Does anyone see something out of order?
#load furniture from a txt file
def ucitajNamestaj():
listaNamestaja = open("namestaj.txt", "r").readlines()
namestaj = []
for red in listaNamestaja:
namestaj.append(stringToNamestaj(red))
return namestaj
#String to Furniture, dictionary
def stringToNamestaj(red):
namestaj = {}
deon = red.strip().split("|")
namestaj["Sifra"] = deon[0]
namestaj["Naziv"] = deon[1]
namestaj["Boja"] = deon[2]
namestaj["Kolicina"] = int(deon[3])
namestaj["Cena"] = float(deon[4])
namestaj["Kategorija"] = deon[5]
namestaj["Dostupan"] = deon[6]
return namestaj
Couple of things first, try always to provide a mcve and make sure you use properly the SO code directives, otherwise your question is unreadable.
Now, probably what's happening is your file has some empty lines and you're not skipping those, try this:
def ucitajNamestaj():
listaNamestaja = open("namestaj.txt", "r").readlines()
namestaj = []
for red in listaNamestaja:
if red.strip() == "":
continue
namestaj.append(stringToNamestaj(red))
return namestaj
def stringToNamestaj(red):
namestaj = {}
deon = red.strip().split("|")
namestaj["Sifra"] = deon[0]
namestaj["Naziv"] = deon[1]
namestaj["Boja"] = deon[2]
namestaj["Kolicina"] = int(deon[3])
namestaj["Cena"] = float(deon[4])
namestaj["Kategorija"] = deon[5]
namestaj["Dostupan"] = deon[6]
return namestaj

Copy parameters into list

I am trying to copy parameters passed into a python script to a file. Here is the parameters.
["0013","1","1","\"john.dow#gmail.com\"","1","P123-ND 10Q","10Q H??C"]
I understand that there is a buffer problem and I am getting bad data into my parameters. However, I do not have control over what is being passed in. I am trying to copy, starting at the 5th parameter, the parameters into a file.
f = open(in_file_name, 'w')
for x in range(5, len(arg_list)):
f.write(arg_list[x] + '\n')
f.close()
The result of the file is below:
P123-ND 10Q
10Q H??C
Here is what it should be:
P123-ND
10Q
How can I not include the bad data? What is happening to the spaces between the valid information and the bad information?
As requested, here is the full program:
#!/bin/python
class Argument_Indices:
PRINTER_INDEX = 0
AREA_INDEX = 1
LABEL_INDEX = 2
EMAIL_INDEX = 3
RUN_TYPE_INDEX = 4
import argparse
import json
import os
from subprocess import call
import sys
from time import strftime
def _handle_args():
''' Setup and run argpars '''
parser = argparse.ArgumentParser(description='Set environment variables for and to call Program')
parser.add_argument('time_to_run', default='NOW', choices=['NOW', 'EOP'], help='when to run the report')
parser.add_argument('arguments', nargs='+', help='the remaining command line arguments')
return parser.parse_args()
def _proces_program(arg_list):
time_stamp = strftime("%d_%b_%Y_%H_%M_%S")
printer = arg_list[Argument_Indices.PRINTER_INDEX]
area = arg_list[Argument_Indices.AREA_INDEX]
label = arg_list[Argument_Indices.LABEL_INDEX]
in_file_name = "/tmp/program{0}.inp".format(time_stamp)
os.environ['INPUT_FILE'] = in_file_name
f = open(in_file_name, 'w')
for x in range(5, len(arg_list)):
f.write(arg_list[x])
f.close()
call(['./Program.bin', printer, area, label])
os.remove(in_file_name)
def main():
''' Main Function '''
arg_list = None
args = _handle_args()
if len(args.arguments) < 1:
print('Missing name of input file')
return -1
with open(args.arguments[0]) as input_file:
arg_list = json.load(input_file)
_process_program(arg_list)
return 0
if __name__ == '__main__':
if main() != 0:
print('Program run failed')
sys.exit()
For your exact case (where you're getting duplicated parameters received with some spaces in between) this would work:
received_param_list = ["0013","1","1","\"john.dow#gmail.com\"","1","P123-ND 10Q","10Q H??C"]
arg_list = [i.split(" ")[0] for i in received_param_list]
last_param = received_param_list[-1].split()[-1]
if last_param != arg_list[-1]:
arg_list.append(last_param)
for x in range(5, len(arg_list)):
print (arg_list[x])
Although there might be another simpler way

unknown error in jython when use startswith()

I'm using python to analyze some records bib and ris files. I made two functions for each type. The first function is the one you see below:
def limpiarlineasris(self, data):
cont = data
dic = cont.splitlines()
cont = ""
con = []
i = 0
for a in dic:
if len(a) != 0:
con.append(a)
for a in con:
cont = cont + a + "\n"
return cont
That works well and I can compile without problem. The problem arises when I write the second function see below:
def limpiarlineasbib(self, data):
cont = data
dic = cont.splitlines()
cont = ""
con = []
separador = "°-°-°"
for a in dic:
if len(a)!= 0:
if a.startswith('#'):
con.append(separador)
else:
con.append(a)
for a in con:
cont = cont + a + "\n"
return cont
When building the first function no problem. But when I compile the second compiler shows me an error but does not tell me exactly what or where it is because I am using plyjy a jar to create Jython objects, and the console only shows me an exception Plyjy without the line where it occurs. I'm using Netbeans to compile

Successive multiprocessing

I am filtering huge text files using multiprocessing.py. The code basically opens the text files, works on it, then closes it.
Thing is, I'd like to be able to launch it successively on multiple text files. Hence, I tried to add a loop, but for some reason it doesn't work (while the code works on each file). I believe this is an issue with:
if __name__ == '__main__':
However, I am looking for something else. I tried to create a Launcher and a LauncherCount files like this:
LauncherCount.py:
def setLauncherCount(n):
global LauncherCount
LauncherCount = n
and,
Launcher.py:
import os
import LauncherCount
LauncherCount.setLauncherCount(0)
os.system("OrientedFilterNoLoop.py")
LauncherCount.setLauncherCount(1)
os.system("OrientedFilterNoLoop.py")
...
I import LauncherCount.py, and use LauncherCount.LauncherCount as my loop index.
Of course, this doesn't work too as it edits the variable LauncherCount.LauncherCount locally, so it won't be edited in the imported version of LauncherCount.
Is there any way to edit globally a variable in an imported file? Or, is there any way to do this in any other way? What I need is running a code multiple times, in changing one value, and without using any loop apparently.
Thanks!
Edit: Here is my main code if necessary. Sorry for the bad style ...
import multiprocessing
import config
import time
import LauncherCount
class Filter:
""" Filtering methods """
def __init__(self):
print("launching methods")
# Return the list: [Latitude,Longitude] (elements are floating point numbers)
def LatLong(self,line):
comaCount = []
comaCount.append(line.find(','))
comaCount.append(line.find(',',comaCount[0] + 1))
comaCount.append(line.find(',',comaCount[1] + 1))
Lat = line[comaCount[0] + 1 : comaCount[1]]
Long = line[comaCount[1] + 1 : comaCount[2]]
try:
return [float(Lat) , float(Long)]
except ValueError:
return [0,0]
# Return a boolean:
# - True if the Lat/Long is within the Lat/Long rectangle defined by:
# tupleFilter = (minLat,maxLat,minLong,maxLong)
# - False if not
def LatLongFilter(self,LatLongList , tupleFilter) :
if tupleFilter[0] <= LatLongList[0] <= tupleFilter[1] and
tupleFilter[2] <= LatLongList[1] <= tupleFilter[3]:
return True
else:
return False
def writeLine(self,key,line):
filterDico[key][1].write(line)
def filteringProcess(dico):
myFilter = Filter()
while True:
try:
currentLine = readFile.readline()
except ValueError:
break
if len(currentLine) ==0: # Breaks at the end of the file
break
if len(currentLine) < 35: # Deletes wrong lines (too short)
continue
LatLongList = myFilter.LatLong(currentLine)
for key in dico:
if myFilter.LatLongFilter(LatLongList,dico[key][0]):
myFilter.writeLine(key,currentLine)
###########################################################################
# Main
###########################################################################
# Open read files:
readFile = open(config.readFileList[LauncherCount.LauncherCount][1], 'r')
# Generate writing files:
pathDico = {}
filterDico = config.filterDico
# Create outputs
for key in filterDico:
output_Name = config.readFileList[LauncherCount.LauncherCount][0][:-4]
+ '_' + key +'.log'
pathDico[output_Name] = config.writingFolder + output_Name
filterDico[key] = [filterDico[key],open(pathDico[output_Name],'w')]
p = []
CPUCount = multiprocessing.cpu_count()
CPURange = range(CPUCount)
startingTime = time.localtime()
if __name__ == '__main__':
### Create and start processes:
for i in CPURange:
p.append(multiprocessing.Process(target = filteringProcess ,
args = (filterDico,)))
p[i].start()
### Kill processes:
while True:
if [p[i].is_alive() for i in CPURange] == [False for i in CPURange]:
readFile.close()
for key in config.filterDico:
config.filterDico[key][1].close()
print(key,"is Done!")
endTime = time.localtime()
break
print("Process started at:",startingTime)
print("And ended at:",endTime)
To process groups of files in sequence while working on files within a group in parallel:
#!/usr/bin/env python
from multiprocessing import Pool
def work_on(args):
"""Process a single file."""
i, filename = args
print("working on %s" % (filename,))
return i
def files():
"""Generate input filenames to work on."""
#NOTE: you could read the file list from a file, get it using glob.glob, etc
yield "inputfile1"
yield "inputfile2"
def process_files(pool, filenames):
"""Process filenames using pool of processes.
Wait for results.
"""
for result in pool.imap_unordered(work_on, enumerate(filenames)):
#NOTE: in general the files won't be processed in the original order
print(result)
def main():
p = Pool()
# to do "successive" multiprocessing
for filenames in [files(), ['other', 'bunch', 'of', 'files']]:
process_files(p, filenames)
if __name__=="__main__":
main()
Each process_file() is called in sequence after the previous one has been complete i.e., the files from different calls to process_files() are not processed in parallel.

Categories

Resources