argostranslate translate in while loop crash the code in Python - python

User define Library
# filename: my_tr_lib.py
import argostranslate.package
import argostranslate.translate
from_code = "en"
to_code = "ja"
# Download and install Argos Translate package
argostranslate.package.update_package_index()
available_packages = argostranslate.package.get_available_packages()
package_to_install = next(
filter(
lambda x: x.from_code == from_code and x.to_code == to_code, available_packages
)
)
argostranslate.package.install_from_path(package_to_install.download())
def tr(from_text: str) -> str:
translatedText = argostranslate.translate.translate(from_text, from_code, to_code)
return (translatedText)
Test file ( Program crash more than 5000 loop in while )
import my_tr_lib
# Example: open and read file
line_count=0
while(True):
print(my_tr_lib.tr("Hello")) # Example: translate file line one by one
line_count= line_count+ 1
print(line_count) # crash program at aprox over the 4000-5000 line count
Need to run till the end of the file which is more than aprox 5000 line.

Related

How do I generate the same result in a random.randint only twice

I might have not explained myself clearly in the constrained space of the title, but I will try my best to explain myself here.
My code has a function that generates a random integer between 0 and the number of lines of a .txt file. Each line is a string of text. That text is then passed to a second function that turns it into some sort of a typing test. In another one, I test the characters entered by the user compared to the string he was supposed to enter. But it doesn't do that. It goes ahead and generates an entirely new string again, and compares it, obviously the user gets an abysmal error rate and very low WPM. Here is my code:
import time
import random
import string
import sys
import string
import sys
#from getkey import getkey
def generateTest():
mylines = []
with open ('phrases.txt', 'rt') as myfile: # Open phrases.txt for reading the typing tests
for line in myfile: # For each line of text,
mylines.append(line) # add that line to the list.
# Converting lines of list to select a random phrase
listLen = len(mylines) - 1
return (mylines[random.randint(0,listLen)])
def speedCalc():
# words / time passed (assuming it is 5)
start = time.time()
test = input(print(generateTest()))
end = time.time()
timePassed = (end - start)
generateTestLen = len(generateTest())
return generateTest(), timePassed, ((generateTestLen/5)/timePassed)*60
def spellCheck():
test, timePassed, wpm = speedCalc()
diff = 0
correctChars = 0
file_A = generateTest()
file_B = test
#read_A=open(file_A,'r').read()
#read_B=open(file_B,'r').read()
for char_a, char_b in zip(file_A, file_B):
if char_a == char_b:
correctChars = correctChars+1
file_A_len = len(file_A)
correctPercent = (correctChars/file_A_len)*100
errors = file_A_len - correctChars
errorRate = errors/timePassed
netWPM = wpm - errorRate
return correctPercent, errors, netWPM
correctPercent, errors, netWPM = spellCheck()
print(correctPercent)
print(errors)
print(netWPM)```

How should I Execute this Python Script in powershell

I've solved the problem. The problem is related my %PATH%
I have a script which work with an argument. In powershell I've tried the command you can see below;
.\dsrf2csv.py C:\Python27\a\DSR_testdata.tsv.gz
And also you can see the script below,
def __init__(self, dsrf2csv_arg):
self.dsrf_filename = dsrf2csv_arg
dsrf_path, filename = os.path.split(self.dsrf_filename)
self.report_outfilename = os.path.join(dsrf_path, filename.replace('DSR', 'Report').replace('tsv', 'csv'))
self.summary_outfilename = os.path.join(dsrf_path, filename.replace('DSR', 'Summary').replace('tsv.gz', 'csv'))
But when I try to run this script there is no any action. How should I run this script with a file? (example : testdata.tsv.gz)
Note : Script and file in same location.
Full Scritp;
import argparse
import atexit
import collections
import csv
import gzip
import os
SKIP_ROWS = ['HEAD', '#HEAD', '#SY02', '#SY03', '#AS01', '#MW01', '#RU01',
'#SU03', '#LI01', '#FOOT']
REPORT_HEAD = ['Asset_ID', 'Asset_Title', 'Asset_Artist', 'Asset_ISRC',
'MW_Asset_ID', 'MW_Title', 'MW_ISWC', 'MW_Custom_ID',
'MW_Writers', 'Views', 'Owner_name', 'Ownership_Claim',
'Gross_Revenue', 'Amount_Payable', 'Video_IDs', 'Video_views']
SUMMARY_HEAD = ['SummaryRecordId', 'DistributionChannel',
'DistributionChannelDPID', 'CommercialModel', 'UseType',
'Territory', 'ServiceDescription', 'Usages', 'Users',
'Currency', 'NetRevenue', 'RightsController',
'RightsControllerPartyId', 'AllocatedUsages', 'AmountPayable',
'AllocatedNetRevenue']
class DsrfConverter(object):
"""Converts DSRF 3.0 to YouTube CSV."""
def __init__(self, dsrf2csv_arg):
""" Creating output file names """
self.dsrf_filename = dsrf2csv_arg
dsrf_path, filename = os.path.split(self.dsrf_filename)
print(dsrf_filename)
input("Press Enter to continue...")
self.report_outfilename = os.path.join(dsrf_path, filename.replace(
'DSR', 'Report').replace('tsv', 'csv'))
self.summary_outfilename = os.path.join(dsrf_path, filename.replace(
'DSR', 'Summary').replace('tsv.gz', 'csv'))
def parse_blocks(self, reader):
"""Generator for parsing all the blocks from the file.
Args:
reader: the handler of the input file
Yields:
block_lines: A full block as a list of rows.
"""
block_lines = []
current_block = None
for line in reader:
if line[0] in SKIP_ROWS:
continue
# Exit condition
if line[0] == 'FOOT':
yield block_lines
raise StopIteration()
line_block_number = int(line[1])
if current_block is None:
# Initialize
current_block = line_block_number
if line_block_number > current_block:
# End of block, yield and build a new one
yield block_lines
block_lines = []
current_block = line_block_number
block_lines.append(line)
# Also return last block
yield block_lines
def process_single_block(self, block):
"""Handles a single block in the DSR report.
Args:
block: Block as a list of lines.
Returns:
(summary_rows, report_row) tuple.
"""
views = 0
gross_revenue = 0
summary_rows = []
owners_data = {}
# Create an ordered dictionary with a key for every column.
report_row_dict = collections.OrderedDict(
[(column_name.lower(), '') for column_name in REPORT_HEAD])
for line in block:
if line[0] == 'SY02': # Save the financial Summary
summary_rows.append(line[1:])
continue
if line[0] == 'AS01': # Sound Recording information
report_row_dict['asset_id'] = line[3]
report_row_dict['asset_title'] = line[5]
report_row_dict['asset_artist'] = line[7]
report_row_dict['asset_isrc'] = line[4]
if line[0] == 'MW01': # Composition information
report_row_dict['mw_asset_id'] = line[2]
report_row_dict['mw_title'] = line[4]
report_row_dict['mw_iswc'] = line[3]
report_row_dict['mw_writers'] = line[6]
if line[0] == 'RU01': # Video level information
report_row_dict['video_ids'] = line[3]
report_row_dict['video_views'] = line[4]
if line[0] == 'SU03': # Usage data of Sound Recording Asset
# Summing up views and revenues for each sub-period
views += int(line[5])
gross_revenue += float(line[6])
report_row_dict['views'] = views
report_row_dict['gross_revenue'] = gross_revenue
if line[0] == 'LI01': # Ownership information
# if we already have parsed a LI01 line with that owner
if line[3] in owners_data:
# keep only the latest ownership
owners_data[line[3]]['ownership'] = line[6]
owners_data[line[3]]['amount_payable'] += float(line[9])
else:
# need to create the entry for that owner
data_dict = {'custom_id': line[5],
'ownership': line[6],
'amount_payable': float(line[9])}
owners_data[line[3]] = data_dict
# get rid of owners which do not have an ownership or an amount payable
owners_to_write = [o for o in owners_data
if (owners_data[o]['ownership'] > 0
and owners_data[o]['amount_payable'] > 0)]
report_row_dict['owner_name'] = '|'.join(owners_to_write)
report_row_dict['mw_custom_id'] = '|'.join([owners_data[o]
['custom_id']
for o in owners_to_write])
report_row_dict['ownership_claim'] = '|'.join([owners_data[o]
['ownership']
for o in owners_to_write])
report_row_dict['amount_payable'] = '|'.join([str(owners_data[o]
['amount_payable'])
for o in owners_to_write])
# Sanity check. The number of values must match the number of columns.
assert len(report_row_dict) == len(REPORT_HEAD), 'Row is wrong size :/'
return summary_rows, report_row_dict
def run(self):
finished = False
def removeFiles():
if not finished:
os.unlink(self.report_outfilename)
os.unlink(self.summary_outfilename)
atexit.register(removeFiles)
with gzip.open(self.dsrf_filename, 'rb') as dsr_file, gzip.open(
self.report_outfilename, 'wb') as report_file, open(
self.summary_outfilename, 'wb') as summary_file:
dsr_reader = csv.reader(dsr_file, delimiter='\t')
report_writer = csv.writer(report_file)
summary_writer = csv.writer(summary_file)
report_writer.writerow(REPORT_HEAD)
summary_writer.writerow(SUMMARY_HEAD)
for block in self.parse_blocks(dsr_reader):
summary_rows, report_row = self.process_single_block(block)
report_writer.writerow(report_row.values())
summary_writer.writerows(summary_rows)
finished = True
if __name__ == '__main__':
arg_parser = argparse.ArgumentParser(
description='Converts DDEX DSRF UGC profile reports to Standard CSV.')
required_args = arg_parser.add_argument_group('Required arguments')
required_args.add_argument('dsrf2csv_arg', type=str)
args = arg_parser.parse_args()
dsrf_converter = DsrfConverter(args.dsrf2csv_arg)
dsrf_converter.run()
In general to execute a python script in powershell like this .\script.py has two requirements:
Add the path to the python binaries to your %path%: $env:Path = $env:Path + ";C:\Path\to\python\binaries\"
Add the ending .py to the pathtext environment variable: $env:PATHEXT += ";.PY"
The latter will only be used in the current powershell session. If you want to add it to all future powershell sessions, add this line to your powershell profile (f.e. notepad $profile).
In your case there is also an issue with the python script you are trying to excute. def __init__(self) is an constructor for a class, like:
class Foo:
def __init__(self):
print "foo"
Did you give us your complete script?

Python refresh file from disk

I have a python script that calls a system program and reads the output from a file out.txt, acts on that output, and loops. However, it doesn't work, and a close investigation showed that the python script just opens out.txt once and then keeps on reading from that old copy. How can I make the python script reread the file on each iteration? I saw a similar question here on SO but it was about a python script running alongside a program, not calling it, and the solution doesn't work. I tried closing the file before looping back but it didn't do anything.
EDIT:
I already tried closing and opening, it didn't work. Here's the code:
import subprocess, os, sys
filename = sys.argv[1]
file = open(filename,'r')
foo = open('foo','w')
foo.write(file.read().rstrip())
foo = open('foo','a')
crap = open(os.devnull,'wb')
numSolutions = 0
while True:
subprocess.call(["minisat", "foo", "out"], stdout=crap,stderr=crap)
out = open('out','r')
if out.readline().rstrip() == "SAT":
numSolutions += 1
clause = out.readline().rstrip()
clause = clause.split(" ")
print clause
clause = map(int,clause)
clause = map(lambda x: -x,clause)
output = ' '.join(map(lambda x: str(x),clause))
print output
foo.write('\n'+output)
out.close()
else:
break
print "There are ", numSolutions, " solutions."
You need to flush foo so that the external program can see its latest changes. When you write to a file, the data is buffered in the local process and sent to the system in larger blocks. This is done because updating the system file is relatively expensive. In your case, you need to force a flush of the data so that minisat can see it.
foo.write('\n'+output)
foo.flush()
I rewrote it to hopefully be a bit easier to understand:
import os
from shutil import copyfile
import subprocess
import sys
TEMP_CNF = "tmp.in"
TEMP_SOL = "tmp.out"
NULL = open(os.devnull, "wb")
def all_solutions(cnf_fname):
"""
Given a file containing a set of constraints,
generate all possible solutions.
"""
# make a copy of original input file
copyfile(cnf_fname, TEMP_CNF)
while True:
# run minisat to solve the constraint problem
subprocess.call(["minisat", TEMP_CNF, TEMP_SOL], stdout=NULL,stderr=NULL)
# look at the result
with open(TEMP_SOL) as result:
line = next(result)
if line.startswith("SAT"):
# Success - return solution
line = next(result)
solution = [int(i) for i in line.split()]
yield solution
else:
# Failure - no more solutions possible
break
# disqualify found solution
with open(TEMP_CNF, "a") as constraints:
new_constraint = " ".join(str(-i) for i in sol)
constraints.write("\n")
constraints.write(new_constraint)
def main(cnf_fname):
"""
Given a file containing a set of constraints,
count the possible solutions.
"""
count = sum(1 for i in all_solutions(cnf_fname))
print("There are {} solutions.".format(count))
if __name__=="__main__":
if len(sys.argv) == 2:
main(sys.argv[1])
else:
print("Usage: {} cnf.in".format(sys.argv[0]))
You take your file_var and end the loop with file_var.close().
for ... :
ga_file = open(out.txt, 'r')
... do stuff
ga_file.close()
Demo of an implementation below (as simple as possible, this is all of the Jython code needed)...
__author__ = ''
import time
var = 'false'
while var == 'false':
out = open('out.txt', 'r')
content = out.read()
time.sleep(3)
print content
out.close()
generates this output:
2015-01-09, 'stuff added'
2015-01-09, 'stuff added' # <-- this is when i just saved my update
2015-01-10, 'stuff added again :)' # <-- my new output from file reads
I strongly recommend reading the error messages. They hold quite a lot of information.
I think the full file name should be written for debug purposes.

For loop function call file parsing

I recognize that this code is wildly inefficient.
I'm at a complete loss here, and I'm planning to remove the function and just make the code procedural in main. But I'm hoping someone can explain what I'm seeing here. The loop in main() runs and calls matchName(). matchName() executes it's loop then, when it should return for the next "vtRow", instead it just stops executing. So the output is the first record of vtData and every record from adData.
import csv, re
def main():
#1st word
oneWord = re.compile( '\A([\w]+)' )
#1st 3
first3 = re.compile( '\A([\w]{3})' )
#last 3
last3 = re.compile( '(?=([\w]{3})$)' )
mArray = [ oneWord, first3, last3 ]
adFile = open('adData.csv', 'rb')
adFields = ('lName','fName','cNum','addy','city','state','zip','phone','sex')
adData = csv.reader(adFile, dialect='excel')
vtFile = open('data360.csv','rb')
vtFields = ('ref','fName','lName')
vtData = csv.reader(vtFile, dialect='excel')
for vtRow in vtData:
matchName(vtRow, adData, mArray) # appears that this runs once and exits
def matchName(curVtRow, adData, mArr):
lName = curVtRow[4].lower()
fName = curVtRow[3].lower()
Posib = []
for row in adData:
cName = row[0].lower()
print "vt " + lName + " ; ad " + cName
return 1
if __name__ == "__main__":
main()
The issue is that looping with adData causes adFile to be read, and so after the first call to matchName() the file will have been read all the way and thus adData won't be looped over as adData.next() won't result in anything (and thus the print statement will not be executed). I suggest placing adFile.seek(0) after the call to matchName(). Note that just recreating adData won't work; I discovered recently that a csv reader updates its underlying object's file position rather than keeping track of it on its own.

Successive multiprocessing

I am filtering huge text files using multiprocessing.py. The code basically opens the text files, works on it, then closes it.
Thing is, I'd like to be able to launch it successively on multiple text files. Hence, I tried to add a loop, but for some reason it doesn't work (while the code works on each file). I believe this is an issue with:
if __name__ == '__main__':
However, I am looking for something else. I tried to create a Launcher and a LauncherCount files like this:
LauncherCount.py:
def setLauncherCount(n):
global LauncherCount
LauncherCount = n
and,
Launcher.py:
import os
import LauncherCount
LauncherCount.setLauncherCount(0)
os.system("OrientedFilterNoLoop.py")
LauncherCount.setLauncherCount(1)
os.system("OrientedFilterNoLoop.py")
...
I import LauncherCount.py, and use LauncherCount.LauncherCount as my loop index.
Of course, this doesn't work too as it edits the variable LauncherCount.LauncherCount locally, so it won't be edited in the imported version of LauncherCount.
Is there any way to edit globally a variable in an imported file? Or, is there any way to do this in any other way? What I need is running a code multiple times, in changing one value, and without using any loop apparently.
Thanks!
Edit: Here is my main code if necessary. Sorry for the bad style ...
import multiprocessing
import config
import time
import LauncherCount
class Filter:
""" Filtering methods """
def __init__(self):
print("launching methods")
# Return the list: [Latitude,Longitude] (elements are floating point numbers)
def LatLong(self,line):
comaCount = []
comaCount.append(line.find(','))
comaCount.append(line.find(',',comaCount[0] + 1))
comaCount.append(line.find(',',comaCount[1] + 1))
Lat = line[comaCount[0] + 1 : comaCount[1]]
Long = line[comaCount[1] + 1 : comaCount[2]]
try:
return [float(Lat) , float(Long)]
except ValueError:
return [0,0]
# Return a boolean:
# - True if the Lat/Long is within the Lat/Long rectangle defined by:
# tupleFilter = (minLat,maxLat,minLong,maxLong)
# - False if not
def LatLongFilter(self,LatLongList , tupleFilter) :
if tupleFilter[0] <= LatLongList[0] <= tupleFilter[1] and
tupleFilter[2] <= LatLongList[1] <= tupleFilter[3]:
return True
else:
return False
def writeLine(self,key,line):
filterDico[key][1].write(line)
def filteringProcess(dico):
myFilter = Filter()
while True:
try:
currentLine = readFile.readline()
except ValueError:
break
if len(currentLine) ==0: # Breaks at the end of the file
break
if len(currentLine) < 35: # Deletes wrong lines (too short)
continue
LatLongList = myFilter.LatLong(currentLine)
for key in dico:
if myFilter.LatLongFilter(LatLongList,dico[key][0]):
myFilter.writeLine(key,currentLine)
###########################################################################
# Main
###########################################################################
# Open read files:
readFile = open(config.readFileList[LauncherCount.LauncherCount][1], 'r')
# Generate writing files:
pathDico = {}
filterDico = config.filterDico
# Create outputs
for key in filterDico:
output_Name = config.readFileList[LauncherCount.LauncherCount][0][:-4]
+ '_' + key +'.log'
pathDico[output_Name] = config.writingFolder + output_Name
filterDico[key] = [filterDico[key],open(pathDico[output_Name],'w')]
p = []
CPUCount = multiprocessing.cpu_count()
CPURange = range(CPUCount)
startingTime = time.localtime()
if __name__ == '__main__':
### Create and start processes:
for i in CPURange:
p.append(multiprocessing.Process(target = filteringProcess ,
args = (filterDico,)))
p[i].start()
### Kill processes:
while True:
if [p[i].is_alive() for i in CPURange] == [False for i in CPURange]:
readFile.close()
for key in config.filterDico:
config.filterDico[key][1].close()
print(key,"is Done!")
endTime = time.localtime()
break
print("Process started at:",startingTime)
print("And ended at:",endTime)
To process groups of files in sequence while working on files within a group in parallel:
#!/usr/bin/env python
from multiprocessing import Pool
def work_on(args):
"""Process a single file."""
i, filename = args
print("working on %s" % (filename,))
return i
def files():
"""Generate input filenames to work on."""
#NOTE: you could read the file list from a file, get it using glob.glob, etc
yield "inputfile1"
yield "inputfile2"
def process_files(pool, filenames):
"""Process filenames using pool of processes.
Wait for results.
"""
for result in pool.imap_unordered(work_on, enumerate(filenames)):
#NOTE: in general the files won't be processed in the original order
print(result)
def main():
p = Pool()
# to do "successive" multiprocessing
for filenames in [files(), ['other', 'bunch', 'of', 'files']]:
process_files(p, filenames)
if __name__=="__main__":
main()
Each process_file() is called in sequence after the previous one has been complete i.e., the files from different calls to process_files() are not processed in parallel.

Categories

Resources