ValueError: substring not found on lip reading code - python

This is what I have gotten while trying to run step 3 of this source code:
https://github.com/carykh/lazykh
Error:
Traceback (most recent call last):
File "C:\Users\User\Desktop\lazykh-main\code\scheduler.py", line 93, in
OS_nextIndex = originalScript.index(wordString,OS_IndexAt)+len(wordString)
ValueError: substring not found
Code:
import argparse
import os.path
import json
import numpy as np
import random
def addPhoneme(p, t):
global prevPhoneme
global f
if p != prevPhoneme:
strings[4] += (str.format('{0:.3f}', t)+",phoneme,"+p+"\n")
prevPhoneme = p
def pickNewPose(t):
global pose
global prevPose
global POSE_COUNT
global prevPhoneme
global f
newPose = -1
while newPose == -1 or newPose == pose or newPose == prevPose:
newPose = int(random.random()*POSE_COUNT)
prevPose = pose
pose = newPose
strings[3] += (str.format('{0:.3f}', t)+",pose,"+str(pose)+"\n")
prevPhoneme = "na"
strings = [""]*5
POSE_COUNT = 5
emotions = {}
emotions["explain"] = 0
emotions["happy"] = 1
emotions["sad"] = 2
emotions["angry"] = 3
emotions["confused"] = 4
emotions["rq"] = 5
mouthList = [["aa","a"],["ae","a"],["ah","a"],["ao","a"],["aw","au"],
["ay","ay"],["b","m"],["ch","t"],["d","t"],["dh","t"],
["eh","a"],["er","u"],["ey","ay"],["f","f"],["g","t"],
["hh","y"],["ih","a"],["iy","ay"],["jh","t"],["k","t"],
["l","y"],["m","m"],["n","t"],["ng","t"],["ow","au"],
["oy","ua"],["p","m"],["r","u"],["s","t"],["sh","t"],
["t","t"],["th","t"],["uh","u"],["uw","u"],["v","f"],
["w","u"],["y","y"],["z","t"],["zh","t"],
["oov","m"]] # For unknown phonemes, the stick figure will just have a closed mouth ("mmm")
mouths = {}
for x in mouthList:
mouths[x[0]] = x[1]
ENDING_PHONEME = "m"
STOPPERS = [",",";",".",":","!","?"]
parser = argparse.ArgumentParser(description='blah')
parser.add_argument('--input_file', type=str, help='the script')
args = parser.parse_args()
INPUT_FILE = args.input_file
f = open(INPUT_FILE+".txt","r+")
originalScript = f.read()
f.close()
f = open(INPUT_FILE+".json","r+")
fileData = f.read()
f.close()
data = json.loads(fileData)
WORD_COUNT = len(data['words'])
pose = -1
prevPose = -1
prevPhoneme = "na"
emotion = "0"
pararaph = 0
image = 0
OS_IndexAt = 0
pickNewPose(0)
strings[1] += "0,emotion,0\n"
strings[0] += "0,paragraph,0\n"
strings[2] += "0,image,0\n"
strings[4] += "0,phoneme,m\n"
for i in range(WORD_COUNT):
word = data['words'][i]
if "start" not in word:
continue
wordString = word["word"]
timeStart = word["start"]
OS_nextIndex = originalScript.index(wordString,OS_IndexAt)+len(wordString)
if "<" in originalScript[OS_IndexAt:]:
tagStart = originalScript.index("<",OS_IndexAt)
tagEnd = originalScript.index(">",OS_IndexAt)
if OS_nextIndex > tagStart and tagEnd >= OS_nextIndex:
OS_nextIndex = originalScript.index(wordString,tagEnd)+len(wordString)
nextDigest = originalScript[OS_IndexAt:OS_nextIndex]
if "\n" in nextDigest and data['words'][i-1]['case'] != 'not-found-in-audio' and (prevPhoneme == "a" or prevPhoneme == "f" or prevPhoneme == "u" or prevPhoneme == "y"):
addPhoneme("m", data['words'][i-1]["end"])
"""print(wordString)
print(str(OS_IndexAt)+", "+str(OS_nextIndex))
print(nextDigest)
print("")"""
pickedPose = False
for stopper in STOPPERS:
if stopper in nextDigest:
pickNewPose(timeStart)
pickedPose = True
if "<" in nextDigest:
leftIndex = nextDigest.index("<")+1
rightIndex = nextDigest.index(">")
emotion = emotions[nextDigest[leftIndex:rightIndex]]
strings[1] += (str.format('{0:.3f}', timeStart)+",emotion,"+str(emotion)+"\n")
prevPhoneme = "na"
if "\n\n" in nextDigest:
pararaph += 1
image += 1 # The line of the script advances 2 lines whenever we hit a /n/n.
strings[0] += (str.format('{0:.3f}', timeStart)+",paragraph,"+str(pararaph)+"\n")
prevPhoneme = "na"
if "\n" in nextDigest:
image += 1
strings[2] += (str.format('{0:.3f}', timeStart)+",image,"+str(image)+"\n")
prevPhoneme = "na"
if not pickedPose:
pickNewPose(timeStart) # A new image means we also need to have a new pose
phones = word["phones"]
timeAt = timeStart
for phone in phones:
timeAt += phone["duration"]
phoneString = phone["phone"]
if phoneString == "sil":
truePhone = "m"
else:
truePhone = mouths[phoneString[:phoneString.index("_")]]
if len(truePhone) == 2:
addPhoneme(truePhone[0], timeAt-phone["duration"])
addPhoneme(truePhone[1], timeAt-phone["duration"]*0.5)
else:
addPhoneme(truePhone, timeAt-phone["duration"])
OS_IndexAt = OS_nextIndex
f = open(INPUT_FILE+"_schedule.csv","w+")
for i in range(len(strings)):
f.write(strings[i])
if i < len(strings)-1:
f.write("SECTION\n")
f.flush()
f.close()
print(f"Done creating schedule for {INPUT_FILE}.")

The
ValueError: substring not found
occurs when you try to find the index of a substring in a string which does not contain it in the specified (or default) section, using the index function.
The index method takes 3 parameters:
value
start
end
and it searches for the value between start and end.
So, the error occurred because the substring was not found in the section where it was searched for. The line of
OS_nextIndex = originalScript.index(wordString,tagEnd)+len(wordString)
searches for wordString, starting from tagEnd and searches for the likes of
<span>yourwordstring</span>
, but in your case it was not found. You can do one of the following to solve the issue:
you can fix your input if it should always have a match for the search
you can handle the error when the index throws the error
you can use find instead, see https://bobbyhadz.com/blog/python-valueerror-substring-not-found
Note that find also has three parameters, as you can read from https://www.w3schools.com/python/ref_string_find.asp

Related

Python sys.argv indexerror list index out of range

When I use splitter_freqy.py to split my .dat filem I meet the following problem.
line 5 name=str(sys.argv[position+1])
indexerror list index out of range
I use ubuntu and the command is
python3.6 splitter_freq.py abc.dat
This is the splitter_freq.py
import sys
arguments = len(sys.argv) - 1
position = 1
name = str(sys.argv[position+1])
while (arguments > position): # changed >= to > to avoid error
bench_freq_names = []
path0 = "./%s/"%name+sys.argv[position]
position = position+1
print("the input file is ",path0)
c_0 = open(path0,'r')
header=c_0.readline()
l_0 = c_0.readline()
w_0 = l_0.split()
bench_name = w_0[1]
freq_value = w_0[3]
path_out = path0.split("/")
path_out = path_out[-1].split(".")
print("the benchmark name is ",bench_name)
print("the freq value is ",freq_value)
bench_freq_names.append([bench_name,freq_value])
output_file = "./%s/"%name+path_out[0]+"_"+bench_name+"_"+freq_value+".txt"
m = open(output_file,'a')
print(header.rstrip('\n'),file=m)
print(l_0.rstrip('\n'),file=m)
for l_0 in c_0: #read lines one by one
w_0 = l_0.split()
if (w_0[1] == bench_name and w_0[3] == freq_value):
print(l_0.rstrip('\n'),file=m)
else: #new_bench_name or new freq_value
m.close() #close file
bench_name = w_0[1] #update bench
freq_value = w_0[3] #update freq
print("the benchmark name is ",bench_name)
print("the freq value is ",freq_value)
output_file = "./%s/"%name+path_out[0]+"_"+bench_name+"_"+freq_value+".txt"
m = open(output_file,'a')
if [bench_name,freq_value] not in bench_freq_names:
bench_freq_names.append([bench_name,freq_value])
print(header.rstrip('\n'),file=m)
print(l_0.rstrip('\n'),file=m)
c_0.close()
m.close()

obfuscation of a text file using python - by reversing the words and inserting a specific number of random characters between them

Beginner Coding problem I am supposed to write a code that reverses the contents of a file and then inserts a number of random characters based on a strength the user chooses. It then creates a new file containing the obstructed file.
For example, if the user chooses strength = 2, it will insert 2 random characters between each letter in the text file: The cat sits ---> sgyt6gilns t7faxdc e3dh1kT
Right now my program inserts too many characters in between and I can't figure out why.
This is what it's doing:
input: CAT
Output of strength = 1: TeAEADQoC
import string
import random
def getRandomChar():
alpha = string.ascii_letters + string.digits
return random.choice(alpha)
def randomString(EncrypStrength):
count = 0
result = ''
while count < len(EncrypStrength):
result += getRandomChar()
count += 1
return result
def ReverseString(OrigFile):
return OrigFile[::-1]
def LineEncrypt(line, EncrypStrength):
EncrypStrength = ReverseString(line)
index = 0
newline = EncrypStrength[index]
index += 1
while index < len(EncrypStrength):
newline += randomString(EncrypStrength)
newline += EncrypStrength[index]
index += 1
return newline
def main():
OrigFile =input('Original File Name:')
EncryptedFile = input("obfuscated File Name:")
EncrypStrength = int(input('Enter the Encryption Strength:'))
Orig = open(OrigFile, 'r')
Encrypted = open(EncryptedFile, 'w')
line = Orig.readline()
while line!= '':
encryptLine = LineEncrypt(line, EncrypStrength)
Encrypted.write(encryptLine +"\n")
line = Orig.readline()
Orig.close()
Encrypted.close()
if __name__=="__main__":
main()
In Line Encrypt method you are using incorrectly Encrypt Strength, you are overriding the number of characters to put as EncryptStrength with reversed line.
def LineEncrypt(line, EncrypStrength):
reversedString = ReverseString(line)
index = 0
newline = reversedString[index]
index += 1
while index < len(reversedString):
newline += randomString(EncrypStrength)
newline += reversedString[index]
index += 1
You are confusing EncrypStrength and overriding it as Ritesh mentioned.
Here is the full corrected code, I hope it will work as you expected.
import string
import random
def getRandomChar():
alpha = string.ascii_letters + string.digits
return random.choice(alpha)
def randomString(EncrypStrength):
count = 0
result = ''
while count < EncrypStrength:
result += getRandomChar()
count += 1
return result
def ReverseString(OrigFile):
return OrigFile[::-1]
def LineEncrypt(line, EncrypStrength):
RevStr = ReverseString(line)
index = 0
newline = RevStr[index]
index += 1
while index < len(RevStr):
newline += randomString(EncrypStrength)
newline += RevStr[index]
index += 1
return newline
def main():
OrigFile =input('Original File Name:')
EncryptedFile = input("obfuscated File Name:")
EncrypStrength = int(input('Enter the Encryption Strength:'))
Orig = open(OrigFile, 'r')
Encrypted = open(EncryptedFile, 'w')
line = Orig.readline()
while line!= '':
encryptLine = LineEncrypt(line, EncrypStrength)
Encrypted.write(encryptLine +"\n")
line = Orig.readline()
Orig.close()
Encrypted.close()
if __name__=="__main__":
main()

fastest way to check a .rar file password - other than rarfile extractall

So i have written a little .rar password "cracker" based on tutorials, using the code underneath. It works fine, but is very slow when the file size is big. The best reason i could find is, that ever so often when you put in a wrong password, it extracts the whole file, before refusing the password. With small files that is not a big problem, but with big files it slows the process a lot.
Is there a way to just check a hashed version of the password against a iterated hash?
import itertools
import rarfile
import time
rarfile.UNRAR_TOOL = "path"
rar = rarfile.RarFile("path")
done = False
n = 0
inputList = ["A","B","1","2"]
class h():
startword = ""
rep = 1
start = 0
itrTot = 0
f = open("save.txt")
for x,each in enumerate(f):
if x == 0:
h.rep = int(each)
else:
h.start = int(each)-3
f.close()
if h.start < 0:
h.start = 0
h.itrTot = len(inputList)**h.rep
def pw_guess():
res = itertools.product(inputList, repeat=h.rep)
for guess in res:
yield guess
start_time = time.time()
while True:
guess_generator = pw_guess()
for guess in guess_generator:
n += 1
if h.startword == "":
h.startword = guess
else:
if guess == h.startword:
h.rep += 1
n = 0
h.itrTot = len(inputList)**h.rep
h.start = 0
print("next rotation, itr rep: "+str(h.rep))
h.startword = ""
break
if n < h.start:
continue
txt = f"({n}/{h.itrTot}, {round((100/h.itrTot)*n,2)}%) - {h.rep}: {''.join(guess)}"
print(txt)
try:
rar.extractall(path="path",members=None,pwd=''.join(guess))
print("Pass found!")
print(str(n) + " - " + str(h.rep) + ": " + str(''.join(guess)))
done = True
txt2 = f"({n}/{h.itrTot}, {round((100/h.itrTot)*n,2)}%) - {h.rep}: {''.join(guess)}\n"
f = open("pass.txt", "a")
f.write(txt2)
f.close()
break
except:
f = open("save.txt", "w")
f.write(str(h.rep) + "\n" + str(n))
f.close()
if done:
end_time = time.time()
break
print("End time: " + str(end_time-start_time))
John the ripper is the answer. +20k passwords checked in 2 minutes. But using parts of the script for wordlist generation, is still very fast and functional.
wordlist generator i used:
import itertools
inputList = ["A","B","C","D","1","2","3","4","5"]
itr = 7
WL_path = "path"
f = open(WL_path,"w")
f.write("")
f.close()
class h():
startword = ""
rep = 1
itrTot = 0
txt = ""
h.itrTot = len(inputList)**itr
print("Wordlist length: " + str(h.itrTot))
def pw_guess():
res = itertools.product(inputList, repeat=h.rep)
for guess in res:
yield guess
while True:
guess_generator = pw_guess()
for guess in guess_generator:
if h.startword == "":
h.startword = guess
else:
if guess == h.startword:
h.rep += 1
print("next rotation, itr rep: " + str(h.rep))
h.startword = ""
break
h.txt = ''.join(guess)
f = open(WL_path, "a")
f.write(h.txt+"\n")
f.close()
if h.rep > itr:
break

Python: object of type '_io.TextIOWrapper' has no len()

I keep getting the error when running my code:
TypeError: object of type '_io.TextIOWrapper' has no len() function
How do I get it to open/read the file and run it through the loop?
Here's a link to the file that I am trying to import:
download link of the DNA sequence
def mostCommonSubstring():
dna = open("dna.txt", "r")
mink = 4
maxk = 9
count = 0
check = 0
answer = ""
k = mink
while k <= maxk:
for i in range(len(dna)-k+1):
sub = dna[i:i+k]
count = 0
for i in range(len(dna)-k+1):
if dna[i:i+k] == sub:
count = count + 1
if count >= check:
answer = sub
check = count
k=k+1
print(answer)
print(check)
The problem occurs due to the way you are opening the text file.
You should add dna = dna.read() to your code.
so your end code should look something like this:
def mostCommonSubstring():
dna = open("dna.txt", "r")
dna = dna.read()
mink = 4
maxk = 9
count = 0
check = 0
answer = ""
k = mink
while k <= maxk:
for i in range(len(dna)-k+1):
sub = dna[i:i+k]
count = 0
for i in range(len(dna)-k+1):
if dna[i:i+k] == sub:
count = count + 1
if count >= check:
answer = sub
check = count
k=k+1
print(answer)
print(check)
#tfabiant : I suggest this script to read and process a DNA sequence.
To run this code, in the terminal: python readfasta.py fastafile.fasta
import string, sys
##########I. To Load Fasta File##############
file = open(sys.argv[1])
rfile = file.readline()
seqs = {}
##########II. To Make fasta dictionary####
tnv = ""#temporal name value
while rfile != "":
if ">" in rfile:
tnv = string.strip(rfile)
seqs[tnv] = ""
else:
seqs[tnv] += string.strip(rfile)
rfile = file.readline()
##############III. To Make Counts########
count_what = ["A", "T", "C", "G", "ATG"]
for s in seqs:
name = s
seq = seqs[s]
print s # to print seq name if you have a multifasta file
for cw in count_what:
print cw, seq.count(cw)# to print counts by seq

compare an exact word with the txt file

i am trying to get the exact word match from my file along with their line no.
like when i search for abc10 it gives me all the possible answers e.g abc102 abc103 etc
how can i limitize my code to only print what i commanded..
here is my code!
lineNo = 0
linesFound = []
inFile= open('rxmop.txt', 'r')
sKeyword = input("enter word ")
done = False
while not done :
pos = inFile.tell()
sLine = inFile.readline()
if sLine == "" :
done = True
break
if (sLine.find( sKeyword ) != -1):
print ("Found at line: "+str(lineNo))
tTuple = lineNo, pos
linesFound.append( tTuple )
lineNo = lineNo + 1
done = False
while not done :
command = int( input("Enter the line you want to view: ") )
if command == -1 :
done = True
break
for tT in linesFound :
if command == tT[0] :
inFile.seek( tT[1] )
lLine = inFile.readline()
print ("The line at position " + str(tT[1]) + "is: " + lLine)
"like when i search for abc10 it gives me all the possible answers e.g abc102 abc103 etc"
You split each record and compare whole "words" only.
to_find = "RXOTG-10"
list_of_possibles = ["RXOTG-10 QTA5777 HYB SY G12",
"RXOTG-100 QTA9278 HYB SY G12"]
for rec in list_of_possibles:
words_list=rec.strip().split()
if to_find in words_list:
print "found", rec
else:
print " NOT found", rec

Categories

Resources