Python stange behaviour when acessing list elements - python

This small scripts makes exactly what I need.
#!/usr/bin/python
import os
import fileinput
import sys
import shutil
import glob
import time
def replaceAll1(files,searchExp,replaceExp):
for line in fileinput.input(files, inplace=1):
if searchExp in line:
line = line.replace(searchExp,replaceExp)
sys.stdout.write(line)
param1 = [1,2,3]
param2 = [1,2,3]
param3 = [1,2,3]
for i in xrange(len(param1)):
for ii in xrange(len(param2)):
for iii in xrange(len(param3)):
os.system("cp -a cold.in input.in")
old_param1 = "param1 = 1"
old_param2 = "param2 = 1"
old_param3 = "param3 = 1"
new_param1 = "param1 = " + str(param1[i])
new_param2 = "param2 = " + str(param2[ii])
new_param3 = "param3 = " + str(param3[iii])
replaceAll1('input.in',old_param1,new_param1)
replaceAll1('input.in',old_param2,new_param2)
replaceAll1('input.in',old_param3,new_param3)
time.sleep(4)
It enters in a configuration file and replaces sequentially the input parameters according to the lists that are accessed by the loop indexes. It is simple a combination of all the three parameters between each other.
# Input file
param1 = 1 # --- Should be [1,2,3]
param2 = 1 # --- Should be [1,2,3]
param3 = 1 # --- Should be [1,2,3]
The problem is that his big brother is not behaving like it. When it loops through the lists, it gets lost in scheme = 2 and puts dissp_scheme = 2 (freezed) when it should be dissp_scheme = 1. I printed out every single variable that goes inside the function replaceAll marked with comments but when I turn on the other calls it mess up everything. Here is the script.
#!/usr/bin/python
import os
import fileinput
import sys
import shutil
import glob
import time
os.chdir(os.getcwd())
# Replaces the input file parameters
def replaceAll(files,searchExp,replaceExp):
for line in fileinput.input(files, inplace=1):
if searchExp in line:
line = line.replace(searchExp,replaceExp)
sys.stdout.write(line)
# Gets a number inside my input file.
def get_parameter(variable,file_name):
f = open(file_name,'r').readlines()
for i in xrange(len(f)):
index = f[i].find(variable)
if index != -1:
pre_found = f[i].split('=')[1]
return pre_found
# Gets the discretization scheme name.
def get_sheme(number):
if number == 1:
return "Simple Centered Scheme"
elif number == 2:
return "Lax-Wendroff Scheme"
elif number == 3:
return "MacCormack Scheme"
elif number == 4:
return "Beam-Warming Scheme"
elif number == 5:
return "Steger-Warming 1st Order Scheme"
elif number == 6:
return "Steger-Warming 2nd Order Scheme"
elif number == 7:
return "Van Leer 1st Order Scheme"
elif number == 8:
return "Van Leer 2nd Order Scheme"
elif number == 9:
return "Roe Scheme"
elif number == 10:
return "AUSM Scheme"
# Gets the dissipation scheme name.
def get_dissip(number):
if number == 1:
return "Pullian Non-Linear dissipation"
elif number == 2:
return "Second difference dissipation"
elif number == 3:
return "Fourth difference dissipation"
elif number == 4:
return "B&W dissipation"
# Generates the density gnuplot scripts.
def gnuplot(variable,pressure_ratio,scheme,dissip_scheme):
#gnuplot('Density',10,get_sheme(3),'Pullian')
# Building name of the output file.
outFileName = variable.lower() + '_ratio' + str(int(pressure_ratio)) + '_' + scheme.replace(" ","") + '_dissp' + dissip_scheme.replace(" ","") + '.tex'
gnuFileName = variable.lower() + '_ratio' + str(int(pressure_ratio)) + '_' + scheme.replace(" ","") + '_dissp' + dissip_scheme.replace(" ","") + '.gnu'
# Build title of the plot
title = 'Analytical vs Numerical | ' + scheme
f = open(gnuFileName,'w')
f.write("set term cairolatex monochrome size 15.0cm, 8cm\n")
f.write('set output "' + outFileName + '"\n')
f.write("set grid\n")
f.write('set xtics font "Times-Roman, 10\n')
f.write('set ytics font "Times-Roman, 10\n')
f.write('set xlabel "x position" center\n')
f.write('set ylabel "' + variable + '" center\n')
f.write('set title "Analytical vs Numerical Results | ' + variable + '" \n')
f.write('set pointsize 0.5\n')
f.write('set key font ",10"\n')
fortran_out_analytical = 'a' + variable.lower() + '.out'
fortran_out_numerical = variable.lower() + 'Output.out'
f.write('plot "' + fortran_out_analytical +'" u 1:2 with linespoints lt -1 lw 1 pt 4 title "Analytical",\\\n')
f.write( '"' + fortran_out_numerical + '" u 1:2 with lines lw 5 title "Numerical"\n')
f.close()
# Generate latex code.
def generate_latex(text_image_file,caption):
latex.write("\\begin{figure}[H]\n")
latex.write(" \centering\n")
latex.write(" \input{" + text_image_file + "}\n")
latex.write(" \caption{"+ caption +"}\n")
latex.write(" \label{fig:digraph}\n")
latex.write("\\end{figure}\n")
latex.write("\n\n")
# -----------------------------------------------------------------------
# Main loop.
# -----------------------------------------------------------------------
pressure_ratios = [5.0]
schemes = [1,2,3]
dissips = [1,2,3]
# Define replace lines for replace all.
scheme_line = "scheme = "
dissip_line = "dissp_scheme = "
# Open Latex export file.
latex = open("bizu.txt",'w')
i = 0
# ratios.
for i in xrange(len(pressure_ratios)):
print "----------------------------------------"
print " + Configuring File for pressure ratio: " + str(pressure_ratios[i])
print "----------------------------------------\n"
# Schemes
for jj in xrange(len(schemes)):
print " + Configuring file for scheme: " + get_sheme(schemes[jj]) + "\n"
for kkk in xrange(len(dissips)):
print " + Configuring file for dissip: " + get_dissip(dissips[kkk])
# We always work with a brand new file.
os.system("rm input.in")
os.system("cp -a cold.in input.in")
# Replace pressures.
p1_line_old = 'p1 = 5.0d0'
rho1_line_old = 'rho1 = 5.0d0'
p1_line_new = 'p1 = ' + str(pressure_ratios[i]) + 'd0'
rho1_line_new = 'rho1 = ' + str(pressure_ratios[i]) + 'd0'
replaceAll('input.in',p1_line_old,p1_line_new)
replaceAll('input.in',rho1_line_old,rho1_line_new)
# Replace discretization scheme.
old_scheme = scheme_line + "1"
new_scheme = scheme_line + str(schemes[jj])
#==========================================================
# This call is messing everything up when scheme turns to 2
#==========================================================
replaceAll('input.in',old_scheme,new_scheme)
# Replace dissipation scheme.
old_dissp_scheme = dissip_line + "1"
new_dissp_scheme = dissip_line + str(dissips[kkk])
print p1_line_old
print new_scheme
print new_dissp_scheme
replaceAll('input.in',old_dissp_scheme, new_dissp_scheme)
time.sleep(3)
# ### Calling program
# os.system("./sod")
#
latex.close()
And the input file that the it works on is:
&PAR_physical
p1 = 5.0d0
p4 = 1.0d0
rho1 = 5.0d0
rho4 = 1.0d0
fgamma = 1.4d0
R_const = 287.0d0
F_Cp = 1004.5
F_Cv = 717.5
/
&PAR_geometry
total_mesh_points = 1001
start_mesh_point = -5.0d0
final_mesh_point = 5.0d0
print_step = 100
/
&PAR_numeric
scheme = 3
iterations = 10000
time_step = 0.0001d0
/
&PAR_dissip
dissp_scheme = 3
dissip_omega = 0.5d0
/
Thank you all !

Related

How do I make my number to excel coordinate converter work without if statements?

I have made a python program that recreates an image in excel by filling cells with different shades of red, green and blue. I have made this method to convert a number to a x coordinate in excel:
alph = [i for i in string.ascii_uppercase]
alph.insert(0,'')
def numToExcel(x):
stri = ''
row = x
rdiv26 = row//26
rdiv676 = row//676
stri += alph[(rdiv676)-((rdiv676)//676)*676]
stri += alph[(rdiv26)-((rdiv26)//26)*26]
stri += alph[1+(row%26)]
return stri
I added an empty space at the beginning so the program prints, for example, B3 instead of AAB3. However this means it does not reach for the letter Z. If i do x // 27 the image comes out wavy and it does not fix the issue.
This is my entire program:
import string
import time
import math
import openpyxl
from PIL import Image
from openpyxl import Workbook
from openpyxl.styles import Color, PatternFill, Font, Border
alph = [i for i in string.ascii_uppercase]
alph.insert(0,'')
def rgb_to_hex(r, g, b):
return '%02x%02x%02x' % (r,g,b)
def numToExcel(x):
stri = ''
row = x
rdiv26 = row//26
rdiv676 = row//676
stri += alph[(rdiv676)-((rdiv676)//676)*676]
stri += alph[(rdiv26)-((rdiv26)//26)*26]
stri += alph[1+(row%26)]
return stri
wb = Workbook()
ws = wb.active
im = Image.open('input.jpg')
pix = im.load()
x,y=im.size
start_time = time.time()
ct=0
for j in range(1,y-1):
i = 0
while i < 3*(x-1):
#Debug shi: print("["+"#"*math.floor(20*(ct/(x*3*y)))+"-"*(20-math.floor(20*ct/(x*3*y)))+"] " + str(math.floor(ct/(x*3*y)*100))+"% "+ str(ct) + "/" + str(x*y))
#Debug Shi: print("["+"#"*math.floor(20*(ct/(x*3*y)))+"-"*(20-math.floor(20*ct/(x*3*y)))+"] " + str(math.floor(ct/(x*3*y)*100))+"% "+ str(ct) + "/" + str(x*y*3) + " | (" + numToExcel(i) + "," + str(j) + ") -> (" + numToExcel(i+2) + "," + str(j) +") | " + str(pix[i/3,j][0])+','+str(pix[i/3,j][1])+','+str(pix[i/3,j][2]))
ws[numToExcel(i)+str(j)].fill = PatternFill("solid", fgColor=rgb_to_hex(pix[i/3,j][0],0,0))
ws[numToExcel(i+1)+str(j)].fill = PatternFill("solid", fgColor=rgb_to_hex(0,pix[i/3,j][1],0))
ws[numToExcel(i+2)+str(j)].fill = PatternFill("solid", fgColor=rgb_to_hex(0,0,pix[i/3,j][2]))
i += 3
ct += 3
#Progress Bar and stuff
print("\n"*100)
print("["+"#"*math.floor(20*(ct/(x*3*y)))+"-"*(20-math.floor(20*ct/(x*3*y)))+"] " + str(math.floor(ct/(x*3*y)*100))+"% "+ str(ct) + "/" + str(x*y*3) + " | Row " + str(j))
wb.save("sample2.xlsx")
print("--- Complete! ---\n--- %s seconds ---" % (time.time() - start_time))
And here is the output:
Please ignore any shitty code/math lol. I do not want to use any if statements because I am scared it would slow down the program.

Problems with calculating gcskews using python

everyone.
I have some problems with calculating gcskews in python.
My 2 major inputs are fasta file and bed file.
Bed file has columns of gn(0), gene_type(1), gene name(2), chromosome(3), strand(4), num(5), start(6).(These numbers are index numbers in python.) Then I am trying to use some functions which can calculate gcskews of sense and antisense strand from the start site of each gene. The window is 100bp and these are the functions.
import re
import sys
import os
# opening bed file
content= []
with open("gene_info.full.tsv") as new :
for line in new :
content.append(line.strip().split())
content = content[1:]
def fasta2dict(fil):
dic = {}
scaf = ''
seq = []
for line in open(fil):
if line.startswith(">") and scaf == '':
scaf = line.split(' ')[0].lstrip(">").replace("\n", "")
elif line.startswith(">") and scaf != '':
dic[scaf] = ''.join(seq)
scaf = line.split(' ')[0].lstrip(">").replace("\n", "")
seq = []
else:
seq.append(line.rstrip())
dic[scaf] = ''.join(seq)
return dic
dic_file = fasta2dict("full.fa")
# functions for gc skew
def GC_skew_up(strand, loc, seq, window = 100) : # need -1 for index
values_up = []
loc = loc - 1
if strand == "+" :
sp_up = seq[loc - window : loc]
g_up = sp_up.count('G') + sp_up.count('g')
c_up = sp_up.count('C') + sp_up.count('c')
try :
skew_up = (g_up - c_up) / float(g_up + c_up)
except ZeroDivisionError:
skew_up = 0.0
values_up.append(skew_up)
elif strand == "-" :
sp_up = seq[loc : loc + window]
g_up = sp_up.count('G') + sp_up.count('g')
c_up = sp_up.count('C') + sp_up.count('c')
try :
skew_up = (c_up - g_up) / float(g_up + c_up)
except ZeroDivisionError:
skew_up = 0.0
values_up.append(skew_up)
return values_up
def GC_skew_dw(strand, loc, seq, window = 100) :
values_dw = []
loc = loc - 1
if strand == "+" :
sp_dw = seq[loc : loc + window]
g_dw = sp_dw.count('G') + sp_dw.count('g')
c_dw = sp_dw.count('C') + sp_dw.count('c')
try :
skew_dw = (g_dw - c_dw) / float(g_dw + c_dw)
except ZeroDivisionError:
skew_dw = 0.0
values_dw.append(skew_dw)
elif strand == "-" :
sp_dw = seq[loc - window : loc]
g_dw = sp_dw.count('G') + sp_dw.count('g')
c_dw = sp_dw.count('C') + sp_dw.count('c')
try :
skew_dw = (c_dw - g_dw) / float(g_dw + c_dw)
except ZeroDivisionError:
skew_dw = 0.0
values_dw.append(skew_dw)
return values_dw
As I said, I want to calculate the gcskews for 100bp of strands from the start site of genes.
Therefore, I made codes that get the chromosome name from the bed file and get the sequence data from the Fasta file.
Then according to gene name and strand information, I expected that codes will find the correct start site and gcskew for 100bp window will be calculated.
However, when I run this code, gcskew of - strand is wrong but + strand is correct. (I got correct gcskew data and I used it.)
Gcskews are different from the correct data, but I don't know what is the problem.
Could anyone tell me what is the problem of this code?
Thanks in advance!
window = 100
gname = []
up = []
dw = []
for match in content :
seq_chr = dic_file[str(match[3])]
if match[4] == "+" :
strand = match[4]
new = int(match[6])
sen_up = GC_skew_up(strand, new, seq_chr, window = 100)
sen_dw = GC_skew_dw(strand, new, seq_chr, window = 100)
gname.append(match[2])
up.append(str(sen_up[0]))
dw.append(str(sen_dw[0]))
if match[4] == "-" :
strand = match[4]
new = int(match[6])
an_up = GC_skew_up(strand, new, seq_chr, window = 100)
an_dw = GC_skew_dw(strand, new, seq_chr, window = 100)
gname.append(match[2])
up.append(str(an_up[0]))
dw.append(str(an_dw[0]))
tot = zip(gname, up, dw)

Why are Lists causing problems

So I am working on a certain code to modify a text file. When I use this function individually, it works perfectly
TextRotation.rotTextC("cv.txt")
But when I use it in batch as a list like this
def files_LTXT(pathF):
return glob.glob(pathF + "*" + ".txt")
for i in range (len(listFileTXT)):
TextRotation.rotTextC(listFileTXT[i])
IT gives the following error:
File "C:\Users\Administrator\PycharmProjects\openCV\TextRotation.py", line
9, in rotLineC
0
valueObj = int(lineStr[c1])
0.472917 0.713281 0.845833 0.376563
IndexError: string index out of range
Function rotLineC is as follows:
def rotLineC(lineStr, c1):
if len(lineStr) > 2:
valueObj = int(lineStr[c1])
print(valueObj)
valueXC = float(lineStr[(c1+2):(c1+10)])
valueYC = float(lineStr[(c1+11):(c1+19)])
valueW = float(lineStr[(c1+20):(c1+28)])
valueH = float(lineStr[(c1+29):(c1+37)])
# print(valueXC)
# print(valueYC)
# print(valueW)
# print(valueH)
nValueXC = round(1 - valueYC, 6)
nValueYC = round(valueXC, 6)
nValueW = round(valueH, 6)
nValueH = round(valueW, 6)
rotString = str(int(valueObj)) + " " + str(nValueXC) + " " + \
str(nValueYC) + " " + str(nValueW) + " " + str(nValueH)
print(str(nValueXC) + " " + str(nValueYC) + " " + str(nValueW) + " " + str(nValueH))
print(rotString)
return rotString
This function works fine!
for i in range (len(listFileJPG)):
ImageRotation.rotImage(listFileJPG[i])
Mind to include the / to the end of the path! (I am assuming a UNIX environment here)
If the path is 'dev/my_pat', for example, your function will fail. The path must end with a /. You can it to your function:
...
if pathF[-1] != '/':
return glob.glob(pathF + "/*.txt")
...
Also, do not iterate using indices, use the pythonic way!
for file in listFileTXT(my_path):
TextRotation.rotTextC(file)

for-loop through a list with an if statement, only last position of list runs if statements

Here is my list, So models [0], names[0], mac_addresses[0] would all be required to calculate my mac address that I need
models = ["MR18","MR32", "MR18"]
names = ["David", "Bob", "Frank"]
mac_addresses = ["00:18:0A:2C:3D:5F", "00:18:0A:2d:3c:5F", "00:18:0A:2A:3B:5F"]
These are the functions that should run depending on which if statement is True.
def calc18(mac_address, name, mr):
#Mac Address Breakdown
print(name)
mac_calc = mac_address[:2]
mac_extractor_front = mac_address[2:6]
mac_extractor_back = mac_address[8:]
flag = True
First_Pos_Hex = first_hex_calc(mac_calc, mr, flag)
#Initial Mac Addresses
list_2_4.append(mac_address)
list_5.append(First_Pos_Hex + mac_extractor_front + mr_18_5ghz + mac_extractor_back)
flag = False
#First Time Calculation hex updated
hex_updater = first_hex_calc(mac_calc, mr, flag)
list_2_4.append(hex_updater + mac_extractor_front + mr_18_24ghz + mac_extractor_back)
list_5.append(hex_updater + mac_extractor_front + mr_18_5ghz + mac_extractor_back)
#Update self, after appending mac addresses
for i in range(15):
counter = i + 1
hex_updater = hex_calc(hex_updater, mr)
list_2_4.append(hex_updater + mac_extractor_front + mr_18_24ghz + mac_extractor_back)
list_5.append(hex_updater + mac_extractor_front + mr_18_5ghz + mac_extractor_back)
print(str(counter) + ") 2.4ghz: " + list_2_4[i] + "\t" + str(counter) + ") 5 Ghz: " + list_5[i] )
for i in range(len(list_2_4)):
writer(name, mac_address, list_2_4[i], list_5[i], i)
def calc32(mac_address, name):
#Mac Address Breakdown
mac_calc = mac_address[15:17]
mac_extractor_front = mac_address[:6]
mac_extractor_back = mac_address[8:15]
#Initial Mac Addresses
list_2_4.append(mac_extractor_front + mr_32_24ghz + mac_extractor_back + mac_calc)
list_5.append(mac_extractor_front + mr_32_5ghz + mac_extractor_back + mac_calc)
#Update self, after appending mac addresses
for i in range(15):
counter = i + 1
mac_calc = hex_calc(mac_calc, mr)
list_2_4.append(mac_extractor_front + mr_32_24ghz + mac_extractor_back + mac_calc)
list_5.append(mac_extractor_front + mr_32_5ghz + mac_extractor_back + mac_calc)
print(str(counter) + ") 2.4ghz: " + list_2_4[i] + "\t" + str(counter) + ") 5 Ghz: " + list_5[i] )
writer(name, mac_address, list_2_4[i], list_5[i], i)
Now I have this for-loop should iterate through each position in models, which would then check the if statements and execute a specific function
so, the first iteration models[0] which has "MR18" stored inside, then would check the if statements and execute calc18(),
after calc18() function finishes its execution would return to the for-loop and run the next iteration which would be models[1] which has "MR32", then again would check the if statements and execute calc32(). Then when calc32() finishes executing move on to models[2]
But in my case when I run the code, models[2] which represents "MR18" runs through the if statement, completely ignoring models[0] and models[1]
for num, mod in enumerate(models):
print(mod)
if mod == "MR18":
print("I have entered")
calc18(mac_addresses[num], names[num], mod)
if mod == "MR32":
print("I have entered 32")
calc32(mac_addresses[num], names[num])
If this is still confusing please let me know, I'm not sure if pictures are allowed, I can draw a visual example if that's allowed :(
As your code is not runnable on my system, I run a simpler version of code to illustrate that your code should indeed go through each element of models.
models = ["MR18","MR32", "MR18"]
names = ["David", "Bob", "Frank"]
mac_addresses = ["00:18:0A:2C:3D:5F", "00:18:0A:2d:3c:5F", "00:18:0A:2A:3B:5F"]
def calc18(mac_address, name, mr):
print("calc18 running")
print(mac_address)
print(name)
print(mr)
def calc32(mac_address, name):
print("calc32 running")
print(mac_address)
print(name)
for num, mod in enumerate(models):
print(num)
print(mod)
if mod == "MR18":
print("I have entered")
calc18(mac_addresses[num], names[num], mod)
if mod == "MR32":
print("I have entered 32")
calc32(mac_addresses[num], names[num])
produces:
0
MR18
I have entered
calc18 running
00:18:0A:2C:3D:5F
David
MR18
1
MR32
I have entered 32
calc32 running
00:18:0A:2d:3c:5F
Bob
2
MR18
I have entered
calc18 running
00:18:0A:2A:3B:5F
Frank
MR18
The code does go through each element of the list. The problem shouldn't be with the for loop.
I see writer in your code, of which you might be writing to a file. Do check if it is set to overwrite mode or append mode.

PLY using dynamic tokens

I'm writing a program that can parse math papers written in .tex files. Here are what I want:
The program is supposed to detect the beginning, the end, sections, subsections, subsubsections, theorems, lemmas, definitions, conjectures, corollaries, proposition, exercises, notations and examples in a math paper and ignore the rest of the contents to produce a summary.
In the beginning the program is supposed to retain all characters until reaching token MT. In this case the lever should preserve the token and enter ig mode. Then it should ignore all characters unless it detects a theorem/lemma/definition/conjecture/corollary/example/exercise/notation/proposition, in which case it temporarily enters the INITIAL mode and retain it or a (sub/subsub)section in which case it should temporarily enter the sec mode.
\newtheorem{<name>}{<heading>}[<counter>] and \newtheorem{<name>}[<counter>]{<heading>} are detected as TH ptext THCC ptext THC ptext and TH ptext THCS ptext THSC ptext THC respectively where ptext is a bunch of TEXT.
import sys
import logging
from ply.lex import TOKEN
if sys.version_info[0] >= 3:
raw_input = input
tokens = (
'BT', 'BL', 'BD', 'BCONJ', 'BCOR', 'BE', 'ET', 'EL', 'ED', 'ECONJ', 'ECOR', 'EE', 'SEC', 'SSEC', 'SSSEC', 'ES', 'TEXT','ITEXT','BIBS','MT','BN','EN','BEXE','EEXE','BP','EP','TH','THCS','THSC','THCC','THC',
)
states = (('ig', 'exclusive'), ('sec', 'exclusive'), ('th', 'exclusive'), ('tht','exclusive'),('thc','exclusive'))
logging.basicConfig(
level = logging.DEBUG,
filename = "lexlog.txt",
filemode = "w",
format = "%(filename)10s:%(lineno)4d:%(message)s"
)
log = logging.getLogger()
th_temp = ''
thn_temp = ''
term_dic = {'Theorem':'','Lemma':'','Corollary':'','Definition':'','Conjecture':'','Example':'','Exercise':'','Notation':'','Proposition':''}
idb_list = ['','','','','','','','','']
ide_list = ['','','','','','','','','']
bb = r'\\begin\{'
eb = r'\\end\{'
ie = r'\}'
def finalize_terms():
global idb_list
global ide_list
if term_dic['Theorem'] != '':
idb_list[0] = bb + term_dic['Theorem'] + ie
ide_list[0] = eb + term_dic['Theorem'] + ie
if term_dic['Lemma'] != '':
idb_list[1] = bb + term_dic['Lemma'] + ie
ide_list[1] = eb + term_dic['Lemma'] + ie
if term_dic['Corollary'] != '':
idb_list[2] = bb + term_dic['Corollary'] + ie
ide_list[2] = eb + term_dic['Corollary'] + ie
if term_dic['Definition'] != '':
idb_list[3] = bb + term_dic['Definition'] + ie
ide_list[3] = eb + term_dic['Definition'] + ie
if term_dic['Conjecture'] != '':
idb_list[4] = bb + term_dic['Conjecture'] + ie
ide_list[4] = eb + term_dic['Conjecture'] + ie
if term_dic['Example'] != '':
idb_list[5] = bb + term_dic['Example'] + ie
ide_list[5] = eb + term_dic['Example'] + ie
if term_dic['Exercise'] != '':
idb_list[6] = bb + term_dic['Exercise'] + ie
ide_list[6] = eb + term_dic['Exercise'] + ie
if term_dic['Notation'] != '':
idb_list[7] = bb + term_dic['Notation'] + ie
ide_list[7] = eb + term_dic['Notation'] + ie
if term_dic['Proposition'] != '':
idb_list[8] = bb + term_dic['Proposition'] + ie
ide_list[8] = eb + term_dic['Proposition'] + ie
print(idb_list)
print(ide_list)
Here are some of the parsing functions:
def t_TH(t):
r'\\newtheorem\{'
t.lexer.begin('th')
return t
def t_th_THCS(t):
r'\}\['
t.lexer.begin('thc')
return t
def t_tht_THC(t):
r'\}'
if term_dic.has_key(thn_temp) == False:
print(f"{thn_temp} is unknown!")
elif len(th_temp) == 0:
print(f"No abbreviation for {thn_temp} is found!")
else:
term_dic[thn_temp] = th_temp
print(f"The abbreviation for {thn_temp} is {th_temp}!")
th_temp = ''
thn_temp = ''
t.lexer.begin('INITIAL')
return t
def t_th_THCC(t):
r'\}\{'
t.lexer.begin('tht')
return t
def t_thc_THSC(t):
r'\]\{'
t.lexer.begin('tht')
return t
#TOKEN(idb_list[0])
def t_ig_BT(t):
t.lexer.begin('INITIAL')
return t
#TOKEN(ide_list[0])
def t_ET(t):
t.lexer.begin('ig')
return t
def t_INITIAL_sec_thc_TEXT(t):
r'[\s\S]'
return t
def t_th_TEXT(t):
r'[\s\S]'
th_temp = th_temp + t.value()
return t
def t_tht_TEXT(t):
r'[\s\S]'
thn_temp = thn_temp + t.value()
return t
def t_ig_ITEXT(t):
r'[\s\S]'
pass
import ply.lex as lex
lex.lex(debug=True, debuglog = log)
Here are the errors:
ERROR: /Users/CatLover/Documents/Python_Beta/TexExtractor/texlexparse.py:154: No regular expression defined for rule 't_ET'
I don't know why the regular expression defined for 't_ET' etc using #TOKEN do not work.
Ply is a parser generator. It takes your parser/lexer description and compiles a parser/lexer from it. You cannot change the description of the language during the parse.
In this particular case, you might be better off writing a streaming ("online") scanner. But if you want to use Ply, then you will be better off not trying to modify the grammar to ignore parts of the input. Just parse the entire input and ignore the parts you're not interested in. You'll probably find that the code is much simpler.

Categories

Resources