I am trying to make my own basic programming language. I have the following code in my smrlang.py file
from sys import *
tokens = []
def open_file(filename):
    """Return the entire text content of the file at ``filename``.

    The file is opened in text mode for reading and is closed before the
    function returns, even when reading raises.
    """
    with open(filename, "r") as source:
        return source.read()
def smr(filecontents):
    """Scan ``filecontents`` character by character and collect tokens.

    Appends "PRINT" to the module-level ``tokens`` list when that keyword is
    read, prints "STRING" when a quoted string is closed, and prints the
    ``tokens`` list once the whole input has been consumed. Returns None.
    """
    tok = ""  # characters accumulated since the last reset
    state = 0  # 0 = outside a string literal, 1 = inside one
    string = ""
    filecontents = list(filecontents)
    for char in filecontents:
        tok += char
        if tok == " ":
            # A lone space is dropped outside a string and kept inside one.
            if state == 0:
                tok = ""
            else:
                tok = " "
        elif tok == "PRINT":
            tokens.append("PRINT")
            tok = ""
        elif tok == "\"":
            if state == 0:
                state = 1
            elif state == 1:
                print("STRING")
                string = ""
                state = 0
        elif state == 1:
            # NOTE(review): tok is not cleared in this branch, so while inside
            # a string it keeps growing and never equals "\"" again; the
            # closing quote is therefore not recognised.
            string += tok
    print(tokens)
def run():
    """Tokenize the file named by the first command-line argument."""
    smr(open_file(argv[1]))
run()
And I have this in my one.smr file:
PRINT "HELLO WORLD"
The output should be something like PRINT STRING, but when I use the command python3 smrlang.py one.smr, the output is just PRINT. I am using Python 3
Debugging it in the head, I found the problem:
elif state == 1:
string += tok
You don't reset the token here. It will be aababcabcd instead of abcd, and recognizing \" won't work (as it will be aababcabcd\").
This also causes the token to just be everything and it will never print.
Try changing it to:
elif state == 1:
string += tok
tok = ""
Output after fix:
> py -3 temp.py temp.txt
STRING
['PRINT']
Related
I am building a mini-coding language and I have run into an error. I have a function lex() that checks what the current char is and appends it to tokens. So if char=1, it appends NUM:1 to tokens. lex() also checks for LPAREN and RPAREN, but the problem is that RPAREN gets appended one token too early. For example, if I enter 2*(2*2), the tokens array is: ['NUM:2', 'OP:*', 'LPAREN(', 'NUM:2', 'OP:*', 'RPAREN)', 'NUM:2']. I need this to be fixed.
Code:
funCall = False
operators = ['+', '-', '*', '/']


# Lexer
def lex(line):
    """Split an arithmetic expression into a list of token strings.

    Spaces are removed first. Runs of digits become ``"NUM:<digits>"``,
    characters listed in ``operators`` become ``"OP:<char>"``, and
    parentheses become ``"LPAREN("`` / ``"RPAREN)"``. Any other character is
    ignored.

    Args:
        line: the expression text, e.g. ``"2*(2*2)"``.

    Returns:
        The tokens in source order.
    """
    tokens = []
    num = ""  # digits of the number currently being read

    def flush_number():
        # Emit the pending number, if any, so it precedes the next token.
        nonlocal num
        if num:
            tokens.append(f"NUM:{num}")
            num = ""

    for char in line.replace(" ", ""):
        if char.isdigit():
            num += char
        elif char in operators:
            flush_number()
            tokens.append(f"OP:{char}")
        elif char == "(" or char == ")":
            # The pending number must be flushed here as well; otherwise a
            # ")" is emitted before the number it closes.
            flush_number()
            kind = "LPAREN" if char == "(" else "RPAREN"
            tokens.append(f"{kind}{char}")
    flush_number()
    return tokens
# Parser
#TODO: Append add all variables to variables.py, then whenever out.py would be reset, add variables.py to it
def parse(tokens):
    """Rebuild an arithmetic expression from lexer tokens and evaluate it.

    The expression text is also appended, as one line, to ``out.py`` in the
    current working directory.

    Args:
        tokens: token strings of the form ``"NUM:<digits>"``, ``"OP:<char>"``,
            ``"LPAREN("`` or ``"RPAREN)"``.

    Returns:
        The value of the evaluated expression, or the string
        ``"Syntax error"`` when ``tokens`` is empty.

    Raises:
        Whatever ``eval`` raises for a malformed expression (for example
        ``SyntaxError`` or ``ZeroDivisionError``).
    """
    import os  # os.fsync is used below but this snippet never imports os

    if not tokens:
        return "Syntax error"
    print(tokens)

    pieces = []
    for tok in tokens:
        if "NUM" in tok:
            pieces.append(tok[4:])  # strip the "NUM:" prefix
        elif "OP" in tok:
            pieces.append(tok[3:])  # strip the "OP:" prefix
        elif "LPAREN" in tok:
            pieces.append("(")
        elif "RPAREN" in tok:
            pieces.append(")")
    equation = "".join(pieces)

    # The context manager closes the file even if a write fails.
    with open("out.py", "a") as out:
        out.write(equation)
        out.write("\n")
        out.flush()
        os.fsync(out.fileno())

    # NOTE(security): eval() executes arbitrary Python. It is only acceptable
    # here as long as `tokens` comes from a lexer that emits nothing but
    # digits, operators and parentheses.
    return eval(equation)
while True:
data = input("KPP>")
print(parse(lex(data)))
This is my code for a programming problem in a CS course that involves tokenizing a string by its opening and closing parentheses.
def expand(S):
    """Expand a parenthesised letter/count string into a sorted string.

    The text inside the first balanced pair of parentheses is read as
    alternating ``letter`` / single-digit ``count`` characters; each letter is
    repeated ``count`` times. If that text contains a nested group, only the
    part before the nested group is read this way and the nested group is
    expanded recursively. The characters of the result are sorted.

    Args:
        S: the input string, e.g. ``"(b1(a2))"``.

    Returns:
        The expanded, sorted string, or ``""`` when ``S`` has no "(".

    Raises:
        ValueError: if the parentheses are unbalanced or a count is not a
            digit.
    """
    if "(" not in S:
        return ""

    start, end = parentheses_pair(S)
    # Slice with ":" (indexing with "," builds a tuple index and fails).
    samp = S[start + 1:end]
    currstr = samp
    innerstr = ""
    if "(" in samp:
        # The nested pair must be located within samp, not within S,
        # because the indices are applied to samp.
        inner_start, inner_end = parentheses_pair(samp)
        currstr = samp[:inner_start]
        innerstr = expand(samp[inner_start:inner_end + 1])

    output = ""
    for i in range(1, len(currstr), 2):
        letter = currstr[i - 1]
        number = currstr[i]
        # The count is a character; convert it before repeating the letter.
        output += letter * int(number)
    output += innerstr
    return "".join(sorted(output))


def parentheses_pair(S):
    """Locate the first "(" in ``S`` and its matching ")".

    Returns:
        A tuple ``(open_index, close_index)``.

    Raises:
        ValueError: if ``S`` has no "(" or the first "(" is never closed.
    """
    openpar = S.find("(")
    if openpar == -1:
        raise ValueError("no opening parenthesis in input")
    depth = 0
    for currind in range(openpar, len(S)):
        if S[currind] == "(":
            depth += 1
        elif S[currind] == ")":
            depth -= 1
            if depth == 0:
                return openpar, currind
    raise ValueError("unbalanced parentheses in input")
When I used type on both open and close, <class 'int'> was returned by the terminal so I really don't know why it won't accept the variables as the string indices on samp = S[open, close].
I am making a language in python called PythonScript, which is a JavaScript and Python (not Python 3) mix.
I am adding variables, but when I do the following code I don't get the expected result.
var scuffles = hi!
I have tried to debug it, and as you will see in my code I made a debugging array, which I found to be empty. I looked up some tutorials, but none were covering what I am trying to do.
This is a portion of the file:
import os
import sys
programDebugingArray1 = []
functions = ["Console"]
functionCodes = [["out \"TEST\""]]
programVariables = {}
def lex(cm,stri,nums,toWake):
    """Act on one console command, then prompt for the next one.

    ``cm`` is the command text. ``stri``, ``nums`` and ``toWake`` are accepted
    but not read anywhere in this function.

    NOTE(review): block nesting was reconstructed; the final ``console()``
    call is assumed to sit at function level so that every command is
    followed by a new prompt. Confirm against the original file.
    """
    i = 0  # not used below
    if cm == "":
        console()
    elif cm == "var" or cm == "=":
        pass
    elif cm == "--vars":
        print(programVariables)
    elif cm == "--debugManual1":
        print(programDebugingArray1)
    else:
        print("\""+cm+"\" was not recognized as a PythonScript command.")
    console()
def console():
    """Read one line from stdin, scan it character by character, and pass
    the result to ``lex``.

    ``state`` selects what each character is appended to: 1 = ``string``,
    2 = ``newVarName``, 3 = ``varValue``. ``tok`` accumulates every character
    read since it was last cleared.

    NOTE(review): block nesting was reconstructed from a listing without
    indentation; confirm against the original file.
    """
    commandToRun = ""
    tok = ""
    string = ""
    varValue = ""
    newVarName = ""
    Strings = []
    Numbers = []
    functionToCall = ""
    state = 0
    command = input()
    for char in command:
        tok += char
        if state == 1:
            string += char
        elif state == 2:
            # Spaces are dropped from the variable name.
            if char == " ":
                char = ""
            newVarName += char
        elif state == 3:
            varValue += char
        if (tok == "\"" or tok == "'") or (char == "\"" or char == "'"):
            if state == 0:
                state = 1
            elif state == 1:
                state = 0
                # Drop the closing quote that was appended above.
                string = string[0:len(string) - 1]
                Strings.append(string)
                string = ""
            tok = ""
            char = ""
        elif tok == " ":
            tok = ""
        elif tok == "var" and state == 0:
            state = 2
            tok = ""
        # NOTE(review): tok holds everything read since the last reset, so
        # this only matches when tok was cleared immediately before the "=".
        elif tok == "=" and state == 2:
            state = 3
            tok = ""
        elif tok == "!" and state == 3:
            programDebugingArray1.append(newVarName)
            programDebugingArray1.append(varValue)
            state == 0  # NOTE(review): comparison, not assignment; no effect
            programVariables[newVarName] = varValue
            tok = ""
        elif tok == "--vars" and state == 0:
            commandToRun == "--vars"  # NOTE(review): comparison; no effect
            tok = ""
        elif tok == "--debugManual1" and state == 0:
            commandToRun == "--debugManual1"  # NOTE(review): comparison; no effect
            tok = ""
    # commandToRun is never assigned inside the loop, so the raw input line
    # is what gets dispatched.
    if commandToRun == "":
        commandToRun = command
    lex(commandToRun,Strings,Numbers,functionToCall)
console()
I expected the debug array to at least have something in it, but for some reason both it and the variables dictionary are empty. There are no error messages or boots out of the console.
I don't know what to add. At the time that's why I only posted the code.
But the intended result is that a string should be parsed (and successfully printed).
I don't remember what the issue was, but by the sounds of it, it probably just didn't print.
(I'm doing this to get back on S.O.'s "good side".)
class Lex:
    # Runs a command produced by Calin.parse.
    def run(args, string):
        """Print ``string`` when ``args`` is "print"; otherwise do nothing."""
        if args == "print":
            print(str(string))
class Calin:
    # Class-level defaults; parse() below uses its own local variables.
    string = ""
    running = ""
    def parse(args):
        """Scan ``args`` character by character and hand the recognised
        command and string to ``Lex.run``.

        ``state`` is 1 while inside a double-quoted string, 0 otherwise.
        """
        lexic = Lex
        string = ""
        tok = ""
        state = 0
        for char in args:
            tok += char
            if tok == " ":
                if state == 0:
                    tok = ""
                elif state == 1:
                    tok = " "
            # NOTE(review): tok is not cleared after "print" is matched, so
            # it is longer than one character by the time a quote arrives and
            # this comparison does not match.
            elif tok == "\"":
                if state == 0:
                    state = 1
                elif state == 1:
                    state = 0
            elif state == 1:
                string += char
            elif tok == "print":
                running = "print"
        lexic.run(running, string)
trans = Calin
trans.parse("print \"WOW A STRING\"")
Because you only set state to 1 when tok == "\"", which never happens (you keep appending characters to tok). Comparing char instead works:
class Lex:
    """Runs a command recognised by ``Calin.parse``."""

    @staticmethod
    def run(args, string):
        """Print ``string`` when ``args`` is "print"; otherwise do nothing."""
        if args == "print":
            print(str(string))


class Calin:
    """Minimal parser for lines of the form ``print "text"``."""

    # Class-level defaults; parse() works on its own local variables.
    string = ""
    running = ""

    @staticmethod
    def parse(args):
        """Scan ``args`` and execute the command it contains.

        Characters between double quotes are collected as the string
        argument; the word ``print`` selects the command. The result is
        handed to ``Lex.run``. Input without a recognised command runs
        nothing.
        """
        lexic = Lex
        string = ""
        tok = ""
        state = 0  # 1 while inside a double-quoted string
        running = ""  # stays empty when no command is recognised
        for char in args:
            tok += char
            if tok == " ":
                if state == 0:
                    tok = ""
                elif state == 1:
                    tok = " "
            elif char == "\"":  # <<< Change this
                if state == 0:
                    state = 1
                elif state == 1:
                    state = 0
            elif state == 1:
                string += char
            elif tok == "print":
                running = "print"
        lexic.run(running, string)
trans = Calin
trans.parse("print \"WOW A STRING\"")
# WOW A STRING
This is the basic.py file for a programming language I am making. At the moment it is throwing an error.
from sys import *
tokens = []
def open_file(filename):
    """Return the text of the file at ``filename`` with "<EOF>" appended.

    The file is opened in text mode for reading and is closed before the
    function returns, even when reading raises.
    """
    with open(filename, "r") as source:
        data = source.read()
    return data + "<EOF>"
def lex(filecontents):
    """Scan ``filecontents`` character by character into tokens.

    Tokens are appended to the module-level ``tokens`` list, which is printed
    and returned at the end. Emitted forms: "PRINT", "STRING:<text>\"",
    "NUM:<digits>" and "EXPR:<digits and plus signs>".

    NOTE(review): block nesting was reconstructed from a listing without
    indentation; confirm against the original file.
    """
    tok = ""  # characters accumulated since the last reset
    state = 0  # 1 while inside a double-quoted string
    isexpr = 0  # set to 1 once a "+" has been seen; never reset below
    string = ""
    expr = ""
    n = ""  # not used below
    filecontents = list(filecontents)
    for char in filecontents:
        tok += char
        if tok == " ":
            if state == 0:
                tok = ""
            else:
                tok = " "
        elif tok == "\n" or tok == "<EOF>":
            # End of line or input: flush any pending number or expression.
            if expr != "" and isexpr == 1:
                #print(expr + "EXPR")
                tokens.append("EXPR:" + expr)
                expr = ""
            elif expr != "" and isexpr == 0:
                #print(expr + "NUM")
                tokens.append("NUM:" + expr)
                expr = ""
            tok = ""
        elif tok.lower() == "print":
            tokens.append("PRINT")
            tok = ""
        elif tok.isnumeric():
            expr += tok
            tok = ""
        elif tok == "+":
            isexpr = 1
            expr += tok
            tok = ""
        elif tok == "\"":
            if state == 0:
                state = 1
            elif state == 1:
                # Closing quote: the token keeps a trailing quote character.
                tokens.append("STRING:" + string + "\"")
                string = ""
                state = 0
            tok = ""
        elif state == 1:
            string += tok
            tok = ""
    print(tokens)
    return tokens
def parse(toks):
    """Execute PRINT statements found in the token list ``toks``.

    A "PRINT" token followed by a "STRING:", "NUM:" or "EXPR:" token prints
    the text after that prefix.

    NOTE(review): block nesting was reconstructed; ``i+=2`` is assumed to
    belong to the matching branch. Confirm against the original file.
    """
    i = 0
    # NOTE(review): toks[i+1] is read on every iteration, so reaching
    # i == len(toks) - 1 raises IndexError.
    while(i < len(toks)):
        if toks[i] + " " + toks[i+1][0:6] == "PRINT STRING" or toks[i] + " " + toks[i+1][0:3] == "PRINT NUM" or toks[i] + " " + toks[i+1][0:4] == "PRINT EXPR":
            if toks[i+1][0:6] == "STRING":
                print(toks[i+1][7:])  # text after "STRING:"
            elif toks[i+1][0:3] == "NUM":
                print(toks[i+1][4:])  # text after "NUM:"
            elif toks[i+1][0:4] == "EXPR":
                print(toks[i+1][5:])  # text after "EXPR:"
            i+=2
def run():
    """Lex and then parse the file named by the first command-line argument."""
    parse(lex(open_file(argv[1])))
run()
here is the test.vil file(my programming language is called villar) that I am passing data through:
STRING "HELLO WORLD"
string "Hey world!"
17 + 3
As a result, I get an IndexError: List index out of range in line 62.
Can anyone help me here? I'd also love advice on how to improve it, if that's allowed here.
You've got the line:
while(i < len(toks)):
in the parse function. However, within this while loop, you access toks[i+1] element, which'll be out of bounds on the final iteration of the while loop (as i == len(toks)-1 and i+1 == len(toks) which is out of bounds and throwing an error). You need to change that above line to:
while(i < len(toks)-1):
so that on the final iteration i == len(toks) - 2 and i+1 == len(toks) - 1 which are both in bounds.