read a specific string from a file in python? - python

I want to read the above file foo.txt and read only UDE from the first line and store it in a variable then Unspecified from the second line and store it in a variable and so on.
should I use read or readlines ? should I use regex for this ??
My below program is reading the entire line. how to read the specific word in the line ?
fo = open("foo.txt", "r+")
line = fo.readline()
left, right = line.split(':')
result = right.strip()
File_Info_Domain = result
print File_Info_Domain
line = fo.readline()
left, right = line.split(':')
result = right.strip()
File_Info_Intention = result
print File_Info_Intention
line = fo.readline()
left, right = line.split(':')
result = right.strip()
File_Info_NLU_Result = result
print File_Info_NLU_Result
fo.close()

You can use readline() (without s in name) to read line on-by-one, and then you can use split(':') to get value from line.
fo = open("foo.txt", "r+")
# read first line
line = fo.readline()
# split line only on first ':'
elements = line.split(':', 1)
if len(elements) < 2:
print("there is no ':' or there is no value after ':' ")
else:
# remove spaces and "\n"
result = elements[1].strip()
print(result)
#
# time for second line
#
# read second line
line = fo.readline()
# split line only on first ':'
elements = line.split(':', 1)
if len(elements) < 2:
print("there is no ':' or there is no value after ':' ")
else:
# remove spaces and "\n"
result = elements[1].strip()
print(result)
# close
fo.close()

While you can use #furas response or regex, I would recommend you to use a config file to do this, instead of a plain txt. So your config file would look like:
[settings]
Domain=UDE
Intention=Unspecified
nlu_slot_details={"Location": {"literal": "18 Slash 6/2015"}, "Search-phrase": {"literal": "18 slash 6/2015"}
And in your python code:
import configparser
config = configparser.RawConfigParser()
config.read("foo.cfg")
domain = config.get('settings', 'Domain')
intention = config.get('settings', 'Intention')
nlu_slot_details = config.get('settings', 'nlu_slot_details')

Related

In Python, print(lines) only working when code below is uncommented. Not inside conditional or after return

I have a Python script that opens text files and then saves them as newly parsed files.
The code works: print(lines) on line 69 prints the parsed contents and the names of the files are also output if writing is successful.
However, if I remove or comment all of the code below line 70, print(lines) on 69 no longer works.
import os
import re
def parse_file(file_path):
# open the file
with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
# read the contents
contents = f.read()
# split the contents into lines
lines = contents.split('\n')
# remove any line that includes the string "END OF TEXT FILE"
lines = [line for line in lines if "END OF TEXT FILE" not in line]
# add the part string
lines[0] = "PART, " + lines[0]
# filters out empty lines
lines = [line for line in lines if line.strip()]
# replace asterisks with an empty string
lines = [line.replace("*", "") for line in lines]
# replace asterisks with an empty string
lines = [line.replace("•", "") for line in lines]
# replace GB with an empty string
lines = [re.sub(r'\sGB', '', line) for line in lines]
# replace "MHz" with an empty string
lines = [re.sub(r'\sMHz', '', line) for line in lines]
# replace "mm" with an empty string
lines = [re.sub(r'\smm', '', line) for line in lines]
# replace W with an empty string
lines = [re.sub(r'\sw', '', line) for line in lines]
# combine the first line and second line if the second line does not start with "VRAM" or "Lighting"
if len(lines) > 1 and not lines[1].startswith("VRAM") and not lines[1].startswith("Lighting"):
lines[0] = lines[0] + " " + lines[1]
lines.pop(1)
# replace any ":" with ","
lines = [line.replace(":", ",") for line in lines]
# Trim the last entry in the list by extracting the digits after the dollar sign using a regular expression
dollar_amount_regex = r'\$(\d+)'
last_entry = lines[-1]
dollar_amount_match = re.search(dollar_amount_regex, last_entry)
# Extract the digits and construct a new string without the dollar sign
dollar_amount_digits = dollar_amount_match.group(1)
dollar_amount = dollar_amount_digits
# Replace the original string with the dollar amount
lines[-1] = dollar_amount
# add "Price, " to the last line
lines[-1] = "Price, " + lines[-1]
# remove any extra whitespace from each line
lines = [line.strip() for line in lines]
# CHECK OUTPUT
print(lines)
# WRITING TO FILES
# extract the first line from the list of lines
file_name = lines[0].strip()
# check if the lines contain the word "VRAM"
if any("VRAM" in line for line in lines):
# add "gpu-" to the front of the file name
file_name = "GPU - " + file_name
# check if the lines contain both "RAM Type" and "Frequency"
if any("RAM Type" in line for line in lines) and any("Frequency" in line for line in lines):
# add "ram-" to the front of the file name
file_name = "RAM - " + file_name
# create a new file path by joining the directory "D:\\Dev\\PAD\\pcbs-parsed-text" with the first line as the file name and .txt extension
new_file_path = os.path.join("D:\\Dev\\PAD\\pcbs-parsed-text", file_name + ".txt")
# save the lines to this file
with open(new_file_path, 'w') as f:
for line in lines:
f.write(line + '\n')
return file_name
# search for files in the folder "D:\\Dev\\PAD\\pcbs-raw-text" with "output" in the title
raw_text_dir = "D:\\Dev\\PAD\\pcbs-raw-text"
files = [f for f in os.listdir(raw_text_dir) if "output" in f]
successes = []
errors = []
# parse each file
for file in files:
file_path = os.path.join(raw_text_dir, file)
try:
file_name = parse_file(file_path)
successes.append(file_name)
except Exception as e:
errors.append(file_name)
print(f"Error parsing file {file_name}: {e}")
# check for success and print success or error, listing each file created
if errors:
print(f"Error parsing files: {errors}")
else:
print(f"Successfully parsed and saved files: {successes}")
I expected the print(lines) call to print the contents of the variable to the console but nothing happens.
To the best of my ability I've checked that print(lines) is not inside a conditional or a returned function.
I thought that Python otherwise executes top down, so I'm not sure about this one.
Still learning so it's probably something silly! Thanks.

How Can I read a string from a txt file line by line and run it with a function?

I have a txt file named a.txt. In this file a has a string per line. I want to append these strings line by line to the keyword = {} dict and run my double_letter function for each line of string. How can I do it?
my double_letter function:
keyword = {}
def double_letter():
print("\nDouble Letter:\n")
idx = random.randint(0, len(keyword) - 1)
keyword = keyword[:idx] + keyword[idx] + keyword[idx:]
print(keyword)
You can open, read and print the contents of a txt file as follows:
f = open("a.txt", "r")
for line in f:
print(line)
You can add in your function for each run through the for loop, i.e. calling it during each line of the text:
f = open("a.txt", "r")
for line in f:
print(line)
double_letter()
IIUC
Code
import random
def double_letter(line):
'''
Repeats random letter in line
'''
if line:
idx = random.randint(0, len(line) - 1)
return line[:idx] + line[idx] + line[idx:]
else:
return line # does nothing with blank lines
with open("a.txt", "r") as f: # with preferred with open file
keyword = {} # setup results dictionary
for line in f:
line = line.rstrip() # remove the '\n' at the end of each line
keyword[line] = double_letter(line) # add line with it's repeat to dictionary
print(keyword)
File a.txt
Welcome
To
Stackoverflow
Output
{'Welcome': 'Welcomee', 'To': 'Too', 'Stackoverflow': 'Stackoverfloow'}

Replace string in line without adding new line?

I want to replace string in a line which contain patternB, something like this:
from:
some lines
line contain patternA
some lines
line contain patternB
more lines
to:
some lines
line contain patternA
some lines
line contain patternB xx oo
more lines
I have code like this:
inputfile = open("d:\myfile.abc", "r")
outputfile = open("d:\myfile_renew.abc", "w")
obj = "yaya"
dummy = ""
item = []
for line in inputfile:
dummy += line
if line.find("patternA") != -1:
for line in inputfile:
dummy += line
if line.find("patternB") != -1:
item = line.split()
dummy += item[0] + " xx " + item[-1] + "\n"
break
outputfile.write(dummy)
It do not replace the line contain "patternB" as expected, but add an new line below it like :
some lines
line contain patternA
some lines
line contain patternB
line contain patternB xx oo
more lines
What can I do with my code?
Of course it is, since you append line to dummy in the beginning of the for loop and then the modified version again in the "if" statement. Also why check for Pattern A if you treat is as you treat everything else?
inputfile = open("d:\myfile.abc", "r")
outputfile = open("d:\myfile_renew.abc", "w")
obj = "yaya"
dummy = ""
item = []
for line in inputfile:
if line.find("patternB") != -1:
item = line.split()
dummy += item[0] + " xx " + item[-1] + "\n"
else:
dummy += line
outputfile.write(dummy)
The simplest will be:
1. Read all File into string
2. Call string.replace
3. Dump string to file
If you want to keep line by line iterator
(for a big file)
for line in inputfile:
if line.find("patternB") != -1:
dummy = line.replace('patternB', 'patternB xx oo')
outputfile.write(dummy)
else:
outputfile.write(line)
This is slower than other responses, but enables big file processing.
This should work
import os
def replace():
f1 = open("d:\myfile.abc","r")
f2 = open("d:\myfile_renew.abc","w")
ow = raw_input("Enter word you wish to replace:")
nw = raw_input("Enter new word:")
for line in f1:
templ = line.split()
for i in templ:
if i==ow:
f2.write(nw)
else:
f2.write(i)
f2.write('\n')
f1.close()
f2.close()
os.remove("d:\myfile.abc")
os.rename("d:\myfile_renew.abc","d:\myfile.abc")
replace()
You can use str.replace:
s = '''some lines
line contain patternA
some lines
line contain patternB
more lines'''
print(s.replace('patternB', 'patternB xx oo'))

python: list index out of range in reducer

I'm writing the reduce part of my mapreduce program and I am getting a 'list index out of range' in the line SplitLine = [1]. Why is this? I was fairly sure this was correct.
import sys
cKey = ""
cList = []
lines = sys.stdin.readlines()
for line in lines:
line = line.rstrip()
splitLine = line.split("\t")
key = splitLine[0]
value = splitLine[1]
....
Any thoughts? Thank you!
You are trying to access splitLine[1] when there is no [1] entry. Most likely, you have either blank lines or lines that have no \t in it.
A possible solution would be to ignore entries that have less than 2 columns:
import sys
cKey = ""
cList = []
lines = sys.stdin.readlines()
for line in lines:
line = line.rstrip()
splitLine = line.split("\t")
if len(splitLine) > 1:
key = splitLine[0]
value = splitLine[1]
You should do 2 things:
Filter out blank lines at the outset if not re.match(r'^\s*$', line):
For non blank lines add a default value for border cases with no tabs (blank space " " in this case) line+"\t "
Sample code:
import sys
cKey = ""
cList = []
lines = sys.stdin.readlines()
for line in lines:
# line is empty (has only the following: \t\n\r and whitespace)
if not re.match(r'^\s*$', line):
# add extra delimiter '\t' and default value ' ' to be safe
line = line+"\t "
splitLine = line.split("\t")
key = splitLine[0]
# strip any blank spaces at end
value = splitLine[1].rstrip()

Why is this writing part of the text to a new line? (Python)

I'm adding some new bits to one of the lines in a text file and then writing it along with the rest of the lines in the file to a new file. Referring to the 2nd if statement in the while loop, I want that to be all on the same line:
path = raw_input("Enter the name of the destination folder: ")
source_file = open("parameters")
lnum=1
for line in source_file:
nums = line.split()
if (lnum==10):
mTot = float(nums[0])
if (lnum==11):
qinit = float(nums[0])
if (lnum==12):
qfinal = float(nums[0])
if (lnum==13):
qgrowth = float(nums[0])
if (lnum==14):
K = float(nums[0])
lnum = lnum+1
q = qinit
m1 = mTot/(1+qinit)
m2 = (mTot*qinit)/(1+qinit)
taua = (1/3.7)*(mTot**(-4.0/3.0))
taue = taua/K
i = 1
infname = 'parameters'
while (q <= qfinal):
outfname = path+'/'+str(i)
oldfile = open(infname)
lnum=1
for line in oldfile:
if (lnum==17):
line = "{0:.2e}".format(m1)+' '+line
if (lnum==18):
line = "{0:.2e}".format(m2)+' '+line+' '+"{0:.2e}".format(taua)+' '+" {0:.2e}".format(taue)
newfile = open(outfname,'a')
newfile.write(line)
lnum=lnum+1
oldfile.close()
newfile.close()
i=i+1
q = q + q*(qgrowth)
m1 = mTot/(1+q)
m2 = (mTot*q)/(1+q)
but taua and taue are being written on the line below the rest of it. What am I missing here?
That is because line still contains the trailing newline, and when you concatenate it you are also including the newline.
Insert a
line = line.strip()
right after the if (lnum == 19): but before you put the longer line together to get rid of the newline.
Note that write will not add a newline automatically, so you'll want to add a trailing newline of your own.
UPDATE:
This is untested, but I think unless I messed up, you could just use this instead of your longer line:
line = line.strip()
line = "{0:.2e} {} {0:.2e} {0:.2e}\n".format(x, line, y, z)
If you use line = rstrip(line) on line before you change the line then it will trim the new line (as well as any whitespace).

Categories

Resources