program for appending a file and removing the newlines - python

i have written this program, just want to know if ti has been written properly! because I am new to this and dont know much!!
def read_words(words_file):
""" (file open for reading) -> list of str
Return a list of all words (with newlines removed) from open file
words_file.
Precondition: Each line of the file contains a word in uppercase characters
from the standard English alphabet.
"""
line = words_file.readline()
while line != '':
line= words_file.readline()
words_list.append(line.rstrip('\n'))
words_file.close()
return words_file

Instead of using while loop you can use for loop like this.
words_list = []
with open('path/to/file.txt', 'r') as word_file
lines = words_file.readline()
for line in lines:
words_list.append(line.rstrip('\n'))

Related

Python - Translate a file and keep original paragraph spacing

I have this project I am working on but need help. My main goal is to make the translated text file look the same as the original file with the exception of the translated words.
Here is what a snippet of the original file looks like:
Original Text File
Here is my python code:
# Step 1: Import the english.txt file
import json
english_text = open('/home/jovyan/english_to_lolspeak_fellow/english.txt', 'r')
text = english_text.readlines()
english_text.close()
# Step 2: Import the glossary (the tranzlashun.json file)
with open('/home/jovyan/english_to_lolspeak_fellow/tranzlashun.json') as translationFile:
data = json.load(translationFile)
# Step 3:Translate the English text into Lolspeak
translated_text= ''
for line in text:
for word in line.split():
if word in data:
translated_text += data[word.lower()]+" "
else:
translated_text += word.lower()+ " "
pass
# Step 4 :Save the translated text as the "lolcat.txt" file
with open('/home/jovyan/english_to_lolspeak_fellow/lolcat.txt', 'w') as lolcat_file:
lolcat_file.write(translated_text)
lolcat_file.close()
And lastly, here is what my output looks like:
Output Translated File
As you can see, I was able to translate the file but the original spacing is ignored. How do I change my code to keep the spacing as it was before?
You can keep the spaces by reading one line at a time.
with open('lolcat.txt', 'w') as fw, open('english.txt') as fp:
for line in fp:
for word in line.split():
line = line.replace(word, data.get(word.lower(), word))
fw.write(line)
I'd suggest combining steps 3 and 4 to translate each line and write the line and then \n to start the next line.
I haven't checked the following on a compiler so you might have to modify it to get it to work.
Note I changed the 'w' to 'a' so it appends instead of just writes and afaik using 'with' means the file will close so you don't need the explicit close().
for line in text:
translated_line = ""
for word in line.split():
if word in data:
translated_line += data[word.lower()]+" "
else:
translated_line += word.lower()+ " "
with open('/home/jovyan/english_to_lolspeak_fellow/lolcat.txt', 'a') as lolcat_file:
lolcat_file.write(translated_line)
write("\n")

How to get first word from text file removing \n - python

If the text file is /n/n Hello world!/n I like python./n
How do I get the first word from that text?
I tried to code:
def word_file(file):
files = open(file, 'r')
l = files.readlines()
for i in range(len(l)):
a = l[i].rstrip("\n")
line = l[0]
word = line.strip().split(" ")[0]
return word
There is space in front Hello.
The result I get is NONE. How should I correct it?
Can anybody help?
Assuming there is a word in the file:
def word_file(f):
with open(f) as file:
return file.read().split()[0]
file.read reads the entire file as a string. Do a split with no parameters on that string (i.e. sep=None). Then according to the Python manual "runs of consecutive whitespace are regarded as a single separator, and the result will contain no empty strings at the start or end if the string has leading or trailing whitespace." So the splitting will be done on consecutive white space and there will be no empty strings returned as a result of the split. Therefore the first element of the returned list will be the first word in the file.
If there is a possibility that the file is empty or contains nothing but white space, then you would need to check the return value from file.read().split() to ensure it is not an empty list.
If you need to avoid having to read the entire file into memory at once, then the following, less terse code can be used:
def word_file(f):
with open(f) as file:
for line in file:
words = line.split()
if words:
return words[0]
return None # No words found
Edit: #Booboo answer is far better than my answer
This should work:
def word_file(file):
with open(file, 'r') as f:
for line in f:
for index, character in enumerate(line):
if not character.isspace():
line = line[index:]
for ind, ch in enumerate(line):
if ch.isspace():
return line[:ind]
return line # could not find whitespace character at end
return None # no words found
output:
Hello

Is there any shortcut in Python to remove all blanks at the end of each line in a file?

I've learned that we can easily remove blank lined in a file or remove blanks for each string line, but how about remove all blanks at the end of each line in a file ?
One way should be processing each line for a file, like:
with open(file) as f:
for line in f:
store line.strip()
Is this the only way to complete the task ?
Possibly the ugliest implementation possible but heres what I just scratched up :0
def strip_str(string):
last_ind = 0
split_string = string.split(' ')
for ind, word in enumerate(split_string):
if word == '\n':
return ''.join([split_string[0]] + [ ' {} '.format(x) for x in split_string[1:last_ind]])
last_ind += 1
Don't know if these count as different ways of accomplishing the task. The first is really just a variation on what you have. The second does the whole file at once, rather than line-by-line.
Map that calls the 'rstrip' method on each line of the file.
import operator
with open(filename) as f:
#basically the same as (line.rstrip() for line in f)
for line in map(operator.methodcaller('rstrip'), f)):
# do something with the line
read the whole file and use re.sub():
import re
with open(filename) as f:
text = f.read()
text = re.sub(r"\s+(?=\n)", "", text)
You just want to remove spaces, another solution would be...
line.replace(" ", "")
Good to remove white spaces.

Replace words of a long document in Python

I have a dictionary dict with some words (2000) and I have a huge text, like Wikipedia corpus, in text format. For each word that is both in the dictionary and in the text file, I would like to replace it with word_1.
with open("wiki.txt",'r') as original, open("new.txt",'w') as mod:
for line in original:
new_line = line
for word in line.split():
if (dict.get(word.lower()) is not None):
new_line = new_line.replace(word,word+"_1")
mod.write(new_line)
This code creates a new file called new.txt with the words that appear in the dictionary replaced as I want.
This works for short files, but for the longer that I am using as input, it "freezes" my computer.
Is there a more efficient way to do that?
Edit for Adi219:
Your code seems working, but there is a problem:
if a line is like that: Albert is a friend of Albert and in my dictionary I have Albert, after the for cycle, the line will be like this:Albert_1_1 is a friend of Albert_1. How can I replace only the exact word that I want, to avoid repetitions like _1_1_1_1?
Edit2:
To solve the previous problem, I changed your code:
with open("wiki.txt", "r") as original, open("new.txt", "w") as mod:
for line in original:
words = line.split()
for word in words:
if dict.get(word.lower()) is not None:
mod.write(word+"_1 ")
else:
mod.write(word+" ")
mod.write("\n")
Now everything should work
A few things:
You could remove the declaration of new_line. Then, change new_line = new_line.replace(...) line with line = line.replace(...). You would also have to write(line) afterwards.
You could add words = line.split() and use for word in words: for the for loop, as this removes a call to .split() for every iteration through the words.
You could (manually(?)) split your large .txt file into multiple smaller files and have multiple instances of your program running on each file, and then you could combine the multiple outputs into one file. Note: You would have to remember to change the filename for each file you're reading/writing to.
So, your code would look like:
with open("wiki.txt", "r") as original, open("new.txt", "w") as mod:
for line in original:
words = line.split()
for word in words:
if dict.get(word.lower()) is not None:
line = line.replace(word, word + "_1")
mod.write(line)

Python: Copying lines that meet requirements

So, basically, I need a program that opens a .dat file, checks each line to see if it meets certain prerequisites, and if they do, copy them into a new csv file.
The prerequisites are that it must 1) contain "$W" or "$S" and 2) have the last value at the end of the line of the DAT say one of a long list of acceptable terms. (I can simply make-up a list of terms and hardcode them into a list)
For example, if the CSV was a list of purchase information and the last item was what was purchased, I only want to include fruit. In this case, the last item is an ID Tag, and I only want to accept a handful of ID Tags, but there is a list of about 5 acceptable tags. The Tags have very veriable length, however, but they are always the last item in the list (and always the 4th item on the list)
Let me give a better example, again with the fruit.
My original .DAT might be:
DGH$G$H $2.53 London_Port Gyro
DGH.$WFFT$Q5632 $33.54 55n39 Barkdust
UYKJ$S.52UE $23.57 22#3 Apple
WSIAJSM_33$4.FJ4 $223.4 Ha25%ek Banana
Only the line: "UYKJ$S $23.57 22#3 Apple" would be copied because only it has both 1) $W or $S (in this case a $S) and 2) The last item is a fruit. Once the .csv file is made, I am going to need to go back through it and replace all the spaces with commas, but that's not nearly as problematic for me as figuring out how to scan each line for requirements and only copy the ones that are wanted.
I am making a few programs all very similar to this one, that open .dat files, check each line to see if they meet requirements, and then decides to copy them to the new file or not. But sadly, I have no idea what I am doing. They are all similar enough that once I figure out how to make one, the rest will be easy, though.
EDIT: The .DAT files are a few thousand lines long, if that matters at all.
EDIT2: The some of my current code snippets
Right now, my current version is this:
def main():
#NewFile_Loc = C:\Users\J18509\Documents
OldFile_Loc=raw_input("Input File for MCLG:")
OldFile = open(OldFile_Loc,"r")
OldText = OldFile.read()
# for i in range(0, len(OldText)):
# if (OldText[i] != " "):
# print OldText[i]
i = split_line(OldText)
if u'$S' in i:
# $S is in the line
print i
main()
But it's very choppy still. I'm just learning python.
Brief update: the server I am working on is down, and might be for the next few hours, but I have my new code, which has syntax errors in it, but here it is anyways. I'll update again once I get it working. Thanks a bunch everyone!
import os
NewFilePath = "A:\test.txt"
Acceptable_Values = ('Apple','Banana')
#Main
def main():
if os.path.isfile(NewFilePath):
os.remove(NewFilePath)
NewFile = open (NewFilePath, 'w')
NewFile.write('Header 1,','Name Header,','Header 3,','Header 4)
OldFile_Loc=raw_input("Input File for Program:")
OldFile = open(OldFile_Loc,"r")
for line in OldFile:
LineParts = line.split()
if (LineParts[0].find($W)) or (LineParts[0].find($S)):
if LineParts[3] in Acceptable_Values:
print(LineParts[1], ' is accepted')
#This Line is acceptable!
NewFile.write(LineParts[1],',',LineParts[0],',',LineParts[2],',',LineParts[3])
OldFile.close()
NewFile.close()
main()
There are two parts you need to implement: First, read a file line by line and write lines meeting a specific criteria. This is done by
with open('file.dat') as f:
for line in f:
stripped = line.strip() # remove '\n' from the end of the line
if test_line(stripped):
print stripped # Write to stdout
The criteria you want to check for are implemented in the function test_line. To check for the occurrence of "$W" or "$S", you can simply use the in-Operator like
if not '$W' in line and not '$S' in line:
return False
else:
return True
To check, if the last item in the line is contained in a fixed list, first split the line using split(), then take the last item using the index notation [-1] (negative indices count from the end of a sequence) and then use the in operator again against your fixed list. This looks like
items = line.split() # items is an array of strings
last_item = items[-1] # take the last element of the array
if last_item in ['Apple', 'Banana']:
return True
else:
return False
Now, you combine these two parts into the test_line function like
def test_line(line):
if not '$W' in line and not '$S' in line:
return False
items = line.split() # items is an array of strings
last_item = items[-1] # take the last element of the array
if last_item in ['Apple', 'Banana']:
return True
else:
return False
Note that the program writes the result to stdout, which you can easily redirect. If you want to write the output to a file, have a look at Correct way to write line to file in Python
inlineRequirements = ['$W','$S']
endlineRequirements = ['Apple','Banana']
inputFile = open(input_filename,'rb')
outputFile = open(output_filename,'wb')
for line in inputFile.readlines():
line = line.strip()
#trailing and leading whitespace has been removed
if any(req in line for req in inlineRequirements):
#passed inline requirement
lastWord = line.split(' ')[-1]
if lastWord in endlineRequirements:
#passed endline requirement
outputFile.write(line.replace(' ',','))
#replaced spaces with commas and wrote to file
inputFile.close()
outputFile.close()
tags = ['apple', 'banana']
match = ['$W', '$S']
OldFile_Loc=raw_input("Input File for MCLG:")
OldFile = open(OldFile_Loc,"r")
for line in OldFile.readlines(): # Loop through the file
line = line.strip() # Remove the newline and whitespace
if line and not line.isspace(): # If the line isn't empty
lparts = line.split() # Split the line
if any(tag.lower() == lparts[-1].lower() for tag in tags) and any(c in line for c in match):
# $S or $W is in the line AND the last section is in tags(case insensitive)
print line
import re
list_of_fruits = ["Apple","Bannana",...]
with open('some.dat') as f:
for line in f:
if re.findall("\$[SW]",line) and line.split()[-1] in list_of_fruits:
print "Found:%s" % line
import os
NewFilePath = "A:\test.txt"
Acceptable_Values = ('Apple','Banana')
#Main
def main():
if os.path.isfile(NewFilePath):
os.remove(NewFilePath)
NewFile = open (NewFilePath, 'w')
NewFile.write('Header 1,','Name Header,','Header 3,','Header 4)
OldFile_Loc=raw_input("Input File for Program:")
OldFile = open(OldFile_Loc,"r")
for line in OldFile:
LineParts = line.split()
if (LineParts[0].find(\$W)) or (LineParts[0].find(\$S)):
if LineParts[3] in Acceptable_Values:
print(LineParts[1], ' is accepted')
#This Line is acceptable!
NewFile.write(LineParts[1],',',LineParts[0],',',LineParts[2],',',LineParts[3])
OldFile.close()
NewFile.close()
main()
This worked great, and has all the capabilities I needed. The other answers are good, but none of them do 100% of what I needed like this one does.

Categories

Resources