Adding words to list - python

I wrote a function that is supposed to add the words from a .txt to a list but it is supposed to ignore empty lines, how ever my function outputs ['',] at an empty line.
def words(filename):
word = []
file = open(filename)
for line in file:
word.append(line.strip())
return word
How can i fix this thanks

what about a simple if test?
def words(filename):
word = []
file = open(filename)
for line in file:
if line.strip() != ' ':
word.append(line.strip())
return word
EDIT: I forgot the .strip() after line
Besides, you could also use if line.strip():
Last, if you want to get a list of words but have several words per line, you need to split them. Assuming your separator is ' ':
def words(filename):
word = []
file = open(filename)
for line in file:
if line.strip() != ' ':
word.extend(line.strip().split())
return word

You can fix this like that:
def words(filename):
word = []
file = open(filename)
for line in file:
if not line.strip():
word.append(line)
return word
Your problem is that you're adding line.strip(), but what happens if line is actually an empty string? Look:
In [1]: line = ''
In [2]: line.strip()
Out[2]: ''
''.strip() returns an empty string.

You need to test for an empty line and skip the append in that case.
def words(filename):
word = []
file = open(filename)
for line in file:
line=line.strip()
if len(line):
word.append(line)
return word

Related

How Can I read a string from a txt file line by line and run it with a function?

I have a txt file named a.txt. In this file a has a string per line. I want to append these strings line by line to the keyword = {} dict and run my double_letter function for each line of string. How can I do it?
my double_letter function:
keyword = {}
def double_letter():
print("\nDouble Letter:\n")
idx = random.randint(0, len(keyword) - 1)
keyword = keyword[:idx] + keyword[idx] + keyword[idx:]
print(keyword)
You can open, read and print the contents of a txt file as follows:
f = open("a.txt", "r")
for line in f:
print(line)
You can add in your function for each run through the for loop, i.e. calling it during each line of the text:
f = open("a.txt", "r")
for line in f:
print(line)
double_letter()
IIUC
Code
import random
def double_letter(line):
'''
Repeats random letter in line
'''
if line:
idx = random.randint(0, len(line) - 1)
return line[:idx] + line[idx] + line[idx:]
else:
return line # does nothing with blank lines
with open("a.txt", "r") as f: # with preferred with open file
keyword = {} # setup results dictionary
for line in f:
line = line.rstrip() # remove the '\n' at the end of each line
keyword[line] = double_letter(line) # add line with it's repeat to dictionary
print(keyword)
File a.txt
Welcome
To
Stackoverflow
Output
{'Welcome': 'Welcomee', 'To': 'Too', 'Stackoverflow': 'Stackoverfloow'}

Open file and count how many times a word is in file

I want to open a file with a given word. The function will read the file line by line and return a count of how many lines contain the given word.
def count_word(file_name, word):
with open(file_name, 'r') as file:
line = file.readline()
line.rstrip('\n')
cnt = 0
for line in file:
if word in line:
cnt += 1
return cnt
This is what I've tried, but it's not working correctly. Not sure what's going on.
Try this:
def count_word(file_name, word):
with open(file_name, 'r') as file:
content = file.read()
return content.count(word)
You need to count the occurrences of the word in isolation. For example, as is in classic, but the word as does not appear in the sentence this is a classic problem. Additionally, you need to move your return to outside the for-loop:
def wordCount(infilepath, word):
answer = 0
with open(infilepath) as infile:
for line in infilepath:
answer += line.split().count(word)
return answer
Here is an alternative version using collections.Counter and re.split:
from collections import Counter
import re
def count_word(file_name, word):
return Counter(re.split('\s+|\W+', open(file_name).read())).get(word, 0)
This should do it, first, it loads the file, then looks through it and counts up all of the words in each line and returning them
#function to count words
def count_word(file_name, word):
#hold number of words
cnt = 0
#open file
with open(file_name, 'r') as file:
#get lines
lines = file.readlines()
#loop the thrpugh file
for line in lines:
#strip the line
line.rstrip('\n')
#get how many times it appears in the line
cnt += line.lower().split().count(word)
return cnt
print(count_word("test.txt", "test"))

Replace string in line without adding new line?

I want to replace string in a line which contain patternB, something like this:
from:
some lines
line contain patternA
some lines
line contain patternB
more lines
to:
some lines
line contain patternA
some lines
line contain patternB xx oo
more lines
I have code like this:
inputfile = open("d:\myfile.abc", "r")
outputfile = open("d:\myfile_renew.abc", "w")
obj = "yaya"
dummy = ""
item = []
for line in inputfile:
dummy += line
if line.find("patternA") != -1:
for line in inputfile:
dummy += line
if line.find("patternB") != -1:
item = line.split()
dummy += item[0] + " xx " + item[-1] + "\n"
break
outputfile.write(dummy)
It do not replace the line contain "patternB" as expected, but add an new line below it like :
some lines
line contain patternA
some lines
line contain patternB
line contain patternB xx oo
more lines
What can I do with my code?
Of course it is, since you append line to dummy in the beginning of the for loop and then the modified version again in the "if" statement. Also why check for Pattern A if you treat is as you treat everything else?
inputfile = open("d:\myfile.abc", "r")
outputfile = open("d:\myfile_renew.abc", "w")
obj = "yaya"
dummy = ""
item = []
for line in inputfile:
if line.find("patternB") != -1:
item = line.split()
dummy += item[0] + " xx " + item[-1] + "\n"
else:
dummy += line
outputfile.write(dummy)
The simplest will be:
1. Read all File into string
2. Call string.replace
3. Dump string to file
If you want to keep line by line iterator
(for a big file)
for line in inputfile:
if line.find("patternB") != -1:
dummy = line.replace('patternB', 'patternB xx oo')
outputfile.write(dummy)
else:
outputfile.write(line)
This is slower than other responses, but enables big file processing.
This should work
import os
def replace():
f1 = open("d:\myfile.abc","r")
f2 = open("d:\myfile_renew.abc","w")
ow = raw_input("Enter word you wish to replace:")
nw = raw_input("Enter new word:")
for line in f1:
templ = line.split()
for i in templ:
if i==ow:
f2.write(nw)
else:
f2.write(i)
f2.write('\n')
f1.close()
f2.close()
os.remove("d:\myfile.abc")
os.rename("d:\myfile_renew.abc","d:\myfile.abc")
replace()
You can use str.replace:
s = '''some lines
line contain patternA
some lines
line contain patternB
more lines'''
print(s.replace('patternB', 'patternB xx oo'))

Read special characters from .txt file in python

The goal of this code is to find the frequency of words used in a book.
I am tying to read in the text of a book but the following line keeps throwing my code off:
precious protégés. No, gentlemen; he'll always show 'em a clean pair
specifically the é character
I have looked at the following documentation, but I don't quite understand it: https://docs.python.org/3.4/howto/unicode.html
Heres my code:
import string
# Create word dictionary from the comprehensive word list
word_dict = {}
def create_word_dict ():
# open words.txt and populate dictionary
word_file = open ("./words.txt", "r")
for line in word_file:
line = line.strip()
word_dict[line] = 1
# Removes punctuation marks from a string
def parseString (st):
st = st.encode("ascii", "replace")
new_line = ""
st = st.strip()
for ch in st:
ch = str(ch)
if (n for n in (1,2,3,4,5,6,7,8,9,0)) in ch or ' ' in ch or ch.isspace() or ch == u'\xe9':
print (ch)
new_line += ch
else:
new_line += ""
# now remove all instances of 's or ' at end of line
new_line = new_line.strip()
print (new_line)
if (new_line[-1] == "'"):
new_line = new_line[:-1]
new_line.replace("'s", "")
# Conversion from ASCII codes back to useable text
message = new_line
decodedMessage = ""
for item in message.split():
decodedMessage += chr(int(item))
print (decodedMessage)
return new_line
# Returns a dictionary of words and their frequencies
def getWordFreq (file):
# Open file for reading the book.txt
book = open (file, "r")
# create an empty set for all Capitalized words
cap_words = set()
# create a dictionary for words
book_dict = {}
total_words = 0
# remove all punctuation marks other than '[not s]
for line in book:
line = line.strip()
if (len(line) > 0):
line = parseString (line)
word_list = line.split()
# add words to the book dictionary
for word in word_list:
total_words += 1
if (word in book_dict):
book_dict[word] = book_dict[word] + 1
else:
book_dict[word] = 1
print (book_dict)
# close the file
book.close()
def main():
wordFreq1 = getWordFreq ("./Tale.txt")
print (wordFreq1)
main()
The error that I received is as follows:
Traceback (most recent call last):
File "Books.py", line 80, in <module>
main()
File "Books.py", line 77, in main
wordFreq1 = getWordFreq ("./Tale.txt")
File "Books.py", line 60, in getWordFreq
line = parseString (line)
File "Books.py", line 36, in parseString
decodedMessage += chr(int(item))
OverflowError: Python int too large to convert to C long
When you open a text file in python, the encoding is ANSI by default, so it doesn't contain your é chartecter. Try
word_file = open ("./words.txt", "r", encoding='utf-8')
The best way I could think of is to read each character as an ASCII value, into an array, and then take the char value. For example, 97 is ASCII for "a" and if you do char(97) it will output "a". Check out some online ASCII tables that provide values for special characters also.
Try:
def parseString(st):
st = st.encode("ascii", "replace")
# rest of code here
The new error you are getting is because you are calling isalpha on an int (i.e. a number)
Try this:
for ch in st:
ch = str(ch)
if (n for n in (1,2,3,4,5,6,7,8,9,0) if n in ch) or ' ' in ch or ch.isspace() or ch == u'\xe9':
print (ch)

Searching words from a file in another file

I have got 2 files:
access.log.13 : a simple access log from a web server.
bots.txt : that contains spider's and crawlers names, each one in a different line, for example: googlebot mj12bot baidu etc etc
I would like to create a third file "hits.txt" with all the lines from "access.log.13" that contains any of the words from the file "spiders.txt"
This is my little Frankeinstein:
file_working = file("hits.txt", "wt")
file_1_logs = open("access.log.13", "r")
file_2_bots = open("bots.txt", "r")
file_3_hits = open("hits.txt", "a")
list_1 = arxiu_1_logs.readlines()
list_2 = arxiu_2_bots.readlines()
file_3_hits.write("Lines with bots: \n \n")
for i in list_2:
for j in list_1:
if i in j:
file_3_hits.write(j)
arxiu_1_logs.close()
arxiu_2_bots.close()
It doesn't work as i would like cause i only get hits when the line in bots.txt is exactly the same than any line in access.log.13. Thx
You can do it in a more pythonish way:
with open('spiders.txt') as fh:
words = set(re.split(r'[ \n\r]+', fh.read())) # set of searched words
with open('access.log.13') as file_in, \
open('hits.txt', 'w') as file_out:
for line in file_in:
if any(word in line for word in words): # look for any of the words
file_out.write(line)
Or you can use even nicer comprehension:
with open(...) as file_in, open (...) as file_out: # same as previously
good_lines = (line for line in file_in if any(word in line for word in words))
for good_line in good_lines:
file_out.write(good_line)
Replace the if with this:
if j.find(i) != -1

Categories

Resources