open: invalid mode or filename - python

This is my word count program. How could it be made simpler?
import re
from collections import Counter

with open('C:\Data\test.txt') as f:
    passage = f.read()

words = re.findall(r'\w+', passage)
cap_words = [word.upper() for word in words]
word_counts = Counter(cap_words)
I keep getting this error message:
Traceback (most recent call last):
  File "C:/Python27/wordcount", line 4, in <module>
    with open('C:\Data\test.txt') as f:
IOError: [Errno 22] invalid mode ('r') or filename: 'C:\\Data\test.txt'

Use a raw string, or escape each backslash with another backslash. This is required because otherwise the \t in your path is interpreted as a tab character:
r'C:\Data\test.txt'
Example:
>>> print 'C:\Data\test.txt'
C:\Data est.txt         # the \t was converted to a tab
>>> print r'C:\Data\test.txt'
C:\Data\test.txt        # here it is fine
>>> print 'C:\\Data\\test.txt'
C:\Data\test.txt        # same as the raw string, but with manual escaping
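For reference, here is the script with the path fixed, as a minimal sketch (feeding the uppercased words straight into Counter also drops the intermediate list, which answers the "simpler" part):

import re
from collections import Counter

with open(r'C:\Data\test.txt') as f:
    passage = f.read()

words = re.findall(r'\w+', passage)
word_counts = Counter(word.upper() for word in words)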

I am unable to create multiple files with this code, what is wrong?

So I'm trying to write a program that takes names from a list and inserts each one into a letter. A text file should be created for each name, but the code seems to stop working at that point.
letter = []
names = []

file = open("Input/Letters/starting_letter.txt", "r")
letter = file.readlines()
file.close()

name1 = open("Input/Names/invited_names.txt", "r")
names = name1.readlines()
name1.close()

for name in names:
    create_letter = open(f"{name}.txt", "w")
    for line in letter:
        line = line.replace("[name],", f"{name},")
        create_letter.write(line)
    create_letter.close()
I get the error message:
Traceback (most recent call last):
  File "C:\Users\Default User\PycharmProjects\Mail Merge Project Start\main.py", line 10, in <module>
    create_letter = open(f"{name}.txt", "w")
OSError: [Errno 22] Invalid argument: 'Aang\n.txt'
Is there a problem with the way I am creating the files?
You can't have newline characters in a file name; they are invalid in your OS/filesystem. readlines() keeps the trailing "\n" on every line it returns, so each name ends with one.
Remove it with:
open(f"{name.strip()}.txt", "w")
Or:
open(f"{name.replace('\n', '')}.txt", "w")
(Note that a backslash inside an f-string expression is a SyntaxError before Python 3.12, so strip() is the more portable option.)

word counting in python in file with spaces in name

Here is my code:
fname = input("Enter file name: ")
word = input("Enter word to be searched:")
k = 0
with open(fname, 'r') as f:
    for line in f:
        words = line.split()
        for i in words:
            if i == word:
                k = k + 1
print("Occurrences of the word:")
print(k)
I am running it on Windows. If the file name has spaces in it, such as "some file xyz.txt", then running the above code gives this error:
Enter file name: "some file xyz.txt"
Enter word to be searched:cs
Traceback (most recent call last):
  File "D:/folder1/folder2/folder3/some file xyz.txt", line 5, in <module>
    with open(fname, 'r') as f:
OSError: [Errno 22] Invalid argument: '"some file xyz.txt"'
>>>
How should I enter a file name that contains spaces, or is the code itself wrong?
Just enter the file name without quotes. Python treats your whole input, quote characters included, as the file name, and no file named '"some file xyz.txt"' exists. Spaces themselves are perfectly fine in a path passed to open().
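If you want the script to tolerate pasted quotes anyway, you can strip them yourself before opening the file (a small defensive sketch, not something the original code requires):

fname = input("Enter file name: ").strip().strip('"')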

Issue with list indexing while converting letters (devnagari to english)

I am currently trying to map Devanagari script to English letters, but once in a while I run into the error "list index out of range". I don't want to miss any words, which is why I don't want to use error handling unless it is necessary. Could you please look at my script and help me work out why this error occurs?
In my word file I have located which word causes the error, but if I take a couple of sentences above and below that word, the error goes away, i.e. I think the error happens at a specific length of string.
clean = []
dafuq = []
clean_list = []
replacements = {'अ':'A', 'आ':'AA', 'इ':'I', 'ई':'II', 'उ':'U', 'ऊ':'UU', 'ए':'E', 'ऐ':'AI',
                'ओ':'O', 'औ':'OU', 'क':'KA', 'ख':'KHA', 'ग':'GA', 'घ':'GHA', 'ङ':'NGA',
                'च':'CA', 'छ':'CHHA', 'ज':'JA', 'झ':'JHA', 'ञ':'NIA', 'ट':'TA', 'ठ':'THA',
                'ड':'DHA', 'ढ':'DHHA', 'ण':'NAE', 'त':'TA', 'थ':'THA', 'द':'DA', 'ध':'DHA',
                'न':'NA', 'प':'PA', 'फ':'FA', 'ब':'B', 'भ':'BHA', 'म':'MA', 'य':'YA', 'र':'RA',
                'ल':'L', 'व':'WA', 'स':'SA', 'ष':'SHHA', 'श':'SHA', 'ह':'HA', '्':'A',
                'ऋ':'RI', 'ॠ':'RI', 'ऌ':'LI', 'ॐ':'OMS', 'ः':' ', 'ँ':'U',
                'ं':'M', 'ृ':'RI', 'ा':'AA', 'ी':'II', 'ि':'I', 'े':'E', 'ै':'AI',
                'ो':'O', 'ौ':'OU', 'ु':'U', 'ू':'UU'}

import unicodedata
from functools import reduce

def reducer(r, v):
    # append combining marks (categories Mc/Mn) to the previous character
    if unicodedata.category(v) in ('Mc', 'Mn'):
        r[-1] = r[-1] + v
    else:
        r.append(v)
    return r

with open('words_original.txt', mode='r', encoding="utf-8") as f:
    with open('alphabeths.txt', mode='w+', encoding='utf-8') as d:
        with open('only_words.txt', mode='w+', encoding="utf-8") as e:
            chunk_size = 4096
            f_chunk = f.read(chunk_size)
            while len(f_chunk) > 0:
                for word in f_chunk.split():
                    # note: the original list was missing the comma between '.' and '६',
                    # so the two strings silently merged into '.६'
                    for char in ['।', ',', '’', '‘', '?', '#', '1', '2', '3', '4', '0',
                                 '5', '6', '7', '8', '9', '१', '२', '३', '४', '५', '.',
                                 '६', '७', '८', '९', '०', '\ufeff']:
                        if char in word:
                            word = word.replace(char, '')
                    if word.strip():
                        clean_list.append(word)
                f_chunk = f.read(chunk_size)
            for clean_word in clean_list:
                test_word = reduce(reducer, clean_word, [])
                final_word = ''.join(test_word)
                dafuq.append(final_word)
                print(final_word)
This is the file I am testing it on: words_original.txt
Stack trace:
Traceback (most recent call last):
  File "C:\Users\KUSHAL\Desktop\EARTHQUAKE_PYTHON\test.py", line 82, in <module>
    test_word = reduce(reducer, clean_word, [])
  File "C:\Users\KUSHAL\Desktop\EARTHQUAKE_PYTHON\test.py", line 27, in reducer
    r[-1] = r[-1] + v
IndexError: list index out of range
The problem lies with some Unicode characters: if a cleaned word happens to begin with a combining mark (category 'Mc' or 'Mn'), the reducer evaluates r[-1] while r is still empty, which raises the IndexError. It worked after removing those characters from the input.
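A defensive version of the reducer that tolerates a word starting with a combining mark instead of crashing (a sketch, assuming you would rather keep such words than delete the offending characters):

def reducer(r, v):
    # only attach a combining mark if there is a previous character to attach it to
    if r and unicodedata.category(v) in ('Mc', 'Mn'):
        r[-1] = r[-1] + v
    else:
        r.append(v)
    return r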

ValueError with NLTK

Using NLTK, I'm trying to print a line of text if the last word of the line has an "NN" POS tag, but I'm getting: "ValueError: too many values to unpack" on the following code. Any ideas why? Thanks in advance.
import nltk
from nltk.tokenize import word_tokenize

def end_of_line():
    filename = raw_input("Please enter a text file.> ")
    with open(filename) as f:
        for line in f:
            linewords = nltk.tokenize.word_tokenize(line)
            lw_tagged = nltk.tag.pos_tag(linewords)
            last_lw_tagged = lw_tagged.pop()
            for (word, tag) in last_lw_tagged:
                if tag == "NN":
                    print line

end_of_line()
Traceback (most recent call last):
  File "/private/var/folders/ly/n5ph6rcx47q8zz_j4pcj3b880000gn/T/Cleanup At Startup/endofline-477697124.590.py", line 15, in <module>
    end_of_line()
  File "/private/var/folders/ly/n5ph6rcx47q8zz_j4pcj3b880000gn/T/Cleanup At Startup/endofline-477697124.590.py", line 11, in end_of_line
    for (word, tag) in last_lw_tagged:
ValueError: too many values to unpack
Instead of this:
for (word, tag) in last_lw_tagged:
    if tag == "NN":
Do this:
if last_lw_tagged[1] == "NN":
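For context: lw_tagged.pop() returns a single (word, tag) tuple, not a list of them, so the loop iterates over the tuple's two strings and then tries to unpack each string character by character, which is what raises the ValueError. If you prefer named variables, you can also unpack the pair once (Python 2, matching the question):

word, tag = last_lw_tagged
if tag == "NN":
    print line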

cleaning text files in python 2 : TypeError: coercing to Unicode:

I am trying to clean up some text files in Python 2. I want to take out stop words, digits and the newline character, but I keep getting a TypeError about coercing to Unicode. Here is my code:
import glob
import nltk
from nltk import word_tokenize
from nltk.corpus import stopwords
import string
from string import digits

def cleanupDoc(s):
    s = s.translate(None, digits)
    s = s.rstrip('\n')
    stopset = set(stopwords.words('english'))
    tokens = nltk.word_tokenize(s)
    cleanup = " ".join(filter(lambda word: word not in stopset, s.split()))
    return cleanup

flist = glob.glob('/home/uiucinfo/Desktop/*txt')
mylist = []
for fname in flist:
    tfile = open(fname, 'r+')
    line = tfile.readlines()
    #line = cleanupDoc(line)
    mylist.append(line)

for fdoc in mylist:
    doc = open(fdoc)
    newDoc = cleanupDoc(doc)
    doc.close()
My error:
Traceback (most recent call last):
  File "<stdin>", line 3, in <module>
TypeError: coercing to Unicode: need string or buffer, list found
tfile.readlines() gives you a list of lines, which you are appending to another list:

for fname in flist:
    tfile = open(fname, 'r+')
    line = tfile.readlines()
    mylist.append(line)

As a result, you have a list of lists in mylist, and open(fdoc) later chokes on a list where it expects a file name.
The following should fix the problem:

for fname in flist:
    tfile = open(fname, 'r+')
    line = tfile.readlines()
    mylist += line

This will give you a list of strings in mylist.
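Alternatively, read each file as one string so that cleanupDoc() receives what it expects (a sketch, assuming you want one cleaned document per file):

for fname in flist:
    with open(fname) as tfile:
        text = tfile.read()
    newDoc = cleanupDoc(text)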
import nltk
from nltk import word_tokenize
from nltk.corpus import stopwords
#nltk.download()
import string
from string import digits
import glob
import re

def cleanupDoc(s):
    #s = s.translate(None, digits)
    #s = s.rstrip('\n')
    stopset = set(stopwords.words('english'))
    tokens = nltk.word_tokenize(s)
    cleanup = " ".join(filter(lambda word: word not in stopset, s.split()))
    return cleanup

flist = glob.glob('/home/uiucinfo/Desktop/*txt')
mylist = []
for fname in flist:
    tfile = open(fname, 'r+')
    line = tfile.readlines()
    #line = cleanupDoc(line)
    mylist.append(line)

for fdoc in mylist:
    # remove quotes, newlines and digits from each line of fdoc
    fdoc = [re.sub(r'[\"\n]|\d', '', x) for x in fdoc]
    # convert the list of lines to a single string
    fdoc = ''.join(fdoc)
    print fdoc
    newDoc = cleanupDoc(fdoc)
    print " newDoc: ", newDoc
