open: invalid mode or filename

open: invalid mode or filename - python

This is the word count program. how could it be made more simple?
import re
from collections import Counter
with open('C:\Data\test.txt') as f:
passage = f.read()
words = re.findall(r'\w+', passage)
cap_words = [word.upper() for word in words]
word_counts = Counter(cap_words)
keep getting this error message:
Traceback (most recent call last):
File "C:/Python27/wordcount", line 4, in <module>
with open('C:\Data\test.txt') as f:
IOError: [Errno 22] invalid mode ('r') or filename: 'C:\\Data\test.txt'

Use a raw string or escape each \ with a \. That is required because without it '\t' will be converted to a tab space:
r'C:\Data\test.txt'
Example:
>>> print 'C:\Data\test.txt'
C:\Data est.txt #\t is converted to tab
>>> print r'C:\Data\test.txt'
C:\Data\test.txt #here it is fine
>>> print 'C:\\Data\\test.txt' #same as raw string, but manual escaping
C:\Data\test.txt

Related

I am unable to create multiple files with this code, what is wrong?

So I'm trying to write a program that takes names from a list and adds it to a letter. A text file is created for each name for the letter however the code seems to stop working at that point.
letter = []
names = []
file = open("Input/Letters/starting_letter.txt", "r")
letter = file.readlines()
file.close()
name1 = open("Input/Names/invited_names.txt", "r")
names = name1.readlines()
name1.close()
for name in names:
create_letter = open(f"{name}.txt", "w")
for line in letter:
line = line.replace("[name],", f"{name},")
create_letter.write(line)
create_letter.close()
I get the error message
Traceback (most recent call last):
File "C:\Users\Default User\PycharmProjects\Mail Merge Project Start\main.py", line 10, in <module>
create_letter = open(f"{name}.txt", "w")
OSError: [Errno 22] Invalid argument: 'Aang\n.txt'
Is there a problem with the way I am creating the files?

You can't have newlines in your file name. It is invalid in your OS/filesystem.
Remove them with:
open(f"{name.strip()}.txt", "w")
Or:
open(f"{name.replace('\n', '')}.txt", "w")

word counting in python in file with spaces in name

Here is my code
fname = input("Enter file name: ")
word=input("Enter word to be searched:")
k = 0
with open(fname, 'r') as f:
for line in f:
words = line.split()
for i in words:
if(i==word):
k=k+1
print("Occurrences of the word:")
print(k)
I am running it on windows if the file name is having spaces in it
such as "some file xyz.txt"
then upon running the above code I am getting error
Enter file name: "some file xyz.txt"
Enter word to be searched:cs
Traceback (most recent call last):
File "D:/folder1/folder2/folder3/some file xyz.txt", line 5, in <module>
with open(fname, 'r') as f:
OSError: [Errno 22] Invalid argument: '"some file xyz.txt"'
>>>
How should I enter correct file name with spaces or the code itself is wrong?

Just enter the file name without quotes. Python treats your input as a whole string.

Issue with list indexing while converting letters (devnagari to english)

I am currently trying to map devnagari script with English alphabets. But once in a while I run into the error list index out of range . I don't want to miss out on any list . This is why I do not want to use error handling unless it is necessary. Could you please look into my script and help out why this error is occurring ?
In my word file I have located which word is causing the error but then If i use couple of sentence up and down from that word then the error is not there . i.e I think the error happens at a specific length of string.
clean=[]
dafuq=[]
clean_list = []
replacements = {'अ':'A','आ':'AA', 'इ':'I', 'ई':'II', 'उ':'U','ऊ':'UU', 'ए':'E', 'ऐ':'AI',
'ओ':'O','औ':'OU', 'क':'KA', 'ख':'KHA', 'ग':'GA', 'घ':'GHA', 'ङ':'NGA',
'च':'CA','छ':'CHHA', 'ज':'JA', 'झ':'JHA','ञ':'NIA', 'ट':'TA', 'ठ':'THA',
'ड':'DHA','ढ':'DHHA', 'ण':'NAE', 'त':'TA', 'थ':'THA','द':'DA', 'ध':'DHA',
'न':'NA','प':'PA', 'फ':'FA', 'ब':'B', 'भ':'BHA', 'म':'MA','य':'YA', 'र':'RA',
'ल':'L','व':'WA', 'स':'SA', 'ष':'SHHA', 'श':'SHA', 'ह':'HA', '्':'A',
'ऋ':'RI', 'ॠ':'RI','ऌ':'LI','ॐ':'OMS', 'ः':' ', 'ँ':'U',
'ं':'M', 'ृ':'RI', 'ा':'AA', 'ी':'II', 'ि':'I', 'े':'E', 'ै':'AI',
'ो':'O','ौ':'OU','ु' :'U','ू':'UU' }
import unicodedata
from functools import reduce
def reducer(r, v):
if unicodedata.category(v) in ('Mc', 'Mn'):
r[-1] = r[-1] + v
else:
r.append(v)
return r
with open('words_original.txt', mode='r',encoding="utf-8") as f:
with open ('alphabeths.txt', mode='w+', encoding='utf-8') as d:
with open('only_words.txt', mode='w+', encoding="utf-8") as e:
chunk_size = 4096
f_chunk = f.read(chunk_size)
while len(f_chunk)>0:
for word in f_chunk.split():
for char in ['।', ',', '’', '‘', '?','#','1','2','3','4','0','5','6','7','8','9',
'१','२','३','४','५','.''६','७','८','९','०', '5','6','7','8','9','0','\ufeff']:
if char in word:
word = word.replace(char, '')
if word.strip():
clean_list.append(word)
f_chunk = f.read(chunk_size)
for clean_word in clean_list:
test_word= reduce(reducer,clean_word,[])
final_word= (''.join(test_word))
dafuq.append(final_word)
print (final_word)
f_chunk = f.read(chunk_size)
This is the file I am testing it on
words_original.txt
words_original.txt
stacktrace error
Traceback (most recent call last):
File "C:\Users\KUSHAL\Desktop\EARTHQUAKE_PYTHON\test.py", line 82, in <module>
test_word= reduce(reducer,clean_word,[])
File "C:\Users\KUSHAL\Desktop\EARTHQUAKE_PYTHON\test.py", line 27, in reducer
r[-1] = r[-1] + v
IndexError: list index out of range

The problem lay with some unicode characters. It worked after removing them.

ValueError with NLTK

Using NLTK, I'm trying to print a line of text if the last word of the line has an "NN" POS tag, but I'm getting: "ValueError: too many values to unpack" on the following code. Any ideas why? Thanks in advance.
import nltk
from nltk.tokenize import word_tokenize
def end_of_line():
filename = raw_input("Please enter a text file.> ")
with open(filename) as f:
for line in f:
linewords = nltk.tokenize.word_tokenize(line)
lw_tagged = nltk.tag.pos_tag(linewords)
last_lw_tagged = lw_tagged.pop()
for (word, tag) in last_lw_tagged:
if tag == "NN":
print line
end_of_line()
Traceback (most recent call last):
File "/private/var/folders/ly/n5ph6rcx47q8zz_j4pcj3b880000gn/T/Cleanup At Startup/endofline-477697124.590.py", line 15, in <module>
end_of_line()
File "/private/var/folders/ly/n5ph6rcx47q8zz_j4pcj3b880000gn/T/Cleanup At Startup/endofline-477697124.590.py", line 11, in end_of_line
for (word, tag) in last_lw_tagged:
ValueError: too many values to unpack
logout

Instead of this:
for (word, tag) in last_lw_tagged:
if tag == "NN":
Do this:
if last_lw_tagged[1] == "NN:

cleaning text files in python 2 : TypeError: coercing to Unicode:

I am trying to clean up so text files in python. I want to take out stop words, digits and the new line character. But I keep getting coercing to Unicode python text . Here is my code:
import nltk
from nltk import word_tokenize
from nltk.corpus import stopwords
import string
from string import digits
def cleanupDoc(s):
s = s.translate(None,digits)
s = s.rstrip('\n')
stopset = set(stopwords.words('english'))
tokens = nltk.word_tokenize(s)
cleanup = " ".join(filter(lambda word: word not in stopset, s.split()))
return cleanup
flist=glob.glob('/home/uiucinfo/Desktop/*txt')
mylist=[]
for fname in flist:
tfile = open(fname, 'r+')
line = tfile.readlines()
#line = cleanupDoc(line)
mylist.append(line)
for fdoc in mylist:
doc = open(fdoc)
newDoc = cleanupDoc(doc)
doc.close()
My Error
Traceback (most recent call last):
File "<stdin>", line 3, in <module>
TypeError: coercing to Unicode: need string or buffer, list found

tfile.readlines() gives you a list of lines, which you are appending to another list:
for fname in flist:
tfile = open(fname, 'r+')
line = tfile.readlines()
mylist.append(line)
In result, you have a list of lists in mylist.
The following should fix the problem:
for fname in flist:
tfile = open(fname, 'r+')
line = tfile.readlines()
mylist += line
This will give you a list of strings in mylist.

import nltk
form nltk import word_tokenize
from nltk.corpus import stopwords
#nltk.download()
import string
from string import digits
import glob
import re
def cleanupDoc(s):
#s = s.translate(None,digits)
#s = s.rstrip('\n')
stopset = set(stopwords.words('english'))
tokens = nltk.word_tokenize(s)
cleanup = " ".join(filter(lambda word: word not in stopset, s.split()))
return cleanup
flist=glob.glob('/home/uiucinfo/Desktop/*txt')
mylist=[]
for fname in flist:
tfile = open(fname, 'r+')
line = tfile.readlines()
#line = cleanupDoc(line)
mylist.append(line)
for fdoc in mylist:
# remove \n or digit from fdoc
fdoc = [re.sub(r'[\"\n]|\d', '', x) for x in fdoc]
# convert list to string
fdoc = ''.join(fdoc)
print fdoc
newDoc = cleanupDoc(fdoc)
print " newDoc: " , newDoc

Develop Reference

Python is a programming language that lets you work quickly and integrate systems more effectively.

open: invalid mode or filename - python

Related

I am unable to create multiple files with this code, what is wrong?

word counting in python in file with spaces in name

Issue with list indexing while converting letters (devnagari to english)

ValueError with NLTK

cleaning text files in python 2 : TypeError: coercing to Unicode:

Categories

Resources