Invalid file error Python?

Invalid file error Python? - python

I'm trying to write a script that allows a user to create a folder with any name they want, and then create a file with any name they want. Once they do that, the program asks them for 3 names and writes them into the file. I then want to allow the user to input a number from 1 to 3 and display that many lines from the file. I'm getting an error right now when trying to read the file, saying something along the lines of
TypeError: invalid file: <_io.TextIOWrapper name='C:blah blah ' mode='a' encoding='cp1252'>
The code is below:
import os, sys
# Ask for a folder name and create that folder on the desktop if it is missing.
folder = input("What would you like your folder name to be?")
path = r'C:\Users\Administrator\Desktop\%s' %(folder)
if not os.path.exists(path): os.makedirs(path)
# Build the full path of the new .txt file; completePath is a plain string.
file = input("What name would you like for the file in this folder?")
file = file + ".txt"
completePath = os.path.join(path, file)
# Create (or truncate) the file, then close it straight away.
newFile = open(completePath, 'w')
newFile.close()
# Append three names, re-opening the file in append mode on every pass.
count = 0
while count < 3:
newFile = open(completePath, 'a')
write = input("Input the first and last name of someone: ")
newFile.write(write + '\n')
newFile.close()
count += 1
# NOTE(review): newFile is a file object here, not a path, so this open() call
# is what raises the TypeError; the path string is completePath.
infile = open(newFile, 'r')
# NOTE(review): display is read but never used by the line below.
display = int(input("How many names from 1 to 10 would you like to display? "))
# NOTE(review): the argument to readlines() is a size hint, not a line count.
print (infile.readlines(5))

You have newFile defined as an opened file. Then you open it again inside the while loop, and it is still a file object.
And when you try then to open a file using the newFile variable, Python tries to open a file with a name, contained in a newFile variable. But it is not a file name - it is a file!
This makes Python sad...
Try this one:
import os, sys
folder = input("What would you like your folder name to be?")
path = r'C:\Users\Administrator\Desktop\%s' % (folder)
# exist_ok makes the separate "does it exist yet?" check unnecessary.
os.makedirs(path, exist_ok=True)
file = input("What name would you like for the file in this folder?")
file = file + ".txt"
completePath = os.path.join(path, file)  # completePath is a string

# Create (or empty) the file. Inside the with-block newFile is a file handle;
# it is closed automatically when the block ends.
with open(completePath, 'w') as newFile:
    pass

count = 0
while count < 3:
    with open(completePath, 'a') as newFile:  # again, newFile is a file handle
        write = input("Input the first and last name of someone: ")
        newFile.write(write + '\n')
    count += 1

display = int(input("How many names from 1 to 3 would you like to display? "))
with open(completePath, 'r') as infile:  # opening file with its path, not its handle
    # readlines(n) treats n as a size hint rather than a number of lines, so
    # read every line and slice off the requested amount instead.
    for name in infile.readlines()[:display]:
        print(name, end='')

Related

Winerror 123 when trying to rename a file

I'm trying to write a code that takes a file name as an input. Finds this file on my computer and then changes the name of the file according to the text on the 2 first lines of the file.
import os
filename = input("Enter your file name: ")
# Generator: yields the first two lines of the file exactly as read,
# i.e. including each line's trailing newline.
def info(filename):
with open(filename, 'r') as filehandle:
current_line = 1
for line in filehandle:
if current_line <=2:
yield(line)
current_line += 1
# Rebinds the name info: from here on it is the list of lines, not the function.
info = list(info(filename))
print(info)
path = r'C:\Users\marku\Desktop\INF100'
# NOTE(review): both entries still end in '\n' (visible in the target name of
# the error message), and that newline ends up inside finalname.
date = str(info[1])
place = str(info[0])
finalname = date + '_' + place + '.txt'
# NOTE(review): the file was opened via the bare filename above, but it is
# renamed relative to path - presumably both refer to the same directory.
old = os.path.join(path, filename)
new = os.path.join(path, finalname)
os.rename(old, new)
However, I get WinError 123 when trying to run this code.
OSError: [WinError 123] The filename, directory name, or volume label syntax is incorrect:
'C:\\Users\\marku\\Desktop\\INF100\\qwerty.txt' -> 'C:\\Users\\marku\\Desktop\\INF100\\2019-06-01\n_Oslo\n.txt'

How to unzip all folders/files that end in .zip and extract “file.txt” file from each zipped folder

My code currently unzips one zip folder and finds the file called file.txt and extracts it. Now I need to unzip multiple folders that have the extension .zip. I have tried to use code similar to what I need it to do but the problem is that now I have to find a file called file.txt in each of those .zip folders and extract that file only . Also to store file.txt into a separate folder that has the same name where it came from. Thank you in advance for your time.
import re
import os
from zipfile import ZipFile
# Unzips input_files.zip, copies its .txt members into 'storage', then writes
# one report row per line of every matching input file to output2.txt.
def pain():
print("\t\t\tinput_files.zip has been unzipped")
with ZipFile('input_files.zip', 'r') as zipObj:
# Extract the whole archive into the current working directory.
zipObj.extractall()
listOfFileNames = zipObj.namelist()
# Additionally extract every member whose name ends in .txt into 'storage'.
for fileName in listOfFileNames:
if fileName.endswith('.txt'):
zipObj.extract(fileName, 'storage')
outfile = "output2.txt" #this will be the filename that the code will write to
baconFile = open(outfile,"wt")
file_name1 = "file.txt"
print('Filename\tLine\tnumber of numbers\tstring separated by a comma\twhite space found\ttab found\tcarriage return found\n') #This prints the master column in the python shell and this is the way the code should collect the data
baconFile.write('Filename\tLine\tnumber of numbers\tstring separated by a comma\twhite space found\ttab found\tcarriage return found\n') #This prints the master column in the output file and this is the way the code should collect the data
#for filename in os.listdir(os.getcwd() + "/input_files"):
# NOTE(review): this path is not a raw string; on Python 3 the '\U' sequence
# is parsed as a unicode escape and the literal is rejected.
# NOTE(review): names are listed from the absolute path but opened relative to
# the working directory ("input_files/") - presumably the same folder.
for filename in os.listdir('C:\Users\M29858\Desktop\TestPy\Version10\input_files'):
with open("input_files/" + filename, 'r') as f:
# Only files whose name contains "file.txt" are reported.
if file_name1 in filename:
output_contents(filename, f, baconFile)
baconFile.close() #closes the output file that the code is writing to
# Prints, and writes to baconFile, one formatted report row for every line of f.
#   filename  - name shown in the first column of each row
#   f         - open file object that is iterated line by line
#   baconFile - open, writable file object that receives the rows
def output_contents(filename, f, baconFile): #using open() function to open the file inside the directory
index = 0
for line in f:
#create a list of all of the numerical values in our line
content = line.split(',') #this will be used to count the amount numbers before and after comma
# Per-line flags, reset for every line.
whitespace_found = False
tab_found = False
false_string = "False (end of file)"
carriage_found = false_string
sigfigs = ""
index += 1 #adds 1 for every line if it finds what the command wants
if " " in line: #checking for whitespace
whitespace_found = True
if "\t" in line: #checking for tabs return
tab_found = True
if '\n' in line: #checking if there is a newline after the end of each line
carriage_found = True
# The regex's only group captures the digits after an optional '.', so each
# entry of sigfigs is the length of that captured group.
sigfigs = (','.join(str(len(g)) for g in re.findall(r'\d+\.?(\d+)?', line ))) #counts the sigsfigs after decimal point
print(filename + "\t{0:<4}\t{1:<17}\t{2:<27}\t{3:17}\t{4:9}\t{5:21}"
.format(index, len(content), sigfigs, str(whitespace_found), str(tab_found), str(carriage_found))) #whatever is inside the .format() is the way it the data is stored into
# The row separator is written before each row rather than after it.
baconFile.write('\n')
baconFile.write( filename + "\t{0:<4}\t{1:<17}\t{2:<27}\t{3:17}\t{4:9}\t{5:21}"
.format(index, len(content), sigfigs, str(whitespace_found), str(tab_found), str(carriage_found)))
# Run pain() only when this file is executed as a script, not when it is imported.
if __name__ == '__main__':
pain()
#THIS WORKS

import glob
import os
from zipfile import ZipFile
def main():
    """Extract ``file.txt`` from every zip archive in the current directory.

    For each ``*.zip`` file that has a member named exactly ``file.txt``, a
    directory named after the archive is created (``foo.zip`` -> ``foo``) and
    only ``file.txt`` is extracted into it. Archives without that member are
    skipped.
    """
    for fname in glob.glob("*.zip"):  # get all the zip files
        with ZipFile(fname) as archive:
            # if there's no file.txt, ignore and go on to the next zip file
            if 'file.txt' not in archive.namelist():
                continue
            # make a new directory named after the zip file
            dirname = fname.rsplit('.', 1)[0]
            # exist_ok lets the script be re-run without a FileExistsError
            os.makedirs(dirname, exist_ok=True)
            # extract file.txt into the directory you just created
            archive.extract('file.txt', path=dirname)

Find files in a directory containing desired string in Python

I'm trying to find a string in files contained within a directory. I have a string like banana that I know that exists in a few of the files.
import os
import sys
user_input = input("What is the name of you directory?")
# os.listdir returns bare entry names, without the directory prefix.
directory = os.listdir(user_input)
searchString = input("What word are you trying to find?")
for fname in directory: # change directory as needed
# NOTE(review): this tests the entry's *name* for the search string; the
# file's contents are never read.
if searchString in fname:
f = open(fname,'r')
# NOTE(review): the % operator sits outside the print() call, so it is applied
# to print's return value rather than to the format string.
print('found string in file %s') %fname
else:
print('string not found')
When the program runs, it just outputs string not found for every file. There are three files that contain the word banana, so the program isn't working as it should. Why isn't it finding the string in the files?

You are searching for the string in the file name rather than in the file's contents; use open(filename, 'r').read() to get the contents:
import os
user_input = input('What is the name of your directory')
directory = os.listdir(user_input)
searchstring = input('What word are you trying to find?')
for fname in directory:
    # Full path
    full_path = user_input + os.sep + fname
    # os.listdir also returns sub-directories, which cannot be opened as files
    if os.path.isfile(full_path):
        # the with-block closes the file even if read() raises
        with open(full_path, 'r') as f:
            if searchstring in f.read():
                print('found string in file %s' % fname)
            else:
                print('string not found')
We use user_input + os.sep + fname to get full path.
os.listdir gives files and directories names, so we use os.path.isfile to check for files.

Here is another version using the Path module from pathlib instead of os.
def search_in_file(path, searchstring):
    """Report on stdout whether the file at ``path`` contains ``searchstring``.

    ``path`` must offer a ``.name`` attribute (e.g. a ``pathlib.Path``); it is
    used in the success message. Nothing is returned.
    """
    with open(path, 'r') as handle:
        contents = handle.read()
    if searchstring not in contents:
        print('string not found')
        return
    print(f' found string in file {path.name}')
from pathlib import Path
# Prompt for the directory and the word, then check every non-directory entry
# of that directory in sorted order.
user_input = input('What is the name of your directory')
searchstring = input('What word are you trying to find?')
for entry in sorted(Path(user_input).iterdir()):
    if entry.is_dir():
        continue
    search_in_file(entry, searchstring)

This is my solution for the problem. It comes with the feature of also checking in sub-directories, as well as being able to handle multiple file types. It is also quite easy to add support for other ones. The downside is of course that it's quite chunky code. But let me know what you think.
import os
import docx2txt
from pptx import Presentation
import pdfplumber
def findFiles(strings, dir, subDirs, fileContent, fileExtensions):
    """Find all the files in 'dir' that contain one string from 'strings'.

    Parameters:
        strings: substrings to look for; a file is reported once, on its
            first matching string.
        dir: directory to search.
        subDirs: True/False - look in sub-directories of your folder.
        fileContent: True/False - also look for the strings in the
            (lower-cased) file content of every file.
        fileExtensions: True/False - match against the file extension as
            returned by os.path.splitext (e.g. ".txt") instead of the file
            name -> 'fileContent' is ignored.

    Returns:
        List of matching file paths, with backslashes replaced by "/".

    File names and contents are lower-cased before matching, so 'strings'
    should be given in lower case. Progress is printed while searching.
    """
    # Collect the candidate files, normalised to forward slashes.
    filesInDir = []
    if subDirs:
        for root, _subdirs, files in os.walk(dir):
            for f in files:
                candidate = os.path.join(root, f).replace("\\", "/")
                if not os.path.isdir(candidate):
                    filesInDir.append(candidate)
    else:
        for filename in os.listdir(dir):
            candidate = os.path.join(dir, filename).replace("\\", "/")
            if os.path.isfile(candidate):
                filesInDir.append(candidate)
    print(filesInDir)

    # Find files that contain the keyword
    foundFiles = []
    for file in filesInDir:
        print("Current file: "+file)
        # Define what is to be searched in
        filename, extension = os.path.splitext(file)
        if fileExtensions:
            fileText = extension
        else:
            fileText = os.path.basename(filename).lower()
            if fileContent:
                fileText += getFileContent(file).lower()
        # Check for translations
        for string in strings:
            print(string)
            if string in fileText:
                foundFiles.append(file)
                break  # one hit is enough; do not list the file twice
    return foundFiles
def getFileContent(filename):
    '''Returns the content of a file of a supported type (list: supportedTypes).

    Always returns a string: the extracted text for .pdf, .txt, .docx and
    .pptx files, and "" for any other extension, so callers can safely call
    string methods on the result.
    '''
    # splitext only looks at the last dot of the final path component, so
    # dotted directory names or names like "a.b.txt" still give "txt".
    extension = os.path.splitext(filename)[1][1:]
    if extension not in supportedTypes:
        return ""
    if extension == "pdf":
        with pdfplumber.open(filename) as pdf:
            # NOTE(review): extract_text() may return None for a page without
            # text in some pdfplumber versions - the `or ""` guards that case.
            return "".join(page.extract_text() or "" for page in pdf.pages)
    if extension == "txt":
        # the with-block closes the file; no explicit close() is needed
        with open(filename, 'r') as f:
            return f.read()
    if extension == "docx":
        return docx2txt.process(filename)
    if extension == "pptx":
        prs = Presentation(filename)
        # Not every shape carries text, hence the hasattr filter.
        return "".join(
            shape.text
            for slide in prs.slides
            for shape in slide.shapes
            if hasattr(shape, "text")
        )
    # Listed in supportedTypes but no reader implemented above.
    return ""
# Extensions (without the dot) that getFileContent checks a file against before reading it.
supportedTypes = ["txt", "docx", "pdf", "pptx"]
# Example run: search the Desktop folder and its sub-directories for "buch"
# in file names and file contents, and print the list of matches.
print(findFiles(strings=["buch"], dir="C:/Users/User/Desktop/", subDirs=True, fileContent=True, fileExtensions=False))

Here is the most simple answer I can give you. You don't need the colors, they are just cool and you may find that you can learn more than one thing in my code :)
import os
from time import sleep
#The colours of the things
class bcolors:
    """Named ANSI escape sequences for colouring and styling terminal text."""

    # Reset: append after coloured text to return to the default style.
    ENDC = '\033[0m'

    # Text attributes
    BOLD = '\033[1m'
    UNDERLINE = '\033[4m'

    # Bright foreground colours
    FAIL = '\033[91m'
    OKGREEN = '\033[92m'
    WARNING = '\033[93m'
    OKBLUE = '\033[94m'
    HEADER = '\033[95m'
# Ask the user to enter string to search
search_path = input("Enter directory path to search : ")
file_type = input("File Type : ")
search_str = input("Enter the search string : ")

# If path is not an existing directory, set search path to current directory
if not os.path.isdir(search_path):
    search_path = "."

# Repeat for each file in the directory
for fname in os.listdir(path=search_path):
    # Apply file type filter
    if not fname.endswith(file_type):
        continue
    # os.path.join supplies the separator itself, so the "." fallback above
    # yields "./name" rather than ".name".
    with open(os.path.join(search_path, fname), 'r') as fo:
        # Read the whole file at once; one result is reported per file
        contents = fo.read()
    # Search for string in the file contents (an empty file simply counts
    # as not containing the string)
    if search_str in contents:
        print(bcolors.OKGREEN + '[+]' + bcolors.ENDC + ' ', fname, sep="")
        print(' ')
    else:
        print(bcolors.FAIL + '[-]' + bcolors.ENDC + ' ', fname, ' ', 'does not contain', ' ', search_str, sep="")
        print(" ")
    sleep(0.01)
That is it!

I was trying with the following code for this kind of problem, please have a look.
import os,sys
search_path = input("Put the directory here:")
# file_type was used by the filter below without ever being defined;
# an empty answer matches every file because every name ends with "".
file_type = input("Enter the file extension to search (blank for all files):")
search_str = input("Enter your string")

# If path is not an existing directory, set search path to current directory
if not os.path.isdir(search_path):
    search_path = "."

# Repeat for each file in the directory
for fname in os.listdir(path=search_path):
    # Apply file type filter
    if not fname.endswith(file_type):
        continue
    # os.path.join supplies the separator itself, so the "." fallback above
    # yields "./name" rather than ".name"; the with-block closes the file.
    with open(os.path.join(search_path, fname)) as fo:
        # Walk the file line by line, counting lines from 1
        for line_no, line in enumerate(fo, start=1):
            # Search for string in line
            index = line.find(search_str)
            if index != -1:
                print(fname, "[", line_no, ",", index, "] ", line, sep="")

Reading file from a directory in Python

I have a folder containing files on my desktop, and I'm trying to write a script that will read each of the file, replace the spaces with commas, and then return each file as a CSV file.
Here is my code but it is not working:
import os
import re
import csv
# NOTE(review): not a raw string; on Python 3 the '\U' sequence is parsed as a
# unicode escape and the literal is rejected.
path = 'C:\Users\Kenny\Desktop\TTUM'
listing = os.listdir(path)
for infile in listing:
dir_item_path = os.path.join(path, infile)
fh = open(dir_item_path,'r')
for line in fh.readlines():
# Collapse every run of whitespace in the line into a single comma.
space_remove = re.sub(r"\s+",",",line.rstrip())
# NOTE(review): the spaces were just replaced by commas, so splitting on " "
# leaves the line as one single element.
split_Line = space_remove.split(" ")
# Output name: the input name up to its first dot, plus ".csv".
Fname = infile
Lname = Fname.split('.')[0]
name = Lname + ".csv"
# NOTE(review): with indentation lost it cannot be seen whether this open()
# sits inside the line loop; the answer below states that it does, which
# truncates the file for every line.
process_file = open(name,"wb")
newfile = csv.writer(process_file)
newfile.writerow(split_Line)
process_file.close()

You are re-opening the file, writing a line, and closing every time. This will truncate the file and just write the single line. Try opening before the for loop (which you are already doing with the input file), and closing when everything is done.
# Raw string, so the backslashes in the Windows path are taken literally.
path = r'C:\Users\Kenny\Desktop\TTUM'
listing = os.listdir(path)
for infile in listing:
    dir_item_path = os.path.join(path, infile)
    Fname = infile
    Lname = Fname.split('.')[0]
    name = Lname + ".csv"
    # Open the input and the output once per input file, before the line
    # loop, so the output is not truncated for every line. Both are closed
    # automatically when the with-block ends.
    # NOTE: "wb" is what the csv module expects on Python 2; on Python 3 use
    # open(name, "w", newline="") instead.
    with open(dir_item_path, 'r') as fh, open(name, "wb") as process_file:
        newfile = csv.writer(process_file)
        for line in fh.readlines():
            space_remove = re.sub(r"\s+", ",", line.rstrip())
            # The whitespace is now commas, so the fields must be split on
            # "," - splitting on " " would leave one single field.
            split_Line = space_remove.split(",")
            newfile.writerow(split_Line)
Of course, there may be a lot more wrong with your script, but for that you need to explain exactly what the problem is.

python multiple inputs and multiple outputs

I have written a script in python, which works on a single file. I couldn't find an answer to make it run on multiple files and to give output for each file separately.
# Reads a.sam line by line and writes tab-separated indel records to a.out.
out = open('/home/directory/a.out','w')
infile = open('/home/directory/a.sam','r')
for line in infile:
# Lines starting with '#' are skipped.
if not line.startswith('#'):
samlist = line.strip().split()
# NOTE(review): this parses as ('I') or ('D' in samlist[5]); the non-empty
# string 'I' is always true, so the test never filters anything.
if 'I' or 'D' in samlist[5]:
# Collect the numbers that directly precede an 'I' in field 5.
match = re.findall(r'(\d+)I', samlist[5]) # remember to change I and D here as well
intlist = [int(x) for x in match]
## if len(intlist) < 10:
for indel in intlist:
if indel >= 10:
## print indel
###intlist contains lengths of insertions in for each read
#print intlist
read_aln_start = int(samlist[3])
indel_positions = []
# Each match is (number, 'I' or 'D', optional number, optional letter).
for num1, i_or_d, num2, m in re.findall('(\d+)([ID])(\d+)?([A-Za-z])?', samlist[5]):
if num1:
read_aln_start += int(num1)
if num2:
read_aln_start += int(num2)
indel_positions.append(read_aln_start)
#print indel_positions
# Output columns: position, I/D flag, field 2 of the line, indel length.
out.write(str(read_aln_start)+'\t'+str(i_or_d) + '\t'+str(samlist[2])+ '\t' + str(indel) +'\n')
out.close()
I would like my script to take multiple files with names like a.sam, b.sam, c.sam and for each file give me the output : aout.sam, bout.sam, cout.sam
Can you please pass me either a solution or a hint.
Regards,
Irek

Loop over filenames.
input_filenames = ['a.sam', 'b.sam', 'c.sam']
output_filenames = ['aout.sam', 'bout.sam', 'cout.sam']
for infn, outfn in zip(input_filenames, output_filenames):
    # The input is opened first, so a missing input file does not leave an
    # empty output file behind; both files are closed when the block ends.
    with open('/home/directory/{}'.format(infn), 'r') as infile, \
            open('/home/directory/{}'.format(outfn), 'w') as out:
        ...
UPDATE
The following code generates output_filenames from the given input_filenames.
import os
def get_output_filename(fn):
    """Return ``fn`` with 'out' inserted just before its extension.

    For example 'a.sam' becomes 'aout.sam'.
    """
    stem, suffix = os.path.splitext(fn)
    return '{}out{}'.format(stem, suffix)
input_filenames = ['a.sam', 'b.sam', 'c.sam'] # or glob.glob('*.sam')
# Build a real list: on Python 3 map() returns a one-shot iterator that cannot
# be indexed or looped over twice, unlike the hand-written list it replaces.
output_filenames = [get_output_filename(fn) for fn in input_filenames]

I'd recommend wrapping that script in a function, using the def keyword, and passing the names of the input and output files as parameters to that function.
def do_stuff_with_files(infile, outfile):
    """Run the script's processing on one input/output pair.

    infile:  path of the file to read.
    outfile: path of the file to write (created or overwritten).
    """
    # Read from infile and write to outfile - not the other way round, which
    # would truncate the input. Inside the block, `infile` and `out` are the
    # open file objects the rest of the script works with.
    with open(infile, 'r') as infile, open(outfile, 'w') as out:
        # the rest of your script
        pass
Now you can call this function for any combination of input and output file names.
# Example call: first argument is passed as infile, second as outfile.
do_stuff_with_files('/home/directory/a.sam', '/home/directory/a.out')
If you want to do this for all files in a certain directory, use the glob library. To generate the output filenames, just replace the last three characters ("sam") with "out".
import glob
indir, outdir = '/home/directory/', '/home/directory/out/'
# glob.glob is the documented API (glob.glob1 is an undocumented helper that
# newer Python versions deprecate). It returns the matches with the indir
# prefix already attached.
infiles = glob.glob(indir + '*.sam')
# Strip the indir prefix and the trailing "sam", then append "out".
outfiles = [outdir + path[len(indir):-3] + "out" for path in infiles]
for infile, outfile in zip(infiles, outfiles):
    do_stuff_with_files(infile, outfile)

The following script allows working with an input and output file. It will loop over all files in the given directory with the ".sam" extension, perform the specified operation on them, and output the results to a separate file.
import os

# Define the directory containing the files you are working with
path = '/home/directory'

# Get all the files in that directory with the desired
# extension (in this case ".sam")
files = [f for f in os.listdir(path) if f.endswith('.sam')]
# NOTE: the output files also end in ".sam", so running the script a second
# time on the same directory will pick up the earlier outputs as inputs.

# Loop over the files with that extension
for file in files:
    # splitext splits at the last dot only, so names containing extra dots
    # keep their full stem: "a.sam" -> "aout.sam"
    stem, ext = os.path.splitext(file)
    in_path = os.path.join(path, file)
    out_path = os.path.join(path, stem + 'out' + ext)
    # Open the input file and the output file. The output is opened with 'w'
    # so that a re-run replaces earlier results instead of appending to them.
    with open(in_path, 'r') as infile, open(out_path, 'w') as outfile:
        # Loop over the lines in the input file
        for line in infile:
            # If a line in the input file can be characterized in a
            # certain way, write a different line to the output file.
            # Otherwise write the original line (from the input file)
            # to the output file
            if line.startswith('Something'):
                outfile.write('A different kind of something')
            else:
                outfile.write(line)
# Note the absence of either a infile.close() or an outfile.close()
# statement. The with-statement handles that for you

Develop Reference

Python is a programming language that lets you work quickly and integrate systems more effectively.

Invalid file error Python? - python

Related

Winerror 123 when trying to rename a file

How to unzip all folders/files that end in .zip and extract “file.txt” file from each zipped folder

Find files in a directory containing desired string in Python

Reading file from a directory in Python

python multiple inputs and multiple outputs

Categories

Resources