import os
import aspose.words as aw
rootdir = 'C:/Users/user/stuff/tests'
for subdir, dirs, files in os.walk(rootdir):
for file in files:
a = os.path.join(subdir, file)
doc = aw.Document(a)
doc.save("utput.docx")
doc = aw.Document("Output.docx")
doc.save("output.pdf")
This is my program.
I am trying to run python through a folder containing pdf files, and decrypt them one by one by converting it to word, and then to pdf. What am I doing wrong?
Don't use os.walk. use os.listdir(rootdir) instead. Please note that the saved file and the used file have the same name.
Example:
import os
import aspose.words as aw
root = "C:/Users/user/stuff/tests"
for item in os.listdir(root):
if os.path.isfile(os.path.join(root, item)):
doc = aw.Document(item)
doc.save("Output.docx")
doc = aw.Document("Output.docx")
doc.save("output.pdf")
[EDIT]
above code cant find other folders so i decide to use glob to find all folders
Here:
import os
import aspose.words as aw
import glob
# Set base directory
os.chdir(os.path.join("C:/Users/user/stuff/tests"))
# Geting all pdf files in list
pdf_files = glob.glob("*.pdf")
for files in pdf_files:
doc = aw.Document(files)
doc.save("Output.docx")
doc = aw.Document("Output.docx")
doc.save("output.pdf")
[EDIT-2]
First take all .pdf files in one list :
pdf_files = glob.glob("*.pdf")
other_pdf_files = glob.glob('*/*.pdf')
all_pdf_files=(*pdf_files,*other_pdf_files)
Secondly, you need to use PyPDF2 to get rid of password.
Get unencrypted pdfs by sending all pdf files into decrypt_pdf (don't forget to specify the password). For example: (More detail here and here)
from PyPDF2 import PdfFileReader, PdfFileWriter
def decrypt_pdf(input_path, output_path, password):
with open(input_path, 'rb') as input_file, \
open(output_path, 'wb') as output_file:
reader = PdfFileReader(input_file)
reader.decrypt(password)
writer = PdfFileWriter()
for i in range(reader.getNumPages()):
writer.addPage(reader.getPage(i))
writer.write(output_file)
You can run other parts in the same way.
for files in all_pdf_files:
doc = aw.Document(files)
...
i create a python file via the "with open()" method and now i would like to import the file but the filename should be variable.
filename = "Test"
with open(filename + ".py", "w+") as file:
file.write("def main():\n\tpass")
Now at a other line in the skript i would like to import the python script called like filename. But you cant do something like:
import filename
because then python searches for a python script called "filename". but in this example it should import "Test.py". Any suggestions?
You want the built in import function
new_module = __import__(modulename)
so...
filename = "Test"
with open(filename + ".py", "w+") as file:
file.write("def main():\n\tpass")
new_module = __import__(filename)
Visit https://docs.python.org/3/library/functions.html#__import__
I want to write a program for this: In a folder I have n number of files; first read one file and perform some operation then store result in a separate file. Then read 2nd file, perform operation again and save result in new 2nd file. Do the same procedure for n number of files. The program reads all files one by one and stores results of each file separately. Please give examples how I can do it.
I think what you miss is how to retrieve all the files in that directory.
To do so, use the glob module.
Here is an example which will duplicate all the files with extension *.txt to files with extension *.out
import glob
list_of_files = glob.glob('./*.txt') # create the list of file
for file_name in list_of_files:
FI = open(file_name, 'r')
FO = open(file_name.replace('txt', 'out'), 'w')
for line in FI:
FO.write(line)
FI.close()
FO.close()
import sys
# argv is your commandline arguments, argv[0] is your program name, so skip it
for n in sys.argv[1:]:
print(n) #print out the filename we are currently processing
input = open(n, "r")
output = open(n + ".out", "w")
# do some processing
input.close()
output.close()
Then call it like:
./foo.py bar.txt baz.txt
You may find the fileinput module useful. It is designed for exactly this problem.
I've just learned of the os.walk() command recently, and it may help you here.
It allows you to walk down a directory tree structure.
import os
OUTPUT_DIR = 'C:\\RESULTS'
for path, dirs, files in os.walk('.'):
for file in files:
read_f = open(os.join(path,file),'r')
write_f = open(os.path.join(OUTPUT_DIR,file))
# Do stuff
Combined answer incorporating directory or specific list of filenames arguments:
import sys
import os.path
import glob
def processFile(filename):
fileHandle = open(filename, "r")
for line in fileHandle:
# do some processing
pass
fileHandle.close()
def outputResults(filename):
output_filemask = "out"
fileHandle = open("%s.%s" % (filename, output_filemask), "w")
# do some processing
fileHandle.write('processed\n')
fileHandle.close()
def processFiles(args):
input_filemask = "log"
directory = args[1]
if os.path.isdir(directory):
print "processing a directory"
list_of_files = glob.glob('%s/*.%s' % (directory, input_filemask))
else:
print "processing a list of files"
list_of_files = sys.argv[1:]
for file_name in list_of_files:
print file_name
processFile(file_name)
outputResults(file_name)
if __name__ == '__main__':
if (len(sys.argv) > 1):
processFiles(sys.argv)
else:
print 'usage message'
from pylab import *
import csv
import os
import glob
import re
x=[]
y=[]
f=open("one.txt",'w')
for infile in glob.glob(('*.csv')):
# print "" +infile
csv23=csv2rec(""+infile,'rb',delimiter=',')
for line in csv23:
x.append(line[1])
# print len(x)
for i in range(3000,8000):
y.append(x[i])
print ""+infile,"\t",mean(y)
print >>f,""+infile,"\t\t",mean(y)
del y[:len(y)]
del x[:len(x)]
I know I saw this double with open() somewhere but couldn't remember where. So I built a small example in case someone needs.
""" A module to clean code(js, py, json or whatever) files saved as .txt files to
be used in HTML code blocks. """
from os import listdir
from os.path import abspath, dirname, splitext
from re import sub, MULTILINE
def cleanForHTML():
""" This function will search a directory text files to be edited. """
## define some regex for our search and replace. We are looking for <, > and &
## To replaced with &ls;, > and &. We might want to replace proper whitespace
## chars to as well? (r'\t', ' ') and (f'\n', '<br>')
search_ = ((r'(<)', '<'), (r'(>)', '>'), (r'(&)', '&'))
## Read and loop our file location. Our location is the same one that our python file is in.
for loc in listdir(abspath(dirname(__file__))):
## Here we split our filename into it's parts ('fileName', '.txt')
name = splitext(loc)
if name[1] == '.txt':
## we found our .txt file so we can start file operations.
with open(loc, 'r') as file_1, open(f'{name[0]}(fixed){name[1]}', 'w') as file_2:
## read our first file
retFile = file_1.read()
## find and replace some text.
for find_ in search_:
retFile = sub(find_[0], find_[1], retFile, 0, MULTILINE)
## finally we can write to our newly created text file.
file_2.write(retFile)
This thing also works for reading multiple files, my file name is fedaralist_1.txt and federalist_2.txt and like this, I have 84 files till fedaralist_84.txt
And I'm reading the files as f.
for file in filename:
with open(f'federalist_{file}.txt','r') as f:
f.read()
I would like to save the output in the same location as the input but using a different, but still related, name.
Minimal Example:
This script searches for lines containing the string "NFS" in the input file. Then, it prints the results to another file.
I would like to save the print result to an output file in the same location as the input file but using a different name like "inputfilename.out.csv".
Here is the code :
from __future__ import print_function
import sys
fname = sys.argv[1]
out = open(fname.out.csv, "w") #this doesn't work but this is the idea
with open(fname) as file:
reader = file.readlines()
for line in reader:
if "NFS" in line:
print(line, file = out)
Any suggestions ?
You could use os.path.splitext() to extract extension:
import os
name, ext = os.path.splitext(fname)
output_filename = name + ".out" + ext
Or if you want to change the name completely, you could use os.path.dirname() to get the parent directory:
import os
dirpath = os.path.dirname(fname)
output_filename = os.path.join(dirpath, "outputfile.txt")
Use concatenation to add ".out.csv" to the end of your string.
out = open(fname + ".out.csv", "w")
If the input filename is "inputfilename", then the output will be "inputfilename.out.csv". This may be undesirable behavior if the input filename already has an extension. Then "inputfilename.csv" will become "inputfilename.csv.out.csv". In which case, you may wish to use os.path to construct the new name.
import os.path
filename = "inputfilename.csv"
root, extension = os.path.splitext(filename)
output_filename = root + ".out" + extension
print output_filename
Result:
inputfilename.out.csv
I have a zip file that has a path. When I unzip the file using python and put it in my target folder, it then creates all of the files in the path inside my target folder.
Target: d:\unzip_files
zip file has a path and file name of: \NIS\TEST\Files\tnt.png
What happens: d:\unzip_files\NIS\TEST\Files\tnt.png
Is there a way to hae it just unzip the tnt.png file into d:\unzip_files? Or will I have to read down the list and move the file and then delete all of the empty folders?
import os, sys, zipfile
zippath = r"D:\zip_files\test.zip"
zipdir = r"D:\unzip_files"
zfile = zipfile.ZipFile(zippath, "r")
for name in zfile.namelist():
zfile.extract(name, zipdir)
zfile.close()
So, this is what worked..
import os, sys, zipfile
zippath = r"D:\zip_files\test.zip"
zipdir = r"D:\unzip_files"
zfile = zipfile.ZipFile(zippath, "r")
for name in zfile.namelist():
fname = os.path.join(zipdir, os.path.basename(name))
fout = open(fname, "wb")
fout.write(zfile.read(name))
fout.close()
Thanks for the help.
How about reading file as binary and dump it? Need to deal cases where there is pre-existing file.
for name in zfile.namelist():
fname = os.path.join(zipdir, os.path.basename(name))
fout = open(fname, 'wb')
fout.write(zfile.read(name))