filename = r"C:\Dokumente und Einstellungen\sschnei1\Desktop\a.mat"
print open(filename, "r").read().encode("hex")
The code above only works for text files, but I want to read out the hex values of .mat files.
EDIT: my little hex-editor
from textwrap import fill
filename = r"C:\a.mat"
hexvalues = open(filename, "rb").read().encode("hex")
print fill(hexvalues,16)
Try open(filename, "rb") to open it as a binary file.
Related
How to save image files after generating them in a Python script?
def mols_to_pngs(mols, basename = "test"):
    """Render each molecule to its own PNG and return the file names.

    Files are named ``<basename><index>.png``, with the index counting from
    0 in iteration order. Each one is written via ``Draw.MolToFile``.
    """
    written = []
    for index, molecule in enumerate(mols):
        png_name = "%s%d.png" % (basename, index)
        Draw.MolToFile(molecule, png_name)
        written.append(png_name)
    return written
and I want to run this process automatically using a CSV file and a Python script.
To automatically save image files generated from a python script using a csv file, you need to read the data from the csv file and process it in your script. Here's a basic example:
import csv
import sys
def mols_to_pngs(mols, basename = "test"):
    """Write one PNG per molecule and return the list of names created.

    The n-th molecule (counting from 0) is drawn with ``Draw.MolToFile``
    into ``"<basename><n>.png"``.
    """
    created = []
    for position, current_mol in enumerate(mols):
        target = "%s%d.png" % (basename, position)
        Draw.MolToFile(current_mol, target)
        created.append(target)
    return created
# Read the csv file and build one molecule per data row.
csv_path = 'data.csv'
mols = []
try:
    # newline='' is what the csv module asks for, so that quoted fields
    # containing line breaks are parsed correctly.
    with open(csv_path, 'r', newline='') as csv_file:
        reader = csv.reader(csv_file)
        # Skip the header row; the default keeps an empty file from raising
        # StopIteration.
        next(reader, None)
        for row in reader:
            # process_data is a placeholder: supply your own function that
            # turns one csv row into a mol object.
            mols.append(process_data(row))
except IOError:
    # Report the path itself: when open() fails, no file object exists yet.
    print("Could not read file:", csv_path)
    # Non-zero status signals the failure to the calling shell.
    sys.exit(1)

# Save the image files
filenames = mols_to_pngs(mols)
print("Saved the following files:", filenames)
Note that you need to replace the process_data function with your own implementation to process the data from the csv file and create the mol objects. Additionally, you may need to modify the code to match your specific use case.
I have written a Python program that reads a test input file and outputs an output file
# Copy the contents of test.dat into test.log.
# The with-statement closes both files even if the read or write raises.
with open('test.dat', 'r') as inputFile, open('test.log', 'w') as outputFile:
    outputFile.write(inputFile.read())
I'd like to read all the .dat files in a folder and output corresponding .log files, keeping the file name prefix the same. Could somebody help me?
I've figured out that I could list all the .dat files with the following code, but I don't know what to do from there.
import os

# List every .dat file in the current directory.
for entry in os.listdir("."):
    # Filter on ".dat": those are the files this listing is meant to find.
    if entry.endswith(".dat"):
        print(os.path.join("xxx", entry))
Also: is there any way to know the count of .dat files in a directory? This way, while processing each file, I could display the progress status, like: "Processing File 1 of 999 data files", etc.
Thanks a bunch
Joanna
You can use glob to list only the files you want and shutil.copyfile to copy them to the new file name. Use the size of the list you get from glob to print progress along the way.
from glob import glob
import shutil

# Copy every .dat file in the current directory to a same-named .log file,
# reporting progress along the way.
dat_files = glob("*.dat")  # glob() already returns a list
dat_file_len = len(dat_files)
for i, dat_file in enumerate(dat_files, 1):
    print(f"copying {i} of {dat_file_len}")
    # Drop the 4-character ".dat" suffix and append ".log".
    shutil.copyfile(dat_file, dat_file[:-4] + ".log")
First you would have to get a list of all the .dat files contained in the directory, the code would look like this:
import glob
import os

# Copy each .dat file in the current directory to a matching .log file.
dat_files = glob.glob('*.dat')
for i, dat_file in enumerate(dat_files):
    print("Writing %d file..." %(i+1))
    # Remove "dat" from the end and insert the new extension "log";
    # the dot before it is kept.
    log_name = dat_file[:-3] + "log"
    # The with-statement closes both handles even when the copy fails.
    with open(dat_file, 'r') as inputFile, open(log_name, 'w') as outputFile:
        outputFile.write(inputFile.read())
You can use the program below:
import glob

# Copy every .dat file in the current directory to a .log file with the
# same base name, printing progress for each one.
files_list = glob.glob('*.dat')
count_dat_file = len(files_list)
print("Count of .dat file is : {}\n".format(count_dat_file))

# enumerate(..., 1) supplies the 1-based progress counter.
for count_var, item in enumerate(files_list, 1):
    print("Processing file {} of {}".format(count_var, count_dat_file))
    print("Old File Name is : {}".format(item))
    # Swap only the final extension for "log"; dots earlier in the name stay.
    new_file_name = item.rsplit('.', 1)[0] + '.log'
    print("New file Name is : {}".format(new_file_name))
    with open(item, 'r') as input_file, open(new_file_name, 'w') as output_file:
        output_file.write(input_file.read())
    print("Data written to new file : {}".format(new_file_name))
    print("\n")
The code I am working with takes in a .pdf file, and outputs a .txt file. My question is, how do I create a loop (probably a for loop) which runs the code over and over again on all files in a folder which end in ".pdf"? Furthermore, how do I change the output each time the loop runs so that I can write a new file each time, that has the same name as the input file (ie. 1_pet.pdf > 1_pet.txt, 2_pet.pdf > 2_pet.txt, etc.)
Here is the code so far:
# Convert a single PDF to text and save it, UTF-8 encoded, as Output.txt.
path = "2_pet.pdf"
encoded = getPDFContent(path).encode("utf-8")
text_file = open("Output.txt", "w")
text_file.write(encoded)
text_file.close()
The following script solves your problem:
import os

# Convert every PDF in sourcedir to a UTF-8 text file with the same base name.
sourcedir = 'pdfdir'
dl = os.listdir(sourcedir)
for f in dl:
    # splitext copes with names that have no dot or more than one dot.
    stem, extension = os.path.splitext(f)
    if extension != ".pdf":
        continue
    # Join with the directory path; dl is the list of names, not a path.
    path_in = os.path.join(sourcedir, f)
    content = getPDFContent(path_in)
    encoded = content.encode("utf-8")
    path_out = os.path.join(sourcedir, stem + ".txt")
    # The with-block closes the output file even if the write fails.
    with open(path_out, 'w') as text_file:
        text_file.write(encoded)
Create a function that encapsulates what you want to do to each file.
import os.path
def parse_pdf(filename):
    """Parse a pdf into text.

    Reads *filename* with ``getPDFContent``, encodes the result as UTF-8 and
    writes it next to the source file, with the extension replaced by
    ``.txt``.
    """
    content = getPDFContent(filename)
    encoded = content.encode("utf-8")
    # Split off the pdf extension to add .txt instead.
    root, _ = os.path.splitext(filename)
    # The with-block closes the output file even if the write fails.
    with open(root + ".txt", "w") as text_file:
        text_file.write(encoded)
Then apply this function to a list of filenames, like so:
# Convert each file in turn.
# NOTE(review): `files` is not defined in this snippet — presumably an
# iterable of PDF paths; confirm against the caller.
for f in files:
    parse_pdf(f)
One way to operate on all PDF files in a directory is to invoke glob.glob() and iterate over the results:
import glob
import os

# Convert every PDF in the current directory to a same-named .txt file.
for path in glob.glob('*.pdf'):
    content = getPDFContent(path)
    encoded = content.encode("utf-8")
    # Derive the output name from the input (1_pet.pdf -> 1_pet.txt) so each
    # PDF gets its own file instead of all of them overwriting Output.txt.
    out_path = os.path.splitext(path)[0] + ".txt"
    # The with-block closes the output file even if the write fails.
    with open(out_path, "w") as text_file:
        text_file.write(encoded)
Another way is to allow the user to specify the files:
import sys

# Iterate over every command-line argument after the script name.
for path in sys.argv[1:]:
    ...  # placeholder for the per-file processing
Then the user runs your script like python foo.py *.pdf.
You could use a recursive function to search the folder and all subfolders for files that end with .pdf. Then take those files and create a text file for each one.
It could be something like:
import os
def convert_PDF(path, func):
    """Recursively apply *func* to every ``.pdf`` file at or below *path*.

    Args:
        path: A file or directory path. Directories are walked recursively;
            a file is processed only if its name ends in ``.pdf``.
        func: Callable invoked with the path of each matching file.

    Returns:
        None.
    """
    if os.path.isdir(path):
        # A plain loop: the recursion is run for its side effects only.
        for entry in os.listdir(path):
            convert_PDF(os.path.join(path, entry), func)
    elif os.path.basename(path).endswith('.pdf'):
        func(path)
# based entirely on your example code
def convert_to_txt(path):
    """Write the content of the PDF at *path* to a sibling ``.txt`` file.

    The content comes from ``getPDFContent`` and is encoded as UTF-8. The
    output keeps the input's directory and base name, with the extension
    replaced by ``.txt``.
    """
    content = getPDFContent(path)
    encoded = content.encode("utf-8")
    # Replace the pdf extension with txt. Working on the whole path keeps a
    # bare file name (no directory part) relative, instead of prefixing it
    # with '/' and pointing at the filesystem root.
    text_path = os.path.splitext(path)[0] + '.txt'
    # The with-block closes the output file even if the write fails.
    with open(text_path, "w") as text_file:
        text_file.write(encoded)
# Start at 'path/to/files', passing convert_to_txt as the per-file operation.
convert_PDF('path/to/files', convert_to_txt)
Because the actual operation is changeable, you can replace the function with whatever operation you need to perform (like using a different library, converting to a different type, etc.)
I'm trying to create a html file and then convert this file to a pdf file using wkhtmltopdf http://wkhtmltopdf.org/
# Write the HTML to a file, then call convert2pdf to produce the PDF.
inputfilename = "/tmp/inputfile.html"
outputfilename = "/tmp/outputfile.pdf"
f = open(inputfilename, 'w')
f.write(html)
f.close()
# NOTE(review): this opens the output path in write mode before the
# converter is called, and nothing is ever written through this handle.
f1 = open(outputfilename, 'w')
# NOTE(review): the first argument is the (already closed) file object f,
# not the inputfilename string.
ret = convert2pdf(f,outputfilename)
f1.close()
In convert2pdf I'm doing:
def convert2pdf(htmlfilename,outputpdf):
    """Run the wkhtmltopdf binary through ``subprocess.call``.

    NOTE(review): the argument list holds the string literals
    'htmlfilename' and 'outputpdf', not the values of the two parameters.
    """
    import subprocess
    commands_to_run = ['/wkhtmltopdf-amd64','htmlfilename', 'outputpdf']
    subprocess.call(commands_to_run)
Both the input and output files are created on the fly. The input file is perfect, but the output PDF created using wkhtmltopdf is empty. Can you suggest what I am doing wrong?
I think you just have to change
commands_to_run = ['/wkhtmltopdf-amd64','htmlfilename', 'outputpdf']
to
commands_to_run = ['/wkhtmltopdf-amd64', htmlfilename, outputpdf]
and instead of
ret = convert2pdf(f,outputfilename)
do
ret = convert2pdf(inputfilename, outputfilename)
I am trying to rename a list of PDF files by extracting the name from each file using pyPdf. I tried to use a for loop to rename the files, but I always get an error with code 32 saying that the file is being used by another process. I am using Python 2.7.
Here's my code
import os, glob
from pyPdf import PdfFileWriter, PdfFileReader
# this function extracts the name of the file
def getName(filepath):
    """Return a new ``.pdf`` file name built from text found in the PDF.

    Copies the first page of *filepath* into ``filepath + '.txt'``, reads
    that file back, and builds the name from the text that follows the
    marker ``default">``.
    """
    output = PdfFileWriter()
    # NOTE(review): the file object opened here is never explicitly closed.
    input = PdfFileReader(file(filepath, "rb"))
    output.addPage(input.getPage(0))
    outputStream = file(filepath + '.txt', 'w')
    output.write(outputStream)
    outputStream.close()
    outText = open(filepath + '.txt', 'rb')
    textString = outText.read()
    outText.close()
    nameStart = textString.find('default">')
    nameEnd = textString.find('_SATB', nameStart)
    nameEnd2 = textString.find('</rdf:li>', nameStart)
    # NOTE(review): find() returns -1 (truthy) when the marker is missing and
    # 0 (falsy) for a match at the very start; if this branch is skipped,
    # `name` is unbound at the return below.
    if nameStart:
        # +9 skips over the 9-character marker 'default">'.
        testName = textString[nameStart+9:nameEnd]
        if len(testName) <= 100:
            name = testName + '.pdf'
        else:
            # Candidate longer than 100 characters: cut at '</rdf:li>' instead.
            name = textString[nameStart+9:nameEnd2] + '.pdf'
    return name
# Rename every PDF in the current directory to the name getName returns.
pdfFiles = glob.glob('*.pdf')
m = len(pdfFiles)
for pdf_path in pdfFiles:
    os.rename(pdf_path, getName(pdf_path))
Consider using Python's with statement. With it, you do not need to handle closing the file yourself:
def getName(filepath):
    output = PdfFileWriter()
    # The with-block closes pdfFile automatically when the block exits.
    with file(filepath, "rb") as pdfFile:
        input = PdfFileReader(pdfFile)
        ...
You're not closing the input stream (the file) used by the pdf reader.
Thus, when you try to rename the file, it's still open.
So, instead of this:
input = PdfFileReader(file(filepath, "rb"))
Try this:
inputStream = file(filepath, "rb")
input = PdfFileReader(inputStream)
(... when done with this file...)
inputStream.close()
It does not look like you close the file object associated with the PDF reader object. It may be closed automatically at the end of the function, but to be sure you might want to create a separate file object, pass it to the PdfFileReader, and then close the file handle when done. Then rename.
The below was from SO: How to close pyPDF "PdfFileReader" Class file handle
import os.path
from pyPdf import PdfFileReader
fname = 'my.pdf'
# Keep a separate handle to the file so it can be closed explicitly.
fh = file(fname, "rb")
input = PdfFileReader(fh)
# Close the handle before the rename is attempted.
fh.close()
os.rename(fname, 'my_renamed.pdf')