import pickle
# Write step: pickle the zipped (name, number-string) pairs into essay1.txt.
# "ab+" opens the file in binary append mode, so every run adds another pickle
# to the end of the file.
f = open("essay1.txt","ab+")
list1 = ["Aditya","Arvind","Kunal","Naman","Samantha"]
list2 = ["17","23","12","14","34"]
zipfile = zip(list1,list2)
# Prints the zip object itself (e.g. <zip object at 0x...>), not its pairs.
print(zipfile)
pickle.dump(zipfile,f)
f.close()
# Read step. NOTE: this opens "essay1" (no ".txt", unlike the write step) in
# "ab" mode, which is write-only, so pickle.load() raises
# UnsupportedOperation: read.
f = open("essay1","ab")
zipfile = pickle.load(f)
f.close()
And the output was:
runfile('E:/Aditya Singh/Aditya Singh/untitled3.py', wdir='E:/Aditya Singh/Aditya Singh')
<zip object at 0x0000000008293BC8>
Traceback (most recent call last):
File "E:\Aditya Singh\Aditya Singh\untitled3.py", line 21, in <module>
zipfile = pickle.load(f)
UnsupportedOperation: read
You forgot the file extension .txt in the line where you open the file for reading, and you also opened it in append mode ("ab"), which is write-only: the returned file object cannot be read, so pickle.load (which calls its read and readline methods) fails with UnsupportedOperation. I also suggest using the with statement instead of manually closing the file.
import pickle

# Writing into the file.
list1 = ["Aditya", "Arvind", "Kunal", "Naman", "Samantha"]
list2 = ["17", "23", "12", "14", "34"]
zipfile = zip(list1, list2)
print(zipfile)  # prints the zip object itself, e.g. <zip object at 0x...>

# "wb" rather than "ab+": each run replaces the file with exactly one pickle.
# In append mode repeated runs pile up pickles, and pickle.load() below would
# only ever return the first (oldest) one.
with open("essay1.txt", "wb") as f:
    pickle.dump(zipfile, f)

# Opening the file to read it: "rb" gives pickle.load() a readable stream.
with open("essay1.txt", "rb") as f:
    zipfile = pickle.load(f)

# The unpickled zip object is an iterator; iterating it yields the pairs.
for item in zipfile:
    print(item)
Output:
<zip object at 0x7fa6cb30e3c0>
('Aditya', '17')
('Arvind', '23')
('Kunal', '12')
('Naman', '14')
('Samantha', '34')
Do you have a file named essay1, or essay1.txt?
This line tries to open the file without the extension:
f = open("essay1","ab")
so fails to read.
There are two issues with your code:
You're opening the file to write and not to read.
You're using different filenames for reading and for writing.
Here's a version that works:
import pickle

# Writing into the file.
list1 = ["Aditya", "Arvind", "Kunal", "Naman", "Samantha"]
list2 = ["17", "23", "12", "14", "34"]
zipfile = zip(list1, list2)
print(zipfile)

# "wb": binary write mode. The with-block closes the file even if
# pickle.dump() raises, which a bare open()/close() pair does not guarantee.
with open("essay1.txt", "wb") as f:
    pickle.dump(zipfile, f)

# Opening the file to read it: same file name as above, binary read mode.
with open("essay1.txt", "rb") as f:
    zipfile = pickle.load(f)
print(zipfile)
Related
import glob
import os
import csv
from collections import OrderedDict
# Remove the output file if it already exists, so that the append-mode writes
# below start from an empty file on every run.
file_path = 'C:\\Users\\Desktop\\Cobol\\Outputs\\LOC3X.csv'
if os.path.isfile(file_path):
os.remove(file_path)
#
list_of_files = glob.glob('C:\\Users\\Desktop\\Cobol\\*.CBL') # Input files in Folder
Fields = ['Program Name', 'LinesofCode'] # to be displayed in output CSV file
# Open the output CSV file in append mode and write the header row (Fields).
file_path = 'C:\\Users\\Desktop\\Cobol\\Outputs\\LOC3X.csv'
with open(file_path, 'a') as csvfile1:
csvwriter = csv.writer(csvfile1)
csvwriter.writerow(Fields)
# An explicit close() in addition to the with-statement, which already closes
# the file when its block ends.
csvfile1.close()
# Count the lines of each input file and append the result to the output CSV.
# list_of_files: iterable of input file paths.
def process_files_loc(list_of_files):
for fileName in list_of_files:
with open(fileName) as i:
# Counts every line of the file, blank lines included.
count = sum(1 for line in i)
# NOTE: the key is the file object `i`, not the path `fileName`; this is why
# the printed OrderedDict shows <_io.TextIOWrapper ...> keys.
my_dict = {i : count} #input filename and its lines of code
ordered_dict = OrderedDict() #using OrderedDict
# Prints the still-empty OrderedDict.
print(ordered_dict)
# creating ordered dict from dict
ordered_dict = OrderedDict(my_dict)
print(ordered_dict)
# writing records of Program name and LinesofCode to output csv file
file_path = 'C:\\Users\\Desktop\\Cobol\\Outputs\\LOC3X.csv'
with open(file_path, 'a') as csvfile2:
csvwriter = csv.writer(csvfile2)
# NOTE: writerows() is given the dict itself, so it iterates over the keys
# only; the counts (the dict values) are never passed to the writer.
csvwriter.writerows(ordered_dict)
csvfile2.close()
process_files_loc(list_of_files)
Output in Terminal (Error):
PS C:\Users\Python-1> & C:/Users/AppData/Local/Programs/Python/Python310/python.exe c:/Users/Python-1/one.py
OrderedDict()
OrderedDict([(<_io.TextIOWrapper name='C:\\Users\\Desktop\\Cobol\\ABCDEFGH.CBL' mode='r' encoding='cp1252'>, 191)])
OrderedDict()
OrderedDict([(<_io.TextIOWrapper name='C:\\Users\\Desktop\\Cobol\\IJKLMNOP.CBL' mode='r' encoding='cp1252'>, 195)])
Actual output of file in Folder:
C:\Users\Desktop\Cobol\Outputs
Name Date Modified Type Size
LOC3X.csv 9/15/2022 time Comma Seperated 1KB
Problem: The script ran, read the 2 CBL files in the folder, and created 1 CSV file in the output folder. The output CSV file should contain:
Program Name LinesofCode
ABCDEFGH.CBL 191
IJKLMNOP.CBL 195
However, the actual content of the CSV file is only the header row:
Program Name LinesofCode
Try something like this:
import glob
import csv
import os
def process_files_loc(files):
    """Return one ``[file name, line count]`` row per path in *files*.

    Only non-blank lines are counted: a line that consists solely of a
    newline character is skipped. The file name is the base name of the
    opened file, without its directory.

    Args:
        files: iterable of paths to text files.

    Returns:
        A list of two-item lists, in the same order as *files*.
    """
    res = []
    for file in files:
        with open(file) as f:
            # Count lazily instead of building a list only to take its length.
            line_count = sum(1 for line in f if line != "\n")
            res.append([os.path.basename(f.name), line_count])
    return res
if __name__ == '__main__':
    # newline='' lets the csv module control line endings itself, which
    # avoids blank rows between records on Windows.
    with open('C:\\Users\\Main\\Desktop\\test\\test.csv', 'w', newline='') as f:
        csvwriter = csv.writer(f)
        # Header row first, then one row per input file.
        csvwriter.writerow(['Program Name', 'LinesofCode'])
        csvwriter.writerows(process_files_loc(glob.glob('C:\\Users\\Main\\Desktop\\test\\*.PY')))
Result:
result
Regards,
So, in the folder INFACT, I have the following files(and many more with the same extensions):
BFDRYCKSE.ad
BFDRYCKSE.txt
BFFALIV2SE.ad
BFFALIV2SE.txt
I need to zip those files according to the filename, but only those mentioned above. My final result should be:
BFDRYCKSE.zip, contains(BFDRYCKSE.ad, BFDRYCKSE.txt)
BFFALIV2SE.zip, contains(BFFALIV2SE.ad BFFALIV2SE.txt)
Here's my code:
import os
import glob
import zipfile
# Directory that holds the files to be zipped.
setfile = r'C:\Users\vijo9001\Desktop\Infact'
# Base names; files whose name contains one of these are to be zipped together.
myset = [
"BFBRODSE",
"BFDRYCKSE",
"BFFALIV2SE",
"BFFALIVSSE",
"BFFRYSTSE",
"BFHUSHA1SE",
"BFHUSHA2SE",
"BFHUSHALSE",
"BFKONFEKSE",
"BFKROPPVSE",
"BFKROPP2SE",
"BFLIV2SE",
"BFLIVSSE",
"BFMAKEUPSE",
"BFMEJERISE",
"BFTOBAKSE"
]
# The glob patterns below are relative, so work from inside the directory.
os.chdir(setfile)
list_of_lists = []
# `pattern` and myset[i] are the same value; the index is only used to look
# the element up again.
for i, pattern in enumerate(myset):
list_of_files = glob.glob(r'*{pattern}*'.format(pattern=myset[i]))
list_of_lists.append(list_of_files)
n = 0
# NOTE: glob returns an empty list when nothing matches the pattern; indexing
# [0] then raises the IndexError shown in the traceback.
file = os.path.splitext(list_of_files[0])[0]
with zipfile.ZipFile(file + '.zip', 'w') as myzip:
for f in list_of_files:
myzip.write(f, compress_type=zipfile.ZIP_DEFLATED)
I keep getting
Traceback (most recent call last):
File "C:/Users/vijo9001/Desktop/Retailers Check/aaa.py", line 29, in <module>
file = os.path.splitext(list_of_files[0])[0]
IndexError: list index out of range
Why is that?
I don't think you need to use an enumerator to access your list elements.
Your problem statement says that you want each zipfile to have the same basename as the .ad and .txt files which it should contain. With that in mind I'd try something along these lines instead
# Build one archive per base name, containing every file whose name
# contains that base name.
for basename in myset:
    archive = basename + '.zip'
    # The placeholder is named, so the value has to be passed by keyword;
    # a positional argument raises KeyError: 'pattern'.
    # The archive itself also matches the pattern on a rerun, so leave it out.
    filelist = [
        f for f in glob.glob('*{pattern}*'.format(pattern=basename))
        if f != archive
    ]
    if not filelist:
        # Nothing matched: skip instead of leaving an empty archive behind.
        continue
    with zipfile.ZipFile(archive, 'w') as myzip:
        for f in filelist:
            myzip.write(f, compress_type=zipfile.ZIP_DEFLATED)
My solution is without defining set names:
=^..^=
import os
import zipfile
from collections import defaultdict

# Group the .ad / .txt files of the current directory by base name.
# Grouping by name (rather than sorting and pairing neighbours) keeps files
# together correctly even when one of a pair is missing, and
# os.path.splitext copes with names that contain more than one dot.
files_by_stem = defaultdict(list)
for item in sorted(os.listdir(".")):
    file_name, file_extension = os.path.splitext(item)
    if file_extension in ('.ad', '.txt'):
        files_by_stem[file_name].append(item)

# Write one archive per base name; the with-statement closes each archive.
for file_name, members in files_by_stem.items():
    with zipfile.ZipFile(file_name + '.zip', 'w') as myzip:
        for member in members:
            myzip.write(member)
I am trying to concatenate all the pdf into one pdf thereby using PyPDF2 library.
I am using python 2.7 for the same.
My error is :
>>>
RESTART: C:\Users\Yash gupta\Desktop\first projectt\concatenate\test\New folder\test.py
['Invoice.pdf', 'Invoice_2.pdf', 'invoice_3.pdf', 'last.pdf']
Traceback (most recent call last):
File "C:\Users\Yash gupta\Desktop\first projectt\concatenate\test\New folder\test.py", line 17, in <module>
pdfReader = PyPDF2.PdfFileReader(pdfFileObj)
File "C:\Python27\lib\site-packages\PyPDF2\pdf.py", line 1084, in __init__
self.read(stream)
File "C:\Python27\lib\site-packages\PyPDF2\pdf.py", line 1689, in read
stream.seek(-1, 2)
IOError: [Errno 22] Invalid argument
My code is :
import PyPDF2, os
# Get all the PDF filenames.
# NOTE: every name ending in '.pdf' is collected, including 'last.pdf' (the
# output file written at the end) when it already exists in the directory;
# the printed list in the error output contains it.
pdfFiles = []
for filename in os.listdir('.'):
if filename.endswith('.pdf'):
pdfFiles.append(filename)
# Case-insensitive sort of the file names.
pdfFiles.sort(key=str.lower)
pdfWriter = PyPDF2.PdfFileWriter()
print ( pdfFiles)
# Loop through all the PDF files.
for filename in pdfFiles:
pdfFileObj = open(filename, 'rb')
pdfReader = PyPDF2.PdfFileReader(pdfFileObj)
print ( pdfFileObj )
# Loop through all the pages
for pageNum in range(0, pdfReader.numPages):
pageObj = pdfReader.getPage(pageNum)
pdfWriter.addPage(pageObj)
# Save the resulting PDF to a file.
pdfOutput = open('last.pdf', 'wb')
pdfWriter.write(pdfOutput)
pdfOutput.close()
My PDF has some non-ASCII characters, so I am using 'r' rather than 'rb'.
PS: I am new to Python and to working with libraries like this.
I believe you are looping through collected files incorrectly (Python is indentation-sensitive).
# Loop through all the PDF files.
pdfFileObjs = []  # the inputs have to stay open until the merged PDF is written
for filename in pdfFiles:
    if filename == 'last.pdf':
        # Skip the output of a previous run: it would be merged into itself,
        # and an empty or truncated last.pdf presumably is what makes
        # PdfFileReader fail in stream.seek(-1, 2).
        continue
    pdfFileObj = open(filename, 'rb')
    pdfFileObjs.append(pdfFileObj)
    pdfReader = PyPDF2.PdfFileReader(pdfFileObj)
    # Loop through all the pages
    for pageNum in range(0, pdfReader.numPages):
        pageObj = pdfReader.getPage(pageNum)
        pdfWriter.addPage(pageObj)

# Save the resulting PDF to a file (once, after all inputs were added).
with open('last.pdf', 'wb') as pdfOutput:
    pdfWriter.write(pdfOutput)

# The page data has been written out, so the inputs can be closed now.
for pdfFileObj in pdfFileObjs:
    pdfFileObj.close()
Also, try to use PdfFileMerger if you want to merge PDF files:
merger = PdfFileMerger(strict=False)
Check out the example code here.
I am trying to rename a list of PDF files by extracting the name from each file using pyPdf. I tried to use a for loop to rename the files, but I always get an error with code 32 saying that the file is being used by another process. I am using Python 2.7.
Here's my code
import os, glob
from pyPdf import PdfFileWriter, PdfFileReader
# Extract a new file name for the PDF at `filepath`: the first page is
# written to "<filepath>.txt", that file is read back, and the text after
# 'default">' is used as the name (with '.pdf' appended).
def getName(filepath):
output = PdfFileWriter()
# NOTE: the handle returned by file(filepath, "rb") is not kept in a variable
# and is never closed explicitly in this function.
input = PdfFileReader(file(filepath, "rb"))
output.addPage(input.getPage(0))
outputStream = file(filepath + '.txt', 'w')
output.write(outputStream)
outputStream.close()
outText = open(filepath + '.txt', 'rb')
textString = outText.read()
outText.close()
# 9 (used below) is the length of the marker 'default">'.
nameStart = textString.find('default">')
nameEnd = textString.find('_SATB', nameStart)
nameEnd2 = textString.find('</rdf:li>', nameStart)
# NOTE: str.find() returns -1 (truthy) when the marker is missing and 0
# (falsy) when it is at the very start, so this test is not "marker found".
if nameStart:
testName = textString[nameStart+9:nameEnd]
# Fall back to the '</rdf:li>' end marker when the '_SATB' cut is too long.
if len(testName) <= 100:
name = testName + '.pdf'
else:
name = textString[nameStart+9:nameEnd2] + '.pdf'
return name
# Rename every PDF in the current directory to the name found inside it.
pdfFiles = glob.glob('*.pdf')
# m is not used again in the code shown here.
m = len(pdfFiles)
for each in pdfFiles:
newName = getName(each)
os.rename(each, newName)
Consider using the with directive of Python. With it you do not need to handle closing the file yourself:
def getName(filepath):
output = PdfFileWriter()
with file(filepath, "rb") as pdfFile:
input = PdfFileReader(pdfFile)
...
You're not closing the input stream (the file) used by the pdf reader.
Thus, when you try to rename the file, it's still open.
So, instead of this:
input = PdfFileReader(file(filepath, "rb"))
Try this:
inputStream = file(filepath, "rb")
input = PdfFileReader(inputStream)
(... when done with this file...)
inputStream.close()
It does not look like you close the file object associated with the PDF reader object. It may be closed automatically at the end of the function, but to be sure you might want to create a separate file object, pass it to the PdfFileReader, and then close the file handle when done. Then rename.
The below was from SO: How to close pyPDF "PdfFileReader" Class file handle
import os.path
from pyPdf import PdfFileReader
fname = 'my.pdf'
# Keep the file handle in its own variable so it can be closed explicitly.
# `file` is the Python 2 built-in.
fh = file(fname, "rb")
input = PdfFileReader(fh)
# Close the handle before renaming, so the file is no longer held open.
fh.close()
os.rename(fname, 'my_renamed.pdf')
How do I write an in memory zipfile to a file?
# Create in memory zip and add files
# NOTE: the StringIO buffer is created inline and not kept in a variable of
# its own.
zf = zipfile.ZipFile(StringIO.StringIO(), mode='w',compression=zipfile.ZIP_DEFLATED)
zf.writestr('file1.txt', "hi")
zf.writestr('file2.txt', "hi")
# Need to write it out
# NOTE: the file is opened in text mode ("w"), and zf (the ZipFile object,
# not the archive bytes) is what gets passed to write().
f = file("C:/path/my_zip.zip", "w")
f.write(zf) # what to do here? Also tried f.write(zf.read())
f.close()
# The ZipFile is closed only here, after the attempt to write it out.
zf.close()
StringIO.getvalue returns the content of the StringIO buffer:
>>> import StringIO
>>> f = StringIO.StringIO()
>>> f.write('asdf')
>>> f.getvalue()
'asdf'
Alternatively, you can change position of the file using seek:
>>> f.read()
''
>>> f.seek(0)
>>> f.read()
'asdf'
Try following:
# Keep a reference to the in-memory buffer so its content can be read later.
mf = StringIO.StringIO()
with zipfile.ZipFile(mf, mode='w', compression=zipfile.ZIP_DEFLATED) as zf:
    zf.writestr('file1.txt', "hi")
    zf.writestr('file2.txt', "hi")

# Read the buffer only after the with-block above has closed the ZipFile.
with open("C:/path/my_zip.zip", "wb") as f:  # use `wb` mode
    f.write(mf.getvalue())
Modify falsetru's answer for python3
1) use io.BytesIO instead of StringIO.StringIO (a zip archive is binary data, so the text-only io.StringIO will not work)
StringIO in python3
2) use b"abc" instead of "abc" , or
python 3.5: TypeError: a bytes-like object is required, not 'str' when writing to a file
3) encode to binary string str.encode(s, "utf-8")
Best way to convert string to bytes in Python 3?
import zipfile
import io

# A zip archive is binary data, so the in-memory buffer must be a BytesIO.
mf = io.BytesIO()
with zipfile.ZipFile(mf, mode="w", compression=zipfile.ZIP_DEFLATED) as zf:
    # Three equivalent ways of passing the content as bytes.
    zf.writestr('file1.txt', b"hi")
    zf.writestr('file2.txt', str.encode("hi"))
    zf.writestr('file3.txt', str.encode("hi", 'utf-8'))

# Read the buffer only after the with-block above has closed the ZipFile.
with open("a.txt.zip", "wb") as f:  # use `wb` mode
    f.write(mf.getvalue())
This should also work for gzip: How do I gzip compress a string in Python?
# Copy the per-entry metadata of every member of one zip archive.
# NOTE(review): read_file, file_write_buffer and dest are not defined in this
# snippet, and nothing is written to zipwrite here. Presumably dest is the
# ZipInfo of the entry that gets written to zipwrite; confirm against the
# full code.
with ZipFile(read_file, 'r') as zipread:
with ZipFile(file_write_buffer, 'w', ZIP_DEFLATED) as zipwrite:
for item in zipread.infolist():
# Copy all ZipInfo attributes for each file since defaults are not preserved
dest.CRC = item.CRC
dest.date_time = item.date_time
dest.create_system = item.create_system
dest.compress_type = item.compress_type
dest.external_attr = item.external_attr
dest.compress_size = item.compress_size
dest.file_size = item.file_size
dest.header_offset = item.header_offset
In the case where the zip file reads corrupted and you notice missing symlinks or corrupted files with wrong timestamps, it could be the fact that the file properties are not getting copied over.
The above code snippet is how I solved the problem.