I am trying to merge some .pdf files in sub-folders
import os
import PyPDF2
from PyPDF2 import PdfFileMerger, PdfFileReader

dir_name = r"E:\Data"


def merge_subfolder_pdfs(sub_dir, output_path):
    """Merge every ``.pdf`` file directly inside *sub_dir* into *output_path*.

    A fresh ``PdfFileMerger`` is used for each call so that the pages of one
    sub-folder never leak into the output of the next one.

    Args:
        sub_dir: Folder whose PDF files are merged (not searched recursively).
        output_path: Path of the merged PDF to write.

    Raises:
        ValueError: Propagated from PyPDF2 when an input cannot be merged,
            e.g. "Unresolved named destination ...".
    """
    # NOTE(review): passing import_bookmarks=False to append() is a commonly
    # suggested workaround for the "Unresolved named destination" error; it
    # is not applied here because it has not been verified against the
    # failing files.
    merger = PdfFileMerger()
    try:
        # Sort the names so the page order of the result is deterministic;
        # os.listdir returns entries in arbitrary order.
        for filename in sorted(os.listdir(sub_dir)):
            print(filename)
            if filename.endswith(".pdf"):
                # Give the merger the path rather than an open file object:
                # it then opens the file itself and releases it in close().
                merger.append(os.path.join(sub_dir, filename))
        merger.write(output_path)
    finally:
        # Always release the input files, even when a merge fails.
        merger.close()


# One merged PDF per sub-folder, written to dir_name as "<sub-folder>.pdf".
# NOTE(review): sub-folders that share a name at different depths write to
# the same output file, as in the original script.
for root, dirs, files in os.walk(dir_name):
    for sub_name in dirs:
        sub_dir = os.path.join(root, sub_name)
        print(sub_dir)
        merge_subfolder_pdfs(sub_dir, os.path.join(dir_name, sub_name + '.pdf'))
The code runs as expected for some sub-folders, but for others it raises the following error:
ValueError: Unresolved named destination '_PAGE1'.
How can I solve this issue?
Traceback of error
File "<ipython-input-5-bd9240b14192>", line 1, in <module>
runfile('E:/Data/xxx.py', wdir='E:/Data')
File "C:\Anaconda\lib\site-packages\spyder_kernels\customize\spydercustomize.py", line 827, in runfile
execfile(filename, namespace)
File "C:\Anaconda\lib\site-packages\spyder_kernels\customize\spydercustomize.py", line 110, in execfile
exec(compile(f.read(), filename, 'exec'), namespace)
File "E:/Data/xxx.py", line 23, in <module>
merger.append(PdfFileReader(open(filepath, 'rb')))
File "C:\Anaconda\lib\site-packages\PyPDF2\merger.py", line 203, in append
self.merge(len(self.pages), fileobj, bookmark, pages, import_bookmarks)
File "C:\Anaconda\lib\site-packages\PyPDF2\merger.py", line 174, in merge
self._associate_dests_to_pages(srcpages)
File "C:\Anaconda\lib\site-packages\PyPDF2\merger.py", line 436, in _associate_dests_to_pages
raise ValueError("Unresolved named destination '%s'" % (nd['/Title'],))
ValueError: Unresolved named destination '_PAGE1'
Related
I am writing a program that shuffles the contents of files. All the files are almost the same, but it doesn't work for some of them, and I can't understand why.
# For each name in allFiles: create an output folder named after the file
# (if it does not exist yet) and open the workbook with openpyxl.
for file in allFiles:
print(file)
items = []
fileName = file
fileIndex = 1
# Drop the last 5 characters of the name (the length of ".xlsx") to get the
# folder name.
directory = os.path.join(path, fileName[:-5].strip())
if not os.path.exists(directory):
os.mkdir(directory)
# load_workbook raises zipfile.BadZipFile when the file is not a valid zip
# archive.
theFile = openpyxl.load_workbook(file)
allSheetNames = theFile.sheetnames
After some number of files have been processed, it shows me this error:
Traceback (most recent call last):
File "D:\staff\Python\NewProject\glow.py", line 25, in <module>
theFile = openpyxl.load_workbook(file)
File "C:\Users\User\AppData\Local\Programs\Python\Python38-32\lib\site-packages\openpyxl\reader\excel.py", line 313, in load_workbook
reader = ExcelReader(filename, read_only, keep_vba,
File "C:\Users\User\AppData\Local\Programs\Python\Python38-32\lib\site-packages\openpyxl\reader\excel.py", line 124, in __init__
self.archive = _validate_archive(fn)
File "C:\Users\User\AppData\Local\Programs\Python\Python38-32\lib\site-packages\openpyxl\reader\excel.py", line 96, in _validate_archive
archive = ZipFile(filename, 'r')
File "C:\Users\User\AppData\Local\Programs\Python\Python38-32\lib\zipfile.py", line 1269, in __init__
self._RealGetContents()
File "C:\Users\User\AppData\Local\Programs\Python\Python38-32\lib\zipfile.py", line 1336, in _RealGetContents
raise BadZipFile("File is not a zip file")
zipfile.BadZipFile: File is not a zip file
Before that everything worked fine, without any error. Can someone guess why? Thanks, everybody.
I look for the files this way:
# Locate the workbooks to process: every *.xlsx file in <cwd>/source.
path = os.getcwd()
# os.path.join replaces the literal '\source', which relied on the invalid
# string escape '\s' and on a hard-coded Windows separator.
sourcePath = os.path.join(path, 'source')
extension = 'xlsx'
os.chdir(sourcePath)
# Skip names starting with '~$'. These are presumably the lock files Excel
# creates next to an open workbook: they match *.xlsx but are not zip
# archives, so openpyxl fails on them with BadZipFile.
allFiles = [
    name
    for name in glob.glob('*.{}'.format(extension))
    if not name.startswith('~$')
]
You iterate over all matched files regardless of their actual file type. Probably you or some process added a file to the directory that is not a real xlsx file, which is why openpyxl fails to read it.
I am trying to make a script that gets all the pictures of my Images folder that have the jpg extension and moves them in the newimages folder.
Here is my code:
import os
import shutil
# Intended: move every .jpg found in D:/Images/ into D:/newimages/.
for filename in os.listdir("D:/Images/"):
if filename.endswith(".jpg"):
# NOTE(review): os.listdir returns bare file names, so this source path is
# resolved against the current working directory rather than D:/Images/.
shutil.move(filename, r'D:/newimages/')
However when I run the code, I get the following error:
Traceback (most recent call last):
File "d:\Online_courses\Coursera\Google_IT_Automation\Automating_real-world_tasks_with_python\project1\script1.py", line 9, in <module>
shutil.move(filename, r'D:/newimages/')
File "C:\Users\Nicholas\AppData\Local\Programs\Python\Python37\lib\shutil.py", line 580, in move
copy_function(src, real_dst)
File "C:\Users\Nicholas\AppData\Local\Programs\Python\Python37\lib\shutil.py", line 266, in copy2
copyfile(src, dst, follow_symlinks=follow_symlinks)
File "C:\Users\Nicholas\AppData\Local\Programs\Python\Python37\lib\shutil.py", line 120, in copyfile
with open(src, 'rb') as fsrc:
FileNotFoundError: [Errno 2] No such file or directory: '20180331_164750.jpg'
When you use `filename` on its own, you only have the file's name, not its location. To move the file you need the full path. Please try the same thing, but with:
# os.listdir yields bare file names, so both ends of the move are built as
# full paths before shutil.move is called.
for filename in os.listdir("D:/Images/"):
    if not filename.endswith(".jpg"):
        continue
    source_path = os.path.join("D:/Images/", filename)
    target_path = os.path.join("D:/NewImages/", filename)
    shutil.move(source_path, target_path)
You need to prepend 'D:/Images/' to each filename. You're not in the D:/Images/ directory, so Python isn't able to find those files.
old_dir = 'D:/Images'
new_dir = 'D:/newimages'
# Make sure the destination is an existing directory. If it were missing,
# shutil.move would rename the first image to the path `new_dir` itself
# instead of moving it into a folder.
os.makedirs(new_dir, exist_ok=True)
for filename in os.listdir(old_dir):
    if filename.endswith(".jpg"):
        # os.listdir returns bare names; join with old_dir to get a path that
        # is valid whatever the current working directory is.
        shutil.move(os.path.join(old_dir, filename), new_dir)
I am trying to make a big data frame by looping through sub-directories. I want to:
i) read data from all the files (with .nc extension) in the subdirectories,
ii) select a particular chunk of it
iii) save it in a output.nc file.
import os
import xarray as xr
import numpy as np
# Walk rootdir, open every file found as an xarray dataset, and collect the
# part of each dataset that lies inside the region of interest.
rootdir ='/Users/sm/Desktop/along_track_J2'
data_new=[]
for subdir, dirs, files in os.walk(rootdir):
for file in files:
file_name= os.path.join(subdir, file)
# NOTE(review): every file is opened whatever its extension, so non-NetCDF
# files in the tree (e.g. .DS_Store) reach open_dataset as well.
df=xr.open_dataset(file_name)
# Re-express longitude in the range [-180, 180) before filtering.
df['longitude'] = ((df.longitude + 180) % 360 - 180).sortby(df.longitude)
# Keep points with -65 <= longitude <= -45 and latitude > 55.
ds=df.where((df.longitude>=-65) & (df.longitude<=-45) & (df.latitude>55), drop=True)
data_new.append(ds)
Somehow xarray cannot read the file and I see the following error:
File "", line 1, in
runfile('/Users/sm/Desktop/jason2_processing.py', wdir='/Users/sm/Desktop')
File "/Users/sm/anaconda3/lib/python3.7/site-packages/spyder_kernels/customize/spydercustomize.py", line 668, in runfile
execfile(filename, namespace)
File "/Users/sm/anaconda3/lib/python3.7/site-packages/spyder_kernels/customize/spydercustomize.py", line 108, in execfile
exec(compile(f.read(), filename, 'exec'), namespace)
File "/Users/sm/Desktop/jason2_processing.py", line 18, in
df=xr.open_dataset(file_name)
File "/Users/sm/anaconda3/lib/python3.7/site-packages/xarray/backends/api.py", line 320, in open_dataset
**backend_kwargs)
File "/Users/sm/anaconda3/lib/python3.7/site-packages/xarray/backends/netCDF4_.py", line 331, in open
ds = opener()
File "/Users/sm/anaconda3/lib/python3.7/site-packages/xarray/backends/netCDF4_.py", line 230, in _open_netcdf4_group
ds = nc4.Dataset(filename, mode=mode, **kwargs)
File "netCDF4/_netCDF4.pyx", line 2123, in netCDF4._netCDF4.Dataset.init
File "netCDF4/_netCDF4.pyx", line 1743, in netCDF4._netCDF4._ensure_nc_success
OSError: [Errno -51] NetCDF: Unknown file format: b'/Users/sm/Desktop/along_track_J2/.DS_Store'
Can anyone please help me with this. Thank you in advance.
OSError: [Errno -51] NetCDF: Unknown file format: b'/Users/sm/Desktop/along_track_J2/.DS_Store'
You are currently looping through all files, NetCDF and other (system) files. .DS_Store is a file created by macOS, which isn't a NetCDF file. If you only want to process NetCDF files, something like this should work:
...
for file in files:
if file.split('.')[-1] == 'nc':
file_name= os.path.join(subdir, file)
df = xr.open_dataset(file_name)
....
if file.split('.')[-1] == 'nc': (the only thing which I added) basically checks if the file extension is .nc, and ignores other files.
Traceback (most recent call last):
File "<ipython-input-6-3aab1be09567>", line 1, in <module>
runfile('D:/Viji/Nlp/ResumeProject/Resume_Info_Extraction/ResumeForMyUnderstanding.py', wdir='D:/Viji/Nlp/ResumeProject/Resume_Info_Extraction')
File "C:\Users\vijv2c13136\AppData\Local\Continuum\anaconda2\lib\site-packages\spyder_kernels\customize\spydercustomize.py", line 668, in runfile
execfile(filename, namespace)
File "C:\Users\vijv2c13136\AppData\Local\Continuum\anaconda2\lib\site-packages\spyder_kernels\customize\spydercustomize.py", line 93, in execfile
exec(compile(scripttext, filename, 'exec'), glob, loc)
File "D:/Viji/Nlp/ResumeProject/Resume_Info_Extraction/ResumeForMyUnderstanding.py", line 394, in <module>
p = Parse(verbose)
File "D:/Viji/Nlp/ResumeProject/Resume_Info_Extraction/ResumeForMyUnderstanding.py", line 61, in __init__
self.inputString = self.readFile(f)
File "D:/Viji/Nlp/ResumeProject/Resume_Info_Extraction/ResumeForMyUnderstanding.py", line 95, in readFile
return subprocess.Popen(['antiword', fileName], stdout=subprocess.PIPE, stderr=subprocess.PIPE).communicate()[0], extension
File "C:\Users\vijv2c13136\AppData\Local\Continuum\anaconda2\lib\site-packages\spyder_kernels\customize\spydercustomize.py", line 171, in __init__
super(SubprocessPopen, self).__init__(*args, **kwargs)
File "C:\Users\vijv2c13136\AppData\Local\Continuum\anaconda2\lib\subprocess.py", line 394, in __init__
errread, errwrite)
File "C:\Users\vijv2c13136\AppData\Local\Continuum\anaconda2\lib\subprocess.py", line 644, in _execute_child
startupinfo)
def __init__(self, verbose=False):
# Announce start-up, then collect the resume files to process from the
# relative "resumes/" folder. The patterns are relative, so they are
# resolved against the current working directory.
print('Starting Programme')
# NOTE(review): `fields` is not used in the lines visible here.
fields = ["name", "address", "email", "phone", "mobile", "telephone", "residence status","experience","degree","cainstitute","cayear","caline","b.cominstitute","b.comyear","b.comline","icwainstitue","icwayear","icwaline","m.cominstitute","m.comyear","m.comline","mbainstitute","mbayear","mbaline","engineering","engineeringyear","engineeringline"]
# Glob module matches certain patterns
doc_files = glob.glob("resumes/*.doc")
docx_files = glob.glob("resumes/*.docx")
pdf_files = glob.glob("resumes/*.pdf")
rtf_files = glob.glob("resumes/*.rtf")
text_files = glob.glob("resumes/*.txt")
# Combine the per-extension lists; going through a set drops any duplicate
# paths and does not preserve ordering.
files = set(doc_files + docx_files + pdf_files + rtf_files + text_files)
files = list(files)
print ("%d files identified" %len(files))
I am unable to retrieve all the files in the folder; I am only able to retrieve one or two files from that directory.
Please help me solve this issue.
If you want, I will add my full code.
Thanks in advance!
I have fixed my error. My code was unable to read the .doc files, so I removed all the .doc files and ran my code again. Now it works correctly.
I am having trouble moving files from one folder to another. I have written this simple code:
import os
import shutil
# Intended: copy every .xls file found under movePath into destPath.
movePath = "C:\\Users\\BWhitehouse\\Documents\\GVRD\\MonthlySummary_03\\SCADA"
destPath = "I:\\eng\\GVRD\\Rain_Gauges\\MonthlyDownloads\\2014-03"
for dirpath, dirs, files in os.walk(movePath):
for file in files:
if file.endswith('.xls'):
# NOTE(review): `file` is only the base name; without dirpath it is resolved
# against the current working directory.
shutil.copy(file, destPath)
And this is the error I am getting:
Traceback (most recent call last):
File "C:\Python34\test.py", line 12, in <module> shutil.copy(file, destPath)
File "C:\Python34\lib\shutil.py", line 228, in copy copyfile(src, dst, follow_symlinks=follow_symlinks)
File "C:\Python34\lib\shutil.py", line 107, in copyfile with open(src, 'rb') as fsrc:
FileNotFoundError: [Errno 2] No such file or directory: 'BU07-201403.xls'
If anyone could help me out that would be greatly appreciated!
The file variable is just the name, to get the full path add it to the dirpath variable:
shutil.copy( os.path.join(dirpath, file), destPath )
Do you have full access to these folders? First check it out.
Start Python as Administrator by right-clicking, when you try to start the script.
I had the same problem. I solved the problem in this way.