Get files from specific folders in python - python

I have the following directory structure with the following files:
Folder_One
├─file1.txt
├─file1.doc
└─file2.txt
Folder_Two
├─file2.txt
├─file2.doc
└─file3.txt
I would like to get only the .txt files from each folder listed. Example:
Folder_One-> file1.txt and file2.txt
Folder_Two-> file2.txt and file3.txt
Note: This entire directory is inside a folder called dataset. My code looks like this, but I believe something is missing. Can someone help me?
import glob
import os

# Asker's original attempt, re-indented so that it parses; the logic is left
# as posted.
path_dataset = "./dataset/"
filedataset = os.listdir(path_dataset)
for i in filedataset:
    # ''.join(i) just rebuilds the entry name, so `pasta` ends up equal to `i`.
    pasta = ''
    pasta = pasta.join(i)
    # NOTE: the pattern below never includes `pasta`, so it only looks for
    # .txt files directly inside ./dataset/ and repeats that for every entry.
    for file in glob.glob(path_dataset+"*.txt"):
        print(file)

from pathlib import Path

# Recursively find every .txt file below ./dataset and print its file name.
for path in Path('dataset').rglob('*.txt'):
    print(path.name)
Using glob
import glob

# With recursive=True, '**' matches any number of nested folders (including
# none), so .txt files at every depth below dataset/ are printed.
for x in glob.glob('dataset/**/*.txt', recursive=True):
    print(x)

You can use re module to check that filename ends with .txt.
import re
import os

path_dataset = "./dataset/"
# Compiled once: a name matches when it ends with the literal suffix ".txt".
txt_pattern = re.compile(r".*\.txt$")

for e in os.listdir(path_dataset):
    folder = os.path.join(path_dataset, e)
    # Only look inside sub-folders; plain files directly in dataset are skipped.
    if os.path.isdir(folder):
        for file in os.listdir(folder):
            if txt_pattern.match(file):
                print(e + '->' + file)

Another option is to find the files with os.walk from the os module (an advantage if you already use this module):
import os

# Get the current directory; you may also provide an absolute path.
path = os.getcwd()
# Walk recursively through all folders and gather information.
for root, dirs, files in os.walk(path):
    # Keep only names that END with ".txt"; a substring test would also
    # accept names such as "notes.txt.bak".
    check = [f for f in files if f.endswith(".txt")]
    if check:
        print(root, check)

Related

Python os.walk doesn't copy files from upper directory

I have some files which are generated in 2 different directory
D:\project\external\gerenateExternal
consists of: 1.txt, 2.txt, 3.txt
D:\project\main\generateMain
consists of: a.txt, b.txt, 3.txt
I want to copy all files from that different directory to D:\project\main\targetDir
My python is in D:\project\main\copy.py
import os
import shutil
import os.path as path
# Asker's original script, re-indented so that it parses; the logic and the
# path literals are left exactly as posted.
pyfile = os.path.dirname(os.path.abspath(__file__))
pathExternal = os.path.abspath(pyfile + '\..\external\gerenateExternal')
pathMain = os.path.abspath(pyfile + '\generateMain')
# NOTE: the "\t" at the start of the literal below is parsed as a TAB
# character, not as a backslash followed by "t".
targetdir = os.path.abspath(pyfile + '\targetDir')
for p in [ pathMain , pathExternal ]:
    print(p)
    # NOTE: the loop variable `path` shadows the `os.path as path` import.
    for path, dirs, files in os.walk(p):
        print(files)
        for file in files:
            if file.endswith(".txt"):
                shutil.copy(os.path.join(path, file), targetdir)
The only files that get copied are the ones from pathMain.
I found that files in a folder at the same level as, or below, the current Python file (copy.py) can be copied,
but files in a directory above the current Python file (copy.py) cannot.
How can I copy files from a directory above the current Python file?
I don't quite understand why you are using os.walk: You know the 2 folders with the files, you could use them directly?
You could try the following:
from pathlib import Path
from shutil import copy
from itertools import chain

pyfile = Path(__file__).resolve().parent  # Should be: D:\project\main
# The folder on disk is spelled "gerenateExternal" in the question, so the
# same spelling is used here. Building the path from separate components
# avoids hard-coding a backslash separator.
pathExternal = pyfile.parent / 'external' / 'gerenateExternal'  # Should be: D:\project\external\gerenateExternal
pathMain = pyfile / 'generateMain'  # Should be: D:\project\main\generateMain
targetdir = pyfile / 'targetDir'  # Should be: D:\project\main\targetDir
targetdir.mkdir(exist_ok=True)  # In case targetdir doesn't exist
for file in chain(pathExternal.glob('*.txt'), pathMain.glob('*.txt')):
    copy(file, targetdir)
If you want something more os.walk-like you could replace the loop with
...
# Walks everything below the parent folder of the script's directory. Files
# that already sit inside targetdir are skipped: copying a file onto itself
# makes shutil raise SameFileError.
for file in pyfile.parent.rglob('*.txt'):
    if targetdir in file.parents:
        continue
    copy(file, targetdir)
The code should work just fine. You have a minor typo in "gerenateExternal"; please check whether the actual directory has the same name.
In addition, to avoid the "\t" in '\targetDir' being interpreted as a tab, I would suggest escaping the backslash, using a forward slash, or joining the path components, e.g.
# '\\' is an escaped backslash, so the literal is a backslash followed by "targetDir".
targetdir = os.path.abspath(pyfile + '\\targetDir')

Search all files with same name in a directory python

I have a question:
I need to get the paths of a file in a directory. I have a folder that contains other folders, which in turn contain more folders, and so on; each of them contains a file "tv.sas7bdat". I need to get every path to that file.
Thank you !!!
You can try the following code, where PATH stands for the parent directory
import os
def getAlldirInDiGui(path,resultList):
    """Recursively collect the paths of every "tv.sas7bdat" file below *path*.

    Args:
        path: Directory to search.
        resultList: List that the matching file paths are appended to.

    Returns:
        The same ``resultList`` object, for convenience.
    """
    for fileName in os.listdir(path):
        fileAbpath = os.path.join(path, fileName)
        if os.path.isdir(fileAbpath):
            # Descend into sub-folders, sharing the same accumulator list.
            getAlldirInDiGui(fileAbpath, resultList)
        elif fileName == 'tv.sas7bdat':
            resultList.append(fileAbpath)
    return resultList
# Accumulator that getAlldirInDiGui fills with the matching paths.
resultList = []
# Placeholder: set PATH to the parent directory you want to search.
PATH = ""
getAlldirInDiGui(PATH,resultList)
You can use os.walk()
import os

for root, dirs, files in os.walk(os.getcwd()):
    for f in files:
        # Compare the whole name: a substring test would also match files
        # such as "old_tv.sas7bdat.bak".
        if f == "tv.sas7bdat":
            # Print the full path to the file, which is what was asked for.
            print(os.path.join(root, f))
If I get your problem right you can achieve your goal using Pythons's os.walk function, like so:
import os

# topdown=False reports sub-folders before the folder that contains them.
# Replace the placeholder with the folder the search should start from.
for root, dirs, files in os.walk("<starting folder here>", topdown=False):
    for name in files:
        if name == "tv.sas7bdat":
            print(os.path.join(root, name))
p.s: as for comments in your question, next time please provide as many details possible in your question and provide code of your attempt, see the asking guidelines
Hopefully the code below works for you:
import glob
import os

# Raw string so the backslash is kept literally; replace the placeholder with
# the folder to search.
initial_path = r"c:\<intital folder location>"
# recursive=True only has an effect when the pattern contains "**", which
# stands for the folder itself and every nested sub-folder.
files = glob.glob(os.path.join(initial_path, "**", "tv.sas7bdat"), recursive=True)
for f in files:
    print(f)
You could use the os python package combined with a recursive function to search through a certain directory
import os
from os.path import isfile, join, isdir
def get_files_path(directory, paths):
    """Recursively collect the paths of every "tv.sas7bdat" file under *directory*.

    Args:
        directory: Folder to search.
        paths: List that the matching file paths are appended to.

    Returns:
        The same ``paths`` list.
    """
    for item in os.listdir(directory):
        # join() inserts the separator, so this also works when *directory*
        # has no trailing slash (as happens on every recursive call).
        full_path = join(directory, item)
        if isfile(full_path) and item == "tv.sas7bdat":
            paths.append(full_path)
        elif isdir(full_path):
            get_files_path(full_path, paths)
    return paths
# Start the search in the current directory.
directory_to_search = "./"
# The list of matches is returned; assign the result to keep it.
get_files_path(directory_to_search , [])

How can I import all files of a particular extension from subdirectories and save them separately?

I have a directory
* workingdir
* raw_data
* 2001
- a.dat
- b.dat
- c.dat
* 2002
- d.dat
- e.dat
- f.data
* 2003 etc.
How can I read these dat files into separate variables?
So far:
import os # Operating system interface
import glob # For Unix style pathnames
import numpy as np
# Asker's original code, re-indented so that it parses; the logic is left as
# posted.
workingdir = '/home/x/workingdir/'
#Directory for all raw data files
rawdatadir = os.path.abspath(os.path.join(os.getcwd(), os.path.pardir, "raw_data"))
for root, dirs, files in os.walk(rawdatadir):
    # NOTE: the inner loop variable reuses the name `files`.
    for files in [f for f in files if f.endswith(".dat")]:
        # NOTE: this joins with rawdatadir rather than root, so the
        # sub-folder is missing from the printed path.
        print(os.path.join(rawdatadir, files))
But this is giving me
/home/x/workingdir/raw_data/a.dat
/home/x/workingdir/raw_data/b.dat
So,
How can I get the full path of all the files
And import them (np.fromfile?)
Any "smarter" way to do this?
I come from an R/dataframe background and would prefer to mimic something near that.
You can get full path by replacing os.path.join(rawdatadir, files) with os.path.join(root, files)
root variable contains the directory path in which files listed in files are located.
A correct loop implementation is shown below.
The results can be stored in a dict if you want to access them by file name:
# Maps file name -> array read by np.fromfile. Files that share a name in
# different folders overwrite each other, because only the name is the key.
results = {}
for root, dirs, files in os.walk(rawdatadir):
    for file in files:
        if file.endswith('.dat'):
            # root is the folder that actually contains `file`.
            results[file] = np.fromfile(os.path.join(root, file))
Use glob to find all files in subdirectories, then walk over the matches and store their names and contents. Its recursive option allows the token ** to match any path, so subdirectories are included in the search.
from glob import iglob
import os.path

import numpy as np

workingdir = '/home/x/workingdir/'
# Maps each matching path to the array read from that file.
result = {}
# '**' with recursive=True makes the pattern descend into every sub-folder.
for f in iglob(os.path.join(workingdir, './**/*.dat'), recursive=True):
    result[f] = np.fromfile(os.path.abspath(f))
This cute single generator also allows us to express this in a nice pythonic form
# Lazy iterator over every .dat path below workingdir.
files = iglob(os.path.join(workingdir, './**/*.dat'), recursive=True)
# Map each path to the array loaded from it.
result = {
    dat_path: np.fromfile(os.path.abspath(dat_path))
    for dat_path in files
}

how to get a folder name and file name in python

I have a python program named myscript.py which would give me the list of files and folders in the path provided.
import os
import sys
def get_files_in_directory(path):
    """Print every folder below *path* together with its sub-folders and files.

    For each directory visited by os.walk, three lines are printed: the
    directory path, the list of sub-folder names and the list of file names.
    """
    for root, dirs, files in os.walk(path):
        print(root)
        print(dirs)
        print(files)
# The folder to scan is taken from the first command-line argument.
path=sys.argv[1]
get_files_in_directory(path)
the path i provided is D:\Python\TEST and there are some folders and sub folder in it as you can see in the output provided below :
C:\Python34>python myscript.py "D:\Python\Test"
D:\Python\Test
['D1', 'D2']
[]
D:\Python\Test\D1
['SD1', 'SD2', 'SD3']
[]
D:\Python\Test\D1\SD1
[]
['f1.bat', 'f2.bat', 'f3.bat']
D:\Python\Test\D1\SD2
[]
['f1.bat']
D:\Python\Test\D1\SD3
[]
['f1.bat', 'f2.bat']
D:\Python\Test\D2
['SD1', 'SD2']
[]
D:\Python\Test\D2\SD1
[]
['f1.bat', 'f2.bat']
D:\Python\Test\D2\SD2
[]
['f1.bat']
I need to get the output this way :
D1-SD1-f1.bat
D1-SD1-f2.bat
D1-SD1-f3.bat
D1-SD2-f1.bat
D1-SD3-f1.bat
D1-SD3-f2.bat
D2-SD1-f1.bat
D2-SD1-f2.bat
D2-SD2-f1.bat
How do I get the output this way? (Keep in mind that the directory structure here is just an example; the program should be flexible enough to work for any path.)
Is there an os command for this? Can you please help me solve it? (Additional information: I am using Python 3.4.)
You could try using the glob module instead:
import glob

# Raw string keeps the backslashes literal. One '*' per level below Test:
# <folder>, <sub folder>, and finally the .bat file itself.
glob.glob(r'D:\Python\Test\*\*\*.bat')
Or, to just get the filenames
import os
import glob

# Raw string keeps the backslashes literal. One '*' per level below Test:
# <folder>, <sub folder>, and finally the .bat file itself.
# basename() strips the folders and keeps only the file name.
[os.path.basename(x) for x in glob.glob(r'D:\Python\Test\*\*\*.bat')]
To get what you want, you could do the following:
def get_files_in_directory(path):
    """Print every file below *path* as "<folder>-<sub folder>-...-<file>".

    The folder names are listed from the outermost to the innermost one,
    relative to *path* (for example "D1-SD1-f1.bat"); *path* itself is not
    part of the output.
    """
    for root, _subfolders, file_list in os.walk(path):
        # Folder chain between *path* and the directory being visited.
        relative = os.path.relpath(root, path)
        # relpath() returns "." for *path* itself, which contributes no name.
        parts = [] if relative == os.curdir else relative.split(os.sep)
        for file in file_list:
            print('-'.join(parts + [file]))
# The folder to scan is taken from the first command-line argument.
path=sys.argv[1]
get_files_in_directory(path)
My test folder:
SD1-D1-f1.bat
SD1-D1-f2.bat
SD2-D1-f1.bat
SD3-D1-f1.bat
SD3-D1-f2.bat
It may not be the best way to do it, but it will get you what you want.

Move Files that ends with .pdf to selected folder (Python)

My script I run will be on my mac.
My root is '/Users/johnle/Desktop/'
The purpose of the code is to move a large number of files.
My desktop holds a lot of .pdf files, and I want to move them to '/Users/johnle/Desktop/PDF'.
So: '/Users/johnle/Desktop/file.pdf' -> '/Users/johnle/Desktop/PDF/'
This is my code in python :
def moveFile(root,number_of_files, to, extension=None):
    """Move the files found directly inside *root* into the folder *to*.

    Args:
        root: Folder whose files are moved.
        number_of_files: Not used by this function; kept so existing calls
            keep working.
        to: Destination folder.
        extension: Optional suffix such as ".pdf". When given, only files
            whose name ends with it (ignoring case) are moved.
    """
    suffix = extension.lower() if extension else None
    for file in sorted(os.listdir(root)):
        # join() adds the separator, so *root* and *to* work with or without
        # a trailing slash.
        name = os.path.join(root, file)
        # Skip sub-folders: the destination may itself live inside *root*
        # (e.g. Desktop/PDF), and a folder cannot be moved into itself.
        if not os.path.isfile(name):
            continue
        if suffix is not None and not file.lower().endswith(suffix):
            continue
        shutil.move(name, os.path.join(to, file))
You can use glob and shutil modules. For example:
import glob
import shutil

# Copies (does not move) every .pdf found directly on the Desktop.
for f in glob.glob('/Users/johnle/Desktop/*.pdf'):
    shutil.copy(f, '/Users/johnle/Desktop/PDF')
(this code hasn't been tested).
Note: my code copies files. If you want to move them, then replace shutil.copy with shutil.move.
In case you have .pdf files with inconsistent casing on their extensions (e.g. .PDF, .pdf, .PdF, ...), you can use something like this:
import os
import shutil

SOURCE_DIR = '/Users/johnle/Desktop/'
DEST_DIR = '/Users/johnle/Desktop/PDF/'

# Make sure the destination folder exists before moving anything into it.
os.makedirs(DEST_DIR, exist_ok=True)
for fname in os.listdir(SOURCE_DIR):
    # Lower-casing the name makes the test accept .pdf, .PDF, .PdF, ...
    if fname.lower().endswith('.pdf'):
        shutil.move(os.path.join(SOURCE_DIR, fname), DEST_DIR)
The os module has lots of fun toys like this for manipulating files and other OS related operations.
You can use the rename function within the os module, to move the file to a new location.
import os
os.mkdir(<path>) #creates a new folder at the specified path
# moves the file by renaming it from its current path to the new path
os.rename(<original/current path>, <new path>)

Categories

Resources