I have a folder with files and I want to separate them into two variables.
For example, these are the files:
EXfileone.txt
fileone.txt
EXtest.txt
simple.txt
Now the thing I can't do is this (in pseudocode):
if "EX" in filename:
    add file to variable: EXFILES
if "EX" not in filename:
    add file to variable: NORMALFILES
So now:
EXFILES = [EXfileone.txt, EXtest.txt]
NORMALFILES = [fileone.txt, simple.txt]
Then I will use a for loop to do operations with the files:
for file in EXFILES:
    ...
I'm using Python 3.
You can simply use the standard library glob module to match pathnames:
import glob
EXFILES = []
NORMALFILES = []
filename_list = glob.glob("*.txt")
for filename in filename_list:
    if "EX" in filename:
        EXFILES.append(filename)
    else:
        NORMALFILES.append(filename)
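Note that "EX" in filename matches anywhere in the name, so a file like MYEXFILE.txt would also land in EXFILES. If the prefix is what matters, a stricter variant might look like this (a sketch, assuming the files sit in the current directory):
import glob
# Stricter variant: only count "EX" when it starts the filename
txt_files = glob.glob("*.txt")
EXFILES = [f for f in txt_files if f.startswith("EX")]
NORMALFILES = [f for f in txt_files if not f.startswith("EX")]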
Try this:
from pathlib import Path
folder = Path('/path/to/your/folder')
exfiles = list(folder.glob('EX*.txt'))
normalfiles = [f for f in folder.glob('*.txt') if not f.name.startswith('EX')]
That will give you the list of files as you wanted.
But it is better to do something like this instead:
from pathlib import Path
folder = Path('/path/to/your/folder')
for f in folder.glob('*.txt'):
    if f.name.startswith('EX'):
        ...  # do something with your EX*.txt file
    else:
        ...  # do something with your normal file
I hope it helps.
You can make each variable a list and store the names of the files in them:
from os import listdir
from os.path import isfile, join
folder_path = '/home/youruser/example' # path to folder
# classify files
exfiles = []
normalfiles = []
for f in listdir(folder_path):
    if isfile(join(folder_path, f)):
        if f.startswith('EX'):
            exfiles.append(join(folder_path, f))
        else:
            normalfiles.append(join(folder_path, f))
for fname in exfiles:
    with open(fname) as f:
        ...  # do operations
Use the in operator. It checks whether a string contains a substring:
from os import listdir
EX_files = []
NORMAL_files = []
for file_name in listdir():
    if "EX" in file_name:
        EX_files.append(file_name)
    else:
        NORMAL_files.append(file_name)
You can also use Python's built-in filter and lambda functions:
import os
my_list = list(filter(lambda x: 'EX' in x, os.listdir(some_dir)))
my_other_list = list(filter(lambda x: 'EX' not in x, os.listdir(some_dir)))
This will output two lists filtered by your search criterion.
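Since this calls os.listdir() twice, you could also list the directory once and split it in a single pass (a sketch, reusing the some_dir name from above):
import os
names = os.listdir(some_dir)  # hit the filesystem only once
my_list = [x for x in names if 'EX' in x]
my_other_list = [x for x in names if 'EX' not in x]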
The code that I have determines which operating system is being used. Then it has to search the entire system for my CSV file. When it's found, I need to be able to read in the CSV file (so that it's not just inside the function, but usable throughout my code).
So far I am able to locate my file, but I am having trouble assigning the file path to a variable so that I can read that variable in with pd.read_csv().
The code that I have is as follows:
import pandas as pd
import os
import re
import win32api
# https://stackoverflow.com/questions/13067686/search-files-in-all-drives-using-python
def find_file(root_folder, rex):
    for root, dirs, files in os.walk(root_folder):
        for f in files:
            result = rex.search(f)
            if result:
                print(os.path.join(root, f))
                return result
                break  # if you want to find only one

def find_file_in_all_drives(file_name):
    # create a regular expression for the file
    rex = re.compile(file_name)
    for drive in win32api.GetLogicalDriveStrings().split('\000')[:-1]:
        find_file(drive, rex)
    return
#file_name = "AB_NYC_2019.csv"
#find_file_in_all_drives(file_name)
df_location = find_file_in_all_drives( "AB_NYC_2019.csv" )
df = pd.read_csv(df_location)
I think that something is not right with the return; right now it returns None.
Thank you for your time.
You haven't returned anything from anywhere.
Assuming your code otherwise works, I've added the necessary return statements (untested):
def find_file(root_folder, rex):
    for root, dirs, files in os.walk(root_folder):
        for f in files:
            result = rex.search(f)
            if result:
                file_path = os.path.join(root, f)
                return file_path

def find_file_in_all_drives(file_name):
    matching_files = list()
    # create a regular expression for the file
    rex = re.compile(file_name)
    for drive in win32api.GetLogicalDriveStrings().split('\000')[:-1]:
        file_path = find_file(drive, rex)
        if file_path:
            matching_files.append(file_path)
    return matching_files
df_location = find_file_in_all_drives("AB_NYC_2019.csv")
first_file_df = pd.read_csv(df_location[0])
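One caveat: if the file isn't found on any drive, the returned list comes back empty and df_location[0] raises an IndexError, so a small guard may be worth adding (a sketch):
if df_location:
    first_file_df = pd.read_csv(df_location[0])
else:
    print("AB_NYC_2019.csv was not found on any drive")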
So, in the folder INFACT I have the following files (and many more with the same extensions):
BFDRYCKSE.ad
BFDRYCKSE.txt
BFFALIV2SE.ad
BFFALIV2SE.txt
I need to zip those files according to the filename, but only those mentioned above. My final result should be:
BFDRYCKSE.zip, containing BFDRYCKSE.ad and BFDRYCKSE.txt
BFFALIV2SE.zip, containing BFFALIV2SE.ad and BFFALIV2SE.txt
Here's my code:
import os
import glob
import zipfile
setfile = r'C:\Users\vijo9001\Desktop\Infact'
myset = [
"BFBRODSE",
"BFDRYCKSE",
"BFFALIV2SE",
"BFFALIVSSE",
"BFFRYSTSE",
"BFHUSHA1SE",
"BFHUSHA2SE",
"BFHUSHALSE",
"BFKONFEKSE",
"BFKROPPVSE",
"BFKROPP2SE",
"BFLIV2SE",
"BFLIVSSE",
"BFMAKEUPSE",
"BFMEJERISE",
"BFTOBAKSE"
]
os.chdir(setfile)
list_of_lists = []
for i, pattern in enumerate(myset):
    list_of_files = glob.glob(r'*{pattern}*'.format(pattern=myset[i]))
    list_of_lists.append(list_of_files)
    n = 0
    file = os.path.splitext(list_of_files[0])[0]
    with zipfile.ZipFile(file + '.zip', 'w') as myzip:
        for f in list_of_files:
            myzip.write(f, compress_type=zipfile.ZIP_DEFLATED)
I keep getting
Traceback (most recent call last):
File "C:/Users/vijo9001/Desktop/Retailers Check/aaa.py", line 29, in <module>
file = os.path.splitext(list_of_files[0])[0]
IndexError: list index out of range
Why is that?
The IndexError happens because some of the names in myset match no files in the folder, so glob.glob() returns an empty list and list_of_files[0] fails. You also don't need enumerate() just to access your list elements.
Your problem statement says that you want each zip file to have the same basename as the .ad and .txt files it should contain. With that in mind, I'd try something along these lines instead:
for basename in myset:
    filelist = glob.glob(r'*{pattern}*'.format(pattern=basename))
    with zipfile.ZipFile(basename + '.zip', 'w') as myzip:
        for f in filelist:
            myzip.write(f, compress_type=zipfile.ZIP_DEFLATED)
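To guard against set names that match nothing (the empty-list case behind your traceback), you could skip them so no empty zip files are created (a sketch, same imports and myset as above):
for basename in myset:
    filelist = glob.glob(r'*{pattern}*'.format(pattern=basename))
    if not filelist:
        continue  # no files for this set name, skip it
    with zipfile.ZipFile(basename + '.zip', 'w') as myzip:
        for f in filelist:
            myzip.write(f, compress_type=zipfile.ZIP_DEFLATED)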
My solution works without defining the set names:
=^..^=
import os
import zipfile
# get all files from directory
files_list = os.listdir(".")
# collect valid files
files_to_pack = []
for item in files_list:
    try:
        file_name, file_extension = item.split('.')
        if file_extension == 'ad' or file_extension == 'txt':
            files_to_pack.append(item)
    except ValueError:
        pass  # skip names that don't split into exactly name.extension
# sort and pair files
sorted_files = sorted(files_to_pack)
pairs_files = [sorted_files[i:2+i] for i in range(0, len(sorted_files), 2)]
# zip paired files
for item in pairs_files:
    with zipfile.ZipFile(item[0].split('.')[0] + '.zip', 'w') as myzip:
        myzip.write(item[0])
        myzip.write(item[1])
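The pairing step assumes every basename has exactly two files after sorting. If that might not hold, grouping by stem is more robust (a sketch, untested against your folder):
import os
import zipfile
from collections import defaultdict

# group the .ad/.txt files by their basename (stem)
groups = defaultdict(list)
for item in os.listdir("."):
    stem, ext = os.path.splitext(item)
    if ext in ('.ad', '.txt'):
        groups[stem].append(item)
# one zip per stem, however many members it has
for stem, members in groups.items():
    with zipfile.ZipFile(stem + '.zip', 'w') as myzip:
        for member in members:
            myzip.write(member, compress_type=zipfile.ZIP_DEFLATED)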
My question: is there a way to load data from all files in a directory using Python?
Input: get all files in a given directory of mine (wow.txt, testing.txt, etc.)
Process: I want to run all the files through a def function
Output: I want the output to be all the file names with their respective content below each. For example:
/home/file/wow.txt
"all of its content"
/home/file/www.txt
"all of its content"
Here is my code:
# Import Functions
import os
import sys
# Define the file path
path="/home/my_files"
file_name="wow.txt"
#Load Data Function
def load_data(path, file_name):
    """
    Input  : path and file_name
    Purpose: loading text file
    Output : list of paragraphs/documents and
             title (initial 100 words considered as title of document)
    """
    documents_list = []
    titles = []
    with open(os.path.join(path, file_name), "rt", encoding='latin-1') as fin:
        for line in fin.readlines():
            text = line.strip()
            documents_list.append(text)
    print("Total Number of Documents:", len(documents_list))
    titles.append(text[0:min(len(text), 100)])
    return documents_list, titles
#Output
load_data(path,file_name)
My problem is that my output only takes one file and shows its content. Obviously, I defined the path and file name in my code to point at a single file, but I am confused about how to write the path so that it loads all the files and outputs each of their contents separately. Any suggestions?
Using glob:
import glob
files = glob.glob("*.txt") # get all the .txt files
for file in files:  # iterate over the list of files
    with open(file, "r") as fin:  # open the file
        ...  # rest of the code
Using os.listdir():
import os
arr = os.listdir()
files = [x for x in arr if x.endswith('.txt')]
for file in files:  # iterate over the list of files
    with open(file, "r") as fin:  # open the file
        ...  # rest of the code
Try this:
import glob
for file in glob.glob("test/*.xyz"):
    print(file)
This assumes my directory name is "test" and I have lots of .xyz files in it.
You can use glob and pandas
import pandas as pd
import glob
path = r'some_directory' # use your path
all_files = glob.glob(path + "/*.txt")
li = []
for filename in all_files:
    # read file here
    # if you decide to use pandas you might need to use the 'sep' parameter as well
    df = pd.read_csv(filename, index_col=None, header=0)
    li.append(df)
# get it all together
frame = pd.concat(li, axis=0, ignore_index=True)
I will take advantage of the function you have already written, so use the following:
data = []
path="/home/my_files"
dirs = os.listdir(path)
for file in dirs:
    data.append(load_data(path, file))
In this case you will have all data in the list data.
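Since load_data() returns a (documents_list, titles) tuple per file, data ends up as a list of those tuples. To print each file name followed by its content, as in your desired output, you could pair the names back up (a sketch):
import os
for file_name, (documents, titles) in zip(dirs, data):
    print(os.path.join(path, file_name))
    print("\n".join(documents))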
You can use a for loop over os.listdir():
os.listdir(<path of your directory>)
This gives you the list of entries in your directory, but note that it also includes the names of any folders in that directory.
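To keep only the files and drop the folder names, you can filter with os.path.isfile, e.g. (a sketch with a placeholder path):
import os
folder = '/path/to/your/directory'  # placeholder, use your own path
only_files = [f for f in os.listdir(folder)
              if os.path.isfile(os.path.join(folder, f))]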
Try generating a file list first, then passing that to a modified version of your function.
def dir_recursive(dirName):
    import os
    import re
    fileList = list()
    for (dir, _, files) in os.walk(dirName):
        for f in files:
            path = os.path.join(dir, f)
            if os.path.exists(path):
                fileList.append(path)
    fList = list()
    prog = re.compile(r'\.txt$')  # escape the dot so it matches a literal "."
    for k in range(len(fileList)):
        binMatch = prog.search(fileList[k])
        if binMatch:
            fList.append(binMatch.string)
    return fList
def load_data2(file_list):
    documents_list = []
    titles = []
    for file_path in file_list:
        with open(file_path, "rt", encoding='latin-1') as fin:
            for line in fin.readlines():
                text = line.strip()
                documents_list.append(text)
    print("Total Number of Documents:", len(documents_list))
    titles.append(text[0:min(len(text), 100)])
    return documents_list, titles
# Generate a file list & load the data from it
file_list = dir_recursive(path)
documents_list, titles = load_data2(file_list)
Suppose I have a text file aiq_hits.txt.
Each line in this file corresponds a filename
ant1.aiq
ant2.aiq
ant3.aiq
ant4.aiq
I want to match each line of my text file (ant1.aiq, ant2.aiq, and so on) against the filenames present at a specific place (R:\Sample) and extract the matching files to another place (R:\Sample\wsa).
I have an idea that I need to use functions like os.walk(), fnmatch.fnmatch() and shutil.copy(), but I am not able to implement them.
My code:
import os
import shutil
import fnmatch
with open("aiq_hits.txt","r") as in_file:
for line in in_file:
I am stuck here
import os
import shutil
sourceDir = "R:\\Sample"
targetDir = "R:\\Sample\\wsa"
existingFiles = set(f for f in os.listdir(sourceDir) if os.path.isfile(os.path.join(sourceDir, f)))
infilepath = "aiq_hits.txt"
with open(infilepath) as infile:
    for line in infile:
        fname = line.strip()
        if fname not in existingFiles:
            continue
        shutil.move(os.path.join(sourceDir, fname), os.path.join(targetDir, fname))
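Note that shutil.move removes the files from R:\Sample; if you want to keep the originals (the question says "extract"), swap in shutil.copy2 with the same arguments:
shutil.copy2(os.path.join(sourceDir, fname), os.path.join(targetDir, fname))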
I hope this will suffice:
import os

def match_files(url, file_read, dest):
    # open in text mode so each line compares as a string
    with open(file_read, 'r') as f:
        file_list = os.listdir(url)
        print(file_list)
        saved_path = os.getcwd()
        print("Current working directory is " + saved_path)
        os.chdir(url)
        match = []
        for file_name in f:
            file_name = file_name.strip()
            if file_name in file_list:
                match.append(file_name)
                os.rename(os.path.join(url, file_name), os.path.join(dest, file_name))
        os.chdir(saved_path)
        print(match)
Here, url is the source directory or folder from which you want to match files, file_read is the name of the file (with path) that contains the list of file names, and dest is the destination folder.
This code moves the matching files from url to dest, i.e. they won't remain in url after running it.
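For example, a call matching the layout in the question might look like this (the paths are taken from the question; adjust them to your setup):
match_files("R:\\Sample", "aiq_hits.txt", "R:\\Sample\\wsa")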
Alternatively, you could use the glob module, which lets you enter an expression for the file name/extension and returns a list that you can loop over.
I'd use this module if the source directory can contain files with the same extension that you want to exclude from the loop.
I'm also assuming that the file name list is not large, so storing it in a list won't be an issue.
For example (I haven't tested the code below):
from glob import glob
import os
import shutil
src = 'R:\\Sample'
dst = "R:\\Sample\\wsa"
in_file_list = "aiq_hits.txt"
list_Of_files = glob(os.path.join(src, 'ant*.aiq'))
data = []
with open(in_file_list) as reader:
    data += [line.strip() for line in reader]  # strip newlines so the names compare cleanly
for row in list_Of_files:
    file_path, file_name = os.path.split(row)
    if file_name in data:
        shutil.copy2(row, os.path.join(dst, file_name))
        # or if you want to move the file:
        # shutil.move(row, os.path.join(dst, file_name))
I have some .txt files in a directory and I need to get the last 15 lines from all of them. How could I do it using Python?
I chose this code:
from os import listdir
from os.path import isfile, join
dir_path= './'
files = [ f for f in listdir(dir_path) if isfile(join(dir_path,f)) ]
out = []
for file in files:
    filedata = open(join(dir_path, file), "r").readlines()[-15:]
    out.append(filedata)
f = open(r'./fin.txt','w')
f.writelines(out)
f.close()
but I get the error "TypeError: writelines() argument must be a sequence of strings". I think it's because of Russian letters in the lines.
import os
from collections import deque
for filename in os.listdir('/some/path'):
    # might want to put a check it's actually a file here...
    # (join it to a root path, or anything else....)
    # and sanity check it's text of a usable kind
    with open(filename) as fin:
        last_15 = deque(fin, 15)
deque will automatically discard the oldest entries and cap the size at 15, so it's an efficient way of keeping just the last n items.
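To tie this back to your fin.txt output, you could collect each file's tail as you go; a sketch along the same lines:
import os
from collections import deque

out_lines = []
for filename in os.listdir('/some/path'):
    full = os.path.join('/some/path', filename)
    if not os.path.isfile(full):
        continue  # skip sub-directories
    with open(full) as fin:
        out_lines.extend(deque(fin, 15))  # keep only the last 15 lines
with open('fin.txt', 'w') as fout:
    fout.writelines(out_lines)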
Try this:
from os import listdir
from os.path import isfile
for filepath in listdir("/path/to/folder"):
    if isfile(filepath):  # if needed
        last_15_lines = open(filepath).readlines()[-15:]
# or, in one line:
x = [open(f).readlines()[-15:] for f in listdir("/path/to/folder") if isfile(f)]
Updated:
lastlines = []
for file in files:
    lastlines += open(join(dir_path, file), "r").readlines()[-15:]
with open('./fin.txt', 'w') as f:
    f.writelines(lastlines)
from os import listdir
from os.path import isfile, join
dir_path= '/usr/lib/something'
files = [ f for f in listdir(dir_path) if isfile(join(dir_path,f)) ]
for file in files:
    filedata = open(join(dir_path, file), "r").readlines()[-15:]
    # do something with the filedata
Hope this helps:
import os
current_dir = os.getcwd()
dir_objects = os.listdir(current_dir)
dict_of_last_15 = {}
for file in dir_objects:
    if not os.path.isfile(file):
        continue  # skip sub-directories
    file_obj = open(file, 'r')  # open as text so the lines are strings
    content = file_obj.readlines()
    last_15_lines = content[-15:]
    dict_of_last_15[file] = last_15_lines
    print("#############: %s" % file)
    print(dict_of_last_15[file])
    file_obj.close()  # close the handle we opened