Need a bit of help putting this together
This script reads any files with or with out an .ext
this can also edit the original file
I would like to isolate it to a file name
because it also reads and edits my other scripts
and it edited itself
import os
for root, dirs, files in os.walk('.'):
for name in files:
nmin = os.path.join(root,name)
with open(nmin,"r") as fin:
data = fin.read()
data = data.replace('"Houston": ??,",', '"Houston": City",') \
.replace('"Place": USA,",', '"Place": County",')
with open(nmin,"w") as fout:
fout.write(data)
I'm trying different ways and can't get it to work right
import os
for root, dirs, files in os.walk('CityPlaces.txt'):
for name in files:
nmin = os.path.join(root,name)
with open(nmin,"r") as fin:
data = fin.read()
data["Houston"] = City
data["Place"] = Country
with open(nmin,"w") as fout:
fout.write(data)
Also I'm trying to make data [ ] work, because every txt file I work on will have different content
For example:
"Houston": Random data,
"Houston": Random data,
"Houston": Random data,
but all will be update with the same write
so replace will not work
I want to change this
data = data.replace('"Houston": ??,",', '"Houston: City",') \
.replace('"Place": USA,",', '"Place: County",')
to this
data = fin.read()
data["Houston"] = City
data["Place"] = Country
Any help would be great
I got help with this part Thank you very much
for root, dirs, files in os.walk('.'):
for name in files:
if name != "CityPlaces.txt": continue
How do I add this to my script
>>> import re
>>> p = re.compile('ab*')
>>> p
re.compile('ab*')
I'm guessing like this
import os
import re
for root, dirs, files in os.walk('CityPlaces.txt'):
for name in files:
nmin = os.path.join(root,name)
with open(nmin,"r") as fin:
data = fin.read()
data = re.compile('Houston*')
data = re.compile('Place*')
with open(nmin,"w") as fout:
fout.write(data)
So, you have files called CityPlaces.txt in multiple directories? Then, just check that your directory contains such a file, and process only that file:
import os
for root, dirs, files in os.walk('.'):
if 'CityPlace.txt' not in files:
continue
nmin = os.path.join(root,'CityPlace.txt')
with open(nmin,"r") as fin:
data = fin.read()
data = data.replace('"Houston": ??,",', '"Houston": City",') \
.replace('"Place": USA,",', '"Place": County",')
with open(nmin,"w") as fout:
fout.write(data)
Related
I am working on merging a number of text files together into a single text document. I am able to read all the file names and create a new output document.
However, when I output the document, I am only getting the data from one file and not the rest? Overall it should be close to 1 million lines in a txt, but only getting the first 10k
import os
projpath1 = 'PATH1'
projpath2 = 'PATH2'
for root, dirs, files in os.walk(f"{projpath1}", topdown=False):
for name in files:
if not name.startswith('.DS_Store'):
split = name.split("/")
title = split[0]
filename = (os.path.join(root, name))
inputf = os.path.expanduser(f'{projpath1}/{title}')
updatedf = os.path.expanduser(f'{projpath2}/ENC_merged.txt')
with open(inputf, "r") as text_file, open(updatedf, 'w') as outfile:
for info in text_file:
for lines in info:
outfile.write(lines)
I really am stuck and can't figure it out :/
You are suppose to open create output file first and within it you need to save all the input files, something like this should work for you.
import os
projpath1 = 'PATH1'
projpath2 = 'PATH2'
with open(updatedf, 'w') as outfile:
for root, dirs, files in os.walk(f"{projpath1}", topdown=False):
for name in files:
if not name.startswith('.DS_Store'):
split = name.split("/")
title = split[0]
filename = (os.path.join(root, name))
inputf = os.path.expanduser(f'{projpath1}/{title}')
updatedf = os.path.expanduser(f'{projpath2}/ENC_merged.txt')
with open(inputf, "r") as text_file:
for info in text_file:
for lines in info:
outfile.write(lines)
What about doing it with bash
ls | xargs cat > merged_file
My question: Is there a way to load data from all files in a directory using Python
Input: Get all files in a given directory of mine (wow.txt, testting.txt,etc.)
Process: I want to run all the files through a def function
Output: I want the output to be all the files names and their respective content below it.For example:
/home/file/wow.txt
"all of its content"
/home/file/www.txt
"all of its content"
Here is my code:
# Import Functions
import os
import sys
# Define the file path
path="/home/my_files"
file_name="wow.txt"
#Load Data Function
def load_data(path,file_name):
"""
Input : path and file_name
Purpose: loading text file
Output : list of paragraphs/documents and
title(initial 100 words considered as title of document)
"""
documents_list = []
titles=[]
with open( os.path.join(path, file_name) ,"rt", encoding='latin-1') as fin:
for line in fin.readlines():
text = line.strip()
documents_list.append(text)
print("Total Number of Documents:",len(documents_list))
titles.append( text[0:min(len(text),100)] )
return documents_list,titles
#Output
load_data(path,file_name)
Here is my output:
My Problem is that my output only takes one file and shows its content. Obviously, i defined the path and file name in my code to one file but I am confused as to how to write the path in a way to load all the files and output each of its contents separately. Any suggestions?
Using glob:
import glob
files = glob.glob("*.txt") # get all the .txt files
for file in files: # iterate over the list of files
with open(file, "r") as fin: # open the file
# rest of the code
Using os.listdir():
import os
arr = os.listdir()
files = [x for x in arr if x.endswith('.txt')]
for file in files: # iterate over the list of files
with open(file, "r") as fin: # open the file
# rest of the code
Try this:
import glob
for file in glob.glob("test/*.xyz"):
print(file)
if my directory name was "test" and I had lots of xyz files in them...
You can use glob and pandas
import pandas as pd
import glob
path = r'some_directory' # use your path
all_files = glob.glob(path + "/*.txt")
li = []
for filename in all_files:
#read file here
# if you decide to use pandas you might need to use the 'sep' paramaeter as well
df = pd.read_csv(filename, index_col=None, header=0)
li.append(df)
# get it all together
frame = pd.concat(li, axis=0, ignore_index=True)
I will take advantage of the function you have already written, so use the following:
data = []
path="/home/my_files"
dirs = os.listdir( path )
for file in dirs:
data.append(load_data(path, file))
In this case you will have all data in the list data.
Hi you can use a for loop on a listdir:
os.listdir(<path of your directory>)
this gives you the list of files in your directory, but this gives you also the name of folders in that directory
Try generating a file list first, then passing that to a modified version of your function.
def dir_recursive(dirName):
import os
import re
fileList = list()
for (dir, _, files) in os.walk(dirName):
for f in files:
path = os.path.join(dir, f)
if os.path.exists(path):
fileList.append(path)
fList = list()
prog = re.compile('.txt$')
for k in range(len(fileList)):
binMatch = prog.search(fileList[k])
if binMatch:
fList.append(binMatch.string)
return fList
def load_data2(file_list):
documents_list = []
titles=[]
for file_path in file_list:
with open( file_path ,"rt", encoding='latin-1') as fin:
for line in fin.readlines():
text = line.strip()
documents_list.append(text)
print("Total Number of Documents:",len(documents_list))
titles.append( text[0:min(len(text),100)] )
return documents_list,titles
# Generate a file list & load the data from it
file_list = dir_recursive(path)
documents_list, titles = load_data2(file_list)
I have a csv with two columns Directory and Filename. Each row in the csv shows what directory each file belongs like so
Directory, File Name
DIR18, IMG_42.png
DIR12, IMG_16.png
DIR4, IMG_65.png
So far I have written code that grabs each directory and filename from the csv and then all files at their destination like so:
movePng.py
import shutil
import os
import csv
from collections import defaultdict
columns = defaultdict(list) # each value in each column is appended to a list
with open('/User/Results.csv') as f:
reader = csv.DictReader(f)
for row in reader:
for (k,v) in row.items():
columns[k].append(v)
source = '/User/PNGItems'
files = os.listdir(source)
for f in files:
pngName = f[:-4]
for filename in columns['File Name']:
fileName = filename[:-4]
if pngName == fileName
# GET THIS POSITION IN columns['File Name'] for columns['Directory']
shutil.move(f, source + '/' + DIRECTORY)
How do I get the index of the columns['File Name'] and grab the corresponding directory out of columns['Directory'] ?
You should read the assignments into a dictionary and then query that:
folder_assignment_file = "folders.csv"
file_folder = dict()
with open(folder_assignment_file, "r") as fh:
reader = csv.reader(fh)
for folder, filename in reader:
file_folder[filename] = folder
And then get the target folder like so: DIRECTORY = file_folder[fileName].
Some other hints:
filename, fileName are not good variable names, this will only lead to hard to find bugs because Python is case sensitive
use os.path.splitext to split the extension off the filename
if not all your files are in one folder the glob module and os.walk might come in handy
Edit:
Creating the dict can be made even nicer like so:
with open(folder_assignment_file, "r") as fh:
reader = csv.reader(fh)
file_folders = {filename: folder for folder, filename in reader}
To solve this I used #Peter Wood suggestion and it worked beautifully. Also I had to modify shutil.
Here is the code below
for f in files:
pngName = f[:-4]
for filename, directory in zip(columns['File Name'], columns['Directory']):
fileName = filename[:-4]
if pngName == fileName:
directoryName = directory[1:]
shutil.move(os.path.join(source, f), source + '/' + directoryName)
I have written a code for moving files from one folder to another .I moved only those files whose names are present in my text file(aiq.txt).
It works fine when files are less in number say(10) but when number goes to around 500 the program terminates.
Each file is of 50 Mb
Here is my code:
import os
import shutil
destination=r"dstpath"
with open("aiq.txt") as infile:
for line in infile:
line=line.strip('\n)
for root,dirs,filenames in os.walk(r"H:\test_vectors"):
for filename in filenames:
if line in filename:
shutil.move(os.path.join(root,filename),destination)
import os
import shutil
root = r"H:\test_vectors"
destination = r"dstpath"
file_list = []
dir_dict = {}
with open('aiq.txt', 'r') as f:
for line in f:
file_list.append(line.strip())
for d, dirs, files in os.walk(root):
if files:
dir_dict[d] = files
for key, item in dir_dict.items():
for x in item:
if x in file_list:
try:
shutil.move(os.path.join(key, x), destination)
except:
pass
Suppose I have a text file aiq_hits.txt.
Each line in this file corresponds a filename
ant1.aiq
ant2.aiq
ant3.aiq
ant4.aiq
I want to match each line of my textfile (ant1.aiq,ant2.aiq and so on) with filenames which are present at some specific place(R:\Sample) and extract matching files into some other place (R:\sample\wsa).
I have an idea that I need to use functions like os.walk() and fnmatch.fnmatch(), shutil.copy() but I am not able to implement them
My code:
import os
import shutil
import fnmatch
with open("aiq_hits.txt","r") as in_file:
for line in in_file:
I am stuck here
import os
import shutil
sourceDir = "R:\\Sample"
targetDir = "R:\\Sample\\wsa"
existingFiles = set(f for f in os.listdir(sourceDir) if os.path.isfile(os.path.join(sourceDir, f)))
infilepath = "aiq_hits.txt"
with open(infilepath) as infile:
for line in infile:
fname = line.strip()
if fname not in existingFiles: continue
shutil.move(os.path.join(sourceDir, fname), os.path.join(targetDir, fname))
I hope this will suffice:
import os
def match_files(url,file_read, dest):
f = open(file_read, 'rb')
file_list = os.listdir(url)
print(file_list)
saved_path = os.getcwd()
print("Current working directory is " + saved_path)
os.chdir(url)
match = []
for file_name in f:
file_name = file_name.strip()
if file_name in file_list:
match.append(file_name)
os.rename(os.path.join(url, file_name), os.path.join(dest, file_name))
os.chdir(saved_path)
print match
here, url is source directory or folder from which u want to match files, file_read is the name of file (with path) in which list of file names is given, dest is the destination folder.
this code moves the matching files from url to dest, i.e. these files won't remin in url after running the code.
Alternatively you could use the glob module which allows you to enter in a expression for the file name\extension which will then return a list that you can loop over.
I'd use this module if the source directory can have files with the same extension that you want to exclude from being looped over
Also I'm assuming that the file name list is not large and so storing it in a list wont be an issue
eg (I haven't tested the below )
from glob import glob
import os
import shutil
src = 'R:\\Sample'
dst = "R:\\Sample\\wsa"
in_file_list = "aiq_hits.txt"
list_Of_files = glob(os.path.join(src, 'ant*.aiq'))
data = []
with open(in_file_list) as reader:
data += reader.readlines()
for row in list_Of_files:
file_path, file_name = os.path.split(row)
if file_name in data:
shutil.copy2(row, os.path.join(dst, file_name))
# or if you want to move the file
# shutil.move(row, os.path.join(dst, file_name))