Find from one directory to another - python

I am trying to read a huge list output from a file directory, and once it finds it, it place it in another file using python.
I have a huge data set and was able to split the csv to 3 different folders based on their classes.
I need to search the list of column images and find from a directory and place it in a new file
To make everything clear:
class0.csv ,Unnamed: 0,noise,rot_ratio,background,class,images
0,0,0.031495803,0.383730466,0.870530701,0,00859199-ad58-4334-8635-07a094e11f94.JPG
5,5,2.605760607,0.547664714,-0.59016648,0,03159229-f613-4bd2-be32-82cf65496865.JPG
13,13,0.79224368,0.742954625,1.136200214,0,083ba0e4-cf97-40b7-9de3-0cdb618006c5.JPG
18,18,-0.416518561,0.432365614,1.12786556,0,0a9bca0f-dcbd-458e-a2bf-557876e5b402.JPG
36,36,2.192400275,0.558622462,-1.038830864,0,0e96c5b0-2ea6-441c-a1b6-22b5f650347b.JPG
46,46,-0.575673656,0.429221735,1.348484522,0,152c3bd4-dc1b-4328-a303-d923c226c040.JPG
51,51,3.880669006,0.295885257,1.005818478,0,19424685-3776-472c-8b07-f4c01643424e.JPG
53,53,1.552991557,0.485258419,0.282584728,0,1a8be963-4696-4605-826a-b9c1999985ae.JPG
Todo:
I need to find the images from my file directory and place it in another file, as the file contains images from different classes.
Hope it is clear.
import csv
from collections import defaultdict
columns = defaultdict(list) # each value in each column is appended to a list
with open('class0.csv') as f:
reader = csv.DictReader(f) # read rows into a dictionary format
for row in reader: # read a row as {column1: value1, column2: value2,...}
for (k,v) in row.items(): # go over each column name and value
columns[k].append(v) # append the value into the appropriate list
# based on column name k
image = columns['images']
print(image)
output list
'00859199-ad58-4334-8635-07a094e11f94.JPG', '03159229-f613-4bd2-be32-82cf65496865.JPG', '083ba0e4-cf97-40b7-9de3-0cdb618006c5.JPG', '0a9bca0f-dcbd-458e-a2bf-557876e5b402.JPG', '0e96c5b0-2ea6-441c-a1b6-22b5f650347b.JPG', '152c3bd4-dc1b-4328-a303-d923c226c040.JPG', '19424685-3776-472c-8b07-f4c01643424e.JPG', '1a8be963-4696-4605-826a-b9c1999985ae.JPG', '1d5c3c21-77d1-42d8-a4fc-80e8db01e2f2.JPG', '1ec36552-00af-454a-af47-de600baf3f1b.JPG', '2acbdc0e-9ae9-47e3-9dc3-c6a124c19296.JPG', '2c9e886e-ac63-4c60-b959-b7bccdd20289.JPG', '2df13128-ff88-4813-973b-c83296f1cbf5.JPG', '2eb9f4b7-cabc-4cd8-98b9-2f3470d623d2.JPG', '3169cf83-c70c-48b6-8332-9cae259e2204.JPG', '339371b4-c470-4489-832a-acd1d9c68d9f.JPG', '3504dc2b-8516-4ffc-bf02-9972409cfb0b.JPG', '364b3c69-dc6a-4afb-b67c-eb0854b5eaaf.JPG', '38cae58c-c150-4e39-a319-a57db3d9ac5f.JPG', '3ee6554f-3d7b-4094-844f-9539cc97a286.JPG', '444ea9ce-cdb5-4f48-ae28-d18e247bc6e4.JPG', '4513be86-e1ee-46a1-8897-30101045b420.JPG', '4e587d28-9656-47cc-bcac-93de429c3847.JPG', '4f4d7096-a3d7-49fe-90c8-63faed85d66c.JPG'
Now, I have a folder with many images including the output list. I want to find those specific images from main folder and place it in a new one.
Done so far: (I need to read the list output and find it from source_dir folder and place the specific files to target_dir)
import shutil
import os
source_dir = '/images'
target_dir = 'class0'
file_names = os.listdir(source_dir)
for file_name in file_names:
shutil.move(os.path.join(source_dir, file_name), target_dir)
I need to place only the specific images from output list

Related

Combining columns of multiple files into one - Python

I am trying to write a simple script that would import a specific column from multiple data files (.csv like file but with no extension) and export it all to one file with filenames in each column header. I tried this solution (also the code bellow by shaktimaan), which seems to do almost exactly the same, however, I got to some difficulties. Firstly I am still getting ''expected str, bytes or os.PathLike object, not list'' error and I am not really sure what I am doing wrong. I am not sure if the File_name variable should contain file names or file paths, and if I should use a different function to import files because my files don't have a .csv extension in the name.
Thank you for your help,
Šimon
import csv
# List of your files
file_names = ['file1', 'file2']
# Output list of generator objects
o_data = []
# Open files in the succession and
# store the file_name as the first
# element followed by the elements of
# the third column.
for afile in file_names:
file_h = open(afile)
a_list = []
a_list.append(afile)
csv_reader = csv.reader(file_h, delimiter=' ')
for row in csv_reader:
a_list.append(row[2])
# Convert the list to a generator object
o_data.append((n for n in a_list))
file_h.close()
# Use zip and csv writer to iterate
# through the generator objects and
# write out to the output file
with open('output', 'w') as op_file:
csv_writer = csv.writer(op_file, delimiter=' ')
for row in list(zip(*o_data)):
csv_writer.writerow(row)
op_file.close()

Why is my code resulting in a horizontal list instead of a vertical one?

I'm writing a script to pull in all the file names from a directory, modify the name, and then output the name to an Excel file. As of right now, I'm simply focused on getting the list of file names to output to a .csv file, and when I do this, the files names appear in a horizontal list, one item in each column, instead of a vertical list, one item in each row under a header.
I tried both using the writer.writerow(files) and for i in files: writer.writerow(i). First one gave me the current horizontal output, while the second one broke up each character into a new cell, horizontally, while write the list vertically.
import os
import csv
path = "C:\\Users\\[REST OF PATH]\\"
files = []
csv_filename = "python_list_test.csv"
#r = root, d = directory, f = files
''' * for r,d,f, in os.walk(path):
for file in f:
if '.txt' in file:
files.append(os.path.join(r,file))
for f in files:
print(f)
'''
for r,d,f in os.walk(path):
for name in f:
if '.pdf' in name:
files.append(name)
for i in files:
print(i)
with open(csv_filename, mode='w',newline='') as c:
writer = csv.writer(c)
writer.writerow(['File Name',])
writer.writerow(files)
I expected the code to give me a single column list with each row being the next item. When I print the second method I mentioned earlier(for i in files:...), it looks perfect, but writing to a .csv file separated out the characters.
Just need to change the last row:
writer.writerows([f] for f in files)

Assign csv files to a collection of dictionaries (list) with file name as the keys and file content as the values

i have a problem with the iteration process in python, I've tried and search the solutions, but i think this more complex than my capability (fyi, I've been writing code for 1 month).
The case:
Let say i have 3 csv files (the actual is 350 files), they are file_1.csv, file_2.csv, file_3.csv. I've done the iteration process/algorithm to create all of the filenames in into single list.
each csv contains single column with so many rows.
i.e.
#actual cvs much more like this:
# for file_1.csv:
value_1
value_2
value_3
Below is not the actual csv content (i mean i have converted them into array/series)
file_1.csv --> [['value_1'],['value_2'],['value_3']]
file_2.csv --> [['value_4'],['value_5']]
file_3.csv --> [['value_6']]
#first step was done, storing csv files name to a list, so it can be read and use in csv function.
filename = ['file_1.csv', 'file_2.csv', 'file_3.csv']
I want the result as a list:
#assigning a empty list
result = []
Desired result
print (result)
out:
[{'keys': 'file_1', 'values': 'value_1, value_2, value_3'},
{'keys': 'file_2', 'values': 'value_4, value_5'}
{'keys': 'file_3', 'values': 'value_6'}]
See above that the result's keys are no more containing ('.csv') at the end of file name, they are all replaced. And note that csv values (previously as a list of list or series) become one single string - separated with comma.
Any help is appreciated, Thank you very much
I'd like to answer this to the best of my capacity (I'm a newbie too).
Step1: Reading those 350 filenames
(if you've not figured out already, you could use glob module for this step)
Define the directory where the files are placed, let's say 'C:\Test'
directory = "C:/Test"
import glob
filename = sorted (glob.glob(directory, + "/*.csv"))
This will read all the 'CSV' files in the directory.
Step2: Reading CSV files and mapping them to dictionaries
result = []
import os
for file in files:
filename = str (os.path.basename(file).split('.')[0]) # removes the CSV extension from the filename
with open (file, 'r') as infile:
tempvalue = []
tempdict = {}
print (filename)
for line in infile.readlines():
tempvalue.append(line.strip()) # strips the lines and adds them to a list of temporary values
value = ",".join(tempvalue) # converts the temp list to a string
tempdict[filename] = value # Assigns the filename as key and the contents as value to a temporary dictionary
result.append(tempdict) # Adds the new temp dictionary for each file to the result list
print (result)
This piece of code should work (though there might be a smaller and more pythonic code someone else might share).
Since it seems that the contents of the files is already pretty much in the format you need them (bar the line endings) and you have the names of the 350 files in a list, there isn't a huge amount of processing you need to do. It's mainly a question of reading the contents of each file, and stripping the newline characters.
For example:
import os
result = []
filenames = ['file_1.csv', 'file_2.csv', 'file_3.csv']
for name in filenames:
# Set the filename minus extension as 'keys'
file_data = {'keys': os.path.basename(name).split('.')[0]}
with open(name) as f:
# Read the entire file
contents = f.read()
# Strip the line endings (and trailing comma), and set as 'values'
file_data['values'] = contents.replace(os.linesep, ' ').rstrip(',')
result.append(file_data)
print(result)

How to read in multiple files separately from multiple directories in python

I have x directories which are Star_{v} with v=0 to x.
I have 2 csv files in each directory, one with the word "epoch", one without.
If one of the csv files has the word "epoch" in it needs to be sent through one set of code, else through another.
I think dictionaries are probably the way to go but this section of the code is a bit of a wrong mess
directory_dict={}
for var in range(0, len(subdirectory)):
#var refers to the number by which the subdirectories are labelled by Star_0, Star_1 etc.
directory_dict['Star_{v}'.format(v=var)]=directory\\Star_{var}
#directory_dict['Star_0'], directory_dict['Star_1'] etc.
read_csv(f) for f in os.listdir('directory_dict[Star_{var}') if f.endswith(".csv")
#reads in all the files in the directories(star{v}) ending in csv.
if 'epoch' in open(read_csv[0]).read():
#if the word epoch is in the csv file then it is
directory_dict[Star_{var}][read] = csv.reader(read_csv[0])
directory_dict[Star_{var}][read1] = csv.reader(read_csv[1])
else:
directory_dict[Star_{var}][read] = csv.reader(read_csv[1])
directory_dict[Star_{var}][read1] = csv.reader(read_csv[0])
when dealing with csvs, you should use the csv module, and for your particular case, you can use a dictreader and parse the headers to check for the column you're looking for
import csv
import os
directory = os.path.abspath(os.path.dirname(__file__)) # change this to your directory
csv_list = [os.path.join(directory, c) for c in os.listdir(directory) if os.path.splitext(c) == 'csv']
def parse_csv_file():
" open CSV and check the headers "
for c in csv_list:
with open(c, mode='r') as open_csv:
reader = csv.DictReader(open_csv)
if 'epoch' in reader.fieldnames:
# do whatever you want here
else:
# do whatever else
then you can extract it from the DictReader's CSV header and do whatever you want
Also your python looks invalid

extract columns from multiple text file with Python

I have a folder with 5 text files in it pertaining to various sites--
the title is formatted in this way:
Rockspring_18_SW.417712.WRFc36.ET.2000-2050.txt
Rockspring_18_SW.417712.WRFc36.RAIN.2000-2050.txt
WICA.399347.WRFc36.ET.2000-2050.txt
WICA.399347.WRFc36.RAIN.2000-2050.txt
so, basically the file name follows the format of-
(site name).(site number).(WRFc36).(some variable).(2000-2050.txt
Each of these text files has a similar format to it with no header row: Year Month Day Value (consisting of ~18500 rows in each text file)
I want Python to search for similar filenames(where site name and site number match), and pick out the first through third columns of data from one of the files and paste it to a new txt file. I also want to copy and paste the 4th columns from each variable for a site (rain, et, etc.) and have them pasted in a particular order in the new file.
I know how to grab data using the csv module (and defining the new dialect for a space delimeter) from ALL files and print to a new text file, but I'm not sure how to automate the creation of a new file for each site name/number and make sure my variables plot out in the right order--
The output I want to use is one text file (not 5) for each site with the following format (year, month, day, variable1, variable2, variable3, variable4, variable5) for ~18500 rows...
I'm sure I'm looking over something realy simple here... this seems like it would be pretty rudimentary... but- any help would be greatly appreciated!
Update
========
I have updated the code to reflect the comments below.
http://codepad.org/3mQEM75e
from collections import defaultdict
import glob
import csv
#Create dictionary of lists-- [A] = [Afilename1, Afilename2, Afilename3...]
# [B] = [Bfilename1, Bfilename2, Bfilename3...]
def get_site_files():
sites = defaultdict(list)
#to start, I have a bunch of files in this format ---
#"site name(unique)"."site num(unique)"."WRFc36"."Variable(5 for each site name)"."2000-2050"
for fname in glob.glob("*.txt"):
#split name at every instance of "."
parts = fname.split(".")
#check to make sure i only use the proper files-- having 6 parts to name and having WRFc36 as 3rd part
if len(parts)==6 and parts[2]=='WRFc36':
#Make sure site name is the full unique identifier, the first and second "parts"
sites[parts[0]+"."+parts[1]].append(fname)
return sites
#hardcode the variables for method 2, below
Var=["TAVE","RAIN","SMOIS_INST","ET","SFROFF"]
def main():
for site_name, files in get_site_files().iteritems():
print "Working on *****"+site_name+"*****"
####Method 1- I'd like to not hardcode in my variables (as in method 2), so I can use this script in other applications.
for filename in files:
reader = csv.reader(open(filename, "rb"))
WriteFile = csv.writer(open("XX_"+site_name+"_combined.txt","wb"))
for row in reader:
row = reader.next()
####Method 2 works (mostly), but skips a LOT of random lines of first file, and doesn't utilize the functionality built into my dictionary of lists...
## reader0 = csv.reader(open(site_name+".WRFc36."+Var[0]+".2000-2050.txt", "rb")) #I'd like to copy ALL columns from the first file
## reader1 = csv.reader(open(site_name+".WRFc36."+Var[1]+".2000-2050.txt", "rb")) # and just the fourth column from all the rest of the files
## reader2 = csv.reader(open(site_name+".WRFc36."+Var[2]+".2000-2050.txt", "rb")) # (the columns 1-3 are the same for all files)
## reader3 = csv.reader(open(site_name+".WRFc36."+Var[3]+".2000-2050.txt", "rb"))
## reader4 = csv.reader(open(site_name+".WRFc36."+Var[4]+".2000-2050.txt", "rb"))
## WriteFile = csv.writer(open("XX_"+site_name+"_COMBINED.txt", "wb")) #creates new command to write a text file
##
## for row in reader0:
## row = reader0.next()
## row1 = reader1.next()
## row2 = reader2.next()
## row3 = reader3.next()
## row4 = reader4.next()
## WriteFile.writerow(row + row1 + row2 + row3 + row4)
## print "***finished with site***"
if __name__=="__main__":
main()
Here's an easier way to iterate through your files, grouped by site.
from collections import defaultdict
import glob
def get_site_files():
sites = defaultdict(list)
for fname in glob.glob('*.txt'):
parts = fname.split('.')
if len(parts)==6 and parts[2]=='WRFc36':
sites[parts[0]].append(fname)
return sites
def main():
for site,files in get_site_files().iteritems():
# you need to better explain what you are trying to do here!
print site, files
if __name__=="__main__":
main()
I still don't understand your cutting and pasting columns - you need to more clearly explain what you are trying to accomplish.
As far as getting the filenames goes I would use something like the following:
import os
# Gets a list of all file names that end in .txt
# ON *nix
file_names = os.popen('ls *.txt').read().split('\n')
# ON Windows
file_names = os.popen('dir /b *.txt').read().split('\n')
Then to get the elements normally separated by periods, use:
# For some file_name in file_names
file_name.split('.')
Then you can proceed to comparisons and extract the desired columns (by using open(file_name, 'r') or your CSV parser)
Michael G.

Categories

Resources