Renaming images in a csv column when different ids share the same image name - python

As a new Python programmer, I'm having trouble figuring out how to accomplish the following.
Given this input data from a csv file:
Sku Image_Name
B001 a.jpg
B002 a.jpg
B001 b.jpg
B002 c.jpg
B003 x.jpg
Multiple Skus might have the same image name. When that occurs, I want to rename the image in the Image_Name column by appending "_" + the Sku value from the same row, as shown below.
So the desired output data would be:
Sku Image_Name
B001 a_B001.jpg
B002 a_B002.jpg
B001 b.jpg
B002 c.jpg
B003 x.jpg
After that it should rename the images in the image folder according to the Image_Name column.
This is all I have so far:
import csv

# open and store the csv file
with open('D:\\test.csv', 'rb') as csvfile:
    spamreader = csv.reader(csvfile, delimiter=',')

OK, you've got quite a ways to go, but this should give you some hints on how to proceed (assuming a reasonable number of files):
import csv
from os.path import splitext

with open("/tmp/test.csv", 'rb') as csvfile:
    itemList = []
    renamedList = []
    keyList = []
    spamreader = csv.reader(csvfile, delimiter=",")
    for row in spamreader:
        keyList.append(row[0])
        itemList.append(row[1])
        renamedList.append(row[1])

toBeChanged = [itemNum for itemNum, item in enumerate(itemList)
               if itemList.count(item) > 1]
for itemNum in toBeChanged:
    name, ext = splitext(itemList[itemNum])
    renamedList[itemNum] = '{}_{}{}'.format(name, keyList[itemNum], ext)

# At this point we have your desired info and can print it just like you
# have above
print("Sku\tImage_Name")
for row in zip(keyList, renamedList):
    print(row[0] + '\t' + row[1])

# Duplicating / renaming files is next. This isn't the only way
# to do it (or the most efficient), but it's an easy way to understand.
# The idea is to first make copies of all needed files...
from shutil import copyfile

changedNames = []
for itemNum in toBeChanged:
    copyfile(itemList[itemNum], renamedList[itemNum])
    changedNames.append(itemList[itemNum])

# ...and then delete the originals. The set is used to eliminate
# duplicates.
from os import remove

for item in set(changedNames):
    remove(item)
There are lots of ways you can improve this code. The intent here was to make it more understandable. Understand it first, improve it second.
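When you get to the improving step, here is a hedged sketch of the same rename pass using collections.Counter (and pandas for the CSV bookkeeping) instead of the quadratic itemList.count() scan; the file and column names are assumptions carried over from the example above:
import os
from collections import Counter

import pandas as pd

df = pd.read_csv('test.csv')        # assumed: comma-delimited file with Sku, Image_Name columns
counts = Counter(df['Image_Name'])  # one pass to find duplicated image names

def rename(row):
    # append "_<Sku>" before the extension, but only for duplicated names
    if counts[row['Image_Name']] > 1:
        name, ext = os.path.splitext(row['Image_Name'])
        return '{}_{}{}'.format(name, row['Sku'], ext)
    return row['Image_Name']

df['Image_Name'] = df.apply(rename, axis=1)
df.to_csv('test_renamed.csv', index=False)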

import csv
import os
from os.path import splitext  # splits name & extension from a file
import shutil  # for making a duplicate copy of a file

# open and read csv
with open('test.csv') as csvfile:
    # create lists for sku, old_imagename and new_imagename
    itemList = []
    renamedList = []
    keyList = []
    spamreader = csv.reader(csvfile, delimiter=",")
    # process one row at a time
    for row in spamreader:
        keyList.append(row[0])      # for sku
        itemList.append(row[1])     # for old_imagename
        renamedList.append(row[1])  # for new_imagename

# process only skus having the same image
toBeChanged = [itemNum for itemNum, item in enumerate(itemList)
               if itemList.count(item) > 1]
for itemNum in toBeChanged:
    name, ext = splitext(itemList[itemNum])  # split image name & extension: e.g. "a.jpg" -> "a" & ".jpg"
    oldFileName = name + ext
    print("oldFileName = " + oldFileName)  # oldFileName = a.jpg
    newFileName = '{}_{}{}'.format(name, keyList[itemNum], ext)
    print("newFileName = " + newFileName)  # newFileName = a_B001.jpg & a_B002.jpg
    # check if the image file exists
    if os.path.isfile(oldFileName):
        shutil.copy2(oldFileName, newFileName)  # create a duplicate image file
        renamedList[itemNum] = newFileName  # a_B001.jpg
        # os.remove(oldFileName)

# write the final output to a new csv; newline='' avoids blank rows on Windows
with open('newCsv.csv', 'w', newline='') as mycsv:
    csvWriter = csv.writer(mycsv, delimiter=",")
    for row in zip(keyList, renamedList):
        print(row[0] + '\t' + row[1])
        csvWriter.writerow([row[0], row[1]])

Related

How to change value in multiple CSV cells?

I have 118 CSVs, and I need to go into each CSV and change F1, F2, F3 and so on to 0.
For example, in csv1 F1 = 0, in csv2 F2 = 0, in csv3 F3 = 0, and so on.
The CSV has headers:
I am assuming all of your CSV files have the same format, and that you are trying to set column F to be 0 for all of them.
You can use Python CSV library to help you as follows:
import csv
import glob

for filename in glob.glob('*.csv'):
    print(f'Processing: {filename}')
    with open(filename) as f_input:
        csv_input = csv.reader(f_input)
        header = next(csv_input)
        rows = [[*row[:5], '0'] for row in csv_input]
    with open(filename, 'w', newline='') as f_output:
        csv_output = csv.writer(f_output)
        csv_output.writerow(header)
        csv_output.writerows(rows)
This reads all .csv files from the working directory and changes the Mult Col 2 values (the sixth column) to 0. It does this for all rows but leaves the header the same.
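For what it's worth, a hedged pandas sketch of the same bulk edit, assuming pandas is available and that the column is the 'Mult Col 2' named in the question's own script further down:
import glob

import pandas as pd

for filename in glob.glob('*.csv'):
    df = pd.read_csv(filename)
    df['Mult Col 2'] = 0              # overwrite the whole column
    df.to_csv(filename, index=False)  # write back in place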
Thank you all. I made my own solution; it is a lot less classy than the ones posted here, but it automates everything from generating the x number of files to amending the col/row.
#==============================================================================
# Import the necessary packages
import os
#import glob
import shutil
import pathlib
import pandas as pd
#import numpy as np
#==============================================================================
InputPath = 'F:\\cells\\bc_dbase\\bc_dbase1.csv'
OutputPath = 'F:\\cells\\bc_dbase'
str1 = 'Name '
str2 = 'Mult Col 2'
NoStart = 1
NoEnd = 119
#==============================================================================
# Create complete path of folders
def CreatePath(FullPath, File=False):
    Parts = pathlib.Path(FullPath).parts
    for [n1, Folder] in enumerate(Parts):
        if File==True and n1==len(Parts)-1 and "." in Parts[n1]:
            continue
        elif n1==0:
            FolderPath = Parts[n1]
        else:
            FolderPath = os.path.join(FolderPath, Folder)
        if os.path.exists(FolderPath)==False:
            os.mkdir(FolderPath)
#==============================================================================
# Delete folder
def DeleteFolder(FullPath):
    FullPath = pathlib.Path(FullPath)
    try:
        shutil.rmtree(FullPath)
    except:
        pass
#==============================================================================
CreatePath(OutputPath, File=False)
[FolderPath, File] = os.path.split(InputPath)
[FileName, FileExt] = os.path.splitext(os.path.basename(InputPath))
ReversedFileName = FileName[::-1]
AdjFileName = FileName
# strip the trailing digits from the file name
for n1 in reversed(range(len(AdjFileName))):
    char = FileName[n1]
    if char.isdigit():
        AdjFileName = AdjFileName[:n1] + AdjFileName[(n1+1):]
    else:
        break
Data1 = pd.read_csv(InputPath)
Data2 = pd.DataFrame.copy(Data1)
NameCols = Data1.columns
if str2 in NameCols:
    Data2.loc[:, str2] = 1
for n1 in range(NoStart, NoEnd+1):
    NewFile = AdjFileName + str(n1) + FileExt
    NewFilePath = os.path.join(OutputPath, NewFile)
    Data3 = pd.DataFrame.copy(Data2)
    index = Data3[Data3[str1]==n1].index[0]
    Data3.loc[index, str2] = 0
    Data3.to_csv(NewFilePath, index=False)
    print('[INFO] Storing file:', NewFilePath)
#==============================================================================
Mr. Evans has pretty neat code using the Python CSV library, so I will expand on it a bit to answer your specific question.
import csv
import glob

file_count = 0
for filename in glob.glob('*.csv'):
    file_count += 1
    print(f'Processing: {filename}')
    with open(filename) as f_input:
        csv_input = csv.reader(f_input)
        header = next(csv_input)
        line_count = 0
        rows = []
        for row in csv_input:
            line_count += 1
            if line_count == file_count:
                rows.append([*row[:5], '0'])
            else:
                rows.append([*row[:6]])
    with open(filename, 'w', newline='') as f_output:
        csv_output = csv.writer(f_output)
        csv_output.writerow(header)
        csv_output.writerows(rows)
Note: the code will run for all the .csv files in the working directory, but in whatever order the OS returns them; wrap glob.glob('*.csv') in sorted() if you need a deterministic alphabetic order.
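A minimal sketch of that ordering fix (same loop, just a sorted file list, so csv1 pairs with row 1, csv2 with row 2, and so on):
import glob

# sorted() pins down the processing order regardless of the filesystem
for file_count, filename in enumerate(sorted(glob.glob('*.csv')), start=1):
    print(f'Processing: {filename}')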

How to add a blank row at the end to multiple CSV files with Python

I'm using Python 3 and I am having trouble appending lines from multiple csv files as separate rows in the Master_Total.csv file. I suspect that it is due to there not being a "pre-existing" blank row at the end of each csv file. If this is true, how do I add a new blank row to each TOTAL.csv file?
TOTAL.csv file:
GND, 0.1V, 1.0V, REFKelvin,
0.000000, 0.100436, 1.003407, 150318.406250,
[no empty row]
enviro.csv file:
temp [C], pressure [kPa], humidity [%RH]
23.870001, 85.903000, 33.75244
[empty row]
When I run my script I get this:
Master_Total.csv
GND, 0.1V, 1.0V, REFKelvin,
0.000000, 0.100436, 1.003407, 150318.4062500.000000, 0.100764, 1.005011, 100.3399580.000019, 0.100252, 1.002642, 100.214996...
Master_enviro.csv
temp [C], pressure [kPa], humidity [%RH]
23.870001, 85.903000, 33.752441
23.760000, 85.914001, 32.997131
24.040001, 85.879997, 33.134460
...
Code:
import shutil, glob, csv, os, sys

path = r'directory'
Enviro_Files = glob.glob(path + "**/*enviro.csv")
Total_Files = glob.glob(path + "**/*TOTAL.csv")

with open('directory_desktop/Master_enviro.csv', 'wb') as outfile1:
    for i, filename1 in enumerate(Enviro_Files):
        with open(filename1, 'rb') as inputfile1:
            if i != 0:
                inputfile1.readline()
            shutil.copyfileobj(inputfile1, outfile1)
            print(filename1 + " has been imported.")

with open('directory_desktop/Master_TOTAL.csv', 'wb') as outfile2:
    for h, filename2 in enumerate(Total_Files):
        with open(filename2, 'rb') as inputfile2:
            if h != 0:
                inputfile2.readline()
            shutil.copyfileobj(inputfile2, outfile2)
            print(filename2 + " has been imported.")
If you make use of Python's CSV library, you can test that a given row actually has values in it; that way it does not matter whether there are trailing empty lines or not, as they will be skipped over when writing the master files:
import csv
import glob

def merge_csvs(target_filename, csv_list):
    with open(target_filename, 'w', newline='') as f_master_target:
        csv_master_target = csv.writer(f_master_target)
        write_header = True
        for csv_filename in csv_list:
            with open(csv_filename, 'r', newline='') as f_single:
                csv_single = csv.reader(f_single)
                header = next(csv_single)
                if write_header:
                    csv_master_target.writerow(header)
                    write_header = False
                for row in csv_single:
                    if row:
                        csv_master_target.writerow(row)

path = 'directory'
Enviro_Files = glob.glob(path + "**/*enviro.csv")
Total_Files = glob.glob(path + "**/*TOTAL.csv")

merge_csvs('Master_enviro.csv', Enviro_Files)
merge_csvs('Master_TOTAL.csv', Total_Files)
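For comparison, a hedged pandas sketch of the same merge, assuming every file shares the header and fits comfortably in memory; read_csv skips fully blank lines by default, which sidesteps the empty-row issue entirely:
import glob

import pandas as pd

def merge_csvs_pandas(target_filename, csv_list):
    # blank lines are dropped by read_csv, so stray trailing rows never
    # reach the master file
    frames = [pd.read_csv(f) for f in csv_list]
    pd.concat(frames, ignore_index=True).to_csv(target_filename, index=False)

path = 'directory'
merge_csvs_pandas('Master_enviro.csv', glob.glob(path + "**/*enviro.csv"))
merge_csvs_pandas('Master_TOTAL.csv', glob.glob(path + "**/*TOTAL.csv"))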

How to Read CSV, Create a QR Code, and Write its Filename to New Column?

I'm writing a Python script to generate a QR code from the first column in a csv (concatenated with a local name), and that part works well. The csv just has three columns and looks like this:
ID First Last
144 Jerry Seinfeld
491 George Costanza
104 Elaine Benes
99 Cosmo Kramer
And I use my Python script to take that file, append a prefix to the IDs (in this case, 'NBC') and then create QR codes for each record in a new folder. It's a little long but all of this seems to work fine also:
import csv
import qrcode
import os
import shutil
import time
import inquirer

# Identify timestamp
timestr = time.strftime("%Y%m%d-%H%M%S")
local = 'NBC'
# Load csv
filename = "stackoverflowtest.csv"
# Path to new local folder
localfolder = local
localimagefolder = localfolder+'/image'
localfilefolder = localfolder+'/files'
# Check/create folders based on local
if not os.path.exists(localfolder):
    os.makedirs(localfolder)
if not os.path.exists(localimagefolder):
    os.makedirs(localimagefolder)
if not os.path.exists(localfilefolder):
    os.makedirs(localfilefolder)
# Copy uploaded file to the local's file folder
shutil.copy2(filename, localfilefolder+'/'+local+'-'+timestr+'.csv')  # complete target filename given
# Read csv and generate QR code for local+first column of csv
with open(filename, 'rU') as csvfile:
    next(csvfile, None)  # skip header row
    reader = csv.reader(csvfile, delimiter=',', dialect=csv.excel_tab)
    for i, row in enumerate(reader):
        labeldata = row[0]  # first column of data used to create QR codes
        print labeldata
        qr = qrcode.QRCode(
            version=1,
            error_correction=qrcode.constants.ERROR_CORRECT_L,
            box_size=10,
            border=4,
        )
        qr.add_data(local+"-"+labeldata)
        qr.make()
        img = qr.make_image()
        img.save(localimagefolder+"/"+local+"-"+labeldata+".png")  # save image
It creates the NBC folder, copies each csv file in one subfolder, and creates the QR codes for each ID (NBC-144,NBC-491,NBC-104,NBC-99) in another.
The part where I'm running into a problem is opening the csv and writing the filepath/filename back to the csv (or a copy of the csv since from what I've read, I likely can't do it to the same one). Is that possible?
The closest I've come with a script that works is appending the local name to the ID and writing that back to a column, but I can't seem to figure out how to do the same with a variable, let alone a filepath/filename:
import csv
import os
import sys

filename = 'stackoverflowtest.csv'
newfilename = 'stackoverflowtest2.csv'
local = 'NBC'

with open(filename, 'rU') as f:
    reader = csv.reader(f)
    with open(newfilename, 'w') as g:
        writer = csv.writer(g)
        for row in reader:
            new_row = row[0:] + ['-'.join([local, row[0]])]
            writer.writerow(new_row)
Is it possible to write something like that within my existing script to add a column for the filepath and filename? Everything I try breaks -- especially if I attempt to do it in the same script.
EDIT:
This is my closest attempt that overwrote the existing file
f = open(newfilename, 'r+')
w = csv.writer(f)
for path, dirs, files in os.walk(path):
    for filename in files:
        w.writerow([newfilename])
Also it's still in a separate script.
Since I can't run the code in your question directly, I had to comment out portions of it in what's below for testing, but I think it does everything you wanted in one loop in one script.
import csv
#import qrcode
import os
import shutil
import time
#import inquirer

# Identify timestamp
timestr = time.strftime("%Y%m%d-%H%M%S")
local = 'NBC'
# Load csv
filename = "stackoverflowtest.csv"
# Path to new local folder
localfolder = local
localimagefolder = os.path.join(localfolder, 'image')
localfilefolder = os.path.join(localfolder, 'files')
# Check/create folders based on local
if not os.path.exists(localfolder):
    os.makedirs(localfolder)
if not os.path.exists(localimagefolder):
    os.makedirs(localimagefolder)
if not os.path.exists(localfilefolder):
    os.makedirs(localfilefolder)
# Copy uploaded file to the local's file folder
target = os.path.join(localfilefolder, local+'-'+timestr+'.csv')  # target filename
#shutil.copy2(filename, target)  # Don't need to do this.
# Read csv and generate QR code for local+first column of csv
with open(filename, 'rb') as csvfile, open(target, 'wb') as outfile:
    reader = csv.reader(csvfile, delimiter=',', dialect=csv.excel_tab)
    writer = csv.writer(outfile, delimiter=',', dialect=csv.excel_tab)
    next(reader)  # Skip header row.
    for row in reader:
        id, first, last = row
        # qr = qrcode.QRCode(
        #     version=1,
        #     error_correction=qrcode.constants.ERROR_CORRECT_L,
        #     box_size=10,
        #     border=4,
        # )
        # qr.add_data(local+"-"+id)
        # qr.make()
        # img = qr.make_image()
        imagepath = os.path.join(localimagefolder, local+"-"+id+".png")
        # img.save(imagepath)  # Save image.
        print "saving img:", imagepath
        writer.writerow(row + [local+'-'+id, imagepath])
Output from sample input data:
144,Jerry,Seinfeld,NBC-144,NBC/image/NBC-144.png
491,George,Costanza,NBC-491,NBC/image/NBC-491.png
104,Elaine,Benes,NBC-104,NBC/image/NBC-104.png
99,Cosmo,Kramer,NBC-99,NBC/image/NBC-99.png
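If you are on Python 3, here is a minimal sketch of the same read-append-write loop (QR generation elided just as above; text mode with newline='' replaces the old binary-mode files, and the output name is a hypothetical stand-in):
import csv
import os

local = 'NBC'
filename = "stackoverflowtest.csv"
target = "stackoverflowtest-out.csv"  # hypothetical output name

with open(filename, newline='') as csvfile, open(target, 'w', newline='') as outfile:
    reader = csv.reader(csvfile)
    writer = csv.writer(outfile)
    next(reader)  # skip header row
    for row in reader:
        id_, first, last = row
        imagepath = os.path.join(local, 'image', local + "-" + id_ + ".png")
        writer.writerow(row + [local + '-' + id_, imagepath])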

Pyshp appends new fields but not record values to an existing shapefile

The problem consists of appending the columns of a .csv file as new fields to an existing shapefile. So, I've used Python and the modules pyshp and csv to, first, copy the content of the original shapefile (geometries and records) and, second, create new fields in this copy and iterate over the respective .csv rows in order to insert them:
import os, sys
import shapefile, csv
from os.path import basename

filename_full = sys.argv[1]
output_full = sys.argv[2]
name, file_extension = os.path.splitext(filename_full)
output_name, file_extension = os.path.splitext(output_full)
filename_dbf = name + ".dbf"
filename_classified = name + "_classified.csv"
output_dbf = output_name + ".dbf"

# reader
myshp = open(filename_full, "rb")
mydbf = open(filename_dbf, "rb")
r = shapefile.Reader(shp=myshp, dbf=mydbf)
# writer
w = shapefile.Writer(r.shapeType)
# copy shapefile content
w._shapes.extend(r.shapes())
w.records.extend(r.records())
w.fields = list(r.fields)
w.save(output_full)

# add new records from the csv
with open(filename_classified, 'rt', encoding='utf-8') as csvfile:
    reader = csv.DictReader(csvfile, delimiter=',')
    headers = reader.fieldnames
    [w.field(field) for field in headers]
    for row in reader:
        w.record(*tuple([row[f] for f in headers]))  # <-- insertion in specific fields
    w.save(output_full)
On the pyshp page, there are a couple of examples. One of them deals specifically with inserting a record with values for named fields, as follows:
>>> w = shapefile.Writer()
>>> w.field('FIRST_FLD','C','40')
>>> w.field('SECOND_FLD','C','40')
>>> w.record('First', 'Line')
>>> w.record(FIRST_FLD='First', SECOND_FLD='Line')
but, even indicating the fields, I get:
Traceback (most recent call last):
  File "assigning-shapefile.py", line 68, in <module>
    w.record(*tuple([row[f] for f in headers]))
  File "/usr/local/lib/python3.5/dist-packages/shapefile.py", line 1040, in record
    record = [recordList[i] for i in range(fieldCount)]
  File "/usr/local/lib/python3.5/dist-packages/shapefile.py", line 1040, in <listcomp>
    record = [recordList[i] for i in range(fieldCount)]
IndexError: tuple index out of range
and, if we look inside the shapefile, we have something like this:
[Screenshot: QGIS attribute table before and after the code execution]
from which I concluded that the fields are successfully added, but the rows (w.record with the field names specified) are not.
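Judging from the traceback, a likely cause is that after the csv headers are added with w.field(), the writer's field count covers the original shapefile fields plus the new ones, while each w.record() call only receives the csv values, so indexing runs past the end of the tuple. Here is a sketch of one way around it, reusing the names from the code above and assuming the csv rows line up one-to-one with the shapefile features: extend each copied record with its csv values instead of appending fresh records.
import csv
import shapefile

# read the csv rows up front so they can be zipped with the shapefile records
with open(filename_classified, 'rt', encoding='utf-8') as csvfile:
    reader = csv.DictReader(csvfile, delimiter=',')
    headers = reader.fieldnames
    csv_rows = list(reader)

w = shapefile.Writer(r.shapeType)
w.fields = list(r.fields)  # copied fields first...
for field in headers:
    w.field(field)         # ...then the new csv-backed fields
# pad each copied record with its csv values so every field gets a value
for rec, row in zip(r.records(), csv_rows):
    w.records.append(list(rec) + [row[f] for f in headers])
w._shapes.extend(r.shapes())
w.save(output_full)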
Solved the problem using a quite simple approach with the osgeo library:
# --
# USAGE:
# python3 assigning-shapefile.py [input-shapefile] [output-shapefile]
# --
# REQUISITE:
# The classification csv file should be edited as a header of classifiers
# and its labels. The file name is mandatory to be IMAGE_NAME-classified.csv
# Ex:
# Filename: IMAGE_NAME-classified.csv
# Content:
# Random forest, Multilayer-Perc, CRF, SVM
# vegetation, vegetation, building, vegetation
# wall, window, window, window
# ...
# --
import os, sys
import shapefile, csv
from os.path import basename
from osgeo import ogr

filename_full = sys.argv[1]
output_full = sys.argv[2]
name, file_extension = os.path.splitext(filename_full)
output_name, file_extension = os.path.splitext(output_full)
filename_dbf = name + ".dbf"
filename_classified = name + "_classified.csv"
output_dbf = output_name + ".dbf"

myshp = open(filename_full, "rb")
mydbf = open(filename_dbf, "rb")
r = shapefile.Reader(shp=myshp, dbf=mydbf)
w = shapefile.Writer(r.shapeType)
# copy shapefile
w._shapes.extend(r.shapes())
w.records.extend(r.records())
w.fields = list(r.fields)
w.save(output_full)

# read the csv records
csvRecords = []
csvHeaders = []
with open(filename_classified, 'rt', encoding='utf-8') as csvfile:
    reader = csv.DictReader(csvfile, delimiter=',')
    csvHeaders = reader.fieldnames
    for line in reader:
        csvRecords.append(line)

driver = ogr.GetDriverByName('ESRI Shapefile')
infile = driver.Open(output_full, 1)
for classifier in csvHeaders:
    field = ogr.FieldDefn(classifier, ogr.OFTString)
    field.SetWidth(16)
    layer = infile.GetLayer()
    layer.CreateField(field)

cont = 0
for feature in layer:
    for classifier in csvHeaders:
        if feature.GetField(0) != cont:
            cont += 1
        feature.SetField(classifier, csvRecords[cont][classifier])
    layer.SetFeature(feature)
infile = None
This (i) reads the csv file (with the columns to be added), (ii) reads the shapefile and copies it, and (iii) modifies the .shp copy by editing each row with the corresponding csv record.

Transposing all csv files within a folder

I got help the last time I asked a question on this site regarding batch processing csv files within a folder using glob.glob() with Python. I am trying to use it this time to transpose all csv files within a folder. The script below only processes the last file and stops. What am I doing wrong?
import csv
import os
import glob

directory = raw_input("INPUT Folder:")
output = raw_input("OUTPUT Folder:")
in_files = os.path.join(directory, '*.csv')
for in_file in glob.glob(in_files):
    with open(in_file) as input_file:
        reader = csv.reader(input_file)
        cols = []
        for row in reader:
            cols.append(row)
filename = os.path.splitext(os.path.basename(in_file))[0] + '.csv'
with open(os.path.join(output, filename), 'wb') as output_file:
    writer = csv.writer(output_file)
    for i in range(len(max(cols, key=len))):
        writer.writerow([(c[i] if i < len(c) else '') for c in cols])
You need to indent the "output" portion of the code so that it runs once for each iteration of the for in_file loop:
import csv
import os
import glob

directory = raw_input("INPUT Folder:")
output = raw_input("OUTPUT Folder:")
in_files = os.path.join(directory, '*.csv')
for in_file in glob.glob(in_files):
    with open(in_file) as input_file:
        reader = csv.reader(input_file)
        cols = []
        for row in reader:
            cols.append(row)
    # "outdent" this code so it only needs to run once for each in_file
    filename = os.path.splitext(os.path.basename(in_file))[0] + '.csv'
    # Indent this to the same level as the rest of the "for in_file" loop!
    with open(os.path.join(output, filename), 'wb') as output_file:
        writer = csv.writer(output_file)
        for i in range(len(max(cols, key=len))):
            writer.writerow([(c[i] if i < len(c) else '') for c in cols])
In your version that code only runs once, after the for in_file loop has completed, and therefore only outputs cols data left over from the final iteration of that loop.
I have also "outdented" the filename = ... statement to the for in_file level, as this only needs to be done once for each in_file, not once for each row of each in_file.
You can get a lot of mileage with data manipulation using pandas:
import os
import pandas as pd

for filename in os.listdir('.'):
    # We save an augmented filename later,
    # so using splitext is useful for more
    # than just checking the extension.
    prefix, ext = os.path.splitext(filename)
    if ext.lower() != '.csv':
        continue
    # Load the data into a dataframe
    df = pd.read_csv(filename, header=None, index_col=None)
    # Transpose is easy, but you could do TONS
    # of data processing here. pandas is awesome.
    df_transposed = df.T
    # Save to a new file with an augmented name
    df_transposed.to_csv(prefix + '_T' + ext, header=True, index=False)
The os.walk version is not much different, if you need to dig into subfolders as well.
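For instance, a minimal sketch of that os.walk variant (same transpose, just walking the tree and writing each *_T.csv next to its source file):
import os
import pandas as pd

for dirpath, dirnames, filenames in os.walk('.'):
    for filename in filenames:
        prefix, ext = os.path.splitext(filename)
        if ext.lower() != '.csv' or prefix.endswith('_T'):
            continue  # skip non-csv files and already-transposed outputs
        src = os.path.join(dirpath, filename)
        df = pd.read_csv(src, header=None, index_col=None)
        df.T.to_csv(os.path.join(dirpath, prefix + '_T' + ext),
                    header=True, index=False)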
Here is a working one. I had to google for an hour, but it works and was tested on Python 3.3:
import csv
import os
import glob

directory = 'C:\\Python33\\csv'
output = 'C:\\Python33\\csv2'
in_files = os.path.join(directory, '*.csv')
for in_file in glob.glob(in_files):
    with open(in_file) as input_file:
        reader = csv.reader(input_file)
        cols = []
        for row in reader:
            cols.append(row)
    # "outdent" this code so it only needs to run once for each in_file
    filename = os.path.splitext(os.path.basename(in_file))[0] + '.csv'
    # Indent this to the same level as the rest of the "for in_file" loop!
    # newline='' keeps the csv writer from emitting blank rows on Windows
    with open(os.path.join(output, filename), 'w', newline='') as output_file:
        writer = csv.writer(output_file)
        for i in range(len(max(cols, key=len))):
            writer.writerow([(c[i] if i < len(c) else '') for c in cols])
in_files is just a single glob pattern string in that format. You can also build the file list yourself; join the directory onto each name so the paths are usable when you open them:
in_files = [os.path.join(directory, f) for f in os.listdir(directory) if f.endswith('.csv')]
