I'm trying to read in a .csv file and I get this error:
ValueError: could not convert string to float: '220'
Here's my code so far:
import csv
import matplotlib.pyplot
import os

environment = []

# Initialise data dir
dir = os.getcwd()
print(dir)
parent = os.path.dirname(dir)
parent = os.path.dirname(parent)
parent = os.path.dirname(parent)
basedir = os.path.dirname(parent)
print(basedir)
datadir = os.path.join(basedir, 'data')
print(datadir)
inputdatadir = os.path.join(datadir, 'input')
print(inputdatadir)

# Open file and read.
file = os.path.join(inputdatadir, 'snowslope1.csv')
f = open(file, newline='')
reader = csv.reader(f, quoting=csv.QUOTE_NONNUMERIC)
for row in reader:  # A list of rows
    rowlist = []
    for value in row:  # A list of values
        rowlist.append(value)
        #print(value)
    environment.append(rowlist)
f.close()

# Plot environment.
matplotlib.pyplot.imshow(environment)
matplotlib.pyplot.show()
Can anybody help with what's going wrong here? I'm unsure where the error is, as the values in snowslope1.csv all appear to be plain numbers like 220,221,222,223.
TIA
This looks like a UTF-8 BOM (byte order mark): the first value in the file is likely '\ufeff220', and that invisible leading character is what makes the float conversion fail.
Try opening the file with encoding="utf-8-sig", which strips the BOM.
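For example, a minimal sketch of that fix applied to your own open() call (everything else unchanged):

import csv

# 'utf-8-sig' strips a leading byte order mark, if one is present.
with open(file, newline='', encoding='utf-8-sig') as f:
    reader = csv.reader(f, quoting=csv.QUOTE_NONNUMERIC)
    environment = [list(row) for row in reader]  # every value now parses as a float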
I have multiple input files, in a format like the one below, which have to be processed.
Input file path: /tmp/input
1.1.1.txt
1.1.2.txt
1.1.3.txt
But I want to have an output file for each input file in another folder, say /tmp/outputsmgr, like below:
1.1.1_output.csv
1.1.2_output.csv
1.1.3_output.csv
The issues are:
Firstly, I am not able to write the output files to another/different folder.
Secondly, after processing, the data from all the input files gets merged into files in the input folder, like below, instead of a separate output file being written for each input file.
All the files below contain the same data; instead, the data from 1.1.1.txt should be in 1.1.1_output.csv and the data from 1.1.2.txt should be in 1.1.2_output.csv.
1.1.1.txt_output.csv
1.1.2.txt_output.csv
1.1.3.txt_output.csv
How can I modify the below code to get the desired result?
import os
import csv
import re

def parseFile(fileName):
    # We are using a dictionary to store info for each file
    data = list()
    # data = dict()
    fh = open(fileName, "r")
    lines = fh.readlines()[1:]
    for line in lines:
        line = line.rstrip("\n")
        if re.search("sessmgr", line):
            splitted = line.split()
            temp = dict()
            temp["CPU"] = splitted[0]
            temp["facility"] = splitted[1]
            temp["instance"] = splitted[2]
            temp["cpu-used"] = splitted[3]
            temp["cpu-allc"] = splitted[4]
            temp["mem-used"] = splitted[5]
            temp["mem-allc"] = splitted[6]
            temp["files-used"] = splitted[7]
            temp["files-allc"] = splitted[8]
            temp["sessions-used"] = splitted[9]
            temp["sessions-allc"] = splitted[10]
            # print (splitted[2])
            data.append(temp)
            # continue;
    # print (data)
    return data

if __name__ == "__main__":
    inputsDirectory = "/tmp/input"
    outputDirectory = "/tmp/outputsmgr"
    path = os.path.abspath(inputsDirectory)
    pathout = os.path.abspath(outputDirectory)
    fileLists = ["{0}/{1}".format(path, x) for x in os.listdir(outputDirectory)]
    fileList = ["{0}/{1}".format(path, x) for x in os.listdir(inputsDirectory)]
    # print(fileList)
    csvRows = []
    for file in fileList:
        newRow = parseFile(file)
        csvRows.append(newRow)
    # print(csvRows)
    for files in fileList:
        outputFile = "output.csv"
        csvfile = open(os.path.join(files + "_" + outputFile), 'w')
        fieldnames = ["CPU",
                      "facility",
                      "instance",
                      "cpu-used",
                      "cpu-allc",
                      "mem-used",
                      "mem-allc",
                      "files-used",
                      "files-allc",
                      "sessions-used",
                      "sessions-allc"]
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        headers = {}
        for n in writer.fieldnames:
            headers[n] = n
        writer.writerow(headers)
        # writer.writeheader()
        for row in csvRows:
            for obj in row:
                print (obj)
                writer.writerow(obj)
I think the code below will do what you want. It processes the files in the input directory sequentially and the results returned from the parseFile() function get written to the corresponding output file in the output directory. It's important to get a new set of csvRows from each input file and write (just) those to each output file.
The code assumes the outputDirectory already exists, but if that's not the case, then you'll need to add code to create it before processing any of the files. Hint: use os.path.exists() and os.path.isdir() in conjunction with os.makedirs().
import csv
import os
import re

def parseFile(filePath, fieldnames, target_re=r"sessmgr"):
    """ Yield rows of the file matching the target regex, as dicts. """
    with open(filePath, "r") as file:
        next(file)  # Skip/ignore first line.
        for line in file:
            if re.search(target_re, line):
                yield dict(zip(fieldnames, line.split()))

if __name__ == "__main__":
    OUTPUT_FILE_SUFFIX = "output.csv"
    inputsDirectory = "/tmp/input"
    outputDirectory = "/tmp/outputsmgr"
    fieldnames = ("CPU", "facility", "instance", "cpu-used", "cpu-allc", "mem-used",
                  "mem-allc", "files-used", "files-allc", "sessions-used",
                  "sessions-allc")
    input_dir = os.path.abspath(inputsDirectory)
    output_dir = os.path.abspath(outputDirectory)

    for in_filename in os.listdir(input_dir):
        in_filepath = os.path.join(input_dir, in_filename)
        print('in_filepath: "{}"'.format(in_filepath))
        in_rootname = os.path.splitext(in_filename)[0]
        out_filename = in_rootname + "_" + OUTPUT_FILE_SUFFIX
        out_filepath = os.path.join(output_dir, out_filename)
        print('out_filepath: "{}"'.format(out_filepath))
        with open(out_filepath, 'w', newline='') as csvfile:  # newline='' avoids blank rows on Windows
            writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
            writer.writeheader()
            writer.writerows(parseFile(in_filepath, fieldnames))
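If the output directory might not already exist, here is a minimal sketch of the hint above, placed before the loop:

if not os.path.isdir(output_dir):
    os.makedirs(output_dir)  # Create the output directory (and any missing parents)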
I'm using the Sniffer class from the csv module to determine the delimiter in a CSV file, and it works on single files, but if I add a loop and point it at a folder with the same CSV in it, it throws out this error:
File "delimiter.py", line 17, in read_csv_delimit
reader = csv.reader(csvfile, dialect)
TypeError: "delimiter" must be a 1-character string
The script looks like this:
#!/usr/local/bin/python3
import csv
import os

def read_csv_delimit(file_dir, csv_file):
    # Initialise list
    file_csv = []
    # Open csv & check delimiter
    with open(file_dir + "/" + csv_file, newline='', encoding="ISO-8859-1") as csvfile:
        dialect = csv.Sniffer().sniff(csvfile.read(1024))
        csvfile.seek(0)
        reader = csv.reader(csvfile, dialect)
        for item in reader:
            file_csv.append(item[0])
    #del file_csv[0]
    return file_csv

def split_path(full_path):
    #path = path.rstrip(os.sep)
    head, tail = os.path.split(full_path)
    return (head, tail)

machine_dir = input("Drop the folder here: ")
# Get list of machine csv
machines = os.listdir(machine_dir)
for machine in machines:
    print(machine)
    #file_dir, csv_file = split_path(csv_file)
    machine_list = read_csv_delimit(machine_dir, machine)
    print(machine_list)
Given the traceback, it seems that your script does indeed pick up non-CSV files. You can use the glob module to fine-tune the search pattern so that only the files you want are picked up, but even a simple extension check should suffice:
target = input("Drop the folder here: ")
machine_list = [read_csv_delimit(target, m) for m in os.listdir(target) if m[-4:] == ".csv"]
print(machine_list)
Checking that the entered directory is valid, though, is highly recommended, even if it's just a simple os.path.isdir(target) check.
I'd also recommend that you use the os.path facilities to build up the path in your read_csv_delimit() function, e.g.:
with open(os.path.join(file_dir, csv_file), newline='', encoding = "ISO-8859-1") as csvfile:
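Putting those pieces together, a minimal sketch (assuming read_csv_delimit() as defined in your question):

import glob
import os

target = input("Drop the folder here: ")
if os.path.isdir(target):
    # Only *.csv files reach the sniffer, so stray files can't break it.
    machine_list = [read_csv_delimit(*os.path.split(path))
                    for path in glob.glob(os.path.join(target, "*.csv"))]
    print(machine_list)
else:
    print("Not a directory:", target)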
I'm writing a Python script to generate a QR code from the first column of a csv (concatenated with a local name), and that part works well. The csv has just three columns and looks like this:
ID First Last
144 Jerry Seinfeld
491 George Costanza
104 Elaine Benes
99 Cosmo Kramer
And I use my Python script to take that file, append a prefix to the IDs (in this case, 'NBC'), and then create QR codes for each record in a new folder. It's a little long, but all of this seems to work fine also:
import csv
import qrcode
import os
import shutil
import time
import inquirer

# Identify timestamp
timestr = time.strftime("%Y%m%d-%H%M%S")

local = 'NBC'

# Load csv
filename = "stackoverflowtest.csv"

# Path to new local folder
localfolder = local
localimagefolder = localfolder + '/image'
localfilefolder = localfolder + '/files'

# Check/create folders based on local
if not os.path.exists(localfolder):
    os.makedirs(localfolder)
if not os.path.exists(localimagefolder):
    os.makedirs(localimagefolder)
if not os.path.exists(localfilefolder):
    os.makedirs(localfilefolder)

# Copy uploaded file to their local's file folder
shutil.copy2(filename, localfilefolder + '/' + local + '-' + timestr + '.csv')  # complete target filename given

# Read csv and generate QR code for local + first column of csv
with open(filename, 'rU') as csvfile:
    next(csvfile, None)  # skip header row
    reader = csv.reader(csvfile, delimiter=',', dialect=csv.excel_tab)
    for i, row in enumerate(reader):
        labeldata = row[0]  # Choose first column of data to create QR codes
        print labeldata
        qr = qrcode.QRCode(
            version=1,
            error_correction=qrcode.constants.ERROR_CORRECT_L,
            box_size=10,
            border=4,
        )
        qr.add_data(local + "-" + labeldata)
        qr.make()
        img = qr.make_image()
        img.save(localimagefolder + "/" + local + "-" + labeldata + ".png".format(i))  # Save image
It creates the NBC folder, copies each csv file into one subfolder, and creates the QR codes for each ID (NBC-144, NBC-491, NBC-104, NBC-99) in another.
The part where I'm running into a problem is opening the csv and writing the filepath/filename back to it (or to a copy of the csv, since from what I've read, I likely can't write back to the same one). Is that possible?
The closest I've come is a working script that appends the local name joined with the ID and writes that back as a new column, but I can't seem to figure out how to do the same with a variable, let alone a filepath/filename:
import csv
import os
import sys

filename = 'stackoverflowtest.csv'
newfilename = 'stackoverflowtest2.csv'
local = 'NBC'

with open(filename, 'rU') as f:
    reader = csv.reader(f)
    with open(newfilename, 'w') as g:
        writer = csv.writer(g)
        for row in reader:
            new_row = row[0:] + ['-'.join([local, row[0]])]
            writer.writerow(new_row)
Is it possible to write something like that within my existing script to add a column for the filepath and filename? Everything I try breaks, especially if I attempt to do it in the same script.
EDIT:
This is my closest attempt, which overwrote the existing file:

f = open(newfilename, 'r+')
w = csv.writer(f)
for path, dirs, files in os.walk(path):
    for filename in files:
        w.writerow([newfilename])
Also it's still in a separate script.
Since I can't run the code in your question directly, I had to comment out portions of it in what's below for testing, but I think it does everything you wanted in one loop in one script.
import csv
#import qrcode
import os
import shutil
import time
#import inquirer

# Identify Timestamp
timestr = time.strftime("%Y%m%d-%H%M%S")

local = 'NBC'

# Load csv
filename = "stackoverflowtest.csv"

# Path to new local folder
localfolder = local
localimagefolder = os.path.join(localfolder, 'image')
localfilefolder = os.path.join(localfolder, 'files')

# Check/create folders based on local
if not os.path.exists(localfolder):
    os.makedirs(localfolder)
if not os.path.exists(localimagefolder):
    os.makedirs(localimagefolder)
if not os.path.exists(localfilefolder):
    os.makedirs(localfilefolder)

# Copy uploaded file to their local's file folder
target = os.path.join(localfilefolder, local + '-' + timestr + '.csv')  # Target filename
#shutil.copy2(filename, target)  # Don't need to do this.

# Read csv and generate QR code for local + first column of csv
with open(filename, 'rb') as csvfile, open(target, 'wb') as outfile:
    reader = csv.reader(csvfile, delimiter=',', dialect=csv.excel_tab)
    writer = csv.writer(outfile, delimiter=',', dialect=csv.excel_tab)
    next(reader)  # Skip header row.
    for row in reader:
        id, first, last = row
        # qr = qrcode.QRCode(
        #     version=1,
        #     error_correction=qrcode.constants.ERROR_CORRECT_L,
        #     box_size=10,
        #     border=4,
        # )
        #
        # qr.add_data(local+"-"+id)
        # qr.make()
        #
        # img = qr.make_image()
        imagepath = os.path.join(localimagefolder, local + "-" + id + ".png")
        # img.save(imagepath)  # Save image.
        print "saving img:", imagepath
        writer.writerow(row + [local + '-' + id, imagepath])
Output from sample input data:
144,Jerry,Seinfeld,NBC-144,NBC/image/NBC-144.png
491,George,Costanza,NBC-491,NBC/image/NBC-491.png
104,Elaine,Benes,NBC-104,NBC/image/NBC-104.png
99,Cosmo,Kramer,NBC-99,NBC/image/NBC-99.png
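Note that this is Python 2 code (the print statement and the 'rb'/'wb' file modes). On Python 3, a minimal sketch of the equivalent read/write loop, reusing the same variable names from the answer above, would be:

with open(filename, 'r', newline='') as csvfile, open(target, 'w', newline='') as outfile:
    # The excel_tab dialect is redundant here, since delimiter=',' overrides it anyway.
    reader = csv.reader(csvfile, delimiter=',')
    writer = csv.writer(outfile, delimiter=',')
    next(reader)  # Skip header row.
    for row in reader:
        id_, first, last = row  # id_ avoids shadowing the id() builtin
        imagepath = os.path.join(localimagefolder, local + "-" + id_ + ".png")
        print("saving img:", imagepath)
        writer.writerow(row + [local + '-' + id_, imagepath])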
The problem consists of appending columns from a .csv file as new fields to an existing shapefile. So I've used Python and the modules pyshp and csv to, first, copy the content of the original shapefile (geometries and records) and, second, create new fields in this copy and iterate over the respective .csv rows in order to insert them:
import os, sys
import shapefile, csv
from os.path import basename

filename_full = sys.argv[1]
output_full = sys.argv[2]
name, file_extension = os.path.splitext(filename_full)
output_name, file_extension = os.path.splitext(output_full)
filename_dbf = name + ".dbf"
filename_classified = name + "_classified.csv"
output_dbf = output_name + ".dbf"

# reader
myshp = open(filename_full, "rb")
mydbf = open(filename_dbf, "rb")
r = shapefile.Reader(shp=myshp, dbf=mydbf)

# writer
w = shapefile.Writer(r.shapeType)

# copy shapefiles content
w._shapes.extend(r.shapes())
w.records.extend(r.records())
w.fields = list(r.fields)
w.save(output_full)

# add new records from the csv
with open(filename_classified, 'rt', encoding='utf-8') as csvfile:
    reader = csv.DictReader(csvfile, delimiter=',')
    headers = reader.fieldnames
    [w.field(field) for field in headers]
    for row in reader:
        w.record(*tuple([row[f] for f in headers]))  # <-- insertion in specific fields
w.save(output_full)
On the pyshp page, there are a couple of examples. One of them is specifically about inserting records into specific fields, as follows:
>>> w = shapefile.Writer()
>>> w.field('FIRST_FLD','C','40')
>>> w.field('SECOND_FLD','C','40')
>>> w.record('First', 'Line')
>>> w.record(FIRST_FLD='First', SECOND_FLD='Line')
but, even indicating the fields, I get:
Traceback (most recent call last):
File "assigning-shapefile.py", line 68, in <module>
w.record(*tuple([row[f] for f in headers]))
File "/usr/local/lib/python3.5/dist-packages/shapefile.py", line 1040, in record
record = [recordList[i] for i in range(fieldCount)]
File "/usr/local/lib/python3.5/dist-packages/shapefile.py", line 1040, in <listcomp>
record = [recordList[i] for i in range(fieldCount)]
IndexError: tuple index out of range
and, if we look inside the shapefile, we have something like this:
[Image: QGIS attribute table before and after the code execution]
from which I concluded that the fields are successfully added, but the rows (w.record with the field names specified) are not; presumably w.record() expects one value for every field in w.fields, including the original copied ones, while only the csv values are being passed.
Solved the problem using a quite simple approach with the osgeo library:
# --
# USAGE:
#   python3 assigning-shapefile.py [input-shapefile] [output-shapefile]
# --
# REQUISITE:
#   The classification csv file should be edited as a header of classifiers and their labels.
#   The file name is mandatory to be IMAGE_NAME_classified.csv
#   Ex:
#     Filename: IMAGE_NAME_classified.csv
#     Content:
#       Random forest, Multilayer-Perc, CRF, SVM
#       vegetation, vegetation, building, vegetation
#       wall, window, window, window
#       ...
# --
import os, sys
import shapefile, csv
from os.path import basename
from osgeo import ogr

filename_full = sys.argv[1]
output_full = sys.argv[2]
name, file_extension = os.path.splitext(filename_full)
output_name, file_extension = os.path.splitext(output_full)
filename_dbf = name + ".dbf"
filename_classified = name + "_classified.csv"
output_dbf = output_name + ".dbf"

myshp = open(filename_full, "rb")
mydbf = open(filename_dbf, "rb")
r = shapefile.Reader(shp=myshp, dbf=mydbf)
w = shapefile.Writer(r.shapeType)

# copy shapefile
w._shapes.extend(r.shapes())
w.records.extend(r.records())
w.fields = list(r.fields)
w.save(output_full)

# read the csv records
csvRecords = []
csvHeaders = []
with open(filename_classified, 'rt', encoding='utf-8') as csvfile:
    reader = csv.DictReader(csvfile, delimiter=',')
    csvHeaders = reader.fieldnames
    for line in reader:
        csvRecords.append(line)

driver = ogr.GetDriverByName('ESRI Shapefile')
infile = driver.Open(output_full, 1)

for classifier in csvHeaders:
    field = ogr.FieldDefn(classifier, ogr.OFTString)
    field.SetWidth(16)
    layer = infile.GetLayer()
    layer.CreateField(field)

cont = 0
for feature in layer:
    for classifier in csvHeaders:
        if feature.GetField(0) != cont:
            cont += 1
        feature.SetField(classifier, csvRecords[cont][classifier])
    layer.SetFeature(feature)

infile = None
which is able to (i) read the csv file (with the columns to be added), (ii) read the shapefile and copy it, and (iii) modify the .shp copy by editing each row with the corresponding csv record.
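As a quick sanity check, a minimal sketch that lists the fields of the result (assuming the same osgeo install; "output.shp" is a hypothetical output name):

from osgeo import ogr

ds = ogr.Open("output.shp")  # hypothetical output filename
layer = ds.GetLayer()
defn = layer.GetLayerDefn()
# Print every field name; the csv headers should appear after the original fields.
print([defn.GetFieldDefn(i).GetName() for i in range(defn.GetFieldCount())])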
I have a folder containing over 500 images, and a text file with new names for these images. I have tried to create a renaming script in Python, but I am struggling with robustness. I would like to rename each file by selecting its new name via a unique identifier.
I wrote this...
import os
import csv

img_dir = str(raw_input("Path to Images Folder as C:/path/to/IMAGES : "))
os.chdir(img_dir)
newNames_file = str(raw_input("csv file path and name as C:/path/to/loc_data.csv : "))
with open(newNames_file, 'rb') as f:
    reader = csv.reader(f)
    newNames = list(reader)
newNames = [l[0] for l in newNames]
print "Current Working Directory is: " + os.getcwd()
path = os.getcwd()
filenames = [os.path.join(img_dir, fn) for fn in next(os.walk(img_dir))[2]]
assert len(filenames) == len(newNames)
for i in range(len(newNames)):
    os.rename(str(filenames[i]), str(newNames[i]))
but the results are unreliable, e.g. the order isn't guaranteed to match.
How can I use the unique ID to pass the correct value to the rename function?
The file names are raw, e.g. IMG_3036.tif,
and the new values are in the form 7700_50fpl_4_1_3036,
where 3036 is the unique ID.
I wrote a script and I think it will do the trick.
You can rewrite the get_name_uid function if you need to extract the uid from your new and old file names differently.
Make sure you change the glob pattern and the new_names file name to match what you actually use.
import glob
import os

old_image_filenames = glob.glob('*.img')
new_names = open('new_names').read().strip().split(',')

def get_name_uid(name):
    uid = name.split('.')[0]
    uid = uid.split('_')[-1]
    return uid

def get_uid_filename_map(filenames, get_uid_func):
    uid_filename_map = {}
    for filename in filenames:
        uid_filename_map[get_uid_func(filename)] = filename
    return uid_filename_map

uid_old_filenames = get_uid_filename_map(old_image_filenames, get_name_uid)
uid_new_filenames = get_uid_filename_map(new_names, get_name_uid)

for uid in uid_old_filenames.keys():
    if uid in uid_new_filenames:
        os.rename(uid_old_filenames[uid], uid_new_filenames[uid])
The folder before I run the script:
$ ls
new_names play.py zasdf_3036.img zxcsdf_3037.img
After running the script:
$ ls
7700_50fpl_4_1_3036.img 7700_50fpl_4_2_3037.img new_names play.py
The file new_names:
$ cat new_names
7700_50fpl_4_1_3036.img,7700_50fpl_4_2_3037.img
This is what I made in the end. Thank you for your help and input.
def main():
    pass

if __name__ == '__main__':
    main()

import os
import glob
import csv
import numpy as np

# Establish path to images that need renamed
path_to_img = raw_input("your path to your imgs\n")
glob_name = os.path.join(path_to_img, '*.tif')
old_image_filenames = glob.glob(glob_name)

# Make list from csv of new names
path_to_namefile = raw_input('your path to name file\n')
f = open(path_to_namefile)
csv_f = csv.reader(f)
csv_names = []
for i in csv_f:
    csv_names.append(i)
new_names = []
for i in csv_names:
    new_names.append(str(i)[2:-2])
print new_names

def get_name_uid(name):
    uid = name.split('.')[0]
    uid = uid.split('_')[-1]
    return uid

def get_uid_filename_map(filenames, get_uid_func):
    uid_filename_map = {}
    for filename in filenames:
        uid_filename_map[get_uid_func(filename)] = filename
    return uid_filename_map

uid_old_filename = get_uid_filename_map(old_image_filenames, get_name_uid)
uid_new_filename = get_uid_filename_map(new_names, get_name_uid)

for uid in uid_old_filename.keys():
    if uid in uid_new_filename:
        os.rename(os.path.join(path_to_img, uid_old_filename[uid]),
                  os.path.join(path_to_img, uid_new_filename[uid]))
I had to work through the reading-the-csv part a bit. And I don't think the numpy module is actually used; not sure how that wound up in there, but this works well so I didn't change anything more.
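For what it's worth, the str(i)[2:-2] slicing works because each csv row stringifies as a one-element list like "['name']". A minimal sketch of a more direct way to read that first column (same one-column csv layout assumed):

import csv

with open(path_to_namefile) as f:
    new_names = [row[0] for row in csv.reader(f) if row]  # first column, skipping blank lines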