Python - get number of columns from csv file

I'm trying to determine the number of columns that are present in a CSV file in Python v2.6. This has to work in general: for any input file I pass, I should be able to obtain the number of columns in the file.
Sample input file: love hurt hit
Other input files: car speed beforeTune afterTune repair
So far, what I have tried to do is read the file (which has lots of rows), get the first row, and then count the number of words in that first row. The delimiter is ,. I ran into a problem when I try to split the headings based on the sample input: len(headings) gives me 14, which is wrong, as it should give me 3. Any ideas? I am a beginner.
with open(filename1, 'r') as f1:
    csvlines = csv.reader(f1, delimiter=',')
    for lineNum, line in enumerate(csvlines):
        if lineNum == 0:
            #colCount = getColCount(line)
            headings = ','.join(line)  # gives me `love, hurt, hit`
            print len(headings)  # gives me 14; I need 3
        else:
            a.append(line[0])
            b.append(line[1])
            c.append(line[2])

len("love, hurt, hit") is 14 because it's a string.
The len you want is of line, which is a list:
print len(line)
This outputs the number of columns, rather than the number of characters.
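For instance, a minimal sketch of the same idea as a complete snippet (assuming the file has a header row and a comma delimiter, as in the question):
import csv

with open(filename1, 'r') as f1:
    reader = csv.reader(f1, delimiter=',')
    header = next(reader)  # first row as a list of column values
    print len(header)      # number of columns, e.g. 3 for love,hurt,hit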

# old school
import csv

c = 0
field = {}
with open('csvmsdos.csv', 'r') as csvFile:
    reader = csv.reader(csvFile)
    for row in reader:
        field[c] = row
        print(field[c])
        c = c + 1
    row = len(field[0])   # number of columns (length of the first row)
    column = len(field)   # number of rows that were read
    csvFile.close()       # redundant: the with block closes the file itself

A simple solution:
with open(filename1) as file:
    # for each row in a given file
    for row in file:
        # split that row into list elements
        # using comma (",") as a separator,
        # count the elements and print
        print(len(row.split(",")))
        # break out of the loop after
        # first iteration
        break
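One caveat (not from the original answer): a plain split(",") over-counts when a field itself contains a quoted comma, while csv.reader respects the quoting. A tiny sketch of the difference:
import csv

line = 'name,"city, state",age'
print(len(line.split(",")))           # 4 -- the quoted comma is split on
print(len(next(csv.reader([line]))))  # 3 -- csv.reader keeps "city, state" together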

Related

Python: Trying to search through csv first column and splitting it if len > 30 in another file on the same row

The title isn't big enough for me to explain this so here it goes:
I have a csv file looking something like this:
Example csv containing
long string with some special characters , number, string, number
long string with some special characters , number, string, number
long string with some special characters , number, string, number
long string with some special characters , number, string, number
I want to go through the first column and, if the length of the string is greater than 20, do this:
LINE 20 : long string with som, e special characters
split the string, modify the first csv so it keeps the first part of the string, then create a new csv and add the other part on the same line number, leaving the rest as whitespace.
What I have for now is this:
The function below doesn't do anything right now; it's just what I wrote to try to explain the problem to myself and figure out how I could write the split string to a new file.
fileName = file name
maxCollumnLength = number of rows in the whole set
lineNum = line number of a string that is greater then 20
splitString = second part of the split string that should be written on another file
def newopenfile(fileName, maxCollumnLength, lineNum, splitString):
    with open(fileName, 'rw', encoding="utf8") as nf:
        writer = csv.writer(fileName, quoting=csv.QUOTE_NONE)
        for i in range(0, maxCollumnLength-1):
            #write whitespace until reaching lineNum of a string thats bigger then 20 then write that part of the string to a csv
This goes through the first column and checks the length:
fileName = 'uskrs.csv'
firstColList = []  #an empty list to store the first column
splitString = []
i = 0
with open(fileName, 'rw', encoding="utf8") as rf:
    reader = csv.reader(rf, delimiter=',')
    for row in reader:
        if len(row[0]) > 20:
            i += 1
            #split row and parse the other end of the row to newopenfile(fileName, len(reader), i, splitString )
            #print(row[0])
            #for debuging
            firstColList.append(row[0])
From this point I am stuck on how to actually change the string in the csv and how to split it.
THE STRING COULD ALSO HAVE 60+ chars, so it would need splitting more than 2 times and storing in more than 2 csvs.
I suck at explaining the problem, so if you have any questions please do ask.
Okay, so I was successful in dividing the first column if it has length greater than 20, and replacing the first column with the first 20 chars:
import csv

def checkLength(column, readFile, writeFile, maxLen):
    counter = 0
    i = 0
    idxSplitItems = []
    final = []
    newSplits = 0
    with open(readFile, 'r', encoding="utf8", newline='') as f:
        reader = csv.reader(f)
        your_list = list(reader)
    final = your_list
    for sublist in your_list:
        #del sublist[-1] - remove last invisible element
        i += 1
        data = removeUnwanted(sublist[column])
        print(data)
        if len(data) > maxLen:
            counter += 1  # Number of large
            idxSplitItems.append(split_bylen(i, data, maxLen))
            if len(idxSplitItems) > newSplits: newSplits = len(idxSplitItems)
            final[i-1][column] = split_bylen(i, data, maxLen)[1]
            final[i-1][column] = removeUnwanted(final[i-1][column])
            print("After split data: " + data)
            print("After split final: " + final[i-1][column])
    writeSplitToCSV(writeFile, final)
    checkCols(final, 6)
    return final, idxSplitItems

def removeUnwanted(data):
    data = data.replace(',', ' ')
    return data

def split_bylen(index, item, maxLen):
    clean = removeUnwanted(item)
    splitList = [clean[ind:ind+maxLen] for ind in range(0, len(item), maxLen)]
    splitList.insert(0, index)
    return splitList

def writeSplitToCSV(writeFile, data):
    with open(writeFile, 'w', encoding="utf8", newline='') as f:
        writer = csv.writer(f)
        writer.writerows(data)

def checkCols(data, columns):
    for sublist in data:
        if len(sublist)-1 != columns:
            print("[X] This row doesnt have the same amount of columns as others: " + sublist)
        else:
            print("All okay")

#len(data) #how many split items
#print(your_list[0][0])
#print("Number of large: ", counter)
final, idxSplitItems = checkLength(0, 'test.csv', 'final.csv', 30)
print("------------------------")
print(idxSplitItems)
print("-------------------------")
print(final)
Now I have a problem with this part of the code, notice this:
print("After split data: "+ data)
print("After split final: "+ final[i-1][column])
This is to check whether removing the comma worked.
With the example of
"BUTKOVIĆ VESNA , DIPL.IUR."
data returns
BUTKOVIĆ VESNA DIPL.IUR.
but final returns
BUTKOVIĆ VESNA , DIPL.IUR.
Why does my final have the "," again while in data it's gone? It must be something in split_bylen() that makes it do that.
Dictionaries are fun!
To overwrite the original csv see here. You would have to use DictReader & DictWriter. I keep your method of reading just for clarity.
writecsvs = {} #store each line of each new csv
# e.g. {'csv1':[[row0_split1,row0_num,row0_str,row0_num],[row1_split1,row1_num,row1_str,row1_num],...],
#       'csv2':[[row0_split2,row0_num,row0_str,row0_num],[row1_split2,row1_num,row1_str,row1_num],...],
#       .
#       .
#       .}
with open(fileName, mode='rw', encoding="utf-8-sig") as rf:
    reader = csv.reader(rf, delimiter=',')
    for row in reader:
        col1 = row[0]
        # check size & split
        # decide number of new csvs
        # overwrite original csv
        # store new content in writecsvs dict
for # Loop over each csv in writecsvs:
    writelines = # Get List of Lines
    out_file = open('csv1.csv', mode='w') # use the keys in writecsvs for filenames
    for line in writelines:
        out_file.write(line)
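Since DictReader & DictWriter are only mentioned above, here is a minimal, hedged sketch of how they could be used to rewrite the first column in place, assuming the csv has a header row; the shorten() helper and the 20-character limit are placeholders, not from the original question:
import csv

def shorten(value, max_len=20):
    # placeholder helper: keep only the first max_len characters
    return value[:max_len]

with open('uskrs.csv', encoding="utf8", newline='') as rf:
    reader = csv.DictReader(rf)
    fieldnames = reader.fieldnames
    rows = [dict(row, **{fieldnames[0]: shorten(row[fieldnames[0]])}) for row in reader]

with open('uskrs.csv', 'w', encoding="utf8", newline='') as wf:
    writer = csv.DictWriter(wf, fieldnames=fieldnames)
    writer.writeheader()
    writer.writerows(rows)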
Hope this helps.

extract float number after specific character

Here is my problem: I want to get all the float numbers after the equals sign, but I don't know how to do it.
All my numbers are in a file, and I load this file into a list in Python, so I want to get the numbers after the equals sign.
Example: "average frame rate = 22.5566"
I want the 22.5566 float number.
Thanks for your help.
This is for a script that will calculate the average of the frame rate values in a .txt file.
I tried this:
l = []
with open("floatnumber.txt", 'r') as f:
    csv_f = csv.reader(f, delimiter=',')
    for row in csv_f:
        l.append(log(float(row[1])))
print (l)
but it doesn't work; I got nothing.
Here is a piece of my .txt file:
AnimatonPath completed in 0.0455575 seconds, completing 1 frames,
average frame rate = 21.9503
AnimatonPath completed in 0.0691637 seconds, completing 1 frames,
average frame rate = 14.4585
and I only want the float numbers after the equals sign.
You can also use regex, which might be a more robust way of solving the problem:
with open("floatnumber.txt", 'r') as f:
csv_f = csv.reader(f, delimiter=',')
for row in csv_f:
results = re.findall("=\s*?(\d+\.\d+|\d+)", row[2])
l.append(log(float(results[0])))
print (l)
Don't forget to use
import re
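Since the stated goal is a script that averages the frame rate values, here is a minimal sketch of that idea using re alone, without the csv module (how the pieces fit together is my assumption, not part of the original answer):
import re

rates = []
with open("floatnumber.txt", 'r') as f:
    for line in f:
        # grab a number that follows an equals sign
        match = re.search(r"=\s*(\d+(?:\.\d+)?)", line)
        if match:
            rates.append(float(match.group(1)))

if rates:
    print(sum(rates) / len(rates))  # average frame rate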
Use split():
l = []
with open("floatnumber.txt", 'r') as f:
    for row in f:
        l.append(row.split('= ')[1])
print (l)
This will take the second element of the split, in other words whatever comes after the equals sign. You might want to remove the newline as well:
for row in f:
    l.append(row.split('= ')[1].strip('\n'))

python: adding a zero if my value is less than 3 digits long

I have a csv file where I need to add a zero in front of a number if it's less than 4 digits.
I only have to update a particular row:
import csv
f = open('csvpatpos.csv')
csv_f = csv.reader(f)
for row in csv_f:
    print row[5]
Then I want to parse through that row and add a 0 to the front of any number that is shorter than 4 digits, and then write it to a new csv file with the adjusted data.
You want to use string formatting for these things:
>>> '{:04}'.format(99)
'0099'
Format String Syntax documentation
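As a hedged sketch of how that formatting could be applied to just the one column the question prints (row[5]), assuming the values in that column are plain integers; the output filename csvadjusted.csv is a placeholder:
import csv

with open('csvpatpos.csv') as f_in, open('csvadjusted.csv', 'w') as f_out:
    reader = csv.reader(f_in)
    writer = csv.writer(f_out)
    for row in reader:
        if row[5].isdigit():
            row[5] = '{:04}'.format(int(row[5]))  # pad to 4 digits
        writer.writerow(row)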
When you think about parsing, you either need to think about regex or pyparsing. In this case, regex would perform the parsing quite easily.
But that's not all, once you are able to parse the numbers, you need to zero fill it. For that purpose, you need to use str.format for padding and justifying the string accordingly.
Consider your string
st = "parse through that row and add a 0 to the front of any number that is shorter than 4 digits."
For the above string, you can do something like:
Implementation
import re

parts = re.split(r"(\d{0,3})", st)
''.join("{:>04}".format(elem) if elem.isdigit() else elem for elem in parts)
Output
'parse through that row and add a 0000 to the front of any number that is shorter than 0004 digits.'
The following code will read in the given csv file, iterate through each row and each item in each row, and output it to a new csv file.
import csv
import os

f = open('csvpatpos.csv')
# open temp .csv file for output
out = open('csvtemp.csv','w')
csv_f = csv.reader(f)
for row in csv_f:
    # create a temporary list for this row
    temp_row = []
    # iterate through all of the items in the row
    for item in row:
        # add the zero filled value of each temporary item to the list
        temp_row.append(item.zfill(4))
    # join the current temporary list with commas and write it to the out file
    out.write(','.join(temp_row) + '\n')
out.close()
f.close()
Your results will be in csvtemp.csv. If you want to save the data with the original filename, just add the following code to the end of the script
# remove original file
os.remove('csvpatpos.csv')
# rename temp file to original file name
os.rename('csvtemp.csv','csvpatpos.csv')
Pythonic Version
The code above is very verbose in order to make it understandable. Here is the code refactored to make it more Pythonic:
import csv

new_rows = []
with open('csvpatpos.csv','r') as f:
    csv_f = csv.reader(f)
    for row in csv_f:
        row = [ x.zfill(4) for x in row ]
        new_rows.append(row)
with open('csvpatpos.csv','wb') as f:
    csv_f = csv.writer(f)
    csv_f.writerows(new_rows)
Will leave you with two hints:
s = "486"
s.isdigit() == True
for finding what things are numbers.
And
s = "486"
s.zfill(4) == "0486"
for filling in zeroes.
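Putting the two hints together, a minimal sketch of the padding step (my combination, assuming only digit-only fields should be padded):
row = ["12", "abc", "486"]
padded = [s.zfill(4) if s.isdigit() else s for s in row]
# padded == ["0012", "abc", "0486"]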

Find a specific word in a file, get the content of the row, and save it in an array

I have a .xls file that I convert to .csv; then I read this .csv until a specific line that contains the word clientegen, get that row, and put it in an array.
This is my code so far:
import xlrd
import csv

def main():
    print "Converts xls to csv and reads csv"
    wb = xlrd.open_workbook('ejemplo.xls')
    sh = wb.sheet_by_name('Hoja1')
    archivo_csv = open('fichero_csv.csv', 'wb')
    wr = csv.writer(archivo_csv, quoting=csv.QUOTE_ALL)
    for rownum in xrange(sh.nrows):
        wr.writerow(sh.row_values(rownum))
    archivo_csv.close()
    f = open('fichero_csv.csv', 'r')
    for lines in f:
        print lines

if __name__ == '__main__':
    main()
This prints me:
[... a lot of more stuff ...]
"marco 4","","","","","","","","","","","","","","",""
"","","","","","","","","","","","","","","",""
"","","","","","","","","","","","","","","",""
"clientegen","maier","embega","Jegan ","tapa pure","cil HUF","carcHUF","tecla NSS","M1 NSS","M2 nss","M3 nss","doble nss","tapon","sagola","clip volvo","pillar"
"pz/bast","33.0","40.0","34.0","26.0","80.0","88.0","18.0","16.0","8.0","6.0","34.0","252.0","6.0","28.0","20.0"
"bast/Barra","5.0","3.0","6.0","8.0","10.0","4.0","10.0","10.0","10.0","10.0","8.0","4.0","6.0","10.0","6.0"
[... a lot of more stuff ...]
What I want to do is take that clientegen line and save the content of the row in a new string array named finalarray, for example:
finalarray = ["maier", "embega", "Jegan", "tapa pure", "cil HUF", "carcHUF", "tecla NSS", "M1 NSS", "M2 nss", "M3 nss", "doble nss", "tapon", "sagola", "clip volvo", "pillar"]
I'm not very familiar with Python file reading, so I would appreciate it if someone could give me a hand finding that line, getting those values, and putting them in an array. Thanks in advance.
If you swap this for loop out for your for loop, it should do the trick:
for rownum in xrange(sh.nrows):
    row = sh.row_values(rownum)
    if row[0] == "clientegen":       # check if "clientegen" is the first element of the row
        finalarray = list(row[1:])   # if so, copy the rest of the row (without the label) into `finalarray`
    wr.writerow(row)
If there will ever be more than one "clientegen" line, we can adjust this code to save all of them.
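For example, a hedged sketch of that adjustment, collecting every matching row into a list of lists (the finalarrays name is mine; sh and wr come from the question's code):
finalarrays = []  # one entry per "clientegen" row
for rownum in xrange(sh.nrows):
    row = sh.row_values(rownum)
    if row[0] == "clientegen":
        finalarrays.append(list(row[1:]))
    wr.writerow(row)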
If you are just looking for the line that contains clientegen, then you could try:
finalarray = list()
with open("fichero_csv.csv") as f:
    for line in f:  #loop through all the lines
        words = [w.strip().strip('"') for w in line.split(",")]  #get a list of the comma-separated values, without quotes
        if "clientegen" in words:  #check to see if your word is in the list
            finalarray = words[1:]  #if so, put the values after the label in your finalarray
            break  #stop checking any further

Create an array from data in a table using Python

I want to get data from a table in a text file into a python array. The text file that I am using as an input has 7 columns and 31 rows. Here is an example of the first two rows:
10672 34.332875 5.360831 0.00004035881220 0.00000515052523 4.52E-07 6.5E-07
12709 40.837833 19.429158 0.00012010938453 -0.00000506426720 7.76E-06 2.9E-07
The code that I have tried to write isn't working: it is not reading one line at a time when it goes through the for loop.
data = []
f = open('hyadeserr.txt', 'r')
while True:
    eof = "no"
    array = []
    for i in range(7):
        line = f.readline()
        word = line.split()
        if len(word) == 0:
            eof = "yes"
        else:
            array.append(float(word[0]))
    print array
    if eof == "yes": break
    data.append(array)
Any help would be greatly appreciated.
A file with space-separated values is just a dialect of the classic comma-separated values (CSV) file where the delimiter is a space (' '), possibly followed by more spaces, which can be ignored.
Happily, Python comes with a csv.reader class that understands dialects.
You should use this:
Example:
#!/usr/bin/env python
import csv

csv.register_dialect('ssv', delimiter=' ', skipinitialspace=True)
data = []
with open('hyadeserr.txt', 'r') as f:
    reader = csv.reader(f, 'ssv')
    for row in reader:
        floats = [float(column) for column in row]
        data.append(floats)
print data
If you don't want to use csv here, since you don't really need it:
data = []
with open("hyadeserr.txt") as file:
    for line in file:
        data.append([float(f) for f in line.strip().split()])
Or, if you know for sure that the only extra chars are spaces and line ending \n, you can turn the last line into:
data.append([float(f) for f in line[:-1].split()])
