delete line, of a .csv, when certain column has no value python - python

I have a .csv file and I am trying to erase some rows/lines that have no usable information. I want to delete lines that do not have a value in a certain column. I am kinda new to programming and I could not find a way to do this. Is this possible?
I tried to delete a line if it did not have a certain number in it but that did not work as wel.
f = open('C:myfile.csv', 'rb')
lines = f.readlines()
f.close()
filename = 'myfile.csv'
f = open(filename, 'wb')
for line in lines:
if line != "1":
f.write(line)
f.close()
here are some sample rows:
0,593 0,250984 -20,523384 -25,406271
0,594 0,250984
0,595 0,250984
0,596 0,250984
0,597 0,250984 -15,793088 -21,286336
0,598 0,250984
0,599 0,908811
0,6 0,893612
0,601 0,784814 -12,130922 -11,825742
0,602 0,909238
0,603 0,25309
0,604 0,38435
0,605 0,602954 -8,316167 -3,43328
0,606 0,642628
0,607 0,39201
0,608 0,384289
0,609 0,251656 -11,825742 -5,874723
So I want to delete the rows when there is no number in the third and fourth column.

You can use Python's csv library to help you do this. Your data appears to be tab delimited, as such the following script should work:
import csv
with open('input.csv', 'rb') as f_input, open('output.csv', 'wb') as f_output:
csv_output = csv.writer(f_output, delimiter = '\t')
for row in csv.reader(f_input, delimiter = '\t'):
if len(row[2]) and len(row[3]):
csv_output.writerow(row)
Giving you an output.csv file containing:
0,593 0,250984 -20,523384 -25,406271
0,597 0,250984 -15,793088 -21,286336
0,601 0,784814 -12,130922 -11,825742
0,605 0,602954 -8,316167 -3,43328
0,609 0,251656 -11,825742 -5,874723
Note, each of your rows appears to have 4 columns (your data has tabs for these missing entries), because of this, it is not enough to simply test the length is 4. You need to test the contents of the two cells.

import csv
fn_in = 'test.csv'
fn_out = 'outfile.csv'
with open(fn_in, 'r') as inp, open(fn_out, 'w') as out:
writer = csv.writer(out)
for row in csv.reader(inp):
if len(row)==4:
writer.writerow(row)

Related

Leaving just the first column in the csv file using python

I am trying to leave just the first column of a csv file. But it seems not to be working for me and can not find the working solution.
def leavethefirstcolumn(filename):
with open(filename) as f, open('out.csv', "w") as out:
reader = csv.reader(f)
for row in reader:
out.write(row[0])
import csv
def leavethefirstcolumn(filename):
with open(filename) as file, open('out.csv', "w") as out:
reader = csv.reader(file)
for row in reader:
out.write(row[0] + "\n")
# example of using the function
leavethefirstcolumn("in.csv")
You are calling csv.reader(file) while on the previous line, you wrote with open(filename) as f instead of with open(filename) as file.
Also when you are writing to out, you should add a new line
character '\n'

how to remove a specific row in csv file based upon the duplicate using python?

I have a csv file which has many rows looks like below.
20170718 014418.475476 [UE:142 CRNTI : 446]
20170718 094937.865362 [UE:142 CRNTI : 546]
Above are the sample two rows of the csv file.
Now if we see the rows there is a string called [UE:142...] which repeats in the csv file.
Problem statement:
I want to remove the duplicate row which contains string [UE:< > more than once in that csv file i.e in the above rows the string [UE:142 repeated twice so the second one must get deleted, in this way there are many random strings like [UE:142 .
Can anyone please help me with python script for the above problem statement?
import csv
reader = open("test.csv", "r")
lines = reader.read().split(" ")
reader.close()
writer = open("test_1.csv", "w")
for line in set(lines):
writer.write(line)
writer.close()
from csv import reader, writer as csv_writer
csv_path = '<your csv file path here>'
def remove_duplicate_ue (csv_path):
found = False
with open (csv_path, 'r') as csv_file:
for line in reader (csv_file, delimiter = ' '):
if 'UE:' not in line [-1]:
yield line
elif not found:
yield line
found = True
def write_csv (csv_path, rows, delimiter = ' '):
with open (csv_path, 'w') as csv_file:
writer = csv_writer (csv_file, delimiter = delimiter)
for row in rows:
writer.writerow (row)
write_csv (csv_path, tuple (remove_duplicate_ue (csv_path)))

Delete blank columns from header row

I'm pretty new to python and I'm having trouble deleting the header columns after the 25th column. There are 8 more extra columns that have no data so I'm trying to delete those columns. Columns 1-25 have like 50,000k of data and the rest of the columns are blank.How would I do this? My code for now is able to clean up the file but I cant delete the headers for row[0] AFTER COLUMN 25.
Thanks
import csv
my_file_name = "NVG.txt"
cleaned_file = "cleanNVG.csv"
remove_words = ['INAC-EIM','-INAC','TO-INAC','TO_INAC','SHIP_TO-inac','SHIP_TOINAC']
with open(my_file_name, 'r', newline='') as infile, open(cleaned_file, 'w',newline='') as outfile:
writer = csv.writer(outfile)
cr = csv.reader(infile, delimiter='|')
writer.writerow(next(cr)) #I think this is why is not working
for line in (r[0:25] for r in cr):
#del line [26:32]
if not any(remove_word in element for element in line for remove_word in remove_words):
line[11]= line[11][:5]
writer.writerow(line)
You've found the line with the problem - all you have to do is only print the headers you want. next(cr) reads the header line, but you pass the entire line to writer.writerow().
Instead of
writer.writerow(next(cr))
you want:
writer.writerow(next(cr)[:25])
([:25] and [0:25] are the same in Python)

Add a new column to a csv file in python

I am trying to add a column to a csv file that combines strings from two other columns. Whenever I try this I either get an output csv with only the new column or an output with all of the original data and not the new column.
This is what I have so far:
with open(filename) as csvin:
readfile = csv.reader(csvin, delimiter=',')
with open(output, 'w') as csvout:
writefile = csv.writer(csvout, delimiter=',', lineterminator='\n')
for row in readfile:
result = [str(row[10]) + ' ' + str(row[11])]
writefile.writerow(result)
Any help would be appreciated.
No input to test, but try this. Your current approach doesn't include the existing data for each row that already exists in your input data. extend will take the list that represents each row and then add another item to that list... equivalent to adding a column.
import csv
with open(filename) as csvin:
readfile = csv.reader(csvin, delimiter=',')
with open(output, 'w') as csvout:
writefile = csv.writer(csvout, delimiter=',', lineterminator='\n')
for row in readfile:
row.extend([str(row[10]) + ' ' + str(row[11])])
writefile.writerow(row)
I assume that glayne wants to combine column 10 and 11 into one.
In my approach, I concentrate on how to transform a single row first:
def transform_row(input_row):
output_row = input_row[:]
output_row[10:12] = [' '.join(output_row[10:12])]
return output_row
Once tested to make sure that it works, I can move on to replace all rows:
with open('data.csv') as inf, open('out.csv', 'wb') as outf:
reader = csv.reader(inf)
writer = csv.writer(outf)
writer.writerows(transform_row(row) for row in reader)
Note that I use the writerows() method to write multiple rows in one statement.
Below code snippet combines strings in column 10 and column 11 in each row and add that to the end of the each row
import csv
input = 'test.csv'
output= 'output.csv'
with open(input, 'rb') as csvin:
readfile = csv.reader(csvin, delimiter=',')
with open(output, 'wb') as csvout:
writefile = csv.writer(csvout, delimiter=',', lineterminator='\n')
for row in readfile:
result = row + [row[10]+row[11]]
writefile.writerow(result)

Create subset of large CSV file and write to new CSV file

I would like to create a subset of a large CSV file using the rows that have the 4th column ass "DOT" and output to a new file.
This is the code I currently have:
import csv
outfile = open('DOT.csv','w')
with open('Service_Requests_2015_-_Present.csv', newline='', encoding='utf-8') as f:
reader = csv.reader(f)
for row in reader:
if row[3] == "DOT":
outfile.write(row)
outfile.close()
The error is:
outfile.write(row)
TypeError: must be str, not list
How can I manipulate row so that I will be able to just straight up do write(row), if not, what is the easiest way?
You can combine your two open statements, as the with statement accepts multiple arguments, like this:
import csv
infile = 'Service_Requests_2015_-_Present.csv'
outfile = 'DOT.csv'
with open(infile, encoding='utf-8') as f, open(outfile, 'w') as o:
reader = csv.reader(f)
writer = csv.writer(o, delimiter=',') # adjust as necessary
for row in reader:
if row[3] == "DOT":
writer.writerow(row)
# no need for close statements
print('Done')
Make your outfile a csv.writer and use writerow instead of write.
outcsv = csv.writer(outfile, ...other_options...)
...
outcsv.writerow(row)
That is how I would do it... OR
outfile.write(",".join(row)) # comma delimited here...
In Above code you are trying to write list with file object , we can not write list that give error "TypeError: must be str, not list" you can convert list in string format then you able to write row in file. outfile.write(str(row))
or
import csv
def csv_writer(input_path,out_path):
with open(out_path, 'ab') as outfile:
writer = csv.writer(outfile)
with open(input_path, newline='', encoding='utf-8') as f:
reader = csv.reader(f)
for row in reader:
if row[3] == "DOT":
writer.writerow(row)
outfile.close()
csv_writer(input_path,out_path)
[This code for Python 3 version. In Python 2.7, the open function does not take a newline argument, hence the TypeError.]

Categories

Resources