How to get data with python in certain rows and columns - python

I have data like this, for example:
2017/06/07 10:42:35,THREAT,url,192.168.1.100,52.25.xxx.xxx,Rule-VWIRE-03,13423523,,web-browsing,80,tcp,block-url
2017/06/07 10:43:35,THREAT,url,192.168.1.101,52.25.xxx.xxx,Rule-VWIRE-03,13423047,,web-browsing,80,tcp,allow
2017/06/07 10:43:36,THREAT,end,192.168.1.100,52.25.xxx.xxx,Rule-VWIRE-03,13423047,,web-browsing,80,tcp,block-url
2017/06/07 10:44:09,TRAFFIC,end,192.168.1.101,52.25.xxx.xxx,Rule-VWIRE-03,13423111,,web-browsing,80,tcp,allow
2017/06/07 10:44:09,TRAFFIC,end,192.168.1.103,52.25.xxx.xxx,Rule-VWIRE-03,13423111,,web-browsing,80,tcp,block-url
How can I parse this so that I get only columns 4, 5, 7, and 12 from every row?
This is my code :
import csv
file=open('filename.log', 'r')
f=open('fileoutput', 'w')
lines = file.readlines()
for line in lines:
result.append(line.split(' ')[4,5,7,12])
f.write (line)
f.close()
file.close()

The right way with csv.reader and csv.writer objects:
import csv
with open('filename.log', 'r') as fr, open('filoutput.csv', 'w', newline='') as fw:
reader = csv.reader(fr)
writer = csv.writer(fw)
for l in reader:
writer.writerow(v for k,v in enumerate(l, 1) if k in (4,5,7,12))
filoutput.csv contents:
192.168.1.100,52.25.xxx.xxx,13423523,block-url
192.168.1.101,52.25.xxx.xxx,13423047,allow
192.168.1.100,52.25.xxx.xxx,13423047,block-url
192.168.1.101,52.25.xxx.xxx,13423111,allow
192.168.1.103,52.25.xxx.xxx,13423111,block-url

This is wrong:
line.split(' ')[4,5,7,12]
You want this:
fields = line.split(' ')
fields[4], fields[5], fields[7], fields[12]

a solution using pandas
import pandas as pd
df = pd.read_csv('filename.log', sep=',', header=None, index_col=False)
df[[3, 4, 6, 11]].to_csv('fileoutput.csv', header=False, index=False)
Note the use of [3, 4, 6, 11] instead of [4, 5, 7, 12] to account for 0-indexing in the dataframe's columns.
Content of fileoutput.csv:
192.168.1.100,52.25.xxx.xxx,13423523,block-url
192.168.1.101,52.25.xxx.xxx,13423047,allow
192.168.1.100,52.25.xxx.xxx,13423047,block-url
192.168.1.101,52.25.xxx.xxx,13423111,allow
192.168.1.103,52.25.xxx.xxx,13423111,block-url

You're on the right path, but your syntax is off. Here's an example using csv module:
import csv
log = open('filename.log')
# newline='\n' to prevent csv.writer to include additional newline when writing to file
log_write = open('fileoutput', 'w', newline='\n')
csv_log = csv.reader(log, delimiter=',')
csv_writer = csv.writer(log_write, delimiter=',')
for line in csv_log:
csv_writer.writerow([line[0], line[1], line[2], line[3]]) # output first 4 columns
log.close()
log_write.close()

Using a comprehension, you could do something like this without necessarily using the csv module:
file=open('filename.log','r')
f=open('fileoutput', 'w')
lines = file.readlines()
for line in lines:
f.write(','.join(line.split(',')[i] for i in [3,4,6,11]))
f.close()
file.close()
Note that the indices are 3, 4, 6, 11 because Python lists are zero-indexed.
output
cat fileoutput
192.168.1.100,52.25.xxx.xxx,13423523,block-url
192.168.1.101,52.25.xxx.xxx,13423047,allow
192.168.1.100,52.25.xxx.xxx,13423047,block-url
192.168.1.101,52.25.xxx.xxx,13423111,allow
192.168.1.103,52.25.xxx.xxx,13423111,block-url

Related

Adding two lists to separate columns in csv file in Python

I have two lists as below.
a=[1,3,5,6,7,12]
b=[23,45,67,67]
I need to have the lists in separate columns in csv file like this:
Item,Quantity
[1,3,5,6,7,12],[23,45,67,67]
I tried using below code snippets, but I am not getting the result I want.
with open('sample_dataset.csv', 'w', encoding = 'utf-16', newline='') as outfile:
rowlists = zip(a, b)
writer = csv.writer(outfile)
for row in rowlists:
writer.writerow(row)
The result is:
1,23
3,45
5,67
6,67
Using pandas
d=[a,b]
my_df = pd.DataFrame(d)
my_df.to_csv('sample_dataset.csv', index=False, header=False)
The result is in different rows:
1,3,5,6,7.0,12.0
23,45,67,67,,
Your help and inputs are appreciated. Thank you
Using csv Module.
Ex:
a=[1,3,5,6,7,12]
b=[23,45,67,67]
import csv
with open(filename, "w") as outfile:
writer = csv.writer(outfile, escapechar=' ', quoting=csv.QUOTE_NONE)
writer.writerow(["Item", "Quantity"])
writer.writerow([a, b])
a=[1,3,5,6,7,12]
b=[23,45,67,67]
with open('sample_dataset.csv', 'w') as f:
f.write("Item,Quantity\n")
f.write("{},{}\n".format(a, b))
Output file:
'''
Item,Quantity
[1, 3, 5, 6, 7, 12],[23, 45, 67, 67]
'''
print(f'Item,Quantity\n[{",".join(map(str, a))}],[{",".join(map(str, b))}]')
Item,Quantity
[1,3,5,6,7,12],[23,45,67,67]

Select specific columns from CSV file

My code is able to get the 28 columns of a text file and format/remove some data. How can I select specific columns? The columns I want are 0 to 25, and column 28. What is the best approach?
Thanks in advance!
import csv
import os
my_file_name = os.path.abspath('NVG.txt')
cleaned_file = "cleanNVG.csv"
remove_words = ['INAC-EIM','-INAC','TO-INAC','TO_INAC','SHIP_TO-inac','SHIP_TOINAC']
with open(my_file_name, 'r', newline='') as infile, open(cleaned_file, 'w',newline='') as outfile:
writer = csv.writer(outfile)
cr = csv.reader(infile, delimiter='|')
writer.writerow(next(cr)[:28])
for line in (r[0:28] for r in cr):
if not any(remove_word in element for element in line for remove_word in remove_words):
line[11]= line[11][:5]
writer.writerow(line)
infile.close()
outfile.close()
Have a look at pandas.
import pandas as pd
usecols = list(range(26)) + [28]
data = pd.read_csv(my_file_name, usecols=usecols)
You can also conveniently write the data back to a new file
with open(cleaned_file, 'w') as f:
data.to_csv(f)
Exclude column 26 and column 27 from each row using filter():
for row in cr:
content = list(filter(lambda x: row.index(x) not in [25,26], row))
# work with the selected columns content

Remove double quotes from csv row

I have two files: src.csv and dst.csv. The code below reads the second row from src.csv and appends it to dst.csv. The issue is the output in dst.csv is contained within double quotes ("").
Expected result:
10, 5, 5, 10, 1
Output:
"10, 5, 5, 10, 1"
I have tried using quoting=csv.QUOTE_NONE, escapechar=' ' in csv.writer; it does remove the quotes, but the output now contains a blank space after each csv value.
Here is my code:
import csv
with open('src.csv', 'r') as src, open('dst.csv', 'a', newline='') as dst:
wr = csv.writer(dst, dialect='excel', delimiter=',', quoting=csv.QUOTE_NONE, escapechar=' ')
next(src)
for row in src:
wr.writerow([row.rstrip('\n')])
Any suggestions?
You don't split the source file's rows into columns, so you end up writing a one-column CSV. Use a reader instead:
import csv
with open('src.csv', 'r') as src, open('dst.csv', 'a', newline='') as dst:
wr = csv.writer(dst, dialect='excel', delimiter=',', quoting=csv.QUOTE_NONE, escapechar=' ')
next(src)
reader = csv.reader(src)
for row in reader:
wr.writerow(row)
I think you have to use csv.reader() to read each row as a list of values. Right now you read each row as a single string, and csv.writer has to wrap it in "" because the string contains commas.

Adding a column to existing csv file as an array (Python)

Have a csv file called 'data.csv' in the following format:
test1,test2,test3
1,2,3
4,5,6
7,8,9
Given a list in the format ['test4', 4, 7, 10], how can I create a new csv file 'adjusted.csv' with all the data from data.csv and the added column like so:
test1,test2,test3, test4
1,2,3,4
4,5,6,7
7,8,9,10
Read the lines in:
with open('data.csv', 'r') as fi:
lines = [[i.strip() for i in line.strip().split(',')] \
for line in fi.readlines()]
col = ['test4', 4, 7, 10]
Concatenate each row with corresponding element of col. Using enumerate to help keep track of which list index to use.
new_lines = [line + [str(col[i])] for i, line in enumerate(lines)]
Output to file
with open('adjusted.csv', 'w') as fo:
for line in new_lines:
fo.write(','.join(line) + '\n')
I would just treat the csv like the raw text it is. Load in each line, strip off the line break, append the new entry, then put the line break back. This only works if the entries in test4 are guaranteed to be in the same order as the rows in data.csv.
If instead test4 needs to be added to rows based on meeting certain conditions, that would change things a lot. In that case you would probably want to turn both into Pandas dataframes, then perform a proper merge on the required conditions.
test4 = ['test4', 4, 7, 10]
with open(data.csv, 'r') as ifile
with open(adjusted.csv, 'w') as ofile:
for line, new in zip(ifile, test4):
new_line = line.rstrip('\n') + ',' + str(new) + '\n'
ofile.write(new_line)
You can also condense the two with statements into a single one:
with open(data.csv, 'r') as ifile, open(adjusted.csv, 'w') as ofile:
Do whichever reads more clearly.
Since you're working with csv files use the csv readers and writers to improve readability:
import csv
new_data = ['test4', 4, 7, 10]
with open(r'data.csv', 'r') as in_csv, open(r'adj_data.csv', 'w') as out_csv:
reader = csv.reader(in_csv)
writer = csv.writer(out_csv)
for row, new_col in zip(reader, new_data):
row.append(new_col)
writer.writerow(row)

Separate data with a comma CSV Python

I have some data that needs to be written to a CSV file. The data is as follows
A ,B ,C
a1,a2 ,b1 ,c1
a2,a4 ,b3 ,ct
The first column has a comma inside it. The entire data is in a list that I'd like to write to a CSV file, delimited by commas and without disturbing the data in column A. How can I do that? Specifying delimiter = ',' splits each row into four columns.
Just use the csv.writer from the csv module.
import csv
data = [['A','B','C']
['a1,a2','b1','c1']
['a2,a4','b3','ct']]
fname = "myfile.csv"
with open(fname,'wb') as f:
writer = csv.writer(f)
for row in data:
writer.writerow(row)
https://docs.python.org/library/csv.html#csv.writer
There is no need to use the csv module, since the ',' in the first column is already part of your data. This will work:
with open('myfile.csv', 'w') as f:
for row in data:
f.write(', '.join(row))
f.write('\n')
You could try the below.
Code:
import csv
import re
with open('infile.csv', 'r') as f:
lst = []
for line in f:
lst.append(re.findall(r',?(\S+)', line))
with open('outfile.csv', 'w', newline='') as w:
writer = csv.writer(w)
for row in lst:
writer.writerow(row)
Output:
A,B,C
"a1,a2",b1,c1
"a2,a4",b3,ct

Categories

Resources