Change a text file to csv (multiple rows) - python

I have a txt file that reads as:
interface 0/1
no data
no data
no data
no data
no data
interface 0/2
no data
etc...
I would like to have it output to a csv with a format of:
interface 0/1 | no data | no data | no data | no data | no data
interface 0/2 | no data | no data | no data | no data | no data
I have tried using the csv module and `writerow`, but without good results. Any help would be appreciated.

Looks like you want to group 6 lines into a row and write those, eg:
import csv
from itertools import islice
# Group every 6 input lines into one pipe-delimited output row.
# newline='' lets the csv module control the line endings itself.
with open('input.txt') as fin, open('output.csv', 'w', newline='') as fout:
    pipe_out = csv.writer(fout, delimiter='|')
    # Strip the line terminator from each field; otherwise every field would
    # carry an embedded '\n' and the writer would wrap it in quotes.
    # iter(callable, sentinel) stops once islice() yields an empty group (EOF).
    rows = iter(lambda: [line.rstrip('\n') for line in islice(fin, 6)], [])
    pipe_out.writerows(rows)

Here is a simple, slightly different answer that uses python 2.7 only (no csv)
# Join each "interface" line with the data lines that follow it into a single
# " | "-separated row (no csv module needed).
with open("data.txt", 'r') as f, open("output_data.txt", 'w') as f1:
    newline = None  # row being assembled; None until the first line is read
    for line in f:
        line = line.strip()
        if "interface" not in line:
            # Data line: append it to the current row (or start one if the
            # file does not begin with an interface line).
            newline = line if newline is None else newline + " | " + line
        else:
            # A new group starts: flush the finished row first. Guarding on
            # None avoids the empty first line the naive version writes.
            if newline is not None:
                f1.write(newline + '\n')
            newline = line
    if newline is not None:
        f1.write(newline + '\n')  # flush the final row, newline-terminated

You can also try without the csv module:
# Stream the input and start a new output row at every "interface" line.
# The context manager closes both files even if an error occurs midway.
with open("file.txt", "r") as f, open("file.csv", "w") as of:
    checkpoint = ""  # row separator; empty before the very first row
    for line in f:  # iterate lazily instead of loading the whole file
        line = line.strip("\n")
        if "interface" in line:
            of.write(checkpoint + line)
            checkpoint = "\n"
        else:
            of.write(" | " + line)
    if checkpoint:
        of.write("\n")  # terminate the last row

CSV module is usually not necessary. This script will do just fine.
# Build one comma-separated row per "interface" group without the csv module.
with open('file1.txt', 'r') as f1, open('file2.csv', 'w+') as f2:
    fields = []  # fields of the row currently being collected
    for line in f1.read().split('\n'):
        if line == "":
            continue
        # An "interface" line closes the previous row (if any) and opens a
        # new one; joining avoids both the leading blank line and the
        # trailing comma.
        if line.startswith("interface") and fields:
            f2.write(",".join(fields) + "\n")
            fields = []
        fields.append(line)
    if fields:
        f2.write(",".join(fields) + "\n")

I've done it like this:
# Read everything, cut the text at each 'interface' keyword and turn every
# piece into one " | "-separated row.
with open('text_file.txt', 'r') as txt_file:
    content = txt_file.read()

csv_content = ''
for block in content.split('interface'):
    # Strip each line so the separator left in front of the interface id
    # does not produce "interface  0/1", and drop blank lines.
    fields = [field.strip() for field in block.splitlines() if field.strip()]
    if not fields:
        continue  # text before the first keyword, or an empty block
    csv_content += 'interface %s\n' % ' | '.join(fields)

with open('csv_file.csv', 'w') as csv_file:
    csv_file.write(csv_content)

It can be done by collecting a row's data from each line in the file and outputting it as a csv row whenever a line starting with 'interface' is encountered:
import csv
input_filename = 'interface.txt'
output_filename = 'interface.csv'

# Accumulate the lines of one group and emit the group as a '|'-delimited
# row each time the next 'interface' line shows up.
with open(input_filename) as infile, \
        open(output_filename, 'w', newline='') as outfile:
    writer = csv.writer(outfile, delimiter='|')
    group = []
    for raw in infile:
        text = raw.rstrip()
        if text.startswith('interface'):
            if group:
                writer.writerow(group)  # previous group is complete
            group = [text]  # begin the next group
        else:
            group.append(text)
    writer.writerow(group)  # emit the group still pending at end of file
Contents of interface.csv file afterwards:
interface 0/1|no data|no data|no data|no data|no data
interface 0/2|no data|no data|no data|no data|no data

Related

Reading and writing a csv file Python

I just started learning Python, and I am trying to do the following:
- Read a .csv file
- Write the filtered data in a new file where the column 7 is not blank/empty
When I print my results, the Python shell shows the right output, but when I check the data in the .csv file it is not correct (it differs from what the print function shows).
Any suggestion with my code?
Thank you in advance.
# Copy every line whose comma-separated field at index 7 is non-empty.
file = open("station.csv", "r")
writeFile = open("stations-filtered.csv", "w")
for line in file:
line2 = line.split(",")
# NOTE(review): if index 7 is the last field it still carries the line's "\n", so it never equals "".
if line2[7] != "":
print(line)
writeFile.write(line)
# NOTE(review): neither file is closed in this snippet, so buffered output may not be on disk yet when the result is inspected.
I agree with #user513093 that you can use csv, like:
import csv

# Filter with the csv module on both sides so fields are parsed and written
# consistently; newline="" is what the csv module expects for its files.
with open("station.csv", "r", newline="") as file, \
        open("stations-filtered.csv", "w", newline="") as writeFile:
    writer = csv.writer(writeFile, delimiter=',')
    for row in csv.reader(file, delimiter=','):
        # Keep rows whose column at index 7 exists and is not blank.
        if len(row) > 7 and row[7] != "":
            print(row)
            # writerow() needs the list of fields; passing the raw line
            # string would write every character as its own column.
            writer.writerow(row)
But still, pandas is good:
import pandas as pd
# Read every cell as text and keep empty cells as "" (by default pandas turns
# them into NaN, which would make the != "" test below keep every row).
file = pd.read_csv("station.csv", sep=",", header=None, dtype=str,
                   keep_default_na=False)
file = file[file[7] != ""]
# The input has no header row and no index column, so do not add either.
file.to_csv("stations-filtered.csv", index=False, header=False)

How to split a large file into chunks using python

import csv
# Split the input CSV into numbered files of at most `divisor` data rows each,
# writing the header row at the top of every output file.
divisor = 1000000
outfileno = 1
outfile = None
with open('o77069882.txt', 'r') as infile:
infile_iter = csv.reader(infile)
header = next(infile_iter)
for index, row in enumerate(infile_iter):
# Every `divisor` rows: close the current chunk and start the next one.
if index % divisor == 0:
if outfile is not None:
outfile.close()
outfilename = 'big-{}.csv'.format(outfileno)
outfile = open(outfilename, 'w')
outfileno += 1
writer = csv.writer(outfile)
writer.writerow(header)
writer.writerow(row)
# Don't forget to close the last file
if outfile is not None:
outfile.close()
I am using this above code to divide my file into chunks of size 100000, it does the job but each row in the first file is getting enclosed by quotes( " ") as for example "abc, dfg, ghj, kil" . The second and third file created are not having this problem, can anyone help me modify my code to get rid of the above error.

how to read the content of .txt file using python?

output_filename = r"C:\Users\guage\Output.txt"
RRA:
GREQ-299684_6j
GREQ-299684_6k
CZM:
V-GREQ-299684_6k
V-GREQ-299524_9
F_65624_1
R-GREQ-299680_5
DUN:
FB_71125_1
FR:
VQ-299659_18
VR-GREQ-299659_19
VEQ-299659_28
VR-GREQ-299659_31
VR-GREQ-299659_32
VEQ-299576_1
GED:
VEQ-299622_2
VR-GREQ-299618_13
VR-GREQ-299559_1
VR-GREQ-299524_14
FB_65624_1
VR-GREQ-299645_1
MNT:
FB_71125_1
FB_71125_2
VR-534_4
The above is the content of the .txt file. How can I read each section of it separately? For example:
RRA:VR-GREQ-299684_6j VR-GREQ-299684_6k VR-GREQ-299606_3 VR-GREQ-299606_4 VR-GREQ-299606_5 VR-GREQ-299606_7
and save it in a variable or something similar to it. Later I want to read CZM separately and so on. I did as below.
with open(output_filename, 'r') as f:
excel = f.read()
But how to read it separately ? can someone tell me how to do it ?
Something like this:
def read_file_with_custom_record_separator(file_path, delimiter='\n'):
    """Print the contents of a file grouped into records.

    A line whose stripped text ends with ``delimiter`` starts a new record
    (for example ``"RRA:"`` when ``delimiter=":"``). Each finished record is
    printed under a ``VARIABLE:`` heading and the final one under
    ``LAST VARIABLE:``.

    Args:
        file_path: Path of the text file to read.
        delimiter: Suffix that marks the first line of a record. With the
            default ``'\\n'`` no line ever matches, because the line is
            stripped before the comparison, so the whole file is printed as
            a single record.

    Returns:
        None. The records are written to standard output.
    """
    data = ""
    # The context manager guarantees the handle is closed, even on error.
    with open(file_path) as fh:
        for line in fh:
            # `data != ""` keeps the very first marker line from emitting an
            # empty record.
            if line.strip().endswith(delimiter) and data != "":
                print("VARIABLE:\n<", data, ">\n")
                data = line
            else:
                data += line
    print("LAST VARIABLE:\n<", data, ">\n")
And then:
read_file_with_custom_record_separator("input.txt", ":")
You can make use of the file text : as indicator to create a new file like this:
# Write each "NAME:" section of the input into its own NAME.txt file.
savefilename = ""
sf = None  # output file of the section currently being read
try:
    with open(filename, 'r') as f:
        for line in f:
            line = line.strip()  # get rid of the unnecessary white chars
            if line.endswith(":"):  # a section header such as "RRA:"
                if sf is not None:
                    sf.close()  # finish the previous section's file
                savefilename = line[:-1]  # file name is the header minus ":"
                sf = open(savefilename + ".txt", 'w')  # create a new file
            elif sf is not None:
                sf.write(line + "\n")  # write the data to the opened file
            # Lines before the first header have no section and are skipped.
finally:
    if sf is not None:
        sf.close()  # make sure the last section is flushed to disk
Then you should get collection of files:
RRA.txt
CZM.txt
DUN.txt
# etc
which contains all the appropriate data:
RRA.txt
VR-GREQ-299684_6j
VR-GREQ-299684_6k
VR-GREQ-299606_3
VR-GREQ-299606_4
VR-GREQ-299606_5
VR-GREQ-299606_7
CZM.txt
VR-GREQ-299684_6k
VR-GREQ-299606_6
VR-GREQ-299606_8
VR-GREQ-299640_1
VR-GREQ-299640_5
VR-GREQ-299524_9
FB_65624_1
VR-GREQ-299680_5
DUN.txt
FB_71125_1
# and so on
You can replace the `sf = open` and the `sf.write` calls with whatever way you feel is best to separate the data. Here, I use files...
You can iterate over the file and use the lines and indices to your advantage; something like this:
with open(output_filename, 'r') as f:
for index, line in enumerate(f):
# here you have access to each line and its index
# so you can save any number of lines you wish
What about reading it into a list, then process its element as you prefer
>>> f = open('myfile.txt', 'r').readlines()
>>> len(f)
46
>>> f[0]
RRA:
>>> f[-1]
VR-GREQ-299534_4
>>> f[:3]
['RRA:\n', 'VR-GREQ-299684_6j \n', 'VR-GREQ-299684_6k \n']
>>>
>>> [l for l in f if l.startswith('FB_')]
['FB_65624_1 \n', 'FB_71125_1 \n', 'FB_69228_1 \n', 'FB_65624_1 \n', 'FB_71125_1 \n', 'FB_71125_2 \n']
>>>

Merge semicolon delimited txt file looping in directory

Suppose I have many different text files from the same directory with the content structure as shown below:
File a.txt:
HEADER_X;HEADER_Y;HEADER_Z
a_value;a_value;a_value
a_value;a_value;a_value
File b.txt:
HEADER_X;HEADER_Y;HEADER_Z
b_value;b_value;b_value
b_value;b_value;b_value
File c.txt:
HEADER_X;HEADER_Y;HEADER_Z
c_value;c_value;c_value
c_value;c_value;c_value
File d.txt: ...
I'd like to merge all of the txt files into one, by appending the content of each file to the final row of the each previous file. See below:
File combined.txt:
HEADER_X;HEADER_Y;HEADER_Z
a_value;a_value;a_value
a_value;a_value;a_value
b_value;b_value;b_value
b_value;b_value;b_value
c_value;c_value;c_value
c_value;c_value;c_value
...
How can I do this in Python?
Assumptions:
- all txt files are located in the same folder
- all txt files have same headers
- all txt files have same number of columns
- all txt files have different number of rows
Use the CSV Module. Something like this:
import csv
# Append the data rows of a.txt (without its header) to output.csv.
# Python 3: the csv module needs text-mode files opened with newline=''.
with open('output.csv', 'a', newline='') as output:
    writer = csv.writer(output, delimiter=";")
    with open('a.txt', 'r', newline='') as csvfile:
        reader = csv.reader(csvfile, delimiter=";")
        next(reader, None)  # this is to skip the header (no error if empty)
        for row in reader:
            writer.writerow(row)
If you don't want to hard-code every file name (say you have many more than three), you could do:
from os import listdir
from os.path import isfile, join
# Append the data rows of every regular file found in `mypath`.
onlyfiles = [f for f in listdir(mypath) if isfile(join(mypath, f))]
for aFile in onlyfiles:
    # listdir() returns bare names, so re-attach the directory before opening.
    with open(join(mypath, aFile), 'r', newline='') as csvfile:
        reader = csv.reader(csvfile, delimiter=";")
        next(reader, None)  # this is to skip the header (no error if empty)
        for row in reader:
            writer.writerow(row)
I managed to do something that seems to work (at least in the cases I tested).
This will parse all files, get all the headers and format the values on each line of each file to add ";" according to the headers present/absent on that file.
# Merge several ";"-delimited files whose header sets may differ: the output
# has the sorted union of all headers, and every row is laid out under those
# columns with empty cells where a file lacks a header.
files = ("csv0.txt", "csv1.txt")  # put the files you want to parse here

# Read the files a first time, just to get the headers.
headers = []
for file_name in files:
    with open(file_name, 'r') as file:
        first_line = file.readline()
    if not first_line:  # empty file: contributes no headers
        continue
    for header in first_line.strip().split(";"):
        if header not in headers:
            headers.append(header)
headers = sorted(headers)

# Direct header -> output column lookup. This works whatever order a file
# lists its headers in, unlike a forward-only scan through the sorted list.
column_of = {header: column for column, header in enumerate(headers)}

# Read a second time to get the values.
values = []
for file_name in files:
    file_values = []
    with open(file_name, 'r') as file:
        file_headers = file.readline().strip().split(";")
        for line in file:
            line = line.strip()
            if not line:
                continue  # ignore blank lines (e.g. at the end of the file)
            # Start from a full-width row of empty cells so every output row
            # has the same number of columns.
            cells = [""] * len(headers)
            # zip() drops cells beyond the file's own header count instead
            # of searching for a column that does not exist.
            for header, value in zip(file_headers, line.split(";")):
                cells[column_of[header]] = value
            file_values.append(";".join(cells))
    values.append(file_values)

# And now we write the output file with all headers and values.
with open("output.txt", 'w') as output_file:
    output_file.write(";".join(headers) + "\n")
    for file_values in values:
        for values_line in file_values:
            output_file.write(values_line + "\n")
If you have any question, feel free to ask.

Want to read multiple csv file one by one and filepaths are stored in a text file using python

Here is my code for reading individual cells of one csv file, but I want to read multiple csv files one by one, taking their paths from a .txt file that lists them.
import csv
# Read one ';'-delimited CSV and print columns 1 and 2 of each row from index 2 on
# when both cells are non-empty. Written for Python 2 (print statements).
ifile = open ("C:\Users\BKA4ABT\Desktop\Test_Specification\RDBI.csv", "rb")
data = list(csv.reader(ifile, delimiter = ';'))
REQ = []
RES = []
n = len(data)
for i in range(n):
x = data[i][1]
y = data[i][2]
REQ.append (x)
RES.append (y)
# NOTE(review): this increment has no effect; range() rebinds i on every iteration.
i += 1
# Start at index 2, skipping the first two rows.
for j in range(2,n):
try:
if REQ[j] != '' and RES[j]!= '': # ignore blank cell
print REQ[j], ' ', RES[j]
# NOTE(review): the bare except hides any error raised above.
except:
pass
# NOTE(review): this increment has no effect either, for the same reason.
j += 1
And csv file paths are stored in a .txt file like
C:\Desktop\Test_Specification\RDBI.csv
C:\Desktop\Test_Specification\ECUreset.csv
C:\Desktop\Test_Specification\RDTC.csv
and so on..
You can read stuff stored in files into variables. And you can use variables with strings in them anywhere you can use a literal string. So...
# Treat each line of the text file as the path of one input file.
with open('mytxtfile.txt', 'r') as txt_file:
for line in txt_file:
file_name = line.strip() # str.strip() removes the surrounding whitespace, including the newline
ifile = open(file_name, 'rb')
# ... the rest of your code goes here
Maybe we can fix this up a little...
import csv
# For every CSV path listed in the text file, print columns 1 and 2 of each
# data row that has something in at least one of them.
with open('mytxtfile.txt', 'r') as txt_file:
    for line in txt_file:
        file_name = line.strip()
        if not file_name:
            continue  # skip blank lines in the path list
        # `delimiter` belongs to csv.reader(), not to open().
        with open(file_name, 'r', newline='') as csv_handle:
            csv_file = csv.reader(csv_handle, delimiter=';')
            # A reader is an iterator and cannot be sliced; consume the
            # header row explicitly instead.
            next(csv_file, None)
            for record in csv_file:
                if len(record) < 3:
                    continue  # row too short to hold both columns
                req = record[1]
                res = record[2]
                if len(req + res):
                    print(req, ' ', res)
you just need to add a while which will read your file containing your list of files & paths upon your first open statement, for example
from __future__ import with_statement
with open("myfile_which_contains_file_path.txt") as f:
    for line in f:
        # Each line still ends with its newline; strip it, otherwise the
        # path handed to open() does not name an existing file.
        ifile = open(line.strip(), 'rb')
        # here the rest of your code
You need to use a raw string because your path contains backslashes (`\`):
import csv
file_list = r"C:\Users\BKA4ABT\Desktop\Test_Specification\RDBI.csv"
# NOTE(review): the loop below treats file_list as a text file holding one
# CSV path per line, yet this literal names a .csv file; confirm the path.
with open(file_list) as f:
    for line in f:
        path = line.strip()
        if not path:
            continue  # skip blank lines in the path list
        # Python 3's csv module needs a text-mode file opened with newline=''.
        with open(path, 'r', newline='') as the_file:
            reader = csv.reader(the_file, delimiter=';')
            for row in reader:
                if len(row) < 3:
                    continue  # not enough columns to unpack req and res
                req, res = row[1:3]
                if req and res:
                    print('{0} {1}'.format(req, res))

Categories

Resources