How to parse this particular console output and make csv? - python
I need to process console output which looks like this and make a csv from it:
ID,FLAG,ADDRESS,MAC-ADDRESS,HOST-NAME,SERVER,STATUS,LAST-SEEN
0 10.0.0.11 00:1D:72:29:F2:4F lan waiting never
;;; test comment
1 10.0.0.19 00:13:21:15:D4:00 lan waiting never
2 10.0.0.10 00:60:6E:05:0C:E0 lan waiting never
3 D 10.0.1.199 24:E9:B3:20:FA:C7 home server1 bound 4h54m52s
4 D 100.64.1.197 E6:17:AE:21:EA:00 Suzana-s-A51 dhcp1 bound 2h16m45s
I have managed to split lines but regex is not working for tabs and spaces. Can someone point me in the right direction?
The code I am using is this:
import csv
import re
# Script from the question: copies 'output.txt' to 'output.csv', skipping
# ";;;" comment lines.
# NOTE(review): this is the code the question reports as not working; the
# review notes below point at the lines responsible.
# Open the input file in read-only mode
with open('output.txt', 'r') as input_file:
    # Open the output file in write-only mode
    with open('output.csv', 'w') as output_file:
        # Create a CSV writer that will write to the output file
        writer = csv.writer(output_file)
        # Read the first line of the input file (the header)
        # and write it to the output file as a single value
        # (i.e. do not split it on commas)
        header = input_file.readline()
        writer.writerow([header.strip()])
        # Iterate over the remaining lines of the input file
        for line in input_file:
            # Ignore lines that start with ";;;" (these are comments)
            if line.startswith(';;;'):
                continue
            # Split the line on newlines
            # NOTE(review): `line` is a single line here, so this yields the
            # whole line as one value (plus an empty string when the line
            # ends with '\n'); no field separation happens at this point.
            values = line.split('\n')
            # NOTE(review): the whitespace-to-comma substitution is stored
            # back into `line`, but `values` was already built from the
            # unsubstituted text and `line` is not read again below, so this
            # result never reaches the output.
            line = re.sub(r'[\t ]+', ',', line)
            # Iterate over the resulting values
            for i, value in enumerate(values):
                # If the value contains a comma, split it on commas
                # and assign the resulting values to the `values` list
                if ',' in value:
                    values[i:i+1] = value.split(',')
            # Write the values to the output file
            writer.writerow(values)
A regular expression can be handy here: build a mask (pattern) for the record layout, then take each value from the line you read.
You can refer to the regex in a regex visualiser, which gives a great visual breakdown of how it matches.
so for each line will put a regex reg_format=r"(\d*?)(?:\s+)(.*?)(?:\s)(?:\s*?)(\w*\.\w*\.\w*\.\w*)(?:\s*)(\w*?:\w*?:\w*?:\w*?:\w*?:\w*)(?:\s*)(\w*)(?:\s*)(\w*)(?:\s*)(\w*)"
Please note that writer.writerow expects a list when writing to the CSV.
The following should work for you, and you can tweak it as needed.
I tweaked your code and added comments.
Update:
Added masking for records
import csv
import re
# Mask for one lease record.  The FLAG and HOST-NAME columns may be absent
# from a row, so both are optional groups; a group that does not take part
# in the match is reported as None and is written out as an empty field.
# Fields are whitespace-delimited tokens, so the mask does not depend on how
# many spaces or tabs separate the columns.
LEASE_RE = re.compile(
    r"^\s*(\d+)"                                    # ID
    r"(?:\s+([A-Za-z]+))?"                          # FLAG (optional, e.g. "D")
    r"\s+(\d{1,3}(?:\.\d{1,3}){3})"                 # ADDRESS (dotted IPv4)
    r"\s+([0-9A-Fa-f]{2}(?::[0-9A-Fa-f]{2}){5})"    # MAC-ADDRESS
    r"(?:\s+(\S+))?"                                # HOST-NAME (optional)
    r"\s+(\S+)"                                     # SERVER
    r"\s+(\S+)"                                     # STATUS
    r"\s+(\S+)\s*$"                                 # LAST-SEEN
)


def parse_lease_line(line):
    """Split one record line into its eight column values.

    Args:
        line: One line of the console output, with or without the trailing
            newline.

    Returns:
        A list ``[id, flag, address, mac, host_name, server, status,
        last_seen]`` in which absent optional columns are ``""``, or
        ``None`` when the line does not look like a record.
    """
    match = LEASE_RE.match(line)
    if match is None:
        return None
    return [field or "" for field in match.groups()]


def convert(input_path='testreg.txt', output_path='output.csv'):
    """Convert the console listing at *input_path* to CSV at *output_path*.

    The first input line is the comma-separated header and is copied through
    as the CSV header row.  Blank lines and ``;;;`` comment lines are
    ignored.  Any other line that does not match the record mask is reported
    on stdout and skipped instead of aborting the conversion.

    Args:
        input_path: Path of the text file holding the console output.
        output_path: Path of the CSV file to create (overwritten if present).
    """
    # newline='' lets the csv module control line endings itself; without it
    # some platforms emit an extra blank line after every row.
    with open(input_path, 'r') as input_file, \
            open(output_path, 'w', newline='') as output_file:
        # Let the writer do the quoting.  Wrapping values in '"' by hand gets
        # those quote characters escaped again, producing """value""".
        # Drop the quoting argument if quoted fields are not wanted.
        writer = csv.writer(output_file, quoting=csv.QUOTE_ALL)

        # strip() removes the trailing newline so it does not end up inside
        # the last header cell.
        header = input_file.readline().strip()
        if header:
            writer.writerow(header.split(','))

        # The header was line 1, so records start at line 2.
        for line_number, line in enumerate(input_file, start=2):
            # Ignore blank lines and ";;;" comment lines
            if not line.strip() or line.lstrip().startswith(';;;'):
                continue
            fields = parse_lease_line(line)
            if fields is None:
                print(f"skipping unparseable line {line_number}: "
                      f"{line.rstrip()!r}")
                continue
            writer.writerow(fields)


if __name__ == "__main__":
    convert()
Related
If the csv file is more than 3 lines, edit it to delete the first line (Not save as new file)
Im trying to code if the csv file is more than 3 lines, edit it to delete the first line. I want to delete the first line from the existing file instead of saving it as a new file. For this reason, I had to delete the existing file and create a file with the same name but only one line is saved and the comma disappears. I'm using Pandas data frame. But if it doesn't matter if I don't use it, I don't want to use it Function name might be weird because I'm a beginner Thanks. file = open("./csv/selllist.csv", encoding="ANSI") reader = csv.reader(file) lines= len(list(reader)) if lines > 3: df = pd.read_csv('./csv/selllist.csv', 'r+', encoding="ANSI") dfa = df.iloc[1:] print(dfa) with open("./csv/selllist.csv", 'r+', encoding="ANSI") as x: x.truncate(0) with open('./csv/selllist.csv', 'a', encoding="ANSI", newline='') as fo: # Pass the CSV file object to the writer() function wo = writer(fo) # Result - a writer object # Pass the data in the list as an argument into the writerow() function wo.writerow(dfa) # Close the file object fo.close() print() This is the type of csv file I deal with string, string, string, string, string string, string, string, string, string string, string, string, string, string string, string, string, string, string
Take a 2-step approach. Open the file for reading and count the number of lines. If there are more than 3 lines, re-open the file (for writing) and update it. For example: lines = [] with open('./csv/selllist.csv') as csv: lines = csv.readlines() if len(lines) > 3: with open('./csv/selllist.csv', 'w') as csv: for line in lines[1:]: # skip first line csv.write(line)
With pandas, you can just specify header=None while reading and writing: import pandas as pd if lines > 3: df = pd.read_csv("data.csv", header=None) df.iloc[1:].to_csv("data.csv", header=None, index=None) With the csv module: import csv with open("data.csv") as infile: reader = csv.reader(infile) lines = list(reader) if len(lines)>3: with open("data.csv", "w", newline="") as outfile: writer = csv.writer(outfile, delimiter=",") writer.writerows(lines[1:])
With one open call and using seek and truncate. Setup out = """\ Look at me I'm a file for sure 4th line, woot!""" with open('filepath.csv', 'w') as fh: fh.write(out) Solution I'm aiming to minimize the stuff I'm doing. I'll only open one file and only one time. I'll only split one time. with open('filepath.csv', 'r+') as csv: top, rest = csv.read().split('\n', 1) # Only necessary to pop off first line if rest.count('\n') > 1: # If 4 or more lines, there will be at # least two more newline characters csv.seek(0) # Once we're done reading, we need to # go back to beginning of file csv.truncate() # truncate to reduce size of file as well csv.write(rest)
Data Formatting within a txt. File
I have the following txt file that needs to be formatted with specific start and end positions for data throughout the file. For instance, column 1 is blank and will be read as an entry number. The values for this data type is a numeric 9 and should have the following positions (1-9). Next is employee ID with positions (10-15).. and so on. Values do not need a delimiter. ,MB4858,01,1,CA,07/18/20,0,0,4.8,,,,,,14.77,,Y,2225D,,,DF2016,CA4310,,0172CA,,,,,Y, ,MD6535,01,1,CA,07/18/20,0,0,4.8,,,,,,14.77,,Y,2225D,,,DF2016,CA4310,,0172CA,,,,,Y, ,PM7858,01,1,CA,07/18/20,0,0,4.8,,,,,,14.77,,Y,2225D,,,DF2016,CA4310,,0172CA,,,,,Y, ,RM0111,01,1,CA,07/18/20,0,0,4.8,,,,,,14.77,,Y,2225D,,,DF2016,CA4310,,0172CA,,,,,Y, ,RY2585,01,1,CA,07/18/20,0,0,4.8,,,,,,14.77,,Y,2225D,,,DF2016,CA4310,,0172CA,,,,,Y, ,TM0617 ,01,1,CA,07/18/20,0,0,4.8,,,,,,14.77,,Y,2225D,,,DF2016,CA4310,,0172CA,,,,,Y, ,VE2495,01,1,CA,07/18/20,0,0,4.8,,,,,,14.77,,Y,2225D,,,DF2016,CA4310,,0172CA,,,,,Y, ,VJ8913,01,1,CA,07/18/20,0,0,4.8,,,,,,14.77,,Y,2225D,,,DF2016,CA4310,,0172CA,,,,,Y, ,FJ4815 ,01,1,CA,07/18/20,0,0,4.8,,,,,,14.77,,Y,2225D,,,DF2016,CA4310,,0172CA,,,,,Y, ,OM0188,01,1,CA,07/18/20,0,0,4.8,,,,,,14.77,,Y,2225D,,,DF2016,CA4310,,0172CA,,,,,Y, ,H00858,01,1,CA,07/18/20,0,0,4.8,,,,,,14.77,,Y,2225DH,,,DF2016,CA4311,,0172CA,,,,,Y, ,H08392,01,1,CA,07/18/20,0,0,4.8,,,,,,14.77,,Y,2225DH,,,DF2016,CA4311,,0172CA,,,,,Y, ,H15624,01,1,CA,07/18/20,0,0,4.8,,,,,,14.77,,Y,2225DH,,,DF2016,CA4311,,0172CA,,,,,Y, ,H27573,01,1,CA,07/18/20,0,0,4.8,,,,,,14.77,,Y,2225DH,,,DF2016,CA4311,,0172CA,,,,,Y, ,H40249,01,1,CA,07/18/20,0,0,4.8,,,,,,14.77,,Y,2225DH,,,DF2016,CA4311,,0172CA,,,,,Y, ,H44581,01,1,CA,07/18/20,0,0,4.8,,,,,,14.77,,Y,2225DH,,,DF2016,CA4311,,0172CA,,,,,Y, ,H48473,01,1,CA,07/18/20,0,0,4.8,,,,,,14.77,,Y,2225DH,,,DF2016,CA4311,,0172CA,,,,,Y, ,H51570,01,1,CA,07/18/20,0,0,4.8,,,,,,14.77,,Y,2225DH,,,DF2016,CA4311,,0172CA,,,,,Y, ,H55768,01,1,CA,07/18/20,0,0,4.8,,,,,,14.77,,Y,2225DH,,,DF2016,CA4311,,0172CA,,,,,Y, 
,H64315,01,1,CA,07/18/20,0,0,4.8,,,,,,14.77,,Y,2225DH,,,DF2016,CA4311,,0172CA,,,,,Y, ,H71507,01,1,CA,07/18/20,0,0,4.8,,,,,,14.77,,Y,2225DH,,,DF2016,CA4311,,0172CA,,,,,Y, ,H72248,01,1,CA,07/18/20,0,0,4.8,,,,,,14.77,,Y,2225DH,,,DF2016,CA4311,,0172CA,,,,,Y, ,H78527,01,1,CA,07/18/20,0,0,4.8,,,,,,14.77,,Y,2225DH,,,DF2016,CA4311,,0172CA,,,,,Y, ,H90393,01,1,CA,07/18/20,0,0,4.8,,,,,,14.77,,Y,2225DH,,,DF2016,CA4311,,0172CA,,,,,Y, ,H95973,01,1,CA,07/18/20,0,0,4.8,,,,,,14.77,,Y,2225DH,,,DF2016,CA4311,,0172CA,,,,,Y,
You can try starting here: import sys inFile = sys.argv[1] outFile = "newFile.txt" with open(inFile, 'r') as inf, open(outFile, 'w') as outf: for line in inf: line = line.split(',') print(line) Where sys argv[1] is the name of your txt file when you run the python script from the command line. You can see that it will print out a list containing individual strings between the comma delimiters you have in your txt data file. From there you can do list manipulations to format the data. And then write it to outf like so (example): # do what ever manipulations here to the output line output_line = line[0] + " " + line[1] outf.write(output_line) outf.write('\n'
Split String in Text File to Multiple Rows in Python
I have a string within a text file that reads as one row, but I need to split the string into multiple rows based on a separator. If possible, I would like to separate the elements in the string based on the period (.) separating the different line elements listed here: "Line 1: Element '{URL1}Decimal': 'x' is not a valid value of the atomic type 'xs:decimal'.Line 2: Element '{URL2}pos': 'y' is not a valid value of the atomic type 'xs:double'.Line 3: Element '{URL3}pos': 'y z' is not a valid value of the list type '{list1}doubleList'" Here is my current script that is able to read the .txt file and convert it to a csv, but does not separate each entry into it's own row. import glob import csv import os path = "C:\\Users\\mdl518\\Desktop\\txt_strip\\" with open(os.path.join(path,"test.txt"), 'r') as infile, open(os.path.join(path,"test.csv"), 'w') as outfile: stripped = (line.strip() for line in infile) lines = (line.split(",") for line in stripped if line) writer = csv.writer(outfile) writer.writerows(lines) If possible, I would like to be able to just write to a .txt with multiple rows but a .csv would also work - Any help is most appreciated!
One way to make it work: import glob import csv import os path = "C:\\Users\\mdl518\\Desktop\\txt_strip\\" with open(os.path.join(path,"test.txt"), 'r') as infile, open(os.path.join(path,"test.csv"), 'w') as outfile: stripped = (line.strip() for line in infile) lines = ([sent] for para in (line.split(".") for line in stripped if line) for sent in para) writer = csv.writer(outfile) writer.writerows(lines) Explanation below: The output is one line because code in the last line reads a 2d array and there is only one instance in that 2d array which is the entire paragraph. To visualise it, "lines" is stored as [[s1,s2,s3]] where writer.writerows() takes rows input as [[s1],[s2],[s3]] There can be two improvements. (1) Take period '.' as seperator. line.split(".") (2) Iterate over the split list in the list comprehension. lines = ([sent] for para in (line.split(".") for line in stripped if line) for sent in para) str.split() splits a string by separator and store instances in a list. In your case, it tried to store the list in a list comprehension which made it a 2d array. It saves your paragraph into [[s1,s2,s3]]
How do I split each line into two strings and print without the comma?
I'm trying to have output to be without commas, and separate each line into two strings and print them. My code so far yields: 173,70 134,63 122,61 140,68 201,75 222,78 183,71 144,69 But i'd like it to print it out without the comma and the values on each line separated as strings. if __name__ == '__main__': # Complete main section of code file_name = "data.txt" # Open the file for reading here my_file = open('data.txt') lines = my_file.read() with open('data.txt') as f: for line in f: lines.split() lines.replace(',', ' ') print(lines)
In your sample code, line contains the full content of the file as a str. my_file = open('data.txt') lines = my_file.read() You then later re-open the file to iterate the lines: with open('data.txt') as f: for line in f: lines.split() lines.replace(',', ' ') Note, however, str.split and str.replace do not modify the existing value, as strs in python are immutable. Also note you are operating on lines there, rather than the for-loop variable line. Instead, you'll need to assign the result of those functions into new values, or give them as arguments (E.g., to print). So you'll want to open the file, iterate over the lines and print the value with the "," replaced with a " ": with open("data.txt") as f: for line in f: print(line.replace(",", " ")) Or, since you are operating on the whole file anyway: with open("data.txt") as f: print(f.read().replace(",", " ")) Or, as your file appears to be CSV content, you may wish to use the csv module from the standard library instead: import csv with open("data.txt", newline="") as csvfile: for row in csv.reader(csvfile): print(*row)
with open('data.txt', 'r') as f: for line in f: for value in line.split(','): print(value)
While Python offers several ways to open files, this is the preferred one for working with files, because we read the file lazily (which is preferable, especially for large files), and after exiting the with scope (the indented block) the file will be closed automatically. Here we are opening the file in read mode. Files follow the iterator protocol, so we can iterate over them like lists. Each item is one line of the file and is a string. After getting the line in the line variable, we split it (see str.split()) into two tokens, one before the comma and the other after it. split returns a newly constructed list of strings. If you need to omit unwanted characters you can use the str.strip() method; strip and split are usually combined. Elegant and efficient file reading - method 1 with open("data.txt", 'r') as io: for line in io: sl=io.split(',') # now sl is a list of strings. print("{} {}".format(sl[0],sl[1])) #now we use the format, for printing the results on the screen. Not elegant, but efficient file reading - method 2 fp = open("data.txt", 'r') line = None while (line=fp.readline()) != '': #when line become empty string, EOF have been reached. the end of file! sl=line.split(',') print("{} {}".format(sl[0],sl[1]))
Python - rearrange and write strings (line splitting, unwanted newline)
I have a script that: Reads in each line of a file Finds the '*' character in each line and splits the line here Rearranges the 3 parts (first to last, and last to first) Writes the rearranged strings to a .txt file Problem is, it's finding some new line character or something, and isn't outputting how it should. Have tried stripping newline chars, but there must be something I'm missing. Thanks in advance for any help! the script: ## Import packages import time import csv ## Make output file file_output = open('output.txt', 'w') ## Open file and iterate over, rearranging the order of each string with open('input.csv', 'rb') as f: ## Jump to next line (skips file headers) next(f) ## Split each line, rearrange, and write the new line for line in f: ## Strip newline chars line = line.strip('\n') ## Split original string category, star, value = line.rpartition("*") ##Make new string new_string = value+star+category+'\n' ## Write new string to file file_output.write(new_string) file_output.close() ## Require input (stops program from immediately quitting) k = input(" press any key to exit") Input file (input.csv): Category*Hash Value 1*FB1124FF6D2D4CD8FECE39B2459ED9D5 1*FB1124FF6D2D4CD8FECE39B2459ED9D5 1*FB1124FF6D2D4CD8FECE39B2459ED9D5 1*34AC061CCCAD7B9D70E8EF286CA2F1EA Output file (output.txt) FB1124FF6D2D4CD8FECE39B2459ED9D5 *1 FB1124FF6D2D4CD8FECE39B2459ED9D5 *1 FB1124FF6D2D4CD8FECE39B2459ED9D5 *1 34AC061CCCAD7B9D70E8EF286CA2F1EA *1 EDIT: Answered. Thanks everyone! Looks all good now! :)
The file output.txt should exist. The following works with Python 2 on Debian: ## Import packages import time import csv ## Make output file file_output = open('output.txt', 'w') ## Open file and iterate over, rearranging the order of each string with open('input.csv', 'rb') as f: ## Jump to next line (skips file headers) next(f) ## Split each line, rearrange, and write the new line for line in f: ## Split original string category, star, value = line.rpartition("*") ##Make new string new_string = value.strip()+star+category+'\n' ## Write new string to file file_output.write(new_string) file_output.close() ## Require input (stops program from immediately quitting) k = input(" press any key to exit") I call strip() on the value, which contains the \n, in order to sanitize it. You used strip('\n'), which could be ambiguous; calling the method without a parameter does the job.
Use a DictWriter import csv with open('aster.csv') as f, open('out.txt', 'w') as fout: reader = csv.DictReader(f, delimiter='*') writer = csv.DictWriter(fout, delimiter='*', fieldnames=['Hash Value','Category']) #writer.writeheader() for line in reader: writer.writerow(line) Without csv library with open('aster.csv') as f: next(f) lines = [line.strip().split('*') for line in f] with open('out2.txt', 'w') as fout: for line in lines: fout.write('%s*%s\n' % (line[1], line[0]))