Split .csv while keeping description first row

Split .csv while keeping description first row - python

I have this code:
#!/usr/bin/env python
# Import module
import os
# Define file_splitter function
def file_splitter(fullfilepath, lines=50):
"""Splits a plain text file based on line count."""
path, filename = os.path.split(fullfilepath)
basename, ext = os.path.splitext(filename)
# Open source text file
with open(fullfilepath, 'r') as f_in:
try:
# Open first output file
f_output = os.path.join(path, '{}_{}{}'.format(basename, 0, ext))
f_out = open(f_output, 'w')
# Read input file one line at a time
for i, line in enumerate(f_in):
# When current line can be divided by the line
# count close the output file and open the next one
if i % lines == 0:
f_out.close()
f_output = os.path.join(path, '{}_{}{}'.format(basename, i, ext))
f_out = open(f_output, 'w')
# Write current line to output file
f_out.write(line)
finally:
# Close last output file
f_out.close()
# Call function with source text file and line count
file_splitter('Products_con_almacen_stock.csv', 12000)
This splits a file Products_con_almacen_stock.csv into chunks of 120.000 lines.
Now, every chunk has the columns and rows, but no header, only the first chunk has it, I'd like to preserve the first descriptive row on every chunk.
Is this possible?
Thanks in advance!

You could do something like this:
first = True
for line in lines:
if first:
header = line
first = False
....
You can then use header in all subsequent files.

Related

Is there any idea of deleting lines in python?

So,I have this problem,the code below will delete the 3rd line in a text file.
with open("sample.txt","r") as f:
lines = f.readlines()
del lines[2]
with open("sample.txt", "w+") as f2:
for line in lines:
f2.write(line)
How to delete all lines from a text file?

Why use loop if you want to have an empty file anyways?
f = open("sample.txt", "r+")
f.seek(0)
f.truncate()
This will empty the content without deleting the file!

I think you to need something like this
import os
def delete_line(original_file, line_number):
""" Delete a line from a file at the given line number """
is_skipped = False
current_index = 1
dummy_file = original_file + '.bak'
# Open original file in read only mode and dummy file in write mode
with open(original_file, 'r') as read_obj, open(dummy_file, 'w') as write_obj:
# Line by line copy data from original file to dummy file
for line in read_obj:
# If current line number matches the given line number then skip copying
if current_index != line_number:
write_obj.write(line)
else:
is_skipped = True
current_index += 1
# If any line is skipped then rename dummy file as original file
if is_skipped:
os.remove(original_file)
os.rename(dummy_file, original_file)
else:
os.remove(dummy_file)

Python; trying to add these files line by line with a tab inbetween

I'm reading in text files from the command line and I'm trying to produce output as follows...
Desired output given these command line arguments
Essentially, I want to read in files from the command line; take the first line from each file & print them on one line separated by a tab. Take the second line from each file & print them on the next line separated by a tab & so on.
This is the best code I've come up with (I'm a beginner and I've tried looking at other responses for far too long; glob & os hasn't been helping me understand how to do this; I'd just like to use basic loops and opening of files to do this):
import sys
l = []
list_files = sys.argv[:1]
for fname in list_files:
open(fname) as infile:
for line in infile:
line = line.strip()
if line == '':
l.append("''")
else:
l.append(line)
print(l) # List of all appended animals. Not in the right order
#(takes all names from one file, then all the names from the
#next instead of taking one line from every file on each iteration)

This is a minimally changed version that should work.
import sys
from itertools import zip_longest
files = []
list_files = sys.argv[:1]
for fname in list_files:
with open(fname) as infile: # Don't forget the `with`!
l = []
for line in infile:
line = line.strip()
if line == '':
l.append("''")
else:
l.append(line)
files.append(l) # list of lists
for lines in zip_longest(*files, fillvalue=''): # transpose list of lists
print(*lines, sep='\t') # separate with tabs.

The best way to open files in python is with with. More information can be found at https://www.pythonforbeginners.com/files/with-statement-in-python. Anyways:
import sys
if len(sys.argv) != 3:
sys.exit(1)
filename1 = sys.argv[1]
filename2 = sys.argv[2]
with open(filename1, 'r') as file1, open(filename2, 'r') as file2:
for line1, line2 in zip(file1, file2):
print(line1.strip(), line2.strip(), sep='\t')
This can be changed to allow for more than two files:
import sys
if len(sys.argv) != 3:
sys.exit(1)
filenames = sys.argv[1:]
all_lines = []
for filename in filenames:
with open(filename, 'r') as file:
all_lines.append([l.strip() for l in file.readlines()])
for line in zip(*all_lines):
print(*line, sep='\t')

How do I search a file for a string and replace it with multiple lines in Python?

I am running Python 3.5.1
I have a text file that I'm trying to search through and replace or overwrite text if it matches a predefined variable. Below is a simple example:
test2.txt
A Bunch of Nonsense Stuff
############################
# More Stuff Goes HERE #
############################
More stuff here
Outdated line of information that has no comment above - message_label
The last line in this example needs to be overwritten so the new file looks like below:
test2.txt after script
A Bunch of Nonsense Stuff
############################
# More Stuff Goes HERE #
############################
More stuff here
# This is an important line that needs to be copied
Very Important Line of information that the above line is a comment for - message_label
The function I have written idealAppend does not work as intended and subsequent executions create a bit of a mess. My workaround has been to separate the two lines into single line variables but this doesn't scale well. I want to use this function throughout my script with the ability to handle any number of lines. (if that makes sense)
Script
#!/usr/bin/env python3
import sys, fileinput, os
def main():
file = 'test2.txt'
fullData = r'''
# This is an important line that needs to be copied
Very Important Line of information that the above line is a comment for - message_label
'''
idealAppend(file, fullData)
def idealAppend(filename, data):
label = data.split()[-1] # Grab last word of the Append String
for line in fileinput.input(filename, inplace=1, backup='.bak'):
if line.strip().endswith(label) and line != data: # If a line 2 exists that matches the last word (label)
line = data # Overwrite with new line, comment, new line, and append data.
sys.stdout.write(line) # Write changes to current line
with open(filename, 'r+') as file: # Open File with rw permissions
line_found = any(data in line for line in file) # Search if Append exists in file
if not line_found: # If data does NOT exist
file.seek(0, os.SEEK_END) # Goes to last line of the file
file.write(data) # Write data to the end of the file
if __name__ == "__main__": main()
Workaround Script
This seems to work perfectly as long as I only need to write exactly two lines. I'd love this to be more dynamic when it comes to number of lines so I can reuse the function easily.
#!/usr/bin/env python3
import sys, fileinput, os
def main():
file = 'test2.txt'
comment = r'# This is an important line that needs to be copied'
append = r'Very Important Line of information that the above line is a comment for - message_label'
appendFile(file, comment, append)
def appendFile(filename, comment, append):
label = append.split()[-1] # Grab last word of the Append String
for line in fileinput.input(filename, inplace=1, backup='.bak'):
if line.strip().endswith(label) and line != append: # If a line 2 exists that matches the last word (label)
line = '\n' + comment + '\n' + append # Overwrite with new line, comment, new line, and append data.
sys.stdout.write(line) # Write changes to current line
with open(filename, 'r+') as file: # Open File with rw permissions
line_found = any(append in line for line in file) # Search if Append exists in file
if not line_found: # If data does NOT exist
file.seek(0, os.SEEK_END) # Goes to last line of the file
file.write('\n' + comment + '\n' + append) # Write data to the end of the file
if __name__ == "__main__": main()
I am very new to Python so I'm hoping there is a simple solution that I overlooked. I thought it might make sense to try and split the fullData variable at the new line characters into a list or tuple, filter the label from the last item in the list, then output all entries but this is starting to move beyond what I've learned so far.

If I understand your issue correctly, you can just open the input and output files, then check whether the line contains old information and ends with the label and write the appropriate content accordingly.
with open('in.txt') as f, open('out.txt', 'r') as output:
for line in f:
if line.endswith(label) and not line.startswith(new_info):
output.write(replacement_text)
else:
output.write(line)
If you want to update the original file instead of creating a second one, it's easiest to just delete the original and rename the new one instead of trying to modify it in place.

Is this what you are looking for ? It's looking for a label and then replaces the whole line with whatever you want.
test2.txt
A Bunch of Nonsense Stuff
############################
# More Stuff Goes HERE #
############################
More stuff here
Here is to be replaced - to_replace
script.py
#!/usr/bin/env python3
def main():
file = 'test2.txt'
label_to_modify = "to_replace"
replace_with = "# Blabla\nMultiline\nHello"
"""
# Raw string stored in a file
file_replace_with = 'replace_with.txt'
with open(file_replace_with, 'r') as f:
replace_with = f.read()
"""
appendFile(file, label_to_modify, replace_with)
def appendFile(filename, label_to_modify, replace_with):
new_file = []
with open(filename, 'r') as f:
for line in f:
if len(line.split()) > 0 and line.split()[-1] == label_to_modify:
new_file.append(replace_with)
else:
new_file.append(line)
with open(filename + ".bak", 'w') as f:
f.write(''.join(new_file))
if __name__ == "__main__": main()
test2.txt.bak
A Bunch of Nonsense Stuff
############################
# More Stuff Goes HERE #
############################
More stuff here
# Blabla
Multiline
Hello

Reading over both answers I've come up with the following as the best solution i can get to work. It seems to do everything I need. Thanks Everyone.
#!/usr/bin/env python3
def main():
testConfFile = 'test2.txt' # /etc/apache2/apache2.conf
testConfLabel = 'timed_combined'
testConfData = r'''###This is an important line that needs to be copied - ##-#-####
Very Important Line of information that the above line is a \"r\" comment for - message_label'''
testFormatAppend(testConfFile, testConfData, testConfLabel) # Add new test format
def testFormatAppend(filename, data, label):
dataSplit = data.splitlines()
fileDataStr = ''
with open(filename, 'r') as file:
fileData = stringToDictByLine(file)
for key, val in fileData.items():
for row in dataSplit:
if val.strip().endswith(row.strip().split()[-1]):
fileData[key] = ''
fileLen = len(fileData)
if fileData[fileLen] == '':
fileLen += 1
fileData[fileLen] = data
else:
fileLen += 1
fileData[fileLen] = '\n' + data
for key, val in fileData.items():
fileDataStr += val
with open(filename, 'w') as file:
file.writelines(str(fileDataStr))
def stringToDictByLine(data):
fileData = {}
i = 1
for line in data:
fileData[i] = line
i += 1
return fileData
if __name__ == "__main__": main()

Changing format of output text file

I have a text file like this:-
V1xx AB1
V2xx AC34
V3xx AB1
Can we add ; at each end of line through python script?
V1xx AB1;
V2xx AC34;
V3xx AB1;

Here's what you can try. I have overwritten the same file though.
You can try creating a new one(I leave it to you) - You'll need to modify your with statement a little : -
lines = ""
with open('D:\File.txt') as file:
for line in file:
lines += line.strip() + ";\n"
file = open('D:\File.txt', "w+")
file.writelines(lines)
file.flush()
UPDATE: - For in-place modification of file, you can use fileinput module: -
import fileinput
for line in fileinput.input('D:\File.txt', inplace = True):
print line.strip() + ";"

input_file_name = 'input.txt'
output_file_name = 'output.txt'
with open(input_file_name, 'rt') as input, open(output_file_name, 'wt') as output:
for line in input:
output.write(line[:-1]+';\n')

#Open the original file, and create a blank file in write mode
File = open("D:\myfilepath\myfile.txt")
FileCopy = open("D:\myfilepath\myfile_Copy.txt","w")
#For each line in the file, remove the end line character,
#insert a semicolon, and then add a new end line character.
#copy these lines into the blank file
for line in File:
CleanLine=line.strip("\n")
FileCopy.write(CleanLine+";\n")
FileCopy.close()
File.close()
#Replace the original file with the copied file
File = open("D:\myfilepath\myfile.txt","w")
FileCopy = open("D:\myfilepath\myfile_Copy.txt")
for line in FileCopy:
File.write(line)
FileCopy.close()
File.close()
Notes: I have left the "copy file" in there as a back up. You can manually delete it or use os.remove() (if you do that don't forget to import the os module)

how to replace (update) text in a file line by line

I am trying to replace text in a text file by reading each line, testing it, then writing if it needs to be updated. I DO NOT want to save as a new file, as my script already backs up the files first and operates on the backups.
Here is what I have so far... I get fpath from os.walk() and I guarantee that the pathmatch var returns correctly:
fpath = os.path.join(thisdir, filename)
with open(fpath, 'r+') as f:
for line in f.readlines():
if '<a href="' in line:
for test in filelist:
pathmatch = file_match(line, test)
if pathmatch is not None:
repstring = filelist[test] + pathmatch
print 'old line:', line
line = line.replace(test, repstring)
print 'new line:', line
f.write(line)
But what ends up happening is that I only get a few lines (updated correctly, mind you, but repeated from earlier in the file) corrected. I think this is a scoping issue, afaict.
*Also: I would like to know how to only replace the text upon the first instance of the match, for ex., I don't want to match the display text, only the underlying href.

First, you want to write the line whether it matches the pattern or not. Otherwise, you're writing out only the matched lines.
Second, between reading the lines and writing the results, you'll need to either truncate the file (can f.seek(0) then f.truncate()), or close the original and reopen. Picking the former, I'd end up with something like:
fpath = os.path.join(thisdir, filename)
with open(fpath, 'r+') as f:
lines = f.readlines()
f.seek(0)
f.truncate()
for line in lines:
if '<a href="' in line:
for test in filelist:
pathmatch = file_match(line, test)
if pathmatch is not None:
repstring = filelist[test] + pathmatch
line = line.replace(test, repstring)
f.write(line)

Open the file for read and copy all of the lines into memory. Close the file.
Apply your transformations on the lines in memory.
Open the file for write and write out all the lines of text in memory.
with open(filename, "r") as f:
lines = (line.rstrip() for line in f)
altered_lines = [some_func(line) if regex.match(line) else line for line in lines]
with open(filename, "w") as f:
f.write('\n'.join(altered_lines) + '\n')

A (relatively) safe way to replace a line in a file.
#!/usr/bin/python
# defensive programming style
# function to replace a line in a file
# and not destroy data in case of error
def replace_line(filepath, oldline, newline ):
"""
replace a line in a temporary file,
then copy it over into the
original file if everything goes well
"""
# quick parameter checks
assert os.exists(filepath) # !
assert ( oldline and str(oldline) ) # is not empty and is a string
assert ( newline and str(newline) )
replaced = False
written = False
try:
with open(filepath, 'r+') as f: # open for read/write -- alias to f
lines = f.readlines() # get all lines in file
if oldline not in lines:
pass # line not found in file, do nothing
else:
tmpfile = NamedTemporaryFile(delete=True) # temp file opened for writing
for line in lines: # process each line
if line == oldline: # find the line we want
tmpfile.write(newline) # replace it
replaced = True
else:
tmpfile.write(oldline) # write old line unchanged
if replaced: # overwrite the original file
f.seek(0) # beginning of file
f.truncate() # empties out original file
for tmplines in tmpfile:
f.write(tmplines) # writes each line to original file
written = True
tmpfile.close() # tmpfile auto deleted
f.close() # we opened it , we close it
except IOError, ioe: # if something bad happened.
printf ("ERROR" , ioe)
f.close()
return False
return replaced and written # replacement happened with no errors = True
(note: this replaces entire lines only , and all of the lines that match in the file)

Develop Reference

Python is a programming language that lets you work quickly and integrate systems more effectively.

Split .csv while keeping description first row - python

You could do something like this: first = True for line in lines: if first: header = line first = False .... You can then use header in all subsequent files.

Related

Is there any idea of deleting lines in python?

Python; trying to add these files line by line with a tab inbetween

How do I search a file for a string and replace it with multiple lines in Python?

Changing format of output text file

how to replace (update) text in a file line by line

Categories

Resources