Replace given line in files in Python - python

I have several files, and I need to replace third line in them:
files = ['file1.txt', 'file2.txt']
new_3rd_line = 'new third line'
What is the best way to do this?
Files are big enough, several 100mb's files.

I used this solution: Search and replace a line in a file in Python
from tempfile import mkstemp
from shutil import move
from os import remove, close
def replace_3_line(file):
new_3rd_line = 'new_3_line\n'
#Create temp file
fh, abs_path = mkstemp()
new_file = open(abs_path,'w')
old_file = open(file)
counter = 0
for line in old_file:
counter = counter + 1
if counter == 3:
new_file.write(new_3rd_line)
else:
new_file.write(line)
#close temp file
new_file.close()
close(fh)
old_file.close()
#Remove original file
remove(file)
#Move new file
move(abs_path, file)
replace_3_line('tmp.ann')
But it does not work with files that contains non English charecters.
Traceback (most recent call last):
File "D:\xxx\replace.py", line 27, in <module>
replace_3_line('tmp.ann')
File "D:\xxx\replace.py", line 12, in replace_3_line
for line in old_file:
File "C:\Python31\lib\encodings\cp1251.py", line 23, in decode
return codecs.charmap_decode(input,self.errors,decoding_table)[0]
UnicodeDecodeError: 'charmap' codec can't decode byte 0x98 in position 32: character maps to <undefined>
That is bad. Where's python unicode? (file is utf8, python3).
File is:
фвыафыв
sdadf
试试
阿斯达а
阿斯顿飞

Related

read clm chunk from wav file using python wavfile

i am using the enhanced wavfile.py library, and i want to use it to read serum-style wavetables. i know that these files use a 'clm' block to store cue points, but i am having trouble with reading these using the library
right now i'm just trying to read the file (i'll do something with it later); here is my code:
import wavfile as wf
wf.read('wavetable.wav')
when i run the script, i get this error:
[my dir]/wavfile.py:223: WavFileWarning: Chunk b'clm ' skipped
warnings.warn("Chunk " + str(chunk_id) + " skipped", WavFileWarning)
[my dir]/wavfile.py:223: WavFileWarning: Chunk b'' skipped
warnings.warn("Chunk " + str(chunk_id) + " skipped", WavFileWarning)
Traceback (most recent call last):
File "[my dir]/./test.py", line 5, in <module>
wf.read('wavetable.wav')
File "[my dir]/wavfile.py", line 228, in read
_skip_unknown_chunk(fid)
File "[my dir]/wavfile.py", line 112, in _skip_unknown_chunk
size = struct.unpack('<i', data)[0]
struct.error: unpack requires a buffer of 4 bytes
is it even possible to do this using the library? if not, how could i modify the library to make this work?
bear with me, i'm new to working with files and python in general
UPDATE:
here's the output after i add madison courto's code:
Traceback (most recent call last):
File "[my dir]/./test.py", line 5, in <module>
wf.debug('wavetable.wav')
File "[my dir]/wavfile.py", line 419, in debug
format_str = format.decode("utf-8")
UnicodeDecodeError: 'utf-8' codec can't decode byte 0x80 in position 1: invalid start byte
and here is the wavetable i'm testing; hopefully sndup left it intact
Adding these conditions to the read function returns a dict of markers, it seems that one of the markers is currupt so I added except pass, it's a bit janky but works.
elif chunk_id == b'':
break
elif chunk_id == b'clm ':
str1 = fid.read(8)
size, numcue = struct.unpack('<ii', str1)
for c in range(numcue):
try:
str1 = fid.read(24)
idx, position, datachunkid, chunkstart, blockstart, sampleoffset = struct.unpack(
'<iiiiii', str1)
# _cue.append(position)
_markersdict[idx][
'position'] = position # needed to match labels and markers
except:
pass

reading .dat file in python (Agilent 4294A Precision Impedance Analyzer)

I've been trying to read a .dat file from an Agilent impedance analyzer. I keep getting the same error regardless of the method I try. Any ideas how to get around this issue?
Thanks in advance.
# import csv
# Method 1
# with open("RP.dat") as infile, open("outfile.csv", "w") as outfile:
# csv_writer = csv.writer(outfile)
# prev = ''
# csv_writer.writerow(['ID', 'PARENT_ID'])
# for line in infile.read().splitlines():
# csv_writer.writerow([line, prev])
# prev = line
# Method 2
# import numpy as np
# filename = 'RP.dat'
# indata = np.loadtxt(filename)
# print(indata)
# Method 3
with open("RP.dat") as infile:
file_contents = infile.readlines()
print(file_contents)
C:\Users\benjy\Workspace\urop>python read_dat.py
Traceback (most recent call last):
File "C:\Users\benjy\Workspace\urop\read_dat.py", line 17, in <module>
file_contents = infile.readlines()
File "C:\Users\benjy\AppData\Local\Programs\Python\Python39\lib\encodings\cp1252.py", line 23, in decode
return codecs.charmap_decode(input,self.errors,decoding_table)[0]
UnicodeDecodeError: 'charmap' codec can't decode byte 0x81 in position 672: character maps to <undefined>
You can use codecs library
import codecs
with codecs.open('RP.dat', errors='ignore', encoding='utf-8') as f:
dat = f.read()

Invalid start byte when reading multiple files in Python

My function reads multiple .sgm files. I get an error when reading the content from the file speficially at line contents = f.read()
def block_reader(path):
filePaths = []
for filename in os.listdir(path):
if filename.endswith(".sgm"):
filePaths.append(os.path.join(path, filename))
continue
else:
continue
for file in filePaths:
with open(file, 'r') as f:
print(f)
contents = f.read()
soup = BeautifulSoup(contents, "lxml")
return ["test content"]
Error message
Traceback (most recent call last):
File "./block-1-reader.py", line 32, in <module>
for reuters_file_content in solutions.block_reader(path):
File "/home/ragith/Documents/A-School/Fall-2020/COMP_479/Assignment_1/solutions.py", line 29, in block_reader
contents = f.read()
File "/usr/lib/python3.6/codecs.py", line 321, in decode
(result, consumed) = self._buffer_decode(data, self.errors, final)
UnicodeDecodeError: 'utf-8' codec can't decode byte 0xfc in position 1519554: invalid start byte
Try this: with open(path, 'rb') as f: That b in the mode specifier in the open() states that the file shall be treated as binary, so contents will remain a bytes. No decoding attempt will happen this way. More details at: this link

return codecs.ascii_decode(input, self.errors)[0]

I am reading a songs file in csv format and I do not know what I am doing wrong.
import csv
import os
import random
file = open("songs.csv", "rU")
reader = csv.reader(file)
for song in reader:
print(song[0], song[1], song[2])
file.close()
This is the error:
Traceback (most recent call last):
File "/Users/kuku/Desktop/hey/mine/test.py", line 10, in <module>
for song in reader:
File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/encodings/ascii.py", line 26, in decode
return codecs.ascii_decode(input, self.errors)[0]
UnicodeDecodeError: 'ascii' codec can't decode byte 0xe2 in position 414: ordinal not in range(128)
try
for song in [unicode(song, 'utf-8') for song in reader]:
print(...)
With this bit of your code:
for song in reader:
print( song[0], song[1],song[2])
you are printing elements 0, 1 and 2 of the lines in reader during each iteration of the loop. This will cause a (different) error if there are fewer than 3 elements in total.
If you don't know that there will be at least 3 elements in each line, you could include the code in a try, except block:
with open("songs.csv", "r") as f:
song_reader = csv.reader(f)
for song_line in song_reader:
lyric = song_line
try:
print(lyric[0], lyric[1], lyric[2])
except:
pass # ...or preferably do something better
It's worth noting that in most cases it is preferable to open a file within a with block, as shown above. This negates the need for file.close().
You can open the file in utf-8 encoding.
file = open("songs.csv", "rU", encoding="utf-8")

How to read a text file with special characters in python

I am trying to read txt file with special characters like:
الْحَمْدُ لِلَّهِ رَبِّ الْعَالَمِينَ
I'm using:
import fileinput
fileToSearch = "test_encoding.txt"
with open(fileToSearch, 'r', encoding='utf-8') as file:
counter = 0;
for line in file:
print(line)
But Python crashes with this message:
Traceback (most recent call last):
File "test.py", line 9, in <module>
print(line)
File "C:\Users\atheelm\AppData\Local\Programs\Python\Python35-
32\lib\encodings\cp1252.py", line 19, in encode
return codecs.charmap_encode(input,self.errors,encoding_table)[0]
UnicodeEncodeError: 'charmap' codec can't encode characters in position 0-1:
character maps to <undefined>
I have Python 3.5.1 and I'm using Windows.
I'm running this command:
py test.py > out.txt
use 2 diff files and use io:
lines=["Init"]
with io.open(fileToSearch,'r',encoding='utf-8') as file:
counter = 1;
for line in file:
lines.insert(counter,str(line))
counter = counter+1
with io.open(out_file,'w',encoding='utf-8') as file:
for item in lines:
file.write("%s\n" % item)

Categories

Resources