How to read a text file with special characters in Python

I am trying to read a txt file with special characters like:
الْحَمْدُ لِلَّهِ رَبِّ الْعَالَمِينَ
I'm using:
import fileinput

fileToSearch = "test_encoding.txt"

with open(fileToSearch, 'r', encoding='utf-8') as file:
    counter = 0
    for line in file:
        print(line)
But Python crashes with this message:
Traceback (most recent call last):
File "test.py", line 9, in <module>
print(line)
File "C:\Users\atheelm\AppData\Local\Programs\Python\Python35-
32\lib\encodings\cp1252.py", line 19, in encode
return codecs.charmap_encode(input,self.errors,encoding_table)[0]
UnicodeEncodeError: 'charmap' codec can't encode characters in position 0-1: character maps to <undefined>
I have Python 3.5.1 and I'm using Windows.
I'm running this command:
py test.py > out.txt

Use two different files and use io:
import io

fileToSearch = "test_encoding.txt"
out_file = "out.txt"  # write the output as UTF-8 yourself instead of redirecting

lines = ["Init"]
with io.open(fileToSearch, 'r', encoding='utf-8') as file:
    counter = 1
    for line in file:
        lines.insert(counter, str(line))
        counter = counter + 1

with io.open(out_file, 'w', encoding='utf-8') as file:
    for item in lines:
        file.write("%s\n" % item)

Related

UnicodeEncodeError: 'charmap' codec can't encode characters in position 0-3: character maps to <undefined>

I'm getting an error, but my output file is still saved correctly. How?
This is the output in the terminal:
Traceback (most recent call last):
File "C:\Users\Ankit\Desktop\PYTHON Project\Project App Monitoring\test1.py", line 19, in <module>
file.write(item + "\n")
File "C:\Users\Ankit\AppData\Local\Programs\Python\Python310\lib\encodings\cp1252.py", line 19, in encode
return codecs.charmap_encode(input,self.errors,encoding_table)[0]
UnicodeEncodeError: 'charmap' codec can't encode characters in position 0-3: character maps to <undefined>
Note that the output file I expect to be saved is actually fine.
This is the program:
import winreg

# Connecting to the registry key
access_registry = winreg.ConnectRegistry(None, winreg.HKEY_LOCAL_MACHINE)
access_key = winreg.OpenKey(access_registry, r"SOFTWARE\Microsoft\Windows\CurrentVersion\Uninstall")

# Enumerating all subkeys under the key
output = []
for n in range(winreg.QueryInfoKey(access_key)[0]):  # [0] is the number of subkeys
    try:
        value = winreg.EnumKey(access_key, n)
        output.append(value)
    except OSError:
        break

# Writing the output to a file
with open("output.txt", "w") as file:
    for item in output:
        file.write(item + "\n")

# Closing the connection
winreg.CloseKey(access_key)
winreg.CloseKey(access_registry)
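The cause is the same as in the question above: open("output.txt", "w") without an encoding argument uses cp1252 on Windows, so writing a registry key name that contains characters outside cp1252 raises the error after the file has already been partially written, which is why the output looks fine up to that point. A minimal sketch of a likely fix (an assumption, not an accepted answer from the thread) is to replace the file-writing block with:

# Open the output file with an explicit UTF-8 encoding so every registry
# key name can be written, not just those cp1252 can represent
with open("output.txt", "w", encoding="utf-8") as file:
    for item in output:
        file.write(item + "\n")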

reading .dat file in python (Agilent 4294A Precision Impedance Analyzer)

I've been trying to read a .dat file from an Agilent impedance analyzer. I keep getting the same error regardless of the method I try. Any ideas how to get around this issue?
Thanks in advance.
# import csv

# Method 1
# with open("RP.dat") as infile, open("outfile.csv", "w") as outfile:
#     csv_writer = csv.writer(outfile)
#     prev = ''
#     csv_writer.writerow(['ID', 'PARENT_ID'])
#     for line in infile.read().splitlines():
#         csv_writer.writerow([line, prev])
#         prev = line

# Method 2
# import numpy as np
# filename = 'RP.dat'
# indata = np.loadtxt(filename)
# print(indata)

# Method 3
with open("RP.dat") as infile:
    file_contents = infile.readlines()
print(file_contents)
C:\Users\benjy\Workspace\urop>python read_dat.py
Traceback (most recent call last):
File "C:\Users\benjy\Workspace\urop\read_dat.py", line 17, in <module>
file_contents = infile.readlines()
File "C:\Users\benjy\AppData\Local\Programs\Python\Python39\lib\encodings\cp1252.py", line 23, in decode
return codecs.charmap_decode(input,self.errors,decoding_table)[0]
UnicodeDecodeError: 'charmap' codec can't decode byte 0x81 in position 672: character maps to <undefined>
You can use the codecs library:
import codecs

with codecs.open('RP.dat', errors='ignore', encoding='utf-8') as f:
    dat = f.read()
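Note that errors='ignore' silently drops the bytes that cannot be decoded; the built-in open accepts the same encoding and errors arguments, so codecs is not strictly needed. A sketch of an alternative (the 'latin-1' choice is an assumption: it maps every byte value to a character, so nothing is lost, which may be closer to what an instrument's .dat export needs):

# 'latin-1' decodes every byte value 0x00-0xFF, so no data is silently dropped;
# whether the resulting text is meaningful depends on how the analyzer wrote the file
with open('RP.dat', encoding='latin-1') as f:
    dat = f.read()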

json.loads() gives UnicodeEncodeError when parsing JSON object received from node.js

I am trying to send a JSON object from my node.js server to a Python script. However, when converting the JSON object to a dictionary using json.loads, many inputs raise UnicodeEncodeErrors. What do I need to do to decode the JS object correctly?
Error: UnicodeDecodeError: 'charmap' codec can't decode byte 0x9d in position 2062: character maps to <undefined>
at PythonShell.parseError (D:\Users\Temp\Desktop\empman\node_modules\python-shell\index.js:183:17)
at terminateIfNeeded (D:\Users\Temp\Desktop\empman\node_modules\python-shell\index.js:98:28)
at ChildProcess.<anonymous> (D:\Users\Temp\Desktop\empman\node_modules\python-shell\index.js:88:9)
at emitTwo (events.js:106:13)
at ChildProcess.emit (events.js:191:7)
at Process.ChildProcess._handle.onexit (internal/child_process.js:219:12)
at Process.onexit (D:\Users\Temp\Desktop\empman\node_modules\async-listener\glue.js:188:31)
----- Python Traceback -----
File "word.py", line 38, in <module>
json_data=open('data.txt').read()
File "D:\Users\Temp\AppData\Local\Programs\Python\Python36-32\lib\encodings\cp1252.py", line 23, in decode
return codecs.charmap_decode(input,self.errors,decoding_table)[0]
Corresponding Python code:
from docx import Document
from docx.shared import Inches
import sys
import io
import json

document = Document('template.docx')
# newdocument = Document('resume.docx')
# print(sys.argv)  # Note the first argument is always the script filename.

resumearray = []
for x in range(0, 21):
    resumearray.append(input())

# json_data = open('data.txt').read()
f = io.open('data', 'r', encoding='utf-16-le')
# datastore = json.loads(f.read)
print(f.read())
# text = f.read()
# json_data = text
# document.add_paragraph('_______________________________________________________________________')
# document.add_paragraph(resumearray[1])

k = resumearray[1]
# document.add_paragraph(k)
jsobject = json.loads(k)
document.add_paragraph('_______________________________________________')
# document.add_paragraph(jsobject.values())

for x in range(0, 9):
    if resumearray[x] == '[]':
        document.add_paragraph('nothing was found')
    else:
        document.add_paragraph(resumearray[x])
You are running Python on Windows, where the default encoding is cp1252. The JSON is encoded as UTF-8, hence the error.
>>> with open('blob.json', encoding='cp1252') as f:
... j = json.load(f)
...
Traceback (most recent call last):
File "<stdin>", line 2, in <module>
File "/usr/local/lib/python3.6/json/__init__.py", line 296, in load
return loads(fp.read(),
File "/usr/local/lib/python3.6/encodings/cp1252.py", line 23, in decode
return codecs.charmap_decode(input,self.errors,decoding_table)[0]
UnicodeDecodeError: 'charmap' codec can't decode byte 0x9d in position 2795: character maps to <undefined>
Use utf-8 instead:
>>> with open('blob.json', encoding='utf-8') as f:
... j = json.load(f)
...
>>> print(len(j))
29
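Applied to the script above, the fix would look like this (a sketch; it assumes the file written by the node.js side is UTF-8, which is the default for fs.writeFile):

import json

# Read the JSON produced by node.js with an explicit encoding instead of
# relying on the Windows default (cp1252) or guessing utf-16-le
with open('data.txt', encoding='utf-8') as f:
    datastore = json.load(f)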

Python doesn't create a new file

I'm using Python 3.4. I have a program in which I open two files:
problems = open("out/problems.tex", 'w')
answers = open("out/answers.tex", 'w')
If I run this I get an error:
Traceback (most recent call last):
File "<encoding error>", line 10, in <module>
File "C:\Python33\lib\encodings\cp1252.py", line 19, in encode
return codecs.charmap_encode(input,self.errors,encoding_table)[0]
UnicodeEncodeError: 'charmap' codec can't encode characters in position 247-251: character maps to <undefined>
If I run this:
problems = open("out/problems.tex", 'w', encoding='utf8')
answers = open("out/answers.tex", 'w', encoding='utf8')
I get no errors, but I also get no 'problems.tex' and 'answers.tex' files. Does anybody know what I am doing wrong?
Here is my full program:
from polygen import *
head_file = open("tex/head.tex", 'r', encoding="utf8")
ground_file = open("tex/ground.tex", 'r', encoding="utf8")
problems = open("out/problems.tex", 'w', encoding="utf8")
answers = open("out/answers.tex", 'w', encoding="utf8")
head = head_file.read()
problems.write(head)
answers.write(head)
t = polymult(7, 1, 2, 2, 3, 4)
problems.write(t[0])
answers.write(t[1])
ground = ground_file.read()
problems.write(ground)
answers.write(ground)
problems.close()
answers.close()
So weird. Two suggestions:
Use an absolute path and make sure the file is actually generated.
Use "rb"/"wb" mode instead of "r"/"w" if you have non-ASCII characters in the source file.

Replace given line in files in Python

I have several files, and I need to replace the third line in each of them:
files = ['file1.txt', 'file2.txt']
new_3rd_line = 'new third line'
What is the best way to do this?
The files are fairly big, several hundred MB each.
I used this solution: Search and replace a line in a file in Python
from tempfile import mkstemp
from shutil import move
from os import remove, close

def replace_3_line(file):
    new_3rd_line = 'new_3_line\n'
    # Create temp file
    fh, abs_path = mkstemp()
    new_file = open(abs_path, 'w')
    old_file = open(file)
    counter = 0
    for line in old_file:
        counter = counter + 1
        if counter == 3:
            new_file.write(new_3rd_line)
        else:
            new_file.write(line)
    # Close temp file
    new_file.close()
    close(fh)
    old_file.close()
    # Remove original file
    remove(file)
    # Move new file
    move(abs_path, file)

replace_3_line('tmp.ann')
But it does not work with files that contain non-English characters.
Traceback (most recent call last):
File "D:\xxx\replace.py", line 27, in <module>
replace_3_line('tmp.ann')
File "D:\xxx\replace.py", line 12, in replace_3_line
for line in old_file:
File "C:\Python31\lib\encodings\cp1251.py", line 23, in decode
return codecs.charmap_decode(input,self.errors,decoding_table)[0]
UnicodeDecodeError: 'charmap' codec can't decode byte 0x98 in position 32: character maps to <undefined>
That is bad. Where is Python's Unicode handling? (The file is UTF-8, and this is Python 3.)
The file is:
фвыафыв
sdadf
试试
阿斯达а
阿斯顿飞
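A hedged fix (a sketch, not an answer taken from the original thread): pass encoding='utf-8' explicitly when opening both the original file and the temporary file, so that the platform default (cp1251 in the traceback above) is never used:

from tempfile import mkstemp
from shutil import move
from os import remove, close

def replace_3rd_line_utf8(path):
    # Same approach as above, but with an explicit UTF-8 encoding on both files
    new_3rd_line = 'new_3_line\n'
    fh, abs_path = mkstemp()
    close(fh)  # reopen the temp file by path so an encoding can be passed
    with open(path, encoding='utf-8') as old_file, \
         open(abs_path, 'w', encoding='utf-8') as new_file:
        for counter, line in enumerate(old_file, start=1):
            new_file.write(new_3rd_line if counter == 3 else line)
    remove(path)
    move(abs_path, path)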
