Appending hexlifyed content to file

Appending hexlifyed content to file - python

file_1 = ('test.png')
with open(file_1, 'rb') as b:
file_hex = b.read()
binascii.hexlify(file_hex)
file_1_size = len(file_hex)
print (file_1_size)
file_new = open("test.tp", "a")
file_new.write(binascii.hexlify(file_hex))
file_new.close()
I've been trying to append this hexlified content to the file. I've even tried assigning the hexlified content to a variable of its own, like this:
file_1 = ('test.png')
with open(file_1, 'rb') as b:
file_hex = b.read()
x = binascii.hexlify(file_hex)
file_1_size = len(file_hex)
print (file_1_size)
file_new = open("test.tp", "a")
file_new.write(x)
file_new.close()
Both end with the error:
TypeError: must be str, not bytes

Open your file in binary mode to append bytes:
# Binary append mode ("ab") accepts the bytes object directly, so no decoding is needed.
with open("test.tp", "ab") as file_new:
    file_new.write(x)
or decode your bytes to a string first:
# Text append mode ("a") only accepts str, so turn the hex bytes into text first.
with open("test.tp", "a") as file_new:
    file_new.write(x.decode('ascii'))
Hex digits fall within the ASCII code range, so decoding with that codec is safe.

Related

Encoding and decoding string in Python

I want to write a string to a file using Python. I know how to do that, so that's not a problem. I also wish to encode that string once it has been written. The encoding doesn't really matter, so I'll stick to let's say UTF-32. What I do for that is after I wrote the string, I read from the file again, encode the string into bytes and then re-write to the same file. I can do the encoding part, but my problem arises with the decoding. I want to read it as bytes so that I can convert it back to a str. What I do for this is the same principle: Read from file, decode and write to the same file. What I get from reading the encoded string looks like b'\xff\xfe\x00\x001\x00\x00\x004\x00\x00\x002\x00\x00\x00'
When I read this as bytes, it doubles the b and the backslashes. If I read it like this, as a string, and then try to decode, it keeps saying 'str' object does not have attribute decode or something. I know that I can't decode the string, but if I try with bytes it seems to be "doubling" the bytes.
Here is my code:
def readfile(filename):
f = open(filename, 'r')
s = f.read()
f.close()
return s
def readfile_b(filename):
f = open(filename, 'rb')
s = f.read()
f.close()
return s
def writefile(filename, writeobject):
f = open(filename, 'w')
f.write(writeobject)
f.close()
def encode(filename):
s = readfile(filename)
s_enc = bytes(s, 'utf-32')
writefile(filename, str(s_enc))
def decode(filename):
s_enc = readfile_b(filename)
print(s_enc)
s = str(s_enc, 'utf-32')
writefile(filename, s)
encode("Example.txt")
decode("Example.txt")
Output (for decode(), encode() didn't have any errors):
b"b'\\xff\\xfe\\x00\\x00H\\x00\\x00\\x00e\\x00\\x00\\x00l\\x00\\x00\\x00l\\x00\\x00\\x00o\\x00\\x00\\x00'"
Traceback (most recent call last):
File "C:/bla/bla/bla/bla/Example.py", line 29, in <module>
decode("MamaAccount.txt")
File "C:/bla/bla/bla/bla/Example.py", line 26, in decode
s = str(s_enc, 'utf-32')
UnicodeDecodeError: 'utf-32-le' codec can't decode bytes in position 0-3: code point not in range(0x110000)
Any help is greatly appreciated

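Try using a binary-mode version of writefile. Currently you are writing the bytes cast to a string, so the file contains the textual representation of the bytes object (the b'...' form). When you read that back in binary mode, you get the extra b prefix and the doubled backslashes.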
This works for me:
def readfile(filename):
    """Return the whole content of *filename*, read in text mode.

    No encoding is passed to open(), so the platform default is used.
    The with-block closes the file even if reading raises.
    """
    with open(filename, 'r') as f:
        return f.read()
def readfile_b(filename):
    """Return the whole content of *filename* as bytes (binary mode).

    The with-block closes the file even if reading raises.
    """
    with open(filename, 'rb') as f:
        return f.read()
def writefile(filename, writeobject):
    """Write the string *writeobject* to *filename*, replacing any old content.

    The file is opened in text mode with the platform default encoding.
    The with-block closes the file even if writing raises.
    """
    with open(filename, 'w') as f:
        f.write(writeobject)
def writefile_b(filename, writeobject):
    """Write the bytes *writeobject* to *filename*, replacing any old content.

    Binary mode stores the bytes unchanged.
    The with-block closes the file even if writing raises.
    """
    with open(filename, 'wb') as f:
        f.write(writeobject)
def encode(filename):
    """Encode the text of *filename* as UTF-32 and save it as "bin_" + filename.

    The source file is left untouched.
    """
    s = readfile(filename)
    # Write the raw bytes in binary mode; writing str(bytes) in text mode
    # would store the b'...' representation instead of the encoded data.
    writefile_b("bin_" + filename, s.encode('utf-32'))
def decode(filename):
    """Decode the UTF-32 bytes in *filename* and save the text as "dec_" + filename.

    The decoded text is also printed. The source file is left untouched.
    """
    s_enc = readfile_b(filename)
    s = s_enc.decode('utf-32')
    print(s)
    writefile("dec_" + filename, s)
encode("Example.txt")
decode("bin_Example.txt")

Python 2.7 CSV file read/write \xef\xbb\xbf code

I have a question about reading and writing a CSV file with the 'utf-8-sig' encoding in Python 2.7. My CSV header is:
['\xef\xbb\xbfID;timestamp;CustomerID;Email']
The header starts with the bytes "\xef\xbb\xbf" in front of "ID". I read it from file A.csv, and I want to write the same bytes and header to file B.csv.
My print log shows:
['\xef\xbb\xbfID;timestamp;CustomerID;Email']
But the header in the actual output file looks like this:
ÔªøID;timestamp
Here is the code:
def remove_gdpr_info_from_csv(file_path, file_name, temp_folder, original_header):
new_temp_folder = tempfile.mkdtemp()
new_temp_file = new_temp_folder + "/" + file_name
# Blanked new file
with open(new_temp_file, 'wb') as outfile:
writer = csv.writer(outfile, delimiter=";")
print original_header
writer.writerow(original_header)
# File from SFTP
with open(file_path, 'r') as infile:
reader = csv.reader(infile, delimiter=";")
first_row = next(reader)
email = first_row.index('Email')
contract_detractor1 = first_row.index('Contact Detractor (Q21)')
contract_detractor2 = first_row.index('Contact Detractor (Q20)')
contract_detractor3 = first_row.index('Contact Detractor (Q43)')
contract_detractor4 = first_row.index('Contact Detractor(Q26)')
contract_detractor5 = first_row.index('Contact Detractor(Q27)')
contract_detractor6 = first_row.index('Contact Detractor(Q44)')
indexes = []
for column_name in header_list:
ind = first_row.index(column_name)
indexes.append(ind)
for row in reader:
output_row = []
for ind in indexes:
data = row[ind]
if ind == email:
data = ''
elif ind == contract_detractor1:
data = ''
elif ind == contract_detractor2:
data = ''
elif ind == contract_detractor3:
data = ''
elif ind == contract_detractor4:
data = ''
elif ind == contract_detractor5:
data = ''
elif ind == contract_detractor6:
data = ''
output_row.append(data)
writer.writerow(output_row)
s3core.upload_files(SPARKY_S3, DESTINATION_PATH, new_temp_file)
shutil.rmtree(temp_folder)
shutil.rmtree(new_temp_folder)

'\xef\xbb\xbf' is the UTF8 encoded version of the unicode ZERO WIDTH NO-BREAK SPACE U+FEFF. It is often used as a Byte Order Mark at the beginning of unicode text files:
when you have 3 bytes: '\xef\xbb\xbf', then the file is utf8 encoded
when you have 2 bytes: '\xff\xfe', then the file is in utf16 little endian
when you have 2 bytes: '\xfe\xff', then the file is in utf16 big endian
The 'utf-8-sig' encoding explicitly asks for this BOM to be written at the beginning of the file.
To process it automatically at read time of a csv file in Python 2, you can use the codecs module:
# Python 2: open csv input in binary mode so no newline translation happens
# before the csv module sees the data.
with open(file_path, 'rb') as infile:
    # EncodedFile decodes the file as utf-8-sig (dropping the BOM) and hands
    # the csv reader plain utf-8 bytes.
    reader = csv.reader(codecs.EncodedFile(infile, 'utf-8', 'utf-8-sig'), delimiter=";")
EncodedFile will wrap the original file object by decoding it in utf8-sig, actually skipping the BOM and re-encoding it in utf8 with no BOM.

You want to use the EncodedFile method from the codecs library as in Serge Ballesta's answer.
However, in Python 2.7 the encoding name utf-8-sig is not a supported alias for the UTF8-sig encoding; you need to use utf_8_sig. Additionally, the arguments must give the output data encoding first and the file encoding second: codecs.EncodedFile(file, datacodec, filecodec=None, errors='strict')
Here's the full result:
import codecs
# Python 2: open csv input in binary mode so no newline translation happens
# before the csv module sees the data.
with open(file_path, 'rb') as infile:
    # Data encoding first ('utf8', what the reader receives), file encoding
    # second ('utf_8_sig', which drops the BOM while decoding).
    reader = csv.reader(codecs.EncodedFile(infile, 'utf8', 'utf_8_sig'), delimiter=";")

Getting unicode decode error in python?

I am using facebook graph API but getting error when I try to run graph.py
How should I resolve this charmap problem? I am getting a Unicode decode error.
enter image description here
In graph.py :
table = json2html.convert(json = variable)
htmlfile=table.encode('utf-8')
f = open('Table.html','wb')
f.write(htmlfile)
f.close()
# replacing '&gt' with '>' and '&lt' with '<'
f = open('Table.html','r')
s=f.read()
s=s.replace(">",">")
s=s.replace("<","<")
f.close()
# writting content to html file
f = open('Table.html','w')
f.write(s)
f.close()
# output
webbrowser.open("Table.html")
else:
print("We couldn't find anything for",PageName)
I cannot understand why I am facing this issue. I also get an error at 's=f.read()'.

From the error message I can see that Python tries to guess the encoding when you read the file, and it ends up using cp1250 (probably because Windows uses cp1250 as the system default). That is the wrong encoding, because you saved the file as 'utf-8'.
So you have to use open(..., encoding='utf-8'), and then Python will not have to guess the encoding.
# replacing '&gt;' with '>' and '&lt;' with '<'
# Read with the same encoding the file was written in, so Python does not
# fall back to the platform default codec.
with open('Table.html', 'r', encoding='utf-8') as f:
    s = f.read()
s = s.replace("&gt;", ">")
s = s.replace("&lt;", "<")
# writing content to html file
with open('Table.html', 'w', encoding='utf-8') as f:
    f.write(s)
But you could change it before you save it. And then you don't have to open it again.
table = json2html.convert(json=variable)
# Undo the HTML escaping in memory, before anything is written, so the file
# never has to be reopened.
table = table.replace("&gt;", ">").replace("&lt;", "<")
with open('Table.html', 'w', encoding='utf-8') as f:
    f.write(table)
# output
webbrowser.open("Table.html")
BTW: Python has the function html.unescape(text), which replaces all character references such as &gt; (so-called entities).
import html
table = json2html.convert(json=variable)
# html.unescape converts every character reference (&gt;, &lt;, &amp;, ...)
# in a single call.
table = html.unescape(table)
# The with-block closes the file even if the write fails.
with open('Table.html', 'w', encoding='utf-8') as f:
    f.write(table)
# output
webbrowser.open("Table.html")

Append JSON to file

I am trying to append values to a JSON file. How can I append the data? I have tried many approaches, but none of them work.
Code:
def all(title,author,body,type):
title = "hello"
author = "njas"
body = "vgbhn"
data = {
"id" : id,
"author": author,
"body" : body,
"title" : title,
"type" : type
}
data_json = json.dumps(data)
#data = ast.literal_eval(data)
#print data_json
if(os.path.isfile("offline_post.json")):
with open('offline_post.json','a') as f:
new = json.loads(f)
new.update(a_dict)
json.dump(new,f)
else:
open('offline_post.json', 'a')
with open('offline_post.json','a') as f:
new = json.loads(f)
new.update(a_dict)
json.dump(new,f)
How can I append data to json file when this function is called?

I suspect you left out that you're getting a TypeError in the blocks where you're trying to write the file. Here's where you're trying to write:
with open('offline_post.json','a') as f:
new = json.loads(f)
new.update(a_dict)
json.dump(new,f)
There's a couple of problems here. First, you're passing a file object to the json.loads command, which expects a string. You probably meant to use json.load.
Second, you're opening the file in append mode, which places the pointer at the end of the file. When you run the json.load, you're not going to get anything because it's reading at the end of the file. You would need to seek to 0 before loading (edit: this would fail anyway, as append mode is not readable).
Third, when you json.dump the new data to the file, it's going to append it to the file in addition to the old data. From the structure, it appears you want to replace the contents of the file (as the new data contains the old data already).
You probably want to use r+ mode, seeking back to the start of the file between the read and the write, and truncating at the end in case the size of the data structure ever shrinks.
# r+ opens the existing file for both reading and writing.
with open('offline_post.json', 'r+') as f:
    new = json.load(f)
    new.update(a_dict)
    # Rewind so the dump overwrites the old document from the start.
    f.seek(0)
    json.dump(new, f)
    # Cut off leftover bytes in case the new document is shorter than the old one.
    f.truncate()
Alternatively, you can open the file twice:
# First pass: load the existing document.
with open('offline_post.json', 'r') as f:
    new = json.load(f)
new.update(a_dict)
# Second pass: 'w' truncates the file, so the merged document replaces the old one.
with open('offline_post.json', 'w') as f:
    json.dump(new, f)

This is a different approach: I just wanted to append without reloading all the data. It runs on a Raspberry Pi, so I want to look after memory. The test code:
import json
import os

filename = "/home/pi/scratch_pad/test.json"


def _last_token(f, end):
    """Return (offset, byte) of the last non-whitespace byte before *end*.

    *f* must be open in binary mode. Returns (-1, b"") when nothing but
    whitespace precedes *end*.
    """
    pos = end
    while pos > 0:
        pos -= 1
        f.seek(pos)
        byte = f.read(1)
        if not byte.isspace():
            return pos, byte
    return -1, b""


def _append_record(path, record):
    """Append *record* to the JSON array stored in *path* without loading it.

    Only the few bytes at the end of the file are inspected: the closing
    bracket is cut off, the new element is written, and the bracket is put
    back. Raises ValueError when the file does not end with a JSON array.
    """
    with open(path, "rb+") as f:
        f.seek(0, os.SEEK_END)
        close_pos, closing = _last_token(f, f.tell())
        last_pos, last = _last_token(f, close_pos) if closing == b"]" else (-1, b"")
        if last_pos < 0:
            raise ValueError("%s does not end with a JSON array" % path)
        # An empty array ("[" directly before "]") takes no separating comma.
        separator = b"" if last == b"[" else b","
        f.seek(last_pos + 1)
        f.truncate()
        f.write(separator + b"\n\t" + json.dumps(record).encode("utf-8") + b"\n]")


# remove the last run json data
try:
    os.remove(filename)
except OSError:
    pass

boiler = 90
tower = 78
for count in range(10):
    if count == 0:
        # create the json file holding a one-element array
        with open(filename, mode="w") as fw:
            fw.write("[\n\t" + json.dumps({"boiler": boiler, "tower": tower}) + "\n]")
    else:
        # append to the json file
        boiler = boiler + .01
        tower = tower + .02
        _append_record(filename, {"boiler": boiler, "tower": tower})

Read xml as a txt in python

I have the following code in Python (which only loads data from a txt file):
def main():
f = open("text.txt", "r" ) //load txt
a = [] // new array
for line in f:
a.append(line.strip()) //append line
main()
How can I do this with an XML file? f = open("myxml.xml", "r") doesn't work. I get the error: UnicodeDecodeError: 'charmap' codec can't decode byte 0x88 in position 4877: character maps to <undefined>

This has nothing to do with the xml file format, but in which encoding your file is. Python3 assumes everything to be in utf-8, but if you are on windows your file is probably in windows-1252. You should use:
f = open("text.txt", "r", encoding="cp1252")

This will surely do the job:
# Read the whole XML file as plain text; the with-block closes the file on exit.
with open('reboot.xml', 'r') as f:
    a = f.read()
print(a)

Develop Reference

Python is a programming language that lets you work quickly and integrate systems more effectively.

Appending hexlifyed content to file - python

Open your file in binary mode to append bytes: with open("test.tp", "ab") as file_new: file_new.write(x), or decode your bytes to a string first: with open("test.tp", "a") as file_new: file_new.write(x.decode('ascii')). Hex digits fall within the ASCII code range, so decoding with that codec is safe.

Related

Encoding and decoding string in Python

Python 2.7 CSV file read/write \xef\xbb\xbf code

Getting unicode decode error in python?

Append JSON to file

Read xml as a txt in python

Categories

Resources