Reading a binary file (.chn) in Python

In Python, how do I read a binary file (in my case a .chn file) and show its contents in binary format?

Assuming that the values are separated by spaces:
with open('myfile.chn', 'rb') as f:
    data = []
    for line in f:  # a file supports direct iteration
        data.extend(hex(int(x, 2)) for x in line.split())
In Python it is better to use open() than file(); the documentation says so explicitly:
When opening a file, it’s preferable to use open() instead of invoking the file constructor directly.
The 'rb' mode opens the file in binary mode.
Reference:
http://docs.python.org/library/functions.html#open

Try this:
with open('myfile.chn', 'rb') as f:
    data = f.read()
data = [bin(ord(x))[2:].zfill(8) for x in data]
print ''.join(data)
And if you want only the binary data, it will be in the list:
with open('myfile.chn', 'rb') as f:
    data = f.read()
data = [bin(ord(x))[2:].zfill(8) for x in data]
print data
In data you now have the list of binary numbers; you can take these and convert them to hexadecimal. Don't use .strip('0b') here: it would also strip trailing zeros (e.g. bin(2) is '0b10'), so slice off the '0b' prefix and zero-pad with zfill(8) instead.

with open('myfile.chn') as f:
    data = f.read().split()  # read the whole file at once and split it into a list of binary strings
    data = [hex(int(x, 2)) for x in data]  # convert to a list of hex strings (via the interim decimal value)
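If the .chn file is raw binary rather than text, a minimal Python 3 sketch (file name assumed) that prints each byte in binary and hex:
with open('myfile.chn', 'rb') as f:
    data = f.read()  # a bytes object; iterating over it yields ints in Python 3
for byte in data:
    print(format(byte, '08b'), format(byte, '02x'))  # per-byte binary and hex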

Related

Read and save lists of numbers as numbers in txt files in Python

I have lists of float numbers in Python.
I would like to save some lists in txt files and read them back list by list, as they are. But after I wrote the following code I discovered that str(i) converts every digit into its own str, so I cannot read them back the way they were written.
P.S. I have 10,000 lists of results, so I would like to save every list on one line.
result = [-0.33434,0.4434, 4343....]
with open("out.txt", w) as out:
for i in result:
out.write(str(i)+' ')
out.write("\n")
Updated Answer
For multiple lists, put them all inside a single list and write that to the pkl/txt file.
import numpy as np
results = [
    [1.456, 2.245, -3.441],
    [4.53, 4.55, 1.22],
]
np.savetxt("results.txt", results)  # write
# read
result = np.loadtxt("results.txt")
print(result.tolist())
If you want to have names associated with your lists, consider using a dict. But then, you can only use pickle.
import pickle
results = {
    "result1": [1.456, 2.245, -3.441],
    "result2": [4.53, 4.55, 1.22],
}
# write
with open("results.pkl", "wb") as resultFile:
    pickle.dump(results, resultFile)
# read
with open("results.pkl", "rb") as resultFile:
    result = pickle.load(resultFile)
print(result)
Original Answer
Use pickle or numpy for this, as they are better suited to this task.
Using pickle:
import pickle
result1 = [1.456, 2.245, -3.441]
# write
with open("result1.pkl", "wb") as resultFile:
    pickle.dump(result1, resultFile)
# read
with open("result1.pkl", "rb") as resultFile:
    result = pickle.load(resultFile)
print(result)
Using numpy:
import numpy as np
result1 = [1.456, 2.245, -3.441]
np.savetxt("result1.txt", result1)  # write
# read
result = np.loadtxt("result1.txt")
print(result.tolist())
If you don't want to use a library to do this, you can simply join each list by a known separator and then write the resulting string to the file.
Here, I assume you have a list-of-lists called allResults, which is of the form
allResults = [
    [-0.332, 434, 0.4434, 4865],
    [9.456, -0.540, -7.06540, 5.05453],
    # ... and so on
]
separator = ","
with open("out.txt", "w") as out_file:
    for l in allResults:
        out_string = separator.join(str(x) for x in l) + "\n"
        out_file.write(out_string)
Now, out.txt contains:
-0.332,434,0.4434,4865
9.456,-0.54,-7.0654,5.05453
Then to read the file, you can read each line, split it by your separator, convert each element of the split string to a float, and put that new list in your list of lists:
all_lists = []
with open("out.txt", "r") as in_file:
    for line in in_file:
        new_list = [float(x) for x in line.split(separator)]
        all_lists.append(new_list)
And now you have your list of lists back:
all_lists: [[-0.332, 434.0, 0.4434, 4865.0], [9.456, -0.54, -7.0654, 5.05453]]
If you want to read it back, read each line and split it on ' ' (space). This gives you the string representations of the float numbers; afterwards you can use float(num) to convert each one back to a number.
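A minimal sketch of that read-back, assuming the space-separated format written by the question's code (one list per line in out.txt):
results = []
with open("out.txt") as in_file:
    for line in in_file:
        # split() with no argument also handles the trailing space before the newline
        results.append([float(num) for num in line.split()])
print(results)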

How to write/pack a binary string to file in Python

I would like to do a simple operation, but I'm not able to manage it. I have a string of '0' and '1' characters produced by a coding algorithm. I would like to write it to a file, but I think I'm doing it wrong.
My string is something like '11101010000......10000101010'
Actually I'm writing a binary file as:
print 'WRITE TO FILE'
with open('file.bin', 'wb') as f:
    f.write(my_coded_string)
print 'READ FROM FILE'
with open('file.bin', 'rb') as f:
    myArr = bytearray(f.read())
myArr = str(myArr)
If I look at the size of the file, I get something pretty big. So I guess that I'm using an entire byte to write each 1 and 0. Is that correct?
I have found some examples which use the struct module, but I didn't manage to understand how it works.
Thanks!
Because the input binary is a string, Python writes each bit as a character (one full byte each). You can write your bit stream with the bitarray module (from PyPI) like this:
from bitarray import bitarray
bits = '110010111010001110'
a = bitarray(bits)
with open('file.bin', 'wb') as f:
    a.tofile(f)
b = bitarray()
with open('file.bin', 'rb') as f:
    b.fromfile(f)
print b
Use this (Python 2):
import re
text = "01010101010000111100100101"
byte_chars = [chr(int(sbyte, 2)) for sbyte in re.findall('.{8}', text)]
to obtain a list of one-character byte strings (note that trailing bits that don't fill a whole byte are dropped), which can be written to a binary file with:
with open('output.bin', 'wb') as f:
    f.write("".join(byte_chars))

Parse a string containing a large integer in Python

I am having trouble parsing a data set from a .txt file into an Excel file (.csv) in Python.
The source code looks like:
import csv

fin = open(filename, 'r')
reader = csv.reader(fin)
for line in reader:
    list3 = str(line).split()
    print list3
    print str(list3[1])
My data sample looks like:
10134.5 -123 9.9527
And Python screen output looks like this
["['10134.5", '-123', '9.9527,"']"
-131.7000
So I'm assuming list3[1] is a float or a number at this point, which causes some overflow because 100,000 is larger than it can hold...
Do you know how to make Python treat it as a string, not an integer?
You do not need to split or cast to string... the numbers inside the list are already strings.
fin = open(filename, 'r')
reader = csv.reader(fin)
for line in reader:
    print(line)
output
['10134.5', '-123', '9.9527']
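Since the sample row in the question is space-separated rather than comma-separated, a variant sketch (assuming whitespace-delimited input) that still keeps every field a string:
import csv
with open(filename, 'r') as fin:
    reader = csv.reader(fin, delimiter=' ', skipinitialspace=True)
    for line in reader:
        print(line)  # e.g. ['10134.5', '-123', '9.9527'] - all strings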

Python3 ASCII Hexadecimal to Binary String Conversion

I'm using Python 3.2.3 on Windows, and am trying to convert binary data within a C-style ASCII file into its binary equivalent for later parsing using the struct module. For example, my input file contains "0x000A 0x000B 0x000C 0x000D", and I'd like to convert it into "\x00\x0a\x00\x0b\x00\x0c\x00\x0d".
The problem I'm running into is that the string datatypes have changed in Python 3, and the built-in functions to convert from hexadecimal to binary, such as binascii.unhexlify(), no longer accept regular unicode strings, but only byte strings. This process of converting from unicode strings to byte strings and back is confusing me, so I'm wondering if there's an easier way to achieve this. Below is what I have so far:
with open(path, "r") as f:
l = []
data = f.read()
values = data.split(" ")
for v in values:
if (v.startswith("0x")):
l.append(binascii.unhexlify(bytes(v[2:], "utf-8").decode("utf-8")
string = ''.join(l)
Python 3:
>>> ''.join(chr(int(x, 16)) for x in "0x000A 0x000B 0x000C 0x000D".split()).encode('utf-16be')
b'\x00\n\x00\x0b\x00\x0c\x00\r'
As agf says, opening the file with mode 'r' will give you string data.
Since the only thing you are doing here is looking at binary data, you probably want to open with 'rb' mode and make your result of type bytes, not str.
Something like:
with open(path, "rb") as f:
l = []
data = f.read()
values = data.split(b" ")
for v in values:
if (v.startswith(b"0x")):
l.append(binascii.unhexlify(v[2:]))
result = b''.join(l)
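On Python 3, bytes.fromhex offers the same conversion without the str/bytes juggling; a minimal sketch assuming every token carries the 0x prefix:
with open(path, "r") as f:
    tokens = f.read().split()
result = b''.join(bytes.fromhex(t[2:]) for t in tokens if t.startswith("0x"))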

Convert a text CSV to a binary file and get a random line from it in Python without reading it all into memory

I have some CSV text files in the format:
1.3, 0, 1.0
20.0, 3.2, 0
30.5, 5.0, 5.2
The files are about 3.5 GB in size and I cannot read any of them into memory in Pandas in a useful amount of time.
But I don't need to read the whole file, because what I want to do is choose some random lines from it and read the values there, and I know it's theoretically possible if the file is formatted so that all the fields have the same size - for instance, float16 in a binary file.
Now, I think I can just convert it, using the NumPy method from the answer to this question:
How to output list of floats to a binary file in Python
But, how do I go about picking a random line from it after the conversion is done?
In a normal text file, I could just do:
import random
offset = random.randrange(filesize)
f = open('really_big_file')
f.seek(offset)  # go to a random position
f.readline()  # discard - bound to be a partial line
random_line = f.readline()  # bingo!
But I can't find a way for this to work in a binary file made from NumPy.
I'd use struct to convert to binary:
import struct
with open('input.txt') as fin, open('output.txt', 'wb') as fout:
    for line in fin:
        # You could also use `csv` if you're not lazy like me ...
        out_line = struct.pack('3f', *(float(x) for x in line.split(',')))
        fout.write(out_line)
This writes everything as standard 4-byte floats on most systems.
Now, to read the data again:
import os
import random
with open('output.txt', 'rb') as fin:
    line_size = 12  # each line is 12 bytes long (3 floats, 4 bytes each)
    filesize = os.path.getsize('output.txt')
    offset = random.randrange(filesize // line_size)  # pick the n'th line randomly
    fin.seek(offset * line_size)  # seek to the position of the n'th line
    three_floats_bytes = fin.read(line_size)
    three_floats = struct.unpack('3f', three_floats_bytes)
If you're concerned about disk space and want to compress the data down using np.float16 (2-byte floats), you can do that too using the basic skeleton above; just substitute np.fromstring for struct.unpack and ndarray.tostring in place of struct.pack (with the appropriate data-type ndarray, of course - and line_size would drop to 6 ...).
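A minimal sketch of that float16 variant under the same three-floats-per-line layout (file names assumed; np.frombuffer is the current replacement for the deprecated np.fromstring):
import os
import random
import numpy as np

line_size = 6  # 3 values * 2 bytes per np.float16
# write: each CSV line becomes 6 bytes
with open('input.txt') as fin, open('output16.bin', 'wb') as fout:
    for line in fin:
        values = np.array([float(x) for x in line.split(',')], dtype=np.float16)
        fout.write(values.tobytes())
# read one random line back
filesize = os.path.getsize('output16.bin')
with open('output16.bin', 'rb') as fin:
    fin.seek(random.randrange(filesize // line_size) * line_size)
    three_floats = np.frombuffer(fin.read(line_size), dtype=np.float16)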
You'd have to play around with offsets depending on storage size, but:
import csv
import struct
import random
count = 0
with open('input.csv') as fin, open('input.dat', 'wb') as fout:
    csvin = csv.reader(fin)
    for row in csvin:
        for col in map(float, row):
            fout.write(struct.pack('f', col))
            count += 1
with open('input.dat', 'rb') as fin:
    i = random.randrange(count)
    fin.seek(i * 4)
    print struct.unpack('f', fin.read(4))
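The hard-coded 4 can also be computed rather than assumed: struct.calcsize returns the packed size of a format string, which keeps the seek arithmetic in sync if the format ever changes:
import struct
value_size = struct.calcsize('f')   # 4 on most platforms
line_size = struct.calcsize('3f')   # 12: one packed line of three floats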
So, using the examples provided by the helpful answers, I found a way to do it with NumPy, if someone is interested:
import ast
import zipfile
import numpy

# this converts the file from text CSV to binary
with zipfile.ZipFile("input.zip", 'r') as inputZipFile:
    inputCSVFile = inputZipFile.open(inputZipFile.namelist()[0], 'r')  # it's a 1-file-only zip
    with open("output.bin", 'wb') as outFile:
        for line in inputCSVFile:
            lineParsed = ast.literal_eval(line)  # each CSV line parses as a tuple of numbers
            lineOut = numpy.array(lineParsed, 'float16')
            lineOut.tofile(outFile)
    inputCSVFile.close()
# this reads a random line from the binary file
import os
import random

with open("output.bin", 'rb') as file:
    lineSize = 20  # float16 is 2 bytes and there are 10 values per line
    fileSize = os.path.getsize("output.bin")
    offset = random.randrange(fileSize // lineSize)
    file.seek(offset * lineSize)
    random_line = file.read(lineSize)
    randomArr = numpy.frombuffer(random_line, dtype='float16')  # frombuffer replaces the deprecated fromstring
