Python Binary files. - python

Hi, I am having an issue using struct.unpack in Python:
# Read a binary beam file and unpack its contents into an (npart, ncoord)
# array of doubles.
# NOTE(review): the file is opened in text mode ('r'); presumably it should be
# 'rb', because text mode is not guaranteed to return every byte of a binary
# file -- confirm.
fileID= open('B1b1_t100000.beam','r');
# Particle count and values per particle; npart is a float literal here.
npart = 1E6;
ncoord = 7;
# Unpack ncoord*npart native doubles ('d') from the whole file, then reshape.
coords = np.reshape(struct.unpack('d'*int(ncoord*npart),fileID.read()),(npart,ncoord));
fileID.close()
And I am getting the error
Traceback (most recent call last):
File "transfer_lev_B1.py", line 30, in <module>
coords = np.reshape(struct.unpack('d'*int(ncoord*npart),fileID.read()),(npart,ncoord));
struct.error: unpack requires a string argument of length 56000000
I can't really see where the problem is. The file size is 56000000 bytes. In a previous attempt with npart = 1E4 the code worked for a different file with the same format (fewer total lines), but I run into the problem when I move to a larger file with more lines.

ok I solved my problem,
import struct
import numpy as np
import matplotlib.pyplot as plt
if __name__ == '__main__':
    # Load a raw float64 beam file, rescale the transverse coordinates and
    # write the modified beam back out in the same raw layout.

    # The file stores `ncoord` float64 values per particle.  Both counts are
    # ints because NumPy rejects float slice bounds and float shapes.
    npart = int(1E6)
    ncoord = 7
    nvalues = npart * ncoord

    # np.fromfile reads the file by name, so no separate open()/close() pair
    # is needed; `count` limits the read to the values that are used.
    coords = np.fromfile('B1b1_t100000.beam', dtype=np.float64, count=nvalues)
    if coords.size != nvalues:
        raise ValueError('expected %d float64 values in the beam file, got %d'
                         % (nvalues, coords.size))
    coords = coords.reshape(npart, ncoord)

    # Beam 1: columns are x, px, y, py, z, deltap and one further column
    # that is carried through unchanged.
    b1_x = coords[:, 0]
    b1_px = coords[:, 1]
    b1_y = coords[:, 2]
    b1_py = coords[:, 3]
    b1_z = coords[:, 4]
    b1_deltap = coords[:, 5]
    b1_last = coords[:, 6]
    beam1 = np.column_stack(
        [b1_x, b1_px, b1_y, b1_py, b1_z, b1_deltap, b1_last]
    ).astype(np.float64, copy=False)

    # Map applied and new coordinates calculated.
    # NOTE(review): foc and defoc are not defined in the visible code;
    # presumably they are set elsewhere in the script -- confirm.
    pos_scale = np.sqrt(foc)
    mom_scale = np.sqrt(defoc)
    x_mod = pos_scale * b1_x
    y_mod = pos_scale * b1_y
    px_mod = mom_scale * b1_px
    py_mod = mom_scale * b1_py
    beam1_mod = np.column_stack(
        [x_mod, px_mod, y_mod, py_mod, b1_z, b1_deltap, b1_last]
    ).astype(np.float64, copy=False)

    # New coordinates printed to binary file.  The file is opened in binary
    # mode ("wb") because the payload is raw float64 data, and the context
    # manager closes it even if the write fails.
    with open("B1b1_t100000_mod.beam", "wb") as fileMod:
        beam1_mod.tofile(fileMod)

Related

struct.error: unpack requires a buffer of 2 bytes

I'm trying to identify the musical note of a sound from a .wav file using Python, but I'm getting the error above when using "struct".
I couldn't gather much information from the struct documentation or other websites on how to resolve this issue.
I have seen errors like:
struct.error: unpack requires a buffer of 4 bytes
struct.error: unpack requires a buffer of 1024 bytes
but the error seems to be for a different reason.
import numpy as np
import math
import wave
import os
import struct
import matplotlib.pyplot as plt
def note_detect(audio_file):
# Estimate the dominant frequency of an opened wave file and return the name
# of the closest entry in the note table below.
# audio_file must provide getnframes(), getframerate(), getnchannels() and
# readframes(); the __main__ block passes a wave.open() reader.
# Side effect: shows three matplotlib plots (raw samples, scaled samples,
# spectrum magnitude).
#-------------------------------------------
#here we are just storing our sound file as a numpy array
#you can also use any other method to store the file as an np array
file_length=audio_file.getnframes()
f_s=audio_file.getframerate() #sampling frequency
sound = np.zeros(file_length) #blank array
for i in range(file_length) :
wdata=audio_file.readframes(1)
# "<h" decodes exactly one little-endian signed 16-bit value, so this line
# requires every frame to be exactly 2 bytes long.
data=struct.unpack("<h",wdata)
sound[i] = int(data[0])
plt.plot(sound)
plt.show()
sound=np.divide(sound,float(2**15)) #scaling signed 16-bit samples to -1 .. 1
counter = audio_file.getnchannels() #number of channels mono/stereo
#-------------------------------------------
plt.plot(sound)
plt.show()
#fourier transformation from numpy module
fourier = np.fft.fft(sound)
fourier = np.absolute(fourier)
# Only the first half of the spectrum is searched for the peak.
imax=np.argmax(fourier[0:int(file_length/2)]) #index of max element
plt.plot(fourier)
plt.show()
#peak detection
# i_begin marks the first bin at or above 30% of the peak magnitude; the scan
# stops at the first later bin that falls below that threshold.
i_begin = -1
threshold = 0.3 * fourier[imax]
for i in range (0,imax+100):
if fourier[i] >= threshold:
if(i_begin==-1):
i_begin = i
if(i_begin!=-1 and fourier[i]<threshold):
break
i_end = i
imax = np.argmax(fourier[0:i_end+100])
freq=(imax*f_s)/(file_length*counter) #formula to convert index into sound frequency
#frequency database
# note stays 0 if the search loop below never selects a table entry.
note=0
# name[k] is the label for frequencies[k] (presumably in Hz -- confirm).
# NOTE(review): sharps are spelled "C#0" ... "F#2" and then switch to "G2#",
# "A2#", "C3#", ... -- presumably unintentional; confirm before normalising.
name = np.array(["C0","C#0","D0","D#0","E0","F0","F#0","G0","G#0","A0","A#0","B0","C1","C#1","D1","D#1","E1","F1","F#1","G1","G#1","A1","A#1","B1","C2","C#2","D2","D#2","E2","F2","F#2","G2","G2#","A2","A2#","B2","C3","C3#","D3","D3#","E3","F3","F3#","G3","G3#","A3","A3#","B3","C4","C4#","D4","D4#","E4","F4","F4#","G4","G4#","A4","A4#","B4","C5","C5#","D5","D5#","E5","F5","F5#","G5","G5#","A5","A5#","B5","C6","C6#","D6","D6#","E6","F6","F6#","G6","G6#","A6","A6#","B6","C7","C7#","D7","D7#","E7","F7","F7#","G7","G7#","A7","A7#","B7","C8","C8#","D8","D8#","E8","F8","F8#","G8","G8#","A8","A8#","B8","Beyond B8"])
frequencies = np.array([16.35,17.32,18.35,19.45,20.60,21.83,23.12,24.50,25.96 ,27.50 ,29.14 ,30.87 ,32.70 ,34.65 ,36.71 ,38.89 ,41.20 ,43.65 ,46.25 ,49.00 ,51.91 ,55.00 ,58.27 ,61.74 ,65.41 ,69.30 ,73.42 ,77.78 ,82.41 ,87.31 ,92.50 ,98.00 ,103.83 ,110.00 ,116.54 ,123.47 ,130.81 ,138.59 ,146.83 ,155.56 ,164.81 ,174.61 ,185.00 ,196.00 ,207.65 ,220.00 ,233.08 ,246.94 ,261.63 ,277.18 ,293.66 ,311.13 ,329.63 ,349.23 ,369.99 ,392.00 ,415.30 ,440.00 ,466.16 ,493.88 ,523.25 ,554.37 ,587.33 ,622.25 ,659.26 ,698.46 ,739.99 ,783.99 ,830.61 ,880.00 ,932.33 ,987.77 ,1046.50 ,1108.73 ,1174.66 ,1244.51 ,1318.51 ,1396.91 ,1479.98 ,1567.98 ,1661.22 ,1760.00 ,1864.66 ,1975.53 ,2093.00 ,2217.46 ,2349.32 ,2489.02 ,2637.02 ,2793.83 ,2959.96 ,3135.96 ,3322.44 ,3520.00 ,3729.31 ,3951.07 ,4186.01 ,4434.92 ,4698.64 ,4978.03 ,5274.04 ,5587.65 ,5919.91 ,6271.93 ,6644.88 ,7040.00 ,7458.62 ,7902.13,8000])
#searching for matched frequencies
# Values below the first or above the last table entry map to the first/last
# name; otherwise the nearer of the two bracketing entries is chosen.
for i in range(0,frequencies.size-1):
if(freq<frequencies[0]):
note=name[0]
break
if(freq>frequencies[-1]):
note=name[-1]
break
if freq>=frequencies[i] and frequencies[i+1]>=freq :
if freq-frequencies[i]<(frequencies[i+1]-frequencies[i])/2 :
note=name[i]
else :
note=name[i+1]
break
return note
if __name__ == "__main__":
# Open recording0.wav from the current working directory and print the note
# returned by note_detect.
path = os.getcwd()
# NOTE: the hard-coded "\\" separator makes this path Windows-specific.
file_name = path + "\\" + "recording0.wav"
audio_file = wave.open(file_name)
Detected_Note = note_detect(audio_file)
print("\n\tDetected Note = " + str(Detected_Note))
The full error on line 23:
Traceback (most recent call last):
File "C:\Users\m8\Desktop\programing_stuff\python-stuff\minecraft_flute_player - 12-08-2022\app.py", line 86, in <module>
Detected_Note = note_detect(audio_file)
File "C:\Users\m8\Desktop\programing_stuff\python-stuff\minecraft_flute_player - 12-08-2022\app.py", line 23, in note_detect
data=struct.unpack("<h",wdata)
struct.error: unpack requires a buffer of 2 bytes
Thanks for the help.
What I assume is happening here is the size of the frame isn't 2 bytes as you expected.
By specifying the format "<h" you are saying that each frame holds exactly one 2-byte (little-endian, signed 16-bit) value. See the struct documentation for more on that.
You can use the getparams function to better understand the structure of the wav file.
>>> audio_file.getparams()
_wave_params(nchannels=1, sampwidth=2, framerate=44100, nframes=22050, comptype='NONE', compname='not compressed')
The parameters which are interesting are nchannels and sampwidth.
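You can calculate sampwidth * nchannels to find the number of bytes you need to extract from each frame of this WAV file.
In this example, sampwidth * nchannels = 2 * 1 = 2 bytes per frame, which matches "<h". For a stereo 16-bit file the product would be 2 * 2 = 4 bytes, and struct.unpack("<h", ...) would fail with exactly this error; such a frame has to be unpacked with "<hh" instead.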
More information can be found in this answer which shows different cases of frame sizes.

Python-OpenCV floodfill function; strange type errors

I am trying to implement my own version of the MatLab function imhmin() in Python using OpenCV and (naturally) NumPy. If you are not familiar with this MatLab function, it's extremely useful for segmentation. MatLab's documentation can explain it much better than I can:
https://it.mathworks.com/help/images/ref/imhmin.html
Here is what I have so far:
(For the sake of keeping this short, I did not include the local_min function. It takes one image parameter and returns an image of the same size where local minima are 1s and everything else is 0.)
from volume import show
import cv2
import numpy
def main():
# Small demo: run imhmin with h=3 on a 3x7 integer test array and print the
# returned value.
arr = numpy.array( [[5,5,5,5,5,5,5],
[5,0,3,1,4,2,5],
[5,5,5,5,5,5,5]] ) + 1
res = imhmin(arr, 3)
print(res)
def imhmin(src, h):
# Work-in-progress port of MATLAB's imhmin.
# src: 2-D numpy array (indexed as edm[x,y] below); h: height limit used when
# raising each local minimum.
# NOTE(review): the visible code has no return statement, so main() would
# print None as written.
# NOTE(review): edm keeps src's dtype. main() builds src from Python ints, so
# it has NumPy's default integer dtype; OpenCV's floodFill documentation lists
# 8-bit or floating-point images -- presumably the dtype is what triggers the
# errors quoted below; confirm.
# TODO: speed up function by cropping image
edm = src.copy()
# d is the domain / all values contained in the array
d = numpy.unique(edm)
# for the index of each local minima (sorted gtl)
# local_min is defined outside this snippet.
indices = numpy.nonzero(local_min(edm)) # get indices
indices = numpy.dstack((indices[0], indices[1]))[0].tolist() # zip
# sort based on the value of edm[] at that index
indices.sort(key = lambda _: edm[_[0],_[1]], reverse = True)
# x is the first-axis (row) index and y the second-axis (column) index.
for (x,y) in indices:
start = edm[x,y] # remember original value of minima
# for each in a list of heights greater than the starting height
# i starts at the position of edm[x,y] within d and stops one short of the
# last entry, so d[i+1] is always valid.
for i in range(*numpy.where(d==edm[x,y])[0], d.shape[0]-1):
# prevent exceeding target height
step = start + h if (d[i+1] - start > h) else d[i+1]
#-------------- WORKS UNTIL HERE --------------#
# complete floodFill syntax:
# cv2.floodFill(image, mask, seed, newVal[, loDiff[, upDiff[, flags]]]) → retval, rect
# fill UPWARD onto image (and onto mask?)
# The seed is passed as (y,x), i.e. (column, row).
cv2.floodFill(edm, None, (y,x), step, 0, step-d[i], 4)
# fill DOWNWARD NOT onto image
# have you overflowed?
if __name__ == "__main__":
main()
Which works fine until it gets to the floodfill line. It barks this error back:
Traceback (most recent call last):
File "edm.py", line 94, in <module>
main()
File "edm.py", line 14, in main
res = imhmin(arr, 3)
File "edm.py", line 66, in imhmin
cv2.floodFill(edm, None, (y,x), step, 0, step-d[i], 4)
TypeError: Layout of the output array image is incompatible with cv::Mat (step[ndims-1] != elemsize or step[1] != elemsize*nchannels)
At first I thought maybe the way I laid out the parameters was wrong because of the stuff about step in the traceback, but I tried changing that variable's name and have come to the conclusion that step is some variable name in OpenCV's code. It's talking about the output array, and I'm not using a mask, so something must be wrong with the array edm.
I can suppress this error by replacing the floodfill line with this one:
cv2.floodFill(edm.astype(numpy.double), None, (y,x), step, 0, step-d[i], 4)
The difference being that I am typecasting the numpy array to a float array. Then I am left with this error:
Traceback (most recent call last):
File "edm.py", line 92, in <module>
main()
File "edm.py", line 14, in main
res = imhmin(arr, 3)
File "edm.py", line 64, in imhmin
cv2.floodFill(edm.astype(numpy.double), None, (y,x), step, 0, step-d[i], 4)
TypeError: Scalar value for argument 'newVal' is not numeric
This is where I started suspecting something was seriously wrong, because step is "obviously" going to be an integer here (maybe it isn't obvious, but I did try printing it and it looks like it's just an integer, not an array of one integer or anything weird like that).
To entertain the error message, I typecast the newVal parameter to a float. I got pretty much the exact same error message about the upDiff parameter, so I just typecast that too, resulting in this line of code:
cv2.floodFill(edm.astype(numpy.double), None, (y,x), float(step), 0, float(step-d[i]), 4)
I know this isn't how I want to be doing things, but I just wanted to see what would happen. What happened was I got this scary looking error:
Traceback (most recent call last):
File "edm.py", line 92, in <module>
main()
File "edm.py", line 14, in main
res = imhmin(arr, 3)
File "edm.py", line 64, in imhmin
cv2.floodFill(edm.astype(numpy.double), None, (y,x), float(step), 0, float(step-d[i]), 4)
cv2.error: OpenCV(3.4.2) /opt/concourse/worker/volumes/live/9523d527-1b9e-48e0-7ed0-a36adde286f0/volume/opencv-suite_1535558719691/work/modules/imgproc/src/floodfill.cpp:587: error: (-210:Unsupported format or combination of formats) in function 'floodFill'
I don't even know where to start with this. I've used OpenCV's floodfill function many times before and have never run into problems like this. Can anyone provide any insight?
Thanks in advance
Antonio

TypeError: only size-1 arrays can be converted to Python scalars - Earth Observation

I'm attempting to create a script that will take a .GTiff file as an argument input and then extract some information out of the file to create a stats.txt file that will give me the classID, fractional coverage and total number of pixels of that classID.
Thus far I believe I have everything I need but I keep running into the same error and my attempts to rectify the error haven't proven to be very fruitful.
#!/usr/bin/env python
import sys
import calendar
import os
import gdal
import numpy as np
from scipy.stats import mode
from IPython import embed
# Lookup table from GDAL raster data-type constants to NumPy dtypes.
# It is not referenced by the rest of the visible script.
# Both complex-integer types and CFloat32 are mapped to np.complex64.
GDAL2NUMPY = { gdal.GDT_Byte : np.uint8,
gdal.GDT_UInt16 : np.uint16,
gdal.GDT_Int16 : np.int16,
gdal.GDT_UInt32 : np.uint32,
gdal.GDT_Int32 : np.int32,
gdal.GDT_Float32 : np.float32,
gdal.GDT_Float64 : np.float64,
gdal.GDT_CInt16 : np.complex64,
gdal.GDT_CInt32 : np.complex64,
gdal.GDT_CFloat32 : np.complex64,
gdal.GDT_CFloat64 : np.complex128
}
#Open the original training data .tif map file.
# The path is taken from the first command-line argument.
fname = sys.argv[1]
lc_dataset = gdal.Open(fname)
lc = lc_dataset.ReadAsArray()
lc = np.array(lc)
#Calculating total number of pixels with a valid Land Cover ID.
# Every element different from fill_value counts as valid.
fill_value = 0
number_of_pixels = np.where(lc != fill_value)[0].shape[0]
#Get the number of classes and corresponding IDs.
# np.unique returns an array of the distinct values, not a count.
lc_classes = np.unique(lc)
#Split each class into its constituent pixels and write result to file.
# NOTE(review): lc_classes is an array, so passing it to range() raises the
# TypeError quoted below.
for classID in range(1, lc_classes):
lc_class_pixels = np.where(lc == classID)[0].shape[0]
FractionalCover = lc_class_pixels/number_of_pixels
# NOTE(review): `f` is never opened in the visible code, and the name
# FractionalCoverage differs from FractionalCover assigned above.
f.write(classID, FractionalCoverage, lc_class_pixels)
f.close()
When I run this, it throws the following traceback:
Traceback (most recent call last):
File "GeneratingLCstats.py", line 45, in <module>
for classID in range(1, lc_classes):
TypeError: only size-1 arrays can be converted to Python scalars
I've attempted a few changes as I'm sure the error is related to numpy data and native python data interactions, but converting all my arrays to numpy arrays and attempting to reformat the code has proved in vain as the same error persists.
If anyone can suggest a fix that would be greatly appreciated!
Thanks.
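The call lc_classes = np.unique(lc) returns an array, not a single number. When you write the for loop as
for classID in range(1, lc_classes)
lc_classes is an array, and passing an array as the upper bound of range() causes the error. If you want to iterate over the length of the array, you can change the loop to:
for classID in range(1, len(lc_classes))
Note that this loops over the positions 1 .. len-1 rather than over the class IDs themselves; if the IDs are not consecutive integers, iterate over the values directly with "for classID in lc_classes:" and skip the fill value 0.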

Appending numpy arrays into two binary files

I want to create two binary files to append numpy arrays into each one of them during a loop. I wrote the following method (I use Python 2.7):
# Append five random (1, 5) arrays to each of two binary files, then try to
# read the files back.
for _ in range(5):
C = np.random.rand(1, 5)
r = np.random.rand(1, 5)
with open("C.bin", "ab") as file1, open("r.bin", "ab") as file2:
# Append to binary files
# tofile writes only the raw element values; no shape or dtype header.
np.array(C).tofile(file1)
np.array(r).tofile(file2)
# Now printing to check if appending is successful
# NOTE(review): np.load reads .npy/.npz (or pickled) files, which is why it
# rejects these raw files; raw tofile output is read back with np.fromfile.
C = np.load("C.bin")
r = np.load("r.bin")
print (C)
print (r)
However, I keep getting this error:
Traceback (most recent call last):
File "test.py", line 15, in <module>
C = np.load("C.bin")
File "/anaconda/lib/python2.7/site-packages/numpy/lib/npyio.py", line 429, in load
"Failed to interpret file %s as a pickle" % repr(file))
IOError: Failed to interpret file 'C.bin' as a pickle
I tried to fix it but I cannot see anything more. Any help is appreciated.
NOTE: I intentionally want to use np.load because later on I will be loading the dataset from the disk into a numpy array for further processing.
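np.load can only read files written by np.save/np.savez (or pickles); the raw output of ndarray.tofile has no header, so np.load cannot interpret it. You should use NumPy's built-in save function to store the arrays instead. Here is what your code should look like: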
# Collect one (1, 5) block per iteration. np.save replaces the target file on
# every call, so saving inside the loop would keep only the last block.
C_blocks = []
r_blocks = []
for _ in range(5):
    C_blocks.append(np.random.rand(1, 5))
    r_blocks.append(np.random.rand(1, 5))

# Stack the blocks row-wise and write each dataset once; np.save adds the
# ".npy" suffix to the file name.
np.save('C', np.vstack(C_blocks))
np.save('r', np.vstack(r_blocks))

# Now printing to check that every block was stored
C = np.load("C.npy")
r = np.load("r.npy")
print (C)
print (r)
del C, r
Please refer to the documentation https://docs.scipy.org/doc/numpy-1.13.0/reference/generated/numpy.load.html

Converting files to digital image failing with "tile cannot extend outside image"

I am trying to recreate some of the work from the blog posting http://sarvamblog.blogspot.com/2013/04/clustering-malware-corpus.html
import itertools
import glob
import numpy,scipy, os, array
from scipy.misc import imsave
# Render the bytes of every file matching file/*.file as an 8-bit image that
# is 256 pixels wide and save it next to the input as <name>.png.
for filename in list(glob.glob('file/*.file')):
f = open(filename,'rb');
#just want to make sure I get the right file'
print filename
ln = os.path.getsize(filename); # length of file in bytes
width = 256;
# rem is the size of the trailing partial row, which is dropped below so the
# byte count is a multiple of width.
rem = ln%width;
a = array.array("B"); # uint8 array
a.fromfile(f,ln-rem);
f.close();
# NOTE(review): for a file shorter than width bytes, ln-rem is 0, so `a` is
# empty and g has zero rows -- presumably the failing case; confirm.
g = numpy.reshape(a,(len(a)/width,width));
g = numpy.uint8(g);
fpng = filename + ".png"
# make sure the png process and everything else is going'
print fpng
scipy.misc.imsave(fpng,g);`
And although this runs great on 1 or 2 files, I run into problems once I expand to dozens:
Traceback (most recent call last):
File "<stdin>", line 14, in <module>
File "/usr/lib/python2.7/dist-packages/scipy/misc/pilutil.py", line 120, in imsave
im = toimage(arr)
File "/usr/lib/python2.7/dist-packages/scipy/misc/pilutil.py", line 183, in toimage
image = Image.fromstring('L',shape,bytedata.tostring())
File "/usr/lib/python2.7/dist-packages/PIL/Image.py", line 1797, in fromstring
im.fromstring(data, decoder_name, args)
File "/usr/lib/python2.7/dist-packages/PIL/Image.py", line 590, in fromstring
d.setimage(self.im)
ValueError: tile cannot extend outside image
I assume that my issue is either A: not closing the scipy.misc.imsave output, or B: not resetting the arrays. Any help would be greatly appreciated.
Managed to figure it out with a try/except loop. Once I did that I was able to determine that only certain files were canceling out. These files were extremely small (125 bytes). My assumption is that they were too small to create all the info needed for scipy
im.crop(box) ⇒ image
The box is a 4-tuple defining the left, upper, right, and lower pixel coordinate.
In my code, this error occurred when lower was smaller than upper.

Categories

Resources