Im trying to identify the musical note of a sound from a .wav file using python, but im getting the error above when using "struct"
I couldn't gather a lot of info from the documents for struct or other websites on how to resolve this issue.
I have seen errors like:
struct.error: unpack requires a buffer of 4 bytes
struct.error: unpack requires a buffer of 1024 bytes
but the error seems to be for a different reason.
import numpy as np
import math
import wave
import os
import struct
import matplotlib.pyplot as plt
def note_detect(audio_file):
#-------------------------------------------
#here we are just storing our sound file as a numpy array
#you can also use any other method to store the file as an np array
file_length=audio_file.getnframes()
f_s=audio_file.getframerate() #sampling frequency
sound = np.zeros(file_length) #blank array
for i in range(file_length) :
wdata=audio_file.readframes(1)
data=struct.unpack("<h",wdata)
sound[i] = int(data[0])
plt.plot(sound)
plt.show()
sound=np.divide(sound,float(2**15)) #scaling it to 0 - 1
counter = audio_file.getnchannels() #number of channels mono/sterio
#-------------------------------------------
plt.plot(sound)
plt.show()
#fourier transformation from numpy module
fourier = np.fft.fft(sound)
fourier = np.absolute(fourier)
imax=np.argmax(fourier[0:int(file_length/2)]) #index of max element
plt.plot(fourier)
plt.show()
#peak detection
i_begin = -1
threshold = 0.3 * fourier[imax]
for i in range (0,imax+100):
if fourier[i] >= threshold:
if(i_begin==-1):
i_begin = i
if(i_begin!=-1 and fourier[i]<threshold):
break
i_end = i
imax = np.argmax(fourier[0:i_end+100])
freq=(imax*f_s)/(file_length*counter) #formula to convert index into sound frequency
#frequency database
note=0
name = np.array(["C0","C#0","D0","D#0","E0","F0","F#0","G0","G#0","A0","A#0","B0","C1","C#1","D1","D#1","E1","F1","F#1","G1","G#1","A1","A#1","B1","C2","C#2","D2","D#2","E2","F2","F#2","G2","G2#","A2","A2#","B2","C3","C3#","D3","D3#","E3","F3","F3#","G3","G3#","A3","A3#","B3","C4","C4#","D4","D4#","E4","F4","F4#","G4","G4#","A4","A4#","B4","C5","C5#","D5","D5#","E5","F5","F5#","G5","G5#","A5","A5#","B5","C6","C6#","D6","D6#","E6","F6","F6#","G6","G6#","A6","A6#","B6","C7","C7#","D7","D7#","E7","F7","F7#","G7","G7#","A7","A7#","B7","C8","C8#","D8","D8#","E8","F8","F8#","G8","G8#","A8","A8#","B8","Beyond B8"])
frequencies = np.array([16.35,17.32,18.35,19.45,20.60,21.83,23.12,24.50,25.96 ,27.50 ,29.14 ,30.87 ,32.70 ,34.65 ,36.71 ,38.89 ,41.20 ,43.65 ,46.25 ,49.00 ,51.91 ,55.00 ,58.27 ,61.74 ,65.41 ,69.30 ,73.42 ,77.78 ,82.41 ,87.31 ,92.50 ,98.00 ,103.83 ,110.00 ,116.54 ,123.47 ,130.81 ,138.59 ,146.83 ,155.56 ,164.81 ,174.61 ,185.00 ,196.00 ,207.65 ,220.00 ,233.08 ,246.94 ,261.63 ,277.18 ,293.66 ,311.13 ,329.63 ,349.23 ,369.99 ,392.00 ,415.30 ,440.00 ,466.16 ,493.88 ,523.25 ,554.37 ,587.33 ,622.25 ,659.26 ,698.46 ,739.99 ,783.99 ,830.61 ,880.00 ,932.33 ,987.77 ,1046.50 ,1108.73 ,1174.66 ,1244.51 ,1318.51 ,1396.91 ,1479.98 ,1567.98 ,1661.22 ,1760.00 ,1864.66 ,1975.53 ,2093.00 ,2217.46 ,2349.32 ,2489.02 ,2637.02 ,2793.83 ,2959.96 ,3135.96 ,3322.44 ,3520.00 ,3729.31 ,3951.07 ,4186.01 ,4434.92 ,4698.64 ,4978.03 ,5274.04 ,5587.65 ,5919.91 ,6271.93 ,6644.88 ,7040.00 ,7458.62 ,7902.13,8000])
#searching for matched frequencies
for i in range(0,frequencies.size-1):
if(freq<frequencies[0]):
note=name[0]
break
if(freq>frequencies[-1]):
note=name[-1]
break
if freq>=frequencies[i] and frequencies[i+1]>=freq :
if freq-frequencies[i]<(frequencies[i+1]-frequencies[i])/2 :
note=name[i]
else :
note=name[i+1]
break
return note
if __name__ == "__main__":
path = os.getcwd()
file_name = path + "\\" + "recording0.wav"
audio_file = wave.open(file_name)
Detected_Note = note_detect(audio_file)
print("\n\tDetected Note = " + str(Detected_Note))
The full error on line 23:
Traceback (most recent call last):
File "C:\Users\m8\Desktop\programing_stuff\python-stuff\minecraft_flute_player - 12-08-2022\app.py", line 86, in <module>
Detected_Note = note_detect(audio_file)
File "C:\Users\m8\Desktop\programing_stuff\python-stuff\minecraft_flute_player - 12-08-2022\app.py", line 23, in note_detect
data=struct.unpack("<h",wdata)
struct.error: unpack requires a buffer of 2 bytes
Thanks for the help.
What I assume is happening here is the size of the frame isn't 2 bytes as you expected.
When stating <h you are stating that you are going to extract 2 bytes from each frame. See the stuct documentation for more on that.
You can use the getparams function to better understand the structure of the wav file.
>>> audio_file.getparams()
_wave_params(nchannels=1, sampwidth=2, framerate=44100, nframes=22050, comptype='NONE', compname='not compressed')
The parameters which are interesting are nchannels and sampwidth.
You can calculate sampwidth * nchannels to understand the amount of bytes you need to extract from the frame for this WAV file.
In this example, you have sampwidth * nchannels = 1 * 2 = 2 bytes per frame.
More information can be found in this answer which shows different cases of frame sizes.
I am coding a basic frequency analisys of WAVE audio files, but I have trouble when it comes to convertion from WAVE frames to integer.
Here is the relevant part of my code:
import wave
track = wave.open('/some_path/my_audio.wav', 'r')
byt_depth = track.getsampwidth() #Byte depth of the file in BYTES
frame_rate = track.getframerate()
buf_size = 512
def byt_sum (word):
#convert a string of n bytes into an int in [0;8**n-1]
return sum( (256**k)*word[k] for k in range(len(word)) )
raw_buf = track.readframes(buf_size)
'''
One frame is a string of n bytes, where n = byt_depth.
For instance, with a 24bits-encoded file, track.readframe(1) could be:
b'\xff\xfe\xfe'.
raw_buf[n] returns an int in [0;255]
'''
sample_buf = [byt_sum(raw_buf[byt_depth*k:byt_depth*(k+1)])
- 2**(8*byt_depth-1) for k in range(buf_size)]
Problem is: when I plot sample_buf for a single sine signal, I get
an alternative, wrecked sine signal.
I can't figure out why the signal overlaps udpside-down.
Any idea?
P.S.: Since I'm French, my English is quite hesitating. Feel free to edit if there are ugly mistakes.
It might be because you need to use an unsigned value for representing the 16bit samples. See https://en.wikipedia.org/wiki/Pulse-code_modulation
Try to add 32767 to each sample.
Also you should use the python struct module to decode the buffer.
import struct
buff_size = 512
# 'H' is for unsigned 16 bit integer, try 'h' also
sample_buff = struct.unpack('H'*buf_size, raw_buf)
The easiest way is to use a library that does the decoding for you. There are several Python libraries available, my favorite is the soundfile module:
import soundfile as sf
signal, samplerate = sf.read('/some_path/my_audio.wav')
import sys
import os
import zlib
try:
import pylzma as lzma
except ImportError:
import lzma
from io import StringIO
import struct
#-----------------------------------------------------------------------------------------------------------------------
def read_ui8(c):
return struct.unpack('<B', c)[0]
def read_ui16(c):
return struct.unpack('<H', c)[0]
def read_ui32(c):
return struct.unpack('<I', c)[0]
def parse(input):
"""Parses the header information from an SWF file."""
if hasattr(input, 'read'):
input.seek(0)
else:
input = open(input, 'rb')
header = { }
# Read the 3-byte signature field
header['signature'] = signature = b''.join(struct.unpack('<3c', input.read(3))).decode()
# Version
header['version'] = read_ui8(input.read(1))
# File size (stored as a 32-bit integer)
header['size'] = read_ui32(input.read(4))
# Payload
if header['signature'] == 'FWS':
print("The opened file doesn't appear to be compressed")
buffer = input.read(header['size'])
elif header['signature'] == 'CWS':
print("The opened file appears to be compressed with Zlib")
buffer = zlib.decompress(input.read(header['size']))
elif header['signature'] == 'ZWS':
print("The opened file appears to be compressed with Lzma")
# ZWS(LZMA)
# | 4 bytes | 4 bytes | 4 bytes | 5 bytes | n bytes | 6 bytes |
# | 'ZWS'+version | scriptLen | compressedLen | LZMA props | LZMA data | LZMA end marker |
size = read_ui32(input.read(4))
buffer = lzma.decompress(input.read())
# Containing rectangle (struct RECT)
# The number of bits used to store the each of the RECT values are
# stored in first five bits of the first byte.
nbits = read_ui8(buffer[0]) >> 3
current_byte, buffer = read_ui8(buffer[0]), buffer[1:]
bit_cursor = 5
for item in 'xmin', 'xmax', 'ymin', 'ymax':
value = 0
for value_bit in range(nbits-1, -1, -1): # == reversed(range(nbits))
if (current_byte << bit_cursor) & 0x80:
value |= 1 << value_bit
# Advance the bit cursor to the next bit
bit_cursor += 1
if bit_cursor > 7:
# We've exhausted the current byte, consume the next one
# from the buffer.
current_byte, buffer = read_ui8(buffer[0]), buffer[1:]
bit_cursor = 0
# Convert value from TWIPS to a pixel value
header[item] = value / 20
header['width'] = header['xmax'] - header['xmin']
header['height'] = header['ymax'] - header['ymin']
header['frames'] = read_ui16(buffer[0:2])
header['fps'] = read_ui16(buffer[2:4])
input.close()
return header
header = parse(sys.argv[1]);
print('SWF header')
print('----------')
print('Version: %s' % header['version'])
print('Signature: %s' % header['signature'])
print('Dimensions: %s x %s' % (header['width'], header['height']))
print('Bounding box: (%s, %s, %s, %s)' % (header['xmin'], header['xmax'], header['ymin'], header['ymax']))
print('Frames: %s' % header['frames'])
print('FPS: %s' % header['fps'])
I was under the impression the built in python 3.4 LZMA module works the same as the Python 2.7 pyLZMA module.
The code I've provided runs on both 2.7 and 3.4, but when it is run on 3.4 (which doesn't have pylzma so it resorts to the inbuilt lzma) I get the following error:
_lzma.LZMAError: Input format not supported by decoder
Why does pylzma work but Python 3.4's lzma doesn't?
While I do not have an answer to why the two modules work differently, I do have a solution.
I was unable to get the non-stream LZMA lzma.decompress to work since I do not have enough knowledge about the LZMA/XZ/SWF specs, however I got the lzma.LZMADecompressor to work. For completeness, I believe SWF LZMA uses this header format (not 100% confirmed):
Bytes Length Type Endianness Description
0- 2 3 UI8 - SWF Signature: ZWS
3 1 UI8 - SWF Version
4- 7 4 UI32 LE SWF FileLength aka File Size
8-11 4 UI32 LE SWF? Compressed Size (File Size - 17)
12 1 - - LZMA Decoder Properties
13-16 4 UI32 LE LZMA Dictionary Size
17- - - - LZMA Compressed Data (including rest of SWF header)
However the LZMA file format spec says that it should be:
Bytes Length Type Endianness Description
0 1 - - LZMA Decoder Properties
1- 4 4 UI32 LE LZMA Dictionary Size
5-12 8 UI64 LE LZMA Uncompressed Size
13- - - - LZMA Compressed Data
I was never able to really get my head around what Uncompressed Size should be (if even possible to define for this format). pylzma seems to not care about this, while Python 3.3 lzma does. However, it seems that an explicit unknown size works and may be specified as an UI64 with value 2^64, e.g. 8*b'\xff' or 8*'\xff', so by shuffling around headers a bit and instead of using:
buffer = lzma.decompress(input.read())
Try:
d = lzma.LZMADecompressor(format=lzma.FORMAT_ALONE)
buffer = d.decompress(input.read(5) + 8*b'\xff' + input.read())
Note: I had no local python3 interpreter available so only tested it online with a slightly modified read procedure, so it might not work out of the box.
Edit: Confirmed to work in python3 however some things needed to be changed, like Marcus mentioned about unpack (easily solved by using buffer[0:1] instead of buffer[0]). It's not really necessary to read the whole file either, a small chunk, say 256 bytes should be fine for reading the whole SWF header. The frames field is a bit quirky too, though I believe all you have to do is some bit shifting, i.e.:
header['frames'] = read_ui16(buffer[0:2]) >> 8
SWF file format spec
LZMA file format spec
I'm trying to use python to create a random binary file. This is what I've got already:
f = open(filename,'wb')
for i in xrange(size_kb):
for ii in xrange(1024/4):
f.write(struct.pack("=I",random.randint(0,sys.maxint*2+1)))
f.close()
But it's terribly slow (0.82 seconds for size_kb=1024 on my 3.9GHz SSD disk machine). A big bottleneck seems to be the random int generation (replacing the randint() with a 0 reduces running time from 0.82s to 0.14s).
Now I know there are more efficient ways of creating random data files (namely dd if=/dev/urandom) but I'm trying to figure this out for sake of curiosity... is there an obvious way to improve this?
IMHO - the following is completely redundant:
f.write(struct.pack("=I",random.randint(0,sys.maxint*2+1)))
There's absolutely no need to use struct.pack, just do something like:
import os
fileSizeInBytes = 1024
with open('output_filename', 'wb') as fout:
fout.write(os.urandom(fileSizeInBytes)) # replace 1024 with a size in kilobytes if it is not unreasonably large
Then, if you need to re-use the file for reading integers, then struct.unpack then.
(my use case is generating a file for a unit test so I just need a
file that isn't identical with other generated files).
Another option is to just write a UUID4 to the file, but since I don't know the exact use case, I'm not sure that's viable.
The python code you should write completely depends on the way you intend to use the random binary file. If you just need a "rather good" randomness for multiple purposes, then the code of Jon Clements is probably the best.
However, on Linux OS at least, os.urandom relies on /dev/urandom, which is described in the Linux Kernel (drivers/char/random.c) as follows:
The /dev/urandom device [...] will return as many bytes as are
requested. As more and more random bytes are requested without giving
time for the entropy pool to recharge, this will result in random
numbers that are merely cryptographically strong. For many
applications, however, this is acceptable.
So the question is, is this acceptable for your application ? If you prefer a more secure RNG, you could read bytes on /dev/random instead. The main inconvenient of this device: it can block indefinitely if the Linux kernel is not able to gather enough entropy. There are also other cryptographically secure RNGs like EGD.
Alternatively, if your main concern is execution speed and if you just need some "light-randomness" for a Monte-Carlo method (i.e unpredictability doesn't matter, uniform distribution does), you could consider generate your random binary file once and use it many times, at least for development.
Here's a complete script based on accepted answer that creates random files.
import sys, os
def help(error: str = None) -> None:
if error and error != "help":
print("***",error,"\n\n",file=sys.stderr,sep=' ',end='');
sys.exit(1)
print("""\tCreates binary files with random content""", end='\n')
print("""Usage:""",)
print(os.path.split(__file__)[1], """ "name1" "1TB" "name2" "5kb"
Accepted units: MB, GB, KB, TB, B""")
sys.exit(2)
# https://stackoverflow.com/a/51253225/1077444
def convert_size_to_bytes(size_str):
"""Convert human filesizes to bytes.
ex: 1 tb, 1 kb, 1 mb, 1 pb, 1 eb, 1 zb, 3 yb
To reverse this, see hurry.filesize or the Django filesizeformat template
filter.
:param size_str: A human-readable string representing a file size, e.g.,
"22 megabytes".
:return: The number of bytes represented by the string.
"""
multipliers = {
'kilobyte': 1024,
'megabyte': 1024 ** 2,
'gigabyte': 1024 ** 3,
'terabyte': 1024 ** 4,
'petabyte': 1024 ** 5,
'exabyte': 1024 ** 6,
'zetabyte': 1024 ** 7,
'yottabyte': 1024 ** 8,
'kb': 1024,
'mb': 1024**2,
'gb': 1024**3,
'tb': 1024**4,
'pb': 1024**5,
'eb': 1024**6,
'zb': 1024**7,
'yb': 1024**8,
}
for suffix in multipliers:
size_str = size_str.lower().strip().strip('s')
if size_str.lower().endswith(suffix):
return int(float(size_str[0:-len(suffix)]) * multipliers[suffix])
else:
if size_str.endswith('b'):
size_str = size_str[0:-1]
elif size_str.endswith('byte'):
size_str = size_str[0:-4]
return int(size_str)
if __name__ == "__main__":
input = {} #{ file: byte_size }
if (len(sys.argv)-1) % 2 != 0:
print("-- Provide even number of arguments --")
print(f'--\tGot: {len(sys.argv)-1}: "' + r'" "'.join(sys.argv[1:]) +'"')
sys.exit(2)
elif len(sys.argv) == 1:
help()
try:
for file, size_str in zip(sys.argv[1::2], sys.argv[2::2]):
input[file] = convert_size_to_bytes(size_str)
except ValueError as ex:
print(f'Invalid size: "{size_str}"', file=sys.stderr)
sys.exit(1)
for file, size_bytes in input.items():
print(f"Writing: {file}")
#https://stackoverflow.com/a/14276423/1077444
with open(file, 'wb') as fout:
while( size_bytes > 0 ):
wrote = min(size_bytes, 1024) #chunk
fout.write(os.urandom(wrote))
size_bytes -= wrote
I'm trying to convert binary files written in C to and from HDF5 files using Python.
To read the binary file Python works like this:
pos=np.fromfile(f, count=npt*3, dtype='f4').reshape((npt, 3))
To write the same thing I've tried, without success, array.tofile() and now I'm tryin to use ctypes like that (stitching together different answers found on the web):
import ctypes as c
print "Loading C libraries with ctype"
libc = c.CDLL("libc.so.6") # Linux
# fopen()
libc.fopen.restype = c.c_void_p
def errcheck(res, func, args):
if not res: raise IOError
return res
libc.fopen.errcheck = errcheck
# errcheck() could be similarly defined for `fwrite`, `fclose`
c_int_p = c.POINTER(c.c_int)
c_float_p = c.POINTER(c.c_float)
c_double_p = c.POINTER(c.c_double)
def c_write(data, f, numpy_type, c_type_p, nbyte, count):
data = data.astype(numpy_type)
data_p = data.ctypes.data_as(c_type_p)
nitems = libc.fwrite(data_p, nbyte, count, f)
if nitems != data.size: # not all data were written
print "Not all data were written, exit..."
sys.exit()
c_write(pos, f, np.int32, c_int_p, 4, npart.size)
You should probably look into the struct module, it's awesome for packing and unpacking data at the lowest byte-per-byte level.