How to use flow control in pyserial - python

I need help using the serial port. I have a 3DR telemetry radio connected to my Raspberry Pi (running Python) on one side and to my Windows PC on the other. I have a small Python program which continuously writes data to the serial port and reads from it; reading might not be an issue, or it might be later anyway...
The issue is that I am afraid too many writes might cause a buffer overflow. Every time I search, the suggested solution is to enable RTS/CTS flow control, but I don't know how to use it. If I enable it, what will pyserial do, and how can I control my writes? It is really confusing.
I am not sure hardware flow control will work, because I have only connected RX, TX, ground and power to my Raspberry Pi; even if I try to connect the other flow-control pins to the Pi, I am not sure that works or is supported by the 3DR telemetry radio. I believe software flow control would be a good and simple solution for now.
here is my code ..
# Walk a snapshot of the registered channels and pack their pending TX messages
# into the UART cache buffer, flushing the buffer through sendUARTStream().
# NOTE(review): indentation was lost in this listing, so the exact nesting of
# the statements below cannot be confirmed from the text alone.
for channel in list(self.__channelDict.values()):
# Addition for channel priority later
# We check if the channels in the list is active
if channel.getChannelActive() is True:
# Check if we have reached the max count
# (a flush also happens whenever UARTForceSend is set)
if (messageCount >= (self.__NoOfMessagesInUARTStream - 1)) or UARTForceSend:
self.sendUARTStream(UARTCacheBuffer, messageCount, UARTStreamCRC)
# Reset
messageCount = 0
UARTStreamCRC = 0
UARTCacheBuffer.emptyBuffer()
message = channel.RetriveMessage(queueType = 1, raw = True)
# # there is no TX message in this channel
if message is None:
continue # continue with next channel
else:
# Running CRC-32 over every message placed in the stream. A CRC detects
# accidental corruption on the radio link; it does not authenticate the
# sender or protect the frame against deliberate modification.
UARTStreamCRC = binascii.crc32(message, UARTStreamCRC)
UARTCacheBuffer.append(message, raw = True)
messageCount +=1
and the function to write to serial port
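def sendUARTStream(self, UARTCacheBuffer, messageCount, UARTStreamCRC):
    """Build one stream frame (header + cached messages) and write it to the TX port.

    Args:
        UARTCacheBuffer: buffer object whose getBuffer() returns the packed messages.
        messageCount: number of messages currently held in the buffer.
        UARTStreamCRC: running CRC-32 of those messages, placed in the header.

    Returns:
        0 when there is nothing to send, otherwise messageCount.
    """
    if messageCount == 0:
        # looks like all channels are empty
        return 0

    headerSize = self.__UARTStreamHeaderFormat.size
    messageArray = UARTCacheBuffer.getBuffer()
    print(messageArray)
    print('messageCount = ' + str(messageCount) + 'crc = ' + str(UARTStreamCRC))

    # The frame has to be a mutable byte buffer: slice assignment into None
    # raises TypeError, so nothing was ever written to the port.
    UARTFrame = bytearray()
    UARTFrame[:headerSize] = self.createHeader(messageCount, UARTStreamCRC)
    # presumably __messageFormat is the per-message size in bytes; verify
    UARTFrame[headerSize : headerSize + self.__messageFormat * messageCount] = messageArray

    # Its time to finally send the data
    # NOTE(review): the frame is raw binary. If XON/XOFF software flow control
    # is enabled on this port, payload bytes 0x11/0x13 are taken as
    # flow-control characters, so it only suits framing that escapes them.
    print('UARTFrame = ##' + str(UARTFrame))
    self.__txPort.write(UARTFrame)
    return messageCount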

Related

Streaming speech recognition with Google Speech-to-Text is leading to improperly timestamped transcripts

My Problem:
The web app I'm building relies on real-time transcription of a user's voice along with timestamps for when each word begins and ends.
Google's Speech-to-Text API has a limit of 4 minutes for streaming requests but I want users to be able to run their mic's for as long as 30 minutes if they so choose.
Thankfully, Google provides its own code examples for how to make successive requests to their Speech-to-Text API in a way that mimics endless streaming speech recognition.
I've adapted their Python infinite streaming example for my purposes (see below for my code). The timestamps provided by Google are pretty accurate but the issue is that when I exceed the streaming limit (4 minutes) and a new request is made, the timestamped transcript returned by Google's API from the new request is off by as much as 5 seconds or more.
Below is an example of the output when I adjust the streaming limit to 10 seconds (so a new request to Google's Speech-to-Text API begins every 10 seconds).
The timestamp you see printed next to each transcribed response (the 'corrected_time' in the code) is the timestamp for the end of the transcribed line, not the beginning. These timestamps are accurate for the first request but are off by ~4 seconds in the second request and ~9 seconds in the third request.
In a Nutshell, I want to make sure that when the streaming limit is exceeded and a new request is made, the timestamps returned by Google for that new request are adjusted accurately.
My Code:
To help you understand what's going on, I would recommend running it on your machine (only takes a couple of minutes to get working if you have a Google Cloud service account).
I've included more detail on my current diagnosis below the code.
#!/usr/bin/env python
"""Google Cloud Speech API sample application using the streaming API.
NOTE: This module requires the dependencies `pyaudio`.
To install using pip:
pip install pyaudio
Example usage:
python THIS_FILENAME.py
"""
# [START speech_transcribe_infinite_streaming]
import os
import re
import sys
import time
from google.cloud import speech
import pyaudio
from six.moves import queue
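# Audio recording parameters
STREAMING_LIMIT = 20000  # 20 seconds (originally 4 mins but shortened for testing purposes)
SAMPLE_RATE = 16000
CHUNK_SIZE = int(SAMPLE_RATE / 10)  # 100ms

# Credentials for the Google client library. setdefault() keeps a value that
# the deployment environment already exports, so the placeholder path below is
# only a fallback for local runs. A service-account key is a long-lived secret:
# keep the JSON file outside any directory the web app serves and out of
# version control.
os.environ.setdefault('GOOGLE_APPLICATION_CREDENTIALS', 'YOUR_SERVICE_ACCOUNT_KEY.json')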
def get_current_time():
    """Return the current wall-clock time as whole milliseconds since the epoch."""
    millis = time.time() * 1000
    return int(round(millis))
class ResumableMicrophoneStream:
    """Microphone capture exposed as a generator of raw audio byte strings.

    The instance also carries the timing state (end times, restart counter,
    bridging offset) that the caller reads and updates when one recognition
    request is replaced by the next.
    """

    def __init__(self, rate, chunk_size):
        """Initialise the bookkeeping fields and open the PyAudio input stream."""
        self._rate = rate
        self.chunk_size = chunk_size
        self._num_channels = 1
        self._buff = queue.Queue()
        self.closed = True
        self.start_time = get_current_time()
        self.restart_counter = 0
        self.audio_input = []
        self.last_audio_input = []
        self.result_end_time = 0
        self.is_final_end_time = 0
        self.final_request_end_time = 0
        self.bridging_offset = 0
        self.last_transcript_was_final = False
        self.new_stream = True
        self._audio_interface = pyaudio.PyAudio()
        # The callback runs the capture asynchronously so the input device's
        # buffer does not overflow while the calling thread is busy with
        # network requests.
        stream_options = dict(
            format=pyaudio.paInt16,
            channels=self._num_channels,
            rate=self._rate,
            input=True,
            frames_per_buffer=self.chunk_size,
            stream_callback=self._fill_buffer,
        )
        self._audio_stream = self._audio_interface.open(**stream_options)

    def __enter__(self):
        """Mark the stream as open and hand back the instance."""
        self.closed = False
        return self

    def __exit__(self, type, value, traceback):
        """Stop capture, release PyAudio and unblock the generator."""
        self._audio_stream.stop_stream()
        self._audio_stream.close()
        self.closed = True
        # A None sentinel ends the generator, so the client's
        # streaming_recognize call cannot keep the process from terminating.
        self._buff.put(None)
        self._audio_interface.terminate()

    def _fill_buffer(self, in_data, *args, **kwargs):
        """PyAudio callback: queue the captured chunk and keep recording."""
        self._buff.put(in_data)
        return None, pyaudio.paContinue

    def generator(self):
        """Yield joined audio chunks, replaying unfinished audio after a restart."""
        while not self.closed:
            data = []

            # Author's note: THE BELOW 'IF' STATEMENT IS WHERE THE ERROR IS
            # LIKELY OCCURRING. It runs when the streaming limit is hit and a
            # new request is made.
            if self.new_stream and self.last_audio_input:
                previous = self.last_audio_input
                chunk_time = STREAMING_LIMIT / len(previous)

                if chunk_time != 0:
                    # Clamp the offset into [0, final_request_end_time].
                    offset = max(self.bridging_offset, 0)
                    offset = min(offset, self.final_request_end_time)
                    self.bridging_offset = offset

                    chunks_from_ms = round(
                        (self.final_request_end_time - self.bridging_offset)
                        / chunk_time
                    )
                    self.bridging_offset = round(
                        (len(previous) - chunks_from_ms) * chunk_time
                    )
                    # Replay the tail of the previous request's audio first.
                    data.extend(
                        previous[i] for i in range(chunks_from_ms, len(previous))
                    )

                self.new_stream = False

            # Block until at least one chunk is available; None marks the end
            # of the audio stream.
            first = self._buff.get()
            self.audio_input.append(first)
            if first is None:
                return
            data.append(first)

            # Drain whatever else is already queued, without blocking.
            while True:
                try:
                    extra = self._buff.get(block=False)
                except queue.Empty:
                    break
                if extra is None:
                    return
                data.append(extra)
                self.audio_input.append(extra)

            yield b"".join(data)
def listen_print_loop(responses, stream):
    """Print transcripts from the server responses with corrected end times.

    `responses` is an iterable that blocks until the server provides the next
    response. Only the top alternative of the first result in each response is
    used. Interim results end with a carriage return so the next line
    overwrites them; final results end with a newline so they stay on screen.
    The loop stops when the streaming limit has elapsed or when a final
    transcript contains "exit" or "quit".
    """
    for response in responses:
        elapsed = get_current_time() - stream.start_time
        if elapsed > STREAMING_LIMIT:
            stream.start_time = get_current_time()
            break

        if not response.results:
            continue
        result = response.results[0]
        if not result.alternatives:
            continue

        transcript = result.alternatives[0].transcript

        # Missing/zero fields count as 0.
        end_time = result.result_end_time
        result_seconds = end_time.seconds or 0
        result_micros = end_time.microseconds or 0
        stream.result_end_time = int((result_seconds * 1000) + (result_micros / 1000))

        restarts_ms = STREAMING_LIMIT * stream.restart_counter
        corrected_time = stream.result_end_time - stream.bridging_offset + restarts_ms
        stamp = str(corrected_time/1000)

        if not result.is_final:
            # Interim line: carriage return lets the next write overwrite it.
            sys.stdout.write("INTERIM RESULT # ")
            sys.stdout.write(stamp + ": " + transcript + "\r")
            stream.last_transcript_was_final = False
            continue

        sys.stdout.write("FINAL RESULT # ")
        sys.stdout.write(stamp + ": " + transcript + "\n")
        stream.is_final_end_time = stream.result_end_time
        stream.last_transcript_was_final = True

        # Stop recognition when the phrase contains one of the keywords.
        if re.search(r"\b(exit|quit)\b", transcript, re.I):
            sys.stdout.write("Exiting...\n")
            stream.closed = True
            break
def main():
    """Stream microphone audio to the Speech API, restarting the request as needed."""
    client = speech.SpeechClient()
    recognition_config = speech.RecognitionConfig(
        encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=SAMPLE_RATE,
        language_code="en-US",
        max_alternatives=1,
    )
    streaming_config = speech.StreamingRecognitionConfig(
        config=recognition_config, interim_results=True
    )

    mic_manager = ResumableMicrophoneStream(SAMPLE_RATE, CHUNK_SIZE)
    print(mic_manager.chunk_size)
    sys.stdout.write('\nListening, say "Quit" or "Exit" to stop.\n\n')
    sys.stdout.write("End (ms) Transcript Results/Status\n")
    sys.stdout.write("=====================================================\n")

    with mic_manager as stream:
        while not stream.closed:
            request_start = STREAMING_LIMIT * stream.restart_counter
            sys.stdout.write("\n" + str(request_start) + ": NEW REQUEST\n")

            stream.audio_input = []
            requests = (
                speech.StreamingRecognizeRequest(audio_content=content)
                for content in stream.generator()
            )
            responses = client.streaming_recognize(streaming_config, requests)

            # Consume the responses until the limit is hit or the user quits.
            listen_print_loop(responses, stream)

            # Hand this request's state over to the next one.
            if stream.result_end_time > 0:
                stream.final_request_end_time = stream.is_final_end_time
            stream.result_end_time = 0
            stream.last_audio_input = stream.audio_input
            stream.audio_input = []
            stream.restart_counter += 1

            if not stream.last_transcript_was_final:
                sys.stdout.write("\n")
            stream.new_stream = True


if __name__ == "__main__":
    main()
# [END speech_transcribe_infinite_streaming]
My Current Diagnosis
The 'corrected_time' is not being set correctly when new requests are made. This is due to the 'bridging_offset' not being set correctly. So what we need to look at is the 'generator()' method in the 'ResumableMicrophoneStream' class.
In the 'generator()' method, there is an 'if' statement which is run when the streaming limit is hit and a new request is made
if self.new_stream and self.last_audio_input:
Its purpose appears to be to take any lingering audio data that wasn't finished being transcribed before the streaming limit was hit and add it to the buffer before any new audio chunks so that it's transcribed in the new request.
It is also the responsibility of this 'if' statement to set the 'bridging offset' but I'm not entirely sure what this offset represents. All I know is that however it is being set, it is not being set accurately.
Time offset values show the beginning and the end of each spoken word
that is recognized in the supplied audio. A time offset value
represents the amount of time that has elapsed from the beginning of
the audio, in increments of 100ms.
This tells us that the offset you are receiving for each of the timestamps that you are running within your project will always make the timestamps from start to finish. That would be my guess as to why it’s causing your application problems.

Problem with streaming audio in Python from a mic via MQTT to Google Streaming using generators

I've read the Google documentation and looked at their examples however have not managed to get this working correctly in my particular use case. The problem is that the packets of the audio stream are broken up into smaller chunks (frame size) base64 encoded and sent over MQTT - meaning that the generator approach is likely to stop part way through despite not being fully completed by the sender. My MicrophoneSender component will send the final part of the message with a segment_key = -1, so this is the flag that the complete message has been sent and that a full/final process of the stream can be completed. Prior to that point the buffer may not have all of the complete stream so it's difficult to get either a) the generator to stop yielding b) the google as to return a partial transcription. A partial transcription is required once every 10 or so frames.
To illustrate this better here is my code.
inside receiver:
# An interim transcript is requested whenever the received segment number is a
# multiple of this value (see the modulo test in mqttMsgCallback).
STREAMFRAMETHRESHOLD = 10
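def mqttMsgCallback(self, client, userData, msg):
    """Handle one MQTT message carrying a base64-encoded audio segment.

    Segment numbers >= 0 are stored in the recogniser's buffer; any negative
    number (the sender uses -1) marks the end of the utterance and triggers
    the final transcription. Messages on other topics are ignored.
    """
    if not msg.topic.startswith("MicSender/stream"):
        return

    # The payload comes from whoever can publish to the topic, so it is parsed
    # defensively: broken JSON, a missing key, invalid base64 or a non-integer
    # segment number drops the message instead of raising inside the callback.
    try:
        msgDict = json.loads(msg.payload)
        streamBytes = b64decode(msgDict['audio_data'].encode('utf-8'))
        frameNum = int(msgDict['segment_num'])
    except (ValueError, KeyError, TypeError, AttributeError) as err:
        print("Dropping malformed stream message: {!r}".format(err))
        return

    if frameNum == 0:
        self.asr_time_start = time.time()
        self.asr.endOfStream = False
    if frameNum >= 0:
        self.asr.store_stream_bytes(streamBytes)
        self.asr.endOfStream = False
        if frameNum % STREAMFRAMETHRESHOLD == 0:
            self.asr.get_intermediate_and_print()
    else:
        #FINAL, received -1
        trans = self.asr.finish_stream()
        self.send_message(trans)
        self.frameCount=0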
inside Google Speech Class implementation:
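class GoogleASR(ASR):
    """Google streaming recogniser fed from a queue of received audio chunks.

    Changes against the listing as posted:
      * the chunk generator and the request iterator are rebuilt for every
        recognition call; a generator can be consumed only once, so the single
        instance created in __init__ left every later call with no audio;
      * after finish_stream() the generator is bound to the new buffer rather
        than to the queue that was just discarded;
      * each joined batch is yielded once (the listing yielded it twice);
      * the `try:` that had no handler in the listing is removed.
    self.client and self.config are not defined here; presumably the ASR base
    class provides them -- verify.
    """

    def __init__(self, name):
        super().__init__(name)
        # STREAMING
        self.stream_buf = queue.Queue()
        self.endOfStream = True
        self.streaming_config = types.StreamingRecognitionConfig(config=self.config)
        self.current_transcript = ''
        self.numCharsPrinted = 0
        self._reset_requests()

    def _reset_requests(self):
        """Bind a fresh chunk generator and request iterator to the current buffer."""
        self.stream_gen = self.getGenerator(self.stream_buf)
        self.requests = (types.StreamingRecognizeRequest(audio_content=chunk) for chunk in self.stream_gen)

    def getGenerator(self, buff):
        """Yield the audio currently queued in `buff` as one joined byte string."""
        while not self.endOfStream:
            # Use a blocking get() to ensure there's at least one chunk of
            # data, and stop iteration if the chunk is None, indicating the
            # end of the audio stream.
            chunk = buff.get()
            if chunk is None:
                return
            data = [chunk]
            # Now consume whatever other data's still buffered.
            while True:
                try:
                    chunk = buff.get(block=False)
                except queue.Empty:
                    # Buffer drained: this batch closes the request stream.
                    self.endOfStream = True
                    break
                if chunk is None:
                    # End marker in the middle of a batch: send what we have.
                    self.endOfStream = True
                    break
                data.append(chunk)
            yield b''.join(data)

    def store_stream_bytes(self, bytes):
        """Queue one decoded audio segment for the next recognition call."""
        self.stream_buf.put(bytes)

    def get_intermediate_and_print(self):
        """Run a recognition pass over the buffered audio and print the transcript."""
        self.get_intermediate()

    def get_intermediate(self):
        """Send the buffered audio to the API and record the latest transcript.

        Nothing is sent unless more than one chunk is queued.
        NOTE(review): this guard also applies to the final call made by
        finish_stream(), so a single remaining chunk is never transcribed.
        """
        if self.stream_buf.qsize() > 1:
            print("stream buf size: {}".format(self.stream_buf.qsize()))
            self._reset_requests()
            responses = self.client.streaming_recognize(self.streaming_config, self.requests)
            # Now, put the transcription responses to use.
            if not self.numCharsPrinted:
                self.numCharsPrinted = 0
            for response in responses:
                if not response.results:
                    continue
                # The `results` list is consecutive. For streaming, we only care about
                # the first result being considered, since once it's `is_final`, it
                # moves on to considering the next utterance.
                result = response.results[0]
                if not result.alternatives:
                    continue
                # Display the transcription of the top alternative.
                self.current_transcript = result.alternatives[0].transcript
                # Display interim results, but with a carriage return at the end of the
                # line, so subsequent lines will overwrite them.
                #
                # If the previous result was longer than this one, we need to print
                # some extra spaces to overwrite the previous result
                overwrite_chars = ' ' * (self.numCharsPrinted - len(self.current_transcript))
                sys.stdout.write(self.current_transcript + overwrite_chars + '\r')
                sys.stdout.flush()
                self.numCharsPrinted = len(self.current_transcript)

    def finish_stream(self):
        """Transcribe what is left, reset the streaming state, return the transcript."""
        self.endOfStream = False
        self.get_intermediate()
        self.endOfStream = True
        final_result = self.current_transcript
        self.stream_buf= queue.Queue()
        self.allBytes = bytearray()
        self.current_transcript = ''
        # Rebind the generator to the new, empty buffer.
        self._reset_requests()
        self.streaming_config = types.StreamingRecognitionConfig(config=self.config)
        return final_result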
Currently what this does is output nothing from the transcriptions side.
stream buf size: 21
stream buf size: 41
stream buf size: 61
stream buf size: 81
stream buf size: 101
stream buf size: 121
stream buf size: 141
stream buf size: 159
But the response/transcript is empty. If I put a breakpoint on the for response in responses inside the get_intermediate function then it never runs which means that for some reason it's empty (not retuned from Google). However, if I put a breakpoint on the generator and take too long (> 5 seconds) to continue to yield the data, it (Google) tells me that the data is probably being sent to the server too slow. google.api_core.exceptions.OutOfRange: 400 Audio data is being streamed too slow. Please stream audio data approximately at real time.
Maybe someone can spot the obvious here...
The way you have organized your code, the generator you give to the Google API is initialized exactly once - on line 10, using a generator expression: self.requests = (...). As constructed, this generator will also run exactly once and become 'exhausted'. The same applies to the generator function that the (for ...) generator itself calls (self.getGenerator()). It will run only once and stop when it has retrieved 10 chunks of data (which are very small, from what I can see). Then, the outer generator (what you assigned to self.requests) will also stop forever - giving the ASR only a short bit of data (10 times 20 bytes, looking at the printed debug output). There's nothing recognizable in that, most likely.
BTW, note you have a redundant yield b''.join(data) in your function, the data will be sent twice.
You will need to redo the (outer) generator so it does not return until all data is received. If you want to use another generator as you do to gather each bigger chunk for the 'outer' generator from which the Google API is reading, you will need to re-make it every time you begin a new loop with it.

MinimalModbus read_registry error

I am trying to read registry information from a modbus device using MinimalModbus. However, every time I attempt to read registry 40003 which has a value of 220 I receive this error:
raise ValueError('The slave is indicating an error. The response is: {!r}'.format(response))
ValueError: The slave is indicating an error. The response is: '\x01\x83\x02Àñ'
I know there is a value in 40003 and I am following the communication documents for the device. Here is my code:
import minimalmodbus
import serial
# Instrument on serial port COM5, Modbus slave address 1.
gas = minimalmodbus.Instrument('COM5', 1)
# Serial line settings: 9600 baud, 8 data bits, no parity, 1 stop bit.
gas.serial.baudrate = 9600
gas.serial.bytesize = 8
gas.serial.parity = serial.PARITY_NONE
gas.serial.stopbits = 1
gas.serial.timeout = 0.05
gas.mode = minimalmodbus.MODE_RTU
# NOTE(review): 40003 is the documentation-style register number. The reply
# quoted above, '\x01\x83\x02...', is function code 0x03 with the error bit
# set and exception code 0x02 (illegal data address): the device rejects the
# address. The answer on this page resolves it with read_register(3, 1).
# The second argument is the number of decimals applied to the raw value.
temp = gas.read_register(40003, 1)
print (float(temp))
I have this problem for every registry and I cannot find information regarding Àñ.
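The problem was the register number 40003. The Modbus protocol doesn't use the full documentation-style register number: the leading 4 only marks the register as a holding register, and the request carries the short address. The reply '\x01\x83\x02…' is the device's exception response (function 0x03 with the error bit set, exception code 0x02, illegal data address). So I changed it to temp = gas.read_register(3, 1)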

Python GPS data capture

So I'm making an application using a GPS module, Python and a MySQL database.
So I have written some code, to try to capture the data from the GPS and store it in the database. I'm using a plugin called "pynmea2" to parse some of the data (longitude and latitude). However, I need more data then that, so, I already tried ALOT different things, but my program keeps crashing the whole time. Could someone help me out with this?
Most of the time I get all the data from the Serial connection, but I want to be able to strip data from it. So example of what I get:[b'$GPGGA,093512.000,,,,,0,3,,,M,,M,,*47\r\n', b'$GPGLL,,,,,093512.000,V,N*76\r\n', b'$GPGSA,A,1,,,,,,,,,,,,,,,*1E\r\n', b'$GPGSV,3,1,11,15,72,214,,24,52,276,,13,48,141,,17,31,093,29*70\r\n', b'$GPGSV,3,2,11,18,28,292,,28,27,049,25,19,24,120,24,12,23,211,13*7E\r\n', b'$GPGSV,3
Well, it's not that simple to extract data from it,but it works out just fine with the pynmea2 library (only library I'm allowed to use.
So, I need the speed, the latitude and longitude for now, but the speed is bothering me now. It gives ValueError: could not convert string to float: "22*49\\r\\n'"
alot of times because I don't do a proper way of finding the data and then "parsing" it.
Here is my code I'm currently using;
from model.GPSParser import GPSParser
from model.DB import DB
import serial
import time
import datetime
import pynmea2
#########################################
# This is the main code to setup the
# serial connection with the GPS module.
# it needs to be OR runt as root OR as
# pi with all the root rights.
#########################################
# NOTE(review): the banner above says this must run as root. Serial devices
# are usually reachable through group membership (often `dialout`) instead,
# which avoids running the parser and the database client as root -- verify on
# the target image.
port = "/dev/ttyAMA0"
# timeout=0 puts the port in non-blocking mode: reads return immediately with
# whatever is available, which can be a partial NMEA sentence.
ser = serial.Serial(port, 9600, timeout=0)
#########################################
# These are all the global variables
# to be used. All defined and set to
# zero or their standard 'Null' value.
#########################################
lat = 0.0
lon = 0.0
cur_speed = 0.0
# NOTE(review): indentation was lost in this listing; the nesting of the
# statements inside the loop cannot be confirmed from the text alone.
while True:
try:
# Get the data from the serial monitor.
# readlines() returns a list of bytes objects; str() turns that list into its
# repr (brackets, b'' prefixes, escaped \r\n), so `data` is not a clean NMEA
# sentence. That repr is where text such as 22*49\\r\\n' in the reported
# ValueError comes from.
data = str(ser.readlines()).lstrip("b'")[:-3]
# print(data)
#########################################
# Find the speed, to check if we're
# standing still or not. Save it in a
# #var speed
#########################################
if data.find('$GPVTG') != -1:
# Field 7 is counted from the start of the whole repr string, not from the
# start of the $GPVTG sentence inside it.
cur_speed = data.split(",")[7]
#########################################
# Get the Latitude and Longitude
#########################################
if data.find('$GPGGA') != -1:
print(data)
# Check whether the data strings are empty or not.
if GPSParser.parseLatitude(data) != "" and GPSParser.parseLongitude(data) != "":
lat = GPSParser.parseLatitude(data)
lon = GPSParser.parseLongitude(data)
# Debug printing
# print("Latitude: " + GPSParser.parseLatitude(data))
# print("Longitude: " + GPSParser.parseLongitude(data))
# print("Speed: " + cur_speed)
#########################################
# Insert the coordinates into the database
# Be sure to check of we are really driving
# So when the speed is higher then 5 km/u
# Store everything into the database.
#########################################
# float() raises ValueError on a non-numeric field; ValueError is not among
# the handlers below, so it ends the program (the crash described above).
if float(cur_speed) > 5.0:
# NOTE(review): a new DB() object is created for every insert, and
# insertCoordinates() is not shown. lat/lon come from device input, so confirm
# it passes them as bound query parameters rather than building the SQL text.
db = DB()
db.insertCoordinates(lat, lon, datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"))
# Wait a bit to not overload the Serial port
time.sleep(0.5)
############################################################
# The error handling
############################################################
# Reopen the port after a serial failure, then carry on.
except serial.serialutil.SerialException:
ser.close()
port = "/dev/ttyAMA0"
ser = serial.Serial(port, 9600, timeout=0)
continue
# The handlers below discard the error without logging it and skip the
# time.sleep() above, so a persistent fault makes the loop spin.
except pynmea2.ParseError:
# print("Error on parsing object, continuing...")
continue
except BlockingIOError:
# print("Blocking I/O error, continuing...")
continue
except TypeError:
# print("Type error, continuing...")
continue
except IndexError:
# print("To catch an error...")
continue
except KeyboardInterrupt:
print("\nProgram stopped.")
exit()
So the import from model doesn't do much, only the database connection and the "gps parser" is only the pynmea that parses a string of data and then returns it.
So what I want is something like:
It gets all the data it pulses once per second,
it then splits it all into chucks where it starts with the $GP variable, then I can search for the second variable part, for example VTG or GGA. And then I can use that string to make conversions to the right value to extract the speed, latitude, longitude and other data if needed.
Hope you guys can understand me well and can help me out.
Not sure if that solves your problem, but pynmea2 has speed attributes, defined in talker.py.
import pynmea2
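# Read the captured NMEA sentences line by line; the context manager closes
# the file once the loop has finished.
with open('/tmp/nmea.txt') as nmea_file:
    for i, line in enumerate(nmea_file):
        # parsing via pynmea
        msg = pynmea2.parse(line.strip())
        if msg.sentence_type == 'VTG':
            print ('parsing line %s with pynmea:' % i, float(msg.spd_over_grnd_kmph))
        # parsing via manually
        # (field 7 of a single $GPVTG sentence is the speed used above)
        if line.startswith('$GPVTG'):
            cur_speed = line.split(",")[7]
            print ('parsing line %s manually:' % i, float(cur_speed))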
Returns:
parsing line 1 with pynmea: 91.626
parsing line 1 manually: 91.626
parsing line 10 with pynmea: 90.842
parsing line 10 manually: 90.842
parsing line 19 with pynmea: 89.676
parsing line 19 manually: 89.676

wpa-handshake with python - hashing difficulties

I am trying to write a Python program which calculates the WPA handshake, but I have problems with the hashes. For comparison I installed cowpatty (to see where I start being wrong).
My PMK generation works fine, but the PTK calculation always seems to be wrong. I am not sure whether I have to format my input (MAC addresses and nonces) or can just pass them into the function as strings.
I will give you my routerinformation, which is no problem since I just set it up for testing.
My program looks as follows:
import hmac,hashlib,binascii
# Inputs of the key derivation, taken from a test network and its capture.
passPhrase = "10zZz10ZZzZ"
ssid = "Netgear 2/158"
# Label string fed to the PRF.
A = "Pairwise key expansion"
# NOTE(review): the MAC addresses, the nonces and `data` below are ASCII hex
# text, not the raw bytes that appear in the frames. The PRF and the MIC must
# be computed over the binary values; the follow-up code on this page converts
# them with a2b_hex(), which is what makes the results match cowpatty.
APmac = "001e2ae0bdd0"
Clientmac = "cc08e0620bc8"
ANonce = "61c9a3f5cdcdf5fae5fd760836b8008c863aa2317022c7a202434554fb38452b"
SNonce = "60eff10088077f8b03a0e2fc2fc37e1fe1f30f9f7cfbcfb2826f26f3379c4318"
# min()/max() put the smaller MAC and the smaller nonce first.
B = min(APmac,Clientmac)+max(APmac,Clientmac)+min(ANonce,SNonce)+max(ANonce,SNonce)
# EAPOL frame used for the MIC calculation; its trailing bytes are all zero.
data="0103005ffe010900200000000000000001000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000"
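def customPRF512(key,A,B):
    """Expand `key` into 64 bytes with the HMAC-SHA1 based PRF-512.

    Args:
        key: the pairwise master key as raw bytes.
        A: the label ("Pairwise key expansion"), as text or bytes.
        B: the concatenated MAC addresses and nonces as raw bytes. ASCII hex
           text is not equivalent and yields a different key.

    Returns:
        The first 64 bytes of HMAC-SHA1(key, A || 0x00 || B || i), i = 0, 1, ...
    """
    blen = 64
    # One SHA-1 digest is 160 bits, so 512 bits need four rounds. The listing
    # ran one extra round whose output was discarded by the final slice.
    rounds = (blen * 8 + 159) // 160
    label = A if isinstance(A, bytes) else A.encode("ascii")
    blocks = []
    for i in range(rounds):
        counter = bytes(bytearray([i]))
        # hashlib.sha1 replaces the bare name `sha`, which nothing in the
        # listing defines.
        hmacsha1 = hmac.new(key, label + b"\x00" + B + counter, hashlib.sha1)
        blocks.append(hmacsha1.digest())
    return b"".join(blocks)[:blen]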
# PMK = PBKDF2(passphrase, SSID, 4096 iterations, 32 bytes). pbkdf2() and
# b2a_p() are helpers that are not included in the listing.
pmk = pbkdf2(passPhrase, ssid, 4096, 32) #no sourcecode, since b2a_p(pmk) output fits to those of cowpatty
# B is built from hex text here, so the derived PTK cannot match the reference.
ptk = customPRF512(pmk,A,B) #the prf-function fits the pseudocode in the ieee, but does not give me the correct output (like cowpatty does)
# and i have no idea why :(
print b2a_p(pmk),"\n\n\n"
print b2a_p(ptk),"\n\n\n"
# The MIC key is the first 16 bytes of the PTK. hmac.new() without a
# digestmod uses MD5 on Python 2; Python 3.8+ requires the digest to be named.
# `data` starts 01 03 005f fe 0109: the Key Information field is 0x0109, whose
# low three bits (1) select the HMAC-MD5 MIC, so MD5 is the right digest for
# this frame.
mic1 = hmac.new(ptk[0:16],data)
print mic1.hexdigest() #should be the mic-calculation, not sure if this is correct...
the desired outputs (which cowpatty confirmed) are:
PMK is
01b8 09f9 ab2f b5dc 4798 4f52 fb2d 112e
13d8 4ccb 6b86 d4a7 193e c529 9f85 1c48
Calculated PTK for "10zZz10ZZzZ" is
bf49 a95f 0494 f444 2716 2f38 696e f8b6
428b cf8b a3c6 f0d7 245a d314 a14c 0d18
efd6 38aa e653 c908 a7ab c648 0a7f 4068
2479 c970 8aaa abc3 eb7e da28 9d06 d535
Calculated MIC with "10zZz10ZZzZ" is
4528 2522 bc67 07d6 a70a 0317 a3ed 48f0
Maybe someone of you could tell me, why my program simply doesn't work. Do the hmac-functions work correctly? Is my input formatted wrong? Do I have to regard endianess anywhere? Thanks for your time in advance, I would appreciate any help!
Alright, I figured it out by myself... more by desperate testing and some luck, than successful research, which lead to nothing long enough. Instead of using the MAC-adresses and nonces as the strings they were, I had to unhexlify them. I used
a2b_hex() #alternatively unhexlify()
My final code looks somewhat like this, defs excluded:
import hmac,hashlib,binascii
# Same inputs as in the question, with every value that appears in the frames
# converted from hex text to raw bytes. a2b_hex(), b2a_hex(), pbkdf2() and
# customPRF512() belong to the definitions the author left out.
passPhrase="10zZz10ZZzZ"
ssid = "Netgear 2/158"
A = "Pairwise key expansion"
APmac = a2b_hex("001e2ae0bdd0")
Clientmac = a2b_hex("cc08e0620bc8")
ANonce = a2b_hex("61c9a3f5cdcdf5fae5fd760836b8008c863aa2317022c7a202434554fb38452b")
SNonce = a2b_hex("60eff10088077f8b03a0e2fc2fc37e1fe1f30f9f7cfbcfb2826f26f3379c4318")
# On raw byte strings min()/max() order the values bytewise.
B = min(APmac,Clientmac)+max(APmac,Clientmac)+min(ANonce,SNonce)+max(ANonce,SNonce)
data = a2b_hex("0103005ffe01090020000000000000000100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000")
pmk = pbkdf2(passPhrase, ssid, 4096, 32)
ptk = customPRF512(pmk,A,B)
# MIC keyed with the first 16 bytes of the PTK. No digest is named, so
# Python 2 uses HMAC-MD5, which fits this frame (Key Information 0x0109,
# descriptor version 1). A frame with descriptor version 2 needs HMAC-SHA1
# truncated to 16 bytes instead.
mic = hmac.new(ptk[0:16],data)
print "desiredpmk:\t","01b809f9ab2fb5dc47984f52fb2d112e13d84ccb6b86d4a7193ec5299f851c48"
print "pmk:\t\t",b2a_hex(pmk),"\n"
print "desired ptk:\t","bf49a95f0494f44427162f38696ef8b6"
print "ptk:\t\t",b2a_hex(ptk[0:16]),"\n"
print "desired mic:\t","45282522bc6707d6a70a0317a3ed48f0"
print "mic:\t\t",mic.hexdigest(),"\n"
So the answers to my questions were: yes, hashfunctions work correctly, yes, input is formatted wrong, no, no endianess-issues.
Thanks for posting. This helped me out, so posting my revisions:
#==========================================================================================
#
# Verify the MIC code in EAPoL Message #2 is valid, or not (WPA2)
#
#==========================================================================================
#
# The home for this code is (so check for updates):
#
# https://www.duckware.com/tech/verify-mic-in-four-way-handshake.py.txt
#
# and this code is fully public, as it was based on/derived from this public code:
#
# https://stackoverflow.com/questions/12018920/wpa-handshake-with-python-hashing-difficulties
#
# 1. PMK: 'Pairwise Master Key' (256-bit) is generated from SSID/PASS in WPA2:
#
# o https://www.wireshark.org/tools/wpa-psk.html (SSID/PASS to PMK)
# o http://anandam.name/pbkdf2/ (Password-Based Key Derivation Function 2)
#
# 2. PRF512: The PRF-512 function is used to compute four 128-bit keys (KCK,KEK,TK1,TK2).
# For details on this function, see:
#
# o http://etutorials.org/Networking/802.11+security.+wi-fi+protected+access+and+802.11i/Part+II+The+Design+of+Wi-Fi+Security/Chapter+10.+WPA+and+RSN+Key+Hierarchy/Computing+the+Temporal+Keys/
#
# 3. KCK: The KCK (first 128 bits of the PTK; see above) are used to generate the MIC:
#
# o https://tldp.org/HOWTO/8021X-HOWTO/intro.html
#
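# RUN: Run the code below in an ONLINE Python 2.7 compiler. For example:
#
# o https://repl.it/languages/python
# o https://www.tutorialspoint.com/execute_python_online.php
#
# NOTE: PASS, the PMK and the KCK are key material for the network under test.
# Unless that network is a throwaway lab setup, run this on a local Python 2.7
# interpreter rather than pasting the passphrase and the captured frames into
# a hosted service.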
#
# CUSTOMIZE: How to customize the code below:
#
# 1) PCAP the problematic handshake (TIP: use tcpdump with ether host xx:xx:xx:xx:xx:xx)
# 2) Update SSID/PASS vars below with the known Wi-Fi name/password
# 3) Copy entire Ethernet frames for EAPoL Message #1/#2 into EAPOL1/2 vars below.
# TIP: In Wireshark, right click on Ethernet frame, 'Copy' / '...as a Hex Stream' / paste below
# 4) Use first with a working 4-way handshake (to confirm proper usage; MIC match), then apply
# to non-working 4-way handshake to confirm that the MIC in Message #2 is good/bad.
# 5) The code below, unmodified, results in a MIC found/calculated 'match'
#
# See also:
#
# o https://www.wifi-professionals.com/2019/01/4-way-handshake
# o https://stackoverflow.com/questions/15133797/creating-wpa-message-integrity-code-mic-with-python
# o https://www.shellvoide.com/wifi/understanding-wpa-wpa2-hash-mic-cracking-process-python/
# o https://ww.ins1gn1a.com/understanding-wpa-psk-cracking/
# o https://docs.python.org/3/library/binascii.html
# o https://stackoverflow.com/questions/9020843/how-to-convert-a-mac-number-to-mac-string
#
#==========================================================================================
import hmac,hashlib,binascii
def to_mac(addr):
    """Format a hex string such as 'a0b1c2...' as colon-separated octets."""
    octets = []
    for start in range(0, len(addr), 2):
        octets.append(addr[start:start + 2])
    return ':'.join(octets)
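def PRF_512(key,A,B):
    """Return 64 bytes of HMAC-SHA1(key, A || 0x00 || B || i) for i = 0..3.

    `key` and `B` are raw byte strings; `A` is the label and may be text or
    bytes. Joining with b"" and building the counter byte explicitly keeps the
    result identical on Python 2 while also working on byte strings under
    Python 3.
    """
    label = A if isinstance(A, bytes) else A.encode("ascii")
    blocks = []
    for i in range(4):
        counter = bytes(bytearray([i]))
        blocks.append(hmac.new(key, label + b"\x00" + B + counter, hashlib.sha1).digest())
    # Four SHA-1 digests give 80 bytes; the PTK is the first 64.
    return b"".join(blocks)[:64]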
def a2b(s):
    """Decode a hexadecimal string into the raw bytes it represents."""
    return binascii.unhexlify(s)
def b2a(by):
    """Encode raw bytes as their lowercase hexadecimal representation."""
    return binascii.hexlify(by)
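# Complete Ethernet frames of EAPoL message #1 (AP -> STA) and #2 (STA -> AP).
EAPOL1 = a2b("60f189052d94a00460216606888e0203005f02008a00100000000000000001141f7a3ebdc0b51712934bef6e43ea13f80cb460f121f35408aa607046e239980000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000")
EAPOL2 = a2b("a0046021660660f189052d94888e0103007502010a000000000000000000015b46c7165f504c664aed90b78f3b705e02b4b029a67e3189d1632479d7e7a4e6000000000000000000000000000000000000000000000000000000000000000056de18f5efa272a4663560b73c537a65001630140100000fac040100000fac040100000fac028000")
# Placeholders: replace with the name and passphrase of the network under test.
SSID = "your-wifi-ssid"
PASS = "your-wifi-password"
PMK = hashlib.pbkdf2_hmac('sha1', PASS, SSID, 4096, 32)
VER_WPA = 2 # WPA2 means use 'SHA1'
XAUTH = a2b("888E")
# Frame sanity checks before any field is read:
#  - message #1 must reach the end of its nonce (byte 63) and message #2 the
#    end of its MIC (byte 111), so a truncated paste is reported, not indexed;
#  - source and destination addresses of the two frames must mirror each other;
#  - BOTH frames must carry EtherType 0x888E. The earlier form of this test
#    compared EAPOL1 twice and never looked at the EtherType of message #2.
frames_long_enough = len(EAPOL1) >= 63 and len(EAPOL2) >= 111
addresses_mirror = EAPOL1[0:6]==EAPOL2[6:12] and EAPOL2[0:6]==EAPOL1[6:12]
both_eapol = EAPOL1[12:14]==XAUTH and EAPOL2[12:14]==XAUTH
if frames_long_enough and addresses_mirror and both_eapol:
    # Byte 20 is the low byte of the Key Information field; its low three bits
    # hold the key descriptor version.
    if ord(EAPOL1[20])%8==VER_WPA and ord(EAPOL2[20])%8==VER_WPA:
        R1 = EAPOL1[31:63] # random 1 (AP nonce)
        R2 = EAPOL2[31:63] # random 2 (STA nonce)
        M1 = EAPOL2[0:6] # MAC 1 (AP MAC)
        M2 = EAPOL1[0:6] # MAC 2 (STA MAC)
        # Generate KCK, KEK, TK1, TK2 from the PMK (and AP/STA info)
        PTK = PRF_512(PMK,"Pairwise key expansion",min(M1,M2)+max(M1,M2)+min(R1,R2)+max(R1,R2))
        KCK = PTK[0:16];
        # try to validate the MIC in EAPoL message #2 is correct
        # The MIC covers the EAPOL part of the frame (from byte 14) with the
        # 16-byte MIC field replaced by zeros; HMAC-SHA1 is cut to 128 bits.
        MICRAW = hmac.new(KCK,EAPOL2[14:95]+a2b("00000000000000000000000000000000")+EAPOL2[111:],hashlib.sha1)
        MICFOUND = b2a(EAPOL2[95:111])
        MICCALC = MICRAW.hexdigest()[0:32]
        # The lines below print the passphrase, PMK and KCK in clear; treat the
        # output as secret for any network that is not a lab setup.
        print "SSID/PASS: ",SSID,"/",PASS
        print "PMK: ",b2a(PMK)
        print "AP-MAC: ",to_mac(b2a(M1))
        print "STA-MAC: ",to_mac(b2a(M2))
        print "AP-NONCE: ",b2a(R1)
        print "STA-NONCE: ",b2a(R2)
        print "KCK: ",b2a(KCK)
        print "MIC-found: ",MICFOUND
        print "MIC-calc: ",MICCALC
        # Plain == is adequate for this offline diagnostic, which prints both
        # values anyway; code that verifies a MIC on behalf of a live
        # authenticator should compare with hmac.compare_digest().
        print "Result: ",("OK: EAPoL message #2 validated" if MICFOUND==MICCALC else "ERROR: MIC does not match")
    else:
        print "***ERROR: Did not find expected 'WPA2' version in EAPoL messages"
else:
    print "***ERROR: Problem validated Ethernet frames. Do EAPOL1 and EAPOL2 both include the Ethernet headers?"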
