Decode h264 video frame from bytes received from a WebSocket

Decode h264 video frame from bytes received from a WebSocket - python

I'm trying to get frames from my home security camera (Provision-ISR).
So, I see when I open the web client, that the video frames are sent in a WebSocket.
I copy one of the frames,and I try to save it to file, but it's not working.
import numpy as np
from cv2 import cv2
frame_buffer = np.frombuffer(bytearray(frame), np.int16,int(len(frame) / 2))
cv2.imwrite("image.jpg",frame_buffer)
this is example of the hex editor

Solved!
av.open(rawData, format="h264", mode='r') - do the decode
def save_banch_of_frames(rawData):
global count
rawData.seek(0)
container = av.open(rawData, format="h264", mode='r')
for packet in container.demux():
if packet.size == 0:
continue
for frame in packet.decode():
cv2.imwrite(f"frames/file{count}.jpg", frame.to_ndarray(format="bgr24"))
count += 1
def check_is_keyframe(frame):
frameData = io.BytesIO()
frameData.write(frame)
frameData.seek(0)
container = av.open(frameData, format="h264", mode='r')
for packet in container.demux():
if packet.is_keyframe:
return True
return False
data = get_frame_from_response(video_socket)
while True:
rawData = io.BytesIO()
is_keyframe = False
while not is_keyframe:
rawData.write(data)
data = get_frame_from_response(video_socket)
is_keyframe = check_is_keyframe(data)
save_banch_of_frames(rawData)

Related

Opening wav from URL in Python

With the Python script shown below I try to play a wav file from the internet but I'm getting the error message OSError: [Errno 22] Invalid argument: 'https://file-examples-com.github.io/uploads/2017/11/file_example_WAV_1MG.wav'.
How can I play a wav file from the internet?
import pyaudio
import wave
chunk = 1024
f = wave.open("https://file-examples-com.github.io/uploads/2017/11/file_example_WAV_1MG.wav","rb")
p = pyaudio.PyAudio()
stream = p.open(format = p.get_format_from_width(f.getsampwidth()),
channels = f.getnchannels(),
rate = f.getframerate(),
output = True)
data = f.readframes(chunk)
while data:
stream.write(data)
data = f.readframes(chunk)
stream.stop_stream()
stream.close()
p.terminate()

You can also get the content of website, store it in a variable, and play it. There is no need to store it on the disk for a short file like this. Here is an example of how to do this:
import logging
import requests
import simpleaudio
sample_rate = 8000
num_channels = 2
bytes_per_sample = 2
total = sample_rate * num_channels * bytes_per_sample
logging.basicConfig(level=logging.INFO)
audio_url = "https://file-examples-com.github.io/uploads/2017/11/file_example_WAV_1MG.wav"
logging.info(f"Downloading audio file from: {audio_url}")
content = requests.get(audio_url).content
# Just to ensure that the file does not have extra bytes
blocks = len(content) // total
content = content[:total * blocks]
wave = simpleaudio.WaveObject(audio_data=content,
sample_rate=sample_rate,
num_channels=num_channels,
bytes_per_sample=bytes_per_sample)
control = wave.play()
control.wait_done()

I'm demonstrating what #larsks suggests.
import requests
with open(audio_file, 'wb') as a:
resp = requests.get("https://file-examples-com.github.io/uploads/2017/11/file_example_WAV_1MG.wav")
if resp.status_code == 200:
a.write(resp.content)
print('downloaded')
else:
print(resp.reason)
exit(1)
f = wave.open(audio_file, "rb")
# the remaining lines are the same
And I also suggest another great python library python-mpv which is based on mpv, this library can handle much more codecs and also online streaming play.

Turn image array to io bytes object to simulare open with 'rb'

I am trying to send frames of a video to a remote server using requests, the code I am using for the same is
def send_request(frame_path = "frame_on_disk_1.jpeg"):
files = {'upload': with open(frame_path,"rb")}
r = requests.post(URL, files=files)
return r
So I am writing the frames to disk and then reading them as bytes when sending over to the server, which is not the best way to do it.
However, I am not sure how I can actually convert the array in the following code represented by variable frame in the code below, directly into a read byte object without touching the disk.
import cv2
cap = cv2.VideoCapture("video.MOV")
count = 0
while(True):
# Capture frame-by-frame
ret, frame = cap.read()
cv2.imwrite(f"all_frames/frame_num_{count}.png",frame)

You can use io.BytesIO and cv2.imencode to encode an image into a memory buffer.
I've also used a queue so the frames are enqueued and then HTTP requests are done in a separate threads.
import traceback
import cv2
from io import BytesIO
from queue import Queue
from threading import Thread
from requests import Session
URL = "http://example.com"
THREADS = 5
SAMPLE = "sample.mov"
class UploaderThread(Thread):
def __init__(self, q, s):
super().__init__()
self.q = q
self.s = s
def run(self):
for count, file in iter(self.q.get, "STOP"):
try:
r = self.s.post(URL, files={"upload": file})
except Exception:
traceback.print_exc()
else:
print(f"Frame ({count}): {r}")
def main():
cap = cv2.VideoCapture(SAMPLE)
q = Queue()
s = Session()
count = 0
threads = []
for _ in range(THREADS):
t = UploaderThread(q, s)
t.start()
threads.append(t)
while True:
ret, frame = cap.read()
count += 1
if not ret:
break
_, img = cv2.imencode(".png", frame)
q.put_nowait((count, BytesIO(img)))
for _ in range(THREADS):
q.put("STOP")
if __name__ == "__main__":
main()

How to use pyav or opencv to decode a live stream of raw H.264 data?

The data was received by socket ,with no more shell , they are pure I P B frames begin with NAL Header(something like 00 00 00 01). I am now using pyav to decode the frames ,but i can only decode the data after the second pps info(in key frame) was received(so the chunk of data I send to my decode thread can begin with pps and sps ), otherwise the decode() or demux() will return error "non-existing PPS 0 referenced decode_slice_header error" .
I want to feed data to a sustaining decoder which can remember the previous P frame , so after feeding one B frame, the decoder return a decoded video frame. Or someform of IO that can be opened as container and keep writing data into it by another thread.
Here is my key code:
#read thread... read until get a key frame, then make a new io.BytesIO() to store the new data.
rawFrames = io.BytesIO()
while flag_get_keyFrame:()
....
content= socket.recv(2048)
rawFrames.write(content)
....
#decode thread... decode content between two key frames
....
rawFrames.seek(0)
container = av.open(rawFrames)
for packet in container.demux():
for frame in packet.decode():
self.frames.append(frame)
....
My code will play the video but with a 3~4 seconds delay. So I am not putting all of it here, because I know it's not actually working for what I want to achieve.
I want to play the video after receiving the first key frame and decode the following frames right after receiving them . Pyav opencv ffmpeg or something else ,how can I achieve my goal?

After hours of finding an answer for this as well. I figure this out myself.
For single thread, you can do the following:
rawData = io.BytesIO()
container = av.open(rawData, format="h264", mode='r')
cur_pos = 0
while True:
data = await websocket.recv()
rawData.write(data)
rawData.seek(cur_pos)
for packet in container.demux():
if packet.size == 0:
continue
cur_pos += packet.size
for frame in packet.decode():
self.frames.append(frame)
That is the basic idea. I have worked out a generic version that has receiving thread and decoding thread separated. The code will also skip frames if the CPU does not keep up with the decoding speed and will start decoding from the next key frame (so you will not have the teared green screen effect). Here is the full version of the code:
import asyncio
import av
import cv2
import io
from multiprocessing import Process, Queue, Event
import time
import websockets
def display_frame(frame, start_time, pts_offset, frame_rate):
if frame.pts is not None:
play_time = (frame.pts - pts_offset) * frame.time_base.numerator / frame.time_base.denominator
if start_time is not None:
current_time = time.time() - start_time
time_diff = play_time - current_time
if time_diff > 1 / frame_rate:
return False
if time_diff > 0:
time.sleep(time_diff)
img = frame.to_ndarray(format='bgr24')
cv2.imshow('Video', img)
return True
def get_pts(frame):
return frame.pts
def render(terminated, data_queue):
rawData = io.BytesIO()
cur_pos = 0
frames_buffer = []
start_time = None
pts_offset = None
got_key_frame = False
while not terminated.is_set():
try:
data = data_queue.get_nowait()
except:
time.sleep(0.01)
continue
rawData.write(data)
rawData.seek(cur_pos)
if cur_pos == 0:
container = av.open(rawData, mode='r')
original_codec_ctx = container.streams.video[0].codec_context
codec = av.codec.CodecContext.create(original_codec_ctx.name, 'r')
cur_pos += len(data)
dts = None
for packet in container.demux():
if packet.size == 0:
continue
dts = packet.dts
if pts_offset is None:
pts_offset = packet.pts
if not got_key_frame and packet.is_keyframe:
got_key_frame = True
if data_queue.qsize() > 8 and not packet.is_keyframe:
got_key_frame = False
continue
if not got_key_frame:
continue
frames = codec.decode(packet)
if start_time is None:
start_time = time.time()
frames_buffer += frames
frames_buffer.sort(key=get_pts)
for frame in frames_buffer:
if display_frame(frame, start_time, pts_offset, codec.framerate):
frames_buffer.remove(frame)
if cv2.waitKey(1) & 0xFF == ord('q'):
break
if dts is not None:
container.seek(25000)
rawData.seek(cur_pos)
if cv2.waitKey(1) & 0xFF == ord('q'):
break
terminated.set()
cv2.destroyAllWindows()
async def receive_encoded_video(websocket, path):
data_queue = Queue()
terminated = Event()
p = Process(
target=render,
args=(terminated, data_queue)
)
p.start()
while not terminated.is_set():
try:
data = await websocket.recv()
except:
break
data_queue.put(data)
terminated.set()

Its normal getting 3~4 seconds delay because you are reading encoded data and decoding it takes time via on CPU.
If you have GPU hardware, you can use FFMPEG to decode H264 by GPU. Here is an example.
If you don't have a GPU, decoding H264 on CPU always will cause delays. You can use FFMPEG for effective decoding but this will also decrease total delay almost 10%

Speed up video playback from a networked device with camera or IP camera

I'm trying to stream video from my raspberry pi using flask api in python. So that I may process individual frames on my workstation. It is working fine as far as data delivery is concerned. However on client side the process of reading frames introduces a lag of 1-3 seconds that is undesirable in a real time application. I can view the video playback in my web browser without any latency that proves that my raspberry pi and network are innocent. The problem is with the method of reading individual frames from byte stream. Any thoughts about eliminating latency in such an application. Below is my code for client side application. Complete source to a sample application can be found here: https://github.com/shehzi-khan/video-streaming
import cv2
import urllib
import numpy as np
stream = urllib.urlopen('http://192.168.100.128:5000/video_feed')
bytes = ''
while True:
bytes += stream.read(1024)
a = bytes.find(b'\xff\xd8')
b = bytes.find(b'\xff\xd9')
if a != -1 and b != -1:
jpg = bytes[a:b+2]
bytes = bytes[b+2:]
img = cv2.imdecode(np.fromstring(jpg, dtype=np.uint8), cv2.IMREAD_COLOR)
cv2.imshow('Video', img)
if cv2.waitKey(1) == 27:
exit(0)

Main suggestions:
Search end-mark and then search start-mark
read more data (e.g. 64kb)
drop other frames and show only last
I can't test it, but here is general code:
import cv2
import urllib
import numpy as np
stream = urllib.urlopen('http://192.168.100.128:5000/video_feed')
bytes = ''
while True:
buff = stream.read(64 * 1024)
bytes += buff
if buff.rfind(b'\xff\xd9') != -1: # buff is smaller than bytes
endmark = bytes.rfind(b'\xff\xd9') + 2
startmark = bytes[:endmark - 2].rfind(b'\xff\xd8')
jpg = bytes[startmark:endmark] # please, check indexes! I could mess up with them.
bytes = bytes[endmark:]
img = cv2.imdecode(np.fromstring(jpg, dtype=np.uint8), cv2.IMREAD_COLOR)
cv2.imshow('Video', img)
if cv2.waitKey(1) == 27:
exit(0)
I can't find how stream.read behave. If he wait until buffer be full, than you need to decrease buffer size. If he just read N bytes OR until end of stream, than it will work.

How to parse mjpeg http stream from ip camera?

Given below is the code written for getting live stream from an IP Camera.
from cv2 import *
from cv2 import cv
import urllib
import numpy as np
k=0
capture=cv.CaptureFromFile("http://IPADDRESS of the camera/axis-cgi/mjpg/video.cgi")
namedWindow("Display",1)
while True:
frame=cv.QueryFrame(capture)
if frame is None:
print 'Cam not found'
break
else:
cv.ShowImage("Display", frame)
if k==0x1b:
print 'Esc. Exiting'
break
On running the code the output that I am getting is:
Cam not found
Where am I going wrong? Also, why is frame None here? Is there some problem with the conversion?

import cv2
import urllib
import numpy as np
stream = urllib.urlopen('http://localhost:8080/frame.mjpg')
bytes = ''
while True:
bytes += stream.read(1024)
a = bytes.find('\xff\xd8')
b = bytes.find('\xff\xd9')
if a != -1 and b != -1:
jpg = bytes[a:b+2]
bytes = bytes[b+2:]
i = cv2.imdecode(np.fromstring(jpg, dtype=np.uint8), cv2.CV_LOAD_IMAGE_COLOR)
cv2.imshow('i', i)
if cv2.waitKey(1) == 27:
exit(0)
edit (explanation)
I just saw that you mention that you have c++ code that is working, if that is the case your camera may work in python as well. The code above manually parses the mjpeg stream without relying on opencv, since in some of my projects the url will not be opened by opencv no matter what I did(c++,python).
Mjpeg over http is multipart/x-mixed-replace with boundary frame info and jpeg data is just sent in binary. So you don't really need to care about http protocol headers. All jpeg frames start with marker 0xff 0xd8 and end with 0xff 0xd9. So the code above extracts such frames from the http stream and decodes them one by one. like below.
...(http)
0xff 0xd8 --|
[jpeg data] |--this part is extracted and decoded
0xff 0xd9 --|
...(http)
0xff 0xd8 --|
[jpeg data] |--this part is extracted and decoded
0xff 0xd9 --|
...(http)
edit 2 (reading from mjpg file)
Regarding your question of saving the file, yes the file can be directly saved and reopened using the same method with very small modification. For example you would do curl http://IPCAM > output.mjpg
and then change the line stream=urllib.urlopen('http://localhost:8080/frame.mjpg')so that the code becomes this
import cv2
import urllib
import numpy as np
stream = open('output.mjpg', 'rb')
bytes = ''
while True:
bytes += stream.read(1024)
a = bytes.find('\xff\xd8')
b = bytes.find('\xff\xd9')
if a != -1 and b != -1:
jpg = bytes[a:b+2]
bytes = bytes[b+2:]
i = cv2.imdecode(np.fromstring(jpg, dtype=np.uint8), cv2.CV_LOAD_IMAGE_COLOR)
cv2.imshow('i', i)
if cv2.waitKey(1) == 27:
exit(0)
Of course you are saving a lot of redundant http headers, which you might want to strip away. Or if you have extra cpu power, maybe just encode to h264 first. But if the camera is adding some meta data to http header frames such as channel, timestamp, etc. Then it may be useful to keep them.
edit 3 (tkinter interfacing)
import cv2
import urllib
import numpy as np
import Tkinter
from PIL import Image, ImageTk
import threading
root = Tkinter.Tk()
image_label = Tkinter.Label(root)
image_label.pack()
def cvloop():
stream=open('output.mjpg', 'rb')
bytes = ''
while True:
bytes += stream.read(1024)
a = bytes.find('\xff\xd8')
b = bytes.find('\xff\xd9')
if a != -1 and b != -1:
jpg = bytes[a:b+2]
bytes = bytes[b+2:]
i = cv2.imdecode(np.fromstring(jpg, dtype=np.uint8), cv2.CV_LOAD_IMAGE_COLOR)
tki = ImageTk.PhotoImage(Image.fromarray(cv2.cvtColor(i, cv2.COLOR_BGR2RGB)))
image_label.configure(image=tki)
image_label._backbuffer_ = tki #avoid flicker caused by premature gc
cv2.imshow('i', i)
if cv2.waitKey(1) == 27:
exit(0)
thread = threading.Thread(target=cvloop)
thread.start()
root.mainloop()

First of all, please be aware that you should first try simply using OpenCV's video capture functions directly, e.g. cv2.VideoCapture('http://localhost:8080/frame.mjpg')!
This works just fine for me:
import cv2
cap = cv2.VideoCapture('http://localhost:8080/frame.mjpg')
while True:
ret, frame = cap.read()
cv2.imshow('Video', frame)
if cv2.waitKey(1) == 27:
exit(0)
Anyways, here is Zaw Lin's solution ported to OpenCV 3 (only change is cv2.CV_LOAD_IMAGE_COLOR to cv2.IMREAD_COLOR and Python 3 (string vs byte handling changed plus urllib):
import cv2
import urllib.request
import numpy as np
stream = urllib.request.urlopen('http://localhost:8080/frame.mjpg')
bytes = bytes()
while True:
bytes += stream.read(1024)
a = bytes.find(b'\xff\xd8')
b = bytes.find(b'\xff\xd9')
if a != -1 and b != -1:
jpg = bytes[a:b+2]
bytes = bytes[b+2:]
i = cv2.imdecode(np.fromstring(jpg, dtype=np.uint8), cv2.IMREAD_COLOR)
cv2.imshow('i', i)
if cv2.waitKey(1) == 27:
exit(0)

Here is an answer using the Python 3 requests module instead of urllib.
The reason for not using urllib is that it cannot correctly interpret a URL like http://user:pass#ipaddress:port
Adding authentication parameters is more complex in urllib than the requests module.
Here is a nice, concise solution using the requests module:
import cv2
import requests
import numpy as np
r = requests.get('http://192.168.1.xx/mjpeg.cgi', auth=('user', 'password'), stream=True)
if(r.status_code == 200):
bytes = bytes()
for chunk in r.iter_content(chunk_size=1024):
bytes += chunk
a = bytes.find(b'\xff\xd8')
b = bytes.find(b'\xff\xd9')
if a != -1 and b != -1:
jpg = bytes[a:b+2]
bytes = bytes[b+2:]
i = cv2.imdecode(np.fromstring(jpg, dtype=np.uint8), cv2.IMREAD_COLOR)
cv2.imshow('i', i)
if cv2.waitKey(1) == 27:
exit(0)
else:
print("Received unexpected status code {}".format(r.status_code))

I don't think the first anwser is fine with other format image data, eg png.
So I write the following code, which can handle other type of images
"""
MJPEG format
Content-Type: multipart/x-mixed-replace; boundary=--BoundaryString
--BoundaryString
Content-type: image/jpg
Content-Length: 12390
... image-data here ...
--BoundaryString
Content-type: image/jpg
Content-Length: 12390
... image-data here ...
"""
import io
import requests
import cv2
import numpy as np
class MjpegReader():
def __init__(self, url: str):
self._url = url
def iter_content(self):
"""
Raises:
RuntimeError
"""
r = requests.get(self._url, stream=True)
# parse boundary
content_type = r.headers['content-type']
index = content_type.rfind("boundary=")
assert index != 1
boundary = content_type[index+len("boundary="):] + "\r\n"
boundary = boundary.encode('utf-8')
rd = io.BufferedReader(r.raw)
while True:
self._skip_to_boundary(rd, boundary)
length = self._parse_length(rd)
yield rd.read(length)
def _parse_length(self, rd) -> int:
length = 0
while True:
line = rd.readline()
if line == b'\r\n':
return length
if line.startswith(b"Content-Length"):
length = int(line.decode('utf-8').split(": ")[1])
assert length > 0
def _skip_to_boundary(self, rd, boundary: bytes):
for _ in range(10):
if boundary in rd.readline():
break
else:
raise RuntimeError("Boundary not detected:", boundary)
mr = MjpegReader("http://127.0.0.1/mjpeg.cgi")
for content in mr.iter_content():
i = cv2.imdecode(np.frombuffer(content, dtype=np.uint8), cv2.IMREAD_COLOR)
cv2.imshow('i', i)
if cv2.waitKey(1) == 27:
break

I had the same problem.
The solution without requests or urllib: just add the user and password in the cam address, using VideoCapture, like this:
E.g.
cv2.VideoCapture('http://user:password#XXX.XXX.XXX.XXX/video')
using IPWebcam for android.

Develop Reference

Python is a programming language that lets you work quickly and integrate systems more effectively.

Decode h264 video frame from bytes received from a WebSocket - python

Related

Opening wav from URL in Python

Turn image array to io bytes object to simulare open with 'rb'

How to use pyav or opencv to decode a live stream of raw H.264 data?

Speed up video playback from a networked device with camera or IP camera

How to parse mjpeg http stream from ip camera?

Categories

Resources