You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
lal/pkg/avc/avc.go

721 lines
18 KiB
Go

// Copyright 2019, Chef. All rights reserved.
// https://github.com/q191201771/lal
//
// Use of this source code is governed by a MIT-style license
// that can be found in the License file.
//
// Author: Chef (191201771@qq.com)
package avc
import (
"errors"
"io"
"github.com/q191201771/naza/pkg/bele"
"github.com/q191201771/naza/pkg/nazabits"
"github.com/q191201771/naza/pkg/nazalog"
)
// AnnexB:
// keywords: MPEG-2 transport stream, ElementaryStream(ES),
// nalu with start code.
// e.g. ts
//
// AVCC:
// keywords: AVC1, MPEG-4, extradata, sequence header, AVCDecoderConfigurationRecord
// nalu with length prefix.
// e.g. rtmp, flv
// ErrAVC is the sentinel error returned by the functions in this package when
// the input is too short, fails a sanity check, or uses a feature that is not
// implemented here.
var ErrAVC = errors.New("lal.avc: fxxk")
// NALUStartCode3 and NALUStartCode4 are the 3-byte and 4-byte start code
// sequences that precede a NALU in the AnnexB format. The conversion helpers
// in this file always emit the 4-byte form.
var (
NALUStartCode3 = []byte{0x0, 0x0, 0x1}
NALUStartCode4 = []byte{0x0, 0x0, 0x0, 0x1}
)
// NALUTypeMapping maps a NALU type value (as returned by ParseNALUType) to a
// short readable name. ParseNALUTypeReadable reports types that are missing
// from this map as "unknown".
var NALUTypeMapping = map[uint8]string{
1: "SLICE",
5: "IDR",
6: "SEI",
7: "SPS",
8: "PPS",
9: "AUD",
}
// SliceTypeMapping maps a slice_type value to a short readable name.
// Values 5..9 carry the same names as 0..4. Note that ParseSliceType already
// folds 5..9 down to 0..4 before ParseSliceTypeReadable performs its lookup.
var SliceTypeMapping = map[uint8]string{
0: "P",
1: "B",
2: "I",
3: "SP",
4: "SI",
5: "P",
6: "B",
7: "I",
8: "SP",
9: "SI",
}
// NALU type values, i.e. the possible results of ParseNALUType that this
// package gives a name to. The numeric values match the keys of
// NALUTypeMapping.
const (
NALUTypeSlice uint8 = 1
NALUTypeIDRSlice uint8 = 5
NALUTypeSEI uint8 = 6
NALUTypeSPS uint8 = 7
NALUTypePPS uint8 = 8
NALUTypeAUD uint8 = 9 // Access Unit Delimiter
)
// Slice type values in the normalized range 0..4, which is the range
// ParseSliceType returns.
const (
SliceTypeP uint8 = 0
SliceTypeB uint8 = 1
SliceTypeI uint8 = 2
SliceTypeSP uint8 = 3
SliceTypeSI uint8 = 4
)
// Context holds the stream properties that ParseSPS extracts from an SPS:
// the profile_idc and level_idc values, and the picture width and height
// computed from the macroblock counts and the frame cropping offsets.
type Context struct {
Profile uint8
Level uint8
Width uint32
Height uint32
}
// DecoderConfigurationRecord holds the fields that TryParseSeqHeader reads
// from the decoder configuration record of an AVCC sequence header. Only one
// SPS length and one PPS length are stored.
//
// H.264-AVC-ISO_IEC_14496-15.pdf
// 5.2.4 Decoder configuration information
type DecoderConfigurationRecord struct {
ConfigurationVersion uint8
AVCProfileIndication uint8
ProfileCompatibility uint8
AVCLevelIndication uint8
// 2-bit field, read after 6 reserved bits.
LengthSizeMinusOne uint8
// 5-bit field, read after 3 reserved bits.
NumOfSPS uint8
// Big-endian 16-bit length of the SPS that follows.
SPSLength uint16
// 5-bit field, read after 3 reserved bits.
NumOfPPS uint8
// Big-endian 16-bit length of the PPS that follows.
PPSLength uint16
}
// SPS holds the sequence parameter set fields that ParseSPS reads, in the
// order they are read from the bitstream.
//
// ISO-14496-10.pdf
// 7.3.2.1 Sequence parameter set RBSP syntax
// 7.4.2.1 Sequence parameter set RBSP semantics
type SPS struct {
ProfileIdc uint8
ConstraintSet0Flag uint8
ConstraintSet1Flag uint8
ConstraintSet2Flag uint8
LevelIdc uint8
// ParseSPS rejects values >= 32.
SPSId uint32
// Only read from the bitstream when ProfileIdc == 100; otherwise set to 1.
ChromaFormatIdc uint32
// Only read when ChromaFormatIdc == 3.
ResidualColorTransformFlag uint8
// Stored as the parsed value plus 8 (defaults to 8 when not present).
BitDepthLuma uint32
// Stored as the parsed value plus 8 (defaults to 8 when not present).
BitDepthChroma uint32
TransFormBypass uint8
// ParseSPS rejects values > 12.
Log2MaxFrameNumMinus4 uint32
PicOrderCntType uint32
// Only read when PicOrderCntType == 0; stored as the parsed value plus 4.
Log2MaxPicOrderCntLsb uint32
NumRefFrames uint32 // num_ref_frames
GapsInFrameNumValueAllowedFlag uint8 // gaps_in_frame_num_value_allowed_flag
PicWidthInMbsMinusOne uint32 // pic_width_in_mbs_minus1
PicHeightInMapUnitsMinusOne uint32 // pic_height_in_map_units_minus1
FrameMbsOnlyFlag uint8 // frame_mbs_only_flag
MbAdaptiveFrameFieldFlag uint8 // mb_adaptive_frame_field_flag
Direct8X8InferenceFlag uint8 // direct_8x8_inference_flag
FrameCroppingFlag uint8 // frame_cropping_flag
FrameCropLeftOffset uint32 // frame_crop_left_offset
FrameCropRightOffset uint32 // frame_crop_right_offset
FrameCropTopOffset uint32 // frame_crop_top_offset
FrameCropBottomOffset uint32 // frame_crop_bottom_offset
}
// ParseNALUType extracts the NALU type from the first byte of a NALU, which
// is stored in the low five bits of v.
func ParseNALUType(v uint8) uint8 {
	const typeMask uint8 = 0x1f
	return v & typeMask
}
// ParseSliceType reads the slice_type of a slice NALU and returns it
// normalized to the range 0..4 (see the SliceType* constants).
//
// nalu must start with the one-byte NAL header; the slice header is parsed
// from the bytes that follow. ErrAVC is returned when nalu is shorter than two
// bytes or when the decoded slice_type is greater than 9. Errors reported by
// the bit reader are returned unchanged.
func ParseSliceType(nalu []byte) (uint8, error) {
	if len(nalu) < 2 {
		return 0, ErrAVC
	}

	reader := nazabits.NewBitReader(nalu[1:])

	// first_mb_in_slice precedes slice_type and is not needed here.
	_, err := reader.ReadGolomb()
	if err != nil {
		return 0, err
	}

	st, err := reader.ReadGolomb()
	if err != nil {
		return 0, err
	}

	switch {
	case st > 9:
		// valid range is [0, 9]
		return 0, ErrAVC
	case st > 4:
		// 5..9 name the same slice types as 0..4
		st -= 5
	}
	return uint8(st), nil
}
func ParseNALUTypeReadable(v uint8) string {
t := ParseNALUType(v)
ret, ok := NALUTypeMapping[t]
if !ok {
return "unknown"
}
return ret
}
func ParseSliceTypeReadable(nalu []byte) (string, error) {
naluType := ParseNALUType(nalu[0])
// 这些类型不属于视频帧数据类型没有slice type
switch naluType {
case NALUTypeSEI:
fallthrough
case NALUTypeSPS:
fallthrough
case NALUTypePPS:
return "", nil
}
t, err := ParseSliceType(nalu)
if err != nil {
return "unknown", err
}
ret, ok := SliceTypeMapping[t]
if !ok {
return "unknown", ErrAVC
}
return ret, nil
}
// SPSPPSSeqHeader2AnnexB converts an AVCC sequence header into AnnexB form:
// a 4-byte start code followed by the SPS, then a 4-byte start code followed
// by the PPS.
//
// The returned slice is newly built and does not alias payload. Any failure
// to extract the SPS/PPS from payload is reported as ErrAVC.
func SPSPPSSeqHeader2AnnexB(payload []byte) ([]byte, error) {
	sps, pps, err := ParseSPSPPSFromSeqHeader(payload)
	if err != nil {
		return nil, ErrAVC
	}

	out := make([]byte, 0, 2*len(NALUStartCode4)+len(sps)+len(pps))
	for _, nalu := range [][]byte{sps, pps} {
		out = append(out, NALUStartCode4...)
		out = append(out, nalu...)
	}
	return out, nil
}
// ParseSPSPPSFromSeqHeader extracts the SPS and PPS from an AVCC sequence
// header.
//
// @param <payload> the payload of an rtmp message or of an flv tag. It
// includes the leading 2 type bytes and the 3-byte cts.
//
// @return sps and pps are sub-slices of payload; they share its memory and are
// not copies. ErrAVC is returned when payload is too short, does not start
// with 0x17 0x00 0x00 0x00 0x00, or does not contain exactly one SPS and one
// PPS.
func ParseSPSPPSFromSeqHeader(payload []byte) (sps, pps []byte, err error) {
	if len(payload) < 5 {
		return nil, nil, ErrAVC
	}
	for i, want := range [5]byte{0x17, 0x00, 0x00, 0x00, 0x00} {
		if payload[i] != want {
			return nil, nil, ErrAVC
		}
	}
	// Everything up to and including the 2-byte SPS length must be present.
	if len(payload) < 13 {
		return nil, nil, ErrAVC
	}

	pos := 10
	if payload[pos]&0x1F != 1 {
		// only a single SPS is supported
		return nil, nil, ErrAVC
	}
	pos++

	spsLen := int(bele.BEUint16(payload[pos:]))
	pos += 2
	// The SPS body plus the PPS count byte and 2-byte PPS length must fit.
	if len(payload) < 16+spsLen {
		return nil, nil, ErrAVC
	}
	spsData := payload[pos : pos+spsLen]
	pos += spsLen

	if payload[pos]&0x1F != 1 {
		// only a single PPS is supported
		return nil, nil, ErrAVC
	}
	pos++

	ppsLen := int(bele.BEUint16(payload[pos:]))
	pos += 2
	if len(payload) < 16+spsLen+ppsLen {
		return nil, nil, ErrAVC
	}

	return spsData, payload[pos : pos+ppsLen], nil
}
// 返回的内存块为新申请的独立内存块
func BuildSeqHeaderFromSPSPPS(sps, pps []byte) ([]byte, error) {
var sh []byte
sh = make([]byte, 16+len(sps)+len(pps))
sh[0] = 0x17
sh[1] = 0x0
sh[2] = 0x0
sh[3] = 0x0
sh[4] = 0x0
// H.264-AVC-ISO_IEC_14496-15.pdf
// 5.2.4 Decoder configuration information
sh[5] = 0x1 // configurationVersion
ctx, err := ParseSPS(sps)
if err != nil {
return nil, err
}
sh[6] = ctx.Profile // AVCProfileIndication
sh[7] = 0 // profile_compatibility
sh[8] = ctx.Level // AVCLevelIndication
sh[9] = 0xFF // lengthSizeMinusOne '111111'b | (4-1)
sh[10] = 0xE1 // numOfSequenceParameterSets '111'b | 1
sh[11] = uint8((len(sps) >> 8) & 0xFF) // sequenceParameterSetLength
sh[12] = uint8(len(sps) & 0xFF)
i := 13
copy(sh[i:], sps)
i += len(sps)
sh[i] = 0x1 // numOfPictureParameterSets 1
i++
sh[i] = uint8((len(pps) >> 8) & 0xFF) // sequenceParameterSetLength
sh[i+1] = uint8(len(pps) & 0xFF)
i += 2
copy(sh[i:], pps)
return sh, nil
}
// AVCC -> AnnexB
//
// @param <payload> rtmp message的payload部分或者flv tag的payload部分
// 注意包含了头部2字节类型以及3字节的cts
//
func CaptureAVCC2AnnexB(w io.Writer, payload []byte) error {
// sps pps
if payload[0] == 0x17 && payload[1] == 0x00 {
spspps, err := SPSPPSSeqHeader2AnnexB(payload)
if err != nil {
return err
}
_, _ = w.Write(spspps)
return nil
}
// payload中可能存在多个nalu
for i := 5; i != len(payload); {
naluLen := int(bele.BEUint32(payload[i:]))
i += 4
_, _ = w.Write(NALUStartCode4)
_, _ = w.Write(payload[i : i+naluLen])
i += naluLen
break
}
return nil
}
// TODO chef: the hevc counterpart takes ctx as a parameter; consider unifying
// the two signatures.
//
// ParseSPS parses an SPS NALU (including its one-byte NAL header) and returns
// the profile, level, width and height derived from it.
//
// This parser is experimental and incomplete:
//   - the extra chroma/bit-depth fields are only parsed for profile_idc 100;
//   - a present scaling matrix is skipped with a fixed-size read that only
//     fits specific streams;
//   - only pic_order_cnt_type 0 and 2 are supported;
//   - VUI parameters are not parsed.
//
// ErrAVC is returned when a field is out of its accepted range or an
// unsupported feature is encountered. Errors reported by the bit reader
// (e.g. on truncated input) are returned unchanged. On error the returned
// Context is the zero value.
func ParseSPS(payload []byte) (Context, error) {
	var (
		sps SPS
		err error
	)
	br := nazabits.NewBitReader(payload)

	// NAL header byte. For an SPS this should be 0x67, but the value is
	// deliberately not validated.
	if _, err = br.ReadBits8(8); err != nil {
		return Context{}, err
	}

	if sps.ProfileIdc, err = br.ReadBits8(8); err != nil {
		return Context{}, err
	}
	if sps.ConstraintSet0Flag, err = br.ReadBits8(1); err != nil {
		return Context{}, err
	}
	if sps.ConstraintSet1Flag, err = br.ReadBits8(1); err != nil {
		return Context{}, err
	}
	if sps.ConstraintSet2Flag, err = br.ReadBits8(1); err != nil {
		return Context{}, err
	}
	// Skip the remaining 5 bits of the constraint/reserved byte.
	if _, err = br.ReadBits8(5); err != nil {
		return Context{}, err
	}
	if sps.LevelIdc, err = br.ReadBits8(8); err != nil {
		return Context{}, err
	}
	if sps.SPSId, err = br.ReadGolomb(); err != nil {
		return Context{}, err
	}
	if sps.SPSId >= 32 {
		return Context{}, ErrAVC
	}

	// 100 High profile
	if sps.ProfileIdc == 100 {
		if sps.ChromaFormatIdc, err = br.ReadGolomb(); err != nil {
			return Context{}, err
		}
		if sps.ChromaFormatIdc > 3 {
			return Context{}, ErrAVC
		}
		if sps.ChromaFormatIdc == 3 {
			if sps.ResidualColorTransformFlag, err = br.ReadBits8(1); err != nil {
				return Context{}, err
			}
		}

		if sps.BitDepthLuma, err = br.ReadGolomb(); err != nil {
			return Context{}, err
		}
		sps.BitDepthLuma += 8

		if sps.BitDepthChroma, err = br.ReadGolomb(); err != nil {
			return Context{}, err
		}
		sps.BitDepthChroma += 8

		if sps.BitDepthChroma != sps.BitDepthLuma || sps.BitDepthChroma < 8 || sps.BitDepthChroma > 14 {
			return Context{}, ErrAVC
		}

		if sps.TransFormBypass, err = br.ReadBits8(1); err != nil {
			return Context{}, err
		}

		// seq scaling matrix present
		var scalingMatrixPresent uint8
		if scalingMatrixPresent, err = br.ReadBits8(1); err != nil {
			return Context{}, err
		}
		if scalingMatrixPresent == 1 {
			nazalog.Debugf("scaling matrix present.")
			// TODO chef: not implemented correctly yet; this only handles
			// one specific case.
			if _, err = br.ReadBits32(128); err != nil {
				return Context{}, err
			}
		}
	} else {
		sps.ChromaFormatIdc = 1
		sps.BitDepthLuma = 8
		sps.BitDepthChroma = 8
	}

	if sps.Log2MaxFrameNumMinus4, err = br.ReadGolomb(); err != nil {
		return Context{}, err
	}
	if sps.Log2MaxFrameNumMinus4 > 12 {
		return Context{}, ErrAVC
	}

	if sps.PicOrderCntType, err = br.ReadGolomb(); err != nil {
		return Context{}, err
	}
	switch sps.PicOrderCntType {
	case 0:
		// The read error must be checked here; otherwise it would be
		// overwritten by the next read.
		if sps.Log2MaxPicOrderCntLsb, err = br.ReadGolomb(); err != nil {
			return Context{}, err
		}
		sps.Log2MaxPicOrderCntLsb += 4
	case 2:
		// no additional fields
	default:
		nazalog.Debugf("not impl yet. sps.PicOrderCntType=%d", sps.PicOrderCntType)
		return Context{}, ErrAVC
	}

	if sps.NumRefFrames, err = br.ReadGolomb(); err != nil {
		return Context{}, err
	}
	if sps.GapsInFrameNumValueAllowedFlag, err = br.ReadBits8(1); err != nil {
		return Context{}, err
	}
	if sps.PicWidthInMbsMinusOne, err = br.ReadGolomb(); err != nil {
		return Context{}, err
	}
	if sps.PicHeightInMapUnitsMinusOne, err = br.ReadGolomb(); err != nil {
		return Context{}, err
	}
	if sps.FrameMbsOnlyFlag, err = br.ReadBits8(1); err != nil {
		return Context{}, err
	}
	if sps.FrameMbsOnlyFlag == 0 {
		if sps.MbAdaptiveFrameFieldFlag, err = br.ReadBits8(1); err != nil {
			return Context{}, err
		}
	}
	if sps.Direct8X8InferenceFlag, err = br.ReadBits8(1); err != nil {
		return Context{}, err
	}
	if sps.FrameCroppingFlag, err = br.ReadBits8(1); err != nil {
		return Context{}, err
	}
	if sps.FrameCroppingFlag == 1 {
		if sps.FrameCropLeftOffset, err = br.ReadGolomb(); err != nil {
			return Context{}, err
		}
		if sps.FrameCropRightOffset, err = br.ReadGolomb(); err != nil {
			return Context{}, err
		}
		if sps.FrameCropTopOffset, err = br.ReadGolomb(); err != nil {
			return Context{}, err
		}
		if sps.FrameCropBottomOffset, err = br.ReadGolomb(); err != nil {
			return Context{}, err
		}
	}

	// TODO parse sps vui parameters

	var ctx Context
	// Macroblocks are 16x16; the cropping offsets are applied in units of 2
	// pixels.
	ctx.Width = (sps.PicWidthInMbsMinusOne+1)*16 - (sps.FrameCropLeftOffset+sps.FrameCropRightOffset)*2
	ctx.Height = (2-uint32(sps.FrameMbsOnlyFlag))*(sps.PicHeightInMapUnitsMinusOne+1)*16 - (sps.FrameCropTopOffset+sps.FrameCropBottomOffset)*2
	ctx.Profile = sps.ProfileIdc
	ctx.Level = sps.LevelIdc

	return ctx, nil
}
// TryParsePPS is meant to parse all fields of a PPS. It is experimental; do
// not use it directly.
//
// It is currently a stub: payload is ignored and nil is always returned.
func TryParsePPS(payload []byte) error {
// ISO-14496-10.pdf
// 7.3.2.2 Picture parameter set RBSP syntax
// TODO impl me
return nil
}
// TryParseSeqHeader attempts to parse all fields of an AVCC sequence header.
// It is experimental; do not use it directly.
//
// @param <payload> the payload of an rtmp message or of an flv tag. It
// includes the leading 2 type bytes and the 3-byte cts.
//
// The decoder configuration record is parsed and logged at debug level, then
// the embedded SPS and PPS are handed to ParseSPS and TryParsePPS. Only the
// first SPS and the first PPS are looked at.
//
// ErrAVC is returned when payload does not start with
// 0x17 0x00 0x00 0x00 0x00 or is too short for the lengths it declares.
// Errors reported by the bit reader are returned unchanged. Failures while
// parsing the SPS/PPS contents themselves are not reported.
func TryParseSeqHeader(payload []byte) error {
	if len(payload) < 5 {
		return ErrAVC
	}
	if payload[0] != 0x17 || payload[1] != 0x00 || payload[2] != 0 || payload[3] != 0 || payload[4] != 0 {
		return ErrAVC
	}

	// H.264-AVC-ISO_IEC_14496-15.pdf
	// 5.2.4 Decoder configuration information
	var dcr DecoderConfigurationRecord
	var err error

	br := nazabits.NewBitReader(payload[5:])

	if dcr.ConfigurationVersion, err = br.ReadBits8(8); err != nil {
		return err
	}
	if dcr.AVCProfileIndication, err = br.ReadBits8(8); err != nil {
		return err
	}
	if dcr.ProfileCompatibility, err = br.ReadBits8(8); err != nil {
		return err
	}
	if dcr.AVCLevelIndication, err = br.ReadBits8(8); err != nil {
		return err
	}
	if _, err = br.ReadBits8(6); err != nil { // reserved = '111111'b
		return err
	}
	if dcr.LengthSizeMinusOne, err = br.ReadBits8(2); err != nil {
		return err
	}
	if _, err = br.ReadBits8(3); err != nil { // reserved = '111'b
		return err
	}
	if dcr.NumOfSPS, err = br.ReadBits8(5); err != nil {
		return err
	}

	b, err := br.ReadBytes(2)
	if err != nil {
		return err
	}
	if len(b) < 2 {
		return ErrAVC
	}
	dcr.SPSLength = bele.BEUint16(b)

	// Skip over the SPS body; it is parsed separately below.
	if _, err = br.ReadBytes(uint(dcr.SPSLength)); err != nil {
		return err
	}

	if _, err = br.ReadBits8(3); err != nil { // reserved = '111'b
		return err
	}
	if dcr.NumOfPPS, err = br.ReadBits8(5); err != nil {
		return err
	}

	b, err = br.ReadBytes(2)
	if err != nil {
		return err
	}
	if len(b) < 2 {
		return ErrAVC
	}
	dcr.PPSLength = bele.BEUint16(b)

	nazalog.Debugf("%+v", dcr)

	// SPS body starts at 5 (tag header) + 5 + 1 + 2 = 13.
	spsBegin := 13
	spsEnd := spsBegin + int(dcr.SPSLength)
	// PPS body follows the SPS body, the PPS count byte and the 2-byte PPS
	// length.
	ppsBegin := spsEnd + 1 + 2
	ppsEnd := ppsBegin + int(dcr.PPSLength)
	if ppsEnd > len(payload) {
		return ErrAVC
	}

	// Best effort: this function only tries to walk the fields, so failures
	// inside the SPS/PPS parsers are intentionally not propagated.
	_, _ = ParseSPS(payload[spsBegin:spsEnd])
	_ = TryParsePPS(payload[ppsBegin:ppsEnd])

	return nil
}
//var defaultScaling4 = [][]uint8{
// {
// 6, 13, 20, 28, 13, 20, 28, 32,
// 20, 28, 32, 37, 28, 32, 37, 42,
// },
// {
// 10, 14, 20, 24, 14, 20, 24, 27,
// 20, 24, 27, 30, 24, 27, 30, 34,
// },
//}
//
//var defaultScaling8 = [][]uint8{
// {
// 6, 10, 13, 16, 18, 23, 25, 27,
// 10, 11, 16, 18, 23, 25, 27, 29,
// 13, 16, 18, 23, 25, 27, 29, 31,
// 16, 18, 23, 25, 27, 29, 31, 33,
// 18, 23, 25, 27, 29, 31, 33, 36,
// 23, 25, 27, 29, 31, 33, 36, 38,
// 25, 27, 29, 31, 33, 36, 38, 40,
// 27, 29, 31, 33, 36, 38, 40, 42,
// },
// {
// 9, 13, 15, 17, 19, 21, 22, 24,
// 13, 13, 17, 19, 21, 22, 24, 25,
// 15, 17, 19, 21, 22, 24, 25, 27,
// 17, 19, 21, 22, 24, 25, 27, 28,
// 19, 21, 22, 24, 25, 27, 28, 30,
// 21, 22, 24, 25, 27, 28, 30, 32,
// 22, 24, 25, 27, 28, 30, 32, 33,
// 24, 25, 27, 28, 30, 32, 33, 35,
// },
//}
//
//var ffZigzagDirect = []uint8{
// 0, 1, 8, 16, 9, 2, 3, 10,
// 17, 24, 32, 25, 18, 11, 4, 5,
// 12, 19, 26, 33, 40, 48, 41, 34,
// 27, 20, 13, 6, 7, 14, 21, 28,
// 35, 42, 49, 56, 57, 50, 43, 36,
// 29, 22, 15, 23, 30, 37, 44, 51,
// 58, 59, 52, 45, 38, 31, 39, 46,
// 53, 60, 61, 54, 47, 55, 62, 63,
//}
//
//var ffZigzagScan = []uint8{
// 0 + 0*4, 1 + 0*4, 0 + 1*4, 0 + 2*4,
// 1 + 1*4, 2 + 0*4, 3 + 0*4, 2 + 1*4,
// 1 + 2*4, 0 + 3*4, 1 + 3*4, 2 + 2*4,
// 3 + 1*4, 3 + 2*4, 2 + 3*4, 3 + 3*4,
//}
//
//func decodeScalingMatrices(reader *nazabits.BitReader) error {
// // 6 * 16
// var spsScalingMatrix4 = [][]uint8{
// {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
// {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
// {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
// {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
// {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
// {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
// }
// // 6 * 64
// var spsScalingMatrix8 = [][]uint8{
// {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
// {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
// {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
// {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
// }
//
// fallback := [][]uint8{defaultScaling4[0], defaultScaling4[1], defaultScaling8[0], defaultScaling8[1]}
// decodeScalingList(reader, spsScalingMatrix4[0], 16, defaultScaling4[0], fallback[0])
// decodeScalingList(reader, spsScalingMatrix4[1], 16, defaultScaling4[0], spsScalingMatrix4[0])
// decodeScalingList(reader, spsScalingMatrix4[2], 16, defaultScaling4[0], spsScalingMatrix4[1])
// decodeScalingList(reader, spsScalingMatrix4[3], 16, defaultScaling4[1], fallback[1])
// decodeScalingList(reader, spsScalingMatrix4[4], 16, defaultScaling4[1], spsScalingMatrix4[3])
// decodeScalingList(reader, spsScalingMatrix4[4], 16, defaultScaling4[1], spsScalingMatrix4[3])
//
// decodeScalingList(reader, spsScalingMatrix8[0], 64, defaultScaling8[0], fallback[2])
// decodeScalingList(reader, spsScalingMatrix8[3], 64, defaultScaling8[1], fallback[3])
//
// return nil
//}
//
//func decodeScalingList(reader *nazabits.BitReader, factors []uint8, size int, jvtList []uint8, fallbackList []uint8) error {
// var (
// i = 0
// last = 8
// next = 8
// scan []uint8
// )
// if size == 16 {
// scan = ffZigzagScan
// } else {
// scan = ffZigzagDirect
// }
// flag, err := reader.ReadBit()
// if err != nil {
// return err
// }
// return nil
// if flag == 0 {
// for n := 0; n < size; n++ {
// factors[n] = fallbackList[n]
// }
// } else {
// for i = 0; i < size; i++ {
// if next != 0 {
// v, err := reader.ReadGolomb()
// if err != nil {
// return err
// }
// next = (last + int(v)) & 0xff
// }
// if i == 0 && next == 0 {
// for n := 0; n < size; n++ {
// factors[n] = jvtList[n]
// }
// break
// }
// if next != 0 {
// factors[scan[i]] = uint8(next)
// last = next
// } else {
// factors[scan[i]] = uint8(last)
// }
// }
// }
// return nil
//}