Support for the Multiview extension of H.265/HEVC

- A fix for the parsing of the SPS is provided. For NALUs associated with layers whose identifier is greater than 0, the spec requires the decoder to parse sps_ext_or_max_sub_layers_minus1 (see F.7.3.2.2.1). The value of this syntax element is then used to infer the value of sps_max_sub_layers_minus1, as specified in Clause F.7.4.3.2.1. The current design would lead to sps_max_sub_layers_minus1 being equal to zero. This is fine for streams captured by devices such as Apple iPhones, since the value of sps_ext_or_max_sub_layers_minus1 is set to 7 and the spec in this case mandates that sps_max_sub_layers_minus1 assumes the same value transmitted in the VPS. However, different encoders might set sps_ext_or_max_sub_layers_minus1 to a different value, in which case sps_max_sub_layers_minus1 will also differ from the value parsed in the VPS. The fix proposed in av_parsers.c::gf_hevc_read_sps_bs_internal addresses this discrepancy between the implementation and the spec.
- Support is added to parse, and embed in a .mov file, the three-dimensional reference displays info SEI message from Clause G.14.2.3.3. Such supplemental information is required to play Multiview content on devices such as the Apple Vision Pro. Having MP4Box embed this information allows users to wrap Annex B HEVC streams into .mov containers.
gpac · jeanlf · Jul 3, 2024 · Jun 6, 2024 · Jun 6, 2024 · c05056d7e631619eb44bf7b90fb662d679ddebca
commit c05056d7e631619eb44bf7b90fb662d679ddebca
diff --git a/include/gpac/internal/media_dev.h b/include/gpac/internal/media_dev.h
@@ -572,6 +572,7 @@ typedef struct _hevc_state
  u8 clli_data[4];
  u8 mdcv_data[24];
  u8 clli_valid, mdcv_valid;
+ u8 has_3d_ref_disp_info;
 } HEVCState;
 
 typedef struct hevc_combine{

diff --git a/src/filters/reframe_nalu.c b/src/filters/reframe_nalu.c
@@ -155,7 +155,7 @@ typedef struct
  u32 nal_store_size, nal_store_alloc;
 
  //list of param sets found
- GF_List *sps, *pps, *vps, *sps_ext, *pps_svc, *vvc_aps_pre, *vvc_dci, *vvc_opi;
+ GF_List *sps, *pps, *vps, *sps_ext, *pps_svc, *vvc_aps_pre, *vvc_dci, *vvc_opi, *sei_prefix;
  //set to true if one of the PS has been modified, will potentially trigger a PID reconfigure
  Bool ps_modified;
  //set to true if one PS has been changed - if false and ps_modified is set, only new PS have been added
@@ -1196,6 +1196,16 @@ static Bool naludmx_create_hevc_decoder_config(GF_NALUDmxCtx *ctx, u8 **dsi, u32
  naludmx_add_param_nalu(layer_id ? lvcc->param_array : cfg->param_array, sl, GF_HEVC_NALU_PIC_PARAM);
  }
 
+ cfg = ctx->explicit ? lvcc : hvcc;
+ count = gf_list_count(ctx->sei_prefix);
+ for (i=0; i<count; i++) {
+ GF_NALUFFParam *sl = gf_list_get(ctx->sei_prefix, i);
+ layer_id = ((sl->data[0] & 0x1) << 5) | (sl->data[1] >> 3);
+ if (!layer_id) *has_hevc_base = GF_TRUE;
+ if (!ctx->analyze)
+ naludmx_add_param_nalu(layer_id ? lvcc->param_array : cfg->param_array, sl, GF_HEVC_NALU_SEI_PREFIX);
+ }
+
  *dsi = *dsi_enh = NULL;
  *dsi_size = *dsi_enh_size = 0;
 
@@ -2088,6 +2098,11 @@ static void naludmx_queue_param_set(GF_NALUDmxCtx *ctx, char *data, u32 size, u3
  list = ctx->pps;
  ctx->valid_ps_flags |= 1<<1;
  break;
+ case GF_HEVC_NALU_SEI_PREFIX:
+ if (!ctx->sei_prefix)
+ ctx->sei_prefix = gf_list_new();
+ list = ctx->sei_prefix;
+ break;
  default:
  gf_assert(0);
  return;
@@ -2487,6 +2502,9 @@ static s32 naludmx_parse_nal_hevc(GF_NALUDmxCtx *ctx, char *data, u32 size, Bool
  break;
  case GF_HEVC_NALU_SEI_PREFIX:
  gf_hevc_parse_sei(data, size, ctx->hevc_state);
+ if (ctx->hevc_state->has_3d_ref_disp_info) {
+ naludmx_queue_param_set(ctx, data, size, GF_HEVC_NALU_SEI_PREFIX, 0, temporal_id, layer_id);
+ }
  if (!ctx->nosei) {
  ctx->nb_sei++;
  naludmx_push_prefix(ctx, data, size, GF_FALSE);

diff --git a/src/media_tools/av_parsers.c b/src/media_tools/av_parsers.c
@@ -7521,7 +7521,9 @@ static void gf_hevc_vvc_parse_sei(char *buffer, u32 nal_size, HEVCState *hevc, V
  }
 
  nb_zeros = gf_bs_get_emulation_byte_removed(bs);
-
+ if (hevc) {
+ hevc->has_3d_ref_disp_info = 0;
+ }
  switch (ptype) {
  case 4: /*user registered ITU-T T35*/
  if (hevc) {
@@ -7554,6 +7556,12 @@ static void gf_hevc_vvc_parse_sei(char *buffer, u32 nal_size, HEVCState *hevc, V
  vvc->mdcv_valid = 1;
  }
  break;
+ // three_dimensional_reference_displays_info
+ case 176:
+ if (hevc) {
+ hevc->has_3d_ref_disp_info = 1;
+ }
+ break;
  default:
  break;
  }
@@ -8319,8 +8327,10 @@ static s32 gf_hevc_read_sps_bs_internal(GF_BitStream *bs, HEVCState *hevc, u8 la
  sps_ext_or_max_sub_layers_minus1 = 0;
  if (layer_id == 0)
  max_sub_layers_minus1 = gf_bs_read_int_log(bs, 3, "max_sub_layers_minus1");
- else
+ else {
  sps_ext_or_max_sub_layers_minus1 = gf_bs_read_int_log(bs, 3, "sps_ext_or_max_sub_layers_minus1");
+ max_sub_layers_minus1 = sps_ext_or_max_sub_layers_minus1 == 7 ? hevc->vps[vps_id].max_sub_layers - 1 : sps_ext_or_max_sub_layers_minus1;
+ }
  multiLayerExtSpsFlag = (layer_id != 0) && (sps_ext_or_max_sub_layers_minus1 == 7);
  if (!multiLayerExtSpsFlag) {
  gf_bs_read_int_log(bs, 1, "temporal_id_nesting_flag");