Discussion:
[Libva-intel-driver][PATCH] Support AVC VDEnc on KBL
(too old to reply)
Xiang, Haihao
2016-12-23 16:58:04 UTC
Permalink
I verified AVC VDEnc on KBL with the HuC loading patch from
https://patchwork.freedesktop.org/api/1.0/series/16584/revisions/1/mbox/

Signed-off-by: Xiang, Haihao <***@intel.com>
---
src/gen9_vdenc.c | 231 +++++++++++++++++++++++++++++++++++++++++++++----
src/gen9_vdenc.h | 7 +-
src/i965_defines.h | 1 +
src/i965_device_info.c | 3 +
4 files changed, 224 insertions(+), 18 deletions(-)

diff --git a/src/gen9_vdenc.c b/src/gen9_vdenc.c
index 6402d41..1913a67 100644
--- a/src/gen9_vdenc.c
+++ b/src/gen9_vdenc.c
@@ -41,6 +41,9 @@
#include "intel_media.h"
#include "gen9_vdenc.h"

+extern int
+intel_avc_enc_slice_type_fixup(int slice_type);
+
static const uint8_t buf_rate_adj_tab_i_lowdelay[72] = {
0, 0, -8, -12, -16, -20, -28, -36,
0, 0, -4, -8, -12, -16, -24, -32,
@@ -2050,6 +2053,7 @@ gen9_vdenc_init_vdenc_img_state(VADriverContextP ctx,
}

pstate->dw1.transform_8x8_flag = vdenc_context->transform_8x8_mode_enable;
+ pstate->dw1.extended_pak_obj_cmd_enable = !!vdenc_context->use_extended_pak_obj_cmd;

pstate->dw3.picture_width = vdenc_context->frame_width_in_mbs;

@@ -2752,6 +2756,86 @@ gen9_vdenc_vdenc_walker_state(VADriverContextP ctx,
}

+/*
+ * Emit VDENC_WEIGHTSOFFSETS_STATE: luma weight/offset of L0 entries 0..2,
+ * or weight 1 / offset 0 when weighted_pred_flag is not 1.  The VA-API
+ * arrays are signed short, so each value is masked to its 8-bit field;
+ * an unmasked negative would sign-extend over the neighbouring fields.
+ */
static void
+gen95_vdenc_vdecn_weihgtsoffsets_state(VADriverContextP ctx,
+                                       struct encode_state *encode_state,
+                                       struct intel_encoder_context *encoder_context,
+                                       VAEncSliceParameterBufferH264 *slice_param)
+{
+    struct intel_batchbuffer *batch = encoder_context->base.batch;
+    VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
+    uint32_t dw1 = 1 << 16 | 1 << 0; /* defaults: both weights 1, both offsets 0 */
+    uint32_t dw2 = 1 << 0;           /* default: weight 1, offset 0 */
+
+    if (pic_param->pic_fields.bits.weighted_pred_flag == 1) {
+        dw1 = ((uint32_t)(slice_param->luma_offset_l0[1] & 0xff) << 24 |
+               (uint32_t)(slice_param->luma_weight_l0[1] & 0xff) << 16 |
+               (uint32_t)(slice_param->luma_offset_l0[0] & 0xff) << 8 |
+               (uint32_t)(slice_param->luma_weight_l0[0] & 0xff) << 0);
+        dw2 = ((uint32_t)(slice_param->luma_offset_l0[2] & 0xff) << 8 |
+               (uint32_t)(slice_param->luma_weight_l0[2] & 0xff) << 0);
+    }
+
+    BEGIN_BCS_BATCH(batch, 3);
+    OUT_BCS_BATCH(batch, VDENC_WEIGHTSOFFSETS_STATE | (3 - 2));
+    OUT_BCS_BATCH(batch, dw1);
+    OUT_BCS_BATCH(batch, dw2);
+    ADVANCE_BCS_BATCH(batch);
+}
+
+/*
+ * Emit the 4-dword VDENC_WALKER_STATE for one slice: the macroblock column
+ * and row of this slice's macroblock_address, the column and row of the next
+ * slice's (column 0, row frame_height_in_mbs when there is none), and
+ * luma_log2_weight_denom, which is sent only for weighted P slices.
+ */
+static void
+gen95_vdenc_vdenc_walker_state(VADriverContextP ctx,
+                               struct encode_state *encode_state,
+                               struct intel_encoder_context *encoder_context,
+                               VAEncSliceParameterBufferH264 *slice_param,
+                               VAEncSliceParameterBufferH264 *next_slice_param)
+{
+    struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
+    struct intel_batchbuffer *batch = encoder_context->base.batch;
+    VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
+    int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
+    int slice_hor_pos, slice_ver_pos, next_slice_hor_pos, next_slice_ver_pos;
+    int luma_log2_weight_denom = 0;
+
+    /*
+     * The column is address % frame width, so the matching row has to be
+     * address / frame width as well.  Dividing by the frame height (as this
+     * code did before) yields the wrong row for any non-square frame.
+     */
+    slice_hor_pos = slice_param->macroblock_address % vdenc_context->frame_width_in_mbs;
+    slice_ver_pos = slice_param->macroblock_address / vdenc_context->frame_width_in_mbs;
+
+    if (next_slice_param) {
+        next_slice_hor_pos = next_slice_param->macroblock_address % vdenc_context->frame_width_in_mbs;
+        next_slice_ver_pos = next_slice_param->macroblock_address / vdenc_context->frame_width_in_mbs;
+    } else {
+        next_slice_hor_pos = 0;
+        next_slice_ver_pos = vdenc_context->frame_height_in_mbs;
+    }
+
+    if (slice_type == SLICE_TYPE_P && pic_param->pic_fields.bits.weighted_pred_flag == 1)
+        luma_log2_weight_denom = slice_param->luma_log2_weight_denom;
+
+    BEGIN_BCS_BATCH(batch, 4);
+    OUT_BCS_BATCH(batch, VDENC_WALKER_STATE | (4 - 2));
+    OUT_BCS_BATCH(batch, (slice_hor_pos << 16 | slice_ver_pos));
+    OUT_BCS_BATCH(batch, (next_slice_hor_pos << 16 | next_slice_ver_pos));
+    OUT_BCS_BATCH(batch, luma_log2_weight_denom);
+    ADVANCE_BCS_BATCH(batch);
+}
+
+static void
gen9_vdenc_vdenc_img_state(VADriverContextP ctx,
struct encode_state *encode_state,
struct intel_encoder_context *encoder_context)
@@ -2766,9 +2850,6 @@ gen9_vdenc_vdenc_img_state(VADriverContextP ctx,
ADVANCE_BCS_BATCH(batch);
}

-extern int
-intel_avc_enc_slice_type_fixup(int slice_type);
-
static void
gen9_vdenc_mfx_avc_insert_object(VADriverContextP ctx,
struct intel_encoder_context *encoder_context,
@@ -2802,7 +2883,8 @@ static void
gen9_vdenc_mfx_avc_insert_slice_packed_data(VADriverContextP ctx,
struct encode_state *encode_state,
struct intel_encoder_context *encoder_context,
- int slice_index)
+ int slice_index,
+ unsigned int insert_one_zero_byte)
{
VAEncPackedHeaderParameterBuffer *param = NULL;
unsigned int length_in_bits;
@@ -2846,13 +2928,28 @@ gen9_vdenc_mfx_avc_insert_slice_packed_data(VADriverContextP ctx,
0,
!param->has_emulation_bytes,
0);
+
+ insert_one_zero_byte = 0;
+ }
+
+ /* Insert one zero byte before the slice header if no other NAL unit has been inserted; required on KBL */
+ if (insert_one_zero_byte) {
+ unsigned int insert_data[] = { 0, };
+
+ gen9_vdenc_mfx_avc_insert_object(ctx,
+ encoder_context,
+ insert_data,
+ 1,
+ 8,
+ 1,
+ 0, 0, 0, 0);
}

if (slice_header_index == -1) {
VAEncSequenceParameterBufferH264 *seq_param = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
VAEncSliceParameterBufferH264 *slice_params = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[slice_index]->buffer;
- unsigned char *slice_header = NULL;
+ unsigned char *slice_header = NULL, *slice_header1 = NULL;
int slice_header_length_in_bits = 0;

/* No slice header data is passed. And the driver needs to generate it */
@@ -2861,9 +2958,17 @@ gen9_vdenc_mfx_avc_insert_slice_packed_data(VADriverContextP ctx,
pic_param,
slice_params,
&slice_header);
+
+ slice_header1 = slice_header;
+
+ if (insert_one_zero_byte) {
+ slice_header1 += 1;
+ slice_header_length_in_bits -= 8;
+ }
+
gen9_vdenc_mfx_avc_insert_object(ctx,
encoder_context,
- (unsigned int *)slice_header,
+ (unsigned int *)slice_header1,
ALIGN(slice_header_length_in_bits, 32) >> 5,
slice_header_length_in_bits & 0x1f,
5, /* first 5 bytes are start code + nal unit type */
@@ -2873,20 +2978,31 @@ gen9_vdenc_mfx_avc_insert_slice_packed_data(VADriverContextP ctx,
free(slice_header);
} else {
unsigned int skip_emul_byte_cnt;
+ unsigned char *slice_header1 = NULL;

header_data = (unsigned int *)encode_state->packed_header_data_ext[slice_header_index]->buffer;

param = (VAEncPackedHeaderParameterBuffer *)(encode_state->packed_header_params_ext[slice_header_index]->buffer);
length_in_bits = param->bit_length;

+ slice_header1 = (unsigned char *)header_data;
+
+ if (insert_one_zero_byte) {
+ slice_header1 += 1;
+ length_in_bits -= 8;
+ }
+
/* as the slice header is the last header data for one slice,
* the last header flag is set to one.
*/
skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);

+ if (insert_one_zero_byte)
+ skip_emul_byte_cnt -= 1;
+
gen9_vdenc_mfx_avc_insert_object(ctx,
encoder_context,
- header_data,
+ (unsigned int *)slice_header1,
ALIGN(length_in_bits, 32) >> 5,
length_in_bits & 0x1f,
skip_emul_byte_cnt,
@@ -2910,8 +3026,11 @@ gen9_vdenc_mfx_avc_inset_headers(VADriverContextP ctx,
int idx = va_enc_packed_type_to_idx(VAEncPackedHeaderH264_SPS);
unsigned int internal_rate_mode = vdenc_context->internal_rate_mode;
unsigned int skip_emul_byte_cnt;
+ unsigned int insert_one_zero_byte = 0;

if (slice_index == 0) {
+ insert_one_zero_byte = 1;
+
if (encode_state->packed_header_data[idx]) {
VAEncPackedHeaderParameterBuffer *param = NULL;
unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
@@ -2932,6 +3051,8 @@ gen9_vdenc_mfx_avc_inset_headers(VADriverContextP ctx,
0,
!param->has_emulation_bytes,
0);
+
+ insert_one_zero_byte = 0;
}

idx = va_enc_packed_type_to_idx(VAEncPackedHeaderH264_PPS);
@@ -2957,6 +3078,8 @@ gen9_vdenc_mfx_avc_inset_headers(VADriverContextP ctx,
0,
!param->has_emulation_bytes,
0);
+
+ insert_one_zero_byte = 0;
}

idx = va_enc_packed_type_to_idx(VAEncPackedHeaderH264_SEI);
@@ -2981,15 +3104,21 @@ gen9_vdenc_mfx_avc_inset_headers(VADriverContextP ctx,
0,
!param->has_emulation_bytes,
0);
+
+ insert_one_zero_byte = 0;
} else if (internal_rate_mode == I965_BRC_CBR) {
/* TODO: insert others */
}
}

+ if (vdenc_context->is_frame_level_vdenc)
+ insert_one_zero_byte = 0;
+
gen9_vdenc_mfx_avc_insert_slice_packed_data(ctx,
encode_state,
encoder_context,
- slice_index);
+ slice_index,
+ insert_one_zero_byte);
}

static void
@@ -3233,6 +3362,7 @@ gen9_vdenc_mfx_avc_single_slice(VADriverContextP ctx,
VAEncSliceParameterBufferH264 *next_slice_param,
int slice_index)
{
+ struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;

gen9_vdenc_mfx_avc_ref_idx_state(ctx, encode_state, encoder_context, slice_param);
@@ -3252,6 +3382,18 @@ gen9_vdenc_mfx_avc_single_slice(VADriverContextP ctx,
encoder_context,
slice_param,
slice_index);
+
+ if (!vdenc_context->is_frame_level_vdenc) {
+ gen95_vdenc_vdecn_weihgtsoffsets_state(ctx,
+ encode_state,
+ encoder_context,
+ slice_param);
+ gen95_vdenc_vdenc_walker_state(ctx,
+ encode_state,
+ encoder_context,
+ slice_param,
+ next_slice_param);
+ }
}

static void
@@ -3259,12 +3401,12 @@ gen9_vdenc_mfx_vdenc_avc_slices(VADriverContextP ctx,
struct encode_state *encode_state,
struct intel_encoder_context *encoder_context)
{
+ struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
struct intel_batchbuffer *batch = encoder_context->base.batch;
struct gpe_mi_flush_dw_parameter mi_flush_dw_params;
VAEncSliceParameterBufferH264 *slice_param, *next_slice_param, *next_slice_group_param;
int i, j;
int slice_index = 0;
- int is_frame_level_vdenc = 1; /* TODO: check it for SKL */
int has_tail = 0; /* TODO: check it later */

for (j = 0; j < encode_state->num_slice_params_ext; j++) {
@@ -3287,22 +3429,47 @@ gen9_vdenc_mfx_vdenc_avc_slices(VADriverContextP ctx,
slice_param,
next_slice_param,
slice_index);
- slice_param++;
- slice_index++;

- if (is_frame_level_vdenc)
+ if (vdenc_context->is_frame_level_vdenc)
break;
else {
- /* TODO: remove assert(0) and add other commands here */
- assert(0);
+ struct vd_pipeline_flush_parameter pipeline_flush_params;
+ int insert_mi_flush;
+
+ memset(&pipeline_flush_params, 0, sizeof(pipeline_flush_params));
+
+ if (next_slice_group_param) {
+ pipeline_flush_params.mfx_pipeline_done = 0;
+ insert_mi_flush = 1;
+ } else if (i < encode_state->slice_params_ext[j]->num_elements - 1) {
+ pipeline_flush_params.mfx_pipeline_done = 0;
+ insert_mi_flush = 1;
+ } else {
+ pipeline_flush_params.mfx_pipeline_done = !has_tail;
+ insert_mi_flush = 0;
+ }
+
+ pipeline_flush_params.vdenc_pipeline_done = 1;
+ pipeline_flush_params.vdenc_pipeline_command_flush = 1;
+ pipeline_flush_params.vd_command_message_parser_done = 1;
+ gen9_vdenc_vd_pipeline_flush(ctx, encoder_context, &pipeline_flush_params);
+
+ if (insert_mi_flush) {
+ memset(&mi_flush_dw_params, 0, sizeof(mi_flush_dw_params));
+ mi_flush_dw_params.video_pipeline_cache_invalidate = 1;
+ gen8_gpe_mi_flush_dw(ctx, batch, &mi_flush_dw_params);
+ }
}
+
+ slice_param++;
+ slice_index++;
}

- if (is_frame_level_vdenc)
+ if (vdenc_context->is_frame_level_vdenc)
break;
}

- if (is_frame_level_vdenc) {
+ if (vdenc_context->is_frame_level_vdenc) {
struct vd_pipeline_flush_parameter pipeline_flush_params;

gen9_vdenc_vdenc_walker_state(ctx, encode_state, encoder_context);
@@ -3653,6 +3820,36 @@ gen9_vdenc_allocate_resources(VADriverContextP ctx,
"HuC Status buffer");
}

+/* Default (non-KBL) setup: select the frame-level VDEnc path. */
+static void
+gen9_vdenc_hw_interfaces_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context,
+                              struct gen9_vdenc_context *vdenc_context)
+{
+    vdenc_context->is_frame_level_vdenc = 1;
+}
+
+/* KBL setup: turn on the extended PAK object command; is_frame_level_vdenc is not touched here. */
+static void
+gen95_vdenc_hw_interfaces_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context,
+                               struct gen9_vdenc_context *vdenc_context)
+{
+    vdenc_context->use_extended_pak_obj_cmd = 1;
+}
+
+/* Run the KBL-specific (gen95) VDEnc interface setup on KBL, the gen9 one otherwise. */
+static void
+vdenc_hw_interfaces_init(VADriverContextP ctx,
+                         struct intel_encoder_context *encoder_context,
+                         struct gen9_vdenc_context *vdenc_context)
+{
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+
+    if (!IS_KBL(i965->intel.device_info))
+        gen9_vdenc_hw_interfaces_init(ctx, encoder_context, vdenc_context);
+    else
+        gen95_vdenc_hw_interfaces_init(ctx, encoder_context, vdenc_context);
+}
+
static VAStatus
gen9_vdenc_context_get_status(VADriverContextP ctx,
struct intel_encoder_context *encoder_context,
@@ -3680,7 +3877,9 @@ gen9_vdenc_context_init(VADriverContextP ctx, struct intel_encoder_context *enco
vdenc_context->num_passes = 1;
vdenc_context->vdenc_streamin_enable = 0;
vdenc_context->vdenc_pak_threshold_check_enable = 0;
+ vdenc_context->is_frame_level_vdenc = 0;

+ vdenc_hw_interfaces_init(ctx, encoder_context, vdenc_context);
gen9_vdenc_allocate_resources(ctx, encoder_context, vdenc_context);

encoder_context->mfc_context = vdenc_context;
diff --git a/src/gen9_vdenc.h b/src/gen9_vdenc.h
index 41e4362..ad0f2ae 100644
--- a/src/gen9_vdenc.h
+++ b/src/gen9_vdenc.h
@@ -304,7 +304,8 @@ struct gen9_vdenc_img_state
uint32_t bidirectional_mix_disable:1;
uint32_t pad1:1;
uint32_t time_budget_overflow_check:1;
- uint32_t pad2:2;
+ uint32_t pad2:1;
+ uint32_t extended_pak_obj_cmd_enable:1;
uint32_t transform_8x8_flag:1;
uint32_t vdenc_l1_cache_priority:2;
uint32_t pad3:22;
@@ -776,7 +777,9 @@ struct gen9_vdenc_context
uint32_t frame_type:2;

uint32_t mb_brc_enabled:1;
- uint32_t pad0:31;
+ uint32_t is_frame_level_vdenc:1;
+ uint32_t use_extended_pak_obj_cmd:1;
+ uint32_t pad0:29;

struct i965_gpe_resource brc_init_reset_dmem_res;
struct i965_gpe_resource brc_history_buffer_res;
diff --git a/src/i965_defines.h b/src/i965_defines.h
index f86ac8e..941ad4e 100755
--- a/src/i965_defines.h
+++ b/src/i965_defines.h
@@ -959,6 +959,7 @@
#define VDENC_IMG_STATE VDENC(1, 0, 5)
#define VDENC_CONST_QPT_STATE VDENC(1, 0, 6)
#define VDENC_WALKER_STATE VDENC(1, 0, 7)
+#define VDENC_WEIGHTSOFFSETS_STATE VDENC(1, 0, 8)

#define VDENC_CODEC_AVC 2

diff --git a/src/i965_device_info.c b/src/i965_device_info.c
index 73602ae..0fc8930 100644
--- a/src/i965_device_info.c
+++ b/src/i965_device_info.c
@@ -481,6 +481,9 @@ static struct hw_codec_info kbl_hw_codec_info = {
.has_vp9_decoding = 1,
.has_vpp_p010 = 1,
.has_vp9_encoding = 1,
+ .has_lp_h264_encoding = 1,
+
+ .lp_h264_brc_mode = VA_RC_CQP,

.num_filters = 5,
.filters = {
--
1.9.1
Kelley, Sean V
2016-12-27 17:21:03 UTC
Permalink
On 12/23/16, 8:58 AM, "Libva on behalf of Xiang, Haihao" <libva-***@lists.freedesktop.org on behalf of ***@intel.com> wrote:

I verified AVC VDEnc on KBL with the HuC loading patch from
https://patchwork.freedesktop.org/api/1.0/series/16584/revisions/1/mbox/

lgtm

Sean

Signed-off-by: Xiang, Haihao <***@intel.com>
---
src/gen9_vdenc.c | 231 +++++++++++++++++++++++++++++++++++++++++++++----
src/gen9_vdenc.h | 7 +-
src/i965_defines.h | 1 +
src/i965_device_info.c | 3 +
4 files changed, 224 insertions(+), 18 deletions(-)

diff --git a/src/gen9_vdenc.c b/src/gen9_vdenc.c
index 6402d41..1913a67 100644
--- a/src/gen9_vdenc.c
+++ b/src/gen9_vdenc.c
@@ -41,6 +41,9 @@
#include "intel_media.h"
#include "gen9_vdenc.h"

+extern int
+intel_avc_enc_slice_type_fixup(int slice_type);
+
static const uint8_t buf_rate_adj_tab_i_lowdelay[72] = {
0, 0, -8, -12, -16, -20, -28, -36,
0, 0, -4, -8, -12, -16, -24, -32,
@@ -2050,6 +2053,7 @@ gen9_vdenc_init_vdenc_img_state(VADriverContextP ctx,
}

pstate->dw1.transform_8x8_flag = vdenc_context->transform_8x8_mode_enable;
+ pstate->dw1.extended_pak_obj_cmd_enable = !!vdenc_context->use_extended_pak_obj_cmd;

pstate->dw3.picture_width = vdenc_context->frame_width_in_mbs;

@@ -2752,6 +2756,86 @@ gen9_vdenc_vdenc_walker_state(VADriverContextP ctx,
}

+/*
+ * Emit VDENC_WEIGHTSOFFSETS_STATE: luma weight/offset of L0 entries 0..2,
+ * or weight 1 / offset 0 when weighted_pred_flag is not 1.  The VA-API
+ * arrays are signed short, so each value is masked to its 8-bit field;
+ * an unmasked negative would sign-extend over the neighbouring fields.
+ */
static void
+gen95_vdenc_vdecn_weihgtsoffsets_state(VADriverContextP ctx,
+                                       struct encode_state *encode_state,
+                                       struct intel_encoder_context *encoder_context,
+                                       VAEncSliceParameterBufferH264 *slice_param)
+{
+    struct intel_batchbuffer *batch = encoder_context->base.batch;
+    VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
+    uint32_t dw1 = 1 << 16 | 1 << 0; /* defaults: both weights 1, both offsets 0 */
+    uint32_t dw2 = 1 << 0;           /* default: weight 1, offset 0 */
+
+    if (pic_param->pic_fields.bits.weighted_pred_flag == 1) {
+        dw1 = ((uint32_t)(slice_param->luma_offset_l0[1] & 0xff) << 24 |
+               (uint32_t)(slice_param->luma_weight_l0[1] & 0xff) << 16 |
+               (uint32_t)(slice_param->luma_offset_l0[0] & 0xff) << 8 |
+               (uint32_t)(slice_param->luma_weight_l0[0] & 0xff) << 0);
+        dw2 = ((uint32_t)(slice_param->luma_offset_l0[2] & 0xff) << 8 |
+               (uint32_t)(slice_param->luma_weight_l0[2] & 0xff) << 0);
+    }
+
+    BEGIN_BCS_BATCH(batch, 3);
+    OUT_BCS_BATCH(batch, VDENC_WEIGHTSOFFSETS_STATE | (3 - 2));
+    OUT_BCS_BATCH(batch, dw1);
+    OUT_BCS_BATCH(batch, dw2);
+    ADVANCE_BCS_BATCH(batch);
+}
+
+/*
+ * Emit the 4-dword VDENC_WALKER_STATE for one slice: the macroblock column
+ * and row of this slice's macroblock_address, the column and row of the next
+ * slice's (column 0, row frame_height_in_mbs when there is none), and
+ * luma_log2_weight_denom, which is sent only for weighted P slices.
+ */
+static void
+gen95_vdenc_vdenc_walker_state(VADriverContextP ctx,
+                               struct encode_state *encode_state,
+                               struct intel_encoder_context *encoder_context,
+                               VAEncSliceParameterBufferH264 *slice_param,
+                               VAEncSliceParameterBufferH264 *next_slice_param)
+{
+    struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
+    struct intel_batchbuffer *batch = encoder_context->base.batch;
+    VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
+    int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
+    int slice_hor_pos, slice_ver_pos, next_slice_hor_pos, next_slice_ver_pos;
+    int luma_log2_weight_denom = 0;
+
+    /*
+     * The column is address % frame width, so the matching row has to be
+     * address / frame width as well.  Dividing by the frame height (as this
+     * code did before) yields the wrong row for any non-square frame.
+     */
+    slice_hor_pos = slice_param->macroblock_address % vdenc_context->frame_width_in_mbs;
+    slice_ver_pos = slice_param->macroblock_address / vdenc_context->frame_width_in_mbs;
+
+    if (next_slice_param) {
+        next_slice_hor_pos = next_slice_param->macroblock_address % vdenc_context->frame_width_in_mbs;
+        next_slice_ver_pos = next_slice_param->macroblock_address / vdenc_context->frame_width_in_mbs;
+    } else {
+        next_slice_hor_pos = 0;
+        next_slice_ver_pos = vdenc_context->frame_height_in_mbs;
+    }
+
+    if (slice_type == SLICE_TYPE_P && pic_param->pic_fields.bits.weighted_pred_flag == 1)
+        luma_log2_weight_denom = slice_param->luma_log2_weight_denom;
+
+    BEGIN_BCS_BATCH(batch, 4);
+    OUT_BCS_BATCH(batch, VDENC_WALKER_STATE | (4 - 2));
+    OUT_BCS_BATCH(batch, (slice_hor_pos << 16 | slice_ver_pos));
+    OUT_BCS_BATCH(batch, (next_slice_hor_pos << 16 | next_slice_ver_pos));
+    OUT_BCS_BATCH(batch, luma_log2_weight_denom);
+    ADVANCE_BCS_BATCH(batch);
+}
+
+static void
gen9_vdenc_vdenc_img_state(VADriverContextP ctx,
struct encode_state *encode_state,
struct intel_encoder_context *encoder_context)
@@ -2766,9 +2850,6 @@ gen9_vdenc_vdenc_img_state(VADriverContextP ctx,
ADVANCE_BCS_BATCH(batch);
}

-extern int
-intel_avc_enc_slice_type_fixup(int slice_type);
-
static void
gen9_vdenc_mfx_avc_insert_object(VADriverContextP ctx,
struct intel_encoder_context *encoder_context,
@@ -2802,7 +2883,8 @@ static void
gen9_vdenc_mfx_avc_insert_slice_packed_data(VADriverContextP ctx,
struct encode_state *encode_state,
struct intel_encoder_context *encoder_context,
- int slice_index)
+ int slice_index,
+ unsigned int insert_one_zero_byte)
{
VAEncPackedHeaderParameterBuffer *param = NULL;
unsigned int length_in_bits;
@@ -2846,13 +2928,28 @@ gen9_vdenc_mfx_avc_insert_slice_packed_data(VADriverContextP ctx,
0,
!param->has_emulation_bytes,
0);
+
+ insert_one_zero_byte = 0;
+ }
+
+ /* Insert one zero byte before the slice header if no other NAL unit has been inserted; required on KBL */
+ if (insert_one_zero_byte) {
+ unsigned int insert_data[] = { 0, };
+
+ gen9_vdenc_mfx_avc_insert_object(ctx,
+ encoder_context,
+ insert_data,
+ 1,
+ 8,
+ 1,
+ 0, 0, 0, 0);
}

if (slice_header_index == -1) {
VAEncSequenceParameterBufferH264 *seq_param = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
VAEncSliceParameterBufferH264 *slice_params = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[slice_index]->buffer;
- unsigned char *slice_header = NULL;
+ unsigned char *slice_header = NULL, *slice_header1 = NULL;
int slice_header_length_in_bits = 0;

/* No slice header data is passed. And the driver needs to generate it */
@@ -2861,9 +2958,17 @@ gen9_vdenc_mfx_avc_insert_slice_packed_data(VADriverContextP ctx,
pic_param,
slice_params,
&slice_header);
+
+ slice_header1 = slice_header;
+
+ if (insert_one_zero_byte) {
+ slice_header1 += 1;
+ slice_header_length_in_bits -= 8;
+ }
+
gen9_vdenc_mfx_avc_insert_object(ctx,
encoder_context,
- (unsigned int *)slice_header,
+ (unsigned int *)slice_header1,
ALIGN(slice_header_length_in_bits, 32) >> 5,
slice_header_length_in_bits & 0x1f,
5, /* first 5 bytes are start code + nal unit type */
@@ -2873,20 +2978,31 @@ gen9_vdenc_mfx_avc_insert_slice_packed_data(VADriverContextP ctx,
free(slice_header);
} else {
unsigned int skip_emul_byte_cnt;
+ unsigned char *slice_header1 = NULL;

header_data = (unsigned int *)encode_state->packed_header_data_ext[slice_header_index]->buffer;

param = (VAEncPackedHeaderParameterBuffer *)(encode_state->packed_header_params_ext[slice_header_index]->buffer);
length_in_bits = param->bit_length;

+ slice_header1 = (unsigned char *)header_data;
+
+ if (insert_one_zero_byte) {
+ slice_header1 += 1;
+ length_in_bits -= 8;
+ }
+
/* as the slice header is the last header data for one slice,
* the last header flag is set to one.
*/
skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);

+ if (insert_one_zero_byte)
+ skip_emul_byte_cnt -= 1;
+
gen9_vdenc_mfx_avc_insert_object(ctx,
encoder_context,
- header_data,
+ (unsigned int *)slice_header1,
ALIGN(length_in_bits, 32) >> 5,
length_in_bits & 0x1f,
skip_emul_byte_cnt,
@@ -2910,8 +3026,11 @@ gen9_vdenc_mfx_avc_inset_headers(VADriverContextP ctx,
int idx = va_enc_packed_type_to_idx(VAEncPackedHeaderH264_SPS);
unsigned int internal_rate_mode = vdenc_context->internal_rate_mode;
unsigned int skip_emul_byte_cnt;
+ unsigned int insert_one_zero_byte = 0;

if (slice_index == 0) {
+ insert_one_zero_byte = 1;
+
if (encode_state->packed_header_data[idx]) {
VAEncPackedHeaderParameterBuffer *param = NULL;
unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
@@ -2932,6 +3051,8 @@ gen9_vdenc_mfx_avc_inset_headers(VADriverContextP ctx,
0,
!param->has_emulation_bytes,
0);
+
+ insert_one_zero_byte = 0;
}

idx = va_enc_packed_type_to_idx(VAEncPackedHeaderH264_PPS);
@@ -2957,6 +3078,8 @@ gen9_vdenc_mfx_avc_inset_headers(VADriverContextP ctx,
0,
!param->has_emulation_bytes,
0);
+
+ insert_one_zero_byte = 0;
}

idx = va_enc_packed_type_to_idx(VAEncPackedHeaderH264_SEI);
@@ -2981,15 +3104,21 @@ gen9_vdenc_mfx_avc_inset_headers(VADriverContextP ctx,
0,
!param->has_emulation_bytes,
0);
+
+ insert_one_zero_byte = 0;
} else if (internal_rate_mode == I965_BRC_CBR) {
/* TODO: insert others */
}
}

+ if (vdenc_context->is_frame_level_vdenc)
+ insert_one_zero_byte = 0;
+
gen9_vdenc_mfx_avc_insert_slice_packed_data(ctx,
encode_state,
encoder_context,
- slice_index);
+ slice_index,
+ insert_one_zero_byte);
}

static void
@@ -3233,6 +3362,7 @@ gen9_vdenc_mfx_avc_single_slice(VADriverContextP ctx,
VAEncSliceParameterBufferH264 *next_slice_param,
int slice_index)
{
+ struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;

gen9_vdenc_mfx_avc_ref_idx_state(ctx, encode_state, encoder_context, slice_param);
@@ -3252,6 +3382,18 @@ gen9_vdenc_mfx_avc_single_slice(VADriverContextP ctx,
encoder_context,
slice_param,
slice_index);
+
+ if (!vdenc_context->is_frame_level_vdenc) {
+ gen95_vdenc_vdecn_weihgtsoffsets_state(ctx,
+ encode_state,
+ encoder_context,
+ slice_param);
+ gen95_vdenc_vdenc_walker_state(ctx,
+ encode_state,
+ encoder_context,
+ slice_param,
+ next_slice_param);
+ }
}

static void
@@ -3259,12 +3401,12 @@ gen9_vdenc_mfx_vdenc_avc_slices(VADriverContextP ctx,
struct encode_state *encode_state,
struct intel_encoder_context *encoder_context)
{
+ struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
struct intel_batchbuffer *batch = encoder_context->base.batch;
struct gpe_mi_flush_dw_parameter mi_flush_dw_params;
VAEncSliceParameterBufferH264 *slice_param, *next_slice_param, *next_slice_group_param;
int i, j;
int slice_index = 0;
- int is_frame_level_vdenc = 1; /* TODO: check it for SKL */
int has_tail = 0; /* TODO: check it later */

for (j = 0; j < encode_state->num_slice_params_ext; j++) {
@@ -3287,22 +3429,47 @@ gen9_vdenc_mfx_vdenc_avc_slices(VADriverContextP ctx,
slice_param,
next_slice_param,
slice_index);
- slice_param++;
- slice_index++;

- if (is_frame_level_vdenc)
+ if (vdenc_context->is_frame_level_vdenc)
break;
else {
- /* TODO: remove assert(0) and add other commands here */
- assert(0);
+ struct vd_pipeline_flush_parameter pipeline_flush_params;
+ int insert_mi_flush;
+
+ memset(&pipeline_flush_params, 0, sizeof(pipeline_flush_params));
+
+ if (next_slice_group_param) {
+ pipeline_flush_params.mfx_pipeline_done = 0;
+ insert_mi_flush = 1;
+ } else if (i < encode_state->slice_params_ext[j]->num_elements - 1) {
+ pipeline_flush_params.mfx_pipeline_done = 0;
+ insert_mi_flush = 1;
+ } else {
+ pipeline_flush_params.mfx_pipeline_done = !has_tail;
+ insert_mi_flush = 0;
+ }
+
+ pipeline_flush_params.vdenc_pipeline_done = 1;
+ pipeline_flush_params.vdenc_pipeline_command_flush = 1;
+ pipeline_flush_params.vd_command_message_parser_done = 1;
+ gen9_vdenc_vd_pipeline_flush(ctx, encoder_context, &pipeline_flush_params);
+
+ if (insert_mi_flush) {
+ memset(&mi_flush_dw_params, 0, sizeof(mi_flush_dw_params));
+ mi_flush_dw_params.video_pipeline_cache_invalidate = 1;
+ gen8_gpe_mi_flush_dw(ctx, batch, &mi_flush_dw_params);
+ }
}
+
+ slice_param++;
+ slice_index++;
}

- if (is_frame_level_vdenc)
+ if (vdenc_context->is_frame_level_vdenc)
break;
}

- if (is_frame_level_vdenc) {
+ if (vdenc_context->is_frame_level_vdenc) {
struct vd_pipeline_flush_parameter pipeline_flush_params;

gen9_vdenc_vdenc_walker_state(ctx, encode_state, encoder_context);
@@ -3653,6 +3820,36 @@ gen9_vdenc_allocate_resources(VADriverContextP ctx,
"HuC Status buffer");
}

+/* Default (non-KBL) setup: select the frame-level VDEnc path. */
+static void
+gen9_vdenc_hw_interfaces_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context,
+                              struct gen9_vdenc_context *vdenc_context)
+{
+    vdenc_context->is_frame_level_vdenc = 1;
+}
+
+/* KBL setup: turn on the extended PAK object command; is_frame_level_vdenc is not touched here. */
+static void
+gen95_vdenc_hw_interfaces_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context,
+                               struct gen9_vdenc_context *vdenc_context)
+{
+    vdenc_context->use_extended_pak_obj_cmd = 1;
+}
+
+/* Run the KBL-specific (gen95) VDEnc interface setup on KBL, the gen9 one otherwise. */
+static void
+vdenc_hw_interfaces_init(VADriverContextP ctx,
+                         struct intel_encoder_context *encoder_context,
+                         struct gen9_vdenc_context *vdenc_context)
+{
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+
+    if (!IS_KBL(i965->intel.device_info))
+        gen9_vdenc_hw_interfaces_init(ctx, encoder_context, vdenc_context);
+    else
+        gen95_vdenc_hw_interfaces_init(ctx, encoder_context, vdenc_context);
+}
+
static VAStatus
gen9_vdenc_context_get_status(VADriverContextP ctx,
struct intel_encoder_context *encoder_context,
@@ -3680,7 +3877,9 @@ gen9_vdenc_context_init(VADriverContextP ctx, struct intel_encoder_context *enco
vdenc_context->num_passes = 1;
vdenc_context->vdenc_streamin_enable = 0;
vdenc_context->vdenc_pak_threshold_check_enable = 0;
+ vdenc_context->is_frame_level_vdenc = 0;

+ vdenc_hw_interfaces_init(ctx, encoder_context, vdenc_context);
gen9_vdenc_allocate_resources(ctx, encoder_context, vdenc_context);

encoder_context->mfc_context = vdenc_context;
diff --git a/src/gen9_vdenc.h b/src/gen9_vdenc.h
index 41e4362..ad0f2ae 100644
--- a/src/gen9_vdenc.h
+++ b/src/gen9_vdenc.h
@@ -304,7 +304,8 @@ struct gen9_vdenc_img_state
uint32_t bidirectional_mix_disable:1;
uint32_t pad1:1;
uint32_t time_budget_overflow_check:1;
- uint32_t pad2:2;
+ uint32_t pad2:1;
+ uint32_t extended_pak_obj_cmd_enable:1;
uint32_t transform_8x8_flag:1;
uint32_t vdenc_l1_cache_priority:2;
uint32_t pad3:22;
@@ -776,7 +777,9 @@ struct gen9_vdenc_context
uint32_t frame_type:2;

uint32_t mb_brc_enabled:1;
- uint32_t pad0:31;
+ uint32_t is_frame_level_vdenc:1;
+ uint32_t use_extended_pak_obj_cmd:1;
+ uint32_t pad0:29;

struct i965_gpe_resource brc_init_reset_dmem_res;
struct i965_gpe_resource brc_history_buffer_res;
diff --git a/src/i965_defines.h b/src/i965_defines.h
index f86ac8e..941ad4e 100755
--- a/src/i965_defines.h
+++ b/src/i965_defines.h
@@ -959,6 +959,7 @@
#define VDENC_IMG_STATE VDENC(1, 0, 5)
#define VDENC_CONST_QPT_STATE VDENC(1, 0, 6)
#define VDENC_WALKER_STATE VDENC(1, 0, 7)
+#define VDENC_WEIGHTSOFFSETS_STATE VDENC(1, 0, 8)

#define VDENC_CODEC_AVC 2

diff --git a/src/i965_device_info.c b/src/i965_device_info.c
index 73602ae..0fc8930 100644
--- a/src/i965_device_info.c
+++ b/src/i965_device_info.c
@@ -481,6 +481,9 @@ static struct hw_codec_info kbl_hw_codec_info = {
.has_vp9_decoding = 1,
.has_vpp_p010 = 1,
.has_vp9_encoding = 1,
+ .has_lp_h264_encoding = 1,
+
+ .lp_h264_brc_mode = VA_RC_CQP,

.num_filters = 5,
.filters = {
--
1.9.1

_______________________________________________
Libva mailing list
***@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/libva

Loading...