Discussion:
[Libva-intel-driver][PATCH 00/17] Fixes for GPE utility
(too old to reply)
Xiang, Haihao
2016-11-17 08:34:59 UTC
Permalink
The patch series cleans up some gpe settings, renames some gen9 functions for gen8
and adds some new gpe functions for gen8.

Xiang, Haihao (17):
Change the size calculation of the required memory for all GPE kernels
Move all curbe related settings to the inner structure in
i965_gpe_context
Move interface descriptor remapping table related settings to the
inner structure in i965_gpe_context
Move sampler state related settings to the inner structure in
i965_gpe_context
Fix the size calculation of the required memory for dynamic state
buffer
Fix curbe length in CMD_MEDIA_CURBE_LOAD on GEN8+
Rename gen8p_gpe_context_map_curbe()/gen8p_gpe_context_unmap_curbe()
to i965_gpe_context_map_curbe()/i965_gpe_context_unmap_curbe()
Rename i965_gpe_dri_object_to_2d_gpe_resource() to
i965_dri_object_to_2d_gpe_resource()
Rename gen9_gpe_media_object_walker() to
gen8_gpe_media_object_walker()
Rename gen9_gpe_mi_batch_buffer_start() to
gen8_gpe_mi_batch_buffer_start()
Rename gen9_gpe_mi_store_data_imm() to gen8_gpe_mi_store_data_imm()
Rename gen9_gpe_mi_flush_dw() to gen8_gpe_mi_flush_dw()
Rename gen9_gpe_mi_store_register_mem() to
gen8_gpe_mi_store_register_mem()
Add a new gpe function gen8_gpe_reset_binding_table() to reset binding
table
Add a new gpe function gen8_gpe_context_add_surface() to set surface
state on GEN8
Add a new gpe function gen8_gpe_mi_conditional_batch_buffer_end() for
GEN8
Add a new gpe function gen8_gpe_pipe_control() for GEN8

src/gen75_vpp_gpe.c | 12 +-
src/gen8_mfc.c | 12 +-
src/gen8_vme.c | 22 +--
src/gen9_post_processing.c | 27 +--
src/gen9_vdenc.c | 26 +--
src/gen9_vme.c | 23 +--
src/gen9_vp9_encoder.c | 85 +++++----
src/i965_gpe_utils.c | 440 +++++++++++++++++++++++++++++++++++++++++----
src/i965_gpe_utils.h | 73 +++++---
src/intel_driver.h | 6 +
10 files changed, 568 insertions(+), 158 deletions(-)
--
1.9.1
Xiang, Haihao
2016-11-17 08:35:01 UTC
Permalink
To avoid confusion between curbe.length and curbe_size, this patch uses
curbe.length only. curbe.bo is always set, even if curbe is a part of the
dynamic state buffer, hence we can use the curbe related settings regardless
of whether curbe is a part of the dynamic state buffer or not.

Signed-off-by: Xiang, Haihao <***@intel.com>
---
src/gen75_vpp_gpe.c | 2 +-
src/gen8_mfc.c | 2 +-
src/gen8_vme.c | 12 ++++++------
src/gen9_post_processing.c | 3 +--
src/gen9_vme.c | 12 ++++++------
src/gen9_vp9_encoder.c | 22 ++++++++++------------
src/i965_gpe_utils.c | 28 +++++++++++++++++++---------
src/i965_gpe_utils.h | 3 +--
8 files changed, 45 insertions(+), 39 deletions(-)

diff --git a/src/gen75_vpp_gpe.c b/src/gen75_vpp_gpe.c
index 9850c1c..2cddb5a 100644
--- a/src/gen75_vpp_gpe.c
+++ b/src/gen75_vpp_gpe.c
@@ -890,7 +890,7 @@ vpp_gpe_context_init(VADriverContextP ctx)
gpe_ctx->surface_state_binding_table.length =
(SURFACE_STATE_PADDED_SIZE_GEN8 + sizeof(unsigned int)) * MAX_MEDIA_SURFACES_GEN6;

- gpe_ctx->curbe_size = CURBE_TOTAL_DATA_LENGTH;
+ gpe_ctx->curbe.length = CURBE_TOTAL_DATA_LENGTH;
gpe_ctx->idrt_size = sizeof(struct gen8_interface_descriptor_data) * MAX_INTERFACE_DESC_GEN6;

}
diff --git a/src/gen8_mfc.c b/src/gen8_mfc.c
index 63ffea5..3ed9e84 100644
--- a/src/gen8_mfc.c
+++ b/src/gen8_mfc.c
@@ -4609,7 +4609,7 @@ Bool gen8_mfc_context_init(VADriverContextP ctx, struct intel_encoder_context *e
mfc_context->gpe_context.surface_state_binding_table.length = (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_MEDIA_SURFACES_GEN6;

mfc_context->gpe_context.idrt_size = sizeof(struct gen8_interface_descriptor_data) * MAX_INTERFACE_DESC_GEN6;
- mfc_context->gpe_context.curbe_size = 32 * 4;
+ mfc_context->gpe_context.curbe.length = 32 * 4;
mfc_context->gpe_context.sampler_size = 0;

mfc_context->gpe_context.vfe_state.max_num_threads = 60 - 1;
diff --git a/src/gen8_vme.c b/src/gen8_vme.c
index c79c62b..96835bf 100644
--- a/src/gen8_vme.c
+++ b/src/gen8_vme.c
@@ -389,10 +389,10 @@ static VAStatus gen8_vme_constant_setup(VADriverContextP ctx,

vme_state_message[31] = mv_num;

- dri_bo_map(vme_context->gpe_context.dynamic_state.bo, 1);
- assert(vme_context->gpe_context.dynamic_state.bo->virtual);
- constant_buffer = (unsigned char *)vme_context->gpe_context.dynamic_state.bo->virtual +
- vme_context->gpe_context.curbe_offset;
+ dri_bo_map(vme_context->gpe_context.curbe.bo, 1);
+ assert(vme_context->gpe_context.curbe.bo->virtual);
+ constant_buffer = (unsigned char *)vme_context->gpe_context.curbe.bo->virtual +
+ vme_context->gpe_context.curbe.offset;

/* VME MV/Mb cost table is passed by using const buffer */
/* Now it uses the fixed search path. So it is constructed directly
@@ -400,7 +400,7 @@ static VAStatus gen8_vme_constant_setup(VADriverContextP ctx,
*/
memcpy(constant_buffer, (char *)vme_context->vme_state_message, 128);

- dri_bo_unmap(vme_context->gpe_context.dynamic_state.bo);
+ dri_bo_unmap(vme_context->gpe_context.curbe.bo);

return VA_STATUS_SUCCESS;
}
@@ -1379,7 +1379,7 @@ Bool gen8_vme_context_init(VADriverContextP ctx, struct intel_encoder_context *e
vme_context->gpe_context.surface_state_binding_table.length = (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_MEDIA_SURFACES_GEN6;

vme_context->gpe_context.idrt_size = sizeof(struct gen8_interface_descriptor_data) * MAX_INTERFACE_DESC_GEN6;
- vme_context->gpe_context.curbe_size = CURBE_TOTAL_DATA_LENGTH;
+ vme_context->gpe_context.curbe.length = CURBE_TOTAL_DATA_LENGTH;
vme_context->gpe_context.sampler_size = 0;


diff --git a/src/gen9_post_processing.c b/src/gen9_post_processing.c
index a5d345c..71da501 100644
--- a/src/gen9_post_processing.c
+++ b/src/gen9_post_processing.c
@@ -538,8 +538,7 @@ gen9_post_processing_context_init(VADriverContextP ctx,
gen8_gpe_load_kernels(ctx, gpe_context, &scaling_kernel, 1);
gpe_context->idrt_size = ALIGN(sizeof(struct gen8_interface_descriptor_data), 64);
gpe_context->sampler_size = ALIGN(sizeof(struct gen8_sampler_state), 64);
- gpe_context->curbe_size = ALIGN(sizeof(struct scaling_input_parameter), 64);
- gpe_context->curbe.length = gpe_context->curbe_size;
+ gpe_context->curbe.length = ALIGN(sizeof(struct scaling_input_parameter), 64);

gpe_context->surface_state_binding_table.max_entries = MAX_SCALING_SURFACES;
gpe_context->surface_state_binding_table.binding_table_offset = 0;
diff --git a/src/gen9_vme.c b/src/gen9_vme.c
index 6ad8fff..a59fe2a 100644
--- a/src/gen9_vme.c
+++ b/src/gen9_vme.c
@@ -438,10 +438,10 @@ static VAStatus gen9_vme_constant_setup(VADriverContextP ctx,

vme_state_message[31] = mv_num;

- dri_bo_map(vme_context->gpe_context.dynamic_state.bo, 1);
- assert(vme_context->gpe_context.dynamic_state.bo->virtual);
- constant_buffer = (unsigned char *)vme_context->gpe_context.dynamic_state.bo->virtual +
- vme_context->gpe_context.curbe_offset;
+ dri_bo_map(vme_context->gpe_context.curbe.bo, 1);
+ assert(vme_context->gpe_context.curbe.bo->virtual);
+ constant_buffer = (unsigned char *)vme_context->gpe_context.curbe.bo->virtual +
+ vme_context->gpe_context.curbe.offset;

/* VME MV/Mb cost table is passed by using const buffer */
/* Now it uses the fixed search path. So it is constructed directly
@@ -449,7 +449,7 @@ static VAStatus gen9_vme_constant_setup(VADriverContextP ctx,
*/
memcpy(constant_buffer, (char *)vme_context->vme_state_message, 128);

- dri_bo_unmap(vme_context->gpe_context.dynamic_state.bo);
+ dri_bo_unmap(vme_context->gpe_context.curbe.bo);

return VA_STATUS_SUCCESS;
}
@@ -2032,7 +2032,7 @@ Bool gen9_vme_context_init(VADriverContextP ctx, struct intel_encoder_context *e
vme_context->gpe_context.surface_state_binding_table.length = (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_MEDIA_SURFACES_GEN6;

vme_context->gpe_context.idrt_size = sizeof(struct gen8_interface_descriptor_data) * MAX_INTERFACE_DESC_GEN6;
- vme_context->gpe_context.curbe_size = CURBE_TOTAL_DATA_LENGTH;
+ vme_context->gpe_context.curbe.length = CURBE_TOTAL_DATA_LENGTH;
vme_context->gpe_context.sampler_size = 0;


diff --git a/src/gen9_vp9_encoder.c b/src/gen9_vp9_encoder.c
index f39d6d0..5ad7b26 100644
--- a/src/gen9_vp9_encoder.c
+++ b/src/gen9_vp9_encoder.c
@@ -1820,18 +1820,18 @@ gen9_brc_update_add_surfaces_vp9(VADriverContextP ctx,
/* 4. Mbenc curbe input buffer */
gen9_add_dri_buffer_gpe_surface(ctx,
brc_gpe_context,
- mbenc_gpe_context->dynamic_state.bo,
+ mbenc_gpe_context->curbe.bo,
0,
- ALIGN(mbenc_gpe_context->curbe_size, 64),
- mbenc_gpe_context->curbe_offset,
+ ALIGN(mbenc_gpe_context->curbe.length, 64),
+ mbenc_gpe_context->curbe.offset,
VP9_BTI_BRC_MBENC_CURBE_INPUT_G9);
/* 5. Mbenc curbe output buffer */
gen9_add_dri_buffer_gpe_surface(ctx,
brc_gpe_context,
- mbenc_gpe_context->dynamic_state.bo,
+ mbenc_gpe_context->curbe.bo,
0,
- ALIGN(mbenc_gpe_context->curbe_size, 64),
- mbenc_gpe_context->curbe_offset,
+ ALIGN(mbenc_gpe_context->curbe.length, 64),
+ mbenc_gpe_context->curbe.offset,
VP9_BTI_BRC_MBENC_CURBE_OUTPUT_G9);

/* 6. BRC_PIC_STATE read buffer */
@@ -3289,10 +3289,10 @@ gen9_vp9_send_mbenc_surface(VADriverContextP ctx,

gen9_add_dri_buffer_gpe_surface(ctx,
gpe_context,
- mbenc_param->gpe_context_tx->dynamic_state.bo,
+ mbenc_param->gpe_context_tx->curbe.bo,
0,
ALIGN(res_size, 64),
- mbenc_param->gpe_context_tx->curbe_offset,
+ mbenc_param->gpe_context_tx->curbe.offset,
VP9_BTI_MBENC_TX_CURBE_G9);

break;
@@ -3441,10 +3441,10 @@ gen9_vp9_send_mbenc_surface(VADriverContextP ctx,

gen9_add_dri_buffer_gpe_surface(ctx,
gpe_context,
- mbenc_param->gpe_context_tx->dynamic_state.bo,
+ mbenc_param->gpe_context_tx->curbe.bo,
0,
ALIGN(res_size, 64),
- mbenc_param->gpe_context_tx->curbe_offset,
+ mbenc_param->gpe_context_tx->curbe.offset,
VP9_BTI_MBENC_TX_CURBE_G9);


@@ -3684,8 +3684,6 @@ gen9_init_gpe_context_vp9(struct i965_gpe_context *gpe_context,
{
gpe_context->curbe.length = kernel_param->curbe_size; // in bytes

- gpe_context->curbe_size = ALIGN(kernel_param->curbe_size, 64);
-
gpe_context->sampler_size = 0;
if (kernel_param->sampler_size) {
gpe_context->sampler_size = ALIGN(kernel_param->sampler_size, 64);
diff --git a/src/i965_gpe_utils.c b/src/i965_gpe_utils.c
index c5a8935..3739a88 100644
--- a/src/i965_gpe_utils.c
+++ b/src/i965_gpe_utils.c
@@ -1066,8 +1066,8 @@ gen8_gpe_curbe_load(VADriverContextP ctx,

OUT_BATCH(batch, CMD_MEDIA_CURBE_LOAD | (4 - 2));
OUT_BATCH(batch, 0);
- OUT_BATCH(batch, gpe_context->curbe_size);
- OUT_BATCH(batch, gpe_context->curbe_offset);
+ OUT_BATCH(batch, gpe_context->curbe.length);
+ OUT_BATCH(batch, gpe_context->curbe.offset);

ADVANCE_BATCH(batch);
}
@@ -1122,7 +1122,7 @@ gen8_gpe_context_init(VADriverContextP ctx,
assert(bo);
gpe_context->surface_state_binding_table.bo = bo;

- bo_size = gpe_context->idrt_size + gpe_context->curbe_size + gpe_context->sampler_size + 192;
+ bo_size = gpe_context->idrt_size + gpe_context->curbe.length + gpe_context->sampler_size + 192;
dri_bo_unreference(gpe_context->dynamic_state.bo);
bo = dri_bo_alloc(i965->intel.bufmgr,
"surface state & binding table",
@@ -1137,8 +1137,11 @@ gen8_gpe_context_init(VADriverContextP ctx,

/* Constant buffer offset */
start_offset = ALIGN(end_offset, 64);
- gpe_context->curbe_offset = start_offset;
- end_offset = start_offset + gpe_context->curbe_size;
+ dri_bo_unreference(gpe_context->curbe.bo);
+ gpe_context->curbe.bo = bo;
+ dri_bo_reference(gpe_context->curbe.bo);
+ gpe_context->curbe.offset = start_offset;
+ end_offset = start_offset + gpe_context->curbe.length;

/* Interface descriptor offset */
start_offset = ALIGN(end_offset, 64);
@@ -1170,6 +1173,8 @@ gen8_gpe_context_destroy(struct i965_gpe_context *gpe_context)
dri_bo_unreference(gpe_context->indirect_state.bo);
gpe_context->indirect_state.bo = NULL;

+ dri_bo_unreference(gpe_context->curbe.bo);
+ gpe_context->curbe.bo = NULL;
}


@@ -1619,7 +1624,12 @@ gen8_gpe_context_set_dynamic_buffer(VADriverContextP ctx,
dri_bo_reference(gpe_context->dynamic_state.bo);
gpe_context->dynamic_state.bo_size = ds->bo_size;

- gpe_context->curbe_offset = ds->curbe_offset;
+ /* curbe buffer is a part of the dynamic buffer */
+ dri_bo_unreference(gpe_context->curbe.bo);
+ gpe_context->curbe.bo = ds->bo;
+ dri_bo_reference(gpe_context->curbe.bo);
+ gpe_context->curbe.offset = ds->curbe_offset;
+
gpe_context->idrt_offset = ds->idrt_offset;
gpe_context->sampler_offset = ds->sampler_offset;

@@ -1629,15 +1639,15 @@ gen8_gpe_context_set_dynamic_buffer(VADriverContextP ctx,
void *
gen8p_gpe_context_map_curbe(struct i965_gpe_context *gpe_context)
{
- dri_bo_map(gpe_context->dynamic_state.bo, 1);
+ dri_bo_map(gpe_context->curbe.bo, 1);

- return (char *)gpe_context->dynamic_state.bo->virtual + gpe_context->curbe_offset;
+ return (char *)gpe_context->curbe.bo->virtual + gpe_context->curbe.offset;
}

void
gen8p_gpe_context_unmap_curbe(struct i965_gpe_context *gpe_context)
{
- dri_bo_unmap(gpe_context->dynamic_state.bo);
+ dri_bo_unmap(gpe_context->curbe.bo);
}

void
diff --git a/src/i965_gpe_utils.h b/src/i965_gpe_utils.h
index 0cbef43..92123fe 100644
--- a/src/i965_gpe_utils.h
+++ b/src/i965_gpe_utils.h
@@ -92,6 +92,7 @@ struct i965_gpe_context
struct {
dri_bo *bo;
unsigned int length; /* in bytes */
+ unsigned int offset;
} curbe;

struct {
@@ -168,8 +169,6 @@ struct i965_gpe_context
int sampler_size;
unsigned int idrt_offset;
int idrt_size;
- unsigned int curbe_offset;
- int curbe_size;
};

struct gpe_mi_flush_dw_parameter
--
1.9.1
Zhao Yakui
2016-11-17 12:42:07 UTC
Permalink
Post by Xiang, Haihao
To avoid confusion between curbe.length and curbe_size, this patch uses
curbe.length only. curbe.bo is always set even if curbe is a part of the
dynamic state buffer, hence we can use curbe related settings no matter
it is a part of the dynamic state buffer or not.
The curbe.bo in *_gpe_context is defined/used for the old platform.
In fact for the platform from Sandybridge, it can reside in the
dynamic_state.bo.

If curbe.bo/curbe.offset is used directly, it may cause confusion by
suggesting that one dedicated bo is declared/defined for curbe.

So I think that the curbe_offset can follow the HW spec.
Maybe we can add a wrapper function that can map/unmap the virtual
address of curbe_buffer. In that case it can also simplify the mapping
related to curbe_buffer.

Similar considerations for Interface_descriptor_data, sampler_buffer.
Post by Xiang, Haihao
---
src/gen75_vpp_gpe.c | 2 +-
src/gen8_mfc.c | 2 +-
src/gen8_vme.c | 12 ++++++------
src/gen9_post_processing.c | 3 +--
src/gen9_vme.c | 12 ++++++------
src/gen9_vp9_encoder.c | 22 ++++++++++------------
src/i965_gpe_utils.c | 28 +++++++++++++++++++---------
src/i965_gpe_utils.h | 3 +--
8 files changed, 45 insertions(+), 39 deletions(-)
diff --git a/src/gen75_vpp_gpe.c b/src/gen75_vpp_gpe.c
index 9850c1c..2cddb5a 100644
--- a/src/gen75_vpp_gpe.c
+++ b/src/gen75_vpp_gpe.c
@@ -890,7 +890,7 @@ vpp_gpe_context_init(VADriverContextP ctx)
gpe_ctx->surface_state_binding_table.length =
(SURFACE_STATE_PADDED_SIZE_GEN8 + sizeof(unsigned int)) * MAX_MEDIA_SURFACES_GEN6;
- gpe_ctx->curbe_size = CURBE_TOTAL_DATA_LENGTH;
+ gpe_ctx->curbe.length = CURBE_TOTAL_DATA_LENGTH;
gpe_ctx->idrt_size = sizeof(struct gen8_interface_descriptor_data) * MAX_INTERFACE_DESC_GEN6;
}
diff --git a/src/gen8_mfc.c b/src/gen8_mfc.c
index 63ffea5..3ed9e84 100644
--- a/src/gen8_mfc.c
+++ b/src/gen8_mfc.c
@@ -4609,7 +4609,7 @@ Bool gen8_mfc_context_init(VADriverContextP ctx, struct intel_encoder_context *e
mfc_context->gpe_context.surface_state_binding_table.length = (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_MEDIA_SURFACES_GEN6;
mfc_context->gpe_context.idrt_size = sizeof(struct gen8_interface_descriptor_data) * MAX_INTERFACE_DESC_GEN6;
- mfc_context->gpe_context.curbe_size = 32 * 4;
+ mfc_context->gpe_context.curbe.length = 32 * 4;
mfc_context->gpe_context.sampler_size = 0;
mfc_context->gpe_context.vfe_state.max_num_threads = 60 - 1;
diff --git a/src/gen8_vme.c b/src/gen8_vme.c
index c79c62b..96835bf 100644
--- a/src/gen8_vme.c
+++ b/src/gen8_vme.c
@@ -389,10 +389,10 @@ static VAStatus gen8_vme_constant_setup(VADriverContextP ctx,
vme_state_message[31] = mv_num;
- dri_bo_map(vme_context->gpe_context.dynamic_state.bo, 1);
- assert(vme_context->gpe_context.dynamic_state.bo->virtual);
- constant_buffer = (unsigned char *)vme_context->gpe_context.dynamic_state.bo->virtual +
- vme_context->gpe_context.curbe_offset;
+ dri_bo_map(vme_context->gpe_context.curbe.bo, 1);
+ assert(vme_context->gpe_context.curbe.bo->virtual);
+ constant_buffer = (unsigned char *)vme_context->gpe_context.curbe.bo->virtual +
+ vme_context->gpe_context.curbe.offset;
/* VME MV/Mb cost table is passed by using const buffer */
/* Now it uses the fixed search path. So it is constructed directly
@@ -400,7 +400,7 @@ static VAStatus gen8_vme_constant_setup(VADriverContextP ctx,
*/
memcpy(constant_buffer, (char *)vme_context->vme_state_message, 128);
- dri_bo_unmap(vme_context->gpe_context.dynamic_state.bo);
+ dri_bo_unmap(vme_context->gpe_context.curbe.bo);
return VA_STATUS_SUCCESS;
}
@@ -1379,7 +1379,7 @@ Bool gen8_vme_context_init(VADriverContextP ctx, struct intel_encoder_context *e
vme_context->gpe_context.surface_state_binding_table.length = (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_MEDIA_SURFACES_GEN6;
vme_context->gpe_context.idrt_size = sizeof(struct gen8_interface_descriptor_data) * MAX_INTERFACE_DESC_GEN6;
- vme_context->gpe_context.curbe_size = CURBE_TOTAL_DATA_LENGTH;
+ vme_context->gpe_context.curbe.length = CURBE_TOTAL_DATA_LENGTH;
vme_context->gpe_context.sampler_size = 0;
diff --git a/src/gen9_post_processing.c b/src/gen9_post_processing.c
index a5d345c..71da501 100644
--- a/src/gen9_post_processing.c
+++ b/src/gen9_post_processing.c
@@ -538,8 +538,7 @@ gen9_post_processing_context_init(VADriverContextP ctx,
gen8_gpe_load_kernels(ctx, gpe_context,&scaling_kernel, 1);
gpe_context->idrt_size = ALIGN(sizeof(struct gen8_interface_descriptor_data), 64);
gpe_context->sampler_size = ALIGN(sizeof(struct gen8_sampler_state), 64);
- gpe_context->curbe_size = ALIGN(sizeof(struct scaling_input_parameter), 64);
- gpe_context->curbe.length = gpe_context->curbe_size;
+ gpe_context->curbe.length = ALIGN(sizeof(struct scaling_input_parameter), 64);
gpe_context->surface_state_binding_table.max_entries = MAX_SCALING_SURFACES;
gpe_context->surface_state_binding_table.binding_table_offset = 0;
diff --git a/src/gen9_vme.c b/src/gen9_vme.c
index 6ad8fff..a59fe2a 100644
--- a/src/gen9_vme.c
+++ b/src/gen9_vme.c
@@ -438,10 +438,10 @@ static VAStatus gen9_vme_constant_setup(VADriverContextP ctx,
vme_state_message[31] = mv_num;
- dri_bo_map(vme_context->gpe_context.dynamic_state.bo, 1);
- assert(vme_context->gpe_context.dynamic_state.bo->virtual);
- constant_buffer = (unsigned char *)vme_context->gpe_context.dynamic_state.bo->virtual +
- vme_context->gpe_context.curbe_offset;
+ dri_bo_map(vme_context->gpe_context.curbe.bo, 1);
+ assert(vme_context->gpe_context.curbe.bo->virtual);
+ constant_buffer = (unsigned char *)vme_context->gpe_context.curbe.bo->virtual +
+ vme_context->gpe_context.curbe.offset;
/* VME MV/Mb cost table is passed by using const buffer */
/* Now it uses the fixed search path. So it is constructed directly
@@ -449,7 +449,7 @@ static VAStatus gen9_vme_constant_setup(VADriverContextP ctx,
*/
memcpy(constant_buffer, (char *)vme_context->vme_state_message, 128);
- dri_bo_unmap(vme_context->gpe_context.dynamic_state.bo);
+ dri_bo_unmap(vme_context->gpe_context.curbe.bo);
return VA_STATUS_SUCCESS;
}
@@ -2032,7 +2032,7 @@ Bool gen9_vme_context_init(VADriverContextP ctx, struct intel_encoder_context *e
vme_context->gpe_context.surface_state_binding_table.length = (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_MEDIA_SURFACES_GEN6;
vme_context->gpe_context.idrt_size = sizeof(struct gen8_interface_descriptor_data) * MAX_INTERFACE_DESC_GEN6;
- vme_context->gpe_context.curbe_size = CURBE_TOTAL_DATA_LENGTH;
+ vme_context->gpe_context.curbe.length = CURBE_TOTAL_DATA_LENGTH;
vme_context->gpe_context.sampler_size = 0;
diff --git a/src/gen9_vp9_encoder.c b/src/gen9_vp9_encoder.c
index f39d6d0..5ad7b26 100644
--- a/src/gen9_vp9_encoder.c
+++ b/src/gen9_vp9_encoder.c
@@ -1820,18 +1820,18 @@ gen9_brc_update_add_surfaces_vp9(VADriverContextP ctx,
/* 4. Mbenc curbe input buffer */
gen9_add_dri_buffer_gpe_surface(ctx,
brc_gpe_context,
- mbenc_gpe_context->dynamic_state.bo,
+ mbenc_gpe_context->curbe.bo,
0,
- ALIGN(mbenc_gpe_context->curbe_size, 64),
- mbenc_gpe_context->curbe_offset,
+ ALIGN(mbenc_gpe_context->curbe.length, 64),
+ mbenc_gpe_context->curbe.offset,
VP9_BTI_BRC_MBENC_CURBE_INPUT_G9);
/* 5. Mbenc curbe output buffer */
gen9_add_dri_buffer_gpe_surface(ctx,
brc_gpe_context,
- mbenc_gpe_context->dynamic_state.bo,
+ mbenc_gpe_context->curbe.bo,
0,
- ALIGN(mbenc_gpe_context->curbe_size, 64),
- mbenc_gpe_context->curbe_offset,
+ ALIGN(mbenc_gpe_context->curbe.length, 64),
+ mbenc_gpe_context->curbe.offset,
VP9_BTI_BRC_MBENC_CURBE_OUTPUT_G9);
/* 6. BRC_PIC_STATE read buffer */
@@ -3289,10 +3289,10 @@ gen9_vp9_send_mbenc_surface(VADriverContextP ctx,
gen9_add_dri_buffer_gpe_surface(ctx,
gpe_context,
- mbenc_param->gpe_context_tx->dynamic_state.bo,
+ mbenc_param->gpe_context_tx->curbe.bo,
0,
ALIGN(res_size, 64),
- mbenc_param->gpe_context_tx->curbe_offset,
+ mbenc_param->gpe_context_tx->curbe.offset,
VP9_BTI_MBENC_TX_CURBE_G9);
break;
@@ -3441,10 +3441,10 @@ gen9_vp9_send_mbenc_surface(VADriverContextP ctx,
gen9_add_dri_buffer_gpe_surface(ctx,
gpe_context,
- mbenc_param->gpe_context_tx->dynamic_state.bo,
+ mbenc_param->gpe_context_tx->curbe.bo,
0,
ALIGN(res_size, 64),
- mbenc_param->gpe_context_tx->curbe_offset,
+ mbenc_param->gpe_context_tx->curbe.offset,
VP9_BTI_MBENC_TX_CURBE_G9);
@@ -3684,8 +3684,6 @@ gen9_init_gpe_context_vp9(struct i965_gpe_context *gpe_context,
{
gpe_context->curbe.length = kernel_param->curbe_size; // in bytes
- gpe_context->curbe_size = ALIGN(kernel_param->curbe_size, 64);
-
gpe_context->sampler_size = 0;
if (kernel_param->sampler_size) {
gpe_context->sampler_size = ALIGN(kernel_param->sampler_size, 64);
diff --git a/src/i965_gpe_utils.c b/src/i965_gpe_utils.c
index c5a8935..3739a88 100644
--- a/src/i965_gpe_utils.c
+++ b/src/i965_gpe_utils.c
@@ -1066,8 +1066,8 @@ gen8_gpe_curbe_load(VADriverContextP ctx,
OUT_BATCH(batch, CMD_MEDIA_CURBE_LOAD | (4 - 2));
OUT_BATCH(batch, 0);
- OUT_BATCH(batch, gpe_context->curbe_size);
- OUT_BATCH(batch, gpe_context->curbe_offset);
+ OUT_BATCH(batch, gpe_context->curbe.length);
+ OUT_BATCH(batch, gpe_context->curbe.offset);
ADVANCE_BATCH(batch);
}
@@ -1122,7 +1122,7 @@ gen8_gpe_context_init(VADriverContextP ctx,
assert(bo);
gpe_context->surface_state_binding_table.bo = bo;
- bo_size = gpe_context->idrt_size + gpe_context->curbe_size + gpe_context->sampler_size + 192;
+ bo_size = gpe_context->idrt_size + gpe_context->curbe.length + gpe_context->sampler_size + 192;
dri_bo_unreference(gpe_context->dynamic_state.bo);
bo = dri_bo_alloc(i965->intel.bufmgr,
"surface state& binding table",
@@ -1137,8 +1137,11 @@ gen8_gpe_context_init(VADriverContextP ctx,
/* Constant buffer offset */
start_offset = ALIGN(end_offset, 64);
- gpe_context->curbe_offset = start_offset;
- end_offset = start_offset + gpe_context->curbe_size;
+ dri_bo_unreference(gpe_context->curbe.bo);
+ gpe_context->curbe.bo = bo;
+ dri_bo_reference(gpe_context->curbe.bo);
+ gpe_context->curbe.offset = start_offset;
+ end_offset = start_offset + gpe_context->curbe.length;
/* Interface descriptor offset */
start_offset = ALIGN(end_offset, 64);
@@ -1170,6 +1173,8 @@ gen8_gpe_context_destroy(struct i965_gpe_context *gpe_context)
dri_bo_unreference(gpe_context->indirect_state.bo);
gpe_context->indirect_state.bo = NULL;
+ dri_bo_unreference(gpe_context->curbe.bo);
+ gpe_context->curbe.bo = NULL;
}
@@ -1619,7 +1624,12 @@ gen8_gpe_context_set_dynamic_buffer(VADriverContextP ctx,
dri_bo_reference(gpe_context->dynamic_state.bo);
gpe_context->dynamic_state.bo_size = ds->bo_size;
- gpe_context->curbe_offset = ds->curbe_offset;
+ /* curbe buffer is a part of the dynamic buffer */
+ dri_bo_unreference(gpe_context->curbe.bo);
+ gpe_context->curbe.bo = ds->bo;
+ dri_bo_reference(gpe_context->curbe.bo);
+ gpe_context->curbe.offset = ds->curbe_offset;
+
gpe_context->idrt_offset = ds->idrt_offset;
gpe_context->sampler_offset = ds->sampler_offset;
@@ -1629,15 +1639,15 @@ gen8_gpe_context_set_dynamic_buffer(VADriverContextP ctx,
void *
gen8p_gpe_context_map_curbe(struct i965_gpe_context *gpe_context)
{
- dri_bo_map(gpe_context->dynamic_state.bo, 1);
+ dri_bo_map(gpe_context->curbe.bo, 1);
- return (char *)gpe_context->dynamic_state.bo->virtual + gpe_context->curbe_offset;
+ return (char *)gpe_context->curbe.bo->virtual + gpe_context->curbe.offset;
}
void
gen8p_gpe_context_unmap_curbe(struct i965_gpe_context *gpe_context)
{
- dri_bo_unmap(gpe_context->dynamic_state.bo);
+ dri_bo_unmap(gpe_context->curbe.bo);
}
void
diff --git a/src/i965_gpe_utils.h b/src/i965_gpe_utils.h
index 0cbef43..92123fe 100644
--- a/src/i965_gpe_utils.h
+++ b/src/i965_gpe_utils.h
@@ -92,6 +92,7 @@ struct i965_gpe_context
struct {
dri_bo *bo;
unsigned int length; /* in bytes */
+ unsigned int offset;
} curbe;
struct {
@@ -168,8 +169,6 @@ struct i965_gpe_context
int sampler_size;
unsigned int idrt_offset;
int idrt_size;
- unsigned int curbe_offset;
- int curbe_size;
};
struct gpe_mi_flush_dw_parameter
Xiang, Haihao
2016-11-17 16:58:04 UTC
Permalink
-----Original Message-----
From: Zhao, Yakui
Sent: Thursday, November 17, 2016 8:42 PM
Subject: Re: [Libva] [Libva-intel-driver][PATCH 02/17] Move all curbe related
settings to the inner structure in i965_gpe_context
Post by Xiang, Haihao
To avoid confusion between curbe.length and curbe_size, this patch
uses curbe.length only. curbe.bo is always set even if curbe is a part
of the dynamic state buffer, hence we can use curbe related settings
no matter it is a part of the dynamic state buffer or not.
The curbe.bo in *_gpe_context is defined/used for the old platform.
In fact for the platform from Sandybridge, it can reside in the
dynamic_state.bo.
If the curbe.bo/curbe.offset is used directly,
previously curbe_offset is also used directly.
maybe it brings the confusion
that one dedicated bo is declared/defined for curbe.
curbe.bo/curbe.offset is set in the gpe functions; a gpe user just uses it and needn't care
whether it is a dedicated bo or not.
So I think that the curbe_offset can follow the HW spec.
curbe.offset still follows the HW spec. The problem for the old structure is that we have an inner curbe structure
and curbe_offset/ curbe_size in the same structure. It is easy to confuse a new developer.
Maybe we can add the wrapper function that can map/unmap the virtual
address of curbe_buffer. In such case it can also simplify the mapping related
with curbe_buffer.
We already have such functions, but we don't use them for some old platforms. We can add similar functions for idrt and sampler.
Similar considerations for Interface_descriptor_data, sampler_buffer.
Post by Xiang, Haihao
---
src/gen75_vpp_gpe.c | 2 +-
src/gen8_mfc.c | 2 +-
src/gen8_vme.c | 12 ++++++------
src/gen9_post_processing.c | 3 +--
src/gen9_vme.c | 12 ++++++------
src/gen9_vp9_encoder.c | 22 ++++++++++------------
src/i965_gpe_utils.c | 28 +++++++++++++++++++---------
src/i965_gpe_utils.h | 3 +--
8 files changed, 45 insertions(+), 39 deletions(-)
diff --git a/src/gen75_vpp_gpe.c b/src/gen75_vpp_gpe.c index
9850c1c..2cddb5a 100644
--- a/src/gen75_vpp_gpe.c
+++ b/src/gen75_vpp_gpe.c
@@ -890,7 +890,7 @@ vpp_gpe_context_init(VADriverContextP ctx)
gpe_ctx->surface_state_binding_table.length =
(SURFACE_STATE_PADDED_SIZE_GEN8 + sizeof(unsigned
int)) * MAX_MEDIA_SURFACES_GEN6;
- gpe_ctx->curbe_size = CURBE_TOTAL_DATA_LENGTH;
+ gpe_ctx->curbe.length = CURBE_TOTAL_DATA_LENGTH;
gpe_ctx->idrt_size = sizeof(struct
gen8_interface_descriptor_data) * MAX_INTERFACE_DESC_GEN6;
}
diff --git a/src/gen8_mfc.c b/src/gen8_mfc.c index 63ffea5..3ed9e84
100644
--- a/src/gen8_mfc.c
+++ b/src/gen8_mfc.c
@@ -4609,7 +4609,7 @@ Bool gen8_mfc_context_init(VADriverContextP
ctx, struct intel_encoder_context *e
Post by Xiang, Haihao
mfc_context->gpe_context.surface_state_binding_table.length =
(SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) *
MAX_MEDIA_SURFACES_GEN6;
mfc_context->gpe_context.idrt_size = sizeof(struct
gen8_interface_descriptor_data) * MAX_INTERFACE_DESC_GEN6;
Post by Xiang, Haihao
- mfc_context->gpe_context.curbe_size = 32 * 4;
+ mfc_context->gpe_context.curbe.length = 32 * 4;
mfc_context->gpe_context.sampler_size = 0;
mfc_context->gpe_context.vfe_state.max_num_threads = 60 - 1;
diff --git a/src/gen8_vme.c b/src/gen8_vme.c index c79c62b..96835bf
100644
--- a/src/gen8_vme.c
+++ b/src/gen8_vme.c
@@ -389,10 +389,10 @@ static VAStatus
gen8_vme_constant_setup(VADriverContextP ctx,
vme_state_message[31] = mv_num;
- dri_bo_map(vme_context->gpe_context.dynamic_state.bo, 1);
- assert(vme_context->gpe_context.dynamic_state.bo->virtual);
- constant_buffer = (unsigned char *)vme_context-
gpe_context.dynamic_state.bo->virtual +
- vme_context->gpe_context.curbe_offset;
+ dri_bo_map(vme_context->gpe_context.curbe.bo, 1);
+ assert(vme_context->gpe_context.curbe.bo->virtual);
+ constant_buffer = (unsigned char *)vme_context-
gpe_context.curbe.bo->virtual +
+
+ vme_context->gpe_context.curbe.offset;
/* VME MV/Mb cost table is passed by using const buffer */
/* Now it uses the fixed search path. So it is constructed
gen8_vme_constant_setup(VADriverContextP ctx,
Post by Xiang, Haihao
*/
memcpy(constant_buffer, (char *)vme_context->vme_state_message, 128);
- dri_bo_unmap(vme_context->gpe_context.dynamic_state.bo);
+ dri_bo_unmap(vme_context->gpe_context.curbe.bo);
return VA_STATUS_SUCCESS;
}
@@ -1379,7 +1379,7 @@ Bool gen8_vme_context_init(VADriverContextP
ctx, struct intel_encoder_context *e
Post by Xiang, Haihao
vme_context->gpe_context.surface_state_binding_table.length
= (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) *
MAX_MEDIA_SURFACES_GEN6;
vme_context->gpe_context.idrt_size = sizeof(struct
gen8_interface_descriptor_data) * MAX_INTERFACE_DESC_GEN6;
Post by Xiang, Haihao
- vme_context->gpe_context.curbe_size =
CURBE_TOTAL_DATA_LENGTH;
Post by Xiang, Haihao
+ vme_context->gpe_context.curbe.length =
+ CURBE_TOTAL_DATA_LENGTH;
vme_context->gpe_context.sampler_size = 0;
diff --git a/src/gen9_post_processing.c b/src/gen9_post_processing.c
index a5d345c..71da501 100644
--- a/src/gen9_post_processing.c
+++ b/src/gen9_post_processing.c
@@ -538,8 +538,7 @@
gen9_post_processing_context_init(VADriverContextP ctx,
Post by Xiang, Haihao
gen8_gpe_load_kernels(ctx, gpe_context,&scaling_kernel, 1);
gpe_context->idrt_size = ALIGN(sizeof(struct
gen8_interface_descriptor_data), 64);
Post by Xiang, Haihao
gpe_context->sampler_size = ALIGN(sizeof(struct gen8_sampler_state),
64);
Post by Xiang, Haihao
- gpe_context->curbe_size = ALIGN(sizeof(struct
scaling_input_parameter), 64);
Post by Xiang, Haihao
- gpe_context->curbe.length = gpe_context->curbe_size;
+ gpe_context->curbe.length = ALIGN(sizeof(struct
+ scaling_input_parameter), 64);
gpe_context->surface_state_binding_table.max_entries =
MAX_SCALING_SURFACES;
Post by Xiang, Haihao
gpe_context->surface_state_binding_table.binding_table_offset =
0; diff --git a/src/gen9_vme.c b/src/gen9_vme.c index 6ad8fff..a59fe2a
100644
--- a/src/gen9_vme.c
+++ b/src/gen9_vme.c
@@ -438,10 +438,10 @@ static VAStatus
gen9_vme_constant_setup(VADriverContextP ctx,
vme_state_message[31] = mv_num;
- dri_bo_map(vme_context->gpe_context.dynamic_state.bo, 1);
- assert(vme_context->gpe_context.dynamic_state.bo->virtual);
- constant_buffer = (unsigned char *)vme_context-
gpe_context.dynamic_state.bo->virtual +
- vme_context->gpe_context.curbe_offset;
+ dri_bo_map(vme_context->gpe_context.curbe.bo, 1);
+ assert(vme_context->gpe_context.curbe.bo->virtual);
+ constant_buffer = (unsigned char *)vme_context-
gpe_context.curbe.bo->virtual +
+
+ vme_context->gpe_context.curbe.offset;
/* VME MV/Mb cost table is passed by using const buffer */
/* Now it uses the fixed search path. So it is constructed
gen9_vme_constant_setup(VADriverContextP ctx,
Post by Xiang, Haihao
*/
memcpy(constant_buffer, (char *)vme_context->vme_state_message, 128);
- dri_bo_unmap(vme_context->gpe_context.dynamic_state.bo);
+ dri_bo_unmap(vme_context->gpe_context.curbe.bo);
return VA_STATUS_SUCCESS;
}
@@ -2032,7 +2032,7 @@ Bool gen9_vme_context_init(VADriverContextP
ctx, struct intel_encoder_context *e
Post by Xiang, Haihao
vme_context->gpe_context.surface_state_binding_table.length =
(SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) *
MAX_MEDIA_SURFACES_GEN6;
vme_context->gpe_context.idrt_size = sizeof(struct
gen8_interface_descriptor_data) * MAX_INTERFACE_DESC_GEN6;
Post by Xiang, Haihao
- vme_context->gpe_context.curbe_size = CURBE_TOTAL_DATA_LENGTH;
+ vme_context->gpe_context.curbe.length =
CURBE_TOTAL_DATA_LENGTH;
Post by Xiang, Haihao
vme_context->gpe_context.sampler_size = 0;
diff --git a/src/gen9_vp9_encoder.c b/src/gen9_vp9_encoder.c index
f39d6d0..5ad7b26 100644
--- a/src/gen9_vp9_encoder.c
+++ b/src/gen9_vp9_encoder.c
@@ -1820,18 +1820,18 @@
gen9_brc_update_add_surfaces_vp9(VADriverContextP ctx,
Post by Xiang, Haihao
/* 4. Mbenc curbe input buffer */
gen9_add_dri_buffer_gpe_surface(ctx,
brc_gpe_context,
- mbenc_gpe_context->dynamic_state.bo,
+ mbenc_gpe_context->curbe.bo,
0,
- ALIGN(mbenc_gpe_context->curbe_size, 64),
- mbenc_gpe_context->curbe_offset,
+ ALIGN(mbenc_gpe_context->curbe.length, 64),
+ mbenc_gpe_context->curbe.offset,
VP9_BTI_BRC_MBENC_CURBE_INPUT_G9);
/* 5. Mbenc curbe output buffer */
gen9_add_dri_buffer_gpe_surface(ctx,
brc_gpe_context,
- mbenc_gpe_context->dynamic_state.bo,
+ mbenc_gpe_context->curbe.bo,
0,
- ALIGN(mbenc_gpe_context->curbe_size, 64),
- mbenc_gpe_context->curbe_offset,
+ ALIGN(mbenc_gpe_context->curbe.length, 64),
+ mbenc_gpe_context->curbe.offset,
VP9_BTI_BRC_MBENC_CURBE_OUTPUT_G9);
gen9_vp9_send_mbenc_surface(VADriverContextP ctx,
gen9_add_dri_buffer_gpe_surface(ctx,
gpe_context,
- mbenc_param->gpe_context_tx->dynamic_state.bo,
+
+ mbenc_param->gpe_context_tx->curbe.bo,
0,
ALIGN(res_size, 64),
- mbenc_param->gpe_context_tx->curbe_offset,
+
+ mbenc_param->gpe_context_tx->curbe.offset,
VP9_BTI_MBENC_TX_CURBE_G9);
break;
@@ -3441,10 +3441,10 @@
gen9_vp9_send_mbenc_surface(VADriverContextP
Post by Xiang, Haihao
ctx,
gen9_add_dri_buffer_gpe_surface(ctx,
gpe_context,
- mbenc_param->gpe_context_tx->dynamic_state.bo,
+
+ mbenc_param->gpe_context_tx->curbe.bo,
0,
ALIGN(res_size, 64),
- mbenc_param->gpe_context_tx->curbe_offset,
+
+ mbenc_param->gpe_context_tx->curbe.offset,
VP9_BTI_MBENC_TX_CURBE_G9);
@@ -3684,8 +3684,6 @@ gen9_init_gpe_context_vp9(struct
i965_gpe_context *gpe_context,
Post by Xiang, Haihao
{
gpe_context->curbe.length = kernel_param->curbe_size; // in bytes
- gpe_context->curbe_size = ALIGN(kernel_param->curbe_size, 64);
-
gpe_context->sampler_size = 0;
if (kernel_param->sampler_size) {
gpe_context->sampler_size =
ALIGN(kernel_param->sampler_size, 64); diff --git
a/src/i965_gpe_utils.c b/src/i965_gpe_utils.c index c5a8935..3739a88
100644
--- a/src/i965_gpe_utils.c
+++ b/src/i965_gpe_utils.c
@@ -1066,8 +1066,8 @@ gen8_gpe_curbe_load(VADriverContextP ctx,
OUT_BATCH(batch, CMD_MEDIA_CURBE_LOAD | (4 - 2));
OUT_BATCH(batch, 0);
- OUT_BATCH(batch, gpe_context->curbe_size);
- OUT_BATCH(batch, gpe_context->curbe_offset);
+ OUT_BATCH(batch, gpe_context->curbe.length);
+ OUT_BATCH(batch, gpe_context->curbe.offset);
ADVANCE_BATCH(batch);
}
@@ -1122,7 +1122,7 @@ gen8_gpe_context_init(VADriverContextP ctx,
assert(bo);
gpe_context->surface_state_binding_table.bo = bo;
- bo_size = gpe_context->idrt_size + gpe_context->curbe_size +
gpe_context->sampler_size + 192;
Post by Xiang, Haihao
+ bo_size = gpe_context->idrt_size + gpe_context->curbe.length +
+ gpe_context->sampler_size + 192;
dri_bo_unreference(gpe_context->dynamic_state.bo);
bo = dri_bo_alloc(i965->intel.bufmgr,
/* Constant buffer offset */
start_offset = ALIGN(end_offset, 64);
- gpe_context->curbe_offset = start_offset;
- end_offset = start_offset + gpe_context->curbe_size;
+ dri_bo_unreference(gpe_context->curbe.bo);
+ gpe_context->curbe.bo = bo;
+ dri_bo_reference(gpe_context->curbe.bo);
+ gpe_context->curbe.offset = start_offset;
+ end_offset = start_offset + gpe_context->curbe.length;
/* Interface descriptor offset */
gen8_gpe_context_destroy(struct i965_gpe_context *gpe_context)
dri_bo_unreference(gpe_context->indirect_state.bo);
gpe_context->indirect_state.bo = NULL;
+ dri_bo_unreference(gpe_context->curbe.bo);
+ gpe_context->curbe.bo = NULL;
}
@@ -1619,7 +1624,12 @@
gen8_gpe_context_set_dynamic_buffer(VADriverContextP ctx,
Post by Xiang, Haihao
dri_bo_reference(gpe_context->dynamic_state.bo);
gpe_context->dynamic_state.bo_size = ds->bo_size;
- gpe_context->curbe_offset = ds->curbe_offset;
+ /* curbe buffer is a part of the dynamic buffer */
+ dri_bo_unreference(gpe_context->curbe.bo);
+ gpe_context->curbe.bo = ds->bo;
+ dri_bo_reference(gpe_context->curbe.bo);
+ gpe_context->curbe.offset = ds->curbe_offset;
+
gpe_context->idrt_offset = ds->idrt_offset;
gpe_context->sampler_offset = ds->sampler_offset;
@@ -1629,15 +1639,15 @@
gen8_gpe_context_set_dynamic_buffer(VADriverContextP ctx,
Post by Xiang, Haihao
void *
gen8p_gpe_context_map_curbe(struct i965_gpe_context *gpe_context)
{
- dri_bo_map(gpe_context->dynamic_state.bo, 1);
+ dri_bo_map(gpe_context->curbe.bo, 1);
- return (char *)gpe_context->dynamic_state.bo->virtual + gpe_context-
curbe_offset;
+ return (char *)gpe_context->curbe.bo->virtual +
+ gpe_context->curbe.offset;
}
void
gen8p_gpe_context_unmap_curbe(struct i965_gpe_context
*gpe_context)
Post by Xiang, Haihao
{
- dri_bo_unmap(gpe_context->dynamic_state.bo);
+ dri_bo_unmap(gpe_context->curbe.bo);
}
void
diff --git a/src/i965_gpe_utils.h b/src/i965_gpe_utils.h index
0cbef43..92123fe 100644
--- a/src/i965_gpe_utils.h
+++ b/src/i965_gpe_utils.h
@@ -92,6 +92,7 @@ struct i965_gpe_context
struct {
dri_bo *bo;
unsigned int length; /* in bytes */
+ unsigned int offset;
} curbe;
struct {
@@ -168,8 +169,6 @@ struct i965_gpe_context
int sampler_size;
unsigned int idrt_offset;
int idrt_size;
- unsigned int curbe_offset;
- int curbe_size;
};
struct gpe_mi_flush_dw_parameter
Zhao Yakui
2016-11-18 01:03:07 UTC
Permalink
Post by Xiang, Haihao
-----Original Message-----
From: Zhao, Yakui
Sent: Thursday, November 17, 2016 8:42 PM
Subject: Re: [Libva] [Libva-intel-driver][PATCH 02/17] Move all curbe related
settings to the inner structure in i965_gpe_context
Post by Xiang, Haihao
To avoid confusion between curbe.length and curbe_size, this patch
uses curbe.length only. curbe.bo is always set even if curbe is a part
of the dynamic state buffer, hence we can use curbe related settings
no matter it is a part of the dynamic state buffer or not.
The curbe.bo in *_gpe_context is defined/used for the old platform.
In fact for the platform from Sandybridge, it can reside in the
dynamic_state.bo.
If the curbe.bo/curbe.offset is used directly,
previously curbe_offset is also used directly.
maybe it brings the confusion
that one dedicated bo is declared/defined for curbe.
curbe.bo/curbe.offset is set in the gpe functions; a gpe user just uses it and needn't care whether
it is a dedicated bo or not.
So I think that the curbe_offset can follow the HW spec.
curbe.offset still follows the HW spec. The problem for the old structure is that we have an inner curbe structure
and curbe_offset/ curbe_size in the same structure. It is easy to confuse a new developer.
The inner curbe structure is mainly defined for the old platform.
For the HW that should allocate the curbe_buffer from dynamic_buffer,
IMO it only needs to care about the dynamic_buffer and curbe_offset when it
needs to access the curbe_buffer.

Maybe we can add some explanations about the i965_gpe_context.
Post by Xiang, Haihao
The inner structure(idrt, sampler, curbe) is mainly defined for the
old platform. And the dedicated bo is allocated for them.
Post by Xiang, Haihao
dynamic_buffer/curbe_offset is for the later platform.
Maybe we can add the wrapper function that can map/unmap the virtual
address of curbe_buffer. In such case it can also simplify the mapping related
with curbe_buffer.
We already have such functions, but we don't use them for some old platforms. We can add similar functions for idrt and sampler.
For the wrapper, IMO we can only consider them for the platform since
gen8+. We can leave the old platform alone.
Post by Xiang, Haihao
Similar considerations for Interface_descriptor_data, sampler_buffer.
Post by Xiang, Haihao
---
src/gen75_vpp_gpe.c | 2 +-
src/gen8_mfc.c | 2 +-
src/gen8_vme.c | 12 ++++++------
src/gen9_post_processing.c | 3 +--
src/gen9_vme.c | 12 ++++++------
src/gen9_vp9_encoder.c | 22 ++++++++++------------
src/i965_gpe_utils.c | 28 +++++++++++++++++++---------
src/i965_gpe_utils.h | 3 +--
8 files changed, 45 insertions(+), 39 deletions(-)
diff --git a/src/gen75_vpp_gpe.c b/src/gen75_vpp_gpe.c index
9850c1c..2cddb5a 100644
--- a/src/gen75_vpp_gpe.c
+++ b/src/gen75_vpp_gpe.c
@@ -890,7 +890,7 @@ vpp_gpe_context_init(VADriverContextP ctx)
gpe_ctx->surface_state_binding_table.length =
(SURFACE_STATE_PADDED_SIZE_GEN8 + sizeof(unsigned
int)) * MAX_MEDIA_SURFACES_GEN6;
- gpe_ctx->curbe_size = CURBE_TOTAL_DATA_LENGTH;
+ gpe_ctx->curbe.length = CURBE_TOTAL_DATA_LENGTH;
gpe_ctx->idrt_size = sizeof(struct
gen8_interface_descriptor_data) * MAX_INTERFACE_DESC_GEN6;
}
diff --git a/src/gen8_mfc.c b/src/gen8_mfc.c index 63ffea5..3ed9e84
100644
--- a/src/gen8_mfc.c
+++ b/src/gen8_mfc.c
@@ -4609,7 +4609,7 @@ Bool gen8_mfc_context_init(VADriverContextP
ctx, struct intel_encoder_context *e
Post by Xiang, Haihao
mfc_context->gpe_context.surface_state_binding_table.length =
(SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) *
MAX_MEDIA_SURFACES_GEN6;
mfc_context->gpe_context.idrt_size = sizeof(struct
gen8_interface_descriptor_data) * MAX_INTERFACE_DESC_GEN6;
Post by Xiang, Haihao
- mfc_context->gpe_context.curbe_size = 32 * 4;
+ mfc_context->gpe_context.curbe.length = 32 * 4;
mfc_context->gpe_context.sampler_size = 0;
mfc_context->gpe_context.vfe_state.max_num_threads = 60 - 1;
diff --git a/src/gen8_vme.c b/src/gen8_vme.c index c79c62b..96835bf
100644
--- a/src/gen8_vme.c
+++ b/src/gen8_vme.c
@@ -389,10 +389,10 @@ static VAStatus
gen8_vme_constant_setup(VADriverContextP ctx,
vme_state_message[31] = mv_num;
- dri_bo_map(vme_context->gpe_context.dynamic_state.bo, 1);
- assert(vme_context->gpe_context.dynamic_state.bo->virtual);
- constant_buffer = (unsigned char *)vme_context-
gpe_context.dynamic_state.bo->virtual +
- vme_context->gpe_context.curbe_offset;
+ dri_bo_map(vme_context->gpe_context.curbe.bo, 1);
+ assert(vme_context->gpe_context.curbe.bo->virtual);
+ constant_buffer = (unsigned char *)vme_context-
gpe_context.curbe.bo->virtual +
+
+ vme_context->gpe_context.curbe.offset;
/* VME MV/Mb cost table is passed by using const buffer */
/* Now it uses the fixed search path. So it is constructed
gen8_vme_constant_setup(VADriverContextP ctx,
Post by Xiang, Haihao
*/
memcpy(constant_buffer, (char *)vme_context->vme_state_message, 128);
- dri_bo_unmap(vme_context->gpe_context.dynamic_state.bo);
+ dri_bo_unmap(vme_context->gpe_context.curbe.bo);
return VA_STATUS_SUCCESS;
}
@@ -1379,7 +1379,7 @@ Bool gen8_vme_context_init(VADriverContextP
ctx, struct intel_encoder_context *e
Post by Xiang, Haihao
vme_context->gpe_context.surface_state_binding_table.length
= (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) *
MAX_MEDIA_SURFACES_GEN6;
vme_context->gpe_context.idrt_size = sizeof(struct
gen8_interface_descriptor_data) * MAX_INTERFACE_DESC_GEN6;
Post by Xiang, Haihao
- vme_context->gpe_context.curbe_size =
CURBE_TOTAL_DATA_LENGTH;
Post by Xiang, Haihao
+ vme_context->gpe_context.curbe.length =
+ CURBE_TOTAL_DATA_LENGTH;
vme_context->gpe_context.sampler_size = 0;
diff --git a/src/gen9_post_processing.c b/src/gen9_post_processing.c
index a5d345c..71da501 100644
--- a/src/gen9_post_processing.c
+++ b/src/gen9_post_processing.c
@@ -538,8 +538,7 @@
gen9_post_processing_context_init(VADriverContextP ctx,
Post by Xiang, Haihao
gen8_gpe_load_kernels(ctx, gpe_context,&scaling_kernel, 1);
gpe_context->idrt_size = ALIGN(sizeof(struct
gen8_interface_descriptor_data), 64);
Post by Xiang, Haihao
gpe_context->sampler_size = ALIGN(sizeof(struct gen8_sampler_state),
64);
Post by Xiang, Haihao
- gpe_context->curbe_size = ALIGN(sizeof(struct
scaling_input_parameter), 64);
Post by Xiang, Haihao
- gpe_context->curbe.length = gpe_context->curbe_size;
+ gpe_context->curbe.length = ALIGN(sizeof(struct
+ scaling_input_parameter), 64);
gpe_context->surface_state_binding_table.max_entries =
MAX_SCALING_SURFACES;
Post by Xiang, Haihao
gpe_context->surface_state_binding_table.binding_table_offset =
0; diff --git a/src/gen9_vme.c b/src/gen9_vme.c index 6ad8fff..a59fe2a
100644
--- a/src/gen9_vme.c
+++ b/src/gen9_vme.c
@@ -438,10 +438,10 @@ static VAStatus
gen9_vme_constant_setup(VADriverContextP ctx,
vme_state_message[31] = mv_num;
- dri_bo_map(vme_context->gpe_context.dynamic_state.bo, 1);
- assert(vme_context->gpe_context.dynamic_state.bo->virtual);
- constant_buffer = (unsigned char *)vme_context-
gpe_context.dynamic_state.bo->virtual +
- vme_context->gpe_context.curbe_offset;
+ dri_bo_map(vme_context->gpe_context.curbe.bo, 1);
+ assert(vme_context->gpe_context.curbe.bo->virtual);
+ constant_buffer = (unsigned char *)vme_context-
gpe_context.curbe.bo->virtual +
+
+ vme_context->gpe_context.curbe.offset;
/* VME MV/Mb cost table is passed by using const buffer */
/* Now it uses the fixed search path. So it is constructed
gen9_vme_constant_setup(VADriverContextP ctx,
Post by Xiang, Haihao
*/
memcpy(constant_buffer, (char *)vme_context->vme_state_message, 128);
- dri_bo_unmap(vme_context->gpe_context.dynamic_state.bo);
+ dri_bo_unmap(vme_context->gpe_context.curbe.bo);
return VA_STATUS_SUCCESS;
}
@@ -2032,7 +2032,7 @@ Bool gen9_vme_context_init(VADriverContextP
ctx, struct intel_encoder_context *e
Post by Xiang, Haihao
vme_context->gpe_context.surface_state_binding_table.length =
(SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) *
MAX_MEDIA_SURFACES_GEN6;
vme_context->gpe_context.idrt_size = sizeof(struct
gen8_interface_descriptor_data) * MAX_INTERFACE_DESC_GEN6;
Post by Xiang, Haihao
- vme_context->gpe_context.curbe_size = CURBE_TOTAL_DATA_LENGTH;
+ vme_context->gpe_context.curbe.length =
CURBE_TOTAL_DATA_LENGTH;
Post by Xiang, Haihao
vme_context->gpe_context.sampler_size = 0;
diff --git a/src/gen9_vp9_encoder.c b/src/gen9_vp9_encoder.c index
f39d6d0..5ad7b26 100644
--- a/src/gen9_vp9_encoder.c
+++ b/src/gen9_vp9_encoder.c
@@ -1820,18 +1820,18 @@
gen9_brc_update_add_surfaces_vp9(VADriverContextP ctx,
Post by Xiang, Haihao
/* 4. Mbenc curbe input buffer */
gen9_add_dri_buffer_gpe_surface(ctx,
brc_gpe_context,
- mbenc_gpe_context->dynamic_state.bo,
+ mbenc_gpe_context->curbe.bo,
0,
- ALIGN(mbenc_gpe_context->curbe_size, 64),
- mbenc_gpe_context->curbe_offset,
+ ALIGN(mbenc_gpe_context->curbe.length, 64),
+ mbenc_gpe_context->curbe.offset,
VP9_BTI_BRC_MBENC_CURBE_INPUT_G9);
/* 5. Mbenc curbe output buffer */
gen9_add_dri_buffer_gpe_surface(ctx,
brc_gpe_context,
- mbenc_gpe_context->dynamic_state.bo,
+ mbenc_gpe_context->curbe.bo,
0,
- ALIGN(mbenc_gpe_context->curbe_size, 64),
- mbenc_gpe_context->curbe_offset,
+ ALIGN(mbenc_gpe_context->curbe.length, 64),
+ mbenc_gpe_context->curbe.offset,
VP9_BTI_BRC_MBENC_CURBE_OUTPUT_G9);
gen9_vp9_send_mbenc_surface(VADriverContextP ctx,
gen9_add_dri_buffer_gpe_surface(ctx,
gpe_context,
- mbenc_param->gpe_context_tx->dynamic_state.bo,
+
+ mbenc_param->gpe_context_tx->curbe.bo,
0,
ALIGN(res_size, 64),
- mbenc_param->gpe_context_tx->curbe_offset,
+
+ mbenc_param->gpe_context_tx->curbe.offset,
VP9_BTI_MBENC_TX_CURBE_G9);
break;
@@ -3441,10 +3441,10 @@
gen9_vp9_send_mbenc_surface(VADriverContextP
Post by Xiang, Haihao
ctx,
gen9_add_dri_buffer_gpe_surface(ctx,
gpe_context,
- mbenc_param->gpe_context_tx->dynamic_state.bo,
+
+ mbenc_param->gpe_context_tx->curbe.bo,
0,
ALIGN(res_size, 64),
- mbenc_param->gpe_context_tx->curbe_offset,
+
+ mbenc_param->gpe_context_tx->curbe.offset,
VP9_BTI_MBENC_TX_CURBE_G9);
@@ -3684,8 +3684,6 @@ gen9_init_gpe_context_vp9(struct
i965_gpe_context *gpe_context,
Post by Xiang, Haihao
{
gpe_context->curbe.length = kernel_param->curbe_size; // in bytes
- gpe_context->curbe_size = ALIGN(kernel_param->curbe_size, 64);
-
gpe_context->sampler_size = 0;
if (kernel_param->sampler_size) {
gpe_context->sampler_size =
ALIGN(kernel_param->sampler_size, 64); diff --git
a/src/i965_gpe_utils.c b/src/i965_gpe_utils.c index c5a8935..3739a88
100644
--- a/src/i965_gpe_utils.c
+++ b/src/i965_gpe_utils.c
@@ -1066,8 +1066,8 @@ gen8_gpe_curbe_load(VADriverContextP ctx,
OUT_BATCH(batch, CMD_MEDIA_CURBE_LOAD | (4 - 2));
OUT_BATCH(batch, 0);
- OUT_BATCH(batch, gpe_context->curbe_size);
- OUT_BATCH(batch, gpe_context->curbe_offset);
+ OUT_BATCH(batch, gpe_context->curbe.length);
+ OUT_BATCH(batch, gpe_context->curbe.offset);
ADVANCE_BATCH(batch);
}
@@ -1122,7 +1122,7 @@ gen8_gpe_context_init(VADriverContextP ctx,
assert(bo);
gpe_context->surface_state_binding_table.bo = bo;
- bo_size = gpe_context->idrt_size + gpe_context->curbe_size +
gpe_context->sampler_size + 192;
Post by Xiang, Haihao
+ bo_size = gpe_context->idrt_size + gpe_context->curbe.length +
+ gpe_context->sampler_size + 192;
dri_bo_unreference(gpe_context->dynamic_state.bo);
bo = dri_bo_alloc(i965->intel.bufmgr,
/* Constant buffer offset */
start_offset = ALIGN(end_offset, 64);
- gpe_context->curbe_offset = start_offset;
- end_offset = start_offset + gpe_context->curbe_size;
+ dri_bo_unreference(gpe_context->curbe.bo);
+ gpe_context->curbe.bo = bo;
+ dri_bo_reference(gpe_context->curbe.bo);
+ gpe_context->curbe.offset = start_offset;
+ end_offset = start_offset + gpe_context->curbe.length;
/* Interface descriptor offset */
gen8_gpe_context_destroy(struct i965_gpe_context *gpe_context)
dri_bo_unreference(gpe_context->indirect_state.bo);
gpe_context->indirect_state.bo = NULL;
+ dri_bo_unreference(gpe_context->curbe.bo);
+ gpe_context->curbe.bo = NULL;
}
@@ -1619,7 +1624,12 @@
gen8_gpe_context_set_dynamic_buffer(VADriverContextP ctx,
Post by Xiang, Haihao
dri_bo_reference(gpe_context->dynamic_state.bo);
gpe_context->dynamic_state.bo_size = ds->bo_size;
- gpe_context->curbe_offset = ds->curbe_offset;
+ /* curbe buffer is a part of the dynamic buffer */
+ dri_bo_unreference(gpe_context->curbe.bo);
+ gpe_context->curbe.bo = ds->bo;
+ dri_bo_reference(gpe_context->curbe.bo);
+ gpe_context->curbe.offset = ds->curbe_offset;
+
gpe_context->idrt_offset = ds->idrt_offset;
gpe_context->sampler_offset = ds->sampler_offset;
@@ -1629,15 +1639,15 @@
gen8_gpe_context_set_dynamic_buffer(VADriverContextP ctx,
Post by Xiang, Haihao
void *
gen8p_gpe_context_map_curbe(struct i965_gpe_context *gpe_context)
{
- dri_bo_map(gpe_context->dynamic_state.bo, 1);
+ dri_bo_map(gpe_context->curbe.bo, 1);
- return (char *)gpe_context->dynamic_state.bo->virtual + gpe_context-
curbe_offset;
+ return (char *)gpe_context->curbe.bo->virtual +
+ gpe_context->curbe.offset;
}
void
gen8p_gpe_context_unmap_curbe(struct i965_gpe_context
*gpe_context)
Post by Xiang, Haihao
{
- dri_bo_unmap(gpe_context->dynamic_state.bo);
+ dri_bo_unmap(gpe_context->curbe.bo);
}
void
diff --git a/src/i965_gpe_utils.h b/src/i965_gpe_utils.h index
0cbef43..92123fe 100644
--- a/src/i965_gpe_utils.h
+++ b/src/i965_gpe_utils.h
@@ -92,6 +92,7 @@ struct i965_gpe_context
struct {
dri_bo *bo;
unsigned int length; /* in bytes */
+ unsigned int offset;
} curbe;
struct {
@@ -168,8 +169,6 @@ struct i965_gpe_context
int sampler_size;
unsigned int idrt_offset;
int idrt_size;
- unsigned int curbe_offset;
- int curbe_size;
};
struct gpe_mi_flush_dw_parameter
Xiang, Haihao
2016-11-18 01:42:46 UTC
Permalink
Post by Zhao Yakui
Post by Xiang, Haihao
-----Original Message-----
From: Zhao, Yakui
Sent: Thursday, November 17, 2016 8:42 PM
Subject: Re: [Libva] [Libva-intel-driver][PATCH 02/17] Move all curbe related
settings to the inner structure in i965_gpe_context
Post by Xiang, Haihao
To avoid confusion between curbe.length and curbe_size, this patch
uses curbe.length only. curbe.bo is always set even if curbe is a part
of the dynamic state buffer, hence we can use curbe related settings
no matter it is a part of the dynamic state buffer or not.
The curbe.bo in *_gpe_context is defined/used for the old
platform.
In fact for the platform from Sandybridge, it can reside in the
dynamic_state.bo.
If the curbe.bo/curbe.offset is used directly,
previously curbe_offset is also used directly.
maybe it brings the confusion
that one dedicated bo is declared/defined for curbe.
curbe.bo/curbe.offset is set in the gpe functions; a gpe user just uses
it and needn't care whether
it is a dedicated bo or not.
So I think that the curbe_offset can follow the HW spec.
curbe.offset still follows the HW spec.  The problem for the old
structure is that we have an inner curbe structure
and curbe_offset/ curbe_size in the same structure.  It is easy to
confuse a new developer.
The inner curbe structure is mainly defined for the old platform.
Actually it can be used on the new platform. 
Post by Zhao Yakui
For the HW that should allocate the curbe_buffer from dynamic_buffer,
curbe.bo points to the dynamic buffer on the new platform.
Post by Zhao Yakui
IMO it only needs to care about the dynamic_buffer and curbe_offset when it
needs to access the curbe_buffer.
My point here is that a gpe user only cares about curbe when using curbe, no matter
whether it is part of the dynamic buffer or a dedicated buffer, so we can use the
same code to access the curbe buffer, no matter whether the HW is old or new,

e.g.
in i965_gpe_context_map_curbe()

dri_bo_map(gpe_context->curbe.bo, 1) works for all platforms.

otherwise we have to use if ... else.
Post by Zhao Yakui
Maybe we can add some explanations about the i965_gpe_context.
    >The inner structure(idrt, sampler, curbe) is mainly defined for
the
old platform. And the dedicated bo is allocated for them.
    >dynamic_buffer/curbe_offset is for the later platform.
We should avoid writing too many comments if the code is clean enough.
Post by Zhao Yakui
Post by Xiang, Haihao
Maybe we can add the wrapper function that can map/unmap the virtual
address of curbe_buffer. In such case it can also simplify the mapping related
with curbe_buffer.
We already have such functions, but we don't use them for some old
platforms.  We can add similar functions for idrt and sampler.
For the wrapper, IMO we can only consider them for the platform since
gen8+. We can leave the old platform alone.
Post by Xiang, Haihao
Similar considerations for Interface_descriptor_data,
sampler_buffer.
Post by Xiang, Haihao
---
   src/gen75_vpp_gpe.c        |  2 +-
   src/gen8_mfc.c             |  2 +-
   src/gen8_vme.c             | 12 ++++++------
   src/gen9_post_processing.c |  3 +--
   src/gen9_vme.c             | 12 ++++++------
   src/gen9_vp9_encoder.c     | 22 ++++++++++------------
   src/i965_gpe_utils.c       | 28 +++++++++++++++++++---------
   src/i965_gpe_utils.h       |  3 +--
   8 files changed, 45 insertions(+), 39 deletions(-)
diff --git a/src/gen75_vpp_gpe.c b/src/gen75_vpp_gpe.c index
9850c1c..2cddb5a 100644
--- a/src/gen75_vpp_gpe.c
+++ b/src/gen75_vpp_gpe.c
@@ -890,7 +890,7 @@ vpp_gpe_context_init(VADriverContextP ctx)
           gpe_ctx->surface_state_binding_table.length =
                  (SURFACE_STATE_PADDED_SIZE_GEN8 +
sizeof(unsigned
int)) * MAX_MEDIA_SURFACES_GEN6;
-        gpe_ctx->curbe_size = CURBE_TOTAL_DATA_LENGTH;
+        gpe_ctx->curbe.length = CURBE_TOTAL_DATA_LENGTH;
           gpe_ctx->idrt_size  = sizeof(struct
gen8_interface_descriptor_data) * MAX_INTERFACE_DESC_GEN6;
       }
diff --git a/src/gen8_mfc.c b/src/gen8_mfc.c index
63ffea5..3ed9e84
100644
--- a/src/gen8_mfc.c
+++ b/src/gen8_mfc.c
@@ -4609,7 +4609,7 @@ Bool
gen8_mfc_context_init(VADriverContextP
ctx, struct intel_encoder_context *e
Post by Xiang, Haihao
       mfc_context-
Post by Xiang, Haihao
gpe_context.surface_state_binding_table.length =
(SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) *
MAX_MEDIA_SURFACES_GEN6;
       mfc_context->gpe_context.idrt_size = sizeof(struct
gen8_interface_descriptor_data) * MAX_INTERFACE_DESC_GEN6;
Post by Xiang, Haihao
-    mfc_context->gpe_context.curbe_size = 32 * 4;
+    mfc_context->gpe_context.curbe.length = 32 * 4;
       mfc_context->gpe_context.sampler_size = 0;
       mfc_context->gpe_context.vfe_state.max_num_threads = 60
- 1;
diff --git a/src/gen8_vme.c b/src/gen8_vme.c index
c79c62b..96835bf
100644
--- a/src/gen8_vme.c
+++ b/src/gen8_vme.c
@@ -389,10 +389,10 @@ static VAStatus
gen8_vme_constant_setup(VADriverContextP ctx,
       vme_state_message[31] = mv_num;
-    dri_bo_map(vme_context->gpe_context.dynamic_state.bo, 1);
-    assert(vme_context->gpe_context.dynamic_state.bo-
Post by Xiang, Haihao
virtual);
-    constant_buffer = (unsigned char *)vme_context-
gpe_context.dynamic_state.bo->virtual +
-                                         vme_context-
Post by Xiang, Haihao
gpe_context.curbe_offset;
+    dri_bo_map(vme_context->gpe_context.curbe.bo, 1);
+    assert(vme_context->gpe_context.curbe.bo->virtual);
+    constant_buffer = (unsigned char *)vme_context-
gpe_context.curbe.bo->virtual +
+
+ vme_context->gpe_context.curbe.offset;
       /* VME MV/Mb cost table is passed by using const buffer
*/
       /* Now it uses the fixed search path. So it is
constructed
gen8_vme_constant_setup(VADriverContextP ctx,
Post by Xiang, Haihao
        */
       memcpy(constant_buffer, (char *)vme_context-
Post by Xiang, Haihao
vme_state_message,
128);
-    dri_bo_unmap(vme_context->gpe_context.dynamic_state.bo);
+    dri_bo_unmap(vme_context->gpe_context.curbe.bo);
       return VA_STATUS_SUCCESS;
   }
@@ -1379,7 +1379,7 @@ Bool
gen8_vme_context_init(VADriverContextP
ctx, struct intel_encoder_context *e
Post by Xiang, Haihao
           vme_context-
Post by Xiang, Haihao
gpe_context.surface_state_binding_table.length
= (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) *
MAX_MEDIA_SURFACES_GEN6;
           vme_context->gpe_context.idrt_size = sizeof(struct
gen8_interface_descriptor_data) * MAX_INTERFACE_DESC_GEN6;
Post by Xiang, Haihao
-        vme_context->gpe_context.curbe_size =
CURBE_TOTAL_DATA_LENGTH;
Post by Xiang, Haihao
+        vme_context->gpe_context.curbe.length =
+ CURBE_TOTAL_DATA_LENGTH;
           vme_context->gpe_context.sampler_size = 0;
diff --git a/src/gen9_post_processing.c
b/src/gen9_post_processing.c
index a5d345c..71da501 100644
--- a/src/gen9_post_processing.c
+++ b/src/gen9_post_processing.c
@@ -538,8 +538,7 @@
gen9_post_processing_context_init(VADriverContextP ctx,
Post by Xiang, Haihao
       gen8_gpe_load_kernels(ctx, gpe_context,&scaling_kernel,
1);
       gpe_context->idrt_size = ALIGN(sizeof(struct
gen8_interface_descriptor_data), 64);
Post by Xiang, Haihao
       gpe_context->sampler_size = ALIGN(sizeof(struct
gen8_sampler_state),
64);
Post by Xiang, Haihao
-    gpe_context->curbe_size = ALIGN(sizeof(struct
scaling_input_parameter), 64);
Post by Xiang, Haihao
-    gpe_context->curbe.length = gpe_context->curbe_size;
+    gpe_context->curbe.length = ALIGN(sizeof(struct
+ scaling_input_parameter), 64);
       gpe_context->surface_state_binding_table.max_entries =
MAX_SCALING_SURFACES;
Post by Xiang, Haihao
       gpe_context-
Post by Xiang, Haihao
surface_state_binding_table.binding_table_offset =
0; diff --git a/src/gen9_vme.c b/src/gen9_vme.c index
6ad8fff..a59fe2a
100644
--- a/src/gen9_vme.c
+++ b/src/gen9_vme.c
@@ -438,10 +438,10 @@ static VAStatus
gen9_vme_constant_setup(VADriverContextP ctx,
       vme_state_message[31] = mv_num;
-    dri_bo_map(vme_context->gpe_context.dynamic_state.bo, 1);
-    assert(vme_context->gpe_context.dynamic_state.bo-
Post by Xiang, Haihao
virtual);
-    constant_buffer = (unsigned char *)vme_context-
gpe_context.dynamic_state.bo->virtual +
-                                         vme_context-
Post by Xiang, Haihao
gpe_context.curbe_offset;
+    dri_bo_map(vme_context->gpe_context.curbe.bo, 1);
+    assert(vme_context->gpe_context.curbe.bo->virtual);
+    constant_buffer = (unsigned char *)vme_context-
gpe_context.curbe.bo->virtual +
+
+ vme_context->gpe_context.curbe.offset;
       /* VME MV/Mb cost table is passed by using const buffer
*/
       /* Now it uses the fixed search path. So it is
constructed
gen9_vme_constant_setup(VADriverContextP ctx,
Post by Xiang, Haihao
        */
       memcpy(constant_buffer, (char *)vme_context-
Post by Xiang, Haihao
vme_state_message,
128);
-    dri_bo_unmap(vme_context->gpe_context.dynamic_state.bo);
+    dri_bo_unmap(vme_context->gpe_context.curbe.bo);
       return VA_STATUS_SUCCESS;
   }
@@ -2032,7 +2032,7 @@ Bool
gen9_vme_context_init(VADriverContextP
ctx, struct intel_encoder_context *e
Post by Xiang, Haihao
       vme_context-
Post by Xiang, Haihao
gpe_context.surface_state_binding_table.length =
(SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) *
MAX_MEDIA_SURFACES_GEN6;
       vme_context->gpe_context.idrt_size = sizeof(struct
gen8_interface_descriptor_data) * MAX_INTERFACE_DESC_GEN6;
Post by Xiang, Haihao
-    vme_context->gpe_context.curbe_size =
CURBE_TOTAL_DATA_LENGTH;
+    vme_context->gpe_context.curbe.length =
CURBE_TOTAL_DATA_LENGTH;
Post by Xiang, Haihao
       vme_context->gpe_context.sampler_size = 0;
diff --git a/src/gen9_vp9_encoder.c b/src/gen9_vp9_encoder.c index
f39d6d0..5ad7b26 100644
--- a/src/gen9_vp9_encoder.c
+++ b/src/gen9_vp9_encoder.c
@@ -1820,18 +1820,18 @@
gen9_brc_update_add_surfaces_vp9(VADriverContextP ctx,
Post by Xiang, Haihao
       /* 4. Mbenc curbe input buffer */
       gen9_add_dri_buffer_gpe_surface(ctx,
                                       brc_gpe_context,
-                                    mbenc_gpe_context-
Post by Xiang, Haihao
dynamic_state.bo,
+                                    mbenc_gpe_context-
Post by Xiang, Haihao
curbe.bo,
                                       0,
-                                    ALIGN(mbenc_gpe_context-
Post by Xiang, Haihao
curbe_size, 64),
-                                    mbenc_gpe_context-
Post by Xiang, Haihao
curbe_offset,
+                                    ALIGN(mbenc_gpe_context-
Post by Xiang, Haihao
curbe.length, 64),
+                                    mbenc_gpe_context-
Post by Xiang, Haihao
curbe.offset,
                                       VP9_BTI_BRC_MBENC_CURBE_
INPUT_G9);
       /* 5. Mbenc curbe output buffer */
       gen9_add_dri_buffer_gpe_surface(ctx,
                                       brc_gpe_context,
-                                    mbenc_gpe_context-
Post by Xiang, Haihao
dynamic_state.bo,
+                                    mbenc_gpe_context-
Post by Xiang, Haihao
curbe.bo,
                                       0,
-                                    ALIGN(mbenc_gpe_context-
Post by Xiang, Haihao
curbe_size, 64),
-                                    mbenc_gpe_context-
Post by Xiang, Haihao
curbe_offset,
+                                    ALIGN(mbenc_gpe_context-
Post by Xiang, Haihao
curbe.length, 64),
+                                    mbenc_gpe_context-
Post by Xiang, Haihao
curbe.offset,
VP9_BTI_BRC_MBENC_CURBE_OUTPUT_G9);
@@
gen9_vp9_send_mbenc_surface(VADriverContextP ctx,
           gen9_add_dri_buffer_gpe_surface(ctx,
                                           gpe_context,
-                                        mbenc_param-
Post by Xiang, Haihao
gpe_context_tx->dynamic_state.bo,
+
+ mbenc_param->gpe_context_tx->curbe.bo,
                                           0,
                                           ALIGN(res_size, 64),
-                                        mbenc_param-
Post by Xiang, Haihao
gpe_context_tx->curbe_offset,
+
+ mbenc_param->gpe_context_tx->curbe.offset,
                                           VP9_BTI_MBENC_TX_CUR
BE_G9);
           break;
@@ -3441,10 +3441,10 @@
gen9_vp9_send_mbenc_surface(VADriverContextP
Post by Xiang, Haihao
ctx,
           gen9_add_dri_buffer_gpe_surface(ctx,
                                           gpe_context,
-                                        mbenc_param-
Post by Xiang, Haihao
gpe_context_tx->dynamic_state.bo,
+
+ mbenc_param->gpe_context_tx->curbe.bo,
                                           0,
                                           ALIGN(res_size, 64),
-                                        mbenc_param-
Post by Xiang, Haihao
gpe_context_tx->curbe_offset,
+
+ mbenc_param->gpe_context_tx->curbe.offset,
                                           VP9_BTI_MBENC_TX_CUR
BE_G9);
@@ -3684,8 +3684,6 @@ gen9_init_gpe_context_vp9(struct
i965_gpe_context *gpe_context,
Post by Xiang, Haihao
   {
       gpe_context->curbe.length = kernel_param->curbe_size; //
in
bytes
-    gpe_context->curbe_size = ALIGN(kernel_param->curbe_size,
64);
-
       gpe_context->sampler_size = 0;
       if (kernel_param->sampler_size) {
           gpe_context->sampler_size =
ALIGN(kernel_param->sampler_size, 64); diff --git
a/src/i965_gpe_utils.c b/src/i965_gpe_utils.c index
c5a8935..3739a88
100644
--- a/src/i965_gpe_utils.c
+++ b/src/i965_gpe_utils.c
@@ -1066,8 +1066,8 @@ gen8_gpe_curbe_load(VADriverContextP ctx,
       OUT_BATCH(batch, CMD_MEDIA_CURBE_LOAD | (4 - 2));
       OUT_BATCH(batch, 0);
-    OUT_BATCH(batch, gpe_context->curbe_size);
-    OUT_BATCH(batch, gpe_context->curbe_offset);
+    OUT_BATCH(batch, gpe_context->curbe.length);
+    OUT_BATCH(batch, gpe_context->curbe.offset);
       ADVANCE_BATCH(batch);
   }
@@ -1122,7 +1122,7 @@ gen8_gpe_context_init(VADriverContextP ctx,
       assert(bo);
       gpe_context->surface_state_binding_table.bo = bo;
-    bo_size = gpe_context->idrt_size + gpe_context->curbe_size
+
gpe_context->sampler_size + 192;
Post by Xiang, Haihao
+    bo_size = gpe_context->idrt_size + gpe_context-
Post by Xiang, Haihao
curbe.length +
+ gpe_context->sampler_size + 192;
       dri_bo_unreference(gpe_context->dynamic_state.bo);
       bo = dri_bo_alloc(i965->intel.bufmgr,
-1137,8
       /* Constant buffer offset */
       start_offset = ALIGN(end_offset, 64);
-    gpe_context->curbe_offset = start_offset;
-    end_offset = start_offset + gpe_context->curbe_size;
+    dri_bo_unreference(gpe_context->curbe.bo);
+    gpe_context->curbe.bo = bo;
+    dri_bo_reference(gpe_context->curbe.bo);
+    gpe_context->curbe.offset = start_offset;
+    end_offset = start_offset + gpe_context->curbe.length;
       /* Interface descriptor offset */
@@
gen8_gpe_context_destroy(struct i965_gpe_context *gpe_context)
       dri_bo_unreference(gpe_context->indirect_state.bo);
       gpe_context->indirect_state.bo = NULL;
+    dri_bo_unreference(gpe_context->curbe.bo);
+    gpe_context->curbe.bo = NULL;
   }
@@ -1619,7 +1624,12 @@
gen8_gpe_context_set_dynamic_buffer(VADriverContextP ctx,
Post by Xiang, Haihao
       dri_bo_reference(gpe_context->dynamic_state.bo);
       gpe_context->dynamic_state.bo_size = ds->bo_size;
-    gpe_context->curbe_offset = ds->curbe_offset;
+    /* curbe buffer is a part of the dynamic buffer */
+    dri_bo_unreference(gpe_context->curbe.bo);
+    gpe_context->curbe.bo = ds->bo;
+    dri_bo_reference(gpe_context->curbe.bo);
+    gpe_context->curbe.offset = ds->curbe_offset;
+
       gpe_context->idrt_offset = ds->idrt_offset;
       gpe_context->sampler_offset = ds->sampler_offset;
@@ -1629,15 +1639,15 @@
gen8_gpe_context_set_dynamic_buffer(VADriverContextP ctx,
Post by Xiang, Haihao
   void *
   gen8p_gpe_context_map_curbe(struct i965_gpe_context
*gpe_context)
   {
-    dri_bo_map(gpe_context->dynamic_state.bo, 1);
+    dri_bo_map(gpe_context->curbe.bo, 1);
-    return (char *)gpe_context->dynamic_state.bo->virtual +
gpe_context-
curbe_offset;
+    return (char *)gpe_context->curbe.bo->virtual +
+ gpe_context->curbe.offset;
   }
   void
   gen8p_gpe_context_unmap_curbe(struct i965_gpe_context
*gpe_context)
Post by Xiang, Haihao
   {
-    dri_bo_unmap(gpe_context->dynamic_state.bo);
+    dri_bo_unmap(gpe_context->curbe.bo);
   }
   void
diff --git a/src/i965_gpe_utils.h b/src/i965_gpe_utils.h index
0cbef43..92123fe 100644
--- a/src/i965_gpe_utils.h
+++ b/src/i965_gpe_utils.h
@@ -92,6 +92,7 @@ struct i965_gpe_context
       struct {
           dri_bo *bo;
           unsigned int length;            /* in bytes */
+        unsigned int offset;
       } curbe;
       struct {
@@ -168,8 +169,6 @@ struct i965_gpe_context
       int sampler_size;
       unsigned int idrt_offset;
       int idrt_size;
-    unsigned int curbe_offset;
-    int curbe_size;
   };
   struct gpe_mi_flush_dw_parameter
Zhao Yakui
2016-11-18 02:55:12 UTC
Permalink
Post by Xiang, Haihao
Post by Zhao Yakui
Post by Xiang, Haihao
-----Original Message-----
From: Zhao, Yakui
Sent: Thursday, November 17, 2016 8:42 PM
Subject: Re: [Libva] [Libva-intel-driver][PATCH 02/17] Move all curbe related
settings to the inner structure in i965_gpe_context
Post by Xiang, Haihao
To avoid confusion between curbe.length and curbe_size, this patch
uses curbe.length only. curbe.bo is always set even if curbe is a part
of the dynamic state buffer, hence we can use curbe related settings
no matter whether it is a part of the dynamic state buffer or not.
The curbe.bo in *_gpe_context is defined/used for the old
platform.
In fact for the platform from Sandybridge, it can reside in the
dynamic_state.bo.
If the curbe.bo/curbe.offset is used directly,
previously curbe_offset is also used directly.
maybe it brings the confusion
that one dedicated bo is declared/defined for curbe.
curbe.bo/curbe.offset is set in gpe functions, gpe user just uses
it and needn't care about
it is a dedicated bo or not.
So I think that the curbe_offset can follow the HW spec.
curbe.offset still follows the HW spec. The problem for the old
structure is that we have an inner curbe structure
and curbe_offset/ curbe_size in the same structure. It is easy to
confuse a new developer.
The inner curbe structure is mainly defined for the old platform.
Actually it can be used on the new platform.
Post by Zhao Yakui
For the HW that should allocate the curbe_buffer from dynamic_buffer,
curbe.bo points to the dynamic buffer on the new platform.
Post by Zhao Yakui
IMO it only needs to care the dynamic_buffer and curbe_offset when it
needs to access the curbe_buffer.
My point here is gpe user only cares curbe when using curbe, no matter
it is part of dynamic buffer or a dedicated buffer, so we can use the
same code to use curbe buffer, no matter the HW is old or new,
e.g.
in i965_gpe_context_map_curbe()
dri_bo_map(gpe_context->curbe.bo, 1) works for all platforms.
otherwise we have to use if ... else.
OK. It is ok that they are unified into the inner structure.
Post by Xiang, Haihao
Post by Zhao Yakui
Maybe we can add some explanations about the i965_gpe_context.
Post by Xiang, Haihao
The inner structure(idrt, sampler, curbe) is mainly defined for
the
old platform. And the dedicated bo is allocated for them.
Post by Xiang, Haihao
dynamic_buffer/curbe_offset is for the later platform.
We should avoid writing too many comments if the code is clean enough.
Post by Zhao Yakui
Post by Xiang, Haihao
Maybe we can add the wrapper function that can map/unmap the virtual
address of curbe_buffer. In such case it can also simplify the mapping related
with curbe_buffer.
We already have such functions, but we don't use them for some old
platforms. We can add similar functions for idrt and sampler.
For the wrapper, IMO we can only consider them for the platform since
gen8+. We can leave the old platform alone.
Post by Xiang, Haihao
Similar considerations for Interface_descriptor_data,
sampler_buffer.
Post by Xiang, Haihao
---
src/gen75_vpp_gpe.c | 2 +-
src/gen8_mfc.c | 2 +-
src/gen8_vme.c | 12 ++++++------
src/gen9_post_processing.c | 3 +--
src/gen9_vme.c | 12 ++++++------
src/gen9_vp9_encoder.c | 22 ++++++++++------------
src/i965_gpe_utils.c | 28 +++++++++++++++++++---------
src/i965_gpe_utils.h | 3 +--
8 files changed, 45 insertions(+), 39 deletions(-)
diff --git a/src/gen75_vpp_gpe.c b/src/gen75_vpp_gpe.c index
9850c1c..2cddb5a 100644
--- a/src/gen75_vpp_gpe.c
+++ b/src/gen75_vpp_gpe.c
@@ -890,7 +890,7 @@ vpp_gpe_context_init(VADriverContextP ctx)
gpe_ctx->surface_state_binding_table.length =
(SURFACE_STATE_PADDED_SIZE_GEN8 +
sizeof(unsigned
int)) * MAX_MEDIA_SURFACES_GEN6;
- gpe_ctx->curbe_size = CURBE_TOTAL_DATA_LENGTH;
+ gpe_ctx->curbe.length = CURBE_TOTAL_DATA_LENGTH;
gpe_ctx->idrt_size = sizeof(struct
gen8_interface_descriptor_data) * MAX_INTERFACE_DESC_GEN6;
}
diff --git a/src/gen8_mfc.c b/src/gen8_mfc.c index
63ffea5..3ed9e84
100644
--- a/src/gen8_mfc.c
+++ b/src/gen8_mfc.c
@@ -4609,7 +4609,7 @@ Bool
gen8_mfc_context_init(VADriverContextP
ctx, struct intel_encoder_context *e
Post by Xiang, Haihao
mfc_context-
Post by Xiang, Haihao
gpe_context.surface_state_binding_table.length =
(SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) *
MAX_MEDIA_SURFACES_GEN6;
mfc_context->gpe_context.idrt_size = sizeof(struct
gen8_interface_descriptor_data) * MAX_INTERFACE_DESC_GEN6;
Post by Xiang, Haihao
- mfc_context->gpe_context.curbe_size = 32 * 4;
+ mfc_context->gpe_context.curbe.length = 32 * 4;
mfc_context->gpe_context.sampler_size = 0;
mfc_context->gpe_context.vfe_state.max_num_threads = 60 - 1;
diff --git a/src/gen8_vme.c b/src/gen8_vme.c index
c79c62b..96835bf
100644
--- a/src/gen8_vme.c
+++ b/src/gen8_vme.c
@@ -389,10 +389,10 @@ static VAStatus
gen8_vme_constant_setup(VADriverContextP ctx,
vme_state_message[31] = mv_num;
- dri_bo_map(vme_context->gpe_context.dynamic_state.bo, 1);
- assert(vme_context->gpe_context.dynamic_state.bo-
Post by Xiang, Haihao
virtual);
- constant_buffer = (unsigned char *)vme_context-
gpe_context.dynamic_state.bo->virtual +
- vme_context-
Post by Xiang, Haihao
gpe_context.curbe_offset;
+ dri_bo_map(vme_context->gpe_context.curbe.bo, 1);
+ assert(vme_context->gpe_context.curbe.bo->virtual);
+ constant_buffer = (unsigned char *)vme_context-
gpe_context.curbe.bo->virtual +
+
+ vme_context->gpe_context.curbe.offset;
/* VME MV/Mb cost table is passed by using const buffer */
/* Now it uses the fixed search path. So it is
constructed
gen8_vme_constant_setup(VADriverContextP ctx,
Post by Xiang, Haihao
*/
memcpy(constant_buffer, (char *)vme_context-
Post by Xiang, Haihao
vme_state_message,
128);
- dri_bo_unmap(vme_context->gpe_context.dynamic_state.bo);
+ dri_bo_unmap(vme_context->gpe_context.curbe.bo);
return VA_STATUS_SUCCESS;
}
@@ -1379,7 +1379,7 @@ Bool
gen8_vme_context_init(VADriverContextP
ctx, struct intel_encoder_context *e
Post by Xiang, Haihao
vme_context-
Post by Xiang, Haihao
gpe_context.surface_state_binding_table.length
= (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) *
MAX_MEDIA_SURFACES_GEN6;
vme_context->gpe_context.idrt_size = sizeof(struct
gen8_interface_descriptor_data) * MAX_INTERFACE_DESC_GEN6;
Post by Xiang, Haihao
- vme_context->gpe_context.curbe_size =
CURBE_TOTAL_DATA_LENGTH;
Post by Xiang, Haihao
+ vme_context->gpe_context.curbe.length =
+ CURBE_TOTAL_DATA_LENGTH;
vme_context->gpe_context.sampler_size = 0;
diff --git a/src/gen9_post_processing.c
b/src/gen9_post_processing.c
index a5d345c..71da501 100644
--- a/src/gen9_post_processing.c
+++ b/src/gen9_post_processing.c
@@ -538,8 +538,7 @@
gen9_post_processing_context_init(VADriverContextP ctx,
Post by Xiang, Haihao
gen8_gpe_load_kernels(ctx, gpe_context,&scaling_kernel, 1);
gpe_context->idrt_size = ALIGN(sizeof(struct
gen8_interface_descriptor_data), 64);
Post by Xiang, Haihao
gpe_context->sampler_size = ALIGN(sizeof(struct
gen8_sampler_state),
64);
Post by Xiang, Haihao
- gpe_context->curbe_size = ALIGN(sizeof(struct
scaling_input_parameter), 64);
Post by Xiang, Haihao
- gpe_context->curbe.length = gpe_context->curbe_size;
+ gpe_context->curbe.length = ALIGN(sizeof(struct
+ scaling_input_parameter), 64);
gpe_context->surface_state_binding_table.max_entries =
MAX_SCALING_SURFACES;
Post by Xiang, Haihao
gpe_context-
Post by Xiang, Haihao
surface_state_binding_table.binding_table_offset =
0; diff --git a/src/gen9_vme.c b/src/gen9_vme.c index
6ad8fff..a59fe2a
100644
--- a/src/gen9_vme.c
+++ b/src/gen9_vme.c
@@ -438,10 +438,10 @@ static VAStatus
gen9_vme_constant_setup(VADriverContextP ctx,
vme_state_message[31] = mv_num;
- dri_bo_map(vme_context->gpe_context.dynamic_state.bo, 1);
- assert(vme_context->gpe_context.dynamic_state.bo-
Post by Xiang, Haihao
virtual);
- constant_buffer = (unsigned char *)vme_context-
gpe_context.dynamic_state.bo->virtual +
- vme_context-
Post by Xiang, Haihao
gpe_context.curbe_offset;
+ dri_bo_map(vme_context->gpe_context.curbe.bo, 1);
+ assert(vme_context->gpe_context.curbe.bo->virtual);
+ constant_buffer = (unsigned char *)vme_context-
gpe_context.curbe.bo->virtual +
+
+ vme_context->gpe_context.curbe.offset;
/* VME MV/Mb cost table is passed by using const buffer */
/* Now it uses the fixed search path. So it is
constructed
gen9_vme_constant_setup(VADriverContextP ctx,
Post by Xiang, Haihao
*/
memcpy(constant_buffer, (char *)vme_context-
Post by Xiang, Haihao
vme_state_message,
128);
- dri_bo_unmap(vme_context->gpe_context.dynamic_state.bo);
+ dri_bo_unmap(vme_context->gpe_context.curbe.bo);
return VA_STATUS_SUCCESS;
}
@@ -2032,7 +2032,7 @@ Bool
gen9_vme_context_init(VADriverContextP
ctx, struct intel_encoder_context *e
Post by Xiang, Haihao
vme_context-
Post by Xiang, Haihao
gpe_context.surface_state_binding_table.length =
(SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) *
MAX_MEDIA_SURFACES_GEN6;
vme_context->gpe_context.idrt_size = sizeof(struct
gen8_interface_descriptor_data) * MAX_INTERFACE_DESC_GEN6;
Post by Xiang, Haihao
- vme_context->gpe_context.curbe_size =
CURBE_TOTAL_DATA_LENGTH;
+ vme_context->gpe_context.curbe.length =
CURBE_TOTAL_DATA_LENGTH;
Post by Xiang, Haihao
vme_context->gpe_context.sampler_size = 0;
diff --git a/src/gen9_vp9_encoder.c b/src/gen9_vp9_encoder.c index
f39d6d0..5ad7b26 100644
--- a/src/gen9_vp9_encoder.c
+++ b/src/gen9_vp9_encoder.c
@@ -1820,18 +1820,18 @@
gen9_brc_update_add_surfaces_vp9(VADriverContextP ctx,
Post by Xiang, Haihao
/* 4. Mbenc curbe input buffer */
gen9_add_dri_buffer_gpe_surface(ctx,
brc_gpe_context,
- mbenc_gpe_context-
Post by Xiang, Haihao
dynamic_state.bo,
+ mbenc_gpe_context-
Post by Xiang, Haihao
curbe.bo,
0,
- ALIGN(mbenc_gpe_context-
Post by Xiang, Haihao
curbe_size, 64),
- mbenc_gpe_context-
Post by Xiang, Haihao
curbe_offset,
+ ALIGN(mbenc_gpe_context-
Post by Xiang, Haihao
curbe.length, 64),
+ mbenc_gpe_context-
Post by Xiang, Haihao
curbe.offset,
VP9_BTI_BRC_MBENC_CURBE_
INPUT_G9);
/* 5. Mbenc curbe output buffer */
gen9_add_dri_buffer_gpe_surface(ctx,
brc_gpe_context,
- mbenc_gpe_context-
Post by Xiang, Haihao
dynamic_state.bo,
+ mbenc_gpe_context-
Post by Xiang, Haihao
curbe.bo,
0,
- ALIGN(mbenc_gpe_context-
Post by Xiang, Haihao
curbe_size, 64),
- mbenc_gpe_context-
Post by Xiang, Haihao
curbe_offset,
+ ALIGN(mbenc_gpe_context-
Post by Xiang, Haihao
curbe.length, 64),
+ mbenc_gpe_context-
Post by Xiang, Haihao
curbe.offset,
VP9_BTI_BRC_MBENC_CURBE_OUTPUT_G9);
gen9_vp9_send_mbenc_surface(VADriverContextP ctx,
gen9_add_dri_buffer_gpe_surface(ctx,
gpe_context,
- mbenc_param-
Post by Xiang, Haihao
gpe_context_tx->dynamic_state.bo,
+
+ mbenc_param->gpe_context_tx->curbe.bo,
0,
ALIGN(res_size, 64),
- mbenc_param-
Post by Xiang, Haihao
gpe_context_tx->curbe_offset,
+
+ mbenc_param->gpe_context_tx->curbe.offset,
VP9_BTI_MBENC_TX_CUR
BE_G9);
break;
@@ -3441,10 +3441,10 @@
gen9_vp9_send_mbenc_surface(VADriverContextP
Post by Xiang, Haihao
ctx,
gen9_add_dri_buffer_gpe_surface(ctx,
gpe_context,
- mbenc_param-
Post by Xiang, Haihao
gpe_context_tx->dynamic_state.bo,
+
+ mbenc_param->gpe_context_tx->curbe.bo,
0,
ALIGN(res_size, 64),
- mbenc_param-
Post by Xiang, Haihao
gpe_context_tx->curbe_offset,
+
+ mbenc_param->gpe_context_tx->curbe.offset,
VP9_BTI_MBENC_TX_CUR
BE_G9);
@@ -3684,8 +3684,6 @@ gen9_init_gpe_context_vp9(struct
i965_gpe_context *gpe_context,
Post by Xiang, Haihao
{
gpe_context->curbe.length = kernel_param->curbe_size; //
in
bytes
- gpe_context->curbe_size = ALIGN(kernel_param->curbe_size, 64);
-
gpe_context->sampler_size = 0;
if (kernel_param->sampler_size) {
gpe_context->sampler_size =
ALIGN(kernel_param->sampler_size, 64); diff --git
a/src/i965_gpe_utils.c b/src/i965_gpe_utils.c index
c5a8935..3739a88
100644
--- a/src/i965_gpe_utils.c
+++ b/src/i965_gpe_utils.c
@@ -1066,8 +1066,8 @@ gen8_gpe_curbe_load(VADriverContextP ctx,
OUT_BATCH(batch, CMD_MEDIA_CURBE_LOAD | (4 - 2));
OUT_BATCH(batch, 0);
- OUT_BATCH(batch, gpe_context->curbe_size);
- OUT_BATCH(batch, gpe_context->curbe_offset);
+ OUT_BATCH(batch, gpe_context->curbe.length);
+ OUT_BATCH(batch, gpe_context->curbe.offset);
ADVANCE_BATCH(batch);
}
@@ -1122,7 +1122,7 @@ gen8_gpe_context_init(VADriverContextP ctx,
assert(bo);
gpe_context->surface_state_binding_table.bo = bo;
- bo_size = gpe_context->idrt_size + gpe_context->curbe_size +
gpe_context->sampler_size + 192;
Post by Xiang, Haihao
+ bo_size = gpe_context->idrt_size + gpe_context-
Post by Xiang, Haihao
curbe.length +
+ gpe_context->sampler_size + 192;
dri_bo_unreference(gpe_context->dynamic_state.bo);
bo = dri_bo_alloc(i965->intel.bufmgr,
-1137,8
/* Constant buffer offset */
start_offset = ALIGN(end_offset, 64);
- gpe_context->curbe_offset = start_offset;
- end_offset = start_offset + gpe_context->curbe_size;
+ dri_bo_unreference(gpe_context->curbe.bo);
+ gpe_context->curbe.bo = bo;
+ dri_bo_reference(gpe_context->curbe.bo);
+ gpe_context->curbe.offset = start_offset;
+ end_offset = start_offset + gpe_context->curbe.length;
/* Interface descriptor offset */
gen8_gpe_context_destroy(struct i965_gpe_context *gpe_context)
dri_bo_unreference(gpe_context->indirect_state.bo);
gpe_context->indirect_state.bo = NULL;
+ dri_bo_unreference(gpe_context->curbe.bo);
+ gpe_context->curbe.bo = NULL;
}
@@ -1619,7 +1624,12 @@
gen8_gpe_context_set_dynamic_buffer(VADriverContextP ctx,
Post by Xiang, Haihao
dri_bo_reference(gpe_context->dynamic_state.bo);
gpe_context->dynamic_state.bo_size = ds->bo_size;
- gpe_context->curbe_offset = ds->curbe_offset;
+ /* curbe buffer is a part of the dynamic buffer */
+ dri_bo_unreference(gpe_context->curbe.bo);
+ gpe_context->curbe.bo = ds->bo;
+ dri_bo_reference(gpe_context->curbe.bo);
+ gpe_context->curbe.offset = ds->curbe_offset;
+
gpe_context->idrt_offset = ds->idrt_offset;
gpe_context->sampler_offset = ds->sampler_offset;
@@ -1629,15 +1639,15 @@
gen8_gpe_context_set_dynamic_buffer(VADriverContextP ctx,
Post by Xiang, Haihao
void *
gen8p_gpe_context_map_curbe(struct i965_gpe_context
*gpe_context)
{
- dri_bo_map(gpe_context->dynamic_state.bo, 1);
+ dri_bo_map(gpe_context->curbe.bo, 1);
- return (char *)gpe_context->dynamic_state.bo->virtual + gpe_context-
curbe_offset;
+ return (char *)gpe_context->curbe.bo->virtual +
+ gpe_context->curbe.offset;
}
void
gen8p_gpe_context_unmap_curbe(struct i965_gpe_context
*gpe_context)
Post by Xiang, Haihao
{
- dri_bo_unmap(gpe_context->dynamic_state.bo);
+ dri_bo_unmap(gpe_context->curbe.bo);
}
void
diff --git a/src/i965_gpe_utils.h b/src/i965_gpe_utils.h index
0cbef43..92123fe 100644
--- a/src/i965_gpe_utils.h
+++ b/src/i965_gpe_utils.h
@@ -92,6 +92,7 @@ struct i965_gpe_context
struct {
dri_bo *bo;
unsigned int length; /* in bytes */
+ unsigned int offset;
} curbe;
struct {
@@ -168,8 +169,6 @@ struct i965_gpe_context
int sampler_size;
unsigned int idrt_offset;
int idrt_size;
- unsigned int curbe_offset;
- int curbe_size;
};
struct gpe_mi_flush_dw_parameter
Xiang, Haihao
2016-11-17 08:35:00 UTC
Permalink
Make sure the size is a multiple of 64 bytes

Signed-off-by: Xiang, Haihao <***@intel.com>
---
src/i965_gpe_utils.c | 5 ++---
1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/src/i965_gpe_utils.c b/src/i965_gpe_utils.c
index 5d4ca5c..c5a8935 100644
--- a/src/i965_gpe_utils.c
+++ b/src/i965_gpe_utils.c
@@ -1180,7 +1180,7 @@ gen8_gpe_load_kernels(VADriverContextP ctx,
unsigned int num_kernels)
{
struct i965_driver_data *i965 = i965_driver_data(ctx);
- int i, kernel_size;
+ int i, kernel_size = 0;
unsigned int kernel_offset, end_offset;
unsigned char *kernel_ptr;
struct i965_kernel *kernel;
@@ -1189,11 +1189,10 @@ gen8_gpe_load_kernels(VADriverContextP ctx,
memcpy(gpe_context->kernels, kernel_list, sizeof(*kernel_list) * num_kernels);
gpe_context->num_kernels = num_kernels;

- kernel_size = num_kernels * 64;
for (i = 0; i < num_kernels; i++) {
kernel = &gpe_context->kernels[i];

- kernel_size += kernel->size;
+ kernel_size += ALIGN(kernel->size, 64);
}

gpe_context->instruction_state.bo = dri_bo_alloc(i965->intel.bufmgr,
--
1.9.1
Xiang, Haihao
2016-11-17 08:35:02 UTC
Permalink
This patch deletes idrt_size and uses (idrt.max_entries * idrt.entry_size) instead.
idrt.bo is always set even if the interface descriptor remapping table is a part of
the dynamic state buffer, hence we can use the corresponding settings no matter whether
this table is a part of the dynamic state buffer or not.

Signed-off-by: Xiang, Haihao <***@intel.com>
---
src/gen75_vpp_gpe.c | 10 +++++-----
src/gen8_mfc.c | 7 ++++---
src/gen8_vme.c | 8 +++++---
src/gen9_post_processing.c | 3 ++-
src/gen9_vme.c | 7 ++++---
src/gen9_vp9_encoder.c | 2 --
src/i965_gpe_utils.c | 27 +++++++++++++++++++--------
src/i965_gpe_utils.h | 3 +--
8 files changed, 40 insertions(+), 27 deletions(-)

diff --git a/src/gen75_vpp_gpe.c b/src/gen75_vpp_gpe.c
index 2cddb5a..6f5e2ef 100644
--- a/src/gen75_vpp_gpe.c
+++ b/src/gen75_vpp_gpe.c
@@ -406,13 +406,13 @@ gen8_gpe_process_interface_setup(VADriverContextP ctx,
struct vpp_gpe_context *vpp_gpe_ctx)
{
struct gen8_interface_descriptor_data *desc;
- dri_bo *bo = vpp_gpe_ctx->gpe_ctx.dynamic_state.bo;
+ dri_bo *bo = vpp_gpe_ctx->gpe_ctx.idrt.bo;
int i;

dri_bo_map(bo, 1);
assert(bo->virtual);
desc = (struct gen8_interface_descriptor_data *)(bo->virtual
- + vpp_gpe_ctx->gpe_ctx.idrt_offset);
+ + vpp_gpe_ctx->gpe_ctx.idrt.offset);

/*Setup the descritor table*/
for (i = 0; i < vpp_gpe_ctx->sub_shader_sum; i++){
@@ -880,7 +880,7 @@ vpp_gpe_context_init(VADriverContextP ctx)

gpe_ctx->curbe.length = CURBE_TOTAL_DATA_LENGTH;
gpe_ctx->idrt.max_entries = MAX_INTERFACE_DESC_GEN6;
- gpe_ctx->idrt.entry_size = sizeof(struct gen6_interface_descriptor_data);
+ gpe_ctx->idrt.entry_size = ALIGN(sizeof(struct gen6_interface_descriptor_data), 64);

} else if (IS_GEN8(i965->intel.device_info) ||
IS_GEN9(i965->intel.device_info)) {
@@ -891,8 +891,8 @@ vpp_gpe_context_init(VADriverContextP ctx)
(SURFACE_STATE_PADDED_SIZE_GEN8 + sizeof(unsigned int)) * MAX_MEDIA_SURFACES_GEN6;

gpe_ctx->curbe.length = CURBE_TOTAL_DATA_LENGTH;
- gpe_ctx->idrt_size = sizeof(struct gen8_interface_descriptor_data) * MAX_INTERFACE_DESC_GEN6;
-
+ gpe_ctx->idrt.entry_size = ALIGN(sizeof(struct gen8_interface_descriptor_data), 64);
+ gpe_ctx->idrt.max_entries = MAX_INTERFACE_DESC_GEN6;
}

return vpp_gpe_ctx;
diff --git a/src/gen8_mfc.c b/src/gen8_mfc.c
index 3ed9e84..c4e46fb 100644
--- a/src/gen8_mfc.c
+++ b/src/gen8_mfc.c
@@ -1333,10 +1333,10 @@ gen8_mfc_batchbuffer_idrt_setup(VADriverContextP ctx,
dri_bo *bo;
unsigned char *desc_ptr;

- bo = mfc_context->gpe_context.dynamic_state.bo;
+ bo = mfc_context->gpe_context.idrt.bo;
dri_bo_map(bo, 1);
assert(bo->virtual);
- desc_ptr = (unsigned char *)bo->virtual + mfc_context->gpe_context.idrt_offset;
+ desc_ptr = (unsigned char *)bo->virtual + mfc_context->gpe_context.idrt.offset;

desc = (struct gen8_interface_descriptor_data *)desc_ptr;

@@ -4608,7 +4608,8 @@ Bool gen8_mfc_context_init(VADriverContextP ctx, struct intel_encoder_context *e
assert(mfc_context);
mfc_context->gpe_context.surface_state_binding_table.length = (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_MEDIA_SURFACES_GEN6;

- mfc_context->gpe_context.idrt_size = sizeof(struct gen8_interface_descriptor_data) * MAX_INTERFACE_DESC_GEN6;
+ mfc_context->gpe_context.idrt.entry_size = ALIGN(sizeof(struct gen8_interface_descriptor_data), 64);
+ mfc_context->gpe_context.idrt.max_entries = MAX_INTERFACE_DESC_GEN6;
mfc_context->gpe_context.curbe.length = 32 * 4;
mfc_context->gpe_context.sampler_size = 0;

diff --git a/src/gen8_vme.c b/src/gen8_vme.c
index 96835bf..5184ef1 100644
--- a/src/gen8_vme.c
+++ b/src/gen8_vme.c
@@ -333,10 +333,10 @@ static VAStatus gen8_vme_interface_setup(VADriverContextP ctx,
dri_bo *bo;
unsigned char *desc_ptr;

- bo = vme_context->gpe_context.dynamic_state.bo;
+ bo = vme_context->gpe_context.idrt.bo;
dri_bo_map(bo, 1);
assert(bo->virtual);
- desc_ptr = (unsigned char *)bo->virtual + vme_context->gpe_context.idrt_offset;
+ desc_ptr = (unsigned char *)bo->virtual + vme_context->gpe_context.idrt.offset;

desc = (struct gen8_interface_descriptor_data *)desc_ptr;

@@ -1378,7 +1378,9 @@ Bool gen8_vme_context_init(VADriverContextP ctx, struct intel_encoder_context *e
vme_context->vme_kernel_sum = i965_kernel_num;
vme_context->gpe_context.surface_state_binding_table.length = (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_MEDIA_SURFACES_GEN6;

- vme_context->gpe_context.idrt_size = sizeof(struct gen8_interface_descriptor_data) * MAX_INTERFACE_DESC_GEN6;
+ vme_context->gpe_context.idrt.entry_size = ALIGN(sizeof(struct gen8_interface_descriptor_data), 64);
+ vme_context->gpe_context.idrt.max_entries = MAX_INTERFACE_DESC_GEN6;
+
vme_context->gpe_context.curbe.length = CURBE_TOTAL_DATA_LENGTH;
vme_context->gpe_context.sampler_size = 0;

diff --git a/src/gen9_post_processing.c b/src/gen9_post_processing.c
index 71da501..2473803 100644
--- a/src/gen9_post_processing.c
+++ b/src/gen9_post_processing.c
@@ -536,7 +536,8 @@ gen9_post_processing_context_init(VADriverContextP ctx,
scaling_kernel.bin = pp_10bit_scaling_gen9;
scaling_kernel.size = sizeof(pp_10bit_scaling_gen9);
gen8_gpe_load_kernels(ctx, gpe_context, &scaling_kernel, 1);
- gpe_context->idrt_size = ALIGN(sizeof(struct gen8_interface_descriptor_data), 64);
+ gpe_context->idrt.entry_size = ALIGN(sizeof(struct gen8_interface_descriptor_data), 64);
+ gpe_context->idrt.max_entries = 1;
gpe_context->sampler_size = ALIGN(sizeof(struct gen8_sampler_state), 64);
gpe_context->curbe.length = ALIGN(sizeof(struct scaling_input_parameter), 64);

diff --git a/src/gen9_vme.c b/src/gen9_vme.c
index a59fe2a..33bf8aa 100644
--- a/src/gen9_vme.c
+++ b/src/gen9_vme.c
@@ -376,10 +376,10 @@ static VAStatus gen9_vme_interface_setup(VADriverContextP ctx,
dri_bo *bo;
unsigned char *desc_ptr;

- bo = vme_context->gpe_context.dynamic_state.bo;
+ bo = vme_context->gpe_context.idrt.bo;
dri_bo_map(bo, 1);
assert(bo->virtual);
- desc_ptr = (unsigned char *)bo->virtual + vme_context->gpe_context.idrt_offset;
+ desc_ptr = (unsigned char *)bo->virtual + vme_context->gpe_context.idrt.offset;

desc = (struct gen8_interface_descriptor_data *)desc_ptr;

@@ -2031,7 +2031,8 @@ Bool gen9_vme_context_init(VADriverContextP ctx, struct intel_encoder_context *e
vme_context->vme_kernel_sum = i965_kernel_num;
vme_context->gpe_context.surface_state_binding_table.length = (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_MEDIA_SURFACES_GEN6;

- vme_context->gpe_context.idrt_size = sizeof(struct gen8_interface_descriptor_data) * MAX_INTERFACE_DESC_GEN6;
+ vme_context->gpe_context.idrt.entry_size = ALIGN(sizeof(struct gen8_interface_descriptor_data), 64);
+ vme_context->gpe_context.idrt.max_entries = MAX_INTERFACE_DESC_GEN6;
vme_context->gpe_context.curbe.length = CURBE_TOTAL_DATA_LENGTH;
vme_context->gpe_context.sampler_size = 0;

diff --git a/src/gen9_vp9_encoder.c b/src/gen9_vp9_encoder.c
index 5ad7b26..5d4a4a8 100644
--- a/src/gen9_vp9_encoder.c
+++ b/src/gen9_vp9_encoder.c
@@ -3691,8 +3691,6 @@ gen9_init_gpe_context_vp9(struct i965_gpe_context *gpe_context,

gpe_context->idrt.entry_size = ALIGN(sizeof(struct gen8_interface_descriptor_data), 64); // 8 dws, 1 register
gpe_context->idrt.max_entries = NUM_KERNELS_PER_GPE_CONTEXT;
- gpe_context->idrt_size = ALIGN(sizeof(struct gen8_interface_descriptor_data), 64) *
- NUM_KERNELS_PER_GPE_CONTEXT;

gpe_context->surface_state_binding_table.max_entries = MAX_VP9_ENCODER_SURFACES;
gpe_context->surface_state_binding_table.binding_table_offset = 0;
diff --git a/src/i965_gpe_utils.c b/src/i965_gpe_utils.c
index 3739a88..2d7cfaf 100644
--- a/src/i965_gpe_utils.c
+++ b/src/i965_gpe_utils.c
@@ -1084,8 +1084,8 @@ gen8_gpe_idrt(VADriverContextP ctx,

OUT_BATCH(batch, CMD_MEDIA_INTERFACE_LOAD | (4 - 2));
OUT_BATCH(batch, 0);
- OUT_BATCH(batch, gpe_context->idrt_size);
- OUT_BATCH(batch, gpe_context->idrt_offset);
+ OUT_BATCH(batch, gpe_context->idrt.max_entries * gpe_context->idrt.entry_size);
+ OUT_BATCH(batch, gpe_context->idrt.offset);

ADVANCE_BATCH(batch);
}
@@ -1122,7 +1122,7 @@ gen8_gpe_context_init(VADriverContextP ctx,
assert(bo);
gpe_context->surface_state_binding_table.bo = bo;

- bo_size = gpe_context->idrt_size + gpe_context->curbe.length + gpe_context->sampler_size + 192;
+ bo_size = gpe_context->idrt.max_entries * gpe_context->idrt.entry_size + gpe_context->curbe.length + gpe_context->sampler_size + 192;
dri_bo_unreference(gpe_context->dynamic_state.bo);
bo = dri_bo_alloc(i965->intel.bufmgr,
"surface state & binding table",
@@ -1145,8 +1145,11 @@ gen8_gpe_context_init(VADriverContextP ctx,

/* Interface descriptor offset */
start_offset = ALIGN(end_offset, 64);
- gpe_context->idrt_offset = start_offset;
- end_offset = start_offset + gpe_context->idrt_size;
+ dri_bo_unreference(gpe_context->idrt.bo);
+ gpe_context->idrt.bo = bo;
+ dri_bo_reference(gpe_context->idrt.bo);
+ gpe_context->idrt.offset = start_offset;
+ end_offset = start_offset + gpe_context->idrt.entry_size * gpe_context->idrt.max_entries;

/* Sampler state offset */
start_offset = ALIGN(end_offset, 64);
@@ -1175,6 +1178,9 @@ gen8_gpe_context_destroy(struct i965_gpe_context *gpe_context)

dri_bo_unreference(gpe_context->curbe.bo);
gpe_context->curbe.bo = NULL;
+
+ dri_bo_unreference(gpe_context->idrt.bo);
+ gpe_context->idrt.bo = NULL;
}


@@ -1630,7 +1636,12 @@ gen8_gpe_context_set_dynamic_buffer(VADriverContextP ctx,
dri_bo_reference(gpe_context->curbe.bo);
gpe_context->curbe.offset = ds->curbe_offset;

- gpe_context->idrt_offset = ds->idrt_offset;
+ /* idrt buffer is a part of the dynamic buffer */
+ dri_bo_unreference(gpe_context->idrt.bo);
+ gpe_context->idrt.bo = ds->bo;
+ dri_bo_reference(gpe_context->idrt.bo);
+ gpe_context->idrt.offset = ds->idrt_offset;
+
gpe_context->sampler_offset = ds->sampler_offset;

return;
@@ -1677,10 +1688,10 @@ gen8_gpe_setup_interface_data(VADriverContextP ctx,
dri_bo *bo;
unsigned char *desc_ptr;

- bo = gpe_context->dynamic_state.bo;
+ bo = gpe_context->idrt.bo;
dri_bo_map(bo, 1);
assert(bo->virtual);
- desc_ptr = (unsigned char *)bo->virtual + gpe_context->idrt_offset;
+ desc_ptr = (unsigned char *)bo->virtual + gpe_context->idrt.offset;
desc = (struct gen8_interface_descriptor_data *)desc_ptr;

for (i = 0; i < gpe_context->num_kernels; i++) {
diff --git a/src/i965_gpe_utils.h b/src/i965_gpe_utils.h
index 92123fe..c3b8c79 100644
--- a/src/i965_gpe_utils.h
+++ b/src/i965_gpe_utils.h
@@ -87,6 +87,7 @@ struct i965_gpe_context
dri_bo *bo;
unsigned int max_entries;
unsigned int entry_size; /* in bytes */
+ unsigned int offset;
} idrt;

struct {
@@ -167,8 +168,6 @@ struct i965_gpe_context
unsigned int sampler_offset;
int sampler_entries;
int sampler_size;
- unsigned int idrt_offset;
- int idrt_size;
};

struct gpe_mi_flush_dw_parameter
--
1.9.1
Xiang, Haihao
2016-11-17 08:35:03 UTC
Permalink
Users can now set the sampler entry size and the number of sampler entries. sampler.bo is
always set even if the sampler state is a part of the dynamic state buffer, hence we
can use the corresponding settings no matter whether the sampler state is a part of the
dynamic state buffer or not.

Signed-off-by: Xiang, Haihao <***@intel.com>
---
src/gen8_mfc.c | 3 ++-
src/gen8_vme.c | 4 ++--
src/gen9_post_processing.c | 13 +++++++------
src/gen9_vme.c | 4 ++--
src/gen9_vp9_encoder.c | 15 +++++++++------
src/i965_gpe_utils.c | 21 ++++++++++++++++-----
src/i965_gpe_utils.h | 11 +++++++----
7 files changed, 45 insertions(+), 26 deletions(-)

diff --git a/src/gen8_mfc.c b/src/gen8_mfc.c
index c4e46fb..b0ee6fb 100644
--- a/src/gen8_mfc.c
+++ b/src/gen8_mfc.c
@@ -4611,7 +4611,8 @@ Bool gen8_mfc_context_init(VADriverContextP ctx, struct intel_encoder_context *e
mfc_context->gpe_context.idrt.entry_size = ALIGN(sizeof(struct gen8_interface_descriptor_data), 64);
mfc_context->gpe_context.idrt.max_entries = MAX_INTERFACE_DESC_GEN6;
mfc_context->gpe_context.curbe.length = 32 * 4;
- mfc_context->gpe_context.sampler_size = 0;
+ mfc_context->gpe_context.sampler.entry_size = 0;
+ mfc_context->gpe_context.sampler.max_entries = 0;

mfc_context->gpe_context.vfe_state.max_num_threads = 60 - 1;
mfc_context->gpe_context.vfe_state.num_urb_entries = 16;
diff --git a/src/gen8_vme.c b/src/gen8_vme.c
index 5184ef1..b14d60a 100644
--- a/src/gen8_vme.c
+++ b/src/gen8_vme.c
@@ -1382,8 +1382,8 @@ Bool gen8_vme_context_init(VADriverContextP ctx, struct intel_encoder_context *e
vme_context->gpe_context.idrt.max_entries = MAX_INTERFACE_DESC_GEN6;

vme_context->gpe_context.curbe.length = CURBE_TOTAL_DATA_LENGTH;
- vme_context->gpe_context.sampler_size = 0;
-
+ vme_context->gpe_context.sampler.entry_size = 0;
+ vme_context->gpe_context.sampler.max_entries = 0;

vme_context->gpe_context.vfe_state.max_num_threads = 60 - 1;
vme_context->gpe_context.vfe_state.num_urb_entries = 64;
diff --git a/src/gen9_post_processing.c b/src/gen9_post_processing.c
index 2473803..c0ae791 100644
--- a/src/gen9_post_processing.c
+++ b/src/gen9_post_processing.c
@@ -488,15 +488,15 @@ gen9_p010_scaling_sample_state(VADriverContextP ctx,

if (gpe_context == NULL || !src_rect || !dst_rect)
return;
- dri_bo_map(gpe_context->dynamic_state.bo, 1);
+ dri_bo_map(gpe_context->sampler.bo, 1);

- if (gpe_context->dynamic_state.bo->virtual == NULL)
+ if (gpe_context->sampler.bo->virtual == NULL)
return;

- assert(gpe_context->dynamic_state.bo->virtual);
+ assert(gpe_context->sampler.bo->virtual);

sampler_state = (struct gen8_sampler_state *)
- (gpe_context->dynamic_state.bo->virtual + gpe_context->sampler_offset);
+ (gpe_context->sampler.bo->virtual + gpe_context->sampler.offset);

memset(sampler_state, 0, sizeof(*sampler_state));

@@ -513,7 +513,7 @@ gen9_p010_scaling_sample_state(VADriverContextP ctx,
sampler_state->ss3.s_wrap_mode = I965_TEXCOORDMODE_CLAMP;
sampler_state->ss3.t_wrap_mode = I965_TEXCOORDMODE_CLAMP;

- dri_bo_unmap(gpe_context->dynamic_state.bo);
+ dri_bo_unmap(gpe_context->sampler.bo);
}

void
@@ -538,7 +538,8 @@ gen9_post_processing_context_init(VADriverContextP ctx,
gen8_gpe_load_kernels(ctx, gpe_context, &scaling_kernel, 1);
gpe_context->idrt.entry_size = ALIGN(sizeof(struct gen8_interface_descriptor_data), 64);
gpe_context->idrt.max_entries = 1;
- gpe_context->sampler_size = ALIGN(sizeof(struct gen8_sampler_state), 64);
+ gpe_context->sampler.entry_size = ALIGN(sizeof(struct gen8_sampler_state), 64);
+ gpe_context->sampler.max_entries = 1;
gpe_context->curbe.length = ALIGN(sizeof(struct scaling_input_parameter), 64);

gpe_context->surface_state_binding_table.max_entries = MAX_SCALING_SURFACES;
diff --git a/src/gen9_vme.c b/src/gen9_vme.c
index 33bf8aa..fab80ce 100644
--- a/src/gen9_vme.c
+++ b/src/gen9_vme.c
@@ -2034,8 +2034,8 @@ Bool gen9_vme_context_init(VADriverContextP ctx, struct intel_encoder_context *e
vme_context->gpe_context.idrt.entry_size = ALIGN(sizeof(struct gen8_interface_descriptor_data), 64);
vme_context->gpe_context.idrt.max_entries = MAX_INTERFACE_DESC_GEN6;
vme_context->gpe_context.curbe.length = CURBE_TOTAL_DATA_LENGTH;
- vme_context->gpe_context.sampler_size = 0;
-
+ vme_context->gpe_context.sampler.entry_size = 0;
+ vme_context->gpe_context.sampler.max_entries = 0;

vme_context->gpe_context.vfe_state.max_num_threads = 60 - 1;
vme_context->gpe_context.vfe_state.num_urb_entries = 64;
diff --git a/src/gen9_vp9_encoder.c b/src/gen9_vp9_encoder.c
index 5d4a4a8..1badd88 100644
--- a/src/gen9_vp9_encoder.c
+++ b/src/gen9_vp9_encoder.c
@@ -2594,13 +2594,13 @@ gen9_vp9_dys_set_sampler_state(struct i965_gpe_context *gpe_context)
if (!gpe_context)
return;

- dri_bo_map(gpe_context->dynamic_state.bo, 1);
+ dri_bo_map(gpe_context->sampler.bo, 1);

- if (!gpe_context->dynamic_state.bo->virtual)
+ if (!gpe_context->sampler.bo->virtual)
return;

sampler_cmd = (struct gen9_sampler_8x8_avs *)
- (gpe_context->dynamic_state.bo->virtual + gpe_context->sampler_offset);
+ (gpe_context->sampler.bo->virtual + gpe_context->sampler.offset);

memset(sampler_cmd, 0, sizeof(struct gen9_sampler_8x8_avs));

@@ -2658,7 +2658,7 @@ gen9_vp9_dys_set_sampler_state(struct i965_gpe_context *gpe_context)
&gen9_vp9_avs_coeffs[17 * 8],
15 * sizeof(struct gen8_sampler_8x8_avs_coefficients));

- dri_bo_unmap(gpe_context->dynamic_state.bo);
+ dri_bo_unmap(gpe_context->sampler.bo);
}

static void
@@ -3684,9 +3684,12 @@ gen9_init_gpe_context_vp9(struct i965_gpe_context *gpe_context,
{
gpe_context->curbe.length = kernel_param->curbe_size; // in bytes

- gpe_context->sampler_size = 0;
+ gpe_context->sampler.entry_size = 0;
+ gpe_context->sampler.max_entries = 0;
+
if (kernel_param->sampler_size) {
- gpe_context->sampler_size = ALIGN(kernel_param->sampler_size, 64);
+ gpe_context->sampler.entry_size = ALIGN(kernel_param->sampler_size, 64);
+ gpe_context->sampler.max_entries = 1;
}

gpe_context->idrt.entry_size = ALIGN(sizeof(struct gen8_interface_descriptor_data), 64); // 8 dws, 1 register
diff --git a/src/i965_gpe_utils.c b/src/i965_gpe_utils.c
index 2d7cfaf..d7286fc 100644
--- a/src/i965_gpe_utils.c
+++ b/src/i965_gpe_utils.c
@@ -1122,7 +1122,8 @@ gen8_gpe_context_init(VADriverContextP ctx,
assert(bo);
gpe_context->surface_state_binding_table.bo = bo;

- bo_size = gpe_context->idrt.max_entries * gpe_context->idrt.entry_size + gpe_context->curbe.length + gpe_context->sampler_size + 192;
+ bo_size = gpe_context->idrt.max_entries * gpe_context->idrt.entry_size + gpe_context->curbe.length +
+ gpe_context->sampler.max_entries * gpe_context->sampler.entry_size + 192;
dri_bo_unreference(gpe_context->dynamic_state.bo);
bo = dri_bo_alloc(i965->intel.bufmgr,
"surface state & binding table",
@@ -1153,8 +1154,11 @@ gen8_gpe_context_init(VADriverContextP ctx,

/* Sampler state offset */
start_offset = ALIGN(end_offset, 64);
- gpe_context->sampler_offset = start_offset;
- end_offset = start_offset + gpe_context->sampler_size;
+ dri_bo_unreference(gpe_context->sampler.bo);
+ gpe_context->sampler.bo = bo;
+ dri_bo_reference(gpe_context->sampler.bo);
+ gpe_context->sampler.offset = start_offset;
+ end_offset = start_offset + gpe_context->sampler.entry_size * gpe_context->sampler.max_entries;

/* update the end offset of dynamic_state */
gpe_context->dynamic_state.end_offset = end_offset;
@@ -1181,6 +1185,9 @@ gen8_gpe_context_destroy(struct i965_gpe_context *gpe_context)

dri_bo_unreference(gpe_context->idrt.bo);
gpe_context->idrt.bo = NULL;
+
+ dri_bo_unreference(gpe_context->sampler.bo);
+ gpe_context->sampler.bo = NULL;
}


@@ -1642,7 +1649,11 @@ gen8_gpe_context_set_dynamic_buffer(VADriverContextP ctx,
dri_bo_reference(gpe_context->idrt.bo);
gpe_context->idrt.offset = ds->idrt_offset;

- gpe_context->sampler_offset = ds->sampler_offset;
+ /* sampler buffer is a part of the dynamic buffer */
+ dri_bo_unreference(gpe_context->sampler.bo);
+ gpe_context->sampler.bo = ds->bo;
+ dri_bo_reference(gpe_context->sampler.bo);
+ gpe_context->sampler.offset = ds->sampler_offset;

return;
}
@@ -1704,7 +1715,7 @@ gen8_gpe_setup_interface_data(VADriverContextP ctx,
memset(desc, 0, sizeof(*desc));
desc->desc0.kernel_start_pointer = kernel->kernel_offset >> 6;
desc->desc3.sampler_count = 0;
- desc->desc3.sampler_state_pointer = (gpe_context->sampler_offset >> 5);
+ desc->desc3.sampler_state_pointer = (gpe_context->sampler.offset >> 5);
desc->desc4.binding_table_entry_count = 0;
desc->desc4.binding_table_pointer = (gpe_context->surface_state_binding_table.binding_table_offset >> 5);
desc->desc5.constant_urb_entry_read_offset = 0;
diff --git a/src/i965_gpe_utils.h b/src/i965_gpe_utils.h
index c3b8c79..e19e107 100644
--- a/src/i965_gpe_utils.h
+++ b/src/i965_gpe_utils.h
@@ -97,6 +97,13 @@ struct i965_gpe_context
} curbe;

struct {
+ dri_bo *bo;
+ unsigned int max_entries;
+ unsigned int entry_size; /* in bytes */
+ unsigned int offset;
+ } sampler;
+
+ struct {
unsigned int gpgpu_mode : 1;
unsigned int pad0 : 7;
unsigned int max_num_threads : 16;
@@ -164,10 +171,6 @@ struct i965_gpe_context
int bo_size;
unsigned int end_offset;
} dynamic_state;
-
- unsigned int sampler_offset;
- int sampler_entries;
- int sampler_size;
};

struct gpe_mi_flush_dw_parameter
--
1.9.1
Xiang, Haihao
2016-11-17 08:35:04 UTC
Permalink
Make sure the size of each part of the dynamic state buffer is a multiple of 64 bytes.

Signed-off-by: Xiang, Haihao <***@intel.com>
---
src/i965_gpe_utils.c | 9 +++++----
1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/src/i965_gpe_utils.c b/src/i965_gpe_utils.c
index d7286fc..139ab1b 100644
--- a/src/i965_gpe_utils.c
+++ b/src/i965_gpe_utils.c
@@ -1122,8 +1122,9 @@ gen8_gpe_context_init(VADriverContextP ctx,
assert(bo);
gpe_context->surface_state_binding_table.bo = bo;

- bo_size = gpe_context->idrt.max_entries * gpe_context->idrt.entry_size + gpe_context->curbe.length +
- gpe_context->sampler.max_entries * gpe_context->sampler.entry_size + 192;
+ bo_size = gpe_context->idrt.max_entries * ALIGN(gpe_context->idrt.entry_size, 64) +
+ ALIGN(gpe_context->curbe.length, 64) +
+ gpe_context->sampler.max_entries * ALIGN(gpe_context->sampler.entry_size, 64);
dri_bo_unreference(gpe_context->dynamic_state.bo);
bo = dri_bo_alloc(i965->intel.bufmgr,
"surface state & binding table",
@@ -1150,7 +1151,7 @@ gen8_gpe_context_init(VADriverContextP ctx,
gpe_context->idrt.bo = bo;
dri_bo_reference(gpe_context->idrt.bo);
gpe_context->idrt.offset = start_offset;
- end_offset = start_offset + gpe_context->idrt.entry_size * gpe_context->idrt.max_entries;
+ end_offset = start_offset + ALIGN(gpe_context->idrt.entry_size, 64) * gpe_context->idrt.max_entries;

/* Sampler state offset */
start_offset = ALIGN(end_offset, 64);
@@ -1158,7 +1159,7 @@ gen8_gpe_context_init(VADriverContextP ctx,
gpe_context->sampler.bo = bo;
dri_bo_reference(gpe_context->sampler.bo);
gpe_context->sampler.offset = start_offset;
- end_offset = start_offset + gpe_context->sampler.entry_size * gpe_context->sampler.max_entries;
+ end_offset = start_offset + ALIGN(gpe_context->sampler.entry_size, 64) * gpe_context->sampler.max_entries;

/* update the end offset of dynamic_state */
gpe_context->dynamic_state.end_offset = end_offset;
--
1.9.1
Xiang, Haihao
2016-11-17 08:35:06 UTC
Permalink
gpe_context->curbe.bo always points to the curbe buffer now, and the two functions
can be used on all platforms

Signed-off-by: Xiang, Haihao <***@intel.com>
---
src/gen9_post_processing.c | 4 ++--
src/gen9_vp9_encoder.c | 20 ++++++++++----------
src/i965_gpe_utils.c | 4 ++--
src/i965_gpe_utils.h | 4 ++--
4 files changed, 16 insertions(+), 16 deletions(-)

diff --git a/src/gen9_post_processing.c b/src/gen9_post_processing.c
index c0ae791..862a26a 100644
--- a/src/gen9_post_processing.c
+++ b/src/gen9_post_processing.c
@@ -653,7 +653,7 @@ gen9_gpe_context_p010_scaling_curbe(VADriverContextP ctx,
(dst_rect == NULL) || (dst_surface == NULL))
return;

- scaling_curbe = gen8p_gpe_context_map_curbe(gpe_context);
+ scaling_curbe = i965_gpe_context_map_curbe(gpe_context);

if (!scaling_curbe)
return;
@@ -696,7 +696,7 @@ gen9_gpe_context_p010_scaling_curbe(VADriverContextP ctx,
}
/* I010 will use LSB */

- gen8p_gpe_context_unmap_curbe(gpe_context);
+ i965_gpe_context_unmap_curbe(gpe_context);
}

static bool
diff --git a/src/gen9_vp9_encoder.c b/src/gen9_vp9_encoder.c
index 1badd88..a933c15 100644
--- a/src/gen9_vp9_encoder.c
+++ b/src/gen9_vp9_encoder.c
@@ -1159,7 +1159,7 @@ void gen9_vp9_set_curbe_brc(VADriverContextP ctx,
seq_param = param->pseq_param;
segment_param = param->psegment_param;

- cmd = gen8p_gpe_context_map_curbe(gpe_context);
+ cmd = i965_gpe_context_map_curbe(gpe_context);

if (!cmd)
return;
@@ -1347,7 +1347,7 @@ void gen9_vp9_set_curbe_brc(VADriverContextP ctx,
cmd->dw60.brc_bitstream_size_data_bti = VP9_BTI_BRC_BITSTREAM_SIZE_G9;
cmd->dw61.brc_hfw_data_output_bti = VP9_BTI_BRC_HFW_DATA_G9;

- gen8p_gpe_context_unmap_curbe(gpe_context);
+ i965_gpe_context_unmap_curbe(gpe_context);
return;
}

@@ -2057,7 +2057,7 @@ void gen9_vp9_set_curbe_me(VADriverContextP ctx,
else
enc_media_state = VP9_MEDIA_STATE_4X_ME;

- me_cmd = gen8p_gpe_context_map_curbe(gpe_context);
+ me_cmd = i965_gpe_context_map_curbe(gpe_context);

if (!me_cmd)
return;
@@ -2117,7 +2117,7 @@ void gen9_vp9_set_curbe_me(VADriverContextP ctx,
me_cmd->dw36.vme_fwd_inter_pred_surf_index = VP9_BTI_ME_CURR_PIC_L0;
me_cmd->dw37.vme_bdw_inter_pred_surf_index = VP9_BTI_ME_CURR_PIC_L1;

- gen8p_gpe_context_unmap_curbe(gpe_context);
+ i965_gpe_context_unmap_curbe(gpe_context);
}

static void
@@ -2398,7 +2398,7 @@ gen9_vp9_set_curbe_scaling_cm(VADriverContextP ctx,
{
vp9_scaling4x_curbe_data_cm *curbe_cmd;

- curbe_cmd = gen8p_gpe_context_map_curbe(gpe_context);
+ curbe_cmd = i965_gpe_context_map_curbe(gpe_context);

if (!curbe_cmd)
return;
@@ -2422,7 +2422,7 @@ gen9_vp9_set_curbe_scaling_cm(VADriverContextP ctx,
curbe_cmd->dw10.mbv_proc_stat_bti = VP9_BTI_SCALING_FRAME_MBVPROCSTATS_DST_CM;
}

- gen8p_gpe_context_unmap_curbe(gpe_context);
+ i965_gpe_context_unmap_curbe(gpe_context);
return;
}

@@ -2670,7 +2670,7 @@ gen9_vp9_set_curbe_dys(VADriverContextP ctx,
{
vp9_dys_curbe_data *curbe_cmd;

- curbe_cmd = gen8p_gpe_context_map_curbe(gpe_context);
+ curbe_cmd = i965_gpe_context_map_curbe(gpe_context);

if (!curbe_cmd)
return;
@@ -2690,7 +2690,7 @@ gen9_vp9_set_curbe_dys(VADriverContextP ctx,
curbe_cmd->dw17.output_frame_y_bti = VP9_BTI_DYS_OUTPUT_Y;
curbe_cmd->dw18.avs_sample_idx = 0;

- gen8p_gpe_context_unmap_curbe(gpe_context);
+ i965_gpe_context_unmap_curbe(gpe_context);
}

static void
@@ -3023,7 +3023,7 @@ gen9_vp9_set_curbe_mbenc(VADriverContextP ctx,
seg_param = &tmp_seg_param;
}

- curbe_cmd = gen8p_gpe_context_map_curbe(gpe_context);
+ curbe_cmd = i965_gpe_context_map_curbe(gpe_context);

if (!curbe_cmd)
return;
@@ -3167,7 +3167,7 @@ gen9_vp9_set_curbe_mbenc(VADriverContextP ctx,
curbe_cmd->dw174.cu_record_bti = VP9_BTI_MBENC_CU_RECORDS_G9;
curbe_cmd->dw175.pak_data_bti = VP9_BTI_MBENC_PAK_DATA_G9;

- gen8p_gpe_context_unmap_curbe(gpe_context);
+ i965_gpe_context_unmap_curbe(gpe_context);
return;
}

diff --git a/src/i965_gpe_utils.c b/src/i965_gpe_utils.c
index a6d539b..66609c0 100644
--- a/src/i965_gpe_utils.c
+++ b/src/i965_gpe_utils.c
@@ -1660,7 +1660,7 @@ gen8_gpe_context_set_dynamic_buffer(VADriverContextP ctx,
}

void *
-gen8p_gpe_context_map_curbe(struct i965_gpe_context *gpe_context)
+i965_gpe_context_map_curbe(struct i965_gpe_context *gpe_context)
{
dri_bo_map(gpe_context->curbe.bo, 1);

@@ -1668,7 +1668,7 @@ gen8p_gpe_context_map_curbe(struct i965_gpe_context *gpe_context)
}

void
-gen8p_gpe_context_unmap_curbe(struct i965_gpe_context *gpe_context)
+i965_gpe_context_unmap_curbe(struct i965_gpe_context *gpe_context)
{
dri_bo_unmap(gpe_context->curbe.bo);
}
diff --git a/src/i965_gpe_utils.h b/src/i965_gpe_utils.h
index e19e107..66be748 100644
--- a/src/i965_gpe_utils.h
+++ b/src/i965_gpe_utils.h
@@ -437,10 +437,10 @@ extern void
gen9_gpe_reset_binding_table(VADriverContextP ctx,
struct i965_gpe_context *gpe_context);
extern
-void *gen8p_gpe_context_map_curbe(struct i965_gpe_context *gpe_context);
+void *i965_gpe_context_map_curbe(struct i965_gpe_context *gpe_context);

extern
-void gen8p_gpe_context_unmap_curbe(struct i965_gpe_context *gpe_context);
+void i965_gpe_context_unmap_curbe(struct i965_gpe_context *gpe_context);

extern
void gen8_gpe_setup_interface_data(VADriverContextP ctx,
--
1.9.1
Xiang, Haihao
2016-11-17 08:35:07 UTC
Permalink
As with i965_dri_object_to_buffer_gpe_resource(), use i965_ instead of i965_gpe_ as the prefix.

Signed-off-by: Xiang, Haihao <***@intel.com>
---
src/gen9_post_processing.c | 2 +-
src/gen9_vp9_encoder.c | 2 +-
src/i965_gpe_utils.c | 10 +++++-----
src/i965_gpe_utils.h | 10 +++++-----
4 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/src/gen9_post_processing.c b/src/gen9_post_processing.c
index 862a26a..2128697 100644
--- a/src/gen9_post_processing.c
+++ b/src/gen9_post_processing.c
@@ -578,7 +578,7 @@ gen9_add_dri_buffer_2d_gpe_surface(VADriverContextP ctx,
struct i965_gpe_resource gpe_resource;
struct i965_gpe_surface gpe_surface;

- i965_gpe_dri_object_to_2d_gpe_resource(&gpe_resource, bo, width, height, pitch);
+ i965_dri_object_to_2d_gpe_resource(&gpe_resource, bo, width, height, pitch);
memset(&gpe_surface, 0, sizeof(gpe_surface));
gpe_surface.gpe_resource = &gpe_resource;
gpe_surface.is_2d_surface = 1;
diff --git a/src/gen9_vp9_encoder.c b/src/gen9_vp9_encoder.c
index a933c15..0b66565 100644
--- a/src/gen9_vp9_encoder.c
+++ b/src/gen9_vp9_encoder.c
@@ -1052,7 +1052,7 @@ gen9_add_dri_buffer_2d_gpe_surface(VADriverContextP ctx,
{
struct i965_gpe_resource gpe_resource;

- i965_gpe_dri_object_to_2d_gpe_resource(&gpe_resource, bo, width, height, pitch);
+ i965_dri_object_to_2d_gpe_resource(&gpe_resource, bo, width, height, pitch);
gen9_add_buffer_2d_gpe_surface(ctx,
gpe_context,
&gpe_resource,
diff --git a/src/i965_gpe_utils.c b/src/i965_gpe_utils.c
index 66609c0..b20857b 100644
--- a/src/i965_gpe_utils.c
+++ b/src/i965_gpe_utils.c
@@ -1413,11 +1413,11 @@ i965_dri_object_to_buffer_gpe_resource(struct i965_gpe_resource *res,
}

void
-i965_gpe_dri_object_to_2d_gpe_resource(struct i965_gpe_resource *res,
- dri_bo *bo,
- unsigned int width,
- unsigned int height,
- unsigned int pitch)
+i965_dri_object_to_2d_gpe_resource(struct i965_gpe_resource *res,
+ dri_bo *bo,
+ unsigned int width,
+ unsigned int height,
+ unsigned int pitch)
{
unsigned int swizzle;

diff --git a/src/i965_gpe_utils.h b/src/i965_gpe_utils.h
index 66be748..22165da 100644
--- a/src/i965_gpe_utils.h
+++ b/src/i965_gpe_utils.h
@@ -348,11 +348,11 @@ void i965_object_surface_to_2d_gpe_resource(struct i965_gpe_resource *res,
void i965_dri_object_to_buffer_gpe_resource(struct i965_gpe_resource *res,
dri_bo *bo);

-void i965_gpe_dri_object_to_2d_gpe_resource(struct i965_gpe_resource *res,
- dri_bo *bo,
- unsigned int width,
- unsigned int height,
- unsigned int pitch);
+void i965_dri_object_to_2d_gpe_resource(struct i965_gpe_resource *res,
+ dri_bo *bo,
+ unsigned int width,
+ unsigned int height,
+ unsigned int pitch);

void i965_zero_gpe_resource(struct i965_gpe_resource *res);
--
1.9.1
Xiang, Haihao
2016-11-17 08:35:08 UTC
Permalink
This function can be used on GEN8 too

Signed-off-by: Xiang, Haihao <***@intel.com>
---
src/gen9_post_processing.c | 2 +-
src/gen9_vp9_encoder.c | 2 +-
src/i965_gpe_utils.c | 2 +-
src/i965_gpe_utils.h | 2 +-
4 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/gen9_post_processing.c b/src/gen9_post_processing.c
index 2128697..e9b8f86 100644
--- a/src/gen9_post_processing.c
+++ b/src/gen9_post_processing.c
@@ -608,7 +608,7 @@ gen9_run_kernel_media_object_walker(VADriverContextP ctx,
intel_batchbuffer_emit_mi_flush(batch);

gen9_gpe_pipeline_setup(ctx, gpe_context, batch);
- gen9_gpe_media_object_walker(ctx, gpe_context, batch, param);
+ gen8_gpe_media_object_walker(ctx, gpe_context, batch, param);
gen8_gpe_media_state_flush(ctx, gpe_context, batch);

gen9_gpe_pipeline_end(ctx, gpe_context, batch);
diff --git a/src/gen9_vp9_encoder.c b/src/gen9_vp9_encoder.c
index 0b66565..34d09a6 100644
--- a/src/gen9_vp9_encoder.c
+++ b/src/gen9_vp9_encoder.c
@@ -1129,7 +1129,7 @@ gen9_run_kernel_media_object_walker(VADriverContextP ctx,
gen9_gpe_mi_store_data_imm(ctx, batch, &mi_store_data_imm);

gen9_gpe_pipeline_setup(ctx, gpe_context, batch);
- gen9_gpe_media_object_walker(ctx, gpe_context, batch, param);
+ gen8_gpe_media_object_walker(ctx, gpe_context, batch, param);
gen8_gpe_media_state_flush(ctx, gpe_context, batch);

gen9_gpe_pipeline_end(ctx, gpe_context, batch);
diff --git a/src/i965_gpe_utils.c b/src/i965_gpe_utils.c
index b20857b..c2d06b2 100644
--- a/src/i965_gpe_utils.c
+++ b/src/i965_gpe_utils.c
@@ -2103,7 +2103,7 @@ gen8_gpe_media_object(VADriverContextP ctx,
}

void
-gen9_gpe_media_object_walker(VADriverContextP ctx,
+gen8_gpe_media_object_walker(VADriverContextP ctx,
struct i965_gpe_context *gpe_context,
struct intel_batchbuffer *batch,
struct gpe_media_object_walker_parameter *param)
diff --git a/src/i965_gpe_utils.h b/src/i965_gpe_utils.h
index 22165da..b58a02c 100644
--- a/src/i965_gpe_utils.h
+++ b/src/i965_gpe_utils.h
@@ -506,7 +506,7 @@ gen8_gpe_media_state_flush(VADriverContextP ctx,
struct intel_batchbuffer *batch);

extern void
-gen9_gpe_media_object_walker(VADriverContextP ctx,
+gen8_gpe_media_object_walker(VADriverContextP ctx,
struct i965_gpe_context *gpe_context,
struct intel_batchbuffer *batch,
struct gpe_media_object_walker_parameter *param);
--
1.9.1
Zhao Yakui
2016-11-17 12:59:22 UTC
Permalink
Post by Xiang, Haihao
This function can be used on GEN8 too
According to the HW spec, Broadwell and Gen9 differ somewhat in the
MEDIA_OBJECT_WALKER command.

So it would be better to add a new wrapper of media_object_walker for gen8.

Thanks
Post by Xiang, Haihao
---
src/gen9_post_processing.c | 2 +-
src/gen9_vp9_encoder.c | 2 +-
src/i965_gpe_utils.c | 2 +-
src/i965_gpe_utils.h | 2 +-
4 files changed, 4 insertions(+), 4 deletions(-)
diff --git a/src/gen9_post_processing.c b/src/gen9_post_processing.c
index 2128697..e9b8f86 100644
--- a/src/gen9_post_processing.c
+++ b/src/gen9_post_processing.c
@@ -608,7 +608,7 @@ gen9_run_kernel_media_object_walker(VADriverContextP ctx,
intel_batchbuffer_emit_mi_flush(batch);
gen9_gpe_pipeline_setup(ctx, gpe_context, batch);
- gen9_gpe_media_object_walker(ctx, gpe_context, batch, param);
+ gen8_gpe_media_object_walker(ctx, gpe_context, batch, param);
gen8_gpe_media_state_flush(ctx, gpe_context, batch);
gen9_gpe_pipeline_end(ctx, gpe_context, batch);
diff --git a/src/gen9_vp9_encoder.c b/src/gen9_vp9_encoder.c
index 0b66565..34d09a6 100644
--- a/src/gen9_vp9_encoder.c
+++ b/src/gen9_vp9_encoder.c
@@ -1129,7 +1129,7 @@ gen9_run_kernel_media_object_walker(VADriverContextP ctx,
gen9_gpe_mi_store_data_imm(ctx, batch,&mi_store_data_imm);
gen9_gpe_pipeline_setup(ctx, gpe_context, batch);
- gen9_gpe_media_object_walker(ctx, gpe_context, batch, param);
+ gen8_gpe_media_object_walker(ctx, gpe_context, batch, param);
gen8_gpe_media_state_flush(ctx, gpe_context, batch);
gen9_gpe_pipeline_end(ctx, gpe_context, batch);
diff --git a/src/i965_gpe_utils.c b/src/i965_gpe_utils.c
index b20857b..c2d06b2 100644
--- a/src/i965_gpe_utils.c
+++ b/src/i965_gpe_utils.c
@@ -2103,7 +2103,7 @@ gen8_gpe_media_object(VADriverContextP ctx,
}
void
-gen9_gpe_media_object_walker(VADriverContextP ctx,
+gen8_gpe_media_object_walker(VADriverContextP ctx,
struct i965_gpe_context *gpe_context,
struct intel_batchbuffer *batch,
struct gpe_media_object_walker_parameter *param)
diff --git a/src/i965_gpe_utils.h b/src/i965_gpe_utils.h
index 22165da..b58a02c 100644
--- a/src/i965_gpe_utils.h
+++ b/src/i965_gpe_utils.h
@@ -506,7 +506,7 @@ gen8_gpe_media_state_flush(VADriverContextP ctx,
struct intel_batchbuffer *batch);
extern void
-gen9_gpe_media_object_walker(VADriverContextP ctx,
+gen8_gpe_media_object_walker(VADriverContextP ctx,
struct i965_gpe_context *gpe_context,
struct intel_batchbuffer *batch,
struct gpe_media_object_walker_parameter *param);
Xiang, Haihao
2016-11-17 16:12:00 UTC
Permalink
-----Original Message-----
Yakui
Sent: Thursday, November 17, 2016 8:59 PM
Subject: Re: [Libva] [Libva-intel-driver][PATCH 09/17] Rename
gen9_gpe_media_object_walker() to gen8_gpe_media_object_walker()
Post by Xiang, Haihao
This function can be used on GEN8 too
Based on HW spec, Broadwell and Gen9 have some difference about the
MEDIA_OBJECT_WALKER commands.
So it will be better to add a new wrapper of media_object_walker for gen8.
Yes, a few fields in the command differ between gen8 and gen9, but all of the fields used in the driver are the same.
So I prefer to keep a single function. We can change it if we use the differing fields in the future.
Thanks
Post by Xiang, Haihao
---
src/gen9_post_processing.c | 2 +-
src/gen9_vp9_encoder.c | 2 +-
src/i965_gpe_utils.c | 2 +-
src/i965_gpe_utils.h | 2 +-
4 files changed, 4 insertions(+), 4 deletions(-)
diff --git a/src/gen9_post_processing.c b/src/gen9_post_processing.c
index 2128697..e9b8f86 100644
--- a/src/gen9_post_processing.c
+++ b/src/gen9_post_processing.c
@@ -608,7 +608,7 @@
gen9_run_kernel_media_object_walker(VADriverContextP ctx,
Post by Xiang, Haihao
intel_batchbuffer_emit_mi_flush(batch);
gen9_gpe_pipeline_setup(ctx, gpe_context, batch);
- gen9_gpe_media_object_walker(ctx, gpe_context, batch, param);
+ gen8_gpe_media_object_walker(ctx, gpe_context, batch, param);
gen8_gpe_media_state_flush(ctx, gpe_context, batch);
gen9_gpe_pipeline_end(ctx, gpe_context, batch); diff --git
a/src/gen9_vp9_encoder.c b/src/gen9_vp9_encoder.c index
0b66565..34d09a6 100644
--- a/src/gen9_vp9_encoder.c
+++ b/src/gen9_vp9_encoder.c
@@ -1129,7 +1129,7 @@
gen9_run_kernel_media_object_walker(VADriverContextP ctx,
Post by Xiang, Haihao
gen9_gpe_mi_store_data_imm(ctx, batch,&mi_store_data_imm);
gen9_gpe_pipeline_setup(ctx, gpe_context, batch);
- gen9_gpe_media_object_walker(ctx, gpe_context, batch, param);
+ gen8_gpe_media_object_walker(ctx, gpe_context, batch, param);
gen8_gpe_media_state_flush(ctx, gpe_context, batch);
gen9_gpe_pipeline_end(ctx, gpe_context, batch); diff --git
a/src/i965_gpe_utils.c b/src/i965_gpe_utils.c index b20857b..c2d06b2
100644
--- a/src/i965_gpe_utils.c
+++ b/src/i965_gpe_utils.c
@@ -2103,7 +2103,7 @@ gen8_gpe_media_object(VADriverContextP ctx,
}
void
-gen9_gpe_media_object_walker(VADriverContextP ctx,
+gen8_gpe_media_object_walker(VADriverContextP ctx,
struct i965_gpe_context *gpe_context,
struct intel_batchbuffer *batch,
struct
gpe_media_object_walker_parameter *param) diff --git
a/src/i965_gpe_utils.h b/src/i965_gpe_utils.h index 22165da..b58a02c
100644
--- a/src/i965_gpe_utils.h
+++ b/src/i965_gpe_utils.h
@@ -506,7 +506,7 @@ gen8_gpe_media_state_flush(VADriverContextP ctx,
struct intel_batchbuffer *batch);
extern void
-gen9_gpe_media_object_walker(VADriverContextP ctx,
+gen8_gpe_media_object_walker(VADriverContextP ctx,
struct i965_gpe_context *gpe_context,
struct intel_batchbuffer *batch,
struct
gpe_media_object_walker_parameter *param);
_______________________________________________
Libva mailing list
https://lists.freedesktop.org/mailman/listinfo/libva
Zhao Yakui
2016-11-18 01:06:32 UTC
Permalink
Post by Xiang, Haihao
-----Original Message-----
Yakui
Sent: Thursday, November 17, 2016 8:59 PM
Subject: Re: [Libva] [Libva-intel-driver][PATCH 09/17] Rename
gen9_gpe_media_object_walker() to gen8_gpe_media_object_walker()
Post by Xiang, Haihao
This function can be used on GEN8 too
Based on HW spec, Broadwell and Gen9 have some difference about the
MEDIA_OBJECT_WALKER commands.
So it will be better to add a new wrapper of media_object_walker for gen8.
Yes there are a few different fields in the command between gen8 and gen9. But they are the same for all used fields in the driver
So I prefer the same function. We can change it if we will use the different fields in the future.
OK. It is fine with me if it is sufficient for Gen8.
Post by Xiang, Haihao
Thanks
Post by Xiang, Haihao
---
src/gen9_post_processing.c | 2 +-
src/gen9_vp9_encoder.c | 2 +-
src/i965_gpe_utils.c | 2 +-
src/i965_gpe_utils.h | 2 +-
4 files changed, 4 insertions(+), 4 deletions(-)
diff --git a/src/gen9_post_processing.c b/src/gen9_post_processing.c
index 2128697..e9b8f86 100644
--- a/src/gen9_post_processing.c
+++ b/src/gen9_post_processing.c
@@ -608,7 +608,7 @@
gen9_run_kernel_media_object_walker(VADriverContextP ctx,
Post by Xiang, Haihao
intel_batchbuffer_emit_mi_flush(batch);
gen9_gpe_pipeline_setup(ctx, gpe_context, batch);
- gen9_gpe_media_object_walker(ctx, gpe_context, batch, param);
+ gen8_gpe_media_object_walker(ctx, gpe_context, batch, param);
gen8_gpe_media_state_flush(ctx, gpe_context, batch);
gen9_gpe_pipeline_end(ctx, gpe_context, batch); diff --git
a/src/gen9_vp9_encoder.c b/src/gen9_vp9_encoder.c index
0b66565..34d09a6 100644
--- a/src/gen9_vp9_encoder.c
+++ b/src/gen9_vp9_encoder.c
@@ -1129,7 +1129,7 @@
gen9_run_kernel_media_object_walker(VADriverContextP ctx,
Post by Xiang, Haihao
gen9_gpe_mi_store_data_imm(ctx, batch,&mi_store_data_imm);
gen9_gpe_pipeline_setup(ctx, gpe_context, batch);
- gen9_gpe_media_object_walker(ctx, gpe_context, batch, param);
+ gen8_gpe_media_object_walker(ctx, gpe_context, batch, param);
gen8_gpe_media_state_flush(ctx, gpe_context, batch);
gen9_gpe_pipeline_end(ctx, gpe_context, batch); diff --git
a/src/i965_gpe_utils.c b/src/i965_gpe_utils.c index b20857b..c2d06b2
100644
--- a/src/i965_gpe_utils.c
+++ b/src/i965_gpe_utils.c
@@ -2103,7 +2103,7 @@ gen8_gpe_media_object(VADriverContextP ctx,
}
void
-gen9_gpe_media_object_walker(VADriverContextP ctx,
+gen8_gpe_media_object_walker(VADriverContextP ctx,
struct i965_gpe_context *gpe_context,
struct intel_batchbuffer *batch,
struct
gpe_media_object_walker_parameter *param) diff --git
a/src/i965_gpe_utils.h b/src/i965_gpe_utils.h index 22165da..b58a02c
100644
--- a/src/i965_gpe_utils.h
+++ b/src/i965_gpe_utils.h
@@ -506,7 +506,7 @@ gen8_gpe_media_state_flush(VADriverContextP ctx,
struct intel_batchbuffer *batch);
extern void
-gen9_gpe_media_object_walker(VADriverContextP ctx,
+gen8_gpe_media_object_walker(VADriverContextP ctx,
struct i965_gpe_context *gpe_context,
struct intel_batchbuffer *batch,
struct
gpe_media_object_walker_parameter *param);
_______________________________________________
Libva mailing list
https://lists.freedesktop.org/mailman/listinfo/libva
Xiang, Haihao
2016-11-17 08:35:05 UTC
Permalink
The curbe length in CMD_MEDIA_CURBE_LOAD must be a multiple of 64 bytes.

Signed-off-by: Xiang, Haihao <***@intel.com>
---
src/i965_gpe_utils.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/i965_gpe_utils.c b/src/i965_gpe_utils.c
index 139ab1b..a6d539b 100644
--- a/src/i965_gpe_utils.c
+++ b/src/i965_gpe_utils.c
@@ -1066,7 +1066,7 @@ gen8_gpe_curbe_load(VADriverContextP ctx,

OUT_BATCH(batch, CMD_MEDIA_CURBE_LOAD | (4 - 2));
OUT_BATCH(batch, 0);
- OUT_BATCH(batch, gpe_context->curbe.length);
+ OUT_BATCH(batch, ALIGN(gpe_context->curbe.length, 64));
OUT_BATCH(batch, gpe_context->curbe.offset);

ADVANCE_BATCH(batch);
--
1.9.1
Zhao Yakui
2016-11-17 12:46:34 UTC
Permalink
Post by Xiang, Haihao
It is multiple of 64 bytes
Can we align the size of the curbe buffer when calling
gen8_gpe_context_init()?
Post by Xiang, Haihao
---
src/i965_gpe_utils.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/i965_gpe_utils.c b/src/i965_gpe_utils.c
index 139ab1b..a6d539b 100644
--- a/src/i965_gpe_utils.c
+++ b/src/i965_gpe_utils.c
@@ -1066,7 +1066,7 @@ gen8_gpe_curbe_load(VADriverContextP ctx,
OUT_BATCH(batch, CMD_MEDIA_CURBE_LOAD | (4 - 2));
OUT_BATCH(batch, 0);
- OUT_BATCH(batch, gpe_context->curbe.length);
+ OUT_BATCH(batch, ALIGN(gpe_context->curbe.length, 64));
OUT_BATCH(batch, gpe_context->curbe.offset);
ADVANCE_BATCH(batch);
Xiang, Haihao
2016-11-17 16:22:41 UTC
Permalink
-----Original Message-----
From: Zhao, Yakui
Sent: Thursday, November 17, 2016 8:47 PM
Subject: Re: [Libva] [Libva-intel-driver][PATCH 06/17] Fix curbe length in
CMD_MEDIA_CURBE_LOAD on GEN8+
Post by Xiang, Haihao
It is multiple of 64 bytes
Can we align the size of curbe_buffer when calling the gen8_gpe_context_init?
The size is set by the gpe user, so I prefer not to change it in gen8_gpe_context_init().
Alternatively, we can add a requirement that the size must be a multiple of 64 bytes.
Post by Xiang, Haihao
---
src/i965_gpe_utils.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/i965_gpe_utils.c b/src/i965_gpe_utils.c index
139ab1b..a6d539b 100644
--- a/src/i965_gpe_utils.c
+++ b/src/i965_gpe_utils.c
@@ -1066,7 +1066,7 @@ gen8_gpe_curbe_load(VADriverContextP ctx,
OUT_BATCH(batch, CMD_MEDIA_CURBE_LOAD | (4 - 2));
OUT_BATCH(batch, 0);
- OUT_BATCH(batch, gpe_context->curbe.length);
+ OUT_BATCH(batch, ALIGN(gpe_context->curbe.length, 64));
OUT_BATCH(batch, gpe_context->curbe.offset);
ADVANCE_BATCH(batch);
Zhao Yakui
2016-11-18 01:19:57 UTC
Permalink
Post by Xiang, Haihao
-----Original Message-----
From: Zhao, Yakui
Sent: Thursday, November 17, 2016 8:47 PM
Subject: Re: [Libva] [Libva-intel-driver][PATCH 06/17] Fix curbe length in
CMD_MEDIA_CURBE_LOAD on GEN8+
Post by Xiang, Haihao
It is multiple of 64 bytes
Can we align the size of curbe_buffer when calling the gen8_gpe_context_init?
The size is set by gpe user, so I prefer not to change it in gen8_gpe_context_init().
or we can add a requirement on the size which must be a multiple of 64 bytes.
Sorry, I was wrong. It is only required that the length in
MEDIA_CURBE_LOAD be aligned to 64, while it is aligned to 32 in
INTERFACE_DESCRIPTOR_DATA.
So we can't align them in gen8_gpe_context_init() or add an extra alignment
requirement.

So this patch looks fine to me.
Post by Xiang, Haihao
Post by Xiang, Haihao
---
src/i965_gpe_utils.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/i965_gpe_utils.c b/src/i965_gpe_utils.c index
139ab1b..a6d539b 100644
--- a/src/i965_gpe_utils.c
+++ b/src/i965_gpe_utils.c
@@ -1066,7 +1066,7 @@ gen8_gpe_curbe_load(VADriverContextP ctx,
OUT_BATCH(batch, CMD_MEDIA_CURBE_LOAD | (4 - 2));
OUT_BATCH(batch, 0);
- OUT_BATCH(batch, gpe_context->curbe.length);
+ OUT_BATCH(batch, ALIGN(gpe_context->curbe.length, 64));
OUT_BATCH(batch, gpe_context->curbe.offset);
ADVANCE_BATCH(batch);
Xiang, Haihao
2016-11-17 08:35:10 UTC
Permalink
This function can be used on GEN8 too

Signed-off-by: Xiang, Haihao <***@intel.com>
---
src/gen9_vdenc.c | 4 ++--
src/gen9_vp9_encoder.c | 4 ++--
src/i965_gpe_utils.c | 2 +-
src/i965_gpe_utils.h | 2 +-
4 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/src/gen9_vdenc.c b/src/gen9_vdenc.c
index 87e587a..8cddc41 100644
--- a/src/gen9_vdenc.c
+++ b/src/gen9_vdenc.c
@@ -1501,7 +1501,7 @@ gen9_vdenc_huc_store_huc_status2(VADriverContextP ctx,
mi_store_data_imm_params.bo = vdenc_context->huc_status2_res.bo;
mi_store_data_imm_params.offset = 0;
mi_store_data_imm_params.dw0 = (1 << 6);
- gen9_gpe_mi_store_data_imm(ctx, batch, &mi_store_data_imm_params);
+ gen8_gpe_mi_store_data_imm(ctx, batch, &mi_store_data_imm_params);

/* Store HUC_STATUS2 */
memset(&mi_store_register_mem_params, 0, sizeof(mi_store_register_mem_params));
@@ -2363,7 +2363,7 @@ gen9_vdenc_huc_brc_update(VADriverContextP ctx,
mi_store_data_imm_params.bo = vdenc_context->huc_status_res.bo;
mi_store_data_imm_params.offset = 4;
mi_store_data_imm_params.dw0 = (1 << 31);
- gen9_gpe_mi_store_data_imm(ctx, batch, &mi_store_data_imm_params);
+ gen8_gpe_mi_store_data_imm(ctx, batch, &mi_store_data_imm_params);
}

static void
diff --git a/src/gen9_vp9_encoder.c b/src/gen9_vp9_encoder.c
index a617eb0..4b80716 100644
--- a/src/gen9_vp9_encoder.c
+++ b/src/gen9_vp9_encoder.c
@@ -1087,7 +1087,7 @@ gen9_run_kernel_media_object(VADriverContextP ctx,
mi_store_data_imm.bo = status_buffer->bo;
mi_store_data_imm.offset = status_buffer->media_index_offset;
mi_store_data_imm.dw0 = media_function;
- gen9_gpe_mi_store_data_imm(ctx, batch, &mi_store_data_imm);
+ gen8_gpe_mi_store_data_imm(ctx, batch, &mi_store_data_imm);

intel_batchbuffer_emit_mi_flush(batch);
gen9_gpe_pipeline_setup(ctx, gpe_context, batch);
@@ -1126,7 +1126,7 @@ gen9_run_kernel_media_object_walker(VADriverContextP ctx,
mi_store_data_imm.bo = status_buffer->bo;
mi_store_data_imm.offset = status_buffer->media_index_offset;
mi_store_data_imm.dw0 = media_function;
- gen9_gpe_mi_store_data_imm(ctx, batch, &mi_store_data_imm);
+ gen8_gpe_mi_store_data_imm(ctx, batch, &mi_store_data_imm);

gen9_gpe_pipeline_setup(ctx, gpe_context, batch);
gen8_gpe_media_object_walker(ctx, gpe_context, batch, param);
diff --git a/src/i965_gpe_utils.c b/src/i965_gpe_utils.c
index 9ca4196..85cdd50 100644
--- a/src/i965_gpe_utils.c
+++ b/src/i965_gpe_utils.c
@@ -1512,7 +1512,7 @@ gen9_gpe_mi_flush_dw(VADriverContextP ctx,
}

void
-gen9_gpe_mi_store_data_imm(VADriverContextP ctx,
+gen8_gpe_mi_store_data_imm(VADriverContextP ctx,
struct intel_batchbuffer *batch,
struct gpe_mi_store_data_imm_parameter *params)
{
diff --git a/src/i965_gpe_utils.h b/src/i965_gpe_utils.h
index 323af74..e6cc3dc 100644
--- a/src/i965_gpe_utils.h
+++ b/src/i965_gpe_utils.h
@@ -366,7 +366,7 @@ void gen9_gpe_mi_flush_dw(VADriverContextP ctx,
struct intel_batchbuffer *batch,
struct gpe_mi_flush_dw_parameter *params);

-void gen9_gpe_mi_store_data_imm(VADriverContextP ctx,
+void gen8_gpe_mi_store_data_imm(VADriverContextP ctx,
struct intel_batchbuffer *batch,
struct gpe_mi_store_data_imm_parameter *params);
--
1.9.1
Xiang, Haihao
2016-11-17 08:35:09 UTC
Permalink
This function can be used on GEN8 too

Signed-off-by: Xiang, Haihao <***@intel.com>
---
src/gen9_vdenc.c | 2 +-
src/gen9_vp9_encoder.c | 6 +++---
src/i965_gpe_utils.c | 2 +-
src/i965_gpe_utils.h | 2 +-
4 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/src/gen9_vdenc.c b/src/gen9_vdenc.c
index 2bc15b7..87e587a 100644
--- a/src/gen9_vdenc.c
+++ b/src/gen9_vdenc.c
@@ -3459,7 +3459,7 @@ gen9_vdenc_mfx_vdenc_pipeline(VADriverContextP ctx,
memset(&mi_batch_buffer_start_params, 0, sizeof(mi_batch_buffer_start_params));
mi_batch_buffer_start_params.is_second_level = 1; /* Must be the second level batch buffer */
mi_batch_buffer_start_params.bo = vdenc_context->second_level_batch_res.bo;
- gen9_gpe_mi_batch_buffer_start(ctx, batch, &mi_batch_buffer_start_params);
+ gen8_gpe_mi_batch_buffer_start(ctx, batch, &mi_batch_buffer_start_params);
}

gen9_vdenc_mfx_avc_qm_state(ctx, encoder_context);
diff --git a/src/gen9_vp9_encoder.c b/src/gen9_vp9_encoder.c
index 34d09a6..a617eb0 100644
--- a/src/gen9_vp9_encoder.c
+++ b/src/gen9_vp9_encoder.c
@@ -5622,7 +5622,7 @@ gen9_vp9_pak_picture_level(VADriverContextP ctx,
second_level_batch.is_second_level = 1;
second_level_batch.bo = pak_context->res_pic_state_brc_write_hfw_read_buffer.bo;

- gen9_gpe_mi_batch_buffer_start(ctx, batch, &second_level_batch);
+ gen8_gpe_mi_batch_buffer_start(ctx, batch, &second_level_batch);

if (pic_param->pic_flags.bits.segmentation_enabled &&
seg_param)
@@ -5644,13 +5644,13 @@ gen9_vp9_pak_picture_level(VADriverContextP ctx,
second_level_batch.offset = 0;
second_level_batch.bo = pak_context->res_pak_uncompressed_input_buffer.bo;

- gen9_gpe_mi_batch_buffer_start(ctx, batch, &second_level_batch);
+ gen8_gpe_mi_batch_buffer_start(ctx, batch, &second_level_batch);

/* PAK_OBJECT */
second_level_batch.is_second_level = 1;
second_level_batch.offset = 0;
second_level_batch.bo = pak_context->res_mb_code_surface.bo;
- gen9_gpe_mi_batch_buffer_start(ctx, batch, &second_level_batch);
+ gen8_gpe_mi_batch_buffer_start(ctx, batch, &second_level_batch);

return;
}
diff --git a/src/i965_gpe_utils.c b/src/i965_gpe_utils.c
index c2d06b2..9ca4196 100644
--- a/src/i965_gpe_utils.c
+++ b/src/i965_gpe_utils.c
@@ -1611,7 +1611,7 @@ gen9_gpe_mi_conditional_batch_buffer_end(VADriverContextP ctx,
}

void
-gen9_gpe_mi_batch_buffer_start(VADriverContextP ctx,
+gen8_gpe_mi_batch_buffer_start(VADriverContextP ctx,
struct intel_batchbuffer *batch,
struct gpe_mi_batch_buffer_start_parameter *params)
{
diff --git a/src/i965_gpe_utils.h b/src/i965_gpe_utils.h
index b58a02c..323af74 100644
--- a/src/i965_gpe_utils.h
+++ b/src/i965_gpe_utils.h
@@ -394,7 +394,7 @@ void gen9_gpe_mi_conditional_batch_buffer_end(VADriverContextP ctx,
struct intel_batchbuffer *batch,
struct gpe_mi_conditional_batch_buffer_end_parameter *params);

-void gen9_gpe_mi_batch_buffer_start(VADriverContextP ctx,
+void gen8_gpe_mi_batch_buffer_start(VADriverContextP ctx,
struct intel_batchbuffer *batch,
struct gpe_mi_batch_buffer_start_parameter *params);
--
1.9.1
Xiang, Haihao
2016-11-17 08:35:11 UTC
Permalink
This function can be used on GEN8 too

Signed-off-by: Xiang, Haihao <***@intel.com>
---
src/gen9_vdenc.c | 10 +++++-----
src/gen9_vp9_encoder.c | 4 ++--
src/i965_gpe_utils.c | 2 +-
src/i965_gpe_utils.h | 2 +-
4 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/src/gen9_vdenc.c b/src/gen9_vdenc.c
index 8cddc41..35373f3 100644
--- a/src/gen9_vdenc.c
+++ b/src/gen9_vdenc.c
@@ -1875,7 +1875,7 @@ gen9_vdenc_huc_brc_init_reset(VADriverContextP ctx,

memset(&mi_flush_dw_params, 0, sizeof(mi_flush_dw_params));
mi_flush_dw_params.video_pipeline_cache_invalidate = 1;
- gen9_gpe_mi_flush_dw(ctx, batch, &mi_flush_dw_params);
+ gen8_gpe_mi_flush_dw(ctx, batch, &mi_flush_dw_params);
}

static void
@@ -2287,7 +2287,7 @@ gen9_vdenc_huc_brc_update(VADriverContextP ctx,

memset(&mi_flush_dw_params, 0, sizeof(mi_flush_dw_params));
mi_flush_dw_params.video_pipeline_cache_invalidate = 1;
- gen9_gpe_mi_flush_dw(ctx, batch, &mi_flush_dw_params);
+ gen8_gpe_mi_flush_dw(ctx, batch, &mi_flush_dw_params);

if (!vdenc_context->brc_initted || vdenc_context->brc_need_reset) {
struct gpe_mi_conditional_batch_buffer_end_parameter mi_conditional_batch_buffer_end_params;
@@ -2350,7 +2350,7 @@ gen9_vdenc_huc_brc_update(VADriverContextP ctx,

memset(&mi_flush_dw_params, 0, sizeof(mi_flush_dw_params));
mi_flush_dw_params.video_pipeline_cache_invalidate = 1;
- gen9_gpe_mi_flush_dw(ctx, batch, &mi_flush_dw_params);
+ gen8_gpe_mi_flush_dw(ctx, batch, &mi_flush_dw_params);

/* Store HUC_STATUS */
memset(&mi_store_register_mem_params, 0, sizeof(mi_store_register_mem_params));
@@ -3407,7 +3407,7 @@ gen9_vdenc_mfx_vdenc_avc_slices(VADriverContextP ctx,

memset(&mi_flush_dw_params, 0, sizeof(mi_flush_dw_params));
mi_flush_dw_params.video_pipeline_cache_invalidate = 1;
- gen9_gpe_mi_flush_dw(ctx, batch, &mi_flush_dw_params);
+ gen8_gpe_mi_flush_dw(ctx, batch, &mi_flush_dw_params);
}

static void
@@ -3502,7 +3502,7 @@ gen9_vdenc_read_status(VADriverContextP ctx, struct intel_encoder_context *encod
int i;

memset(&mi_flush_dw_params, 0, sizeof(mi_flush_dw_params));
- gen9_gpe_mi_flush_dw(ctx, batch, &mi_flush_dw_params);
+ gen8_gpe_mi_flush_dw(ctx, batch, &mi_flush_dw_params);

memset(&mi_store_register_mem_params, 0, sizeof(mi_store_register_mem_params));
mi_store_register_mem_params.mmio_offset = MFC_BITSTREAM_BYTECOUNT_FRAME_REG; /* TODO: fix it if VDBOX2 is used */
diff --git a/src/gen9_vp9_encoder.c b/src/gen9_vp9_encoder.c
index 4b80716..98ae3ca 100644
--- a/src/gen9_vp9_encoder.c
+++ b/src/gen9_vp9_encoder.c
@@ -5673,7 +5673,7 @@ gen9_vp9_read_mfc_status(VADriverContextP ctx, struct intel_encoder_context *enc
status_buffer = &(vp9_state->status_buffer);

memset(&mi_flush_dw_param, 0, sizeof(mi_flush_dw_param));
- gen9_gpe_mi_flush_dw(ctx, batch, &mi_flush_dw_param);
+ gen8_gpe_mi_flush_dw(ctx, batch, &mi_flush_dw_param);

memset(&mi_store_reg_mem_param, 0, sizeof(mi_store_reg_mem_param));
mi_store_reg_mem_param.bo = status_buffer->bo;
@@ -5705,7 +5705,7 @@ gen9_vp9_read_mfc_status(VADriverContextP ctx, struct intel_encoder_context *enc
status_buffer->vp9_image_ctrl_reg_offset;
gen9_gpe_mi_store_register_mem(ctx, batch, &mi_store_reg_mem_param);

- gen9_gpe_mi_flush_dw(ctx, batch, &mi_flush_dw_param);
+ gen8_gpe_mi_flush_dw(ctx, batch, &mi_flush_dw_param);

return;
}
diff --git a/src/i965_gpe_utils.c b/src/i965_gpe_utils.c
index 85cdd50..2f328f9 100644
--- a/src/i965_gpe_utils.c
+++ b/src/i965_gpe_utils.c
@@ -1479,7 +1479,7 @@ i965_unmap_gpe_resource(struct i965_gpe_resource *res)
}

void
-gen9_gpe_mi_flush_dw(VADriverContextP ctx,
+gen8_gpe_mi_flush_dw(VADriverContextP ctx,
struct intel_batchbuffer *batch,
struct gpe_mi_flush_dw_parameter *params)
{
diff --git a/src/i965_gpe_utils.h b/src/i965_gpe_utils.h
index e6cc3dc..517f353 100644
--- a/src/i965_gpe_utils.h
+++ b/src/i965_gpe_utils.h
@@ -362,7 +362,7 @@ void *i965_map_gpe_resource(struct i965_gpe_resource *res);

void i965_unmap_gpe_resource(struct i965_gpe_resource *res);

-void gen9_gpe_mi_flush_dw(VADriverContextP ctx,
+void gen8_gpe_mi_flush_dw(VADriverContextP ctx,
struct intel_batchbuffer *batch,
struct gpe_mi_flush_dw_parameter *params);
--
1.9.1
Xiang, Haihao
2016-11-17 08:35:12 UTC
Permalink
This function can be used on GEN8 too

Signed-off-by: Xiang, Haihao <***@intel.com>
---
src/gen9_vdenc.c | 10 +++++-----
src/gen9_vp9_encoder.c | 10 +++++-----
src/i965_gpe_utils.c | 2 +-
src/i965_gpe_utils.h | 2 +-
4 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/src/gen9_vdenc.c b/src/gen9_vdenc.c
index 35373f3..c8cdca0 100644
--- a/src/gen9_vdenc.c
+++ b/src/gen9_vdenc.c
@@ -1508,7 +1508,7 @@ gen9_vdenc_huc_store_huc_status2(VADriverContextP ctx,
mi_store_register_mem_params.mmio_offset = VCS0_HUC_STATUS2;
mi_store_register_mem_params.bo = vdenc_context->huc_status2_res.bo;
mi_store_register_mem_params.offset = 4;
- gen9_gpe_mi_store_register_mem(ctx, batch, &mi_store_register_mem_params);
+ gen8_gpe_mi_store_register_mem(ctx, batch, &mi_store_register_mem_params);
}

static void
@@ -2356,7 +2356,7 @@ gen9_vdenc_huc_brc_update(VADriverContextP ctx,
memset(&mi_store_register_mem_params, 0, sizeof(mi_store_register_mem_params));
mi_store_register_mem_params.mmio_offset = VCS0_HUC_STATUS;
mi_store_register_mem_params.bo = vdenc_context->huc_status_res.bo;
- gen9_gpe_mi_store_register_mem(ctx, batch, &mi_store_register_mem_params);
+ gen8_gpe_mi_store_register_mem(ctx, batch, &mi_store_register_mem_params);

/* Write HUC_STATUS mask (1 << 31) */
memset(&mi_store_data_imm_params, 0, sizeof(mi_store_data_imm_params));
@@ -3508,19 +3508,19 @@ gen9_vdenc_read_status(VADriverContextP ctx, struct intel_encoder_context *encod
mi_store_register_mem_params.mmio_offset = MFC_BITSTREAM_BYTECOUNT_FRAME_REG; /* TODO: fix it if VDBOX2 is used */
mi_store_register_mem_params.bo = vdenc_context->status_bffuer.res.bo;
mi_store_register_mem_params.offset = base_offset + vdenc_context->status_bffuer.bytes_per_frame_offset;
- gen9_gpe_mi_store_register_mem(ctx, batch, &mi_store_register_mem_params);
+ gen8_gpe_mi_store_register_mem(ctx, batch, &mi_store_register_mem_params);

/* Update DMEM buffer for BRC Update */
for (i = 0; i < NUM_OF_BRC_PAK_PASSES; i++) {
mi_store_register_mem_params.mmio_offset = MFC_BITSTREAM_BYTECOUNT_FRAME_REG; /* TODO: fix it if VDBOX2 is used */
mi_store_register_mem_params.bo = vdenc_context->brc_update_dmem_res[i].bo;
mi_store_register_mem_params.offset = 5 * sizeof(uint32_t);
- gen9_gpe_mi_store_register_mem(ctx, batch, &mi_store_register_mem_params);
+ gen8_gpe_mi_store_register_mem(ctx, batch, &mi_store_register_mem_params);

mi_store_register_mem_params.mmio_offset = MFC_IMAGE_STATUS_CTRL_REG; /* TODO: fix it if VDBOX2 is used */
mi_store_register_mem_params.bo = vdenc_context->brc_update_dmem_res[i].bo;
mi_store_register_mem_params.offset = 7 * sizeof(uint32_t);
- gen9_gpe_mi_store_register_mem(ctx, batch, &mi_store_register_mem_params);
+ gen8_gpe_mi_store_register_mem(ctx, batch, &mi_store_register_mem_params);
}
}

diff --git a/src/gen9_vp9_encoder.c b/src/gen9_vp9_encoder.c
index 98ae3ca..74d0d2f 100644
--- a/src/gen9_vp9_encoder.c
+++ b/src/gen9_vp9_encoder.c
@@ -5679,31 +5679,31 @@ gen9_vp9_read_mfc_status(VADriverContextP ctx, struct intel_encoder_context *enc
mi_store_reg_mem_param.bo = status_buffer->bo;
mi_store_reg_mem_param.offset = status_buffer->bs_byte_count_offset;
mi_store_reg_mem_param.mmio_offset = status_buffer->vp9_bs_frame_reg_offset;
- gen9_gpe_mi_store_register_mem(ctx, batch, &mi_store_reg_mem_param);
+ gen8_gpe_mi_store_register_mem(ctx, batch, &mi_store_reg_mem_param);

mi_store_reg_mem_param.bo = pak_context->res_brc_bitstream_size_buffer.bo;
mi_store_reg_mem_param.offset = 0;
mi_store_reg_mem_param.mmio_offset = status_buffer->vp9_bs_frame_reg_offset;
- gen9_gpe_mi_store_register_mem(ctx, batch, &mi_store_reg_mem_param);
+ gen8_gpe_mi_store_register_mem(ctx, batch, &mi_store_reg_mem_param);

/* Read HCP Image status */
mi_store_reg_mem_param.bo = status_buffer->bo;
mi_store_reg_mem_param.offset = status_buffer->image_status_mask_offset;
mi_store_reg_mem_param.mmio_offset =
status_buffer->vp9_image_mask_reg_offset;
- gen9_gpe_mi_store_register_mem(ctx, batch, &mi_store_reg_mem_param);
+ gen8_gpe_mi_store_register_mem(ctx, batch, &mi_store_reg_mem_param);

mi_store_reg_mem_param.bo = status_buffer->bo;
mi_store_reg_mem_param.offset = status_buffer->image_status_ctrl_offset;
mi_store_reg_mem_param.mmio_offset =
status_buffer->vp9_image_ctrl_reg_offset;
- gen9_gpe_mi_store_register_mem(ctx, batch, &mi_store_reg_mem_param);
+ gen8_gpe_mi_store_register_mem(ctx, batch, &mi_store_reg_mem_param);

mi_store_reg_mem_param.bo = pak_context->res_brc_bitstream_size_buffer.bo;
mi_store_reg_mem_param.offset = 4;
mi_store_reg_mem_param.mmio_offset =
status_buffer->vp9_image_ctrl_reg_offset;
- gen9_gpe_mi_store_register_mem(ctx, batch, &mi_store_reg_mem_param);
+ gen8_gpe_mi_store_register_mem(ctx, batch, &mi_store_reg_mem_param);

gen8_gpe_mi_flush_dw(ctx, batch, &mi_flush_dw_param);

diff --git a/src/i965_gpe_utils.c b/src/i965_gpe_utils.c
index 2f328f9..4c0384e 100644
--- a/src/i965_gpe_utils.c
+++ b/src/i965_gpe_utils.c
@@ -1535,7 +1535,7 @@ gen8_gpe_mi_store_data_imm(VADriverContextP ctx,
}

void
-gen9_gpe_mi_store_register_mem(VADriverContextP ctx,
+gen8_gpe_mi_store_register_mem(VADriverContextP ctx,
struct intel_batchbuffer *batch,
struct gpe_mi_store_register_mem_parameter *params)
{
diff --git a/src/i965_gpe_utils.h b/src/i965_gpe_utils.h
index 517f353..1a3210b 100644
--- a/src/i965_gpe_utils.h
+++ b/src/i965_gpe_utils.h
@@ -370,7 +370,7 @@ void gen8_gpe_mi_store_data_imm(VADriverContextP ctx,
struct intel_batchbuffer *batch,
struct gpe_mi_store_data_imm_parameter *params);

-void gen9_gpe_mi_store_register_mem(VADriverContextP ctx,
+void gen8_gpe_mi_store_register_mem(VADriverContextP ctx,
struct intel_batchbuffer *batch,
struct gpe_mi_store_register_mem_parameter *params);
--
1.9.1
Xiang, Haihao
2016-11-17 08:35:14 UTC
Permalink
Signed-off-by: Xiang, Haihao <***@intel.com>
---
src/i965_gpe_utils.c | 231 +++++++++++++++++++++++++++++++++++++++++++++++++++
src/i965_gpe_utils.h | 5 ++
2 files changed, 236 insertions(+)

diff --git a/src/i965_gpe_utils.c b/src/i965_gpe_utils.c
index d1b1941..e7f1c25 100644
--- a/src/i965_gpe_utils.c
+++ b/src/i965_gpe_utils.c
@@ -2206,3 +2206,234 @@ gen8_gpe_reset_binding_table(VADriverContextP ctx, struct i965_gpe_context *gpe_

dri_bo_unmap(gpe_context->surface_state_binding_table.bo);
}
+
+static void
+gen8_gpe_set_2d_surface_state(struct gen8_surface_state *ss,
+ unsigned int vert_line_stride_offset,
+ unsigned int vert_line_stride,
+ unsigned int cacheability_control,
+ unsigned int format,
+ unsigned int tiling,
+ unsigned int width,
+ unsigned int height,
+ unsigned int pitch,
+ unsigned int base_offset,
+ unsigned int y_offset)
+{
+ memset(ss, 0, sizeof(*ss));
+
+ ss->ss0.vert_line_stride_ofs = vert_line_stride_offset;
+ ss->ss0.vert_line_stride = vert_line_stride;
+ ss->ss0.surface_format = format;
+ ss->ss0.surface_type = I965_SURFACE_2D;
+
+ ss->ss1.surface_mocs = cacheability_control;
+
+ ss->ss2.width = width - 1;
+ ss->ss2.height = height - 1;
+
+ ss->ss3.pitch = pitch - 1;
+
+ ss->ss5.y_offset = y_offset;
+
+ ss->ss7.shader_chanel_select_a = HSW_SCS_ALPHA;
+ ss->ss7.shader_chanel_select_b = HSW_SCS_BLUE;
+ ss->ss7.shader_chanel_select_g = HSW_SCS_GREEN;
+ ss->ss7.shader_chanel_select_r = HSW_SCS_RED;
+
+ ss->ss8.base_addr = base_offset;
+
+ gen8_gpe_set_surface_tiling(ss, tiling);
+}
+
+static void
+gen8_gpe_set_adv_surface_state(struct gen8_surface_state2 *ss,
+ unsigned int v_direction,
+ unsigned int cacheability_control,
+ unsigned int format,
+ unsigned int tiling,
+ unsigned int width,
+ unsigned int height,
+ unsigned int pitch,
+ unsigned int base_offset,
+ unsigned int y_cb_offset)
+{
+ memset(ss, 0, sizeof(*ss));
+
+ ss->ss1.cbcr_pixel_offset_v_direction = v_direction;
+ ss->ss1.width = width - 1;
+ ss->ss1.height = height - 1;
+
+ ss->ss2.surface_format = format;
+ ss->ss2.interleave_chroma = 1;
+ ss->ss2.pitch = pitch - 1;
+
+ ss->ss3.y_offset_for_cb = y_cb_offset;
+
+ ss->ss5.surface_object_mocs = cacheability_control;
+
+ ss->ss6.base_addr = base_offset;
+
+ gen8_gpe_set_surface2_tiling(ss, tiling);
+}
+
+static void
+gen8_gpe_set_buffer2_surface_state(struct gen8_surface_state *ss,
+ unsigned int cacheability_control,
+ unsigned int format,
+ unsigned int size,
+ unsigned int pitch,
+ unsigned int base_offset)
+{
+ memset(ss, 0, sizeof(*ss));
+
+ ss->ss0.surface_format = format;
+ ss->ss0.surface_type = I965_SURFACE_BUFFER;
+
+ ss->ss1.surface_mocs = cacheability_control;
+
+ ss->ss2.width = (size - 1) & 0x7F;
+ ss->ss2.height = ((size - 1) & 0x1FFF80) >> 7;
+
+ ss->ss3.depth = ((size - 1) & 0xFE00000) >> 21;
+ ss->ss3.pitch = pitch - 1;
+
+ ss->ss7.shader_chanel_select_a = HSW_SCS_ALPHA;
+ ss->ss7.shader_chanel_select_b = HSW_SCS_BLUE;
+ ss->ss7.shader_chanel_select_g = HSW_SCS_GREEN;
+ ss->ss7.shader_chanel_select_r = HSW_SCS_RED;
+
+ ss->ss8.base_addr = base_offset;
+}
+
+void
+gen8_gpe_context_add_surface(struct i965_gpe_context *gpe_context,
+ struct i965_gpe_surface *gpe_surface,
+ int index)
+{
+ char *buf;
+ unsigned int tiling, swizzle, width, height, pitch, tile_alignment, y_offset = 0;
+ unsigned int surface_state_offset = gpe_context->surface_state_binding_table.surface_state_offset +
+ index * SURFACE_STATE_PADDED_SIZE_GEN8;
+ unsigned int binding_table_offset = gpe_context->surface_state_binding_table.binding_table_offset +
+ index * 4;
+ struct i965_gpe_resource *gpe_resource = gpe_surface->gpe_resource;
+
+ dri_bo_get_tiling(gpe_resource->bo, &tiling, &swizzle);
+
+ dri_bo_map(gpe_context->surface_state_binding_table.bo, 1);
+ buf = (char *)gpe_context->surface_state_binding_table.bo->virtual;
+ *((unsigned int *)(buf + binding_table_offset)) = surface_state_offset;
+
+ if (gpe_surface->is_2d_surface) {
+ struct gen8_surface_state *ss = (struct gen8_surface_state *)(buf + surface_state_offset);
+
+ width = gpe_resource->width;
+ height = gpe_resource->height;
+ pitch = gpe_resource->pitch;
+
+ if (gpe_surface->is_media_block_rw)
+ width = (ALIGN(width, 4) >> 2);
+
+ gen8_gpe_set_2d_surface_state(ss,
+ gpe_surface->vert_line_stride_offset,
+ gpe_surface->vert_line_stride,
+ gpe_surface->cacheability_control,
+ gpe_surface->format,
+ tiling,
+ width, height, pitch,
+ gpe_resource->bo->offset,
+ y_offset);
+
+ dri_bo_emit_reloc(gpe_context->surface_state_binding_table.bo,
+ I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
+ 0,
+ surface_state_offset + offsetof(struct gen8_surface_state, ss8),
+ gpe_resource->bo);
+ } else if (gpe_surface->is_uv_surface) {
+ unsigned int cbcr_offset;
+ struct gen8_surface_state *ss = (struct gen8_surface_state *)(buf + surface_state_offset);
+
+ width = gpe_resource->width;
+ height = gpe_resource->height / 2;
+ pitch = gpe_resource->pitch;
+
+ if (gpe_surface->is_media_block_rw)
+ width = (ALIGN(width, 4) >> 2);
+
+ if (tiling == I915_TILING_Y) {
+ tile_alignment = 32;
+ } else if (tiling == I915_TILING_X) {
+ tile_alignment = 8;
+ } else
+ tile_alignment = 1;
+
+ y_offset = (gpe_resource->y_cb_offset % tile_alignment) >> 2;
+ cbcr_offset = ALIGN_FLOOR(gpe_resource->y_cb_offset, tile_alignment) * pitch;
+
+ gen8_gpe_set_2d_surface_state(ss,
+ gpe_surface->vert_line_stride_offset,
+ gpe_surface->vert_line_stride,
+ gpe_surface->cacheability_control,
+ I965_SURFACEFORMAT_R16_UINT,
+ tiling,
+ width, height, pitch,
+ gpe_resource->bo->offset + cbcr_offset,
+ y_offset);
+
+ dri_bo_emit_reloc(gpe_context->surface_state_binding_table.bo,
+ I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
+ cbcr_offset,
+ surface_state_offset + offsetof(struct gen8_surface_state, ss8),
+ gpe_resource->bo);
+ } else if (gpe_surface->is_adv_surface) {
+ struct gen8_surface_state2 *ss = (struct gen8_surface_state2 *)(buf + surface_state_offset);
+
+ width = gpe_resource->width;
+ height = gpe_resource->height;
+ pitch = gpe_resource->pitch;
+
+ gen8_gpe_set_adv_surface_state(ss,
+ gpe_surface->v_direction,
+ gpe_surface->cacheability_control,
+ MFX_SURFACE_PLANAR_420_8,
+ tiling,
+ width, height, pitch,
+ gpe_resource->bo->offset,
+ gpe_resource->y_cb_offset);
+
+ dri_bo_emit_reloc(gpe_context->surface_state_binding_table.bo,
+ I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
+ 0,
+ surface_state_offset + offsetof(struct gen8_surface_state2, ss6),
+ gpe_resource->bo);
+ } else {
+ struct gen8_surface_state *ss = (struct gen8_surface_state *)(buf + surface_state_offset);
+ unsigned int format;
+
+ assert(gpe_surface->is_buffer);
+
+ if (gpe_surface->is_raw_buffer) {
+ format = I965_SURFACEFORMAT_RAW;
+ pitch = 1;
+ } else {
+ format = I965_SURFACEFORMAT_R32_UINT;
+ pitch = sizeof(unsigned int);
+ }
+
+ gen8_gpe_set_buffer2_surface_state(ss,
+ gpe_surface->cacheability_control,
+ format,
+ gpe_surface->size,
+ pitch,
+ gpe_resource->bo->offset + gpe_surface->offset);
+
+ dri_bo_emit_reloc(gpe_context->surface_state_binding_table.bo,
+ I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
+ gpe_surface->offset,
+ surface_state_offset + offsetof(struct gen8_surface_state, ss8),
+ gpe_resource->bo);
+ }
+
+ dri_bo_unmap(gpe_context->surface_state_binding_table.bo);
+}
diff --git a/src/i965_gpe_utils.h b/src/i965_gpe_utils.h
index 076f584..cbf3b05 100644
--- a/src/i965_gpe_utils.h
+++ b/src/i965_gpe_utils.h
@@ -526,4 +526,9 @@ intel_vpp_init_media_object_walker_parameter(struct intel_vpp_kernel_walker_para
extern void
gen8_gpe_reset_binding_table(VADriverContextP ctx, struct i965_gpe_context *gpe_context);

+extern void
+gen8_gpe_context_add_surface(struct i965_gpe_context *gpe_context,
+ struct i965_gpe_surface *gpe_surface,
+ int index);
+
#endif /* _I965_GPE_UTILS_H_ */
--
1.9.1
Zhao Yakui
2016-11-17 13:13:58 UTC
Permalink
Can gen8_gpe_context_add_surface() follow logic similar to that in
gen9_gpe_context_add_surface()?
2d_surface && override_offset
2d && is_uv
2d
adv
buffer
The 2d_surface && override_offset case is especially useful for adding the
U/V planes of an I420/YUV surface.


Thanks
---
src/i965_gpe_utils.c | 231 +++++++++++++++++++++++++++++++++++++++++++++++++++
src/i965_gpe_utils.h | 5 ++
2 files changed, 236 insertions(+)
diff --git a/src/i965_gpe_utils.c b/src/i965_gpe_utils.c
index d1b1941..e7f1c25 100644
--- a/src/i965_gpe_utils.c
+++ b/src/i965_gpe_utils.c
@@ -2206,3 +2206,234 @@ gen8_gpe_reset_binding_table(VADriverContextP ctx, struct i965_gpe_context *gpe_
dri_bo_unmap(gpe_context->surface_state_binding_table.bo);
}
+
+static void
+gen8_gpe_set_2d_surface_state(struct gen8_surface_state *ss,
+ unsigned int vert_line_stride_offset,
+ unsigned int vert_line_stride,
+ unsigned int cacheability_control,
+ unsigned int format,
+ unsigned int tiling,
+ unsigned int width,
+ unsigned int height,
+ unsigned int pitch,
+ unsigned int base_offset,
+ unsigned int y_offset)
+{
+ memset(ss, 0, sizeof(*ss));
+
+ ss->ss0.vert_line_stride_ofs = vert_line_stride_offset;
+ ss->ss0.vert_line_stride = vert_line_stride;
+ ss->ss0.surface_format = format;
+ ss->ss0.surface_type = I965_SURFACE_2D;
+
+ ss->ss1.surface_mocs = cacheability_control;
+
+ ss->ss2.width = width - 1;
+ ss->ss2.height = height - 1;
+
+ ss->ss3.pitch = pitch - 1;
+
+ ss->ss5.y_offset = y_offset;
+
+ ss->ss7.shader_chanel_select_a = HSW_SCS_ALPHA;
+ ss->ss7.shader_chanel_select_b = HSW_SCS_BLUE;
+ ss->ss7.shader_chanel_select_g = HSW_SCS_GREEN;
+ ss->ss7.shader_chanel_select_r = HSW_SCS_RED;
+
+ ss->ss8.base_addr = base_offset;
+
+ gen8_gpe_set_surface_tiling(ss, tiling);
+}
+
+static void
+gen8_gpe_set_adv_surface_state(struct gen8_surface_state2 *ss,
+ unsigned int v_direction,
+ unsigned int cacheability_control,
+ unsigned int format,
+ unsigned int tiling,
+ unsigned int width,
+ unsigned int height,
+ unsigned int pitch,
+ unsigned int base_offset,
+ unsigned int y_cb_offset)
+{
+ memset(ss, 0, sizeof(*ss));
+
+ ss->ss1.cbcr_pixel_offset_v_direction = v_direction;
+ ss->ss1.width = width - 1;
+ ss->ss1.height = height - 1;
+
+ ss->ss2.surface_format = format;
+ ss->ss2.interleave_chroma = 1;
+ ss->ss2.pitch = pitch - 1;
+
+ ss->ss3.y_offset_for_cb = y_cb_offset;
+
+ ss->ss5.surface_object_mocs = cacheability_control;
+
+ ss->ss6.base_addr = base_offset;
+
+ gen8_gpe_set_surface2_tiling(ss, tiling);
+}
+
+static void
+gen8_gpe_set_buffer2_surface_state(struct gen8_surface_state *ss,
+ unsigned int cacheability_control,
+ unsigned int format,
+ unsigned int size,
+ unsigned int pitch,
+ unsigned int base_offset)
+{
+ memset(ss, 0, sizeof(*ss));
+
+ ss->ss0.surface_format = format;
+ ss->ss0.surface_type = I965_SURFACE_BUFFER;
+
+ ss->ss1.surface_mocs = cacheability_control;
+
+ ss->ss2.width = (size - 1)& 0x7F;
+ ss->ss2.height = ((size - 1)& 0x1FFF80)>> 7;
+
+ ss->ss3.depth = ((size - 1)& 0xFE00000)>> 21;
+ ss->ss3.pitch = pitch - 1;
+
+ ss->ss7.shader_chanel_select_a = HSW_SCS_ALPHA;
+ ss->ss7.shader_chanel_select_b = HSW_SCS_BLUE;
+ ss->ss7.shader_chanel_select_g = HSW_SCS_GREEN;
+ ss->ss7.shader_chanel_select_r = HSW_SCS_RED;
+
+ ss->ss8.base_addr = base_offset;
+}
+
+void
+gen8_gpe_context_add_surface(struct i965_gpe_context *gpe_context,
+ struct i965_gpe_surface *gpe_surface,
+ int index)
+{
+ char *buf;
+ unsigned int tiling, swizzle, width, height, pitch, tile_alignment, y_offset = 0;
+ unsigned int surface_state_offset = gpe_context->surface_state_binding_table.surface_state_offset +
+ index * SURFACE_STATE_PADDED_SIZE_GEN8;
+ unsigned int binding_table_offset = gpe_context->surface_state_binding_table.binding_table_offset +
+ index * 4;
+ struct i965_gpe_resource *gpe_resource = gpe_surface->gpe_resource;
+
+ dri_bo_get_tiling(gpe_resource->bo,&tiling,&swizzle);
+
+ dri_bo_map(gpe_context->surface_state_binding_table.bo, 1);
+ buf = (char *)gpe_context->surface_state_binding_table.bo->virtual;
+ *((unsigned int *)(buf + binding_table_offset)) = surface_state_offset;
+
+ if (gpe_surface->is_2d_surface) {
+ struct gen8_surface_state *ss = (struct gen8_surface_state *)(buf + surface_state_offset);
+
+ width = gpe_resource->width;
+ height = gpe_resource->height;
+ pitch = gpe_resource->pitch;
+
+ if (gpe_surface->is_media_block_rw)
+ width = (ALIGN(width, 4)>> 2);
+
+ gen8_gpe_set_2d_surface_state(ss,
+ gpe_surface->vert_line_stride_offset,
+ gpe_surface->vert_line_stride,
+ gpe_surface->cacheability_control,
+ gpe_surface->format,
+ tiling,
+ width, height, pitch,
+ gpe_resource->bo->offset,
+ y_offset);
+
+ dri_bo_emit_reloc(gpe_context->surface_state_binding_table.bo,
+ I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
+ 0,
+ surface_state_offset + offsetof(struct gen8_surface_state, ss8),
+ gpe_resource->bo);
+ } else if (gpe_surface->is_uv_surface) {
+ unsigned int cbcr_offset;
+ struct gen8_surface_state *ss = (struct gen8_surface_state *)(buf + surface_state_offset);
+
+ width = gpe_resource->width;
+ height = gpe_resource->height / 2;
+ pitch = gpe_resource->pitch;
+
+ if (gpe_surface->is_media_block_rw)
+ width = (ALIGN(width, 4)>> 2);
+
+ if (tiling == I915_TILING_Y) {
+ tile_alignment = 32;
+ } else if (tiling == I915_TILING_X) {
+ tile_alignment = 8;
+ } else
+ tile_alignment = 1;
+
+ y_offset = (gpe_resource->y_cb_offset % tile_alignment)>> 2;
+ cbcr_offset = ALIGN_FLOOR(gpe_resource->y_cb_offset, tile_alignment) * pitch;
+
+ gen8_gpe_set_2d_surface_state(ss,
+ gpe_surface->vert_line_stride_offset,
+ gpe_surface->vert_line_stride,
+ gpe_surface->cacheability_control,
+ I965_SURFACEFORMAT_R16_UINT,
+ tiling,
+ width, height, pitch,
+ gpe_resource->bo->offset + cbcr_offset,
+ y_offset);
+
+ dri_bo_emit_reloc(gpe_context->surface_state_binding_table.bo,
+ I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
+ cbcr_offset,
+ surface_state_offset + offsetof(struct gen8_surface_state, ss8),
+ gpe_resource->bo);
+ } else if (gpe_surface->is_adv_surface) {
+ struct gen8_surface_state2 *ss = (struct gen8_surface_state2 *)(buf + surface_state_offset);
+
+ width = gpe_resource->width;
+ height = gpe_resource->height;
+ pitch = gpe_resource->pitch;
+
+ gen8_gpe_set_adv_surface_state(ss,
+ gpe_surface->v_direction,
+ gpe_surface->cacheability_control,
+ MFX_SURFACE_PLANAR_420_8,
+ tiling,
+ width, height, pitch,
+ gpe_resource->bo->offset,
+ gpe_resource->y_cb_offset);
+
+ dri_bo_emit_reloc(gpe_context->surface_state_binding_table.bo,
+ I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
+ 0,
+ surface_state_offset + offsetof(struct gen8_surface_state2, ss6),
+ gpe_resource->bo);
+ } else {
+ struct gen8_surface_state *ss = (struct gen8_surface_state *)(buf + surface_state_offset);
+ unsigned int format;
+
+ assert(gpe_surface->is_buffer);
+
+ if (gpe_surface->is_raw_buffer) {
+ format = I965_SURFACEFORMAT_RAW;
+ pitch = 1;
+ } else {
+ format = I965_SURFACEFORMAT_R32_UINT;
+ pitch = sizeof(unsigned int);
+ }
+
+ gen8_gpe_set_buffer2_surface_state(ss,
+ gpe_surface->cacheability_control,
+ format,
+ gpe_surface->size,
+ pitch,
+ gpe_resource->bo->offset + gpe_surface->offset);
+
+ dri_bo_emit_reloc(gpe_context->surface_state_binding_table.bo,
+ I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
+ gpe_surface->offset,
+ surface_state_offset + offsetof(struct gen8_surface_state, ss8),
+ gpe_resource->bo);
+ }
+
+ dri_bo_unmap(gpe_context->surface_state_binding_table.bo);
+}
diff --git a/src/i965_gpe_utils.h b/src/i965_gpe_utils.h
index 076f584..cbf3b05 100644
--- a/src/i965_gpe_utils.h
+++ b/src/i965_gpe_utils.h
@@ -526,4 +526,9 @@ intel_vpp_init_media_object_walker_parameter(struct intel_vpp_kernel_walker_para
extern void
gen8_gpe_reset_binding_table(VADriverContextP ctx, struct i965_gpe_context *gpe_context);
+extern void
+gen8_gpe_context_add_surface(struct i965_gpe_context *gpe_context,
+ struct i965_gpe_surface *gpe_surface,
+ int index);
+
#endif /* _I965_GPE_UTILS_H_ */
Xiang, Haihao
2016-11-17 15:48:36 UTC
Permalink
-----Original Message-----
From: Zhao, Yakui
Sent: Thursday, November 17, 2016 9:14 PM
Subject: Re: [Libva] [Libva-intel-driver][PATCH 15/17] Add a new gpe function
gen8_gpe_context_add_surface() to set surface state on GEN8
Can gen8_gpe_context_add_surface() follow logic similar to that of
gen9_gpe_context_add_surface()?
2d_surface && override_offset
I won't use override offset, so I didn't add support for it in this patch.
Of course, I can add it if it is useful to you.
2d && is_uv
2d
adv
buffer
The 2d_surface && override_offset case is especially useful for adding the U/V plane
of an I420/YUV surface.
Thanks
---
src/i965_gpe_utils.c | 231
+++++++++++++++++++++++++++++++++++++++++++++++++++
src/i965_gpe_utils.h | 5 ++
2 files changed, 236 insertions(+)
diff --git a/src/i965_gpe_utils.c b/src/i965_gpe_utils.c
index d1b1941..e7f1c25 100644
--- a/src/i965_gpe_utils.c
+++ b/src/i965_gpe_utils.c
@@ -2206,3 +2206,234 @@
gen8_gpe_reset_binding_table(VADriverContextP ctx, struct
i965_gpe_context *gpe_
dri_bo_unmap(gpe_context->surface_state_binding_table.bo);
}
+
+static void
+gen8_gpe_set_2d_surface_state(struct gen8_surface_state *ss,
+ unsigned int vert_line_stride_offset,
+ unsigned int vert_line_stride,
+ unsigned int cacheability_control,
+ unsigned int format,
+ unsigned int tiling,
+ unsigned int width,
+ unsigned int height,
+ unsigned int pitch,
+ unsigned int base_offset,
+ unsigned int y_offset)
+{
+ memset(ss, 0, sizeof(*ss));
+
+ ss->ss0.vert_line_stride_ofs = vert_line_stride_offset;
+ ss->ss0.vert_line_stride = vert_line_stride;
+ ss->ss0.surface_format = format;
+ ss->ss0.surface_type = I965_SURFACE_2D;
+
+ ss->ss1.surface_mocs = cacheability_control;
+
+ ss->ss2.width = width - 1;
+ ss->ss2.height = height - 1;
+
+ ss->ss3.pitch = pitch - 1;
+
+ ss->ss5.y_offset = y_offset;
+
+ ss->ss7.shader_chanel_select_a = HSW_SCS_ALPHA;
+ ss->ss7.shader_chanel_select_b = HSW_SCS_BLUE;
+ ss->ss7.shader_chanel_select_g = HSW_SCS_GREEN;
+ ss->ss7.shader_chanel_select_r = HSW_SCS_RED;
+
+ ss->ss8.base_addr = base_offset;
+
+ gen8_gpe_set_surface_tiling(ss, tiling);
+}
+
+static void
+gen8_gpe_set_adv_surface_state(struct gen8_surface_state2 *ss,
+ unsigned int v_direction,
+ unsigned int cacheability_control,
+ unsigned int format,
+ unsigned int tiling,
+ unsigned int width,
+ unsigned int height,
+ unsigned int pitch,
+ unsigned int base_offset,
+ unsigned int y_cb_offset)
+{
+ memset(ss, 0, sizeof(*ss));
+
+ ss->ss1.cbcr_pixel_offset_v_direction = v_direction;
+ ss->ss1.width = width - 1;
+ ss->ss1.height = height - 1;
+
+ ss->ss2.surface_format = format;
+ ss->ss2.interleave_chroma = 1;
+ ss->ss2.pitch = pitch - 1;
+
+ ss->ss3.y_offset_for_cb = y_cb_offset;
+
+ ss->ss5.surface_object_mocs = cacheability_control;
+
+ ss->ss6.base_addr = base_offset;
+
+ gen8_gpe_set_surface2_tiling(ss, tiling);
+}
+
+static void
+gen8_gpe_set_buffer2_surface_state(struct gen8_surface_state *ss,
+ unsigned int cacheability_control,
+ unsigned int format,
+ unsigned int size,
+ unsigned int pitch,
+ unsigned int base_offset)
+{
+ memset(ss, 0, sizeof(*ss));
+
+ ss->ss0.surface_format = format;
+ ss->ss0.surface_type = I965_SURFACE_BUFFER;
+
+ ss->ss1.surface_mocs = cacheability_control;
+
+ ss->ss2.width = (size - 1)& 0x7F;
+ ss->ss2.height = ((size - 1)& 0x1FFF80)>> 7;
+
+ ss->ss3.depth = ((size - 1)& 0xFE00000)>> 21;
+ ss->ss3.pitch = pitch - 1;
+
+ ss->ss7.shader_chanel_select_a = HSW_SCS_ALPHA;
+ ss->ss7.shader_chanel_select_b = HSW_SCS_BLUE;
+ ss->ss7.shader_chanel_select_g = HSW_SCS_GREEN;
+ ss->ss7.shader_chanel_select_r = HSW_SCS_RED;
+
+ ss->ss8.base_addr = base_offset;
+}
+
+void
+gen8_gpe_context_add_surface(struct i965_gpe_context *gpe_context,
+ struct i965_gpe_surface *gpe_surface,
+ int index)
+{
+ char *buf;
+ unsigned int tiling, swizzle, width, height, pitch, tile_alignment, y_offset
= 0;
+ unsigned int surface_state_offset = gpe_context->surface_state_binding_table.surface_state_offset +
+ index * SURFACE_STATE_PADDED_SIZE_GEN8;
+ unsigned int binding_table_offset = gpe_context->surface_state_binding_table.binding_table_offset +
+ index * 4;
+ struct i965_gpe_resource *gpe_resource = gpe_surface->gpe_resource;
+
+ dri_bo_get_tiling(gpe_resource->bo,&tiling,&swizzle);
+
+ dri_bo_map(gpe_context->surface_state_binding_table.bo, 1);
+ buf = (char *)gpe_context->surface_state_binding_table.bo->virtual;
+ *((unsigned int *)(buf + binding_table_offset)) = surface_state_offset;
+
+ if (gpe_surface->is_2d_surface) {
+ struct gen8_surface_state *ss = (struct gen8_surface_state *)(buf +
surface_state_offset);
+
+ width = gpe_resource->width;
+ height = gpe_resource->height;
+ pitch = gpe_resource->pitch;
+
+ if (gpe_surface->is_media_block_rw)
+ width = (ALIGN(width, 4)>> 2);
+
+ gen8_gpe_set_2d_surface_state(ss,
+ gpe_surface->vert_line_stride_offset,
+ gpe_surface->vert_line_stride,
+ gpe_surface->cacheability_control,
+ gpe_surface->format,
+ tiling,
+ width, height, pitch,
+ gpe_resource->bo->offset,
+ y_offset);
+
+ dri_bo_emit_reloc(gpe_context->surface_state_binding_table.bo,
+ I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
+ 0,
+ surface_state_offset + offsetof(struct gen8_surface_state,
ss8),
+ gpe_resource->bo);
+ } else if (gpe_surface->is_uv_surface) {
+ unsigned int cbcr_offset;
+ struct gen8_surface_state *ss = (struct gen8_surface_state *)(buf +
surface_state_offset);
+
+ width = gpe_resource->width;
+ height = gpe_resource->height / 2;
+ pitch = gpe_resource->pitch;
+
+ if (gpe_surface->is_media_block_rw)
+ width = (ALIGN(width, 4)>> 2);
+
+ if (tiling == I915_TILING_Y) {
+ tile_alignment = 32;
+ } else if (tiling == I915_TILING_X) {
+ tile_alignment = 8;
+ } else
+ tile_alignment = 1;
+
+ y_offset = (gpe_resource->y_cb_offset % tile_alignment)>> 2;
+ cbcr_offset = ALIGN_FLOOR(gpe_resource->y_cb_offset,
tile_alignment) * pitch;
+
+ gen8_gpe_set_2d_surface_state(ss,
+ gpe_surface->vert_line_stride_offset,
+ gpe_surface->vert_line_stride,
+ gpe_surface->cacheability_control,
+ I965_SURFACEFORMAT_R16_UINT,
+ tiling,
+ width, height, pitch,
+ gpe_resource->bo->offset + cbcr_offset,
+ y_offset);
+
+ dri_bo_emit_reloc(gpe_context->surface_state_binding_table.bo,
+ I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
+ cbcr_offset,
+ surface_state_offset + offsetof(struct gen8_surface_state,
ss8),
+ gpe_resource->bo);
+ } else if (gpe_surface->is_adv_surface) {
+ struct gen8_surface_state2 *ss = (struct gen8_surface_state2 *)(buf +
surface_state_offset);
+
+ width = gpe_resource->width;
+ height = gpe_resource->height;
+ pitch = gpe_resource->pitch;
+
+ gen8_gpe_set_adv_surface_state(ss,
+ gpe_surface->v_direction,
+ gpe_surface->cacheability_control,
+ MFX_SURFACE_PLANAR_420_8,
+ tiling,
+ width, height, pitch,
+ gpe_resource->bo->offset,
+ gpe_resource->y_cb_offset);
+
+ dri_bo_emit_reloc(gpe_context->surface_state_binding_table.bo,
+ I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
+ 0,
+ surface_state_offset + offsetof(struct gen8_surface_state2,
ss6),
+ gpe_resource->bo);
+ } else {
+ struct gen8_surface_state *ss = (struct gen8_surface_state *)(buf +
surface_state_offset);
+ unsigned int format;
+
+ assert(gpe_surface->is_buffer);
+
+ if (gpe_surface->is_raw_buffer) {
+ format = I965_SURFACEFORMAT_RAW;
+ pitch = 1;
+ } else {
+ format = I965_SURFACEFORMAT_R32_UINT;
+ pitch = sizeof(unsigned int);
+ }
+
+ gen8_gpe_set_buffer2_surface_state(ss,
+ gpe_surface->cacheability_control,
+ format,
+ gpe_surface->size,
+ pitch,
+ gpe_resource->bo->offset + gpe_surface->offset);
+
+ dri_bo_emit_reloc(gpe_context->surface_state_binding_table.bo,
+ I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
+ gpe_surface->offset,
+ surface_state_offset + offsetof(struct gen8_surface_state,
ss8),
+ gpe_resource->bo);
+ }
+
+ dri_bo_unmap(gpe_context->surface_state_binding_table.bo);
+}
diff --git a/src/i965_gpe_utils.h b/src/i965_gpe_utils.h
index 076f584..cbf3b05 100644
--- a/src/i965_gpe_utils.h
+++ b/src/i965_gpe_utils.h
@@ -526,4 +526,9 @@
intel_vpp_init_media_object_walker_parameter(struct
intel_vpp_kernel_walker_para
extern void
gen8_gpe_reset_binding_table(VADriverContextP ctx, struct
i965_gpe_context *gpe_context);
+extern void
+gen8_gpe_context_add_surface(struct i965_gpe_context *gpe_context,
+ struct i965_gpe_surface *gpe_surface,
+ int index);
+
#endif /* _I965_GPE_UTILS_H_ */
Zhao Yakui
2016-11-18 01:08:08 UTC
Permalink
Post by Xiang, Haihao
-----Original Message-----
From: Zhao, Yakui
Sent: Thursday, November 17, 2016 9:14 PM
Subject: Re: [Libva] [Libva-intel-driver][PATCH 15/17] Add a new gpe function
gen8_gpe_context_add_surface() to set surface state on GEN8
Can gen8_gpe_context_add_surface() follow logic similar to that of
gen9_gpe_context_add_surface()?
2d_surface && override_offset
I won't use override offset, so I didn't add support for it in this patch.
Of course, I can add it if it is useful to you.
Yes, it is useful.
It would be great if it could be added.
Post by Xiang, Haihao
2d && is_uv
2d
adv
buffer
The 2d_surface && override_offset case is especially useful for adding the U/V plane
of an I420/YUV surface.
Thanks
---
src/i965_gpe_utils.c | 231
+++++++++++++++++++++++++++++++++++++++++++++++++++
src/i965_gpe_utils.h | 5 ++
2 files changed, 236 insertions(+)
diff --git a/src/i965_gpe_utils.c b/src/i965_gpe_utils.c
index d1b1941..e7f1c25 100644
--- a/src/i965_gpe_utils.c
+++ b/src/i965_gpe_utils.c
@@ -2206,3 +2206,234 @@
gen8_gpe_reset_binding_table(VADriverContextP ctx, struct
i965_gpe_context *gpe_
dri_bo_unmap(gpe_context->surface_state_binding_table.bo);
}
+
+static void
+gen8_gpe_set_2d_surface_state(struct gen8_surface_state *ss,
+ unsigned int vert_line_stride_offset,
+ unsigned int vert_line_stride,
+ unsigned int cacheability_control,
+ unsigned int format,
+ unsigned int tiling,
+ unsigned int width,
+ unsigned int height,
+ unsigned int pitch,
+ unsigned int base_offset,
+ unsigned int y_offset)
+{
+ memset(ss, 0, sizeof(*ss));
+
+ ss->ss0.vert_line_stride_ofs = vert_line_stride_offset;
+ ss->ss0.vert_line_stride = vert_line_stride;
+ ss->ss0.surface_format = format;
+ ss->ss0.surface_type = I965_SURFACE_2D;
+
+ ss->ss1.surface_mocs = cacheability_control;
+
+ ss->ss2.width = width - 1;
+ ss->ss2.height = height - 1;
+
+ ss->ss3.pitch = pitch - 1;
+
+ ss->ss5.y_offset = y_offset;
+
+ ss->ss7.shader_chanel_select_a = HSW_SCS_ALPHA;
+ ss->ss7.shader_chanel_select_b = HSW_SCS_BLUE;
+ ss->ss7.shader_chanel_select_g = HSW_SCS_GREEN;
+ ss->ss7.shader_chanel_select_r = HSW_SCS_RED;
+
+ ss->ss8.base_addr = base_offset;
+
+ gen8_gpe_set_surface_tiling(ss, tiling);
+}
+
+static void
+gen8_gpe_set_adv_surface_state(struct gen8_surface_state2 *ss,
+ unsigned int v_direction,
+ unsigned int cacheability_control,
+ unsigned int format,
+ unsigned int tiling,
+ unsigned int width,
+ unsigned int height,
+ unsigned int pitch,
+ unsigned int base_offset,
+ unsigned int y_cb_offset)
+{
+ memset(ss, 0, sizeof(*ss));
+
+ ss->ss1.cbcr_pixel_offset_v_direction = v_direction;
+ ss->ss1.width = width - 1;
+ ss->ss1.height = height - 1;
+
+ ss->ss2.surface_format = format;
+ ss->ss2.interleave_chroma = 1;
+ ss->ss2.pitch = pitch - 1;
+
+ ss->ss3.y_offset_for_cb = y_cb_offset;
+
+ ss->ss5.surface_object_mocs = cacheability_control;
+
+ ss->ss6.base_addr = base_offset;
+
+ gen8_gpe_set_surface2_tiling(ss, tiling);
+}
+
+static void
+gen8_gpe_set_buffer2_surface_state(struct gen8_surface_state *ss,
+ unsigned int cacheability_control,
+ unsigned int format,
+ unsigned int size,
+ unsigned int pitch,
+ unsigned int base_offset)
+{
+ memset(ss, 0, sizeof(*ss));
+
+ ss->ss0.surface_format = format;
+ ss->ss0.surface_type = I965_SURFACE_BUFFER;
+
+ ss->ss1.surface_mocs = cacheability_control;
+
+ ss->ss2.width = (size - 1)& 0x7F;
+ ss->ss2.height = ((size - 1)& 0x1FFF80)>> 7;
+
+ ss->ss3.depth = ((size - 1)& 0xFE00000)>> 21;
+ ss->ss3.pitch = pitch - 1;
+
+ ss->ss7.shader_chanel_select_a = HSW_SCS_ALPHA;
+ ss->ss7.shader_chanel_select_b = HSW_SCS_BLUE;
+ ss->ss7.shader_chanel_select_g = HSW_SCS_GREEN;
+ ss->ss7.shader_chanel_select_r = HSW_SCS_RED;
+
+ ss->ss8.base_addr = base_offset;
+}
+
+void
+gen8_gpe_context_add_surface(struct i965_gpe_context *gpe_context,
+ struct i965_gpe_surface *gpe_surface,
+ int index)
+{
+ char *buf;
+ unsigned int tiling, swizzle, width, height, pitch, tile_alignment, y_offset
= 0;
+ unsigned int surface_state_offset = gpe_context->surface_state_binding_table.surface_state_offset +
+ index * SURFACE_STATE_PADDED_SIZE_GEN8;
+ unsigned int binding_table_offset = gpe_context->surface_state_binding_table.binding_table_offset +
+ index * 4;
+ struct i965_gpe_resource *gpe_resource = gpe_surface->gpe_resource;
+
+ dri_bo_get_tiling(gpe_resource->bo,&tiling,&swizzle);
+
+ dri_bo_map(gpe_context->surface_state_binding_table.bo, 1);
+ buf = (char *)gpe_context->surface_state_binding_table.bo->virtual;
+ *((unsigned int *)(buf + binding_table_offset)) = surface_state_offset;
+
+ if (gpe_surface->is_2d_surface) {
+ struct gen8_surface_state *ss = (struct gen8_surface_state *)(buf +
surface_state_offset);
+
+ width = gpe_resource->width;
+ height = gpe_resource->height;
+ pitch = gpe_resource->pitch;
+
+ if (gpe_surface->is_media_block_rw)
+ width = (ALIGN(width, 4)>> 2);
+
+ gen8_gpe_set_2d_surface_state(ss,
+ gpe_surface->vert_line_stride_offset,
+ gpe_surface->vert_line_stride,
+ gpe_surface->cacheability_control,
+ gpe_surface->format,
+ tiling,
+ width, height, pitch,
+ gpe_resource->bo->offset,
+ y_offset);
+
+ dri_bo_emit_reloc(gpe_context->surface_state_binding_table.bo,
+ I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
+ 0,
+ surface_state_offset + offsetof(struct gen8_surface_state,
ss8),
+ gpe_resource->bo);
+ } else if (gpe_surface->is_uv_surface) {
+ unsigned int cbcr_offset;
+ struct gen8_surface_state *ss = (struct gen8_surface_state *)(buf +
surface_state_offset);
+
+ width = gpe_resource->width;
+ height = gpe_resource->height / 2;
+ pitch = gpe_resource->pitch;
+
+ if (gpe_surface->is_media_block_rw)
+ width = (ALIGN(width, 4)>> 2);
+
+ if (tiling == I915_TILING_Y) {
+ tile_alignment = 32;
+ } else if (tiling == I915_TILING_X) {
+ tile_alignment = 8;
+ } else
+ tile_alignment = 1;
+
+ y_offset = (gpe_resource->y_cb_offset % tile_alignment)>> 2;
+ cbcr_offset = ALIGN_FLOOR(gpe_resource->y_cb_offset,
tile_alignment) * pitch;
+
+ gen8_gpe_set_2d_surface_state(ss,
+ gpe_surface->vert_line_stride_offset,
+ gpe_surface->vert_line_stride,
+ gpe_surface->cacheability_control,
+ I965_SURFACEFORMAT_R16_UINT,
+ tiling,
+ width, height, pitch,
+ gpe_resource->bo->offset + cbcr_offset,
+ y_offset);
+
+ dri_bo_emit_reloc(gpe_context->surface_state_binding_table.bo,
+ I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
+ cbcr_offset,
+ surface_state_offset + offsetof(struct gen8_surface_state,
ss8),
+ gpe_resource->bo);
+ } else if (gpe_surface->is_adv_surface) {
+ struct gen8_surface_state2 *ss = (struct gen8_surface_state2 *)(buf +
surface_state_offset);
+
+ width = gpe_resource->width;
+ height = gpe_resource->height;
+ pitch = gpe_resource->pitch;
+
+ gen8_gpe_set_adv_surface_state(ss,
+ gpe_surface->v_direction,
+ gpe_surface->cacheability_control,
+ MFX_SURFACE_PLANAR_420_8,
+ tiling,
+ width, height, pitch,
+ gpe_resource->bo->offset,
+ gpe_resource->y_cb_offset);
+
+ dri_bo_emit_reloc(gpe_context->surface_state_binding_table.bo,
+ I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
+ 0,
+ surface_state_offset + offsetof(struct gen8_surface_state2,
ss6),
+ gpe_resource->bo);
+ } else {
+ struct gen8_surface_state *ss = (struct gen8_surface_state *)(buf +
surface_state_offset);
+ unsigned int format;
+
+ assert(gpe_surface->is_buffer);
+
+ if (gpe_surface->is_raw_buffer) {
+ format = I965_SURFACEFORMAT_RAW;
+ pitch = 1;
+ } else {
+ format = I965_SURFACEFORMAT_R32_UINT;
+ pitch = sizeof(unsigned int);
+ }
+
+ gen8_gpe_set_buffer2_surface_state(ss,
+ gpe_surface->cacheability_control,
+ format,
+ gpe_surface->size,
+ pitch,
+ gpe_resource->bo->offset + gpe_surface->offset);
+
+ dri_bo_emit_reloc(gpe_context->surface_state_binding_table.bo,
+ I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
+ gpe_surface->offset,
+ surface_state_offset + offsetof(struct gen8_surface_state,
ss8),
+ gpe_resource->bo);
+ }
+
+ dri_bo_unmap(gpe_context->surface_state_binding_table.bo);
+}
diff --git a/src/i965_gpe_utils.h b/src/i965_gpe_utils.h
index 076f584..cbf3b05 100644
--- a/src/i965_gpe_utils.h
+++ b/src/i965_gpe_utils.h
@@ -526,4 +526,9 @@
intel_vpp_init_media_object_walker_parameter(struct
intel_vpp_kernel_walker_para
extern void
gen8_gpe_reset_binding_table(VADriverContextP ctx, struct
i965_gpe_context *gpe_context);
+extern void
+gen8_gpe_context_add_surface(struct i965_gpe_context *gpe_context,
+ struct i965_gpe_surface *gpe_surface,
+ int index);
+
#endif /* _I965_GPE_UTILS_H_ */
Xiang, Haihao
2016-11-17 08:35:16 UTC
Permalink
Signed-off-by: Xiang, Haihao <***@intel.com>
---
src/i965_gpe_utils.c | 73 ++++++++++++++++++++++++++++++++++++++++++++++++++++
src/i965_gpe_utils.h | 5 ++++
2 files changed, 78 insertions(+)

diff --git a/src/i965_gpe_utils.c b/src/i965_gpe_utils.c
index 4ac7ab7..2e54be3 100644
--- a/src/i965_gpe_utils.c
+++ b/src/i965_gpe_utils.c
@@ -2453,3 +2453,76 @@ gen8_gpe_mi_conditional_batch_buffer_end(VADriverContextP ctx,
param->offset);

}
+
+void
+gen8_gpe_pipe_control(VADriverContextP ctx,
+ struct intel_batchbuffer *batch,
+ struct gpe_pipe_control_parameter *param)
+{
+ int render_target_cache_flush_enable = CMD_PIPE_CONTROL_WC_FLUSH;
+ int dc_flush_enable = 0;
+ int state_cache_invalidation_enable = 0;
+ int constant_cache_invalidation_enable = 0;
+ int vf_cache_invalidation_enable = 0;
+ int instruction_cache_invalidation_enable = 0;
+ int post_sync_operation = CMD_PIPE_CONTROL_NOWRITE;
+ int use_global_gtt = CMD_PIPE_CONTROL_GLOBAL_GTT_GEN8;
+ int cs_stall_enable = !param->disable_cs_stall;
+
+ switch (param->flush_mode) {
+ case PIPE_CONTROL_FLUSH_WRITE_CACHE:
+ render_target_cache_flush_enable = CMD_PIPE_CONTROL_WC_FLUSH;
+ dc_flush_enable = CMD_PIPE_CONTROL_DC_FLUSH;
+ break;
+
+ case PIPE_CONTROL_FLUSH_READ_CACHE:
+ render_target_cache_flush_enable = 0;
+ state_cache_invalidation_enable = CMD_PIPE_CONTROL_SC_INVALIDATION_GEN8;
+ constant_cache_invalidation_enable = CMD_PIPE_CONTROL_CC_INVALIDATION_GEN8;
+ vf_cache_invalidation_enable = CMD_PIPE_CONTROL_VFC_INVALIDATION_GEN8;
+ instruction_cache_invalidation_enable = CMD_PIPE_CONTROL_IS_FLUSH;
+ break;
+
+ case PIPE_CONTROL_FLUSH_NONE:
+ default:
+ render_target_cache_flush_enable = 0;
+ break;
+ }
+
+ if (param->bo) {
+ post_sync_operation = CMD_PIPE_CONTROL_WRITE_QWORD;
+ use_global_gtt = CMD_PIPE_CONTROL_LOCAL_PGTT_GEN8;
+ } else {
+ post_sync_operation = CMD_PIPE_CONTROL_NOWRITE;
+ render_target_cache_flush_enable = CMD_PIPE_CONTROL_WC_FLUSH;
+ state_cache_invalidation_enable = CMD_PIPE_CONTROL_SC_INVALIDATION_GEN8;
+ constant_cache_invalidation_enable = CMD_PIPE_CONTROL_CC_INVALIDATION_GEN8;
+ vf_cache_invalidation_enable = CMD_PIPE_CONTROL_VFC_INVALIDATION_GEN8;
+ instruction_cache_invalidation_enable = CMD_PIPE_CONTROL_IS_FLUSH;
+ }
+
+ __OUT_BATCH(batch, CMD_PIPE_CONTROL | (6 - 2));
+ __OUT_BATCH(batch, (render_target_cache_flush_enable |
+ dc_flush_enable |
+ state_cache_invalidation_enable |
+ constant_cache_invalidation_enable |
+ vf_cache_invalidation_enable |
+ instruction_cache_invalidation_enable |
+ post_sync_operation |
+ use_global_gtt |
+ cs_stall_enable |
+ CMD_PIPE_CONTROL_FLUSH_ENABLE));
+
+ if (param->bo)
+ __OUT_RELOC64(batch,
+ param->bo,
+ I915_GEM_DOMAIN_RENDER | I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_RENDER,
+ param->offset);
+ else {
+ __OUT_BATCH(batch, 0);
+ __OUT_BATCH(batch, 0);
+ }
+
+ __OUT_BATCH(batch, param->dw0);
+ __OUT_BATCH(batch, param->dw1);
+}
diff --git a/src/i965_gpe_utils.h b/src/i965_gpe_utils.h
index 3e10cf8..8701150 100644
--- a/src/i965_gpe_utils.h
+++ b/src/i965_gpe_utils.h
@@ -550,4 +550,9 @@ gen8_gpe_mi_conditional_batch_buffer_end(VADriverContextP ctx,
struct intel_batchbuffer *batch,
struct gpe_mi_conditional_batch_buffer_end_parameter *param);

+extern void
+gen8_gpe_pipe_control(VADriverContextP ctx,
+ struct intel_batchbuffer *batch,
+ struct gpe_pipe_control_parameter *param);
+
#endif /* _I965_GPE_UTILS_H_ */
--
1.9.1
Xiang, Haihao
2016-11-17 08:35:15 UTC
Permalink
Signed-off-by: Xiang, Haihao <***@intel.com>
---
src/i965_gpe_utils.c | 16 ++++++++++++++++
src/i965_gpe_utils.h | 19 +++++++++++++++++++
src/intel_driver.h | 6 ++++++
3 files changed, 41 insertions(+)

diff --git a/src/i965_gpe_utils.c b/src/i965_gpe_utils.c
index e7f1c25..4ac7ab7 100644
--- a/src/i965_gpe_utils.c
+++ b/src/i965_gpe_utils.c
@@ -2437,3 +2437,19 @@ gen8_gpe_context_add_surface(struct i965_gpe_context *gpe_context,

dri_bo_unmap(gpe_context->surface_state_binding_table.bo);
}
+
+void
+gen8_gpe_mi_conditional_batch_buffer_end(VADriverContextP ctx,
+ struct intel_batchbuffer *batch,
+ struct gpe_mi_conditional_batch_buffer_end_parameter *param)
+{
+ __OUT_BATCH(batch, (MI_CONDITIONAL_BATCH_BUFFER_END |
+ (1 << 21) |
+ (4 - 2))); /* Always use PPGTT */
+ __OUT_BATCH(batch, param->compare_data);
+ __OUT_RELOC64(batch,
+ param->bo,
+ I915_GEM_DOMAIN_RENDER | I915_GEM_DOMAIN_INSTRUCTION, 0,
+ param->offset);
+
+}
diff --git a/src/i965_gpe_utils.h b/src/i965_gpe_utils.h
index cbf3b05..3e10cf8 100644
--- a/src/i965_gpe_utils.h
+++ b/src/i965_gpe_utils.h
@@ -73,6 +73,20 @@ struct gpe_dynamic_state_parameter
unsigned int sampler_offset;
};

+#define PIPE_CONTROL_FLUSH_NONE 0
+#define PIPE_CONTROL_FLUSH_WRITE_CACHE 1
+#define PIPE_CONTROL_FLUSH_READ_CACHE 2
+
+struct gpe_pipe_control_parameter
+{
+ dri_bo *bo;
+ unsigned int offset;
+ unsigned int flush_mode;
+ unsigned int disable_cs_stall;
+ unsigned int dw0;
+ unsigned int dw1;
+};
+
struct i965_gpe_context
{
struct {
@@ -531,4 +545,9 @@ gen8_gpe_context_add_surface(struct i965_gpe_context *gpe_context,
struct i965_gpe_surface *gpe_surface,
int index);

+extern void
+gen8_gpe_mi_conditional_batch_buffer_end(VADriverContextP ctx,
+ struct intel_batchbuffer *batch,
+ struct gpe_mi_conditional_batch_buffer_end_parameter *param);
+
#endif /* _I965_GPE_UTILS_H_ */
diff --git a/src/intel_driver.h b/src/intel_driver.h
index dcdc03b..4ff707d 100644
--- a/src/intel_driver.h
+++ b/src/intel_driver.h
@@ -76,12 +76,18 @@
#define CMD_PIPE_CONTROL_IS_FLUSH (1 << 11)
#define CMD_PIPE_CONTROL_TC_FLUSH (1 << 10)
#define CMD_PIPE_CONTROL_NOTIFY_ENABLE (1 << 8)
+#define CMD_PIPE_CONTROL_FLUSH_ENABLE (1 << 7)
#define CMD_PIPE_CONTROL_DC_FLUSH (1 << 5)
#define CMD_PIPE_CONTROL_GLOBAL_GTT (1 << 2)
#define CMD_PIPE_CONTROL_LOCAL_PGTT (0 << 2)
#define CMD_PIPE_CONTROL_STALL_AT_SCOREBOARD (1 << 1)
#define CMD_PIPE_CONTROL_DEPTH_CACHE_FLUSH (1 << 0)

+#define CMD_PIPE_CONTROL_GLOBAL_GTT_GEN8 (1 << 24)
+#define CMD_PIPE_CONTROL_LOCAL_PGTT_GEN8 (0 << 24)
+#define CMD_PIPE_CONTROL_VFC_INVALIDATION_GEN8 (1 << 4)
+#define CMD_PIPE_CONTROL_CC_INVALIDATION_GEN8 (1 << 3)
+#define CMD_PIPE_CONTROL_SC_INVALIDATION_GEN8 (1 << 2)

struct intel_batchbuffer;
--
1.9.1
Xiang, Haihao
2016-11-17 08:35:13 UTC
Permalink
Signed-off-by: Xiang, Haihao <***@intel.com>
---
src/i965_gpe_utils.c | 18 +++++++++++++++++-
src/i965_gpe_utils.h | 3 ++-
2 files changed, 19 insertions(+), 2 deletions(-)

diff --git a/src/i965_gpe_utils.c b/src/i965_gpe_utils.c
index 4c0384e..d1b1941 100644
--- a/src/i965_gpe_utils.c
+++ b/src/i965_gpe_utils.c
@@ -2143,7 +2143,6 @@ gen8_gpe_media_object_walker(VADriverContextP ctx,
ADVANCE_BATCH(batch);
}

-
void
intel_vpp_init_media_object_walker_parameter(struct intel_vpp_kernel_walker_parameter *kernel_walker_param,
struct gpe_media_object_walker_parameter *walker_param)
@@ -2190,3 +2189,20 @@ intel_vpp_init_media_object_walker_parameter(struct intel_vpp_kernel_walker_para
walker_param->local_inner_loop_unit.y = 1;
}
}
+
+void
+gen8_gpe_reset_binding_table(VADriverContextP ctx, struct i965_gpe_context *gpe_context)
+{
+ unsigned int *binding_table;
+ unsigned int binding_table_offset = gpe_context->surface_state_binding_table.binding_table_offset;
+ int i;
+
+ dri_bo_map(gpe_context->surface_state_binding_table.bo, 1);
+ binding_table = (unsigned int*)((char *)gpe_context->surface_state_binding_table.bo->virtual + binding_table_offset);
+
+ for (i = 0; i < gpe_context->surface_state_binding_table.max_entries; i++) {
+ *(binding_table + i) = gpe_context->surface_state_binding_table.surface_state_offset + i * SURFACE_STATE_PADDED_SIZE_GEN8;
+ }
+
+ dri_bo_unmap(gpe_context->surface_state_binding_table.bo);
+}
diff --git a/src/i965_gpe_utils.h b/src/i965_gpe_utils.h
index 1a3210b..076f584 100644
--- a/src/i965_gpe_utils.h
+++ b/src/i965_gpe_utils.h
@@ -511,7 +511,6 @@ gen8_gpe_media_object_walker(VADriverContextP ctx,
struct intel_batchbuffer *batch,
struct gpe_media_object_walker_parameter *param);

-
struct intel_vpp_kernel_walker_parameter
{
unsigned int use_scoreboard;
@@ -524,5 +523,7 @@ struct intel_vpp_kernel_walker_parameter
extern void
intel_vpp_init_media_object_walker_parameter(struct intel_vpp_kernel_walker_parameter *kernel_walker_param,
struct gpe_media_object_walker_parameter *walker_param);
+extern void
+gen8_gpe_reset_binding_table(VADriverContextP ctx, struct i965_gpe_context *gpe_context);

#endif /* _I965_GPE_UTILS_H_ */
--
1.9.1
Loading...