Discussion:
[Libva-intel-driver][PATCH v2 00/17] Fixes for GPE utility
(too old to reply)
Xiang, Haihao
2016-11-18 05:43:42 UTC
Permalink
The patch series cleans up some gpe settings, renames some gen9 functions for gen8
and adds some new gpe functions for gen8.

v2: add support for override_offset when adding a new surface

Xiang, Haihao (17):
Change the size calculation of the required memory for all GPE kernels
Move all curbe related settings to the inner structure in
i965_gpe_context
Move interface descriptor remapping table related settings to the
inner structure in i965_gpe_context
Move sampler state related settings to the inner structure in
i965_gpe_context
Fix the size calculation of the required memory for dynamic state
buffer
Fix curbe length in CMD_MEDIA_CURBE_LOAD on GEN8+
Rename gen8p_gpe_context_map_curbe()/gen8p_gpe_context_unmap_curbe()
to i965_gpe_context_map_curbe()/i965_gpe_context_unmap_curbe()
Rename i965_gpe_dri_object_to_2d_gpe_resource() to
i965_dri_object_to_2d_gpe_resource()
Rename gen9_gpe_media_object_walker() to
gen8_gpe_media_object_walker()
Rename gen9_gpe_mi_batch_buffer_start() to
gen8_gpe_mi_batch_buffer_start()
Rename gen9_gpe_mi_store_data_imm() to gen8_gpe_mi_store_data_imm()
Rename gen9_gpe_mi_flush_dw() to gen8_gpe_mi_flush_dw()
Rename gen9_gpe_mi_store_register_mem() to
gen8_gpe_mi_store_register_mem()
Add a new gpe function gen8_gpe_reset_binding_table() to reset binding
table
Add a new gpe function gen8_gpe_context_add_surface() to set surface
state on GEN8
Add a new gpe function gen8_gpe_mi_conditional_batch_buffer_end() for
GEN8
Add a new gpe function gen8_gpe_pipe_control() for GEN8

src/gen75_vpp_gpe.c | 12 +-
src/gen8_mfc.c | 12 +-
src/gen8_vme.c | 22 +--
src/gen9_post_processing.c | 27 +--
src/gen9_vdenc.c | 26 +--
src/gen9_vme.c | 23 +--
src/gen9_vp9_encoder.c | 85 +++++----
src/i965_gpe_utils.c | 426 +++++++++++++++++++++++++++++++++++++++++----
src/i965_gpe_utils.h | 73 +++++---
src/intel_driver.h | 6 +
10 files changed, 554 insertions(+), 158 deletions(-)
--
1.9.1
Xiang, Haihao
2016-11-18 05:43:43 UTC
Permalink
Make sure the size is a multiple of 64 bytes

Signed-off-by: Xiang, Haihao <***@intel.com>
---
src/i965_gpe_utils.c | 5 ++---
1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/src/i965_gpe_utils.c b/src/i965_gpe_utils.c
index 5d4ca5c..c5a8935 100644
--- a/src/i965_gpe_utils.c
+++ b/src/i965_gpe_utils.c
@@ -1180,7 +1180,7 @@ gen8_gpe_load_kernels(VADriverContextP ctx,
unsigned int num_kernels)
{
struct i965_driver_data *i965 = i965_driver_data(ctx);
- int i, kernel_size;
+ int i, kernel_size = 0;
unsigned int kernel_offset, end_offset;
unsigned char *kernel_ptr;
struct i965_kernel *kernel;
@@ -1189,11 +1189,10 @@ gen8_gpe_load_kernels(VADriverContextP ctx,
memcpy(gpe_context->kernels, kernel_list, sizeof(*kernel_list) * num_kernels);
gpe_context->num_kernels = num_kernels;

- kernel_size = num_kernels * 64;
for (i = 0; i < num_kernels; i++) {
kernel = &gpe_context->kernels[i];

- kernel_size += kernel->size;
+ kernel_size += ALIGN(kernel->size, 64);
}

gpe_context->instruction_state.bo = dri_bo_alloc(i965->intel.bufmgr,
--
1.9.1
Zhao Yakui
2016-11-18 06:10:06 UTC
Permalink
Post by Xiang, Haihao
Make sure the size is multiple of 64 bytes
This looks good to me.
Post by Xiang, Haihao
---
src/i965_gpe_utils.c | 5 ++---
1 file changed, 2 insertions(+), 3 deletions(-)
diff --git a/src/i965_gpe_utils.c b/src/i965_gpe_utils.c
index 5d4ca5c..c5a8935 100644
--- a/src/i965_gpe_utils.c
+++ b/src/i965_gpe_utils.c
@@ -1180,7 +1180,7 @@ gen8_gpe_load_kernels(VADriverContextP ctx,
unsigned int num_kernels)
{
struct i965_driver_data *i965 = i965_driver_data(ctx);
- int i, kernel_size;
+ int i, kernel_size = 0;
unsigned int kernel_offset, end_offset;
unsigned char *kernel_ptr;
struct i965_kernel *kernel;
@@ -1189,11 +1189,10 @@ gen8_gpe_load_kernels(VADriverContextP ctx,
memcpy(gpe_context->kernels, kernel_list, sizeof(*kernel_list) * num_kernels);
gpe_context->num_kernels = num_kernels;
- kernel_size = num_kernels * 64;
for (i = 0; i< num_kernels; i++) {
kernel =&gpe_context->kernels[i];
- kernel_size += kernel->size;
+ kernel_size += ALIGN(kernel->size, 64);
}
gpe_context->instruction_state.bo = dri_bo_alloc(i965->intel.bufmgr,
Xiang, Haihao
2016-11-18 05:43:45 UTC
Permalink
This patch deletes idrt_size and uses (idrt.max_entries * idrt.entry_size) instead.
idrt.bo is always set even if the interface descriptor remapping table is a part of
the dynamic state buffer, hence we can use the corresponding settings regardless of
whether this table is a part of the dynamic state buffer or not.

Signed-off-by: Xiang, Haihao <***@intel.com>
---
src/gen75_vpp_gpe.c | 10 +++++-----
src/gen8_mfc.c | 7 ++++---
src/gen8_vme.c | 8 +++++---
src/gen9_post_processing.c | 3 ++-
src/gen9_vme.c | 7 ++++---
src/gen9_vp9_encoder.c | 2 --
src/i965_gpe_utils.c | 27 +++++++++++++++++++--------
src/i965_gpe_utils.h | 3 +--
8 files changed, 40 insertions(+), 27 deletions(-)

diff --git a/src/gen75_vpp_gpe.c b/src/gen75_vpp_gpe.c
index 2cddb5a..6f5e2ef 100644
--- a/src/gen75_vpp_gpe.c
+++ b/src/gen75_vpp_gpe.c
@@ -406,13 +406,13 @@ gen8_gpe_process_interface_setup(VADriverContextP ctx,
struct vpp_gpe_context *vpp_gpe_ctx)
{
struct gen8_interface_descriptor_data *desc;
- dri_bo *bo = vpp_gpe_ctx->gpe_ctx.dynamic_state.bo;
+ dri_bo *bo = vpp_gpe_ctx->gpe_ctx.idrt.bo;
int i;

dri_bo_map(bo, 1);
assert(bo->virtual);
desc = (struct gen8_interface_descriptor_data *)(bo->virtual
- + vpp_gpe_ctx->gpe_ctx.idrt_offset);
+ + vpp_gpe_ctx->gpe_ctx.idrt.offset);

/*Setup the descritor table*/
for (i = 0; i < vpp_gpe_ctx->sub_shader_sum; i++){
@@ -880,7 +880,7 @@ vpp_gpe_context_init(VADriverContextP ctx)

gpe_ctx->curbe.length = CURBE_TOTAL_DATA_LENGTH;
gpe_ctx->idrt.max_entries = MAX_INTERFACE_DESC_GEN6;
- gpe_ctx->idrt.entry_size = sizeof(struct gen6_interface_descriptor_data);
+ gpe_ctx->idrt.entry_size = ALIGN(sizeof(struct gen6_interface_descriptor_data), 64);

} else if (IS_GEN8(i965->intel.device_info) ||
IS_GEN9(i965->intel.device_info)) {
@@ -891,8 +891,8 @@ vpp_gpe_context_init(VADriverContextP ctx)
(SURFACE_STATE_PADDED_SIZE_GEN8 + sizeof(unsigned int)) * MAX_MEDIA_SURFACES_GEN6;

gpe_ctx->curbe.length = CURBE_TOTAL_DATA_LENGTH;
- gpe_ctx->idrt_size = sizeof(struct gen8_interface_descriptor_data) * MAX_INTERFACE_DESC_GEN6;
-
+ gpe_ctx->idrt.entry_size = ALIGN(sizeof(struct gen8_interface_descriptor_data), 64);
+ gpe_ctx->idrt.max_entries = MAX_INTERFACE_DESC_GEN6;
}

return vpp_gpe_ctx;
diff --git a/src/gen8_mfc.c b/src/gen8_mfc.c
index 3ed9e84..c4e46fb 100644
--- a/src/gen8_mfc.c
+++ b/src/gen8_mfc.c
@@ -1333,10 +1333,10 @@ gen8_mfc_batchbuffer_idrt_setup(VADriverContextP ctx,
dri_bo *bo;
unsigned char *desc_ptr;

- bo = mfc_context->gpe_context.dynamic_state.bo;
+ bo = mfc_context->gpe_context.idrt.bo;
dri_bo_map(bo, 1);
assert(bo->virtual);
- desc_ptr = (unsigned char *)bo->virtual + mfc_context->gpe_context.idrt_offset;
+ desc_ptr = (unsigned char *)bo->virtual + mfc_context->gpe_context.idrt.offset;

desc = (struct gen8_interface_descriptor_data *)desc_ptr;

@@ -4608,7 +4608,8 @@ Bool gen8_mfc_context_init(VADriverContextP ctx, struct intel_encoder_context *e
assert(mfc_context);
mfc_context->gpe_context.surface_state_binding_table.length = (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_MEDIA_SURFACES_GEN6;

- mfc_context->gpe_context.idrt_size = sizeof(struct gen8_interface_descriptor_data) * MAX_INTERFACE_DESC_GEN6;
+ mfc_context->gpe_context.idrt.entry_size = ALIGN(sizeof(struct gen8_interface_descriptor_data), 64);
+ mfc_context->gpe_context.idrt.max_entries = MAX_INTERFACE_DESC_GEN6;
mfc_context->gpe_context.curbe.length = 32 * 4;
mfc_context->gpe_context.sampler_size = 0;

diff --git a/src/gen8_vme.c b/src/gen8_vme.c
index 96835bf..5184ef1 100644
--- a/src/gen8_vme.c
+++ b/src/gen8_vme.c
@@ -333,10 +333,10 @@ static VAStatus gen8_vme_interface_setup(VADriverContextP ctx,
dri_bo *bo;
unsigned char *desc_ptr;

- bo = vme_context->gpe_context.dynamic_state.bo;
+ bo = vme_context->gpe_context.idrt.bo;
dri_bo_map(bo, 1);
assert(bo->virtual);
- desc_ptr = (unsigned char *)bo->virtual + vme_context->gpe_context.idrt_offset;
+ desc_ptr = (unsigned char *)bo->virtual + vme_context->gpe_context.idrt.offset;

desc = (struct gen8_interface_descriptor_data *)desc_ptr;

@@ -1378,7 +1378,9 @@ Bool gen8_vme_context_init(VADriverContextP ctx, struct intel_encoder_context *e
vme_context->vme_kernel_sum = i965_kernel_num;
vme_context->gpe_context.surface_state_binding_table.length = (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_MEDIA_SURFACES_GEN6;

- vme_context->gpe_context.idrt_size = sizeof(struct gen8_interface_descriptor_data) * MAX_INTERFACE_DESC_GEN6;
+ vme_context->gpe_context.idrt.entry_size = ALIGN(sizeof(struct gen8_interface_descriptor_data), 64);
+ vme_context->gpe_context.idrt.max_entries = MAX_INTERFACE_DESC_GEN6;
+
vme_context->gpe_context.curbe.length = CURBE_TOTAL_DATA_LENGTH;
vme_context->gpe_context.sampler_size = 0;

diff --git a/src/gen9_post_processing.c b/src/gen9_post_processing.c
index 71da501..2473803 100644
--- a/src/gen9_post_processing.c
+++ b/src/gen9_post_processing.c
@@ -536,7 +536,8 @@ gen9_post_processing_context_init(VADriverContextP ctx,
scaling_kernel.bin = pp_10bit_scaling_gen9;
scaling_kernel.size = sizeof(pp_10bit_scaling_gen9);
gen8_gpe_load_kernels(ctx, gpe_context, &scaling_kernel, 1);
- gpe_context->idrt_size = ALIGN(sizeof(struct gen8_interface_descriptor_data), 64);
+ gpe_context->idrt.entry_size = ALIGN(sizeof(struct gen8_interface_descriptor_data), 64);
+ gpe_context->idrt.max_entries = 1;
gpe_context->sampler_size = ALIGN(sizeof(struct gen8_sampler_state), 64);
gpe_context->curbe.length = ALIGN(sizeof(struct scaling_input_parameter), 64);

diff --git a/src/gen9_vme.c b/src/gen9_vme.c
index a59fe2a..33bf8aa 100644
--- a/src/gen9_vme.c
+++ b/src/gen9_vme.c
@@ -376,10 +376,10 @@ static VAStatus gen9_vme_interface_setup(VADriverContextP ctx,
dri_bo *bo;
unsigned char *desc_ptr;

- bo = vme_context->gpe_context.dynamic_state.bo;
+ bo = vme_context->gpe_context.idrt.bo;
dri_bo_map(bo, 1);
assert(bo->virtual);
- desc_ptr = (unsigned char *)bo->virtual + vme_context->gpe_context.idrt_offset;
+ desc_ptr = (unsigned char *)bo->virtual + vme_context->gpe_context.idrt.offset;

desc = (struct gen8_interface_descriptor_data *)desc_ptr;

@@ -2031,7 +2031,8 @@ Bool gen9_vme_context_init(VADriverContextP ctx, struct intel_encoder_context *e
vme_context->vme_kernel_sum = i965_kernel_num;
vme_context->gpe_context.surface_state_binding_table.length = (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_MEDIA_SURFACES_GEN6;

- vme_context->gpe_context.idrt_size = sizeof(struct gen8_interface_descriptor_data) * MAX_INTERFACE_DESC_GEN6;
+ vme_context->gpe_context.idrt.entry_size = ALIGN(sizeof(struct gen8_interface_descriptor_data), 64);
+ vme_context->gpe_context.idrt.max_entries = MAX_INTERFACE_DESC_GEN6;
vme_context->gpe_context.curbe.length = CURBE_TOTAL_DATA_LENGTH;
vme_context->gpe_context.sampler_size = 0;

diff --git a/src/gen9_vp9_encoder.c b/src/gen9_vp9_encoder.c
index 5ad7b26..5d4a4a8 100644
--- a/src/gen9_vp9_encoder.c
+++ b/src/gen9_vp9_encoder.c
@@ -3691,8 +3691,6 @@ gen9_init_gpe_context_vp9(struct i965_gpe_context *gpe_context,

gpe_context->idrt.entry_size = ALIGN(sizeof(struct gen8_interface_descriptor_data), 64); // 8 dws, 1 register
gpe_context->idrt.max_entries = NUM_KERNELS_PER_GPE_CONTEXT;
- gpe_context->idrt_size = ALIGN(sizeof(struct gen8_interface_descriptor_data), 64) *
- NUM_KERNELS_PER_GPE_CONTEXT;

gpe_context->surface_state_binding_table.max_entries = MAX_VP9_ENCODER_SURFACES;
gpe_context->surface_state_binding_table.binding_table_offset = 0;
diff --git a/src/i965_gpe_utils.c b/src/i965_gpe_utils.c
index 3739a88..2d7cfaf 100644
--- a/src/i965_gpe_utils.c
+++ b/src/i965_gpe_utils.c
@@ -1084,8 +1084,8 @@ gen8_gpe_idrt(VADriverContextP ctx,

OUT_BATCH(batch, CMD_MEDIA_INTERFACE_LOAD | (4 - 2));
OUT_BATCH(batch, 0);
- OUT_BATCH(batch, gpe_context->idrt_size);
- OUT_BATCH(batch, gpe_context->idrt_offset);
+ OUT_BATCH(batch, gpe_context->idrt.max_entries * gpe_context->idrt.entry_size);
+ OUT_BATCH(batch, gpe_context->idrt.offset);

ADVANCE_BATCH(batch);
}
@@ -1122,7 +1122,7 @@ gen8_gpe_context_init(VADriverContextP ctx,
assert(bo);
gpe_context->surface_state_binding_table.bo = bo;

- bo_size = gpe_context->idrt_size + gpe_context->curbe.length + gpe_context->sampler_size + 192;
+ bo_size = gpe_context->idrt.max_entries * gpe_context->idrt.entry_size + gpe_context->curbe.length + gpe_context->sampler_size + 192;
dri_bo_unreference(gpe_context->dynamic_state.bo);
bo = dri_bo_alloc(i965->intel.bufmgr,
"surface state & binding table",
@@ -1145,8 +1145,11 @@ gen8_gpe_context_init(VADriverContextP ctx,

/* Interface descriptor offset */
start_offset = ALIGN(end_offset, 64);
- gpe_context->idrt_offset = start_offset;
- end_offset = start_offset + gpe_context->idrt_size;
+ dri_bo_unreference(gpe_context->idrt.bo);
+ gpe_context->idrt.bo = bo;
+ dri_bo_reference(gpe_context->idrt.bo);
+ gpe_context->idrt.offset = start_offset;
+ end_offset = start_offset + gpe_context->idrt.entry_size * gpe_context->idrt.max_entries;

/* Sampler state offset */
start_offset = ALIGN(end_offset, 64);
@@ -1175,6 +1178,9 @@ gen8_gpe_context_destroy(struct i965_gpe_context *gpe_context)

dri_bo_unreference(gpe_context->curbe.bo);
gpe_context->curbe.bo = NULL;
+
+ dri_bo_unreference(gpe_context->idrt.bo);
+ gpe_context->idrt.bo = NULL;
}


@@ -1630,7 +1636,12 @@ gen8_gpe_context_set_dynamic_buffer(VADriverContextP ctx,
dri_bo_reference(gpe_context->curbe.bo);
gpe_context->curbe.offset = ds->curbe_offset;

- gpe_context->idrt_offset = ds->idrt_offset;
+ /* idrt buffer is a part of the dynamic buffer */
+ dri_bo_unreference(gpe_context->idrt.bo);
+ gpe_context->idrt.bo = ds->bo;
+ dri_bo_reference(gpe_context->idrt.bo);
+ gpe_context->idrt.offset = ds->idrt_offset;
+
gpe_context->sampler_offset = ds->sampler_offset;

return;
@@ -1677,10 +1688,10 @@ gen8_gpe_setup_interface_data(VADriverContextP ctx,
dri_bo *bo;
unsigned char *desc_ptr;

- bo = gpe_context->dynamic_state.bo;
+ bo = gpe_context->idrt.bo;
dri_bo_map(bo, 1);
assert(bo->virtual);
- desc_ptr = (unsigned char *)bo->virtual + gpe_context->idrt_offset;
+ desc_ptr = (unsigned char *)bo->virtual + gpe_context->idrt.offset;
desc = (struct gen8_interface_descriptor_data *)desc_ptr;

for (i = 0; i < gpe_context->num_kernels; i++) {
diff --git a/src/i965_gpe_utils.h b/src/i965_gpe_utils.h
index 92123fe..c3b8c79 100644
--- a/src/i965_gpe_utils.h
+++ b/src/i965_gpe_utils.h
@@ -87,6 +87,7 @@ struct i965_gpe_context
dri_bo *bo;
unsigned int max_entries;
unsigned int entry_size; /* in bytes */
+ unsigned int offset;
} idrt;

struct {
@@ -167,8 +168,6 @@ struct i965_gpe_context
unsigned int sampler_offset;
int sampler_entries;
int sampler_size;
- unsigned int idrt_offset;
- int idrt_size;
};

struct gpe_mi_flush_dw_parameter
--
1.9.1
Zhao Yakui
2016-11-18 06:20:40 UTC
Permalink
Post by Xiang, Haihao
This patch delete idrt_size and use (idrt.max_entries * idrt.entry_size) instead.
idrt.bo is always set even if the interface descriptor remapping table is a part of
the dynamic state buffer, hence we can use the corresponding settings no matter this
table is a part of the dynamic state buffer or not.
This looks good to me.

Add: Reviewed-by: Zhao Yakui <***@intel.com>

Thanks
Post by Xiang, Haihao
---
src/gen75_vpp_gpe.c | 10 +++++-----
src/gen8_mfc.c | 7 ++++---
src/gen8_vme.c | 8 +++++---
src/gen9_post_processing.c | 3 ++-
src/gen9_vme.c | 7 ++++---
src/gen9_vp9_encoder.c | 2 --
src/i965_gpe_utils.c | 27 +++++++++++++++++++--------
src/i965_gpe_utils.h | 3 +--
8 files changed, 40 insertions(+), 27 deletions(-)
diff --git a/src/gen75_vpp_gpe.c b/src/gen75_vpp_gpe.c
index 2cddb5a..6f5e2ef 100644
--- a/src/gen75_vpp_gpe.c
+++ b/src/gen75_vpp_gpe.c
@@ -406,13 +406,13 @@ gen8_gpe_process_interface_setup(VADriverContextP ctx,
struct vpp_gpe_context *vpp_gpe_ctx)
{
struct gen8_interface_descriptor_data *desc;
- dri_bo *bo = vpp_gpe_ctx->gpe_ctx.dynamic_state.bo;
+ dri_bo *bo = vpp_gpe_ctx->gpe_ctx.idrt.bo;
int i;
dri_bo_map(bo, 1);
assert(bo->virtual);
desc = (struct gen8_interface_descriptor_data *)(bo->virtual
- + vpp_gpe_ctx->gpe_ctx.idrt_offset);
+ + vpp_gpe_ctx->gpe_ctx.idrt.offset);
/*Setup the descritor table*/
for (i = 0; i< vpp_gpe_ctx->sub_shader_sum; i++){
@@ -880,7 +880,7 @@ vpp_gpe_context_init(VADriverContextP ctx)
gpe_ctx->curbe.length = CURBE_TOTAL_DATA_LENGTH;
gpe_ctx->idrt.max_entries = MAX_INTERFACE_DESC_GEN6;
- gpe_ctx->idrt.entry_size = sizeof(struct gen6_interface_descriptor_data);
+ gpe_ctx->idrt.entry_size = ALIGN(sizeof(struct gen6_interface_descriptor_data), 64);
} else if (IS_GEN8(i965->intel.device_info) ||
IS_GEN9(i965->intel.device_info)) {
@@ -891,8 +891,8 @@ vpp_gpe_context_init(VADriverContextP ctx)
(SURFACE_STATE_PADDED_SIZE_GEN8 + sizeof(unsigned int)) * MAX_MEDIA_SURFACES_GEN6;
gpe_ctx->curbe.length = CURBE_TOTAL_DATA_LENGTH;
- gpe_ctx->idrt_size = sizeof(struct gen8_interface_descriptor_data) * MAX_INTERFACE_DESC_GEN6;
-
+ gpe_ctx->idrt.entry_size = ALIGN(sizeof(struct gen8_interface_descriptor_data), 64);
+ gpe_ctx->idrt.max_entries = MAX_INTERFACE_DESC_GEN6;
}
return vpp_gpe_ctx;
diff --git a/src/gen8_mfc.c b/src/gen8_mfc.c
index 3ed9e84..c4e46fb 100644
--- a/src/gen8_mfc.c
+++ b/src/gen8_mfc.c
@@ -1333,10 +1333,10 @@ gen8_mfc_batchbuffer_idrt_setup(VADriverContextP ctx,
dri_bo *bo;
unsigned char *desc_ptr;
- bo = mfc_context->gpe_context.dynamic_state.bo;
+ bo = mfc_context->gpe_context.idrt.bo;
dri_bo_map(bo, 1);
assert(bo->virtual);
- desc_ptr = (unsigned char *)bo->virtual + mfc_context->gpe_context.idrt_offset;
+ desc_ptr = (unsigned char *)bo->virtual + mfc_context->gpe_context.idrt.offset;
desc = (struct gen8_interface_descriptor_data *)desc_ptr;
@@ -4608,7 +4608,8 @@ Bool gen8_mfc_context_init(VADriverContextP ctx, struct intel_encoder_context *e
assert(mfc_context);
mfc_context->gpe_context.surface_state_binding_table.length = (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_MEDIA_SURFACES_GEN6;
- mfc_context->gpe_context.idrt_size = sizeof(struct gen8_interface_descriptor_data) * MAX_INTERFACE_DESC_GEN6;
+ mfc_context->gpe_context.idrt.entry_size = ALIGN(sizeof(struct gen8_interface_descriptor_data), 64);
+ mfc_context->gpe_context.idrt.max_entries = MAX_INTERFACE_DESC_GEN6;
mfc_context->gpe_context.curbe.length = 32 * 4;
mfc_context->gpe_context.sampler_size = 0;
diff --git a/src/gen8_vme.c b/src/gen8_vme.c
index 96835bf..5184ef1 100644
--- a/src/gen8_vme.c
+++ b/src/gen8_vme.c
@@ -333,10 +333,10 @@ static VAStatus gen8_vme_interface_setup(VADriverContextP ctx,
dri_bo *bo;
unsigned char *desc_ptr;
- bo = vme_context->gpe_context.dynamic_state.bo;
+ bo = vme_context->gpe_context.idrt.bo;
dri_bo_map(bo, 1);
assert(bo->virtual);
- desc_ptr = (unsigned char *)bo->virtual + vme_context->gpe_context.idrt_offset;
+ desc_ptr = (unsigned char *)bo->virtual + vme_context->gpe_context.idrt.offset;
desc = (struct gen8_interface_descriptor_data *)desc_ptr;
@@ -1378,7 +1378,9 @@ Bool gen8_vme_context_init(VADriverContextP ctx, struct intel_encoder_context *e
vme_context->vme_kernel_sum = i965_kernel_num;
vme_context->gpe_context.surface_state_binding_table.length = (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_MEDIA_SURFACES_GEN6;
- vme_context->gpe_context.idrt_size = sizeof(struct gen8_interface_descriptor_data) * MAX_INTERFACE_DESC_GEN6;
+ vme_context->gpe_context.idrt.entry_size = ALIGN(sizeof(struct gen8_interface_descriptor_data), 64);
+ vme_context->gpe_context.idrt.max_entries = MAX_INTERFACE_DESC_GEN6;
+
vme_context->gpe_context.curbe.length = CURBE_TOTAL_DATA_LENGTH;
vme_context->gpe_context.sampler_size = 0;
diff --git a/src/gen9_post_processing.c b/src/gen9_post_processing.c
index 71da501..2473803 100644
--- a/src/gen9_post_processing.c
+++ b/src/gen9_post_processing.c
@@ -536,7 +536,8 @@ gen9_post_processing_context_init(VADriverContextP ctx,
scaling_kernel.bin = pp_10bit_scaling_gen9;
scaling_kernel.size = sizeof(pp_10bit_scaling_gen9);
gen8_gpe_load_kernels(ctx, gpe_context,&scaling_kernel, 1);
- gpe_context->idrt_size = ALIGN(sizeof(struct gen8_interface_descriptor_data), 64);
+ gpe_context->idrt.entry_size = ALIGN(sizeof(struct gen8_interface_descriptor_data), 64);
+ gpe_context->idrt.max_entries = 1;
gpe_context->sampler_size = ALIGN(sizeof(struct gen8_sampler_state), 64);
gpe_context->curbe.length = ALIGN(sizeof(struct scaling_input_parameter), 64);
diff --git a/src/gen9_vme.c b/src/gen9_vme.c
index a59fe2a..33bf8aa 100644
--- a/src/gen9_vme.c
+++ b/src/gen9_vme.c
@@ -376,10 +376,10 @@ static VAStatus gen9_vme_interface_setup(VADriverContextP ctx,
dri_bo *bo;
unsigned char *desc_ptr;
- bo = vme_context->gpe_context.dynamic_state.bo;
+ bo = vme_context->gpe_context.idrt.bo;
dri_bo_map(bo, 1);
assert(bo->virtual);
- desc_ptr = (unsigned char *)bo->virtual + vme_context->gpe_context.idrt_offset;
+ desc_ptr = (unsigned char *)bo->virtual + vme_context->gpe_context.idrt.offset;
desc = (struct gen8_interface_descriptor_data *)desc_ptr;
@@ -2031,7 +2031,8 @@ Bool gen9_vme_context_init(VADriverContextP ctx, struct intel_encoder_context *e
vme_context->vme_kernel_sum = i965_kernel_num;
vme_context->gpe_context.surface_state_binding_table.length = (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_MEDIA_SURFACES_GEN6;
- vme_context->gpe_context.idrt_size = sizeof(struct gen8_interface_descriptor_data) * MAX_INTERFACE_DESC_GEN6;
+ vme_context->gpe_context.idrt.entry_size = ALIGN(sizeof(struct gen8_interface_descriptor_data), 64);
+ vme_context->gpe_context.idrt.max_entries = MAX_INTERFACE_DESC_GEN6;
vme_context->gpe_context.curbe.length = CURBE_TOTAL_DATA_LENGTH;
vme_context->gpe_context.sampler_size = 0;
diff --git a/src/gen9_vp9_encoder.c b/src/gen9_vp9_encoder.c
index 5ad7b26..5d4a4a8 100644
--- a/src/gen9_vp9_encoder.c
+++ b/src/gen9_vp9_encoder.c
@@ -3691,8 +3691,6 @@ gen9_init_gpe_context_vp9(struct i965_gpe_context *gpe_context,
gpe_context->idrt.entry_size = ALIGN(sizeof(struct gen8_interface_descriptor_data), 64); // 8 dws, 1 register
gpe_context->idrt.max_entries = NUM_KERNELS_PER_GPE_CONTEXT;
- gpe_context->idrt_size = ALIGN(sizeof(struct gen8_interface_descriptor_data), 64) *
- NUM_KERNELS_PER_GPE_CONTEXT;
gpe_context->surface_state_binding_table.max_entries = MAX_VP9_ENCODER_SURFACES;
gpe_context->surface_state_binding_table.binding_table_offset = 0;
diff --git a/src/i965_gpe_utils.c b/src/i965_gpe_utils.c
index 3739a88..2d7cfaf 100644
--- a/src/i965_gpe_utils.c
+++ b/src/i965_gpe_utils.c
@@ -1084,8 +1084,8 @@ gen8_gpe_idrt(VADriverContextP ctx,
OUT_BATCH(batch, CMD_MEDIA_INTERFACE_LOAD | (4 - 2));
OUT_BATCH(batch, 0);
- OUT_BATCH(batch, gpe_context->idrt_size);
- OUT_BATCH(batch, gpe_context->idrt_offset);
+ OUT_BATCH(batch, gpe_context->idrt.max_entries * gpe_context->idrt.entry_size);
+ OUT_BATCH(batch, gpe_context->idrt.offset);
ADVANCE_BATCH(batch);
}
@@ -1122,7 +1122,7 @@ gen8_gpe_context_init(VADriverContextP ctx,
assert(bo);
gpe_context->surface_state_binding_table.bo = bo;
- bo_size = gpe_context->idrt_size + gpe_context->curbe.length + gpe_context->sampler_size + 192;
+ bo_size = gpe_context->idrt.max_entries * gpe_context->idrt.entry_size + gpe_context->curbe.length + gpe_context->sampler_size + 192;
dri_bo_unreference(gpe_context->dynamic_state.bo);
bo = dri_bo_alloc(i965->intel.bufmgr,
"surface state& binding table",
@@ -1145,8 +1145,11 @@ gen8_gpe_context_init(VADriverContextP ctx,
/* Interface descriptor offset */
start_offset = ALIGN(end_offset, 64);
- gpe_context->idrt_offset = start_offset;
- end_offset = start_offset + gpe_context->idrt_size;
+ dri_bo_unreference(gpe_context->idrt.bo);
+ gpe_context->idrt.bo = bo;
+ dri_bo_reference(gpe_context->idrt.bo);
+ gpe_context->idrt.offset = start_offset;
+ end_offset = start_offset + gpe_context->idrt.entry_size * gpe_context->idrt.max_entries;
/* Sampler state offset */
start_offset = ALIGN(end_offset, 64);
@@ -1175,6 +1178,9 @@ gen8_gpe_context_destroy(struct i965_gpe_context *gpe_context)
dri_bo_unreference(gpe_context->curbe.bo);
gpe_context->curbe.bo = NULL;
+
+ dri_bo_unreference(gpe_context->idrt.bo);
+ gpe_context->idrt.bo = NULL;
}
@@ -1630,7 +1636,12 @@ gen8_gpe_context_set_dynamic_buffer(VADriverContextP ctx,
dri_bo_reference(gpe_context->curbe.bo);
gpe_context->curbe.offset = ds->curbe_offset;
- gpe_context->idrt_offset = ds->idrt_offset;
+ /* idrt buffer is a part of the dynamic buffer */
+ dri_bo_unreference(gpe_context->idrt.bo);
+ gpe_context->idrt.bo = ds->bo;
+ dri_bo_reference(gpe_context->idrt.bo);
+ gpe_context->idrt.offset = ds->idrt_offset;
+
gpe_context->sampler_offset = ds->sampler_offset;
return;
@@ -1677,10 +1688,10 @@ gen8_gpe_setup_interface_data(VADriverContextP ctx,
dri_bo *bo;
unsigned char *desc_ptr;
- bo = gpe_context->dynamic_state.bo;
+ bo = gpe_context->idrt.bo;
dri_bo_map(bo, 1);
assert(bo->virtual);
- desc_ptr = (unsigned char *)bo->virtual + gpe_context->idrt_offset;
+ desc_ptr = (unsigned char *)bo->virtual + gpe_context->idrt.offset;
desc = (struct gen8_interface_descriptor_data *)desc_ptr;
for (i = 0; i< gpe_context->num_kernels; i++) {
diff --git a/src/i965_gpe_utils.h b/src/i965_gpe_utils.h
index 92123fe..c3b8c79 100644
--- a/src/i965_gpe_utils.h
+++ b/src/i965_gpe_utils.h
@@ -87,6 +87,7 @@ struct i965_gpe_context
dri_bo *bo;
unsigned int max_entries;
unsigned int entry_size; /* in bytes */
+ unsigned int offset;
} idrt;
struct {
@@ -167,8 +168,6 @@ struct i965_gpe_context
unsigned int sampler_offset;
int sampler_entries;
int sampler_size;
- unsigned int idrt_offset;
- int idrt_size;
};
struct gpe_mi_flush_dw_parameter
Xiang, Haihao
2016-11-18 05:43:46 UTC
Permalink
The user can now set the sampler entry size and the number of sampler entries. sampler.bo
is always set even if the sampler state is a part of the dynamic state buffer, hence we
can use the corresponding settings regardless of whether the sampler state is a part of
the dynamic state buffer or not.

Signed-off-by: Xiang, Haihao <***@intel.com>
---
src/gen8_mfc.c | 3 ++-
src/gen8_vme.c | 4 ++--
src/gen9_post_processing.c | 13 +++++++------
src/gen9_vme.c | 4 ++--
src/gen9_vp9_encoder.c | 15 +++++++++------
src/i965_gpe_utils.c | 21 ++++++++++++++++-----
src/i965_gpe_utils.h | 11 +++++++----
7 files changed, 45 insertions(+), 26 deletions(-)

diff --git a/src/gen8_mfc.c b/src/gen8_mfc.c
index c4e46fb..b0ee6fb 100644
--- a/src/gen8_mfc.c
+++ b/src/gen8_mfc.c
@@ -4611,7 +4611,8 @@ Bool gen8_mfc_context_init(VADriverContextP ctx, struct intel_encoder_context *e
mfc_context->gpe_context.idrt.entry_size = ALIGN(sizeof(struct gen8_interface_descriptor_data), 64);
mfc_context->gpe_context.idrt.max_entries = MAX_INTERFACE_DESC_GEN6;
mfc_context->gpe_context.curbe.length = 32 * 4;
- mfc_context->gpe_context.sampler_size = 0;
+ mfc_context->gpe_context.sampler.entry_size = 0;
+ mfc_context->gpe_context.sampler.max_entries = 0;

mfc_context->gpe_context.vfe_state.max_num_threads = 60 - 1;
mfc_context->gpe_context.vfe_state.num_urb_entries = 16;
diff --git a/src/gen8_vme.c b/src/gen8_vme.c
index 5184ef1..b14d60a 100644
--- a/src/gen8_vme.c
+++ b/src/gen8_vme.c
@@ -1382,8 +1382,8 @@ Bool gen8_vme_context_init(VADriverContextP ctx, struct intel_encoder_context *e
vme_context->gpe_context.idrt.max_entries = MAX_INTERFACE_DESC_GEN6;

vme_context->gpe_context.curbe.length = CURBE_TOTAL_DATA_LENGTH;
- vme_context->gpe_context.sampler_size = 0;
-
+ vme_context->gpe_context.sampler.entry_size = 0;
+ vme_context->gpe_context.sampler.max_entries = 0;

vme_context->gpe_context.vfe_state.max_num_threads = 60 - 1;
vme_context->gpe_context.vfe_state.num_urb_entries = 64;
diff --git a/src/gen9_post_processing.c b/src/gen9_post_processing.c
index 2473803..c0ae791 100644
--- a/src/gen9_post_processing.c
+++ b/src/gen9_post_processing.c
@@ -488,15 +488,15 @@ gen9_p010_scaling_sample_state(VADriverContextP ctx,

if (gpe_context == NULL || !src_rect || !dst_rect)
return;
- dri_bo_map(gpe_context->dynamic_state.bo, 1);
+ dri_bo_map(gpe_context->sampler.bo, 1);

- if (gpe_context->dynamic_state.bo->virtual == NULL)
+ if (gpe_context->sampler.bo->virtual == NULL)
return;

- assert(gpe_context->dynamic_state.bo->virtual);
+ assert(gpe_context->sampler.bo->virtual);

sampler_state = (struct gen8_sampler_state *)
- (gpe_context->dynamic_state.bo->virtual + gpe_context->sampler_offset);
+ (gpe_context->sampler.bo->virtual + gpe_context->sampler.offset);

memset(sampler_state, 0, sizeof(*sampler_state));

@@ -513,7 +513,7 @@ gen9_p010_scaling_sample_state(VADriverContextP ctx,
sampler_state->ss3.s_wrap_mode = I965_TEXCOORDMODE_CLAMP;
sampler_state->ss3.t_wrap_mode = I965_TEXCOORDMODE_CLAMP;

- dri_bo_unmap(gpe_context->dynamic_state.bo);
+ dri_bo_unmap(gpe_context->sampler.bo);
}

void
@@ -538,7 +538,8 @@ gen9_post_processing_context_init(VADriverContextP ctx,
gen8_gpe_load_kernels(ctx, gpe_context, &scaling_kernel, 1);
gpe_context->idrt.entry_size = ALIGN(sizeof(struct gen8_interface_descriptor_data), 64);
gpe_context->idrt.max_entries = 1;
- gpe_context->sampler_size = ALIGN(sizeof(struct gen8_sampler_state), 64);
+ gpe_context->sampler.entry_size = ALIGN(sizeof(struct gen8_sampler_state), 64);
+ gpe_context->sampler.max_entries = 1;
gpe_context->curbe.length = ALIGN(sizeof(struct scaling_input_parameter), 64);

gpe_context->surface_state_binding_table.max_entries = MAX_SCALING_SURFACES;
diff --git a/src/gen9_vme.c b/src/gen9_vme.c
index 33bf8aa..fab80ce 100644
--- a/src/gen9_vme.c
+++ b/src/gen9_vme.c
@@ -2034,8 +2034,8 @@ Bool gen9_vme_context_init(VADriverContextP ctx, struct intel_encoder_context *e
vme_context->gpe_context.idrt.entry_size = ALIGN(sizeof(struct gen8_interface_descriptor_data), 64);
vme_context->gpe_context.idrt.max_entries = MAX_INTERFACE_DESC_GEN6;
vme_context->gpe_context.curbe.length = CURBE_TOTAL_DATA_LENGTH;
- vme_context->gpe_context.sampler_size = 0;
-
+ vme_context->gpe_context.sampler.entry_size = 0;
+ vme_context->gpe_context.sampler.max_entries = 0;

vme_context->gpe_context.vfe_state.max_num_threads = 60 - 1;
vme_context->gpe_context.vfe_state.num_urb_entries = 64;
diff --git a/src/gen9_vp9_encoder.c b/src/gen9_vp9_encoder.c
index 5d4a4a8..1badd88 100644
--- a/src/gen9_vp9_encoder.c
+++ b/src/gen9_vp9_encoder.c
@@ -2594,13 +2594,13 @@ gen9_vp9_dys_set_sampler_state(struct i965_gpe_context *gpe_context)
if (!gpe_context)
return;

- dri_bo_map(gpe_context->dynamic_state.bo, 1);
+ dri_bo_map(gpe_context->sampler.bo, 1);

- if (!gpe_context->dynamic_state.bo->virtual)
+ if (!gpe_context->sampler.bo->virtual)
return;

sampler_cmd = (struct gen9_sampler_8x8_avs *)
- (gpe_context->dynamic_state.bo->virtual + gpe_context->sampler_offset);
+ (gpe_context->sampler.bo->virtual + gpe_context->sampler.offset);

memset(sampler_cmd, 0, sizeof(struct gen9_sampler_8x8_avs));

@@ -2658,7 +2658,7 @@ gen9_vp9_dys_set_sampler_state(struct i965_gpe_context *gpe_context)
&gen9_vp9_avs_coeffs[17 * 8],
15 * sizeof(struct gen8_sampler_8x8_avs_coefficients));

- dri_bo_unmap(gpe_context->dynamic_state.bo);
+ dri_bo_unmap(gpe_context->sampler.bo);
}

static void
@@ -3684,9 +3684,12 @@ gen9_init_gpe_context_vp9(struct i965_gpe_context *gpe_context,
{
gpe_context->curbe.length = kernel_param->curbe_size; // in bytes

- gpe_context->sampler_size = 0;
+ gpe_context->sampler.entry_size = 0;
+ gpe_context->sampler.max_entries = 0;
+
if (kernel_param->sampler_size) {
- gpe_context->sampler_size = ALIGN(kernel_param->sampler_size, 64);
+ gpe_context->sampler.entry_size = ALIGN(kernel_param->sampler_size, 64);
+ gpe_context->sampler.max_entries = 1;
}

gpe_context->idrt.entry_size = ALIGN(sizeof(struct gen8_interface_descriptor_data), 64); // 8 dws, 1 register
diff --git a/src/i965_gpe_utils.c b/src/i965_gpe_utils.c
index 2d7cfaf..d7286fc 100644
--- a/src/i965_gpe_utils.c
+++ b/src/i965_gpe_utils.c
@@ -1122,7 +1122,8 @@ gen8_gpe_context_init(VADriverContextP ctx,
assert(bo);
gpe_context->surface_state_binding_table.bo = bo;

- bo_size = gpe_context->idrt.max_entries * gpe_context->idrt.entry_size + gpe_context->curbe.length + gpe_context->sampler_size + 192;
+ bo_size = gpe_context->idrt.max_entries * gpe_context->idrt.entry_size + gpe_context->curbe.length +
+ gpe_context->sampler.max_entries * gpe_context->sampler.entry_size + 192;
dri_bo_unreference(gpe_context->dynamic_state.bo);
bo = dri_bo_alloc(i965->intel.bufmgr,
"surface state & binding table",
@@ -1153,8 +1154,11 @@ gen8_gpe_context_init(VADriverContextP ctx,

/* Sampler state offset */
start_offset = ALIGN(end_offset, 64);
- gpe_context->sampler_offset = start_offset;
- end_offset = start_offset + gpe_context->sampler_size;
+ dri_bo_unreference(gpe_context->sampler.bo);
+ gpe_context->sampler.bo = bo;
+ dri_bo_reference(gpe_context->sampler.bo);
+ gpe_context->sampler.offset = start_offset;
+ end_offset = start_offset + gpe_context->sampler.entry_size * gpe_context->sampler.max_entries;

/* update the end offset of dynamic_state */
gpe_context->dynamic_state.end_offset = end_offset;
@@ -1181,6 +1185,9 @@ gen8_gpe_context_destroy(struct i965_gpe_context *gpe_context)

dri_bo_unreference(gpe_context->idrt.bo);
gpe_context->idrt.bo = NULL;
+
+ dri_bo_unreference(gpe_context->sampler.bo);
+ gpe_context->sampler.bo = NULL;
}


@@ -1642,7 +1649,11 @@ gen8_gpe_context_set_dynamic_buffer(VADriverContextP ctx,
dri_bo_reference(gpe_context->idrt.bo);
gpe_context->idrt.offset = ds->idrt_offset;

- gpe_context->sampler_offset = ds->sampler_offset;
+ /* sampler buffer is a part of the dynamic buffer */
+ dri_bo_unreference(gpe_context->sampler.bo);
+ gpe_context->sampler.bo = ds->bo;
+ dri_bo_reference(gpe_context->sampler.bo);
+ gpe_context->sampler.offset = ds->sampler_offset;

return;
}
@@ -1704,7 +1715,7 @@ gen8_gpe_setup_interface_data(VADriverContextP ctx,
memset(desc, 0, sizeof(*desc));
desc->desc0.kernel_start_pointer = kernel->kernel_offset >> 6;
desc->desc3.sampler_count = 0;
- desc->desc3.sampler_state_pointer = (gpe_context->sampler_offset >> 5);
+ desc->desc3.sampler_state_pointer = (gpe_context->sampler.offset >> 5);
desc->desc4.binding_table_entry_count = 0;
desc->desc4.binding_table_pointer = (gpe_context->surface_state_binding_table.binding_table_offset >> 5);
desc->desc5.constant_urb_entry_read_offset = 0;
diff --git a/src/i965_gpe_utils.h b/src/i965_gpe_utils.h
index c3b8c79..e19e107 100644
--- a/src/i965_gpe_utils.h
+++ b/src/i965_gpe_utils.h
@@ -97,6 +97,13 @@ struct i965_gpe_context
} curbe;

struct {
+ dri_bo *bo;
+ unsigned int max_entries;
+ unsigned int entry_size; /* in bytes */
+ unsigned int offset;
+ } sampler;
+
+ struct {
unsigned int gpgpu_mode : 1;
unsigned int pad0 : 7;
unsigned int max_num_threads : 16;
@@ -164,10 +171,6 @@ struct i965_gpe_context
int bo_size;
unsigned int end_offset;
} dynamic_state;
-
- unsigned int sampler_offset;
- int sampler_entries;
- int sampler_size;
};

struct gpe_mi_flush_dw_parameter
--
1.9.1
Zhao Yakui
2016-11-18 06:24:04 UTC
Permalink
Post by Xiang, Haihao
Users can now set the sampler entry size and the number of sampler entries. sampler.bo is
always set, even if the sampler state is a part of the dynamic state buffer, so the
corresponding settings can be used regardless of whether the sampler state is a part of
the dynamic state buffer.
This looks good to me.
Post by Xiang, Haihao
---
src/gen8_mfc.c | 3 ++-
src/gen8_vme.c | 4 ++--
src/gen9_post_processing.c | 13 +++++++------
src/gen9_vme.c | 4 ++--
src/gen9_vp9_encoder.c | 15 +++++++++------
src/i965_gpe_utils.c | 21 ++++++++++++++++-----
src/i965_gpe_utils.h | 11 +++++++----
7 files changed, 45 insertions(+), 26 deletions(-)
diff --git a/src/gen8_mfc.c b/src/gen8_mfc.c
index c4e46fb..b0ee6fb 100644
--- a/src/gen8_mfc.c
+++ b/src/gen8_mfc.c
@@ -4611,7 +4611,8 @@ Bool gen8_mfc_context_init(VADriverContextP ctx, struct intel_encoder_context *e
mfc_context->gpe_context.idrt.entry_size = ALIGN(sizeof(struct gen8_interface_descriptor_data), 64);
mfc_context->gpe_context.idrt.max_entries = MAX_INTERFACE_DESC_GEN6;
mfc_context->gpe_context.curbe.length = 32 * 4;
- mfc_context->gpe_context.sampler_size = 0;
+ mfc_context->gpe_context.sampler.entry_size = 0;
+ mfc_context->gpe_context.sampler.max_entries = 0;
mfc_context->gpe_context.vfe_state.max_num_threads = 60 - 1;
mfc_context->gpe_context.vfe_state.num_urb_entries = 16;
diff --git a/src/gen8_vme.c b/src/gen8_vme.c
index 5184ef1..b14d60a 100644
--- a/src/gen8_vme.c
+++ b/src/gen8_vme.c
@@ -1382,8 +1382,8 @@ Bool gen8_vme_context_init(VADriverContextP ctx, struct intel_encoder_context *e
vme_context->gpe_context.idrt.max_entries = MAX_INTERFACE_DESC_GEN6;
vme_context->gpe_context.curbe.length = CURBE_TOTAL_DATA_LENGTH;
- vme_context->gpe_context.sampler_size = 0;
-
+ vme_context->gpe_context.sampler.entry_size = 0;
+ vme_context->gpe_context.sampler.max_entries = 0;
vme_context->gpe_context.vfe_state.max_num_threads = 60 - 1;
vme_context->gpe_context.vfe_state.num_urb_entries = 64;
diff --git a/src/gen9_post_processing.c b/src/gen9_post_processing.c
index 2473803..c0ae791 100644
--- a/src/gen9_post_processing.c
+++ b/src/gen9_post_processing.c
@@ -488,15 +488,15 @@ gen9_p010_scaling_sample_state(VADriverContextP ctx,
if (gpe_context == NULL || !src_rect || !dst_rect)
return;
- dri_bo_map(gpe_context->dynamic_state.bo, 1);
+ dri_bo_map(gpe_context->sampler.bo, 1);
- if (gpe_context->dynamic_state.bo->virtual == NULL)
+ if (gpe_context->sampler.bo->virtual == NULL)
return;
- assert(gpe_context->dynamic_state.bo->virtual);
+ assert(gpe_context->sampler.bo->virtual);
sampler_state = (struct gen8_sampler_state *)
- (gpe_context->dynamic_state.bo->virtual + gpe_context->sampler_offset);
+ (gpe_context->sampler.bo->virtual + gpe_context->sampler.offset);
memset(sampler_state, 0, sizeof(*sampler_state));
@@ -513,7 +513,7 @@ gen9_p010_scaling_sample_state(VADriverContextP ctx,
sampler_state->ss3.s_wrap_mode = I965_TEXCOORDMODE_CLAMP;
sampler_state->ss3.t_wrap_mode = I965_TEXCOORDMODE_CLAMP;
- dri_bo_unmap(gpe_context->dynamic_state.bo);
+ dri_bo_unmap(gpe_context->sampler.bo);
}
void
@@ -538,7 +538,8 @@ gen9_post_processing_context_init(VADriverContextP ctx,
gen8_gpe_load_kernels(ctx, gpe_context,&scaling_kernel, 1);
gpe_context->idrt.entry_size = ALIGN(sizeof(struct gen8_interface_descriptor_data), 64);
gpe_context->idrt.max_entries = 1;
- gpe_context->sampler_size = ALIGN(sizeof(struct gen8_sampler_state), 64);
+ gpe_context->sampler.entry_size = ALIGN(sizeof(struct gen8_sampler_state), 64);
+ gpe_context->sampler.max_entries = 1;
gpe_context->curbe.length = ALIGN(sizeof(struct scaling_input_parameter), 64);
gpe_context->surface_state_binding_table.max_entries = MAX_SCALING_SURFACES;
diff --git a/src/gen9_vme.c b/src/gen9_vme.c
index 33bf8aa..fab80ce 100644
--- a/src/gen9_vme.c
+++ b/src/gen9_vme.c
@@ -2034,8 +2034,8 @@ Bool gen9_vme_context_init(VADriverContextP ctx, struct intel_encoder_context *e
vme_context->gpe_context.idrt.entry_size = ALIGN(sizeof(struct gen8_interface_descriptor_data), 64);
vme_context->gpe_context.idrt.max_entries = MAX_INTERFACE_DESC_GEN6;
vme_context->gpe_context.curbe.length = CURBE_TOTAL_DATA_LENGTH;
- vme_context->gpe_context.sampler_size = 0;
-
+ vme_context->gpe_context.sampler.entry_size = 0;
+ vme_context->gpe_context.sampler.max_entries = 0;
vme_context->gpe_context.vfe_state.max_num_threads = 60 - 1;
vme_context->gpe_context.vfe_state.num_urb_entries = 64;
diff --git a/src/gen9_vp9_encoder.c b/src/gen9_vp9_encoder.c
index 5d4a4a8..1badd88 100644
--- a/src/gen9_vp9_encoder.c
+++ b/src/gen9_vp9_encoder.c
@@ -2594,13 +2594,13 @@ gen9_vp9_dys_set_sampler_state(struct i965_gpe_context *gpe_context)
if (!gpe_context)
return;
- dri_bo_map(gpe_context->dynamic_state.bo, 1);
+ dri_bo_map(gpe_context->sampler.bo, 1);
- if (!gpe_context->dynamic_state.bo->virtual)
+ if (!gpe_context->sampler.bo->virtual)
return;
sampler_cmd = (struct gen9_sampler_8x8_avs *)
- (gpe_context->dynamic_state.bo->virtual + gpe_context->sampler_offset);
+ (gpe_context->sampler.bo->virtual + gpe_context->sampler.offset);
memset(sampler_cmd, 0, sizeof(struct gen9_sampler_8x8_avs));
@@ -2658,7 +2658,7 @@ gen9_vp9_dys_set_sampler_state(struct i965_gpe_context *gpe_context)
&gen9_vp9_avs_coeffs[17 * 8],
15 * sizeof(struct gen8_sampler_8x8_avs_coefficients));
- dri_bo_unmap(gpe_context->dynamic_state.bo);
+ dri_bo_unmap(gpe_context->sampler.bo);
}
static void
@@ -3684,9 +3684,12 @@ gen9_init_gpe_context_vp9(struct i965_gpe_context *gpe_context,
{
gpe_context->curbe.length = kernel_param->curbe_size; // in bytes
- gpe_context->sampler_size = 0;
+ gpe_context->sampler.entry_size = 0;
+ gpe_context->sampler.max_entries = 0;
+
if (kernel_param->sampler_size) {
- gpe_context->sampler_size = ALIGN(kernel_param->sampler_size, 64);
+ gpe_context->sampler.entry_size = ALIGN(kernel_param->sampler_size, 64);
+ gpe_context->sampler.max_entries = 1;
}
gpe_context->idrt.entry_size = ALIGN(sizeof(struct gen8_interface_descriptor_data), 64); // 8 dws, 1 register
diff --git a/src/i965_gpe_utils.c b/src/i965_gpe_utils.c
index 2d7cfaf..d7286fc 100644
--- a/src/i965_gpe_utils.c
+++ b/src/i965_gpe_utils.c
@@ -1122,7 +1122,8 @@ gen8_gpe_context_init(VADriverContextP ctx,
assert(bo);
gpe_context->surface_state_binding_table.bo = bo;
- bo_size = gpe_context->idrt.max_entries * gpe_context->idrt.entry_size + gpe_context->curbe.length + gpe_context->sampler_size + 192;
+ bo_size = gpe_context->idrt.max_entries * gpe_context->idrt.entry_size + gpe_context->curbe.length +
+ gpe_context->sampler.max_entries * gpe_context->sampler.entry_size + 192;
dri_bo_unreference(gpe_context->dynamic_state.bo);
bo = dri_bo_alloc(i965->intel.bufmgr,
"surface state& binding table",
@@ -1153,8 +1154,11 @@ gen8_gpe_context_init(VADriverContextP ctx,
/* Sampler state offset */
start_offset = ALIGN(end_offset, 64);
- gpe_context->sampler_offset = start_offset;
- end_offset = start_offset + gpe_context->sampler_size;
+ dri_bo_unreference(gpe_context->sampler.bo);
+ gpe_context->sampler.bo = bo;
+ dri_bo_reference(gpe_context->sampler.bo);
+ gpe_context->sampler.offset = start_offset;
+ end_offset = start_offset + gpe_context->sampler.entry_size * gpe_context->sampler.max_entries;
/* update the end offset of dynamic_state */
gpe_context->dynamic_state.end_offset = end_offset;
@@ -1181,6 +1185,9 @@ gen8_gpe_context_destroy(struct i965_gpe_context *gpe_context)
dri_bo_unreference(gpe_context->idrt.bo);
gpe_context->idrt.bo = NULL;
+
+ dri_bo_unreference(gpe_context->sampler.bo);
+ gpe_context->sampler.bo = NULL;
}
@@ -1642,7 +1649,11 @@ gen8_gpe_context_set_dynamic_buffer(VADriverContextP ctx,
dri_bo_reference(gpe_context->idrt.bo);
gpe_context->idrt.offset = ds->idrt_offset;
- gpe_context->sampler_offset = ds->sampler_offset;
+ /* sampler buffer is a part of the dynamic buffer */
+ dri_bo_unreference(gpe_context->sampler.bo);
+ gpe_context->sampler.bo = ds->bo;
+ dri_bo_reference(gpe_context->sampler.bo);
+ gpe_context->sampler.offset = ds->sampler_offset;
return;
}
@@ -1704,7 +1715,7 @@ gen8_gpe_setup_interface_data(VADriverContextP ctx,
memset(desc, 0, sizeof(*desc));
desc->desc0.kernel_start_pointer = kernel->kernel_offset>> 6;
desc->desc3.sampler_count = 0;
- desc->desc3.sampler_state_pointer = (gpe_context->sampler_offset>> 5);
+ desc->desc3.sampler_state_pointer = (gpe_context->sampler.offset>> 5);
desc->desc4.binding_table_entry_count = 0;
desc->desc4.binding_table_pointer = (gpe_context->surface_state_binding_table.binding_table_offset>> 5);
desc->desc5.constant_urb_entry_read_offset = 0;
diff --git a/src/i965_gpe_utils.h b/src/i965_gpe_utils.h
index c3b8c79..e19e107 100644
--- a/src/i965_gpe_utils.h
+++ b/src/i965_gpe_utils.h
@@ -97,6 +97,13 @@ struct i965_gpe_context
} curbe;
struct {
+ dri_bo *bo;
+ unsigned int max_entries;
+ unsigned int entry_size; /* in bytes */
+ unsigned int offset;
+ } sampler;
+
+ struct {
unsigned int gpgpu_mode : 1;
unsigned int pad0 : 7;
unsigned int max_num_threads : 16;
@@ -164,10 +171,6 @@ struct i965_gpe_context
int bo_size;
unsigned int end_offset;
} dynamic_state;
-
- unsigned int sampler_offset;
- int sampler_entries;
- int sampler_size;
};
struct gpe_mi_flush_dw_parameter
Xiang, Haihao
2016-11-18 05:43:44 UTC
Permalink
To avoid confusion between curbe.length and curbe_size, this patch uses
curbe.length only. curbe.bo is always set, even if the curbe is a part of the
dynamic state buffer, so the curbe-related settings can be used regardless of
whether the curbe is a part of the dynamic state buffer.

Signed-off-by: Xiang, Haihao <***@intel.com>
---
src/gen75_vpp_gpe.c | 2 +-
src/gen8_mfc.c | 2 +-
src/gen8_vme.c | 12 ++++++------
src/gen9_post_processing.c | 3 +--
src/gen9_vme.c | 12 ++++++------
src/gen9_vp9_encoder.c | 22 ++++++++++------------
src/i965_gpe_utils.c | 28 +++++++++++++++++++---------
src/i965_gpe_utils.h | 3 +--
8 files changed, 45 insertions(+), 39 deletions(-)

diff --git a/src/gen75_vpp_gpe.c b/src/gen75_vpp_gpe.c
index 9850c1c..2cddb5a 100644
--- a/src/gen75_vpp_gpe.c
+++ b/src/gen75_vpp_gpe.c
@@ -890,7 +890,7 @@ vpp_gpe_context_init(VADriverContextP ctx)
gpe_ctx->surface_state_binding_table.length =
(SURFACE_STATE_PADDED_SIZE_GEN8 + sizeof(unsigned int)) * MAX_MEDIA_SURFACES_GEN6;

- gpe_ctx->curbe_size = CURBE_TOTAL_DATA_LENGTH;
+ gpe_ctx->curbe.length = CURBE_TOTAL_DATA_LENGTH;
gpe_ctx->idrt_size = sizeof(struct gen8_interface_descriptor_data) * MAX_INTERFACE_DESC_GEN6;

}
diff --git a/src/gen8_mfc.c b/src/gen8_mfc.c
index 63ffea5..3ed9e84 100644
--- a/src/gen8_mfc.c
+++ b/src/gen8_mfc.c
@@ -4609,7 +4609,7 @@ Bool gen8_mfc_context_init(VADriverContextP ctx, struct intel_encoder_context *e
mfc_context->gpe_context.surface_state_binding_table.length = (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_MEDIA_SURFACES_GEN6;

mfc_context->gpe_context.idrt_size = sizeof(struct gen8_interface_descriptor_data) * MAX_INTERFACE_DESC_GEN6;
- mfc_context->gpe_context.curbe_size = 32 * 4;
+ mfc_context->gpe_context.curbe.length = 32 * 4;
mfc_context->gpe_context.sampler_size = 0;

mfc_context->gpe_context.vfe_state.max_num_threads = 60 - 1;
diff --git a/src/gen8_vme.c b/src/gen8_vme.c
index c79c62b..96835bf 100644
--- a/src/gen8_vme.c
+++ b/src/gen8_vme.c
@@ -389,10 +389,10 @@ static VAStatus gen8_vme_constant_setup(VADriverContextP ctx,

vme_state_message[31] = mv_num;

- dri_bo_map(vme_context->gpe_context.dynamic_state.bo, 1);
- assert(vme_context->gpe_context.dynamic_state.bo->virtual);
- constant_buffer = (unsigned char *)vme_context->gpe_context.dynamic_state.bo->virtual +
- vme_context->gpe_context.curbe_offset;
+ dri_bo_map(vme_context->gpe_context.curbe.bo, 1);
+ assert(vme_context->gpe_context.curbe.bo->virtual);
+ constant_buffer = (unsigned char *)vme_context->gpe_context.curbe.bo->virtual +
+ vme_context->gpe_context.curbe.offset;

/* VME MV/Mb cost table is passed by using const buffer */
/* Now it uses the fixed search path. So it is constructed directly
@@ -400,7 +400,7 @@ static VAStatus gen8_vme_constant_setup(VADriverContextP ctx,
*/
memcpy(constant_buffer, (char *)vme_context->vme_state_message, 128);

- dri_bo_unmap(vme_context->gpe_context.dynamic_state.bo);
+ dri_bo_unmap(vme_context->gpe_context.curbe.bo);

return VA_STATUS_SUCCESS;
}
@@ -1379,7 +1379,7 @@ Bool gen8_vme_context_init(VADriverContextP ctx, struct intel_encoder_context *e
vme_context->gpe_context.surface_state_binding_table.length = (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_MEDIA_SURFACES_GEN6;

vme_context->gpe_context.idrt_size = sizeof(struct gen8_interface_descriptor_data) * MAX_INTERFACE_DESC_GEN6;
- vme_context->gpe_context.curbe_size = CURBE_TOTAL_DATA_LENGTH;
+ vme_context->gpe_context.curbe.length = CURBE_TOTAL_DATA_LENGTH;
vme_context->gpe_context.sampler_size = 0;


diff --git a/src/gen9_post_processing.c b/src/gen9_post_processing.c
index a5d345c..71da501 100644
--- a/src/gen9_post_processing.c
+++ b/src/gen9_post_processing.c
@@ -538,8 +538,7 @@ gen9_post_processing_context_init(VADriverContextP ctx,
gen8_gpe_load_kernels(ctx, gpe_context, &scaling_kernel, 1);
gpe_context->idrt_size = ALIGN(sizeof(struct gen8_interface_descriptor_data), 64);
gpe_context->sampler_size = ALIGN(sizeof(struct gen8_sampler_state), 64);
- gpe_context->curbe_size = ALIGN(sizeof(struct scaling_input_parameter), 64);
- gpe_context->curbe.length = gpe_context->curbe_size;
+ gpe_context->curbe.length = ALIGN(sizeof(struct scaling_input_parameter), 64);

gpe_context->surface_state_binding_table.max_entries = MAX_SCALING_SURFACES;
gpe_context->surface_state_binding_table.binding_table_offset = 0;
diff --git a/src/gen9_vme.c b/src/gen9_vme.c
index 6ad8fff..a59fe2a 100644
--- a/src/gen9_vme.c
+++ b/src/gen9_vme.c
@@ -438,10 +438,10 @@ static VAStatus gen9_vme_constant_setup(VADriverContextP ctx,

vme_state_message[31] = mv_num;

- dri_bo_map(vme_context->gpe_context.dynamic_state.bo, 1);
- assert(vme_context->gpe_context.dynamic_state.bo->virtual);
- constant_buffer = (unsigned char *)vme_context->gpe_context.dynamic_state.bo->virtual +
- vme_context->gpe_context.curbe_offset;
+ dri_bo_map(vme_context->gpe_context.curbe.bo, 1);
+ assert(vme_context->gpe_context.curbe.bo->virtual);
+ constant_buffer = (unsigned char *)vme_context->gpe_context.curbe.bo->virtual +
+ vme_context->gpe_context.curbe.offset;

/* VME MV/Mb cost table is passed by using const buffer */
/* Now it uses the fixed search path. So it is constructed directly
@@ -449,7 +449,7 @@ static VAStatus gen9_vme_constant_setup(VADriverContextP ctx,
*/
memcpy(constant_buffer, (char *)vme_context->vme_state_message, 128);

- dri_bo_unmap(vme_context->gpe_context.dynamic_state.bo);
+ dri_bo_unmap(vme_context->gpe_context.curbe.bo);

return VA_STATUS_SUCCESS;
}
@@ -2032,7 +2032,7 @@ Bool gen9_vme_context_init(VADriverContextP ctx, struct intel_encoder_context *e
vme_context->gpe_context.surface_state_binding_table.length = (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_MEDIA_SURFACES_GEN6;

vme_context->gpe_context.idrt_size = sizeof(struct gen8_interface_descriptor_data) * MAX_INTERFACE_DESC_GEN6;
- vme_context->gpe_context.curbe_size = CURBE_TOTAL_DATA_LENGTH;
+ vme_context->gpe_context.curbe.length = CURBE_TOTAL_DATA_LENGTH;
vme_context->gpe_context.sampler_size = 0;


diff --git a/src/gen9_vp9_encoder.c b/src/gen9_vp9_encoder.c
index f39d6d0..5ad7b26 100644
--- a/src/gen9_vp9_encoder.c
+++ b/src/gen9_vp9_encoder.c
@@ -1820,18 +1820,18 @@ gen9_brc_update_add_surfaces_vp9(VADriverContextP ctx,
/* 4. Mbenc curbe input buffer */
gen9_add_dri_buffer_gpe_surface(ctx,
brc_gpe_context,
- mbenc_gpe_context->dynamic_state.bo,
+ mbenc_gpe_context->curbe.bo,
0,
- ALIGN(mbenc_gpe_context->curbe_size, 64),
- mbenc_gpe_context->curbe_offset,
+ ALIGN(mbenc_gpe_context->curbe.length, 64),
+ mbenc_gpe_context->curbe.offset,
VP9_BTI_BRC_MBENC_CURBE_INPUT_G9);
/* 5. Mbenc curbe output buffer */
gen9_add_dri_buffer_gpe_surface(ctx,
brc_gpe_context,
- mbenc_gpe_context->dynamic_state.bo,
+ mbenc_gpe_context->curbe.bo,
0,
- ALIGN(mbenc_gpe_context->curbe_size, 64),
- mbenc_gpe_context->curbe_offset,
+ ALIGN(mbenc_gpe_context->curbe.length, 64),
+ mbenc_gpe_context->curbe.offset,
VP9_BTI_BRC_MBENC_CURBE_OUTPUT_G9);

/* 6. BRC_PIC_STATE read buffer */
@@ -3289,10 +3289,10 @@ gen9_vp9_send_mbenc_surface(VADriverContextP ctx,

gen9_add_dri_buffer_gpe_surface(ctx,
gpe_context,
- mbenc_param->gpe_context_tx->dynamic_state.bo,
+ mbenc_param->gpe_context_tx->curbe.bo,
0,
ALIGN(res_size, 64),
- mbenc_param->gpe_context_tx->curbe_offset,
+ mbenc_param->gpe_context_tx->curbe.offset,
VP9_BTI_MBENC_TX_CURBE_G9);

break;
@@ -3441,10 +3441,10 @@ gen9_vp9_send_mbenc_surface(VADriverContextP ctx,

gen9_add_dri_buffer_gpe_surface(ctx,
gpe_context,
- mbenc_param->gpe_context_tx->dynamic_state.bo,
+ mbenc_param->gpe_context_tx->curbe.bo,
0,
ALIGN(res_size, 64),
- mbenc_param->gpe_context_tx->curbe_offset,
+ mbenc_param->gpe_context_tx->curbe.offset,
VP9_BTI_MBENC_TX_CURBE_G9);


@@ -3684,8 +3684,6 @@ gen9_init_gpe_context_vp9(struct i965_gpe_context *gpe_context,
{
gpe_context->curbe.length = kernel_param->curbe_size; // in bytes

- gpe_context->curbe_size = ALIGN(kernel_param->curbe_size, 64);
-
gpe_context->sampler_size = 0;
if (kernel_param->sampler_size) {
gpe_context->sampler_size = ALIGN(kernel_param->sampler_size, 64);
diff --git a/src/i965_gpe_utils.c b/src/i965_gpe_utils.c
index c5a8935..3739a88 100644
--- a/src/i965_gpe_utils.c
+++ b/src/i965_gpe_utils.c
@@ -1066,8 +1066,8 @@ gen8_gpe_curbe_load(VADriverContextP ctx,

OUT_BATCH(batch, CMD_MEDIA_CURBE_LOAD | (4 - 2));
OUT_BATCH(batch, 0);
- OUT_BATCH(batch, gpe_context->curbe_size);
- OUT_BATCH(batch, gpe_context->curbe_offset);
+ OUT_BATCH(batch, gpe_context->curbe.length);
+ OUT_BATCH(batch, gpe_context->curbe.offset);

ADVANCE_BATCH(batch);
}
@@ -1122,7 +1122,7 @@ gen8_gpe_context_init(VADriverContextP ctx,
assert(bo);
gpe_context->surface_state_binding_table.bo = bo;

- bo_size = gpe_context->idrt_size + gpe_context->curbe_size + gpe_context->sampler_size + 192;
+ bo_size = gpe_context->idrt_size + gpe_context->curbe.length + gpe_context->sampler_size + 192;
dri_bo_unreference(gpe_context->dynamic_state.bo);
bo = dri_bo_alloc(i965->intel.bufmgr,
"surface state & binding table",
@@ -1137,8 +1137,11 @@ gen8_gpe_context_init(VADriverContextP ctx,

/* Constant buffer offset */
start_offset = ALIGN(end_offset, 64);
- gpe_context->curbe_offset = start_offset;
- end_offset = start_offset + gpe_context->curbe_size;
+ dri_bo_unreference(gpe_context->curbe.bo);
+ gpe_context->curbe.bo = bo;
+ dri_bo_reference(gpe_context->curbe.bo);
+ gpe_context->curbe.offset = start_offset;
+ end_offset = start_offset + gpe_context->curbe.length;

/* Interface descriptor offset */
start_offset = ALIGN(end_offset, 64);
@@ -1170,6 +1173,8 @@ gen8_gpe_context_destroy(struct i965_gpe_context *gpe_context)
dri_bo_unreference(gpe_context->indirect_state.bo);
gpe_context->indirect_state.bo = NULL;

+ dri_bo_unreference(gpe_context->curbe.bo);
+ gpe_context->curbe.bo = NULL;
}


@@ -1619,7 +1624,12 @@ gen8_gpe_context_set_dynamic_buffer(VADriverContextP ctx,
dri_bo_reference(gpe_context->dynamic_state.bo);
gpe_context->dynamic_state.bo_size = ds->bo_size;

- gpe_context->curbe_offset = ds->curbe_offset;
+ /* curbe buffer is a part of the dynamic buffer */
+ dri_bo_unreference(gpe_context->curbe.bo);
+ gpe_context->curbe.bo = ds->bo;
+ dri_bo_reference(gpe_context->curbe.bo);
+ gpe_context->curbe.offset = ds->curbe_offset;
+
gpe_context->idrt_offset = ds->idrt_offset;
gpe_context->sampler_offset = ds->sampler_offset;

@@ -1629,15 +1639,15 @@ gen8_gpe_context_set_dynamic_buffer(VADriverContextP ctx,
void *
gen8p_gpe_context_map_curbe(struct i965_gpe_context *gpe_context)
{
- dri_bo_map(gpe_context->dynamic_state.bo, 1);
+ dri_bo_map(gpe_context->curbe.bo, 1);

- return (char *)gpe_context->dynamic_state.bo->virtual + gpe_context->curbe_offset;
+ return (char *)gpe_context->curbe.bo->virtual + gpe_context->curbe.offset;
}

void
gen8p_gpe_context_unmap_curbe(struct i965_gpe_context *gpe_context)
{
- dri_bo_unmap(gpe_context->dynamic_state.bo);
+ dri_bo_unmap(gpe_context->curbe.bo);
}

void
diff --git a/src/i965_gpe_utils.h b/src/i965_gpe_utils.h
index 0cbef43..92123fe 100644
--- a/src/i965_gpe_utils.h
+++ b/src/i965_gpe_utils.h
@@ -92,6 +92,7 @@ struct i965_gpe_context
struct {
dri_bo *bo;
unsigned int length; /* in bytes */
+ unsigned int offset;
} curbe;

struct {
@@ -168,8 +169,6 @@ struct i965_gpe_context
int sampler_size;
unsigned int idrt_offset;
int idrt_size;
- unsigned int curbe_offset;
- int curbe_size;
};

struct gpe_mi_flush_dw_parameter
--
1.9.1
Zhao Yakui
2016-11-18 06:16:05 UTC
Permalink
Post by Xiang, Haihao
To avoid confusion between curbe.length and curbe_size, this patch uses
curbe.length only. curbe.bo is always set, even if the curbe is a part of the
dynamic state buffer, so the curbe-related settings can be used regardless of
whether the curbe is a part of the dynamic state buffer.
This looks good to me.
Post by Xiang, Haihao
---
src/gen75_vpp_gpe.c | 2 +-
src/gen8_mfc.c | 2 +-
src/gen8_vme.c | 12 ++++++------
src/gen9_post_processing.c | 3 +--
src/gen9_vme.c | 12 ++++++------
src/gen9_vp9_encoder.c | 22 ++++++++++------------
src/i965_gpe_utils.c | 28 +++++++++++++++++++---------
src/i965_gpe_utils.h | 3 +--
8 files changed, 45 insertions(+), 39 deletions(-)
diff --git a/src/gen75_vpp_gpe.c b/src/gen75_vpp_gpe.c
index 9850c1c..2cddb5a 100644
--- a/src/gen75_vpp_gpe.c
+++ b/src/gen75_vpp_gpe.c
@@ -890,7 +890,7 @@ vpp_gpe_context_init(VADriverContextP ctx)
gpe_ctx->surface_state_binding_table.length =
(SURFACE_STATE_PADDED_SIZE_GEN8 + sizeof(unsigned int)) * MAX_MEDIA_SURFACES_GEN6;
- gpe_ctx->curbe_size = CURBE_TOTAL_DATA_LENGTH;
+ gpe_ctx->curbe.length = CURBE_TOTAL_DATA_LENGTH;
gpe_ctx->idrt_size = sizeof(struct gen8_interface_descriptor_data) * MAX_INTERFACE_DESC_GEN6;
}
diff --git a/src/gen8_mfc.c b/src/gen8_mfc.c
index 63ffea5..3ed9e84 100644
--- a/src/gen8_mfc.c
+++ b/src/gen8_mfc.c
@@ -4609,7 +4609,7 @@ Bool gen8_mfc_context_init(VADriverContextP ctx, struct intel_encoder_context *e
mfc_context->gpe_context.surface_state_binding_table.length = (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_MEDIA_SURFACES_GEN6;
mfc_context->gpe_context.idrt_size = sizeof(struct gen8_interface_descriptor_data) * MAX_INTERFACE_DESC_GEN6;
- mfc_context->gpe_context.curbe_size = 32 * 4;
+ mfc_context->gpe_context.curbe.length = 32 * 4;
mfc_context->gpe_context.sampler_size = 0;
mfc_context->gpe_context.vfe_state.max_num_threads = 60 - 1;
diff --git a/src/gen8_vme.c b/src/gen8_vme.c
index c79c62b..96835bf 100644
--- a/src/gen8_vme.c
+++ b/src/gen8_vme.c
@@ -389,10 +389,10 @@ static VAStatus gen8_vme_constant_setup(VADriverContextP ctx,
vme_state_message[31] = mv_num;
- dri_bo_map(vme_context->gpe_context.dynamic_state.bo, 1);
- assert(vme_context->gpe_context.dynamic_state.bo->virtual);
- constant_buffer = (unsigned char *)vme_context->gpe_context.dynamic_state.bo->virtual +
- vme_context->gpe_context.curbe_offset;
+ dri_bo_map(vme_context->gpe_context.curbe.bo, 1);
+ assert(vme_context->gpe_context.curbe.bo->virtual);
+ constant_buffer = (unsigned char *)vme_context->gpe_context.curbe.bo->virtual +
+ vme_context->gpe_context.curbe.offset;
/* VME MV/Mb cost table is passed by using const buffer */
/* Now it uses the fixed search path. So it is constructed directly
@@ -400,7 +400,7 @@ static VAStatus gen8_vme_constant_setup(VADriverContextP ctx,
*/
memcpy(constant_buffer, (char *)vme_context->vme_state_message, 128);
- dri_bo_unmap(vme_context->gpe_context.dynamic_state.bo);
+ dri_bo_unmap(vme_context->gpe_context.curbe.bo);
return VA_STATUS_SUCCESS;
}
@@ -1379,7 +1379,7 @@ Bool gen8_vme_context_init(VADriverContextP ctx, struct intel_encoder_context *e
vme_context->gpe_context.surface_state_binding_table.length = (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_MEDIA_SURFACES_GEN6;
vme_context->gpe_context.idrt_size = sizeof(struct gen8_interface_descriptor_data) * MAX_INTERFACE_DESC_GEN6;
- vme_context->gpe_context.curbe_size = CURBE_TOTAL_DATA_LENGTH;
+ vme_context->gpe_context.curbe.length = CURBE_TOTAL_DATA_LENGTH;
vme_context->gpe_context.sampler_size = 0;
diff --git a/src/gen9_post_processing.c b/src/gen9_post_processing.c
index a5d345c..71da501 100644
--- a/src/gen9_post_processing.c
+++ b/src/gen9_post_processing.c
@@ -538,8 +538,7 @@ gen9_post_processing_context_init(VADriverContextP ctx,
gen8_gpe_load_kernels(ctx, gpe_context,&scaling_kernel, 1);
gpe_context->idrt_size = ALIGN(sizeof(struct gen8_interface_descriptor_data), 64);
gpe_context->sampler_size = ALIGN(sizeof(struct gen8_sampler_state), 64);
- gpe_context->curbe_size = ALIGN(sizeof(struct scaling_input_parameter), 64);
- gpe_context->curbe.length = gpe_context->curbe_size;
+ gpe_context->curbe.length = ALIGN(sizeof(struct scaling_input_parameter), 64);
gpe_context->surface_state_binding_table.max_entries = MAX_SCALING_SURFACES;
gpe_context->surface_state_binding_table.binding_table_offset = 0;
diff --git a/src/gen9_vme.c b/src/gen9_vme.c
index 6ad8fff..a59fe2a 100644
--- a/src/gen9_vme.c
+++ b/src/gen9_vme.c
@@ -438,10 +438,10 @@ static VAStatus gen9_vme_constant_setup(VADriverContextP ctx,
vme_state_message[31] = mv_num;
- dri_bo_map(vme_context->gpe_context.dynamic_state.bo, 1);
- assert(vme_context->gpe_context.dynamic_state.bo->virtual);
- constant_buffer = (unsigned char *)vme_context->gpe_context.dynamic_state.bo->virtual +
- vme_context->gpe_context.curbe_offset;
+ dri_bo_map(vme_context->gpe_context.curbe.bo, 1);
+ assert(vme_context->gpe_context.curbe.bo->virtual);
+ constant_buffer = (unsigned char *)vme_context->gpe_context.curbe.bo->virtual +
+ vme_context->gpe_context.curbe.offset;
/* VME MV/Mb cost table is passed by using const buffer */
/* Now it uses the fixed search path. So it is constructed directly
@@ -449,7 +449,7 @@ static VAStatus gen9_vme_constant_setup(VADriverContextP ctx,
*/
memcpy(constant_buffer, (char *)vme_context->vme_state_message, 128);
- dri_bo_unmap(vme_context->gpe_context.dynamic_state.bo);
+ dri_bo_unmap(vme_context->gpe_context.curbe.bo);
return VA_STATUS_SUCCESS;
}
@@ -2032,7 +2032,7 @@ Bool gen9_vme_context_init(VADriverContextP ctx, struct intel_encoder_context *e
vme_context->gpe_context.surface_state_binding_table.length = (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_MEDIA_SURFACES_GEN6;
vme_context->gpe_context.idrt_size = sizeof(struct gen8_interface_descriptor_data) * MAX_INTERFACE_DESC_GEN6;
- vme_context->gpe_context.curbe_size = CURBE_TOTAL_DATA_LENGTH;
+ vme_context->gpe_context.curbe.length = CURBE_TOTAL_DATA_LENGTH;
vme_context->gpe_context.sampler_size = 0;
diff --git a/src/gen9_vp9_encoder.c b/src/gen9_vp9_encoder.c
index f39d6d0..5ad7b26 100644
--- a/src/gen9_vp9_encoder.c
+++ b/src/gen9_vp9_encoder.c
@@ -1820,18 +1820,18 @@ gen9_brc_update_add_surfaces_vp9(VADriverContextP ctx,
/* 4. Mbenc curbe input buffer */
gen9_add_dri_buffer_gpe_surface(ctx,
brc_gpe_context,
- mbenc_gpe_context->dynamic_state.bo,
+ mbenc_gpe_context->curbe.bo,
0,
- ALIGN(mbenc_gpe_context->curbe_size, 64),
- mbenc_gpe_context->curbe_offset,
+ ALIGN(mbenc_gpe_context->curbe.length, 64),
+ mbenc_gpe_context->curbe.offset,
VP9_BTI_BRC_MBENC_CURBE_INPUT_G9);
/* 5. Mbenc curbe output buffer */
gen9_add_dri_buffer_gpe_surface(ctx,
brc_gpe_context,
- mbenc_gpe_context->dynamic_state.bo,
+ mbenc_gpe_context->curbe.bo,
0,
- ALIGN(mbenc_gpe_context->curbe_size, 64),
- mbenc_gpe_context->curbe_offset,
+ ALIGN(mbenc_gpe_context->curbe.length, 64),
+ mbenc_gpe_context->curbe.offset,
VP9_BTI_BRC_MBENC_CURBE_OUTPUT_G9);
/* 6. BRC_PIC_STATE read buffer */
@@ -3289,10 +3289,10 @@ gen9_vp9_send_mbenc_surface(VADriverContextP ctx,
gen9_add_dri_buffer_gpe_surface(ctx,
gpe_context,
- mbenc_param->gpe_context_tx->dynamic_state.bo,
+ mbenc_param->gpe_context_tx->curbe.bo,
0,
ALIGN(res_size, 64),
- mbenc_param->gpe_context_tx->curbe_offset,
+ mbenc_param->gpe_context_tx->curbe.offset,
VP9_BTI_MBENC_TX_CURBE_G9);
break;
@@ -3441,10 +3441,10 @@ gen9_vp9_send_mbenc_surface(VADriverContextP ctx,
gen9_add_dri_buffer_gpe_surface(ctx,
gpe_context,
- mbenc_param->gpe_context_tx->dynamic_state.bo,
+ mbenc_param->gpe_context_tx->curbe.bo,
0,
ALIGN(res_size, 64),
- mbenc_param->gpe_context_tx->curbe_offset,
+ mbenc_param->gpe_context_tx->curbe.offset,
VP9_BTI_MBENC_TX_CURBE_G9);
@@ -3684,8 +3684,6 @@ gen9_init_gpe_context_vp9(struct i965_gpe_context *gpe_context,
{
gpe_context->curbe.length = kernel_param->curbe_size; // in bytes
- gpe_context->curbe_size = ALIGN(kernel_param->curbe_size, 64);
-
gpe_context->sampler_size = 0;
if (kernel_param->sampler_size) {
gpe_context->sampler_size = ALIGN(kernel_param->sampler_size, 64);
diff --git a/src/i965_gpe_utils.c b/src/i965_gpe_utils.c
index c5a8935..3739a88 100644
--- a/src/i965_gpe_utils.c
+++ b/src/i965_gpe_utils.c
@@ -1066,8 +1066,8 @@ gen8_gpe_curbe_load(VADriverContextP ctx,
OUT_BATCH(batch, CMD_MEDIA_CURBE_LOAD | (4 - 2));
OUT_BATCH(batch, 0);
- OUT_BATCH(batch, gpe_context->curbe_size);
- OUT_BATCH(batch, gpe_context->curbe_offset);
+ OUT_BATCH(batch, gpe_context->curbe.length);
+ OUT_BATCH(batch, gpe_context->curbe.offset);
ADVANCE_BATCH(batch);
}
@@ -1122,7 +1122,7 @@ gen8_gpe_context_init(VADriverContextP ctx,
assert(bo);
gpe_context->surface_state_binding_table.bo = bo;
- bo_size = gpe_context->idrt_size + gpe_context->curbe_size + gpe_context->sampler_size + 192;
+ bo_size = gpe_context->idrt_size + gpe_context->curbe.length + gpe_context->sampler_size + 192;
dri_bo_unreference(gpe_context->dynamic_state.bo);
bo = dri_bo_alloc(i965->intel.bufmgr,
"surface state& binding table",
@@ -1137,8 +1137,11 @@ gen8_gpe_context_init(VADriverContextP ctx,
/* Constant buffer offset */
start_offset = ALIGN(end_offset, 64);
- gpe_context->curbe_offset = start_offset;
- end_offset = start_offset + gpe_context->curbe_size;
+ dri_bo_unreference(gpe_context->curbe.bo);
+ gpe_context->curbe.bo = bo;
+ dri_bo_reference(gpe_context->curbe.bo);
+ gpe_context->curbe.offset = start_offset;
+ end_offset = start_offset + gpe_context->curbe.length;
/* Interface descriptor offset */
start_offset = ALIGN(end_offset, 64);
@@ -1170,6 +1173,8 @@ gen8_gpe_context_destroy(struct i965_gpe_context *gpe_context)
dri_bo_unreference(gpe_context->indirect_state.bo);
gpe_context->indirect_state.bo = NULL;
+ dri_bo_unreference(gpe_context->curbe.bo);
+ gpe_context->curbe.bo = NULL;
}
@@ -1619,7 +1624,12 @@ gen8_gpe_context_set_dynamic_buffer(VADriverContextP ctx,
dri_bo_reference(gpe_context->dynamic_state.bo);
gpe_context->dynamic_state.bo_size = ds->bo_size;
- gpe_context->curbe_offset = ds->curbe_offset;
+ /* curbe buffer is a part of the dynamic buffer */
+ dri_bo_unreference(gpe_context->curbe.bo);
+ gpe_context->curbe.bo = ds->bo;
+ dri_bo_reference(gpe_context->curbe.bo);
+ gpe_context->curbe.offset = ds->curbe_offset;
+
gpe_context->idrt_offset = ds->idrt_offset;
gpe_context->sampler_offset = ds->sampler_offset;
@@ -1629,15 +1639,15 @@ gen8_gpe_context_set_dynamic_buffer(VADriverContextP ctx,
void *
gen8p_gpe_context_map_curbe(struct i965_gpe_context *gpe_context)
{
- dri_bo_map(gpe_context->dynamic_state.bo, 1);
+ dri_bo_map(gpe_context->curbe.bo, 1);
- return (char *)gpe_context->dynamic_state.bo->virtual + gpe_context->curbe_offset;
+ return (char *)gpe_context->curbe.bo->virtual + gpe_context->curbe.offset;
}
void
gen8p_gpe_context_unmap_curbe(struct i965_gpe_context *gpe_context)
{
- dri_bo_unmap(gpe_context->dynamic_state.bo);
+ dri_bo_unmap(gpe_context->curbe.bo);
}
void
diff --git a/src/i965_gpe_utils.h b/src/i965_gpe_utils.h
index 0cbef43..92123fe 100644
--- a/src/i965_gpe_utils.h
+++ b/src/i965_gpe_utils.h
@@ -92,6 +92,7 @@ struct i965_gpe_context
struct {
dri_bo *bo;
unsigned int length; /* in bytes */
+ unsigned int offset;
} curbe;
struct {
@@ -168,8 +169,6 @@ struct i965_gpe_context
int sampler_size;
unsigned int idrt_offset;
int idrt_size;
- unsigned int curbe_offset;
- int curbe_size;
};
struct gpe_mi_flush_dw_parameter
Xiang, Haihao
2016-11-18 05:43:47 UTC
Permalink
Make sure the size of each part of the dynamic state buffer is a multiple of 64 bytes.

Signed-off-by: Xiang, Haihao <***@intel.com>
---
src/i965_gpe_utils.c | 9 +++++----
1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/src/i965_gpe_utils.c b/src/i965_gpe_utils.c
index d7286fc..139ab1b 100644
--- a/src/i965_gpe_utils.c
+++ b/src/i965_gpe_utils.c
@@ -1122,8 +1122,9 @@ gen8_gpe_context_init(VADriverContextP ctx,
assert(bo);
gpe_context->surface_state_binding_table.bo = bo;

- bo_size = gpe_context->idrt.max_entries * gpe_context->idrt.entry_size + gpe_context->curbe.length +
- gpe_context->sampler.max_entries * gpe_context->sampler.entry_size + 192;
+ bo_size = gpe_context->idrt.max_entries * ALIGN(gpe_context->idrt.entry_size, 64) +
+ ALIGN(gpe_context->curbe.length, 64) +
+ gpe_context->sampler.max_entries * ALIGN(gpe_context->sampler.entry_size, 64);
dri_bo_unreference(gpe_context->dynamic_state.bo);
bo = dri_bo_alloc(i965->intel.bufmgr,
"surface state & binding table",
@@ -1150,7 +1151,7 @@ gen8_gpe_context_init(VADriverContextP ctx,
gpe_context->idrt.bo = bo;
dri_bo_reference(gpe_context->idrt.bo);
gpe_context->idrt.offset = start_offset;
- end_offset = start_offset + gpe_context->idrt.entry_size * gpe_context->idrt.max_entries;
+ end_offset = start_offset + ALIGN(gpe_context->idrt.entry_size, 64) * gpe_context->idrt.max_entries;

/* Sampler state offset */
start_offset = ALIGN(end_offset, 64);
@@ -1158,7 +1159,7 @@ gen8_gpe_context_init(VADriverContextP ctx,
gpe_context->sampler.bo = bo;
dri_bo_reference(gpe_context->sampler.bo);
gpe_context->sampler.offset = start_offset;
- end_offset = start_offset + gpe_context->sampler.entry_size * gpe_context->sampler.max_entries;
+ end_offset = start_offset + ALIGN(gpe_context->sampler.entry_size, 64) * gpe_context->sampler.max_entries;

/* update the end offset of dynamic_state */
gpe_context->dynamic_state.end_offset = end_offset;
--
1.9.1
Zhao Yakui
2016-11-18 06:26:47 UTC
Permalink
Post by Xiang, Haihao
Make sure the size for each part in dynamic state buffer is multiple of 64 bytes.
This looks good to me.
Post by Xiang, Haihao
---
src/i965_gpe_utils.c | 9 +++++----
1 file changed, 5 insertions(+), 4 deletions(-)
diff --git a/src/i965_gpe_utils.c b/src/i965_gpe_utils.c
index d7286fc..139ab1b 100644
--- a/src/i965_gpe_utils.c
+++ b/src/i965_gpe_utils.c
@@ -1122,8 +1122,9 @@ gen8_gpe_context_init(VADriverContextP ctx,
assert(bo);
gpe_context->surface_state_binding_table.bo = bo;
- bo_size = gpe_context->idrt.max_entries * gpe_context->idrt.entry_size + gpe_context->curbe.length +
- gpe_context->sampler.max_entries * gpe_context->sampler.entry_size + 192;
+ bo_size = gpe_context->idrt.max_entries * ALIGN(gpe_context->idrt.entry_size, 64) +
+ ALIGN(gpe_context->curbe.length, 64) +
+ gpe_context->sampler.max_entries * ALIGN(gpe_context->sampler.entry_size, 64);
dri_bo_unreference(gpe_context->dynamic_state.bo);
bo = dri_bo_alloc(i965->intel.bufmgr,
"surface state& binding table",
@@ -1150,7 +1151,7 @@ gen8_gpe_context_init(VADriverContextP ctx,
gpe_context->idrt.bo = bo;
dri_bo_reference(gpe_context->idrt.bo);
gpe_context->idrt.offset = start_offset;
- end_offset = start_offset + gpe_context->idrt.entry_size * gpe_context->idrt.max_entries;
+ end_offset = start_offset + ALIGN(gpe_context->idrt.entry_size, 64) * gpe_context->idrt.max_entries;
/* Sampler state offset */
start_offset = ALIGN(end_offset, 64);
@@ -1158,7 +1159,7 @@ gen8_gpe_context_init(VADriverContextP ctx,
gpe_context->sampler.bo = bo;
dri_bo_reference(gpe_context->sampler.bo);
gpe_context->sampler.offset = start_offset;
- end_offset = start_offset + gpe_context->sampler.entry_size * gpe_context->sampler.max_entries;
+ end_offset = start_offset + ALIGN(gpe_context->sampler.entry_size, 64) * gpe_context->sampler.max_entries;
/* update the end offset of dynamic_state */
gpe_context->dynamic_state.end_offset = end_offset;
Xiang, Haihao
2016-11-18 05:43:48 UTC
Permalink
The curbe length passed to CMD_MEDIA_CURBE_LOAD must be a multiple of 64 bytes

Signed-off-by: Xiang, Haihao <***@intel.com>
---
src/i965_gpe_utils.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/i965_gpe_utils.c b/src/i965_gpe_utils.c
index 139ab1b..a6d539b 100644
--- a/src/i965_gpe_utils.c
+++ b/src/i965_gpe_utils.c
@@ -1066,7 +1066,7 @@ gen8_gpe_curbe_load(VADriverContextP ctx,

OUT_BATCH(batch, CMD_MEDIA_CURBE_LOAD | (4 - 2));
OUT_BATCH(batch, 0);
- OUT_BATCH(batch, gpe_context->curbe.length);
+ OUT_BATCH(batch, ALIGN(gpe_context->curbe.length, 64));
OUT_BATCH(batch, gpe_context->curbe.offset);

ADVANCE_BATCH(batch);
--
1.9.1
Zhao Yakui
2016-11-18 06:27:24 UTC
Permalink
Post by Xiang, Haihao
It is multiple of 64 bytes
This looks good to me.
Post by Xiang, Haihao
---
src/i965_gpe_utils.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/i965_gpe_utils.c b/src/i965_gpe_utils.c
index 139ab1b..a6d539b 100644
--- a/src/i965_gpe_utils.c
+++ b/src/i965_gpe_utils.c
@@ -1066,7 +1066,7 @@ gen8_gpe_curbe_load(VADriverContextP ctx,
OUT_BATCH(batch, CMD_MEDIA_CURBE_LOAD | (4 - 2));
OUT_BATCH(batch, 0);
- OUT_BATCH(batch, gpe_context->curbe.length);
+ OUT_BATCH(batch, ALIGN(gpe_context->curbe.length, 64));
OUT_BATCH(batch, gpe_context->curbe.offset);
ADVANCE_BATCH(batch);
Xiang, Haihao
2016-11-18 05:43:50 UTC
Permalink
As with i965_dri_object_to_buffer_gpe_resource(), use i965_ instead of i965_gpe_ as the prefix

Signed-off-by: Xiang, Haihao <***@intel.com>
---
src/gen9_post_processing.c | 2 +-
src/gen9_vp9_encoder.c | 2 +-
src/i965_gpe_utils.c | 10 +++++-----
src/i965_gpe_utils.h | 10 +++++-----
4 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/src/gen9_post_processing.c b/src/gen9_post_processing.c
index 862a26a..2128697 100644
--- a/src/gen9_post_processing.c
+++ b/src/gen9_post_processing.c
@@ -578,7 +578,7 @@ gen9_add_dri_buffer_2d_gpe_surface(VADriverContextP ctx,
struct i965_gpe_resource gpe_resource;
struct i965_gpe_surface gpe_surface;

- i965_gpe_dri_object_to_2d_gpe_resource(&gpe_resource, bo, width, height, pitch);
+ i965_dri_object_to_2d_gpe_resource(&gpe_resource, bo, width, height, pitch);
memset(&gpe_surface, 0, sizeof(gpe_surface));
gpe_surface.gpe_resource = &gpe_resource;
gpe_surface.is_2d_surface = 1;
diff --git a/src/gen9_vp9_encoder.c b/src/gen9_vp9_encoder.c
index a933c15..0b66565 100644
--- a/src/gen9_vp9_encoder.c
+++ b/src/gen9_vp9_encoder.c
@@ -1052,7 +1052,7 @@ gen9_add_dri_buffer_2d_gpe_surface(VADriverContextP ctx,
{
struct i965_gpe_resource gpe_resource;

- i965_gpe_dri_object_to_2d_gpe_resource(&gpe_resource, bo, width, height, pitch);
+ i965_dri_object_to_2d_gpe_resource(&gpe_resource, bo, width, height, pitch);
gen9_add_buffer_2d_gpe_surface(ctx,
gpe_context,
&gpe_resource,
diff --git a/src/i965_gpe_utils.c b/src/i965_gpe_utils.c
index 66609c0..b20857b 100644
--- a/src/i965_gpe_utils.c
+++ b/src/i965_gpe_utils.c
@@ -1413,11 +1413,11 @@ i965_dri_object_to_buffer_gpe_resource(struct i965_gpe_resource *res,
}

void
-i965_gpe_dri_object_to_2d_gpe_resource(struct i965_gpe_resource *res,
- dri_bo *bo,
- unsigned int width,
- unsigned int height,
- unsigned int pitch)
+i965_dri_object_to_2d_gpe_resource(struct i965_gpe_resource *res,
+ dri_bo *bo,
+ unsigned int width,
+ unsigned int height,
+ unsigned int pitch)
{
unsigned int swizzle;

diff --git a/src/i965_gpe_utils.h b/src/i965_gpe_utils.h
index 66be748..22165da 100644
--- a/src/i965_gpe_utils.h
+++ b/src/i965_gpe_utils.h
@@ -348,11 +348,11 @@ void i965_object_surface_to_2d_gpe_resource(struct i965_gpe_resource *res,
void i965_dri_object_to_buffer_gpe_resource(struct i965_gpe_resource *res,
dri_bo *bo);

-void i965_gpe_dri_object_to_2d_gpe_resource(struct i965_gpe_resource *res,
- dri_bo *bo,
- unsigned int width,
- unsigned int height,
- unsigned int pitch);
+void i965_dri_object_to_2d_gpe_resource(struct i965_gpe_resource *res,
+ dri_bo *bo,
+ unsigned int width,
+ unsigned int height,
+ unsigned int pitch);

void i965_zero_gpe_resource(struct i965_gpe_resource *res);
--
1.9.1
Zhao Yakui
2016-11-18 06:29:45 UTC
Permalink
Post by Xiang, Haihao
Like as i965_dri_object_to_buffer_gpe_resource(), use i965_ instead of i965_gpe_ as prefix
This looks good to me.

Thanks
Post by Xiang, Haihao
---
src/gen9_post_processing.c | 2 +-
src/gen9_vp9_encoder.c | 2 +-
src/i965_gpe_utils.c | 10 +++++-----
src/i965_gpe_utils.h | 10 +++++-----
4 files changed, 12 insertions(+), 12 deletions(-)
diff --git a/src/gen9_post_processing.c b/src/gen9_post_processing.c
index 862a26a..2128697 100644
--- a/src/gen9_post_processing.c
+++ b/src/gen9_post_processing.c
@@ -578,7 +578,7 @@ gen9_add_dri_buffer_2d_gpe_surface(VADriverContextP ctx,
struct i965_gpe_resource gpe_resource;
struct i965_gpe_surface gpe_surface;
- i965_gpe_dri_object_to_2d_gpe_resource(&gpe_resource, bo, width, height, pitch);
+ i965_dri_object_to_2d_gpe_resource(&gpe_resource, bo, width, height, pitch);
memset(&gpe_surface, 0, sizeof(gpe_surface));
gpe_surface.gpe_resource =&gpe_resource;
gpe_surface.is_2d_surface = 1;
diff --git a/src/gen9_vp9_encoder.c b/src/gen9_vp9_encoder.c
index a933c15..0b66565 100644
--- a/src/gen9_vp9_encoder.c
+++ b/src/gen9_vp9_encoder.c
@@ -1052,7 +1052,7 @@ gen9_add_dri_buffer_2d_gpe_surface(VADriverContextP ctx,
{
struct i965_gpe_resource gpe_resource;
- i965_gpe_dri_object_to_2d_gpe_resource(&gpe_resource, bo, width, height, pitch);
+ i965_dri_object_to_2d_gpe_resource(&gpe_resource, bo, width, height, pitch);
gen9_add_buffer_2d_gpe_surface(ctx,
gpe_context,
&gpe_resource,
diff --git a/src/i965_gpe_utils.c b/src/i965_gpe_utils.c
index 66609c0..b20857b 100644
--- a/src/i965_gpe_utils.c
+++ b/src/i965_gpe_utils.c
@@ -1413,11 +1413,11 @@ i965_dri_object_to_buffer_gpe_resource(struct i965_gpe_resource *res,
}
void
-i965_gpe_dri_object_to_2d_gpe_resource(struct i965_gpe_resource *res,
- dri_bo *bo,
- unsigned int width,
- unsigned int height,
- unsigned int pitch)
+i965_dri_object_to_2d_gpe_resource(struct i965_gpe_resource *res,
+ dri_bo *bo,
+ unsigned int width,
+ unsigned int height,
+ unsigned int pitch)
{
unsigned int swizzle;
diff --git a/src/i965_gpe_utils.h b/src/i965_gpe_utils.h
index 66be748..22165da 100644
--- a/src/i965_gpe_utils.h
+++ b/src/i965_gpe_utils.h
@@ -348,11 +348,11 @@ void i965_object_surface_to_2d_gpe_resource(struct i965_gpe_resource *res,
void i965_dri_object_to_buffer_gpe_resource(struct i965_gpe_resource *res,
dri_bo *bo);
-void i965_gpe_dri_object_to_2d_gpe_resource(struct i965_gpe_resource *res,
- dri_bo *bo,
- unsigned int width,
- unsigned int height,
- unsigned int pitch);
+void i965_dri_object_to_2d_gpe_resource(struct i965_gpe_resource *res,
+ dri_bo *bo,
+ unsigned int width,
+ unsigned int height,
+ unsigned int pitch);
void i965_zero_gpe_resource(struct i965_gpe_resource *res);
Xiang, Haihao
2016-11-18 05:43:51 UTC
Permalink
This function can be used on GEN8 too

Signed-off-by: Xiang, Haihao <***@intel.com>
---
src/gen9_post_processing.c | 2 +-
src/gen9_vp9_encoder.c | 2 +-
src/i965_gpe_utils.c | 2 +-
src/i965_gpe_utils.h | 2 +-
4 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/gen9_post_processing.c b/src/gen9_post_processing.c
index 2128697..e9b8f86 100644
--- a/src/gen9_post_processing.c
+++ b/src/gen9_post_processing.c
@@ -608,7 +608,7 @@ gen9_run_kernel_media_object_walker(VADriverContextP ctx,
intel_batchbuffer_emit_mi_flush(batch);

gen9_gpe_pipeline_setup(ctx, gpe_context, batch);
- gen9_gpe_media_object_walker(ctx, gpe_context, batch, param);
+ gen8_gpe_media_object_walker(ctx, gpe_context, batch, param);
gen8_gpe_media_state_flush(ctx, gpe_context, batch);

gen9_gpe_pipeline_end(ctx, gpe_context, batch);
diff --git a/src/gen9_vp9_encoder.c b/src/gen9_vp9_encoder.c
index 0b66565..34d09a6 100644
--- a/src/gen9_vp9_encoder.c
+++ b/src/gen9_vp9_encoder.c
@@ -1129,7 +1129,7 @@ gen9_run_kernel_media_object_walker(VADriverContextP ctx,
gen9_gpe_mi_store_data_imm(ctx, batch, &mi_store_data_imm);

gen9_gpe_pipeline_setup(ctx, gpe_context, batch);
- gen9_gpe_media_object_walker(ctx, gpe_context, batch, param);
+ gen8_gpe_media_object_walker(ctx, gpe_context, batch, param);
gen8_gpe_media_state_flush(ctx, gpe_context, batch);

gen9_gpe_pipeline_end(ctx, gpe_context, batch);
diff --git a/src/i965_gpe_utils.c b/src/i965_gpe_utils.c
index b20857b..c2d06b2 100644
--- a/src/i965_gpe_utils.c
+++ b/src/i965_gpe_utils.c
@@ -2103,7 +2103,7 @@ gen8_gpe_media_object(VADriverContextP ctx,
}

void
-gen9_gpe_media_object_walker(VADriverContextP ctx,
+gen8_gpe_media_object_walker(VADriverContextP ctx,
struct i965_gpe_context *gpe_context,
struct intel_batchbuffer *batch,
struct gpe_media_object_walker_parameter *param)
diff --git a/src/i965_gpe_utils.h b/src/i965_gpe_utils.h
index 22165da..b58a02c 100644
--- a/src/i965_gpe_utils.h
+++ b/src/i965_gpe_utils.h
@@ -506,7 +506,7 @@ gen8_gpe_media_state_flush(VADriverContextP ctx,
struct intel_batchbuffer *batch);

extern void
-gen9_gpe_media_object_walker(VADriverContextP ctx,
+gen8_gpe_media_object_walker(VADriverContextP ctx,
struct i965_gpe_context *gpe_context,
struct intel_batchbuffer *batch,
struct gpe_media_object_walker_parameter *param);
--
1.9.1
Zhao Yakui
2016-11-18 06:38:26 UTC
Permalink
Post by Xiang, Haihao
This function can be used on GEN8 too
This looks good to me.

Thanks
Post by Xiang, Haihao
---
src/gen9_post_processing.c | 2 +-
src/gen9_vp9_encoder.c | 2 +-
src/i965_gpe_utils.c | 2 +-
src/i965_gpe_utils.h | 2 +-
4 files changed, 4 insertions(+), 4 deletions(-)
diff --git a/src/gen9_post_processing.c b/src/gen9_post_processing.c
index 2128697..e9b8f86 100644
--- a/src/gen9_post_processing.c
+++ b/src/gen9_post_processing.c
@@ -608,7 +608,7 @@ gen9_run_kernel_media_object_walker(VADriverContextP ctx,
intel_batchbuffer_emit_mi_flush(batch);
gen9_gpe_pipeline_setup(ctx, gpe_context, batch);
- gen9_gpe_media_object_walker(ctx, gpe_context, batch, param);
+ gen8_gpe_media_object_walker(ctx, gpe_context, batch, param);
gen8_gpe_media_state_flush(ctx, gpe_context, batch);
gen9_gpe_pipeline_end(ctx, gpe_context, batch);
diff --git a/src/gen9_vp9_encoder.c b/src/gen9_vp9_encoder.c
index 0b66565..34d09a6 100644
--- a/src/gen9_vp9_encoder.c
+++ b/src/gen9_vp9_encoder.c
@@ -1129,7 +1129,7 @@ gen9_run_kernel_media_object_walker(VADriverContextP ctx,
gen9_gpe_mi_store_data_imm(ctx, batch,&mi_store_data_imm);
gen9_gpe_pipeline_setup(ctx, gpe_context, batch);
- gen9_gpe_media_object_walker(ctx, gpe_context, batch, param);
+ gen8_gpe_media_object_walker(ctx, gpe_context, batch, param);
gen8_gpe_media_state_flush(ctx, gpe_context, batch);
gen9_gpe_pipeline_end(ctx, gpe_context, batch);
diff --git a/src/i965_gpe_utils.c b/src/i965_gpe_utils.c
index b20857b..c2d06b2 100644
--- a/src/i965_gpe_utils.c
+++ b/src/i965_gpe_utils.c
@@ -2103,7 +2103,7 @@ gen8_gpe_media_object(VADriverContextP ctx,
}
void
-gen9_gpe_media_object_walker(VADriverContextP ctx,
+gen8_gpe_media_object_walker(VADriverContextP ctx,
struct i965_gpe_context *gpe_context,
struct intel_batchbuffer *batch,
struct gpe_media_object_walker_parameter *param)
diff --git a/src/i965_gpe_utils.h b/src/i965_gpe_utils.h
index 22165da..b58a02c 100644
--- a/src/i965_gpe_utils.h
+++ b/src/i965_gpe_utils.h
@@ -506,7 +506,7 @@ gen8_gpe_media_state_flush(VADriverContextP ctx,
struct intel_batchbuffer *batch);
extern void
-gen9_gpe_media_object_walker(VADriverContextP ctx,
+gen8_gpe_media_object_walker(VADriverContextP ctx,
struct i965_gpe_context *gpe_context,
struct intel_batchbuffer *batch,
struct gpe_media_object_walker_parameter *param);
Xiang, Haihao
2016-11-18 05:43:49 UTC
Permalink
gpe_context->curbe.bo always points to the curbe buffer now, and the two functions
can be used on all platforms

Signed-off-by: Xiang, Haihao <***@intel.com>
---
src/gen9_post_processing.c | 4 ++--
src/gen9_vp9_encoder.c | 20 ++++++++++----------
src/i965_gpe_utils.c | 4 ++--
src/i965_gpe_utils.h | 4 ++--
4 files changed, 16 insertions(+), 16 deletions(-)

diff --git a/src/gen9_post_processing.c b/src/gen9_post_processing.c
index c0ae791..862a26a 100644
--- a/src/gen9_post_processing.c
+++ b/src/gen9_post_processing.c
@@ -653,7 +653,7 @@ gen9_gpe_context_p010_scaling_curbe(VADriverContextP ctx,
(dst_rect == NULL) || (dst_surface == NULL))
return;

- scaling_curbe = gen8p_gpe_context_map_curbe(gpe_context);
+ scaling_curbe = i965_gpe_context_map_curbe(gpe_context);

if (!scaling_curbe)
return;
@@ -696,7 +696,7 @@ gen9_gpe_context_p010_scaling_curbe(VADriverContextP ctx,
}
/* I010 will use LSB */

- gen8p_gpe_context_unmap_curbe(gpe_context);
+ i965_gpe_context_unmap_curbe(gpe_context);
}

static bool
diff --git a/src/gen9_vp9_encoder.c b/src/gen9_vp9_encoder.c
index 1badd88..a933c15 100644
--- a/src/gen9_vp9_encoder.c
+++ b/src/gen9_vp9_encoder.c
@@ -1159,7 +1159,7 @@ void gen9_vp9_set_curbe_brc(VADriverContextP ctx,
seq_param = param->pseq_param;
segment_param = param->psegment_param;

- cmd = gen8p_gpe_context_map_curbe(gpe_context);
+ cmd = i965_gpe_context_map_curbe(gpe_context);

if (!cmd)
return;
@@ -1347,7 +1347,7 @@ void gen9_vp9_set_curbe_brc(VADriverContextP ctx,
cmd->dw60.brc_bitstream_size_data_bti = VP9_BTI_BRC_BITSTREAM_SIZE_G9;
cmd->dw61.brc_hfw_data_output_bti = VP9_BTI_BRC_HFW_DATA_G9;

- gen8p_gpe_context_unmap_curbe(gpe_context);
+ i965_gpe_context_unmap_curbe(gpe_context);
return;
}

@@ -2057,7 +2057,7 @@ void gen9_vp9_set_curbe_me(VADriverContextP ctx,
else
enc_media_state = VP9_MEDIA_STATE_4X_ME;

- me_cmd = gen8p_gpe_context_map_curbe(gpe_context);
+ me_cmd = i965_gpe_context_map_curbe(gpe_context);

if (!me_cmd)
return;
@@ -2117,7 +2117,7 @@ void gen9_vp9_set_curbe_me(VADriverContextP ctx,
me_cmd->dw36.vme_fwd_inter_pred_surf_index = VP9_BTI_ME_CURR_PIC_L0;
me_cmd->dw37.vme_bdw_inter_pred_surf_index = VP9_BTI_ME_CURR_PIC_L1;

- gen8p_gpe_context_unmap_curbe(gpe_context);
+ i965_gpe_context_unmap_curbe(gpe_context);
}

static void
@@ -2398,7 +2398,7 @@ gen9_vp9_set_curbe_scaling_cm(VADriverContextP ctx,
{
vp9_scaling4x_curbe_data_cm *curbe_cmd;

- curbe_cmd = gen8p_gpe_context_map_curbe(gpe_context);
+ curbe_cmd = i965_gpe_context_map_curbe(gpe_context);

if (!curbe_cmd)
return;
@@ -2422,7 +2422,7 @@ gen9_vp9_set_curbe_scaling_cm(VADriverContextP ctx,
curbe_cmd->dw10.mbv_proc_stat_bti = VP9_BTI_SCALING_FRAME_MBVPROCSTATS_DST_CM;
}

- gen8p_gpe_context_unmap_curbe(gpe_context);
+ i965_gpe_context_unmap_curbe(gpe_context);
return;
}

@@ -2670,7 +2670,7 @@ gen9_vp9_set_curbe_dys(VADriverContextP ctx,
{
vp9_dys_curbe_data *curbe_cmd;

- curbe_cmd = gen8p_gpe_context_map_curbe(gpe_context);
+ curbe_cmd = i965_gpe_context_map_curbe(gpe_context);

if (!curbe_cmd)
return;
@@ -2690,7 +2690,7 @@ gen9_vp9_set_curbe_dys(VADriverContextP ctx,
curbe_cmd->dw17.output_frame_y_bti = VP9_BTI_DYS_OUTPUT_Y;
curbe_cmd->dw18.avs_sample_idx = 0;

- gen8p_gpe_context_unmap_curbe(gpe_context);
+ i965_gpe_context_unmap_curbe(gpe_context);
}

static void
@@ -3023,7 +3023,7 @@ gen9_vp9_set_curbe_mbenc(VADriverContextP ctx,
seg_param = &tmp_seg_param;
}

- curbe_cmd = gen8p_gpe_context_map_curbe(gpe_context);
+ curbe_cmd = i965_gpe_context_map_curbe(gpe_context);

if (!curbe_cmd)
return;
@@ -3167,7 +3167,7 @@ gen9_vp9_set_curbe_mbenc(VADriverContextP ctx,
curbe_cmd->dw174.cu_record_bti = VP9_BTI_MBENC_CU_RECORDS_G9;
curbe_cmd->dw175.pak_data_bti = VP9_BTI_MBENC_PAK_DATA_G9;

- gen8p_gpe_context_unmap_curbe(gpe_context);
+ i965_gpe_context_unmap_curbe(gpe_context);
return;
}

diff --git a/src/i965_gpe_utils.c b/src/i965_gpe_utils.c
index a6d539b..66609c0 100644
--- a/src/i965_gpe_utils.c
+++ b/src/i965_gpe_utils.c
@@ -1660,7 +1660,7 @@ gen8_gpe_context_set_dynamic_buffer(VADriverContextP ctx,
}

void *
-gen8p_gpe_context_map_curbe(struct i965_gpe_context *gpe_context)
+i965_gpe_context_map_curbe(struct i965_gpe_context *gpe_context)
{
dri_bo_map(gpe_context->curbe.bo, 1);

@@ -1668,7 +1668,7 @@ gen8p_gpe_context_map_curbe(struct i965_gpe_context *gpe_context)
}

void
-gen8p_gpe_context_unmap_curbe(struct i965_gpe_context *gpe_context)
+i965_gpe_context_unmap_curbe(struct i965_gpe_context *gpe_context)
{
dri_bo_unmap(gpe_context->curbe.bo);
}
diff --git a/src/i965_gpe_utils.h b/src/i965_gpe_utils.h
index e19e107..66be748 100644
--- a/src/i965_gpe_utils.h
+++ b/src/i965_gpe_utils.h
@@ -437,10 +437,10 @@ extern void
gen9_gpe_reset_binding_table(VADriverContextP ctx,
struct i965_gpe_context *gpe_context);
extern
-void *gen8p_gpe_context_map_curbe(struct i965_gpe_context *gpe_context);
+void *i965_gpe_context_map_curbe(struct i965_gpe_context *gpe_context);

extern
-void gen8p_gpe_context_unmap_curbe(struct i965_gpe_context *gpe_context);
+void i965_gpe_context_unmap_curbe(struct i965_gpe_context *gpe_context);

extern
void gen8_gpe_setup_interface_data(VADriverContextP ctx,
--
1.9.1
Zhao Yakui
2016-11-18 06:28:40 UTC
Permalink
Post by Xiang, Haihao
gpe_context->curbe.bo always points to the curbe buffer now, and the two functions
can be used on all platforms
This looks good to me.

Thanks
Yakui
Post by Xiang, Haihao
---
src/gen9_post_processing.c | 4 ++--
src/gen9_vp9_encoder.c | 20 ++++++++++----------
src/i965_gpe_utils.c | 4 ++--
src/i965_gpe_utils.h | 4 ++--
4 files changed, 16 insertions(+), 16 deletions(-)
diff --git a/src/gen9_post_processing.c b/src/gen9_post_processing.c
index c0ae791..862a26a 100644
--- a/src/gen9_post_processing.c
+++ b/src/gen9_post_processing.c
@@ -653,7 +653,7 @@ gen9_gpe_context_p010_scaling_curbe(VADriverContextP ctx,
(dst_rect == NULL) || (dst_surface == NULL))
return;
- scaling_curbe = gen8p_gpe_context_map_curbe(gpe_context);
+ scaling_curbe = i965_gpe_context_map_curbe(gpe_context);
if (!scaling_curbe)
return;
@@ -696,7 +696,7 @@ gen9_gpe_context_p010_scaling_curbe(VADriverContextP ctx,
}
/* I010 will use LSB */
- gen8p_gpe_context_unmap_curbe(gpe_context);
+ i965_gpe_context_unmap_curbe(gpe_context);
}
static bool
diff --git a/src/gen9_vp9_encoder.c b/src/gen9_vp9_encoder.c
index 1badd88..a933c15 100644
--- a/src/gen9_vp9_encoder.c
+++ b/src/gen9_vp9_encoder.c
@@ -1159,7 +1159,7 @@ void gen9_vp9_set_curbe_brc(VADriverContextP ctx,
seq_param = param->pseq_param;
segment_param = param->psegment_param;
- cmd = gen8p_gpe_context_map_curbe(gpe_context);
+ cmd = i965_gpe_context_map_curbe(gpe_context);
if (!cmd)
return;
@@ -1347,7 +1347,7 @@ void gen9_vp9_set_curbe_brc(VADriverContextP ctx,
cmd->dw60.brc_bitstream_size_data_bti = VP9_BTI_BRC_BITSTREAM_SIZE_G9;
cmd->dw61.brc_hfw_data_output_bti = VP9_BTI_BRC_HFW_DATA_G9;
- gen8p_gpe_context_unmap_curbe(gpe_context);
+ i965_gpe_context_unmap_curbe(gpe_context);
return;
}
@@ -2057,7 +2057,7 @@ void gen9_vp9_set_curbe_me(VADriverContextP ctx,
else
enc_media_state = VP9_MEDIA_STATE_4X_ME;
- me_cmd = gen8p_gpe_context_map_curbe(gpe_context);
+ me_cmd = i965_gpe_context_map_curbe(gpe_context);
if (!me_cmd)
return;
@@ -2117,7 +2117,7 @@ void gen9_vp9_set_curbe_me(VADriverContextP ctx,
me_cmd->dw36.vme_fwd_inter_pred_surf_index = VP9_BTI_ME_CURR_PIC_L0;
me_cmd->dw37.vme_bdw_inter_pred_surf_index = VP9_BTI_ME_CURR_PIC_L1;
- gen8p_gpe_context_unmap_curbe(gpe_context);
+ i965_gpe_context_unmap_curbe(gpe_context);
}
static void
@@ -2398,7 +2398,7 @@ gen9_vp9_set_curbe_scaling_cm(VADriverContextP ctx,
{
vp9_scaling4x_curbe_data_cm *curbe_cmd;
- curbe_cmd = gen8p_gpe_context_map_curbe(gpe_context);
+ curbe_cmd = i965_gpe_context_map_curbe(gpe_context);
if (!curbe_cmd)
return;
@@ -2422,7 +2422,7 @@ gen9_vp9_set_curbe_scaling_cm(VADriverContextP ctx,
curbe_cmd->dw10.mbv_proc_stat_bti = VP9_BTI_SCALING_FRAME_MBVPROCSTATS_DST_CM;
}
- gen8p_gpe_context_unmap_curbe(gpe_context);
+ i965_gpe_context_unmap_curbe(gpe_context);
return;
}
@@ -2670,7 +2670,7 @@ gen9_vp9_set_curbe_dys(VADriverContextP ctx,
{
vp9_dys_curbe_data *curbe_cmd;
- curbe_cmd = gen8p_gpe_context_map_curbe(gpe_context);
+ curbe_cmd = i965_gpe_context_map_curbe(gpe_context);
if (!curbe_cmd)
return;
@@ -2690,7 +2690,7 @@ gen9_vp9_set_curbe_dys(VADriverContextP ctx,
curbe_cmd->dw17.output_frame_y_bti = VP9_BTI_DYS_OUTPUT_Y;
curbe_cmd->dw18.avs_sample_idx = 0;
- gen8p_gpe_context_unmap_curbe(gpe_context);
+ i965_gpe_context_unmap_curbe(gpe_context);
}
static void
@@ -3023,7 +3023,7 @@ gen9_vp9_set_curbe_mbenc(VADriverContextP ctx,
seg_param =&tmp_seg_param;
}
- curbe_cmd = gen8p_gpe_context_map_curbe(gpe_context);
+ curbe_cmd = i965_gpe_context_map_curbe(gpe_context);
if (!curbe_cmd)
return;
@@ -3167,7 +3167,7 @@ gen9_vp9_set_curbe_mbenc(VADriverContextP ctx,
curbe_cmd->dw174.cu_record_bti = VP9_BTI_MBENC_CU_RECORDS_G9;
curbe_cmd->dw175.pak_data_bti = VP9_BTI_MBENC_PAK_DATA_G9;
- gen8p_gpe_context_unmap_curbe(gpe_context);
+ i965_gpe_context_unmap_curbe(gpe_context);
return;
}
diff --git a/src/i965_gpe_utils.c b/src/i965_gpe_utils.c
index a6d539b..66609c0 100644
--- a/src/i965_gpe_utils.c
+++ b/src/i965_gpe_utils.c
@@ -1660,7 +1660,7 @@ gen8_gpe_context_set_dynamic_buffer(VADriverContextP ctx,
}
void *
-gen8p_gpe_context_map_curbe(struct i965_gpe_context *gpe_context)
+i965_gpe_context_map_curbe(struct i965_gpe_context *gpe_context)
{
dri_bo_map(gpe_context->curbe.bo, 1);
@@ -1668,7 +1668,7 @@ gen8p_gpe_context_map_curbe(struct i965_gpe_context *gpe_context)
}
void
-gen8p_gpe_context_unmap_curbe(struct i965_gpe_context *gpe_context)
+i965_gpe_context_unmap_curbe(struct i965_gpe_context *gpe_context)
{
dri_bo_unmap(gpe_context->curbe.bo);
}
diff --git a/src/i965_gpe_utils.h b/src/i965_gpe_utils.h
index e19e107..66be748 100644
--- a/src/i965_gpe_utils.h
+++ b/src/i965_gpe_utils.h
@@ -437,10 +437,10 @@ extern void
gen9_gpe_reset_binding_table(VADriverContextP ctx,
struct i965_gpe_context *gpe_context);
extern
-void *gen8p_gpe_context_map_curbe(struct i965_gpe_context *gpe_context);
+void *i965_gpe_context_map_curbe(struct i965_gpe_context *gpe_context);
extern
-void gen8p_gpe_context_unmap_curbe(struct i965_gpe_context *gpe_context);
+void i965_gpe_context_unmap_curbe(struct i965_gpe_context *gpe_context);
extern
void gen8_gpe_setup_interface_data(VADriverContextP ctx,
Xiang, Haihao
2016-11-18 05:43:53 UTC
Permalink
This function can be used on GEN8 too

Signed-off-by: Xiang, Haihao <***@intel.com>
---
src/gen9_vdenc.c | 4 ++--
src/gen9_vp9_encoder.c | 4 ++--
src/i965_gpe_utils.c | 2 +-
src/i965_gpe_utils.h | 2 +-
4 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/src/gen9_vdenc.c b/src/gen9_vdenc.c
index 87e587a..8cddc41 100644
--- a/src/gen9_vdenc.c
+++ b/src/gen9_vdenc.c
@@ -1501,7 +1501,7 @@ gen9_vdenc_huc_store_huc_status2(VADriverContextP ctx,
mi_store_data_imm_params.bo = vdenc_context->huc_status2_res.bo;
mi_store_data_imm_params.offset = 0;
mi_store_data_imm_params.dw0 = (1 << 6);
- gen9_gpe_mi_store_data_imm(ctx, batch, &mi_store_data_imm_params);
+ gen8_gpe_mi_store_data_imm(ctx, batch, &mi_store_data_imm_params);

/* Store HUC_STATUS2 */
memset(&mi_store_register_mem_params, 0, sizeof(mi_store_register_mem_params));
@@ -2363,7 +2363,7 @@ gen9_vdenc_huc_brc_update(VADriverContextP ctx,
mi_store_data_imm_params.bo = vdenc_context->huc_status_res.bo;
mi_store_data_imm_params.offset = 4;
mi_store_data_imm_params.dw0 = (1 << 31);
- gen9_gpe_mi_store_data_imm(ctx, batch, &mi_store_data_imm_params);
+ gen8_gpe_mi_store_data_imm(ctx, batch, &mi_store_data_imm_params);
}

static void
diff --git a/src/gen9_vp9_encoder.c b/src/gen9_vp9_encoder.c
index a617eb0..4b80716 100644
--- a/src/gen9_vp9_encoder.c
+++ b/src/gen9_vp9_encoder.c
@@ -1087,7 +1087,7 @@ gen9_run_kernel_media_object(VADriverContextP ctx,
mi_store_data_imm.bo = status_buffer->bo;
mi_store_data_imm.offset = status_buffer->media_index_offset;
mi_store_data_imm.dw0 = media_function;
- gen9_gpe_mi_store_data_imm(ctx, batch, &mi_store_data_imm);
+ gen8_gpe_mi_store_data_imm(ctx, batch, &mi_store_data_imm);

intel_batchbuffer_emit_mi_flush(batch);
gen9_gpe_pipeline_setup(ctx, gpe_context, batch);
@@ -1126,7 +1126,7 @@ gen9_run_kernel_media_object_walker(VADriverContextP ctx,
mi_store_data_imm.bo = status_buffer->bo;
mi_store_data_imm.offset = status_buffer->media_index_offset;
mi_store_data_imm.dw0 = media_function;
- gen9_gpe_mi_store_data_imm(ctx, batch, &mi_store_data_imm);
+ gen8_gpe_mi_store_data_imm(ctx, batch, &mi_store_data_imm);

gen9_gpe_pipeline_setup(ctx, gpe_context, batch);
gen8_gpe_media_object_walker(ctx, gpe_context, batch, param);
diff --git a/src/i965_gpe_utils.c b/src/i965_gpe_utils.c
index 9ca4196..85cdd50 100644
--- a/src/i965_gpe_utils.c
+++ b/src/i965_gpe_utils.c
@@ -1512,7 +1512,7 @@ gen9_gpe_mi_flush_dw(VADriverContextP ctx,
}

void
-gen9_gpe_mi_store_data_imm(VADriverContextP ctx,
+gen8_gpe_mi_store_data_imm(VADriverContextP ctx,
struct intel_batchbuffer *batch,
struct gpe_mi_store_data_imm_parameter *params)
{
diff --git a/src/i965_gpe_utils.h b/src/i965_gpe_utils.h
index 323af74..e6cc3dc 100644
--- a/src/i965_gpe_utils.h
+++ b/src/i965_gpe_utils.h
@@ -366,7 +366,7 @@ void gen9_gpe_mi_flush_dw(VADriverContextP ctx,
struct intel_batchbuffer *batch,
struct gpe_mi_flush_dw_parameter *params);

-void gen9_gpe_mi_store_data_imm(VADriverContextP ctx,
+void gen8_gpe_mi_store_data_imm(VADriverContextP ctx,
struct intel_batchbuffer *batch,
struct gpe_mi_store_data_imm_parameter *params);
--
1.9.1
Zhao Yakui
2016-11-18 06:39:08 UTC
Permalink
Post by Xiang, Haihao
This function can be used on GEN8 too
This looks good to me.

Thanks
Post by Xiang, Haihao
---
src/gen9_vdenc.c | 4 ++--
src/gen9_vp9_encoder.c | 4 ++--
src/i965_gpe_utils.c | 2 +-
src/i965_gpe_utils.h | 2 +-
4 files changed, 6 insertions(+), 6 deletions(-)
diff --git a/src/gen9_vdenc.c b/src/gen9_vdenc.c
index 87e587a..8cddc41 100644
--- a/src/gen9_vdenc.c
+++ b/src/gen9_vdenc.c
@@ -1501,7 +1501,7 @@ gen9_vdenc_huc_store_huc_status2(VADriverContextP ctx,
mi_store_data_imm_params.bo = vdenc_context->huc_status2_res.bo;
mi_store_data_imm_params.offset = 0;
mi_store_data_imm_params.dw0 = (1<< 6);
- gen9_gpe_mi_store_data_imm(ctx, batch,&mi_store_data_imm_params);
+ gen8_gpe_mi_store_data_imm(ctx, batch,&mi_store_data_imm_params);
/* Store HUC_STATUS2 */
memset(&mi_store_register_mem_params, 0, sizeof(mi_store_register_mem_params));
@@ -2363,7 +2363,7 @@ gen9_vdenc_huc_brc_update(VADriverContextP ctx,
mi_store_data_imm_params.bo = vdenc_context->huc_status_res.bo;
mi_store_data_imm_params.offset = 4;
mi_store_data_imm_params.dw0 = (1<< 31);
- gen9_gpe_mi_store_data_imm(ctx, batch,&mi_store_data_imm_params);
+ gen8_gpe_mi_store_data_imm(ctx, batch,&mi_store_data_imm_params);
}
static void
diff --git a/src/gen9_vp9_encoder.c b/src/gen9_vp9_encoder.c
index a617eb0..4b80716 100644
--- a/src/gen9_vp9_encoder.c
+++ b/src/gen9_vp9_encoder.c
@@ -1087,7 +1087,7 @@ gen9_run_kernel_media_object(VADriverContextP ctx,
mi_store_data_imm.bo = status_buffer->bo;
mi_store_data_imm.offset = status_buffer->media_index_offset;
mi_store_data_imm.dw0 = media_function;
- gen9_gpe_mi_store_data_imm(ctx, batch,&mi_store_data_imm);
+ gen8_gpe_mi_store_data_imm(ctx, batch,&mi_store_data_imm);
intel_batchbuffer_emit_mi_flush(batch);
gen9_gpe_pipeline_setup(ctx, gpe_context, batch);
@@ -1126,7 +1126,7 @@ gen9_run_kernel_media_object_walker(VADriverContextP ctx,
mi_store_data_imm.bo = status_buffer->bo;
mi_store_data_imm.offset = status_buffer->media_index_offset;
mi_store_data_imm.dw0 = media_function;
- gen9_gpe_mi_store_data_imm(ctx, batch,&mi_store_data_imm);
+ gen8_gpe_mi_store_data_imm(ctx, batch,&mi_store_data_imm);
gen9_gpe_pipeline_setup(ctx, gpe_context, batch);
gen8_gpe_media_object_walker(ctx, gpe_context, batch, param);
diff --git a/src/i965_gpe_utils.c b/src/i965_gpe_utils.c
index 9ca4196..85cdd50 100644
--- a/src/i965_gpe_utils.c
+++ b/src/i965_gpe_utils.c
@@ -1512,7 +1512,7 @@ gen9_gpe_mi_flush_dw(VADriverContextP ctx,
}
void
-gen9_gpe_mi_store_data_imm(VADriverContextP ctx,
+gen8_gpe_mi_store_data_imm(VADriverContextP ctx,
struct intel_batchbuffer *batch,
struct gpe_mi_store_data_imm_parameter *params)
{
diff --git a/src/i965_gpe_utils.h b/src/i965_gpe_utils.h
index 323af74..e6cc3dc 100644
--- a/src/i965_gpe_utils.h
+++ b/src/i965_gpe_utils.h
@@ -366,7 +366,7 @@ void gen9_gpe_mi_flush_dw(VADriverContextP ctx,
struct intel_batchbuffer *batch,
struct gpe_mi_flush_dw_parameter *params);
-void gen9_gpe_mi_store_data_imm(VADriverContextP ctx,
+void gen8_gpe_mi_store_data_imm(VADriverContextP ctx,
struct intel_batchbuffer *batch,
struct gpe_mi_store_data_imm_parameter *params);
Xiang, Haihao
2016-11-18 05:43:54 UTC
Permalink
This function can be used on GEN8 too

Signed-off-by: Xiang, Haihao <***@intel.com>
---
src/gen9_vdenc.c | 10 +++++-----
src/gen9_vp9_encoder.c | 4 ++--
src/i965_gpe_utils.c | 2 +-
src/i965_gpe_utils.h | 2 +-
4 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/src/gen9_vdenc.c b/src/gen9_vdenc.c
index 8cddc41..35373f3 100644
--- a/src/gen9_vdenc.c
+++ b/src/gen9_vdenc.c
@@ -1875,7 +1875,7 @@ gen9_vdenc_huc_brc_init_reset(VADriverContextP ctx,

memset(&mi_flush_dw_params, 0, sizeof(mi_flush_dw_params));
mi_flush_dw_params.video_pipeline_cache_invalidate = 1;
- gen9_gpe_mi_flush_dw(ctx, batch, &mi_flush_dw_params);
+ gen8_gpe_mi_flush_dw(ctx, batch, &mi_flush_dw_params);
}

static void
@@ -2287,7 +2287,7 @@ gen9_vdenc_huc_brc_update(VADriverContextP ctx,

memset(&mi_flush_dw_params, 0, sizeof(mi_flush_dw_params));
mi_flush_dw_params.video_pipeline_cache_invalidate = 1;
- gen9_gpe_mi_flush_dw(ctx, batch, &mi_flush_dw_params);
+ gen8_gpe_mi_flush_dw(ctx, batch, &mi_flush_dw_params);

if (!vdenc_context->brc_initted || vdenc_context->brc_need_reset) {
struct gpe_mi_conditional_batch_buffer_end_parameter mi_conditional_batch_buffer_end_params;
@@ -2350,7 +2350,7 @@ gen9_vdenc_huc_brc_update(VADriverContextP ctx,

memset(&mi_flush_dw_params, 0, sizeof(mi_flush_dw_params));
mi_flush_dw_params.video_pipeline_cache_invalidate = 1;
- gen9_gpe_mi_flush_dw(ctx, batch, &mi_flush_dw_params);
+ gen8_gpe_mi_flush_dw(ctx, batch, &mi_flush_dw_params);

/* Store HUC_STATUS */
memset(&mi_store_register_mem_params, 0, sizeof(mi_store_register_mem_params));
@@ -3407,7 +3407,7 @@ gen9_vdenc_mfx_vdenc_avc_slices(VADriverContextP ctx,

memset(&mi_flush_dw_params, 0, sizeof(mi_flush_dw_params));
mi_flush_dw_params.video_pipeline_cache_invalidate = 1;
- gen9_gpe_mi_flush_dw(ctx, batch, &mi_flush_dw_params);
+ gen8_gpe_mi_flush_dw(ctx, batch, &mi_flush_dw_params);
}

static void
@@ -3502,7 +3502,7 @@ gen9_vdenc_read_status(VADriverContextP ctx, struct intel_encoder_context *encod
int i;

memset(&mi_flush_dw_params, 0, sizeof(mi_flush_dw_params));
- gen9_gpe_mi_flush_dw(ctx, batch, &mi_flush_dw_params);
+ gen8_gpe_mi_flush_dw(ctx, batch, &mi_flush_dw_params);

memset(&mi_store_register_mem_params, 0, sizeof(mi_store_register_mem_params));
mi_store_register_mem_params.mmio_offset = MFC_BITSTREAM_BYTECOUNT_FRAME_REG; /* TODO: fix it if VDBOX2 is used */
diff --git a/src/gen9_vp9_encoder.c b/src/gen9_vp9_encoder.c
index 4b80716..98ae3ca 100644
--- a/src/gen9_vp9_encoder.c
+++ b/src/gen9_vp9_encoder.c
@@ -5673,7 +5673,7 @@ gen9_vp9_read_mfc_status(VADriverContextP ctx, struct intel_encoder_context *enc
status_buffer = &(vp9_state->status_buffer);

memset(&mi_flush_dw_param, 0, sizeof(mi_flush_dw_param));
- gen9_gpe_mi_flush_dw(ctx, batch, &mi_flush_dw_param);
+ gen8_gpe_mi_flush_dw(ctx, batch, &mi_flush_dw_param);

memset(&mi_store_reg_mem_param, 0, sizeof(mi_store_reg_mem_param));
mi_store_reg_mem_param.bo = status_buffer->bo;
@@ -5705,7 +5705,7 @@ gen9_vp9_read_mfc_status(VADriverContextP ctx, struct intel_encoder_context *enc
status_buffer->vp9_image_ctrl_reg_offset;
gen9_gpe_mi_store_register_mem(ctx, batch, &mi_store_reg_mem_param);

- gen9_gpe_mi_flush_dw(ctx, batch, &mi_flush_dw_param);
+ gen8_gpe_mi_flush_dw(ctx, batch, &mi_flush_dw_param);

return;
}
diff --git a/src/i965_gpe_utils.c b/src/i965_gpe_utils.c
index 85cdd50..2f328f9 100644
--- a/src/i965_gpe_utils.c
+++ b/src/i965_gpe_utils.c
@@ -1479,7 +1479,7 @@ i965_unmap_gpe_resource(struct i965_gpe_resource *res)
}

void
-gen9_gpe_mi_flush_dw(VADriverContextP ctx,
+gen8_gpe_mi_flush_dw(VADriverContextP ctx,
struct intel_batchbuffer *batch,
struct gpe_mi_flush_dw_parameter *params)
{
diff --git a/src/i965_gpe_utils.h b/src/i965_gpe_utils.h
index e6cc3dc..517f353 100644
--- a/src/i965_gpe_utils.h
+++ b/src/i965_gpe_utils.h
@@ -362,7 +362,7 @@ void *i965_map_gpe_resource(struct i965_gpe_resource *res);

void i965_unmap_gpe_resource(struct i965_gpe_resource *res);

-void gen9_gpe_mi_flush_dw(VADriverContextP ctx,
+void gen8_gpe_mi_flush_dw(VADriverContextP ctx,
struct intel_batchbuffer *batch,
struct gpe_mi_flush_dw_parameter *params);
--
1.9.1
Xiang, Haihao
2016-11-18 05:43:55 UTC
Permalink
This function can be used on GEN8 too

Signed-off-by: Xiang, Haihao <***@intel.com>
---
src/gen9_vdenc.c | 10 +++++-----
src/gen9_vp9_encoder.c | 10 +++++-----
src/i965_gpe_utils.c | 2 +-
src/i965_gpe_utils.h | 2 +-
4 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/src/gen9_vdenc.c b/src/gen9_vdenc.c
index 35373f3..c8cdca0 100644
--- a/src/gen9_vdenc.c
+++ b/src/gen9_vdenc.c
@@ -1508,7 +1508,7 @@ gen9_vdenc_huc_store_huc_status2(VADriverContextP ctx,
mi_store_register_mem_params.mmio_offset = VCS0_HUC_STATUS2;
mi_store_register_mem_params.bo = vdenc_context->huc_status2_res.bo;
mi_store_register_mem_params.offset = 4;
- gen9_gpe_mi_store_register_mem(ctx, batch, &mi_store_register_mem_params);
+ gen8_gpe_mi_store_register_mem(ctx, batch, &mi_store_register_mem_params);
}

static void
@@ -2356,7 +2356,7 @@ gen9_vdenc_huc_brc_update(VADriverContextP ctx,
memset(&mi_store_register_mem_params, 0, sizeof(mi_store_register_mem_params));
mi_store_register_mem_params.mmio_offset = VCS0_HUC_STATUS;
mi_store_register_mem_params.bo = vdenc_context->huc_status_res.bo;
- gen9_gpe_mi_store_register_mem(ctx, batch, &mi_store_register_mem_params);
+ gen8_gpe_mi_store_register_mem(ctx, batch, &mi_store_register_mem_params);

/* Write HUC_STATUS mask (1 << 31) */
memset(&mi_store_data_imm_params, 0, sizeof(mi_store_data_imm_params));
@@ -3508,19 +3508,19 @@ gen9_vdenc_read_status(VADriverContextP ctx, struct intel_encoder_context *encod
mi_store_register_mem_params.mmio_offset = MFC_BITSTREAM_BYTECOUNT_FRAME_REG; /* TODO: fix it if VDBOX2 is used */
mi_store_register_mem_params.bo = vdenc_context->status_bffuer.res.bo;
mi_store_register_mem_params.offset = base_offset + vdenc_context->status_bffuer.bytes_per_frame_offset;
- gen9_gpe_mi_store_register_mem(ctx, batch, &mi_store_register_mem_params);
+ gen8_gpe_mi_store_register_mem(ctx, batch, &mi_store_register_mem_params);

/* Update DMEM buffer for BRC Update */
for (i = 0; i < NUM_OF_BRC_PAK_PASSES; i++) {
mi_store_register_mem_params.mmio_offset = MFC_BITSTREAM_BYTECOUNT_FRAME_REG; /* TODO: fix it if VDBOX2 is used */
mi_store_register_mem_params.bo = vdenc_context->brc_update_dmem_res[i].bo;
mi_store_register_mem_params.offset = 5 * sizeof(uint32_t);
- gen9_gpe_mi_store_register_mem(ctx, batch, &mi_store_register_mem_params);
+ gen8_gpe_mi_store_register_mem(ctx, batch, &mi_store_register_mem_params);

mi_store_register_mem_params.mmio_offset = MFC_IMAGE_STATUS_CTRL_REG; /* TODO: fix it if VDBOX2 is used */
mi_store_register_mem_params.bo = vdenc_context->brc_update_dmem_res[i].bo;
mi_store_register_mem_params.offset = 7 * sizeof(uint32_t);
- gen9_gpe_mi_store_register_mem(ctx, batch, &mi_store_register_mem_params);
+ gen8_gpe_mi_store_register_mem(ctx, batch, &mi_store_register_mem_params);
}
}

diff --git a/src/gen9_vp9_encoder.c b/src/gen9_vp9_encoder.c
index 98ae3ca..74d0d2f 100644
--- a/src/gen9_vp9_encoder.c
+++ b/src/gen9_vp9_encoder.c
@@ -5679,31 +5679,31 @@ gen9_vp9_read_mfc_status(VADriverContextP ctx, struct intel_encoder_context *enc
mi_store_reg_mem_param.bo = status_buffer->bo;
mi_store_reg_mem_param.offset = status_buffer->bs_byte_count_offset;
mi_store_reg_mem_param.mmio_offset = status_buffer->vp9_bs_frame_reg_offset;
- gen9_gpe_mi_store_register_mem(ctx, batch, &mi_store_reg_mem_param);
+ gen8_gpe_mi_store_register_mem(ctx, batch, &mi_store_reg_mem_param);

mi_store_reg_mem_param.bo = pak_context->res_brc_bitstream_size_buffer.bo;
mi_store_reg_mem_param.offset = 0;
mi_store_reg_mem_param.mmio_offset = status_buffer->vp9_bs_frame_reg_offset;
- gen9_gpe_mi_store_register_mem(ctx, batch, &mi_store_reg_mem_param);
+ gen8_gpe_mi_store_register_mem(ctx, batch, &mi_store_reg_mem_param);

/* Read HCP Image status */
mi_store_reg_mem_param.bo = status_buffer->bo;
mi_store_reg_mem_param.offset = status_buffer->image_status_mask_offset;
mi_store_reg_mem_param.mmio_offset =
status_buffer->vp9_image_mask_reg_offset;
- gen9_gpe_mi_store_register_mem(ctx, batch, &mi_store_reg_mem_param);
+ gen8_gpe_mi_store_register_mem(ctx, batch, &mi_store_reg_mem_param);

mi_store_reg_mem_param.bo = status_buffer->bo;
mi_store_reg_mem_param.offset = status_buffer->image_status_ctrl_offset;
mi_store_reg_mem_param.mmio_offset =
status_buffer->vp9_image_ctrl_reg_offset;
- gen9_gpe_mi_store_register_mem(ctx, batch, &mi_store_reg_mem_param);
+ gen8_gpe_mi_store_register_mem(ctx, batch, &mi_store_reg_mem_param);

mi_store_reg_mem_param.bo = pak_context->res_brc_bitstream_size_buffer.bo;
mi_store_reg_mem_param.offset = 4;
mi_store_reg_mem_param.mmio_offset =
status_buffer->vp9_image_ctrl_reg_offset;
- gen9_gpe_mi_store_register_mem(ctx, batch, &mi_store_reg_mem_param);
+ gen8_gpe_mi_store_register_mem(ctx, batch, &mi_store_reg_mem_param);

gen8_gpe_mi_flush_dw(ctx, batch, &mi_flush_dw_param);

diff --git a/src/i965_gpe_utils.c b/src/i965_gpe_utils.c
index 2f328f9..4c0384e 100644
--- a/src/i965_gpe_utils.c
+++ b/src/i965_gpe_utils.c
@@ -1535,7 +1535,7 @@ gen8_gpe_mi_store_data_imm(VADriverContextP ctx,
}

void
-gen9_gpe_mi_store_register_mem(VADriverContextP ctx,
+gen8_gpe_mi_store_register_mem(VADriverContextP ctx,
struct intel_batchbuffer *batch,
struct gpe_mi_store_register_mem_parameter *params)
{
diff --git a/src/i965_gpe_utils.h b/src/i965_gpe_utils.h
index 517f353..1a3210b 100644
--- a/src/i965_gpe_utils.h
+++ b/src/i965_gpe_utils.h
@@ -370,7 +370,7 @@ void gen8_gpe_mi_store_data_imm(VADriverContextP ctx,
struct intel_batchbuffer *batch,
struct gpe_mi_store_data_imm_parameter *params);

-void gen9_gpe_mi_store_register_mem(VADriverContextP ctx,
+void gen8_gpe_mi_store_register_mem(VADriverContextP ctx,
struct intel_batchbuffer *batch,
struct gpe_mi_store_register_mem_parameter *params);
--
1.9.1
Zhao Yakui
2016-11-18 06:42:26 UTC
Permalink
Post by Xiang, Haihao
This function can be used on GEN8 too
The patch looks good to me.

But please change the title to "gen8_gpe_mi_store_register_mem" instead
of gen8_gpe_mi_load_register_mem

Thanks.
Post by Xiang, Haihao
---
src/gen9_vdenc.c | 10 +++++-----
src/gen9_vp9_encoder.c | 10 +++++-----
src/i965_gpe_utils.c | 2 +-
src/i965_gpe_utils.h | 2 +-
4 files changed, 12 insertions(+), 12 deletions(-)
diff --git a/src/gen9_vdenc.c b/src/gen9_vdenc.c
index 35373f3..c8cdca0 100644
--- a/src/gen9_vdenc.c
+++ b/src/gen9_vdenc.c
@@ -1508,7 +1508,7 @@ gen9_vdenc_huc_store_huc_status2(VADriverContextP ctx,
mi_store_register_mem_params.mmio_offset = VCS0_HUC_STATUS2;
mi_store_register_mem_params.bo = vdenc_context->huc_status2_res.bo;
mi_store_register_mem_params.offset = 4;
- gen9_gpe_mi_store_register_mem(ctx, batch,&mi_store_register_mem_params);
+ gen8_gpe_mi_store_register_mem(ctx, batch,&mi_store_register_mem_params);
}
static void
@@ -2356,7 +2356,7 @@ gen9_vdenc_huc_brc_update(VADriverContextP ctx,
memset(&mi_store_register_mem_params, 0, sizeof(mi_store_register_mem_params));
mi_store_register_mem_params.mmio_offset = VCS0_HUC_STATUS;
mi_store_register_mem_params.bo = vdenc_context->huc_status_res.bo;
- gen9_gpe_mi_store_register_mem(ctx, batch,&mi_store_register_mem_params);
+ gen8_gpe_mi_store_register_mem(ctx, batch,&mi_store_register_mem_params);
/* Write HUC_STATUS mask (1<< 31) */
memset(&mi_store_data_imm_params, 0, sizeof(mi_store_data_imm_params));
@@ -3508,19 +3508,19 @@ gen9_vdenc_read_status(VADriverContextP ctx, struct intel_encoder_context *encod
mi_store_register_mem_params.mmio_offset = MFC_BITSTREAM_BYTECOUNT_FRAME_REG; /* TODO: fix it if VDBOX2 is used */
mi_store_register_mem_params.bo = vdenc_context->status_bffuer.res.bo;
mi_store_register_mem_params.offset = base_offset + vdenc_context->status_bffuer.bytes_per_frame_offset;
- gen9_gpe_mi_store_register_mem(ctx, batch,&mi_store_register_mem_params);
+ gen8_gpe_mi_store_register_mem(ctx, batch,&mi_store_register_mem_params);
/* Update DMEM buffer for BRC Update */
for (i = 0; i< NUM_OF_BRC_PAK_PASSES; i++) {
mi_store_register_mem_params.mmio_offset = MFC_BITSTREAM_BYTECOUNT_FRAME_REG; /* TODO: fix it if VDBOX2 is used */
mi_store_register_mem_params.bo = vdenc_context->brc_update_dmem_res[i].bo;
mi_store_register_mem_params.offset = 5 * sizeof(uint32_t);
- gen9_gpe_mi_store_register_mem(ctx, batch,&mi_store_register_mem_params);
+ gen8_gpe_mi_store_register_mem(ctx, batch,&mi_store_register_mem_params);
mi_store_register_mem_params.mmio_offset = MFC_IMAGE_STATUS_CTRL_REG; /* TODO: fix it if VDBOX2 is used */
mi_store_register_mem_params.bo = vdenc_context->brc_update_dmem_res[i].bo;
mi_store_register_mem_params.offset = 7 * sizeof(uint32_t);
- gen9_gpe_mi_store_register_mem(ctx, batch,&mi_store_register_mem_params);
+ gen8_gpe_mi_store_register_mem(ctx, batch,&mi_store_register_mem_params);
}
}
diff --git a/src/gen9_vp9_encoder.c b/src/gen9_vp9_encoder.c
index 98ae3ca..74d0d2f 100644
--- a/src/gen9_vp9_encoder.c
+++ b/src/gen9_vp9_encoder.c
@@ -5679,31 +5679,31 @@ gen9_vp9_read_mfc_status(VADriverContextP ctx, struct intel_encoder_context *enc
mi_store_reg_mem_param.bo = status_buffer->bo;
mi_store_reg_mem_param.offset = status_buffer->bs_byte_count_offset;
mi_store_reg_mem_param.mmio_offset = status_buffer->vp9_bs_frame_reg_offset;
- gen9_gpe_mi_store_register_mem(ctx, batch,&mi_store_reg_mem_param);
+ gen8_gpe_mi_store_register_mem(ctx, batch,&mi_store_reg_mem_param);
mi_store_reg_mem_param.bo = pak_context->res_brc_bitstream_size_buffer.bo;
mi_store_reg_mem_param.offset = 0;
mi_store_reg_mem_param.mmio_offset = status_buffer->vp9_bs_frame_reg_offset;
- gen9_gpe_mi_store_register_mem(ctx, batch,&mi_store_reg_mem_param);
+ gen8_gpe_mi_store_register_mem(ctx, batch,&mi_store_reg_mem_param);
/* Read HCP Image status */
mi_store_reg_mem_param.bo = status_buffer->bo;
mi_store_reg_mem_param.offset = status_buffer->image_status_mask_offset;
mi_store_reg_mem_param.mmio_offset =
status_buffer->vp9_image_mask_reg_offset;
- gen9_gpe_mi_store_register_mem(ctx, batch,&mi_store_reg_mem_param);
+ gen8_gpe_mi_store_register_mem(ctx, batch,&mi_store_reg_mem_param);
mi_store_reg_mem_param.bo = status_buffer->bo;
mi_store_reg_mem_param.offset = status_buffer->image_status_ctrl_offset;
mi_store_reg_mem_param.mmio_offset =
status_buffer->vp9_image_ctrl_reg_offset;
- gen9_gpe_mi_store_register_mem(ctx, batch,&mi_store_reg_mem_param);
+ gen8_gpe_mi_store_register_mem(ctx, batch,&mi_store_reg_mem_param);
mi_store_reg_mem_param.bo = pak_context->res_brc_bitstream_size_buffer.bo;
mi_store_reg_mem_param.offset = 4;
mi_store_reg_mem_param.mmio_offset =
status_buffer->vp9_image_ctrl_reg_offset;
- gen9_gpe_mi_store_register_mem(ctx, batch,&mi_store_reg_mem_param);
+ gen8_gpe_mi_store_register_mem(ctx, batch,&mi_store_reg_mem_param);
gen8_gpe_mi_flush_dw(ctx, batch,&mi_flush_dw_param);
diff --git a/src/i965_gpe_utils.c b/src/i965_gpe_utils.c
index 2f328f9..4c0384e 100644
--- a/src/i965_gpe_utils.c
+++ b/src/i965_gpe_utils.c
@@ -1535,7 +1535,7 @@ gen8_gpe_mi_store_data_imm(VADriverContextP ctx,
}
void
-gen9_gpe_mi_store_register_mem(VADriverContextP ctx,
+gen8_gpe_mi_store_register_mem(VADriverContextP ctx,
struct intel_batchbuffer *batch,
struct gpe_mi_store_register_mem_parameter *params)
{
diff --git a/src/i965_gpe_utils.h b/src/i965_gpe_utils.h
index 517f353..1a3210b 100644
--- a/src/i965_gpe_utils.h
+++ b/src/i965_gpe_utils.h
@@ -370,7 +370,7 @@ void gen8_gpe_mi_store_data_imm(VADriverContextP ctx,
struct intel_batchbuffer *batch,
struct gpe_mi_store_data_imm_parameter *params);
-void gen9_gpe_mi_store_register_mem(VADriverContextP ctx,
+void gen8_gpe_mi_store_register_mem(VADriverContextP ctx,
struct intel_batchbuffer *batch,
struct gpe_mi_store_register_mem_parameter *params);
Xiang, Haihao
2016-11-18 07:03:18 UTC
Permalink
Thanks for catching the typo.
Post by Zhao Yakui
Post by Xiang, Haihao
This function can be used on GEN8 too
The patch looks good to me.
But please change the title to "gen8_gpe_mi_store_register_mem" instead
of gen8_gpe_mi_load_register_mem
Thanks.
Post by Xiang, Haihao
---
  src/gen9_vdenc.c       | 10 +++++-----
  src/gen9_vp9_encoder.c | 10 +++++-----
  src/i965_gpe_utils.c   |  2 +-
  src/i965_gpe_utils.h   |  2 +-
  4 files changed, 12 insertions(+), 12 deletions(-)
diff --git a/src/gen9_vdenc.c b/src/gen9_vdenc.c
index 35373f3..c8cdca0 100644
--- a/src/gen9_vdenc.c
+++ b/src/gen9_vdenc.c
@@ -1508,7 +1508,7 @@
gen9_vdenc_huc_store_huc_status2(VADriverContextP ctx,
      mi_store_register_mem_params.mmio_offset = VCS0_HUC_STATUS2;
      mi_store_register_mem_params.bo = vdenc_context->huc_status2_res.bo;
      mi_store_register_mem_params.offset = 4;
-    gen9_gpe_mi_store_register_mem(ctx,
batch,&mi_store_register_mem_params);
+    gen8_gpe_mi_store_register_mem(ctx,
batch,&mi_store_register_mem_params);
  }
  static void
@@ -2356,7 +2356,7 @@ gen9_vdenc_huc_brc_update(VADriverContextP ctx,
      memset(&mi_store_register_mem_params, 0,
sizeof(mi_store_register_mem_params));
      mi_store_register_mem_params.mmio_offset = VCS0_HUC_STATUS;
      mi_store_register_mem_params.bo = vdenc_context->huc_status_res.bo;
-    gen9_gpe_mi_store_register_mem(ctx,
batch,&mi_store_register_mem_params);
+    gen8_gpe_mi_store_register_mem(ctx,
batch,&mi_store_register_mem_params);
      /* Write HUC_STATUS mask (1<<  31) */
      memset(&mi_store_data_imm_params, 0,
sizeof(mi_store_data_imm_params));
@@ -3508,19 +3508,19 @@ gen9_vdenc_read_status(VADriverContextP
ctx, struct intel_encoder_context *encod
      mi_store_register_mem_params.mmio_offset =
MFC_BITSTREAM_BYTECOUNT_FRAME_REG; /* TODO: fix it if VDBOX2 is
used */
      mi_store_register_mem_params.bo = vdenc_context->status_bffuer.res.bo;
      mi_store_register_mem_params.offset = base_offset +
vdenc_context->status_bffuer.bytes_per_frame_offset;
-    gen9_gpe_mi_store_register_mem(ctx,
batch,&mi_store_register_mem_params);
+    gen8_gpe_mi_store_register_mem(ctx,
batch,&mi_store_register_mem_params);
      /* Update DMEM buffer for BRC Update */
      for (i = 0; i<  NUM_OF_BRC_PAK_PASSES; i++) {
          mi_store_register_mem_params.mmio_offset =
MFC_BITSTREAM_BYTECOUNT_FRAME_REG; /* TODO: fix it if VDBOX2 is
used */
          mi_store_register_mem_params.bo = vdenc_context->brc_update_dmem_res[i].bo;
          mi_store_register_mem_params.offset = 5 *
sizeof(uint32_t);
-        gen9_gpe_mi_store_register_mem(ctx,
batch,&mi_store_register_mem_params);
+        gen8_gpe_mi_store_register_mem(ctx,
batch,&mi_store_register_mem_params);
          mi_store_register_mem_params.mmio_offset =
MFC_IMAGE_STATUS_CTRL_REG; /* TODO: fix it if VDBOX2 is used */
          mi_store_register_mem_params.bo = vdenc_context->brc_update_dmem_res[i].bo;
          mi_store_register_mem_params.offset = 7 *
sizeof(uint32_t);
-        gen9_gpe_mi_store_register_mem(ctx,
batch,&mi_store_register_mem_params);
+        gen8_gpe_mi_store_register_mem(ctx,
batch,&mi_store_register_mem_params);
      }
  }
diff --git a/src/gen9_vp9_encoder.c b/src/gen9_vp9_encoder.c
index 98ae3ca..74d0d2f 100644
--- a/src/gen9_vp9_encoder.c
+++ b/src/gen9_vp9_encoder.c
@@ -5679,31 +5679,31 @@ gen9_vp9_read_mfc_status(VADriverContextP
ctx, struct intel_encoder_context *enc
      mi_store_reg_mem_param.bo = status_buffer->bo;
      mi_store_reg_mem_param.offset = status_buffer->bs_byte_count_offset;
      mi_store_reg_mem_param.mmio_offset = status_buffer->vp9_bs_frame_reg_offset;
-    gen9_gpe_mi_store_register_mem(ctx,
batch,&mi_store_reg_mem_param);
+    gen8_gpe_mi_store_register_mem(ctx,
batch,&mi_store_reg_mem_param);
      mi_store_reg_mem_param.bo = pak_context->res_brc_bitstream_size_buffer.bo;
      mi_store_reg_mem_param.offset = 0;
      mi_store_reg_mem_param.mmio_offset = status_buffer->vp9_bs_frame_reg_offset;
-    gen9_gpe_mi_store_register_mem(ctx,
batch,&mi_store_reg_mem_param);
+    gen8_gpe_mi_store_register_mem(ctx,
batch,&mi_store_reg_mem_param);
      /* Read HCP Image status */
      mi_store_reg_mem_param.bo = status_buffer->bo;
      mi_store_reg_mem_param.offset = status_buffer->image_status_mask_offset;
      mi_store_reg_mem_param.mmio_offset =
                                 status_buffer->vp9_image_mask_reg_offset;
-    gen9_gpe_mi_store_register_mem(ctx,
batch,&mi_store_reg_mem_param);
+    gen8_gpe_mi_store_register_mem(ctx,
batch,&mi_store_reg_mem_param);
      mi_store_reg_mem_param.bo = status_buffer->bo;
      mi_store_reg_mem_param.offset = status_buffer->image_status_ctrl_offset;
      mi_store_reg_mem_param.mmio_offset =
                                 status_buffer->vp9_image_ctrl_reg_offset;
-    gen9_gpe_mi_store_register_mem(ctx,
batch,&mi_store_reg_mem_param);
+    gen8_gpe_mi_store_register_mem(ctx,
batch,&mi_store_reg_mem_param);
      mi_store_reg_mem_param.bo = pak_context->res_brc_bitstream_size_buffer.bo;
      mi_store_reg_mem_param.offset = 4;
      mi_store_reg_mem_param.mmio_offset =
                                 status_buffer->vp9_image_ctrl_reg_offset;
-    gen9_gpe_mi_store_register_mem(ctx,
batch,&mi_store_reg_mem_param);
+    gen8_gpe_mi_store_register_mem(ctx,
batch,&mi_store_reg_mem_param);
      gen8_gpe_mi_flush_dw(ctx, batch,&mi_flush_dw_param);
diff --git a/src/i965_gpe_utils.c b/src/i965_gpe_utils.c
index 2f328f9..4c0384e 100644
--- a/src/i965_gpe_utils.c
+++ b/src/i965_gpe_utils.c
@@ -1535,7 +1535,7 @@ gen8_gpe_mi_store_data_imm(VADriverContextP ctx,
  }
  void
-gen9_gpe_mi_store_register_mem(VADriverContextP ctx,
+gen8_gpe_mi_store_register_mem(VADriverContextP ctx,
                                 struct intel_batchbuffer *batch,
                                 struct
gpe_mi_store_register_mem_parameter *params)
  {
diff --git a/src/i965_gpe_utils.h b/src/i965_gpe_utils.h
index 517f353..1a3210b 100644
--- a/src/i965_gpe_utils.h
+++ b/src/i965_gpe_utils.h
@@ -370,7 +370,7 @@ void
gen8_gpe_mi_store_data_imm(VADriverContextP ctx,
                                  struct intel_batchbuffer *batch,
                                  struct
gpe_mi_store_data_imm_parameter *params);
-void gen9_gpe_mi_store_register_mem(VADriverContextP ctx,
+void gen8_gpe_mi_store_register_mem(VADriverContextP ctx,
                                      struct intel_batchbuffer
*batch,
                                      struct
gpe_mi_store_register_mem_parameter *params);
Xiang, Haihao
2016-11-18 05:43:56 UTC
Permalink
Signed-off-by: Xiang, Haihao <***@intel.com>
---
src/i965_gpe_utils.c | 18 +++++++++++++++++-
src/i965_gpe_utils.h | 3 ++-
2 files changed, 19 insertions(+), 2 deletions(-)

diff --git a/src/i965_gpe_utils.c b/src/i965_gpe_utils.c
index 4c0384e..d1b1941 100644
--- a/src/i965_gpe_utils.c
+++ b/src/i965_gpe_utils.c
@@ -2143,7 +2143,6 @@ gen8_gpe_media_object_walker(VADriverContextP ctx,
ADVANCE_BATCH(batch);
}

-
void
intel_vpp_init_media_object_walker_parameter(struct intel_vpp_kernel_walker_parameter *kernel_walker_param,
struct gpe_media_object_walker_parameter *walker_param)
@@ -2190,3 +2189,20 @@ intel_vpp_init_media_object_walker_parameter(struct intel_vpp_kernel_walker_para
walker_param->local_inner_loop_unit.y = 1;
}
}
+
+void
+gen8_gpe_reset_binding_table(VADriverContextP ctx, struct i965_gpe_context *gpe_context)
+{
+ unsigned int *binding_table;
+ unsigned int binding_table_offset = gpe_context->surface_state_binding_table.binding_table_offset;
+ int i;
+
+ dri_bo_map(gpe_context->surface_state_binding_table.bo, 1);
+ binding_table = (unsigned int*)((char *)gpe_context->surface_state_binding_table.bo->virtual + binding_table_offset);
+
+ for (i = 0; i < gpe_context->surface_state_binding_table.max_entries; i++) {
+ *(binding_table + i) = gpe_context->surface_state_binding_table.surface_state_offset + i * SURFACE_STATE_PADDED_SIZE_GEN8;
+ }
+
+ dri_bo_unmap(gpe_context->surface_state_binding_table.bo);
+}
diff --git a/src/i965_gpe_utils.h b/src/i965_gpe_utils.h
index 1a3210b..076f584 100644
--- a/src/i965_gpe_utils.h
+++ b/src/i965_gpe_utils.h
@@ -511,7 +511,6 @@ gen8_gpe_media_object_walker(VADriverContextP ctx,
struct intel_batchbuffer *batch,
struct gpe_media_object_walker_parameter *param);

-
struct intel_vpp_kernel_walker_parameter
{
unsigned int use_scoreboard;
@@ -524,5 +523,7 @@ struct intel_vpp_kernel_walker_parameter
extern void
intel_vpp_init_media_object_walker_parameter(struct intel_vpp_kernel_walker_parameter *kernel_walker_param,
struct gpe_media_object_walker_parameter *walker_param);
+extern void
+gen8_gpe_reset_binding_table(VADriverContextP ctx, struct i965_gpe_context *gpe_context);

#endif /* _I965_GPE_UTILS_H_ */
--
1.9.1
Zhao Yakui
2016-11-18 06:43:39 UTC
Permalink
On 11/18/2016 01:43 PM, Xiang, Haihao wrote:

This looks good to me.

Thanks
Post by Xiang, Haihao
---
src/i965_gpe_utils.c | 18 +++++++++++++++++-
src/i965_gpe_utils.h | 3 ++-
2 files changed, 19 insertions(+), 2 deletions(-)
diff --git a/src/i965_gpe_utils.c b/src/i965_gpe_utils.c
index 4c0384e..d1b1941 100644
--- a/src/i965_gpe_utils.c
+++ b/src/i965_gpe_utils.c
@@ -2143,7 +2143,6 @@ gen8_gpe_media_object_walker(VADriverContextP ctx,
ADVANCE_BATCH(batch);
}
-
void
intel_vpp_init_media_object_walker_parameter(struct intel_vpp_kernel_walker_parameter *kernel_walker_param,
struct gpe_media_object_walker_parameter *walker_param)
@@ -2190,3 +2189,20 @@ intel_vpp_init_media_object_walker_parameter(struct intel_vpp_kernel_walker_para
walker_param->local_inner_loop_unit.y = 1;
}
}
+
+void
+gen8_gpe_reset_binding_table(VADriverContextP ctx, struct i965_gpe_context *gpe_context)
+{
+ unsigned int *binding_table;
+ unsigned int binding_table_offset = gpe_context->surface_state_binding_table.binding_table_offset;
+ int i;
+
+ dri_bo_map(gpe_context->surface_state_binding_table.bo, 1);
+ binding_table = (unsigned int*)((char *)gpe_context->surface_state_binding_table.bo->virtual + binding_table_offset);
+
+ for (i = 0; i< gpe_context->surface_state_binding_table.max_entries; i++) {
+ *(binding_table + i) = gpe_context->surface_state_binding_table.surface_state_offset + i * SURFACE_STATE_PADDED_SIZE_GEN8;
+ }
+
+ dri_bo_unmap(gpe_context->surface_state_binding_table.bo);
+}
diff --git a/src/i965_gpe_utils.h b/src/i965_gpe_utils.h
index 1a3210b..076f584 100644
--- a/src/i965_gpe_utils.h
+++ b/src/i965_gpe_utils.h
@@ -511,7 +511,6 @@ gen8_gpe_media_object_walker(VADriverContextP ctx,
struct intel_batchbuffer *batch,
struct gpe_media_object_walker_parameter *param);
-
struct intel_vpp_kernel_walker_parameter
{
unsigned int use_scoreboard;
@@ -524,5 +523,7 @@ struct intel_vpp_kernel_walker_parameter
extern void
intel_vpp_init_media_object_walker_parameter(struct intel_vpp_kernel_walker_parameter *kernel_walker_param,
struct gpe_media_object_walker_parameter *walker_param);
+extern void
+gen8_gpe_reset_binding_table(VADriverContextP ctx, struct i965_gpe_context *gpe_context);
#endif /* _I965_GPE_UTILS_H_ */
Xiang, Haihao
2016-11-18 05:43:52 UTC
Permalink
This function can be used on GEN8 too

Signed-off-by: Xiang, Haihao <***@intel.com>
---
src/gen9_vdenc.c | 2 +-
src/gen9_vp9_encoder.c | 6 +++---
src/i965_gpe_utils.c | 2 +-
src/i965_gpe_utils.h | 2 +-
4 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/src/gen9_vdenc.c b/src/gen9_vdenc.c
index 2bc15b7..87e587a 100644
--- a/src/gen9_vdenc.c
+++ b/src/gen9_vdenc.c
@@ -3459,7 +3459,7 @@ gen9_vdenc_mfx_vdenc_pipeline(VADriverContextP ctx,
memset(&mi_batch_buffer_start_params, 0, sizeof(mi_batch_buffer_start_params));
mi_batch_buffer_start_params.is_second_level = 1; /* Must be the second level batch buffer */
mi_batch_buffer_start_params.bo = vdenc_context->second_level_batch_res.bo;
- gen9_gpe_mi_batch_buffer_start(ctx, batch, &mi_batch_buffer_start_params);
+ gen8_gpe_mi_batch_buffer_start(ctx, batch, &mi_batch_buffer_start_params);
}

gen9_vdenc_mfx_avc_qm_state(ctx, encoder_context);
diff --git a/src/gen9_vp9_encoder.c b/src/gen9_vp9_encoder.c
index 34d09a6..a617eb0 100644
--- a/src/gen9_vp9_encoder.c
+++ b/src/gen9_vp9_encoder.c
@@ -5622,7 +5622,7 @@ gen9_vp9_pak_picture_level(VADriverContextP ctx,
second_level_batch.is_second_level = 1;
second_level_batch.bo = pak_context->res_pic_state_brc_write_hfw_read_buffer.bo;

- gen9_gpe_mi_batch_buffer_start(ctx, batch, &second_level_batch);
+ gen8_gpe_mi_batch_buffer_start(ctx, batch, &second_level_batch);

if (pic_param->pic_flags.bits.segmentation_enabled &&
seg_param)
@@ -5644,13 +5644,13 @@ gen9_vp9_pak_picture_level(VADriverContextP ctx,
second_level_batch.offset = 0;
second_level_batch.bo = pak_context->res_pak_uncompressed_input_buffer.bo;

- gen9_gpe_mi_batch_buffer_start(ctx, batch, &second_level_batch);
+ gen8_gpe_mi_batch_buffer_start(ctx, batch, &second_level_batch);

/* PAK_OBJECT */
second_level_batch.is_second_level = 1;
second_level_batch.offset = 0;
second_level_batch.bo = pak_context->res_mb_code_surface.bo;
- gen9_gpe_mi_batch_buffer_start(ctx, batch, &second_level_batch);
+ gen8_gpe_mi_batch_buffer_start(ctx, batch, &second_level_batch);

return;
}
diff --git a/src/i965_gpe_utils.c b/src/i965_gpe_utils.c
index c2d06b2..9ca4196 100644
--- a/src/i965_gpe_utils.c
+++ b/src/i965_gpe_utils.c
@@ -1611,7 +1611,7 @@ gen9_gpe_mi_conditional_batch_buffer_end(VADriverContextP ctx,
}

void
-gen9_gpe_mi_batch_buffer_start(VADriverContextP ctx,
+gen8_gpe_mi_batch_buffer_start(VADriverContextP ctx,
struct intel_batchbuffer *batch,
struct gpe_mi_batch_buffer_start_parameter *params)
{
diff --git a/src/i965_gpe_utils.h b/src/i965_gpe_utils.h
index b58a02c..323af74 100644
--- a/src/i965_gpe_utils.h
+++ b/src/i965_gpe_utils.h
@@ -394,7 +394,7 @@ void gen9_gpe_mi_conditional_batch_buffer_end(VADriverContextP ctx,
struct intel_batchbuffer *batch,
struct gpe_mi_conditional_batch_buffer_end_parameter *params);

-void gen9_gpe_mi_batch_buffer_start(VADriverContextP ctx,
+void gen8_gpe_mi_batch_buffer_start(VADriverContextP ctx,
struct intel_batchbuffer *batch,
struct gpe_mi_batch_buffer_start_parameter *params);
--
1.9.1
Zhao Yakui
2016-11-18 06:38:46 UTC
Permalink
Post by Xiang, Haihao
This function can be used on GEN8 too
This looks good to me.
Post by Xiang, Haihao
---
src/gen9_vdenc.c | 2 +-
src/gen9_vp9_encoder.c | 6 +++---
src/i965_gpe_utils.c | 2 +-
src/i965_gpe_utils.h | 2 +-
4 files changed, 6 insertions(+), 6 deletions(-)
diff --git a/src/gen9_vdenc.c b/src/gen9_vdenc.c
index 2bc15b7..87e587a 100644
--- a/src/gen9_vdenc.c
+++ b/src/gen9_vdenc.c
@@ -3459,7 +3459,7 @@ gen9_vdenc_mfx_vdenc_pipeline(VADriverContextP ctx,
memset(&mi_batch_buffer_start_params, 0, sizeof(mi_batch_buffer_start_params));
mi_batch_buffer_start_params.is_second_level = 1; /* Must be the second level batch buffer */
mi_batch_buffer_start_params.bo = vdenc_context->second_level_batch_res.bo;
- gen9_gpe_mi_batch_buffer_start(ctx, batch,&mi_batch_buffer_start_params);
+ gen8_gpe_mi_batch_buffer_start(ctx, batch,&mi_batch_buffer_start_params);
}
gen9_vdenc_mfx_avc_qm_state(ctx, encoder_context);
diff --git a/src/gen9_vp9_encoder.c b/src/gen9_vp9_encoder.c
index 34d09a6..a617eb0 100644
--- a/src/gen9_vp9_encoder.c
+++ b/src/gen9_vp9_encoder.c
@@ -5622,7 +5622,7 @@ gen9_vp9_pak_picture_level(VADriverContextP ctx,
second_level_batch.is_second_level = 1;
second_level_batch.bo = pak_context->res_pic_state_brc_write_hfw_read_buffer.bo;
- gen9_gpe_mi_batch_buffer_start(ctx, batch,&second_level_batch);
+ gen8_gpe_mi_batch_buffer_start(ctx, batch,&second_level_batch);
if (pic_param->pic_flags.bits.segmentation_enabled&&
seg_param)
@@ -5644,13 +5644,13 @@ gen9_vp9_pak_picture_level(VADriverContextP ctx,
second_level_batch.offset = 0;
second_level_batch.bo = pak_context->res_pak_uncompressed_input_buffer.bo;
- gen9_gpe_mi_batch_buffer_start(ctx, batch,&second_level_batch);
+ gen8_gpe_mi_batch_buffer_start(ctx, batch,&second_level_batch);
/* PAK_OBJECT */
second_level_batch.is_second_level = 1;
second_level_batch.offset = 0;
second_level_batch.bo = pak_context->res_mb_code_surface.bo;
- gen9_gpe_mi_batch_buffer_start(ctx, batch,&second_level_batch);
+ gen8_gpe_mi_batch_buffer_start(ctx, batch,&second_level_batch);
return;
}
diff --git a/src/i965_gpe_utils.c b/src/i965_gpe_utils.c
index c2d06b2..9ca4196 100644
--- a/src/i965_gpe_utils.c
+++ b/src/i965_gpe_utils.c
@@ -1611,7 +1611,7 @@ gen9_gpe_mi_conditional_batch_buffer_end(VADriverContextP ctx,
}
void
-gen9_gpe_mi_batch_buffer_start(VADriverContextP ctx,
+gen8_gpe_mi_batch_buffer_start(VADriverContextP ctx,
struct intel_batchbuffer *batch,
struct gpe_mi_batch_buffer_start_parameter *params)
{
diff --git a/src/i965_gpe_utils.h b/src/i965_gpe_utils.h
index b58a02c..323af74 100644
--- a/src/i965_gpe_utils.h
+++ b/src/i965_gpe_utils.h
@@ -394,7 +394,7 @@ void gen9_gpe_mi_conditional_batch_buffer_end(VADriverContextP ctx,
struct intel_batchbuffer *batch,
struct gpe_mi_conditional_batch_buffer_end_parameter *params);
-void gen9_gpe_mi_batch_buffer_start(VADriverContextP ctx,
+void gen8_gpe_mi_batch_buffer_start(VADriverContextP ctx,
struct intel_batchbuffer *batch,
struct gpe_mi_batch_buffer_start_parameter *params);
Xiang, Haihao
2016-11-18 05:43:58 UTC
Permalink
Signed-off-by: Xiang, Haihao <***@intel.com>
---
src/i965_gpe_utils.c | 16 ++++++++++++++++
src/i965_gpe_utils.h | 19 +++++++++++++++++++
src/intel_driver.h | 6 ++++++
3 files changed, 41 insertions(+)

diff --git a/src/i965_gpe_utils.c b/src/i965_gpe_utils.c
index 058e6b6..46960e6 100644
--- a/src/i965_gpe_utils.c
+++ b/src/i965_gpe_utils.c
@@ -2423,3 +2423,19 @@ gen8_gpe_context_add_surface(struct i965_gpe_context *gpe_context,

dri_bo_unmap(gpe_context->surface_state_binding_table.bo);
}
+/* Emit MI_CONDITIONAL_BATCH_BUFFER_END: header, compare_data, then a 64-bit reloc to param->bo + param->offset. */
+void
+gen8_gpe_mi_conditional_batch_buffer_end(VADriverContextP ctx,
+ struct intel_batchbuffer *batch,
+ struct gpe_mi_conditional_batch_buffer_end_parameter *param)
+{
+ __OUT_BATCH(batch, (MI_CONDITIONAL_BATCH_BUFFER_END |
+ (1 << 21) | /* NOTE(review): presumably the compare-semaphore enable bit -- confirm against the PRM */
+ (4 - 2))); /* length field for this command; Always use PPGTT */
+ __OUT_BATCH(batch, param->compare_data); /* value the command compares with */
+ __OUT_RELOC64(batch,
+ param->bo,
+ I915_GEM_DOMAIN_RENDER | I915_GEM_DOMAIN_INSTRUCTION, 0, /* read domains only, write domain is 0 */
+ param->offset);
+
+}
diff --git a/src/i965_gpe_utils.h b/src/i965_gpe_utils.h
index cbf3b05..3e10cf8 100644
--- a/src/i965_gpe_utils.h
+++ b/src/i965_gpe_utils.h
@@ -73,6 +73,20 @@ struct gpe_dynamic_state_parameter
unsigned int sampler_offset;
};

+#define PIPE_CONTROL_FLUSH_NONE 0 /* flush_mode: no cache flush or invalidation requested */
+#define PIPE_CONTROL_FLUSH_WRITE_CACHE 1 /* flush_mode: flush the write caches (render target + DC) */
+#define PIPE_CONTROL_FLUSH_READ_CACHE 2 /* flush_mode: invalidate the read caches */
+
+struct gpe_pipe_control_parameter /* input to gen8_gpe_pipe_control() */
+{
+ dri_bo *bo; /* target of the post-sync write; NULL means no write */
+ unsigned int offset; /* delta inside bo used for the relocation */
+ unsigned int flush_mode; /* one of PIPE_CONTROL_FLUSH_* */
+ unsigned int disable_cs_stall; /* non-zero: do not request a CS stall */
+ unsigned int dw0; /* emitted as the 5th dword of the command */
+ unsigned int dw1; /* emitted as the 6th dword of the command */
+};
+
struct i965_gpe_context
{
struct {
@@ -531,4 +545,9 @@ gen8_gpe_context_add_surface(struct i965_gpe_context *gpe_context,
struct i965_gpe_surface *gpe_surface,
int index);

+extern void
+gen8_gpe_mi_conditional_batch_buffer_end(VADriverContextP ctx,
+ struct intel_batchbuffer *batch,
+ struct gpe_mi_conditional_batch_buffer_end_parameter *param);
+
#endif /* _I965_GPE_UTILS_H_ */
diff --git a/src/intel_driver.h b/src/intel_driver.h
index dcdc03b..4ff707d 100644
--- a/src/intel_driver.h
+++ b/src/intel_driver.h
@@ -76,12 +76,18 @@
#define CMD_PIPE_CONTROL_IS_FLUSH (1 << 11)
#define CMD_PIPE_CONTROL_TC_FLUSH (1 << 10)
#define CMD_PIPE_CONTROL_NOTIFY_ENABLE (1 << 8)
+#define CMD_PIPE_CONTROL_FLUSH_ENABLE (1 << 7)
#define CMD_PIPE_CONTROL_DC_FLUSH (1 << 5)
#define CMD_PIPE_CONTROL_GLOBAL_GTT (1 << 2)
#define CMD_PIPE_CONTROL_LOCAL_PGTT (0 << 2)
#define CMD_PIPE_CONTROL_STALL_AT_SCOREBOARD (1 << 1)
#define CMD_PIPE_CONTROL_DEPTH_CACHE_FLUSH (1 << 0)

+#define CMD_PIPE_CONTROL_GLOBAL_GTT_GEN8 (1 << 24)
+#define CMD_PIPE_CONTROL_LOCAL_PGTT_GEN8 (0 << 24)
+#define CMD_PIPE_CONTROL_VFC_INVALIDATION_GEN8 (1 << 4)
+#define CMD_PIPE_CONTROL_CC_INVALIDATION_GEN8 (1 << 3)
+#define CMD_PIPE_CONTROL_SC_INVALIDATION_GEN8 (1 << 2)

struct intel_batchbuffer;
--
1.9.1
Zhao Yakui
2016-11-18 07:05:20 UTC
Permalink
This looks good to me.

Thanks
Post by Xiang, Haihao
---
src/i965_gpe_utils.c | 16 ++++++++++++++++
src/i965_gpe_utils.h | 19 +++++++++++++++++++
src/intel_driver.h | 6 ++++++
3 files changed, 41 insertions(+)
diff --git a/src/i965_gpe_utils.c b/src/i965_gpe_utils.c
index 058e6b6..46960e6 100644
--- a/src/i965_gpe_utils.c
+++ b/src/i965_gpe_utils.c
@@ -2423,3 +2423,19 @@ gen8_gpe_context_add_surface(struct i965_gpe_context *gpe_context,
dri_bo_unmap(gpe_context->surface_state_binding_table.bo);
}
+
+void
+gen8_gpe_mi_conditional_batch_buffer_end(VADriverContextP ctx,
+ struct intel_batchbuffer *batch,
+ struct gpe_mi_conditional_batch_buffer_end_parameter *param)
+{
+ __OUT_BATCH(batch, (MI_CONDITIONAL_BATCH_BUFFER_END |
+ (1<< 21) |
+ (4 - 2))); /* Always use PPGTT */
+ __OUT_BATCH(batch, param->compare_data);
+ __OUT_RELOC64(batch,
+ param->bo,
+ I915_GEM_DOMAIN_RENDER | I915_GEM_DOMAIN_INSTRUCTION, 0,
+ param->offset);
+
+}
diff --git a/src/i965_gpe_utils.h b/src/i965_gpe_utils.h
index cbf3b05..3e10cf8 100644
--- a/src/i965_gpe_utils.h
+++ b/src/i965_gpe_utils.h
@@ -73,6 +73,20 @@ struct gpe_dynamic_state_parameter
unsigned int sampler_offset;
};
+#define PIPE_CONTROL_FLUSH_NONE 0
+#define PIPE_CONTROL_FLUSH_WRITE_CACHE 1
+#define PIPE_CONTROL_FLUSH_READ_CACHE 2
+
+struct gpe_pipe_control_parameter
+{
+ dri_bo *bo;
+ unsigned int offset;
+ unsigned int flush_mode;
+ unsigned int disable_cs_stall;
+ unsigned int dw0;
+ unsigned int dw1;
+};
+
struct i965_gpe_context
{
struct {
@@ -531,4 +545,9 @@ gen8_gpe_context_add_surface(struct i965_gpe_context *gpe_context,
struct i965_gpe_surface *gpe_surface,
int index);
+extern void
+gen8_gpe_mi_conditional_batch_buffer_end(VADriverContextP ctx,
+ struct intel_batchbuffer *batch,
+ struct gpe_mi_conditional_batch_buffer_end_parameter *param);
+
#endif /* _I965_GPE_UTILS_H_ */
diff --git a/src/intel_driver.h b/src/intel_driver.h
index dcdc03b..4ff707d 100644
--- a/src/intel_driver.h
+++ b/src/intel_driver.h
@@ -76,12 +76,18 @@
#define CMD_PIPE_CONTROL_IS_FLUSH (1<< 11)
#define CMD_PIPE_CONTROL_TC_FLUSH (1<< 10)
#define CMD_PIPE_CONTROL_NOTIFY_ENABLE (1<< 8)
+#define CMD_PIPE_CONTROL_FLUSH_ENABLE (1<< 7)
#define CMD_PIPE_CONTROL_DC_FLUSH (1<< 5)
#define CMD_PIPE_CONTROL_GLOBAL_GTT (1<< 2)
#define CMD_PIPE_CONTROL_LOCAL_PGTT (0<< 2)
#define CMD_PIPE_CONTROL_STALL_AT_SCOREBOARD (1<< 1)
#define CMD_PIPE_CONTROL_DEPTH_CACHE_FLUSH (1<< 0)
+#define CMD_PIPE_CONTROL_GLOBAL_GTT_GEN8 (1<< 24)
+#define CMD_PIPE_CONTROL_LOCAL_PGTT_GEN8 (0<< 24)
+#define CMD_PIPE_CONTROL_VFC_INVALIDATION_GEN8 (1<< 4)
+#define CMD_PIPE_CONTROL_CC_INVALIDATION_GEN8 (1<< 3)
+#define CMD_PIPE_CONTROL_SC_INVALIDATION_GEN8 (1<< 2)
struct intel_batchbuffer;
Xiang, Haihao
2016-11-18 05:43:59 UTC
Permalink
Signed-off-by: Xiang, Haihao <***@intel.com>
---
src/i965_gpe_utils.c | 73 ++++++++++++++++++++++++++++++++++++++++++++++++++++
src/i965_gpe_utils.h | 5 ++++
2 files changed, 78 insertions(+)

diff --git a/src/i965_gpe_utils.c b/src/i965_gpe_utils.c
index 46960e6..ca8c86a 100644
--- a/src/i965_gpe_utils.c
+++ b/src/i965_gpe_utils.c
@@ -2439,3 +2439,76 @@ gen8_gpe_mi_conditional_batch_buffer_end(VADriverContextP ctx,
param->offset);

}
+
+/* Emit a 6-dword PIPE_CONTROL. flush_mode selects flush/invalidate bits; a
+ * non-NULL bo adds a post-sync QWORD write (dw0/dw1) to bo + offset, and a
+ * NULL bo forces the render-target flush plus all invalidation bits. */
+void
+gen8_gpe_pipe_control(VADriverContextP ctx,
+                      struct intel_batchbuffer *batch,
+                      struct gpe_pipe_control_parameter *param)
+{
+    int render_target_cache_flush_enable = 0;
+    int dc_flush_enable = 0;
+    int state_cache_invalidation_enable = 0;
+    int constant_cache_invalidation_enable = 0;
+    int vf_cache_invalidation_enable = 0;
+    int instruction_cache_invalidation_enable = 0;
+    int post_sync_operation = CMD_PIPE_CONTROL_NOWRITE;
+    int use_global_gtt = CMD_PIPE_CONTROL_GLOBAL_GTT_GEN8;
+    /* Must be the DW1 stall bit itself: a bare 0/1 would land on bit 0. */
+    int cs_stall_enable = param->disable_cs_stall ? 0 : CMD_PIPE_CONTROL_CS_STALL;
+
+    switch (param->flush_mode) {
+    case PIPE_CONTROL_FLUSH_WRITE_CACHE:
+        render_target_cache_flush_enable = CMD_PIPE_CONTROL_WC_FLUSH;
+        dc_flush_enable = CMD_PIPE_CONTROL_DC_FLUSH;
+        break;
+    case PIPE_CONTROL_FLUSH_READ_CACHE:
+        state_cache_invalidation_enable = CMD_PIPE_CONTROL_SC_INVALIDATION_GEN8;
+        constant_cache_invalidation_enable = CMD_PIPE_CONTROL_CC_INVALIDATION_GEN8;
+        vf_cache_invalidation_enable = CMD_PIPE_CONTROL_VFC_INVALIDATION_GEN8;
+        instruction_cache_invalidation_enable = CMD_PIPE_CONTROL_IS_FLUSH;
+        break;
+    case PIPE_CONTROL_FLUSH_NONE:
+    default:
+        break;
+    }
+
+    if (param->bo) {
+        post_sync_operation = CMD_PIPE_CONTROL_WRITE_QWORD;
+        use_global_gtt = CMD_PIPE_CONTROL_LOCAL_PGTT_GEN8;
+    } else {
+        /* No write target: override flush_mode with flush + invalidate. */
+        render_target_cache_flush_enable = CMD_PIPE_CONTROL_WC_FLUSH;
+        state_cache_invalidation_enable = CMD_PIPE_CONTROL_SC_INVALIDATION_GEN8;
+        constant_cache_invalidation_enable = CMD_PIPE_CONTROL_CC_INVALIDATION_GEN8;
+        vf_cache_invalidation_enable = CMD_PIPE_CONTROL_VFC_INVALIDATION_GEN8;
+        instruction_cache_invalidation_enable = CMD_PIPE_CONTROL_IS_FLUSH;
+    }
+
+    __OUT_BATCH(batch, CMD_PIPE_CONTROL | (6 - 2));
+    __OUT_BATCH(batch, (render_target_cache_flush_enable |
+                        dc_flush_enable |
+                        state_cache_invalidation_enable |
+                        constant_cache_invalidation_enable |
+                        vf_cache_invalidation_enable |
+                        instruction_cache_invalidation_enable |
+                        post_sync_operation |
+                        use_global_gtt |
+                        cs_stall_enable |
+                        CMD_PIPE_CONTROL_FLUSH_ENABLE));
+
+    if (param->bo) {
+        __OUT_RELOC64(batch,
+                      param->bo,
+                      I915_GEM_DOMAIN_RENDER | I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_RENDER,
+                      param->offset);
+    } else {
+        __OUT_BATCH(batch, 0);
+        __OUT_BATCH(batch, 0);
+    }
+
+    __OUT_BATCH(batch, param->dw0);
+    __OUT_BATCH(batch, param->dw1);
+}
diff --git a/src/i965_gpe_utils.h b/src/i965_gpe_utils.h
index 3e10cf8..8701150 100644
--- a/src/i965_gpe_utils.h
+++ b/src/i965_gpe_utils.h
@@ -550,4 +550,9 @@ gen8_gpe_mi_conditional_batch_buffer_end(VADriverContextP ctx,
struct intel_batchbuffer *batch,
struct gpe_mi_conditional_batch_buffer_end_parameter *param);

+extern void
+gen8_gpe_pipe_control(VADriverContextP ctx,
+ struct intel_batchbuffer *batch,
+ struct gpe_pipe_control_parameter *param);
+
#endif /* _I965_GPE_UTILS_H_ */
--
1.9.1
Zhao Yakui
2016-11-18 07:04:54 UTC
Permalink
This looks good to me.

Thanks
Post by Xiang, Haihao
---
src/i965_gpe_utils.c | 73 ++++++++++++++++++++++++++++++++++++++++++++++++++++
src/i965_gpe_utils.h | 5 ++++
2 files changed, 78 insertions(+)
diff --git a/src/i965_gpe_utils.c b/src/i965_gpe_utils.c
index 46960e6..ca8c86a 100644
--- a/src/i965_gpe_utils.c
+++ b/src/i965_gpe_utils.c
@@ -2439,3 +2439,76 @@ gen8_gpe_mi_conditional_batch_buffer_end(VADriverContextP ctx,
param->offset);
}
+
+void
+gen8_gpe_pipe_control(VADriverContextP ctx,
+ struct intel_batchbuffer *batch,
+ struct gpe_pipe_control_parameter *param)
+{
+ int render_target_cache_flush_enable = CMD_PIPE_CONTROL_WC_FLUSH;
+ int dc_flush_enable = 0;
+ int state_cache_invalidation_enable = 0;
+ int constant_cache_invalidation_enable = 0;
+ int vf_cache_invalidation_enable = 0;
+ int instruction_cache_invalidation_enable = 0;
+ int post_sync_operation = CMD_PIPE_CONTROL_NOWRITE;
+ int use_global_gtt = CMD_PIPE_CONTROL_GLOBAL_GTT_GEN8;
+ int cs_stall_enable = !param->disable_cs_stall;
+
+ switch (param->flush_mode) {
+ case PIPE_CONTROL_FLUSH_WRITE_CACHE:
+ render_target_cache_flush_enable = CMD_PIPE_CONTROL_WC_FLUSH;
+ dc_flush_enable = CMD_PIPE_CONTROL_DC_FLUSH;
+ break;
+
+ case PIPE_CONTROL_FLUSH_READ_CACHE:
+ render_target_cache_flush_enable = 0;
+ state_cache_invalidation_enable = CMD_PIPE_CONTROL_SC_INVALIDATION_GEN8;
+ constant_cache_invalidation_enable = CMD_PIPE_CONTROL_CC_INVALIDATION_GEN8;
+ vf_cache_invalidation_enable = CMD_PIPE_CONTROL_VFC_INVALIDATION_GEN8;
+ instruction_cache_invalidation_enable = CMD_PIPE_CONTROL_IS_FLUSH;
+ break;
+
+ case PIPE_CONTROL_FLUSH_NONE:
+ default:
+ render_target_cache_flush_enable = 0;
+ break;
+ }
+
+ if (param->bo) {
+ post_sync_operation = CMD_PIPE_CONTROL_WRITE_QWORD;
+ use_global_gtt = CMD_PIPE_CONTROL_LOCAL_PGTT_GEN8;
+ } else {
+ post_sync_operation = CMD_PIPE_CONTROL_NOWRITE;
+ render_target_cache_flush_enable = CMD_PIPE_CONTROL_WC_FLUSH;
+ state_cache_invalidation_enable = CMD_PIPE_CONTROL_SC_INVALIDATION_GEN8;
+ constant_cache_invalidation_enable = CMD_PIPE_CONTROL_CC_INVALIDATION_GEN8;
+ vf_cache_invalidation_enable = CMD_PIPE_CONTROL_VFC_INVALIDATION_GEN8;
+ instruction_cache_invalidation_enable = CMD_PIPE_CONTROL_IS_FLUSH;
+ }
+
+ __OUT_BATCH(batch, CMD_PIPE_CONTROL | (6 - 2));
+ __OUT_BATCH(batch, (render_target_cache_flush_enable |
+ dc_flush_enable |
+ state_cache_invalidation_enable |
+ constant_cache_invalidation_enable |
+ vf_cache_invalidation_enable |
+ instruction_cache_invalidation_enable |
+ post_sync_operation |
+ use_global_gtt |
+ cs_stall_enable |
+ CMD_PIPE_CONTROL_FLUSH_ENABLE));
+
+ if (param->bo)
+ __OUT_RELOC64(batch,
+ param->bo,
+ I915_GEM_DOMAIN_RENDER | I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_RENDER,
+ param->offset);
+ else {
+ __OUT_BATCH(batch, 0);
+ __OUT_BATCH(batch, 0);
+ }
+
+ __OUT_BATCH(batch, param->dw0);
+ __OUT_BATCH(batch, param->dw1);
+}
diff --git a/src/i965_gpe_utils.h b/src/i965_gpe_utils.h
index 3e10cf8..8701150 100644
--- a/src/i965_gpe_utils.h
+++ b/src/i965_gpe_utils.h
@@ -550,4 +550,9 @@ gen8_gpe_mi_conditional_batch_buffer_end(VADriverContextP ctx,
struct intel_batchbuffer *batch,
struct gpe_mi_conditional_batch_buffer_end_parameter *param);
+extern void
+gen8_gpe_pipe_control(VADriverContextP ctx,
+ struct intel_batchbuffer *batch,
+ struct gpe_pipe_control_parameter *param);
+
#endif /* _I965_GPE_UTILS_H_ */
Xiang, Haihao
2016-11-18 05:43:57 UTC
Permalink
v2: Add support for override_offset, clean up the code for 2D surface.

Signed-off-by: Xiang, Haihao <***@intel.com>
---
src/i965_gpe_utils.c | 217 +++++++++++++++++++++++++++++++++++++++++++++++++++
src/i965_gpe_utils.h | 5 ++
2 files changed, 222 insertions(+)

diff --git a/src/i965_gpe_utils.c b/src/i965_gpe_utils.c
index d1b1941..058e6b6 100644
--- a/src/i965_gpe_utils.c
+++ b/src/i965_gpe_utils.c
@@ -2206,3 +2206,220 @@ gen8_gpe_reset_binding_table(VADriverContextP ctx, struct i965_gpe_context *gpe_

dri_bo_unmap(gpe_context->surface_state_binding_table.bo);
}
+/* Zero *ss and program it as a 2D surface state from the given geometry, format, tiling and base offset. */
+static void
+gen8_gpe_set_2d_surface_state(struct gen8_surface_state *ss,
+ unsigned int vert_line_stride_offset,
+ unsigned int vert_line_stride,
+ unsigned int cacheability_control,
+ unsigned int format,
+ unsigned int tiling,
+ unsigned int width,
+ unsigned int height,
+ unsigned int pitch,
+ unsigned int base_offset,
+ unsigned int y_offset)
+{
+ memset(ss, 0, sizeof(*ss)); /* every field not set below stays 0 */
+
+ ss->ss0.vert_line_stride_ofs = vert_line_stride_offset;
+ ss->ss0.vert_line_stride = vert_line_stride;
+ ss->ss0.surface_format = format;
+ ss->ss0.surface_type = I965_SURFACE_2D;
+
+ ss->ss1.surface_mocs = cacheability_control;
+
+ ss->ss2.width = width - 1; /* width, height and pitch are stored minus one */
+ ss->ss2.height = height - 1;
+
+ ss->ss3.pitch = pitch - 1;
+
+ ss->ss5.y_offset = y_offset;
+
+ ss->ss7.shader_chanel_select_a = HSW_SCS_ALPHA; /* each channel selects its own component */
+ ss->ss7.shader_chanel_select_b = HSW_SCS_BLUE;
+ ss->ss7.shader_chanel_select_g = HSW_SCS_GREEN;
+ ss->ss7.shader_chanel_select_r = HSW_SCS_RED;
+
+ ss->ss8.base_addr = base_offset; /* NOTE(review): base_offset is unsigned int, so a 64-bit address is truncated here; the caller also emits a relocation at ss8 */
+
+ gen8_gpe_set_surface_tiling(ss, tiling);
+}
+/* Zero *ss and program it as a gen8_surface_state2 with interleaved chroma located at y_cb_offset. */
+static void
+gen8_gpe_set_adv_surface_state(struct gen8_surface_state2 *ss,
+ unsigned int v_direction,
+ unsigned int cacheability_control,
+ unsigned int format,
+ unsigned int tiling,
+ unsigned int width,
+ unsigned int height,
+ unsigned int pitch,
+ unsigned int base_offset,
+ unsigned int y_cb_offset)
+{
+ memset(ss, 0, sizeof(*ss)); /* every field not set below stays 0 */
+
+ ss->ss1.cbcr_pixel_offset_v_direction = v_direction;
+ ss->ss1.width = width - 1; /* width, height and pitch are stored minus one */
+ ss->ss1.height = height - 1;
+
+ ss->ss2.surface_format = format;
+ ss->ss2.interleave_chroma = 1; /* always set by this helper */
+ ss->ss2.pitch = pitch - 1;
+
+ ss->ss3.y_offset_for_cb = y_cb_offset;
+
+ ss->ss5.surface_object_mocs = cacheability_control;
+
+ ss->ss6.base_addr = base_offset; /* NOTE(review): unsigned int parameter, so only the low 32 bits of an address fit */
+
+ gen8_gpe_set_surface2_tiling(ss, tiling);
+}
+/* Zero *ss and program it as a buffer surface; (size - 1) is split across the width, height and depth fields. */
+static void
+gen8_gpe_set_buffer2_surface_state(struct gen8_surface_state *ss,
+ unsigned int cacheability_control,
+ unsigned int format,
+ unsigned int size,
+ unsigned int pitch,
+ unsigned int base_offset)
+{
+ memset(ss, 0, sizeof(*ss)); /* every field not set below stays 0 */
+
+ ss->ss0.surface_format = format;
+ ss->ss0.surface_type = I965_SURFACE_BUFFER;
+
+ ss->ss1.surface_mocs = cacheability_control;
+
+ ss->ss2.width = (size - 1) & 0x7F; /* bits 0-6 of (size - 1); size == 0 would wrap, assumed non-zero */
+ ss->ss2.height = ((size - 1) & 0x1FFF80) >> 7; /* bits 7-20 of (size - 1) */
+
+ ss->ss3.depth = ((size - 1) & 0xFE00000) >> 21; /* bits 21-27 of (size - 1) */
+ ss->ss3.pitch = pitch - 1; /* stored minus one */
+
+ ss->ss7.shader_chanel_select_a = HSW_SCS_ALPHA; /* each channel selects its own component */
+ ss->ss7.shader_chanel_select_b = HSW_SCS_BLUE;
+ ss->ss7.shader_chanel_select_g = HSW_SCS_GREEN;
+ ss->ss7.shader_chanel_select_r = HSW_SCS_RED;
+
+ ss->ss8.base_addr = base_offset; /* NOTE(review): unsigned int parameter, so only the low 32 bits of an address fit */
+}
+/* Write the surface state and binding-table entry for slot `index` of gpe_context and emit the matching relocation. */
+void
+gen8_gpe_context_add_surface(struct i965_gpe_context *gpe_context,
+ struct i965_gpe_surface *gpe_surface,
+ int index)
+{
+ char *buf;
+ unsigned int tiling, swizzle, width, height, pitch, tile_alignment, y_offset = 0;
+ unsigned int surface_state_offset = gpe_context->surface_state_binding_table.surface_state_offset +
+ index * SURFACE_STATE_PADDED_SIZE_GEN8; /* byte offset of this slot's surface state inside the bo */
+ unsigned int binding_table_offset = gpe_context->surface_state_binding_table.binding_table_offset +
+ index * 4; /* binding-table entries are 4 bytes each */
+ struct i965_gpe_resource *gpe_resource = gpe_surface->gpe_resource;
+
+ dri_bo_get_tiling(gpe_resource->bo, &tiling, &swizzle); /* swizzle is fetched but not used below */
+
+ dri_bo_map(gpe_context->surface_state_binding_table.bo, 1); /* return value is not checked */
+ buf = (char *)gpe_context->surface_state_binding_table.bo->virtual;
+ *((unsigned int *)(buf + binding_table_offset)) = surface_state_offset; /* point the binding-table entry at the surface state */
+
+ if (gpe_surface->is_2d_surface) {
+ struct gen8_surface_state *ss = (struct gen8_surface_state *)(buf + surface_state_offset);
+ unsigned int target_offset;
+
+ width = gpe_resource->width;
+ height = gpe_resource->height;
+ pitch = gpe_resource->pitch;
+
+ if (gpe_surface->is_override_offset) { /* caller-supplied offset takes priority over the UV handling */
+ y_offset = 0;
+ target_offset = gpe_surface->offset;
+ } else if (gpe_surface->is_uv_surface) {
+ height /= 2; /* the UV surface uses half the resource height */
+
+ if (tiling == I915_TILING_Y) {
+ tile_alignment = 32; /* row alignment used for Y tiling */
+ } else if (tiling == I915_TILING_X) {
+ tile_alignment = 8; /* row alignment used for X tiling */
+ } else
+ tile_alignment = 1; /* any other tiling value: no alignment */
+
+ y_offset = (gpe_resource->y_cb_offset % tile_alignment); /* rows left over after aligning the base */
+ target_offset = ALIGN_FLOOR(gpe_resource->y_cb_offset, tile_alignment) * pitch; /* aligned row count converted to bytes */
+ } else {
+ y_offset = 0;
+ target_offset = 0;
+ }
+
+ if (gpe_surface->is_media_block_rw) {
+ width = (ALIGN(width, 4) >> 2); /* width rounded up to a multiple of 4, then divided by 4 */
+ }
+
+ gen8_gpe_set_2d_surface_state(ss,
+ gpe_surface->vert_line_stride_offset,
+ gpe_surface->vert_line_stride,
+ gpe_surface->cacheability_control,
+ gpe_surface->format,
+ tiling,
+ width, height, pitch,
+ gpe_resource->bo->offset64 + target_offset, /* NOTE(review): 64-bit sum passed to an unsigned int parameter */
+ y_offset);
+
+ dri_bo_emit_reloc(gpe_context->surface_state_binding_table.bo,
+ I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
+ target_offset,
+ surface_state_offset + offsetof(struct gen8_surface_state, ss8), /* relocation lands on the base-address dword */
+ gpe_resource->bo);
+ } else if (gpe_surface->is_adv_surface) {
+ struct gen8_surface_state2 *ss = (struct gen8_surface_state2 *)(buf + surface_state_offset);
+
+ width = gpe_resource->width;
+ height = gpe_resource->height;
+ pitch = gpe_resource->pitch;
+
+ gen8_gpe_set_adv_surface_state(ss,
+ gpe_surface->v_direction,
+ gpe_surface->cacheability_control,
+ MFX_SURFACE_PLANAR_420_8, /* format is fixed; gpe_surface->format is not used on this path */
+ tiling,
+ width, height, pitch,
+ gpe_resource->bo->offset64,
+ gpe_resource->y_cb_offset);
+
+ dri_bo_emit_reloc(gpe_context->surface_state_binding_table.bo,
+ I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
+ 0,
+ surface_state_offset + offsetof(struct gen8_surface_state2, ss6), /* relocation lands on the base-address dword */
+ gpe_resource->bo);
+ } else {
+ struct gen8_surface_state *ss = (struct gen8_surface_state *)(buf + surface_state_offset);
+ unsigned int format;
+
+ assert(gpe_surface->is_buffer); /* the only remaining supported kind is a buffer */
+
+ if (gpe_surface->is_raw_buffer) {
+ format = I965_SURFACEFORMAT_RAW;
+ pitch = 1; /* raw buffers use a 1-byte pitch */
+ } else {
+ format = I965_SURFACEFORMAT_R32_UINT;
+ pitch = sizeof(unsigned int); /* one R32_UINT element per pitch */
+ }
+
+ gen8_gpe_set_buffer2_surface_state(ss,
+ gpe_surface->cacheability_control,
+ format,
+ gpe_surface->size,
+ pitch,
+ gpe_resource->bo->offset64 + gpe_surface->offset); /* NOTE(review): 64-bit sum passed to an unsigned int parameter */
+
+ dri_bo_emit_reloc(gpe_context->surface_state_binding_table.bo,
+ I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
+ gpe_surface->offset,
+ surface_state_offset + offsetof(struct gen8_surface_state, ss8), /* relocation lands on the base-address dword */
+ gpe_resource->bo);
+ }
+
+ dri_bo_unmap(gpe_context->surface_state_binding_table.bo);
+}
diff --git a/src/i965_gpe_utils.h b/src/i965_gpe_utils.h
index 076f584..cbf3b05 100644
--- a/src/i965_gpe_utils.h
+++ b/src/i965_gpe_utils.h
@@ -526,4 +526,9 @@ intel_vpp_init_media_object_walker_parameter(struct intel_vpp_kernel_walker_para
extern void
gen8_gpe_reset_binding_table(VADriverContextP ctx, struct i965_gpe_context *gpe_context);

+extern void
+gen8_gpe_context_add_surface(struct i965_gpe_context *gpe_context,
+ struct i965_gpe_surface *gpe_surface,
+ int index);
+
#endif /* _I965_GPE_UTILS_H_ */
--
1.9.1
Zhao Yakui
2016-11-18 06:50:49 UTC
Permalink
Post by Xiang, Haihao
v2: Add support for override_offset, clean up the code for 2D surface.
This looks good to me.

Thanks
Post by Xiang, Haihao
---
src/i965_gpe_utils.c | 217 +++++++++++++++++++++++++++++++++++++++++++++++++++
src/i965_gpe_utils.h | 5 ++
2 files changed, 222 insertions(+)
diff --git a/src/i965_gpe_utils.c b/src/i965_gpe_utils.c
index d1b1941..058e6b6 100644
--- a/src/i965_gpe_utils.c
+++ b/src/i965_gpe_utils.c
@@ -2206,3 +2206,220 @@ gen8_gpe_reset_binding_table(VADriverContextP ctx, struct i965_gpe_context *gpe_
dri_bo_unmap(gpe_context->surface_state_binding_table.bo);
}
+
+static void
+gen8_gpe_set_2d_surface_state(struct gen8_surface_state *ss,
+ unsigned int vert_line_stride_offset,
+ unsigned int vert_line_stride,
+ unsigned int cacheability_control,
+ unsigned int format,
+ unsigned int tiling,
+ unsigned int width,
+ unsigned int height,
+ unsigned int pitch,
+ unsigned int base_offset,
+ unsigned int y_offset)
+{
+ memset(ss, 0, sizeof(*ss));
+
+ ss->ss0.vert_line_stride_ofs = vert_line_stride_offset;
+ ss->ss0.vert_line_stride = vert_line_stride;
+ ss->ss0.surface_format = format;
+ ss->ss0.surface_type = I965_SURFACE_2D;
+
+ ss->ss1.surface_mocs = cacheability_control;
+
+ ss->ss2.width = width - 1;
+ ss->ss2.height = height - 1;
+
+ ss->ss3.pitch = pitch - 1;
+
+ ss->ss5.y_offset = y_offset;
+
+ ss->ss7.shader_chanel_select_a = HSW_SCS_ALPHA;
+ ss->ss7.shader_chanel_select_b = HSW_SCS_BLUE;
+ ss->ss7.shader_chanel_select_g = HSW_SCS_GREEN;
+ ss->ss7.shader_chanel_select_r = HSW_SCS_RED;
+
+ ss->ss8.base_addr = base_offset;
+
+ gen8_gpe_set_surface_tiling(ss, tiling);
+}
+
+static void
+gen8_gpe_set_adv_surface_state(struct gen8_surface_state2 *ss,
+ unsigned int v_direction,
+ unsigned int cacheability_control,
+ unsigned int format,
+ unsigned int tiling,
+ unsigned int width,
+ unsigned int height,
+ unsigned int pitch,
+ unsigned int base_offset,
+ unsigned int y_cb_offset)
+{
+ memset(ss, 0, sizeof(*ss));
+
+ ss->ss1.cbcr_pixel_offset_v_direction = v_direction;
+ ss->ss1.width = width - 1;
+ ss->ss1.height = height - 1;
+
+ ss->ss2.surface_format = format;
+ ss->ss2.interleave_chroma = 1;
+ ss->ss2.pitch = pitch - 1;
+
+ ss->ss3.y_offset_for_cb = y_cb_offset;
+
+ ss->ss5.surface_object_mocs = cacheability_control;
+
+ ss->ss6.base_addr = base_offset;
+
+ gen8_gpe_set_surface2_tiling(ss, tiling);
+}
+
+static void
+gen8_gpe_set_buffer2_surface_state(struct gen8_surface_state *ss,
+ unsigned int cacheability_control,
+ unsigned int format,
+ unsigned int size,
+ unsigned int pitch,
+ unsigned int base_offset)
+{
+ memset(ss, 0, sizeof(*ss));
+
+ ss->ss0.surface_format = format;
+ ss->ss0.surface_type = I965_SURFACE_BUFFER;
+
+ ss->ss1.surface_mocs = cacheability_control;
+
+ ss->ss2.width = (size - 1)& 0x7F;
+ ss->ss2.height = ((size - 1)& 0x1FFF80)>> 7;
+
+ ss->ss3.depth = ((size - 1)& 0xFE00000)>> 21;
+ ss->ss3.pitch = pitch - 1;
+
+ ss->ss7.shader_chanel_select_a = HSW_SCS_ALPHA;
+ ss->ss7.shader_chanel_select_b = HSW_SCS_BLUE;
+ ss->ss7.shader_chanel_select_g = HSW_SCS_GREEN;
+ ss->ss7.shader_chanel_select_r = HSW_SCS_RED;
+
+ ss->ss8.base_addr = base_offset;
+}
+
+void
+gen8_gpe_context_add_surface(struct i965_gpe_context *gpe_context,
+ struct i965_gpe_surface *gpe_surface,
+ int index)
+{
+ char *buf;
+ unsigned int tiling, swizzle, width, height, pitch, tile_alignment, y_offset = 0;
+ unsigned int surface_state_offset = gpe_context->surface_state_binding_table.surface_state_offset +
+ index * SURFACE_STATE_PADDED_SIZE_GEN8;
+ unsigned int binding_table_offset = gpe_context->surface_state_binding_table.binding_table_offset +
+ index * 4;
+ struct i965_gpe_resource *gpe_resource = gpe_surface->gpe_resource;
+
+ dri_bo_get_tiling(gpe_resource->bo,&tiling,&swizzle);
+
+ dri_bo_map(gpe_context->surface_state_binding_table.bo, 1);
+ buf = (char *)gpe_context->surface_state_binding_table.bo->virtual;
+ *((unsigned int *)(buf + binding_table_offset)) = surface_state_offset;
+
+ if (gpe_surface->is_2d_surface) {
+ struct gen8_surface_state *ss = (struct gen8_surface_state *)(buf + surface_state_offset);
+ unsigned int target_offset;
+
+ width = gpe_resource->width;
+ height = gpe_resource->height;
+ pitch = gpe_resource->pitch;
+
+ if (gpe_surface->is_override_offset) {
+ y_offset = 0;
+ target_offset = gpe_surface->offset;
+ } else if (gpe_surface->is_uv_surface) {
+ height /= 2;
+
+ if (tiling == I915_TILING_Y) {
+ tile_alignment = 32;
+ } else if (tiling == I915_TILING_X) {
+ tile_alignment = 8;
+ } else
+ tile_alignment = 1;
+
+ y_offset = (gpe_resource->y_cb_offset % tile_alignment);
+ target_offset = ALIGN_FLOOR(gpe_resource->y_cb_offset, tile_alignment) * pitch;
+ } else {
+ y_offset = 0;
+ target_offset = 0;
+ }
+
+ if (gpe_surface->is_media_block_rw) {
+ width = (ALIGN(width, 4)>> 2);
+ }
+
+ gen8_gpe_set_2d_surface_state(ss,
+ gpe_surface->vert_line_stride_offset,
+ gpe_surface->vert_line_stride,
+ gpe_surface->cacheability_control,
+ gpe_surface->format,
+ tiling,
+ width, height, pitch,
+ gpe_resource->bo->offset64 + target_offset,
+ y_offset);
+
+ dri_bo_emit_reloc(gpe_context->surface_state_binding_table.bo,
+ I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
+ target_offset,
+ surface_state_offset + offsetof(struct gen8_surface_state, ss8),
+ gpe_resource->bo);
+ } else if (gpe_surface->is_adv_surface) {
+ struct gen8_surface_state2 *ss = (struct gen8_surface_state2 *)(buf + surface_state_offset);
+
+ width = gpe_resource->width;
+ height = gpe_resource->height;
+ pitch = gpe_resource->pitch;
+
+ gen8_gpe_set_adv_surface_state(ss,
+ gpe_surface->v_direction,
+ gpe_surface->cacheability_control,
+ MFX_SURFACE_PLANAR_420_8,
+ tiling,
+ width, height, pitch,
+ gpe_resource->bo->offset64,
+ gpe_resource->y_cb_offset);
+
+ dri_bo_emit_reloc(gpe_context->surface_state_binding_table.bo,
+ I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
+ 0,
+ surface_state_offset + offsetof(struct gen8_surface_state2, ss6),
+ gpe_resource->bo);
+ } else {
+ struct gen8_surface_state *ss = (struct gen8_surface_state *)(buf + surface_state_offset);
+ unsigned int format;
+
+ assert(gpe_surface->is_buffer);
+
+ if (gpe_surface->is_raw_buffer) {
+ format = I965_SURFACEFORMAT_RAW;
+ pitch = 1;
+ } else {
+ format = I965_SURFACEFORMAT_R32_UINT;
+ pitch = sizeof(unsigned int);
+ }
+
+ gen8_gpe_set_buffer2_surface_state(ss,
+ gpe_surface->cacheability_control,
+ format,
+ gpe_surface->size,
+ pitch,
+ gpe_resource->bo->offset64 + gpe_surface->offset);
+
+ dri_bo_emit_reloc(gpe_context->surface_state_binding_table.bo,
+ I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
+ gpe_surface->offset,
+ surface_state_offset + offsetof(struct gen8_surface_state, ss8),
+ gpe_resource->bo);
+ }
+
+ dri_bo_unmap(gpe_context->surface_state_binding_table.bo);
+}
diff --git a/src/i965_gpe_utils.h b/src/i965_gpe_utils.h
index 076f584..cbf3b05 100644
--- a/src/i965_gpe_utils.h
+++ b/src/i965_gpe_utils.h
@@ -526,4 +526,9 @@ intel_vpp_init_media_object_walker_parameter(struct intel_vpp_kernel_walker_para
extern void
gen8_gpe_reset_binding_table(VADriverContextP ctx, struct i965_gpe_context *gpe_context);
+extern void
+gen8_gpe_context_add_surface(struct i965_gpe_context *gpe_context,
+ struct i965_gpe_surface *gpe_surface,
+ int index);
+
#endif /* _I965_GPE_UTILS_H_ */
Loading...