Discussion:
[LIBVA_INTEL_DRIVER][PATCH 1/4] Query the kernel API to check the EU counts of GPU device
(too old to reply)
Zhao Yakui
2016-11-17 02:06:48 UTC
Permalink
This info can be used to configure the maximum number of EU threads of the GPU device:
max EU threads = eu_total * 6
Signed-off-by: Zhao Yakui <***@intel.com>
---
src/intel_driver.c | 13 +++++++++++++
src/intel_driver.h | 3 +++
2 files changed, 16 insertions(+)

diff --git a/src/intel_driver.c b/src/intel_driver.c
index bb19401..a2c8c71 100644
--- a/src/intel_driver.c
+++ b/src/intel_driver.c
@@ -50,6 +50,12 @@ uint32_t g_intel_debug_option_flags = 0;
#define LOCAL_I915_PARAM_HAS_HUC 42
#endif

+#ifdef I915_PARAM_EU_TOTAL
+#define LOCAL_I915_PARAM_EU_TOTAL I915_PARAM_EU_TOTAL
+#else
+#define LOCAL_I915_PARAM_EU_TOTAL 34
+#endif
+
static Bool
intel_driver_get_param(struct intel_driver_data *intel, int param, int *value)
{
@@ -142,6 +148,13 @@ intel_driver_init(VADriverContextP ctx)
if (intel_driver_get_param(intel, LOCAL_I915_PARAM_HAS_HUC, &ret_value))
intel->has_huc = !!ret_value;

+ intel->has_eu_flag = 0;
+ intel->eu_total = 0;
+ if (intel_driver_get_param(intel, LOCAL_I915_PARAM_EU_TOTAL, &ret_value)) {
+ intel->has_eu_flag = !!ret_value;
+ intel->eu_total = ret_value;
+ }
+
intel_driver_get_revid(intel, &intel->revision);
return true;
}
diff --git a/src/intel_driver.h b/src/intel_driver.h
index dcdc03b..a02bfa8 100644
--- a/src/intel_driver.h
+++ b/src/intel_driver.h
@@ -182,6 +182,9 @@ struct intel_driver_data
unsigned int has_vebox : 1; /* Flag: has VEBOX unit */
unsigned int has_bsd2 : 1; /* Flag: has the second BSD video ring unit */
unsigned int has_huc : 1; /* Flag: has a fully loaded HuC firmware? */
+ unsigned int has_eu_flag : 1; /* Flag: Kernel will return EU counts */
+
+ int eu_total;

const struct intel_device_info *device_info;
};
--
2.8.3
Zhao Yakui
2016-11-17 02:06:49 UTC
Permalink
This will help to make good use of HW EU resources.
If it is not supported, it will fall back to the original config.


Signed-off-by: Zhao Yakui <***@intel.com>
---
src/gen8_mfc.c | 6 +++++-
src/gen8_post_processing.c | 5 ++++-
src/gen8_vme.c | 7 ++++++-
src/gen9_post_processing.c | 12 ++++++++----
src/gen9_vme.c | 8 +++++++-
src/gen9_vp9_encoder.c | 23 +++++++++++++++--------
6 files changed, 45 insertions(+), 16 deletions(-)

diff --git a/src/gen8_mfc.c b/src/gen8_mfc.c
index 63ffea5..634a500 100644
--- a/src/gen8_mfc.c
+++ b/src/gen8_mfc.c
@@ -4612,7 +4612,11 @@ Bool gen8_mfc_context_init(VADriverContextP ctx, struct intel_encoder_context *e
mfc_context->gpe_context.curbe_size = 32 * 4;
mfc_context->gpe_context.sampler_size = 0;

- mfc_context->gpe_context.vfe_state.max_num_threads = 60 - 1;
+ if (i965->intel.has_eu_flag)
+ mfc_context->gpe_context.vfe_state.max_num_threads = 6 * i965->intel.eu_total;
+ else
+ mfc_context->gpe_context.vfe_state.max_num_threads = 60 - 1;
+
mfc_context->gpe_context.vfe_state.num_urb_entries = 16;
mfc_context->gpe_context.vfe_state.gpgpu_mode = 0;
mfc_context->gpe_context.vfe_state.urb_entry_size = 59 - 1;
diff --git a/src/gen8_post_processing.c b/src/gen8_post_processing.c
index 708918b..cabf06e 100644
--- a/src/gen8_post_processing.c
+++ b/src/gen8_post_processing.c
@@ -1592,7 +1592,10 @@ gen8_post_processing_context_common_init(VADriverContextP ctx,
struct pp_module *pp_module;
struct i965_post_processing_context *pp_context = data;

- pp_context->vfe_gpu_state.max_num_threads = 60;
+ if (i965->intel.has_eu_flag)
+ pp_context->vfe_gpu_state.max_num_threads = 6 * i965->intel.eu_total;
+ else
+ pp_context->vfe_gpu_state.max_num_threads = 60;
pp_context->vfe_gpu_state.num_urb_entries = 59;
pp_context->vfe_gpu_state.gpgpu_mode = 0;
pp_context->vfe_gpu_state.urb_entry_size = 16 - 1;
diff --git a/src/gen8_vme.c b/src/gen8_vme.c
index c79c62b..fd16ac8 100644
--- a/src/gen8_vme.c
+++ b/src/gen8_vme.c
@@ -1333,6 +1333,7 @@ gen8_vme_context_destroy(void *context)

Bool gen8_vme_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
{
+ struct i965_driver_data *i965 = i965_driver_data(ctx);
struct gen6_vme_context *vme_context = NULL;
struct i965_kernel *vme_kernel_list = NULL;
int i965_kernel_num;
@@ -1382,8 +1383,12 @@ Bool gen8_vme_context_init(VADriverContextP ctx, struct intel_encoder_context *e
vme_context->gpe_context.curbe_size = CURBE_TOTAL_DATA_LENGTH;
vme_context->gpe_context.sampler_size = 0;

+ if (i965->intel.has_eu_flag) {
+ vme_context->gpe_context.vfe_state.max_num_threads = 6 *
+ i965->intel.eu_total;
+ } else
+ vme_context->gpe_context.vfe_state.max_num_threads = 60 - 1;

- vme_context->gpe_context.vfe_state.max_num_threads = 60 - 1;
vme_context->gpe_context.vfe_state.num_urb_entries = 64;
vme_context->gpe_context.vfe_state.gpgpu_mode = 0;
vme_context->gpe_context.vfe_state.urb_entry_size = 16;
diff --git a/src/gen9_post_processing.c b/src/gen9_post_processing.c
index a5d345c..3ea0908 100644
--- a/src/gen9_post_processing.c
+++ b/src/gen9_post_processing.c
@@ -546,10 +546,14 @@ gen9_post_processing_context_init(VADriverContextP ctx,
gpe_context->surface_state_binding_table.surface_state_offset = ALIGN(MAX_SCALING_SURFACES * 4, 64);
gpe_context->surface_state_binding_table.length = ALIGN(MAX_SCALING_SURFACES * 4, 64) + ALIGN(MAX_SCALING_SURFACES * SURFACE_STATE_PADDED_SIZE_GEN9, 64);

- if (i965->intel.has_bsd2)
- gpe_context->vfe_state.max_num_threads = 300;
- else
- gpe_context->vfe_state.max_num_threads = 60;
+ if (i965->intel.has_eu_flag) {
+ gpe_context->vfe_state.max_num_threads = i965->intel.eu_total * 6;
+ } else {
+ if (i965->intel.has_bsd2)
+ gpe_context->vfe_state.max_num_threads = 300;
+ else
+ gpe_context->vfe_state.max_num_threads = 60;
+ }

gpe_context->vfe_state.curbe_allocation_size = 37;
gpe_context->vfe_state.urb_entry_size = 16;
diff --git a/src/gen9_vme.c b/src/gen9_vme.c
index 6ad8fff..bbaec3c 100644
--- a/src/gen9_vme.c
+++ b/src/gen9_vme.c
@@ -1978,6 +1978,7 @@ gen9_vme_context_destroy(void *context)

Bool gen9_vme_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
{
+ struct i965_driver_data *i965 = i965_driver_data(ctx);
struct gen6_vme_context *vme_context;
struct i965_kernel *vme_kernel_list = NULL;
int i965_kernel_num;
@@ -2036,7 +2037,12 @@ Bool gen9_vme_context_init(VADriverContextP ctx, struct intel_encoder_context *e
vme_context->gpe_context.sampler_size = 0;


- vme_context->gpe_context.vfe_state.max_num_threads = 60 - 1;
+ if (i965->intel.has_eu_flag) {
+ vme_context->gpe_context.vfe_state.max_num_threads = 6 *
+ i965->intel.eu_total;
+ } else
+ vme_context->gpe_context.vfe_state.max_num_threads = 60 - 1;
+
vme_context->gpe_context.vfe_state.num_urb_entries = 64;
vme_context->gpe_context.vfe_state.gpgpu_mode = 0;
vme_context->gpe_context.vfe_state.urb_entry_size = 16;
diff --git a/src/gen9_vp9_encoder.c b/src/gen9_vp9_encoder.c
index f39d6d0..0a54a36 100644
--- a/src/gen9_vp9_encoder.c
+++ b/src/gen9_vp9_encoder.c
@@ -3679,9 +3679,12 @@ gen9_vp9_mbenc_kernel(VADriverContextP ctx,
}

static void
-gen9_init_gpe_context_vp9(struct i965_gpe_context *gpe_context,
+gen9_init_gpe_context_vp9(VADriverContextP ctx,
+ struct i965_gpe_context *gpe_context,
struct vp9_encoder_kernel_parameter *kernel_param)
{
+ struct i965_driver_data *i965 = i965_driver_data(ctx);
+
gpe_context->curbe.length = kernel_param->curbe_size; // in bytes

gpe_context->curbe_size = ALIGN(kernel_param->curbe_size, 64);
@@ -3701,7 +3704,11 @@ gen9_init_gpe_context_vp9(struct i965_gpe_context *gpe_context,
gpe_context->surface_state_binding_table.surface_state_offset = ALIGN(MAX_VP9_ENCODER_SURFACES * 4, 64);
gpe_context->surface_state_binding_table.length = ALIGN(MAX_VP9_ENCODER_SURFACES * 4, 64) + ALIGN(MAX_VP9_ENCODER_SURFACES * SURFACE_STATE_PADDED_SIZE_GEN9, 64);

- gpe_context->vfe_state.max_num_threads = 112; // 16 EU * 7 threads
+ if (i965->intel.has_eu_flag)
+ gpe_context->vfe_state.max_num_threads = 6 * i965->intel.eu_total;
+ else
+ gpe_context->vfe_state.max_num_threads = 112; // 16 EU * 7 threads
+
gpe_context->vfe_state.curbe_allocation_size = MAX(1, ALIGN(gpe_context->curbe.length, 32) >> 5); // in registers
gpe_context->vfe_state.urb_entry_size = MAX(1, ALIGN(kernel_param->inline_data_size, 32) >> 5); // in registers
gpe_context->vfe_state.num_urb_entries = (MAX_URB_SIZE -
@@ -4607,7 +4614,7 @@ gen9_vme_scaling_context_init_vp9(VADriverContextP ctx,
scoreboard_param.walkpat_flag = 0;

gpe_context = &scaling_context->gpe_contexts[0];
- gen9_init_gpe_context_vp9(gpe_context, &kernel_param);
+ gen9_init_gpe_context_vp9(ctx, gpe_context, &kernel_param);
gen9_init_vfe_scoreboard_vp9(gpe_context, &scoreboard_param);

scaling_context->scaling_4x_bti.scaling_frame_src_y = VP9_BTI_SCALING_FRAME_SRC_Y;
@@ -4633,7 +4640,7 @@ gen9_vme_scaling_context_init_vp9(VADriverContextP ctx,
kernel_param.sampler_size = 0;

gpe_context = &scaling_context->gpe_contexts[1];
- gen9_init_gpe_context_vp9(gpe_context, &kernel_param);
+ gen9_init_gpe_context_vp9(ctx, gpe_context, &kernel_param);
gen9_init_vfe_scoreboard_vp9(gpe_context, &scoreboard_param);

memset(&scale_kernel, 0, sizeof(scale_kernel));
@@ -4675,7 +4682,7 @@ gen9_vme_me_context_init_vp9(VADriverContextP ctx,
scoreboard_param.walkpat_flag = 0;

gpe_context = &me_context->gpe_context;
- gen9_init_gpe_context_vp9(gpe_context, &kernel_param);
+ gen9_init_gpe_context_vp9(ctx, gpe_context, &kernel_param);
gen9_init_vfe_scoreboard_vp9(gpe_context, &scoreboard_param);

memset(&scale_kernel, 0, sizeof(scale_kernel));
@@ -4723,7 +4730,7 @@ gen9_vme_mbenc_context_init_vp9(VADriverContextP ctx,
} else
scoreboard_param.walkpat_flag = 0;

- gen9_init_gpe_context_vp9(gpe_context, &kernel_param);
+ gen9_init_gpe_context_vp9(ctx, gpe_context, &kernel_param);
gen9_init_vfe_scoreboard_vp9(gpe_context, &scoreboard_param);

memset(&scale_kernel, 0, sizeof(scale_kernel));
@@ -4763,7 +4770,7 @@ gen9_vme_brc_context_init_vp9(VADriverContextP ctx,

for (i = 0; i < NUM_VP9_BRC; i++) {
gpe_context = &brc_context->gpe_contexts[i];
- gen9_init_gpe_context_vp9(gpe_context, &kernel_param);
+ gen9_init_gpe_context_vp9(ctx, gpe_context, &kernel_param);
gen9_init_vfe_scoreboard_vp9(gpe_context, &scoreboard_param);

memset(&scale_kernel, 0, sizeof(scale_kernel));
@@ -4802,7 +4809,7 @@ gen9_vme_dys_context_init_vp9(VADriverContextP ctx,
scoreboard_param.walkpat_flag = 0;

gpe_context = &dys_context->gpe_context;
- gen9_init_gpe_context_vp9(gpe_context, &kernel_param);
+ gen9_init_gpe_context_vp9(ctx, gpe_context, &kernel_param);
gen9_init_vfe_scoreboard_vp9(gpe_context, &scoreboard_param);

memset(&scale_kernel, 0, sizeof(scale_kernel));
--
2.8.3
Charles, Daniel
2016-11-17 19:24:11 UTC
Permalink
Post by Zhao Yakui
This will help to make good use of HW EU resources.
If it is not supported, it will fall back to the original config.
---
src/gen8_mfc.c | 6 +++++-
src/gen8_post_processing.c | 5 ++++-
src/gen8_vme.c | 7 ++++++-
src/gen9_post_processing.c | 12 ++++++++----
src/gen9_vme.c | 8 +++++++-
src/gen9_vp9_encoder.c | 23 +++++++++++++++--------
6 files changed, 45 insertions(+), 16 deletions(-)
diff --git a/src/gen8_mfc.c b/src/gen8_mfc.c
index 63ffea5..634a500 100644
--- a/src/gen8_mfc.c
+++ b/src/gen8_mfc.c
@@ -4612,7 +4612,11 @@ Bool gen8_mfc_context_init(VADriverContextP ctx, struct intel_encoder_context *e
mfc_context->gpe_context.curbe_size = 32 * 4;
mfc_context->gpe_context.sampler_size = 0;
- mfc_context->gpe_context.vfe_state.max_num_threads = 60 - 1;
+ if (i965->intel.has_eu_flag)
+ mfc_context->gpe_context.vfe_state.max_num_threads = 6 * i965->intel.eu_total;
+ else
+ mfc_context->gpe_context.vfe_state.max_num_threads = 60 - 1;
+
mfc_context->gpe_context.vfe_state.num_urb_entries = 16;
mfc_context->gpe_context.vfe_state.gpgpu_mode = 0;
mfc_context->gpe_context.vfe_state.urb_entry_size = 59 - 1;
diff --git a/src/gen8_post_processing.c b/src/gen8_post_processing.c
index 708918b..cabf06e 100644
--- a/src/gen8_post_processing.c
+++ b/src/gen8_post_processing.c
@@ -1592,7 +1592,10 @@ gen8_post_processing_context_common_init(VADriverContextP ctx,
struct pp_module *pp_module;
struct i965_post_processing_context *pp_context = data;
- pp_context->vfe_gpu_state.max_num_threads = 60;
+ if (i965->intel.has_eu_flag)
+ pp_context->vfe_gpu_state.max_num_threads = 6 * i965->intel.eu_total;
+ else
+ pp_context->vfe_gpu_state.max_num_threads = 60;
pp_context->vfe_gpu_state.num_urb_entries = 59;
pp_context->vfe_gpu_state.gpgpu_mode = 0;
pp_context->vfe_gpu_state.urb_entry_size = 16 - 1;
diff --git a/src/gen8_vme.c b/src/gen8_vme.c
index c79c62b..fd16ac8 100644
--- a/src/gen8_vme.c
+++ b/src/gen8_vme.c
@@ -1333,6 +1333,7 @@ gen8_vme_context_destroy(void *context)
Bool gen8_vme_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
{
+ struct i965_driver_data *i965 = i965_driver_data(ctx);
struct gen6_vme_context *vme_context = NULL;
struct i965_kernel *vme_kernel_list = NULL;
int i965_kernel_num;
@@ -1382,8 +1383,12 @@ Bool gen8_vme_context_init(VADriverContextP ctx, struct intel_encoder_context *e
vme_context->gpe_context.curbe_size = CURBE_TOTAL_DATA_LENGTH;
vme_context->gpe_context.sampler_size = 0;
+ if (i965->intel.has_eu_flag) {
+ vme_context->gpe_context.vfe_state.max_num_threads = 6 *
+ i965->intel.eu_total;
+ } else
+ vme_context->gpe_context.vfe_state.max_num_threads = 60 - 1;
- vme_context->gpe_context.vfe_state.max_num_threads = 60 - 1;
vme_context->gpe_context.vfe_state.num_urb_entries = 64;
vme_context->gpe_context.vfe_state.gpgpu_mode = 0;
vme_context->gpe_context.vfe_state.urb_entry_size = 16;
diff --git a/src/gen9_post_processing.c b/src/gen9_post_processing.c
index a5d345c..3ea0908 100644
--- a/src/gen9_post_processing.c
+++ b/src/gen9_post_processing.c
@@ -546,10 +546,14 @@ gen9_post_processing_context_init(VADriverContextP ctx,
gpe_context->surface_state_binding_table.surface_state_offset = ALIGN(MAX_SCALING_SURFACES * 4, 64);
gpe_context->surface_state_binding_table.length = ALIGN(MAX_SCALING_SURFACES * 4, 64) + ALIGN(MAX_SCALING_SURFACES * SURFACE_STATE_PADDED_SIZE_GEN9, 64);
- if (i965->intel.has_bsd2)
- gpe_context->vfe_state.max_num_threads = 300;
- else
- gpe_context->vfe_state.max_num_threads = 60;
+ if (i965->intel.has_eu_flag) {
+ gpe_context->vfe_state.max_num_threads = i965->intel.eu_total * 6;
+ } else {
+ if (i965->intel.has_bsd2)
+ gpe_context->vfe_state.max_num_threads = 300;
+ else
+ gpe_context->vfe_state.max_num_threads = 60;
+ }
gpe_context->vfe_state.curbe_allocation_size = 37;
gpe_context->vfe_state.urb_entry_size = 16;
diff --git a/src/gen9_vme.c b/src/gen9_vme.c
index 6ad8fff..bbaec3c 100644
--- a/src/gen9_vme.c
+++ b/src/gen9_vme.c
@@ -1978,6 +1978,7 @@ gen9_vme_context_destroy(void *context)
Bool gen9_vme_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
{
+ struct i965_driver_data *i965 = i965_driver_data(ctx);
struct gen6_vme_context *vme_context;
struct i965_kernel *vme_kernel_list = NULL;
int i965_kernel_num;
@@ -2036,7 +2037,12 @@ Bool gen9_vme_context_init(VADriverContextP ctx, struct intel_encoder_context *e
vme_context->gpe_context.sampler_size = 0;
- vme_context->gpe_context.vfe_state.max_num_threads = 60 - 1;
+ if (i965->intel.has_eu_flag) {
+ vme_context->gpe_context.vfe_state.max_num_threads = 6 *
+ i965->intel.eu_total;
+ } else
+ vme_context->gpe_context.vfe_state.max_num_threads = 60 - 1;
+
vme_context->gpe_context.vfe_state.num_urb_entries = 64;
vme_context->gpe_context.vfe_state.gpgpu_mode = 0;
vme_context->gpe_context.vfe_state.urb_entry_size = 16;
diff --git a/src/gen9_vp9_encoder.c b/src/gen9_vp9_encoder.c
index f39d6d0..0a54a36 100644
--- a/src/gen9_vp9_encoder.c
+++ b/src/gen9_vp9_encoder.c
@@ -3679,9 +3679,12 @@ gen9_vp9_mbenc_kernel(VADriverContextP ctx,
}
static void
-gen9_init_gpe_context_vp9(struct i965_gpe_context *gpe_context,
+gen9_init_gpe_context_vp9(VADriverContextP ctx,
+ struct i965_gpe_context *gpe_context,
struct vp9_encoder_kernel_parameter *kernel_param)
{
+ struct i965_driver_data *i965 = i965_driver_data(ctx);
+
gpe_context->curbe.length = kernel_param->curbe_size; // in bytes
gpe_context->curbe_size = ALIGN(kernel_param->curbe_size, 64);
@@ -3701,7 +3704,11 @@ gen9_init_gpe_context_vp9(struct i965_gpe_context *gpe_context,
gpe_context->surface_state_binding_table.surface_state_offset = ALIGN(MAX_VP9_ENCODER_SURFACES * 4, 64);
gpe_context->surface_state_binding_table.length = ALIGN(MAX_VP9_ENCODER_SURFACES * 4, 64) + ALIGN(MAX_VP9_ENCODER_SURFACES * SURFACE_STATE_PADDED_SIZE_GEN9, 64);
- gpe_context->vfe_state.max_num_threads = 112; // 16 EU * 7 threads
+ if (i965->intel.has_eu_flag)
+ gpe_context->vfe_state.max_num_threads = 6 * i965->intel.eu_total;
+ else
+ gpe_context->vfe_state.max_num_threads = 112; // 16 EU * 7 threads
+
gpe_context->vfe_state.curbe_allocation_size = MAX(1, ALIGN(gpe_context->curbe.length, 32) >> 5); // in registers
gpe_context->vfe_state.urb_entry_size = MAX(1, ALIGN(kernel_param->inline_data_size, 32) >> 5); // in registers
gpe_context->vfe_state.num_urb_entries = (MAX_URB_SIZE -
@@ -4607,7 +4614,7 @@ gen9_vme_scaling_context_init_vp9(VADriverContextP ctx,
scoreboard_param.walkpat_flag = 0;
gpe_context = &scaling_context->gpe_contexts[0];
- gen9_init_gpe_context_vp9(gpe_context, &kernel_param);
+ gen9_init_gpe_context_vp9(ctx, gpe_context, &kernel_param);
gen9_init_vfe_scoreboard_vp9(gpe_context, &scoreboard_param);
scaling_context->scaling_4x_bti.scaling_frame_src_y = VP9_BTI_SCALING_FRAME_SRC_Y;
@@ -4633,7 +4640,7 @@ gen9_vme_scaling_context_init_vp9(VADriverContextP ctx,
kernel_param.sampler_size = 0;
gpe_context = &scaling_context->gpe_contexts[1];
- gen9_init_gpe_context_vp9(gpe_context, &kernel_param);
+ gen9_init_gpe_context_vp9(ctx, gpe_context, &kernel_param);
gen9_init_vfe_scoreboard_vp9(gpe_context, &scoreboard_param);
memset(&scale_kernel, 0, sizeof(scale_kernel));
@@ -4675,7 +4682,7 @@ gen9_vme_me_context_init_vp9(VADriverContextP ctx,
scoreboard_param.walkpat_flag = 0;
gpe_context = &me_context->gpe_context;
- gen9_init_gpe_context_vp9(gpe_context, &kernel_param);
+ gen9_init_gpe_context_vp9(ctx, gpe_context, &kernel_param);
gen9_init_vfe_scoreboard_vp9(gpe_context, &scoreboard_param);
memset(&scale_kernel, 0, sizeof(scale_kernel));
@@ -4723,7 +4730,7 @@ gen9_vme_mbenc_context_init_vp9(VADriverContextP ctx,
} else
scoreboard_param.walkpat_flag = 0;
- gen9_init_gpe_context_vp9(gpe_context, &kernel_param);
+ gen9_init_gpe_context_vp9(ctx, gpe_context, &kernel_param);
gen9_init_vfe_scoreboard_vp9(gpe_context, &scoreboard_param);
memset(&scale_kernel, 0, sizeof(scale_kernel));
@@ -4763,7 +4770,7 @@ gen9_vme_brc_context_init_vp9(VADriverContextP ctx,
for (i = 0; i < NUM_VP9_BRC; i++) {
gpe_context = &brc_context->gpe_contexts[i];
- gen9_init_gpe_context_vp9(gpe_context, &kernel_param);
+ gen9_init_gpe_context_vp9(ctx, gpe_context, &kernel_param);
gen9_init_vfe_scoreboard_vp9(gpe_context, &scoreboard_param);
memset(&scale_kernel, 0, sizeof(scale_kernel));
@@ -4802,7 +4809,7 @@ gen9_vme_dys_context_init_vp9(VADriverContextP ctx,
scoreboard_param.walkpat_flag = 0;
gpe_context = &dys_context->gpe_context;
- gen9_init_gpe_context_vp9(gpe_context, &kernel_param);
+ gen9_init_gpe_context_vp9(ctx, gpe_context, &kernel_param);
gen9_init_vfe_scoreboard_vp9(gpe_context, &scoreboard_param);
memset(&scale_kernel, 0, sizeof(scale_kernel));
This patch LGTM, and I have also verified it on hardware that supports the VP9 encoder.

Thanks,
--
Daniel
Post by Zhao Yakui
--
2.8.3
_______________________________________________
Libva mailing list
https://lists.freedesktop.org/mailman/listinfo/libva
Zhao Yakui
2016-11-17 02:06:50 UTC
Permalink
I010 surfaces are treated as non-tiled, in the same way as I420 surfaces.

Signed-off-by: Zhao Yakui <***@intel.com>
---
src/i965_drv_video.c | 23 +++++++++++++++++++++++
src/i965_fourcc.h | 4 ++++
2 files changed, 27 insertions(+)

diff --git a/src/i965_drv_video.c b/src/i965_drv_video.c
index 04670f4..fbb6407 100644
--- a/src/i965_drv_video.c
+++ b/src/i965_drv_video.c
@@ -96,6 +96,8 @@ static int get_sampling_from_fourcc(unsigned int fourcc);

#define I_P010 2, 2, 2, {I965_16BITS, I965_8BITS}, 3, { {PLANE_0, OFFSET_0}, {PLANE_1, OFFSET_0}, {PLANE_1, OFFSET_16} }

+#define I_I010 2, 2, 3, {I965_16BITS, I965_4BITS, I965_4BITS}, 3, { {PLANE_0, OFFSET_0}, {PLANE_1, OFFSET_0}, {PLANE_2, OFFSET_0} }
+
#define I_422H 2, 1, 3, {I965_8BITS, I965_4BITS, I965_4BITS}, 3, { {PLANE_0, OFFSET_0}, {PLANE_1, OFFSET_0}, {PLANE_2, OFFSET_0} }
#define I_422V 1, 2, 3, {I965_8BITS, I965_4BITS, I965_4BITS}, 3, { {PLANE_0, OFFSET_0}, {PLANE_1, OFFSET_0}, {PLANE_2, OFFSET_0} }
#define I_YV16 2, 1, 3, {I965_8BITS, I965_4BITS, I965_4BITS}, 3, { {PLANE_0, OFFSET_0}, {PLANE_2, OFFSET_0}, {PLANE_1, OFFSET_0} }
@@ -141,6 +143,7 @@ static const i965_fourcc_info i965_fourcc_infos[] = {
DEF_YUV(IMC1, YUV420, I_S),

DEF_YUV(P010, YUV420, I_SI),
+ DEF_YUV(I010, YUV420, I_S),

DEF_YUV(422H, YUV422H, I_SI),
DEF_YUV(422V, YUV422V, I_S),
@@ -1288,6 +1291,7 @@ i965_surface_native_memory(VADriverContextP ctx,
// todo, should we disable tiling for 422 format?
if (expected_fourcc == VA_FOURCC_I420 ||
expected_fourcc == VA_FOURCC_IYUV ||
+ expected_fourcc == VA_FOURCC_I010 ||
expected_fourcc == VA_FOURCC_YV12 ||
expected_fourcc == VA_FOURCC_YV16)
tiling = 0;
@@ -1357,6 +1361,7 @@ i965_suface_external_memory(VADriverContextP ctx,
case VA_FOURCC_I420:
case VA_FOURCC_IYUV:
case VA_FOURCC_IMC3:
+ case VA_FOURCC_I010:
ASSERT_RET(memory_attibute->num_planes == 3, VA_STATUS_ERROR_INVALID_PARAMETER);
ASSERT_RET(memory_attibute->pitches[1] == memory_attibute->pitches[2], VA_STATUS_ERROR_INVALID_PARAMETER);

@@ -4206,6 +4211,17 @@ i965_check_alloc_surface_bo(VADriverContextP ctx,
region_height = obj_surface->height + obj_surface->height / 2;
break;

+ case VA_FOURCC_I010:
+ obj_surface->y_cb_offset = obj_surface->height;
+ obj_surface->y_cr_offset = obj_surface->height + obj_surface->height / 4;
+ obj_surface->cb_cr_width = obj_surface->orig_width / 2;
+ obj_surface->width = ALIGN(obj_surface->cb_cr_width * 2, i965->codec_info->min_linear_wpitch) * 2;
+ obj_surface->cb_cr_height = obj_surface->orig_height / 2;
+ obj_surface->cb_cr_pitch = obj_surface->width / 2;
+ region_width = obj_surface->width;
+ region_height = obj_surface->height + obj_surface->height / 2;
+
+ break;
case VA_FOURCC_YUY2:
case VA_FOURCC_UYVY:
obj_surface->width = ALIGN(obj_surface->orig_width * 2, i965->codec_info->min_linear_wpitch);
@@ -4356,6 +4372,7 @@ VAStatus i965_DeriveImage(VADriverContextP ctx,
break;

case VA_FOURCC_I420:
+ case VA_FOURCC_I010:
case VA_FOURCC_422H:
case VA_FOURCC_IMC3:
case VA_FOURCC_444P:
@@ -5904,6 +5921,12 @@ i965_QuerySurfaceAttributes(VADriverContextP ctx,
attribs[i].flags = VA_SURFACE_ATTRIB_GETTABLE | VA_SURFACE_ATTRIB_SETTABLE;
attribs[i].value.value.i = VA_FOURCC_P010;
i++;
+
+ attribs[i].type = VASurfaceAttribPixelFormat;
+ attribs[i].value.type = VAGenericValueTypeInteger;
+ attribs[i].flags = VA_SURFACE_ATTRIB_GETTABLE | VA_SURFACE_ATTRIB_SETTABLE;
+ attribs[i].value.value.i = VA_FOURCC_I010;
+ i++;
}
}
}
diff --git a/src/i965_fourcc.h b/src/i965_fourcc.h
index 55daf50..c440dcd 100644
--- a/src/i965_fourcc.h
+++ b/src/i965_fourcc.h
@@ -33,6 +33,10 @@
#define VA_FOURCC_YVY2 VA_FOURCC('Y','V','Y','2')
#endif

+#ifndef VA_FOURCC_I010
+#define VA_FOURCC_I010 VA_FOURCC('I','0','1','0')
+#endif
+
#define I965_MAX_PLANES 4
#define I965_MAX_COMONENTS 4
--
2.8.3
Xiang, Haihao
2016-11-18 01:00:35 UTC
Permalink
Could you add the new FOURCC to libva as well?

Thanks
Haihao
Post by Zhao Yakui
And it is treated as non-tiling for I010 surfaces, which is like I420.
---
 src/i965_drv_video.c | 23 +++++++++++++++++++++++
 src/i965_fourcc.h    |  4 ++++
 2 files changed, 27 insertions(+)
diff --git a/src/i965_drv_video.c b/src/i965_drv_video.c
index 04670f4..fbb6407 100644
--- a/src/i965_drv_video.c
+++ b/src/i965_drv_video.c
@@ -96,6 +96,8 @@ static int get_sampling_from_fourcc(unsigned int fourcc);
 
 #define I_P010  2, 2, 2, {I965_16BITS, I965_8BITS}, 3, { {PLANE_0,
OFFSET_0}, {PLANE_1, OFFSET_0}, {PLANE_1, OFFSET_16} }
 
+#define I_I010  2, 2, 3, {I965_16BITS, I965_4BITS, I965_4BITS}, 3, {
{PLANE_0, OFFSET_0}, {PLANE_1, OFFSET_0}, {PLANE_2, OFFSET_0} }
+
 #define I_422H  2, 1, 3, {I965_8BITS, I965_4BITS, I965_4BITS}, 3, {
{PLANE_0, OFFSET_0}, {PLANE_1, OFFSET_0}, {PLANE_2, OFFSET_0} }
 #define I_422V  1, 2, 3, {I965_8BITS, I965_4BITS, I965_4BITS}, 3, {
{PLANE_0, OFFSET_0}, {PLANE_1, OFFSET_0}, {PLANE_2, OFFSET_0} }
 #define I_YV16  2, 1, 3, {I965_8BITS, I965_4BITS, I965_4BITS}, 3, {
{PLANE_0, OFFSET_0}, {PLANE_2, OFFSET_0}, {PLANE_1, OFFSET_0} }
@@ -141,6 +143,7 @@ static const i965_fourcc_info i965_fourcc_infos[] = {
     DEF_YUV(IMC1, YUV420, I_S),
 
     DEF_YUV(P010, YUV420, I_SI),
+    DEF_YUV(I010, YUV420, I_S),
 
     DEF_YUV(422H, YUV422H, I_SI),
     DEF_YUV(422V, YUV422V, I_S),
@@ -1288,6 +1291,7 @@ i965_surface_native_memory(VADriverContextP ctx,
     // todo, should we disable tiling for 422 format?
     if (expected_fourcc == VA_FOURCC_I420 ||
         expected_fourcc == VA_FOURCC_IYUV ||
+        expected_fourcc == VA_FOURCC_I010 ||
         expected_fourcc == VA_FOURCC_YV12 ||
         expected_fourcc == VA_FOURCC_YV16)
         tiling = 0;
@@ -1357,6 +1361,7 @@ i965_suface_external_memory(VADriverContextP ctx,
         ASSERT_RET(memory_attibute->num_planes == 3,
VA_STATUS_ERROR_INVALID_PARAMETER);
         ASSERT_RET(memory_attibute->pitches[1] == memory_attibute-
Post by Zhao Yakui
pitches[2], VA_STATUS_ERROR_INVALID_PARAMETER);
 
@@ -4206,6 +4211,17 @@ i965_check_alloc_surface_bo(VADriverContextP ctx,
             region_height = obj_surface->height + obj_surface-
Post by Zhao Yakui
height / 2;
             break;
 
+            obj_surface->y_cb_offset = obj_surface->height;
+            obj_surface->y_cr_offset = obj_surface->height +
obj_surface->height / 4;
+            obj_surface->cb_cr_width = obj_surface->orig_width / 2;
+            obj_surface->width = ALIGN(obj_surface->cb_cr_width * 2,
i965->codec_info->min_linear_wpitch) * 2;
+            obj_surface->cb_cr_height = obj_surface->orig_height /
2;
+            obj_surface->cb_cr_pitch = obj_surface->width / 2;
+            region_width = obj_surface->width;
+            region_height = obj_surface->height + obj_surface-
Post by Zhao Yakui
height / 2;
+
+            break;
             obj_surface->width = ALIGN(obj_surface->orig_width * 2,
i965->codec_info->min_linear_wpitch);
@@ -4356,6 +4372,7 @@ VAStatus i965_DeriveImage(VADriverContextP ctx,
         break;
 
@@ -5904,6 +5921,12 @@ i965_QuerySurfaceAttributes(VADriverContextP ctx,
                   attribs[i].flags = VA_SURFACE_ATTRIB_GETTABLE |
VA_SURFACE_ATTRIB_SETTABLE;
                   attribs[i].value.value.i = VA_FOURCC_P010;
                   i++;
+
+                  attribs[i].type = VASurfaceAttribPixelFormat;
+                  attribs[i].value.type = VAGenericValueTypeInteger;
+                  attribs[i].flags = VA_SURFACE_ATTRIB_GETTABLE |
VA_SURFACE_ATTRIB_SETTABLE;
+                  attribs[i].value.value.i = VA_FOURCC_I010;
+                  i++;
                 }
             }
         }
diff --git a/src/i965_fourcc.h b/src/i965_fourcc.h
index 55daf50..c440dcd 100644
--- a/src/i965_fourcc.h
+++ b/src/i965_fourcc.h
@@ -33,6 +33,10 @@
 #define VA_FOURCC_YVY2 VA_FOURCC('Y','V','Y','2')
 #endif
 
+#ifndef VA_FOURCC_I010
+#define VA_FOURCC_I010 VA_FOURCC('I','0','1','0')
+#endif
+
 #define I965_MAX_PLANES         4
 #define I965_MAX_COMONENTS      4
 
Zhao Yakui
2016-11-18 01:22:17 UTC
Permalink
Post by Xiang, Haihao
Could you add the new FOURCC in libva as well ?
Sure.
Post by Xiang, Haihao
Thanks
Haihao
Post by Zhao Yakui
And it is treated as non-tiling for I010 surfaces, which is like I420.
---
src/i965_drv_video.c | 23 +++++++++++++++++++++++
src/i965_fourcc.h | 4 ++++
2 files changed, 27 insertions(+)
diff --git a/src/i965_drv_video.c b/src/i965_drv_video.c
index 04670f4..fbb6407 100644
--- a/src/i965_drv_video.c
+++ b/src/i965_drv_video.c
@@ -96,6 +96,8 @@ static int get_sampling_from_fourcc(unsigned int fourcc);
#define I_P010 2, 2, 2, {I965_16BITS, I965_8BITS}, 3, { {PLANE_0,
OFFSET_0}, {PLANE_1, OFFSET_0}, {PLANE_1, OFFSET_16} }
+#define I_I010 2, 2, 3, {I965_16BITS, I965_4BITS, I965_4BITS}, 3, {
{PLANE_0, OFFSET_0}, {PLANE_1, OFFSET_0}, {PLANE_2, OFFSET_0} }
+
#define I_422H 2, 1, 3, {I965_8BITS, I965_4BITS, I965_4BITS}, 3, {
{PLANE_0, OFFSET_0}, {PLANE_1, OFFSET_0}, {PLANE_2, OFFSET_0} }
#define I_422V 1, 2, 3, {I965_8BITS, I965_4BITS, I965_4BITS}, 3, {
{PLANE_0, OFFSET_0}, {PLANE_1, OFFSET_0}, {PLANE_2, OFFSET_0} }
#define I_YV16 2, 1, 3, {I965_8BITS, I965_4BITS, I965_4BITS}, 3, {
{PLANE_0, OFFSET_0}, {PLANE_2, OFFSET_0}, {PLANE_1, OFFSET_0} }
@@ -141,6 +143,7 @@ static const i965_fourcc_info i965_fourcc_infos[] = {
DEF_YUV(IMC1, YUV420, I_S),
DEF_YUV(P010, YUV420, I_SI),
+ DEF_YUV(I010, YUV420, I_S),
DEF_YUV(422H, YUV422H, I_SI),
DEF_YUV(422V, YUV422V, I_S),
@@ -1288,6 +1291,7 @@ i965_surface_native_memory(VADriverContextP ctx,
// todo, should we disable tiling for 422 format?
if (expected_fourcc == VA_FOURCC_I420 ||
expected_fourcc == VA_FOURCC_IYUV ||
+ expected_fourcc == VA_FOURCC_I010 ||
expected_fourcc == VA_FOURCC_YV12 ||
expected_fourcc == VA_FOURCC_YV16)
tiling = 0;
@@ -1357,6 +1361,7 @@ i965_suface_external_memory(VADriverContextP ctx,
ASSERT_RET(memory_attibute->num_planes == 3,
VA_STATUS_ERROR_INVALID_PARAMETER);
ASSERT_RET(memory_attibute->pitches[1] == memory_attibute-
Post by Zhao Yakui
pitches[2], VA_STATUS_ERROR_INVALID_PARAMETER);
@@ -4206,6 +4211,17 @@ i965_check_alloc_surface_bo(VADriverContextP ctx,
region_height = obj_surface->height + obj_surface-
Post by Zhao Yakui
height / 2;
break;
+ obj_surface->y_cb_offset = obj_surface->height;
+ obj_surface->y_cr_offset = obj_surface->height +
obj_surface->height / 4;
+ obj_surface->cb_cr_width = obj_surface->orig_width / 2;
+ obj_surface->width = ALIGN(obj_surface->cb_cr_width * 2,
i965->codec_info->min_linear_wpitch) * 2;
+ obj_surface->cb_cr_height = obj_surface->orig_height / 2;
+ obj_surface->cb_cr_pitch = obj_surface->width / 2;
+ region_width = obj_surface->width;
+ region_height = obj_surface->height + obj_surface-
Post by Zhao Yakui
height / 2;
+
+ break;
obj_surface->width = ALIGN(obj_surface->orig_width * 2,
i965->codec_info->min_linear_wpitch);
@@ -4356,6 +4372,7 @@ VAStatus i965_DeriveImage(VADriverContextP ctx,
break;
@@ -5904,6 +5921,12 @@ i965_QuerySurfaceAttributes(VADriverContextP ctx,
attribs[i].flags = VA_SURFACE_ATTRIB_GETTABLE |
VA_SURFACE_ATTRIB_SETTABLE;
attribs[i].value.value.i = VA_FOURCC_P010;
i++;
+
+ attribs[i].type = VASurfaceAttribPixelFormat;
+ attribs[i].value.type = VAGenericValueTypeInteger;
+ attribs[i].flags = VA_SURFACE_ATTRIB_GETTABLE |
VA_SURFACE_ATTRIB_SETTABLE;
+ attribs[i].value.value.i = VA_FOURCC_I010;
+ i++;
}
}
}
diff --git a/src/i965_fourcc.h b/src/i965_fourcc.h
index 55daf50..c440dcd 100644
--- a/src/i965_fourcc.h
+++ b/src/i965_fourcc.h
@@ -33,6 +33,10 @@
#define VA_FOURCC_YVY2 VA_FOURCC('Y','V','Y','2')
#endif
+#ifndef VA_FOURCC_I010
+#define VA_FOURCC_I010 VA_FOURCC('I','0','1','0')
+#endif
+
#define I965_MAX_PLANES 4
#define I965_MAX_COMONENTS 4
Zhao Yakui
2016-11-17 02:06:51 UTC
Permalink
The I010 format is another kind of 10-bit surface, and its layout is similar to I420.

Signed-off-by: Zhao Yakui <***@intel.com>
---
src/gen75_picture_process.c | 45 ++++++++++++++++++++++++++++++++++++++++++---
1 file changed, 42 insertions(+), 3 deletions(-)

diff --git a/src/gen75_picture_process.c b/src/gen75_picture_process.c
index 8097e02..46c4ed5 100644
--- a/src/gen75_picture_process.c
+++ b/src/gen75_picture_process.c
@@ -201,10 +201,49 @@ gen75_proc_picture(VADriverContextP ctx,
}

if (pipeline_param->num_filters == 0 || pipeline_param->filters == NULL ) {
- if ((obj_src_surf->fourcc == VA_FOURCC_P010) &&
+/* The Bit 2 is used to indicate that it is 10bit or 8bit.
+ * The Bit 0/1 is used to indicate the 420/422/444 format
+ */
+#define SRC_10BIT_420 (5 << 0)
+#define SRC_10BIT_422 (6 << 0)
+#define SRC_10BIT_444 (7 << 0)
+
+/* The Bit 6 is used to indicate that it is 10bit or 8bit.
+ * The Bit 5/4 is used to indicate the 420/422/444 format
+ */
+#define DST_10BIT_420 (5 << 4)
+#define DST_10BIT_422 (6 << 4)
+#define DST_10BIT_444 (7 << 4)
+
+/* This is mainly for YUY2/RGBA. It is reserved for further */
+#define SRC_YUV_PACKED (1 << 3)
+#define DST_YUV_PACKED (1 << 7)
+
+#define MASK_CSC (0xFF)
+#define SCALE_10BIT_420 (SRC_10BIT_420 | DST_10BIT_420)
+
+ unsigned int scale_flag;
+
+ scale_flag = 0;
+ if (obj_src_surf->fourcc == VA_FOURCC_P010 ||
+ obj_src_surf->fourcc == VA_FOURCC_I010)
+ scale_flag |= SRC_10BIT_420;
+
+ if (obj_dst_surf->fourcc == VA_FOURCC_P010 ||
+ obj_dst_surf->fourcc == VA_FOURCC_I010)
+ scale_flag |= DST_10BIT_420;
+
+ /* If P010 is converted without resolution change,
+ * fall back to VEBOX
+ */
+ if (i965->intel.has_vebox &&
+ (obj_src_surf->fourcc == VA_FOURCC_P010) &&
(obj_dst_surf->fourcc == VA_FOURCC_P010) &&
- (src_rect.width != dst_rect.width ||
- src_rect.height != dst_rect.height) &&
+ (src_rect.width == dst_rect.width) &&
+ (src_rect.height == dst_rect.height))
+ scale_flag = 0;
+
+ if (((scale_flag & MASK_CSC) == SCALE_10BIT_420) &&
intel_gpe_support_10bit_scaling(proc_ctx)) {
struct i965_proc_context *gpe_proc_ctx;
struct i965_surface src_surface, dst_surface;
--
2.8.3
Charles, Daniel
2016-11-17 19:23:41 UTC
Permalink
Post by Zhao Yakui
This info can be used to configure the max EU threads of GPU device.
eu_total * 6
---
src/intel_driver.c | 13 +++++++++++++
src/intel_driver.h | 3 +++
2 files changed, 16 insertions(+)
diff --git a/src/intel_driver.c b/src/intel_driver.c
index bb19401..a2c8c71 100644
--- a/src/intel_driver.c
+++ b/src/intel_driver.c
@@ -50,6 +50,12 @@ uint32_t g_intel_debug_option_flags = 0;
#define LOCAL_I915_PARAM_HAS_HUC 42
#endif
+#ifdef I915_PARAM_EU_TOTAL
+#define LOCAL_I915_PARAM_EU_TOTAL I915_PARAM_EU_TOTAL
+#else
+#define LOCAL_I915_PARAM_EU_TOTAL 34
+#endif
+
static Bool
intel_driver_get_param(struct intel_driver_data *intel, int param, int *value)
{
@@ -142,6 +148,13 @@ intel_driver_init(VADriverContextP ctx)
if (intel_driver_get_param(intel, LOCAL_I915_PARAM_HAS_HUC, &ret_value))
intel->has_huc = !!ret_value;
+ intel->has_eu_flag = 0;
+ intel->eu_total = 0;
+ if (intel_driver_get_param(intel, LOCAL_I915_PARAM_EU_TOTAL, &ret_value)) {
+ intel->has_eu_flag = !!ret_value;
+ intel->eu_total = ret_value;
+ }
+
intel_driver_get_revid(intel, &intel->revision);
return true;
}
diff --git a/src/intel_driver.h b/src/intel_driver.h
index dcdc03b..a02bfa8 100644
--- a/src/intel_driver.h
+++ b/src/intel_driver.h
@@ -182,6 +182,9 @@ struct intel_driver_data
unsigned int has_vebox : 1; /* Flag: has VEBOX unit */
unsigned int has_bsd2 : 1; /* Flag: has the second BSD video ring unit */
unsigned int has_huc : 1; /* Flag: has a fully loaded HuC firmware? */
+ unsigned int has_eu_flag : 1; /* Flag: Kernel will return EU counts */
+
+ int eu_total;
const struct intel_device_info *device_info;
};
This patch LGTM, and I have also verified it on hardware that supports the VP9 encoder.

Thanks,
--
Daniel
Post by Zhao Yakui
--
2.8.3
_______________________________________________
Libva mailing list
https://lists.freedesktop.org/mailman/listinfo/libva
Xiang, Haihao
2016-11-18 00:47:04 UTC
Permalink
Post by Zhao Yakui
This info can be used to configure the max EU threads of GPU device.
eu_total * 6
---
 src/intel_driver.c | 13 +++++++++++++
 src/intel_driver.h |  3 +++
 2 files changed, 16 insertions(+)
diff --git a/src/intel_driver.c b/src/intel_driver.c
index bb19401..a2c8c71 100644
--- a/src/intel_driver.c
+++ b/src/intel_driver.c
@@ -50,6 +50,12 @@ uint32_t g_intel_debug_option_flags = 0;
 #define LOCAL_I915_PARAM_HAS_HUC 42
 #endif
 
+#ifdef I915_PARAM_EU_TOTAL
+#define LOCAL_I915_PARAM_EU_TOTAL I915_PARAM_EU_TOTAL
+#else
+#define LOCAL_I915_PARAM_EU_TOTAL 34
+#endif
+
 static Bool
 intel_driver_get_param(struct intel_driver_data *intel, int param,
int *value)
 {
@@ -142,6 +148,13 @@ intel_driver_init(VADriverContextP ctx)
     if (intel_driver_get_param(intel, LOCAL_I915_PARAM_HAS_HUC,
&ret_value))
         intel->has_huc = !!ret_value;
 
+    intel->has_eu_flag = 0;
+    intel->eu_total = 0;
+    if (intel_driver_get_param(intel, LOCAL_I915_PARAM_EU_TOTAL,
&ret_value)) {
+        intel->has_eu_flag = !!ret_value;
+        intel->eu_total = ret_value;
+    }
+
     intel_driver_get_revid(intel, &intel->revision);
     return true;
 }
diff --git a/src/intel_driver.h b/src/intel_driver.h
index dcdc03b..a02bfa8 100644
--- a/src/intel_driver.h
+++ b/src/intel_driver.h
@@ -182,6 +182,9 @@ struct intel_driver_data
     unsigned int has_vebox  : 1; /* Flag: has VEBOX unit */
     unsigned int has_bsd2   : 1; /* Flag: has the second BSD video
ring unit */
     unsigned int has_huc    : 1; /* Flag: has a fully loaded HuC
firmware? */
+    unsigned int has_eu_flag : 1; /* Flag: Kernel will return EU
counts */
+
+    int eu_total;
Adding eu_total is enough; we can use the following if ... else
statement later:

if (intel->eu_total > 0) {
  ...
} else {
  ...
}

It also avoids setting the number of threads to 0 (although that is
unlikely).
Post by Zhao Yakui
 
     const struct intel_device_info *device_info;
 };
Zhao Yakui
2016-11-18 01:25:29 UTC
Permalink
Post by Xiang, Haihao
Post by Zhao Yakui
This info can be used to configure the max EU threads of GPU device.
eu_total * 6
---
src/intel_driver.c | 13 +++++++++++++
src/intel_driver.h | 3 +++
2 files changed, 16 insertions(+)
diff --git a/src/intel_driver.c b/src/intel_driver.c
index bb19401..a2c8c71 100644
--- a/src/intel_driver.c
+++ b/src/intel_driver.c
@@ -50,6 +50,12 @@ uint32_t g_intel_debug_option_flags = 0;
#define LOCAL_I915_PARAM_HAS_HUC 42
#endif
+#ifdef I915_PARAM_EU_TOTAL
+#define LOCAL_I915_PARAM_EU_TOTAL I915_PARAM_EU_TOTAL
+#else
+#define LOCAL_I915_PARAM_EU_TOTAL 34
+#endif
+
static Bool
intel_driver_get_param(struct intel_driver_data *intel, int param, int *value)
{
@@ -142,6 +148,13 @@ intel_driver_init(VADriverContextP ctx)
if (intel_driver_get_param(intel, LOCAL_I915_PARAM_HAS_HUC, &ret_value))
intel->has_huc = !!ret_value;
+ intel->has_eu_flag = 0;
+ intel->eu_total = 0;
+ if (intel_driver_get_param(intel, LOCAL_I915_PARAM_EU_TOTAL, &ret_value)) {
+ intel->has_eu_flag = !!ret_value;
+ intel->eu_total = ret_value;
+ }
+
intel_driver_get_revid(intel,&intel->revision);
return true;
}
diff --git a/src/intel_driver.h b/src/intel_driver.h
index dcdc03b..a02bfa8 100644
--- a/src/intel_driver.h
+++ b/src/intel_driver.h
@@ -182,6 +182,9 @@ struct intel_driver_data
unsigned int has_vebox : 1; /* Flag: has VEBOX unit */
unsigned int has_bsd2 : 1; /* Flag: has the second BSD video ring unit */
unsigned int has_huc : 1; /* Flag: has a fully loaded HuC firmware? */
+ unsigned int has_eu_flag : 1; /* Flag: Kernel will return EU counts */
+
+ int eu_total;
adding eu_total is enough, we can use the following if ... else
statement later
In fact, has_eu_flag is equivalent to the following condition check:
Post by Xiang, Haihao
if (intel->eu_total > 0)
It only uses an extra bit-field to avoid evaluating that condition every time.
Post by Xiang, Haihao
if (intel->eu_total> 0) {
...
} else {
...
}
It avoids setting the number of thread to 0 (although it is unlikely)
as well.
Post by Zhao Yakui
const struct intel_device_info *device_info;
};
Xiang, Haihao
2016-11-18 01:33:34 UTC
Permalink
Post by Zhao Yakui
Post by Xiang, Haihao
Post by Zhao Yakui
This info can be used to configure the max EU threads of GPU device.
eu_total * 6
---
  src/intel_driver.c | 13 +++++++++++++
  src/intel_driver.h |  3 +++
  2 files changed, 16 insertions(+)
diff --git a/src/intel_driver.c b/src/intel_driver.c
index bb19401..a2c8c71 100644
--- a/src/intel_driver.c
+++ b/src/intel_driver.c
@@ -50,6 +50,12 @@ uint32_t g_intel_debug_option_flags = 0;
  #define LOCAL_I915_PARAM_HAS_HUC 42
  #endif
+#ifdef I915_PARAM_EU_TOTAL
+#define LOCAL_I915_PARAM_EU_TOTAL I915_PARAM_EU_TOTAL
+#else
+#define LOCAL_I915_PARAM_EU_TOTAL 34
+#endif
+
  static Bool
  intel_driver_get_param(struct intel_driver_data *intel, int
param,
int *value)
  {
@@ -142,6 +148,13 @@ intel_driver_init(VADriverContextP ctx)
      if (intel_driver_get_param(intel, LOCAL_I915_PARAM_HAS_HUC,
&ret_value))
          intel->has_huc = !!ret_value;
+    intel->has_eu_flag = 0;
+    intel->eu_total = 0;
+    if (intel_driver_get_param(intel, LOCAL_I915_PARAM_EU_TOTAL,
&ret_value)) {
+        intel->has_eu_flag = !!ret_value;
+        intel->eu_total = ret_value;
+    }
+
      intel_driver_get_revid(intel,&intel->revision);
      return true;
  }
diff --git a/src/intel_driver.h b/src/intel_driver.h
index dcdc03b..a02bfa8 100644
--- a/src/intel_driver.h
+++ b/src/intel_driver.h
@@ -182,6 +182,9 @@ struct intel_driver_data
      unsigned int has_vebox  : 1; /* Flag: has VEBOX unit */
      unsigned int has_bsd2   : 1; /* Flag: has the second BSD
video
ring unit */
      unsigned int has_huc    : 1; /* Flag: has a fully loaded
HuC
firmware? */
+    unsigned int has_eu_flag : 1; /* Flag: Kernel will return EU
counts */
+
+    int eu_total;
adding eu_total is enough, we can use the following if ... else
statement later
In fact the has_eu_flag is equal to the condition check.
    > if (intel->eu_total > 0)
It only use extra bit_field to avoid calling it every time.
However, the extra bit is still used every time.
Post by Zhao Yakui
Post by Xiang, Haihao
if (intel->eu_total>  0) {
   ...
} else {
   ...
}
It avoids setting the number of thread to 0 (although it is
unlikely)
as well.
Post by Zhao Yakui
      const struct intel_device_info *device_info;
  };
Loading...