Skip to content

Commit 00331d3

Browse files
committed
Add gpu work period support for i915
Signed-off-by: Aakash Sarkar <aakash.deep.sarkar@intel.com>
1 parent 568130a commit 00331d3

9 files changed

Lines changed: 319 additions & 0 deletions

drivers/gpu/drm/i915/Makefile

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -362,6 +362,8 @@ i915-y += \
362362

363363
i915-y += i915_perf.o
364364

365+
i915-y += i915_gpu_work.o
366+
365367
# Protected execution platform (PXP) support. Base support is required for HuC
366368
i915-y += \
367369
pxp/intel_pxp.o \

drivers/gpu/drm/i915/gt/intel_context_types.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -152,6 +152,7 @@ struct intel_context {
152152
struct ewma_runtime avg;
153153
u64 total;
154154
u32 last;
155+
u32 dt;
155156
I915_SELFTEST_DECLARE(u32 num_underflow);
156157
I915_SELFTEST_DECLARE(u32 max_underflow);
157158
} runtime;
@@ -173,6 +174,8 @@ struct intel_context {
173174
/** sseu: Control eu/slice partitioning */
174175
struct intel_sseu sseu;
175176

177+
u64 start_time_ns;
178+
176179
/**
177180
* pinned_contexts_link: List link for the engine's pinned contexts.
178181
* This is only used if this is a perma-pinned kernel context and

drivers/gpu/drm/i915/gt/intel_engine_cs.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1601,6 +1601,8 @@ int intel_engines_init(struct intel_gt *gt)
16011601
return err;
16021602

16031603
intel_engine_add_user(engine);
1604+
1605+
i915_gpu_work_stats_init(engine);
16041606
}
16051607

16061608
return 0;

drivers/gpu/drm/i915/gt/intel_engine_types.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
#include "intel_uncore.h"
2727
#include "intel_wakeref.h"
2828
#include "intel_workarounds_types.h"
29+
#include "i915_gpu_work.h"
2930

3031
/* HW Engine class + instance */
3132
#define RENDER_CLASS 0
@@ -169,6 +170,11 @@ struct intel_engine_execlists {
169170
*/
170171
struct timer_list preempt;
171172

173+
/**
174+
* @work_period_timer: emit the gpu work period stats event
175+
*/
176+
struct timer_list work_period_timer;
177+
172178
/**
173179
* @preempt_target: active request at the time of the preemption request
174180
*
@@ -487,6 +493,7 @@ struct intel_engine_cs {
487493
struct i915_pmu_sample sample[I915_ENGINE_SAMPLE_COUNT];
488494
} pmu;
489495

496+
struct i915_engine_work gpu_work;
490497
struct intel_hw_status_page status_page;
491498
struct i915_ctx_workarounds wa_ctx;
492499
struct i915_wa_list ctx_wa_list;

drivers/gpu/drm/i915/gt/intel_execlists_submission.c

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1252,6 +1252,11 @@ static unsigned long active_preempt_timeout(struct intel_engine_cs *engine,
12521252
return READ_ONCE(engine->props.preempt_timeout_ms);
12531253
}
12541254

1255+
static unsigned long work_period_timeslice(struct intel_engine_cs *engine)
1256+
{
1257+
return GPU_WORK_PERIOD_EVENT_TIMEOUT;
1258+
}
1259+
12551260
static void set_preempt_timeout(struct intel_engine_cs *engine,
12561261
const struct i915_request *rq)
12571262
{
@@ -2040,6 +2045,8 @@ process_csb(struct intel_engine_cs *engine, struct i915_request **inactive)
20402045
if (active_ce)
20412046
lrc_runtime_start(active_ce);
20422047
}
2048+
if (active_ce)
2049+
active_ce->start_time_ns = ktime_get_raw_ns();
20432050
new_timeslice(execlists);
20442051
}
20452052

@@ -2423,6 +2430,13 @@ static bool preempt_timeout(const struct intel_engine_cs *const engine)
24232430
return engine->execlists.pending[0];
24242431
}
24252432

2433+
static bool work_period_expired(const struct intel_engine_cs *const engine)
2434+
{
2435+
const struct timer_list *t = &engine->execlists.work_period_timer;
2436+
2437+
return timer_expired(t);
2438+
}
2439+
24262440
/*
24272441
* Check the unread Context Status Buffers and manage the submission of new
24282442
* contexts to the ELSP accordingly.
@@ -2439,6 +2453,17 @@ static void execlists_submission_tasklet(struct tasklet_struct *t)
24392453
inactive = process_csb(engine, post);
24402454
GEM_BUG_ON(inactive - post > ARRAY_SIZE(post));
24412455

2456+
struct intel_context *ce = (*inactive)->context;
2457+
struct i915_engine_work *ew = &engine->gpu_work;
2458+
i915_gpu_work_process_ctx(ce, ew);
2459+
2460+
if (unlikely(work_period_expired(engine))) {
2461+
cancel_timer(&engine->execlists.work_period_timer);
2462+
schedule_work(&ew->event_work);
2463+
set_timer_ms(&engine->execlists.work_period_timer,
2464+
work_period_timeslice(engine));
2465+
}
2466+
24422467
if (unlikely(preempt_timeout(engine))) {
24432468
const struct i915_request *rq = *engine->execlists.active;
24442469

@@ -2547,6 +2572,11 @@ static void execlists_preempt(struct timer_list *timer)
25472572
execlists_kick(timer, preempt);
25482573
}
25492574

2575+
static void execlists_work_period(struct timer_list *timer)
2576+
{
2577+
execlists_kick(timer, work_period_timer);
2578+
}
2579+
25502580
static void queue_request(struct intel_engine_cs *engine,
25512581
struct i915_request *rq)
25522582
{
@@ -3543,6 +3573,8 @@ int intel_execlists_submission_setup(struct intel_engine_cs *engine)
35433573
tasklet_setup(&engine->sched_engine->tasklet, execlists_submission_tasklet);
35443574
timer_setup(&engine->execlists.timer, execlists_timeslice, 0);
35453575
timer_setup(&engine->execlists.preempt, execlists_preempt, 0);
3576+
timer_setup(&engine->execlists.work_period_timer, execlists_work_period, 0);
3577+
35463578

35473579
logical_ring_default_vfuncs(engine);
35483580
logical_ring_default_irqs(engine);

drivers/gpu/drm/i915/gt/intel_lrc.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1946,6 +1946,7 @@ void lrc_update_runtime(struct intel_context *ce)
19461946

19471947
ewma_runtime_add(&stats->runtime.avg, dt);
19481948
stats->runtime.total += dt;
1949+
stats->runtime.dt = dt;
19491950
}
19501951

19511952
#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
drivers/gpu/drm/i915/i915_gpu_work.c

Lines changed: 172 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,172 @@
1+
#include "i915_gpu_work.h"
2+
#include <linux/pid.h>
3+
#include <linux/errno.h>
4+
5+
#include "gt/intel_context.h"
6+
#include "gt/intel_engine.h"
7+
#include "gem/i915_gem_context.h"
8+
9+
#define CREATE_TRACE_POINTS
10+
#include "intel_power_gpu_work_period_trace.h"
11+
12+
static s32 get_uid_ctx(struct intel_context *ce)
13+
{
14+
struct i915_gem_context *ctx = NULL;
15+
struct task_struct *task = NULL;
16+
const struct cred *cred = NULL;
17+
s32 ret;
18+
19+
rcu_read_lock();
20+
ctx = rcu_dereference(ce->gem_context);
21+
if (ctx && !kref_get_unless_zero(&ctx->ref))
22+
ctx = NULL;
23+
rcu_read_unlock();
24+
25+
if (!ctx)
26+
ret = -EINVAL;
27+
28+
task = get_pid_task(ctx->pid, PIDTYPE_PID);
29+
cred = get_task_cred(task);
30+
const unsigned int uid = cred->euid.val;
31+
ret = (s32)uid;
32+
33+
put_cred(cred);
34+
put_task_struct(task);
35+
i915_gem_context_put(ctx);
36+
return ret;
37+
}
38+
39+
static void emit_work_period_event(struct i915_engine_work *ew)
40+
{
41+
struct i915_work_stats * const stats = &ew->stats[0];
42+
for (int itr = 0; itr < I915_ENGINE_WORK_STATS_COUNT; itr++) {
43+
struct i915_work_stats *stat = &stats[itr];
44+
if (!stat->uid)
45+
continue;
46+
47+
trace_gpu_work_period(0, stat->uid,
48+
stat->start_time_ns, stat->end_time_ns,
49+
stat->total_active_duration_ns);
50+
51+
if (!ew->num_entries--)
52+
break;
53+
}
54+
GEM_BUG_ON(ew->num_entries != 0);
55+
memset(stats, 0, sizeof(*stats) *
56+
I915_ENGINE_WORK_STATS_COUNT);
57+
smp_wmb();
58+
}
59+
60+
static void i915_work_period_event_worker(struct work_struct *work)
61+
{
62+
struct i915_engine_work *ew =
63+
container_of(work, typeof(*ew), event_work);
64+
spin_lock_bh(&ew->stats_lock);
65+
emit_work_period_event(ew);
66+
spin_unlock_bh(&ew->stats_lock);
67+
}
68+
69+
static inline u32 get_cur_dt(struct intel_context* ce)
70+
{
71+
struct intel_context_stats *stats = &ce->stats;
72+
s32 dt = READ_ONCE(stats->runtime.dt);
73+
if (unlikely(dt < 0)) {
74+
return 0;
75+
}
76+
return dt;
77+
}
78+
79+
static u64 get_active_duration_ns(struct intel_context* ce)
80+
{
81+
u64 dur = get_cur_dt(ce);
82+
if (ce->ops->flags & COPS_RUNTIME_CYCLES)
83+
dur *= ce->engine->gt->clock_period_ns;
84+
return dur;
85+
}
86+
87+
/*
 * Return the uid stored in slot @key of the @stats table. No bounds check
 * is done here; the caller must pass a valid index.
 */
static inline u32 get_stats_uid(s32 key, struct i915_work_stats *stats)
{
	const struct i915_work_stats *slot = stats + key;

	return slot->uid;
}
91+
92+
static s32 handle_collision(s32 key, struct i915_engine_work *ew)
93+
{
94+
struct i915_work_stats * const stats = &ew->stats[0];
95+
u32 uid, count = 0;
96+
97+
spin_lock(&ew->stats_lock);
98+
while (uid = get_stats_uid(key, stats)) {
99+
if (unlikely(count >=
100+
I915_ENGINE_WORK_STATS_COUNT)) {
101+
spin_unlock(&ew->stats_lock);
102+
return -ENOMEM;
103+
}
104+
105+
if (key == I915_ENGINE_WORK_STATS_COUNT)
106+
key = 0;
107+
key++;
108+
count++;
109+
}
110+
111+
spin_unlock(&ew->stats_lock);
112+
return key;
113+
}
114+
115+
void i915_gpu_work_process_ctx(struct intel_context *ce,
116+
struct i915_engine_work *ew)
117+
{
118+
struct i915_work_stats * const stats = &ew->stats[0];
119+
struct i915_work_stats *stat;
120+
s32 key, uid;
121+
122+
uid = get_uid_ctx(ce);
123+
// TODO: Handle this correctly
124+
if (uid < 0)
125+
return;
126+
127+
key = HASH_MAP(uid);
128+
129+
if (get_stats_uid(key, stats) != uid)
130+
key = handle_collision(key, ew);
131+
132+
if (unlikely(KEY_INVALID(key))) {
133+
spin_lock(&ew->stats_lock);
134+
emit_work_period_event(ew);
135+
spin_unlock(&ew->stats_lock);
136+
key = 0;
137+
}
138+
139+
GEM_BUG_ON(KEY_INVALID(key));
140+
stat = &stats[key];
141+
142+
spin_lock(&ew->stats_lock);
143+
if (!stat->uid) {
144+
stat->uid = uid;
145+
stat->start_time_ns = ce->start_time_ns;
146+
stat->total_active_duration_ns =
147+
get_active_duration_ns(ce);
148+
ew->num_entries++;
149+
goto out;
150+
}
151+
GEM_BUG_ON(stat->uid != uid);
152+
stat->end_time_ns = ktime_get_raw_ns();
153+
stat->total_active_duration_ns +=
154+
get_active_duration_ns(ce);
155+
156+
out:
157+
spin_unlock(&ew->stats_lock);
158+
}
159+
160+
void i915_gpu_work_stats_init(struct intel_engine_cs *engine)
161+
{
162+
struct i915_engine_work *ew = &engine->gpu_work;
163+
struct i915_work_stats * const stats = &ew->stats[0];
164+
165+
ew->enabled = false;
166+
ew->num_entries = 0;
167+
memset(stats, 0, sizeof(*stats) *
168+
I915_ENGINE_WORK_STATS_COUNT);
169+
170+
spin_lock_init(&ew->stats_lock);
171+
INIT_WORK(&ew->event_work, i915_work_period_event_worker);
172+
}
drivers/gpu/drm/i915/i915_gpu_work.h

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
#ifndef __I915_GPU_WORK_H__
2+
#define __I915_GPU_WORK_H__
3+
4+
#include <linux/types.h>
5+
#include <linux/spinlock.h>
6+
#include <linux/workqueue.h>
7+
8+
/*
 * Number of per-uid slots kept for each engine. Must remain a power of two
 * because HASH_MAP() reduces a value by masking with (COUNT - 1).
 */
#define I915_ENGINE_WORK_STATS_COUNT 256
/* Period between work-period events; passed to set_timer_ms(), so in ms. */
#define GPU_WORK_PERIOD_EVENT_TIMEOUT 10

/* Map a uid onto its home slot index in the stats table. */
#define HASH_MAP(x) ((x) & (I915_ENGINE_WORK_STATS_COUNT - 1))
/* True when @key is not a valid slot index. Evaluates @key twice. */
#define KEY_INVALID(key) ((key) < 0 || (key) >= I915_ENGINE_WORK_STATS_COUNT)
13+
14+
struct intel_context;
15+
struct intel_engine_cs;
16+
17+
struct i915_work_stats {
18+
u32 gpu_id;
19+
u32 uid;
20+
u64 start_time_ns;
21+
u64 end_time_ns;
22+
23+
u64 total_active_duration_ns;
24+
};
25+
26+
struct i915_engine_work {
27+
bool enabled; /* if engine work stats should be emitted */
28+
u32 num_entries; /* number of entries currently in work stats */
29+
/* serialize access to work stats array */
30+
spinlock_t stats_lock;
31+
/* work period stats record per engine */
32+
struct i915_work_stats stats[I915_ENGINE_WORK_STATS_COUNT];
33+
struct work_struct event_work;
34+
};
35+
36+
/*
 * Record the latest runtime of context @ce against its owning uid in the
 * per-engine statistics @ew. Called from the execlists submission tasklet.
 */
void i915_gpu_work_process_ctx(struct intel_context *ce, struct i915_engine_work *ew);

/*
 * Initialise the work-period statistics embedded in @engine (lock, table
 * and event work item). Called once per engine from intel_engines_init().
 */
void i915_gpu_work_stats_init(struct intel_engine_cs *engine);
39+
#endif /*__I915_GPU_WORK_H__*/

0 commit comments

Comments
 (0)