Skip to content

Commit e9071d1

Browse files
committed
Add gpu work period support for i915
Signed-off-by: Aakash Sarkar <aakash.deep.sarkar@intel.com>
1 parent 568130a commit e9071d1

9 files changed

Lines changed: 381 additions & 0 deletions

drivers/gpu/drm/i915/Makefile

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -362,6 +362,8 @@ i915-y += \
362362

363363
i915-y += i915_perf.o
364364

365+
i915-y += i915_gpu_work.o
366+
365367
# Protected execution platform (PXP) support. Base support is required for HuC
366368
i915-y += \
367369
pxp/intel_pxp.o \

drivers/gpu/drm/i915/gt/intel_context_types.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -152,6 +152,7 @@ struct intel_context {
152152
struct ewma_runtime avg;
153153
u64 total;
154154
u32 last;
155+
u32 dt;
155156
I915_SELFTEST_DECLARE(u32 num_underflow);
156157
I915_SELFTEST_DECLARE(u32 max_underflow);
157158
} runtime;
@@ -173,6 +174,8 @@ struct intel_context {
173174
/** sseu: Control eu/slice partitioning */
174175
struct intel_sseu sseu;
175176

177+
u64 start_time_ns;
178+
176179
/**
177180
* pinned_contexts_link: List link for the engine's pinned contexts.
178181
* This is only used if this is a perma-pinned kernel context and

drivers/gpu/drm/i915/gt/intel_engine_cs.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1601,6 +1601,8 @@ int intel_engines_init(struct intel_gt *gt)
16011601
return err;
16021602

16031603
intel_engine_add_user(engine);
1604+
1605+
i915_gpu_work_stats_init(engine);
16041606
}
16051607

16061608
return 0;

drivers/gpu/drm/i915/gt/intel_engine_types.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
#include "intel_uncore.h"
2727
#include "intel_wakeref.h"
2828
#include "intel_workarounds_types.h"
29+
#include "i915_gpu_work.h"
2930

3031
/* HW Engine class + instance */
3132
#define RENDER_CLASS 0
@@ -169,6 +170,11 @@ struct intel_engine_execlists {
169170
*/
170171
struct timer_list preempt;
171172

173+
/**
174+
* @work_period_timer: emit the gpu work period stats event
175+
*/
176+
struct timer_list work_period_timer;
177+
172178
/**
173179
* @preempt_target: active request at the time of the preemption request
174180
*
@@ -487,6 +493,7 @@ struct intel_engine_cs {
487493
struct i915_pmu_sample sample[I915_ENGINE_SAMPLE_COUNT];
488494
} pmu;
489495

496+
struct i915_engine_work gpu_work;
490497
struct intel_hw_status_page status_page;
491498
struct i915_ctx_workarounds wa_ctx;
492499
struct i915_wa_list ctx_wa_list;

drivers/gpu/drm/i915/gt/intel_execlists_submission.c

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1252,6 +1252,11 @@ static unsigned long active_preempt_timeout(struct intel_engine_cs *engine,
12521252
return READ_ONCE(engine->props.preempt_timeout_ms);
12531253
}
12541254

1255+
/*
 * Length of one gpu work period accounting window: the interval at which
 * work_period_timer re-arms and the accumulated stats are emitted.
 * Currently a fixed constant; @engine is accepted (mirroring
 * active_preempt_timeout()) but not consulted yet.
 */
static unsigned long work_period_timeslice(struct intel_engine_cs *engine)
{
	return GPU_WORK_PERIOD_EVENT_TIMEOUT;
}
1259+
12551260
static void set_preempt_timeout(struct intel_engine_cs *engine,
12561261
const struct i915_request *rq)
12571262
{
@@ -2040,6 +2045,8 @@ process_csb(struct intel_engine_cs *engine, struct i915_request **inactive)
20402045
if (active_ce)
20412046
lrc_runtime_start(active_ce);
20422047
}
2048+
if (active_ce)
2049+
active_ce->start_time_ns = ktime_get_raw_ns();
20432050
new_timeslice(execlists);
20442051
}
20452052

@@ -2423,6 +2430,13 @@ static bool preempt_timeout(const struct intel_engine_cs *const engine)
24232430
return engine->execlists.pending[0];
24242431
}
24252432

2433+
static bool work_period_expired(const struct intel_engine_cs *const engine)
2434+
{
2435+
const struct timer_list *t = &engine->execlists.work_period_timer;
2436+
2437+
return timer_expired(t);
2438+
}
2439+
24262440
/*
24272441
* Check the unread Context Status Buffers and manage the submission of new
24282442
* contexts to the ELSP accordingly.
@@ -2439,6 +2453,17 @@ static void execlists_submission_tasklet(struct tasklet_struct *t)
24392453
inactive = process_csb(engine, post);
24402454
GEM_BUG_ON(inactive - post > ARRAY_SIZE(post));
24412455

2456+
struct intel_context *ce = (*inactive)->context;
2457+
struct i915_engine_work *ew = &engine->gpu_work;
2458+
i915_gpu_work_process_ctx(ce, ew);
2459+
2460+
if (unlikely(work_period_expired(engine))) {
2461+
cancel_timer(&engine->execlists.work_period_timer);
2462+
schedule_work(&ew->event_work);
2463+
set_timer_ms(&engine->execlists.work_period_timer,
2464+
work_period_timeslice(engine));
2465+
}
2466+
24422467
if (unlikely(preempt_timeout(engine))) {
24432468
const struct i915_request *rq = *engine->execlists.active;
24442469

@@ -2547,6 +2572,11 @@ static void execlists_preempt(struct timer_list *timer)
25472572
execlists_kick(timer, preempt);
25482573
}
25492574

2575+
/*
 * Callback for execlists.work_period_timer. Follows the same pattern as
 * execlists_preempt()/execlists_timeslice() above: kick the engine via
 * execlists_kick() so the tasklet notices the expired timer.
 */
static void execlists_work_period(struct timer_list *timer)
{
	execlists_kick(timer, work_period_timer);
}
2579+
25502580
static void queue_request(struct intel_engine_cs *engine,
25512581
struct i915_request *rq)
25522582
{
@@ -3543,6 +3573,8 @@ int intel_execlists_submission_setup(struct intel_engine_cs *engine)
35433573
tasklet_setup(&engine->sched_engine->tasklet, execlists_submission_tasklet);
35443574
timer_setup(&engine->execlists.timer, execlists_timeslice, 0);
35453575
timer_setup(&engine->execlists.preempt, execlists_preempt, 0);
3576+
timer_setup(&engine->execlists.work_period_timer, execlists_work_period, 0);
3577+
35463578

35473579
logical_ring_default_vfuncs(engine);
35483580
logical_ring_default_irqs(engine);

drivers/gpu/drm/i915/gt/intel_lrc.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1946,6 +1946,7 @@ void lrc_update_runtime(struct intel_context *ce)
19461946

19471947
ewma_runtime_add(&stats->runtime.avg, dt);
19481948
stats->runtime.total += dt;
1949+
stats->runtime.dt = dt;
19491950
}
19501951

19511952
#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
Lines changed: 234 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,234 @@
1+
#include "i915_gpu_work.h"
2+
#include <linux/pid.h>
3+
#include <linux/errno.h>
4+
5+
#include "gt/intel_context.h"
6+
#include "gt/intel_engine.h"
7+
#include "gem/i915_gem_context.h"
8+
9+
#define CREATE_TRACE_POINTS
10+
#include "intel_power_gpu_work_period_trace.h"
11+
12+
/*
 * Fetch the uid stored in slot @key. A uid of zero marks an empty slot.
 *
 * READ_ONCE() because not every caller holds stats_lock:
 * i915_gpu_work_process_ctx() peeks at the hashed slot before taking
 * the lock.
 */
static inline u32 get_stats_uid(s32 key, struct i915_work_stats *stats)
{
	return READ_ONCE(stats[key].uid);
}
18+
19+
static s32 get_uid_ctx(struct intel_context *ce)
20+
{
21+
struct i915_gem_context *ctx = NULL;
22+
struct task_struct *task = NULL;
23+
const struct cred *cred = NULL;
24+
s32 ret;
25+
26+
rcu_read_lock();
27+
ctx = rcu_dereference(ce->gem_context);
28+
/* ctx could be freed from right under our nose,
29+
* so check first if we are able to get a reference
30+
*/
31+
if (ctx && !kref_get_unless_zero(&ctx->ref))
32+
ctx = NULL;
33+
rcu_read_unlock();
34+
35+
if (!ctx) {
36+
ret = -EINVAL;
37+
goto out;
38+
}
39+
40+
// TODO: Error handling
41+
task = get_pid_task(ctx->pid, PIDTYPE_PID);
42+
cred = get_task_cred(task);
43+
const unsigned int uid = cred->euid.val;
44+
ret = (s32)uid;
45+
46+
put_cred(cred);
47+
put_task_struct(task);
48+
i915_gem_context_put(ctx);
49+
out:
50+
return ret;
51+
}
52+
53+
// TODO: Can this be called inside softirq?
54+
static void emit_work_period_event(struct i915_engine_work *ew)
55+
{
56+
struct i915_work_stats * const stats = &ew->stats[0];
57+
for (int itr = 0; itr < I915_ENGINE_WORK_STATS_COUNT; itr++) {
58+
struct i915_work_stats *stat = &stats[itr];
59+
if (!stat->uid)
60+
continue;
61+
62+
trace_gpu_work_period(0, stat->uid,
63+
stat->start_time_ns, stat->end_time_ns,
64+
stat->total_active_duration_ns);
65+
66+
// TODO: check concurrent accesses to num_entries
67+
if (!ew->num_entries--)
68+
break;
69+
}
70+
GEM_BUG_ON(ew->num_entries != 0);
71+
memset(stats, 0, sizeof(*stats) *
72+
I915_ENGINE_WORK_STATS_COUNT);
73+
smp_wmb();
74+
}
75+
76+
static void i915_work_period_event_worker(struct work_struct *work)
77+
{
78+
struct i915_engine_work *ew =
79+
container_of(work, typeof(*ew), event_work);
80+
spin_lock_bh(&ew->stats_lock);
81+
// TODO: Is it too expensive to be called inside softirq?
82+
emit_work_period_event(ew);
83+
spin_unlock_bh(&ew->stats_lock);
84+
}
85+
86+
static inline u32 get_cur_dt(struct intel_context* ce)
87+
{
88+
struct intel_context_stats *stats = &ce->stats;
89+
s32 dt = READ_ONCE(stats->runtime.dt);
90+
if (unlikely(dt < 0)) {
91+
return 0;
92+
}
93+
return dt;
94+
}
95+
96+
static u64 get_active_duration_ns(struct intel_context* ce)
97+
{
98+
u64 dur = get_cur_dt(ce);
99+
if (ce->ops->flags & COPS_RUNTIME_CYCLES)
100+
dur *= ce->engine->gt->clock_period_ns;
101+
return dur;
102+
}
103+
104+
/*
105+
* Hash collision is handled here the same way we handle the situation
106+
* when our favourite urinal is occupied in a crowded office restroom.
107+
* Sorry!
108+
*/
109+
static s32 handle_collision(s32 key, struct i915_engine_work *ew)
110+
{
111+
struct i915_work_stats * const stats = &ew->stats[0];
112+
u32 uid, count = 0;
113+
114+
spin_lock(&ew->stats_lock);
115+
while (uid = get_stats_uid(key, stats)) {
116+
if (unlikely(count >=
117+
I915_ENGINE_WORK_STATS_COUNT)) {
118+
spin_unlock(&ew->stats_lock);
119+
return -ENOMEM;
120+
}
121+
122+
if (key == I915_ENGINE_WORK_STATS_COUNT)
123+
key = 0;
124+
key++;
125+
count++;
126+
}
127+
spin_unlock(&ew->stats_lock);
128+
return key;
129+
}
130+
131+
void i915_gpu_work_process_ctx(struct intel_context *ce,
132+
struct i915_engine_work *ew)
133+
{
134+
struct i915_work_stats * const stats = &ew->stats[0];
135+
struct i915_work_stats *stat = NULL;
136+
s32 key = 0, uid = 0;
137+
138+
uid = get_uid_ctx(ce);
139+
// TODO: Handle this correctly
140+
if (uid < 0)
141+
return;
142+
143+
key = HASH_MAP(uid);
144+
145+
/* Hash collision. Find the next available key */
146+
if (get_stats_uid(key, stats) != uid)
147+
key = handle_collision(key, ew);
148+
149+
if (unlikely(KEY_INVALID(key))) {
150+
/*
151+
* This can only happen if all the slots in our stats
152+
* array are occupied. Emit the events now and empty
153+
* all the slots.
154+
*/
155+
spin_lock(&ew->stats_lock);
156+
emit_work_period_event(ew);
157+
spin_unlock(&ew->stats_lock);
158+
key = 0;
159+
}
160+
stat = &stats[key];
161+
162+
spin_lock(&ew->stats_lock);
163+
/*
164+
* If the uid at our hash index is empty (zero)
165+
* this implies that our ctx is processed first
166+
* time since we emitted the events last time
167+
* and subsequently evicted all the slots.
168+
*
169+
* So, we set the start time to the last time this
170+
* ctx was put into the active queue. We also set
171+
* the end time and the total active duration to
172+
* the current runtime of this ctx
173+
*/
174+
if (!stat->uid) {
175+
stat->uid = uid;
176+
stat->start_time_ns = READ_ONCE(ce->start_time_ns);
177+
stat->total_active_duration_ns =
178+
get_active_duration_ns(ce);
179+
stat->end_time_ns = get_active_duration_ns(ce);
180+
/* TODO: num_entries could be accessed concurrently
181+
* b/w two cpus (use atomic type?)
182+
*/
183+
ew->num_entries++;
184+
goto out;
185+
}
186+
187+
/*
188+
* Now we have the hash index but the slot
189+
* could be occupied by another uid that maps
190+
* to the same slot index.
191+
* So, we do a linear search from our index until
192+
* we find a slot with matching uid or we run
193+
* through all the slots.
194+
*/
195+
u32 count = 0;
196+
while (stat->uid != uid) {
197+
/* Is this if condition really reuqired? */
198+
if (unlikely(count >=
199+
I915_ENGINE_WORK_STATS_COUNT))
200+
goto out;
201+
202+
if (key == I915_ENGINE_WORK_STATS_COUNT)
203+
key = 0;
204+
205+
stat = &stats[++key];
206+
++count;
207+
}
208+
209+
/*
210+
* We set the endtime to the current time this ctx
211+
* is being processed and accumulate the current
212+
* runtime to the total active duration
213+
*/
214+
stat->end_time_ns = ktime_get_raw_ns();
215+
stat->total_active_duration_ns +=
216+
get_active_duration_ns(ce);
217+
218+
out:
219+
spin_unlock(&ew->stats_lock);
220+
}
221+
222+
void i915_gpu_work_stats_init(struct intel_engine_cs *engine)
223+
{
224+
struct i915_engine_work *ew = &engine->gpu_work;
225+
struct i915_work_stats * const stats = &ew->stats[0];
226+
227+
ew->enabled = false;
228+
ew->num_entries = 0;
229+
memset(stats, 0, sizeof(*stats) *
230+
I915_ENGINE_WORK_STATS_COUNT);
231+
232+
spin_lock_init(&ew->stats_lock);
233+
INIT_WORK(&ew->event_work, i915_work_period_event_worker);
234+
}

0 commit comments

Comments
 (0)