1 /*        $NetBSD: i915_scheduler.c,v 1.8 2021/12/21 12:06:29 tnn Exp $         */
2 
3 /*
4  * SPDX-License-Identifier: MIT
5  *
6  * Copyright © 2018 Intel Corporation
7  */
8 
9 #include <sys/cdefs.h>
10 __KERNEL_RCSID(0, "$NetBSD: i915_scheduler.c,v 1.8 2021/12/21 12:06:29 tnn Exp $");
11 
12 #include <linux/mutex.h>
13 
14 #include "i915_drv.h"
15 #include "i915_globals.h"
16 #include "i915_request.h"
17 #include "i915_scheduler.h"
18 
19 #include <linux/nbsd-namespace.h>
20 
21 static struct i915_global_scheduler {
22           struct i915_global base;
23           struct kmem_cache *slab_dependencies;
24           struct kmem_cache *slab_priorities;
25 } global;
26 
27 #ifdef __NetBSD__
28 static spinlock_t schedule_lock;
29 spinlock_t *const i915_schedule_lock = &schedule_lock;
30 #else
31 static DEFINE_SPINLOCK(schedule_lock);
32 #endif
33 
34 static const struct i915_request *
node_to_request(const struct i915_sched_node * node)35 node_to_request(const struct i915_sched_node *node)
36 {
37           return const_container_of(node, struct i915_request, sched);
38 }
39 
node_started(const struct i915_sched_node * node)40 static inline bool node_started(const struct i915_sched_node *node)
41 {
42           return i915_request_started(node_to_request(node));
43 }
44 
node_signaled(const struct i915_sched_node * node)45 static inline bool node_signaled(const struct i915_sched_node *node)
46 {
47           return i915_request_completed(node_to_request(node));
48 }
49 
to_priolist(struct rb_node * rb)50 static inline struct i915_priolist *to_priolist(struct rb_node *rb)
51 {
52           return rb_entry(rb, struct i915_priolist, node);
53 }
54 
assert_priolists(struct intel_engine_execlists * const execlists)55 static void assert_priolists(struct intel_engine_execlists * const execlists)
56 {
57           struct rb_node *rb;
58           long last_prio, i;
59 
60           if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
61                     return;
62 
63           GEM_BUG_ON(rb_first_cached(&execlists->queue) !=
64                        rb_first(&execlists->queue.rb_root));
65 
66           last_prio = (INT_MAX >> I915_USER_PRIORITY_SHIFT) + 1;
67           for (rb = rb_first_cached(&execlists->queue);
68                rb;
69                rb = rb_next2(&execlists->queue.rb_root, rb)) {
70                     const struct i915_priolist *p = to_priolist(rb);
71 
72                     GEM_BUG_ON(p->priority >= last_prio);
73                     last_prio = p->priority;
74 
75                     GEM_BUG_ON(!p->used);
76                     for (i = 0; i < ARRAY_SIZE(p->requests); i++) {
77                               if (list_empty(&p->requests[i]))
78                                         continue;
79 
80                               GEM_BUG_ON(!(p->used & BIT(i)));
81                     }
82           }
83 }
84 
85 #ifdef __NetBSD__
86 
87 static int
compare_priolists(void * cookie,const void * va,const void * vb)88 compare_priolists(void *cookie, const void *va, const void *vb)
89 {
90           const struct i915_priolist *a = va;
91           const struct i915_priolist *b = vb;
92 
93           if (a->priority > b->priority)
94                     return -1;
95           if (a->priority < b->priority)
96                     return +1;
97           return 0;
98 }
99 
100 static int
compare_priolist_key(void * cookie,const void * vp,const void * vk)101 compare_priolist_key(void *cookie, const void *vp, const void *vk)
102 {
103           const struct i915_priolist *p = vp;
104           const int *priorityp = vk, priority = *priorityp;
105 
106           if (p->priority > priority)
107                     return -1;
108           if (p->priority < priority)
109                     return +1;
110           return 0;
111 }
112 
113 static const rb_tree_ops_t i915_priolist_rb_ops = {
114           .rbto_compare_nodes = compare_priolists,
115           .rbto_compare_key = compare_priolist_key,
116           .rbto_node_offset = offsetof(struct i915_priolist, node),
117 };
118 
119 #endif
120 
121 void
i915_sched_init(struct intel_engine_execlists * execlists)122 i915_sched_init(struct intel_engine_execlists *execlists)
123 {
124 
125 #ifdef __NetBSD__
126           rb_tree_init(&execlists->queue.rb_root.rbr_tree,
127               &i915_priolist_rb_ops);
128 #else
129           execlists->queue = RB_ROOT_CACHED;
130 #endif
131 }
132 
133 struct list_head *
i915_sched_lookup_priolist(struct intel_engine_cs * engine,int prio)134 i915_sched_lookup_priolist(struct intel_engine_cs *engine, int prio)
135 {
136           struct intel_engine_execlists * const execlists = &engine->execlists;
137           struct i915_priolist *p;
138           struct rb_node **parent, *rb;
139           bool first = true;
140           int idx, i;
141 
142           lockdep_assert_held(&engine->active.lock);
143           assert_priolists(execlists);
144 
145           /* buckets sorted from highest [in slot 0] to lowest priority */
146           idx = I915_PRIORITY_COUNT - (prio & I915_PRIORITY_MASK) - 1;
147           prio >>= I915_USER_PRIORITY_SHIFT;
148           if (unlikely(execlists->no_priolist))
149                     prio = I915_PRIORITY_NORMAL;
150 
151 find_priolist:
152 #ifdef __NetBSD__
153           /* XXX  */
154           __USE(first);
155           __USE(parent);
156           __USE(rb);
157           p = rb_tree_find_node(&execlists->queue.rb_root.rbr_tree, &prio);
158           if (p)
159                     goto out;
160 #else
161           /* most positive priority is scheduled first, equal priorities fifo */
162           rb = NULL;
163           parent = &execlists->queue.rb_root.rb_node;
164           while (*parent) {
165                     rb = *parent;
166                     p = to_priolist(rb);
167                     if (prio > p->priority) {
168                               parent = &rb->rb_left;
169                     } else if (prio < p->priority) {
170                               parent = &rb->rb_right;
171                               first = false;
172                     } else {
173                               goto out;
174                     }
175           }
176 #endif
177 
178           if (prio == I915_PRIORITY_NORMAL) {
179                     p = &execlists->default_priolist;
180           } else {
181                     p = kmem_cache_alloc(global.slab_priorities, GFP_ATOMIC);
182                     /* Convert an allocation failure to a priority bump */
183                     if (unlikely(!p)) {
184                               prio = I915_PRIORITY_NORMAL; /* recurses just once */
185 
186                               /* To maintain ordering with all rendering, after an
187                                * allocation failure we have to disable all scheduling.
188                                * Requests will then be executed in fifo, and schedule
189                                * will ensure that dependencies are emitted in fifo.
190                                * There will be still some reordering with existing
191                                * requests, so if userspace lied about their
192                                * dependencies that reordering may be visible.
193                                */
194                               execlists->no_priolist = true;
195                               goto find_priolist;
196                     }
197           }
198 
199           p->priority = prio;
200           for (i = 0; i < ARRAY_SIZE(p->requests); i++)
201                     INIT_LIST_HEAD(&p->requests[i]);
202 #ifdef __NetBSD__
203           struct i915_priolist *collision __diagused;
204           collision = rb_tree_insert_node(&execlists->queue.rb_root.rbr_tree,
205               p);
206           KASSERT(collision == p);
207 #else
208           rb_link_node(&p->node, rb, parent);
209           rb_insert_color_cached(&p->node, &execlists->queue, first);
210 #endif
211           p->used = 0;
212 
213 out:
214           p->used |= BIT(idx);
215           return &p->requests[idx];
216 }
217 
__i915_priolist_free(struct i915_priolist * p)218 void __i915_priolist_free(struct i915_priolist *p)
219 {
220           kmem_cache_free(global.slab_priorities, p);
221 }
222 
223 struct sched_cache {
224           struct list_head *priolist;
225 };
226 
227 static struct intel_engine_cs *
sched_lock_engine(const struct i915_sched_node * node,struct intel_engine_cs * locked,struct sched_cache * cache)228 sched_lock_engine(const struct i915_sched_node *node,
229                       struct intel_engine_cs *locked,
230                       struct sched_cache *cache)
231 {
232           const struct i915_request *rq = node_to_request(node);
233           struct intel_engine_cs *engine;
234 
235           GEM_BUG_ON(!locked);
236 
237           /*
238            * Virtual engines complicate acquiring the engine timeline lock,
239            * as their rq->engine pointer is not stable until under that
240            * engine lock. The simple ploy we use is to take the lock then
241            * check that the rq still belongs to the newly locked engine.
242            */
243           while (locked != (engine = READ_ONCE(rq->engine))) {
244                     spin_unlock(&locked->active.lock);
245                     memset(cache, 0, sizeof(*cache));
246                     spin_lock(&engine->active.lock);
247                     locked = engine;
248           }
249 
250           GEM_BUG_ON(locked != engine);
251           return locked;
252 }
253 
rq_prio(const struct i915_request * rq)254 static inline int rq_prio(const struct i915_request *rq)
255 {
256           return rq->sched.attr.priority | __NO_PREEMPTION;
257 }
258 
need_preempt(int prio,int active)259 static inline bool need_preempt(int prio, int active)
260 {
261           /*
262            * Allow preemption of low -> normal -> high, but we do
263            * not allow low priority tasks to preempt other low priority
264            * tasks under the impression that latency for low priority
265            * tasks does not matter (as much as background throughput),
266            * so kiss.
267            */
268           return prio >= max(I915_PRIORITY_NORMAL, active);
269 }
270 
kick_submission(struct intel_engine_cs * engine,const struct i915_request * rq,int prio)271 static void kick_submission(struct intel_engine_cs *engine,
272                                   const struct i915_request *rq,
273                                   int prio)
274 {
275           const struct i915_request *inflight;
276 
277           /*
278            * We only need to kick the tasklet once for the high priority
279            * new context we add into the queue.
280            */
281           if (prio <= engine->execlists.queue_priority_hint)
282                     return;
283 
284           rcu_read_lock();
285 
286           /* Nothing currently active? We're overdue for a submission! */
287           inflight = execlists_active(&engine->execlists);
288           if (!inflight)
289                     goto unlock;
290 
291           /*
292            * If we are already the currently executing context, don't
293            * bother evaluating if we should preempt ourselves.
294            */
295           if (inflight->context == rq->context)
296                     goto unlock;
297 
298           engine->execlists.queue_priority_hint = prio;
299           if (need_preempt(prio, rq_prio(inflight)))
300                     tasklet_hi_schedule(&engine->execlists.tasklet);
301 
302 unlock:
303           rcu_read_unlock();
304 }
305 
__i915_schedule(struct i915_sched_node * node,const struct i915_sched_attr * attr)306 static void __i915_schedule(struct i915_sched_node *node,
307                                   const struct i915_sched_attr *attr)
308 {
309           struct intel_engine_cs *engine;
310           struct i915_dependency *dep, *p;
311           struct i915_dependency stack;
312           const int prio = attr->priority;
313           struct sched_cache cache;
314           LIST_HEAD(dfs);
315 
316           /* Needed in order to use the temporary link inside i915_dependency */
317           lockdep_assert_held(&schedule_lock);
318           GEM_BUG_ON(prio == I915_PRIORITY_INVALID);
319 
320           if (prio <= READ_ONCE(node->attr.priority))
321                     return;
322 
323           if (node_signaled(node))
324                     return;
325 
326           stack.signaler = node;
327           list_add(&stack.dfs_link, &dfs);
328 
329           /*
330            * Recursively bump all dependent priorities to match the new request.
331            *
332            * A naive approach would be to use recursion:
333            * static void update_priorities(struct i915_sched_node *node, prio) {
334            *        list_for_each_entry(dep, &node->signalers_list, signal_link)
335            *                  update_priorities(dep->signal, prio)
336            *        queue_request(node);
337            * }
338            * but that may have unlimited recursion depth and so runs a very
339            * real risk of overunning the kernel stack. Instead, we build
340            * a flat list of all dependencies starting with the current request.
341            * As we walk the list of dependencies, we add all of its dependencies
342            * to the end of the list (this may include an already visited
343            * request) and continue to walk onwards onto the new dependencies. The
344            * end result is a topological list of requests in reverse order, the
345            * last element in the list is the request we must execute first.
346            */
347           list_for_each_entry(dep, &dfs, dfs_link) {
348                     struct i915_sched_node *node = dep->signaler;
349 
350                     /* If we are already flying, we know we have no signalers */
351                     if (node_started(node))
352                               continue;
353 
354                     /*
355                      * Within an engine, there can be no cycle, but we may
356                      * refer to the same dependency chain multiple times
357                      * (redundant dependencies are not eliminated) and across
358                      * engines.
359                      */
360                     list_for_each_entry(p, &node->signalers_list, signal_link) {
361                               GEM_BUG_ON(p == dep); /* no cycles! */
362 
363                               if (node_signaled(p->signaler))
364                                         continue;
365 
366                               if (prio > READ_ONCE(p->signaler->attr.priority))
367                                         list_move_tail(&p->dfs_link, &dfs);
368                     }
369           }
370 
371           /*
372            * If we didn't need to bump any existing priorities, and we haven't
373            * yet submitted this request (i.e. there is no potential race with
374            * execlists_submit_request()), we can set our own priority and skip
375            * acquiring the engine locks.
376            */
377           if (node->attr.priority == I915_PRIORITY_INVALID) {
378                     GEM_BUG_ON(!list_empty(&node->link));
379                     node->attr = *attr;
380 
381                     if (stack.dfs_link.next == stack.dfs_link.prev)
382                               return;
383 
384                     __list_del_entry(&stack.dfs_link);
385           }
386 
387           memset(&cache, 0, sizeof(cache));
388           engine = node_to_request(node)->engine;
389           spin_lock(&engine->active.lock);
390 
391           /* Fifo and depth-first replacement ensure our deps execute before us */
392           engine = sched_lock_engine(node, engine, &cache);
393           list_for_each_entry_safe_reverse(dep, p, &dfs, dfs_link) {
394                     INIT_LIST_HEAD(&dep->dfs_link);
395 
396                     node = dep->signaler;
397                     engine = sched_lock_engine(node, engine, &cache);
398                     lockdep_assert_held(&engine->active.lock);
399 
400                     /* Recheck after acquiring the engine->timeline.lock */
401                     if (prio <= node->attr.priority || node_signaled(node))
402                               continue;
403 
404                     GEM_BUG_ON(node_to_request(node)->engine != engine);
405 
406                     node->attr.priority = prio;
407 
408                     /*
409                      * Once the request is ready, it will be placed into the
410                      * priority lists and then onto the HW runlist. Before the
411                      * request is ready, it does not contribute to our preemption
412                      * decisions and we can safely ignore it, as it will, and
413                      * any preemption required, be dealt with upon submission.
414                      * See engine->submit_request()
415                      */
416                     if (list_empty(&node->link))
417                               continue;
418 
419                     if (i915_request_in_priority_queue(node_to_request(node))) {
420                               if (!cache.priolist)
421                                         cache.priolist =
422                                                   i915_sched_lookup_priolist(engine,
423                                                                                    prio);
424                               list_move_tail(&node->link, cache.priolist);
425                     }
426 
427                     /* Defer (tasklet) submission until after all of our updates. */
428                     kick_submission(engine, node_to_request(node), prio);
429           }
430 
431           spin_unlock(&engine->active.lock);
432 }
433 
i915_schedule(struct i915_request * rq,const struct i915_sched_attr * attr)434 void i915_schedule(struct i915_request *rq, const struct i915_sched_attr *attr)
435 {
436           spin_lock_irq(&schedule_lock);
437           __i915_schedule(&rq->sched, attr);
438           spin_unlock_irq(&schedule_lock);
439 }
440 
__bump_priority(struct i915_sched_node * node,unsigned int bump)441 static void __bump_priority(struct i915_sched_node *node, unsigned int bump)
442 {
443           struct i915_sched_attr attr = node->attr;
444 
445           attr.priority |= bump;
446           __i915_schedule(node, &attr);
447 }
448 
i915_schedule_bump_priority(struct i915_request * rq,unsigned int bump)449 void i915_schedule_bump_priority(struct i915_request *rq, unsigned int bump)
450 {
451           unsigned long flags;
452 
453           GEM_BUG_ON(bump & ~I915_PRIORITY_MASK);
454           if (READ_ONCE(rq->sched.attr.priority) & bump)
455                     return;
456 
457           spin_lock_irqsave(&schedule_lock, flags);
458           __bump_priority(&rq->sched, bump);
459           spin_unlock_irqrestore(&schedule_lock, flags);
460 }
461 
i915_sched_node_init(struct i915_sched_node * node)462 void i915_sched_node_init(struct i915_sched_node *node)
463 {
464           INIT_LIST_HEAD(&node->signalers_list);
465           INIT_LIST_HEAD(&node->waiters_list);
466           INIT_LIST_HEAD(&node->link);
467 
468           i915_sched_node_reinit(node);
469 }
470 
i915_sched_node_reinit(struct i915_sched_node * node)471 void i915_sched_node_reinit(struct i915_sched_node *node)
472 {
473           node->attr.priority = I915_PRIORITY_INVALID;
474           node->semaphores = 0;
475           node->flags = 0;
476 
477           GEM_BUG_ON(!list_empty(&node->signalers_list));
478           GEM_BUG_ON(!list_empty(&node->waiters_list));
479           GEM_BUG_ON(!list_empty(&node->link));
480 }
481 
482 static struct i915_dependency *
i915_dependency_alloc(void)483 i915_dependency_alloc(void)
484 {
485           return kmem_cache_alloc(global.slab_dependencies, GFP_KERNEL);
486 }
487 
488 static void
i915_dependency_free(struct i915_dependency * dep)489 i915_dependency_free(struct i915_dependency *dep)
490 {
491           kmem_cache_free(global.slab_dependencies, dep);
492 }
493 
__i915_sched_node_add_dependency(struct i915_sched_node * node,struct i915_sched_node * signal,struct i915_dependency * dep,unsigned long flags)494 bool __i915_sched_node_add_dependency(struct i915_sched_node *node,
495                                               struct i915_sched_node *signal,
496                                               struct i915_dependency *dep,
497                                               unsigned long flags)
498 {
499           bool ret = false;
500 
501           spin_lock_irq(&schedule_lock);
502 
503           if (!node_signaled(signal)) {
504                     INIT_LIST_HEAD(&dep->dfs_link);
505                     dep->signaler = signal;
506                     dep->waiter = node;
507                     dep->flags = flags;
508 
509                     /* Keep track of whether anyone on this chain has a semaphore */
510                     if (signal->flags & I915_SCHED_HAS_SEMAPHORE_CHAIN &&
511                         !node_started(signal))
512                               node->flags |= I915_SCHED_HAS_SEMAPHORE_CHAIN;
513 
514                     /* All set, now publish. Beware the lockless walkers. */
515                     list_add(&dep->signal_link, &node->signalers_list);
516                     list_add_rcu(&dep->wait_link, &signal->waiters_list);
517 
518                     /*
519                      * As we do not allow WAIT to preempt inflight requests,
520                      * once we have executed a request, along with triggering
521                      * any execution callbacks, we must preserve its ordering
522                      * within the non-preemptible FIFO.
523                      */
524                     BUILD_BUG_ON(__NO_PREEMPTION & ~I915_PRIORITY_MASK);
525                     if (flags & I915_DEPENDENCY_EXTERNAL)
526                               __bump_priority(signal, __NO_PREEMPTION);
527 
528                     ret = true;
529           }
530 
531           spin_unlock_irq(&schedule_lock);
532 
533           return ret;
534 }
535 
i915_sched_node_add_dependency(struct i915_sched_node * node,struct i915_sched_node * signal)536 int i915_sched_node_add_dependency(struct i915_sched_node *node,
537                                            struct i915_sched_node *signal)
538 {
539           struct i915_dependency *dep;
540 
541           dep = i915_dependency_alloc();
542           if (!dep)
543                     return -ENOMEM;
544 
545           if (!__i915_sched_node_add_dependency(node, signal, dep,
546                                                         I915_DEPENDENCY_EXTERNAL |
547                                                         I915_DEPENDENCY_ALLOC))
548                     i915_dependency_free(dep);
549 
550           return 0;
551 }
552 
i915_sched_node_fini(struct i915_sched_node * node)553 void i915_sched_node_fini(struct i915_sched_node *node)
554 {
555           struct i915_dependency *dep, *tmp;
556 
557           spin_lock_irq(&schedule_lock);
558 
559           /*
560            * Everyone we depended upon (the fences we wait to be signaled)
561            * should retire before us and remove themselves from our list.
562            * However, retirement is run independently on each timeline and
563            * so we may be called out-of-order.
564            */
565           list_for_each_entry_safe(dep, tmp, &node->signalers_list, signal_link) {
566                     GEM_BUG_ON(!list_empty(&dep->dfs_link));
567 
568                     list_del(&dep->wait_link);
569                     if (dep->flags & I915_DEPENDENCY_ALLOC)
570                               i915_dependency_free(dep);
571           }
572           INIT_LIST_HEAD(&node->signalers_list);
573 
574           /* Remove ourselves from everyone who depends upon us */
575           list_for_each_entry_safe(dep, tmp, &node->waiters_list, wait_link) {
576                     GEM_BUG_ON(dep->signaler != node);
577                     GEM_BUG_ON(!list_empty(&dep->dfs_link));
578 
579                     list_del(&dep->signal_link);
580                     if (dep->flags & I915_DEPENDENCY_ALLOC)
581                               i915_dependency_free(dep);
582           }
583           INIT_LIST_HEAD(&node->waiters_list);
584 
585           spin_unlock_irq(&schedule_lock);
586 }
587 
i915_global_scheduler_shrink(void)588 static void i915_global_scheduler_shrink(void)
589 {
590           kmem_cache_shrink(global.slab_dependencies);
591           kmem_cache_shrink(global.slab_priorities);
592 }
593 
i915_global_scheduler_exit(void)594 static void i915_global_scheduler_exit(void)
595 {
596           kmem_cache_destroy(global.slab_dependencies);
597           kmem_cache_destroy(global.slab_priorities);
598 }
599 
600 static struct i915_global_scheduler global = { {
601           .shrink = i915_global_scheduler_shrink,
602           .exit = i915_global_scheduler_exit,
603 } };
604 
i915_global_scheduler_init(void)605 int __init i915_global_scheduler_init(void)
606 {
607           global.slab_dependencies = KMEM_CACHE(i915_dependency,
608                                                         SLAB_HWCACHE_ALIGN);
609           if (!global.slab_dependencies)
610                     return -ENOMEM;
611 
612           global.slab_priorities = KMEM_CACHE(i915_priolist,
613                                                       SLAB_HWCACHE_ALIGN);
614           if (!global.slab_priorities)
615                     goto err_priorities;
616 
617           i915_global_register(&global.base);
618           return 0;
619 
620 err_priorities:
621           kmem_cache_destroy(global.slab_priorities);
622           return -ENOMEM;
623 }
624