1 /* OpenACC Profiling Interface
2 
3    Copyright (C) 2019-2022 Free Software Foundation, Inc.
4 
5    Contributed by Mentor, a Siemens Business.
6 
7    This file is part of the GNU Offloading and Multi Processing Library
8    (libgomp).
9 
10    Libgomp is free software; you can redistribute it and/or modify it
11    under the terms of the GNU General Public License as published by
12    the Free Software Foundation; either version 3, or (at your option)
13    any later version.
14 
15    Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
16    WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
17    FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
18    more details.
19 
20    Under Section 7 of GPL version 3, you are granted additional
21    permissions described in the GCC Runtime Library Exception, version
22    3.1, as published by the Free Software Foundation.
23 
24    You should have received a copy of the GNU General Public License and
25    a copy of the GCC Runtime Library Exception along with this program;
26    see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
27    <http://www.gnu.org/licenses/>.  */
28 
29 #define _GNU_SOURCE
30 #include "libgomp.h"
31 #include "oacc-int.h"
32 #include "secure_getenv.h"
33 #include "acc_prof.h"
34 #include <assert.h>
35 #ifdef HAVE_STRING_H
36 # include <string.h>
37 #endif
38 #ifdef PLUGIN_SUPPORT
39 # include <dlfcn.h>
40 #endif
41 
42 #define STATIC_ASSERT(expr) _Static_assert (expr, "!(" #expr ")")
43 
44 /* Statically assert that the layout of the common fields in the
45    'acc_event_info' variants matches.  */
46 /* 'event_type' */
47 STATIC_ASSERT (offsetof (acc_event_info, event_type)
48                  == offsetof (acc_event_info, data_event.event_type));
49 STATIC_ASSERT (offsetof (acc_event_info, data_event.event_type)
50                  == offsetof (acc_event_info, launch_event.event_type));
51 STATIC_ASSERT (offsetof (acc_event_info, data_event.event_type)
52                  == offsetof (acc_event_info, other_event.event_type));
53 /* 'valid_bytes' */
54 STATIC_ASSERT (offsetof (acc_event_info, data_event.valid_bytes)
55                  == offsetof (acc_event_info, launch_event.valid_bytes));
56 STATIC_ASSERT (offsetof (acc_event_info, data_event.valid_bytes)
57                  == offsetof (acc_event_info, other_event.valid_bytes));
58 /* 'parent_construct' */
59 STATIC_ASSERT (offsetof (acc_event_info, data_event.parent_construct)
60                  == offsetof (acc_event_info, launch_event.parent_construct));
61 STATIC_ASSERT (offsetof (acc_event_info, data_event.parent_construct)
62                  == offsetof (acc_event_info, other_event.parent_construct));
63 /* 'implicit' */
64 STATIC_ASSERT (offsetof (acc_event_info, data_event.implicit)
65                  == offsetof (acc_event_info, launch_event.implicit));
66 STATIC_ASSERT (offsetof (acc_event_info, data_event.implicit)
67                  == offsetof (acc_event_info, other_event.implicit));
68 /* 'tool_info' */
69 STATIC_ASSERT (offsetof (acc_event_info, data_event.tool_info)
70                  == offsetof (acc_event_info, launch_event.tool_info));
71 STATIC_ASSERT (offsetof (acc_event_info, data_event.tool_info)
72                  == offsetof (acc_event_info, other_event.tool_info));
73 
74 struct goacc_prof_callback_entry
75 {
76   acc_prof_callback cb;
77   int ref;
78   bool enabled;
79   struct goacc_prof_callback_entry *next;
80 };
81 
82 /* Use a separate flag to minimize run-time performance impact for the (very
83    common) case that profiling is not enabled.
84 
85    Once enabled, we're not going to disable this anymore, anywhere.  We
86    probably could, by adding appropriate logic to 'acc_prof_register',
87    'acc_prof_unregister'.  */
88 bool goacc_prof_enabled = false;
89 
90 /* Global state for registered callbacks.
91    'goacc_prof_callbacks_enabled[acc_ev_none]' acts as a global toggle.  */
92 static bool goacc_prof_callbacks_enabled[acc_ev_last];
93 static struct goacc_prof_callback_entry *goacc_prof_callback_entries[acc_ev_last];
94 /* Lock used to protect access to 'goacc_prof_callbacks_enabled', and
95    'goacc_prof_callback_entries'.  */
96 static gomp_mutex_t goacc_prof_lock;
97 
98 void
goacc_profiling_initialize(void)99 goacc_profiling_initialize (void)
100 {
101   gomp_mutex_init (&goacc_prof_lock);
102 
103   /* Initially, all callbacks for all events are enabled.  */
104   for (int i = 0; i < acc_ev_last; ++i)
105     goacc_prof_callbacks_enabled[i] = true;
106 
107 
108 #ifdef PLUGIN_SUPPORT
109   char *acc_proflibs = secure_getenv ("ACC_PROFLIB");
110   while (acc_proflibs != NULL && acc_proflibs[0] != '\0')
111     {
112       char *acc_proflibs_sep = strchr (acc_proflibs, ';');
113       char *acc_proflib;
114       if (acc_proflibs_sep == acc_proflibs)
115           {
116             /* Stray ';' separator: make sure we don't 'dlopen' the main
117                program.  */
118             acc_proflib = NULL;
119           }
120       else
121           {
122             if (acc_proflibs_sep != NULL)
123               {
124                 /* Single out the first library.  */
125                 acc_proflib = gomp_malloc (acc_proflibs_sep - acc_proflibs + 1);
126                 memcpy (acc_proflib, acc_proflibs,
127                           acc_proflibs_sep - acc_proflibs);
128                 acc_proflib[acc_proflibs_sep - acc_proflibs] = '\0';
129               }
130             else
131               {
132                 /* No ';' separator, so only one library.  */
133                 acc_proflib = acc_proflibs;
134               }
135 
136             gomp_debug (0, "%s: dlopen (\"%s\")\n", __FUNCTION__, acc_proflib);
137             void *dl_handle = dlopen (acc_proflib, RTLD_LAZY);
138             if (dl_handle != NULL)
139               {
140                 typeof (&acc_register_library) a_r_l
141                     = dlsym (dl_handle, "acc_register_library");
142                 if (a_r_l == NULL)
143                     goto dl_fail;
144                 gomp_debug (0, "  %s: calling %s:acc_register_library\n",
145                                 __FUNCTION__, acc_proflib);
146                 a_r_l (acc_prof_register, acc_prof_unregister,
147                          acc_prof_lookup);
148               }
149             else
150               {
151               dl_fail:
152                 gomp_error ("while loading ACC_PROFLIB \"%s\": %s",
153                                 acc_proflib, dlerror ());
154                 if (dl_handle != NULL)
155                     {
156                       int err = dlclose (dl_handle);
157                       dl_handle = NULL;
158                       if (err != 0)
159                         goto dl_fail;
160                     }
161               }
162           }
163 
164       if (acc_proflib != acc_proflibs)
165           {
166             free (acc_proflib);
167 
168             acc_proflibs = acc_proflibs_sep + 1;
169           }
170       else
171           acc_proflibs = NULL;
172     }
173 #endif /* PLUGIN_SUPPORT */
174 }
175 
176 void
acc_prof_register(acc_event_t ev,acc_prof_callback cb,acc_register_t reg)177 acc_prof_register (acc_event_t ev, acc_prof_callback cb, acc_register_t reg)
178 {
179   gomp_debug (0, "%s: ev=%d, cb=%p, reg=%d\n",
180                 __FUNCTION__, (int) ev, (void *) cb, (int) reg);
181 
182 
183   /* For any events to be dispatched, the user first has to register a
184      callback, which makes this here a good place for enabling the whole
185      machinery.  */
186   if (!GOACC_PROF_ENABLED)
187     __atomic_store_n (&goacc_prof_enabled, true, MEMMODEL_RELEASE);
188 
189 
190   enum
191   {
192     EVENT_KIND_BOGUS,
193     EVENT_KIND_NORMAL,
194     /* As end events invoke callbacks in the reverse order, we register these
195        in the reverse order here.  */
196     EVENT_KIND_END,
197   } event_kind = EVENT_KIND_BOGUS;
198   switch (ev)
199     {
200     case acc_ev_none:
201     case acc_ev_device_init_start:
202     case acc_ev_device_shutdown_start:
203     case acc_ev_runtime_shutdown:
204     case acc_ev_create:
205     case acc_ev_delete:
206     case acc_ev_alloc:
207     case acc_ev_free:
208     case acc_ev_enter_data_start:
209     case acc_ev_exit_data_start:
210     case acc_ev_update_start:
211     case acc_ev_compute_construct_start:
212     case acc_ev_enqueue_launch_start:
213     case acc_ev_enqueue_upload_start:
214     case acc_ev_enqueue_download_start:
215     case acc_ev_wait_start:
216       event_kind = EVENT_KIND_NORMAL;
217       break;
218     case acc_ev_device_init_end:
219     case acc_ev_device_shutdown_end:
220     case acc_ev_enter_data_end:
221     case acc_ev_exit_data_end:
222     case acc_ev_update_end:
223     case acc_ev_compute_construct_end:
224     case acc_ev_enqueue_launch_end:
225     case acc_ev_enqueue_upload_end:
226     case acc_ev_enqueue_download_end:
227     case acc_ev_wait_end:
228       event_kind = EVENT_KIND_END;
229       break;
230     case acc_ev_last:
231       break;
232     }
233   if (event_kind == EVENT_KIND_BOGUS)
234     {
235       /* Silently ignore.  */
236       gomp_debug (0, "  ignoring request for bogus 'acc_event_t'\n");
237       return;
238     }
239 
240   bool bogus = true;
241   switch (reg)
242     {
243     case acc_reg:
244     case acc_toggle:
245     case acc_toggle_per_thread:
246       bogus = false;
247       break;
248     }
249   if (bogus)
250     {
251       /* Silently ignore.  */
252       gomp_debug (0, "  ignoring request with bogus 'acc_register_t'\n");
253       return;
254     }
255 
256   /* Special cases.  */
257   if (reg == acc_toggle)
258     {
259       if (cb == NULL)
260           {
261             gomp_debug (0, "  globally enabling callbacks\n");
262             gomp_mutex_lock (&goacc_prof_lock);
263             /* For 'acc_ev_none', this acts as a global toggle.  */
264             goacc_prof_callbacks_enabled[ev] = true;
265             gomp_mutex_unlock (&goacc_prof_lock);
266             return;
267           }
268       else if (ev == acc_ev_none && cb != NULL)
269           {
270             gomp_debug (0, "  ignoring request\n");
271             return;
272           }
273     }
274   else if (reg == acc_toggle_per_thread)
275     {
276       if (ev == acc_ev_none && cb == NULL)
277           {
278             gomp_debug (0, "  thread: enabling callbacks\n");
279             goacc_lazy_initialize ();
280             struct goacc_thread *thr = goacc_thread ();
281             thr->prof_callbacks_enabled = true;
282             return;
283           }
284       /* Silently ignore.  */
285       gomp_debug (0, "  ignoring bogus request\n");
286       return;
287     }
288 
289   gomp_mutex_lock (&goacc_prof_lock);
290 
291   struct goacc_prof_callback_entry *it, *it_p;
292   it = goacc_prof_callback_entries[ev];
293   it_p = NULL;
294   while (it)
295     {
296       if (it->cb == cb)
297           break;
298       it_p = it;
299       it = it->next;
300     }
301 
302   switch (reg)
303     {
304     case acc_reg:
305       /* If we already have this callback registered, just increment its
306            reference count.  */
307       if (it != NULL)
308           {
309             it->ref++;
310             gomp_debug (0, "  already registered;"
311                           " incrementing reference count to: %d\n", it->ref);
312           }
313       else
314           {
315             struct goacc_prof_callback_entry *e
316               = gomp_malloc (sizeof (struct goacc_prof_callback_entry));
317             e->cb = cb;
318             e->ref = 1;
319             e->enabled = true;
320             bool prepend = (event_kind == EVENT_KIND_END);
321             /* If we don't have any callback registered yet, also use the
322                'prepend' code path.  */
323             if (it_p == NULL)
324               prepend = true;
325             if (prepend)
326               {
327                 gomp_debug (0, "  prepending\n");
328                 e->next = goacc_prof_callback_entries[ev];
329                 goacc_prof_callback_entries[ev] = e;
330               }
331             else
332               {
333                 gomp_debug (0, "  appending\n");
334                 e->next = NULL;
335                 it_p->next = e;
336               }
337           }
338       break;
339 
340     case acc_toggle:
341       if (it == NULL)
342           {
343             gomp_debug (0, "  ignoring request: is not registered\n");
344             break;
345           }
346       else
347           {
348             gomp_debug (0, "  enabling\n");
349             it->enabled = true;
350           }
351       break;
352 
353     case acc_toggle_per_thread:
354       __builtin_unreachable ();
355     }
356 
357   gomp_mutex_unlock (&goacc_prof_lock);
358 }
359 
360 void
acc_prof_unregister(acc_event_t ev,acc_prof_callback cb,acc_register_t reg)361 acc_prof_unregister (acc_event_t ev, acc_prof_callback cb, acc_register_t reg)
362 {
363   gomp_debug (0, "%s: ev=%d, cb=%p, reg=%d\n",
364                 __FUNCTION__, (int) ev, (void *) cb, (int) reg);
365 
366   /* If profiling is not enabled, there cannot be anything to unregister.  */
367   if (!GOACC_PROF_ENABLED)
368     return;
369 
370   if (ev < acc_ev_none
371       || ev >= acc_ev_last)
372     {
373       /* Silently ignore.  */
374       gomp_debug (0, "  ignoring request for bogus 'acc_event_t'\n");
375       return;
376     }
377 
378   bool bogus = true;
379   switch (reg)
380     {
381     case acc_reg:
382     case acc_toggle:
383     case acc_toggle_per_thread:
384       bogus = false;
385       break;
386     }
387   if (bogus)
388     {
389       /* Silently ignore.  */
390       gomp_debug (0, "  ignoring request with bogus 'acc_register_t'\n");
391       return;
392     }
393 
394   /* Special cases.  */
395   if (reg == acc_toggle)
396     {
397       if (cb == NULL)
398           {
399             gomp_debug (0, "  globally disabling callbacks\n");
400             gomp_mutex_lock (&goacc_prof_lock);
401             /* For 'acc_ev_none', this acts as a global toggle.  */
402             goacc_prof_callbacks_enabled[ev] = false;
403             gomp_mutex_unlock (&goacc_prof_lock);
404             return;
405           }
406       else if (ev == acc_ev_none && cb != NULL)
407           {
408             gomp_debug (0, "  ignoring request\n");
409             return;
410           }
411     }
412   else if (reg == acc_toggle_per_thread)
413     {
414       if (ev == acc_ev_none && cb == NULL)
415           {
416             gomp_debug (0, "  thread: disabling callbacks\n");
417             goacc_lazy_initialize ();
418             struct goacc_thread *thr = goacc_thread ();
419             thr->prof_callbacks_enabled = false;
420             return;
421           }
422       /* Silently ignore.  */
423       gomp_debug (0, "  ignoring bogus request\n");
424       return;
425     }
426 
427   gomp_mutex_lock (&goacc_prof_lock);
428 
429   struct goacc_prof_callback_entry *it, *it_p;
430   it = goacc_prof_callback_entries[ev];
431   it_p = NULL;
432   while (it)
433     {
434       if (it->cb == cb)
435           break;
436       it_p = it;
437       it = it->next;
438     }
439 
440   switch (reg)
441     {
442     case acc_reg:
443       if (it == NULL)
444           {
445             /* Silently ignore.  */
446             gomp_debug (0, "  ignoring bogus request: is not registered\n");
447             break;
448           }
449       it->ref--;
450       gomp_debug (0, "  decrementing reference count to: %d\n", it->ref);
451       if (it->ref == 0)
452           {
453             if (it_p == NULL)
454               goacc_prof_callback_entries[ev] = it->next;
455             else
456               it_p->next = it->next;
457             free (it);
458           }
459       break;
460 
461     case acc_toggle:
462       if (it == NULL)
463           {
464             gomp_debug (0, "  ignoring request: is not registered\n");
465             break;
466           }
467       else
468           {
469             gomp_debug (0, "  disabling\n");
470             it->enabled = false;
471           }
472       break;
473 
474     case acc_toggle_per_thread:
475       __builtin_unreachable ();
476     }
477 
478   gomp_mutex_unlock (&goacc_prof_lock);
479 }
480 
481 acc_query_fn
acc_prof_lookup(const char * name)482 acc_prof_lookup (const char *name)
483 {
484   gomp_debug (0, "%s (%s)\n",
485                 __FUNCTION__, name ?: "NULL");
486 
487   return NULL;
488 }
489 
490 void
acc_register_library(acc_prof_reg reg,acc_prof_reg unreg,acc_prof_lookup_func lookup)491 acc_register_library (acc_prof_reg reg, acc_prof_reg unreg,
492                           acc_prof_lookup_func lookup)
493 {
494   gomp_fatal ("TODO");
495 }
496 
497 /* Prepare to dispatch events?  */
498 
499 bool
_goacc_profiling_dispatch_p(bool check_not_nested_p)500 _goacc_profiling_dispatch_p (bool check_not_nested_p)
501 {
502   gomp_debug (0, "%s\n", __FUNCTION__);
503 
504   bool ret;
505 
506   struct goacc_thread *thr = goacc_thread ();
507   if (__builtin_expect (thr == NULL, false))
508     {
509       /* If we don't have any per-thread state yet, that means that per-thread
510            callback dispatch has not been explicitly disabled (which only a call
511            to 'acc_prof_unregister' with 'acc_toggle_per_thread' would do, and
512            that would have allocated per-thread state via
513            'goacc_lazy_initialize'); initially, all callbacks for all events are
514            enabled.  */
515       gomp_debug (0, "  %s: don't have any per-thread state yet\n", __FUNCTION__);
516     }
517   else
518     {
519       if (check_not_nested_p)
520           {
521             /* No nesting.  */
522             assert (thr->prof_info == NULL);
523             assert (thr->api_info == NULL);
524           }
525 
526       if (__builtin_expect (!thr->prof_callbacks_enabled, true))
527           {
528             gomp_debug (0, "  %s: disabled for this thread\n", __FUNCTION__);
529             ret = false;
530             goto out;
531           }
532     }
533 
534   gomp_mutex_lock (&goacc_prof_lock);
535 
536   /* 'goacc_prof_callbacks_enabled[acc_ev_none]' acts as a global toggle.  */
537   if (__builtin_expect (!goacc_prof_callbacks_enabled[acc_ev_none], true))
538     {
539       gomp_debug (0, "  %s: disabled globally\n", __FUNCTION__);
540       ret = false;
541       goto out_unlock;
542     }
543   else
544     ret = true;
545 
546  out_unlock:
547   gomp_mutex_unlock (&goacc_prof_lock);
548 
549  out:
550   return ret;
551 }
552 
553 /* Set up to dispatch events?  */
554 
555 bool
_goacc_profiling_setup_p(struct goacc_thread * thr,acc_prof_info * prof_info,acc_api_info * api_info)556 _goacc_profiling_setup_p (struct goacc_thread *thr,
557                                 acc_prof_info *prof_info, acc_api_info *api_info)
558 {
559   gomp_debug (0, "%s (%p)\n", __FUNCTION__, thr);
560 
561   /* If we don't have any per-thread state yet, we can't register 'prof_info'
562      and 'api_info'.  */
563   if (__builtin_expect (thr == NULL, false))
564     {
565       gomp_debug (0, "Can't dispatch OpenACC Profiling Interface events for"
566                       " the current call, construct, or directive\n");
567       return false;
568     }
569 
570   if (thr->prof_info != NULL)
571     {
572       /* Profiling has already been set up for an outer construct.  In this
573            case, we continue to use the existing information, and thus return
574            'false' here.
575 
576            This can happen, for example, for an 'enter data' directive, which
577            sets up profiling, then calls into 'acc_copyin', which should not
578            again set up profiling, should not overwrite the existing
579            information.  */
580       return false;
581     }
582 
583   thr->prof_info = prof_info;
584   thr->api_info = api_info;
585 
586   /* Fill in some defaults.  */
587 
588   prof_info->event_type = -1; /* Must be set later.  */
589   prof_info->valid_bytes = _ACC_PROF_INFO_VALID_BYTES;
590   prof_info->version = _ACC_PROF_INFO_VERSION;
591   if (thr->dev)
592     {
593       prof_info->device_type = acc_device_type (thr->dev->type);
594       prof_info->device_number = thr->dev->target_id;
595     }
596   else
597     {
598       prof_info->device_type = -1;
599       prof_info->device_number = -1;
600     }
601   prof_info->thread_id = -1;
602   prof_info->async = acc_async_sync;
603   prof_info->async_queue = prof_info->async;
604   prof_info->src_file = NULL;
605   prof_info->func_name = NULL;
606   prof_info->line_no = -1;
607   prof_info->end_line_no = -1;
608   prof_info->func_line_no = -1;
609   prof_info->func_end_line_no = -1;
610 
611   api_info->device_api = acc_device_api_none;
612   api_info->valid_bytes = _ACC_API_INFO_VALID_BYTES;
613   api_info->device_type = prof_info->device_type;
614   api_info->vendor = -1;
615   api_info->device_handle = NULL;
616   api_info->context_handle = NULL;
617   api_info->async_handle = NULL;
618 
619   return true;
620 }
621 
622 /* Dispatch events.
623 
624    This must only be called if 'GOACC_PROFILING_DISPATCH_P' or
625    'GOACC_PROFILING_SETUP_P' returned a true result.  */
626 
627 void
goacc_profiling_dispatch(acc_prof_info * prof_info,acc_event_info * event_info,acc_api_info * apt_info)628 goacc_profiling_dispatch (acc_prof_info *prof_info, acc_event_info *event_info,
629                                 acc_api_info *apt_info)
630 {
631   acc_event_t event_type = event_info->event_type;
632   gomp_debug (0, "%s: event_type=%d\n", __FUNCTION__, (int) event_type);
633   assert (event_type > acc_ev_none
634             && event_type < acc_ev_last);
635 
636   gomp_mutex_lock (&goacc_prof_lock);
637 
638   if (!goacc_prof_callbacks_enabled[event_type])
639     {
640       gomp_debug (0, "  disabled for this event type\n");
641 
642       goto out_unlock;
643     }
644 
645   for (struct goacc_prof_callback_entry *e
646            = goacc_prof_callback_entries[event_type];
647        e != NULL;
648        e = e->next)
649     {
650       if (!e->enabled)
651           {
652             gomp_debug (0, "  disabled for callback %p\n", e->cb);
653             continue;
654           }
655 
656       gomp_debug (0, "  calling callback %p\n", e->cb);
657       e->cb (prof_info, event_info, apt_info);
658     }
659 
660  out_unlock:
661   gomp_mutex_unlock (&goacc_prof_lock);
662 }
663