1 /* Copyright (C) 2013-2022 Free Software Foundation, Inc.
2    Contributed by Jakub Jelinek <jakub@redhat.com>.
3 
4    This file is part of the GNU Offloading and Multi Processing Library
5    (libgomp).
6 
7    Libgomp is free software; you can redistribute it and/or modify it
8    under the terms of the GNU General Public License as published by
9    the Free Software Foundation; either version 3, or (at your option)
10    any later version.
11 
12    Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
13    WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
14    FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
15    more details.
16 
17    Under Section 7 of GPL version 3, you are granted additional
18    permissions described in the GCC Runtime Library Exception, version
19    3.1, as published by the Free Software Foundation.
20 
21    You should have received a copy of the GNU General Public License and
22    a copy of the GCC Runtime Library Exception along with this program;
23    see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
24    <http://www.gnu.org/licenses/>.  */
25 
26 /* This file contains the support of offloading.  */
27 
28 #include "libgomp.h"
29 #include "oacc-plugin.h"
30 #include "oacc-int.h"
31 #include "gomp-constants.h"
32 #include <limits.h>
33 #include <stdbool.h>
34 #include <stdlib.h>
35 #ifdef HAVE_INTTYPES_H
36 # include <inttypes.h>  /* For PRIu64.  */
37 #endif
38 #include <string.h>
39 #include <assert.h>
40 #include <errno.h>
41 
42 #ifdef PLUGIN_SUPPORT
43 #include <dlfcn.h>
44 #include "plugin-suffix.h"
45 #endif
46 
47 typedef uintptr_t *hash_entry_type;
htab_alloc(size_t size)48 static inline void * htab_alloc (size_t size) { return gomp_malloc (size); }
htab_free(void * ptr)49 static inline void htab_free (void *ptr) { free (ptr); }
50 #include "hashtab.h"
51 
52 static inline hashval_t
htab_hash(hash_entry_type element)53 htab_hash (hash_entry_type element)
54 {
55   return hash_pointer ((void *) element);
56 }
57 
58 static inline bool
htab_eq(hash_entry_type x,hash_entry_type y)59 htab_eq (hash_entry_type x, hash_entry_type y)
60 {
61   return x == y;
62 }
63 
64 #define FIELD_TGT_EMPTY (~(size_t) 0)
65 
66 static void gomp_target_init (void);
67 
68 /* The whole initialization code for offloading plugins is only run one.  */
69 static pthread_once_t gomp_is_initialized = PTHREAD_ONCE_INIT;
70 
71 /* Mutex for offload image registration.  */
72 static gomp_mutex_t register_lock;
73 
74 /* This structure describes an offload image.
75    It contains type of the target device, pointer to host table descriptor, and
76    pointer to target data.  */
77 struct offload_image_descr {
78   unsigned version;
79   enum offload_target_type type;
80   const void *host_table;
81   const void *target_data;
82 };
83 
84 /* Array of descriptors of offload images.  */
85 static struct offload_image_descr *offload_images;
86 
87 /* Total number of offload images.  */
88 static int num_offload_images;
89 
90 /* Array of descriptors for all available devices.  */
91 static struct gomp_device_descr *devices;
92 
93 /* Total number of available devices.  */
94 static int num_devices;
95 
96 /* Number of GOMP_OFFLOAD_CAP_OPENMP_400 devices.  */
97 static int num_devices_openmp;
98 
99 /* Similar to gomp_realloc, but release register_lock before gomp_fatal.  */
100 
101 static void *
gomp_realloc_unlock(void * old,size_t size)102 gomp_realloc_unlock (void *old, size_t size)
103 {
104   void *ret = realloc (old, size);
105   if (ret == NULL)
106     {
107       gomp_mutex_unlock (&register_lock);
108       gomp_fatal ("Out of memory allocating %lu bytes", (unsigned long) size);
109     }
110   return ret;
111 }
112 
113 attribute_hidden void
gomp_init_targets_once(void)114 gomp_init_targets_once (void)
115 {
116   (void) pthread_once (&gomp_is_initialized, gomp_target_init);
117 }
118 
119 attribute_hidden int
gomp_get_num_devices(void)120 gomp_get_num_devices (void)
121 {
122   gomp_init_targets_once ();
123   return num_devices_openmp;
124 }
125 
126 static struct gomp_device_descr *
resolve_device(int device_id)127 resolve_device (int device_id)
128 {
129   if (device_id == GOMP_DEVICE_ICV)
130     {
131       struct gomp_task_icv *icv = gomp_icv (false);
132       device_id = icv->default_device_var;
133     }
134 
135   if (device_id < 0 || device_id >= gomp_get_num_devices ())
136     {
137       if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_MANDATORY
138             && device_id != GOMP_DEVICE_HOST_FALLBACK
139             && device_id != num_devices_openmp)
140           gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, "
141                         "but device not found");
142 
143       return NULL;
144     }
145 
146   gomp_mutex_lock (&devices[device_id].lock);
147   if (devices[device_id].state == GOMP_DEVICE_UNINITIALIZED)
148     gomp_init_device (&devices[device_id]);
149   else if (devices[device_id].state == GOMP_DEVICE_FINALIZED)
150     {
151       gomp_mutex_unlock (&devices[device_id].lock);
152 
153       if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_MANDATORY)
154           gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, "
155                         "but device is finalized");
156 
157       return NULL;
158     }
159   gomp_mutex_unlock (&devices[device_id].lock);
160 
161   return &devices[device_id];
162 }
163 
164 
165 static inline splay_tree_key
gomp_map_lookup(splay_tree mem_map,splay_tree_key key)166 gomp_map_lookup (splay_tree mem_map, splay_tree_key key)
167 {
168   if (key->host_start != key->host_end)
169     return splay_tree_lookup (mem_map, key);
170 
171   key->host_end++;
172   splay_tree_key n = splay_tree_lookup (mem_map, key);
173   key->host_end--;
174   if (n)
175     return n;
176   key->host_start--;
177   n = splay_tree_lookup (mem_map, key);
178   key->host_start++;
179   if (n)
180     return n;
181   return splay_tree_lookup (mem_map, key);
182 }
183 
184 static inline splay_tree_key
gomp_map_0len_lookup(splay_tree mem_map,splay_tree_key key)185 gomp_map_0len_lookup (splay_tree mem_map, splay_tree_key key)
186 {
187   if (key->host_start != key->host_end)
188     return splay_tree_lookup (mem_map, key);
189 
190   key->host_end++;
191   splay_tree_key n = splay_tree_lookup (mem_map, key);
192   key->host_end--;
193   return n;
194 }
195 
196 static inline void
gomp_device_copy(struct gomp_device_descr * devicep,bool (* copy_func)(int,void *,const void *,size_t),const char * dst,void * dstaddr,const char * src,const void * srcaddr,size_t size)197 gomp_device_copy (struct gomp_device_descr *devicep,
198                       bool (*copy_func) (int, void *, const void *, size_t),
199                       const char *dst, void *dstaddr,
200                       const char *src, const void *srcaddr,
201                       size_t size)
202 {
203   if (!copy_func (devicep->target_id, dstaddr, srcaddr, size))
204     {
205       gomp_mutex_unlock (&devicep->lock);
206       gomp_fatal ("Copying of %s object [%p..%p) to %s object [%p..%p) failed",
207                       src, srcaddr, srcaddr + size, dst, dstaddr, dstaddr + size);
208     }
209 }
210 
211 static inline void
goacc_device_copy_async(struct gomp_device_descr * devicep,bool (* copy_func)(int,void *,const void *,size_t,struct goacc_asyncqueue *),const char * dst,void * dstaddr,const char * src,const void * srcaddr,const void * srcaddr_orig,size_t size,struct goacc_asyncqueue * aq)212 goacc_device_copy_async (struct gomp_device_descr *devicep,
213                                bool (*copy_func) (int, void *, const void *, size_t,
214                                                       struct goacc_asyncqueue *),
215                                const char *dst, void *dstaddr,
216                                const char *src, const void *srcaddr,
217                                const void *srcaddr_orig,
218                                size_t size, struct goacc_asyncqueue *aq)
219 {
220   if (!copy_func (devicep->target_id, dstaddr, srcaddr, size, aq))
221     {
222       gomp_mutex_unlock (&devicep->lock);
223       if (srcaddr_orig && srcaddr_orig != srcaddr)
224           gomp_fatal ("Copying of %s object [%p..%p)"
225                         " via buffer %s object [%p..%p)"
226                         " to %s object [%p..%p) failed",
227                         src, srcaddr_orig, srcaddr_orig + size,
228                         src, srcaddr, srcaddr + size,
229                         dst, dstaddr, dstaddr + size);
230       else
231           gomp_fatal ("Copying of %s object [%p..%p)"
232                         " to %s object [%p..%p) failed",
233                         src, srcaddr, srcaddr + size,
234                         dst, dstaddr, dstaddr + size);
235     }
236 }
237 
238 /* Infrastructure for coalescing adjacent or nearly adjacent (in device addresses)
239    host to device memory transfers.  */
240 
241 struct gomp_coalesce_chunk
242 {
243   /* The starting and ending point of a coalesced chunk of memory.  */
244   size_t start, end;
245 };
246 
247 struct gomp_coalesce_buf
248 {
249   /* Buffer into which gomp_copy_host2dev will memcpy data and from which
250      it will be copied to the device.  */
251   void *buf;
252   struct target_mem_desc *tgt;
253   /* Array with offsets, chunks[i].start is the starting offset and
254      chunks[i].end ending offset relative to tgt->tgt_start device address
255      of chunks which are to be copied to buf and later copied to device.  */
256   struct gomp_coalesce_chunk *chunks;
257   /* Number of chunks in chunks array, or -1 if coalesce buffering should not
258      be performed.  */
259   long chunk_cnt;
260   /* During construction of chunks array, how many memory regions are within
261      the last chunk.  If there is just one memory region for a chunk, we copy
262      it directly to device rather than going through buf.  */
263   long use_cnt;
264 };
265 
266 /* Maximum size of memory region considered for coalescing.  Larger copies
267    are performed directly.  */
268 #define MAX_COALESCE_BUF_SIZE (32 * 1024)
269 
270 /* Maximum size of a gap in between regions to consider them being copied
271    within the same chunk.  All the device offsets considered are within
272    newly allocated device memory, so it isn't fatal if we copy some padding
273    in between from host to device.  The gaps come either from alignment
274    padding or from memory regions which are not supposed to be copied from
275    host to device (e.g. map(alloc:), map(from:) etc.).  */
276 #define MAX_COALESCE_BUF_GAP  (4 * 1024)
277 
278 /* Add region with device tgt_start relative offset and length to CBUF.
279 
280    This must not be used for asynchronous copies, because the host data might
281    not be computed yet (by an earlier asynchronous compute region, for
282    example).
283    TODO ... but we could allow CBUF usage for EPHEMERAL data?  (Open question:
284    is it more performant to use libgomp CBUF buffering or individual device
285    asyncronous copying?)  */
286 
287 static inline void
gomp_coalesce_buf_add(struct gomp_coalesce_buf * cbuf,size_t start,size_t len)288 gomp_coalesce_buf_add (struct gomp_coalesce_buf *cbuf, size_t start, size_t len)
289 {
290   if (len > MAX_COALESCE_BUF_SIZE || len == 0)
291     return;
292   if (cbuf->chunk_cnt)
293     {
294       if (cbuf->chunk_cnt < 0)
295           return;
296       if (start < cbuf->chunks[cbuf->chunk_cnt - 1].end)
297           {
298             cbuf->chunk_cnt = -1;
299             return;
300           }
301       if (start < cbuf->chunks[cbuf->chunk_cnt - 1].end + MAX_COALESCE_BUF_GAP)
302           {
303             cbuf->chunks[cbuf->chunk_cnt - 1].end = start + len;
304             cbuf->use_cnt++;
305             return;
306           }
307       /* If the last chunk is only used by one mapping, discard it,
308            as it will be one host to device copy anyway and
309            memcpying it around will only waste cycles.  */
310       if (cbuf->use_cnt == 1)
311           cbuf->chunk_cnt--;
312     }
313   cbuf->chunks[cbuf->chunk_cnt].start = start;
314   cbuf->chunks[cbuf->chunk_cnt].end = start + len;
315   cbuf->chunk_cnt++;
316   cbuf->use_cnt = 1;
317 }
318 
319 /* Return true for mapping kinds which need to copy data from the
320    host to device for regions that weren't previously mapped.  */
321 
322 static inline bool
gomp_to_device_kind_p(int kind)323 gomp_to_device_kind_p (int kind)
324 {
325   switch (kind)
326     {
327     case GOMP_MAP_ALLOC:
328     case GOMP_MAP_FROM:
329     case GOMP_MAP_FORCE_ALLOC:
330     case GOMP_MAP_FORCE_FROM:
331     case GOMP_MAP_ALWAYS_FROM:
332       return false;
333     default:
334       return true;
335     }
336 }
337 
338 /* Copy host memory to an offload device.  In asynchronous mode (if AQ is
339    non-NULL), when the source data is stack or may otherwise be deallocated
340    before the asynchronous copy takes place, EPHEMERAL must be passed as
341    TRUE.  */
342 
343 attribute_hidden void
gomp_copy_host2dev(struct gomp_device_descr * devicep,struct goacc_asyncqueue * aq,void * d,const void * h,size_t sz,bool ephemeral,struct gomp_coalesce_buf * cbuf)344 gomp_copy_host2dev (struct gomp_device_descr *devicep,
345                         struct goacc_asyncqueue *aq,
346                         void *d, const void *h, size_t sz,
347                         bool ephemeral, struct gomp_coalesce_buf *cbuf)
348 {
349   if (__builtin_expect (aq != NULL, 0))
350     {
351       /* See 'gomp_coalesce_buf_add'.  */
352       assert (!cbuf);
353 
354       void *h_buf = (void *) h;
355       if (ephemeral)
356           {
357             /* We're queueing up an asynchronous copy from data that may
358                disappear before the transfer takes place (i.e. because it is a
359                stack local in a function that is no longer executing).  Make a
360                copy of the data into a temporary buffer in those cases.  */
361             h_buf = gomp_malloc (sz);
362             memcpy (h_buf, h, sz);
363           }
364       goacc_device_copy_async (devicep, devicep->openacc.async.host2dev_func,
365                                      "dev", d, "host", h_buf, h, sz, aq);
366       if (ephemeral)
367           /* Free temporary buffer once the transfer has completed.  */
368           devicep->openacc.async.queue_callback_func (aq, free, h_buf);
369 
370       return;
371     }
372 
373   if (cbuf)
374     {
375       uintptr_t doff = (uintptr_t) d - cbuf->tgt->tgt_start;
376       if (doff < cbuf->chunks[cbuf->chunk_cnt - 1].end)
377           {
378             long first = 0;
379             long last = cbuf->chunk_cnt - 1;
380             while (first <= last)
381               {
382                 long middle = (first + last) >> 1;
383                 if (cbuf->chunks[middle].end <= doff)
384                     first = middle + 1;
385                 else if (cbuf->chunks[middle].start <= doff)
386                     {
387                       if (doff + sz > cbuf->chunks[middle].end)
388                         {
389                           gomp_mutex_unlock (&devicep->lock);
390                           gomp_fatal ("internal libgomp cbuf error");
391                         }
392                       memcpy ((char *) cbuf->buf + (doff - cbuf->chunks[0].start),
393                                 h, sz);
394                       return;
395                     }
396                 else
397                     last = middle - 1;
398               }
399           }
400     }
401 
402   gomp_device_copy (devicep, devicep->host2dev_func, "dev", d, "host", h, sz);
403 }
404 
405 attribute_hidden void
gomp_copy_dev2host(struct gomp_device_descr * devicep,struct goacc_asyncqueue * aq,void * h,const void * d,size_t sz)406 gomp_copy_dev2host (struct gomp_device_descr *devicep,
407                         struct goacc_asyncqueue *aq,
408                         void *h, const void *d, size_t sz)
409 {
410   if (__builtin_expect (aq != NULL, 0))
411     goacc_device_copy_async (devicep, devicep->openacc.async.dev2host_func,
412                                    "host", h, "dev", d, NULL, sz, aq);
413   else
414     gomp_device_copy (devicep, devicep->dev2host_func, "host", h, "dev", d, sz);
415 }
416 
417 static void
gomp_free_device_memory(struct gomp_device_descr * devicep,void * devptr)418 gomp_free_device_memory (struct gomp_device_descr *devicep, void *devptr)
419 {
420   if (!devicep->free_func (devicep->target_id, devptr))
421     {
422       gomp_mutex_unlock (&devicep->lock);
423       gomp_fatal ("error in freeing device memory block at %p", devptr);
424     }
425 }
426 
427 /* Increment reference count of a splay_tree_key region K by 1.
428    If REFCOUNT_SET != NULL, use it to track already seen refcounts, and only
429    increment the value if refcount is not yet contained in the set (used for
430    OpenMP 5.0, which specifies that a region's refcount is adjusted at most
431    once for each construct).  */
432 
433 static inline void
gomp_increment_refcount(splay_tree_key k,htab_t * refcount_set)434 gomp_increment_refcount (splay_tree_key k, htab_t *refcount_set)
435 {
436   if (k == NULL || k->refcount == REFCOUNT_INFINITY)
437     return;
438 
439   uintptr_t *refcount_ptr = &k->refcount;
440 
441   if (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount))
442     refcount_ptr = &k->structelem_refcount;
443   else if (REFCOUNT_STRUCTELEM_P (k->refcount))
444     refcount_ptr = k->structelem_refcount_ptr;
445 
446   if (refcount_set)
447     {
448       if (htab_find (*refcount_set, refcount_ptr))
449           return;
450       uintptr_t **slot = htab_find_slot (refcount_set, refcount_ptr, INSERT);
451       *slot = refcount_ptr;
452     }
453 
454   *refcount_ptr += 1;
455   return;
456 }
457 
458 /* Decrement reference count of a splay_tree_key region K by 1, or if DELETE_P
459    is true, set reference count to zero. If REFCOUNT_SET != NULL, use it to
460    track already seen refcounts, and only adjust the value if refcount is not
461    yet contained in the set (like gomp_increment_refcount).
462 
463    Return out-values: set *DO_COPY to true if we set the refcount to zero, or
464    it is already zero and we know we decremented it earlier. This signals that
465    associated maps should be copied back to host.
466 
467    *DO_REMOVE is set to true when we this is the first handling of this refcount
468    and we are setting it to zero. This signals a removal of this key from the
469    splay-tree map.
470 
471    Copy and removal are separated due to cases like handling of structure
472    elements, e.g. each map of a structure element representing a possible copy
473    out of a structure field has to be handled individually, but we only signal
474    removal for one (the first encountered) sibing map.  */
475 
476 static inline void
gomp_decrement_refcount(splay_tree_key k,htab_t * refcount_set,bool delete_p,bool * do_copy,bool * do_remove)477 gomp_decrement_refcount (splay_tree_key k, htab_t *refcount_set, bool delete_p,
478                                bool *do_copy, bool *do_remove)
479 {
480   if (k == NULL || k->refcount == REFCOUNT_INFINITY)
481     {
482       *do_copy = *do_remove = false;
483       return;
484     }
485 
486   uintptr_t *refcount_ptr = &k->refcount;
487 
488   if (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount))
489     refcount_ptr = &k->structelem_refcount;
490   else if (REFCOUNT_STRUCTELEM_P (k->refcount))
491     refcount_ptr = k->structelem_refcount_ptr;
492 
493   bool new_encountered_refcount;
494   bool set_to_zero = false;
495   bool is_zero = false;
496 
497   uintptr_t orig_refcount = *refcount_ptr;
498 
499   if (refcount_set)
500     {
501       if (htab_find (*refcount_set, refcount_ptr))
502           {
503             new_encountered_refcount = false;
504             goto end;
505           }
506 
507       uintptr_t **slot = htab_find_slot (refcount_set, refcount_ptr, INSERT);
508       *slot = refcount_ptr;
509       new_encountered_refcount = true;
510     }
511   else
512     /* If no refcount_set being used, assume all keys are being decremented
513        for the first time.  */
514     new_encountered_refcount = true;
515 
516   if (delete_p)
517     *refcount_ptr = 0;
518   else if (*refcount_ptr > 0)
519     *refcount_ptr -= 1;
520 
521  end:
522   if (*refcount_ptr == 0)
523     {
524       if (orig_refcount > 0)
525           set_to_zero = true;
526 
527       is_zero = true;
528     }
529 
530   *do_copy = (set_to_zero || (!new_encountered_refcount && is_zero));
531   *do_remove = (new_encountered_refcount && set_to_zero);
532 }
533 
534 /* Handle the case where gomp_map_lookup, splay_tree_lookup or
535    gomp_map_0len_lookup found oldn for newn.
536    Helper function of gomp_map_vars.  */
537 
538 static inline void
gomp_map_vars_existing(struct gomp_device_descr * devicep,struct goacc_asyncqueue * aq,splay_tree_key oldn,splay_tree_key newn,struct target_var_desc * tgt_var,unsigned char kind,bool always_to_flag,bool implicit,struct gomp_coalesce_buf * cbuf,htab_t * refcount_set)539 gomp_map_vars_existing (struct gomp_device_descr *devicep,
540                               struct goacc_asyncqueue *aq, splay_tree_key oldn,
541                               splay_tree_key newn, struct target_var_desc *tgt_var,
542                               unsigned char kind, bool always_to_flag, bool implicit,
543                               struct gomp_coalesce_buf *cbuf,
544                               htab_t *refcount_set)
545 {
546   assert (kind != GOMP_MAP_ATTACH
547             || kind != GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION);
548 
549   tgt_var->key = oldn;
550   tgt_var->copy_from = GOMP_MAP_COPY_FROM_P (kind);
551   tgt_var->always_copy_from = GOMP_MAP_ALWAYS_FROM_P (kind);
552   tgt_var->is_attach = false;
553   tgt_var->offset = newn->host_start - oldn->host_start;
554 
555   /* For implicit maps, old contained in new is valid.  */
556   bool implicit_subset = (implicit
557                                 && newn->host_start <= oldn->host_start
558                                 && oldn->host_end <= newn->host_end);
559   if (implicit_subset)
560     tgt_var->length = oldn->host_end - oldn->host_start;
561   else
562     tgt_var->length = newn->host_end - newn->host_start;
563 
564   if ((kind & GOMP_MAP_FLAG_FORCE)
565       /* For implicit maps, old contained in new is valid.  */
566       || !(implicit_subset
567              /* Otherwise, new contained inside old is considered valid.  */
568              || (oldn->host_start <= newn->host_start
569                  && newn->host_end <= oldn->host_end)))
570     {
571       gomp_mutex_unlock (&devicep->lock);
572       gomp_fatal ("Trying to map into device [%p..%p) object when "
573                       "[%p..%p) is already mapped",
574                       (void *) newn->host_start, (void *) newn->host_end,
575                       (void *) oldn->host_start, (void *) oldn->host_end);
576     }
577 
578   if (GOMP_MAP_ALWAYS_TO_P (kind) || always_to_flag)
579     {
580       /* Implicit + always should not happen. If this does occur, below
581            address/length adjustment is a TODO.  */
582       assert (!implicit_subset);
583 
584       if (oldn->aux && oldn->aux->attach_count)
585           {
586             /* We have to be careful not to overwrite still attached pointers
587                during the copyback to host.  */
588             uintptr_t addr = newn->host_start;
589             while (addr < newn->host_end)
590               {
591                 size_t i = (addr - oldn->host_start) / sizeof (void *);
592                 if (oldn->aux->attach_count[i] == 0)
593                     gomp_copy_host2dev (devicep, aq,
594                                             (void *) (oldn->tgt->tgt_start
595                                                         + oldn->tgt_offset
596                                                         + addr - oldn->host_start),
597                                             (void *) addr,
598                                             sizeof (void *), false, cbuf);
599                 addr += sizeof (void *);
600               }
601           }
602       else
603           gomp_copy_host2dev (devicep, aq,
604                                   (void *) (oldn->tgt->tgt_start + oldn->tgt_offset
605                                               + newn->host_start - oldn->host_start),
606                                   (void *) newn->host_start,
607                                   newn->host_end - newn->host_start, false, cbuf);
608     }
609 
610   gomp_increment_refcount (oldn, refcount_set);
611 }
612 
613 static int
get_kind(bool short_mapkind,void * kinds,int idx)614 get_kind (bool short_mapkind, void *kinds, int idx)
615 {
616   if (!short_mapkind)
617     return ((unsigned char *) kinds)[idx];
618 
619   int val = ((unsigned short *) kinds)[idx];
620   if (GOMP_MAP_IMPLICIT_P (val))
621     val &= ~GOMP_MAP_IMPLICIT;
622   return val;
623 }
624 
625 
626 static bool
get_implicit(bool short_mapkind,void * kinds,int idx)627 get_implicit (bool short_mapkind, void *kinds, int idx)
628 {
629   if (!short_mapkind)
630     return false;
631 
632   int val = ((unsigned short *) kinds)[idx];
633   return GOMP_MAP_IMPLICIT_P (val);
634 }
635 
636 static void
gomp_map_pointer(struct target_mem_desc * tgt,struct goacc_asyncqueue * aq,uintptr_t host_ptr,uintptr_t target_offset,uintptr_t bias,struct gomp_coalesce_buf * cbuf,bool allow_zero_length_array_sections)637 gomp_map_pointer (struct target_mem_desc *tgt, struct goacc_asyncqueue *aq,
638                       uintptr_t host_ptr, uintptr_t target_offset, uintptr_t bias,
639                       struct gomp_coalesce_buf *cbuf,
640                       bool allow_zero_length_array_sections)
641 {
642   struct gomp_device_descr *devicep = tgt->device_descr;
643   struct splay_tree_s *mem_map = &devicep->mem_map;
644   struct splay_tree_key_s cur_node;
645 
646   cur_node.host_start = host_ptr;
647   if (cur_node.host_start == (uintptr_t) NULL)
648     {
649       cur_node.tgt_offset = (uintptr_t) NULL;
650       gomp_copy_host2dev (devicep, aq,
651                                 (void *) (tgt->tgt_start + target_offset),
652                                 (void *) &cur_node.tgt_offset, sizeof (void *),
653                                 true, cbuf);
654       return;
655     }
656   /* Add bias to the pointer value.  */
657   cur_node.host_start += bias;
658   cur_node.host_end = cur_node.host_start;
659   splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
660   if (n == NULL)
661     {
662       if (allow_zero_length_array_sections)
663           cur_node.tgt_offset = 0;
664       else
665           {
666             gomp_mutex_unlock (&devicep->lock);
667             gomp_fatal ("Pointer target of array section wasn't mapped");
668           }
669     }
670   else
671     {
672       cur_node.host_start -= n->host_start;
673       cur_node.tgt_offset
674           = n->tgt->tgt_start + n->tgt_offset + cur_node.host_start;
675       /* At this point tgt_offset is target address of the
676            array section.  Now subtract bias to get what we want
677            to initialize the pointer with.  */
678       cur_node.tgt_offset -= bias;
679     }
680   gomp_copy_host2dev (devicep, aq, (void *) (tgt->tgt_start + target_offset),
681                           (void *) &cur_node.tgt_offset, sizeof (void *),
682                           true, cbuf);
683 }
684 
685 static void
gomp_map_fields_existing(struct target_mem_desc * tgt,struct goacc_asyncqueue * aq,splay_tree_key n,size_t first,size_t i,void ** hostaddrs,size_t * sizes,void * kinds,struct gomp_coalesce_buf * cbuf,htab_t * refcount_set)686 gomp_map_fields_existing (struct target_mem_desc *tgt,
687                                 struct goacc_asyncqueue *aq, splay_tree_key n,
688                                 size_t first, size_t i, void **hostaddrs,
689                                 size_t *sizes, void *kinds,
690                                 struct gomp_coalesce_buf *cbuf, htab_t *refcount_set)
691 {
692   struct gomp_device_descr *devicep = tgt->device_descr;
693   struct splay_tree_s *mem_map = &devicep->mem_map;
694   struct splay_tree_key_s cur_node;
695   int kind;
696   bool implicit;
697   const bool short_mapkind = true;
698   const int typemask = short_mapkind ? 0xff : 0x7;
699 
700   cur_node.host_start = (uintptr_t) hostaddrs[i];
701   cur_node.host_end = cur_node.host_start + sizes[i];
702   splay_tree_key n2 = splay_tree_lookup (mem_map, &cur_node);
703   kind = get_kind (short_mapkind, kinds, i);
704   implicit = get_implicit (short_mapkind, kinds, i);
705   if (n2
706       && n2->tgt == n->tgt
707       && n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset)
708     {
709       gomp_map_vars_existing (devicep, aq, n2, &cur_node, &tgt->list[i],
710                                     kind & typemask, false, implicit, cbuf,
711                                     refcount_set);
712       return;
713     }
714   if (sizes[i] == 0)
715     {
716       if (cur_node.host_start > (uintptr_t) hostaddrs[first - 1])
717           {
718             cur_node.host_start--;
719             n2 = splay_tree_lookup (mem_map, &cur_node);
720             cur_node.host_start++;
721             if (n2
722                 && n2->tgt == n->tgt
723                 && n2->host_start - n->host_start
724                      == n2->tgt_offset - n->tgt_offset)
725               {
726                 gomp_map_vars_existing (devicep, aq, n2, &cur_node, &tgt->list[i],
727                                               kind & typemask, false, implicit, cbuf,
728                                               refcount_set);
729                 return;
730               }
731           }
732       cur_node.host_end++;
733       n2 = splay_tree_lookup (mem_map, &cur_node);
734       cur_node.host_end--;
735       if (n2
736             && n2->tgt == n->tgt
737             && n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset)
738           {
739             gomp_map_vars_existing (devicep, aq, n2, &cur_node, &tgt->list[i],
740                                           kind & typemask, false, implicit, cbuf,
741                                           refcount_set);
742             return;
743           }
744     }
745   gomp_mutex_unlock (&devicep->lock);
746   gomp_fatal ("Trying to map into device [%p..%p) structure element when "
747                 "other mapped elements from the same structure weren't mapped "
748                 "together with it", (void *) cur_node.host_start,
749                 (void *) cur_node.host_end);
750 }
751 
752 attribute_hidden void
gomp_attach_pointer(struct gomp_device_descr * devicep,struct goacc_asyncqueue * aq,splay_tree mem_map,splay_tree_key n,uintptr_t attach_to,size_t bias,struct gomp_coalesce_buf * cbufp,bool allow_zero_length_array_sections)753 gomp_attach_pointer (struct gomp_device_descr *devicep,
754                          struct goacc_asyncqueue *aq, splay_tree mem_map,
755                          splay_tree_key n, uintptr_t attach_to, size_t bias,
756                          struct gomp_coalesce_buf *cbufp,
757                          bool allow_zero_length_array_sections)
758 {
759   struct splay_tree_key_s s;
760   size_t size, idx;
761 
762   if (n == NULL)
763     {
764       gomp_mutex_unlock (&devicep->lock);
765       gomp_fatal ("enclosing struct not mapped for attach");
766     }
767 
768   size = (n->host_end - n->host_start + sizeof (void *) - 1) / sizeof (void *);
769   /* We might have a pointer in a packed struct: however we cannot have more
770      than one such pointer in each pointer-sized portion of the struct, so
771      this is safe.  */
772   idx = (attach_to - n->host_start) / sizeof (void *);
773 
774   if (!n->aux)
775     n->aux = gomp_malloc_cleared (sizeof (struct splay_tree_aux));
776 
777   if (!n->aux->attach_count)
778     n->aux->attach_count
779       = gomp_malloc_cleared (sizeof (*n->aux->attach_count) * size);
780 
781   if (n->aux->attach_count[idx] < UINTPTR_MAX)
782     n->aux->attach_count[idx]++;
783   else
784     {
785       gomp_mutex_unlock (&devicep->lock);
786       gomp_fatal ("attach count overflow");
787     }
788 
789   if (n->aux->attach_count[idx] == 1)
790     {
791       uintptr_t devptr = n->tgt->tgt_start + n->tgt_offset + attach_to
792                                - n->host_start;
793       uintptr_t target = (uintptr_t) *(void **) attach_to;
794       splay_tree_key tn;
795       uintptr_t data;
796 
797       if ((void *) target == NULL)
798           {
799             gomp_mutex_unlock (&devicep->lock);
800             gomp_fatal ("attempt to attach null pointer");
801           }
802 
803       s.host_start = target + bias;
804       s.host_end = s.host_start + 1;
805       tn = splay_tree_lookup (mem_map, &s);
806 
807       if (!tn)
808           {
809             if (allow_zero_length_array_sections)
810               /* When allowing attachment to zero-length array sections, we
811                  allow attaching to NULL pointers when the target region is not
812                  mapped.  */
813               data = 0;
814             else
815               {
816                 gomp_mutex_unlock (&devicep->lock);
817                 gomp_fatal ("pointer target not mapped for attach");
818               }
819           }
820       else
821           data = tn->tgt->tgt_start + tn->tgt_offset + target - tn->host_start;
822 
823       gomp_debug (1,
824                       "%s: attaching host %p, target %p (struct base %p) to %p\n",
825                       __FUNCTION__, (void *) attach_to, (void *) devptr,
826                       (void *) (n->tgt->tgt_start + n->tgt_offset), (void *) data);
827 
828       gomp_copy_host2dev (devicep, aq, (void *) devptr, (void *) &data,
829                                 sizeof (void *), true, cbufp);
830     }
831   else
832     gomp_debug (1, "%s: attach count for %p -> %u\n", __FUNCTION__,
833                     (void *) attach_to, (int) n->aux->attach_count[idx]);
834 }
835 
836 attribute_hidden void
gomp_detach_pointer(struct gomp_device_descr * devicep,struct goacc_asyncqueue * aq,splay_tree_key n,uintptr_t detach_from,bool finalize,struct gomp_coalesce_buf * cbufp)837 gomp_detach_pointer (struct gomp_device_descr *devicep,
838                          struct goacc_asyncqueue *aq, splay_tree_key n,
839                          uintptr_t detach_from, bool finalize,
840                          struct gomp_coalesce_buf *cbufp)
841 {
842   size_t idx;
843 
844   if (n == NULL)
845     {
846       gomp_mutex_unlock (&devicep->lock);
847       gomp_fatal ("enclosing struct not mapped for detach");
848     }
849 
850   idx = (detach_from - n->host_start) / sizeof (void *);
851 
852   if (!n->aux || !n->aux->attach_count)
853     {
854       gomp_mutex_unlock (&devicep->lock);
855       gomp_fatal ("no attachment counters for struct");
856     }
857 
858   if (finalize)
859     n->aux->attach_count[idx] = 1;
860 
861   if (n->aux->attach_count[idx] == 0)
862     {
863       gomp_mutex_unlock (&devicep->lock);
864       gomp_fatal ("attach count underflow");
865     }
866   else
867     n->aux->attach_count[idx]--;
868 
869   if (n->aux->attach_count[idx] == 0)
870     {
871       uintptr_t devptr = n->tgt->tgt_start + n->tgt_offset + detach_from
872                                - n->host_start;
873       uintptr_t target = (uintptr_t) *(void **) detach_from;
874 
875       gomp_debug (1,
876                       "%s: detaching host %p, target %p (struct base %p) to %p\n",
877                       __FUNCTION__, (void *) detach_from, (void *) devptr,
878                       (void *) (n->tgt->tgt_start + n->tgt_offset),
879                       (void *) target);
880 
881       gomp_copy_host2dev (devicep, aq, (void *) devptr, (void *) &target,
882                                 sizeof (void *), true, cbufp);
883     }
884   else
885     gomp_debug (1, "%s: attach count for %p -> %u\n", __FUNCTION__,
886                     (void *) detach_from, (int) n->aux->attach_count[idx]);
887 }
888 
889 attribute_hidden uintptr_t
gomp_map_val(struct target_mem_desc * tgt,void ** hostaddrs,size_t i)890 gomp_map_val (struct target_mem_desc *tgt, void **hostaddrs, size_t i)
891 {
892   if (tgt->list[i].key != NULL)
893     return tgt->list[i].key->tgt->tgt_start
894              + tgt->list[i].key->tgt_offset
895              + tgt->list[i].offset;
896 
897   switch (tgt->list[i].offset)
898     {
899     case OFFSET_INLINED:
900       return (uintptr_t) hostaddrs[i];
901 
902     case OFFSET_POINTER:
903       return 0;
904 
905     case OFFSET_STRUCT:
906       return tgt->list[i + 1].key->tgt->tgt_start
907                + tgt->list[i + 1].key->tgt_offset
908                + tgt->list[i + 1].offset
909                + (uintptr_t) hostaddrs[i]
910                - (uintptr_t) hostaddrs[i + 1];
911 
912     default:
913       return tgt->tgt_start + tgt->list[i].offset;
914     }
915 }
916 
917 static inline __attribute__((always_inline)) struct target_mem_desc *
gomp_map_vars_internal(struct gomp_device_descr * devicep,struct goacc_asyncqueue * aq,size_t mapnum,void ** hostaddrs,void ** devaddrs,size_t * sizes,void * kinds,bool short_mapkind,htab_t * refcount_set,enum gomp_map_vars_kind pragma_kind)918 gomp_map_vars_internal (struct gomp_device_descr *devicep,
919                               struct goacc_asyncqueue *aq, size_t mapnum,
920                               void **hostaddrs, void **devaddrs, size_t *sizes,
921                               void *kinds, bool short_mapkind,
922                               htab_t *refcount_set,
923                               enum gomp_map_vars_kind pragma_kind)
924 {
925   size_t i, tgt_align, tgt_size, not_found_cnt = 0;
926   bool has_firstprivate = false;
927   bool has_always_ptrset = false;
928   bool openmp_p = (pragma_kind & GOMP_MAP_VARS_OPENACC) == 0;
929   const int rshift = short_mapkind ? 8 : 3;
930   const int typemask = short_mapkind ? 0xff : 0x7;
931   struct splay_tree_s *mem_map = &devicep->mem_map;
932   struct splay_tree_key_s cur_node;
933   struct target_mem_desc *tgt
934     = gomp_malloc (sizeof (*tgt) + sizeof (tgt->list[0]) * mapnum);
935   tgt->list_count = mapnum;
936   tgt->refcount = (pragma_kind & GOMP_MAP_VARS_ENTER_DATA) ? 0 : 1;
937   tgt->device_descr = devicep;
938   tgt->prev = NULL;
939   struct gomp_coalesce_buf cbuf, *cbufp = NULL;
940 
941   if (mapnum == 0)
942     {
943       tgt->tgt_start = 0;
944       tgt->tgt_end = 0;
945       return tgt;
946     }
947 
948   tgt_align = sizeof (void *);
949   tgt_size = 0;
950   cbuf.chunks = NULL;
951   cbuf.chunk_cnt = -1;
952   cbuf.use_cnt = 0;
953   cbuf.buf = NULL;
954   if (mapnum > 1 || pragma_kind == GOMP_MAP_VARS_TARGET)
955     {
956       size_t chunks_size = (mapnum + 1) * sizeof (struct gomp_coalesce_chunk);
957       cbuf.chunks = (struct gomp_coalesce_chunk *) gomp_alloca (chunks_size);
958       cbuf.chunk_cnt = 0;
959     }
960   if (pragma_kind == GOMP_MAP_VARS_TARGET)
961     {
962       size_t align = 4 * sizeof (void *);
963       tgt_align = align;
964       tgt_size = mapnum * sizeof (void *);
965       cbuf.chunk_cnt = 1;
966       cbuf.use_cnt = 1 + (mapnum > 1);
967       cbuf.chunks[0].start = 0;
968       cbuf.chunks[0].end = tgt_size;
969     }
970 
971   gomp_mutex_lock (&devicep->lock);
972   if (devicep->state == GOMP_DEVICE_FINALIZED)
973     {
974       gomp_mutex_unlock (&devicep->lock);
975       free (tgt);
976       return NULL;
977     }
978 
979   for (i = 0; i < mapnum; i++)
980     {
981       int kind = get_kind (short_mapkind, kinds, i);
982       bool implicit = get_implicit (short_mapkind, kinds, i);
983       if (hostaddrs[i] == NULL
984             || (kind & typemask) == GOMP_MAP_FIRSTPRIVATE_INT)
985           {
986             tgt->list[i].key = NULL;
987             tgt->list[i].offset = OFFSET_INLINED;
988             continue;
989           }
990       else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR
991                  || (kind & typemask) == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT)
992           {
993             tgt->list[i].key = NULL;
994             if (!not_found_cnt)
995               {
996                 /* In OpenMP < 5.0 and OpenACC the mapping has to be done
997                      on a separate construct prior to using use_device_{addr,ptr}.
998                      In OpenMP 5.0, map directives need to be ordered by the
999                      middle-end before the use_device_* clauses.  If
1000                      !not_found_cnt, all mappings requested (if any) are already
1001                      mapped, so use_device_{addr,ptr} can be resolved right away.
1002                      Otherwise, if not_found_cnt, gomp_map_lookup might fail
1003                      now but would succeed after performing the mappings in the
1004                      following loop.  We can't defer this always to the second
1005                      loop, because it is not even invoked when !not_found_cnt
1006                      after the first loop.  */
1007                 cur_node.host_start = (uintptr_t) hostaddrs[i];
1008                 cur_node.host_end = cur_node.host_start;
1009                 splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
1010                 if (n != NULL)
1011                     {
1012                       cur_node.host_start -= n->host_start;
1013                       hostaddrs[i]
1014                         = (void *) (n->tgt->tgt_start + n->tgt_offset
1015                                         + cur_node.host_start);
1016                     }
1017                 else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR)
1018                     {
1019                       gomp_mutex_unlock (&devicep->lock);
1020                       gomp_fatal ("use_device_ptr pointer wasn't mapped");
1021                     }
1022                 else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT)
1023                     /* If not present, continue using the host address.  */
1024                     ;
1025                 else
1026                     __builtin_unreachable ();
1027                 tgt->list[i].offset = OFFSET_INLINED;
1028               }
1029             else
1030               tgt->list[i].offset = 0;
1031             continue;
1032           }
1033       else if ((kind & typemask) == GOMP_MAP_STRUCT)
1034           {
1035             size_t first = i + 1;
1036             size_t last = i + sizes[i];
1037             cur_node.host_start = (uintptr_t) hostaddrs[i];
1038             cur_node.host_end = (uintptr_t) hostaddrs[last]
1039                                     + sizes[last];
1040             tgt->list[i].key = NULL;
1041             tgt->list[i].offset = OFFSET_STRUCT;
1042             splay_tree_key n = splay_tree_lookup (mem_map, &cur_node);
1043             if (n == NULL)
1044               {
1045                 size_t align = (size_t) 1 << (kind >> rshift);
1046                 if (tgt_align < align)
1047                     tgt_align = align;
1048                 tgt_size -= (uintptr_t) hostaddrs[first] - cur_node.host_start;
1049                 tgt_size = (tgt_size + align - 1) & ~(align - 1);
1050                 tgt_size += cur_node.host_end - cur_node.host_start;
1051                 not_found_cnt += last - i;
1052                 for (i = first; i <= last; i++)
1053                     {
1054                       tgt->list[i].key = NULL;
1055                       if (!aq
1056                           && gomp_to_device_kind_p (get_kind (short_mapkind, kinds, i)
1057                                                             & typemask))
1058                         gomp_coalesce_buf_add (&cbuf,
1059                                                      tgt_size - cur_node.host_end
1060                                                      + (uintptr_t) hostaddrs[i],
1061                                                      sizes[i]);
1062                     }
1063                 i--;
1064                 continue;
1065               }
1066             for (i = first; i <= last; i++)
1067               gomp_map_fields_existing (tgt, aq, n, first, i, hostaddrs,
1068                                               sizes, kinds, NULL, refcount_set);
1069             i--;
1070             continue;
1071           }
1072       else if ((kind & typemask) == GOMP_MAP_ALWAYS_POINTER)
1073           {
1074             tgt->list[i].key = NULL;
1075             tgt->list[i].offset = OFFSET_POINTER;
1076             has_firstprivate = true;
1077             continue;
1078           }
1079       else if ((kind & typemask) == GOMP_MAP_ATTACH
1080                  || ((kind & typemask)
1081                        == GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION))
1082           {
1083             tgt->list[i].key = NULL;
1084             has_firstprivate = true;
1085             continue;
1086           }
1087       cur_node.host_start = (uintptr_t) hostaddrs[i];
1088       if (!GOMP_MAP_POINTER_P (kind & typemask))
1089           cur_node.host_end = cur_node.host_start + sizes[i];
1090       else
1091           cur_node.host_end = cur_node.host_start + sizeof (void *);
1092       if ((kind & typemask) == GOMP_MAP_FIRSTPRIVATE)
1093           {
1094             tgt->list[i].key = NULL;
1095 
1096             size_t align = (size_t) 1 << (kind >> rshift);
1097             if (tgt_align < align)
1098               tgt_align = align;
1099             tgt_size = (tgt_size + align - 1) & ~(align - 1);
1100             if (!aq)
1101               gomp_coalesce_buf_add (&cbuf, tgt_size,
1102                                            cur_node.host_end - cur_node.host_start);
1103             tgt_size += cur_node.host_end - cur_node.host_start;
1104             has_firstprivate = true;
1105             continue;
1106           }
1107       splay_tree_key n;
1108       if ((kind & typemask) == GOMP_MAP_ZERO_LEN_ARRAY_SECTION)
1109           {
1110             n = gomp_map_0len_lookup (mem_map, &cur_node);
1111             if (!n)
1112               {
1113                 tgt->list[i].key = NULL;
1114                 tgt->list[i].offset = OFFSET_POINTER;
1115                 continue;
1116               }
1117           }
1118       else
1119           n = splay_tree_lookup (mem_map, &cur_node);
1120       if (n && n->refcount != REFCOUNT_LINK)
1121           {
1122             int always_to_cnt = 0;
1123             if ((kind & typemask) == GOMP_MAP_TO_PSET)
1124               {
1125                 bool has_nullptr = false;
1126                 size_t j;
1127                 for (j = 0; j < n->tgt->list_count; j++)
1128                     if (n->tgt->list[j].key == n)
1129                       {
1130                         has_nullptr = n->tgt->list[j].has_null_ptr_assoc;
1131                         break;
1132                       }
1133                 if (n->tgt->list_count == 0)
1134                     {
1135                       /* 'declare target'; assume has_nullptr; it could also be
1136                          statically assigned pointer, but that it should be to
1137                          the equivalent variable on the host.  */
1138                       assert (n->refcount == REFCOUNT_INFINITY);
1139                       has_nullptr = true;
1140                     }
1141                 else
1142                     assert (j < n->tgt->list_count);
1143                 /* Re-map the data if there is an 'always' modifier or if it a
1144                      null pointer was there and non a nonnull has been found; that
1145                      permits transparent re-mapping for Fortran array descriptors
1146                      which were previously mapped unallocated.  */
1147                 for (j = i + 1; j < mapnum; j++)
1148                     {
1149                       int ptr_kind = get_kind (short_mapkind, kinds, j) & typemask;
1150                       if (!GOMP_MAP_ALWAYS_POINTER_P (ptr_kind)
1151                           && (!has_nullptr
1152                                 || !GOMP_MAP_POINTER_P (ptr_kind)
1153                                 || *(void **) hostaddrs[j] == NULL))
1154                         break;
1155                       else if ((uintptr_t) hostaddrs[j] < cur_node.host_start
1156                                  || ((uintptr_t) hostaddrs[j] + sizeof (void *)
1157                                      > cur_node.host_end))
1158                         break;
1159                       else
1160                         {
1161                           has_always_ptrset = true;
1162                           ++always_to_cnt;
1163                         }
1164                     }
1165               }
1166             gomp_map_vars_existing (devicep, aq, n, &cur_node, &tgt->list[i],
1167                                           kind & typemask, always_to_cnt > 0, implicit,
1168                                           NULL, refcount_set);
1169             i += always_to_cnt;
1170           }
1171       else
1172           {
1173             tgt->list[i].key = NULL;
1174 
1175             if ((kind & typemask) == GOMP_MAP_IF_PRESENT)
1176               {
1177                 /* Not present, hence, skip entry - including its MAP_POINTER,
1178                      when existing.  */
1179                 tgt->list[i].offset = OFFSET_POINTER;
1180                 if (i + 1 < mapnum
1181                       && ((typemask & get_kind (short_mapkind, kinds, i + 1))
1182                           == GOMP_MAP_POINTER))
1183                     {
1184                       ++i;
1185                       tgt->list[i].key = NULL;
1186                       tgt->list[i].offset = 0;
1187                     }
1188                 continue;
1189               }
1190             size_t align = (size_t) 1 << (kind >> rshift);
1191             not_found_cnt++;
1192             if (tgt_align < align)
1193               tgt_align = align;
1194             tgt_size = (tgt_size + align - 1) & ~(align - 1);
1195             if (!aq
1196                 && gomp_to_device_kind_p (kind & typemask))
1197               gomp_coalesce_buf_add (&cbuf, tgt_size,
1198                                            cur_node.host_end - cur_node.host_start);
1199             tgt_size += cur_node.host_end - cur_node.host_start;
1200             if ((kind & typemask) == GOMP_MAP_TO_PSET)
1201               {
1202                 size_t j;
1203                 int kind;
1204                 for (j = i + 1; j < mapnum; j++)
1205                     if (!GOMP_MAP_POINTER_P ((kind = (get_kind (short_mapkind,
1206                                                               kinds, j)) & typemask))
1207                         && !GOMP_MAP_ALWAYS_POINTER_P (kind))
1208                       break;
1209                     else if ((uintptr_t) hostaddrs[j] < cur_node.host_start
1210                                || ((uintptr_t) hostaddrs[j] + sizeof (void *)
1211                                    > cur_node.host_end))
1212                       break;
1213                     else
1214                       {
1215                         tgt->list[j].key = NULL;
1216                         i++;
1217                       }
1218               }
1219           }
1220     }
1221 
1222   if (devaddrs)
1223     {
1224       if (mapnum != 1)
1225           {
1226             gomp_mutex_unlock (&devicep->lock);
1227             gomp_fatal ("unexpected aggregation");
1228           }
1229       tgt->to_free = devaddrs[0];
1230       tgt->tgt_start = (uintptr_t) tgt->to_free;
1231       tgt->tgt_end = tgt->tgt_start + sizes[0];
1232     }
1233   else if (not_found_cnt || pragma_kind == GOMP_MAP_VARS_TARGET)
1234     {
1235       /* Allocate tgt_align aligned tgt_size block of memory.  */
1236       /* FIXME: Perhaps change interface to allocate properly aligned
1237            memory.  */
1238       tgt->to_free = devicep->alloc_func (devicep->target_id,
1239                                                     tgt_size + tgt_align - 1);
1240       if (!tgt->to_free)
1241           {
1242             gomp_mutex_unlock (&devicep->lock);
1243             gomp_fatal ("device memory allocation fail");
1244           }
1245 
1246       tgt->tgt_start = (uintptr_t) tgt->to_free;
1247       tgt->tgt_start = (tgt->tgt_start + tgt_align - 1) & ~(tgt_align - 1);
1248       tgt->tgt_end = tgt->tgt_start + tgt_size;
1249 
1250       if (cbuf.use_cnt == 1)
1251           cbuf.chunk_cnt--;
1252       if (cbuf.chunk_cnt > 0)
1253           {
1254             cbuf.buf
1255               = malloc (cbuf.chunks[cbuf.chunk_cnt - 1].end - cbuf.chunks[0].start);
1256             if (cbuf.buf)
1257               {
1258                 cbuf.tgt = tgt;
1259                 cbufp = &cbuf;
1260               }
1261           }
1262     }
1263   else
1264     {
1265       tgt->to_free = NULL;
1266       tgt->tgt_start = 0;
1267       tgt->tgt_end = 0;
1268     }
1269 
1270   tgt_size = 0;
1271   if (pragma_kind == GOMP_MAP_VARS_TARGET)
1272     tgt_size = mapnum * sizeof (void *);
1273 
1274   tgt->array = NULL;
1275   if (not_found_cnt || has_firstprivate || has_always_ptrset)
1276     {
1277       if (not_found_cnt)
1278           tgt->array = gomp_malloc (not_found_cnt * sizeof (*tgt->array));
1279       splay_tree_node array = tgt->array;
1280       size_t j, field_tgt_offset = 0, field_tgt_clear = FIELD_TGT_EMPTY;
1281       uintptr_t field_tgt_base = 0;
1282       splay_tree_key field_tgt_structelem_first = NULL;
1283 
1284       for (i = 0; i < mapnum; i++)
1285           if (has_always_ptrset
1286               && tgt->list[i].key
1287               && (get_kind (short_mapkind, kinds, i) & typemask)
1288                  == GOMP_MAP_TO_PSET)
1289             {
1290               splay_tree_key k = tgt->list[i].key;
1291               bool has_nullptr = false;
1292               size_t j;
1293               for (j = 0; j < k->tgt->list_count; j++)
1294                 if (k->tgt->list[j].key == k)
1295                     {
1296                       has_nullptr = k->tgt->list[j].has_null_ptr_assoc;
1297                       break;
1298                     }
1299               if (k->tgt->list_count == 0)
1300                 has_nullptr = true;
1301               else
1302                 assert (j < k->tgt->list_count);
1303 
1304               tgt->list[i].has_null_ptr_assoc = false;
1305               for (j = i + 1; j < mapnum; j++)
1306                 {
1307                     int ptr_kind = get_kind (short_mapkind, kinds, j) & typemask;
1308                     if (!GOMP_MAP_ALWAYS_POINTER_P (ptr_kind)
1309                         && (!has_nullptr
1310                               || !GOMP_MAP_POINTER_P (ptr_kind)
1311                               || *(void **) hostaddrs[j] == NULL))
1312                       break;
1313                     else if ((uintptr_t) hostaddrs[j] < k->host_start
1314                                || ((uintptr_t) hostaddrs[j] + sizeof (void *)
1315                                    > k->host_end))
1316                       break;
1317                     else
1318                       {
1319                         if (*(void **) hostaddrs[j] == NULL)
1320                           tgt->list[i].has_null_ptr_assoc = true;
1321                         tgt->list[j].key = k;
1322                         tgt->list[j].copy_from = false;
1323                         tgt->list[j].always_copy_from = false;
1324                         tgt->list[j].is_attach = false;
1325                         gomp_increment_refcount (k, refcount_set);
1326                         gomp_map_pointer (k->tgt, aq,
1327                                               (uintptr_t) *(void **) hostaddrs[j],
1328                                               k->tgt_offset + ((uintptr_t) hostaddrs[j]
1329                                                                    - k->host_start),
1330                                               sizes[j], cbufp, false);
1331                       }
1332                 }
1333               i = j - 1;
1334             }
1335           else if (tgt->list[i].key == NULL)
1336             {
1337               int kind = get_kind (short_mapkind, kinds, i);
1338               bool implicit = get_implicit (short_mapkind, kinds, i);
1339               if (hostaddrs[i] == NULL)
1340                 continue;
1341               switch (kind & typemask)
1342                 {
1343                     size_t align, len, first, last;
1344                     splay_tree_key n;
1345                 case GOMP_MAP_FIRSTPRIVATE:
1346                     align = (size_t) 1 << (kind >> rshift);
1347                     tgt_size = (tgt_size + align - 1) & ~(align - 1);
1348                     tgt->list[i].offset = tgt_size;
1349                     len = sizes[i];
1350                     gomp_copy_host2dev (devicep, aq,
1351                                             (void *) (tgt->tgt_start + tgt_size),
1352                                             (void *) hostaddrs[i], len, false, cbufp);
1353                     tgt_size += len;
1354                     continue;
1355                 case GOMP_MAP_FIRSTPRIVATE_INT:
1356                 case GOMP_MAP_ZERO_LEN_ARRAY_SECTION:
1357                     continue;
1358                 case GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT:
1359                     /* The OpenACC 'host_data' construct only allows 'use_device'
1360                        "mapping" clauses, so in the first loop, 'not_found_cnt'
1361                        must always have been zero, so all OpenACC 'use_device'
1362                        clauses have already been handled.  (We can only easily test
1363                        'use_device' with 'if_present' clause here.)  */
1364                     assert (tgt->list[i].offset == OFFSET_INLINED);
1365                     /* Nevertheless, FALLTHRU to the normal handling, to keep the
1366                        code conceptually simple, similar to the first loop.  */
1367                 case GOMP_MAP_USE_DEVICE_PTR:
1368                     if (tgt->list[i].offset == 0)
1369                       {
1370                         cur_node.host_start = (uintptr_t) hostaddrs[i];
1371                         cur_node.host_end = cur_node.host_start;
1372                         n = gomp_map_lookup (mem_map, &cur_node);
1373                         if (n != NULL)
1374                           {
1375                               cur_node.host_start -= n->host_start;
1376                               hostaddrs[i]
1377                                 = (void *) (n->tgt->tgt_start + n->tgt_offset
1378                                               + cur_node.host_start);
1379                           }
1380                         else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR)
1381                           {
1382                               gomp_mutex_unlock (&devicep->lock);
1383                               gomp_fatal ("use_device_ptr pointer wasn't mapped");
1384                           }
1385                         else if ((kind & typemask)
1386                                    == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT)
1387                           /* If not present, continue using the host address.  */
1388                           ;
1389                         else
1390                           __builtin_unreachable ();
1391                         tgt->list[i].offset = OFFSET_INLINED;
1392                       }
1393                     continue;
1394                 case GOMP_MAP_STRUCT:
1395                     first = i + 1;
1396                     last = i + sizes[i];
1397                     cur_node.host_start = (uintptr_t) hostaddrs[i];
1398                     cur_node.host_end = (uintptr_t) hostaddrs[last]
1399                                             + sizes[last];
1400                     if (tgt->list[first].key != NULL)
1401                       continue;
1402                     n = splay_tree_lookup (mem_map, &cur_node);
1403                     if (n == NULL)
1404                       {
1405                         size_t align = (size_t) 1 << (kind >> rshift);
1406                         tgt_size -= (uintptr_t) hostaddrs[first]
1407                                         - (uintptr_t) hostaddrs[i];
1408                         tgt_size = (tgt_size + align - 1) & ~(align - 1);
1409                         tgt_size += (uintptr_t) hostaddrs[first]
1410                                         - (uintptr_t) hostaddrs[i];
1411                         field_tgt_base = (uintptr_t) hostaddrs[first];
1412                         field_tgt_offset = tgt_size;
1413                         field_tgt_clear = last;
1414                         field_tgt_structelem_first = NULL;
1415                         tgt_size += cur_node.host_end
1416                                         - (uintptr_t) hostaddrs[first];
1417                         continue;
1418                       }
1419                     for (i = first; i <= last; i++)
1420                       gomp_map_fields_existing (tgt, aq, n, first, i, hostaddrs,
1421                                                       sizes, kinds, cbufp, refcount_set);
1422                     i--;
1423                     continue;
1424                 case GOMP_MAP_ALWAYS_POINTER:
1425                     cur_node.host_start = (uintptr_t) hostaddrs[i];
1426                     cur_node.host_end = cur_node.host_start + sizeof (void *);
1427                     n = splay_tree_lookup (mem_map, &cur_node);
1428                     if (n == NULL
1429                         || n->host_start > cur_node.host_start
1430                         || n->host_end < cur_node.host_end)
1431                       {
1432                         gomp_mutex_unlock (&devicep->lock);
1433                         gomp_fatal ("always pointer not mapped");
1434                       }
1435                     if ((get_kind (short_mapkind, kinds, i - 1) & typemask)
1436                         != GOMP_MAP_ALWAYS_POINTER)
1437                       cur_node.tgt_offset = gomp_map_val (tgt, hostaddrs, i - 1);
1438                     if (cur_node.tgt_offset)
1439                       cur_node.tgt_offset -= sizes[i];
1440                     gomp_copy_host2dev (devicep, aq,
1441                                             (void *) (n->tgt->tgt_start
1442                                                         + n->tgt_offset
1443                                                         + cur_node.host_start
1444                                                         - n->host_start),
1445                                             (void *) &cur_node.tgt_offset,
1446                                             sizeof (void *), true, cbufp);
1447                     cur_node.tgt_offset = n->tgt->tgt_start + n->tgt_offset
1448                                               + cur_node.host_start - n->host_start;
1449                     continue;
1450                 case GOMP_MAP_IF_PRESENT:
1451                     /* Not present - otherwise handled above. Skip over its
1452                        MAP_POINTER as well.  */
1453                     if (i + 1 < mapnum
1454                         && ((typemask & get_kind (short_mapkind, kinds, i + 1))
1455                               == GOMP_MAP_POINTER))
1456                       ++i;
1457                     continue;
1458                 case GOMP_MAP_ATTACH:
1459                 case GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION:
1460                     {
1461                       cur_node.host_start = (uintptr_t) hostaddrs[i];
1462                       cur_node.host_end = cur_node.host_start + sizeof (void *);
1463                       splay_tree_key n = splay_tree_lookup (mem_map, &cur_node);
1464                       if (n != NULL)
1465                         {
1466                           tgt->list[i].key = n;
1467                           tgt->list[i].offset = cur_node.host_start - n->host_start;
1468                           tgt->list[i].length = n->host_end - n->host_start;
1469                           tgt->list[i].copy_from = false;
1470                           tgt->list[i].always_copy_from = false;
1471                           tgt->list[i].is_attach = true;
1472                           /* OpenACC 'attach'/'detach' doesn't affect
1473                                structured/dynamic reference counts ('n->refcount',
1474                                'n->dynamic_refcount').  */
1475 
1476                           bool zlas
1477                               = ((kind & typemask)
1478                                  == GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION);
1479                           gomp_attach_pointer (devicep, aq, mem_map, n,
1480                                                      (uintptr_t) hostaddrs[i], sizes[i],
1481                                                      cbufp, zlas);
1482                         }
1483                       else if ((pragma_kind & GOMP_MAP_VARS_OPENACC) != 0)
1484                         {
1485                           gomp_mutex_unlock (&devicep->lock);
1486                           gomp_fatal ("outer struct not mapped for attach");
1487                         }
1488                       continue;
1489                     }
1490                 default:
1491                     break;
1492                 }
1493               splay_tree_key k = &array->key;
1494               k->host_start = (uintptr_t) hostaddrs[i];
1495               if (!GOMP_MAP_POINTER_P (kind & typemask))
1496                 k->host_end = k->host_start + sizes[i];
1497               else
1498                 k->host_end = k->host_start + sizeof (void *);
1499               splay_tree_key n = splay_tree_lookup (mem_map, k);
1500               if (n && n->refcount != REFCOUNT_LINK)
1501                 gomp_map_vars_existing (devicep, aq, n, k, &tgt->list[i],
1502                                               kind & typemask, false, implicit, cbufp,
1503                                               refcount_set);
1504               else
1505                 {
1506                     k->aux = NULL;
1507                     if (n && n->refcount == REFCOUNT_LINK)
1508                       {
1509                         /* Replace target address of the pointer with target address
1510                            of mapped object in the splay tree.  */
1511                         splay_tree_remove (mem_map, n);
1512                         k->aux
1513                           = gomp_malloc_cleared (sizeof (struct splay_tree_aux));
1514                         k->aux->link_key = n;
1515                       }
1516                     size_t align = (size_t) 1 << (kind >> rshift);
1517                     tgt->list[i].key = k;
1518                     k->tgt = tgt;
1519                     k->refcount = 0;
1520                     k->dynamic_refcount = 0;
1521                     if (field_tgt_clear != FIELD_TGT_EMPTY)
1522                       {
1523                         k->tgt_offset = k->host_start - field_tgt_base
1524                                             + field_tgt_offset;
1525                         if (openmp_p)
1526                           {
1527                               k->refcount = REFCOUNT_STRUCTELEM;
1528                               if (field_tgt_structelem_first == NULL)
1529                                 {
1530                                   /* Set to first structure element of sequence.  */
1531                                   k->refcount |= REFCOUNT_STRUCTELEM_FLAG_FIRST;
1532                                   field_tgt_structelem_first = k;
1533                                 }
1534                               else
1535                                 /* Point to refcount of leading element, but do not
1536                                    increment again.  */
1537                                 k->structelem_refcount_ptr
1538                                   = &field_tgt_structelem_first->structelem_refcount;
1539 
1540                               if (i == field_tgt_clear)
1541                                 {
1542                                   k->refcount |= REFCOUNT_STRUCTELEM_FLAG_LAST;
1543                                   field_tgt_structelem_first = NULL;
1544                                 }
1545                           }
1546                         if (i == field_tgt_clear)
1547                           field_tgt_clear = FIELD_TGT_EMPTY;
1548                       }
1549                     else
1550                       {
1551                         tgt_size = (tgt_size + align - 1) & ~(align - 1);
1552                         k->tgt_offset = tgt_size;
1553                         tgt_size += k->host_end - k->host_start;
1554                       }
1555                     /* First increment, from 0 to 1. gomp_increment_refcount
1556                        encapsulates the different increment cases, so use this
1557                        instead of directly setting 1 during initialization.  */
1558                     gomp_increment_refcount (k, refcount_set);
1559 
1560                     tgt->list[i].copy_from = GOMP_MAP_COPY_FROM_P (kind & typemask);
1561                     tgt->list[i].always_copy_from
1562                       = GOMP_MAP_ALWAYS_FROM_P (kind & typemask);
1563                     tgt->list[i].is_attach = false;
1564                     tgt->list[i].offset = 0;
1565                     tgt->list[i].length = k->host_end - k->host_start;
1566                     tgt->refcount++;
1567                     array->left = NULL;
1568                     array->right = NULL;
1569                     splay_tree_insert (mem_map, array);
1570                     switch (kind & typemask)
1571                       {
1572                       case GOMP_MAP_ALLOC:
1573                       case GOMP_MAP_FROM:
1574                       case GOMP_MAP_FORCE_ALLOC:
1575                       case GOMP_MAP_FORCE_FROM:
1576                       case GOMP_MAP_ALWAYS_FROM:
1577                         break;
1578                       case GOMP_MAP_TO:
1579                       case GOMP_MAP_TOFROM:
1580                       case GOMP_MAP_FORCE_TO:
1581                       case GOMP_MAP_FORCE_TOFROM:
1582                       case GOMP_MAP_ALWAYS_TO:
1583                       case GOMP_MAP_ALWAYS_TOFROM:
1584                         gomp_copy_host2dev (devicep, aq,
1585                                                   (void *) (tgt->tgt_start
1586                                                               + k->tgt_offset),
1587                                                   (void *) k->host_start,
1588                                                   k->host_end - k->host_start,
1589                                                   false, cbufp);
1590                         break;
1591                       case GOMP_MAP_POINTER:
1592                       case GOMP_MAP_POINTER_TO_ZERO_LENGTH_ARRAY_SECTION:
1593                         gomp_map_pointer
1594                           (tgt, aq, (uintptr_t) *(void **) k->host_start,
1595                            k->tgt_offset, sizes[i], cbufp,
1596                            ((kind & typemask)
1597                               == GOMP_MAP_POINTER_TO_ZERO_LENGTH_ARRAY_SECTION));
1598                         break;
1599                       case GOMP_MAP_TO_PSET:
1600                         gomp_copy_host2dev (devicep, aq,
1601                                                   (void *) (tgt->tgt_start
1602                                                               + k->tgt_offset),
1603                                                   (void *) k->host_start,
1604                                                   k->host_end - k->host_start,
1605                                                   false, cbufp);
1606                         tgt->list[i].has_null_ptr_assoc = false;
1607 
1608                         for (j = i + 1; j < mapnum; j++)
1609                           {
1610                               int ptr_kind = (get_kind (short_mapkind, kinds, j)
1611                                                   & typemask);
1612                               if (!GOMP_MAP_POINTER_P (ptr_kind)
1613                                   && !GOMP_MAP_ALWAYS_POINTER_P (ptr_kind))
1614                                 break;
1615                               else if ((uintptr_t) hostaddrs[j] < k->host_start
1616                                          || ((uintptr_t) hostaddrs[j] + sizeof (void *)
1617                                              > k->host_end))
1618                                 break;
1619                               else
1620                                 {
1621                                   tgt->list[j].key = k;
1622                                   tgt->list[j].copy_from = false;
1623                                   tgt->list[j].always_copy_from = false;
1624                                   tgt->list[j].is_attach = false;
1625                                   tgt->list[i].has_null_ptr_assoc |= !(*(void **) hostaddrs[j]);
1626                                   /* For OpenMP, the use of refcount_sets causes
1627                                      errors if we set k->refcount = 1 above but also
1628                                      increment it again here, for decrementing will
1629                                      not properly match, since we decrement only once
1630                                      for each key's refcount. Therefore avoid this
1631                                      increment for OpenMP constructs.  */
1632                                   if (!openmp_p)
1633                                     gomp_increment_refcount (k, refcount_set);
1634                                   gomp_map_pointer (tgt, aq,
1635                                                         (uintptr_t) *(void **) hostaddrs[j],
1636                                                         k->tgt_offset
1637                                                         + ((uintptr_t) hostaddrs[j]
1638                                                              - k->host_start),
1639                                                         sizes[j], cbufp, false);
1640                                 }
1641                               }
1642                         i = j - 1;
1643                         break;
1644                       case GOMP_MAP_FORCE_PRESENT:
1645                         {
1646                           /* We already looked up the memory region above and it
1647                                was missing.  */
1648                           size_t size = k->host_end - k->host_start;
1649                           gomp_mutex_unlock (&devicep->lock);
1650 #ifdef HAVE_INTTYPES_H
1651                           gomp_fatal ("present clause: !acc_is_present (%p, "
1652                                           "%"PRIu64" (0x%"PRIx64"))",
1653                                           (void *) k->host_start,
1654                                           (uint64_t) size, (uint64_t) size);
1655 #else
1656                           gomp_fatal ("present clause: !acc_is_present (%p, "
1657                                           "%lu (0x%lx))", (void *) k->host_start,
1658                                           (unsigned long) size, (unsigned long) size);
1659 #endif
1660                         }
1661                         break;
1662                       case GOMP_MAP_FORCE_DEVICEPTR:
1663                         assert (k->host_end - k->host_start == sizeof (void *));
1664                         gomp_copy_host2dev (devicep, aq,
1665                                                   (void *) (tgt->tgt_start
1666                                                               + k->tgt_offset),
1667                                                   (void *) k->host_start,
1668                                                   sizeof (void *), false, cbufp);
1669                         break;
1670                       default:
1671                         gomp_mutex_unlock (&devicep->lock);
1672                         gomp_fatal ("%s: unhandled kind 0x%.2x", __FUNCTION__,
1673                                         kind);
1674                       }
1675 
1676                     if (k->aux && k->aux->link_key)
1677                       {
1678                         /* Set link pointer on target to the device address of the
1679                            mapped object.  */
1680                         void *tgt_addr = (void *) (tgt->tgt_start + k->tgt_offset);
1681                         /* We intentionally do not use coalescing here, as it's not
1682                            data allocated by the current call to this function.  */
1683                         gomp_copy_host2dev (devicep, aq, (void *) n->tgt_offset,
1684                                                   &tgt_addr, sizeof (void *), true, NULL);
1685                       }
1686                     array++;
1687                 }
1688             }
1689     }
1690 
1691   if (pragma_kind == GOMP_MAP_VARS_TARGET)
1692     {
1693       for (i = 0; i < mapnum; i++)
1694           {
1695             cur_node.tgt_offset = gomp_map_val (tgt, hostaddrs, i);
1696             gomp_copy_host2dev (devicep, aq,
1697                                     (void *) (tgt->tgt_start + i * sizeof (void *)),
1698                                     (void *) &cur_node.tgt_offset, sizeof (void *),
1699                                     true, cbufp);
1700           }
1701     }
1702 
1703   if (cbufp)
1704     {
1705       /* See 'gomp_coalesce_buf_add'.  */
1706       assert (!aq);
1707 
1708       long c = 0;
1709       for (c = 0; c < cbuf.chunk_cnt; ++c)
1710           gomp_copy_host2dev (devicep, aq,
1711                                   (void *) (tgt->tgt_start + cbuf.chunks[c].start),
1712                                   (char *) cbuf.buf + (cbuf.chunks[c].start
1713                                                              - cbuf.chunks[0].start),
1714                                   cbuf.chunks[c].end - cbuf.chunks[c].start,
1715                                   true, NULL);
1716       free (cbuf.buf);
1717       cbuf.buf = NULL;
1718       cbufp = NULL;
1719     }
1720 
1721   /* If the variable from "omp target enter data" map-list was already mapped,
1722      tgt is not needed.  Otherwise tgt will be freed by gomp_unmap_vars or
1723      gomp_exit_data.  */
1724   if ((pragma_kind & GOMP_MAP_VARS_ENTER_DATA) && tgt->refcount == 0)
1725     {
1726       free (tgt);
1727       tgt = NULL;
1728     }
1729 
1730   gomp_mutex_unlock (&devicep->lock);
1731   return tgt;
1732 }
1733 
1734 static struct target_mem_desc *
gomp_map_vars(struct gomp_device_descr * devicep,size_t mapnum,void ** hostaddrs,void ** devaddrs,size_t * sizes,void * kinds,bool short_mapkind,htab_t * refcount_set,enum gomp_map_vars_kind pragma_kind)1735 gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
1736                  void **hostaddrs, void **devaddrs, size_t *sizes, void *kinds,
1737                  bool short_mapkind, htab_t *refcount_set,
1738                  enum gomp_map_vars_kind pragma_kind)
1739 {
1740   /* This management of a local refcount_set is for convenience of callers
1741      who do not share a refcount_set over multiple map/unmap uses.  */
1742   htab_t local_refcount_set = NULL;
1743   if (refcount_set == NULL)
1744     {
1745       local_refcount_set = htab_create (mapnum);
1746       refcount_set = &local_refcount_set;
1747     }
1748 
1749   struct target_mem_desc *tgt;
1750   tgt = gomp_map_vars_internal (devicep, NULL, mapnum, hostaddrs, devaddrs,
1751                                         sizes, kinds, short_mapkind, refcount_set,
1752                                         pragma_kind);
1753   if (local_refcount_set)
1754     htab_free (local_refcount_set);
1755 
1756   return tgt;
1757 }
1758 
1759 attribute_hidden struct target_mem_desc *
goacc_map_vars(struct gomp_device_descr * devicep,struct goacc_asyncqueue * aq,size_t mapnum,void ** hostaddrs,void ** devaddrs,size_t * sizes,void * kinds,bool short_mapkind,enum gomp_map_vars_kind pragma_kind)1760 goacc_map_vars (struct gomp_device_descr *devicep,
1761                     struct goacc_asyncqueue *aq, size_t mapnum,
1762                     void **hostaddrs, void **devaddrs, size_t *sizes,
1763                     void *kinds, bool short_mapkind,
1764                     enum gomp_map_vars_kind pragma_kind)
1765 {
1766   return gomp_map_vars_internal (devicep, aq, mapnum, hostaddrs, devaddrs,
1767                                          sizes, kinds, short_mapkind, NULL,
1768                                          GOMP_MAP_VARS_OPENACC | pragma_kind);
1769 }
1770 
1771 static void
gomp_unmap_tgt(struct target_mem_desc * tgt)1772 gomp_unmap_tgt (struct target_mem_desc *tgt)
1773 {
1774   /* Deallocate on target the tgt->tgt_start .. tgt->tgt_end region.  */
1775   if (tgt->tgt_end)
1776     gomp_free_device_memory (tgt->device_descr, tgt->to_free);
1777 
1778   free (tgt->array);
1779   free (tgt);
1780 }
1781 
1782 static bool
gomp_unref_tgt(void * ptr)1783 gomp_unref_tgt (void *ptr)
1784 {
1785   bool is_tgt_unmapped = false;
1786 
1787   struct target_mem_desc *tgt = (struct target_mem_desc *) ptr;
1788 
1789   if (tgt->refcount > 1)
1790     tgt->refcount--;
1791   else
1792     {
1793       gomp_unmap_tgt (tgt);
1794       is_tgt_unmapped = true;
1795     }
1796 
1797   return is_tgt_unmapped;
1798 }
1799 
1800 static void
gomp_unref_tgt_void(void * ptr)1801 gomp_unref_tgt_void (void *ptr)
1802 {
1803   (void) gomp_unref_tgt (ptr);
1804 }
1805 
1806 static void
gomp_remove_splay_tree_key(splay_tree sp,splay_tree_key k)1807 gomp_remove_splay_tree_key (splay_tree sp, splay_tree_key k)
1808 {
1809   splay_tree_remove (sp, k);
1810   if (k->aux)
1811     {
1812       if (k->aux->link_key)
1813           splay_tree_insert (sp, (splay_tree_node) k->aux->link_key);
1814       if (k->aux->attach_count)
1815           free (k->aux->attach_count);
1816       free (k->aux);
1817       k->aux = NULL;
1818     }
1819 }
1820 
1821 static inline __attribute__((always_inline)) bool
gomp_remove_var_internal(struct gomp_device_descr * devicep,splay_tree_key k,struct goacc_asyncqueue * aq)1822 gomp_remove_var_internal (struct gomp_device_descr *devicep, splay_tree_key k,
1823                                 struct goacc_asyncqueue *aq)
1824 {
1825   bool is_tgt_unmapped = false;
1826 
1827   if (REFCOUNT_STRUCTELEM_P (k->refcount))
1828     {
1829       if (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount) == false)
1830           /* Infer the splay_tree_key of the first structelem key using the
1831              pointer to the first structleme_refcount.  */
1832           k = (splay_tree_key) ((char *) k->structelem_refcount_ptr
1833                                     - offsetof (struct splay_tree_key_s,
1834                                                     structelem_refcount));
1835       assert (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount));
1836 
1837       /* The array created by gomp_map_vars is an array of splay_tree_nodes,
1838            with the splay_tree_keys embedded inside.  */
1839       splay_tree_node node =
1840           (splay_tree_node) ((char *) k
1841                                  - offsetof (struct splay_tree_node_s, key));
1842       while (true)
1843           {
1844             /* Starting from the _FIRST key, and continue for all following
1845                sibling keys.  */
1846             gomp_remove_splay_tree_key (&devicep->mem_map, k);
1847             if (REFCOUNT_STRUCTELEM_LAST_P (k->refcount))
1848               break;
1849             else
1850               k = &(++node)->key;
1851           }
1852     }
1853   else
1854     gomp_remove_splay_tree_key (&devicep->mem_map, k);
1855 
1856   if (aq)
1857     devicep->openacc.async.queue_callback_func (aq, gomp_unref_tgt_void,
1858                                                             (void *) k->tgt);
1859   else
1860     is_tgt_unmapped = gomp_unref_tgt ((void *) k->tgt);
1861   return is_tgt_unmapped;
1862 }
1863 
1864 attribute_hidden bool
gomp_remove_var(struct gomp_device_descr * devicep,splay_tree_key k)1865 gomp_remove_var (struct gomp_device_descr *devicep, splay_tree_key k)
1866 {
1867   return gomp_remove_var_internal (devicep, k, NULL);
1868 }
1869 
1870 /* Remove a variable asynchronously.  This actually removes the variable
1871    mapping immediately, but retains the linked target_mem_desc until the
1872    asynchronous operation has completed (as it may still refer to target
1873    memory).  The device lock must be held before entry, and remains locked on
1874    exit.  */
1875 
1876 attribute_hidden void
gomp_remove_var_async(struct gomp_device_descr * devicep,splay_tree_key k,struct goacc_asyncqueue * aq)1877 gomp_remove_var_async (struct gomp_device_descr *devicep, splay_tree_key k,
1878                            struct goacc_asyncqueue *aq)
1879 {
1880   (void) gomp_remove_var_internal (devicep, k, aq);
1881 }
1882 
1883 /* Unmap variables described by TGT.  If DO_COPYFROM is true, copy relevant
1884    variables back from device to host: if it is false, it is assumed that this
1885    has been done already.  */
1886 
1887 static inline __attribute__((always_inline)) void
gomp_unmap_vars_internal(struct target_mem_desc * tgt,bool do_copyfrom,htab_t * refcount_set,struct goacc_asyncqueue * aq)1888 gomp_unmap_vars_internal (struct target_mem_desc *tgt, bool do_copyfrom,
1889                                 htab_t *refcount_set, struct goacc_asyncqueue *aq)
1890 {
1891   struct gomp_device_descr *devicep = tgt->device_descr;
1892 
1893   if (tgt->list_count == 0)
1894     {
1895       free (tgt);
1896       return;
1897     }
1898 
1899   gomp_mutex_lock (&devicep->lock);
1900   if (devicep->state == GOMP_DEVICE_FINALIZED)
1901     {
1902       gomp_mutex_unlock (&devicep->lock);
1903       free (tgt->array);
1904       free (tgt);
1905       return;
1906     }
1907 
1908   size_t i;
1909 
1910   /* We must perform detachments before any copies back to the host.  */
1911   for (i = 0; i < tgt->list_count; i++)
1912     {
1913       splay_tree_key k = tgt->list[i].key;
1914 
1915       if (k != NULL && tgt->list[i].is_attach)
1916           gomp_detach_pointer (devicep, aq, k, tgt->list[i].key->host_start
1917                                                        + tgt->list[i].offset,
1918                                    false, NULL);
1919     }
1920 
1921   for (i = 0; i < tgt->list_count; i++)
1922     {
1923       splay_tree_key k = tgt->list[i].key;
1924       if (k == NULL)
1925           continue;
1926 
1927       /* OpenACC 'attach'/'detach' doesn't affect structured/dynamic reference
1928            counts ('n->refcount', 'n->dynamic_refcount').  */
1929       if (tgt->list[i].is_attach)
1930           continue;
1931 
1932       bool do_copy, do_remove;
1933       gomp_decrement_refcount (k, refcount_set, false, &do_copy, &do_remove);
1934 
1935       if ((do_copy && do_copyfrom && tgt->list[i].copy_from)
1936             || tgt->list[i].always_copy_from)
1937           gomp_copy_dev2host (devicep, aq,
1938                                   (void *) (k->host_start + tgt->list[i].offset),
1939                                   (void *) (k->tgt->tgt_start + k->tgt_offset
1940                                               + tgt->list[i].offset),
1941                                   tgt->list[i].length);
1942       if (do_remove)
1943           {
1944             struct target_mem_desc *k_tgt = k->tgt;
1945             bool is_tgt_unmapped = gomp_remove_var (devicep, k);
1946             /* It would be bad if TGT got unmapped while we're still iterating
1947                over its LIST_COUNT, and also expect to use it in the following
1948                code.  */
1949             assert (!is_tgt_unmapped
1950                       || k_tgt != tgt);
1951           }
1952     }
1953 
1954   if (aq)
1955     devicep->openacc.async.queue_callback_func (aq, gomp_unref_tgt_void,
1956                                                             (void *) tgt);
1957   else
1958     gomp_unref_tgt ((void *) tgt);
1959 
1960   gomp_mutex_unlock (&devicep->lock);
1961 }
1962 
1963 static void
gomp_unmap_vars(struct target_mem_desc * tgt,bool do_copyfrom,htab_t * refcount_set)1964 gomp_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom,
1965                      htab_t *refcount_set)
1966 {
1967   /* This management of a local refcount_set is for convenience of callers
1968      who do not share a refcount_set over multiple map/unmap uses.  */
1969   htab_t local_refcount_set = NULL;
1970   if (refcount_set == NULL)
1971     {
1972       local_refcount_set = htab_create (tgt->list_count);
1973       refcount_set = &local_refcount_set;
1974     }
1975 
1976   gomp_unmap_vars_internal (tgt, do_copyfrom, refcount_set, NULL);
1977 
1978   if (local_refcount_set)
1979     htab_free (local_refcount_set);
1980 }
1981 
1982 attribute_hidden void
goacc_unmap_vars(struct target_mem_desc * tgt,bool do_copyfrom,struct goacc_asyncqueue * aq)1983 goacc_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom,
1984                       struct goacc_asyncqueue *aq)
1985 {
1986   gomp_unmap_vars_internal (tgt, do_copyfrom, NULL, aq);
1987 }
1988 
1989 static void
gomp_update(struct gomp_device_descr * devicep,size_t mapnum,void ** hostaddrs,size_t * sizes,void * kinds,bool short_mapkind)1990 gomp_update (struct gomp_device_descr *devicep, size_t mapnum, void **hostaddrs,
1991                size_t *sizes, void *kinds, bool short_mapkind)
1992 {
1993   size_t i;
1994   struct splay_tree_key_s cur_node;
1995   const int typemask = short_mapkind ? 0xff : 0x7;
1996 
1997   if (!devicep)
1998     return;
1999 
2000   if (mapnum == 0)
2001     return;
2002 
2003   gomp_mutex_lock (&devicep->lock);
2004   if (devicep->state == GOMP_DEVICE_FINALIZED)
2005     {
2006       gomp_mutex_unlock (&devicep->lock);
2007       return;
2008     }
2009 
2010   for (i = 0; i < mapnum; i++)
2011     if (sizes[i])
2012       {
2013           cur_node.host_start = (uintptr_t) hostaddrs[i];
2014           cur_node.host_end = cur_node.host_start + sizes[i];
2015           splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &cur_node);
2016           if (n)
2017             {
2018               int kind = get_kind (short_mapkind, kinds, i);
2019               if (n->host_start > cur_node.host_start
2020                     || n->host_end < cur_node.host_end)
2021                 {
2022                     gomp_mutex_unlock (&devicep->lock);
2023                     gomp_fatal ("Trying to update [%p..%p) object when "
2024                                   "only [%p..%p) is mapped",
2025                                   (void *) cur_node.host_start,
2026                                   (void *) cur_node.host_end,
2027                                   (void *) n->host_start,
2028                                   (void *) n->host_end);
2029                 }
2030 
2031               if (n->aux && n->aux->attach_count)
2032                 {
2033                     uintptr_t addr = cur_node.host_start;
2034                     while (addr < cur_node.host_end)
2035                       {
2036                         /* We have to be careful not to overwrite still attached
2037                            pointers during host<->device updates.  */
2038                         size_t i = (addr - cur_node.host_start) / sizeof (void *);
2039                         if (n->aux->attach_count[i] == 0)
2040                           {
2041                               void *devaddr = (void *) (n->tgt->tgt_start
2042                                                               + n->tgt_offset
2043                                                               + addr - n->host_start);
2044                               if (GOMP_MAP_COPY_TO_P (kind & typemask))
2045                                 gomp_copy_host2dev (devicep, NULL,
2046                                                         devaddr, (void *) addr,
2047                                                         sizeof (void *), false, NULL);
2048                               if (GOMP_MAP_COPY_FROM_P (kind & typemask))
2049                                 gomp_copy_dev2host (devicep, NULL,
2050                                                         (void *) addr, devaddr,
2051                                                         sizeof (void *));
2052                           }
2053                         addr += sizeof (void *);
2054                       }
2055                 }
2056               else
2057                 {
2058                     void *hostaddr = (void *) cur_node.host_start;
2059                     void *devaddr = (void *) (n->tgt->tgt_start + n->tgt_offset
2060                                                     + cur_node.host_start
2061                                                     - n->host_start);
2062                     size_t size = cur_node.host_end - cur_node.host_start;
2063 
2064                     if (GOMP_MAP_COPY_TO_P (kind & typemask))
2065                       gomp_copy_host2dev (devicep, NULL, devaddr, hostaddr, size,
2066                                               false, NULL);
2067                     if (GOMP_MAP_COPY_FROM_P (kind & typemask))
2068                       gomp_copy_dev2host (devicep, NULL, hostaddr, devaddr, size);
2069                 }
2070             }
2071       }
2072   gomp_mutex_unlock (&devicep->lock);
2073 }
2074 
2075 /* Load image pointed by TARGET_DATA to the device, specified by DEVICEP.
2076    And insert to splay tree the mapping between addresses from HOST_TABLE and
2077    from loaded target image.  We rely in the host and device compiler
2078    emitting variable and functions in the same order.  */
2079 
2080 static void
gomp_load_image_to_device(struct gomp_device_descr * devicep,unsigned version,const void * host_table,const void * target_data,bool is_register_lock)2081 gomp_load_image_to_device (struct gomp_device_descr *devicep, unsigned version,
2082                                  const void *host_table, const void *target_data,
2083                                  bool is_register_lock)
2084 {
2085   void **host_func_table = ((void ***) host_table)[0];
2086   void **host_funcs_end  = ((void ***) host_table)[1];
2087   void **host_var_table  = ((void ***) host_table)[2];
2088   void **host_vars_end   = ((void ***) host_table)[3];
2089 
2090   /* The func table contains only addresses, the var table contains addresses
2091      and corresponding sizes.  */
2092   int num_funcs = host_funcs_end - host_func_table;
2093   int num_vars  = (host_vars_end - host_var_table) / 2;
2094 
2095   /* Others currently is only 'device_num' */
2096   int num_others = 1;
2097 
2098   /* Load image to device and get target addresses for the image.  */
2099   struct addr_pair *target_table = NULL;
2100   int i, num_target_entries;
2101 
2102   num_target_entries
2103     = devicep->load_image_func (devicep->target_id, version,
2104                                         target_data, &target_table);
2105 
2106   if (num_target_entries != num_funcs + num_vars
2107       /* Others (device_num) are included as trailing entries in pair list.  */
2108       && num_target_entries != num_funcs + num_vars + num_others)
2109     {
2110       gomp_mutex_unlock (&devicep->lock);
2111       if (is_register_lock)
2112           gomp_mutex_unlock (&register_lock);
2113       gomp_fatal ("Cannot map target functions or variables"
2114                       " (expected %u, have %u)", num_funcs + num_vars,
2115                       num_target_entries);
2116     }
2117 
2118   /* Insert host-target address mapping into splay tree.  */
2119   struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt));
2120   tgt->array = gomp_malloc ((num_funcs + num_vars) * sizeof (*tgt->array));
2121   tgt->refcount = REFCOUNT_INFINITY;
2122   tgt->tgt_start = 0;
2123   tgt->tgt_end = 0;
2124   tgt->to_free = NULL;
2125   tgt->prev = NULL;
2126   tgt->list_count = 0;
2127   tgt->device_descr = devicep;
2128   splay_tree_node array = tgt->array;
2129 
2130   for (i = 0; i < num_funcs; i++)
2131     {
2132       splay_tree_key k = &array->key;
2133       k->host_start = (uintptr_t) host_func_table[i];
2134       k->host_end = k->host_start + 1;
2135       k->tgt = tgt;
2136       k->tgt_offset = target_table[i].start;
2137       k->refcount = REFCOUNT_INFINITY;
2138       k->dynamic_refcount = 0;
2139       k->aux = NULL;
2140       array->left = NULL;
2141       array->right = NULL;
2142       splay_tree_insert (&devicep->mem_map, array);
2143       array++;
2144     }
2145 
2146   /* Most significant bit of the size in host and target tables marks
2147      "omp declare target link" variables.  */
2148   const uintptr_t link_bit = 1ULL << (sizeof (uintptr_t) * __CHAR_BIT__ - 1);
2149   const uintptr_t size_mask = ~link_bit;
2150 
2151   for (i = 0; i < num_vars; i++)
2152     {
2153       struct addr_pair *target_var = &target_table[num_funcs + i];
2154       uintptr_t target_size = target_var->end - target_var->start;
2155       bool is_link_var = link_bit & (uintptr_t) host_var_table[i * 2 + 1];
2156 
2157       if (!is_link_var && (uintptr_t) host_var_table[i * 2 + 1] != target_size)
2158           {
2159             gomp_mutex_unlock (&devicep->lock);
2160             if (is_register_lock)
2161               gomp_mutex_unlock (&register_lock);
2162             gomp_fatal ("Cannot map target variables (size mismatch)");
2163           }
2164 
2165       splay_tree_key k = &array->key;
2166       k->host_start = (uintptr_t) host_var_table[i * 2];
2167       k->host_end
2168           = k->host_start + (size_mask & (uintptr_t) host_var_table[i * 2 + 1]);
2169       k->tgt = tgt;
2170       k->tgt_offset = target_var->start;
2171       k->refcount = is_link_var ? REFCOUNT_LINK : REFCOUNT_INFINITY;
2172       k->dynamic_refcount = 0;
2173       k->aux = NULL;
2174       array->left = NULL;
2175       array->right = NULL;
2176       splay_tree_insert (&devicep->mem_map, array);
2177       array++;
2178     }
2179 
2180   /* Last entry is for the on-device 'device_num' variable. Tolerate case
2181      where plugin does not return this entry.  */
2182   if (num_funcs + num_vars < num_target_entries)
2183     {
2184       struct addr_pair *device_num_var = &target_table[num_funcs + num_vars];
2185       /* Start address will be non-zero for last entry if GOMP_DEVICE_NUM_VAR
2186            was found in this image.  */
2187       if (device_num_var->start != 0)
2188           {
2189             /* The index of the devicep within devices[] is regarded as its
2190                'device number', which is different from the per-device type
2191                devicep->target_id.  */
2192             int device_num_val = (int) (devicep - &devices[0]);
2193             if (device_num_var->end - device_num_var->start != sizeof (int))
2194               {
2195                 gomp_mutex_unlock (&devicep->lock);
2196                 if (is_register_lock)
2197                     gomp_mutex_unlock (&register_lock);
2198                 gomp_fatal ("offload plugin managed 'device_num' not of expected "
2199                                 "format");
2200               }
2201 
2202             /* Copy device_num value to place on device memory, hereby actually
2203                designating its device number into effect.  */
2204             gomp_copy_host2dev (devicep, NULL, (void *) device_num_var->start,
2205                                     &device_num_val, sizeof (int), false, NULL);
2206           }
2207     }
2208 
2209   free (target_table);
2210 }
2211 
2212 /* Unload the mappings described by target_data from device DEVICE_P.
2213    The device must be locked.   */
2214 
2215 static void
gomp_unload_image_from_device(struct gomp_device_descr * devicep,unsigned version,const void * host_table,const void * target_data)2216 gomp_unload_image_from_device (struct gomp_device_descr *devicep,
2217                                      unsigned version,
2218                                      const void *host_table, const void *target_data)
2219 {
2220   void **host_func_table = ((void ***) host_table)[0];
2221   void **host_funcs_end  = ((void ***) host_table)[1];
2222   void **host_var_table  = ((void ***) host_table)[2];
2223   void **host_vars_end   = ((void ***) host_table)[3];
2224 
2225   /* The func table contains only addresses, the var table contains addresses
2226      and corresponding sizes.  */
2227   int num_funcs = host_funcs_end - host_func_table;
2228   int num_vars  = (host_vars_end - host_var_table) / 2;
2229 
2230   struct splay_tree_key_s k;
2231   splay_tree_key node = NULL;
2232 
2233   /* Find mapping at start of node array */
2234   if (num_funcs || num_vars)
2235     {
2236       k.host_start = (num_funcs ? (uintptr_t) host_func_table[0]
2237                           : (uintptr_t) host_var_table[0]);
2238       k.host_end = k.host_start + 1;
2239       node = splay_tree_lookup (&devicep->mem_map, &k);
2240     }
2241 
2242   if (!devicep->unload_image_func (devicep->target_id, version, target_data))
2243     {
2244       gomp_mutex_unlock (&devicep->lock);
2245       gomp_fatal ("image unload fail");
2246     }
2247 
2248   /* Remove mappings from splay tree.  */
2249   int i;
2250   for (i = 0; i < num_funcs; i++)
2251     {
2252       k.host_start = (uintptr_t) host_func_table[i];
2253       k.host_end = k.host_start + 1;
2254       splay_tree_remove (&devicep->mem_map, &k);
2255     }
2256 
2257   /* Most significant bit of the size in host and target tables marks
2258      "omp declare target link" variables.  */
2259   const uintptr_t link_bit = 1ULL << (sizeof (uintptr_t) * __CHAR_BIT__ - 1);
2260   const uintptr_t size_mask = ~link_bit;
2261   bool is_tgt_unmapped = false;
2262 
2263   for (i = 0; i < num_vars; i++)
2264     {
2265       k.host_start = (uintptr_t) host_var_table[i * 2];
2266       k.host_end
2267           = k.host_start + (size_mask & (uintptr_t) host_var_table[i * 2 + 1]);
2268 
2269       if (!(link_bit & (uintptr_t) host_var_table[i * 2 + 1]))
2270           splay_tree_remove (&devicep->mem_map, &k);
2271       else
2272           {
2273             splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &k);
2274             is_tgt_unmapped = gomp_remove_var (devicep, n);
2275           }
2276     }
2277 
2278   if (node && !is_tgt_unmapped)
2279     {
2280       free (node->tgt);
2281       free (node);
2282     }
2283 }
2284 
2285 /* This function should be called from every offload image while loading.
2286    It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of
2287    the target, and TARGET_DATA needed by target plugin.  */
2288 
2289 void
GOMP_offload_register_ver(unsigned version,const void * host_table,int target_type,const void * target_data)2290 GOMP_offload_register_ver (unsigned version, const void *host_table,
2291                                  int target_type, const void *target_data)
2292 {
2293   int i;
2294 
2295   if (GOMP_VERSION_LIB (version) > GOMP_VERSION)
2296     gomp_fatal ("Library too old for offload (version %u < %u)",
2297                     GOMP_VERSION, GOMP_VERSION_LIB (version));
2298 
2299   gomp_mutex_lock (&register_lock);
2300 
2301   /* Load image to all initialized devices.  */
2302   for (i = 0; i < num_devices; i++)
2303     {
2304       struct gomp_device_descr *devicep = &devices[i];
2305       gomp_mutex_lock (&devicep->lock);
2306       if (devicep->type == target_type
2307             && devicep->state == GOMP_DEVICE_INITIALIZED)
2308           gomp_load_image_to_device (devicep, version,
2309                                            host_table, target_data, true);
2310       gomp_mutex_unlock (&devicep->lock);
2311     }
2312 
2313   /* Insert image to array of pending images.  */
2314   offload_images
2315     = gomp_realloc_unlock (offload_images,
2316                                  (num_offload_images + 1)
2317                                  * sizeof (struct offload_image_descr));
2318   offload_images[num_offload_images].version = version;
2319   offload_images[num_offload_images].type = target_type;
2320   offload_images[num_offload_images].host_table = host_table;
2321   offload_images[num_offload_images].target_data = target_data;
2322 
2323   num_offload_images++;
2324   gomp_mutex_unlock (&register_lock);
2325 }
2326 
2327 void
GOMP_offload_register(const void * host_table,int target_type,const void * target_data)2328 GOMP_offload_register (const void *host_table, int target_type,
2329                            const void *target_data)
2330 {
2331   GOMP_offload_register_ver (0, host_table, target_type, target_data);
2332 }
2333 
2334 /* This function should be called from every offload image while unloading.
2335    It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of
2336    the target, and TARGET_DATA needed by target plugin.  */
2337 
2338 void
GOMP_offload_unregister_ver(unsigned version,const void * host_table,int target_type,const void * target_data)2339 GOMP_offload_unregister_ver (unsigned version, const void *host_table,
2340                                    int target_type, const void *target_data)
2341 {
2342   int i;
2343 
2344   gomp_mutex_lock (&register_lock);
2345 
2346   /* Unload image from all initialized devices.  */
2347   for (i = 0; i < num_devices; i++)
2348     {
2349       struct gomp_device_descr *devicep = &devices[i];
2350       gomp_mutex_lock (&devicep->lock);
2351       if (devicep->type == target_type
2352             && devicep->state == GOMP_DEVICE_INITIALIZED)
2353           gomp_unload_image_from_device (devicep, version,
2354                                                host_table, target_data);
2355       gomp_mutex_unlock (&devicep->lock);
2356     }
2357 
2358   /* Remove image from array of pending images.  */
2359   for (i = 0; i < num_offload_images; i++)
2360     if (offload_images[i].target_data == target_data)
2361       {
2362           offload_images[i] = offload_images[--num_offload_images];
2363           break;
2364       }
2365 
2366   gomp_mutex_unlock (&register_lock);
2367 }
2368 
2369 void
GOMP_offload_unregister(const void * host_table,int target_type,const void * target_data)2370 GOMP_offload_unregister (const void *host_table, int target_type,
2371                                const void *target_data)
2372 {
2373   GOMP_offload_unregister_ver (0, host_table, target_type, target_data);
2374 }
2375 
2376 /* This function initializes the target device, specified by DEVICEP.  DEVICEP
2377    must be locked on entry, and remains locked on return.  */
2378 
2379 attribute_hidden void
gomp_init_device(struct gomp_device_descr * devicep)2380 gomp_init_device (struct gomp_device_descr *devicep)
2381 {
2382   int i;
2383   if (!devicep->init_device_func (devicep->target_id))
2384     {
2385       gomp_mutex_unlock (&devicep->lock);
2386       gomp_fatal ("device initialization failed");
2387     }
2388 
2389   /* Load to device all images registered by the moment.  */
2390   for (i = 0; i < num_offload_images; i++)
2391     {
2392       struct offload_image_descr *image = &offload_images[i];
2393       if (image->type == devicep->type)
2394           gomp_load_image_to_device (devicep, image->version,
2395                                            image->host_table, image->target_data,
2396                                            false);
2397     }
2398 
2399   /* Initialize OpenACC asynchronous queues.  */
2400   goacc_init_asyncqueues (devicep);
2401 
2402   devicep->state = GOMP_DEVICE_INITIALIZED;
2403 }
2404 
2405 /* This function finalizes the target device, specified by DEVICEP.  DEVICEP
2406    must be locked on entry, and remains locked on return.  */
2407 
2408 attribute_hidden bool
gomp_fini_device(struct gomp_device_descr * devicep)2409 gomp_fini_device (struct gomp_device_descr *devicep)
2410 {
2411   bool ret = goacc_fini_asyncqueues (devicep);
2412   ret &= devicep->fini_device_func (devicep->target_id);
2413   devicep->state = GOMP_DEVICE_FINALIZED;
2414   return ret;
2415 }
2416 
2417 attribute_hidden void
gomp_unload_device(struct gomp_device_descr * devicep)2418 gomp_unload_device (struct gomp_device_descr *devicep)
2419 {
2420   if (devicep->state == GOMP_DEVICE_INITIALIZED)
2421     {
2422       unsigned i;
2423 
2424       /* Unload from device all images registered at the moment.  */
2425       for (i = 0; i < num_offload_images; i++)
2426           {
2427             struct offload_image_descr *image = &offload_images[i];
2428             if (image->type == devicep->type)
2429               gomp_unload_image_from_device (devicep, image->version,
2430                                                      image->host_table,
2431                                                      image->target_data);
2432           }
2433     }
2434 }
2435 
2436 /* Host fallback for GOMP_target{,_ext} routines.  */
2437 
2438 static void
gomp_target_fallback(void (* fn)(void *),void ** hostaddrs,struct gomp_device_descr * devicep,void ** args)2439 gomp_target_fallback (void (*fn) (void *), void **hostaddrs,
2440                           struct gomp_device_descr *devicep, void **args)
2441 {
2442   struct gomp_thread old_thr, *thr = gomp_thread ();
2443 
2444   if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_MANDATORY
2445       && devicep != NULL)
2446     gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, but device cannot "
2447                     "be used for offloading");
2448 
2449   old_thr = *thr;
2450   memset (thr, '\0', sizeof (*thr));
2451   if (gomp_places_list)
2452     {
2453       thr->place = old_thr.place;
2454       thr->ts.place_partition_len = gomp_places_list_len;
2455     }
2456   if (args)
2457     while (*args)
2458       {
2459           intptr_t id = (intptr_t) *args++, val;
2460           if (id & GOMP_TARGET_ARG_SUBSEQUENT_PARAM)
2461             val = (intptr_t) *args++;
2462           else
2463             val = id >> GOMP_TARGET_ARG_VALUE_SHIFT;
2464           if ((id & GOMP_TARGET_ARG_DEVICE_MASK) != GOMP_TARGET_ARG_DEVICE_ALL)
2465             continue;
2466           id &= GOMP_TARGET_ARG_ID_MASK;
2467           if (id != GOMP_TARGET_ARG_THREAD_LIMIT)
2468             continue;
2469           val = val > INT_MAX ? INT_MAX : val;
2470           if (val)
2471             gomp_icv (true)->thread_limit_var = val;
2472           break;
2473       }
2474 
2475   fn (hostaddrs);
2476   gomp_free_thread (thr);
2477   *thr = old_thr;
2478 }
2479 
2480 /* Calculate alignment and size requirements of a private copy of data shared
2481    as GOMP_MAP_FIRSTPRIVATE and store them to TGT_ALIGN and TGT_SIZE.  */
2482 
2483 static inline void
calculate_firstprivate_requirements(size_t mapnum,size_t * sizes,unsigned short * kinds,size_t * tgt_align,size_t * tgt_size)2484 calculate_firstprivate_requirements (size_t mapnum, size_t *sizes,
2485                                              unsigned short *kinds, size_t *tgt_align,
2486                                              size_t *tgt_size)
2487 {
2488   size_t i;
2489   for (i = 0; i < mapnum; i++)
2490     if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
2491       {
2492           size_t align = (size_t) 1 << (kinds[i] >> 8);
2493           if (*tgt_align < align)
2494             *tgt_align = align;
2495           *tgt_size = (*tgt_size + align - 1) & ~(align - 1);
2496           *tgt_size += sizes[i];
2497       }
2498 }
2499 
2500 /* Copy data shared as GOMP_MAP_FIRSTPRIVATE to DST.  */
2501 
2502 static inline void
copy_firstprivate_data(char * tgt,size_t mapnum,void ** hostaddrs,size_t * sizes,unsigned short * kinds,size_t tgt_align,size_t tgt_size)2503 copy_firstprivate_data (char *tgt, size_t mapnum, void **hostaddrs,
2504                               size_t *sizes, unsigned short *kinds, size_t tgt_align,
2505                               size_t tgt_size)
2506 {
2507   uintptr_t al = (uintptr_t) tgt & (tgt_align - 1);
2508   if (al)
2509     tgt += tgt_align - al;
2510   tgt_size = 0;
2511   size_t i;
2512   for (i = 0; i < mapnum; i++)
2513     if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE && hostaddrs[i] != NULL)
2514       {
2515           size_t align = (size_t) 1 << (kinds[i] >> 8);
2516           tgt_size = (tgt_size + align - 1) & ~(align - 1);
2517           memcpy (tgt + tgt_size, hostaddrs[i], sizes[i]);
2518           hostaddrs[i] = tgt + tgt_size;
2519           tgt_size = tgt_size + sizes[i];
2520       }
2521 }
2522 
2523 /* Helper function of GOMP_target{,_ext} routines.  */
2524 
2525 static void *
gomp_get_target_fn_addr(struct gomp_device_descr * devicep,void (* host_fn)(void *))2526 gomp_get_target_fn_addr (struct gomp_device_descr *devicep,
2527                                void (*host_fn) (void *))
2528 {
2529   if (devicep->capabilities & GOMP_OFFLOAD_CAP_NATIVE_EXEC)
2530     return (void *) host_fn;
2531   else
2532     {
2533       gomp_mutex_lock (&devicep->lock);
2534       if (devicep->state == GOMP_DEVICE_FINALIZED)
2535           {
2536             gomp_mutex_unlock (&devicep->lock);
2537             return NULL;
2538           }
2539 
2540       struct splay_tree_key_s k;
2541       k.host_start = (uintptr_t) host_fn;
2542       k.host_end = k.host_start + 1;
2543       splay_tree_key tgt_fn = splay_tree_lookup (&devicep->mem_map, &k);
2544       gomp_mutex_unlock (&devicep->lock);
2545       if (tgt_fn == NULL)
2546           return NULL;
2547 
2548       return (void *) tgt_fn->tgt_offset;
2549     }
2550 }
2551 
2552 /* Called when encountering a target directive.  If DEVICE
2553    is GOMP_DEVICE_ICV, it means use device-var ICV.  If it is
2554    GOMP_DEVICE_HOST_FALLBACK (or any value
2555    larger than last available hw device), use host fallback.
2556    FN is address of host code, UNUSED is part of the current ABI, but
2557    we're not actually using it.  HOSTADDRS, SIZES and KINDS are arrays
2558    with MAPNUM entries, with addresses of the host objects,
2559    sizes of the host objects (resp. for pointer kind pointer bias
2560    and assumed sizeof (void *) size) and kinds.  */
2561 
2562 void
GOMP_target(int device,void (* fn)(void *),const void * unused,size_t mapnum,void ** hostaddrs,size_t * sizes,unsigned char * kinds)2563 GOMP_target (int device, void (*fn) (void *), const void *unused,
2564                size_t mapnum, void **hostaddrs, size_t *sizes,
2565                unsigned char *kinds)
2566 {
2567   struct gomp_device_descr *devicep = resolve_device (device);
2568 
2569   void *fn_addr;
2570   if (devicep == NULL
2571       || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2572       /* All shared memory devices should use the GOMP_target_ext function.  */
2573       || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM
2574       || !(fn_addr = gomp_get_target_fn_addr (devicep, fn)))
2575     return gomp_target_fallback (fn, hostaddrs, devicep, NULL);
2576 
2577   htab_t refcount_set = htab_create (mapnum);
2578   struct target_mem_desc *tgt_vars
2579     = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
2580                          &refcount_set, GOMP_MAP_VARS_TARGET);
2581   devicep->run_func (devicep->target_id, fn_addr, (void *) tgt_vars->tgt_start,
2582                          NULL);
2583   htab_clear (refcount_set);
2584   gomp_unmap_vars (tgt_vars, true, &refcount_set);
2585   htab_free (refcount_set);
2586 }
2587 
2588 static inline unsigned int
clear_unsupported_flags(struct gomp_device_descr * devicep,unsigned int flags)2589 clear_unsupported_flags (struct gomp_device_descr *devicep, unsigned int flags)
2590 {
2591   /* If we cannot run asynchronously, simply ignore nowait.  */
2592   if (devicep != NULL && devicep->async_run_func == NULL)
2593     flags &= ~GOMP_TARGET_FLAG_NOWAIT;
2594 
2595   return flags;
2596 }
2597 
2598 /* Like GOMP_target, but KINDS is 16-bit, UNUSED is no longer present,
2599    and several arguments have been added:
2600    FLAGS is a bitmask, see GOMP_TARGET_FLAG_* in gomp-constants.h.
2601    DEPEND is array of dependencies, see GOMP_task for details.
2602 
2603    ARGS is a pointer to an array consisting of a variable number of both
2604    device-independent and device-specific arguments, which can take one two
2605    elements where the first specifies for which device it is intended, the type
2606    and optionally also the value.  If the value is not present in the first
2607    one, the whole second element the actual value.  The last element of the
2608    array is a single NULL.  Among the device independent can be for example
2609    NUM_TEAMS and THREAD_LIMIT.
2610 
2611    NUM_TEAMS is positive if GOMP_teams will be called in the body with
2612    that value, or 1 if teams construct is not present, or 0, if
2613    teams construct does not have num_teams clause and so the choice is
2614    implementation defined, and -1 if it can't be determined on the host
2615    what value will GOMP_teams have on the device.
2616    THREAD_LIMIT similarly is positive if GOMP_teams will be called in the
2617    body with that value, or 0, if teams construct does not have thread_limit
2618    clause or the teams construct is not present, or -1 if it can't be
2619    determined on the host what value will GOMP_teams have on the device.  */
2620 
2621 void
GOMP_target_ext(int device,void (* fn)(void *),size_t mapnum,void ** hostaddrs,size_t * sizes,unsigned short * kinds,unsigned int flags,void ** depend,void ** args)2622 GOMP_target_ext (int device, void (*fn) (void *), size_t mapnum,
2623                      void **hostaddrs, size_t *sizes, unsigned short *kinds,
2624                      unsigned int flags, void **depend, void **args)
2625 {
2626   struct gomp_device_descr *devicep = resolve_device (device);
2627   size_t tgt_align = 0, tgt_size = 0;
2628   bool fpc_done = false;
2629 
2630   flags = clear_unsupported_flags (devicep, flags);
2631 
2632   if (flags & GOMP_TARGET_FLAG_NOWAIT)
2633     {
2634       struct gomp_thread *thr = gomp_thread ();
2635       /* Create a team if we don't have any around, as nowait
2636            target tasks make sense to run asynchronously even when
2637            outside of any parallel.  */
2638       if (__builtin_expect (thr->ts.team == NULL, 0))
2639           {
2640             struct gomp_team *team = gomp_new_team (1);
2641             struct gomp_task *task = thr->task;
2642             struct gomp_task **implicit_task = &task;
2643             struct gomp_task_icv *icv = task ? &task->icv : &gomp_global_icv;
2644             team->prev_ts = thr->ts;
2645             thr->ts.team = team;
2646             thr->ts.team_id = 0;
2647             thr->ts.work_share = &team->work_shares[0];
2648             thr->ts.last_work_share = NULL;
2649 #ifdef HAVE_SYNC_BUILTINS
2650             thr->ts.single_count = 0;
2651 #endif
2652             thr->ts.static_trip = 0;
2653             thr->task = &team->implicit_task[0];
2654             gomp_init_task (thr->task, NULL, icv);
2655             while (*implicit_task
2656                      && (*implicit_task)->kind != GOMP_TASK_IMPLICIT)
2657               implicit_task = &(*implicit_task)->parent;
2658             if (*implicit_task)
2659               {
2660                 thr->task = *implicit_task;
2661                 gomp_end_task ();
2662                 free (*implicit_task);
2663                 thr->task = &team->implicit_task[0];
2664               }
2665             else
2666               pthread_setspecific (gomp_thread_destructor, thr);
2667             if (implicit_task != &task)
2668               {
2669                 *implicit_task = thr->task;
2670                 thr->task = task;
2671               }
2672           }
2673       if (thr->ts.team
2674             && !thr->task->final_task)
2675           {
2676             gomp_create_target_task (devicep, fn, mapnum, hostaddrs,
2677                                            sizes, kinds, flags, depend, args,
2678                                            GOMP_TARGET_TASK_BEFORE_MAP);
2679             return;
2680           }
2681     }
2682 
2683   /* If there are depend clauses, but nowait is not present
2684      (or we are in a final task), block the parent task until the
2685      dependencies are resolved and then just continue with the rest
2686      of the function as if it is a merged task.  */
2687   if (depend != NULL)
2688     {
2689       struct gomp_thread *thr = gomp_thread ();
2690       if (thr->task && thr->task->depend_hash)
2691           {
2692             /* If we might need to wait, copy firstprivate now.  */
2693             calculate_firstprivate_requirements (mapnum, sizes, kinds,
2694                                                          &tgt_align, &tgt_size);
2695             if (tgt_align)
2696               {
2697                 char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
2698                 copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
2699                                               tgt_align, tgt_size);
2700               }
2701             fpc_done = true;
2702             gomp_task_maybe_wait_for_dependencies (depend);
2703           }
2704     }
2705 
2706   void *fn_addr;
2707   if (devicep == NULL
2708       || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2709       || !(fn_addr = gomp_get_target_fn_addr (devicep, fn))
2710       || (devicep->can_run_func && !devicep->can_run_func (fn_addr)))
2711     {
2712       if (!fpc_done)
2713           {
2714             calculate_firstprivate_requirements (mapnum, sizes, kinds,
2715                                                          &tgt_align, &tgt_size);
2716             if (tgt_align)
2717               {
2718                 char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
2719                 copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
2720                                               tgt_align, tgt_size);
2721               }
2722           }
2723       gomp_target_fallback (fn, hostaddrs, devicep, args);
2724       return;
2725     }
2726 
2727   struct target_mem_desc *tgt_vars;
2728   htab_t refcount_set = NULL;
2729 
2730   if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2731     {
2732       if (!fpc_done)
2733           {
2734             calculate_firstprivate_requirements (mapnum, sizes, kinds,
2735                                                          &tgt_align, &tgt_size);
2736             if (tgt_align)
2737               {
2738                 char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
2739                 copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
2740                                               tgt_align, tgt_size);
2741               }
2742           }
2743       tgt_vars = NULL;
2744     }
2745   else
2746     {
2747       refcount_set = htab_create (mapnum);
2748       tgt_vars = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds,
2749                                         true, &refcount_set, GOMP_MAP_VARS_TARGET);
2750     }
2751   devicep->run_func (devicep->target_id, fn_addr,
2752                          tgt_vars ? (void *) tgt_vars->tgt_start : hostaddrs,
2753                          args);
2754   if (tgt_vars)
2755     {
2756       htab_clear (refcount_set);
2757       gomp_unmap_vars (tgt_vars, true, &refcount_set);
2758     }
2759   if (refcount_set)
2760     htab_free (refcount_set);
2761 }
2762 
2763 /* Host fallback for GOMP_target_data{,_ext} routines.  */
2764 
2765 static void
gomp_target_data_fallback(struct gomp_device_descr * devicep)2766 gomp_target_data_fallback (struct gomp_device_descr *devicep)
2767 {
2768   struct gomp_task_icv *icv = gomp_icv (false);
2769 
2770   if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_MANDATORY
2771       && devicep != NULL)
2772     gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, but device cannot "
2773                     "be used for offloading");
2774 
2775   if (icv->target_data)
2776     {
2777       /* Even when doing a host fallback, if there are any active
2778          #pragma omp target data constructs, need to remember the
2779          new #pragma omp target data, otherwise GOMP_target_end_data
2780          would get out of sync.  */
2781       struct target_mem_desc *tgt
2782           = gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, false,
2783                                NULL, GOMP_MAP_VARS_DATA);
2784       tgt->prev = icv->target_data;
2785       icv->target_data = tgt;
2786     }
2787 }
2788 
2789 void
GOMP_target_data(int device,const void * unused,size_t mapnum,void ** hostaddrs,size_t * sizes,unsigned char * kinds)2790 GOMP_target_data (int device, const void *unused, size_t mapnum,
2791                       void **hostaddrs, size_t *sizes, unsigned char *kinds)
2792 {
2793   struct gomp_device_descr *devicep = resolve_device (device);
2794 
2795   if (devicep == NULL
2796       || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2797       || (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM))
2798     return gomp_target_data_fallback (devicep);
2799 
2800   struct target_mem_desc *tgt
2801     = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
2802                          NULL, GOMP_MAP_VARS_DATA);
2803   struct gomp_task_icv *icv = gomp_icv (true);
2804   tgt->prev = icv->target_data;
2805   icv->target_data = tgt;
2806 }
2807 
2808 void
GOMP_target_data_ext(int device,size_t mapnum,void ** hostaddrs,size_t * sizes,unsigned short * kinds)2809 GOMP_target_data_ext (int device, size_t mapnum, void **hostaddrs,
2810                           size_t *sizes, unsigned short *kinds)
2811 {
2812   struct gomp_device_descr *devicep = resolve_device (device);
2813 
2814   if (devicep == NULL
2815       || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2816       || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2817     return gomp_target_data_fallback (devicep);
2818 
2819   struct target_mem_desc *tgt
2820     = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, true,
2821                          NULL, GOMP_MAP_VARS_DATA);
2822   struct gomp_task_icv *icv = gomp_icv (true);
2823   tgt->prev = icv->target_data;
2824   icv->target_data = tgt;
2825 }
2826 
2827 void
GOMP_target_end_data(void)2828 GOMP_target_end_data (void)
2829 {
2830   struct gomp_task_icv *icv = gomp_icv (false);
2831   if (icv->target_data)
2832     {
2833       struct target_mem_desc *tgt = icv->target_data;
2834       icv->target_data = tgt->prev;
2835       gomp_unmap_vars (tgt, true, NULL);
2836     }
2837 }
2838 
2839 void
GOMP_target_update(int device,const void * unused,size_t mapnum,void ** hostaddrs,size_t * sizes,unsigned char * kinds)2840 GOMP_target_update (int device, const void *unused, size_t mapnum,
2841                         void **hostaddrs, size_t *sizes, unsigned char *kinds)
2842 {
2843   struct gomp_device_descr *devicep = resolve_device (device);
2844 
2845   if (devicep == NULL
2846       || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2847       || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2848     return;
2849 
2850   gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, false);
2851 }
2852 
2853 void
GOMP_target_update_ext(int device,size_t mapnum,void ** hostaddrs,size_t * sizes,unsigned short * kinds,unsigned int flags,void ** depend)2854 GOMP_target_update_ext (int device, size_t mapnum, void **hostaddrs,
2855                               size_t *sizes, unsigned short *kinds,
2856                               unsigned int flags, void **depend)
2857 {
2858   struct gomp_device_descr *devicep = resolve_device (device);
2859 
2860   /* If there are depend clauses, but nowait is not present,
2861      block the parent task until the dependencies are resolved
2862      and then just continue with the rest of the function as if it
2863      is a merged task.  Until we are able to schedule task during
2864      variable mapping or unmapping, ignore nowait if depend clauses
2865      are not present.  */
2866   if (depend != NULL)
2867     {
2868       struct gomp_thread *thr = gomp_thread ();
2869       if (thr->task && thr->task->depend_hash)
2870           {
2871             if ((flags & GOMP_TARGET_FLAG_NOWAIT)
2872                 && thr->ts.team
2873                 && !thr->task->final_task)
2874               {
2875                 if (gomp_create_target_task (devicep, (void (*) (void *)) NULL,
2876                                                      mapnum, hostaddrs, sizes, kinds,
2877                                                      flags | GOMP_TARGET_FLAG_UPDATE,
2878                                                      depend, NULL, GOMP_TARGET_TASK_DATA))
2879                     return;
2880               }
2881             else
2882               {
2883                 struct gomp_team *team = thr->ts.team;
2884                 /* If parallel or taskgroup has been cancelled, don't start new
2885                      tasks.  */
2886                 if (__builtin_expect (gomp_cancel_var, 0) && team)
2887                     {
2888                       if (gomp_team_barrier_cancelled (&team->barrier))
2889                         return;
2890                       if (thr->task->taskgroup)
2891                         {
2892                           if (thr->task->taskgroup->cancelled)
2893                               return;
2894                           if (thr->task->taskgroup->workshare
2895                                 && thr->task->taskgroup->prev
2896                                 && thr->task->taskgroup->prev->cancelled)
2897                               return;
2898                         }
2899                     }
2900 
2901                 gomp_task_maybe_wait_for_dependencies (depend);
2902               }
2903           }
2904     }
2905 
2906   if (devicep == NULL
2907       || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2908       || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2909     return;
2910 
2911   struct gomp_thread *thr = gomp_thread ();
2912   struct gomp_team *team = thr->ts.team;
2913   /* If parallel or taskgroup has been cancelled, don't start new tasks.  */
2914   if (__builtin_expect (gomp_cancel_var, 0) && team)
2915     {
2916       if (gomp_team_barrier_cancelled (&team->barrier))
2917           return;
2918       if (thr->task->taskgroup)
2919           {
2920             if (thr->task->taskgroup->cancelled)
2921               return;
2922             if (thr->task->taskgroup->workshare
2923                 && thr->task->taskgroup->prev
2924                 && thr->task->taskgroup->prev->cancelled)
2925               return;
2926           }
2927     }
2928 
2929   gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, true);
2930 }
2931 
2932 static void
gomp_exit_data(struct gomp_device_descr * devicep,size_t mapnum,void ** hostaddrs,size_t * sizes,unsigned short * kinds,htab_t * refcount_set)2933 gomp_exit_data (struct gomp_device_descr *devicep, size_t mapnum,
2934                     void **hostaddrs, size_t *sizes, unsigned short *kinds,
2935                     htab_t *refcount_set)
2936 {
2937   const int typemask = 0xff;
2938   size_t i;
2939   gomp_mutex_lock (&devicep->lock);
2940   if (devicep->state == GOMP_DEVICE_FINALIZED)
2941     {
2942       gomp_mutex_unlock (&devicep->lock);
2943       return;
2944     }
2945 
2946   for (i = 0; i < mapnum; i++)
2947     if ((kinds[i] & typemask) == GOMP_MAP_DETACH)
2948       {
2949           struct splay_tree_key_s cur_node;
2950           cur_node.host_start = (uintptr_t) hostaddrs[i];
2951           cur_node.host_end = cur_node.host_start + sizeof (void *);
2952           splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &cur_node);
2953 
2954           if (n)
2955             gomp_detach_pointer (devicep, NULL, n, (uintptr_t) hostaddrs[i],
2956                                      false, NULL);
2957       }
2958 
2959   int nrmvars = 0;
2960   splay_tree_key remove_vars[mapnum];
2961 
2962   for (i = 0; i < mapnum; i++)
2963     {
2964       struct splay_tree_key_s cur_node;
2965       unsigned char kind = kinds[i] & typemask;
2966       switch (kind)
2967           {
2968           case GOMP_MAP_FROM:
2969           case GOMP_MAP_ALWAYS_FROM:
2970           case GOMP_MAP_DELETE:
2971           case GOMP_MAP_RELEASE:
2972           case GOMP_MAP_ZERO_LEN_ARRAY_SECTION:
2973           case GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION:
2974             cur_node.host_start = (uintptr_t) hostaddrs[i];
2975             cur_node.host_end = cur_node.host_start + sizes[i];
2976             splay_tree_key k = (kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION
2977                                     || kind == GOMP_MAP_ZERO_LEN_ARRAY_SECTION)
2978               ? gomp_map_0len_lookup (&devicep->mem_map, &cur_node)
2979               : splay_tree_lookup (&devicep->mem_map, &cur_node);
2980             if (!k)
2981               continue;
2982 
2983             bool delete_p = (kind == GOMP_MAP_DELETE
2984                                  || kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION);
2985             bool do_copy, do_remove;
2986             gomp_decrement_refcount (k, refcount_set, delete_p, &do_copy,
2987                                            &do_remove);
2988 
2989             if ((kind == GOMP_MAP_FROM && do_copy)
2990                 || kind == GOMP_MAP_ALWAYS_FROM)
2991               {
2992                 if (k->aux && k->aux->attach_count)
2993                     {
2994                       /* We have to be careful not to overwrite still attached
2995                          pointers during the copyback to host.  */
2996                       uintptr_t addr = k->host_start;
2997                       while (addr < k->host_end)
2998                         {
2999                           size_t i = (addr - k->host_start) / sizeof (void *);
3000                           if (k->aux->attach_count[i] == 0)
3001                               gomp_copy_dev2host (devicep, NULL, (void *) addr,
3002                                                       (void *) (k->tgt->tgt_start
3003                                                                   + k->tgt_offset
3004                                                                   + addr - k->host_start),
3005                                                       sizeof (void *));
3006                           addr += sizeof (void *);
3007                         }
3008                     }
3009                 else
3010                     gomp_copy_dev2host (devicep, NULL, (void *) cur_node.host_start,
3011                                             (void *) (k->tgt->tgt_start + k->tgt_offset
3012                                                         + cur_node.host_start
3013                                                         - k->host_start),
3014                                             cur_node.host_end - cur_node.host_start);
3015               }
3016 
3017             /* Structure elements lists are removed altogether at once, which
3018                may cause immediate deallocation of the target_mem_desc, causing
3019                errors if we still have following element siblings to copy back.
3020                While we're at it, it also seems more disciplined to simply
3021                queue all removals together for processing below.
3022 
3023                Structured block unmapping (i.e. gomp_unmap_vars_internal) should
3024                not have this problem, since they maintain an additional
3025                tgt->refcount = 1 reference to the target_mem_desc to start with.
3026             */
3027             if (do_remove)
3028               remove_vars[nrmvars++] = k;
3029             break;
3030 
3031           case GOMP_MAP_DETACH:
3032             break;
3033           default:
3034             gomp_mutex_unlock (&devicep->lock);
3035             gomp_fatal ("GOMP_target_enter_exit_data unhandled kind 0x%.2x",
3036                           kind);
3037           }
3038     }
3039 
3040   for (int i = 0; i < nrmvars; i++)
3041     gomp_remove_var (devicep, remove_vars[i]);
3042 
3043   gomp_mutex_unlock (&devicep->lock);
3044 }
3045 
3046 void
GOMP_target_enter_exit_data(int device,size_t mapnum,void ** hostaddrs,size_t * sizes,unsigned short * kinds,unsigned int flags,void ** depend)3047 GOMP_target_enter_exit_data (int device, size_t mapnum, void **hostaddrs,
3048                                    size_t *sizes, unsigned short *kinds,
3049                                    unsigned int flags, void **depend)
3050 {
3051   struct gomp_device_descr *devicep = resolve_device (device);
3052 
3053   /* If there are depend clauses, but nowait is not present,
3054      block the parent task until the dependencies are resolved
3055      and then just continue with the rest of the function as if it
3056      is a merged task.  Until we are able to schedule task during
3057      variable mapping or unmapping, ignore nowait if depend clauses
3058      are not present.  */
3059   if (depend != NULL)
3060     {
3061       struct gomp_thread *thr = gomp_thread ();
3062       if (thr->task && thr->task->depend_hash)
3063           {
3064             if ((flags & GOMP_TARGET_FLAG_NOWAIT)
3065                 && thr->ts.team
3066                 && !thr->task->final_task)
3067               {
3068                 if (gomp_create_target_task (devicep, (void (*) (void *)) NULL,
3069                                                      mapnum, hostaddrs, sizes, kinds,
3070                                                      flags, depend, NULL,
3071                                                      GOMP_TARGET_TASK_DATA))
3072                     return;
3073               }
3074             else
3075               {
3076                 struct gomp_team *team = thr->ts.team;
3077                 /* If parallel or taskgroup has been cancelled, don't start new
3078                      tasks.  */
3079                 if (__builtin_expect (gomp_cancel_var, 0) && team)
3080                     {
3081                       if (gomp_team_barrier_cancelled (&team->barrier))
3082                         return;
3083                       if (thr->task->taskgroup)
3084                         {
3085                           if (thr->task->taskgroup->cancelled)
3086                               return;
3087                           if (thr->task->taskgroup->workshare
3088                                 && thr->task->taskgroup->prev
3089                                 && thr->task->taskgroup->prev->cancelled)
3090                               return;
3091                         }
3092                     }
3093 
3094                 gomp_task_maybe_wait_for_dependencies (depend);
3095               }
3096           }
3097     }
3098 
3099   if (devicep == NULL
3100       || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
3101       || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
3102     return;
3103 
3104   struct gomp_thread *thr = gomp_thread ();
3105   struct gomp_team *team = thr->ts.team;
3106   /* If parallel or taskgroup has been cancelled, don't start new tasks.  */
3107   if (__builtin_expect (gomp_cancel_var, 0) && team)
3108     {
3109       if (gomp_team_barrier_cancelled (&team->barrier))
3110           return;
3111       if (thr->task->taskgroup)
3112           {
3113             if (thr->task->taskgroup->cancelled)
3114               return;
3115             if (thr->task->taskgroup->workshare
3116                 && thr->task->taskgroup->prev
3117                 && thr->task->taskgroup->prev->cancelled)
3118               return;
3119           }
3120     }
3121 
3122   htab_t refcount_set = htab_create (mapnum);
3123 
3124   /* The variables are mapped separately such that they can be released
3125      independently.  */
3126   size_t i, j;
3127   if ((flags & GOMP_TARGET_FLAG_EXIT_DATA) == 0)
3128     for (i = 0; i < mapnum; i++)
3129       if ((kinds[i] & 0xff) == GOMP_MAP_STRUCT)
3130           {
3131             gomp_map_vars (devicep, sizes[i] + 1, &hostaddrs[i], NULL, &sizes[i],
3132                                &kinds[i], true, &refcount_set,
3133                                GOMP_MAP_VARS_ENTER_DATA);
3134             i += sizes[i];
3135           }
3136       else if ((kinds[i] & 0xff) == GOMP_MAP_TO_PSET)
3137           {
3138             for (j = i + 1; j < mapnum; j++)
3139               if (!GOMP_MAP_POINTER_P (get_kind (true, kinds, j) & 0xff)
3140                     && !GOMP_MAP_ALWAYS_POINTER_P (get_kind (true, kinds, j) & 0xff))
3141                 break;
3142             gomp_map_vars (devicep, j-i, &hostaddrs[i], NULL, &sizes[i],
3143                                &kinds[i], true, &refcount_set,
3144                                GOMP_MAP_VARS_ENTER_DATA);
3145             i += j - i - 1;
3146           }
3147       else if (i + 1 < mapnum && (kinds[i + 1] & 0xff) == GOMP_MAP_ATTACH)
3148           {
3149             /* An attach operation must be processed together with the mapped
3150                base-pointer list item.  */
3151             gomp_map_vars (devicep, 2, &hostaddrs[i], NULL, &sizes[i], &kinds[i],
3152                                true, &refcount_set, GOMP_MAP_VARS_ENTER_DATA);
3153             i += 1;
3154           }
3155       else
3156           gomp_map_vars (devicep, 1, &hostaddrs[i], NULL, &sizes[i], &kinds[i],
3157                            true, &refcount_set, GOMP_MAP_VARS_ENTER_DATA);
3158   else
3159     gomp_exit_data (devicep, mapnum, hostaddrs, sizes, kinds, &refcount_set);
3160   htab_free (refcount_set);
3161 }
3162 
3163 bool
gomp_target_task_fn(void * data)3164 gomp_target_task_fn (void *data)
3165 {
3166   struct gomp_target_task *ttask = (struct gomp_target_task *) data;
3167   struct gomp_device_descr *devicep = ttask->devicep;
3168 
3169   if (ttask->fn != NULL)
3170     {
3171       void *fn_addr;
3172       if (devicep == NULL
3173             || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
3174             || !(fn_addr = gomp_get_target_fn_addr (devicep, ttask->fn))
3175             || (devicep->can_run_func && !devicep->can_run_func (fn_addr)))
3176           {
3177             ttask->state = GOMP_TARGET_TASK_FALLBACK;
3178             gomp_target_fallback (ttask->fn, ttask->hostaddrs, devicep,
3179                                         ttask->args);
3180             return false;
3181           }
3182 
3183       if (ttask->state == GOMP_TARGET_TASK_FINISHED)
3184           {
3185             if (ttask->tgt)
3186               gomp_unmap_vars (ttask->tgt, true, NULL);
3187             return false;
3188           }
3189 
3190       void *actual_arguments;
3191       if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
3192           {
3193             ttask->tgt = NULL;
3194             actual_arguments = ttask->hostaddrs;
3195           }
3196       else
3197           {
3198             ttask->tgt = gomp_map_vars (devicep, ttask->mapnum, ttask->hostaddrs,
3199                                               NULL, ttask->sizes, ttask->kinds, true,
3200                                               NULL, GOMP_MAP_VARS_TARGET);
3201             actual_arguments = (void *) ttask->tgt->tgt_start;
3202           }
3203       ttask->state = GOMP_TARGET_TASK_READY_TO_RUN;
3204 
3205       assert (devicep->async_run_func);
3206       devicep->async_run_func (devicep->target_id, fn_addr, actual_arguments,
3207                                      ttask->args, (void *) ttask);
3208       return true;
3209     }
3210   else if (devicep == NULL
3211              || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
3212              || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
3213     return false;
3214 
3215   size_t i;
3216   if (ttask->flags & GOMP_TARGET_FLAG_UPDATE)
3217     gomp_update (devicep, ttask->mapnum, ttask->hostaddrs, ttask->sizes,
3218                      ttask->kinds, true);
3219   else
3220     {
3221       htab_t refcount_set = htab_create (ttask->mapnum);
3222       if ((ttask->flags & GOMP_TARGET_FLAG_EXIT_DATA) == 0)
3223           for (i = 0; i < ttask->mapnum; i++)
3224             if ((ttask->kinds[i] & 0xff) == GOMP_MAP_STRUCT)
3225               {
3226                 gomp_map_vars (devicep, ttask->sizes[i] + 1, &ttask->hostaddrs[i],
3227                                    NULL, &ttask->sizes[i], &ttask->kinds[i], true,
3228                                    &refcount_set, GOMP_MAP_VARS_ENTER_DATA);
3229                 i += ttask->sizes[i];
3230               }
3231             else
3232               gomp_map_vars (devicep, 1, &ttask->hostaddrs[i], NULL, &ttask->sizes[i],
3233                                  &ttask->kinds[i], true, &refcount_set,
3234                                  GOMP_MAP_VARS_ENTER_DATA);
3235       else
3236           gomp_exit_data (devicep, ttask->mapnum, ttask->hostaddrs, ttask->sizes,
3237                               ttask->kinds, &refcount_set);
3238       htab_free (refcount_set);
3239     }
3240   return false;
3241 }
3242 
3243 void
GOMP_teams(unsigned int num_teams,unsigned int thread_limit)3244 GOMP_teams (unsigned int num_teams, unsigned int thread_limit)
3245 {
3246   if (thread_limit)
3247     {
3248       struct gomp_task_icv *icv = gomp_icv (true);
3249       icv->thread_limit_var
3250           = thread_limit > INT_MAX ? UINT_MAX : thread_limit;
3251     }
3252   (void) num_teams;
3253 }
3254 
3255 bool
GOMP_teams4(unsigned int num_teams_low,unsigned int num_teams_high,unsigned int thread_limit,bool first)3256 GOMP_teams4 (unsigned int num_teams_low, unsigned int num_teams_high,
3257                unsigned int thread_limit, bool first)
3258 {
3259   struct gomp_thread *thr = gomp_thread ();
3260   if (first)
3261     {
3262       if (thread_limit)
3263           {
3264             struct gomp_task_icv *icv = gomp_icv (true);
3265             icv->thread_limit_var
3266               = thread_limit > INT_MAX ? UINT_MAX : thread_limit;
3267           }
3268       (void) num_teams_high;
3269       if (num_teams_low == 0)
3270           num_teams_low = 1;
3271       thr->num_teams = num_teams_low - 1;
3272       thr->team_num = 0;
3273     }
3274   else if (thr->team_num == thr->num_teams)
3275     return false;
3276   else
3277     ++thr->team_num;
3278   return true;
3279 }
3280 
3281 void *
omp_target_alloc(size_t size,int device_num)3282 omp_target_alloc (size_t size, int device_num)
3283 {
3284   if (device_num == gomp_get_num_devices ())
3285     return malloc (size);
3286 
3287   if (device_num < 0)
3288     return NULL;
3289 
3290   struct gomp_device_descr *devicep = resolve_device (device_num);
3291   if (devicep == NULL)
3292     return NULL;
3293 
3294   if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
3295       || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
3296     return malloc (size);
3297 
3298   gomp_mutex_lock (&devicep->lock);
3299   void *ret = devicep->alloc_func (devicep->target_id, size);
3300   gomp_mutex_unlock (&devicep->lock);
3301   return ret;
3302 }
3303 
3304 void
omp_target_free(void * device_ptr,int device_num)3305 omp_target_free (void *device_ptr, int device_num)
3306 {
3307   if (device_ptr == NULL)
3308     return;
3309 
3310   if (device_num == gomp_get_num_devices ())
3311     {
3312       free (device_ptr);
3313       return;
3314     }
3315 
3316   if (device_num < 0)
3317     return;
3318 
3319   struct gomp_device_descr *devicep = resolve_device (device_num);
3320   if (devicep == NULL)
3321     return;
3322 
3323   if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
3324       || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
3325     {
3326       free (device_ptr);
3327       return;
3328     }
3329 
3330   gomp_mutex_lock (&devicep->lock);
3331   gomp_free_device_memory (devicep, device_ptr);
3332   gomp_mutex_unlock (&devicep->lock);
3333 }
3334 
3335 int
omp_target_is_present(const void * ptr,int device_num)3336 omp_target_is_present (const void *ptr, int device_num)
3337 {
3338   if (ptr == NULL)
3339     return 1;
3340 
3341   if (device_num == gomp_get_num_devices ())
3342     return 1;
3343 
3344   if (device_num < 0)
3345     return 0;
3346 
3347   struct gomp_device_descr *devicep = resolve_device (device_num);
3348   if (devicep == NULL)
3349     return 0;
3350 
3351   if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
3352       || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
3353     return 1;
3354 
3355   gomp_mutex_lock (&devicep->lock);
3356   struct splay_tree_s *mem_map = &devicep->mem_map;
3357   struct splay_tree_key_s cur_node;
3358 
3359   cur_node.host_start = (uintptr_t) ptr;
3360   cur_node.host_end = cur_node.host_start;
3361   splay_tree_key n = gomp_map_0len_lookup (mem_map, &cur_node);
3362   int ret = n != NULL;
3363   gomp_mutex_unlock (&devicep->lock);
3364   return ret;
3365 }
3366 
3367 int
omp_target_memcpy(void * dst,const void * src,size_t length,size_t dst_offset,size_t src_offset,int dst_device_num,int src_device_num)3368 omp_target_memcpy (void *dst, const void *src, size_t length,
3369                        size_t dst_offset, size_t src_offset, int dst_device_num,
3370                        int src_device_num)
3371 {
3372   struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;
3373   bool ret;
3374 
3375   if (dst_device_num != gomp_get_num_devices ())
3376     {
3377       if (dst_device_num < 0)
3378           return EINVAL;
3379 
3380       dst_devicep = resolve_device (dst_device_num);
3381       if (dst_devicep == NULL)
3382           return EINVAL;
3383 
3384       if (!(dst_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
3385             || dst_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
3386           dst_devicep = NULL;
3387     }
3388   if (src_device_num != num_devices_openmp)
3389     {
3390       if (src_device_num < 0)
3391           return EINVAL;
3392 
3393       src_devicep = resolve_device (src_device_num);
3394       if (src_devicep == NULL)
3395           return EINVAL;
3396 
3397       if (!(src_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
3398             || src_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
3399           src_devicep = NULL;
3400     }
3401   if (src_devicep == NULL && dst_devicep == NULL)
3402     {
3403       memcpy ((char *) dst + dst_offset, (char *) src + src_offset, length);
3404       return 0;
3405     }
3406   if (src_devicep == NULL)
3407     {
3408       gomp_mutex_lock (&dst_devicep->lock);
3409       ret = dst_devicep->host2dev_func (dst_devicep->target_id,
3410                                                   (char *) dst + dst_offset,
3411                                                   (char *) src + src_offset, length);
3412       gomp_mutex_unlock (&dst_devicep->lock);
3413       return (ret ? 0 : EINVAL);
3414     }
3415   if (dst_devicep == NULL)
3416     {
3417       gomp_mutex_lock (&src_devicep->lock);
3418       ret = src_devicep->dev2host_func (src_devicep->target_id,
3419                                                   (char *) dst + dst_offset,
3420                                                   (char *) src + src_offset, length);
3421       gomp_mutex_unlock (&src_devicep->lock);
3422       return (ret ? 0 : EINVAL);
3423     }
3424   if (src_devicep == dst_devicep)
3425     {
3426       gomp_mutex_lock (&src_devicep->lock);
3427       ret = src_devicep->dev2dev_func (src_devicep->target_id,
3428                                                (char *) dst + dst_offset,
3429                                                (char *) src + src_offset, length);
3430       gomp_mutex_unlock (&src_devicep->lock);
3431       return (ret ? 0 : EINVAL);
3432     }
3433   return EINVAL;
3434 }
3435 
3436 static int
omp_target_memcpy_rect_worker(void * dst,const void * src,size_t element_size,int num_dims,const size_t * volume,const size_t * dst_offsets,const size_t * src_offsets,const size_t * dst_dimensions,const size_t * src_dimensions,struct gomp_device_descr * dst_devicep,struct gomp_device_descr * src_devicep)3437 omp_target_memcpy_rect_worker (void *dst, const void *src, size_t element_size,
3438                                      int num_dims, const size_t *volume,
3439                                      const size_t *dst_offsets,
3440                                      const size_t *src_offsets,
3441                                      const size_t *dst_dimensions,
3442                                      const size_t *src_dimensions,
3443                                      struct gomp_device_descr *dst_devicep,
3444                                      struct gomp_device_descr *src_devicep)
3445 {
3446   size_t dst_slice = element_size;
3447   size_t src_slice = element_size;
3448   size_t j, dst_off, src_off, length;
3449   int i, ret;
3450 
3451   if (num_dims == 1)
3452     {
3453       if (__builtin_mul_overflow (element_size, volume[0], &length)
3454             || __builtin_mul_overflow (element_size, dst_offsets[0], &dst_off)
3455             || __builtin_mul_overflow (element_size, src_offsets[0], &src_off))
3456           return EINVAL;
3457       if (dst_devicep == NULL && src_devicep == NULL)
3458           {
3459             memcpy ((char *) dst + dst_off, (const char *) src + src_off,
3460                       length);
3461             ret = 1;
3462           }
3463       else if (src_devicep == NULL)
3464           ret = dst_devicep->host2dev_func (dst_devicep->target_id,
3465                                                     (char *) dst + dst_off,
3466                                                     (const char *) src + src_off,
3467                                                     length);
3468       else if (dst_devicep == NULL)
3469           ret = src_devicep->dev2host_func (src_devicep->target_id,
3470                                                     (char *) dst + dst_off,
3471                                                     (const char *) src + src_off,
3472                                                     length);
3473       else if (src_devicep == dst_devicep)
3474           ret = src_devicep->dev2dev_func (src_devicep->target_id,
3475                                                    (char *) dst + dst_off,
3476                                                    (const char *) src + src_off,
3477                                                    length);
3478       else
3479           ret = 0;
3480       return ret ? 0 : EINVAL;
3481     }
3482 
3483   /* FIXME: it would be nice to have some plugin function to handle
3484      num_dims == 2 and num_dims == 3 more efficiently.  Larger ones can
3485      be handled in the generic recursion below, and for host-host it
3486      should be used even for any num_dims >= 2.  */
3487 
3488   for (i = 1; i < num_dims; i++)
3489     if (__builtin_mul_overflow (dst_slice, dst_dimensions[i], &dst_slice)
3490           || __builtin_mul_overflow (src_slice, src_dimensions[i], &src_slice))
3491       return EINVAL;
3492   if (__builtin_mul_overflow (dst_slice, dst_offsets[0], &dst_off)
3493       || __builtin_mul_overflow (src_slice, src_offsets[0], &src_off))
3494     return EINVAL;
3495   for (j = 0; j < volume[0]; j++)
3496     {
3497       ret = omp_target_memcpy_rect_worker ((char *) dst + dst_off,
3498                                                      (const char *) src + src_off,
3499                                                      element_size, num_dims - 1,
3500                                                      volume + 1, dst_offsets + 1,
3501                                                      src_offsets + 1, dst_dimensions + 1,
3502                                                      src_dimensions + 1, dst_devicep,
3503                                                      src_devicep);
3504       if (ret)
3505           return ret;
3506       dst_off += dst_slice;
3507       src_off += src_slice;
3508     }
3509   return 0;
3510 }
3511 
3512 int
omp_target_memcpy_rect(void * dst,const void * src,size_t element_size,int num_dims,const size_t * volume,const size_t * dst_offsets,const size_t * src_offsets,const size_t * dst_dimensions,const size_t * src_dimensions,int dst_device_num,int src_device_num)3513 omp_target_memcpy_rect (void *dst, const void *src, size_t element_size,
3514                               int num_dims, const size_t *volume,
3515                               const size_t *dst_offsets,
3516                               const size_t *src_offsets,
3517                               const size_t *dst_dimensions,
3518                               const size_t *src_dimensions,
3519                               int dst_device_num, int src_device_num)
3520 {
3521   struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;
3522 
3523   if (!dst && !src)
3524     return INT_MAX;
3525 
3526   if (dst_device_num != gomp_get_num_devices ())
3527     {
3528       if (dst_device_num < 0)
3529           return EINVAL;
3530 
3531       dst_devicep = resolve_device (dst_device_num);
3532       if (dst_devicep == NULL)
3533           return EINVAL;
3534 
3535       if (!(dst_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
3536             || dst_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
3537           dst_devicep = NULL;
3538     }
3539   if (src_device_num != num_devices_openmp)
3540     {
3541       if (src_device_num < 0)
3542           return EINVAL;
3543 
3544       src_devicep = resolve_device (src_device_num);
3545       if (src_devicep == NULL)
3546           return EINVAL;
3547 
3548       if (!(src_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
3549             || src_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
3550           src_devicep = NULL;
3551     }
3552 
3553   if (src_devicep != NULL && dst_devicep != NULL && src_devicep != dst_devicep)
3554     return EINVAL;
3555 
3556   if (src_devicep)
3557     gomp_mutex_lock (&src_devicep->lock);
3558   else if (dst_devicep)
3559     gomp_mutex_lock (&dst_devicep->lock);
3560   int ret = omp_target_memcpy_rect_worker (dst, src, element_size, num_dims,
3561                                                      volume, dst_offsets, src_offsets,
3562                                                      dst_dimensions, src_dimensions,
3563                                                      dst_devicep, src_devicep);
3564   if (src_devicep)
3565     gomp_mutex_unlock (&src_devicep->lock);
3566   else if (dst_devicep)
3567     gomp_mutex_unlock (&dst_devicep->lock);
3568   return ret;
3569 }
3570 
3571 int
omp_target_associate_ptr(const void * host_ptr,const void * device_ptr,size_t size,size_t device_offset,int device_num)3572 omp_target_associate_ptr (const void *host_ptr, const void *device_ptr,
3573                                 size_t size, size_t device_offset, int device_num)
3574 {
3575   if (device_num == gomp_get_num_devices ())
3576     return EINVAL;
3577 
3578   if (device_num < 0)
3579     return EINVAL;
3580 
3581   struct gomp_device_descr *devicep = resolve_device (device_num);
3582   if (devicep == NULL)
3583     return EINVAL;
3584 
3585   if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
3586       || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
3587     return EINVAL;
3588 
3589   gomp_mutex_lock (&devicep->lock);
3590 
3591   struct splay_tree_s *mem_map = &devicep->mem_map;
3592   struct splay_tree_key_s cur_node;
3593   int ret = EINVAL;
3594 
3595   cur_node.host_start = (uintptr_t) host_ptr;
3596   cur_node.host_end = cur_node.host_start + size;
3597   splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
3598   if (n)
3599     {
3600       if (n->tgt->tgt_start + n->tgt_offset
3601             == (uintptr_t) device_ptr + device_offset
3602             && n->host_start <= cur_node.host_start
3603             && n->host_end >= cur_node.host_end)
3604           ret = 0;
3605     }
3606   else
3607     {
3608       struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt));
3609       tgt->array = gomp_malloc (sizeof (*tgt->array));
3610       tgt->refcount = 1;
3611       tgt->tgt_start = 0;
3612       tgt->tgt_end = 0;
3613       tgt->to_free = NULL;
3614       tgt->prev = NULL;
3615       tgt->list_count = 0;
3616       tgt->device_descr = devicep;
3617       splay_tree_node array = tgt->array;
3618       splay_tree_key k = &array->key;
3619       k->host_start = cur_node.host_start;
3620       k->host_end = cur_node.host_end;
3621       k->tgt = tgt;
3622       k->tgt_offset = (uintptr_t) device_ptr + device_offset;
3623       k->refcount = REFCOUNT_INFINITY;
3624       k->dynamic_refcount = 0;
3625       k->aux = NULL;
3626       array->left = NULL;
3627       array->right = NULL;
3628       splay_tree_insert (&devicep->mem_map, array);
3629       ret = 0;
3630     }
3631   gomp_mutex_unlock (&devicep->lock);
3632   return ret;
3633 }
3634 
3635 int
omp_target_disassociate_ptr(const void * ptr,int device_num)3636 omp_target_disassociate_ptr (const void *ptr, int device_num)
3637 {
3638   if (device_num == gomp_get_num_devices ())
3639     return EINVAL;
3640 
3641   if (device_num < 0)
3642     return EINVAL;
3643 
3644   struct gomp_device_descr *devicep = resolve_device (device_num);
3645   if (devicep == NULL)
3646     return EINVAL;
3647 
3648   if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
3649     return EINVAL;
3650 
3651   gomp_mutex_lock (&devicep->lock);
3652 
3653   struct splay_tree_s *mem_map = &devicep->mem_map;
3654   struct splay_tree_key_s cur_node;
3655   int ret = EINVAL;
3656 
3657   cur_node.host_start = (uintptr_t) ptr;
3658   cur_node.host_end = cur_node.host_start;
3659   splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
3660   if (n
3661       && n->host_start == cur_node.host_start
3662       && n->refcount == REFCOUNT_INFINITY
3663       && n->tgt->tgt_start == 0
3664       && n->tgt->to_free == NULL
3665       && n->tgt->refcount == 1
3666       && n->tgt->list_count == 0)
3667     {
3668       splay_tree_remove (&devicep->mem_map, n);
3669       gomp_unmap_tgt (n->tgt);
3670       ret = 0;
3671     }
3672 
3673   gomp_mutex_unlock (&devicep->lock);
3674   return ret;
3675 }
3676 
3677 int
omp_pause_resource(omp_pause_resource_t kind,int device_num)3678 omp_pause_resource (omp_pause_resource_t kind, int device_num)
3679 {
3680   (void) kind;
3681   if (device_num == gomp_get_num_devices ())
3682     return gomp_pause_host ();
3683   if (device_num < 0 || device_num >= num_devices_openmp)
3684     return -1;
3685   /* Do nothing for target devices for now.  */
3686   return 0;
3687 }
3688 
3689 int
omp_pause_resource_all(omp_pause_resource_t kind)3690 omp_pause_resource_all (omp_pause_resource_t kind)
3691 {
3692   (void) kind;
3693   if (gomp_pause_host ())
3694     return -1;
3695   /* Do nothing for target devices for now.  */
3696   return 0;
3697 }
3698 
3699 ialias (omp_pause_resource)
ialias(omp_pause_resource_all)3700 ialias (omp_pause_resource_all)
3701 
3702 #ifdef PLUGIN_SUPPORT
3703 
3704 /* This function tries to load a plugin for DEVICE.  Name of plugin is passed
3705    in PLUGIN_NAME.
3706    The handles of the found functions are stored in the corresponding fields
3707    of DEVICE.  The function returns TRUE on success and FALSE otherwise.  */
3708 
3709 static bool
3710 gomp_load_plugin_for_device (struct gomp_device_descr *device,
3711                                    const char *plugin_name)
3712 {
3713   const char *err = NULL, *last_missing = NULL;
3714 
3715   void *plugin_handle = dlopen (plugin_name, RTLD_LAZY);
3716   if (!plugin_handle)
3717 #if OFFLOAD_DEFAULTED
3718     return 0;
3719 #else
3720     goto dl_fail;
3721 #endif
3722 
3723   /* Check if all required functions are available in the plugin and store
3724      their handlers.  None of the symbols can legitimately be NULL,
3725      so we don't need to check dlerror all the time.  */
3726 #define DLSYM(f)                                                                \
3727   if (!(device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #f)))          \
3728     goto dl_fail
3729   /* Similar, but missing functions are not an error.  Return false if
3730      failed, true otherwise.  */
3731 #define DLSYM_OPT(f, n)                                                                   \
3732   ((device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #n))     \
3733    || (last_missing = #n, 0))
3734 
3735   DLSYM (version);
3736   if (device->version_func () != GOMP_VERSION)
3737     {
3738       err = "plugin version mismatch";
3739       goto fail;
3740     }
3741 
3742   DLSYM (get_name);
3743   DLSYM (get_caps);
3744   DLSYM (get_type);
3745   DLSYM (get_num_devices);
3746   DLSYM (init_device);
3747   DLSYM (fini_device);
3748   DLSYM (load_image);
3749   DLSYM (unload_image);
3750   DLSYM (alloc);
3751   DLSYM (free);
3752   DLSYM (dev2host);
3753   DLSYM (host2dev);
3754   device->capabilities = device->get_caps_func ();
3755   if (device->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
3756     {
3757       DLSYM (run);
3758       DLSYM_OPT (async_run, async_run);
3759       DLSYM_OPT (can_run, can_run);
3760       DLSYM (dev2dev);
3761     }
3762   if (device->capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
3763     {
3764       if (!DLSYM_OPT (openacc.exec, openacc_exec)
3765             || !DLSYM_OPT (openacc.create_thread_data,
3766                                openacc_create_thread_data)
3767             || !DLSYM_OPT (openacc.destroy_thread_data,
3768                                openacc_destroy_thread_data)
3769             || !DLSYM_OPT (openacc.async.construct, openacc_async_construct)
3770             || !DLSYM_OPT (openacc.async.destruct, openacc_async_destruct)
3771             || !DLSYM_OPT (openacc.async.test, openacc_async_test)
3772             || !DLSYM_OPT (openacc.async.synchronize, openacc_async_synchronize)
3773             || !DLSYM_OPT (openacc.async.serialize, openacc_async_serialize)
3774             || !DLSYM_OPT (openacc.async.queue_callback,
3775                                openacc_async_queue_callback)
3776             || !DLSYM_OPT (openacc.async.exec, openacc_async_exec)
3777             || !DLSYM_OPT (openacc.async.dev2host, openacc_async_dev2host)
3778             || !DLSYM_OPT (openacc.async.host2dev, openacc_async_host2dev)
3779             || !DLSYM_OPT (openacc.get_property, openacc_get_property))
3780           {
3781             /* Require all the OpenACC handlers if we have
3782                GOMP_OFFLOAD_CAP_OPENACC_200.  */
3783             err = "plugin missing OpenACC handler function";
3784             goto fail;
3785           }
3786 
3787       unsigned cuda = 0;
3788       cuda += DLSYM_OPT (openacc.cuda.get_current_device,
3789                                openacc_cuda_get_current_device);
3790       cuda += DLSYM_OPT (openacc.cuda.get_current_context,
3791                                openacc_cuda_get_current_context);
3792       cuda += DLSYM_OPT (openacc.cuda.get_stream, openacc_cuda_get_stream);
3793       cuda += DLSYM_OPT (openacc.cuda.set_stream, openacc_cuda_set_stream);
3794       if (cuda && cuda != 4)
3795           {
3796             /* Make sure all the CUDA functions are there if any of them are.  */
3797             err = "plugin missing OpenACC CUDA handler function";
3798             goto fail;
3799           }
3800     }
3801 #undef DLSYM
3802 #undef DLSYM_OPT
3803 
3804   return 1;
3805 
3806  dl_fail:
3807   err = dlerror ();
3808  fail:
3809   gomp_error ("while loading %s: %s", plugin_name, err);
3810   if (last_missing)
3811     gomp_error ("missing function was %s", last_missing);
3812   if (plugin_handle)
3813     dlclose (plugin_handle);
3814 
3815   return 0;
3816 }
3817 
3818 /* This function finalizes all initialized devices.  */
3819 
3820 static void
gomp_target_fini(void)3821 gomp_target_fini (void)
3822 {
3823   int i;
3824   for (i = 0; i < num_devices; i++)
3825     {
3826       bool ret = true;
3827       struct gomp_device_descr *devicep = &devices[i];
3828       gomp_mutex_lock (&devicep->lock);
3829       if (devicep->state == GOMP_DEVICE_INITIALIZED)
3830           ret = gomp_fini_device (devicep);
3831       gomp_mutex_unlock (&devicep->lock);
3832       if (!ret)
3833           gomp_fatal ("device finalization failed");
3834     }
3835 }
3836 
3837 /* This function initializes the runtime for offloading.
3838    It parses the list of offload plugins, and tries to load these.
3839    On return, the variables NUM_DEVICES and NUM_DEVICES_OPENMP
3840    will be set, and the array DEVICES initialized, containing descriptors for
3841    corresponding devices, first the GOMP_OFFLOAD_CAP_OPENMP_400 ones, follows
3842    by the others.  */
3843 
3844 static void
gomp_target_init(void)3845 gomp_target_init (void)
3846 {
3847   const char *prefix ="libgomp-plugin-";
3848   const char *suffix = SONAME_SUFFIX (1);
3849   const char *cur, *next;
3850   char *plugin_name;
3851   int i, new_num_devs;
3852   int num_devs = 0, num_devs_openmp;
3853   struct gomp_device_descr *devs = NULL;
3854 
3855   if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_DISABLED)
3856     return;
3857 
3858   cur = OFFLOAD_PLUGINS;
3859   if (*cur)
3860     do
3861       {
3862           struct gomp_device_descr current_device;
3863           size_t prefix_len, suffix_len, cur_len;
3864 
3865           next = strchr (cur, ',');
3866 
3867           prefix_len = strlen (prefix);
3868           cur_len = next ? next - cur : strlen (cur);
3869           suffix_len = strlen (suffix);
3870 
3871           plugin_name = (char *) malloc (prefix_len + cur_len + suffix_len + 1);
3872           if (!plugin_name)
3873             {
3874               num_devs = 0;
3875               break;
3876             }
3877 
3878           memcpy (plugin_name, prefix, prefix_len);
3879           memcpy (plugin_name + prefix_len, cur, cur_len);
3880           memcpy (plugin_name + prefix_len + cur_len, suffix, suffix_len + 1);
3881 
3882           if (gomp_load_plugin_for_device (&current_device, plugin_name))
3883             {
3884               new_num_devs = current_device.get_num_devices_func ();
3885               if (new_num_devs >= 1)
3886                 {
3887                     /* Augment DEVICES and NUM_DEVICES.  */
3888 
3889                     devs = realloc (devs, (num_devs + new_num_devs)
3890                                               * sizeof (struct gomp_device_descr));
3891                     if (!devs)
3892                       {
3893                         num_devs = 0;
3894                         free (plugin_name);
3895                         break;
3896                       }
3897 
3898                     current_device.name = current_device.get_name_func ();
3899                     /* current_device.capabilities has already been set.  */
3900                     current_device.type = current_device.get_type_func ();
3901                     current_device.mem_map.root = NULL;
3902                     current_device.state = GOMP_DEVICE_UNINITIALIZED;
3903                     for (i = 0; i < new_num_devs; i++)
3904                       {
3905                         current_device.target_id = i;
3906                         devs[num_devs] = current_device;
3907                         gomp_mutex_init (&devs[num_devs].lock);
3908                         num_devs++;
3909                       }
3910                 }
3911             }
3912 
3913           free (plugin_name);
3914           cur = next + 1;
3915       }
3916     while (next);
3917 
3918   /* In DEVICES, sort the GOMP_OFFLOAD_CAP_OPENMP_400 ones first, and set
3919      NUM_DEVICES_OPENMP.  */
3920   struct gomp_device_descr *devs_s
3921     = malloc (num_devs * sizeof (struct gomp_device_descr));
3922   if (!devs_s)
3923     {
3924       num_devs = 0;
3925       free (devs);
3926       devs = NULL;
3927     }
3928   num_devs_openmp = 0;
3929   for (i = 0; i < num_devs; i++)
3930     if (devs[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
3931       devs_s[num_devs_openmp++] = devs[i];
3932   int num_devs_after_openmp = num_devs_openmp;
3933   for (i = 0; i < num_devs; i++)
3934     if (!(devs[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
3935       devs_s[num_devs_after_openmp++] = devs[i];
3936   free (devs);
3937   devs = devs_s;
3938 
3939   for (i = 0; i < num_devs; i++)
3940     {
3941       /* The 'devices' array can be moved (by the realloc call) until we have
3942            found all the plugins, so registering with the OpenACC runtime (which
3943            takes a copy of the pointer argument) must be delayed until now.  */
3944       if (devs[i].capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
3945           goacc_register (&devs[i]);
3946     }
3947 
3948   num_devices = num_devs;
3949   num_devices_openmp = num_devs_openmp;
3950   devices = devs;
3951   if (atexit (gomp_target_fini) != 0)
3952     gomp_fatal ("atexit failed");
3953 }
3954 
3955 #else /* PLUGIN_SUPPORT */
3956 /* If dlfcn.h is unavailable we always fallback to host execution.
3957    GOMP_target* routines are just stubs for this case.  */
3958 static void
3959 gomp_target_init (void)
3960 {
3961 }
3962 #endif /* PLUGIN_SUPPORT */
3963