unstick r374919

Revision 374919

Date:
2019/10/15 17:15:26
Author:
jonchesterfield
Revision Log:
[libomptarget][nfc] Make interface.h target independent

Summary:
[libomptarget][nfc] Make interface.h target independent

Move interface.h under a top level include directory.
Remove #includes to avoid the interface depending on the implementation.

Reviewers: ABataev, jdoerfert, grokos, ronlieb, RaviNarayanaswamy

Reviewed By: jdoerfert

Subscribers: mgorny, openmp-commits

Tags: #openmp

Differential Revision: https://reviews.llvm.org/D68615
Files:

Legend:

 
Added
 
Removed
 
Modified
  • openmp/trunk/libomptarget/deviceRTLs/interface.h

     
    1 //===------- interface.h - OpenMP interface definitions ---------- CUDA -*-===//
    2 //
    3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
    4 // See https://llvm.org/LICENSE.txt for license information.
    5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
    6 //
    7 //===----------------------------------------------------------------------===//
    8 //
    9 // This file contains all the definitions that are relevant to
    10 // the interface. The first section contains the interface as
    11 // declared by OpenMP. The second section includes the compiler
    12 // specific interfaces.
    13 //
    14 //===----------------------------------------------------------------------===//
    15
    16 #ifndef _INTERFACES_H_
    17 #define _INTERFACES_H_
    18
    19 #include <stdint.h>
    20
    21 #ifdef __CUDACC__
    22 #include "nvptx/src/nvptx_interface.h"
    23 #endif
    24
    25 ////////////////////////////////////////////////////////////////////////////////
    26 // OpenMP interface
    27 ////////////////////////////////////////////////////////////////////////////////
    28
    29 typedef uint32_t omp_lock_t; /* arbitrary type of the right length */
    30 typedef uint64_t omp_nest_lock_t; /* arbitrary type of the right length */
    31
    32 typedef enum omp_sched_t {
    33 omp_sched_static = 1, /* chunkSize >0 */
    34 omp_sched_dynamic = 2, /* chunkSize >0 */
    35 omp_sched_guided = 3, /* chunkSize >0 */
    36 omp_sched_auto = 4, /* no chunkSize */
    37 } omp_sched_t;
    38
    39 typedef enum omp_proc_bind_t {
    40 omp_proc_bind_false = 0,
    41 omp_proc_bind_true = 1,
    42 omp_proc_bind_master = 2,
    43 omp_proc_bind_close = 3,
    44 omp_proc_bind_spread = 4
    45 } omp_proc_bind_t;
    46
    47 EXTERN double omp_get_wtick(void);
    48 EXTERN double omp_get_wtime(void);
    49
    50 EXTERN void omp_set_num_threads(int num);
    51 EXTERN int omp_get_num_threads(void);
    52 EXTERN int omp_get_max_threads(void);
    53 EXTERN int omp_get_thread_limit(void);
    54 EXTERN int omp_get_thread_num(void);
    55 EXTERN int omp_get_num_procs(void);
    56 EXTERN int omp_in_parallel(void);
    57 EXTERN int omp_in_final(void);
    58 EXTERN void omp_set_dynamic(int flag);
    59 EXTERN int omp_get_dynamic(void);
    60 EXTERN void omp_set_nested(int flag);
    61 EXTERN int omp_get_nested(void);
    62 EXTERN void omp_set_max_active_levels(int level);
    63 EXTERN int omp_get_max_active_levels(void);
    64 EXTERN int omp_get_level(void);
    65 EXTERN int omp_get_active_level(void);
    66 EXTERN int omp_get_ancestor_thread_num(int level);
    67 EXTERN int omp_get_team_size(int level);
    68
    69 EXTERN void omp_init_lock(omp_lock_t *lock);
    70 EXTERN void omp_init_nest_lock(omp_nest_lock_t *lock);
    71 EXTERN void omp_destroy_lock(omp_lock_t *lock);
    72 EXTERN void omp_destroy_nest_lock(omp_nest_lock_t *lock);
    73 EXTERN void omp_set_lock(omp_lock_t *lock);
    74 EXTERN void omp_set_nest_lock(omp_nest_lock_t *lock);
    75 EXTERN void omp_unset_lock(omp_lock_t *lock);
    76 EXTERN void omp_unset_nest_lock(omp_nest_lock_t *lock);
    77 EXTERN int omp_test_lock(omp_lock_t *lock);
    78 EXTERN int omp_test_nest_lock(omp_nest_lock_t *lock);
    79
    80 EXTERN void omp_get_schedule(omp_sched_t *kind, int *modifier);
    81 EXTERN void omp_set_schedule(omp_sched_t kind, int modifier);
    82 EXTERN omp_proc_bind_t omp_get_proc_bind(void);
    83 EXTERN int omp_get_cancellation(void);
    84 EXTERN void omp_set_default_device(int deviceId);
    85 EXTERN int omp_get_default_device(void);
    86 EXTERN int omp_get_num_devices(void);
    87 EXTERN int omp_get_num_teams(void);
    88 EXTERN int omp_get_team_num(void);
    89 EXTERN int omp_is_initial_device(void);
    90 EXTERN int omp_get_initial_device(void);
    91 EXTERN int omp_get_max_task_priority(void);
    92
    93 ////////////////////////////////////////////////////////////////////////////////
    94 // file below is swiped from kmpc host interface
    95 ////////////////////////////////////////////////////////////////////////////////
    96
    97 ////////////////////////////////////////////////////////////////////////////////
    98 // kmp specifc types
    99 ////////////////////////////////////////////////////////////////////////////////
    100
    101 typedef enum kmp_sched_t {
    102 kmp_sched_static_chunk = 33,
    103 kmp_sched_static_nochunk = 34,
    104 kmp_sched_dynamic = 35,
    105 kmp_sched_guided = 36,
    106 kmp_sched_runtime = 37,
    107 kmp_sched_auto = 38,
    108
    109 kmp_sched_static_balanced_chunk = 45,
    110
    111 kmp_sched_static_ordered = 65,
    112 kmp_sched_static_nochunk_ordered = 66,
    113 kmp_sched_dynamic_ordered = 67,
    114 kmp_sched_guided_ordered = 68,
    115 kmp_sched_runtime_ordered = 69,
    116 kmp_sched_auto_ordered = 70,
    117
    118 kmp_sched_distr_static_chunk = 91,
    119 kmp_sched_distr_static_nochunk = 92,
    120 kmp_sched_distr_static_chunk_sched_static_chunkone = 93,
    121
    122 kmp_sched_default = kmp_sched_static_nochunk,
    123 kmp_sched_unordered_first = kmp_sched_static_chunk,
    124 kmp_sched_unordered_last = kmp_sched_auto,
    125 kmp_sched_ordered_first = kmp_sched_static_ordered,
    126 kmp_sched_ordered_last = kmp_sched_auto_ordered,
    127 kmp_sched_distribute_first = kmp_sched_distr_static_chunk,
    128 kmp_sched_distribute_last =
    129 kmp_sched_distr_static_chunk_sched_static_chunkone,
    130
    131 /* Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
    132 * Since we need to distinguish the three possible cases (no modifier,
    133 * monotonic modifier, nonmonotonic modifier), we need separate bits for
    134 * each modifier. The absence of monotonic does not imply nonmonotonic,
    135 * especially since 4.5 says that the behaviour of the "no modifier" case
    136 * is implementation defined in 4.5, but will become "nonmonotonic" in 5.0.
    137 *
    138 * Since we're passing a full 32 bit value, we can use a couple of high
    139 * bits for these flags; out of paranoia we avoid the sign bit.
    140 *
    141 * These modifiers can be or-ed into non-static schedules by the compiler
    142 * to pass the additional information. They will be stripped early in the
    143 * processing in __kmp_dispatch_init when setting up schedules, so
    144 * most of the code won't ever see schedules with these bits set.
    145 */
    146 kmp_sched_modifier_monotonic = (1 << 29),
    147 /**< Set if the monotonic schedule modifier was present */
    148 kmp_sched_modifier_nonmonotonic = (1 << 30),
    149 /**< Set if the nonmonotonic schedule modifier was present */
    150
    151 #define SCHEDULE_WITHOUT_MODIFIERS(s) \
    152 (enum kmp_sched_t)( \
    153 (s) & ~(kmp_sched_modifier_nonmonotonic | kmp_sched_modifier_monotonic))
    154 #define SCHEDULE_HAS_MONOTONIC(s) (((s)&kmp_sched_modifier_monotonic) != 0)
    155 #define SCHEDULE_HAS_NONMONOTONIC(s) \
    156 (((s)&kmp_sched_modifier_nonmonotonic) != 0)
    157 #define SCHEDULE_HAS_NO_MODIFIERS(s) \
    158 (((s) & (kmp_sched_modifier_nonmonotonic | kmp_sched_modifier_monotonic)) == \
    159 0)
    160
    161 } kmp_sched_t;
    162
    163 /*!
    164 * Enum for accesseing the reserved_2 field of the ident_t struct below.
    165 */
    166 enum {
    167 /*! Bit set to 1 when in SPMD mode. */
    168 KMP_IDENT_SPMD_MODE = 0x01,
    169 /*! Bit set to 1 when a simplified runtime is used. */
    170 KMP_IDENT_SIMPLE_RT_MODE = 0x02,
    171 };
    172
    173 /*!
    174 * The ident structure that describes a source location.
    175 * The struct is identical to the one in the kmp.h file.
    176 * We maintain the same data structure for compatibility.
    177 */
    178 typedef int kmp_int32;
    179 typedef struct ident {
    180 kmp_int32 reserved_1; /**< might be used in Fortran; see above */
    181 kmp_int32 flags; /**< also f.flags; KMP_IDENT_xxx flags; KMP_IDENT_KMPC
    182 identifies this union member */
    183 kmp_int32 reserved_2; /**< not really used in Fortran any more; see above */
    184 kmp_int32 reserved_3; /**< source[4] in Fortran, do not use for C++ */
    185 char const *psource; /**< String describing the source location.
    186 The string is composed of semi-colon separated fields
    187 which describe the source file, the function and a pair
    188 of line numbers that delimit the construct. */
    189 } ident_t;
    190
    191 // parallel defs
    192 typedef ident_t kmp_Ident;
    193 typedef void (*kmp_ParFctPtr)(int32_t *global_tid, int32_t *bound_tid, ...);
    194 typedef void (*kmp_ReductFctPtr)(void *lhsData, void *rhsData);
    195 typedef void (*kmp_InterWarpCopyFctPtr)(void *src, int32_t warp_num);
    196 typedef void (*kmp_ShuffleReductFctPtr)(void *rhsData, int16_t lane_id,
    197 int16_t lane_offset,
    198 int16_t shortCircuit);
    199 typedef void (*kmp_CopyToScratchpadFctPtr)(void *reduceData, void *scratchpad,
    200 int32_t index, int32_t width);
    201 typedef void (*kmp_LoadReduceFctPtr)(void *reduceData, void *scratchpad,
    202 int32_t index, int32_t width,
    203 int32_t reduce);
    204 typedef void (*kmp_ListGlobalFctPtr)(void *buffer, int idx, void *reduce_data);
    205
    206 // task defs
    207 typedef struct kmp_TaskDescr kmp_TaskDescr;
    208 typedef int32_t (*kmp_TaskFctPtr)(int32_t global_tid, kmp_TaskDescr *taskDescr);
    209 typedef struct kmp_TaskDescr {
    210 void *sharedPointerTable; // ptr to a table of shared var ptrs
    211 kmp_TaskFctPtr sub; // task subroutine
    212 int32_t partId; // unused
    213 kmp_TaskFctPtr destructors; // destructor of c++ first private
    214 } kmp_TaskDescr;
    215
    216 // sync defs
    217 typedef int32_t kmp_CriticalName[8];
    218
    219 ////////////////////////////////////////////////////////////////////////////////
    220 // external interface
    221 ////////////////////////////////////////////////////////////////////////////////
    222
    223 // parallel
    224 EXTERN int32_t __kmpc_global_thread_num(kmp_Ident *loc);
    225 EXTERN void __kmpc_push_num_threads(kmp_Ident *loc, int32_t global_tid,
    226 int32_t num_threads);
    227 // simd
    228 EXTERN void __kmpc_push_simd_limit(kmp_Ident *loc, int32_t global_tid,
    229 int32_t simd_limit);
    230 // aee ... not supported
    231 // EXTERN void __kmpc_fork_call(kmp_Ident *loc, int32_t argc, kmp_ParFctPtr
    232 // microtask, ...);
    233 EXTERN void __kmpc_serialized_parallel(kmp_Ident *loc, uint32_t global_tid);
    234 EXTERN void __kmpc_end_serialized_parallel(kmp_Ident *loc,
    235 uint32_t global_tid);
    236 EXTERN uint16_t __kmpc_parallel_level(kmp_Ident *loc, uint32_t global_tid);
    237
    238 // proc bind
    239 EXTERN void __kmpc_push_proc_bind(kmp_Ident *loc, uint32_t global_tid,
    240 int proc_bind);
    241 EXTERN int omp_get_num_places(void);
    242 EXTERN int omp_get_place_num_procs(int place_num);
    243 EXTERN void omp_get_place_proc_ids(int place_num, int *ids);
    244 EXTERN int omp_get_place_num(void);
    245 EXTERN int omp_get_partition_num_places(void);
    246 EXTERN void omp_get_partition_place_nums(int *place_nums);
    247
    248 // for static (no chunk or chunk)
    249 EXTERN void __kmpc_for_static_init_4(kmp_Ident *loc, int32_t global_tid,
    250 int32_t sched, int32_t *plastiter,
    251 int32_t *plower, int32_t *pupper,
    252 int32_t *pstride, int32_t incr,
    253 int32_t chunk);
    254 EXTERN void __kmpc_for_static_init_4u(kmp_Ident *loc, int32_t global_tid,
    255 int32_t sched, int32_t *plastiter,
    256 uint32_t *plower, uint32_t *pupper,
    257 int32_t *pstride, int32_t incr,
    258 int32_t chunk);
    259 EXTERN void __kmpc_for_static_init_8(kmp_Ident *loc, int32_t global_tid,
    260 int32_t sched, int32_t *plastiter,
    261 int64_t *plower, int64_t *pupper,
    262 int64_t *pstride, int64_t incr,
    263 int64_t chunk);
    264 EXTERN void __kmpc_for_static_init_8u(kmp_Ident *loc, int32_t global_tid,
    265 int32_t sched, int32_t *plastiter1,
    266 uint64_t *plower, uint64_t *pupper,
    267 int64_t *pstride, int64_t incr,
    268 int64_t chunk);
    269 EXTERN
    270 void __kmpc_for_static_init_4_simple_spmd(kmp_Ident *loc, int32_t global_tid,
    271 int32_t sched, int32_t *plastiter,
    272 int32_t *plower, int32_t *pupper,
    273 int32_t *pstride, int32_t incr,
    274 int32_t chunk);
    275 EXTERN
    276 void __kmpc_for_static_init_4u_simple_spmd(kmp_Ident *loc, int32_t global_tid,
    277 int32_t sched, int32_t *plastiter,
    278 uint32_t *plower, uint32_t *pupper,
    279 int32_t *pstride, int32_t incr,
    280 int32_t chunk);
    281 EXTERN
    282 void __kmpc_for_static_init_8_simple_spmd(kmp_Ident *loc, int32_t global_tid,
    283 int32_t sched, int32_t *plastiter,
    284 int64_t *plower, int64_t *pupper,
    285 int64_t *pstride, int64_t incr,
    286 int64_t chunk);
    287 EXTERN
    288 void __kmpc_for_static_init_8u_simple_spmd(kmp_Ident *loc, int32_t global_tid,
    289 int32_t sched, int32_t *plastiter1,
    290 uint64_t *plower, uint64_t *pupper,
    291 int64_t *pstride, int64_t incr,
    292 int64_t chunk);
    293 EXTERN
    294 void __kmpc_for_static_init_4_simple_generic(kmp_Ident *loc,
    295 int32_t global_tid, int32_t sched,
    296 int32_t *plastiter,
    297 int32_t *plower, int32_t *pupper,
    298 int32_t *pstride, int32_t incr,
    299 int32_t chunk);
    300 EXTERN
    301 void __kmpc_for_static_init_4u_simple_generic(
    302 kmp_Ident *loc, int32_t global_tid, int32_t sched, int32_t *plastiter,
    303 uint32_t *plower, uint32_t *pupper, int32_t *pstride, int32_t incr,
    304 int32_t chunk);
    305 EXTERN
    306 void __kmpc_for_static_init_8_simple_generic(kmp_Ident *loc,
    307 int32_t global_tid, int32_t sched,
    308 int32_t *plastiter,
    309 int64_t *plower, int64_t *pupper,
    310 int64_t *pstride, int64_t incr,
    311 int64_t chunk);
    312 EXTERN
    313 void __kmpc_for_static_init_8u_simple_generic(
    314 kmp_Ident *loc, int32_t global_tid, int32_t sched, int32_t *plastiter1,
    315 uint64_t *plower, uint64_t *pupper, int64_t *pstride, int64_t incr,
    316 int64_t chunk);
    317
    318 EXTERN void __kmpc_for_static_fini(kmp_Ident *loc, int32_t global_tid);
    319
    320 // for dynamic
    321 EXTERN void __kmpc_dispatch_init_4(kmp_Ident *loc, int32_t global_tid,
    322 int32_t sched, int32_t lower, int32_t upper,
    323 int32_t incr, int32_t chunk);
    324 EXTERN void __kmpc_dispatch_init_4u(kmp_Ident *loc, int32_t global_tid,
    325 int32_t sched, uint32_t lower,
    326 uint32_t upper, int32_t incr,
    327 int32_t chunk);
    328 EXTERN void __kmpc_dispatch_init_8(kmp_Ident *loc, int32_t global_tid,
    329 int32_t sched, int64_t lower, int64_t upper,
    330 int64_t incr, int64_t chunk);
    331 EXTERN void __kmpc_dispatch_init_8u(kmp_Ident *loc, int32_t global_tid,
    332 int32_t sched, uint64_t lower,
    333 uint64_t upper, int64_t incr,
    334 int64_t chunk);
    335
    336 EXTERN int __kmpc_dispatch_next_4(kmp_Ident *loc, int32_t global_tid,
    337 int32_t *plastiter, int32_t *plower,
    338 int32_t *pupper, int32_t *pstride);
    339 EXTERN int __kmpc_dispatch_next_4u(kmp_Ident *loc, int32_t global_tid,
    340 int32_t *plastiter, uint32_t *plower,
    341 uint32_t *pupper, int32_t *pstride);
    342 EXTERN int __kmpc_dispatch_next_8(kmp_Ident *loc, int32_t global_tid,
    343 int32_t *plastiter, int64_t *plower,
    344 int64_t *pupper, int64_t *pstride);
    345 EXTERN int __kmpc_dispatch_next_8u(kmp_Ident *loc, int32_t global_tid,
    346 int32_t *plastiter, uint64_t *plower,
    347 uint64_t *pupper, int64_t *pstride);
    348
    349 EXTERN void __kmpc_dispatch_fini_4(kmp_Ident *loc, int32_t global_tid);
    350 EXTERN void __kmpc_dispatch_fini_4u(kmp_Ident *loc, int32_t global_tid);
    351 EXTERN void __kmpc_dispatch_fini_8(kmp_Ident *loc, int32_t global_tid);
    352 EXTERN void __kmpc_dispatch_fini_8u(kmp_Ident *loc, int32_t global_tid);
    353
    354 // Support for reducing conditional lastprivate variables
    355 EXTERN void __kmpc_reduce_conditional_lastprivate(kmp_Ident *loc,
    356 int32_t global_tid,
    357 int32_t varNum, void *array);
    358
    359 // reduction
    360 EXTERN void __kmpc_nvptx_end_reduce(int32_t global_tid);
    361 EXTERN void __kmpc_nvptx_end_reduce_nowait(int32_t global_tid);
    362 EXTERN __attribute__((deprecated)) int32_t __kmpc_nvptx_parallel_reduce_nowait(
    363 int32_t global_tid, int32_t num_vars, size_t reduce_size, void *reduce_data,
    364 kmp_ShuffleReductFctPtr shflFct, kmp_InterWarpCopyFctPtr cpyFct);
    365 EXTERN int32_t __kmpc_nvptx_parallel_reduce_nowait_v2(
    366 kmp_Ident *loc, int32_t global_tid, int32_t num_vars, size_t reduce_size,
    367 void *reduce_data, kmp_ShuffleReductFctPtr shflFct,
    368 kmp_InterWarpCopyFctPtr cpyFct);
    369 EXTERN int32_t __kmpc_nvptx_parallel_reduce_nowait_simple_spmd(
    370 int32_t global_tid, int32_t num_vars, size_t reduce_size, void *reduce_data,
    371 kmp_ShuffleReductFctPtr shflFct, kmp_InterWarpCopyFctPtr cpyFct);
    372 EXTERN int32_t __kmpc_nvptx_parallel_reduce_nowait_simple_generic(
    373 int32_t global_tid, int32_t num_vars, size_t reduce_size, void *reduce_data,
    374 kmp_ShuffleReductFctPtr shflFct, kmp_InterWarpCopyFctPtr cpyFct);
    375 EXTERN int32_t __kmpc_nvptx_simd_reduce_nowait(
    376 int32_t global_tid, int32_t num_vars, size_t reduce_size, void *reduce_data,
    377 kmp_ShuffleReductFctPtr shflFct, kmp_InterWarpCopyFctPtr cpyFct);
    378 EXTERN int32_t __kmpc_nvptx_teams_reduce_nowait_v2(
    379 kmp_Ident *loc, int32_t global_tid, void *global_buffer,
    380 int32_t num_of_records, void *reduce_data, kmp_ShuffleReductFctPtr shflFct,
    381 kmp_InterWarpCopyFctPtr cpyFct, kmp_ListGlobalFctPtr lgcpyFct,
    382 kmp_ListGlobalFctPtr lgredFct, kmp_ListGlobalFctPtr glcpyFct,
    383 kmp_ListGlobalFctPtr glredFct);
    384 EXTERN int32_t __kmpc_nvptx_teams_reduce_nowait(
    385 int32_t global_tid, int32_t num_vars, size_t reduce_size, void *reduce_data,
    386 kmp_ShuffleReductFctPtr shflFct, kmp_InterWarpCopyFctPtr cpyFct,
    387 kmp_CopyToScratchpadFctPtr sratchFct, kmp_LoadReduceFctPtr ldFct);
    388 EXTERN int32_t __kmpc_nvptx_teams_reduce_nowait_simple_spmd(
    389 int32_t global_tid, int32_t num_vars, size_t reduce_size, void *reduce_data,
    390 kmp_ShuffleReductFctPtr shflFct, kmp_InterWarpCopyFctPtr cpyFct,
    391 kmp_CopyToScratchpadFctPtr sratchFct, kmp_LoadReduceFctPtr ldFct);
    392 EXTERN int32_t __kmpc_nvptx_teams_reduce_nowait_simple_generic(
    393 int32_t global_tid, int32_t num_vars, size_t reduce_size, void *reduce_data,
    394 kmp_ShuffleReductFctPtr shflFct, kmp_InterWarpCopyFctPtr cpyFct,
    395 kmp_CopyToScratchpadFctPtr sratchFct, kmp_LoadReduceFctPtr ldFct);
    396 EXTERN int32_t __kmpc_nvptx_teams_reduce_nowait_simple(kmp_Ident *loc,
    397 int32_t global_tid,
    398 kmp_CriticalName *crit);
    399 EXTERN void __kmpc_nvptx_teams_end_reduce_nowait_simple(kmp_Ident *loc,
    400 int32_t global_tid,
    401 kmp_CriticalName *crit);
    402 EXTERN int32_t __kmpc_shuffle_int32(int32_t val, int16_t delta, int16_t size);
    403 EXTERN int64_t __kmpc_shuffle_int64(int64_t val, int16_t delta, int16_t size);
    404
    405 // sync barrier
    406 EXTERN void __kmpc_barrier(kmp_Ident *loc_ref, int32_t tid);
    407 EXTERN void __kmpc_barrier_simple_spmd(kmp_Ident *loc_ref, int32_t tid);
    408 EXTERN void __kmpc_barrier_simple_generic(kmp_Ident *loc_ref, int32_t tid);
    409 EXTERN int32_t __kmpc_cancel_barrier(kmp_Ident *loc, int32_t global_tid);
    410
    411 // single
    412 EXTERN int32_t __kmpc_single(kmp_Ident *loc, int32_t global_tid);
    413 EXTERN void __kmpc_end_single(kmp_Ident *loc, int32_t global_tid);
    414
    415 // sync
    416 EXTERN int32_t __kmpc_master(kmp_Ident *loc, int32_t global_tid);
    417 EXTERN void __kmpc_end_master(kmp_Ident *loc, int32_t global_tid);
    418 EXTERN void __kmpc_ordered(kmp_Ident *loc, int32_t global_tid);
    419 EXTERN void __kmpc_end_ordered(kmp_Ident *loc, int32_t global_tid);
    420 EXTERN void __kmpc_critical(kmp_Ident *loc, int32_t global_tid,
    421 kmp_CriticalName *crit);
    422 EXTERN void __kmpc_end_critical(kmp_Ident *loc, int32_t global_tid,
    423 kmp_CriticalName *crit);
    424 EXTERN void __kmpc_flush(kmp_Ident *loc);
    425
    426 // vote
    427 EXTERN __kmpc_impl_lanemask_t __kmpc_warp_active_thread_mask();
    428 // syncwarp
    429 EXTERN void __kmpc_syncwarp(__kmpc_impl_lanemask_t);
    430
    431 // tasks
    432 EXTERN kmp_TaskDescr *__kmpc_omp_task_alloc(kmp_Ident *loc,
    433 uint32_t global_tid, int32_t flag,
    434 size_t sizeOfTaskInclPrivate,
    435 size_t sizeOfSharedTable,
    436 kmp_TaskFctPtr sub);
    437 EXTERN int32_t __kmpc_omp_task(kmp_Ident *loc, uint32_t global_tid,
    438 kmp_TaskDescr *newLegacyTaskDescr);
    439 EXTERN int32_t __kmpc_omp_task_with_deps(kmp_Ident *loc, uint32_t global_tid,
    440 kmp_TaskDescr *newLegacyTaskDescr,
    441 int32_t depNum, void *depList,
    442 int32_t noAliasDepNum,
    443 void *noAliasDepList);
    444 EXTERN void __kmpc_omp_task_begin_if0(kmp_Ident *loc, uint32_t global_tid,
    445 kmp_TaskDescr *newLegacyTaskDescr);
    446 EXTERN void __kmpc_omp_task_complete_if0(kmp_Ident *loc, uint32_t global_tid,
    447 kmp_TaskDescr *newLegacyTaskDescr);
    448 EXTERN void __kmpc_omp_wait_deps(kmp_Ident *loc, uint32_t global_tid,
    449 int32_t depNum, void *depList,
    450 int32_t noAliasDepNum, void *noAliasDepList);
    451 EXTERN void __kmpc_taskgroup(kmp_Ident *loc, uint32_t global_tid);
    452 EXTERN void __kmpc_end_taskgroup(kmp_Ident *loc, uint32_t global_tid);
    453 EXTERN int32_t __kmpc_omp_taskyield(kmp_Ident *loc, uint32_t global_tid,
    454 int end_part);
    455 EXTERN int32_t __kmpc_omp_taskwait(kmp_Ident *loc, uint32_t global_tid);
    456 EXTERN void __kmpc_taskloop(kmp_Ident *loc, uint32_t global_tid,
    457 kmp_TaskDescr *newKmpTaskDescr, int if_val,
    458 uint64_t *lb, uint64_t *ub, int64_t st, int nogroup,
    459 int32_t sched, uint64_t grainsize, void *task_dup);
    460
    461 // cancel
    462 EXTERN int32_t __kmpc_cancellationpoint(kmp_Ident *loc, int32_t global_tid,
    463 int32_t cancelVal);
    464 EXTERN int32_t __kmpc_cancel(kmp_Ident *loc, int32_t global_tid,
    465 int32_t cancelVal);
    466
    467 // non standard
    468 EXTERN void __kmpc_kernel_init_params(void *ReductionScratchpadPtr);
    469 EXTERN void __kmpc_kernel_init(int ThreadLimit, int16_t RequiresOMPRuntime);
    470 EXTERN void __kmpc_kernel_deinit(int16_t IsOMPRuntimeInitialized);
    471 EXTERN void __kmpc_spmd_kernel_init(int ThreadLimit, int16_t RequiresOMPRuntime,
    472 int16_t RequiresDataSharing);
    473 EXTERN __attribute__((deprecated)) void __kmpc_spmd_kernel_deinit();
    474 EXTERN void __kmpc_spmd_kernel_deinit_v2(int16_t RequiresOMPRuntime);
    475 EXTERN void __kmpc_kernel_prepare_parallel(void *WorkFn,
    476 int16_t IsOMPRuntimeInitialized);
    477 EXTERN bool __kmpc_kernel_parallel(void **WorkFn,
    478 int16_t IsOMPRuntimeInitialized);
    479 EXTERN void __kmpc_kernel_end_parallel();
    480 EXTERN bool __kmpc_kernel_convergent_parallel(void *buffer,
    481 __kmpc_impl_lanemask_t Mask,
    482 bool *IsFinal,
    483 int32_t *LaneSource);
    484 EXTERN void __kmpc_kernel_end_convergent_parallel(void *buffer);
    485 EXTERN bool __kmpc_kernel_convergent_simd(void *buffer,
    486 __kmpc_impl_lanemask_t Mask,
    487 bool *IsFinal, int32_t *LaneSource,
    488 int32_t *LaneId, int32_t *NumLanes);
    489 EXTERN void __kmpc_kernel_end_convergent_simd(void *buffer);
    490
    491
    492 EXTERN void __kmpc_data_sharing_init_stack();
    493 EXTERN void __kmpc_data_sharing_init_stack_spmd();
    494 EXTERN void *__kmpc_data_sharing_coalesced_push_stack(size_t size,
    495 int16_t UseSharedMemory);
    496 EXTERN void *__kmpc_data_sharing_push_stack(size_t size, int16_t UseSharedMemory);
    497 EXTERN void __kmpc_data_sharing_pop_stack(void *a);
    498 EXTERN void __kmpc_begin_sharing_variables(void ***GlobalArgs, size_t nArgs);
    499 EXTERN void __kmpc_end_sharing_variables();
    500 EXTERN void __kmpc_get_shared_variables(void ***GlobalArgs);
    501
    502 // The slot used for data sharing by the master and worker threads. We use a
    503 // complete (default size version and an incomplete one so that we allow sizes
    504 // greater than the default).
    505 struct __kmpc_data_sharing_slot {
    506 __kmpc_data_sharing_slot *Next;
    507 __kmpc_data_sharing_slot *Prev;
    508 void *PrevSlotStackPtr;
    509 void *DataEnd;
    510 char Data[];
    511 };
    512 EXTERN void
    513 __kmpc_initialize_data_sharing_environment(__kmpc_data_sharing_slot *RootS,
    514 size_t InitialDataSize);
    515 EXTERN void *__kmpc_data_sharing_environment_begin(
    516 __kmpc_data_sharing_slot **SavedSharedSlot, void **SavedSharedStack,
    517 void **SavedSharedFrame, __kmpc_impl_lanemask_t *SavedActiveThreads,
    518 size_t SharingDataSize, size_t SharingDefaultDataSize,
    519 int16_t IsOMPRuntimeInitialized);
    520 EXTERN void __kmpc_data_sharing_environment_end(
    521 __kmpc_data_sharing_slot **SavedSharedSlot, void **SavedSharedStack,
    522 void **SavedSharedFrame, __kmpc_impl_lanemask_t *SavedActiveThreads,
    523 int32_t IsEntryPoint);
    524
    525 EXTERN void *
    526 __kmpc_get_data_sharing_environment_frame(int32_t SourceThreadID,
    527 int16_t IsOMPRuntimeInitialized);
    528
    529 // SPMD execution mode interrogation function.
    530 EXTERN int8_t __kmpc_is_spmd_exec_mode();
    531
    532 EXTERN void __kmpc_get_team_static_memory(int16_t isSPMDExecutionMode,
    533 const void *buf, size_t size,
    534 int16_t is_shared, const void **res);
    535
    536 EXTERN void __kmpc_restore_team_static_memory(int16_t isSPMDExecutionMode,
    537 int16_t is_shared);
    538
    539 #endif
  • openmp/trunk/libomptarget/deviceRTLs/nvptx/CMakeLists.txt

     
    35 35 set(CUDA_HOST_COMPILER "${LIBOMPTARGET_NVPTX_ALTERNATE_GCC_HOST_COMPILER}" CACHE FILEPATH "" FORCE)
    36 36 endif()
    37 37
    38 get_filename_component(devicertl_base_directory
    39 ${CMAKE_CURRENT_SOURCE_DIR}
    40 DIRECTORY)
    41
    38 42 if(LIBOMPTARGET_DEP_CUDA_FOUND)
    39 43 libomptarget_say("Building CUDA offloading device RTL.")
    40 44
     
    83 87 # yet supported by the CUDA toolchain on the device.
    84 88 set(BUILD_SHARED_LIBS OFF)
    85 89 set(CUDA_SEPARABLE_COMPILATION ON)
    86
    90 list(APPEND CUDA_NVCC_FLAGS -I${devicertl_base_directory})
    87 91 cuda_add_library(omptarget-nvptx STATIC ${cuda_src_files} ${omp_data_objects}
    88 92 OPTIONS ${CUDA_ARCH} ${CUDA_DEBUG})
    89 93
     
    117 121 libomptarget_say("Building CUDA LLVM bitcode offloading device RTL.")
    118 122
    119 123 # Set flags for LLVM Bitcode compilation.
    120 set(bc_flags ${LIBOMPTARGET_NVPTX_SELECTED_CUDA_COMPILER_FLAGS})
    124 set(bc_flags ${LIBOMPTARGET_NVPTX_SELECTED_CUDA_COMPILER_FLAGS}
    125 -I${devicertl_base_directory})
    121 126 if(${LIBOMPTARGET_NVPTX_DEBUG})
    122 127 set(bc_flags ${bc_flags} -DOMPTARGET_NVPTX_DEBUG=-1)
    123 128 else()
  • openmp/trunk/libomptarget/deviceRTLs/nvptx/src/interface.h

     
    1 //===------- interface.h - NVPTX OpenMP interface definitions ---- CUDA -*-===//
    2 //
    3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
    4 // See https://llvm.org/LICENSE.txt for license information.
    5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
    6 //
    7 //===----------------------------------------------------------------------===//
    8 //
    9 // This file contains debug macros to be used in the application.
    10 //
    11 // This file contains all the definitions that are relevant to
    12 // the interface. The first section contains the interface as
    13 // declared by OpenMP. The second section includes the compiler
    14 // specific interfaces.
    15 //
    16 //===----------------------------------------------------------------------===//
    17
    18 #ifndef _INTERFACES_H_
    19 #define _INTERFACES_H_
    20
    21 #include "option.h"
    22 #include "target_impl.h"
    23
    24 ////////////////////////////////////////////////////////////////////////////////
    25 // OpenMP interface
    26 ////////////////////////////////////////////////////////////////////////////////
    27
    28 typedef uint32_t omp_lock_t; /* arbitrary type of the right length */
    29 typedef uint64_t omp_nest_lock_t; /* arbitrary type of the right length */
    30
    31 typedef enum omp_sched_t {
    32 omp_sched_static = 1, /* chunkSize >0 */
    33 omp_sched_dynamic = 2, /* chunkSize >0 */
    34 omp_sched_guided = 3, /* chunkSize >0 */
    35 omp_sched_auto = 4, /* no chunkSize */
    36 } omp_sched_t;
    37
    38 typedef enum omp_proc_bind_t {
    39 omp_proc_bind_false = 0,
    40 omp_proc_bind_true = 1,
    41 omp_proc_bind_master = 2,
    42 omp_proc_bind_close = 3,
    43 omp_proc_bind_spread = 4
    44 } omp_proc_bind_t;
    45
    46 EXTERN double omp_get_wtick(void);
    47 EXTERN double omp_get_wtime(void);
    48
    49 EXTERN void omp_set_num_threads(int num);
    50 EXTERN int omp_get_num_threads(void);
    51 EXTERN int omp_get_max_threads(void);
    52 EXTERN int omp_get_thread_limit(void);
    53 EXTERN int omp_get_thread_num(void);
    54 EXTERN int omp_get_num_procs(void);
    55 EXTERN int omp_in_parallel(void);
    56 EXTERN int omp_in_final(void);
    57 EXTERN void omp_set_dynamic(int flag);
    58 EXTERN int omp_get_dynamic(void);
    59 EXTERN void omp_set_nested(int flag);
    60 EXTERN int omp_get_nested(void);
    61 EXTERN void omp_set_max_active_levels(int level);
    62 EXTERN int omp_get_max_active_levels(void);
    63 EXTERN int omp_get_level(void);
    64 EXTERN int omp_get_active_level(void);
    65 EXTERN int omp_get_ancestor_thread_num(int level);
    66 EXTERN int omp_get_team_size(int level);
    67
    68 EXTERN void omp_init_lock(omp_lock_t *lock);
    69 EXTERN void omp_init_nest_lock(omp_nest_lock_t *lock);
    70 EXTERN void omp_destroy_lock(omp_lock_t *lock);
    71 EXTERN void omp_destroy_nest_lock(omp_nest_lock_t *lock);
    72 EXTERN void omp_set_lock(omp_lock_t *lock);
    73 EXTERN void omp_set_nest_lock(omp_nest_lock_t *lock);
    74 EXTERN void omp_unset_lock(omp_lock_t *lock);
    75 EXTERN void omp_unset_nest_lock(omp_nest_lock_t *lock);
    76 EXTERN int omp_test_lock(omp_lock_t *lock);
    77 EXTERN int omp_test_nest_lock(omp_nest_lock_t *lock);
    78
    79 EXTERN void omp_get_schedule(omp_sched_t *kind, int *modifier);
    80 EXTERN void omp_set_schedule(omp_sched_t kind, int modifier);
    81 EXTERN omp_proc_bind_t omp_get_proc_bind(void);
    82 EXTERN int omp_get_cancellation(void);
    83 EXTERN void omp_set_default_device(int deviceId);
    84 EXTERN int omp_get_default_device(void);
    85 EXTERN int omp_get_num_devices(void);
    86 EXTERN int omp_get_num_teams(void);
    87 EXTERN int omp_get_team_num(void);
    88 EXTERN int omp_is_initial_device(void);
    89 EXTERN int omp_get_initial_device(void);
    90 EXTERN int omp_get_max_task_priority(void);
    91
    92 ////////////////////////////////////////////////////////////////////////////////
    93 // file below is swiped from kmpc host interface
    94 ////////////////////////////////////////////////////////////////////////////////
    95
    96 ////////////////////////////////////////////////////////////////////////////////
    97 // kmp specifc types
    98 ////////////////////////////////////////////////////////////////////////////////
    99
    100 typedef enum kmp_sched_t {
    101 kmp_sched_static_chunk = 33,
    102 kmp_sched_static_nochunk = 34,
    103 kmp_sched_dynamic = 35,
    104 kmp_sched_guided = 36,
    105 kmp_sched_runtime = 37,
    106 kmp_sched_auto = 38,
    107
    108 kmp_sched_static_balanced_chunk = 45,
    109
    110 kmp_sched_static_ordered = 65,
    111 kmp_sched_static_nochunk_ordered = 66,
    112 kmp_sched_dynamic_ordered = 67,
    113 kmp_sched_guided_ordered = 68,
    114 kmp_sched_runtime_ordered = 69,
    115 kmp_sched_auto_ordered = 70,
    116
    117 kmp_sched_distr_static_chunk = 91,
    118 kmp_sched_distr_static_nochunk = 92,
    119 kmp_sched_distr_static_chunk_sched_static_chunkone = 93,
    120
    121 kmp_sched_default = kmp_sched_static_nochunk,
    122 kmp_sched_unordered_first = kmp_sched_static_chunk,
    123 kmp_sched_unordered_last = kmp_sched_auto,
    124 kmp_sched_ordered_first = kmp_sched_static_ordered,
    125 kmp_sched_ordered_last = kmp_sched_auto_ordered,
    126 kmp_sched_distribute_first = kmp_sched_distr_static_chunk,
    127 kmp_sched_distribute_last =
    128 kmp_sched_distr_static_chunk_sched_static_chunkone,
    129
    130 /* Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
    131 * Since we need to distinguish the three possible cases (no modifier,
    132 * monotonic modifier, nonmonotonic modifier), we need separate bits for
    133 * each modifier. The absence of monotonic does not imply nonmonotonic,
    134 * especially since 4.5 says that the behaviour of the "no modifier" case
    135 * is implementation defined in 4.5, but will become "nonmonotonic" in 5.0.
    136 *
    137 * Since we're passing a full 32 bit value, we can use a couple of high
    138 * bits for these flags; out of paranoia we avoid the sign bit.
    139 *
    140 * These modifiers can be or-ed into non-static schedules by the compiler
    141 * to pass the additional information. They will be stripped early in the
    142 * processing in __kmp_dispatch_init when setting up schedules, so
    143 * most of the code won't ever see schedules with these bits set.
    144 */
    145 kmp_sched_modifier_monotonic = (1 << 29),
    146 /**< Set if the monotonic schedule modifier was present */
    147 kmp_sched_modifier_nonmonotonic = (1 << 30),
    148 /**< Set if the nonmonotonic schedule modifier was present */
    149
    150 #define SCHEDULE_WITHOUT_MODIFIERS(s) \
    151 (enum kmp_sched_t)( \
    152 (s) & ~(kmp_sched_modifier_nonmonotonic | kmp_sched_modifier_monotonic))
    153 #define SCHEDULE_HAS_MONOTONIC(s) (((s)&kmp_sched_modifier_monotonic) != 0)
    154 #define SCHEDULE_HAS_NONMONOTONIC(s) \
    155 (((s)&kmp_sched_modifier_nonmonotonic) != 0)
    156 #define SCHEDULE_HAS_NO_MODIFIERS(s) \
    157 (((s) & (kmp_sched_modifier_nonmonotonic | kmp_sched_modifier_monotonic)) == \
    158 0)
    159
    160 } kmp_sched_t;
    161
    162 /*!
    163 * Enum for accesseing the reserved_2 field of the ident_t struct below.
    164 */
    165 enum {
    166 /*! Bit set to 1 when in SPMD mode. */
    167 KMP_IDENT_SPMD_MODE = 0x01,
    168 /*! Bit set to 1 when a simplified runtime is used. */
    169 KMP_IDENT_SIMPLE_RT_MODE = 0x02,
    170 };
    171
    172 /*!
    173 * The ident structure that describes a source location.
    174 * The struct is identical to the one in the kmp.h file.
    175 * We maintain the same data structure for compatibility.
    176 */
    177 typedef int kmp_int32;
    178 typedef struct ident {
    179 kmp_int32 reserved_1; /**< might be used in Fortran; see above */
    180 kmp_int32 flags; /**< also f.flags; KMP_IDENT_xxx flags; KMP_IDENT_KMPC
    181 identifies this union member */
    182 kmp_int32 reserved_2; /**< not really used in Fortran any more; see above */
    183 kmp_int32 reserved_3; /**< source[4] in Fortran, do not use for C++ */
    184 char const *psource; /**< String describing the source location.
    185 The string is composed of semi-colon separated fields
    186 which describe the source file, the function and a pair
    187 of line numbers that delimit the construct. */
    188 } ident_t;
    189
    190 // parallel defs
    191 typedef ident_t kmp_Ident;
    192 typedef void (*kmp_ParFctPtr)(int32_t *global_tid, int32_t *bound_tid, ...);
    193 typedef void (*kmp_ReductFctPtr)(void *lhsData, void *rhsData);
    194 typedef void (*kmp_InterWarpCopyFctPtr)(void *src, int32_t warp_num);
    195 typedef void (*kmp_ShuffleReductFctPtr)(void *rhsData, int16_t lane_id,
    196 int16_t lane_offset,
    197 int16_t shortCircuit);
    198 typedef void (*kmp_CopyToScratchpadFctPtr)(void *reduceData, void *scratchpad,
    199 int32_t index, int32_t width);
    200 typedef void (*kmp_LoadReduceFctPtr)(void *reduceData, void *scratchpad,
    201 int32_t index, int32_t width,
    202 int32_t reduce);
    203 typedef void (*kmp_ListGlobalFctPtr)(void *buffer, int idx, void *reduce_data);
    204
    205 // task defs
    206 typedef struct kmp_TaskDescr kmp_TaskDescr;
    207 typedef int32_t (*kmp_TaskFctPtr)(int32_t global_tid, kmp_TaskDescr *taskDescr);
    208 typedef struct kmp_TaskDescr {
    209 void *sharedPointerTable; // ptr to a table of shared var ptrs
    210 kmp_TaskFctPtr sub; // task subroutine
    211 int32_t partId; // unused
    212 kmp_TaskFctPtr destructors; // destructor of c++ first private
    213 } kmp_TaskDescr;
    214
    215 // sync defs
    216 typedef int32_t kmp_CriticalName[8];
    217
    218 ////////////////////////////////////////////////////////////////////////////////
    219 // external interface
    220 ////////////////////////////////////////////////////////////////////////////////
    221
    222 // parallel
    223 EXTERN int32_t __kmpc_global_thread_num(kmp_Ident *loc);
    224 EXTERN void __kmpc_push_num_threads(kmp_Ident *loc, int32_t global_tid,
    225 int32_t num_threads);
    226 // simd
    227 EXTERN void __kmpc_push_simd_limit(kmp_Ident *loc, int32_t global_tid,
    228 int32_t simd_limit);
    229 // aee ... not supported
    230 // EXTERN void __kmpc_fork_call(kmp_Ident *loc, int32_t argc, kmp_ParFctPtr
    231 // microtask, ...);
    232 EXTERN void __kmpc_serialized_parallel(kmp_Ident *loc, uint32_t global_tid);
    233 EXTERN void __kmpc_end_serialized_parallel(kmp_Ident *loc,
    234 uint32_t global_tid);
    235 EXTERN uint16_t __kmpc_parallel_level(kmp_Ident *loc, uint32_t global_tid);
    236
    237 // proc bind
    238 EXTERN void __kmpc_push_proc_bind(kmp_Ident *loc, uint32_t global_tid,
    239 int proc_bind);
    240 EXTERN int omp_get_num_places(void);
    241 EXTERN int omp_get_place_num_procs(int place_num);
    242 EXTERN void omp_get_place_proc_ids(int place_num, int *ids);
    243 EXTERN int omp_get_place_num(void);
    244 EXTERN int omp_get_partition_num_places(void);
    245 EXTERN void omp_get_partition_place_nums(int *place_nums);
    246
    247 // for static (no chunk or chunk)
    248 EXTERN void __kmpc_for_static_init_4(kmp_Ident *loc, int32_t global_tid,
    249 int32_t sched, int32_t *plastiter,
    250 int32_t *plower, int32_t *pupper,
    251 int32_t *pstride, int32_t incr,
    252 int32_t chunk);
    253 EXTERN void __kmpc_for_static_init_4u(kmp_Ident *loc, int32_t global_tid,
    254 int32_t sched, int32_t *plastiter,
    255 uint32_t *plower, uint32_t *pupper,
    256 int32_t *pstride, int32_t incr,
    257 int32_t chunk);
    258 EXTERN void __kmpc_for_static_init_8(kmp_Ident *loc, int32_t global_tid,
    259 int32_t sched, int32_t *plastiter,
    260 int64_t *plower, int64_t *pupper,
    261 int64_t *pstride, int64_t incr,
    262 int64_t chunk);
    263 EXTERN void __kmpc_for_static_init_8u(kmp_Ident *loc, int32_t global_tid,
    264 int32_t sched, int32_t *plastiter1,
    265 uint64_t *plower, uint64_t *pupper,
    266 int64_t *pstride, int64_t incr,
    267 int64_t chunk);
    268 EXTERN
    269 void __kmpc_for_static_init_4_simple_spmd(kmp_Ident *loc, int32_t global_tid,
    270 int32_t sched, int32_t *plastiter,
    271 int32_t *plower, int32_t *pupper,
    272 int32_t *pstride, int32_t incr,
    273 int32_t chunk);
    274 EXTERN
    275 void __kmpc_for_static_init_4u_simple_spmd(kmp_Ident *loc, int32_t global_tid,
    276 int32_t sched, int32_t *plastiter,
    277 uint32_t *plower, uint32_t *pupper,
    278 int32_t *pstride, int32_t incr,
    279 int32_t chunk);
    280 EXTERN
    281 void __kmpc_for_static_init_8_simple_spmd(kmp_Ident *loc, int32_t global_tid,
    282 int32_t sched, int32_t *plastiter,
    283 int64_t *plower, int64_t *pupper,
    284 int64_t *pstride, int64_t incr,
    285 int64_t chunk);
    286 EXTERN
    287 void __kmpc_for_static_init_8u_simple_spmd(kmp_Ident *loc, int32_t global_tid,
    288 int32_t sched, int32_t *plastiter1,
    289 uint64_t *plower, uint64_t *pupper,
    290 int64_t *pstride, int64_t incr,
    291 int64_t chunk);
    292 EXTERN
    293 void __kmpc_for_static_init_4_simple_generic(kmp_Ident *loc,
    294 int32_t global_tid, int32_t sched,
    295 int32_t *plastiter,
    296 int32_t *plower, int32_t *pupper,
    297 int32_t *pstride, int32_t incr,
    298 int32_t chunk);
    299 EXTERN
    300 void __kmpc_for_static_init_4u_simple_generic(
    301 kmp_Ident *loc, int32_t global_tid, int32_t sched, int32_t *plastiter,
    302 uint32_t *plower, uint32_t *pupper, int32_t *pstride, int32_t incr,
    303 int32_t chunk);
    304 EXTERN
    305 void __kmpc_for_static_init_8_simple_generic(kmp_Ident *loc,
    306 int32_t global_tid, int32_t sched,
    307 int32_t *plastiter,
    308 int64_t *plower, int64_t *pupper,
    309 int64_t *pstride, int64_t incr,
    310 int64_t chunk);
    311 EXTERN
    312 void __kmpc_for_static_init_8u_simple_generic(
    313 kmp_Ident *loc, int32_t global_tid, int32_t sched, int32_t *plastiter1,
    314 uint64_t *plower, uint64_t *pupper, int64_t *pstride, int64_t incr,
    315 int64_t chunk);
    316
    317 EXTERN void __kmpc_for_static_fini(kmp_Ident *loc, int32_t global_tid);
    318
    319 // for dynamic
    320 EXTERN void __kmpc_dispatch_init_4(kmp_Ident *loc, int32_t global_tid,
    321 int32_t sched, int32_t lower, int32_t upper,
    322 int32_t incr, int32_t chunk);
    323 EXTERN void __kmpc_dispatch_init_4u(kmp_Ident *loc, int32_t global_tid,
    324 int32_t sched, uint32_t lower,
    325 uint32_t upper, int32_t incr,
    326 int32_t chunk);
    327 EXTERN void __kmpc_dispatch_init_8(kmp_Ident *loc, int32_t global_tid,
    328 int32_t sched, int64_t lower, int64_t upper,
    329 int64_t incr, int64_t chunk);
    330 EXTERN void __kmpc_dispatch_init_8u(kmp_Ident *loc, int32_t global_tid,
    331 int32_t sched, uint64_t lower,
    332 uint64_t upper, int64_t incr,
    333 int64_t chunk);
    334
    335 EXTERN int __kmpc_dispatch_next_4(kmp_Ident *loc, int32_t global_tid,
    336 int32_t *plastiter, int32_t *plower,
    337 int32_t *pupper, int32_t *pstride);
    338 EXTERN int __kmpc_dispatch_next_4u(kmp_Ident *loc, int32_t global_tid,
    339 int32_t *plastiter, uint32_t *plower,
    340 uint32_t *pupper, int32_t *pstride);
    341 EXTERN int __kmpc_dispatch_next_8(kmp_Ident *loc, int32_t global_tid,
    342 int32_t *plastiter, int64_t *plower,
    343 int64_t *pupper, int64_t *pstride);
    344 EXTERN int __kmpc_dispatch_next_8u(kmp_Ident *loc, int32_t global_tid,
    345 int32_t *plastiter, uint64_t *plower,
    346 uint64_t *pupper, int64_t *pstride);
    347
    348 EXTERN void __kmpc_dispatch_fini_4(kmp_Ident *loc, int32_t global_tid);
    349 EXTERN void __kmpc_dispatch_fini_4u(kmp_Ident *loc, int32_t global_tid);
    350 EXTERN void __kmpc_dispatch_fini_8(kmp_Ident *loc, int32_t global_tid);
    351 EXTERN void __kmpc_dispatch_fini_8u(kmp_Ident *loc, int32_t global_tid);
    352
    353 // Support for reducing conditional lastprivate variables
    354 EXTERN void __kmpc_reduce_conditional_lastprivate(kmp_Ident *loc,
    355 int32_t global_tid,
    356 int32_t varNum, void *array);
    357
    358 // reduction
    359 EXTERN void __kmpc_nvptx_end_reduce(int32_t global_tid);
    360 EXTERN void __kmpc_nvptx_end_reduce_nowait(int32_t global_tid);
    361 EXTERN __attribute__((deprecated)) int32_t __kmpc_nvptx_parallel_reduce_nowait(
    362 int32_t global_tid, int32_t num_vars, size_t reduce_size, void *reduce_data,
    363 kmp_ShuffleReductFctPtr shflFct, kmp_InterWarpCopyFctPtr cpyFct);
    364 EXTERN int32_t __kmpc_nvptx_parallel_reduce_nowait_v2(
    365 kmp_Ident *loc, int32_t global_tid, int32_t num_vars, size_t reduce_size,
    366 void *reduce_data, kmp_ShuffleReductFctPtr shflFct,
    367 kmp_InterWarpCopyFctPtr cpyFct);
    368 EXTERN int32_t __kmpc_nvptx_parallel_reduce_nowait_simple_spmd(
    369 int32_t global_tid, int32_t num_vars, size_t reduce_size, void *reduce_data,
    370 kmp_ShuffleReductFctPtr shflFct, kmp_InterWarpCopyFctPtr cpyFct);
    371 EXTERN int32_t __kmpc_nvptx_parallel_reduce_nowait_simple_generic(
    372 int32_t global_tid, int32_t num_vars, size_t reduce_size, void *reduce_data,
    373 kmp_ShuffleReductFctPtr shflFct, kmp_InterWarpCopyFctPtr cpyFct);
    374 EXTERN int32_t __kmpc_nvptx_simd_reduce_nowait(
    375 int32_t global_tid, int32_t num_vars, size_t reduce_size, void *reduce_data,
    376 kmp_ShuffleReductFctPtr shflFct, kmp_InterWarpCopyFctPtr cpyFct);
    377 EXTERN int32_t __kmpc_nvptx_teams_reduce_nowait_v2(
    378 kmp_Ident *loc, int32_t global_tid, void *global_buffer,
    379 int32_t num_of_records, void *reduce_data, kmp_ShuffleReductFctPtr shflFct,
    380 kmp_InterWarpCopyFctPtr cpyFct, kmp_ListGlobalFctPtr lgcpyFct,
    381 kmp_ListGlobalFctPtr lgredFct, kmp_ListGlobalFctPtr glcpyFct,
    382 kmp_ListGlobalFctPtr glredFct);
    383 EXTERN int32_t __kmpc_nvptx_teams_reduce_nowait(
    384 int32_t global_tid, int32_t num_vars, size_t reduce_size, void *reduce_data,
    385 kmp_ShuffleReductFctPtr shflFct, kmp_InterWarpCopyFctPtr cpyFct,
    386 kmp_CopyToScratchpadFctPtr sratchFct, kmp_LoadReduceFctPtr ldFct);
    387 EXTERN int32_t __kmpc_nvptx_teams_reduce_nowait_simple_spmd(
    388 int32_t global_tid, int32_t num_vars, size_t reduce_size, void *reduce_data,
    389 kmp_ShuffleReductFctPtr shflFct, kmp_InterWarpCopyFctPtr cpyFct,
    390 kmp_CopyToScratchpadFctPtr sratchFct, kmp_LoadReduceFctPtr ldFct);
    391 EXTERN int32_t __kmpc_nvptx_teams_reduce_nowait_simple_generic(
    392 int32_t global_tid, int32_t num_vars, size_t reduce_size, void *reduce_data,
    393 kmp_ShuffleReductFctPtr shflFct, kmp_InterWarpCopyFctPtr cpyFct,
    394 kmp_CopyToScratchpadFctPtr sratchFct, kmp_LoadReduceFctPtr ldFct);
    395 EXTERN int32_t __kmpc_nvptx_teams_reduce_nowait_simple(kmp_Ident *loc,
    396 int32_t global_tid,
    397 kmp_CriticalName *crit);
    398 EXTERN void __kmpc_nvptx_teams_end_reduce_nowait_simple(kmp_Ident *loc,
    399 int32_t global_tid,
    400 kmp_CriticalName *crit);
    401 EXTERN int32_t __kmpc_shuffle_int32(int32_t val, int16_t delta, int16_t size);
    402 EXTERN int64_t __kmpc_shuffle_int64(int64_t val, int16_t delta, int16_t size);
    403
    404 // sync barrier
    405 EXTERN void __kmpc_barrier(kmp_Ident *loc_ref, int32_t tid);
    406 EXTERN void __kmpc_barrier_simple_spmd(kmp_Ident *loc_ref, int32_t tid);
    407 EXTERN void __kmpc_barrier_simple_generic(kmp_Ident *loc_ref, int32_t tid);
    408 EXTERN int32_t __kmpc_cancel_barrier(kmp_Ident *loc, int32_t global_tid);
    409
    410 // single
    411 EXTERN int32_t __kmpc_single(kmp_Ident *loc, int32_t global_tid);
    412 EXTERN void __kmpc_end_single(kmp_Ident *loc, int32_t global_tid);
    413
    414 // sync
    415 EXTERN int32_t __kmpc_master(kmp_Ident *loc, int32_t global_tid);
    416 EXTERN void __kmpc_end_master(kmp_Ident *loc, int32_t global_tid);
    417 EXTERN void __kmpc_ordered(kmp_Ident *loc, int32_t global_tid);
    418 EXTERN void __kmpc_end_ordered(kmp_Ident *loc, int32_t global_tid);
    419 EXTERN void __kmpc_critical(kmp_Ident *loc, int32_t global_tid,
    420 kmp_CriticalName *crit);
    421 EXTERN void __kmpc_end_critical(kmp_Ident *loc, int32_t global_tid,
    422 kmp_CriticalName *crit);
    423 EXTERN void __kmpc_flush(kmp_Ident *loc);
    424
    425 // vote
    426 EXTERN __kmpc_impl_lanemask_t __kmpc_warp_active_thread_mask();
    427 // syncwarp
    428 EXTERN void __kmpc_syncwarp(__kmpc_impl_lanemask_t);
    429
    430 // tasks
    431 EXTERN kmp_TaskDescr *__kmpc_omp_task_alloc(kmp_Ident *loc,
    432 uint32_t global_tid, int32_t flag,
    433 size_t sizeOfTaskInclPrivate,
    434 size_t sizeOfSharedTable,
    435 kmp_TaskFctPtr sub);
    436 EXTERN int32_t __kmpc_omp_task(kmp_Ident *loc, uint32_t global_tid,
    437 kmp_TaskDescr *newLegacyTaskDescr);
    438 EXTERN int32_t __kmpc_omp_task_with_deps(kmp_Ident *loc, uint32_t global_tid,
    439 kmp_TaskDescr *newLegacyTaskDescr,
    440 int32_t depNum, void *depList,
    441 int32_t noAliasDepNum,
    442 void *noAliasDepList);
    443 EXTERN void __kmpc_omp_task_begin_if0(kmp_Ident *loc, uint32_t global_tid,
    444 kmp_TaskDescr *newLegacyTaskDescr);
    445 EXTERN void __kmpc_omp_task_complete_if0(kmp_Ident *loc, uint32_t global_tid,
    446 kmp_TaskDescr *newLegacyTaskDescr);
    447 EXTERN void __kmpc_omp_wait_deps(kmp_Ident *loc, uint32_t global_tid,
    448 int32_t depNum, void *depList,
    449 int32_t noAliasDepNum, void *noAliasDepList);
    450 EXTERN void __kmpc_taskgroup(kmp_Ident *loc, uint32_t global_tid);
    451 EXTERN void __kmpc_end_taskgroup(kmp_Ident *loc, uint32_t global_tid);
    452 EXTERN int32_t __kmpc_omp_taskyield(kmp_Ident *loc, uint32_t global_tid,
    453 int end_part);
    454 EXTERN int32_t __kmpc_omp_taskwait(kmp_Ident *loc, uint32_t global_tid);
    455 EXTERN void __kmpc_taskloop(kmp_Ident *loc, uint32_t global_tid,
    456 kmp_TaskDescr *newKmpTaskDescr, int if_val,
    457 uint64_t *lb, uint64_t *ub, int64_t st, int nogroup,
    458 int32_t sched, uint64_t grainsize, void *task_dup);
    459
    460 // cancel
    461 EXTERN int32_t __kmpc_cancellationpoint(kmp_Ident *loc, int32_t global_tid,
    462 int32_t cancelVal);
    463 EXTERN int32_t __kmpc_cancel(kmp_Ident *loc, int32_t global_tid,
    464 int32_t cancelVal);
    465
    466 // non standard
    467 EXTERN void __kmpc_kernel_init_params(void *ReductionScratchpadPtr);
    468 EXTERN void __kmpc_kernel_init(int ThreadLimit, int16_t RequiresOMPRuntime);
    469 EXTERN void __kmpc_kernel_deinit(int16_t IsOMPRuntimeInitialized);
    470 EXTERN void __kmpc_spmd_kernel_init(int ThreadLimit, int16_t RequiresOMPRuntime,
    471 int16_t RequiresDataSharing);
    472 EXTERN __attribute__((deprecated)) void __kmpc_spmd_kernel_deinit();
    473 EXTERN void __kmpc_spmd_kernel_deinit_v2(int16_t RequiresOMPRuntime);
    474 EXTERN void __kmpc_kernel_prepare_parallel(void *WorkFn,
    475 int16_t IsOMPRuntimeInitialized);
    476 EXTERN bool __kmpc_kernel_parallel(void **WorkFn,
    477 int16_t IsOMPRuntimeInitialized);
    478 EXTERN void __kmpc_kernel_end_parallel();
    479 EXTERN bool __kmpc_kernel_convergent_parallel(void *buffer,
    480 __kmpc_impl_lanemask_t Mask,
    481 bool *IsFinal,
    482 int32_t *LaneSource);
    483 EXTERN void __kmpc_kernel_end_convergent_parallel(void *buffer);
    484 EXTERN bool __kmpc_kernel_convergent_simd(void *buffer,
    485 __kmpc_impl_lanemask_t Mask,
    486 bool *IsFinal, int32_t *LaneSource,
    487 int32_t *LaneId, int32_t *NumLanes);
    488 EXTERN void __kmpc_kernel_end_convergent_simd(void *buffer);
    489
    490
    491 EXTERN void __kmpc_data_sharing_init_stack();
    492 EXTERN void __kmpc_data_sharing_init_stack_spmd();
    493 EXTERN void *__kmpc_data_sharing_coalesced_push_stack(size_t size,
    494 int16_t UseSharedMemory);
    495 EXTERN void *__kmpc_data_sharing_push_stack(size_t size, int16_t UseSharedMemory);
    496 EXTERN void __kmpc_data_sharing_pop_stack(void *a);
    497 EXTERN void __kmpc_begin_sharing_variables(void ***GlobalArgs, size_t nArgs);
    498 EXTERN void __kmpc_end_sharing_variables();
    499 EXTERN void __kmpc_get_shared_variables(void ***GlobalArgs);
    500
    501 // The slot used for data sharing by the master and worker threads. We use a
    502 // complete (default size version and an incomplete one so that we allow sizes
    503 // greater than the default).
    504 struct __kmpc_data_sharing_slot {
    505 __kmpc_data_sharing_slot *Next;
    506 __kmpc_data_sharing_slot *Prev;
    507 void *PrevSlotStackPtr;
    508 void *DataEnd;
    509 char Data[];
    510 };
    511 EXTERN void
    512 __kmpc_initialize_data_sharing_environment(__kmpc_data_sharing_slot *RootS,
    513 size_t InitialDataSize);
    514 EXTERN void *__kmpc_data_sharing_environment_begin(
    515 __kmpc_data_sharing_slot **SavedSharedSlot, void **SavedSharedStack,
    516 void **SavedSharedFrame, __kmpc_impl_lanemask_t *SavedActiveThreads,
    517 size_t SharingDataSize, size_t SharingDefaultDataSize,
    518 int16_t IsOMPRuntimeInitialized);
    519 EXTERN void __kmpc_data_sharing_environment_end(
    520 __kmpc_data_sharing_slot **SavedSharedSlot, void **SavedSharedStack,
    521 void **SavedSharedFrame, __kmpc_impl_lanemask_t *SavedActiveThreads,
    522 int32_t IsEntryPoint);
    523
    524 EXTERN void *
    525 __kmpc_get_data_sharing_environment_frame(int32_t SourceThreadID,
    526 int16_t IsOMPRuntimeInitialized);
    527
    528 // SPMD execution mode interrogation function.
    529 EXTERN int8_t __kmpc_is_spmd_exec_mode();
    530
    531 EXTERN void __kmpc_get_team_static_memory(int16_t isSPMDExecutionMode,
    532 const void *buf, size_t size,
    533 int16_t is_shared, const void **res);
    534
    535 EXTERN void __kmpc_restore_team_static_memory(int16_t isSPMDExecutionMode,
    536 int16_t is_shared);
    537
    538 #endif
  • openmp/trunk/libomptarget/deviceRTLs/nvptx/src/nvptx_interface.h

     
    1 //===--- nvptx_interface.h - OpenMP interface definitions -------- CUDA -*-===//
    2 //
    3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
    4 // See https://llvm.org/LICENSE.txt for license information.
    5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
    6 //
    7 //===----------------------------------------------------------------------===//
    8
    9 #ifndef _NVPTX_INTERFACE_H_
    10 #define _NVPTX_INTERFACE_H_
    11
    12 #include <stdint.h>
    13
    14 #define EXTERN extern "C" __device__
    15 typedef uint32_t __kmpc_impl_lanemask_t;
    16
    17 #endif
  • openmp/trunk/libomptarget/deviceRTLs/nvptx/src/option.h

     
    12 12 #ifndef _OPTION_H_
    13 13 #define _OPTION_H_
    14 14
    15 #include "interface.h"
    16
    15 17 ////////////////////////////////////////////////////////////////////////////////
    16 18 // Kernel options
    17 19 ////////////////////////////////////////////////////////////////////////////////
     
    54 56 // misc options (by def everythig here is device)
    55 57 ////////////////////////////////////////////////////////////////////////////////
    56 58
    57 #define EXTERN extern "C" __device__
    58 59 #define INLINE __forceinline__ __device__
    59 60 #define NOINLINE __noinline__ __device__
    60 61 #ifndef TRUE
  • openmp/trunk/libomptarget/deviceRTLs/nvptx/src/target_impl.h

     
    26 26 return val;
    27 27 }
    28 28
    29 typedef uint32_t __kmpc_impl_lanemask_t;
    30 29 static const __kmpc_impl_lanemask_t __kmpc_impl_all_lanes =
    31 30 UINT32_C(0xffffffff);
    32 31