-
Notifications
You must be signed in to change notification settings - Fork 5
/
onetbb-2021.9-e2k.patch
188 lines (172 loc) · 7.74 KB
/
onetbb-2021.9-e2k.patch
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
From 52303e851850d9e7e2aaa6121811ca998af1f9e5 Mon Sep 17 00:00:00 2001
From: Ilya Kurdyukov <[email protected]>
Date: Wed, 19 Jul 2023 10:17:39 +0700
Subject: [PATCH] oneTBB-2021.9 e2k support
Compiled using LCC 1.26
tests passed: 125/137
hanging (timeout 30 min):
test_function_node
test_multifunction_node
test_async_node
python_test
failed:
test_dynamic_link (Failed)
test_parallel_invoke (Subprocess aborted)
test_arena_constraints (Subprocess aborted)
test_malloc_regression (Subprocess aborted)
test_malloc_compliance (Subprocess aborted)
test_malloc_used_by_lib (Subprocess aborted)
test_malloc_lib_unload (Subprocess aborted)
test_malloc_whitebox (Subprocess aborted)
---
cmake/compilers/GNU.cmake | 5 +++++
include/oneapi/tbb/collaborative_call_once.h | 10 ++++++++++
include/oneapi/tbb/detail/_config.h | 2 +-
src/tbb/allocator.cpp | 6 ++++++
src/tbb/co_context.h | 9 +++++++++
src/tbb/tools_api/ittnotify_config.h | 7 +++++++
test/tbb/test_openmp.cpp | 5 +++++
7 files changed, 43 insertions(+), 1 deletion(-)
diff --git a/cmake/compilers/GNU.cmake b/cmake/compilers/GNU.cmake
index 34c10db..81a34e0 100644
--- a/cmake/compilers/GNU.cmake
+++ b/cmake/compilers/GNU.cmake
@@ -71,6 +71,11 @@ endif ()
# Gnu flags to prevent compiler from optimizing out security checks
set(TBB_COMMON_COMPILE_FLAGS ${TBB_COMMON_COMPILE_FLAGS} -fno-strict-overflow -fno-delete-null-pointer-checks -fwrapv)
+if (CMAKE_SYSTEM_PROCESSOR STREQUAL e2k)
+ set(TBB_COMMON_COMPILE_FLAGS -mno-avx -Wno-reduced-alignment -Wno-sign-compare)
+ set(TBB_TEST_COMPILE_FLAGS ${TBB_TEST_COMPILE_FLAGS} -DTBB_TEST_LOW_WORKLOAD)
+endif()
+
# TBB malloc settings
set(TBBMALLOC_LIB_COMPILE_FLAGS -fno-rtti -fno-exceptions)
set(TBB_OPENMP_FLAG -fopenmp)
diff --git a/include/oneapi/tbb/collaborative_call_once.h b/include/oneapi/tbb/collaborative_call_once.h
index db082f8..14a11e8 100644
--- a/include/oneapi/tbb/collaborative_call_once.h
+++ b/include/oneapi/tbb/collaborative_call_once.h
@@ -155,7 +155,17 @@ class collaborative_once_flag : no_copy {
template <typename Fn>
void do_collaborative_call_once(Fn&& f) {
std::uintptr_t expected = m_state.load(std::memory_order_acquire);
+#ifdef __e2k__
+ char stack_buf[max_nfs_size + sizeof(collaborative_once_runner)];
+ char *stack_ptr = &stack_buf[-(uintptr_t)stack_buf & (collaborative_once_references_mask - 1)];
+ collaborative_once_runner &runner = *new(stack_ptr) collaborative_once_runner();
+ struct runner_cleaner {
+ collaborative_once_runner &runner;
+ ~runner_cleaner() { runner.~collaborative_once_runner(); }
+ } cleaner = { runner };
+#else
collaborative_once_runner runner;
+#endif
do {
if (expected == state::uninitialized && m_state.compare_exchange_strong(expected, runner.to_bits())) {
diff --git a/include/oneapi/tbb/detail/_config.h b/include/oneapi/tbb/detail/_config.h
index 1d0b987..835f2be 100644
--- a/include/oneapi/tbb/detail/_config.h
+++ b/include/oneapi/tbb/detail/_config.h
@@ -195,7 +195,7 @@
/** __TBB_WEAK_SYMBOLS_PRESENT denotes that the system supports the weak symbol mechanism **/
#ifndef __TBB_WEAK_SYMBOLS_PRESENT
- #define __TBB_WEAK_SYMBOLS_PRESENT ( !_WIN32 && !__APPLE__ && !__sun && (__TBB_GCC_VERSION >= 40000 || __INTEL_COMPILER ) )
+ #define __TBB_WEAK_SYMBOLS_PRESENT ( !_WIN32 && !__APPLE__ && !__sun && !__e2k__ && (__TBB_GCC_VERSION >= 40000 || __INTEL_COMPILER ) )
#endif
/** Presence of compiler features **/
diff --git a/src/tbb/allocator.cpp b/src/tbb/allocator.cpp
index 5453aea..fe45631 100644
--- a/src/tbb/allocator.cpp
+++ b/src/tbb/allocator.cpp
@@ -133,7 +133,13 @@ static const dynamic_link_descriptor MallocLinkTable[] = {
If that allocator is not found, it links to malloc and free. */
void initialize_handler_pointers() {
__TBB_ASSERT(allocate_handler == &initialize_allocate_handler, nullptr);
+#ifdef __e2k__
+ // FIXME: many tests failing, first one is "conformance_join_node"
+ // internalPoolFree -> assert isMallocInitialized()
+ bool success = false;
+#else
bool success = dynamic_link(MALLOCLIB_NAME, MallocLinkTable, 4);
+#endif
if(!success) {
// If unsuccessful, set the handlers to the default routines.
// This must be done now, and not before FillDynamicLinks runs, because if other
diff --git a/src/tbb/co_context.h b/src/tbb/co_context.h
index 60d5437..0e8fbd4 100644
--- a/src/tbb/co_context.h
+++ b/src/tbb/co_context.h
@@ -337,7 +337,12 @@ inline void create_coroutine(coroutine_type& c, std::size_t stack_size, void* ar
unsigned hi = unsigned(std::uint64_t(addr) >> 32);
__TBB_ASSERT(sizeof(addr) == 8 || hi == 0, nullptr);
+#ifdef __e2k__
+ err = makecontext_e2k(&c.my_context, (coroutine_func_t)co_local_wait_for_all, 2, hi, lo);
+ __TBB_ASSERT_EX(err >= 0, NULL);
+#else
makecontext(&c.my_context, (coroutine_func_t)co_local_wait_for_all, 2, hi, lo);
+#endif
}
inline void current_coroutine(coroutine_type& c) {
@@ -357,6 +362,10 @@ inline void destroy_coroutine(coroutine_type& c) {
// Clear the stack state afterwards
c.my_stack = nullptr;
c.my_stack_size = 0;
+#ifdef __e2k__
+ c.my_context.uc_stack.ss_sp = NULL;
+ freecontext_e2k(&c.my_context);
+#endif
}
#if __APPLE__
diff --git a/src/tbb/tools_api/ittnotify_config.h b/src/tbb/tools_api/ittnotify_config.h
index 0f5d80f..92b9acc 100644
--- a/src/tbb/tools_api/ittnotify_config.h
+++ b/src/tbb/tools_api/ittnotify_config.h
@@ -381,6 +381,9 @@ ITT_INLINE long __TBB_machine_fetchadd4(volatile void* ptr, long addend)
#else
#define __TBB_machine_fetchadd4(addr, val) __sync_fetch_and_add(addr, val)
#endif /* ITT_ARCH==ITT_ARCH_IA64 */
+#ifdef __e2k__
+#define __TBB_machine_fetchadd4(addr, val) __sync_fetch_and_add(addr, val)
+#endif
#ifndef ITT_SIMPLE_INIT
ITT_INLINE long
__itt_interlocked_increment(volatile long* ptr) ITT_INLINE_ATTRIBUTE;
@@ -397,11 +400,14 @@ __itt_interlocked_compare_exchange(volatile long* ptr, long exchange, long compe
}
#endif /* ITT_SIMPLE_INIT */
+#ifndef __e2k__
void* dlopen(const char*, int) __attribute__((weak));
void* dlsym(void*, const char*) __attribute__((weak));
int dlclose(void*) __attribute__((weak));
+#endif
#define DL_SYMBOLS (dlopen && dlsym && dlclose)
+#ifndef __e2k__
int pthread_mutex_init(pthread_mutex_t*, const pthread_mutexattr_t*) __attribute__((weak));
int pthread_mutex_lock(pthread_mutex_t*) __attribute__((weak));
int pthread_mutex_unlock(pthread_mutex_t*) __attribute__((weak));
@@ -410,6 +416,7 @@ int pthread_mutexattr_init(pthread_mutexattr_t*) __attribute__((weak));
int pthread_mutexattr_settype(pthread_mutexattr_t*, int) __attribute__((weak));
int pthread_mutexattr_destroy(pthread_mutexattr_t*) __attribute__((weak));
pthread_t pthread_self(void) __attribute__((weak));
+#endif
#define PTHREAD_SYMBOLS (pthread_mutex_init && pthread_mutex_lock && pthread_mutex_unlock && pthread_mutex_destroy && pthread_mutexattr_init && pthread_mutexattr_settype && pthread_mutexattr_destroy && pthread_self)
#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
diff --git a/test/tbb/test_openmp.cpp b/test/tbb/test_openmp.cpp
index 495be1c..b10b157 100644
--- a/test/tbb/test_openmp.cpp
+++ b/test/tbb/test_openmp.cpp
@@ -114,7 +114,12 @@ public:
int start = i < n ? 0 : i - n + 1;
int finish = i < m ? i + 1 : m;
data_type sum = 0;
+#ifdef __e2k__ /* compiler bug workaround */
+ int num = p; (void)num;
+#pragma omp parallel for reduction(+:sum) num_threads(num)
+#else
#pragma omp parallel for reduction(+:sum) num_threads(p)
+#endif
for (int j = start; j < finish; ++j)
sum += my_a[j] * my_b[i - j];
my_c[i] = sum;
--
2.34.1