7#include <Kokkos_Core.hpp>
17template<
typename ValueType>
22template<
typename Kernel>
26 } -> std::same_as<void>;
29template<
typename Executor, parallelForKernel Kernel>
31 [[maybe_unused]]
const Executor& exec,
32 std::pair<localIdx, localIdx> range,
34 std::string
name =
"parallelFor"
37 auto [start, end] = range;
38 if constexpr (std::is_same<std::remove_reference_t<Executor>,
SerialExecutor>::value)
40 for (
localIdx i = start; i < end; i++)
47 using runOn =
typename Executor::exec;
50 Kokkos::RangePolicy<runOn>(start, end),
51 KOKKOS_LAMBDA(
const localIdx i) { kernel(i); }
57template<parallelForKernel Kernel>
60 std::pair<localIdx, localIdx> range,
62 std::string
name =
"parallelFor"
65 std::visit([&](
const auto& e) {
parallelFor(e, range, kernel,
name); }, exec);
69template<
typename Kernel,
typename ValueType>
73 } -> std::same_as<ValueType>;
83 [[maybe_unused]]
const Executor& exec,
84 ContType<ValueType>& container,
86 std::string
name =
"parallelFor"
89 auto view = container.view();
90 if constexpr (std::is_same<std::remove_reference_t<Executor>,
SerialExecutor>::value)
92 for (
localIdx i = 0; i < view.size(); i++)
99 using runOn =
typename Executor::exec;
100 Kokkos::parallel_for(
102 Kokkos::RangePolicy<runOn>(0, view.size()),
103 KOKKOS_LAMBDA(
const localIdx i) { view[i] = kernel(i); }
112 parallelForContainerKernel<ValueType> Kernel>
113void parallelFor(ContType<ValueType>& cont, Kernel kernel, std::string
name =
"parallelFor")
115 std::visit([&](
const auto& e) {
parallelFor(e, cont, kernel,
name); }, cont.exec());
118template<
typename Executor,
typename Kernel,
typename T>
120 [[maybe_unused]]
const Executor& exec,
121 std::pair<localIdx, localIdx> range,
126 auto [start, end] = range;
127 if constexpr (std::is_same<std::remove_reference_t<Executor>,
SerialExecutor>::value)
129 for (
localIdx i = start; i < end; i++)
131 if constexpr (Kokkos::is_reducer<T>::value)
133 kernel(i, value.reference());
143 using runOn =
typename Executor::exec;
144 Kokkos::parallel_reduce(
145 "parallelReduce", Kokkos::RangePolicy<runOn>(start, end), kernel, value
150template<
typename Kernel,
typename T>
152 const NeoN::Executor& exec, std::pair<localIdx, localIdx> range, Kernel kernel, T& value
155 std::visit([&](
const auto& e) {
parallelReduce(e, range, kernel, value); }, exec);
159template<
typename Executor,
typename ValueType,
typename Kernel,
typename T>
164 if constexpr (std::is_same<std::remove_reference_t<Executor>,
SerialExecutor>::value)
167 for (
localIdx i = 0; i < fieldSize; i++)
169 if constexpr (Kokkos::is_reducer<T>::value)
171 kernel(i, value.reference());
181 using runOn =
typename Executor::exec;
182 Kokkos::parallel_reduce(
183 "parallelReduce", Kokkos::RangePolicy<runOn>(0, field.
size()), kernel, value
188template<
typename ValueType,
typename Kernel,
typename T>
191 std::visit([&](
const auto& e) {
parallelReduce(e, field, kernel, value); }, field.
exec());
194template<
typename Executor,
typename Kernel>
196 [[maybe_unused]]
const Executor& exec, std::pair<localIdx, localIdx> range, Kernel kernel
199 auto [start, end] = range;
200 using runOn =
typename Executor::exec;
201 Kokkos::parallel_scan(
"parallelScan", Kokkos::RangePolicy<runOn>(start, end), kernel);
204template<
typename Kernel>
207 std::visit([&](
const auto& e) {
parallelScan(e, range, kernel); }, exec);
210template<
typename Executor,
typename Kernel,
typename ReturnType>
212 [[maybe_unused]]
const Executor& exec,
213 std::pair<localIdx, localIdx> range,
215 ReturnType& returnValue
218 auto [start, end] = range;
219 using runOn =
typename Executor::exec;
220 Kokkos::parallel_scan(
221 "parallelScan", Kokkos::RangePolicy<runOn>(start, end), kernel, returnValue
225template<
typename Kernel,
typename ReturnType>
228 std::pair<localIdx, localIdx> range,
230 ReturnType& returnValue
233 std::visit([&](
const auto& e) {
parallelScan(e, range, kernel, returnValue); }, exec);
Reference executor for serial CPU execution.
A class to contain the data and executors for a field and define some basic operations.
localIdx size() const
Gets the size of the field.
const Executor & exec() const
Gets the executor associated with the field.
void parallelFor(const Executor &exec, std::pair< localIdx, localIdx > range, Kernel kernel, std::string name="parallelFor")
std::variant< SerialExecutor, CPUExecutor, GPUExecutor > Executor
void parallelReduce(const Executor &exec, std::pair< localIdx, localIdx > range, Kernel kernel, T &value)
const std::string & name(const NeoN::Document &doc)
Retrieves the name of a Document.
void parallelScan(const Executor &exec, std::pair< localIdx, localIdx > range, Kernel kernel)