7#include <Kokkos_Core.hpp>
18template<
typename ValueType>
23template<
typename Kernel>
27 } -> std::same_as<void>;
32template<
typename ExecutorType>
36 if (logger !=
nullptr)
43template<
typename ExecutorType, parallelForKernel Kernel>
45 const ExecutorType& exec, std::pair<localIdx, localIdx> range, Kernel kernel, std::string
name
48 auto [start, end] = range;
50 if constexpr (std::is_same<std::remove_reference_t<ExecutorType>,
SerialExecutor>::value)
52 for (
localIdx i = start; i < end; i++)
59 using runOn =
typename ExecutorType::exec;
62 Kokkos::RangePolicy<runOn>(start, end),
63 KOKKOS_LAMBDA(
const localIdx i) { kernel(i); }
70template<parallelForKernel Kernel>
73 std::pair<localIdx, localIdx> range,
75 std::string
name =
"parallelFor"
78 std::visit([&](
const auto& e) {
parallelFor(e, range, kernel,
name); }, exec);
82template<
typename Kernel,
typename ValueType>
86 } -> std::same_as<ValueType>;
97 ContType<ValueType>& container,
99 std::string
name =
"parallelFor"
102 auto view = container.view();
103 if constexpr (std::is_same<std::remove_reference_t<Executor>,
SerialExecutor>::value)
105 for (
localIdx i = 0; i < view.size(); i++)
112 using runOn =
typename Executor::exec;
113 Kokkos::parallel_for(
115 Kokkos::RangePolicy<runOn>(0, view.size()),
116 KOKKOS_LAMBDA(
const localIdx i) { view[i] = kernel(i); }
125 parallelForContainerKernel<ValueType> Kernel>
126void parallelFor(ContType<ValueType>& cont, Kernel kernel, std::string
name =
"parallelFor")
128 std::visit([&](
const auto& e) {
parallelFor(e, cont, kernel,
name); }, cont.exec());
131template<
typename Executor,
typename Kernel,
typename T>
133 [[maybe_unused]]
const Executor& exec,
134 std::pair<localIdx, localIdx> range,
139 auto [start, end] = range;
140 if constexpr (std::is_same<std::remove_reference_t<Executor>,
SerialExecutor>::value)
142 for (
localIdx i = start; i < end; i++)
144 if constexpr (Kokkos::is_reducer<T>::value)
146 kernel(i, value.reference());
156 using runOn =
typename Executor::exec;
157 Kokkos::parallel_reduce(
158 "parallelReduce", Kokkos::RangePolicy<runOn>(start, end), kernel, value
163template<
typename Kernel,
typename T>
165 const NeoN::Executor& exec, std::pair<localIdx, localIdx> range, Kernel kernel, T& value
168 std::visit([&](
const auto& e) {
parallelReduce(e, range, kernel, value); }, exec);
172template<
typename Executor,
typename ValueType,
typename Kernel,
typename T>
177 if constexpr (std::is_same<std::remove_reference_t<Executor>,
SerialExecutor>::value)
180 for (
localIdx i = 0; i < fieldSize; i++)
182 if constexpr (Kokkos::is_reducer<T>::value)
184 kernel(i, value.reference());
194 using runOn =
typename Executor::exec;
195 Kokkos::parallel_reduce(
196 "parallelReduce", Kokkos::RangePolicy<runOn>(0, field.
size()), kernel, value
201template<
typename ValueType,
typename Kernel,
typename T>
204 std::visit([&](
const auto& e) {
parallelReduce(e, field, kernel, value); }, field.
exec());
207template<
typename Executor,
typename Kernel>
209 [[maybe_unused]]
const Executor& exec, std::pair<localIdx, localIdx> range, Kernel kernel
212 auto [start, end] = range;
213 using runOn =
typename Executor::exec;
214 Kokkos::parallel_scan(
"parallelScan", Kokkos::RangePolicy<runOn>(start, end), kernel);
217template<
typename Kernel>
220 std::visit([&](
const auto& e) {
parallelScan(e, range, kernel); }, exec);
223template<
typename Executor,
typename Kernel,
typename ReturnType>
225 [[maybe_unused]]
const Executor& exec,
226 std::pair<localIdx, localIdx> range,
228 ReturnType& returnValue
231 auto [start, end] = range;
232 using runOn =
typename Executor::exec;
233 Kokkos::parallel_scan(
234 "parallelScan", Kokkos::RangePolicy<runOn>(start, end), kernel, returnValue
238template<
typename Kernel,
typename ReturnType>
241 std::pair<localIdx, localIdx> range,
243 ReturnType& returnValue
246 std::visit([&](
const auto& e) {
parallelScan(e, range, kernel, returnValue); }, exec);
Reference executor for serial CPU execution.
A class to contain the data and executors for a field and define some basic operations.
localIdx size() const
Gets the size of the field.
const Executor & exec() const
Gets the executor associated with the field.
void fenceIfLogger(const ExecutorType &exec)
void fence(const Executor &exec)
std::shared_ptr< const Logging::BaseLogger > getLogger(const Executor &exec)
std::variant< SerialExecutor, CPUExecutor, GPUExecutor > Executor
void parallelReduce(const Executor &exec, std::pair< localIdx, localIdx > range, Kernel kernel, T &value)
const std::string & name(const NeoN::Document &doc)
Retrieves the name of a Document.
void parallelFor(const ExecutorType &exec, std::pair< localIdx, localIdx > range, Kernel kernel, std::string name)
void parallelScan(const Executor &exec, std::pair< localIdx, localIdx > range, Kernel kernel)