Kokkos Core Kernels Package Version of the Day
Loading...
Searching...
No Matches
Kokkos_Tuners.hpp
1//@HEADER
2// ************************************************************************
3//
4// Kokkos v. 4.0
5// Copyright (2022) National Technology & Engineering
6// Solutions of Sandia, LLC (NTESS).
7//
8// Under the terms of Contract DE-NA0003525 with NTESS,
9// the U.S. Government retains certain rights in this software.
10//
11// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
12// See https://kokkos.org/LICENSE for license information.
13// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
14//
15//@HEADER
16
17#ifndef KOKKOS_IMPL_PUBLIC_INCLUDE
18#include <Kokkos_Macros.hpp>
19static_assert(false,
20 "Including non-public Kokkos header files is not allowed.");
21#endif
22#ifndef KOKKOS_KOKKOS_TUNERS_HPP
23#define KOKKOS_KOKKOS_TUNERS_HPP
24
25#include <Kokkos_Macros.hpp>
26#include <Kokkos_Core_fwd.hpp>
27#include <Kokkos_ExecPolicy.hpp>
28#include <KokkosExp_MDRangePolicy.hpp>
29#include <impl/Kokkos_Profiling_Interface.hpp>
30
31#include <array>
32#include <utility>
33#include <tuple>
34#include <string>
35#include <vector>
36#include <map>
37#include <cassert>
38
39namespace Kokkos {
40namespace Tools {
41
42namespace Experimental {
43
44// forward declarations
45SetOrRange make_candidate_set(size_t size, int64_t* data);
46bool have_tuning_tool();
47size_t declare_output_type(const std::string&,
48 Kokkos::Tools::Experimental::VariableInfo);
49void request_output_values(size_t, size_t,
50 Kokkos::Tools::Experimental::VariableValue*);
51VariableValue make_variable_value(size_t, int64_t);
52VariableValue make_variable_value(size_t, double);
53SetOrRange make_candidate_range(double lower, double upper, double step,
54 bool openLower, bool openUpper);
55size_t get_new_context_id();
56void begin_context(size_t context_id);
57void end_context(size_t context_id);
58namespace Impl {
59
/**
 * One level of a hierarchical ("ragged") tuning space.
 *
 * ValueType is the type of the values selectable at this level.
 * ContainedType is the node type of the next level down; the
 * ValueHierarchyNode<ValueType, void> specialization marks the lowest level,
 * which has no children.
 */
template <typename ValueType, typename ContainedType>
struct ValueHierarchyNode;

// Interior node: root_values[i] is a value selectable at this level and
// sub_values[i] is the sub-space that belongs to it. The two vectors are
// filled independently; callers are expected to keep them index-aligned.
template <typename ValueType, typename ContainedType>
struct ValueHierarchyNode {
  std::vector<ValueType> root_values;
  std::vector<ContainedType> sub_values;
  // Appends a selectable value. Declared noexcept, so an allocation failure
  // inside push_back terminates the program instead of propagating.
  void add_root_value(const ValueType& in) noexcept {
    root_values.push_back(in);
  }
  // Appends the sub-space associated with the most recently added root value.
  void add_sub_container(const ContainedType& in) { sub_values.push_back(in); }
  // Unchecked access: index must be smaller than root_values.size().
  const ValueType& get_root_value(const size_t index) const {
    return root_values[index];
  }
  // Unchecked access: index must be smaller than sub_values.size().
  const ContainedType& get_sub_value(const size_t index) const {
    return sub_values[index];
  }
};

// Leaf node: only holds values, there is no nested level below it.
template <typename ValueType>
struct ValueHierarchyNode<ValueType, void> {
  std::vector<ValueType> root_values;
  // Takes the values by value and moves them into place.
  explicit ValueHierarchyNode(std::vector<ValueType> rv)
      : root_values(std::move(rv)) {}
  // Appends a selectable value. Declared noexcept, so an allocation failure
  // inside push_back terminates the program instead of propagating.
  void add_root_value(const ValueType& in) noexcept {
    root_values.push_back(in);
  }
  // Unchecked access: index must be smaller than root_values.size().
  const ValueType& get_root_value(const size_t index) const {
    return root_values[index];
  }
};
96
102template <class NestedMap>
104
105// Vectors are our lowest-level, no nested values
106template <class T>
107struct MapTypeConverter<std::vector<T>> {
108 using type = ValueHierarchyNode<T, void>;
109};
110
111// Maps contain both the "root" types and sub-vectors
112template <class K, class V>
113struct MapTypeConverter<std::map<K, V>> {
115};
116
122template <class NestedMap>
124
125// Vectors are our lowest-level, no nested values. Just fill in the fundamental
126// values
127template <class T>
128struct ValueHierarchyConstructor<std::vector<T>> {
129 using return_type = typename MapTypeConverter<std::vector<T>>::type;
130 static return_type build(const std::vector<T>& in) { return return_type{in}; }
131};
132
133// For maps, we need to fill in the fundamental values, and construct child
134// nodes
135template <class K, class V>
136struct ValueHierarchyConstructor<std::map<K, V>> {
137 using return_type = typename MapTypeConverter<std::map<K, V>>::type;
138 static return_type build(const std::map<K, V>& in) {
139 return_type node_to_build;
140 for (auto& entry : in) {
141 node_to_build.add_root_value(entry.first);
142 node_to_build.add_sub_container(
143 ValueHierarchyConstructor<V>::build(entry.second));
144 }
145 return node_to_build;
146 }
147};
148
/**
 * Compile-time depth of a nested container: the number of tuning dimensions
 * it describes. Exposed as the static member `value`.
 */
template <class InspectForDepth>
struct get_space_dimensionality;

// The dimensionality of a vector is 1
template <class T>
struct get_space_dimensionality<std::vector<T>> {
  static constexpr int value = 1;
};

// The dimensionality of a map is 1 (the map) plus the dimensionality
// of the map's value type
template <class K, class V>
struct get_space_dimensionality<std::map<K, V>> {
  static constexpr int value = 1 + get_space_dimensionality<V>::value;
};
172
// Generates the nested container type describing a Rank-dimensional sparse
// space over Element: Rank - 1 levels of std::map wrapped around a
// std::vector. The result is exposed as the nested alias `type`.
template <class Element, int Rank>
struct n_dimensional_sparse_structure {
  using type = std::map<
      Element,
      typename n_dimensional_sparse_structure<Element, Rank - 1>::type>;
};

// Recursion anchor: a one-dimensional space is a plain vector.
template <class Element>
struct n_dimensional_sparse_structure<Element, 1> {
  using type = std::vector<Element>;
};
186
193// First, a helper to get the value in one dimension
194template <class Container>
196
197// At any given level, just return your value at that level
198template <class RootType, class Subtype>
199struct DimensionValueExtractor<ValueHierarchyNode<RootType, Subtype>> {
200 static RootType get(const ValueHierarchyNode<RootType, Subtype>& dimension,
201 double fraction_to_traverse) {
202 size_t index = dimension.root_values.size() * fraction_to_traverse;
203 return dimension.get_root_value(index);
204 }
205};
206
212// At the bottom level, we have one double and a base-level ValueHierarchyNode
213
214template <class HierarchyNode, class... InterpolationIndices>
216
217template <class ValueType>
218struct GetMultidimensionalPoint<ValueHierarchyNode<ValueType, void>, double> {
219 using node_type = ValueHierarchyNode<ValueType, void>;
220 using return_type = std::tuple<ValueType>;
221 static return_type build(const node_type& in, double index) {
222 return std::make_tuple(DimensionValueExtractor<node_type>::get(in, index));
223 }
224};
225
226// At levels above the bottom, we tuple_cat the result of our child on the end
227// of our own tuple
228template <class ValueType, class Subtype, class... Indices>
229struct GetMultidimensionalPoint<ValueHierarchyNode<ValueType, Subtype>, double,
230 Indices...> {
231 using node_type = ValueHierarchyNode<ValueType, Subtype>;
232 using sub_tuple =
233 typename GetMultidimensionalPoint<Subtype, Indices...>::return_type;
234 using return_type = decltype(std::tuple_cat(
235 std::declval<std::tuple<ValueType>>(), std::declval<sub_tuple>()));
236 static return_type build(const node_type& in, double fraction_to_traverse,
237 Indices... indices) {
238 size_t index = in.sub_values.size() * fraction_to_traverse;
239 auto dimension_value = std::make_tuple(
240 DimensionValueExtractor<node_type>::get(in, fraction_to_traverse));
241 return std::tuple_cat(dimension_value,
242 GetMultidimensionalPoint<Subtype, Indices...>::build(
243 in.get_sub_value(index), indices...));
244 }
245};
246
247template <typename PointType, class ArrayType, size_t... Is>
248auto get_point_helper(const PointType& in, const ArrayType& indices,
249 std::index_sequence<Is...>) {
250 using helper = GetMultidimensionalPoint<
251 PointType,
252 decltype(std::get<Is>(std::declval<ArrayType>()).value.double_value)...>;
253 return helper::build(in, std::get<Is>(indices).value.double_value...);
254}
255
256template <typename PointType, typename ArrayType>
257struct GetPoint;
258
259template <typename PointType, size_t X>
260struct GetPoint<PointType,
261 std::array<Kokkos::Tools::Experimental::VariableValue, X>> {
262 using index_set_type =
263 std::array<Kokkos::Tools::Experimental::VariableValue, X>;
264 static auto build(const PointType& in, const index_set_type& indices) {
265 return get_point_helper(in, indices, std::make_index_sequence<X>{});
266 }
267};
268
269template <typename PointType, typename ArrayType>
270auto get_point(const PointType& point, const ArrayType& indices) {
271 return GetPoint<PointType, ArrayType>::build(point, indices);
272}
273
274} // namespace Impl
275
276template <template <class...> class Container, size_t MaxDimensionSize = 100,
277 class... TemplateArguments>
278class MultidimensionalSparseTuningProblem {
279 public:
280 using ProblemSpaceInput = Container<TemplateArguments...>;
281 static constexpr int space_dimensionality =
282 Impl::get_space_dimensionality<ProblemSpaceInput>::value;
283 static constexpr size_t max_space_dimension_size = MaxDimensionSize;
284 static constexpr double tuning_min = 0.0;
285 static constexpr double tuning_max = 0.999;
286
287 // Not declared as static constexpr to work around the following compiler bug
288 // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=96862
289 // where a floating-point expression cannot be constexpr under -frounding-math
290 double tuning_step = tuning_max / max_space_dimension_size;
291
292 using StoredProblemSpace =
293 typename Impl::MapTypeConverter<ProblemSpaceInput>::type;
294 using HierarchyConstructor =
295 typename Impl::ValueHierarchyConstructor<Container<TemplateArguments...>>;
296
297 using ValueArray = std::array<Kokkos::Tools::Experimental::VariableValue,
298 space_dimensionality>;
299 template <class Key, class Value>
300 using extended_map = std::map<Key, Value>;
301 template <typename Key>
302 using extended_problem =
303 MultidimensionalSparseTuningProblem<extended_map, MaxDimensionSize, Key,
304 ProblemSpaceInput>;
305 template <typename Key, typename Value>
306 using ExtendedProblemSpace =
307 typename Impl::MapTypeConverter<extended_map<Key, Value>>::type;
308
309 template <typename Key>
310 auto extend(const std::string& axis_name,
311 const std::vector<Key>& new_tuning_axis) const
312 -> extended_problem<Key> {
313 ExtendedProblemSpace<Key, ProblemSpaceInput> extended_space;
314 for (auto& key : new_tuning_axis) {
315 extended_space.add_root_value(key);
316 extended_space.add_sub_container(m_space);
317 }
318 std::vector<std::string> extended_names;
319 extended_names.reserve(m_variable_names.size() + 1);
320 extended_names.push_back(axis_name);
321 extended_names.insert(extended_names.end(), m_variable_names.begin(),
322 m_variable_names.end());
323 return extended_problem<Key>(extended_space, extended_names);
324 }
325
326 private:
327 StoredProblemSpace m_space;
328 std::array<size_t, space_dimensionality> variable_ids;
329 std::vector<std::string> m_variable_names;
330 size_t context;
331
332 public:
333 MultidimensionalSparseTuningProblem() = default;
334
335 MultidimensionalSparseTuningProblem(StoredProblemSpace space,
336 const std::vector<std::string>& names)
337 : m_space(std::move(space)), m_variable_names(names) {
338 assert(names.size() == space_dimensionality);
339 for (unsigned long x = 0; x < names.size(); ++x) {
340 VariableInfo info;
341 info.type = Kokkos::Tools::Experimental::ValueType::kokkos_value_double;
342 info.category = Kokkos::Tools::Experimental::StatisticalCategory::
343 kokkos_value_interval;
344 info.valueQuantity =
345 Kokkos::Tools::Experimental::CandidateValueType::kokkos_value_range;
346 info.candidates = Kokkos::Tools::Experimental::make_candidate_range(
347 tuning_min, tuning_max, tuning_step, true, true);
348 variable_ids[x] = declare_output_type(names[x], info);
349 }
350 }
351
352 MultidimensionalSparseTuningProblem(ProblemSpaceInput space,
353 const std::vector<std::string>& names)
354 : MultidimensionalSparseTuningProblem(HierarchyConstructor::build(space),
355 names) {}
356
357 template <typename... Coordinates>
358 auto get_point(Coordinates... coordinates) {
359 using ArrayType = std::array<Kokkos::Tools::Experimental::VariableValue,
360 sizeof...(coordinates)>;
361 return Impl::get_point(
362 m_space, ArrayType({Kokkos::Tools::Experimental::make_variable_value(
363 0, static_cast<double>(coordinates))...}));
364 }
365
366 auto begin() {
367 context = Kokkos::Tools::Experimental::get_new_context_id();
368 ValueArray values;
369 for (int x = 0; x < space_dimensionality; ++x) {
370 values[x] = Kokkos::Tools::Experimental::make_variable_value(
371 variable_ids[x], 0.0);
372 }
373 begin_context(context);
374 request_output_values(context, space_dimensionality, values.data());
375 return Impl::get_point(m_space, values);
376 }
377
378 auto end() { end_context(context); }
379};
380
/**
 * CRTP mixin for tuners that wrap a multidimensional tuning problem.
 *
 * The derived Tuner must provide `get_tuner() const`, returning the wrapped
 * problem object. That object must in turn provide `extend(name, axis)` and
 * `get_point(coordinates...)`.
 */
template <typename Tuner>
struct ExtendableTunerMixin {
  /**
   * Returns the wrapped problem extended by one additional axis called
   * axis_name whose candidate values are new_axis.
   */
  template <typename Key>
  auto combine(const std::string& axis_name,
               const std::vector<Key>& new_axis) const {
    const auto& sub_tuner = static_cast<const Tuner*>(this)->get_tuner();
    return sub_tuner.extend(axis_name, new_axis);
  }

  /**
   * Forwards explicit coordinates to the wrapped problem's get_point.
   *
   * The sub-tuner is held in a mutable local rather than bound to a const
   * reference, so the call compiles whether or not the wrapped problem
   * declares get_point const.
   */
  template <typename... Coordinates>
  auto get_point(Coordinates... coordinates) const {
    auto sub_tuner = static_cast<const Tuner*>(this)->get_tuner();
    return sub_tuner.get_point(coordinates...);
  }
};
396
397template <size_t MaxDimensionSize = 100, template <class...> class Container,
398 class... TemplateArguments>
399auto make_multidimensional_sparse_tuning_problem(
400 const Container<TemplateArguments...>& in, std::vector<std::string> names) {
401 return MultidimensionalSparseTuningProblem<Container, MaxDimensionSize,
402 TemplateArguments...>(in, names);
403}
404
405class TeamSizeTuner : public ExtendableTunerMixin<TeamSizeTuner> {
406 private:
407 using SpaceDescription = std::map<int64_t, std::vector<int64_t>>;
408 using TunerType = decltype(make_multidimensional_sparse_tuning_problem<20>(
409 std::declval<SpaceDescription>(),
410 std::declval<std::vector<std::string>>()));
411 TunerType tuner;
412
413 public:
414 TeamSizeTuner() = default;
415 TeamSizeTuner& operator=(const TeamSizeTuner& other) = default;
416 TeamSizeTuner(const TeamSizeTuner& other) = default;
417 TeamSizeTuner& operator=(TeamSizeTuner&& other) = default;
418 TeamSizeTuner(TeamSizeTuner&& other) = default;
419 template <typename ViableConfigurationCalculator, typename Functor,
420 typename TagType, typename... Properties>
421 TeamSizeTuner(const std::string& name,
423 const Functor& functor, const TagType& tag,
424 ViableConfigurationCalculator calc) {
425 using PolicyType = Kokkos::TeamPolicy<Properties...>;
426 auto initial_vector_length = policy.impl_vector_length();
427 if (initial_vector_length < 1) {
428 policy.impl_set_vector_length(1);
429 }
455 SpaceDescription space_description;
456
457 auto max_vector_length = PolicyType::vector_length_max();
458 std::vector<int64_t> allowed_vector_lengths;
459
460 if (policy.impl_auto_vector_length()) { // case 1 or 2
461 for (int vector_length = max_vector_length; vector_length >= 1;
462 vector_length /= 2) {
463 policy.impl_set_vector_length(vector_length);
476 auto max_team_size = calc.get_max_team_size(policy, functor, tag);
477 if ((policy.impl_auto_team_size()) ||
478 (policy.team_size() <= max_team_size)) {
479 allowed_vector_lengths.push_back(vector_length);
480 }
481 }
482 } else { // case 3, there's only one vector length to care about
483 allowed_vector_lengths.push_back(policy.impl_vector_length());
484 }
485
486 for (const auto vector_length : allowed_vector_lengths) {
487 std::vector<int64_t> allowed_team_sizes;
488 policy.impl_set_vector_length(vector_length);
489 auto max_team_size = calc.get_max_team_size(policy, functor, tag);
490 if (policy.impl_auto_team_size()) { // case 1 or 3, try all legal team
491 // sizes
492 for (int team_size = max_team_size; team_size >= 1; team_size /= 2) {
493 allowed_team_sizes.push_back(team_size);
494 }
495 } else { // case 2, just try the provided team size
496 allowed_team_sizes.push_back(policy.team_size());
497 }
498 space_description[vector_length] = allowed_team_sizes;
499 }
500 tuner = make_multidimensional_sparse_tuning_problem<20>(
501 space_description, {std::string(name + "_vector_length"),
502 std::string(name + "_team_size")});
503 policy.impl_set_vector_length(initial_vector_length);
504 }
505
506 template <typename... Properties>
507 void tune(Kokkos::TeamPolicy<Properties...>& policy) {
508 if (Kokkos::Tools::Experimental::have_tuning_tool()) {
509 auto configuration = tuner.begin();
510 auto team_size = std::get<1>(configuration);
511 auto vector_length = std::get<0>(configuration);
512 if (vector_length > 0) {
513 policy.impl_set_team_size(team_size);
514 policy.impl_set_vector_length(vector_length);
515 }
516 }
517 }
518 void end() {
519 if (Kokkos::Tools::Experimental::have_tuning_tool()) {
520 tuner.end();
521 }
522 }
523
524 TunerType get_tuner() const { return tuner; }
525};
526
namespace Impl {

// Innermost dimension: appends every power of two that is strictly smaller
// than tile_size (1, 2, 4, ...) to the vector.
template <typename T>
void fill_tile(std::vector<T>& cont, int tile_size) {
  int candidate = 1;
  while (candidate < tile_size) {
    cont.push_back(candidate);
    candidate *= 2;
  }
}

// Outer dimension: for every power of two strictly smaller than tile_size,
// creates a map entry and fills it recursively with the budget that is left
// once this dimension's extent has been divided out.
template <typename T, typename Mapped>
void fill_tile(std::map<T, Mapped>& cont, int tile_size) {
  int candidate = 1;
  while (candidate < tile_size) {
    Mapped& remaining_dimensions = cont[candidate];
    fill_tile(remaining_dimensions, tile_size / candidate);
    candidate *= 2;
  }
}
}  // namespace Impl
542
543template <int MDRangeRank>
544struct MDRangeTuner : public ExtendableTunerMixin<MDRangeTuner<MDRangeRank>> {
545 private:
546 static constexpr int rank = MDRangeRank;
547 static constexpr int max_slices = 15;
548 using SpaceDescription =
549 typename Impl::n_dimensional_sparse_structure<int, rank>::type;
550 using TunerType =
551 decltype(make_multidimensional_sparse_tuning_problem<max_slices>(
552 std::declval<SpaceDescription>(),
553 std::declval<std::vector<std::string>>()));
554 TunerType tuner;
555
556 public:
557 MDRangeTuner() = default;
558 template <typename Functor, typename TagType, typename Calculator,
559 typename... Properties>
560 MDRangeTuner(const std::string& name,
561 const Kokkos::MDRangePolicy<Properties...>& policy,
562 const Functor& functor, const TagType& tag, Calculator calc) {
563 SpaceDescription desc;
564 int max_tile_size =
565 calc.get_mdrange_max_tile_size_product(policy, functor, tag);
566 Impl::fill_tile(desc, max_tile_size);
567 std::vector<std::string> feature_names;
568 for (int x = 0; x < rank; ++x) {
569 feature_names.push_back(name + "_tile_size_" + std::to_string(x));
570 }
571 tuner = make_multidimensional_sparse_tuning_problem<max_slices>(
572 desc, feature_names);
573 }
574 template <typename Policy, typename Tuple, size_t... Indices>
575 void set_policy_tile(Policy& policy, const Tuple& tuple,
576 const std::index_sequence<Indices...>&) {
577 policy.impl_change_tile_size({std::get<Indices>(tuple)...});
578 }
579 template <typename... Properties>
580 void tune(Kokkos::MDRangePolicy<Properties...>& policy) {
581 if (Kokkos::Tools::Experimental::have_tuning_tool()) {
582 auto configuration = tuner.begin();
583 set_policy_tile(policy, configuration, std::make_index_sequence<rank>{});
584 }
585 }
586 void end() {
587 if (Kokkos::Tools::Experimental::have_tuning_tool()) {
588 tuner.end();
589 }
590 }
591
592 TunerType get_tuner() const { return tuner; }
593};
594
595template <class Choice>
596struct CategoricalTuner {
597 using choice_list = std::vector<Choice>;
598 choice_list choices;
599 size_t context;
600 size_t tuning_variable_id;
601 CategoricalTuner(std::string name, choice_list m_choices)
602 : choices(m_choices) {
603 std::vector<int64_t> indices;
604 for (typename decltype(choices)::size_type x = 0; x < choices.size(); ++x) {
605 indices.push_back(x);
606 }
607 VariableInfo info;
608 info.category = StatisticalCategory::kokkos_value_categorical;
609 info.valueQuantity = CandidateValueType::kokkos_value_set;
610 info.type = ValueType::kokkos_value_int64;
611 info.candidates = make_candidate_set(indices.size(), indices.data());
612 tuning_variable_id = declare_output_type(name, info);
613 }
614 const Choice& begin() {
615 context = get_new_context_id();
616 begin_context(context);
617 VariableValue value = make_variable_value(tuning_variable_id, int64_t(0));
618 request_output_values(context, 1, &value);
619 return choices[value.value.int_value];
620 }
621 void end() { end_context(context); }
622};
623
624template <typename Choice>
625auto make_categorical_tuner(std::string name, std::vector<Choice> choices)
626 -> CategoricalTuner<Choice> {
627 return CategoricalTuner<Choice>(name, choices);
628}
629
630} // namespace Experimental
631} // namespace Tools
632} // namespace Kokkos
633
634#endif
Execution policy for parallel work over a league of teams of threads.