Ginkgo Generated from branch based on main. Ginkgo version 1.11.0
A numerical linear algebra library targeting many-core architectures
Loading...
Searching...
No Matches
csr.hpp
1// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors
2//
3// SPDX-License-Identifier: BSD-3-Clause
4
5#ifndef GKO_PUBLIC_CORE_MATRIX_CSR_HPP_
6#define GKO_PUBLIC_CORE_MATRIX_CSR_HPP_
7
8
9#include <ginkgo/core/base/array.hpp>
10#include <ginkgo/core/base/index_set.hpp>
11#include <ginkgo/core/base/lin_op.hpp>
12#include <ginkgo/core/base/math.hpp>
13#include <ginkgo/core/matrix/permutation.hpp>
14#include <ginkgo/core/matrix/scaled_permutation.hpp>
15
16
17namespace gko {
18namespace matrix {
19
20
21template <typename ValueType>
22class Dense;
23
24template <typename ValueType>
25class Diagonal;
26
27template <typename ValueType, typename IndexType>
28class Coo;
29
30template <typename ValueType, typename IndexType>
31class Ell;
32
33template <typename ValueType, typename IndexType>
34class Hybrid;
35
36template <typename ValueType, typename IndexType>
37class Sellp;
38
39template <typename ValueType, typename IndexType>
40class SparsityCsr;
41
42template <typename ValueType, typename IndexType>
43class Csr;
44
45template <typename ValueType, typename IndexType>
46class Fbcsr;
47
48template <typename ValueType, typename IndexType>
50
51template <typename IndexType>
52class Permutation;
53
54
55namespace detail {
56
57
58template <typename ValueType = default_precision, typename IndexType = int32>
59void strategy_rebuild_helper(Csr<ValueType, IndexType>* result);
60
61
62} // namespace detail
63
64
103template <typename ValueType = default_precision, typename IndexType = int32>
104class Csr : public EnableLinOp<Csr<ValueType, IndexType>>,
105 public ConvertibleTo<Csr<next_precision<ValueType>, IndexType>>,
106#if GINKGO_ENABLE_HALF || GINKGO_ENABLE_BFLOAT16
107 public ConvertibleTo<Csr<next_precision<ValueType, 2>, IndexType>>,
108#endif
109#if GINKGO_ENABLE_HALF && GINKGO_ENABLE_BFLOAT16
110 public ConvertibleTo<Csr<next_precision<ValueType, 3>, IndexType>>,
111#endif
112 public ConvertibleTo<Dense<ValueType>>,
113 public ConvertibleTo<Coo<ValueType, IndexType>>,
114 public ConvertibleTo<Ell<ValueType, IndexType>>,
115 public ConvertibleTo<Fbcsr<ValueType, IndexType>>,
116 public ConvertibleTo<Hybrid<ValueType, IndexType>>,
117 public ConvertibleTo<Sellp<ValueType, IndexType>>,
118 public ConvertibleTo<SparsityCsr<ValueType, IndexType>>,
119 public DiagonalExtractable<ValueType>,
120 public ReadableFromMatrixData<ValueType, IndexType>,
121 public WritableToMatrixData<ValueType, IndexType>,
122 public Transposable,
123 public Permutable<IndexType>,
125 remove_complex<Csr<ValueType, IndexType>>>,
126 public ScaledIdentityAddable {
127 friend class EnablePolymorphicObject<Csr, LinOp>;
128 friend class Coo<ValueType, IndexType>;
129 friend class Dense<ValueType>;
130 friend class Diagonal<ValueType>;
131 friend class Ell<ValueType, IndexType>;
132 friend class Hybrid<ValueType, IndexType>;
133 friend class Sellp<ValueType, IndexType>;
134 friend class SparsityCsr<ValueType, IndexType>;
135 friend class Fbcsr<ValueType, IndexType>;
136 friend class CsrBuilder<ValueType, IndexType>;
137 friend class Csr<to_complex<ValueType>, IndexType>;
138 GKO_ASSERT_SUPPORTED_VALUE_AND_INDEX_TYPE;
139
140public:
141 using EnableLinOp<Csr>::convert_to;
142 using EnableLinOp<Csr>::move_to;
143 using ConvertibleTo<Csr<next_precision<ValueType>, IndexType>>::convert_to;
144 using ConvertibleTo<Csr<next_precision<ValueType>, IndexType>>::move_to;
145 using ConvertibleTo<Dense<ValueType>>::convert_to;
146 using ConvertibleTo<Dense<ValueType>>::move_to;
147 using ConvertibleTo<Coo<ValueType, IndexType>>::convert_to;
148 using ConvertibleTo<Coo<ValueType, IndexType>>::move_to;
149 using ConvertibleTo<Ell<ValueType, IndexType>>::convert_to;
150 using ConvertibleTo<Ell<ValueType, IndexType>>::move_to;
151 using ConvertibleTo<Fbcsr<ValueType, IndexType>>::convert_to;
152 using ConvertibleTo<Fbcsr<ValueType, IndexType>>::move_to;
153 using ConvertibleTo<Hybrid<ValueType, IndexType>>::convert_to;
154 using ConvertibleTo<Hybrid<ValueType, IndexType>>::move_to;
155 using ConvertibleTo<Sellp<ValueType, IndexType>>::convert_to;
156 using ConvertibleTo<Sellp<ValueType, IndexType>>::move_to;
157 using ConvertibleTo<SparsityCsr<ValueType, IndexType>>::convert_to;
158 using ConvertibleTo<SparsityCsr<ValueType, IndexType>>::move_to;
159 using ReadableFromMatrixData<ValueType, IndexType>::read;
160
161 using value_type = ValueType;
162 using index_type = IndexType;
163 using transposed_type = Csr<ValueType, IndexType>;
164 using mat_data = matrix_data<ValueType, IndexType>;
165 using device_mat_data = device_matrix_data<ValueType, IndexType>;
166 using absolute_type = remove_complex<Csr>;
167
168 class automatical;
169
177 friend class automatical;
178
179 public:
185 strategy_type(std::string name) : name_(name) {}
186
187 virtual ~strategy_type() = default;
188
194 std::string get_name() { return name_; }
195
202 virtual void process(const array<index_type>& mtx_row_ptrs,
203 array<index_type>* mtx_srow) = 0;
204
212 virtual int64_t clac_size(const int64_t nnz) = 0;
213
218 virtual std::shared_ptr<strategy_type> copy() = 0;
219
220 protected:
221 void set_name(std::string name) { name_ = name; }
222
223 private:
224 std::string name_;
225 };
226
233 class classical : public strategy_type {
234 public:
238 classical() : strategy_type("classical"), max_length_per_row_(0) {}
239
240 void process(const array<index_type>& mtx_row_ptrs,
241 array<index_type>* mtx_srow) override
242 {
243 auto host_mtx_exec = mtx_row_ptrs.get_executor()->get_master();
244 array<index_type> row_ptrs_host(host_mtx_exec);
245 const bool is_mtx_on_host{host_mtx_exec ==
246 mtx_row_ptrs.get_executor()};
247 const index_type* row_ptrs{};
248 if (is_mtx_on_host) {
249 row_ptrs = mtx_row_ptrs.get_const_data();
250 } else {
251 row_ptrs_host = mtx_row_ptrs;
252 row_ptrs = row_ptrs_host.get_const_data();
253 }
254 auto num_rows = mtx_row_ptrs.get_size() - 1;
255 max_length_per_row_ = 0;
256 for (size_type i = 0; i < num_rows; i++) {
257 max_length_per_row_ = std::max(max_length_per_row_,
258 row_ptrs[i + 1] - row_ptrs[i]);
259 }
260 }
261
262 int64_t clac_size(const int64_t nnz) override { return 0; }
263
264 index_type get_max_length_per_row() const noexcept
265 {
266 return max_length_per_row_;
267 }
268
269 std::shared_ptr<strategy_type> copy() override
270 {
271 return std::make_shared<classical>();
272 }
273
274 private:
275 index_type max_length_per_row_;
276 };
277
283 class merge_path : public strategy_type {
284 public:
288 merge_path() : strategy_type("merge_path") {}
289
290 void process(const array<index_type>& mtx_row_ptrs,
291 array<index_type>* mtx_srow) override
292 {}
293
294 int64_t clac_size(const int64_t nnz) override { return 0; }
295
296 std::shared_ptr<strategy_type> copy() override
297 {
298 return std::make_shared<merge_path>();
299 }
300 };
301
308 class cusparse : public strategy_type {
309 public:
313 cusparse() : strategy_type("cusparse") {}
314
315 void process(const array<index_type>& mtx_row_ptrs,
316 array<index_type>* mtx_srow) override
317 {}
318
319 int64_t clac_size(const int64_t nnz) override { return 0; }
320
321 std::shared_ptr<strategy_type> copy() override
322 {
323 return std::make_shared<cusparse>();
324 }
325 };
326
332 class sparselib : public strategy_type {
333 public:
337 sparselib() : strategy_type("sparselib") {}
338
339 void process(const array<index_type>& mtx_row_ptrs,
340 array<index_type>* mtx_srow) override
341 {}
342
343 int64_t clac_size(const int64_t nnz) override { return 0; }
344
345 std::shared_ptr<strategy_type> copy() override
346 {
347 return std::make_shared<sparselib>();
348 }
349 };
350
355 public:
362 [[deprecated]] load_balance()
363 : load_balance(std::move(
365 {}
366
372 load_balance(std::shared_ptr<const CudaExecutor> exec)
373 : load_balance(exec->get_num_warps(), exec->get_warp_size())
374 {}
375
381 load_balance(std::shared_ptr<const HipExecutor> exec)
382 : load_balance(exec->get_num_warps(), exec->get_warp_size(), false)
383 {}
384
392 load_balance(std::shared_ptr<const DpcppExecutor> exec)
393 : load_balance(exec->get_num_subgroups(), 32, false, "intel")
394 {}
395
407 load_balance(int64_t nwarps, int warp_size = 32,
408 bool cuda_strategy = true,
409 std::string strategy_name = "none")
410 : strategy_type("load_balance"),
411 nwarps_(nwarps),
412 warp_size_(warp_size),
413 cuda_strategy_(cuda_strategy),
414 strategy_name_(strategy_name)
415 {}
416
417 void process(const array<index_type>& mtx_row_ptrs,
418 array<index_type>* mtx_srow) override
419 {
420 auto nwarps = mtx_srow->get_size();
421
422 if (nwarps > 0) {
423 auto host_srow_exec = mtx_srow->get_executor()->get_master();
424 auto host_mtx_exec = mtx_row_ptrs.get_executor()->get_master();
425 const bool is_srow_on_host{host_srow_exec ==
426 mtx_srow->get_executor()};
427 const bool is_mtx_on_host{host_mtx_exec ==
428 mtx_row_ptrs.get_executor()};
429 array<index_type> row_ptrs_host(host_mtx_exec);
430 array<index_type> srow_host(host_srow_exec);
431 const index_type* row_ptrs{};
432 index_type* srow{};
433 if (is_srow_on_host) {
434 srow = mtx_srow->get_data();
435 } else {
436 srow_host = *mtx_srow;
437 srow = srow_host.get_data();
438 }
439 if (is_mtx_on_host) {
440 row_ptrs = mtx_row_ptrs.get_const_data();
441 } else {
442 row_ptrs_host = mtx_row_ptrs;
443 row_ptrs = row_ptrs_host.get_const_data();
444 }
445 for (size_type i = 0; i < nwarps; i++) {
446 srow[i] = 0;
447 }
448 const auto num_rows = mtx_row_ptrs.get_size() - 1;
449 const auto num_elems = row_ptrs[num_rows];
450 const auto bucket_divider =
451 num_elems > 0 ? ceildiv(num_elems, warp_size_) : 1;
452 for (size_type i = 0; i < num_rows; i++) {
453 auto bucket =
454 ceildiv((ceildiv(row_ptrs[i + 1], warp_size_) * nwarps),
455 bucket_divider);
456 if (bucket < nwarps) {
457 srow[bucket]++;
458 }
459 }
460 // find starting row for thread i
461 for (size_type i = 1; i < nwarps; i++) {
462 srow[i] += srow[i - 1];
463 }
464 if (!is_srow_on_host) {
465 *mtx_srow = srow_host;
466 }
467 }
468 }
469
470 int64_t clac_size(const int64_t nnz) override
471 {
472 if (warp_size_ > 0) {
473 int multiple = 8;
474 if (nnz >= static_cast<int64_t>(2e8)) {
475 multiple = 2048;
476 } else if (nnz >= static_cast<int64_t>(2e7)) {
477 multiple = 512;
478 } else if (nnz >= static_cast<int64_t>(2e6)) {
479 multiple = 128;
480 } else if (nnz >= static_cast<int64_t>(2e5)) {
481 multiple = 32;
482 }
483 if (strategy_name_ == "intel") {
484 multiple = 8;
485 if (nnz >= static_cast<int64_t>(2e8)) {
486 multiple = 256;
487 } else if (nnz >= static_cast<int64_t>(2e7)) {
488 multiple = 32;
489 }
490 }
491#if GINKGO_HIP_PLATFORM_HCC
492 if (!cuda_strategy_) {
493 multiple = 8;
494 if (nnz >= static_cast<int64_t>(1e7)) {
495 multiple = 64;
496 } else if (nnz >= static_cast<int64_t>(1e6)) {
497 multiple = 16;
498 }
499 }
500#endif // GINKGO_HIP_PLATFORM_HCC
501
502 auto nwarps = nwarps_ * multiple;
503 return min(ceildiv(nnz, warp_size_), nwarps);
504 } else {
505 return 0;
506 }
507 }
508
509 std::shared_ptr<strategy_type> copy() override
510 {
511 return std::make_shared<load_balance>(
512 nwarps_, warp_size_, cuda_strategy_, strategy_name_);
513 }
514
515 private:
516 int64_t nwarps_;
517 int warp_size_;
518 bool cuda_strategy_;
519 std::string strategy_name_;
520 };
521
522 class automatical : public strategy_type {
523 public:
524 /* Use imbalance strategy when the maximum number of nonzero per row is
525 * more than 1024 on NVIDIA hardware */
526 const index_type nvidia_row_len_limit = 1024;
527 /* Use imbalance strategy when the matrix has more more than 1e6 on
528 * NVIDIA hardware */
529 const index_type nvidia_nnz_limit{static_cast<index_type>(1e6)};
530 /* Use imbalance strategy when the maximum number of nonzero per row is
531 * more than 768 on AMD hardware */
532 const index_type amd_row_len_limit = 768;
533 /* Use imbalance strategy when the matrix has more more than 1e8 on AMD
534 * hardware */
535 const index_type amd_nnz_limit{static_cast<index_type>(1e8)};
536 /* Use imbalance strategy when the maximum number of nonzero per row is
537 * more than 25600 on Intel hardware */
538 const index_type intel_row_len_limit = 25600;
539 /* Use imbalance strategy when the matrix has more more than 3e8 on
540 * Intel hardware */
541 const index_type intel_nnz_limit{static_cast<index_type>(3e8)};
542
543 public:
550 [[deprecated]] automatical()
551 : automatical(std::move(
553 {}
554
560 automatical(std::shared_ptr<const CudaExecutor> exec)
561 : automatical(exec->get_num_warps(), exec->get_warp_size())
562 {}
563
569 automatical(std::shared_ptr<const HipExecutor> exec)
570 : automatical(exec->get_num_warps(), exec->get_warp_size(), false)
571 {}
572
580 automatical(std::shared_ptr<const DpcppExecutor> exec)
581 : automatical(exec->get_num_subgroups(), 32, false, "intel")
582 {}
583
595 automatical(int64_t nwarps, int warp_size = 32,
596 bool cuda_strategy = true,
597 std::string strategy_name = "none")
598 : strategy_type("automatical"),
599 nwarps_(nwarps),
600 warp_size_(warp_size),
601 cuda_strategy_(cuda_strategy),
602 strategy_name_(strategy_name),
603 max_length_per_row_(0)
604 {}
605
606 void process(const array<index_type>& mtx_row_ptrs,
607 array<index_type>* mtx_srow) override
608 {
609 // if the number of stored elements is larger than <nnz_limit> or
610 // the maximum number of stored elements per row is larger than
611 // <row_len_limit>, use load_balance otherwise use classical
612 index_type nnz_limit = nvidia_nnz_limit;
613 index_type row_len_limit = nvidia_row_len_limit;
614 if (strategy_name_ == "intel") {
615 nnz_limit = intel_nnz_limit;
616 row_len_limit = intel_row_len_limit;
617 }
618#if GINKGO_HIP_PLATFORM_HCC
619 if (!cuda_strategy_) {
620 nnz_limit = amd_nnz_limit;
621 row_len_limit = amd_row_len_limit;
622 }
623#endif // GINKGO_HIP_PLATFORM_HCC
624 auto host_mtx_exec = mtx_row_ptrs.get_executor()->get_master();
625 const bool is_mtx_on_host{host_mtx_exec ==
626 mtx_row_ptrs.get_executor()};
627 array<index_type> row_ptrs_host(host_mtx_exec);
628 const index_type* row_ptrs{};
629 if (is_mtx_on_host) {
630 row_ptrs = mtx_row_ptrs.get_const_data();
631 } else {
632 row_ptrs_host = mtx_row_ptrs;
633 row_ptrs = row_ptrs_host.get_const_data();
634 }
635 const auto num_rows = mtx_row_ptrs.get_size() - 1;
636 if (row_ptrs[num_rows] > nnz_limit) {
637 load_balance actual_strategy(nwarps_, warp_size_,
638 cuda_strategy_, strategy_name_);
639 if (is_mtx_on_host) {
640 actual_strategy.process(mtx_row_ptrs, mtx_srow);
641 } else {
642 actual_strategy.process(row_ptrs_host, mtx_srow);
643 }
644 this->set_name(actual_strategy.get_name());
645 } else {
646 index_type maxnum = 0;
647 for (size_type i = 0; i < num_rows; i++) {
648 maxnum = std::max(maxnum, row_ptrs[i + 1] - row_ptrs[i]);
649 }
650 if (maxnum > row_len_limit) {
651 load_balance actual_strategy(
652 nwarps_, warp_size_, cuda_strategy_, strategy_name_);
653 if (is_mtx_on_host) {
654 actual_strategy.process(mtx_row_ptrs, mtx_srow);
655 } else {
656 actual_strategy.process(row_ptrs_host, mtx_srow);
657 }
658 this->set_name(actual_strategy.get_name());
659 } else {
660 classical actual_strategy;
661 if (is_mtx_on_host) {
662 actual_strategy.process(mtx_row_ptrs, mtx_srow);
663 max_length_per_row_ =
664 actual_strategy.get_max_length_per_row();
665 } else {
666 actual_strategy.process(row_ptrs_host, mtx_srow);
667 max_length_per_row_ =
668 actual_strategy.get_max_length_per_row();
669 }
670 this->set_name(actual_strategy.get_name());
671 }
672 }
673 }
674
675 int64_t clac_size(const int64_t nnz) override
676 {
677 return std::make_shared<load_balance>(
678 nwarps_, warp_size_, cuda_strategy_, strategy_name_)
679 ->clac_size(nnz);
680 }
681
682 index_type get_max_length_per_row() const noexcept
683 {
684 return max_length_per_row_;
685 }
686
687 std::shared_ptr<strategy_type> copy() override
688 {
689 return std::make_shared<automatical>(
690 nwarps_, warp_size_, cuda_strategy_, strategy_name_);
691 }
692
693 private:
694 int64_t nwarps_;
695 int warp_size_;
696 bool cuda_strategy_;
697 std::string strategy_name_;
698 index_type max_length_per_row_;
699 };
700
701 friend class Csr<previous_precision<ValueType>, IndexType>;
702
703 void convert_to(
704 Csr<next_precision<ValueType>, IndexType>* result) const override;
705
706 void move_to(Csr<next_precision<ValueType>, IndexType>* result) override;
707
708#if GINKGO_ENABLE_HALF || GINKGO_ENABLE_BFLOAT16
709 friend class Csr<previous_precision<ValueType, 2>, IndexType>;
710 using ConvertibleTo<
712 using ConvertibleTo<Csr<next_precision<ValueType, 2>, IndexType>>::move_to;
713
714 void convert_to(
715 Csr<next_precision<ValueType, 2>, IndexType>* result) const override;
716
717 void move_to(Csr<next_precision<ValueType, 2>, IndexType>* result) override;
718#endif
719
720#if GINKGO_ENABLE_HALF && GINKGO_ENABLE_BFLOAT16
721 friend class Csr<previous_precision<ValueType, 3>, IndexType>;
722 using ConvertibleTo<
724 using ConvertibleTo<Csr<next_precision<ValueType, 3>, IndexType>>::move_to;
725
726 void convert_to(
727 Csr<next_precision<ValueType, 3>, IndexType>* result) const override;
728
729 void move_to(Csr<next_precision<ValueType, 3>, IndexType>* result) override;
730#endif
731
732 void convert_to(Dense<ValueType>* other) const override;
733
734 void move_to(Dense<ValueType>* other) override;
735
736 void convert_to(Coo<ValueType, IndexType>* result) const override;
737
738 void move_to(Coo<ValueType, IndexType>* result) override;
739
740 void convert_to(Ell<ValueType, IndexType>* result) const override;
741
742 void move_to(Ell<ValueType, IndexType>* result) override;
743
744 void convert_to(Fbcsr<ValueType, IndexType>* result) const override;
745
746 void move_to(Fbcsr<ValueType, IndexType>* result) override;
747
748 void convert_to(Hybrid<ValueType, IndexType>* result) const override;
749
750 void move_to(Hybrid<ValueType, IndexType>* result) override;
751
752 void convert_to(Sellp<ValueType, IndexType>* result) const override;
753
754 void move_to(Sellp<ValueType, IndexType>* result) override;
755
756 void convert_to(SparsityCsr<ValueType, IndexType>* result) const override;
757
758 void move_to(SparsityCsr<ValueType, IndexType>* result) override;
759
760 void read(const mat_data& data) override;
761
762 void read(const device_mat_data& data) override;
763
764 void read(device_mat_data&& data) override;
765
766 void write(mat_data& data) const override;
767
768 std::unique_ptr<LinOp> transpose() const override;
769
770 std::unique_ptr<LinOp> conj_transpose() const override;
771
777 class multiply_reuse_info {
778 friend class Csr;
779
780 public:
781 explicit multiply_reuse_info();
782
783 ~multiply_reuse_info();
784
785 multiply_reuse_info(const multiply_reuse_info&) = delete;
786
787 multiply_reuse_info(multiply_reuse_info&&) noexcept;
788
789 multiply_reuse_info& operator=(const multiply_reuse_info&) = delete;
790
791 multiply_reuse_info& operator=(multiply_reuse_info&&) noexcept;
792
799 ptr_param<Csr> out) const;
800
801 private:
802 struct lookup_data;
803
804 explicit multiply_reuse_info(std::unique_ptr<lookup_data> data);
805
806 std::unique_ptr<lookup_data> internal;
807 };
808
819 std::unique_ptr<Csr> multiply(ptr_param<const Csr> other) const;
820
837 std::pair<std::unique_ptr<Csr>, multiply_reuse_info> multiply_reuse(
838 ptr_param<const Csr> other) const;
839
845 class multiply_add_reuse_info {
846 friend class Csr;
847
848 public:
849 explicit multiply_add_reuse_info();
850
851 ~multiply_add_reuse_info();
852
853 multiply_add_reuse_info(const multiply_add_reuse_info&) = delete;
854
855 multiply_add_reuse_info(multiply_add_reuse_info&&) noexcept;
856
857 multiply_add_reuse_info& operator=(const multiply_add_reuse_info&) =
858 delete;
859
860 multiply_add_reuse_info& operator=(multiply_add_reuse_info&&) noexcept;
861
870 ptr_param<const Dense<value_type>> scale_mult,
871 ptr_param<const Csr> mtx_mult,
873 ptr_param<const Csr> mtx_add,
874 ptr_param<Csr> out) const;
875
876 private:
877 struct lookup_data;
878
879 explicit multiply_add_reuse_info(std::unique_ptr<lookup_data> data);
880
881 std::unique_ptr<lookup_data> internal;
882 };
883
899 std::unique_ptr<Csr> multiply_add(
900 ptr_param<const Dense<value_type>> scale_mult,
901 ptr_param<const Csr> mtx_mult,
903 ptr_param<const Csr> mtx_add) const;
904
926 std::pair<std::unique_ptr<Csr>, multiply_add_reuse_info> multiply_add_reuse(
927 ptr_param<const Dense<value_type>> scale_mult,
928 ptr_param<const Csr> mtx_mult,
930 ptr_param<const Csr> mtx_add) const;
931
937 class scale_add_reuse_info {
938 friend class Csr;
939
940 public:
941 explicit scale_add_reuse_info();
942
943 ~scale_add_reuse_info();
944
945 scale_add_reuse_info(const scale_add_reuse_info&) = delete;
946
947 scale_add_reuse_info(scale_add_reuse_info&&) noexcept;
948
949 scale_add_reuse_info& operator=(const scale_add_reuse_info&) = delete;
950
951 scale_add_reuse_info& operator=(scale_add_reuse_info&&) noexcept;
952
961 ptr_param<const Dense<value_type>> scale2,
962 ptr_param<const Csr> mtx2, ptr_param<Csr> out) const;
963
964 private:
965 struct lookup_data;
966
967 explicit scale_add_reuse_info(std::unique_ptr<lookup_data> data);
968
969 std::unique_ptr<lookup_data> internal;
970 };
971
986 std::unique_ptr<Csr> scale_add(
987 ptr_param<const Dense<value_type>> scale_this,
988 ptr_param<const Dense<value_type>> scale_other,
989 ptr_param<const Csr> mtx_other) const;
990
1012 std::pair<std::unique_ptr<Csr>, scale_add_reuse_info> add_scale_reuse(
1013 ptr_param<const Dense<value_type>> scale_this,
1014 ptr_param<const Dense<value_type>> scale_other,
1015 ptr_param<const Csr> mtx_other) const;
1016
1024
1027 std::unique_ptr<Permutation<index_type>> value_permutation);
1028
1037 ptr_param<Csr> output) const;
1038
1039 std::unique_ptr<Permutation<IndexType>> value_permutation;
1040 };
1041
1054 std::pair<std::unique_ptr<Csr>, permuting_reuse_info> transpose_reuse()
1055 const;
1056
1071 std::unique_ptr<Csr> permute(
1072 ptr_param<const Permutation<index_type>> permutation,
1074
1088 std::unique_ptr<Csr> permute(
1089 ptr_param<const Permutation<index_type>> row_permutation,
1090 ptr_param<const Permutation<index_type>> column_permutation,
1091 bool invert = false) const;
1092
1113 std::pair<std::unique_ptr<Csr>, permuting_reuse_info> permute_reuse(
1114 ptr_param<const Permutation<index_type>> permutation,
1116
1135 std::pair<std::unique_ptr<Csr>, permuting_reuse_info> permute_reuse(
1136 ptr_param<const Permutation<index_type>> row_permutation,
1137 ptr_param<const Permutation<index_type>> column_permutation,
1138 bool invert = false) const;
1139
1149 std::unique_ptr<Csr> scale_permute(
1152
1165 std::unique_ptr<Csr> scale_permute(
1167 row_permutation,
1169 column_permutation,
1170 bool invert = false) const;
1171
1172 std::unique_ptr<LinOp> permute(
1173 const array<IndexType>* permutation_indices) const override;
1174
1175 std::unique_ptr<LinOp> inverse_permute(
1176 const array<IndexType>* inverse_permutation_indices) const override;
1177
1178 std::unique_ptr<LinOp> row_permute(
1179 const array<IndexType>* permutation_indices) const override;
1180
1181 std::unique_ptr<LinOp> column_permute(
1182 const array<IndexType>* permutation_indices) const override;
1183
1184 std::unique_ptr<LinOp> inverse_row_permute(
1185 const array<IndexType>* inverse_permutation_indices) const override;
1186
1187 std::unique_ptr<LinOp> inverse_column_permute(
1188 const array<IndexType>* inverse_permutation_indices) const override;
1189
1190 std::unique_ptr<Diagonal<ValueType>> extract_diagonal() const override;
1191
1192 std::unique_ptr<absolute_type> compute_absolute() const override;
1193
1195
1200
1201 /*
1202 * Tests if all row entry pairs (value, col_idx) are sorted by column index
1203 *
1204 * @returns True if all row entry pairs (value, col_idx) are sorted by
1205 * column index
1206 */
1207 bool is_sorted_by_column_index() const;
1208
1214 value_type* get_values() noexcept { return values_.get_data(); }
1215
1223 const value_type* get_const_values() const noexcept
1224 {
1225 return values_.get_const_data();
1226 }
1227
1232 std::unique_ptr<Dense<ValueType>> create_value_view();
1233
1238 std::unique_ptr<const Dense<ValueType>> create_const_value_view() const;
1239
1245 index_type* get_col_idxs() noexcept { return col_idxs_.get_data(); }
1246
1254 const index_type* get_const_col_idxs() const noexcept
1255 {
1256 return col_idxs_.get_const_data();
1257 }
1258
1264 index_type* get_row_ptrs() noexcept { return row_ptrs_.get_data(); }
1265
1273 const index_type* get_const_row_ptrs() const noexcept
1274 {
1275 return row_ptrs_.get_const_data();
1276 }
1277
1283 index_type* get_srow() noexcept { return srow_.get_data(); }
1284
1292 const index_type* get_const_srow() const noexcept
1293 {
1294 return srow_.get_const_data();
1295 }
1296
1303 {
1304 return srow_.get_size();
1305 }
1306
1313 {
1314 return values_.get_size();
1315 }
1316
1321 std::shared_ptr<strategy_type> get_strategy() const noexcept
1322 {
1323 return strategy_;
1324 }
1325
1331 void set_strategy(std::shared_ptr<strategy_type> strategy)
1332 {
1333 strategy_ = std::move(strategy->copy());
1334 this->make_srow();
1335 }
1336
1344 {
1345 auto exec = this->get_executor();
1346 GKO_ASSERT_EQUAL_DIMENSIONS(alpha, dim<2>(1, 1));
1347 this->scale_impl(make_temporary_clone(exec, alpha).get());
1348 }
1349
1357 {
1358 auto exec = this->get_executor();
1359 GKO_ASSERT_EQUAL_DIMENSIONS(alpha, dim<2>(1, 1));
1360 this->inv_scale_impl(make_temporary_clone(exec, alpha).get());
1361 }
1362
1371 static std::unique_ptr<Csr> create(std::shared_ptr<const Executor> exec,
1372 std::shared_ptr<strategy_type> strategy);
1373
1385 static std::unique_ptr<Csr> create(
1386 std::shared_ptr<const Executor> exec, const dim<2>& size = {},
1387 size_type num_nonzeros = {},
1388 std::shared_ptr<strategy_type> strategy = nullptr);
1389
1409 static std::unique_ptr<Csr> create(
1410 std::shared_ptr<const Executor> exec, const dim<2>& size,
1411 array<value_type> values, array<index_type> col_idxs,
1412 array<index_type> row_ptrs,
1413 std::shared_ptr<strategy_type> strategy = nullptr);
1414
1419 template <typename InputValueType, typename InputColumnIndexType,
1420 typename InputRowPtrType>
1421 GKO_DEPRECATED(
1422 "explicitly construct the gko::array argument instead of passing "
1423 "initializer lists")
1424 static std::unique_ptr<Csr> create(
1425 std::shared_ptr<const Executor> exec, const dim<2>& size,
1426 std::initializer_list<InputValueType> values,
1427 std::initializer_list<InputColumnIndexType> col_idxs,
1428 std::initializer_list<InputRowPtrType> row_ptrs)
1429 {
1430 return create(exec, size, array<value_type>{exec, std::move(values)},
1431 array<index_type>{exec, std::move(col_idxs)},
1432 array<index_type>{exec, std::move(row_ptrs)});
1433 }
1434
1450 static std::unique_ptr<const Csr> create_const(
1451 std::shared_ptr<const Executor> exec, const dim<2>& size,
1452 gko::detail::const_array_view<ValueType>&& values,
1453 gko::detail::const_array_view<IndexType>&& col_idxs,
1454 gko::detail::const_array_view<IndexType>&& row_ptrs,
1455 std::shared_ptr<strategy_type> strategy = nullptr);
1456
1469 std::unique_ptr<Csr<ValueType, IndexType>> create_submatrix(
1470 const index_set<IndexType>& row_index_set,
1471 const index_set<IndexType>& column_index_set) const;
1472
1484 std::unique_ptr<Csr<ValueType, IndexType>> create_submatrix(
1485 const span& row_span, const span& column_span) const;
1486
1491
1498
1502 Csr(const Csr&);
1503
1510
1511protected:
1512 Csr(std::shared_ptr<const Executor> exec, const dim<2>& size = {},
1513 size_type num_nonzeros = {},
1514 std::shared_ptr<strategy_type> strategy = nullptr);
1515
1516 Csr(std::shared_ptr<const Executor> exec, const dim<2>& size,
1517 array<value_type> values, array<index_type> col_idxs,
1518 array<index_type> row_ptrs,
1519 std::shared_ptr<strategy_type> strategy = nullptr);
1520
1521 void apply_impl(const LinOp* b, LinOp* x) const override;
1522
1523 void apply_impl(const LinOp* alpha, const LinOp* b, const LinOp* beta,
1524 LinOp* x) const override;
1525
1526 // TODO: This provides some more sane settings. Please fix this!
1527 static std::shared_ptr<strategy_type> make_default_strategy(
1528 std::shared_ptr<const Executor> exec)
1529 {
1530 auto cuda_exec = std::dynamic_pointer_cast<const CudaExecutor>(exec);
1531 auto hip_exec = std::dynamic_pointer_cast<const HipExecutor>(exec);
1532 auto dpcpp_exec = std::dynamic_pointer_cast<const DpcppExecutor>(exec);
1533 std::shared_ptr<strategy_type> new_strategy;
1534 if (cuda_exec) {
1535 new_strategy = std::make_shared<automatical>(cuda_exec);
1536 } else if (hip_exec) {
1537 new_strategy = std::make_shared<automatical>(hip_exec);
1538 } else if (dpcpp_exec) {
1539 new_strategy = std::make_shared<automatical>(dpcpp_exec);
1540 } else {
1541 new_strategy = std::make_shared<classical>();
1542 }
1543 return new_strategy;
1544 }
1545
1546 // TODO clean this up as soon as we improve strategy_type
1547 template <typename CsrType>
1548 void convert_strategy_helper(CsrType* result) const
1549 {
1550 auto strat = this->get_strategy().get();
1551 std::shared_ptr<typename CsrType::strategy_type> new_strat;
1552 if (dynamic_cast<classical*>(strat)) {
1553 new_strat = std::make_shared<typename CsrType::classical>();
1554 } else if (dynamic_cast<merge_path*>(strat)) {
1555 new_strat = std::make_shared<typename CsrType::merge_path>();
1556 } else if (dynamic_cast<cusparse*>(strat)) {
1557 new_strat = std::make_shared<typename CsrType::cusparse>();
1558 } else if (dynamic_cast<sparselib*>(strat)) {
1559 new_strat = std::make_shared<typename CsrType::sparselib>();
1560 } else {
1561 auto rexec = result->get_executor();
1562 auto cuda_exec =
1563 std::dynamic_pointer_cast<const CudaExecutor>(rexec);
1564 auto hip_exec = std::dynamic_pointer_cast<const HipExecutor>(rexec);
1565 auto dpcpp_exec =
1566 std::dynamic_pointer_cast<const DpcppExecutor>(rexec);
1567 auto lb = dynamic_cast<load_balance*>(strat);
1568 if (cuda_exec) {
1569 if (lb) {
1570 new_strat =
1571 std::make_shared<typename CsrType::load_balance>(
1572 cuda_exec);
1573 } else {
1574 new_strat = std::make_shared<typename CsrType::automatical>(
1575 cuda_exec);
1576 }
1577 } else if (hip_exec) {
1578 if (lb) {
1579 new_strat =
1580 std::make_shared<typename CsrType::load_balance>(
1581 hip_exec);
1582 } else {
1583 new_strat = std::make_shared<typename CsrType::automatical>(
1584 hip_exec);
1585 }
1586 } else if (dpcpp_exec) {
1587 if (lb) {
1588 new_strat =
1589 std::make_shared<typename CsrType::load_balance>(
1590 dpcpp_exec);
1591 } else {
1592 new_strat = std::make_shared<typename CsrType::automatical>(
1593 dpcpp_exec);
1594 }
1595 } else {
1596 // Try to preserve this executor's configuration
1597 auto this_cuda_exec =
1598 std::dynamic_pointer_cast<const CudaExecutor>(
1599 this->get_executor());
1600 auto this_hip_exec =
1601 std::dynamic_pointer_cast<const HipExecutor>(
1602 this->get_executor());
1603 auto this_dpcpp_exec =
1604 std::dynamic_pointer_cast<const DpcppExecutor>(
1605 this->get_executor());
1606 if (this_cuda_exec) {
1607 if (lb) {
1608 new_strat =
1609 std::make_shared<typename CsrType::load_balance>(
1610 this_cuda_exec);
1611 } else {
1612 new_strat =
1613 std::make_shared<typename CsrType::automatical>(
1614 this_cuda_exec);
1615 }
1616 } else if (this_hip_exec) {
1617 if (lb) {
1618 new_strat =
1619 std::make_shared<typename CsrType::load_balance>(
1620 this_hip_exec);
1621 } else {
1622 new_strat =
1623 std::make_shared<typename CsrType::automatical>(
1624 this_hip_exec);
1625 }
1626 } else if (this_dpcpp_exec) {
1627 if (lb) {
1628 new_strat =
1629 std::make_shared<typename CsrType::load_balance>(
1630 this_dpcpp_exec);
1631 } else {
1632 new_strat =
1633 std::make_shared<typename CsrType::automatical>(
1634 this_dpcpp_exec);
1635 }
1636 } else {
                // FIXME: this changes strategies.
                // We had a load_balance or automatical strategy from an
                // executor other than CUDA, HIP or DPC++ and are moving
                // to another such executor, so fall back to classical.
1641 new_strat = std::make_shared<typename CsrType::classical>();
1642 }
1643 }
1644 }
1645 result->set_strategy(new_strat);
1646 }
1647
1651 void make_srow()
1652 {
1653 srow_.resize_and_reset(strategy_->clac_size(values_.get_size()));
1654 strategy_->process(row_ptrs_, &srow_);
1655 }
1656
    /**
     * Backing implementation for scale(): scales the matrix with a scalar,
     * i.e. computes `this <- alpha * this`.
     *
     * @param alpha  the (1x1) scaling operator
     */
    virtual void scale_impl(const LinOp* alpha);

    /**
     * Backing implementation for inv_scale(): scales the matrix with the
     * inverse of a scalar, i.e. computes `this <- 1/alpha * this`.
     *
     * @param alpha  the (1x1) scaling operator
     */
    virtual void inv_scale_impl(const LinOp* alpha);

private:
    // Decides which SpMV algorithm is used (classical, merge_path,
    // load_balance, sparselib, automatical, ...).
    std::shared_ptr<strategy_type> strategy_;
    // Values of the stored (nonzero) elements.
    array<value_type> values_;
    // Column index of each stored element, parallel to values_.
    array<index_type> col_idxs_;
    // CSR row offsets into values_/col_idxs_ (one entry per row plus one).
    array<index_type> row_ptrs_;
    // Starting rows per warp, used by the warp-based strategies;
    // filled by make_srow() via strategy_->process().
    array<index_type> srow_;

    void add_scaled_identity_impl(const LinOp* a, const LinOp* b) override;
1681};
1682
1683
1684namespace detail {
1685
1686
1693template <typename ValueType, typename IndexType>
1694void strategy_rebuild_helper(Csr<ValueType, IndexType>* result)
1695{
1696 using load_balance = typename Csr<ValueType, IndexType>::load_balance;
1697 using automatical = typename Csr<ValueType, IndexType>::automatical;
1698 auto strategy = result->get_strategy();
1699 auto executor = result->get_executor();
1700 if (std::dynamic_pointer_cast<load_balance>(strategy)) {
1701 if (auto exec =
1702 std::dynamic_pointer_cast<const HipExecutor>(executor)) {
1703 result->set_strategy(std::make_shared<load_balance>(exec));
1704 } else if (auto exec = std::dynamic_pointer_cast<const CudaExecutor>(
1705 executor)) {
1706 result->set_strategy(std::make_shared<load_balance>(exec));
1707 }
1708 } else if (std::dynamic_pointer_cast<automatical>(strategy)) {
1709 if (auto exec =
1710 std::dynamic_pointer_cast<const HipExecutor>(executor)) {
1711 result->set_strategy(std::make_shared<automatical>(exec));
1712 } else if (auto exec = std::dynamic_pointer_cast<const CudaExecutor>(
1713 executor)) {
1714 result->set_strategy(std::make_shared<automatical>(exec));
1715 }
1716 }
1717}
1718
1719
1720} // namespace detail
1721} // namespace matrix
1722} // namespace gko
1723
1724
1725#endif // GKO_PUBLIC_CORE_MATRIX_CSR_HPP_
This is the Executor subclass which represents the CUDA device.
Definition executor.hpp:1542
The diagonal of a LinOp implementing this interface can be extracted.
Definition lin_op.hpp:743
The EnableAbsoluteComputation mixin provides the default implementations of compute_absolute_linop an...
Definition lin_op.hpp:794
The EnableLinOp mixin can be used to provide sensible default implementations of the majority of the ...
Definition lin_op.hpp:879
This mixin inherits from (a subclass of) PolymorphicObject and provides a base implementation of a ne...
Definition polymorphic_object.hpp:668
The first step in using the Ginkgo library consists of creating an executor.
Definition executor.hpp:615
Definition lin_op.hpp:117
This is the Executor subclass which represents the OpenMP device (typically CPU).
Definition executor.hpp:1387
Linear operators which support permutation should implement the Permutable interface.
Definition lin_op.hpp:484
A LinOp implementing this interface can read its data from a matrix_data structure.
Definition lin_op.hpp:605
Adds the operation M <- a I + b M for matrix M, identity operator I and scalars a and b,...
Definition lin_op.hpp:818
Linear operators which support transposition should implement the Transposable interface.
Definition lin_op.hpp:433
A LinOp implementing this interface can write its data to a matrix_data structure.
Definition lin_op.hpp:660
An array is a container which encapsulates fixed-sized arrays, stored on the Executor tied to the arr...
Definition array.hpp:166
value_type * get_data() noexcept
Returns a pointer to the block of memory used to store the elements of the array.
Definition array.hpp:687
std::shared_ptr< const Executor > get_executor() const noexcept
Returns the Executor associated with the array.
Definition array.hpp:703
const value_type * get_const_data() const noexcept
Returns a constant pointer to the block of memory used to store the elements of the array.
Definition array.hpp:696
size_type get_size() const noexcept
Returns the number of elements in the array.
Definition array.hpp:670
This type is a device-side equivalent to matrix_data.
Definition device_matrix_data.hpp:36
An index set class represents an ordered set of intervals.
Definition index_set.hpp:56
COO stores a matrix in the coordinate matrix format.
Definition coo.hpp:65
Definition csr.hpp:49
Definition csr.hpp:522
std::shared_ptr< strategy_type > copy() override
Copy a strategy.
Definition csr.hpp:687
automatical(int64_t nwarps, int warp_size=32, bool cuda_strategy=true, std::string strategy_name="none")
Creates an automatical strategy with specified parameters.
Definition csr.hpp:595
automatical()
Creates an automatical strategy.
Definition csr.hpp:550
int64_t clac_size(const int64_t nnz) override
Computes the srow size according to the number of nonzeros.
Definition csr.hpp:675
automatical(std::shared_ptr< const CudaExecutor > exec)
Creates an automatical strategy with CUDA executor.
Definition csr.hpp:560
void process(const array< index_type > &mtx_row_ptrs, array< index_type > *mtx_srow) override
Computes srow according to row pointers.
Definition csr.hpp:606
automatical(std::shared_ptr< const DpcppExecutor > exec)
Creates an automatical strategy with Dpcpp executor.
Definition csr.hpp:580
automatical(std::shared_ptr< const HipExecutor > exec)
Creates an automatical strategy with HIP executor.
Definition csr.hpp:569
classical is a strategy_type which uses the same number of threads on each row.
Definition csr.hpp:233
void process(const array< index_type > &mtx_row_ptrs, array< index_type > *mtx_srow) override
Computes srow according to row pointers.
Definition csr.hpp:240
std::shared_ptr< strategy_type > copy() override
Copy a strategy.
Definition csr.hpp:269
classical()
Creates a classical strategy.
Definition csr.hpp:238
int64_t clac_size(const int64_t nnz) override
Computes the srow size according to the number of nonzeros.
Definition csr.hpp:262
cusparse is a strategy_type which uses the sparselib csr.
Definition csr.hpp:308
int64_t clac_size(const int64_t nnz) override
Computes the srow size according to the number of nonzeros.
Definition csr.hpp:319
std::shared_ptr< strategy_type > copy() override
Copy a strategy.
Definition csr.hpp:321
cusparse()
Creates a cusparse strategy.
Definition csr.hpp:313
void process(const array< index_type > &mtx_row_ptrs, array< index_type > *mtx_srow) override
Computes srow according to row pointers.
Definition csr.hpp:315
load_balance is a strategy_type which uses the load balance algorithm.
Definition csr.hpp:354
void process(const array< index_type > &mtx_row_ptrs, array< index_type > *mtx_srow) override
Computes srow according to row pointers.
Definition csr.hpp:417
std::shared_ptr< strategy_type > copy() override
Copy a strategy.
Definition csr.hpp:509
load_balance(std::shared_ptr< const HipExecutor > exec)
Creates a load_balance strategy with HIP executor.
Definition csr.hpp:381
load_balance()
Creates a load_balance strategy.
Definition csr.hpp:362
int64_t clac_size(const int64_t nnz) override
Computes the srow size according to the number of nonzeros.
Definition csr.hpp:470
load_balance(int64_t nwarps, int warp_size=32, bool cuda_strategy=true, std::string strategy_name="none")
Creates a load_balance strategy with specified parameters.
Definition csr.hpp:407
load_balance(std::shared_ptr< const CudaExecutor > exec)
Creates a load_balance strategy with CUDA executor.
Definition csr.hpp:372
load_balance(std::shared_ptr< const DpcppExecutor > exec)
Creates a load_balance strategy with DPCPP executor.
Definition csr.hpp:392
merge_path is a strategy_type which uses the merge_path algorithm.
Definition csr.hpp:283
int64_t clac_size(const int64_t nnz) override
Computes the srow size according to the number of nonzeros.
Definition csr.hpp:294
std::shared_ptr< strategy_type > copy() override
Copy a strategy.
Definition csr.hpp:296
merge_path()
Creates a merge_path strategy.
Definition csr.hpp:288
void process(const array< index_type > &mtx_row_ptrs, array< index_type > *mtx_srow) override
Computes srow according to row pointers.
Definition csr.hpp:290
void update_values(ptr_param< const Csr > mtx, ptr_param< const Dense< value_type > > scale_mult, ptr_param< const Csr > mtx_mult, ptr_param< const Dense< value_type > > scale_add, ptr_param< const Csr > mtx_add, ptr_param< Csr > out) const
Recomputes the sparse matrix-matrix product out = scale_mult * mtx * mtx_mult + scale_add * mtx_add w...
void update_values(ptr_param< const Csr > mtx1, ptr_param< const Csr > mtx2, ptr_param< Csr > out) const
Recomputes the sparse matrix-matrix product out = mtx1 * mtx2 when only the values of mtx1 and mtx2 c...
void update_values(ptr_param< const Dense< value_type > > scale1, ptr_param< const Csr > mtx1, ptr_param< const Dense< value_type > > scale2, ptr_param< const Csr > mtx2, ptr_param< Csr > out) const
Recomputes the sparse matrix-matrix sum out = scale1 * mtx1 + scale2 * mtx2 when only the values of m...
sparselib is a strategy_type which uses the sparselib csr.
Definition csr.hpp:332
int64_t clac_size(const int64_t nnz) override
Computes the srow size according to the number of nonzeros.
Definition csr.hpp:343
void process(const array< index_type > &mtx_row_ptrs, array< index_type > *mtx_srow) override
Computes srow according to row pointers.
Definition csr.hpp:339
sparselib()
Creates a sparselib strategy.
Definition csr.hpp:337
std::shared_ptr< strategy_type > copy() override
Copy a strategy.
Definition csr.hpp:345
strategy_type is to decide how to set the csr algorithm.
Definition csr.hpp:176
virtual int64_t clac_size(const int64_t nnz)=0
Computes the srow size according to the number of nonzeros.
std::string get_name()
Returns the name of strategy.
Definition csr.hpp:194
virtual std::shared_ptr< strategy_type > copy()=0
Copy a strategy.
virtual void process(const array< index_type > &mtx_row_ptrs, array< index_type > *mtx_srow)=0
Computes srow according to row pointers.
strategy_type(std::string name)
Creates a strategy_type.
Definition csr.hpp:185
CSR is a matrix format which stores only the nonzero coefficients by compressing each row of the matr...
Definition csr.hpp:126
std::pair< std::unique_ptr< Csr >, multiply_add_reuse_info > multiply_add_reuse(ptr_param< const Dense< value_type > > scale_mult, ptr_param< const Csr > mtx_mult, ptr_param< const Dense< value_type > > scale_add, ptr_param< const Csr > mtx_add) const
Computes the sparse matrix product scale_mult * this * mtx_mult + scale_add * mtx_add on the executor...
std::pair< std::unique_ptr< Csr >, permuting_reuse_info > permute_reuse(ptr_param< const Permutation< index_type > > permutation, permute_mode mode=permute_mode::symmetric) const
Computes the operations necessary to propagate changed values from a matrix A to a permuted matrix.
std::pair< std::unique_ptr< Csr >, multiply_reuse_info > multiply_reuse(ptr_param< const Csr > other) const
Computes the sparse matrix product this * other on the executor of this matrix, and necessary data fo...
Csr & operator=(const Csr &)
Copy-assigns a Csr matrix.
std::unique_ptr< Csr > scale_permute(ptr_param< const ScaledPermutation< value_type, index_type > > permutation, permute_mode=permute_mode::symmetric) const
Creates a scaled and permuted copy of this matrix.
std::unique_ptr< absolute_type > compute_absolute() const override
Gets the AbsoluteLinOp.
const index_type * get_const_row_ptrs() const noexcept
Returns the row pointers of the matrix.
Definition csr.hpp:1273
std::unique_ptr< Csr< ValueType, IndexType > > create_submatrix(const span &row_span, const span &column_span) const
Creates a submatrix from this Csr matrix given row and column spans.
static std::unique_ptr< Csr > create(std::shared_ptr< const Executor > exec, const dim< 2 > &size={}, size_type num_nonzeros={}, std::shared_ptr< strategy_type > strategy=nullptr)
Creates an uninitialized CSR matrix of the specified size.
const index_type * get_const_srow() const noexcept
Returns the starting rows.
Definition csr.hpp:1292
std::unique_ptr< Csr > multiply_add(ptr_param< const Dense< value_type > > scale_mult, ptr_param< const Csr > mtx_mult, ptr_param< const Dense< value_type > > scale_add, ptr_param< const Csr > mtx_add) const
Computes the sparse matrix product scale_mult * this * mtx_mult + scale_add * mtx_add on the executor...
void set_strategy(std::shared_ptr< strategy_type > strategy)
Set the strategy.
Definition csr.hpp:1331
void inv_scale(ptr_param< const LinOp > alpha)
Scales the matrix with the inverse of a scalar.
Definition csr.hpp:1356
index_type * get_srow() noexcept
Returns the starting rows.
Definition csr.hpp:1283
std::unique_ptr< Csr > scale_add(ptr_param< const Dense< value_type > > scale_this, ptr_param< const Dense< value_type > > scale_other, ptr_param< const Csr > mtx_other) const
Computes the sparse matrix sum scale_this * this + scale_other * mtx_other on the executor of this matr...
static std::unique_ptr< Csr > create(std::shared_ptr< const Executor > exec, std::shared_ptr< strategy_type > strategy)
Creates an uninitialized CSR matrix of the specified size.
size_type get_num_srow_elements() const noexcept
Returns the number of the srow stored elements (involved warps).
Definition csr.hpp:1302
std::pair< std::unique_ptr< Csr >, permuting_reuse_info > permute_reuse(ptr_param< const Permutation< index_type > > row_permutation, ptr_param< const Permutation< index_type > > column_permutation, bool invert=false) const
Computes the operations necessary to propagate changed values from a matrix A to a permuted matrix.
std::unique_ptr< Csr< ValueType, IndexType > > create_submatrix(const index_set< IndexType > &row_index_set, const index_set< IndexType > &column_index_set) const
Creates a submatrix from this Csr matrix given row and column index_set objects.
std::unique_ptr< Diagonal< ValueType > > extract_diagonal() const override
Extracts the diagonal entries of the matrix into a vector.
static std::unique_ptr< Csr > create(std::shared_ptr< const Executor > exec, const dim< 2 > &size, array< value_type > values, array< index_type > col_idxs, array< index_type > row_ptrs, std::shared_ptr< strategy_type > strategy=nullptr)
Creates a CSR matrix from already allocated (and initialized) row pointer, column index and value arr...
index_type * get_row_ptrs() noexcept
Returns the row pointers of the matrix.
Definition csr.hpp:1264
std::unique_ptr< Csr > permute(ptr_param< const Permutation< index_type > > permutation, permute_mode mode=permute_mode::symmetric) const
Creates a permuted copy of this matrix with the given permutation .
std::unique_ptr< const Dense< ValueType > > create_const_value_view() const
Creates a const Dense view of the value array of this matrix as a column vector of dimensions nnz x 1...
static std::unique_ptr< const Csr > create_const(std::shared_ptr< const Executor > exec, const dim< 2 > &size, gko::detail::const_array_view< ValueType > &&values, gko::detail::const_array_view< IndexType > &&col_idxs, gko::detail::const_array_view< IndexType > &&row_ptrs, std::shared_ptr< strategy_type > strategy=nullptr)
Creates a constant (immutable) Csr matrix from a set of constant arrays.
Csr(const Csr &)
Copy-constructs a Csr matrix.
Csr & operator=(Csr &&)
Move-assigns a Csr matrix.
std::unique_ptr< LinOp > transpose() const override
Returns a LinOp representing the transpose of the Transposable object.
std::unique_ptr< Csr > multiply(ptr_param< const Csr > other) const
Computes the sparse matrix product this * other on the executor of this matrix.
const value_type * get_const_values() const noexcept
Returns the values of the matrix.
Definition csr.hpp:1223
void compute_absolute_inplace() override
Compute absolute inplace on each element.
size_type get_num_stored_elements() const noexcept
Returns the number of elements explicitly stored in the matrix.
Definition csr.hpp:1312
std::shared_ptr< strategy_type > get_strategy() const noexcept
Returns the strategy.
Definition csr.hpp:1321
const index_type * get_const_col_idxs() const noexcept
Returns the column indexes of the matrix.
Definition csr.hpp:1254
void sort_by_column_index()
Sorts all (value, col_idx) pairs in each row by column index.
std::pair< std::unique_ptr< Csr >, permuting_reuse_info > transpose_reuse() const
Computes the necessary data to update a transposed matrix from its original matrix.
std::pair< std::unique_ptr< Csr >, scale_add_reuse_info > add_scale_reuse(ptr_param< const Dense< value_type > > scale_this, ptr_param< const Dense< value_type > > scale_other, ptr_param< const Csr > mtx_other) const
Computes the sparse matrix sum scale_this * this + scale_other * mtx_other on the executor of this matr...
std::unique_ptr< Csr > scale_permute(ptr_param< const ScaledPermutation< value_type, index_type > > row_permutation, ptr_param< const ScaledPermutation< value_type, index_type > > column_permutation, bool invert=false) const
Creates a scaled and permuted copy of this matrix.
std::unique_ptr< Dense< ValueType > > create_value_view()
Creates a Dense view of the value array of this matrix as a column vector of dimensions nnz x 1.
void scale(ptr_param< const LinOp > alpha)
Scales the matrix with a scalar.
Definition csr.hpp:1343
value_type * get_values() noexcept
Returns the values of the matrix.
Definition csr.hpp:1214
index_type * get_col_idxs() noexcept
Returns the column indexes of the matrix.
Definition csr.hpp:1245
Csr(Csr &&)
Move-constructs a Csr matrix.
std::unique_ptr< Csr > permute(ptr_param< const Permutation< index_type > > row_permutation, ptr_param< const Permutation< index_type > > column_permutation, bool invert=false) const
Creates a non-symmetrically permuted copy of this matrix with the given row and column permutations...
std::unique_ptr< LinOp > conj_transpose() const override
Returns a LinOp representing the conjugate transpose of the Transposable object.
Dense is a matrix format which explicitly stores all values of the matrix.
Definition dense.hpp:120
This class is a utility which efficiently implements the diagonal matrix (a linear operator which sca...
Definition diagonal.hpp:56
ELL is a matrix format where stride with explicit zeros is used such that all rows have the same numb...
Definition ell.hpp:66
Fixed-block compressed sparse row storage matrix format.
Definition fbcsr.hpp:116
HYBRID is a matrix format which splits the matrix into ELLPACK and COO format.
Definition hybrid.hpp:57
Permutation is a matrix format that represents a permutation matrix, i.e.
Definition permutation.hpp:112
ScaledPermutation is a matrix combining a permutation with scaling factors.
Definition scaled_permutation.hpp:38
SELL-P is a matrix format similar to ELL format.
Definition sellp.hpp:58
SparsityCsr is a matrix format which stores only the sparsity pattern of a sparse matrix by compressi...
Definition sparsity_csr.hpp:56
This class is used for function parameters in the place of raw pointers.
Definition utils_helper.hpp:41
The matrix namespace.
Definition dense_cache.hpp:24
permute_mode
Specifies how a permutation will be applied to a matrix.
Definition permutation.hpp:42
@ symmetric
The rows and columns will be permuted.
Definition permutation.hpp:53
The Ginkgo namespace.
Definition abstract_factory.hpp:20
typename detail::remove_complex_s< T >::type remove_complex
Obtain the type which removed the complex of complex/scalar type or the template parameter of class b...
Definition math.hpp:264
typename detail::to_complex_s< T >::type to_complex
Obtain the type which adds the complex of complex/scalar type or the template parameter of class by a...
Definition math.hpp:283
virtual void move_to(result_type *result)=0
Converts the implementer to an object of type result_type by moving data from this object.
virtual void convert_to(result_type *result) const =0
ConvertibleTo interface is used to mark that the implementer can be converted to the object of Result...
constexpr int64 ceildiv(int64 num, int64 den)
Performs integer division with rounding up.
Definition math.hpp:614
std::size_t size_type
Integral type used for allocation quantities.
Definition types.hpp:90
constexpr T min(const T &x, const T &y)
Returns the smaller of the arguments.
Definition math.hpp:750
std::unique_ptr< MatrixType > read(StreamType &&is, MatrixArgs &&... args)
Reads a matrix stored in matrix market format from an input stream.
Definition mtx_io.hpp:160
typename detail::find_precision_impl< T, -step >::type previous_precision
Obtains the previous precision type of T in the singly-linked precision list corresponding to bfloat16/half.
Definition math.hpp:473
detail::temporary_clone< detail::pointee< Ptr > > make_temporary_clone(std::shared_ptr< const Executor > exec, Ptr &&ptr)
Creates a temporary_clone.
Definition temporary_clone.hpp:208
typename detail::find_precision_impl< T, step >::type next_precision
Obtains the next precision type of T in the singly-linked precision list corresponding to bfloat16/half.
Definition math.hpp:466
void write(StreamType &&os, MatrixPtrType &&matrix, layout_type layout=detail::mtx_io_traits< std::remove_cv_t< detail::pointee< MatrixPtrType > > >::default_layout)
Writes a matrix into an output stream in matrix market format.
Definition mtx_io.hpp:299
STL namespace.
A type representing the dimensions of a multidimensional object.
Definition dim.hpp:26
permuting_reuse_info()
Creates an empty reuse info.
void update_values(ptr_param< const Csr > input, ptr_param< Csr > output) const
Propagates the values from an input matrix to the transformed matrix.
permuting_reuse_info(std::unique_ptr< Permutation< index_type > > value_permutation)
Creates a reuse info structure from its value permutation.
This structure is used as an intermediate data type to store a sparse matrix.
Definition matrix_data.hpp:126
A span is a lightweight structure used to create sub-ranges from other ranges.
Definition range.hpp:46