LCI v2.0.0-dev
For Asynchronous Multithreaded Communication
Loading...
Searching...
No Matches
lci.hpp
Go to the documentation of this file.
1// Copyright (c) 2025 The LCI Project Authors
2// SPDX-License-Identifier: NCSA
3
4#ifndef LCI_API_LCI_HPP
5#define LCI_API_LCI_HPP
6
7#include <memory>
8#include <stdexcept>
9#include <vector>
10#include <string>
11#include <cstring>
12#include <functional>
13
14#include "lci_config.hpp"
15
20
25
30
35
40
45
51
56
61namespace lci
62{
63extern const char* DEFAULT_NAME;
64
71
78
/**
 * @brief Minimal stand-in for std::optional, used because this header does
 *        not want to force C++17.
 *
 * Stores a value of type T together with a flag that records whether the
 * value counts as "set". An unset option may still carry a value (see the
 * two-argument constructor), which acts as a default.
 *
 * @tparam T Stored value type; it is copied on construction and on every
 *           accessor call, and must be value-initializable for the default
 *           constructor.
 */
template <typename T>
struct option_t {
  /// Creates an unset option holding a value-initialized T.
  option_t() : m_value(), m_is_set(false) {}

  /// Creates a set option holding @p value_. Implicit, so a plain T converts
  /// to a set option.
  option_t(T value_) : m_value(value_), m_is_set(true) {}

  /// Creates an option holding @p value_ whose set-state is @p is_set_.
  /// Passing false stores a default value without marking the option as set.
  option_t(T value_, bool is_set_) : m_value(value_), m_is_set(is_set_) {}

  /// Returns the stored value if the option is set, otherwise
  /// @p default_value.
  T get_value_or(T default_value) const
  {
    return m_is_set ? m_value : default_value;
  }

  /// Copies the stored value into @p *value only when the option is set.
  /// @param value Destination; must be a valid pointer (it is dereferenced
  ///              without a null check when the option is set).
  /// @return true if the option was set and @p *value was written.
  bool get_set_value(T* value) const
  {
    if (m_is_set) {
      *value = this->m_value;
      return true;
    }
    return false;
  }

  /// Returns the stored value regardless of whether the option is set.
  T get_value() const { return m_value; }

  /// Returns whether the option is marked as set.
  bool is_set() const { return m_is_set; }

  /// Implicit conversion to the stored value (ignores the set flag).
  operator T() const { return m_value; }

  // Declared in this order because the constructors initialize m_value
  // before m_is_set.
  T m_value;      ///< Stored value (meaningful as a default when unset).
  bool m_is_set;  ///< True when the value was explicitly set.
};
104} // namespace lci
105
106#include "lci_binding_pre.hpp"
107
108namespace lci
109{
148
154const char* get_errorcode_str(errorcode_t errorcode);
155
162struct error_t {
169 error_t(errorcode_t errorcode_) : errorcode(errorcode_) {}
178 bool is_done() const
179 {
182 }
183
187 bool is_posted() const
188 {
191 }
192
196 bool is_retry() const
197 {
200 }
201
205 const char* get_str() const { return lci::get_errorcode_str(errorcode); }
206};
207
219
226
237
244
255
262
273
280
286using net_imm_data_t = uint32_t;
287
301
// A special mr_t value for host memory (a default-constructed mr_t).
306const mr_t MR_HOST = mr_t();
307
// A special mr_t value for device memory (built from the sentinel pointer
// value 0x1, not a real memory-region implementation).
312const mr_t MR_DEVICE = mr_t(reinterpret_cast<void*>(0x1));
313
// A special mr_t value for unknown memory; LCI will detect the memory type
// automatically (built from the sentinel pointer value 0x2).
319const mr_t MR_UNKNOWN = mr_t(reinterpret_cast<void*>(0x2));
320
321inline bool mr_t::is_empty() const
322{
323 return reinterpret_cast<uintptr_t>(p_impl) < 3;
324}
325
/**
 * @brief The type of remote memory region.
 *
 * A plain pair of integers; a default-constructed object has both fields
 * zero and is considered empty.
 */
struct rmr_t {
  uintptr_t base;        ///< Base address value (0 in the empty state).
  uint64_t opaque_rkey;  ///< Opaque remote key value (0 in the empty state).

  /// Builds the empty descriptor: both fields zero.
  rmr_t() : base{0}, opaque_rkey{0} {}

  /// @return true only when both base and opaque_rkey are zero.
  bool is_empty() const
  {
    const bool has_content = (base != 0) || (opaque_rkey != 0);
    return !has_content;
  }
};
338
344
349using tag_t = uint64_t;
350
355enum class direction_t {
358};
359
366using rcomp_t = uint32_t;
367
// Special rank value for any-source receive.
372const int ANY_SOURCE = -1;
373
// Special tag value for any-tag receive (-1 converted to tag_t, i.e. all
// bits set since tag_t is uint64_t).
378const tag_t ANY_TAG = static_cast<tag_t>(-1);
379
385enum class matching_entry_type_t : unsigned {
386 send = 0,
387 recv = 1,
388};
389
394enum class matching_policy_t : unsigned {
395 none = 0,
399 max = 4,
400};
401
405using matching_entry_key_t = uint64_t;
411
424
430 virtual void* allocate(size_t size) = 0;
431 virtual void deallocate(void* ptr) = 0;
432 virtual ~allocator_base_t() = default;
433};
434
436 void* allocate(size_t size) { return malloc(size); }
437 void deallocate(void* ptr) { free(ptr); }
438};
439extern allocator_default_t g_allocator_default;
440
445struct status_t {
447 int rank = -1;
448 void* buffer = nullptr;
449 size_t size = 0;
451 void* user_context = nullptr;
452 status_t() = default;
453 status_t(errorcode_t error_) : error(error_) {}
454 explicit status_t(void* user_context_)
455 : error(errorcode_t::done), user_context(user_context_)
456 {
457 }
461 bool is_done() const { return error.is_done(); }
462 bool is_posted() const { return error.is_posted(); }
463 bool is_retry() const { return error.is_retry(); }
464 error_t get_error() const { return error; }
465 int get_rank() const { return rank; }
466 void* get_buffer() const { return buffer; }
467 size_t get_size() const { return size; }
468 tag_t get_tag() const { return tag; }
469 void* get_user_context() const { return user_context; }
470};
471
477const comp_t COMP_NULL = comp_t(reinterpret_cast<comp_impl_t*>(0x0));
478
484 comp_t(reinterpret_cast<comp_impl_t*>(0x0));
485
490const comp_t COMP_NULL_RETRY = comp_t(reinterpret_cast<comp_impl_t*>(0x1));
491
497 comp_t(reinterpret_cast<comp_impl_t*>(0x1));
498
499inline bool comp_t::is_empty() const
500{
501 return reinterpret_cast<uintptr_t>(p_impl) <= 1;
502}
503
511{
512 public:
515 {
516 attr.comp_type = attr_comp_type_t::custom;
517 attr.zero_copy_am = false;
518 attr.name = DEFAULT_NAME;
519 attr.user_context = nullptr;
520 }
521 comp_impl_t(const attr_t& attr_) : attr(attr_) {}
522 virtual ~comp_impl_t() = default;
523 virtual void signal(status_t) = 0;
525};
526
531using comp_handler_t = void (*)(status_t status);
532
538using reduce_op_t = void (*)(const void* left, const void* right, void* dst,
539 size_t n);
544using graph_node_t = void*;
545
// The start node of the completion graph (sentinel pointer value 0x1, not a
// real node).
550const graph_node_t GRAPH_START = reinterpret_cast<graph_node_t>(0x1);
// Sentinel pointer value 0x2; presumably the matching end node of the
// completion graph (the listing carries no description -- confirm).
551const graph_node_t GRAPH_END = reinterpret_cast<graph_node_t>(0x2);
552
558using graph_node_run_cb_t = status_t (*)(void* value);
559
567
573using graph_node_free_cb_t = void (*)(void* value);
574
579using graph_edge_run_cb_t = void (*)(status_t status, void* src_value,
580 void* dst_value);
581
582} // namespace lci
583
584#include "lci_binding_post.hpp"
585
586namespace lci
587{
630
631/***********************************************************************
632 * Overloading graph_add_node for functor
633 **********************************************************************/
634#if __cplusplus >= 201703L
635template <typename T>
636status_t graph_execute_op_fn(void* value)
637{
638 auto op = static_cast<T*>(value);
639 using result_t = std::invoke_result_t<T>;
640
641 if constexpr (std::is_same_v<result_t, status_t>) {
642 status_t result = (*op)();
643 return result;
644 } else if constexpr (std::is_same_v<result_t, errorcode_t>) {
645 errorcode_t result = (*op)();
646 return result;
647 } else {
648 (*op)();
649 return errorcode_t::done;
650 }
651}
652#else
653// Specialization for status_t return type
654template <typename T>
655typename std::enable_if<
656 std::is_same<typename std::result_of<T()>::type, status_t>::value,
657 status_t>::type
659{
660 auto op = static_cast<T*>(value);
661 status_t result = (*op)();
662 return result;
663}
664
665// Specialization for errorcode_t return type
666template <typename T>
667typename std::enable_if<
668 std::is_same<typename std::result_of<T()>::type, errorcode_t>::value,
669 status_t>::type
671{
672 auto op = static_cast<T*>(value);
673 errorcode_t result = (*op)();
674 return result;
675}
676
677// Specialization for all other return types
678template <typename T>
679typename std::enable_if<
680 !std::is_same<typename std::result_of<T()>::type, status_t>::value &&
681 !std::is_same<typename std::result_of<T()>::type, errorcode_t>::value,
682 status_t>::type
684{
685 auto op = static_cast<T*>(value);
686 (*op)();
687 return errorcode_t::done;
688}
689#endif
690
/**
 * @brief Destroys and deallocates a T object held behind a type-erased
 *        pointer.
 *
 * @tparam T     Dynamic type the pointer was created with.
 * @param  value Pointer to a T allocated with `new` (or null, in which case
 *               nothing happens); it must not be used after this call.
 */
template <typename T>
void graph_free_op_fn(void* value)
{
  delete static_cast<T*>(value);
}
697
706template <typename T>
708{
710 T* fn = new T(op);
712 auto ret = graph_add_node_x(graph, wrapper)
713 .value(reinterpret_cast<void*>(fn))
714 .free_cb(free_cb)();
715 fn->user_context(ret);
716 return ret;
717}
718
719} // namespace lci
720
721#endif // LCI_API_LCI_HPP
Completion object implementation base type.
Definition lci.hpp:511
comp_impl_t(const attr_t &attr_)
Definition lci.hpp:521
virtual ~comp_impl_t()=default
comp_impl_t()
Definition lci.hpp:514
comp_attr_t attr
Definition lci.hpp:524
comp_attr_t attr_t
Definition lci.hpp:513
virtual void signal(status_t)=0
The actual implementation for RESOURCE comp.
Definition lci_binding_pre.hpp:490
bool is_empty() const
Definition lci.hpp:499
comp_impl_t * p_impl
Definition lci_binding_pre.hpp:502
The actual implementation for graph_add_node.
Definition lci_binding_post.hpp:2409
graph_add_node_x && free_cb(graph_node_free_cb_t free_cb_in)
Definition lci_binding_post.hpp:2425
graph_add_node_x && value(void *value_in)
Definition lci_binding_post.hpp:2424
The actual implementation for RESOURCE mr.
Definition lci_binding_pre.hpp:417
mr_impl_t * p_impl
Definition lci_binding_pre.hpp:424
bool is_empty() const
Definition lci.hpp:321
graph_node_t graph_add_node_op(comp_t graph, const T &op)
Add a functor as a node to the completion graph.
Definition lci.hpp:707
broadcast_algorithm_t
The type of broadcast algorithm.
Definition lci.hpp:231
errorcode_t
The actual error code for LCI API functions.
Definition lci.hpp:121
void(*)(status_t status, void *src_value, void *dst_value) graph_edge_run_cb_t
The function signature for an edge function in the completion graph.
Definition lci.hpp:579
void set_g_default_attr(const global_attr_t &attr)
Set the default global attributes for LCI.
const rmr_t RMR_NULL
The NULL value of rkey_t.
Definition lci.hpp:343
uint64_t matching_entry_key_t
The type of matching engine entry key.
Definition lci.hpp:405
const mr_t MR_DEVICE
A special mr_t value for device memory.
Definition lci.hpp:312
const int ANY_SOURCE
Special rank value for any-source receive.
Definition lci.hpp:372
void global_initialize()
Global initialization for LCI, including rank_me, rank_n, and g_default_attr.
status_t(*)(void *value) graph_node_run_cb_t
The function signature for a node function in the completion graph.
Definition lci.hpp:558
net_opcode_t
The Type of network communication operation codes.
Definition lci.hpp:212
const mr_t MR_HOST
A special mr_t value for host memory.
Definition lci.hpp:306
direction_t
The enum class of communication direction.
Definition lci.hpp:355
void(*)(void *value) graph_node_free_cb_t
The function signature for a callback that will be triggered when the node is freed.
Definition lci.hpp:573
void(*)(const void *left, const void *right, void *dst, size_t n) reduce_op_t
The user-defined reduction operation.
Definition lci.hpp:538
int get_rank_n()
Get the number of ranks in the current application/job.
matching_policy_t
Enum class for matching policy.
Definition lci.hpp:394
void * graph_node_t
The node type for the completion graph.
Definition lci.hpp:544
const tag_t ANY_TAG
Special tag value for any-tag receive.
Definition lci.hpp:378
const graph_node_run_cb_t GRAPH_NODE_DUMMY_CB
A dummy callback function for a graph node.
Definition lci.hpp:566
allreduce_algorithm_t
The type of allreduce algorithm.
Definition lci.hpp:267
int get_rank_me()
Get the rank of the current process.
reduce_scatter_algorithm_t
The type of reduce scatter algorithm.
Definition lci.hpp:249
const comp_t COMP_NULL_EXPECT_DONE
Deprecated. Same as COMP_NULL.
Definition lci.hpp:483
const mr_t MR_UNKNOWN
A special mr_t value for unknown memory. LCI will detect the memory type automatically.
Definition lci.hpp:319
void * matching_entry_val_t
The type of matching engine entry value.
Definition lci.hpp:410
void global_finalize()
Global finalization for LCI.
matching_entry_type_t
The type of matching entry.
Definition lci.hpp:385
uint32_t rcomp_t
The type of remote completion handler.
Definition lci.hpp:366
void(*)(status_t status) comp_handler_t
Function Signature for completion handler.
Definition lci.hpp:531
const graph_node_t GRAPH_START
The start node of the completion graph.
Definition lci.hpp:550
const comp_t COMP_NULL
Special completion object setting allow_posted and allow_retry to false.
Definition lci.hpp:477
const comp_t COMP_NULL_EXPECT_DONE_OR_RETRY
Deprecated. Same as COMP_NULL_RETRY.
Definition lci.hpp:496
uint64_t tag_t
The type of tag.
Definition lci.hpp:349
global_attr_t get_g_default_attr()
Get the default global attributes for LCI. The default global attributes contain the default attribut...
comp_semantic_t
The enum class of completion semantic.
Definition lci.hpp:420
uint32_t net_imm_data_t
The type of network-layer immediate data field.
Definition lci.hpp:286
const comp_t COMP_NULL_RETRY
Special completion object setting allow_posted to false.
Definition lci.hpp:490
@ ring
Definition lci.hpp:235
@ direct
Definition lci.hpp:233
@ tree
Definition lci.hpp:234
@ retry
Definition lci.hpp:134
@ retry_nopacket
Definition lci.hpp:138
@ done_max
Definition lci.hpp:126
@ posted_backlog
Definition lci.hpp:130
@ done
Definition lci.hpp:123
@ retry_min
Definition lci.hpp:133
@ done_min
Definition lci.hpp:122
@ retry_lock
Definition lci.hpp:137
@ posted_min
Definition lci.hpp:127
@ retry_max
Definition lci.hpp:144
@ posted
Definition lci.hpp:128
@ retry_backlog
Definition lci.hpp:142
@ retry_init
Definition lci.hpp:136
@ done_backlog
Definition lci.hpp:124
@ retry_nomem
Definition lci.hpp:140
@ fatal
Definition lci.hpp:145
@ posted_max
Definition lci.hpp:132
@ READ
Definition lci.hpp:217
@ SEND
Definition lci.hpp:213
@ REMOTE_WRITE
Definition lci.hpp:216
@ RECV
Definition lci.hpp:214
@ WRITE
Definition lci.hpp:215
@ IN
Definition lci.hpp:357
@ OUT
Definition lci.hpp:356
@ max
Definition lci.hpp:399
@ rank_tag
Definition lci.hpp:398
@ tag_only
Definition lci.hpp:397
@ rank_only
Definition lci.hpp:396
@ send
Definition lci.hpp:386
@ recv
Definition lci.hpp:387
@ network
Definition lci.hpp:422
@ memory
Definition lci.hpp:421
All LCI API functions and classes are defined in this namespace.
const char * get_allreduce_algorithm_str(broadcast_algorithm_t algorithm)
Get the string representation of a collective algorithm.
attr_backend_t
Definition lci.hpp:65
@ none
Definition lci.hpp:66
@ ibv
Definition lci.hpp:67
@ ofi
Definition lci.hpp:68
@ ucx
Definition lci.hpp:69
const char * DEFAULT_NAME
const graph_node_t GRAPH_END
Definition lci.hpp:551
const char * get_reduce_scatter_algorithm_str(broadcast_algorithm_t algorithm)
Get the string representation of a collective algorithm.
allocator_default_t g_allocator_default
@ custom
Definition lci_binding_pre.hpp:58
@ graph
Definition lci_binding_pre.hpp:57
const char * get_errorcode_str(errorcode_t errorcode)
Get the string representation of an error code.
void graph_free_op_fn(void *value)
Definition lci.hpp:692
attr_net_lock_mode_t
Definition lci.hpp:72
@ LCI_NET_TRYLOCK_POLL
Definition lci.hpp:75
@ LCI_NET_TRYLOCK_SEND
Definition lci.hpp:73
@ LCI_NET_TRYLOCK_RECV
Definition lci.hpp:74
@ LCI_NET_TRYLOCK_MAX
Definition lci.hpp:76
const char * get_net_opcode_str(net_opcode_t opcode)
Get the string representation of a network operation code.
std::enable_if< std::is_same< typenamestd::result_of< T()>::type, status_t >::value, status_t >::type graph_execute_op_fn(void *value)
Definition lci.hpp:658
const char * get_broadcast_algorithm_str(broadcast_algorithm_t algorithm)
Get the string representation of a collective algorithm.
The user-defined allocator type.
Definition lci.hpp:429
virtual ~allocator_base_t()=default
virtual void deallocate(void *ptr)=0
virtual void * allocate(size_t size)=0
Definition lci.hpp:435
void * allocate(size_t size)
Definition lci.hpp:436
void deallocate(void *ptr)
Definition lci.hpp:437
Definition lci_binding_pre.hpp:133
Wrapper class for error code.
Definition lci.hpp:162
bool is_done() const
Check if the error code is in the done category.
Definition lci.hpp:178
error_t()
Definition lci.hpp:164
bool is_posted() const
Check if the error code is in the posted category.
Definition lci.hpp:187
const char * get_str() const
Get the string representation of the error code.
Definition lci.hpp:205
errorcode_t errorcode
Definition lci.hpp:163
void reset_retry()
Reset the error code to retry.
Definition lci.hpp:173
error_t(errorcode_t errorcode_)
Construct an error_t object with a specific error code.
Definition lci.hpp:169
bool is_retry() const
Check if the error code is in the retry category.
Definition lci.hpp:196
Definition lci_binding_pre.hpp:143
The struct for network status.
Definition lci.hpp:294
net_imm_data_t imm_data
Definition lci.hpp:299
int rank
Definition lci.hpp:296
void * user_context
Definition lci.hpp:297
net_opcode_t opcode
Definition lci.hpp:295
size_t length
Definition lci.hpp:298
bool m_is_set
Definition lci.hpp:102
option_t()
Definition lci.hpp:82
T get_value() const
Definition lci.hpp:98
T get_value_or(T default_value) const
Definition lci.hpp:86
bool get_set_value(T *value) const
Definition lci.hpp:90
T m_value
Definition lci.hpp:101
option_t(T value_, bool is_set_)
Definition lci.hpp:84
bool is_set() const
Definition lci.hpp:99
option_t(T value_)
Definition lci.hpp:83
The type of remote memory region.
Definition lci.hpp:332
uintptr_t base
Definition lci.hpp:333
bool is_empty() const
Definition lci.hpp:336
uint64_t opaque_rkey
Definition lci.hpp:334
rmr_t()
Definition lci.hpp:335
The type of the completion descriptor for a posted communication.
Definition lci.hpp:445
tag_t get_tag() const
Definition lci.hpp:468
status_t(errorcode_t error_)
Definition lci.hpp:453
status_t(void *user_context_)
Definition lci.hpp:454
error_t get_error() const
Definition lci.hpp:464
void set_retry()
Definition lci.hpp:460
void * get_user_context() const
Definition lci.hpp:469
error_t error
Definition lci.hpp:446
void set_done()
Definition lci.hpp:458
tag_t tag
Definition lci.hpp:450
size_t size
Definition lci.hpp:449
void set_posted()
Definition lci.hpp:459
bool is_done() const
Definition lci.hpp:461
void * buffer
Definition lci.hpp:448
void * user_context
Definition lci.hpp:451
bool is_posted() const
Definition lci.hpp:462
status_t()=default
bool is_retry() const
Definition lci.hpp:463
int get_rank() const
Definition lci.hpp:465
void * get_buffer() const
Definition lci.hpp:466
int rank
Definition lci.hpp:447
size_t get_size() const
Definition lci.hpp:467