LCI v2.0.0-dev
For Asynchronous Multithreaded Communication
Loading...
Searching...
No Matches
lci.hpp
Go to the documentation of this file.
1// Copyright (c) 2025 The LCI Project Authors
2// SPDX-License-Identifier: NCSA
3
4#ifndef LCI_API_LCI_HPP
5#define LCI_API_LCI_HPP
6
7#include <memory>
8#include <stdexcept>
9#include <vector>
10#include <string>
11#include <cstring>
12#include <cstdint>
13#include <functional>
14
15#include "lci_config.hpp"
16
21
26
31
36
41
46
52
57
62namespace lci
63{
64extern const char* DEFAULT_NAME;
65
72
79
/**
 * @brief Minimal stand-in for std::optional, used because this header does not
 * want to force C++17 for now.
 *
 * Stores a value together with a flag recording whether the value counts as
 * "set". The value member always exists, so get_value() and the implicit
 * conversion to T return it regardless of the flag; use is_set(),
 * get_value_or() or get_set_value() when the flag matters.
 */
template <typename T>
struct option_t {
  /// Construct an unset option holding a value-initialized T.
  option_t() : m_value(), m_is_set(false) {}
  /// Construct a set option from a value (non-explicit: a plain T converts).
  option_t(T value_) : m_value(value_), m_is_set(true) {}
  /// Construct with an explicit flag, e.g. to carry a default value that is
  /// not marked as set.
  option_t(T value_, bool is_set_) : m_value(value_), m_is_set(is_set_) {}

  /// Return the stored value if set, otherwise default_value.
  T get_value_or(T default_value) const
  {
    return m_is_set ? m_value : default_value;
  }

  /// Copy the stored value into *value only when set.
  /// @return true if *value was written, false if it was left untouched.
  bool get_set_value(T* value) const
  {
    if (m_is_set) {
      *value = this->m_value;
      return true;
    }
    return false;
  }

  /// Return the stored value without consulting the set flag.
  T get_value() const { return m_value; }
  /// Return whether the option is marked as set.
  bool is_set() const { return m_is_set; }
  /// Implicit conversion to the stored value (set flag is not consulted).
  operator T() const { return m_value; }

  T m_value;      ///< The stored value (always present).
  bool m_is_set;  ///< Whether m_value counts as set.
};
105} // namespace lci
106
107#include "lci_binding_pre.hpp"
108
109namespace lci
110{
149
155const char* get_errorcode_str(errorcode_t errorcode);
156
163struct error_t {
170 error_t(errorcode_t errorcode_) : errorcode(errorcode_) {}
179 bool is_done() const
180 {
183 }
184
188 bool is_posted() const
189 {
192 }
193
197 bool is_retry() const
198 {
201 }
202
206 const char* get_str() const { return lci::get_errorcode_str(errorcode); }
207};
208
220
227
238
245
256
263
274
281
287using net_imm_data_t = uint32_t;
288
302
307const mr_t MR_HOST = mr_t();
308
313const mr_t MR_DEVICE = mr_t(reinterpret_cast<void*>(0x1));
314
320const mr_t MR_UNKNOWN = mr_t(reinterpret_cast<void*>(0x2));
321
322inline bool mr_t::is_empty() const
323{
324 return reinterpret_cast<uintptr_t>(p_impl) < 3;
325}
326
/**
 * @brief The type of remote memory region.
 *
 * A plain pair of `base` and `opaque_rkey`. A default-constructed object has
 * both fields set to zero, which is exactly the state is_empty() reports.
 */
struct rmr_t {
  uintptr_t base;
  uint64_t opaque_rkey;
  /// Construct an empty remote memory region (both fields zero).
  rmr_t() : base(0), opaque_rkey(0) {}
  /// Return true only when both base and opaque_rkey are zero.
  bool is_empty() const { return base == 0 && opaque_rkey == 0; }
};
339
345
350using tag_t = uint64_t;
351
/**
 * @brief The enum class of communication direction.
 *
 * NOTE(review): the enumerator lines were missing from this listing and are
 * restored here by name and order; no explicit values were visible, so none
 * are given -- confirm against the upstream header.
 */
enum class direction_t {
  OUT,
  IN,
};
360
367using rcomp_t = uint32_t;
368
373const int ANY_SOURCE = -1;
374
379const tag_t ANY_TAG = static_cast<tag_t>(-1);
380
/**
 * @brief The type of matching entry.
 *
 * The underlying type is unsigned and the values are fixed explicitly.
 */
enum class matching_entry_type_t : unsigned {
  send = 0,
  recv = 1,
};
390
/**
 * @brief Enum class for matching policy.
 *
 * NOTE(review): rank_only, tag_only and rank_tag were missing from this
 * listing and are restored here by name and order. Their explicit values were
 * not visible; 1, 2, 3 are the only values that keep them ordered between
 * none (0) and max (4) -- confirm against the upstream header.
 */
enum class matching_policy_t : unsigned {
  none = 0,
  rank_only = 1,
  tag_only = 2,
  rank_tag = 3,
  max = 4,
};
402
406using matching_entry_key_t = uint64_t;
412
425
431 virtual void* allocate(size_t size) = 0;
432 virtual void deallocate(void* ptr) = 0;
433 virtual ~allocator_base_t() = default;
434};
435
437 void* allocate(size_t size) { return malloc(size); }
438 void deallocate(void* ptr) { free(ptr); }
439};
440extern allocator_default_t g_allocator_default;
441
446struct status_t {
448 int rank = -1;
449 void* buffer = nullptr;
450 size_t size = 0;
452 void* user_context = nullptr;
453 status_t() = default;
454 status_t(errorcode_t error_) : error(error_) {}
455 explicit status_t(void* user_context_)
456 : error(errorcode_t::done), user_context(user_context_)
457 {
458 }
462 bool is_done() const { return error.is_done(); }
463 bool is_posted() const { return error.is_posted(); }
464 bool is_retry() const { return error.is_retry(); }
465 error_t get_error() const { return error; }
466 int get_rank() const { return rank; }
467 void* get_buffer() const { return buffer; }
468 size_t get_size() const { return size; }
469 tag_t get_tag() const { return tag; }
470 void* get_user_context() const { return user_context; }
471};
472
478const comp_t COMP_NULL = comp_t(reinterpret_cast<comp_impl_t*>(0x0));
479
485 comp_t(reinterpret_cast<comp_impl_t*>(0x0));
486
491const comp_t COMP_NULL_RETRY = comp_t(reinterpret_cast<comp_impl_t*>(0x1));
492
498 comp_t(reinterpret_cast<comp_impl_t*>(0x1));
499
500inline bool comp_t::is_empty() const
501{
502 return reinterpret_cast<uintptr_t>(p_impl) <= 1;
503}
504
512{
513 public:
516 {
517 attr.comp_type = attr_comp_type_t::custom;
518 attr.zero_copy_am = false;
519 attr.name = DEFAULT_NAME;
520 attr.user_context = nullptr;
521 }
522 comp_impl_t(const attr_t& attr_) : attr(attr_) {}
523 virtual ~comp_impl_t() = default;
524 virtual void signal(status_t) = 0;
526};
527
532using comp_handler_t = void (*)(status_t status);
533
539using reduce_op_t = void (*)(const void* left, const void* right, void* dst,
540 size_t n);
545using graph_node_t = void*;
546
551const graph_node_t GRAPH_START = reinterpret_cast<graph_node_t>(0x1);
552const graph_node_t GRAPH_END = reinterpret_cast<graph_node_t>(0x2);
553
559using graph_node_run_cb_t = status_t (*)(void* value);
560
568
574using graph_node_free_cb_t = void (*)(void* value);
575
580using graph_edge_run_cb_t = void (*)(status_t status, void* src_value,
581 void* dst_value);
582
583} // namespace lci
584
585#include "lci_binding_post.hpp"
586
587namespace lci
588{
631
632/***********************************************************************
633 * Overloading graph_add_node for functor
634 **********************************************************************/
635#if __cplusplus >= 201703L
636template <typename T>
637status_t graph_execute_op_fn(void* value)
638{
639 auto op = static_cast<T*>(value);
640 using result_t = std::invoke_result_t<T>;
641
642 if constexpr (std::is_same_v<result_t, status_t>) {
643 status_t result = (*op)();
644 return result;
645 } else if constexpr (std::is_same_v<result_t, errorcode_t>) {
646 errorcode_t result = (*op)();
647 return result;
648 } else {
649 (*op)();
650 return errorcode_t::done;
651 }
652}
653#else
654// Specialization for status_t return type
655template <typename T>
656typename std::enable_if<
657 std::is_same<typename std::result_of<T()>::type, status_t>::value,
658 status_t>::type
660{
661 auto op = static_cast<T*>(value);
662 status_t result = (*op)();
663 return result;
664}
665
666// Specialization for errorcode_t return type
667template <typename T>
668typename std::enable_if<
669 std::is_same<typename std::result_of<T()>::type, errorcode_t>::value,
670 status_t>::type
672{
673 auto op = static_cast<T*>(value);
674 errorcode_t result = (*op)();
675 return result;
676}
677
678// Specialization for all other return types
679template <typename T>
680typename std::enable_if<
681 !std::is_same<typename std::result_of<T()>::type, status_t>::value &&
682 !std::is_same<typename std::result_of<T()>::type, errorcode_t>::value,
683 status_t>::type
685{
686 auto op = static_cast<T*>(value);
687 (*op)();
688 return errorcode_t::done;
689}
690#endif
691
/**
 * @brief Destroy a type-erased, heap-allocated functor.
 *
 * @tparam T The type that `value` actually points to.
 * @param value Pointer to a T object; it is released with `delete`, so it
 *        must have come from `new T` (or be null, in which case nothing
 *        happens).
 */
template <typename T>
void graph_free_op_fn(void* value)
{
  delete static_cast<T*>(value);
}
698
707template <typename T>
709{
711 T* fn = new T(op);
713 auto ret = graph_add_node_x(graph, wrapper)
714 .value(reinterpret_cast<void*>(fn))
715 .free_cb(free_cb)();
716 fn->user_context(ret);
717 return ret;
718}
719
720} // namespace lci
721
722#endif // LCI_API_LCI_HPP
Completion object implementation base type.
Definition lci.hpp:512
comp_impl_t(const attr_t &attr_)
Definition lci.hpp:522
virtual ~comp_impl_t()=default
comp_impl_t()
Definition lci.hpp:515
comp_attr_t attr
Definition lci.hpp:525
comp_attr_t attr_t
Definition lci.hpp:514
virtual void signal(status_t)=0
The actual implementation for RESOURCE comp.
Definition lci_binding_pre.hpp:512
bool is_empty() const
Definition lci.hpp:500
comp_impl_t * p_impl
Definition lci_binding_pre.hpp:524
The actual implementation for graph_add_node.
Definition lci_binding_post.hpp:2424
graph_add_node_x && free_cb(graph_node_free_cb_t free_cb_in)
Definition lci_binding_post.hpp:2440
graph_add_node_x && value(void *value_in)
Definition lci_binding_post.hpp:2439
The actual implementation for RESOURCE mr.
Definition lci_binding_pre.hpp:439
mr_impl_t * p_impl
Definition lci_binding_pre.hpp:446
bool is_empty() const
Definition lci.hpp:322
graph_node_t graph_add_node_op(comp_t graph, const T &op)
Add a functor as a node to the completion graph.
Definition lci.hpp:708
broadcast_algorithm_t
The type of broadcast algorithm.
Definition lci.hpp:232
errorcode_t
The actual error code for LCI API functions.
Definition lci.hpp:122
void(*)(status_t status, void *src_value, void *dst_value) graph_edge_run_cb_t
The function signature for an edge function in the completion graph.
Definition lci.hpp:580
void set_g_default_attr(const global_attr_t &attr)
Set the default global attributes for LCI.
const rmr_t RMR_NULL
The NULL value of rkey_t.
Definition lci.hpp:344
uint64_t matching_entry_key_t
The type of matching engine entry key.
Definition lci.hpp:406
const mr_t MR_DEVICE
A special mr_t value for device memory.
Definition lci.hpp:313
const int ANY_SOURCE
Special rank value for any-source receive.
Definition lci.hpp:373
void global_initialize()
Global initialization for LCI, including rank_me, rank_n, and g_default_attr.
status_t(*)(void *value) graph_node_run_cb_t
The function signature for a node function in the completion graph.
Definition lci.hpp:559
net_opcode_t
The Type of network communication operation codes.
Definition lci.hpp:213
const mr_t MR_HOST
A special mr_t value for host memory.
Definition lci.hpp:307
direction_t
The enum class of communication direction.
Definition lci.hpp:356
void(*)(void *value) graph_node_free_cb_t
The function signature for a callback that will be triggered when the node was freed.
Definition lci.hpp:574
void(*)(const void *left, const void *right, void *dst, size_t n) reduce_op_t
The user-defined reduction operation.
Definition lci.hpp:539
int get_rank_n()
Get the number of ranks in the current application/job.
matching_policy_t
Enum class for matching policy.
Definition lci.hpp:395
void * graph_node_t
The node type for the completion graph.
Definition lci.hpp:545
const tag_t ANY_TAG
Special tag value for any-tag receive.
Definition lci.hpp:379
const graph_node_run_cb_t GRAPH_NODE_DUMMY_CB
A dummy callback function for a graph node.
Definition lci.hpp:567
allreduce_algorithm_t
The type of allreduce algorithm.
Definition lci.hpp:268
int get_rank_me()
Get the rank of the current process.
reduce_scatter_algorithm_t
The type of reduce scatter algorithm.
Definition lci.hpp:250
const comp_t COMP_NULL_EXPECT_DONE
Deprecated. Same as COMP_NULL.
Definition lci.hpp:484
const mr_t MR_UNKNOWN
A special mr_t value for unknown memory. LCI will detect the memory type automatically.
Definition lci.hpp:320
void * matching_entry_val_t
The type of matching engine entry value.
Definition lci.hpp:411
void global_finalize()
Global finalization for LCI.
matching_entry_type_t
The type of matching entry.
Definition lci.hpp:386
uint32_t rcomp_t
The type of remote completion handler.
Definition lci.hpp:367
void(*)(status_t status) comp_handler_t
Function Signature for completion handler.
Definition lci.hpp:532
const graph_node_t GRAPH_START
The start node of the completion graph.
Definition lci.hpp:551
const comp_t COMP_NULL
Special completion object setting allow_posted and allow_retry to false.
Definition lci.hpp:478
const comp_t COMP_NULL_EXPECT_DONE_OR_RETRY
Deprecated. Same as COMP_NULL_RETRY.
Definition lci.hpp:497
uint64_t tag_t
The type of tag.
Definition lci.hpp:350
global_attr_t get_g_default_attr()
Get the default global attributes for LCI. The default global attributes contain the default attribut...
comp_semantic_t
The enum class of completion semantic.
Definition lci.hpp:421
uint32_t net_imm_data_t
The type of network-layer immediate data field.
Definition lci.hpp:287
const comp_t COMP_NULL_RETRY
Special completion object setting allow_posted to false.
Definition lci.hpp:491
@ ring
Definition lci.hpp:236
@ direct
Definition lci.hpp:234
@ tree
Definition lci.hpp:235
@ retry
Definition lci.hpp:135
@ retry_nopacket
Definition lci.hpp:139
@ done_max
Definition lci.hpp:127
@ posted_backlog
Definition lci.hpp:131
@ done
Definition lci.hpp:124
@ retry_min
Definition lci.hpp:134
@ done_min
Definition lci.hpp:123
@ retry_lock
Definition lci.hpp:138
@ posted_min
Definition lci.hpp:128
@ retry_max
Definition lci.hpp:145
@ posted
Definition lci.hpp:129
@ retry_backlog
Definition lci.hpp:143
@ retry_init
Definition lci.hpp:137
@ done_backlog
Definition lci.hpp:125
@ retry_nomem
Definition lci.hpp:141
@ fatal
Definition lci.hpp:146
@ posted_max
Definition lci.hpp:133
@ READ
Definition lci.hpp:218
@ SEND
Definition lci.hpp:214
@ REMOTE_WRITE
Definition lci.hpp:217
@ RECV
Definition lci.hpp:215
@ WRITE
Definition lci.hpp:216
@ IN
Definition lci.hpp:358
@ OUT
Definition lci.hpp:357
@ max
Definition lci.hpp:400
@ rank_tag
Definition lci.hpp:399
@ tag_only
Definition lci.hpp:398
@ rank_only
Definition lci.hpp:397
@ send
Definition lci.hpp:387
@ recv
Definition lci.hpp:388
@ network
Definition lci.hpp:423
@ memory
Definition lci.hpp:422
All LCI API functions and classes are defined in this namespace.
const char * get_allreduce_algorithm_str(broadcast_algorithm_t algorithm)
Get the string representation of a collective algorithm.
attr_backend_t
Definition lci.hpp:66
@ none
Definition lci.hpp:67
@ ibv
Definition lci.hpp:68
@ ofi
Definition lci.hpp:69
@ ucx
Definition lci.hpp:70
const char * DEFAULT_NAME
const graph_node_t GRAPH_END
Definition lci.hpp:552
const char * get_reduce_scatter_algorithm_str(broadcast_algorithm_t algorithm)
Get the string representation of a collective algorithm.
allocator_default_t g_allocator_default
@ custom
Definition lci_binding_pre.hpp:64
@ graph
Definition lci_binding_pre.hpp:63
const char * get_errorcode_str(errorcode_t errorcode)
Get the string representation of an error code.
void graph_free_op_fn(void *value)
Definition lci.hpp:693
attr_net_lock_mode_t
Definition lci.hpp:73
@ LCI_NET_TRYLOCK_POLL
Definition lci.hpp:76
@ LCI_NET_TRYLOCK_SEND
Definition lci.hpp:74
@ LCI_NET_TRYLOCK_RECV
Definition lci.hpp:75
@ LCI_NET_TRYLOCK_MAX
Definition lci.hpp:77
const char * get_net_opcode_str(net_opcode_t opcode)
Get the string representation of a network operation code.
std::enable_if< std::is_same< typenamestd::result_of< T()>::type, status_t >::value, status_t >::type graph_execute_op_fn(void *value)
Definition lci.hpp:659
const char * get_broadcast_algorithm_str(broadcast_algorithm_t algorithm)
Get the string representation of a collective algorithm.
The user-defined allocator type.
Definition lci.hpp:430
virtual ~allocator_base_t()=default
virtual void deallocate(void *ptr)=0
virtual void * allocate(size_t size)=0
Definition lci.hpp:436
void * allocate(size_t size)
Definition lci.hpp:437
void deallocate(void *ptr)
Definition lci.hpp:438
Definition lci_binding_pre.hpp:143
Wrapper class for error code.
Definition lci.hpp:163
bool is_done() const
Check if the error code is in the done category.
Definition lci.hpp:179
error_t()
Definition lci.hpp:165
bool is_posted() const
Check if the error code is in the posted category.
Definition lci.hpp:188
const char * get_str() const
Get the string representation of the error code.
Definition lci.hpp:206
errorcode_t errorcode
Definition lci.hpp:164
void reset_retry()
Reset the error code to retry.
Definition lci.hpp:174
error_t(errorcode_t errorcode_)
Construct an error_t object with a specific error code.
Definition lci.hpp:170
bool is_retry() const
Check if the error code is in the retry category.
Definition lci.hpp:197
Definition lci_binding_pre.hpp:153
The struct for network status.
Definition lci.hpp:295
net_imm_data_t imm_data
Definition lci.hpp:300
int rank
Definition lci.hpp:297
void * user_context
Definition lci.hpp:298
net_opcode_t opcode
Definition lci.hpp:296
size_t length
Definition lci.hpp:299
bool m_is_set
Definition lci.hpp:103
option_t()
Definition lci.hpp:83
T get_value() const
Definition lci.hpp:99
T get_value_or(T default_value) const
Definition lci.hpp:87
bool get_set_value(T *value) const
Definition lci.hpp:91
T m_value
Definition lci.hpp:102
option_t(T value_, bool is_set_)
Definition lci.hpp:85
bool is_set() const
Definition lci.hpp:100
option_t(T value_)
Definition lci.hpp:84
The type of remote memory region.
Definition lci.hpp:333
uintptr_t base
Definition lci.hpp:334
bool is_empty() const
Definition lci.hpp:337
uint64_t opaque_rkey
Definition lci.hpp:335
rmr_t()
Definition lci.hpp:336
The type of the completion descriptor for a posted communication.
Definition lci.hpp:446
tag_t get_tag() const
Definition lci.hpp:469
status_t(errorcode_t error_)
Definition lci.hpp:454
status_t(void *user_context_)
Definition lci.hpp:455
error_t get_error() const
Definition lci.hpp:465
void set_retry()
Definition lci.hpp:461
void * get_user_context() const
Definition lci.hpp:470
error_t error
Definition lci.hpp:447
void set_done()
Definition lci.hpp:459
tag_t tag
Definition lci.hpp:451
size_t size
Definition lci.hpp:450
void set_posted()
Definition lci.hpp:460
bool is_done() const
Definition lci.hpp:462
void * buffer
Definition lci.hpp:449
void * user_context
Definition lci.hpp:452
bool is_posted() const
Definition lci.hpp:463
status_t()=default
bool is_retry() const
Definition lci.hpp:464
int get_rank() const
Definition lci.hpp:466
void * get_buffer() const
Definition lci.hpp:467
int rank
Definition lci.hpp:448
size_t get_size() const
Definition lci.hpp:468