LCI v2.0.0-dev
For Asynchronous Multithreaded Communication
Loading...
Searching...
No Matches
lci.hpp
Go to the documentation of this file.
1// Copyright (c) 2025 The LCI Project Authors
2// SPDX-License-Identifier: NCSA
3
4#ifndef LCI_API_LCI_HPP
5#define LCI_API_LCI_HPP
6
7#include <memory>
8#include <stdexcept>
9#include <vector>
10#include <string>
11#include <cstring>
12#include <cstdint>
13#include <functional>
14
15#include "lci_config.hpp"
16
21
26
31
36
41
46
52
57
62namespace lci
63{
64extern const char* DEFAULT_NAME;
65
72
82
/**
 * @brief Minimal stand-in for std::optional.
 *
 * Mimics std::optional because the header does not want to force C++17 for
 * now. It stores a value of type T next to a flag that records whether the
 * value counts as "set".
 *
 * @tparam T The stored value type; it is copied in and out by value.
 */
template <typename T>
struct option_t {
  /// Builds an unset option that holds a value-initialized T.
  option_t() : m_value(), m_is_set(false) {}

  /// Builds a set option from a plain value (implicit conversion from T).
  option_t(T value_) : m_value(value_), m_is_set(true) {}

  /// Stores value_ together with an explicit flag, e.g. to set a default value.
  option_t(T value_, bool is_set_) : m_value(value_), m_is_set(is_set_) {}

  /// @return The stored value if the option is set, otherwise default_value.
  T get_value_or(T default_value) const
  {
    if (m_is_set) {
      return m_value;
    }
    return default_value;
  }

  /**
   * @brief Copies the stored value into *value only when the option is set.
   * @param value Destination; it is written only when the option is set.
   * @return true if *value was written, false if the option is unset.
   */
  bool get_set_value(T* value) const
  {
    if (!m_is_set) {
      return false;
    }
    *value = m_value;
    return true;
  }

  /// @return The stored value, regardless of whether the option is set.
  T get_value() const { return m_value; }

  /// @return Whether the option is flagged as set.
  bool is_set() const { return m_is_set; }

  /// Implicit conversion to the stored value, regardless of the set flag.
  operator T() const { return m_value; }

  T m_value;      ///< The stored value.
  bool m_is_set;  ///< Whether m_value counts as set.
};
108} // namespace lci
109
110#include "lci_binding_pre.hpp"
111
112namespace lci
113{
152
158const char* get_errorcode_str(errorcode_t errorcode);
159
166struct error_t {
173 error_t(errorcode_t errorcode_) : errorcode(errorcode_) {}
182 bool is_done() const
183 {
186 }
187
191 bool is_posted() const
192 {
195 }
196
200 bool is_retry() const
201 {
204 }
205
209 const char* get_str() const { return lci::get_errorcode_str(errorcode); }
210};
211
223
230
241
248
259
266
277
284
290using net_imm_data_t = uint32_t;
291
305
310const mr_t MR_HOST = mr_t();
311
316const mr_t MR_DEVICE = mr_t(reinterpret_cast<void*>(0x1));
317
323const mr_t MR_UNKNOWN = mr_t(reinterpret_cast<void*>(0x2));
324
// Reports whether this mr_t refers to no real implementation object: true when
// the integer value of p_impl is below 3. The sentinels MR_DEVICE (0x1) and
// MR_UNKNOWN (0x2) are constructed from such values, so they count as empty.
// NOTE(review): presumably a default-constructed mr_t (MR_HOST) holds a null
// p_impl and is empty too -- confirm against mr_t in lci_binding_pre.hpp.
325inline bool mr_t::is_empty() const
326{
327 return reinterpret_cast<uintptr_t>(p_impl) < 3;
328}
329
/**
 * @brief The type of remote memory region.
 *
 * A plain value type made of three integer fields. A default-constructed
 * object has all three fields zero, which is exactly the state is_empty()
 * reports as empty.
 */
struct rmr_t {
  uintptr_t base;
  uintptr_t mr_base;
  uint64_t opaque_rkey;

  /// Builds the empty value: every field zero.
  rmr_t() : base(0), mr_base(0), opaque_rkey(0) {}

  /// @return true only when base, mr_base and opaque_rkey are all zero.
  bool is_empty() const
  {
    const bool any_field_set = base != 0 || mr_base != 0 || opaque_rkey != 0;
    return !any_field_set;
  }
};
346
352
357using tag_t = uint64_t;
358
363enum class direction_t {
366};
367
374using rcomp_t = uint32_t;
375
380const int ANY_SOURCE = -1;
381
386const tag_t ANY_TAG = static_cast<tag_t>(-1);
387
/**
 * @brief The type of matching entry.
 *
 * The underlying type is unsigned and the numeric values are fixed explicitly.
 */
enum class matching_entry_type_t : unsigned {
  send = 0,
  recv = 1,
};
397
402enum class matching_policy_t : unsigned {
403 none = 0,
407 max = 4,
408};
409
413using matching_entry_key_t = uint64_t;
419
432
438 virtual void* allocate(size_t size) = 0;
439 virtual void deallocate(void* ptr) = 0;
440 virtual ~allocator_base_t() = default;
441};
442
444 void* allocate(size_t size) { return malloc(size); }
445 void deallocate(void* ptr) { free(ptr); }
446};
447extern allocator_default_t g_allocator_default;
448
453struct status_t {
455 int rank = -1;
456 void* buffer = nullptr;
457 size_t size = 0;
459 void* user_context = nullptr;
460 status_t() = default;
461 status_t(errorcode_t error_) : error(error_) {}
462 explicit status_t(void* user_context_)
463 : error(errorcode_t::done), user_context(user_context_)
464 {
465 }
469 bool is_done() const { return error.is_done(); }
470 bool is_posted() const { return error.is_posted(); }
471 bool is_retry() const { return error.is_retry(); }
472 error_t get_error() const { return error; }
473 int get_rank() const { return rank; }
474 void* get_buffer() const { return buffer; }
475 size_t get_size() const { return size; }
476 tag_t get_tag() const { return tag; }
477 void* get_user_context() const { return user_context; }
478};
479
485const comp_t COMP_NULL = comp_t(reinterpret_cast<comp_impl_t*>(0x0));
486
492 comp_t(reinterpret_cast<comp_impl_t*>(0x0));
493
498const comp_t COMP_NULL_RETRY = comp_t(reinterpret_cast<comp_impl_t*>(0x1));
499
505 comp_t(reinterpret_cast<comp_impl_t*>(0x1));
506
// Reports whether this comp_t refers to no real completion object: true when
// the integer value of p_impl is 0 or 1. Those are the values COMP_NULL (0x0)
// and COMP_NULL_RETRY (0x1) are constructed from, so both count as empty.
507inline bool comp_t::is_empty() const
508{
509 return reinterpret_cast<uintptr_t>(p_impl) <= 1;
510}
511
519{
520 public:
523 {
524 attr.comp_type = attr_comp_type_t::custom;
525 attr.zero_copy_am = false;
526 attr.name = DEFAULT_NAME;
527 attr.user_context = nullptr;
528 }
529 comp_impl_t(const attr_t& attr_) : attr(attr_) {}
530 virtual ~comp_impl_t() = default;
531 virtual void signal(status_t) = 0;
533};
534
// Function signature for a completion handler: receives the status_t by value.
539using comp_handler_t = void (*)(status_t status);
540
// The user-defined reduction operation: takes two read-only inputs (left,
// right), an output pointer (dst) and a size_t n.
// NOTE(review): n is presumably an element count rather than a byte count --
// confirm against the collective implementation.
546using reduce_op_t = void (*)(const void* left, const void* right, void* dst,
547 size_t n);
// The node type for the completion graph (an opaque pointer).
552using graph_node_t = void*;
553
// Reserved node values: GRAPH_START (0x1) is the start node of the completion
// graph; GRAPH_END (0x2) is its counterpart sentinel.
558const graph_node_t GRAPH_START = reinterpret_cast<graph_node_t>(0x1);
559const graph_node_t GRAPH_END = reinterpret_cast<graph_node_t>(0x2);
560
// The function signature for a node function in the completion graph: takes
// the node's opaque value pointer and returns a status_t.
566using graph_node_run_cb_t = status_t (*)(void* value);
567
575
// The function signature for a callback that is triggered when the node is
// freed; it receives the node's opaque value pointer.
581using graph_node_free_cb_t = void (*)(void* value);
582
// The function signature for an edge function in the completion graph: gets a
// status_t plus the opaque value pointers of the source and destination nodes.
587using graph_edge_run_cb_t = void (*)(status_t status, void* src_value,
588 void* dst_value);
589
590} // namespace lci
591
592#include "lci_binding_post.hpp"
593
594namespace lci
595{
638
639/***********************************************************************
640 * Overloading graph_add_node for functor
641 **********************************************************************/
642#if __cplusplus >= 201703L
// Node-run trampoline for functors (C++17 path). Treats `value` as a T*,
// invokes the functor with no arguments, and maps the outcome onto status_t:
//  - functor returns status_t    -> returned unchanged;
//  - functor returns errorcode_t -> returned through status_t's converting
//                                   constructor from errorcode_t;
//  - any other return type       -> result discarded, errorcode_t::done
//                                   reported.
// The functor is only invoked here; this function does not delete it.
643template <typename T>
644status_t graph_execute_op_fn(void* value)
645{
646 auto op = static_cast<T*>(value);
647 using result_t = std::invoke_result_t<T>;
648
649 if constexpr (std::is_same_v<result_t, status_t>) {
650 status_t result = (*op)();
651 return result;
652 } else if constexpr (std::is_same_v<result_t, errorcode_t>) {
653 errorcode_t result = (*op)();
654 return result;
655 } else {
656 (*op)();
657 return errorcode_t::done;
658 }
659}
660#else
661// Specialization for status_t return type
662template <typename T>
663typename std::enable_if<
664 std::is_same<typename std::result_of<T()>::type, status_t>::value,
665 status_t>::type
667{
668 auto op = static_cast<T*>(value);
669 status_t result = (*op)();
670 return result;
671}
672
673// Specialization for errorcode_t return type
674template <typename T>
675typename std::enable_if<
676 std::is_same<typename std::result_of<T()>::type, errorcode_t>::value,
677 status_t>::type
679{
680 auto op = static_cast<T*>(value);
681 errorcode_t result = (*op)();
682 return result;
683}
684
685// Specialization for all other return types
686template <typename T>
687typename std::enable_if<
688 !std::is_same<typename std::result_of<T()>::type, status_t>::value &&
689 !std::is_same<typename std::result_of<T()>::type, errorcode_t>::value,
690 status_t>::type
692{
693 auto op = static_cast<T*>(value);
694 (*op)();
695 return errorcode_t::done;
696}
697#endif
698
/**
 * @brief Destroys a heap-allocated functor that was handed over as void*.
 *
 * @tparam T The dynamic type the pointer was created with.
 * @param value Pointer to a T obtained from `new`, or null. It is cast back
 *        to T* and deleted; a null pointer is a no-op.
 *
 * NOTE(review): presumably registered as the node free callback for functors
 * allocated by graph_add_node_op -- confirm at the call site.
 */
template <typename T>
void graph_free_op_fn(void* value)
{
  delete static_cast<T*>(value);
}
705
714template <typename T>
716{
718 T* fn = new T(op);
720 auto ret = graph_add_node_x(graph, wrapper)
721 .value(reinterpret_cast<void*>(fn))
722 .free_cb(free_cb)();
723 fn->user_context(ret);
724 return ret;
725}
726
727} // namespace lci
728
729#endif // LCI_API_LCI_HPP
Completion object implementation base type.
Definition lci.hpp:519
comp_impl_t(const attr_t &attr_)
Definition lci.hpp:529
virtual ~comp_impl_t()=default
comp_impl_t()
Definition lci.hpp:522
comp_attr_t attr
Definition lci.hpp:532
comp_attr_t attr_t
Definition lci.hpp:521
virtual void signal(status_t)=0
The actual implementation for RESOURCE comp.
Definition lci_binding_pre.hpp:516
bool is_empty() const
Definition lci.hpp:507
comp_impl_t * p_impl
Definition lci_binding_pre.hpp:528
The actual implementation for graph_add_node.
Definition lci_binding_post.hpp:3466
graph_add_node_x && free_cb(graph_node_free_cb_t free_cb_in)
Definition lci_binding_post.hpp:3482
graph_add_node_x && value(void *value_in)
Definition lci_binding_post.hpp:3481
The actual implementation for RESOURCE mr.
Definition lci_binding_pre.hpp:316
mr_impl_t * p_impl
Definition lci_binding_pre.hpp:323
bool is_empty() const
Definition lci.hpp:325
graph_node_t graph_add_node_op(comp_t graph, const T &op)
Add a functor as a node to the completion graph.
Definition lci.hpp:715
broadcast_algorithm_t
The type of broadcast algorithm.
Definition lci.hpp:235
errorcode_t
The actual error code for LCI API functions.
Definition lci.hpp:125
void(*)(status_t status, void *src_value, void *dst_value) graph_edge_run_cb_t
The function signature for an edge function in the completion graph.
Definition lci.hpp:587
void set_g_default_attr(const global_attr_t &attr)
Set the default global attributes for LCI.
const rmr_t RMR_NULL
The NULL value of rmr_t.
Definition lci.hpp:351
uint64_t matching_entry_key_t
The type of matching engine entry key.
Definition lci.hpp:413
const mr_t MR_DEVICE
A special mr_t value for device memory.
Definition lci.hpp:316
const int ANY_SOURCE
Special rank value for any-source receive.
Definition lci.hpp:380
void global_initialize()
Global initialization for LCI, including rank_me, rank_n, and g_default_attr.
status_t(*)(void *value) graph_node_run_cb_t
The function signature for a node function in the completion graph.
Definition lci.hpp:566
net_opcode_t
The Type of network communication operation codes.
Definition lci.hpp:216
const mr_t MR_HOST
A special mr_t value for host memory.
Definition lci.hpp:310
direction_t
The enum class of communication direction.
Definition lci.hpp:363
void(*)(void *value) graph_node_free_cb_t
The function signature for a callback that will be triggered when the node is freed.
Definition lci.hpp:581
void(*)(const void *left, const void *right, void *dst, size_t n) reduce_op_t
The user-defined reduction operation.
Definition lci.hpp:546
int get_rank_n()
Get the number of ranks in the current application/job.
matching_policy_t
Enum class for matching policy.
Definition lci.hpp:402
void * graph_node_t
The node type for the completion graph.
Definition lci.hpp:552
const tag_t ANY_TAG
Special tag value for any-tag receive.
Definition lci.hpp:386
const graph_node_run_cb_t GRAPH_NODE_DUMMY_CB
A dummy callback function for a graph node.
Definition lci.hpp:574
allreduce_algorithm_t
The type of allreduce algorithm.
Definition lci.hpp:271
int get_rank_me()
Get the rank of the current process.
reduce_scatter_algorithm_t
The type of reduce scatter algorithm.
Definition lci.hpp:253
const comp_t COMP_NULL_EXPECT_DONE
Deprecated. Same as COMP_NULL.
Definition lci.hpp:491
const mr_t MR_UNKNOWN
A special mr_t value for unknown memory. LCI will detect the memory type automatically.
Definition lci.hpp:323
void * matching_entry_val_t
The type of matching engine entry value.
Definition lci.hpp:418
void global_finalize()
Global finalization for LCI.
matching_entry_type_t
The type of matching entry.
Definition lci.hpp:393
uint32_t rcomp_t
The type of remote completion handler.
Definition lci.hpp:374
void(*)(status_t status) comp_handler_t
Function Signature for completion handler.
Definition lci.hpp:539
const graph_node_t GRAPH_START
The start node of the completion graph.
Definition lci.hpp:558
const comp_t COMP_NULL
Special completion object setting allow_posted and allow_retry to false.
Definition lci.hpp:485
const comp_t COMP_NULL_EXPECT_DONE_OR_RETRY
Deprecated. Same as COMP_NULL_RETRY.
Definition lci.hpp:504
uint64_t tag_t
The type of tag.
Definition lci.hpp:357
global_attr_t get_g_default_attr()
Get the default global attributes for LCI. The default global attributes contain the default attribut...
comp_semantic_t
The enum class of completion semantic.
Definition lci.hpp:428
uint32_t net_imm_data_t
The type of network-layer immediate data field.
Definition lci.hpp:290
const comp_t COMP_NULL_RETRY
Special completion object setting allow_posted to false.
Definition lci.hpp:498
@ ring
Definition lci.hpp:239
@ direct
Definition lci.hpp:237
@ tree
Definition lci.hpp:238
@ retry
Definition lci.hpp:138
@ retry_nopacket
Definition lci.hpp:142
@ done_max
Definition lci.hpp:130
@ posted_backlog
Definition lci.hpp:134
@ done
Definition lci.hpp:127
@ retry_min
Definition lci.hpp:137
@ done_min
Definition lci.hpp:126
@ retry_lock
Definition lci.hpp:141
@ posted_min
Definition lci.hpp:131
@ retry_max
Definition lci.hpp:148
@ posted
Definition lci.hpp:132
@ retry_backlog
Definition lci.hpp:146
@ retry_init
Definition lci.hpp:140
@ done_backlog
Definition lci.hpp:128
@ retry_nomem
Definition lci.hpp:144
@ fatal
Definition lci.hpp:149
@ posted_max
Definition lci.hpp:136
@ READ
Definition lci.hpp:221
@ SEND
Definition lci.hpp:217
@ REMOTE_WRITE
Definition lci.hpp:220
@ RECV
Definition lci.hpp:218
@ WRITE
Definition lci.hpp:219
@ IN
Definition lci.hpp:365
@ OUT
Definition lci.hpp:364
@ max
Definition lci.hpp:407
@ rank_tag
Definition lci.hpp:406
@ tag_only
Definition lci.hpp:405
@ rank_only
Definition lci.hpp:404
@ send
Definition lci.hpp:394
@ recv
Definition lci.hpp:395
@ network
Definition lci.hpp:430
@ memory
Definition lci.hpp:429
All LCI API functions and classes are defined in this namespace.
const char * get_allreduce_algorithm_str(broadcast_algorithm_t algorithm)
Get the string representation of a collective algorithm.
attr_backend_t
Definition lci.hpp:66
@ none
Definition lci.hpp:67
@ ibv
Definition lci.hpp:68
@ ofi
Definition lci.hpp:69
@ ucx
Definition lci.hpp:70
const char * DEFAULT_NAME
const graph_node_t GRAPH_END
Definition lci.hpp:559
const char * get_reduce_scatter_algorithm_str(broadcast_algorithm_t algorithm)
Get the string representation of a collective algorithm.
allocator_default_t g_allocator_default
@ custom
Definition lci_binding_pre.hpp:64
@ graph
Definition lci_binding_pre.hpp:63
const char * get_errorcode_str(errorcode_t errorcode)
Get the string representation of an error code.
void graph_free_op_fn(void *value)
Definition lci.hpp:700
attr_net_lock_mode_t
Definition lci.hpp:73
@ LCI_NET_LOCK_MR
Definition lci.hpp:77
@ LCI_NET_TRYLOCK_POLL
Definition lci.hpp:76
@ LCI_NET_TRYLOCK_SEND
Definition lci.hpp:74
@ LCI_NET_LOCK_ALL
Definition lci.hpp:78
@ LCI_NET_TRYLOCK_RECV
Definition lci.hpp:75
@ LCI_NET_TRYLOCK_MAX
Definition lci.hpp:80
const char * get_net_opcode_str(net_opcode_t opcode)
Get the string representation of a network operation code.
std::enable_if< std::is_same< typename std::result_of< T()>::type, status_t >::value, status_t >::type graph_execute_op_fn(void *value)
Definition lci.hpp:666
const char * get_broadcast_algorithm_str(broadcast_algorithm_t algorithm)
Get the string representation of a collective algorithm.
The user-defined allocator type.
Definition lci.hpp:437
virtual ~allocator_base_t()=default
virtual void deallocate(void *ptr)=0
virtual void * allocate(size_t size)=0
Definition lci.hpp:443
void * allocate(size_t size)
Definition lci.hpp:444
void deallocate(void *ptr)
Definition lci.hpp:445
Definition lci_binding_pre.hpp:144
Wrapper class for error code.
Definition lci.hpp:166
bool is_done() const
Check if the error code is in the done category.
Definition lci.hpp:182
error_t()
Definition lci.hpp:168
bool is_posted() const
Check if the error code is in the posted category.
Definition lci.hpp:191
const char * get_str() const
Get the string representation of the error code.
Definition lci.hpp:209
errorcode_t errorcode
Definition lci.hpp:167
void reset_retry()
Reset the error code to retry.
Definition lci.hpp:177
error_t(errorcode_t errorcode_)
Construct an error_t object with a specific error code.
Definition lci.hpp:173
bool is_retry() const
Check if the error code is in the retry category.
Definition lci.hpp:200
Definition lci_binding_pre.hpp:154
The struct for network status.
Definition lci.hpp:298
net_imm_data_t imm_data
Definition lci.hpp:303
int rank
Definition lci.hpp:300
void * user_context
Definition lci.hpp:301
net_opcode_t opcode
Definition lci.hpp:299
size_t length
Definition lci.hpp:302
bool m_is_set
Definition lci.hpp:106
option_t()
Definition lci.hpp:86
T get_value() const
Definition lci.hpp:102
T get_value_or(T default_value) const
Definition lci.hpp:90
bool get_set_value(T *value) const
Definition lci.hpp:94
T m_value
Definition lci.hpp:105
option_t(T value_, bool is_set_)
Definition lci.hpp:88
bool is_set() const
Definition lci.hpp:103
option_t(T value_)
Definition lci.hpp:87
The type of remote memory region.
Definition lci.hpp:336
uintptr_t mr_base
Definition lci.hpp:338
uintptr_t base
Definition lci.hpp:337
bool is_empty() const
Definition lci.hpp:341
uint64_t opaque_rkey
Definition lci.hpp:339
rmr_t()
Definition lci.hpp:340
The type of the completion descriptor for a posted communication.
Definition lci.hpp:453
tag_t get_tag() const
Definition lci.hpp:476
status_t(errorcode_t error_)
Definition lci.hpp:461
status_t(void *user_context_)
Definition lci.hpp:462
error_t get_error() const
Definition lci.hpp:472
void set_retry()
Definition lci.hpp:468
void * get_user_context() const
Definition lci.hpp:477
error_t error
Definition lci.hpp:454
void set_done()
Definition lci.hpp:466
tag_t tag
Definition lci.hpp:458
size_t size
Definition lci.hpp:457
void set_posted()
Definition lci.hpp:467
bool is_done() const
Definition lci.hpp:469
void * buffer
Definition lci.hpp:456
void * user_context
Definition lci.hpp:459
bool is_posted() const
Definition lci.hpp:470
status_t()=default
bool is_retry() const
Definition lci.hpp:471
int get_rank() const
Definition lci.hpp:473
void * get_buffer() const
Definition lci.hpp:474
int rank
Definition lci.hpp:455
size_t get_size() const
Definition lci.hpp:475