LCI v2.0.0-dev
For Asynchronous Multithreaded Communication
Loading...
Searching...
No Matches
lci.hpp
Go to the documentation of this file.
1// Copyright (c) 2025 The LCI Project Authors
2// SPDX-License-Identifier: NCSA
3
4#ifndef LCI_API_LCI_HPP
5#define LCI_API_LCI_HPP
6
7#include <memory>
8#include <stdexcept>
9#include <vector>
10#include <string>
11#include <cstring>
12#include <functional>
13
14#include "lci_config.hpp"
15
20
25
30
35
40
45
51
56
61namespace lci
62{
69
76
77// mimic std::optional as we don't want to force c++17 for now
/**
 * @brief Minimal stand-in for std::optional (C++17 is not required here).
 *
 * Stores a value of type T together with a flag recording whether the value
 * counts as "set".
 */
template <typename T>
struct option_t {
  /// Construct an unset option holding a value-initialized T.
  option_t() : m_value(), m_is_set(false) {}
  /// Construct a set option holding value_.
  option_t(T value_) : m_value(value_), m_is_set(true) {}
  /// Construct from a value and an explicit "set" flag.
  option_t(T value_, bool is_set_)
      : m_value(value_), m_is_set(is_set_) {}  // set default value
  /// Return the stored value when set, otherwise default_value.
  T get_value_or(T default_value) const
  {
    return m_is_set ? m_value : default_value;
  }
  /// Copy the stored value into *value only when set; return whether it was
  /// set. *value is left untouched when the option is unset.
  bool get_set_value(T* value) const
  {
    if (m_is_set) {
      *value = this->m_value;
      return true;
    }
    return false;
  }
  /// Return the stored value regardless of the "set" flag.
  T get_value() const { return m_value; }
  /// Return whether the option is marked as set.
  bool is_set() const { return m_is_set; }
  /// Implicit conversion to the stored value (ignores the "set" flag).
  operator T() const { return m_value; }
  T m_value;      // the stored value
  bool m_is_set;  // whether m_value counts as set
};
102} // namespace lci
103
104#include "lci_binding_pre.hpp"
105
106namespace lci
107{
146
152const char* get_errorcode_str(errorcode_t errorcode);
153
160struct error_t {
167 error_t(errorcode_t errorcode_) : errorcode(errorcode_) {}
176 bool is_done() const
177 {
180 }
181
185 bool is_posted() const
186 {
189 }
190
194 bool is_retry() const
195 {
198 }
199
203 const char* get_str() const { return lci::get_errorcode_str(errorcode); }
204};
205
217
224
235
242
253
260
271
278
284using net_imm_data_t = uint32_t;
285
299
304const mr_t MR_HOST = mr_t();
305
310const mr_t MR_DEVICE = mr_t(reinterpret_cast<void*>(0x1));
311
317const mr_t MR_UNKNOWN = mr_t(reinterpret_cast<void*>(0x2));
318
319inline bool mr_t::is_empty() const
320{
321 return reinterpret_cast<uintptr_t>(p_impl) < 3;
322}
323
/// The type of remote memory region: a base value plus an opaque key.
struct rmr_t {
  uintptr_t base;
  uint64_t opaque_rkey;
  /// Construct an empty descriptor with both fields zeroed.
  rmr_t() : base(0), opaque_rkey(0) {}
  /// True only when both fields are zero (the default-constructed state).
  bool is_empty() const { return base == 0 && opaque_rkey == 0; }
};
336
342
347using tag_t = uint64_t;
348
353enum class direction_t {
356};
357
364using rcomp_t = uint32_t;
365
370const int ANY_SOURCE = -1;
371
376const tag_t ANY_TAG = static_cast<tag_t>(-1);
377
/// The type of matching entry (send side or receive side).
enum class matching_entry_type_t : unsigned {
  send = 0,
  recv = 1,
};
387
392enum class matching_policy_t : unsigned {
393 none = 0,
397 max = 4,
398};
399
403using matching_entry_key_t = uint64_t;
409
422
428 virtual void* allocate(size_t size) = 0;
429 virtual void deallocate(void* ptr) = 0;
430 virtual ~allocator_base_t() = default;
431};
432
434 void* allocate(size_t size) { return malloc(size); }
435 void deallocate(void* ptr) { free(ptr); }
436};
437extern allocator_default_t g_allocator_default;
438
443struct status_t {
445 int rank = -1;
446 void* buffer = nullptr;
447 size_t size = 0;
449 void* user_context = nullptr;
450 status_t() = default;
451 status_t(errorcode_t error_) : error(error_) {}
452 explicit status_t(void* user_context_)
453 : error(errorcode_t::done), user_context(user_context_)
454 {
455 }
459 bool is_done() const { return error.is_done(); }
460 bool is_posted() const { return error.is_posted(); }
461 bool is_retry() const { return error.is_retry(); }
462 error_t get_error() const { return error; }
463 int get_rank() const { return rank; }
464 void* get_buffer() const { return buffer; }
465 size_t get_size() const { return size; }
466 tag_t get_tag() const { return tag; }
467 void* get_user_context() const { return user_context; }
468};
469
474const comp_t COMP_NULL = comp_t(reinterpret_cast<comp_impl_t*>(0x0));
475
481 comp_t(reinterpret_cast<comp_impl_t*>(0x0));
482
488const comp_t COMP_NULL_RETRY = comp_t(reinterpret_cast<comp_impl_t*>(0x1));
489
495 comp_t(reinterpret_cast<comp_impl_t*>(0x1));
496
497inline bool comp_t::is_empty() const
498{
499 return reinterpret_cast<uintptr_t>(p_impl) <= 1;
500}
501
509{
510 public:
512 comp_impl_t() = default;
513 comp_impl_t(const attr_t& attr_) : attr(attr_) {}
514 virtual ~comp_impl_t() = default;
515 virtual void signal(status_t) = 0;
517};
518
523using comp_handler_t = void (*)(status_t status);
524
530using reduce_op_t = void (*)(const void* left, const void* right, void* dst,
531 size_t n);
536using graph_node_t = void*;
537
542const graph_node_t GRAPH_START = reinterpret_cast<graph_node_t>(0x1);
543const graph_node_t GRAPH_END = reinterpret_cast<graph_node_t>(0x2);
544
550using graph_node_run_cb_t = status_t (*)(void* value);
551
559
565using graph_node_free_cb_t = void (*)(void* value);
566
571using graph_edge_run_cb_t = void (*)(status_t status, void* src_value,
572 void* dst_value);
573
574} // namespace lci
575
576#include "lci_binding_post.hpp"
577
578namespace lci
579{
580/***********************************************************************
581 * Overloading graph_add_node for functor
582 **********************************************************************/
583#if __cplusplus >= 201703L
584template <typename T>
585status_t graph_execute_op_fn(void* value)
586{
587 auto op = static_cast<T*>(value);
588 using result_t = std::invoke_result_t<T>;
589
590 if constexpr (std::is_same_v<result_t, status_t>) {
591 status_t result = (*op)();
592 return result;
593 } else if constexpr (std::is_same_v<result_t, errorcode_t>) {
594 errorcode_t result = (*op)();
595 return result;
596 } else {
597 (*op)();
598 return errorcode_t::done;
599 }
600}
601#else
602// Specialization for status_t return type
603template <typename T>
604typename std::enable_if<
605 std::is_same<typename std::result_of<T()>::type, status_t>::value,
606 status_t>::type
608{
609 auto op = static_cast<T*>(value);
610 status_t result = (*op)();
611 return result;
612}
613
614// Specialization for errorcode_t return type
615template <typename T>
616typename std::enable_if<
617 std::is_same<typename std::result_of<T()>::type, errorcode_t>::value,
618 status_t>::type
620{
621 auto op = static_cast<T*>(value);
622 errorcode_t result = (*op)();
623 return result;
624}
625
626// Specialization for all other return types
627template <typename T>
628typename std::enable_if<
629 !std::is_same<typename std::result_of<T()>::type, status_t>::value &&
630 !std::is_same<typename std::result_of<T()>::type, errorcode_t>::value,
631 status_t>::type
633{
634 auto op = static_cast<T*>(value);
635 (*op)();
636 return errorcode_t::done;
637}
638#endif
639
/**
 * @brief Destroy the T object stored behind @p value.
 *
 * @param value Pointer to a T object, passed as void*. It is released with
 * `delete`, so it must have been allocated with `new` (or be null).
 */
template <typename T>
void graph_free_op_fn(void* value)
{
  auto op = static_cast<T*>(value);
  delete op;
}
646
655template <typename T>
657{
659 T* fn = new T(op);
661 auto ret = graph_add_node_x(graph, wrapper)
662 .value(reinterpret_cast<void*>(fn))
663 .free_cb(free_cb)();
664 fn->user_context(ret);
665 return ret;
666}
667
668} // namespace lci
669
670#endif // LCI_API_LCI_HPP
Completion object implementation base type.
Definition lci.hpp:509
comp_impl_t(const attr_t &attr_)
Definition lci.hpp:513
virtual ~comp_impl_t()=default
comp_attr_t attr
Definition lci.hpp:516
comp_impl_t()=default
comp_attr_t attr_t
Definition lci.hpp:511
virtual void signal(status_t)=0
The actual implementation for RESOURCE comp.
Definition lci_binding_pre.hpp:460
bool is_empty() const
Definition lci.hpp:497
comp_impl_t * p_impl
Definition lci_binding_pre.hpp:471
The actual implementation for graph_add_node.
Definition lci_binding_post.hpp:2419
graph_add_node_x && free_cb(graph_node_free_cb_t free_cb_in)
Definition lci_binding_post.hpp:2435
graph_add_node_x && value(void *value_in)
Definition lci_binding_post.hpp:2434
The actual implementation for RESOURCE mr.
Definition lci_binding_pre.hpp:233
mr_impl_t * p_impl
Definition lci_binding_pre.hpp:239
bool is_empty() const
Definition lci.hpp:319
graph_node_t graph_add_node_op(comp_t graph, const T &op)
Add a functor as a node to the completion graph.
Definition lci.hpp:656
broadcast_algorithm_t
The type of broadcast algorithm.
Definition lci.hpp:229
errorcode_t
The actual error code for LCI API functions.
Definition lci.hpp:119
void(*)(status_t status, void *src_value, void *dst_value) graph_edge_run_cb_t
The function signature for an edge function in the completion graph.
Definition lci.hpp:571
const rmr_t RMR_NULL
The NULL value of rkey_t.
Definition lci.hpp:341
uint64_t matching_entry_key_t
The type of matching engine entry key.
Definition lci.hpp:403
const mr_t MR_DEVICE
A special mr_t value for device memory.
Definition lci.hpp:310
const int ANY_SOURCE
Special rank value for any-source receive.
Definition lci.hpp:370
status_t(*)(void *value) graph_node_run_cb_t
The function signature for a node function in the completion graph.
Definition lci.hpp:550
net_opcode_t
The Type of network communication operation codes.
Definition lci.hpp:210
const mr_t MR_HOST
A special mr_t value for host memory.
Definition lci.hpp:304
direction_t
The enum class of communication direction.
Definition lci.hpp:353
void(*)(void *value) graph_node_free_cb_t
The function signature for a callback that will be triggered when the node is freed.
Definition lci.hpp:565
void(*)(const void *left, const void *right, void *dst, size_t n) reduce_op_t
The user-defined reduction operation.
Definition lci.hpp:530
matching_policy_t
Enum class for matching policy.
Definition lci.hpp:392
void * graph_node_t
The node type for the completion graph.
Definition lci.hpp:536
const tag_t ANY_TAG
Special tag value for any-tag receive.
Definition lci.hpp:376
const graph_node_run_cb_t GRAPH_NODE_DUMMY_CB
A dummy callback function for a graph node.
Definition lci.hpp:558
allreduce_algorithm_t
The type of allreduce algorithm.
Definition lci.hpp:265
reduce_scatter_algorithm_t
The type of reduce scatter algorithm.
Definition lci.hpp:247
const comp_t COMP_NULL_EXPECT_DONE
Deprecated. Same as COMP_NULL.
Definition lci.hpp:480
const mr_t MR_UNKNOWN
A special mr_t value for unknown memory. LCI will detect the memory type automatically.
Definition lci.hpp:317
void * matching_entry_val_t
The type of matching engine entry value.
Definition lci.hpp:408
matching_entry_type_t
The type of matching entry.
Definition lci.hpp:383
uint32_t rcomp_t
The type of remote completion handler.
Definition lci.hpp:364
void(*)(status_t status) comp_handler_t
Function Signature for completion handler.
Definition lci.hpp:523
const graph_node_t GRAPH_START
The start node of the completion graph.
Definition lci.hpp:542
const comp_t COMP_NULL
Special completion object setting allow_posted to false.
Definition lci.hpp:474
const comp_t COMP_NULL_EXPECT_DONE_OR_RETRY
Deprecated. Same as COMP_NULL_RETRY.
Definition lci.hpp:494
uint64_t tag_t
The type of tag.
Definition lci.hpp:347
comp_semantic_t
The enum class of completion semantic.
Definition lci.hpp:418
uint32_t net_imm_data_t
The type of network-layer immediate data field.
Definition lci.hpp:284
const comp_t COMP_NULL_RETRY
Special completion object setting allow_posted and allow_retry to false.
Definition lci.hpp:488
@ ring
Definition lci.hpp:233
@ direct
Definition lci.hpp:231
@ tree
Definition lci.hpp:232
@ retry
Definition lci.hpp:132
@ retry_nopacket
Definition lci.hpp:136
@ done_max
Definition lci.hpp:124
@ posted_backlog
Definition lci.hpp:128
@ done
Definition lci.hpp:121
@ retry_min
Definition lci.hpp:131
@ done_min
Definition lci.hpp:120
@ retry_lock
Definition lci.hpp:135
@ posted_min
Definition lci.hpp:125
@ retry_max
Definition lci.hpp:142
@ posted
Definition lci.hpp:126
@ retry_backlog
Definition lci.hpp:140
@ retry_init
Definition lci.hpp:134
@ done_backlog
Definition lci.hpp:122
@ retry_nomem
Definition lci.hpp:138
@ fatal
Definition lci.hpp:143
@ posted_max
Definition lci.hpp:130
@ READ
Definition lci.hpp:215
@ SEND
Definition lci.hpp:211
@ REMOTE_WRITE
Definition lci.hpp:214
@ RECV
Definition lci.hpp:212
@ WRITE
Definition lci.hpp:213
@ IN
Definition lci.hpp:355
@ OUT
Definition lci.hpp:354
@ max
Definition lci.hpp:397
@ rank_tag
Definition lci.hpp:396
@ tag_only
Definition lci.hpp:395
@ rank_only
Definition lci.hpp:394
@ send
Definition lci.hpp:384
@ recv
Definition lci.hpp:385
@ network
Definition lci.hpp:420
@ memory
Definition lci.hpp:419
All LCI API functions and classes are defined in this namespace.
const char * get_allreduce_algorithm_str(broadcast_algorithm_t algorithm)
Get the string representation of a collective algorithm.
attr_backend_t
Definition lci.hpp:63
@ none
Definition lci.hpp:64
@ ibv
Definition lci.hpp:65
@ ofi
Definition lci.hpp:66
@ ucx
Definition lci.hpp:67
const graph_node_t GRAPH_END
Definition lci.hpp:543
const char * get_reduce_scatter_algorithm_str(broadcast_algorithm_t algorithm)
Get the string representation of a collective algorithm.
allocator_default_t g_allocator_default
@ graph
Definition lci_binding_pre.hpp:57
const char * get_errorcode_str(errorcode_t errorcode)
Get the string representation of an error code.
void graph_free_op_fn(void *value)
Definition lci.hpp:641
attr_net_lock_mode_t
Definition lci.hpp:70
@ LCI_NET_TRYLOCK_POLL
Definition lci.hpp:73
@ LCI_NET_TRYLOCK_SEND
Definition lci.hpp:71
@ LCI_NET_TRYLOCK_RECV
Definition lci.hpp:72
@ LCI_NET_TRYLOCK_MAX
Definition lci.hpp:74
const char * get_net_opcode_str(net_opcode_t opcode)
Get the string representation of a network operation code.
std::enable_if< std::is_same< typenamestd::result_of< T()>::type, status_t >::value, status_t >::type graph_execute_op_fn(void *value)
Definition lci.hpp:607
const char * get_broadcast_algorithm_str(broadcast_algorithm_t algorithm)
Get the string representation of a collective algorithm.
The user-defined allocator type.
Definition lci.hpp:427
virtual ~allocator_base_t()=default
virtual void deallocate(void *ptr)=0
virtual void * allocate(size_t size)=0
Definition lci.hpp:433
void * allocate(size_t size)
Definition lci.hpp:434
void deallocate(void *ptr)
Definition lci.hpp:435
Definition lci_binding_pre.hpp:431
Wrapper class for error code.
Definition lci.hpp:160
bool is_done() const
Check if the error code is in the done category.
Definition lci.hpp:176
error_t()
Definition lci.hpp:162
bool is_posted() const
Check if the error code is in the posted category.
Definition lci.hpp:185
const char * get_str() const
Get the string representation of the error code.
Definition lci.hpp:203
errorcode_t errorcode
Definition lci.hpp:161
void reset_retry()
Reset the error code to retry.
Definition lci.hpp:171
error_t(errorcode_t errorcode_)
Construct an error_t object with a specific error code.
Definition lci.hpp:167
bool is_retry() const
Check if the error code is in the retry category.
Definition lci.hpp:194
The struct for network status.
Definition lci.hpp:292
net_imm_data_t imm_data
Definition lci.hpp:297
int rank
Definition lci.hpp:294
void * user_context
Definition lci.hpp:295
net_opcode_t opcode
Definition lci.hpp:293
size_t length
Definition lci.hpp:296
bool m_is_set
Definition lci.hpp:100
option_t()
Definition lci.hpp:80
T get_value() const
Definition lci.hpp:96
T get_value_or(T default_value) const
Definition lci.hpp:84
bool get_set_value(T *value) const
Definition lci.hpp:88
T m_value
Definition lci.hpp:99
option_t(T value_, bool is_set_)
Definition lci.hpp:82
bool is_set() const
Definition lci.hpp:97
option_t(T value_)
Definition lci.hpp:81
The type of remote memory region.
Definition lci.hpp:330
uintptr_t base
Definition lci.hpp:331
bool is_empty() const
Definition lci.hpp:334
uint64_t opaque_rkey
Definition lci.hpp:332
rmr_t()
Definition lci.hpp:333
The type of the completion descriptor for a posted communication.
Definition lci.hpp:443
tag_t get_tag() const
Definition lci.hpp:466
status_t(errorcode_t error_)
Definition lci.hpp:451
status_t(void *user_context_)
Definition lci.hpp:452
error_t get_error() const
Definition lci.hpp:462
void set_retry()
Definition lci.hpp:458
void * get_user_context() const
Definition lci.hpp:467
error_t error
Definition lci.hpp:444
void set_done()
Definition lci.hpp:456
tag_t tag
Definition lci.hpp:448
size_t size
Definition lci.hpp:447
void set_posted()
Definition lci.hpp:457
bool is_done() const
Definition lci.hpp:459
void * buffer
Definition lci.hpp:446
void * user_context
Definition lci.hpp:449
bool is_posted() const
Definition lci.hpp:460
status_t()=default
bool is_retry() const
Definition lci.hpp:461
int get_rank() const
Definition lci.hpp:463
void * get_buffer() const
Definition lci.hpp:464
int rank
Definition lci.hpp:445
size_t get_size() const
Definition lci.hpp:465