-
Notifications
You must be signed in to change notification settings - Fork 407
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
UCT/IB/DC: dynamic allocation of dci pools by configuration #9665
base: master
Are you sure you want to change the base?
UCT/IB/DC: dynamic allocation of dci pools by configuration #9665
Conversation
f09c73c
to
d5707b5
Compare
src/uct/ib/dc/dc_mlx5.c
Outdated
@@ -162,6 +162,9 @@ uct_dc_mlx5_ep_create_connected(const uct_ep_params_t *params, uct_ep_h* ep_p) | |||
{ | |||
uct_dc_mlx5_iface_t *iface = ucs_derived_of(params->iface, | |||
uct_dc_mlx5_iface_t); | |||
uint8_t sl = iface->super.super.super.config.sl; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
const
src/uct/ib/dc/dc_mlx5_ep.h
Outdated
uint8_t *dci_index_p); | ||
|
||
static UCS_F_ALWAYS_INLINE void | ||
uct_dc_mlx5_init_dci_config_key(uct_dc_mlx5_dci_config_t *dci_config, |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Minor: func args & init order better match struct members decl order.
uint8_t dci_index, | ||
uint8_t path_index, | ||
int full_handshake) | ||
ucs_status_t uct_dc_mlx5_iface_create_dci(uct_dc_mlx5_iface_t *iface, |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why removed static
? Seems it is not used outside
src/uct/ib/dc/dc_mlx5.c
Outdated
@@ -761,7 +767,7 @@ static void uct_dc_mlx5_iface_dcis_destroy(uct_dc_mlx5_iface_t *iface, | |||
ucs_assert(num_dci_pools <= iface->tx.num_dci_pools); | |||
ucs_assert(num_dcis <= uct_dc_mlx5_iface_total_ndci(iface)); | |||
|
|||
for (dci_index = 0; dci_index < num_dcis; dci_index++) { | |||
for (dci_index = 0; dci_index < iface->tx.dci_counter; dci_index++) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Seems num_dcis
is not used in this func anymore, except the assert
. But the assert
by itself has no meaning if the var is not used elsewhere..
src/uct/ib/dc/dc_mlx5.c
Outdated
const uct_dc_mlx5_dci_config_t *config, | ||
uint8_t pool_size, uint8_t *pool_index_p) | ||
{ | ||
uint8_t full_handshake = iface->flags & |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
const
where possible
src/uct/ib/dc/dc_mlx5.c
Outdated
} | ||
|
||
/** | ||
* Checks wether dci pool config present in the dc_config_hash and then either returning |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Checks whether dci pool config is present in dc_config_hash and returns the matching pool index or creates a new one
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Move docs to .h
src/uct/ib/dc/dc_mlx5.c
Outdated
status = uct_dc_mlx5_iface_create_dci_pool(iface, dci_config, pool_size, | ||
pool_index_p); | ||
if (status != UCS_OK) { | ||
return status; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
kh_del
?
src/uct/ib/dc/dc_mlx5.c
Outdated
|
||
iface->flags |= UCT_DC_MLX5_IFACE_FLAG_KEEPALIVE; | ||
iface->keepalive_dci = dci_index; | ||
iface->keepalive_dci = iface->tx.dci_counter - 1; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can this place be reached with dci_counter
0?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
No, in the current state of the PR a pool is being created with a single dci so dc_counter is at least 1.
src/uct/ib/dc/dc_mlx5.c
Outdated
dci_pool->release_stack_top = -1; | ||
|
||
for (i = 0; i < pool_size; ++i) { | ||
status = uct_dc_mlx5_iface_create_dci( |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We discussed to do "lazy" creation..
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes, it's in progress I will add WIP-DNM label
@roiedanino maybe add some tests? |
Conflicts: src/uct/ib/dc/dc_mlx5.c src/uct/ib/dc/dc_mlx5.h src/uct/ib/dc/dc_mlx5_ep.c
src/uct/ib/dc/dc_mlx5.c
Outdated
@@ -149,15 +149,17 @@ ucs_config_field_t uct_dc_mlx5_iface_config_table[] = { | |||
static ucs_status_t | |||
uct_dc_mlx5_ep_create_connected(const uct_ep_params_t *params, uct_ep_h* ep_p) | |||
{ | |||
uct_dc_mlx5_iface_t *iface = ucs_derived_of(params->iface, | |||
uct_dc_mlx5_iface_t); | |||
uct_dc_mlx5_iface_t *iface = ucs_derived_of(params->iface, |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
revert
src/uct/ib/dc/dc_mlx5.c
Outdated
uct_ib_mlx5_qp_mmio_cleanup(&dci->txwq.super, dci->txwq.reg); | ||
|
||
uct_dc_mlx5_dci_set_invalid(dci); | ||
dci = NULL; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
why needed?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
To avoid valgrind issues, maybe it's not needed
src/uct/ib/dc/dc_mlx5.c
Outdated
dci_pool->stack[i] = dci_index; | ||
++dci_index; | ||
} | ||
ucs_assert_always(pool_index < UCT_DC_MLX5_IFACE_MAX_DCI_POOLS); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
do we still need dci_counter? can we use array length instead?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We need to know what will be the next dci_index somehow
src/uct/ib/dc/dc_mlx5.c
Outdated
(config->key.flags & UCT_DC_MLX5_DCI_CONFIG_MAX_RD_ATOMIC_IS_64) ? | ||
64 : | ||
16, |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
to simplify, maybe max_rd_atomic can be just a uint8_t value in the key (not a flag) ?
then no need for this wrapper function
src/uct/ib/dc/dc_mlx5.c
Outdated
uint8_t pool_size = //(config->key.flags & UCT_DC_MLX5_DCI_CONFIG_KEEPALIVE) ? | ||
// UCT_DC_MLX5_KEEPALIVE_NUM_DCIS : |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
leftover?
src/uct/ib/dc/dc_mlx5_ep.h
Outdated
return status; | ||
} | ||
|
||
stack_ptr = ucs_array_append(&pool->stack, return UCS_ERR_NO_MEMORY); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
stack_ptr var is reundant
|
||
stack_ptr = ucs_array_append(&pool->stack, return UCS_ERR_NO_MEMORY); | ||
*stack_ptr = dci_index; | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
remove space line
src/uct/ib/dc/dc_mlx5_ep.h
Outdated
ep->dci_channel_index = iface->tx.dcis[ep->dci].next_channel_index++ % | ||
iface->tx.num_dci_channels; | ||
ep->dci_channel_index = | ||
ucs_array_elem(&iface->tx.dcis, ep->dci).next_channel_index++ % |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
helper var for dci object
src/uct/ib/dc/dc_mlx5_ep.h
Outdated
@@ -331,9 +392,22 @@ uct_dc_mlx5_ep_basic_init(uct_dc_mlx5_iface_t *iface, uct_dc_mlx5_ep_t *ep) | |||
static UCS_F_ALWAYS_INLINE int | |||
uct_dc_mlx5_iface_dci_can_alloc(uct_dc_mlx5_iface_t *iface, uint8_t pool_index) | |||
{ | |||
ucs_assert_always(pool_index < iface->tx.num_dci_pools); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
adds overhead to fast path
src/uct/ib/dc/dc_mlx5_ep.h
Outdated
|
||
uct_dc_mlx5_iface_dci_schedule_release(iface, dci_index); | ||
|
||
return 1; | ||
} | ||
|
||
static inline ucs_status_t uct_dc_mlx5_iface_dci_alloc(uct_dc_mlx5_iface_t *iface, uct_dc_mlx5_ep_t *ep) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
- why needed to move this function?
- why needed to add return value to it?
Conflicts: src/uct/ib/dc/dc_mlx5.c
What
Instead of creating all dci pools (and dcis) on iface init, dci pools will be created dynamically on demand by configuration
Why ?
As part of the active messages priority feature - creating different pools with different SLs is required, enabling dedicated dci pools for different lanes with different priorities, as we can't change SL of a dci (qp) after creation / connection.
How ?
For each new dc ep created - a matching dci pool will be created unless a dci pool with the same exact configuration already exists in the hash table, in which the configuration is the key and the value is pool_index, so eps which share configuration will also share a dci pool.