Skip to content

Commit

Permalink
Merge pull request #690 from Hakon-Bugge/ibacm_make_nmbr_ep_addr_dynamic
Browse files Browse the repository at this point in the history
ibacm: Allocate end-point addresses dynamically
  • Loading branch information
rleon committed Feb 10, 2020
2 parents 667ed9b + 93f228b commit 7bb125c
Show file tree
Hide file tree
Showing 3 changed files with 223 additions and 95 deletions.
28 changes: 11 additions & 17 deletions ibacm/man/ibacm.1
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ needed to establish a connection, but does not implement the CM protocol.
.P
A primary user of the ibacm service is the librdmacm library. This
enables applications to make use of the ibacm service without code
changes or needing to be aware that the service is in use.
changes or needing to be aware that the service is in use.
librdmacm versions 1.0.12 - 1.0.15 can invoke IB ACM services when built using
the --with-ib_acm option. Version 1.0.16 and newer of librdmacm will automatically
use the IB ACM if it is installed. The IB ACM services tie in under the
Expand All @@ -26,16 +26,16 @@ however existing applications should still see significant connection
scaling benefits using the calls
available in librdmacm 1.0.11 and previous releases.
.P
The IB ACM is focused on being scalable, efficient, and extensible. It implements
The IB ACM is focused on being scalable, efficient, and extensible. It implements
a plugin architecture that allows a vendor to supply its proprietary provider in
addition to the default provider. The current default provider implementation
ibacmp limits network traffic, SA interactions, and centralized
services. Ibacmp supports multiple resolution protocols in order to handle
different fabric topologies.
.P
The IB ACM package is comprised of three components: the ibacm core service,
the default provider ibacmp shared library, and a test/configuration utility
- ib_acme. All three are userspace components and are available for Linux.
the default provider ibacmp shared library, and a test/configuration utility
- ib_acme. All three are userspace components and are available for Linux.
Additional details are given below.
.SH "OPTIONS"
.TP
Expand Down Expand Up @@ -90,15 +90,15 @@ The ibacm service relies on two configuration files.
.P
The ibacm_addr.cfg file contains name and address mappings for each IB
<device, port, pkey> endpoint. Although the names in the ibacm_addr.cfg
file can be anything, ib_acme maps the host name to the IB endpoints. IP
addresses, on the other hand, are assigned dynamically. If the address file
cannot be found, the ibacm service will attempt to create one using default
file can be anything, ib_acme maps the host name to the IB endpoints. IP
addresses, on the other hand, are assigned dynamically. If the address file
cannot be found, the ibacm service will attempt to create one using default
values.
.P
The ibacm_opts.cfg file provides a set of configurable options for the
ibacm core service and default provider, such as timeout, number of retries,
logging level, etc. ib_acme generates the ibacm_opts.cfg file using static
information. If an option file cannot be found, ibacm will use default values.
logging level, etc. ib_acme generates the ibacm_opts.cfg file using static
information. If an option file cannot be found, ibacm will use default values.
.P
ibacm:
.P
Expand Down Expand Up @@ -131,8 +131,8 @@ and destination names or addresses as input to the service, and receive
as output path record data.
.P
The service maps a client's source name/address to a local IB endpoint.
If the destination name/address is not cached locally in the default provider,
it sends a multicast request out on the lowest priority multicast group on the
If the destination name/address is not cached locally in the default provider,
it sends a multicast request out on the lowest priority multicast group on the
local endpoint. The request carries a list of multicast groups that the sender can use.
The recipient of the request selects the highest priority multicast group
that it can use as well and returns that information directly to the sender.
Expand All @@ -146,14 +146,8 @@ The current implementation of the provider ibacmp has several additional restric
- The ibacmp is limited in its handling of dynamic changes.
ibacm must be stopped and restarted if a cluster is reconfigured.
.P
- Cached data does not time out and is only updated if a new resolution
request is received from a different QPN than a cached request.
.P
- Support for IPv6 has not been verified.
.P
- The number of addresses that can be assigned to a single endpoint is
limited to 4.
.P
- The number of multicast groups that an endpoint can support is limited to 2.
.P
The ibacmp contains several internal caches. These include caches for GID
Expand Down
116 changes: 92 additions & 24 deletions ibacm/prov/acmp/src/acmp.c
Original file line number Diff line number Diff line change
Expand Up @@ -162,10 +162,15 @@ struct acmp_send_queue {
struct acmp_addr {
uint16_t type;
union acm_ep_info info;
struct acm_address *addr;
struct acm_address addr;
struct acmp_ep *ep;
};

/*
 * Opaque per-address context handed back to the caller through
 * *addr_context by __acmp_add_addr(). It identifies one address as an
 * (endpoint, index) pair; users turn it back into a struct acmp_addr with
 * ep->addr_info + addr_inx.
 * NOTE(review): presumably an index is stored instead of a pointer so that
 * the context stays valid when addr_info is moved by realloc() - confirm.
 */
struct acmp_addr_ctx {
/* Endpoint whose addr_info array holds the address */
struct acmp_ep *ep;
/* Position of the address inside ep->addr_info */
int addr_inx;
};

struct acmp_ep {
struct acmp_port *port;
struct ibv_cq *cq;
Expand All @@ -186,7 +191,10 @@ struct acmp_ep {
struct list_head active_queue;
struct list_head wait_queue;
enum acmp_state state;
struct acmp_addr addr_info[MAX_EP_ADDR];
/* This lock protects nmbr_ep_addrs and addr_info */
pthread_rwlock_t rwlock;
int nmbr_ep_addrs;
struct acmp_addr *addr_info;
atomic_t counters[ACM_MAX_COUNTER];
};

Expand Down Expand Up @@ -1040,9 +1048,11 @@ acmp_resolve_sa_resp(struct acm_sa_mad *mad)
static struct acmp_addr *
acmp_addr_lookup(struct acmp_ep *ep, uint8_t *addr, uint16_t type)
{
struct acmp_addr *ret = NULL;
int i;

for (i = 0; i < MAX_EP_ADDR; i++) {
pthread_rwlock_rdlock(&ep->rwlock);
for (i = 0; i < ep->nmbr_ep_addrs; i++) {
if (ep->addr_info[i].type != type)
continue;

Expand All @@ -1051,11 +1061,13 @@ acmp_addr_lookup(struct acmp_ep *ep, uint8_t *addr, uint16_t type)
(char *) addr, ACM_MAX_ADDRESS)) ||
!memcmp(ep->addr_info[i].info.addr, addr,
ACM_MAX_ADDRESS)) {
return &ep->addr_info[i];
ret = ep->addr_info + i;
break;
}
}
pthread_rwlock_unlock(&ep->rwlock);

return NULL;
return ret;
}

static void
Expand Down Expand Up @@ -1593,13 +1605,12 @@ static void *acmp_retry_handler(void *context)
return NULL;
}

/* rwlock must be held read-locked */
static int
acmp_query(void *addr_context, struct acm_msg *msg, uint64_t id)
__acmp_query(struct acmp_ep *ep, struct acm_msg *msg, uint64_t id)
{
struct acmp_request *req;
struct ib_sa_mad *mad;
struct acmp_addr *address = addr_context;
struct acmp_ep *ep = address->ep;
uint8_t status;
struct acm_sa_mad *sa_mad;

Expand Down Expand Up @@ -1652,6 +1663,21 @@ acmp_query(void *addr_context, struct acm_msg *msg, uint64_t id)
return acm_query_response(id, msg);
}

/*
 * Query entry point taking an opaque address context.
 *
 * addr_context is a struct acmp_addr_ctx. The address slot it refers to is
 * looked up in the endpoint's addr_info array while the endpoint's rwlock
 * is read-held, and the query itself is carried out by __acmp_query() under
 * that same lock. Returns whatever __acmp_query() returns.
 */
static int
acmp_query(void *addr_context, struct acm_msg *msg, uint64_t id)
{
	struct acmp_addr_ctx *ctx = addr_context;
	int rc;

	pthread_rwlock_rdlock(&ctx->ep->rwlock);
	/* Slot lookup happens under the lock that guards addr_info */
	rc = __acmp_query(ctx->ep->addr_info[ctx->addr_inx].ep, msg, id);
	pthread_rwlock_unlock(&ctx->ep->rwlock);

	return rc;
}

static uint8_t
acmp_send_resolve(struct acmp_ep *ep, struct acmp_dest *dest,
struct acm_ep_addr_data *saddr)
Expand Down Expand Up @@ -1940,7 +1966,8 @@ acmp_resolve_path(struct acmp_ep *ep, struct acm_msg *msg, uint64_t id)
static int
acmp_resolve(void *addr_context, struct acm_msg *msg, uint64_t id)
{
struct acmp_addr *address = addr_context;
struct acmp_addr_ctx *addr_ctx = addr_context;
struct acmp_addr *address = addr_ctx->ep->addr_info + addr_ctx->addr_inx;
struct acmp_ep *ep = address->ep;

if (ep->state != ACMP_READY) {
Expand Down Expand Up @@ -2356,34 +2383,50 @@ static void acmp_ep_preload(struct acmp_ep *ep)
}
}

static int acmp_add_addr(const struct acm_address *addr, void *ep_context,
void **addr_context)
/* rwlock must be held write-locked */
static int __acmp_add_addr(const struct acm_address *addr, struct acmp_ep *ep,
void **addr_context)
{
struct acmp_ep *ep = ep_context;
struct acmp_dest *dest;
struct acmp_addr_ctx *addr_ctx;
int i;

acm_log(2, "\n");

for (i = 0; (i < MAX_EP_ADDR) &&
for (i = 0; (i < ep->nmbr_ep_addrs) &&
(ep->addr_info[i].type != ACM_ADDRESS_INVALID); i++)
;

if (i == MAX_EP_ADDR) {
acm_log(0, "ERROR - no more space for local address\n");
return -1;
if (i == ep->nmbr_ep_addrs) {
struct acmp_addr *new_info;

new_info = realloc(ep->addr_info, (i + 1) * sizeof(*ep->addr_info));
if (!new_info) {
acm_log(0, "ERROR - no more space for local address\n");
return -1;
}
ep->addr_info = new_info;
/* Added memory is not initialized */
memset(ep->addr_info + i, 0, sizeof(*ep->addr_info));
++ep->nmbr_ep_addrs;
}
ep->addr_info[i].type = addr->type;
memcpy(&ep->addr_info[i].info, &addr->info, sizeof(addr->info));
ep->addr_info[i].addr = (struct acm_address *) addr;
memcpy(&ep->addr_info[i].addr, addr, sizeof(*addr));
ep->addr_info[i].ep = ep;

addr_ctx = malloc(sizeof(*addr_ctx));
if (!addr_ctx) {
acm_log(0, "ERROR - unable to alloc address context struct\n");
return -1;
}
addr_ctx->ep = ep;
addr_ctx->addr_inx = i;

if (loopback_prot != ACMP_LOOPBACK_PROT_LOCAL) {
*addr_context = &ep->addr_info[i];
*addr_context = addr_ctx;
return 0;
}

dest = acmp_acquire_dest(ep, addr->type, (uint8_t *) addr->info.addr);
dest = acmp_acquire_dest(ep, addr->type, (uint8_t *)addr->info.addr);
if (!dest) {
acm_log(0, "ERROR - unable to create loopback dest %s\n",
addr->id_string);
Expand All @@ -2404,15 +2447,31 @@ static int acmp_add_addr(const struct acm_address *addr, void *ep_context,
dest->route_timeout = (uint64_t) ~0ULL;
dest->state = ACMP_READY;
acmp_put_dest(dest);
*addr_context = &ep->addr_info[i];
*addr_context = addr_ctx;
acm_log(1, "added loopback dest %s\n", dest->name);

return 0;
}

/*
 * Add a local address to an endpoint.
 *
 * ep_context is the struct acmp_ep to extend. The work is done by
 * __acmp_add_addr(), which must run with the endpoint's rwlock write-held;
 * this wrapper takes and releases that lock around the call. On return,
 * *addr_context is whatever __acmp_add_addr() stored there, and the return
 * value is passed through unchanged.
 */
static int acmp_add_addr(const struct acm_address *addr, void *ep_context,
			 void **addr_context)
{
	struct acmp_ep *endpoint = ep_context;
	int rc;

	acm_log(2, "\n");

	pthread_rwlock_wrlock(&endpoint->rwlock);
	rc = __acmp_add_addr(addr, endpoint, addr_context);
	pthread_rwlock_unlock(&endpoint->rwlock);

	return rc;
}

static void acmp_remove_addr(void *addr_context)
{
struct acmp_addr *address = addr_context;
struct acmp_addr_ctx *addr_ctx = addr_context;
struct acmp_addr *address = addr_ctx->ep->addr_info + addr_ctx->addr_inx;
struct acmp_device *dev;
struct acmp_dest *dest;
struct acmp_ep *ep;
Expand All @@ -2435,7 +2494,7 @@ static void acmp_remove_addr(void *addr_context)
pthread_mutex_lock(&port->lock);
list_for_each(&port->ep_list, ep, entry) {
pthread_mutex_unlock(&port->lock);
dest = acmp_get_dest(ep, address->type, address->addr->info.addr);
dest = acmp_get_dest(ep, address->type, address->addr.info.addr);
if (dest) {
acm_log(2, "Found a dest addr, deleting it\n");
pthread_mutex_lock(&ep->lock);
Expand All @@ -2451,6 +2510,7 @@ static void acmp_remove_addr(void *addr_context)
pthread_mutex_unlock(&acmp_dev_lock);

memset(address, 0, sizeof(*address));
free(addr_ctx);
}

static struct acmp_port *acmp_get_port(struct acm_endpoint *endpoint)
Expand Down Expand Up @@ -2536,6 +2596,14 @@ acmp_alloc_ep(struct acmp_port *port, struct acm_endpoint *endpoint)
pthread_mutex_init(&ep->lock, NULL);
sprintf(ep->id_string, "%s-%d-0x%x", port->dev->verbs->device->name,
port->port_num, endpoint->pkey);

if (pthread_rwlock_init(&ep->rwlock, NULL)) {
free(ep);
return NULL;
}
ep->addr_info = NULL;
ep->nmbr_ep_addrs = 0;

for (i = 0; i < ACM_MAX_COUNTER; i++)
atomic_init(&ep->counters[i]);

Expand Down

0 comments on commit 7bb125c

Please sign in to comment.