Experiencing possible race condition with pthread_cond_timedwait. #9840

Closed
g2gps opened this issue Jul 17, 2023 · 4 comments

Comments

g2gps (Contributor) commented Jul 17, 2023

I'm using a combination of condition variables and mutexes for thread synchronization on a shared resource. I seem to be experiencing a race condition in which a signal on a condition variable is missed, which results in the application logic becoming deadlocked, with every thread waiting on my interlocking mutexes.

I've attempted to extract the problematic logic from our existing application into a minimal reproducible example, which still encounters the issue:

#include <assert.h>
#include <err.h>
#include <fcntl.h>
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/time.h>
#include <unistd.h>

#include <debug.h>

// clang-format off
struct trigger_cmd {
  pthread_mutex_t mutex;
  pthread_cond_t cmd_cond;
  pthread_cond_t result_cond;
  enum Type {
    trg_NONE,
    trg_START,
    trg_HALT,
  } trigger;
} core = {
    PTHREAD_MUTEX_INITIALIZER, 
    PTHREAD_COND_INITIALIZER,
    PTHREAD_COND_INITIALIZER,
    trg_NONE
};
// clang-format on

pthread_mutex_t serialise = PTHREAD_MUTEX_INITIALIZER;

size_t get_random(size_t min, size_t max) {
  FILE *f = fopen("/dev/urandom", "rb");
  if (f == NULL) {
    printf("Can't get random number\n");
    exit(EXIT_FAILURE);
  }

  size_t buffer;
  int count = fread(&buffer, sizeof(size_t), 1, f);
  fclose(f);
  if (!count) {
    printf("Can't get random number\n");
    exit(EXIT_FAILURE);
  }
  return min + (buffer % max);
}

void busyloop() {
  size_t us = get_random(0, 1000000);
  sinfo("Sleep for %u\n", us);
  usleep(us);
  sinfo("Wake\n");
}

void trigger_start() {
  sinfo("Lock serialise: core %d, ser %d\n", core.mutex.pid, serialise.pid);
  pthread_mutex_lock(&serialise);
  sinfo("Lock core: core %d, ser %d\n", core.mutex.pid, serialise.pid);
  pthread_mutex_lock(&core.mutex);

  core.trigger = trg_START;

  sinfo("Signal on cmd: core %d, ser %d\n", core.mutex.pid, serialise.pid);
  pthread_cond_signal(&core.cmd_cond);
  sinfo("Wait on result: core %d, ser %d\n", core.mutex.pid, serialise.pid);
  pthread_cond_wait(&core.result_cond, &core.mutex);

  sinfo("Unlock core: core %d, ser %d\n", core.mutex.pid, serialise.pid);
  pthread_mutex_unlock(&core.mutex);
  sinfo("Unlock serialise: core %d, ser %d\n", core.mutex.pid, serialise.pid);
  pthread_mutex_unlock(&serialise);
}

void trigger_halt() {

  sinfo("Lock serialise: core %d, ser %d\n", core.mutex.pid, serialise.pid);
  pthread_mutex_lock(&serialise);
  sinfo("Lock core: core %d, ser %d\n", core.mutex.pid, serialise.pid);
  pthread_mutex_lock(&core.mutex);

  core.trigger = trg_HALT;

  sinfo("Signal on cmd: core %d, ser %d\n", core.mutex.pid, serialise.pid);
  pthread_cond_signal(&core.cmd_cond);
  sinfo("Wait on result: core %d, ser %d\n", core.mutex.pid, serialise.pid);
  pthread_cond_wait(&core.result_cond, &core.mutex);

  sinfo("Unlock core: core %d, ser %d\n", core.mutex.pid, serialise.pid);
  pthread_mutex_unlock(&core.mutex);
  sinfo("Unlock serialise: core %d, ser %d\n", core.mutex.pid, serialise.pid);
  pthread_mutex_unlock(&serialise);
}

static void process_trigger(void) {
  switch (core.trigger) {
  case trg_START:
    busyloop();
    break;
  default:
  case trg_HALT:
    break;
  case trg_NONE:
    return;
  }
  sinfo("Signal on result: core %d, ser %d\n", core.mutex.pid, serialise.pid);
  pthread_cond_signal(&core.result_cond);
}

void add_time(struct timespec *expire, size_t milliseconds) {
  assert(milliseconds < 1000);
  const long int billion = 1e9;
  const long int million = 1e6;
  clock_gettime(CLOCK_REALTIME, expire);
  expire->tv_nsec += milliseconds * million;
  if (expire->tv_nsec > billion) {
    expire->tv_sec += 1;
    expire->tv_nsec -= billion;
  }
}

int worker(void) {
  struct timespec expire;
  size_t count = 0;

  sinfo("Lock core: core %d, ser %d\n", core.mutex.pid, serialise.pid);
  pthread_mutex_lock(&core.mutex);
  sinfo("Process inital trigger %d, ser %d\n", core.mutex.pid, serialise.pid);
  process_trigger();

  while (true) {
    // Reset state for all code paths.
    core.trigger = trg_NONE;
    add_time(&expire, 97);

    sinfo("Wait on cmd: core %d, ser %d\n", core.mutex.pid, serialise.pid);
#if 1
    int rc = pthread_cond_timedwait(&core.cmd_cond, &core.mutex, &expire);
#else
    int rc = pthread_cond_wait(&core.cmd_cond, &core.mutex);
#endif
    sinfo("timed wait rd %d, core %d, ser %d\n", rc, core.mutex.pid,
          serialise.pid);
    switch (rc) {
    case 0:
      // Something to do;
      process_trigger();
      count = 0;
      break;

    default:
      count++;
      break;
    }
    if (count > 20) {
      PANIC();
    }
    sinfo("Finished work: core %d, ser %d\n", core.mutex.pid, serialise.pid);
  }

  pthread_cond_destroy(&core.cmd_cond);
  pthread_cond_destroy(&core.result_cond);
  pthread_mutex_unlock(&core.mutex);
  pthread_mutex_destroy(&core.mutex);
  return 0;
}

void *trigger_generator(void *arg) {
  (void)arg;

  // clang-format off
  struct trigger_map {
    const char* name;
    void (*handler)();
  }tm[] = {
    {"start", trigger_start },
    {"halt",  trigger_halt  },
  };
  // clang-format on
  const size_t trigger_count = sizeof(tm) / sizeof(struct trigger_map);

  while (1) {
    size_t index = get_random(0, trigger_count);
    sinfo("trigger %s, core %d, ser %d\n", tm[index].name, core.mutex.pid,
          serialise.pid);
    tm[index].handler();
    usleep(500000);
  }
};

int main(int argc, char *argv[]) {
  (void)argc;
  (void)argv;

#if 1
  {
    pthread_mutexattr_t attr;
    pthread_mutexattr_init(&attr);
    pthread_mutexattr_settype(&attr, PTHREAD_PRIO_NONE);
    pthread_mutex_init(&serialise, &attr);
    pthread_mutex_init(&core.mutex, &attr);
    sem_setprotocol(&core.cmd_cond.sem, SEM_PRIO_NONE);
    sem_setprotocol(&core.result_cond.sem, SEM_PRIO_NONE);
  }
#endif

  // Just assume the default priority is 100 and create 3 threads around this median to produce triggers.

  for (int i = 0; i < 3; i++) {
    int priority = 99 + i;
    struct sched_param param = {priority};
    pthread_t id;
    pthread_attr_t attr;
    pthread_attr_init(&attr);
    pthread_attr_setschedparam(&attr, &param);
    if ((pthread_create(&id, &attr, &trigger_generator, NULL)) == 0) {
      pthread_detach(id);
    } else {
      printf("pthread create failed..\n");
      exit(EXIT_FAILURE);
    }
    pthread_attr_destroy(&attr);
  }
  return worker();
}

Here's what I believe to be the significant parts of my defconfig:

CONFIG_ALARM_ARCH=y
CONFIG_ARCH="risc-v"
CONFIG_ARCH_ADDRENV=y
CONFIG_ARCH_BOARD_CUSTOM=y
CONFIG_ARCH_BOARD_CUSTOM_DIR=""
CONFIG_ARCH_CHIP="litex"
CONFIG_ARCH_RISCV=y
CONFIG_ARCH_USE_MMU=y
CONFIG_ARCH_USE_S_MODE=y
CONFIG_BOARD_LOOPSPERMSEC=5259
CONFIG_BUILD_KERNEL=y
CONFIG_DEV_URANDOM=y
CONFIG_ONESHOT=y
CONFIG_PRIORITY_INHERITANCE=y
CONFIG_PTHREAD_MUTEX_TYPES=y
CONFIG_SCHED_TICKLESS=y
CONFIG_USEC_PER_TICK=1

Here's a sample of the syslog for this program, around the point where the signal seems to be missed.

[ 6] [  INFO] pthread_cond_clockwait: cond=0x10401038 mutex=0x1040101c abstime=0x1080bf78
[ 6] [  INFO] pthread_cond_clockwait: Give up mutex...
[13] [  INFO] trigger_generator: trigger start, core -1, ser -1
[13] [  INFO] trigger_start: Lock serialise: core -1, ser -1
[13] [  INFO] pthread_mutex_timedlock: mutex=0x10401000
[13] [  INFO] pthread_mutex_timedlock: Returning 0
[13] [  INFO] trigger_start: Lock core: core -1, ser 13
[13] [  INFO] pthread_mutex_timedlock: mutex=0x1040101c
[13] [  INFO] pthread_mutex_timedlock: Returning 0
[13] [  INFO] trigger_start: Signal on cmd: core 13, ser 13
[13] [  INFO] pthread_cond_signal: cond=0x10401038
[13] [  INFO] pthread_cond_signal: sval=-1
[13] [  INFO] pthread_cond_signal: Signalling...
[13] [  INFO] pthread_cond_signal: Returning 0
[13] [  INFO] trigger_start: Wait on result: core 13, ser 13
[13] [  INFO] pthread_cond_wait: cond=0x1040104c mutex=0x1040101c
[13] [  INFO] pthread_cond_wait: Give up mutex / take cond
[ 6] [  INFO] pthread_cond_clockwait: Re-locking...
[ 6] [  INFO] pthread_cond_clockwait: Returning 110
[ 6] [  INFO] worker: timed wait rd 110, core 6, ser 13
[ 6] [  INFO] worker: Finished work: core 6, ser 13
[ 6] [  INFO] worker: Wait on cmd: core 6, ser 13

From this output:

  1. Worker (PID 6) waits on the command condition variable and releases the mutex.
  2. A producer (PID 13) takes the serialise and core mutexes.
  3. The producer signals on the command condition variable.
  4. The producer waits on the result condition variable and releases the core mutex.
  5. The worker re-locks the core mutex and the timed wait returns 110 (ETIMEDOUT).

At this point, I would have thought pthread_cond_timedwait would return 0, since the signal did occur. However, it seems that the signal is delivered either before or after the semaphore (in the command condition variable) is waited on.
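
For context, the usual defensive pattern is to guard the wait with a shared predicate and treat the return code of pthread_cond_timedwait only as a timeout indication, then decide based on the predicate after the wait returns. A minimal sketch of that idiom (plain POSIX; all names here are illustrative and not taken from the reproduction above):

#include <errno.h>
#include <pthread.h>
#include <stdbool.h>
#include <time.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
static bool work_ready = false; /* Shared predicate, protected by lock. */

/* Wait up to `ms` milliseconds for work_ready to become true. The
 * predicate, not the return code of pthread_cond_timedwait, decides
 * whether there is work to do. */
static bool wait_for_work(unsigned ms) {
  struct timespec deadline;
  clock_gettime(CLOCK_REALTIME, &deadline);
  deadline.tv_sec += ms / 1000;
  deadline.tv_nsec += (long)(ms % 1000) * 1000000L;
  if (deadline.tv_nsec >= 1000000000L) {
    deadline.tv_sec += 1;
    deadline.tv_nsec -= 1000000000L;
  }

  pthread_mutex_lock(&lock);
  while (!work_ready) {
    int rc = pthread_cond_timedwait(&cond, &lock, &deadline);
    if (rc == ETIMEDOUT) {
      break; /* Deadline passed; fall through and check the predicate. */
    }
  }
  bool ready = work_ready;
  work_ready = false;
  pthread_mutex_unlock(&lock);
  return ready;
}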

There are a few things I've experimented with to try to narrow down the issue:

  1. Using the non-standard additions, as outlined in the wiki -> No effect on the outcome.
  2. Changing from pthread_cond_timedwait to pthread_cond_wait. -> Fixes the issue I'm seeing; however, our application requires the use of timedwait here.
  3. Switching from tickless to ticked scheduler. -> No effect on the outcome.
  4. Running without CONFIG_DEBUG_FEATURES to remove syslog output. -> Same issue, however it happens less often.
  5. Building and running using qemu-rv32 port. -> Haven't been able to reproduce the issue. Although I haven't run it for extended durations.
  6. Removing the non-standard components of my example and building and running with gcc on my Linux host machine. -> Can't reproduce the issue here.
  7. Changing the priorities of all threads producing triggers to be below that of the worker thread. -> No difference.

There are a few comments I've found while looking through the source which make me think an assumption is made that the clock_id time source is stopped while interrupts are disabled, which may not be the case on all platforms.

From sched/semaphore/sem_clockwait.c:

int nxsem_clockwait(FAR sem_t *sem, clockid_t clockid,
                    FAR const struct timespec *abstime)
{
  FAR struct tcb_s *rtcb = this_task();
  irqstate_t flags;
  sclock_t ticks;
  int status;
  int ret = ERROR;

  DEBUGASSERT(sem != NULL && abstime != NULL);
  DEBUGASSERT(up_interrupt_context() == false);

  /* We will disable interrupts until we have completed the semaphore
   * wait.  We need to do this (as opposed to just disabling pre-emption)
   * because there could be interrupt handlers that are asynchronously
   * posting semaphores and to prevent race conditions with watchdog
   * timeout.  This is not too bad because interrupts will be re-
   * enabled while we are blocked waiting for the semaphore.
   */

  flags = enter_critical_section();

  /* Try to take the semaphore without waiting. */

  ret = nxsem_trywait(sem);
  if (ret == OK)
    {
      /* We got it! */

      goto out;
    }

From sched/pthread/pthread_condclockwait.c:

      sinfo("Give up mutex...\n");

      /* We must disable pre-emption and interrupts here so that
       * the time stays valid until the wait begins.   This adds
       * complexity because we assure that interrupts and
       * pre-emption are re-enabled correctly.
       */

      sched_lock();
      flags = enter_critical_section();

If that is the case, I could see how the absolute time given to pthread_cond_timedwait may have expired before nxsem_wait is called in nxsem_clockwait. However, I would have thought that the scheduler is locked between when the mutex is released and the wait is started, so this shouldn't have any effect.
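
The "deadline already expired when the wait begins" case is easy to observe in isolation. The following sketch (plain POSIX, nothing NuttX-specific, illustrative names) passes an absolute time that is already in the past and gets ETIMEDOUT back without blocking, which matches the 110 seen in the log above:

#include <errno.h>
#include <pthread.h>
#include <stdio.h>
#include <time.h>

int main(void) {
  pthread_mutex_t m = PTHREAD_MUTEX_INITIALIZER;
  pthread_cond_t c = PTHREAD_COND_INITIALIZER;
  struct timespec expired;

  /* Build an absolute deadline that is already one second in the past,
   * mimicking a thread that computed its deadline and was then delayed
   * before the wait actually started. */
  clock_gettime(CLOCK_REALTIME, &expired);
  expired.tv_sec -= 1;

  pthread_mutex_lock(&m);
  int rc = pthread_cond_timedwait(&c, &m, &expired);
  pthread_mutex_unlock(&m);

  /* Reports ETIMEDOUT without blocking, even though nothing was signalled. */
  printf("rc=%d, ETIMEDOUT=%d\n", rc, ETIMEDOUT);
  return 0;
}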

I've spent a bit of time trying to understand the interactions, but I don't believe I have a good enough understanding of the NuttX implementation to make any feasible progress. At this point I'm not sure whether it's an issue with the LiteX port, the kernel configuration, or an oversight in our application.

I'd appreciate any assistance tracking down the issue I'm seeing.

acassis (Contributor) commented Jul 25, 2023

@g2gps did you try it on a more stable ARCH like STM32 or ESP32?

pkarashchenko (Contributor) commented:

Also, it would be good if you could check whether disabling priority inheritance makes any difference to the reproduction rate.

g2gps (Contributor, Author) commented Aug 16, 2023

Thanks for the feedback. Enabling or disabling priority inheritance doesn't have a noticeable effect on the time until failure.

I did try building and running on an esp32-devkitc with the following configuration:

# CONFIG_ARCH_LEDS is not set
# CONFIG_NSH_ARGCAT is not set
# CONFIG_NSH_CMDOPT_HEXDUMP is not set
CONFIG_ARCH="xtensa"
CONFIG_ARCH_BOARD="esp32-devkitc"
CONFIG_ARCH_BOARD_COMMON=y
CONFIG_ARCH_BOARD_ESP32_DEVKITC=y
CONFIG_ARCH_CHIP="esp32"
CONFIG_ARCH_CHIP_ESP32=y
CONFIG_ARCH_CHIP_ESP32WROVER=y
CONFIG_ARCH_STACKDUMP=y
CONFIG_ARCH_XTENSA=y
CONFIG_BOARD_LOOPSPERMSEC=16717
CONFIG_BUILTIN=y
CONFIG_DEBUG_ASSERTIONS=y
CONFIG_DEBUG_ASSERTIONS_EXPRESSION=y
CONFIG_DEBUG_ERROR=y
CONFIG_DEBUG_FEATURES=y
CONFIG_DEBUG_INFO=y
CONFIG_DEBUG_SCHED=y
CONFIG_DEBUG_SCHED_ERROR=y
CONFIG_DEBUG_SCHED_INFO=y
CONFIG_DEBUG_SCHED_WARN=y
CONFIG_DEBUG_WARN=y
CONFIG_DEV_URANDOM=y
CONFIG_ESP32_UART0=y
CONFIG_FS_PROCFS=y
CONFIG_IDLETHREAD_STACKSIZE=3072
CONFIG_INIT_ENTRYPOINT="nsh_main"
CONFIG_INTELHEX_BINARY=y
CONFIG_LIBM=y
CONFIG_MM_REGIONS=3
CONFIG_NSH_ARCHINIT=y
CONFIG_NSH_BUILTIN_APPS=y
CONFIG_NSH_FILEIOSIZE=512
CONFIG_NSH_LINELEN=64
CONFIG_NSH_READLINE=y
CONFIG_PREALLOC_TIMERS=4
CONFIG_RAM_SIZE=114688
CONFIG_RAM_START=0x20000000
CONFIG_RR_INTERVAL=200
CONFIG_SCHED_WAITPID=y
CONFIG_START_DAY=6
CONFIG_START_MONTH=12
CONFIG_START_YEAR=2011
CONFIG_SYSLOG_BUFFER=y
CONFIG_SYSLOG_PROCESSID=y
CONFIG_SYSLOG_PROCESS_NAME=y
CONFIG_SYSTEM_NSH=y
CONFIG_UART0_SERIAL_CONSOLE=y

Same result. After a period of time, the signal is missed.

g2gps (Contributor, Author) commented May 15, 2024

This is an issue with our application logic and not NuttX.

What's happening is that the cmd_cond condition variable is being signaled after the absolute time given to pthread_cond_timedwait has expired but before the mutex is re-locked, so the function still returns ETIMEDOUT. From The Open Group documentation, this is the expected behavior of this interface.
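
For anyone hitting the same thing: one way to make the worker tolerate this race is to consult the trigger even when the wait times out, since the signal may have landed between the deadline expiring and the mutex being re-locked. A sketch of that change against the worker loop in the reproduction above (illustrative only, not the actual application fix):

    int rc = pthread_cond_timedwait(&core.cmd_cond, &core.mutex, &expire);
    if (rc == 0 || core.trigger != trg_NONE) {
      /* Either the wait was woken by the signal, or the signal raced with
       * the timeout window: core.trigger, not rc, is the source of truth. */
      process_trigger();
      count = 0;
    } else {
      count++; /* Genuine timeout (ETIMEDOUT) with nothing pending. */
    }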

g2gps closed this as completed May 15, 2024