Experiencing possible race condition with pthread_cond_timedwait. #9840

Closed
g2gps opened this issue Jul 17, 2023 · 4 comments

Comments

g2gps (Contributor) commented Jul 17, 2023

I'm using a combination of condition variables and mutexes for thread synchronization on a shared resource. I seem to be experiencing a race condition in which a signal on a condition variable is missed, which results in the application logic becoming deadlocked, with every thread waiting on my interlocking mutexes.

I've attempted to extract the problematic logic from our existing application into a minimal reproducible example, which still encounters the issue:

#include <assert.h>
#include <err.h>
#include <fcntl.h>
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/time.h>
#include <unistd.h>

#include <debug.h>

// clang-format off
struct trigger_cmd {
  pthread_mutex_t mutex;
  pthread_cond_t cmd_cond;
  pthread_cond_t result_cond;
  enum Type {
    trg_NONE,
    trg_START,
    trg_HALT,
  } trigger;
} core = {
    PTHREAD_MUTEX_INITIALIZER, 
    PTHREAD_COND_INITIALIZER,
    PTHREAD_COND_INITIALIZER,
    trg_NONE
};
// clang-format on

pthread_mutex_t serialise = PTHREAD_MUTEX_INITIALIZER;

size_t get_random(size_t min, size_t max) {
  FILE *f = fopen("/dev/urandom", "rb");
  if (f == NULL) {
    printf("Can't get random number\n");
    exit(EXIT_FAILURE);
  }

  size_t buffer;
  int count = fread(&buffer, sizeof(size_t), 1, f);
  fclose(f);
  if (!count) {
    printf("Can't get random number\n");
    exit(EXIT_FAILURE);
  }
  return min + (buffer % max);
}

void busyloop() {
  size_t us = get_random(0, 1000000);
  sinfo("Sleep for %u\n", us);
  usleep(us);
  sinfo("Wake\n");
}

void trigger_start() {
  sinfo("Lock serialise: core %d, ser %d\n", core.mutex.pid, serialise.pid);
  pthread_mutex_lock(&serialise);
  sinfo("Lock core: core %d, ser %d\n", core.mutex.pid, serialise.pid);
  pthread_mutex_lock(&core.mutex);

  core.trigger = trg_START;

  sinfo("Signal on cmd: core %d, ser %d\n", core.mutex.pid, serialise.pid);
  pthread_cond_signal(&core.cmd_cond);
  sinfo("Wait on result: core %d, ser %d\n", core.mutex.pid, serialise.pid);
  pthread_cond_wait(&core.result_cond, &core.mutex);

  sinfo("Unlock core: core %d, ser %d\n", core.mutex.pid, serialise.pid);
  pthread_mutex_unlock(&core.mutex);
  sinfo("Unlock serialise: core %d, ser %d\n", core.mutex.pid, serialise.pid);
  pthread_mutex_unlock(&serialise);
}

void trigger_halt() {

  sinfo("Lock serialise: core %d, ser %d\n", core.mutex.pid, serialise.pid);
  pthread_mutex_lock(&serialise);
  sinfo("Lock core: core %d, ser %d\n", core.mutex.pid, serialise.pid);
  pthread_mutex_lock(&core.mutex);

  core.trigger = trg_HALT;

  sinfo("Signal on cmd: core %d, ser %d\n", core.mutex.pid, serialise.pid);
  pthread_cond_signal(&core.cmd_cond);
  sinfo("Wait on result: core %d, ser %d\n", core.mutex.pid, serialise.pid);
  pthread_cond_wait(&core.result_cond, &core.mutex);

  sinfo("Unlock core: core %d, ser %d\n", core.mutex.pid, serialise.pid);
  pthread_mutex_unlock(&core.mutex);
  sinfo("Unlock serialise: core %d, ser %d\n", core.mutex.pid, serialise.pid);
  pthread_mutex_unlock(&serialise);
}

static void process_trigger(void) {
  switch (core.trigger) {
  case trg_START:
    busyloop();
    break;
  default:
  case trg_HALT:
    break;
  case trg_NONE:
    return;
  }
  sinfo("Signal on result: core %d, ser %d\n", core.mutex.pid, serialise.pid);
  pthread_cond_signal(&core.result_cond);
}

void add_time(struct timespec *expire, size_t milliseconds) {
  assert(milliseconds < 1000);
  const long int billion = 1e9;
  const long int million = 1e6;
  clock_gettime(CLOCK_REALTIME, expire);
  expire->tv_nsec += milliseconds * million;
  if (expire->tv_nsec > billion) {
    expire->tv_sec += 1;
    expire->tv_nsec -= billion;
  }
}

int worker(void) {
  struct timespec expire;
  size_t count = 0;

  sinfo("Lock core: core %d, ser %d\n", core.mutex.pid, serialise.pid);
  pthread_mutex_lock(&core.mutex);
  sinfo("Process inital trigger %d, ser %d\n", core.mutex.pid, serialise.pid);
  process_trigger();

  while (true) {
    // Reset state for all code paths.
    core.trigger = trg_NONE;
    add_time(&expire, 97);

    sinfo("Wait on cmd: core %d, ser %d\n", core.mutex.pid, serialise.pid);
#if 1
    int rc = pthread_cond_timedwait(&core.cmd_cond, &core.mutex, &expire);
#else
    int rc = pthread_cond_wait(&core.cmd_cond, &core.mutex);
#endif
    sinfo("timed wait rd %d, core %d, ser %d\n", rc, core.mutex.pid,
          serialise.pid);
    switch (rc) {
    case 0:
      // Something to do;
      process_trigger();
      count = 0;
      break;

    default:
      count++;
      break;
    }
    if (count > 20) {
      PANIC();
    }
    sinfo("Finished work: core %d, ser %d\n", core.mutex.pid, serialise.pid);
  }

  pthread_cond_destroy(&core.cmd_cond);
  pthread_cond_destroy(&core.result_cond);
  pthread_mutex_unlock(&core.mutex);
  pthread_mutex_destroy(&core.mutex);
  return 0;
}

void *trigger_generator(void *arg) {
  (void)arg;

  // clang-format off
  struct trigger_map {
    const char* name;
    void (*handler)();
  }tm[] = {
    {"start", trigger_start },
    {"halt",  trigger_halt  },
  };
  // clang-format on
  const size_t trigger_count = sizeof(tm) / sizeof(struct trigger_map);

  while (1) {
    size_t index = get_random(0, trigger_count);
    sinfo("trigger %s, core %d, ser %d\n", tm[index].name, core.mutex.pid,
          serialise.pid);
    tm[index].handler();
    usleep(500000);
  }
};

int main(int argc, char *argv[]) {
  (void)argc;
  (void)argv;

#if 1
  {
    pthread_mutexattr_t attr;
    pthread_mutexattr_init(&attr);
    pthread_mutexattr_settype(&attr, PTHREAD_PRIO_NONE);
    pthread_mutex_init(&serialise, &attr);
    pthread_mutex_init(&core.mutex, &attr);
    sem_setprotocol(&core.cmd_cond.sem, SEM_PRIO_NONE);
    sem_setprotocol(&core.result_cond.sem, SEM_PRIO_NONE);
  }
#endif

  // Just assume the default priority is 100 and create 3 threads around this median to produce triggers.

  for (int i = 0; i < 3; i++) {
    int priority = 99 + i;
    struct sched_param param = {priority};
    pthread_t id;
    pthread_attr_t attr;
    pthread_attr_init(&attr);
    pthread_attr_setschedparam(&attr, &param);
    if ((pthread_create(&id, &attr, &trigger_generator, NULL)) == 0) {
      pthread_detach(id);
    } else {
      printf("pthread create failed..\n");
      exit(EXIT_FAILURE);
    }
    pthread_attr_destroy(&attr);
  }
  return worker();
}

Here's what I believe to be the significant parts of my defconfig:

CONFIG_ALARM_ARCH=y
CONFIG_ARCH="risc-v"
CONFIG_ARCH_ADDRENV=y
CONFIG_ARCH_BOARD_CUSTOM=y
CONFIG_ARCH_BOARD_CUSTOM_DIR=""
CONFIG_ARCH_CHIP="litex"
CONFIG_ARCH_RISCV=y
CONFIG_ARCH_USE_MMU=y
CONFIG_ARCH_USE_S_MODE=y
CONFIG_BOARD_LOOPSPERMSEC=5259
CONFIG_BUILD_KERNEL=y
CONFIG_DEV_URANDOM=y
CONFIG_ONESHOT=y
CONFIG_PRIORITY_INHERITANCE=y
CONFIG_PTHREAD_MUTEX_TYPES=y
CONFIG_SCHED_TICKLESS=y
CONFIG_USEC_PER_TICK=1

Here's a sample of the syslog for this program, around the point where the signal seems to be missed.

[ 6] [  INFO] pthread_cond_clockwait: cond=0x10401038 mutex=0x1040101c abstime=0x1080bf78
[ 6] [  INFO] pthread_cond_clockwait: Give up mutex...
[13] [  INFO] trigger_generator: trigger start, core -1, ser -1
[13] [  INFO] trigger_start: Lock serialise: core -1, ser -1
[13] [  INFO] pthread_mutex_timedlock: mutex=0x10401000
[13] [  INFO] pthread_mutex_timedlock: Returning 0
[13] [  INFO] trigger_start: Lock core: core -1, ser 13
[13] [  INFO] pthread_mutex_timedlock: mutex=0x1040101c
[13] [  INFO] pthread_mutex_timedlock: Returning 0
[13] [  INFO] trigger_start: Signal on cmd: core 13, ser 13
[13] [  INFO] pthread_cond_signal: cond=0x10401038
[13] [  INFO] pthread_cond_signal: sval=-1
[13] [  INFO] pthread_cond_signal: Signalling...
[13] [  INFO] pthread_cond_signal: Returning 0
[13] [  INFO] trigger_start: Wait on result: core 13, ser 13
[13] [  INFO] pthread_cond_wait: cond=0x1040104c mutex=0x1040101c
[13] [  INFO] pthread_cond_wait: Give up mutex / take cond
[ 6] [  INFO] pthread_cond_clockwait: Re-locking...
[ 6] [  INFO] pthread_cond_clockwait: Returning 110
[ 6] [  INFO] worker: timed wait rd 110, core 6, ser 13
[ 6] [  INFO] worker: Finished work: core 6, ser 13
[ 6] [  INFO] worker: Wait on cmd: core 6, ser 13

From this output:

  1. Worker (PID 6) waits on the command condition variable and releases the mutex.
  2. A producer (PID 13) takes the serialise and core mutexes.
  3. The producer signals on the command condition variable.
  4. The producer waits on the result condition variable and releases the core mutex.
  5. The worker re-locks the core mutex and the timed wait returns 110 (ETIMEDOUT).

At this point, I would have thought pthread_cond_timedwait would return 0, since the signal did occur. However, it seems that the signal is delivered either before or after the semaphore (in the command condition variable) is waited on.
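
For context, the usual defensive pattern is to guard the wait with a shared predicate and treat the return code of pthread_cond_timedwait only as a timeout indication, then decide based on the predicate after the wait returns. A minimal sketch of that idiom (plain POSIX; all names here are illustrative and not taken from the reproduction above):

#include <errno.h>
#include <pthread.h>
#include <stdbool.h>
#include <time.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
static bool work_ready = false; /* Shared predicate, protected by lock. */

/* Wait up to `ms` milliseconds for work_ready to become true. The
 * predicate, not the return code of pthread_cond_timedwait, decides
 * whether there is work to do. */
static bool wait_for_work(unsigned ms) {
  struct timespec deadline;
  clock_gettime(CLOCK_REALTIME, &deadline);
  deadline.tv_sec += ms / 1000;
  deadline.tv_nsec += (long)(ms % 1000) * 1000000L;
  if (deadline.tv_nsec >= 1000000000L) {
    deadline.tv_sec += 1;
    deadline.tv_nsec -= 1000000000L;
  }

  pthread_mutex_lock(&lock);
  while (!work_ready) {
    int rc = pthread_cond_timedwait(&cond, &lock, &deadline);
    if (rc == ETIMEDOUT) {
      break; /* Deadline passed; fall through and check the predicate. */
    }
  }
  bool ready = work_ready;
  work_ready = false;
  pthread_mutex_unlock(&lock);
  return ready;
}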

There are a few things I've experimented with to try to narrow down the issue:

  1. Using the non-standard additions, as outlined in the wiki -> No effect on the outcome.
  2. Changing from pthread_cond_timedwait to pthread_cond_wait. -> Fixes the issue I'm seeing; however, our application requires the use of timedwait here.
  3. Switching from tickless to ticked scheduler. -> No effect on the outcome.
  4. Running without CONFIG_DEBUG_FEATURES to remove syslog output. -> Same issue, however it happens less often.
  5. Building and running using qemu-rv32 port. -> Haven't been able to reproduce the issue. Although I haven't run it for extended durations.
  6. Removing the non-standard components of my example and building and running with gcc on my Linux host machine. -> Can't reproduce the issue here.
  7. Changing the priorities of all threads producing triggers to be below that of the worker thread. -> No difference.

There are a few comments I've found while looking through the source which make me think an assumption is made that the clock_id time source is stopped while interrupts are disabled, which may not be the case on all platforms.

From sched/semaphore/sem_clockwait.c:

int nxsem_clockwait(FAR sem_t *sem, clockid_t clockid,
                    FAR const struct timespec *abstime)
{
  FAR struct tcb_s *rtcb = this_task();
  irqstate_t flags;
  sclock_t ticks;
  int status;
  int ret = ERROR;

  DEBUGASSERT(sem != NULL && abstime != NULL);
  DEBUGASSERT(up_interrupt_context() == false);

  /* We will disable interrupts until we have completed the semaphore
   * wait.  We need to do this (as opposed to just disabling pre-emption)
   * because there could be interrupt handlers that are asynchronously
   * posting semaphores and to prevent race conditions with watchdog
   * timeout.  This is not too bad because interrupts will be re-
   * enabled while we are blocked waiting for the semaphore.
   */

  flags = enter_critical_section();

  /* Try to take the semaphore without waiting. */

  ret = nxsem_trywait(sem);
  if (ret == OK)
    {
      /* We got it! */

      goto out;
    }

From sched/pthread/pthread_condclockwait.c:

      sinfo("Give up mutex...\n");

      /* We must disable pre-emption and interrupts here so that
       * the time stays valid until the wait begins.   This adds
       * complexity because we assure that interrupts and
       * pre-emption are re-enabled correctly.
       */

      sched_lock();
      flags = enter_critical_section();

If that is the case, I could see how the absolute time given to pthread_cond_timedwait may have expired before nxsem_wait is called in nxsem_clockwait. However, I would have thought that the scheduler is locked between when the mutex is released and the wait is started, so this shouldn't have any effect.
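
The "deadline already expired when the wait begins" case is easy to observe in isolation. The following sketch (plain POSIX, nothing NuttX-specific, illustrative names) passes an absolute time that is already in the past and gets ETIMEDOUT back without blocking, which matches the 110 seen in the log above:

#include <errno.h>
#include <pthread.h>
#include <stdio.h>
#include <time.h>

int main(void) {
  pthread_mutex_t m = PTHREAD_MUTEX_INITIALIZER;
  pthread_cond_t c = PTHREAD_COND_INITIALIZER;
  struct timespec expired;

  /* Build an absolute deadline that is already one second in the past,
   * mimicking a thread that computed its deadline and was then delayed
   * before the wait actually started. */
  clock_gettime(CLOCK_REALTIME, &expired);
  expired.tv_sec -= 1;

  pthread_mutex_lock(&m);
  int rc = pthread_cond_timedwait(&c, &m, &expired);
  pthread_mutex_unlock(&m);

  /* Reports ETIMEDOUT without blocking, even though nothing was signalled. */
  printf("rc=%d, ETIMEDOUT=%d\n", rc, ETIMEDOUT);
  return 0;
}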

I've spent a bit of time trying to understand the interactions, but I don't believe I have a good enough understanding of the NuttX implementation to make any feasible progress. At this point I'm not sure whether it's an issue with the LiteX port, the kernel configuration, or an oversight in our application.

I'd appreciate any assistance tracking down the issue I'm seeing.

acassis (Contributor) commented Jul 25, 2023

@g2gps did you try it on a more stable ARCH like STM32 or ESP32?

pkarashchenko (Contributor) commented:

Also, it would be good if you could check whether disabling priority inheritance makes any difference to the reproduction rate.

g2gps (Contributor, Author) commented Aug 16, 2023

Thanks for the feedback. Enabling or disabling priority inheritance doesn't have a noticeable effect on the time until failure.

I did try building and running on an esp32-devkitc with the following configuration:

# CONFIG_ARCH_LEDS is not set
# CONFIG_NSH_ARGCAT is not set
# CONFIG_NSH_CMDOPT_HEXDUMP is not set
CONFIG_ARCH="xtensa"
CONFIG_ARCH_BOARD="esp32-devkitc"
CONFIG_ARCH_BOARD_COMMON=y
CONFIG_ARCH_BOARD_ESP32_DEVKITC=y
CONFIG_ARCH_CHIP="esp32"
CONFIG_ARCH_CHIP_ESP32=y
CONFIG_ARCH_CHIP_ESP32WROVER=y
CONFIG_ARCH_STACKDUMP=y
CONFIG_ARCH_XTENSA=y
CONFIG_BOARD_LOOPSPERMSEC=16717
CONFIG_BUILTIN=y
CONFIG_DEBUG_ASSERTIONS=y
CONFIG_DEBUG_ASSERTIONS_EXPRESSION=y
CONFIG_DEBUG_ERROR=y
CONFIG_DEBUG_FEATURES=y
CONFIG_DEBUG_INFO=y
CONFIG_DEBUG_SCHED=y
CONFIG_DEBUG_SCHED_ERROR=y
CONFIG_DEBUG_SCHED_INFO=y
CONFIG_DEBUG_SCHED_WARN=y
CONFIG_DEBUG_WARN=y
CONFIG_DEV_URANDOM=y
CONFIG_ESP32_UART0=y
CONFIG_FS_PROCFS=y
CONFIG_IDLETHREAD_STACKSIZE=3072
CONFIG_INIT_ENTRYPOINT="nsh_main"
CONFIG_INTELHEX_BINARY=y
CONFIG_LIBM=y
CONFIG_MM_REGIONS=3
CONFIG_NSH_ARCHINIT=y
CONFIG_NSH_BUILTIN_APPS=y
CONFIG_NSH_FILEIOSIZE=512
CONFIG_NSH_LINELEN=64
CONFIG_NSH_READLINE=y
CONFIG_PREALLOC_TIMERS=4
CONFIG_RAM_SIZE=114688
CONFIG_RAM_START=0x20000000
CONFIG_RR_INTERVAL=200
CONFIG_SCHED_WAITPID=y
CONFIG_START_DAY=6
CONFIG_START_MONTH=12
CONFIG_START_YEAR=2011
CONFIG_SYSLOG_BUFFER=y
CONFIG_SYSLOG_PROCESSID=y
CONFIG_SYSLOG_PROCESS_NAME=y
CONFIG_SYSTEM_NSH=y
CONFIG_UART0_SERIAL_CONSOLE=y

Same result. After a period of time, the signal is missed.

g2gps (Contributor, Author) commented May 15, 2024

This is an issue with our application logic and not NuttX.

What's happening is that the cmd_cond condition variable is being signaled after the absolute time given to pthread_cond_timedwait has expired but before the mutex is re-locked, so the function still returns ETIMEDOUT. From The Open Group documentation, this is the expected behavior of this interface.
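
For anyone hitting the same thing: one way to make the worker tolerate this race is to consult the trigger even when the wait times out, since the signal may have landed between the deadline expiring and the mutex being re-locked. A sketch of that change against the worker loop in the reproduction above (illustrative only, not the actual application fix):

    int rc = pthread_cond_timedwait(&core.cmd_cond, &core.mutex, &expire);
    if (rc == 0 || core.trigger != trg_NONE) {
      /* Either the wait was woken by the signal, or the signal raced with
       * the timeout window: core.trigger, not rc, is the source of truth. */
      process_trigger();
      count = 0;
    } else {
      count++; /* Genuine timeout (ETIMEDOUT) with nothing pending. */
    }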

g2gps closed this as completed May 15, 2024