Skip to content
Advertisement

why does pthread_cond_timedwait not trigger after indicated time-limit?

This is supposed to work in a loop (server) and delegate work/inquiry to a faulty library, here represented by the longrun() function call, to a thread with a time-out of tmax=3s. I placed synchronization vars and i am trying to wait for no more than this limit, but when longrun() hangs (run 4), it still waits the full time (7s) instead of the requested limit. Can anyone explain?

#include <unistd.h>
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <pthread.h>
#include <sys/time.h>

#include <iostream>
using namespace std;


string int2str(int i){
    char buf[10]; // no larger int passed we hope
    int end = sprintf(buf, "%d", i);
    buf[end] = '';
    return string(buf);
}
string longrun(int qi){
    if(qi % 4 == 0) {
        sleep(7);
        return string("'---- to: ") + int2str(qi) + string("' (hang case)");
    }
    else {
        sleep(1);
        return string("'okay to: ") + int2str(qi) + string("'");
    }
}


struct tpack_t {            // thread pack
    pthread_t thread;
    pthread_mutex_t mutex;
    pthread_cond_t go;      // have a new value to run
    pthread_cond_t ready;   // tell main thread we're done processing
    int newq;               // predicate on go+ready condition for wait
    int qi;                 // place question as int to thread: question-int
    string res;             // where i place the response
    tpack_t();
};
tpack_t::tpack_t() {
    pthread_mutex_init (&mutex, NULL);
    pthread_cond_init (&go, NULL);
    pthread_cond_init (&ready, NULL);
    newq = 0;
}
void set_cond_time(timespec* ctp, int tmax){
    timeval now;
    gettimeofday(&now, NULL);
    ctp->tv_nsec = now.tv_usec * 1000UL;
    ctp->tv_sec = now.tv_sec + tmax; // now + max time!
    printf("[m] ... set to sleep for %d sec, i hope...n", tmax);
}

void take_faulty_evasive_action(tpack_t* tpx){
    // basically kill thread, clean faulty library copy (that file) and restart it
    cout << "will work on it (restarting thread) soon!n";
    tpx->newq = 0; // minimal action for now...
}

void* faulty_proc(void* arg){
    tpack_t* tpx = (tpack_t*) arg;
    while(true){
        pthread_mutex_lock(&tpx->mutex);
        while(tpx->newq == 0){
            pthread_cond_wait(&tpx->go, &tpx->mutex);
        }
        printf("[t] to process : %dn", tpx->qi); fflush(stdout);
        // now i have a new value in qi, process it and place the answer in... res
        tpx->res = longrun(tpx->qi);
        tpx->newq = 0;
        pthread_mutex_unlock(&tpx->mutex);
        pthread_cond_signal(&tpx->ready);
    }
}


int main(int argc, char* argv[]){

    cout << "n    this presents the problem: idx = 4k -> hang case ...n    ( challenge is to eliminate them by killing thread and restarting it )nn";
    printf("    ETIMEDOUT = %d   EINVAL = %d  EPERM = %dnn", ETIMEDOUT, EINVAL, EPERM);

    tpack_t* tpx = new tpack_t();
    pthread_create(&tpx->thread, NULL, &faulty_proc, (void*) tpx);

    // max wait time; more than that is a hanging indication!
    int numproc = 5;
    ++numproc;
    int tmax = 3;
    timespec cond_time;
    cond_time.tv_nsec = 0;
    int status, expired; // for timed wait on done condition!


    time_t t0 = time(NULL);
    for(int i=1; i<numproc; ++i){
        expired = 0;

        pthread_mutex_lock(&tpx->mutex);
        tpx->qi = i; // init the question
        tpx->newq = 1; // ... predicate
        //pthread_mutex_unlock(&tpx->mutex);
        pthread_cond_signal(&tpx->go); // let it know that...

        while(tpx->newq == 1){

            ///  ---------------------- most amazing region, timedwait waits all the way! ----------------------
            set_cond_time(&cond_time, tmax); // time must be FROM NOW! (abs time, not interval)
            time_t wt0 = time(NULL);
            status = pthread_cond_timedwait(&tpx->ready, &tpx->mutex, &cond_time);
            printf("[m] ---- t exited with status = %d  (after %.2fs)n", status, difftime(time(NULL), wt0));
            ///  -----------------------------------------------------------------------------------------------

            if (status == ETIMEDOUT){
                printf("t ['t was and newq == %d]n", tpx->newq);
                if(tpx->newq == 1){ // check one more time, to elim race possibility
                    expired = 1;
                    break;
                }
            }
            else if(status != 0){
                fprintf(stderr, "cond timewait for faulty to reply errored outn");
                return 1;
            }
        }

        if(expired){
            take_faulty_evasive_action(tpx); // kill thread, start new one, report failure below
            cout << "[m]  :: interruption: default bad answer goes here for " << i  << "nn";
        }
        else {
            cout << "[m]  ::  end with ans: " << tpx->res << endl << endl;
        }
        pthread_mutex_unlock(&tpx->mutex);
    }
    time_t t1 = time(NULL);
    printf("took %.2f sec to runn", difftime(t1, t0));
}

Used ‘g++ -pthread code.cc’ to compile under linux (ubuntu 16.04). Output is:

    this presents the problem: idx = 4k -> hang case ...
    ( challenge is to eliminate them by killing thread and restarting it )

    ETIMEDOUT = 110   EINVAL = 22  EPERM = 1

[m] ... set to sleep for 3 sec, i hope...
[t] to process : 1
[m] ----     exited with status = 0  (after 1.00s)
[m]  ::  end with ans: 'okay to: 1'

[m] ... set to sleep for 3 sec, i hope...
[t] to process : 2
[m] ----     exited with status = 0  (after 1.00s)
[m]  ::  end with ans: 'okay to: 2'

[m] ... set to sleep for 3 sec, i hope...
[t] to process : 3
[m] ----     exited with status = 0  (after 1.00s)
[m]  ::  end with ans: 'okay to: 3'

[m] ... set to sleep for 3 sec, i hope...
[t] to process : 4
[m] ----     exited with status = 110  (after 7.00s)
     ['t was and newq == 0]
[m]  ::  end with ans: '---- to: 4' (hang case)

[m] ... set to sleep for 3 sec, i hope...
[t] to process : 5
[m] ----     exited with status = 0  (after 1.00s)
[m]  ::  end with ans: 'okay to: 5'

took 11.00 sec to run

Advertisement

Answer

The problem is that faulty_proc() keeps tpx->mutex locked while it calls longrun(), and the pthread_cond_timedwait() call in main() can’t return until it can re-acquire the mutex, even if the timeout expires.

If longrun() doesn’t need the mutex to be locked – and that seems to be the case – you can unlock the mutex around that call and re-lock it before setting the completion flag and signalling the condition variable.

User contributions licensed under: CC BY-SA
6 People found this is helpful
Advertisement