/*
 * Author's notes (from the original post):
 *
 * I answered my own question regarding the behavior of setting the FPU in
 * multithreaded applications (at least when using pthreads). If you set the
 * state of the FPU in your main thread before creating threads, the child
 * threads inherit the FPU state of the parent. If you change the FPU state
 * in a child thread, it does not affect any other threads.
 *
 * I came across some really bizarre behavior with regard to pthreads and
 * conditional timed waits. It appears that when a signal is sent to a thread
 * that is waiting in a conditional timed wait, the state of the FPU is
 * changed to extended precision regardless of what it was before the wait.
 * If the conditional timed wait times out, the state of the FPU does not get
 * modified. I am not sure what other Linux system calls cause this behavior.
 * Guess I'm off to some other lists to solve this latest problem.
 *
 * Below is multithreaded code displaying the above-mentioned behavior; the
 * corresponding output is reproduced in the comment at the end of the file.
 */

#include <stdio.h>
#include <sys/time.h>
#include <fpu_control.h>
#include <pthread.h>
#include <errno.h>
#include <unistd.h>

enum { THREAD_COUNT = 2 };

/*
 * All synchronisation objects use static initialisers only. They are not
 * passed to pthread_*_init() again, because re-initialising an already
 * initialised object is undefined behaviour.
 */
pthread_cond_t g_hInitEvent = PTHREAD_COND_INITIALIZER;
pthread_mutex_t g_mInitEvent = PTHREAD_MUTEX_INITIALIZER;
pthread_cond_t g_hCompleteEvent = PTHREAD_COND_INITIALIZER;
pthread_mutex_t g_mCompleteEvent = PTHREAD_MUTEX_INITIALIZER;
pthread_cond_t g_hThreadBeginEvent[2] = {
    PTHREAD_COND_INITIALIZER, PTHREAD_COND_INITIALIZER
};
pthread_mutex_t g_mThreadBeginEvent[2] = {
    PTHREAD_MUTEX_INITIALIZER, PTHREAD_MUTEX_INITIALIZER
};

/* Number of workers that have not finished yet; guarded by g_mCompleteEvent. */
long g_lThreadCount = 2;

/* Number of workers that have checked in; guarded by g_mInitEvent. */
static long g_lStartedCount = 0;

/* Per-worker "go" flag; g_bBegin[i] is guarded by g_mThreadBeginEvent[i]. */
static int g_bBegin[2] = { 0, 0 };

/* Fill *pts with the absolute wall-clock time lSeconds from now. */
static void DeadlineFromNow(struct timespec *pts, long lSeconds)
{
    struct timeval tp;

    gettimeofday(&tp, NULL);
    pts->tv_sec = tp.tv_sec + lSeconds;
    pts->tv_nsec = tp.tv_usec * 1000;
}

/*
 * Worker thread body.
 *
 * data points to a long holding the worker index, which must be 0 or 1.
 * Worker 0 blocks in pthread_cond_timedwait() (5 second deadline); worker 1
 * blocks in pthread_cond_wait(). After being released, each worker prints
 * the result of the double-to-int cast ten times, one second apart.
 * Always returns NULL.
 */
void *ProcessThreadProc(void *data)
{
    int iTmp = 0;
    double dTmp = -79.937384;
    long lNum = *((long *)data);

    /*
     * Take our begin mutex BEFORE telling main we have started: main cannot
     * acquire it (and therefore cannot signal us) until we are actually
     * blocked in the wait below, so the wait is always ended by a signal.
     */
    pthread_mutex_lock(&g_mInitEvent);
    pthread_mutex_lock(&g_mThreadBeginEvent[lNum]);
    g_lStartedCount++;
    pthread_cond_signal(&g_hInitEvent);
    pthread_mutex_unlock(&g_mInitEvent);

    iTmp = (int)(dTmp * 1000000.0);
    printf("thread %ld int cast before waiting: %d \n", lNum, iTmp);

    if (lNum == 0) {
        struct timespec ts;
        int rc = 0;

        DeadlineFromNow(&ts, 5);
        /* Loop on the flag so a spurious wakeup does not release us early;
         * a non-zero rc (e.g. timeout) ends the wait as before. */
        while (!g_bBegin[lNum] && rc == 0) {
            rc = pthread_cond_timedwait(&g_hThreadBeginEvent[lNum],
                                        &g_mThreadBeginEvent[lNum], &ts);
        }
    } else {
        while (!g_bBegin[lNum]) {
            pthread_cond_wait(&g_hThreadBeginEvent[lNum],
                              &g_mThreadBeginEvent[lNum]);
        }
    }
    pthread_mutex_unlock(&g_mThreadBeginEvent[lNum]);

    for (long lCount = 0; lCount < 10; lCount++) {
        iTmp = (int)(dTmp * 1000000.0);
        printf("thread %ld int cast: %d\n", lNum, iTmp);
        sleep(1);
    }

    /*
     * Report completion under the same mutex main waits with. main holds
     * g_mCompleteEvent until it is inside its timed wait, so the final
     * signal cannot be lost. Each thread unlocks only what it locked.
     */
    pthread_mutex_lock(&g_mCompleteEvent);
    g_lThreadCount--;
    if (g_lThreadCount == 0) {
        pthread_cond_signal(&g_hCompleteEvent);
    }
    pthread_mutex_unlock(&g_mCompleteEvent);

    return NULL;
}

/*
 * Sets the x87 precision control to double, starts the two workers, releases
 * them, waits (up to 30 seconds) for them to finish, and prints the result of
 * the same double-to-int cast at each stage.
 *
 * Returns 0 on success, 1 if a worker thread could not be created.
 */
int main(int argc, char *argv[])
{
    int iTmp = 0;
    double dTmp = -79.937384;
    long lThreads = THREAD_COUNT;
    pthread_t ahThreads[THREAD_COUNT];
    long alThreadIds[THREAD_COUNT];

    (void)argc;
    (void)argv;

    iTmp = (int)(dTmp * 1000000.0);
    printf("main int cast: %d\n", iTmp);

    /* Switch the FPU precision control from extended to double. */
    fpu_control_t cw;
    _FPU_GETCW(cw);
    cw &= ~_FPU_EXTENDED;
    cw |= _FPU_DOUBLE;
    _FPU_SETCW(cw);

    /* Held until the final timed wait so workers cannot signal completion
     * before main is waiting for it. */
    pthread_mutex_lock(&g_mCompleteEvent);

    iTmp = (int)(dTmp * 1000000.0);
    printf("main int cast after timed wait: %d\n", iTmp);

    /* Launch the workers one at a time, waiting for each to check in. */
    for (long lCount = 0; lCount < lThreads; lCount++) {
        /* Each worker gets its own stable index slot instead of a pointer
         * to the loop variable. */
        alThreadIds[lCount] = lCount;

        pthread_mutex_lock(&g_mInitEvent);
        int rc = pthread_create(&ahThreads[lCount], NULL, ProcessThreadProc,
                                &alThreadIds[lCount]);
        if (rc != 0) {
            pthread_mutex_unlock(&g_mInitEvent);
            fprintf(stderr, "pthread_create failed for thread %ld: error %d\n",
                    lCount, rc);
            return 1;
        }
        while (g_lStartedCount <= lCount) {
            pthread_cond_wait(&g_hInitEvent, &g_mInitEvent);
        }
        pthread_mutex_unlock(&g_mInitEvent);
    }

    printf("main thread threads initialized\n");

    /* Release every worker from its (timed) wait with a signal. */
    for (long lCount = 0; lCount < lThreads; lCount++) {
        pthread_mutex_lock(&g_mThreadBeginEvent[lCount]);
        g_bBegin[lCount] = 1;
        pthread_cond_signal(&g_hThreadBeginEvent[lCount]);
        pthread_mutex_unlock(&g_mThreadBeginEvent[lCount]);
    }

    iTmp = (int)(dTmp * 1000000.0);
    printf("main int cast: %d\n", iTmp);

    /*
     * Wait for the last worker to signal completion. The original post also
     * carried a commented-out 3 second deadline here, presumably to exercise
     * the timeout path instead of the signalled path.
     */
    struct timespec ts;
    int rcWait = 0;

    DeadlineFromNow(&ts, 30);
    while (g_lThreadCount > 0 && rcWait == 0) {
        rcWait = pthread_cond_timedwait(&g_hCompleteEvent, &g_mCompleteEvent,
                                        &ts);
    }
    pthread_mutex_unlock(&g_mCompleteEvent);

    iTmp = (int)(dTmp * 1000000.0);
    printf("main int cast: %d\n", iTmp);

    /* Join before destroying anything so no worker can still be holding or
     * about to touch a mutex that is being destroyed. */
    for (long lCount = 0; lCount < lThreads; lCount++) {
        pthread_join(ahThreads[lCount], NULL);
    }

    pthread_mutex_destroy(&g_mInitEvent);
    pthread_cond_destroy(&g_hInitEvent);
    pthread_mutex_destroy(&g_mCompleteEvent);
    pthread_cond_destroy(&g_hCompleteEvent);
    for (long lCount = 0; lCount < lThreads; lCount++) {
        pthread_mutex_destroy(&g_mThreadBeginEvent[lCount]);
        pthread_cond_destroy(&g_hThreadBeginEvent[lCount]);
    }

    return 0;
}

/*
 * Output reported by the author for the original program:
 *
 * main int cast: -79937383
 * main int cast after timed wait: -79937384
 * thread 0 int cast before waiting: -79937384
 * main thread threads initialized
 * thread 1 int cast before waiting: -79937384
 * main int cast: -79937384
 * thread 1 int cast: -79937384
 * thread 0 int cast: -79937383
 * thread 1 int cast: -79937384
 * thread 0 int cast: -79937383
 * thread 1 int cast: -79937384
 * thread 0 int cast: -79937383
 * thread 1 int cast: -79937384
 * thread 0 int cast: -79937383
 * thread 1 int cast: -79937384
 * thread 0 int cast: -79937383
 * thread 1 int cast: -79937384
 * thread 0 int cast: -79937383
 * thread 1 int cast: -79937384
 * thread 0 int cast: -79937383
 * thread 1 int cast: -79937384
 * thread 0 int cast: -79937383
 * thread 1 int cast: -79937384
 * thread 0 int cast: -79937383
 * thread 1 int cast: -79937384
 * thread 0 int cast: -79937383
 * main int cast: -79937383
 */