Re: Bytes of long double - trouble . . .

Dennis Clarke via Gcc-help <gcc-help@xxxxxxxxxxx> · Wed, 15 Feb 2023 07:50:38 -0500

On 2/8/23 03:48, Sergey Smith wrote:

To:     gcc-help@xxxxxxxxxxx
Dear GCC,  [ 8:2:23  ]
                   Like Woooew ! What is THIS!?  I installed Visual Studio Code, & your 32 bit C, Version 9.2.0. I ran :
    printf("\nOn THIS particular computer, long double is given %d bytes\n", sizeof(long double)); /* The Answer was: 12 bytes.
    THEN, I updated to C Version 12.2.0 and ran the same code, - ON THE SAME COMPUTER, - but NOW the answer is:  16 bytes !

     HOW can this happen if, as I understand it, - this function is supposed to assess a computer’s  *hardware* ?  I am on Windows 10 btw.
                                                                                        — Sergey.

No x86 hardware has ever implemented a true 128-bit long double
floating point format. The best you can hope for is the strange 10-byte
(80-bit) extended precision format that Intel made up as a way to extend
precision a little bit. It works pretty well for things like fused
multiply add and such.

However, there are various ways you can emulate the IEEE-754 128-bit
floating point stuff on x86 hardware, and perhaps you really want
libquadmath here. Regardless, you *may* see the long double data type
take a full 16 bytes, and yes, that means six of those bytes are padding.
They mean nothing, unless you do the software emulation goodness. Try :

/*
 * fp128_q.c  mess around with the libquadmath to see IEEE-754 2008
 *            floating point stuff sort of work in an emulated way
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <https://www.gnu.org/licenses/>.
 *
 * https://www.gnu.org/licenses/gpl-3.0.txt
 */

#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <quadmath.h>
#include <float.h>
#include <fenv.h>

#define BUFFERSIZE 128

/*
 * Format one __float128 value into buf with quadmath_snprintf and, on
 * success, print the INFO line reporting the character count.
 *
 *   buf      - destination buffer
 *   size     - capacity of buf in bytes, including the terminating NUL
 *   fmt      - quadmath_snprintf format holding exactly one Q conversion
 *   value    - the value to format
 *   fail_msg - complete message written to stderr on failure
 *
 * Returns 0 on success and -1 on failure. A return value from
 * quadmath_snprintf that is >= size means the output was truncated,
 * which is treated as a failure rather than printing a partial number.
 */
static int format_quad(char *buf, size_t size, const char *fmt,
                       __float128 value, const char *fail_msg)
{
    int num_chars = quadmath_snprintf(buf, size, fmt, value);

    if ( num_chars <= 0 || (size_t)num_chars >= size ) {
        fprintf(stderr, "%s", fail_msg);
        return -1;
    }

    printf ("INFO : quadmath_snprintf formatted %i chars.\n", num_chars);
    return 0;
}

/*
 * Print the <float.h> precision macros that are defined, then format a
 * few __float128 values (two constants, their sum, and pi) through
 * libquadmath and print them.
 *
 * argc and argv are not used.
 *
 * Returns EXIT_SUCCESS when every value was formatted, EXIT_FAILURE if
 * the buffer could not be allocated or any quadmath_snprintf call failed.
 */
int main(int argc, char *argv[]){

    __float128 fp0, fp1, fp2, pi;
    const size_t buffer_size = BUFFERSIZE;
    char *buffer;
    int status = EXIT_FAILURE;

    (void)argc;
    (void)argv;

    buffer = calloc(buffer_size, sizeof *buffer);
    if ( buffer == NULL ) {
        fprintf(stderr,"FAIL : calloc could not allocate the buffer.\n");
        return EXIT_FAILURE;
    }

#ifdef FLT_EVAL_METHOD
    printf ( "INFO : FLT_EVAL_METHOD == %d\n", FLT_EVAL_METHOD);
#endif

#ifdef DECIMAL_DIG
    printf ( "INFO : DECIMAL_DIG == %d\n", DECIMAL_DIG);
#endif

/* LDBL_DIG, FLT_DECIMAL_DIG, DBL_DECIMAL_DIG, LDBL_DECIMAL_DIG */
#ifdef FLT_DECIMAL_DIG
    printf ( "INFO : FLT_DECIMAL_DIG == %d\n", FLT_DECIMAL_DIG);
#endif

#ifdef DBL_DECIMAL_DIG
    printf ( "INFO : DBL_DECIMAL_DIG == %d\n", DBL_DECIMAL_DIG);
#endif

#ifdef LDBL_DECIMAL_DIG
    printf ( "INFO : LDBL_DECIMAL_DIG == %d\n", LDBL_DECIMAL_DIG);
#endif

#ifdef LDBL_DIG
    printf ( "INFO : LDBL_DIG == %d\n", LDBL_DIG);
#endif

    /* NOTE : floating point can NOT precisely represent the
     *        test values being used here. Such is life in
     *        the real world of floating point. Good luck.
     */
    fp0 = 36.584Q;

    /* sizeof yields a size_t, so the conversion must be %zu */
    printf ( "the sizeof(fp0) is %zu\n", sizeof(fp0) );

    if ( format_quad(buffer, buffer_size, "%40.36Qg", fp0,
                     "FAIL : quadmath_snprintf failed.\n") != 0 ) {
        goto cleanup;
    }

    printf ("the value of fp0 is %s\n", buffer);

    /* No Q suffix here: this is a double constant that gets widened
     * to __float128, so the rounding error of double shows up in the
     * printed digits. */
    fp1 =  7.812;

    if ( format_quad(buffer, buffer_size, "%40.36Qg", fp1,
                     "FAIL : wtf quadmath_snprintf failed.\n") != 0 ) {
        goto cleanup;
    }

    printf ("the value of fp1 is %s\n", buffer);

    fp2 = fp0 + fp1;

    if ( format_quad(buffer, buffer_size, "%40.36Qg", fp2,
                     "FAIL : wat? quadmath_snprintf failed.\n") != 0 ) {
        goto cleanup;
    }

    printf("fp2 = fp0 + fp1 = %s\n", buffer);

    /* more than reasonable value for pi which is a few more
     * decimal digits past the stuff in math.h */
    pi = 3.1415926535897932384626433832795028841971693993751Q;

    if ( format_quad(buffer, buffer_size, "%46.40Qe", pi,
                     "FAIL : wat? quadmath_snprintf failed.\n") != 0 ) {
        goto cleanup;
    }

    printf("libquadmath says pi = %s\n", buffer);
    printf("the real thing is  ~= ");
    printf("3.1415926535897932384626433832795028841971693993...\n");

    status = EXIT_SUCCESS;  /* or 42 if you prefer */

cleanup:
    /* single exit point so the buffer is released on failure too */
    free(buffer);
    return status;

}

Be sure to link with -lquadmath and you should see :

$ gcc12 -g -O0 -Wl,-rpath=/usr/local/lib/gcc12,-enable-new-dtags \
           -o fp128_q fp128_q.c -lquadmath

$ ./fp128_q
INFO : FLT_EVAL_METHOD == 0
INFO : DECIMAL_DIG == 21
INFO : FLT_DECIMAL_DIG == 9
INFO : DBL_DECIMAL_DIG == 17
INFO : LDBL_DECIMAL_DIG == 21
INFO : LDBL_DIG == 18
the sizeof(fp0) is 16
INFO : quadmath_snprintf formatted 40 chars.
the value of fp0 is    36.5840000000000000000000000000000015
INFO : quadmath_snprintf formatted 40 chars.
the value of fp1 is    7.81200000000000027711166694643907249
INFO : quadmath_snprintf formatted 40 chars.
fp2 = fp0 + fp1 =     44.396000000000000277111666946439074
INFO : quadmath_snprintf formatted 46 chars.
libquadmath says pi = 3.1415926535897932384626433832795027974791e+00
the real thing is  ~= 3.1415926535897932384626433832795028841971693993...

Works pretty well.  The only other option is to get an IBM POWER9
server which has real 128-bit goodness in hardware. An IBM System Z
type mainframe can do all that as well. The RISC-V processor
specification also has the Q-extension for true 128-bit floating
point but no one seems to have fabricated that. Yet. :)

--
Dennis Clarke
RISC-V/SPARC/PPC/ARM/CISC
UNIX and Linux spoken
GreyBeard and suspenders optional