Hello, I am working on some code targeting the VR4300 using mips-elf-gcc (on msys2), where gcc was configured with: --build=x86_64-w64-mingw32 --host=x86_64-w64-mingw32 --prefix="./" --target=mips-elf --enable-languages=c,c++ --without-headers --with-newlib --with-gnu-as=./bin/mips-elf-as.exe --with-gnu-ld=./bin/mips-elf-ld.exe --enable-checking=release --enable-shared --enable-shared-libgcc --disable-decimal-float --disable-gold --disable-libatomic --disable-libgomp --disable-libitm --disable-libquadmath --disable-libquadmath-support --disable-libsanitizer --disable-libssp --disable-libunwind-exceptions --disable-libvtv --disable-multilib --disable-nls --disable-rpath --disable-symvers --disable-threads --disable-win32-registry --enable-lto --enable-plugin --enable-static --without-included-gettext While everything is working, I found that gcc always places single-precision floating-point constants into data sections instead of using coprocessor moves; this may not always be optimal. Please consider the following example: // ---------------------------------------------------------------- // cctest.c // ---------------------------------------------------------------- extern struct { float x; float y; float z; } var; void *test() { float t; t = 5.0; var.x = var.x + t; var.y = 10.0; var.z = 60.0; return (void*)&var; } // ---------------------------------------------------------------- To the best of my knowledge, I would expect the compiler to produce something like: ; ---------------------------------------------------------------- lui $2, %hi(var) lui $1, 0x40A0 ; 5.0 addiu $2,$2,%lo(var) mtc1 $1, $f2 lwc1 $f0, 0x0($2) lui $3, 0x4120 ; 10.0 lui $4, 0x4270 ; 60.0 sw $3, 0x4($2) add.s $f0, $f0, $f2 sw $4, 0x8($2) jr $31 swc1 $f0, 0x0($2) ; ---------------------------------------------------------------- However, gcc produces: ; ---------------------------------------------------------------- ; cctest.s ; ---------------------------------------------------------------- ; 
.text lui $3,%hi(var) lui $2,%hi($LC0) lwc1 $f0,%lo(var)($3) lwc1 $f2,%lo($LC0)($2) lui $5,%hi($LC1) add.s $f0,$f0,$f2 addiu $2,$3,%lo(var) lui $4,%hi($LC2) swc1 $f0,%lo(var)($3) lwc1 $f0,%lo($LC1)($5) swc1 $f0,4($2) lwc1 $f0,%lo($LC2)($4) jr $31 swc1 $f0,8($2) ; .rodata .align 2 $LC0: .word 1084227584 .align 2 $LC1: .word 1092616192 .align 2 $LC2: .word 1114636288 ; ---------------------------------------------------------------- with the following flags given: -G0 -fomit-frame-pointer -fno-PIC -fno-stack-protector -fno-common -fno-zero-initialized-in-bss -mips3 -march=vr4300 -mtune=vr4300 -mabi=32 -mlong32 -mno-shared -mgp32 -mhard-float -mno-check-zero-division -mno-abicalls -mno-memcpy -mbranch-likely -O3 According to the VR4300/VR4305/VR4310 manual (pp. 222 and 230), both lwc1 and mtc1 take 1 cycle to complete and interlock for 1 cycle when a load-use dependency occurs; thus, using mtc1 should save some space without loss of performance in this case. In fact, there exist some ancient compilers that use mtc1 to load floating-point constants. Therefore, I am wondering why gcc always prefers to place such constants in data sections instead of transferring them from general-purpose registers - is this intended or not? And, if it is not intended, how can I change it? Many thanks.