Reverse Engineering challenge #80.

Tags: ARM FPU SSE MIPS ARM64 X64 ASM L1 .

What does this code do?

Optimizing GCC 4.8.4 (SSE enabled):


# f -- raise a double to an integer power by square-and-multiply.
# NOTE(review): the C prototype is presumably `double f(double x, int n)` -- confirm.
# In:  xmm0 = x (base), edi = n (exponent)
# Out: xmm0 = result
#   n == 0       -> 1.0
#   n <  0       -> base becomes 1.0/x and n becomes -n before anything else
#   base == 0.0  -> 0.0 (tested after the optional reciprocal)
#   otherwise    -> 1.0 multiplied by base^(2^k) for every set bit k of n
# Register roles: xmm1 = running base, xmm0 = accumulator, eax = scratch.
f:
	test	edi, edi		# set flags from n
	movapd	xmm1, xmm0		# xmm1 = base; xmm0 is reused for the result
	je	.L7			# n == 0 -> return 1.0
	js	.L15			# n < 0  -> take the reciprocal first
	xorpd	xmm0, xmm0		# xmm0 = 0.0
	ucomisd	xmm1, xmm0		# base vs 0.0 (PF set when unordered, i.e. NaN)
	jnp	.L16			# ordered: let .L16 decide on ZF
.L10:					# start of the power loop (NaN bases land here too)
	movsd	xmm0, QWORD PTR .LC0[rip]	# accumulator = 1.0
.L6:
	test	dil, 1			# lowest bit of n set?
	je	.L5
	mulsd	xmm0, xmm1		# yes: accumulator *= base
.L5:
	mov	eax, edi
	mulsd	xmm1, xmm1		# base *= base
	shr	eax, 31			# eax = sign bit of n
	add	edi, eax		# bias so the shift below rounds toward zero
	sar	edi			# n /= 2 (signed); ZF tells whether n reached 0
	jne	.L6			# more bits left
	rep ret				# return accumulator in xmm0
.L16:					# flags still come from ucomisd against 0.0
	jne	.L10			# base != 0.0 -> run the loop
	rep ret				# base == 0.0 -> return the 0.0 already in xmm0
.L7:
	movsd	xmm0, QWORD PTR .LC0[rip]	# n == 0: result is 1.0
	ret
.L15:					# negative exponent
	movsd	xmm0, QWORD PTR .LC0[rip]	# xmm0 = 1.0
	neg	edi			# n = -n
	divsd	xmm0, xmm1		# xmm0 = 1.0 / base
	movapd	xmm1, xmm0		# base = 1.0 / base
	xorpd	xmm0, xmm0		# xmm0 = 0.0
	ucomisd	xmm1, xmm0		# new base vs 0.0
	jp	.L10			# unordered (NaN): go straight to the loop
	jmp	.L16			# ordered: shared zero test
.LC0:					# IEEE-754 double 1.0, low word first
	.long	0
	.long	1072693248		# = 0x3FF00000, high word of 1.0

Optimizing GCC 4.8.4 (x87 FPU code, -mfpmath=387 option):


# f -- integer power of a double by square-and-multiply, computed on the x87 stack.
# NOTE(review): the C prototype is presumably `double f(double x, int n)` -- confirm.
# In:  xmm0 = x (base), edi = n (exponent)
# Out: xmm0 = result (1.0 when n == 0; 0.0 when the base, after the optional
#      reciprocal for n < 0, compares equal to 0.0)
# [rsp-24] and [rsp-16] are scratch slots below rsp; rsp itself is never adjusted.
# Each product is stored as a QWORD and reloaded, which narrows the x87 value
# to a 64-bit double before it is used again.
# Stack pictures in the comments list st(0) first: r = accumulator, x = running base.
f:
	movsd	QWORD PTR [rsp-24], xmm0	# pass x from SSE to x87 through memory
	test	edi, edi		# set flags from n
	fld	QWORD PTR [rsp-24]	# st: x
	je	.L8			# n == 0 -> return 1.0
	js	.L13			# n < 0  -> take the reciprocal first
	fldz				# st: 0, x
	fld	st(0)			# st: 0, 0, x
	fxch	st(2)			# st: x, 0, 0
	fucomi	st, st(2)		# x vs 0.0 -> ZF/PF/CF in EFLAGS
	fstp	st(2)			# st: 0, x
	jnp	.L14			# ordered: let .L14 decide on ZF
	fstp	st(0)			# unordered (NaN): drop the 0 -> st: x
	jmp	.L11
.L17:
	fstp	st(0)			# drop the 0 -> st: x
.L11:
	fld1				# st: r = 1.0, x
	jmp	.L7
.L16:
	fxch	st(1)			# st: r, x
.L7:					# loop head, st: r, x
	test	dil, 1			# lowest bit of n set?
	je	.L15
	fmul	st, st(1)		# r *= x
	fstp	QWORD PTR [rsp-16]	# store/reload r as a 64-bit double
	fld	QWORD PTR [rsp-16]
	fxch	st(1)			# st: x, r
	jmp	.L6
.L15:
	fxch	st(1)			# st: x, r
.L6:
	fmul	st, st(0)		# x *= x
	mov	eax, edi
	shr	eax, 31			# eax = sign bit of n
	add	edi, eax		# bias so the shift below rounds toward zero
	sar	edi			# n /= 2 (signed); ZF tells whether n reached 0
	fstp	QWORD PTR [rsp-16]	# store/reload x as a 64-bit double (EFLAGS untouched)
	fld	QWORD PTR [rsp-16]
	jne	.L16			# ZF from sar: more bits left
	fstp	st(0)			# drop x -> st: r
	fstp	QWORD PTR [rsp-24]	# result slot = r; x87 stack is empty again
.L2:
	movsd	xmm0, QWORD PTR [rsp-24]	# return value back into xmm0
	ret
.L14:					# st: 0, x; flags still from fucomi
	fstp	QWORD PTR [rsp-24]	# pre-store 0.0 as the result -> st: x
	jne	.L11			# x != 0.0 -> run the loop
	fstp	st(0)			# x == 0.0: empty the stack
	jmp	.L2			# return 0.0
.L8:					# n == 0
	fstp	st(0)			# discard x
	fld1
	fstp	QWORD PTR [rsp-24]	# result = 1.0
	movsd	xmm0, QWORD PTR [rsp-24]
	ret
.L13:					# negative exponent, st: x
	fld1				# st: 1, x
	neg	edi			# n = -n
	fdivrp	st(1), st		# NOTE(review): by analogy with the other listings, st: 1.0/x -- confirm operand order
	fstp	QWORD PTR [rsp-16]	# store/reload as a 64-bit double
	fld	QWORD PTR [rsp-16]	# st: x (now the reciprocal)
	fldz				# st: 0, x
	fld	st(0)			# st: 0, 0, x
	fxch	st(2)			# st: x, 0, 0
	fucomi	st, st(2)		# x vs 0.0
	fstp	st(2)			# st: 0, x
	jp	.L17			# unordered (NaN): go straight to the loop
	jmp	.L14			# ordered: shared zero test

Optimizing GCC 4.9.3 for ARM64:


// f -- raise a double to an integer power by square-and-multiply.
// NOTE(review): the C prototype is presumably `double f(double x, int n)` -- confirm.
// In:  d0 = x (base), w0 = n (exponent)
// Out: d0 = result
//   n == 0       -> 1.0
//   n <  0       -> base becomes 1.0/x and n becomes -n
//   base == 0.0  -> 0.0 (tested after the optional reciprocal)
// Register roles: d1 = running base, d0 = accumulator, w1 = scratch.
f:
        cmp     w0, wzr                 // flags from n
        fmov    d1, d0                  // d1 = base; d0 is reused for the result
        beq     .L6                     // n == 0 -> return 1.0
        blt     .L13                    // n < 0  -> take the reciprocal first
        fcmp    d1, #0.0                // base vs 0.0
        fmov    d0, xzr                 // d0 = 0.0 (flags from fcmp are kept)
        beq     .L2                     // base == 0.0 -> return 0.0
.L14:
        fmov    d0, 1.0e+0              // accumulator = 1.0
.L5:
        add     w1, w0, w0, lsr 31      // w1 = n + sign bit, so the shift rounds toward zero
        tbz     x0, 0, .L4              // skip the multiply when bit 0 of n is clear
        fmul    d0, d0, d1              // accumulator *= base
.L4:
        asr     w0, w1, 1               // n /= 2 (signed)
        fmul    d1, d1, d1              // base *= base
        cbnz    w0, .L5                 // more bits left
.L2:
        ret                             // result in d0
.L6:
        fmov    d0, 1.0e+0              // n == 0: result is 1.0
        ret
.L13:                                   // negative exponent
        fmov    d0, 1.0e+0
        neg     w0, w0                  // n = -n
        fdiv    d1, d0, d1              // base = 1.0 / base
        fmov    d0, xzr                 // d0 = 0.0, the result if the new base is zero
        fcmp    d1, #0.0
        bne     .L14                    // new base != 0.0 -> run the loop
        b       .L2                     // new base == 0.0 -> return 0.0

(ARM) Optimizing Keil 5.05 (ARM mode):


; f -- raise a double to an integer power by square-and-multiply (soft-float).
; NOTE(review): the C prototype is presumably `double f(double x, int n)` -- confirm.
; In:  r0 = low word of x, r1 = high word of x, r2 = n
; Out: r0:r1 = result (low:high)
;   n == 0       -> 1.0
;   n <  0       -> base becomes 1.0/x and n becomes -n
;   base == 0.0  -> 0.0 (tested after the optional reciprocal)
; Register roles: r4 = n, r5:r6 = running base, r7:r8 = accumulator.
; NOTE(review): __aeabi_ddiv / __aeabi_dmul / __aeabi_cdcmpeq are assumed to be the
; ARM run-time ABI helpers (r0:r1 op r2:r3, result in r0:r1; cdcmpeq sets Z when
; the operands are equal) -- confirm against the RTABI.
f PROC
        PUSH     {r4-r8,lr}
        LDR      r8,|L0.188|            ; r8 = 0x3ff00000, high word of 1.0
        SUBS     r4,r2,#0               ; r4 = n, flags from n
        MOV      r6,r1                  ; r5:r6 = base
        MOV      r5,r0
        MOV      r7,#0                  ; r7:r8 = accumulator = 1.0
        MOV      r1,r8                  ; r1 = high word of 1.0 (used by both paths below)
        MOVEQ    r0,#0                  ; n == 0: r0:r1 = 1.0
        POPEQ    {r4-r8,pc}             ;         and return it
        BGE      |L0.68|                ; n > 0: no reciprocal needed
        RSB      r4,r2,#0               ; n = -n
        MOV      r2,r0                  ; r2:r3 = base
        MOV      r3,r6
        MOV      r0,#0                  ; r0:r1 = 1.0
        BL       __aeabi_ddiv           ; r0:r1 = 1.0 / base
        MOV      r5,r0                  ; base = reciprocal
        MOV      r6,r1
|L0.68|
        MOV      r2,#0                  ; r2:r3 = 0.0
        MOV      r3,r2
        MOV      r1,r6                  ; r0 already holds the low word of base on both paths
        BL       __aeabi_cdcmpeq        ; base vs 0.0
        MOVEQ    r0,#0                  ; base == 0.0: return 0.0
        MOVEQ    r1,r0
        POPEQ    {r4-r8,pc}
|L0.96|                                 ; loop head
        TST      r4,#1                  ; lowest bit of n set?
        BEQ      |L0.132|
        MOV      r2,r7                  ; r2:r3 = accumulator
        MOV      r3,r8
        MOV      r0,r5                  ; r0:r1 = base
        MOV      r1,r6
        BL       __aeabi_dmul
        MOV      r7,r0                  ; accumulator *= base
        MOV      r8,r1
|L0.132|
        ADD      r0,r4,r4,LSR #31       ; n + sign bit, so the shift rounds toward zero
        ASR      r4,r0,#1               ; n /= 2 (signed)
        MOV      r2,r5                  ; both operands = base
        MOV      r3,r6
        MOV      r0,r2
        MOV      r1,r3
        BL       __aeabi_dmul
        CMP      r4,#0
        MOV      r5,r0                  ; base *= base
        MOV      r6,r1
        MOVEQ    r0,r7                  ; n exhausted: r0:r1 = accumulator
        MOVEQ    r1,r8
        BNE      |L0.96|                ; more bits left
        POP      {r4-r8,pc}
        ENDP

|L0.188|
        DCD      0x3ff00000             ; high word of double 1.0 (the only word the code loads)
        DCD      0x00000000

(ARM) Optimizing Keil 5.05 (Thumb mode):


; f -- raise a double to an integer power by square-and-multiply (soft-float, Thumb).
; NOTE(review): the C prototype is presumably `double f(double x, int n)` -- confirm.
; In:  r0 = low word of x, r1 = high word of x, r2 = n
; Out: r0:r1 = result (low:high)
;   n == 0       -> 1.0
;   n <  0       -> base becomes 1.0/x and n becomes -n
;   base == 0.0  -> 0.0 (tested after the optional reciprocal)
; Register roles: r4 = n, r5:r6 = running base, r7 = accumulator low word;
; the accumulator high word lives in the stack slot at [sp,#0].
; NOTE(review): __aeabi_ddiv / __aeabi_dmul / __aeabi_cdcmpeq are assumed to be the
; ARM run-time ABI helpers (r0:r1 op r2:r3, result in r0:r1; cdcmpeq sets Z when
; the operands are equal) -- confirm against the RTABI.
f PROC
        PUSH     {r3-r7,lr}             ; pushing r3 also creates the [sp,#0] slot
        MOVS     r5,r0                  ; r5:r6 = base
        LDR      r0,|L0.116|            ; r0 = 0x3ff00000, high word of 1.0
        MOVS     r6,r1
        MOVS     r4,r2                  ; r4 = n
        MOVS     r7,#0                  ; accumulator low word = 0
        MOVS     r1,r0                  ; r1 = high word of 1.0
        CMP      r2,#0
        STR      r0,[sp,#0]             ; accumulator high word -> accumulator = 1.0
        BEQ      |L0.60|                ; n == 0 -> return 1.0
        CMP      r4,#0
        BGE      |L0.40|                ; n > 0: no reciprocal needed
        RSBS     r4,r2,#0               ; n = -n
        MOVS     r2,r5                  ; r2:r3 = base
        MOVS     r3,r6
        MOVS     r0,#0                  ; r0:r1 = 1.0
        BL       __aeabi_ddiv           ; r0:r1 = 1.0 / base
        MOVS     r5,r0                  ; base = reciprocal
        MOVS     r6,r1
|L0.40|
        MOVS     r2,#0                  ; r2:r3 = 0.0
        MOVS     r3,r2
        MOVS     r0,r5                  ; r0:r1 = base
        MOVS     r1,r6
        BL       __aeabi_cdcmpeq        ; base vs 0.0
        BNE      |L0.64|                ; base != 0.0 -> run the loop
        MOVS     r0,#0                  ; base == 0.0: return 0.0
        MOVS     r1,r0
        POP      {r3-r7,pc}
|L0.60|
        MOVS     r0,#0                  ; r0:r1 = 1.0 (r1 was set above)
        POP      {r3-r7,pc}
|L0.64|                                 ; loop head
        LSLS     r0,r4,#31              ; Z set when bit 0 of n is clear
        BEQ      |L0.84|
        LDR      r3,[sp,#0]             ; r2:r3 = accumulator
        MOVS     r2,r7
        MOVS     r0,r5                  ; r0:r1 = base
        MOVS     r1,r6
        BL       __aeabi_dmul
        MOVS     r7,r0                  ; accumulator *= base
        STR      r1,[sp,#0]
|L0.84|
        LSRS     r0,r4,#31              ; r0 = sign bit of n
        ADDS     r0,r0,r4               ; bias so the shift rounds toward zero
        ASRS     r4,r0,#1               ; n /= 2 (signed)
        MOVS     r2,r5                  ; both operands = base
        MOVS     r3,r6
        MOVS     r0,r2
        MOVS     r1,r3
        BL       __aeabi_dmul
        MOVS     r5,r0                  ; base *= base
        MOVS     r6,r1
        CMP      r4,#0
        BNE      |L0.64|                ; more bits left
        LDR      r1,[sp,#0]             ; r0:r1 = accumulator
        MOVS     r0,r7
        POP      {r3-r7,pc}
        ENDP

|L0.116|
        DCD      0x3ff00000             ; high word of double 1.0 (the only word the code loads)
        DCD      0x00000000

Optimizing GCC 4.4.5 for MIPS:


# f -- raise a double to an integer power by square-and-multiply.
# NOTE(review): the C prototype is presumably `double f(double x, int n)` -- confirm.
# In:  $f12/$f13 = x (base), $6 = n (exponent)
# Out: $f0/$f1 = result
#   n == 0       -> 1.0
#   n <  0       -> base becomes 1.0/x and n becomes -n
#   base == 0.0  -> 0.0 (tested after the optional reciprocal)
# Register roles: $f12 = running base, $f0 = accumulator, $2/$3 = scratch.
# The instruction after every branch/jump is its delay slot and always executes.
f:
        beq     $6,$0,$L2               # n == 0 -> return 1.0
        lui     $2,%hi($LC0)            # (delay slot) $2 = upper half of &$LC0, used by $L2 and $L13
        bltz    $6,$L13                 # n < 0 -> take the reciprocal first
        nop
        mtc1    $0,$f0                  # $f0/$f1 = 0.0
        nop
        mtc1    $0,$f1
        nop
        c.eq.d  $f12,$f0                # base == 0.0 ?
        nop
        bc1t    $L15                    # yes -> return the 0.0 already in $f0
        nop
$L14:
        lui     $2,%hi($LC0)
        lwc1    $f0,%lo($LC0+4)($2)     # accumulator = 1.0 (low word)
        nop
        lwc1    $f1,%lo($LC0)($2)       # accumulator high word
$L8:                                    # loop head
        srl     $3,$6,31                # $3 = sign bit of n
        addu    $3,$3,$6                # bias so the shift rounds toward zero
        andi    $2,$6,0x1               # lowest bit of n
        beq     $2,$0,$L7               # bit clear -> skip the multiply
        sra     $6,$3,1                 # (delay slot) n /= 2 (signed), done on both paths
        mul.d   $f0,$f0,$f12            # accumulator *= base
$L7:
        beq     $6,$0,$L15              # n exhausted -> return accumulator
        nop
        b       $L8
        mul.d   $f12,$f12,$f12          # (delay slot) base *= base
$L2:
        lwc1    $f0,%lo($LC0+4)($2)     # n == 0: result = 1.0
        nop
        lwc1    $f1,%lo($LC0)($2)
$L15:
        j       $31                     # return; result in $f0/$f1
        nop
$L13:                                   # negative exponent
        lwc1    $f0,%lo($LC0+4)($2)     # $f0/$f1 = 1.0
        nop
        lwc1    $f1,%lo($LC0)($2)
        nop
        div.d   $f12,$f0,$f12           # base = 1.0 / base
        mtc1    $0,$f0                  # $f0/$f1 = 0.0
        nop
        mtc1    $0,$f1
        nop
        c.eq.d  $f12,$f0                # new base == 0.0 ?
        nop
        bc1f    $L14                    # no -> run the loop
        subu    $6,$0,$6                # (delay slot) n = -n
        b       $L15                    # yes -> return 0.0
        nop
$LC0:                                   # IEEE-754 double 1.0, high word first
        .word   1072693248              # = 0x3FF00000
        .word   0

More challenges: challenges.re; about solutions: challenges.re/#Solutions.