Slide 15
Slide 15 text
ASM化
clangに渡せばOK(x64では最適なコードが生成される)
% clang-15 -O2 -S -masm=intel a.ll
# Syntax: GAS Intel syntax (output of `clang -S -masm=intel`), x86-64.
# The slide showed the two routines side by side; they are listed one after
# the other here, one instruction per line, instruction order unchanged.

#-----------------------------------------------------------------------
# mcl_fp_add4
# Adds the four 64-bit limbs at [rdx] and [rsi] with a carry chain,
# subtracts the four limbs at [rcx] with a borrow chain, and stores to [rdi]
# either the difference or the plain sum, chosen by the sign flag left by
# the subtraction of the top limb.
#
# In:    rdi = destination (4 qwords written)
#        rsi, rdx = the two addends (4 qwords read from each)
#        rcx = value subtracted (4 qwords read)
#        NOTE(review): register roles match SysV AMD64 argument order
#        (z, x, y, p) - presumably p is the field modulus; confirm against
#        the C prototype.
# Clobb: rax, rdx, rsi, r8-r11, flags (rbx is saved and restored)
#-----------------------------------------------------------------------
mcl_fp_add4:
        push    rbx                             # rbx is used as scratch below; restored before ret

        # r11:r10:r9:r8 = [rdx] + [rsi], carry propagated limb to limb
        mov     r8, qword ptr [rdx]
        add     r8, qword ptr [rsi]
        mov     r9, qword ptr [rdx + 8]
        adc     r9, qword ptr [rsi + 8]
        mov     r10, qword ptr [rdx + 16]
        adc     r10, qword ptr [rsi + 16]
        mov     r11, qword ptr [rdx + 24]
        adc     r11, qword ptr [rsi + 24]       # carry out of the top limb is not consumed

        # rbx:rdx:rax:rsi = sum - [rcx]; rsi and rdx are free to reuse because
        # every load through them has already been done
        mov     rsi, r8
        sub     rsi, qword ptr [rcx]
        mov     rax, r9
        sbb     rax, qword ptr [rcx + 8]
        mov     rdx, r10
        sbb     rdx, qword ptr [rcx + 16]
        mov     rbx, r11
        sbb     rbx, qword ptr [rcx + 24]       # SF = sign bit of the top limb of the difference

        # cmov leaves the flags untouched, so all four selects test the same SF:
        # if the difference is negative, keep the unreduced sum instead
        # NOTE(review): choosing on the sign bit presumably relies on the
        # modulus leaving the top bit of the top limb clear - confirm.
        cmovs   rbx, r11
        cmovs   rdx, r10
        cmovs   rax, r9
        cmovs   rsi, r8

        mov     qword ptr [rdi], rsi
        mov     qword ptr [rdi + 8], rax
        mov     qword ptr [rdi + 16], rdx
        mov     qword ptr [rdi + 24], rbx
        pop     rbx
        ret

#-----------------------------------------------------------------------
# mcl_fp_add3
# Three-limb counterpart of mcl_fp_add4: adds the limbs at [rdx] and [rsi],
# subtracts the limbs at [rcx], and stores to [rdi] the difference, or the
# plain sum when the top limb of the difference is negative.
#
# In:    rdi = destination (3 qwords written)
#        rsi, rdx = the two addends (3 qwords read from each)
#        rcx = value subtracted (3 qwords read)
#        NOTE(review): same presumed (z, x, y, p) roles as mcl_fp_add4 - confirm.
# Clobb: rax, rcx, rdx, rsi, r8-r10, flags
#-----------------------------------------------------------------------
mcl_fp_add3:
        # r10:r9:r8 = [rdx] + [rsi], carry propagated limb to limb
        mov     r8, qword ptr [rdx]
        add     r8, qword ptr [rsi]
        mov     r9, qword ptr [rdx + 8]
        adc     r9, qword ptr [rsi + 8]
        mov     r10, qword ptr [rdx + 16]
        adc     r10, qword ptr [rsi + 16]       # carry out of the top limb is not consumed

        # rdx:rax:rsi = sum - [rcx]
        mov     rsi, r8
        sub     rsi, qword ptr [rcx]
        mov     rax, r9
        sbb     rax, qword ptr [rcx + 8]
        mov     rdx, r10
        sbb     rdx, qword ptr [rcx + 16]

        # rcx is no longer needed as a pointer; the arithmetic shift replicates
        # the sign bit of the top difference limb and sets SF from that result
        mov     rcx, rdx
        sar     rcx, 63

        # negative difference -> keep the unreduced sum
        cmovs   rdx, r10
        cmovs   rax, r9
        cmovs   rsi, r8

        mov     qword ptr [rdi], rsi
        mov     qword ptr [rdi + 8], rax
        mov     qword ptr [rdi + 16], rdx
        ret
15 / 45