好多指令不认识...慢慢研究
// Approximates the cosine of x with scalar SSE instructions (MSVC inline asm).
//
// All constants used here (_ps_am_inv_sign_mask, _ps_am_pi_o_2, _ps_am_2_o_pi,
// _ps_am_1, _sincos_masks, _ps_sincos_p0.._ps_sincos_p3) are defined outside
// this function; the descriptions below assume they hold what their names
// suggest -- TODO confirm against their definitions.
//
// Steps performed:
//   1. t = (x AND _ps_am_inv_sign_mask) + _ps_am_pi_o_2, scaled by _ps_am_2_o_pi
//      (presumably |x| shifted by pi/2 and expressed in quarter turns).
//   2. q = trunc(t) as a 32-bit integer; f = min(t - q, _ps_am_1).
//   3. y = ((_ps_am_1 - f) AND mask) OR (f AND NOT mask), where mask is read
//      from _sincos_masks at byte offset (q & 1) * 4. Presumably entry 0 is
//      all-zero bits and entry 1 all-one bits, so odd q mirrors f to 1 - f.
//   4. Bit 1 of q is moved to bit 31 and ORed into y as its sign bit.
//   5. Result = y * (((p3*y^2 + p2)*y^2 + p1)*y^2 + p0).
//
// NOTE(review): q comes from cvttss2si into a 32-bit register, so inputs whose
// scaled magnitude does not fit a signed 32-bit integer are not reduced
// correctly.
//
// x : input value (presumably radians -- confirm with the constant definitions).
// Returns the approximation; the parameter x is reused as the output slot.
float _SSE_cos(float x)
{
    float signBits;   // 32-bit scratch slot: carries the sign bit from EDX to XMM3
    __asm
    {
        // --- range reduction: xmm0 = (|x| + pi/2) * (2/pi) ---------------
        movss     xmm1, _ps_am_inv_sign_mask
        movss     xmm0, x
        andps     xmm0, xmm1                  // mask the input (presumably clears the sign bit)
        addss     xmm0, _ps_am_pi_o_2
        mulss     xmm0, _ps_am_2_o_pi

        // --- split into integer part q (ecx) and fraction f (xmm0) --------
        cvttss2si ecx, xmm0                   // q = trunc(t)
        cvtsi2ss  xmm1, ecx                   // q back as a float
        subss     xmm0, xmm1                  // f = t - q
        movss     xmm5, _ps_am_1
        minss     xmm0, xmm5                  // f = min(f, _ps_am_1)

        // --- sign: bit 1 of q moved up to bit 31 --------------------------
        mov       edx, ecx
        shl       edx, (31 - 1)
        and       edx, 0x80000000
        mov       signBits, edx

        // --- choose between f and (_ps_am_1 - f) via the parity mask ------
        and       ecx, 0x1
        movss     xmm6, _sincos_masks[ecx * 4]
        subss     xmm5, xmm0                  // xmm5 = _ps_am_1 - f
        andps     xmm5, xmm6                  // keep it where the mask is set
        andnps    xmm6, xmm0                  // keep f where the mask is clear
        orps      xmm5, xmm6                  // xmm5 = y

        // --- polynomial in y^2, Horner form -------------------------------
        movss     xmm0, xmm5                  // xmm0 = y (kept for the final multiply)
        mulss     xmm5, xmm5                  // xmm5 = y^2
        movss     xmm2, xmm5                  // xmm2 = y^2
        movss     xmm1, _ps_sincos_p3
        mulss     xmm5, xmm1
        movss     xmm7, _ps_sincos_p2
        addss     xmm5, xmm7
        mulss     xmm5, xmm2
        movss     xmm4, _ps_sincos_p1
        addss     xmm5, xmm4
        mulss     xmm5, xmm2
        movss     xmm1, _ps_sincos_p0
        addss     xmm5, xmm1                  // xmm5 = polynomial value

        // --- apply the sign bit and combine -------------------------------
        movss     xmm3, signBits
        orps      xmm0, xmm3                  // y with the sign bit ORed in
        mulss     xmm0, xmm5
        movss     x, xmm0
    }
    return x;
}
// Approximates the cosine of x like the scalar-SSE version, but keeps the
// integer work in XMM registers using SSE2 integer instructions instead of
// going through general-purpose registers and a stack temporary.
//
// All constants used here (_ps_am_inv_sign_mask, _ps_am_pi_o_2, _ps_am_2_o_pi,
// _ps_am_1, _epi32_1, _epi32_2, _ps_sincos_p0.._ps_sincos_p3) are defined
// outside this function; the descriptions below assume they hold what their
// names suggest -- TODO confirm against their definitions.
//
// Steps performed:
//   1. t = ((x AND _ps_am_inv_sign_mask) + _ps_am_pi_o_2) * _ps_am_2_o_pi.
//   2. q = trunc(t) as a 32-bit integer; f = min(t - q, _ps_am_1).
//   3. keep = all-ones when (q AND _epi32_1) == 0, else all-zeros.
//      y = (f AND keep) OR ((_ps_am_1 - f) AND NOT keep).
//   4. (q AND _epi32_2) shifted left by 30 is ORed into y (presumably moving
//      bit 1 of q into the float sign-bit position).
//   5. Result = (((p3*y^2 + p2)*y^2 + p1)*y^2 + p0) * signed y.
//
// NOTE(review): q is a 32-bit integer conversion, so inputs whose scaled
// magnitude does not fit a signed 32-bit integer are not reduced correctly.
//
// x : input value (presumably radians -- confirm with the constant definitions).
// Returns the approximation; the parameter x is reused as the output slot.
float _SSE2_cos(float x)
{
    __asm
    {
        // --- range reduction: xmm0 = (|x| + pi/2) * (2/pi) ---------------
        movss     xmm0, x
        movss     xmm1, _ps_am_inv_sign_mask
        andps     xmm0, xmm1                  // mask the input (presumably clears the sign bit)
        movss     xmm2, _ps_am_pi_o_2
        addss     xmm0, xmm2
        movss     xmm3, _ps_am_2_o_pi
        mulss     xmm0, xmm3

        // --- split into integer part q (xmm2) and fraction f (xmm0) -------
        cvttps2dq xmm2, xmm0                  // q = trunc(t), as packed int32
        cvtdq2ps  xmm6, xmm2                  // q back as a float
        subss     xmm0, xmm6                  // f = t - q
        movss     xmm4, _ps_am_1
        minss     xmm0, xmm4                  // f = min(f, _ps_am_1)

        // --- parity mask: all-ones when (q AND _epi32_1) is zero ----------
        movd      xmm5, _epi32_1
        pand      xmm5, xmm2
        pxor      xmm3, xmm3                  // zero to compare against
        pcmpeqd   xmm5, xmm3

        // --- sign: (q AND _epi32_2) shifted left by 30 --------------------
        movd      xmm1, _epi32_2
        pand      xmm2, xmm1
        pslld     xmm2, (31 - 1)

        // --- choose between f and (_ps_am_1 - f) via the parity mask ------
        subss     xmm4, xmm0                  // xmm4 = _ps_am_1 - f
        andps     xmm0, xmm5                  // keep f where the mask is set
        andnps    xmm5, xmm4                  // keep (_ps_am_1 - f) where it is clear
        orps      xmm0, xmm5                  // xmm0 = y

        // --- signed copy of y, and y^2 ------------------------------------
        movaps    xmm1, xmm0
        orps      xmm1, xmm2                  // xmm1 = y with the sign bit ORed in
        mulss     xmm0, xmm0                  // xmm0 = y^2
        movaps    xmm7, xmm0                  // xmm7 = y^2

        // --- polynomial in y^2, Horner form, then final multiply ----------
        movss     xmm3, _ps_sincos_p3
        mulss     xmm0, xmm3
        movss     xmm4, _ps_sincos_p2
        addss     xmm0, xmm4
        mulss     xmm0, xmm7
        movss     xmm5, _ps_sincos_p1
        addss     xmm0, xmm5
        mulss     xmm0, xmm7
        movss     xmm6, _ps_sincos_p0
        addss     xmm0, xmm6
        mulss     xmm0, xmm1
        movss     x, xmm0
    }
    return x;
}
float _SSE_Sqrt(float x)
{
float root = 0.f;
_asm
{
sqrtss xmm0, x
movss root, xmm0
}
return root;
}