The conclusion is:
- NEON intrinsics support converting float32 values to fixed-point int32 for a given Q value (number of fractional bits). The reverse conversion is also supported.
- NEON intrinsics already clip (saturate) float values outside of [-1, 1] during the conversion!
- The issue cost is 1 cycle per 32x2 vector on Cortex-A9.
Source code:
#include <stdint.h>
#include <stdio.h>

#include "arm_neon.h"

/*
 * Round-trips four floats through fixed-point int32 using the NEON
 * conversion intrinsics and prints every stage:
 *   1. the source floats,
 *   2. the int32 values produced by vcvtq_n_s32_f32 (shown as hex),
 *   3. the floats produced by converting those int32 values back with
 *      vcvtq_n_f32_s32.
 *
 * Takes no arguments and returns nothing; the only effect is the text
 * written to stdout.
 */
void test_float_int32_converter(void)
{
    /*
     * Number of fractional bits used by both conversions. The intrinsics
     * take this as an immediate operand, so it is an enum constant (a true
     * integer constant expression) rather than a `const int` variable.
     */
    enum { Q = 31 };

    float f[] = {-1.0f, -0.5f, 0.0f, 0.5f};
    int32_t i[] = {0, 0, 0, 0};

    /* float -> fixed point */
    float32x4_t vf = vld1q_f32(&f[0]);
    int32x4_t vi = vcvtq_n_s32_f32(vf, Q);
    vst1q_s32(&i[0], vi);

    printf("Q=%d\n", Q);
    printf("src float 32x4: %f, %f, %f, %f\n", f[0], f[1], f[2], f[3]);
    /* %x expects unsigned int, so convert the signed lanes explicitly. */
    printf("dst int 32x4: %8.8x, %8.8x, %8.8x, %8.8x\n",
           (unsigned int)i[0], (unsigned int)i[1],
           (unsigned int)i[2], (unsigned int)i[3]);

    /* fixed point -> float, overwriting the source array */
    vf = vcvtq_n_f32_s32(vi, Q);
    vst1q_f32(&f[0], vf);
    printf("dst float 32x4: %f, %f, %f, %f\n", f[0], f[1], f[2], f[3]);
}
build:
armcc --arm_linux_paths --arm_linux_config_file=/home/jxion/arm_linux_config.xml main.c --cpu=Cortex-A9 -o test.exe
Result on Pandaboard:
Q=31
src float 32x4: -1.000000, -0.500000, 0.000000, 0.500000
dst int 32x4: 80000000, c0000000, 00000000, 40000000
dst float 32x4: -1.000000, -0.500000, 0.000000, 0.500000
Q=31
src float 32x4: -1.200000, -0.500000, 1.000000, 1.500000
dst int 32x4: 80000000, c0000000, 7fffffff, 7fffffff
dst float 32x4: -1.000000, -0.500000, 1.000000, 1.000000