Created
December 12, 2013 09:17
-
-
Save nikreiman/7925222 to your computer and use it in GitHub Desktop.
Converting 16-bit shorts to floating point samples with SSE
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stdlib.h>
#include <malloc.h>
#include <xmmintrin.h>
#include <emmintrin.h>
/*
 * Run configuration.
 * The enabled branch makes a single pass over a 32-sample buffer.
 * Change `#if 1` to `#if 0` to select 1,000,000 passes over a 512-sample
 * buffer instead (presumably the setting intended for timing runs).
 */
#if 1
#define ITERATIONS 1
#define BUFSIZE 32
#else
#define ITERATIONS 1000000
#define BUFSIZE 512
#endif
int main() { | |
// Visual studio: __declspec(align(16)) | |
short shortSamples[BUFSIZE] __attribute((aligned(16))); | |
float floatSamples[BUFSIZE + 16] __attribute((aligned(16))); | |
//__attribute((aligned(16))) short* shortSamples; | |
//__attribute((aligned(16))) float *floatSamples; | |
// Visual studio: _aligned_malloc | |
//posix_memalign((void**)&shortSamples, 16, sizeof(short) * BUFSIZE); | |
//posix_memalign((void**)&floatSamples, 16, sizeof(float) * BUFSIZE); | |
// Short -> float | |
#if 0 | |
for(int i = 0; i < BUFSIZE; ++i) { | |
shortSamples[i] = i; | |
} | |
#if 0 | |
for(int i = 0; i < BUFSIZE; ++i) { | |
shortSamples[i] = i; | |
floatSamples[i] = 0.0f; | |
} | |
for(int j = 0; j < ITERATIONS; j++) { | |
for(int i = 0; i < BUFSIZE; i++) { | |
floatSamples[i] = (float)shortSamples[i] / 32767.0f; | |
} | |
} | |
#else | |
for(int i = 0; i < BUFSIZE; ++i) { | |
shortSamples[i] = i; | |
floatSamples[i] = 0.0f; | |
} | |
__m128 factor = { 1.0f/32767, 1.0f/32767, 1.0f/32767, 1.0f/32767 }; | |
__m64 zero1 = { 0,0 }; | |
__m128i zero2 = { 0,0 }; | |
__m128i intValue; | |
__m128 floatValue; | |
for(int j = 0; j < ITERATIONS; j++) { | |
__m64 *ps = (__m64*)shortSamples; | |
__m128 *pd = (__m128*)floatSamples; | |
for (int i = 0; i < BUFSIZE; i+=4) { | |
intValue = _mm_unpacklo_epi16(_mm_set_epi64(zero1, *ps), zero2); | |
intValue = _mm_srai_epi32(_mm_slli_epi32(intValue, 16), 16); | |
floatValue = _mm_cvtepi32_ps(intValue); | |
*pd = _mm_mul_ps(floatValue, factor); | |
pd++; | |
ps++; | |
} | |
} | |
#endif | |
#endif | |
// Float -> short | |
#if 1 | |
for(int i = 0; i < BUFSIZE; i++) { | |
shortSamples[i] = 1.0f / (float)i; | |
} | |
#if 0 | |
for(int j = 0; j < ITERATIONS; j++) { | |
for(int i = 0; i < BUFSIZE; i++) { | |
shortSamples[i] = (short)(floatSamples[i] * 32767.0f); | |
printf("%f, ", floatSamples[i]); | |
} | |
} | |
#else | |
__m128 factor = { 32767.0f, 32767.0f, 32767.0f, 32767.0f }; | |
__m64 zero1 = { 0, 0 }; | |
__m128i zero2 = { 0, 0 }; | |
__m128i intValue; | |
__m128 floatValue; | |
for(int j = 0; j < ITERATIONS; j++) { | |
__m64 *ps = (__m64*)shortSamples; | |
__m128 *pd = (__m128*)floatSamples; | |
for (int i = 0; i < BUFSIZE; i+=4) { | |
floatValue = _mm_mul_ps(*floatSamples, value); | |
intValue = _mm_unpacklo_epi16(_mm_set_epi64(zero1, *ps), zero2); | |
intValue = _mm_srai_epi32(_mm_slli_epi32(intValue, 16), 16); | |
floatValue = _mm_cvtepi32_ps(intValue); | |
} | |
} | |
#endif | |
#endif | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment