Index: include/reactos/libs/libmpg123/abi_align.h =================================================================== --- include/reactos/libs/libmpg123/abi_align.h (revision 0) +++ include/reactos/libs/libmpg123/abi_align.h (working copy) @@ -0,0 +1,39 @@ +/* + mpg123lib_intern: Common non-public stuff for libmpg123 + + copyright 1995-2008 by the mpg123 project - free software under the terms of the LGPL 2.1 + see COPYING and AUTHORS files in distribution or http://mpg123.org + + derived from the old mpg123.h +*/ + +#ifndef MPG123_H_ABI_ALIGN +#define MPG123_H_ABI_ALIGN + +#include "config.h" + +/* ABI conformance for other compilers. + mpg123 needs 16byte-aligned stack for SSE and friends. + gcc provides that, but others don't necessarily. */ +#ifdef ABI_ALIGN_FUN +#ifndef attribute_align_arg +#if defined(__GNUC__) && (__GNUC__ > 4 || __GNUC__ == 4 && __GNUC_MINOR__>1) +# define attribute_align_arg __attribute__((force_align_arg_pointer)) +/* The gcc that can align the stack does not need the check... nor does it work with gcc 4.3+, anyway. */ +#else + +# define attribute_align_arg +/* Other compilers get code to catch misaligned stack. + Well, except Sun Studio, which accepts the aligned attribute but does not honor it. */ +#if !defined(__SUNPRO_C) +# define NEED_ALIGNCHECK +#endif + +#endif +#endif +#else +#define attribute_align_arg +/* We won't try the align check... */ +#endif + +#endif Index: include/reactos/libs/libmpg123/compat.h =================================================================== --- include/reactos/libs/libmpg123/compat.h (revision 62563) +++ include/reactos/libs/libmpg123/compat.h (working copy) @@ -15,6 +15,7 @@ #define MPG123_COMPAT_H #include "config.h" +#include "intsym.h" #ifdef HAVE_STDLIB_H /* realloc, size_t */ @@ -75,6 +76,9 @@ #include #endif +/* compat_open makes little sense without */ +#include <fcntl.h> + /* To parse big numbers... 
*/ #ifdef HAVE_ATOLL #define atobigint atoll @@ -82,7 +86,7 @@ #define atobigint atol #endif -// typedef unsigned char byte; +typedef unsigned char byte; /* A safe realloc also for very old systems where realloc(NULL, size) returns NULL. */ void *safe_realloc(void *ptr, size_t size); @@ -128,7 +132,7 @@ * @param[in] mbptr Pointer to multibyte string. * @return file descriptor (>=0) or error code. */ -int compat_open(const char *filename, int mode); +int compat_open(const char *filename, int flags); /** * Closing a file handle can be platform specific. @@ -152,7 +156,7 @@ * * WideCharToMultiByte - http://msdn.microsoft.com/en-us/library/dd374130(VS.85).aspx */ -int win32_wide_utf8 (const wchar_t * const wptr, const char **const mbptr, size_t * const buflen); +int win32_wide_utf8(const wchar_t * const wptr, char **mbptr, size_t * buflen); /** * win32_mbc2uni @@ -166,7 +170,7 @@ * MultiByteToWideChar - http://msdn.microsoft.com/en-us/library/dd319072(VS.85).aspx */ -int win32_utf8_wide (const char *const mbptr, const wchar_t ** const wptr, size_t * const buflen); +int win32_utf8_wide(const char *const mbptr, wchar_t **wptr, size_t *buflen); #endif /* That one comes from Tellie on OS/2, needed in resolver. */ @@ -174,4 +178,6 @@ typedef int socklen_t; #endif +#include "true.h" + #endif Index: include/reactos/libs/libmpg123/debug.h =================================================================== --- include/reactos/libs/libmpg123/debug.h (revision 62563) +++ include/reactos/libs/libmpg123/debug.h (working copy) @@ -97,23 +97,23 @@ #endif /* error macros also here... 
*/ -#ifndef NO_ERROR -#define error(s) fprintf(stderr, "[" __FILE__ ":%i] error: " s "\n", __LINE__) -#define error1(s, a) fprintf(stderr, "[" __FILE__ ":%i] error: " s "\n", __LINE__, a) -#define error2(s, a, b) fprintf(stderr, "[" __FILE__ ":%i] error: " s "\n", __LINE__, a, b) -#define error3(s, a, b, c) fprintf(stderr, "[" __FILE__ ":%i] error: " s "\n", __LINE__, a, b, c) -#define error4(s, a, b, c, d) fprintf(stderr, "[" __FILE__ ":%i] error: " s "\n", __LINE__, a, b, c, d) -#define error5(s, a, b, c, d, e) fprintf(stderr, "[" __FILE__ ":%i] error: " s "\n", __LINE__, a, b, c, d, e) -#define error6(s, a, b, c, d, e, f) fprintf(stderr, "[" __FILE__ ":%i] error: " s "\n", __LINE__, a, b, c, d, e, f) -#define error7(s, a, b, c, d, e, f, g) fprintf(stderr, "[" __FILE__ ":%i] error: " s "\n", __LINE__, a, b, c, d, e, f, g) -#define error8(s, a, b, c, d, e, f, g, h) fprintf(stderr, "[" __FILE__ ":%i] error: " s "\n", __LINE__, a, b, c, d, e, f, g, h) -#define error9(s, a, b, c, d, e, f, g, h, i) fprintf(stderr, "[" __FILE__ ":%i] error: " s "\n", __LINE__, a, b, c, d, e, f, g, h, i) -#define error10(s, a, b, c, d, e, f, g, h, i, j) fprintf(stderr, "[" __FILE__ ":%i] error: " s "\n", __LINE__, a, b, c, d, e, f, g, h, i, j) -#define error11(s, a, b, c, d, e, f, g, h, i, j, k) fprintf(stderr, "[" __FILE__ ":%i] error: " s "\n", __LINE__, a, b, c, d, e, f, g, h, i, j, k) -#define error12(s, a, b, c, d, e, f, g, h, i, j, k, l) fprintf(stderr, "[" __FILE__ ":%i] error: " s "\n", __LINE__, a, b, c, d, e, f, g, h, i, j, k, l) -#define error13(s, a, b, c, d, e, f, g, h, i, j, k, l, m) fprintf(stderr, "[" __FILE__ ":%i] error: " s "\n", __LINE__, a, b, c, d, e, f, g, h, i, j, k, l, m) -#define error14(s, a, b, c, d, e, f, g, h, i, j, k, l, m, n) fprintf(stderr, "[" __FILE__ ":%i] error: " s "\n", __LINE__, a, b, c, d, e, f, g, h, i, j, k, l, m, n) -#define error15(s, a, b, c, d, e, f, g, h, i, j, k, l, m, n, o) fprintf(stderr, "[" __FILE__ ":%i] error: " s "\n", __LINE__, a, 
b, c, d, e, f, g, h, i, j, k, l, m, n, o) +#ifndef NO_ERRORMSG +#define error(s) fprintf(stderr, "\n[" __FILE__ ":%i] error: " s "\n", __LINE__) +#define error1(s, a) fprintf(stderr, "\n[" __FILE__ ":%i] error: " s "\n", __LINE__, a) +#define error2(s, a, b) fprintf(stderr, "\n[" __FILE__ ":%i] error: " s "\n", __LINE__, a, b) +#define error3(s, a, b, c) fprintf(stderr, "\n[" __FILE__ ":%i] error: " s "\n", __LINE__, a, b, c) +#define error4(s, a, b, c, d) fprintf(stderr, "\n[" __FILE__ ":%i] error: " s "\n", __LINE__, a, b, c, d) +#define error5(s, a, b, c, d, e) fprintf(stderr, "\n[" __FILE__ ":%i] error: " s "\n", __LINE__, a, b, c, d, e) +#define error6(s, a, b, c, d, e, f) fprintf(stderr, "\n[" __FILE__ ":%i] error: " s "\n", __LINE__, a, b, c, d, e, f) +#define error7(s, a, b, c, d, e, f, g) fprintf(stderr, "\n[" __FILE__ ":%i] error: " s "\n", __LINE__, a, b, c, d, e, f, g) +#define error8(s, a, b, c, d, e, f, g, h) fprintf(stderr, "\n[" __FILE__ ":%i] error: " s "\n", __LINE__, a, b, c, d, e, f, g, h) +#define error9(s, a, b, c, d, e, f, g, h, i) fprintf(stderr, "\n[" __FILE__ ":%i] error: " s "\n", __LINE__, a, b, c, d, e, f, g, h, i) +#define error10(s, a, b, c, d, e, f, g, h, i, j) fprintf(stderr, "\n[" __FILE__ ":%i] error: " s "\n", __LINE__, a, b, c, d, e, f, g, h, i, j) +#define error11(s, a, b, c, d, e, f, g, h, i, j, k) fprintf(stderr, "\n[" __FILE__ ":%i] error: " s "\n", __LINE__, a, b, c, d, e, f, g, h, i, j, k) +#define error12(s, a, b, c, d, e, f, g, h, i, j, k, l) fprintf(stderr, "\n[" __FILE__ ":%i] error: " s "\n", __LINE__, a, b, c, d, e, f, g, h, i, j, k, l) +#define error13(s, a, b, c, d, e, f, g, h, i, j, k, l, m) fprintf(stderr, "\n[" __FILE__ ":%i] error: " s "\n", __LINE__, a, b, c, d, e, f, g, h, i, j, k, l, m) +#define error14(s, a, b, c, d, e, f, g, h, i, j, k, l, m, n) fprintf(stderr, "\n[" __FILE__ ":%i] error: " s "\n", __LINE__, a, b, c, d, e, f, g, h, i, j, k, l, m, n) +#define error15(s, a, b, c, d, e, f, g, h, i, j, k, l, 
m, n, o) fprintf(stderr, "\n[" __FILE__ ":%i] error: " s "\n", __LINE__, a, b, c, d, e, f, g, h, i, j, k, l, m, n, o) #else #define error(s) #define error1(s, a) @@ -135,22 +135,22 @@ /* ereturn macros also here... */ #ifndef NO_ERETURN -#define ereturn(rv, s) do{ fprintf(stderr, "[" __FILE__ ":%i] ereturn: " s "\n", __LINE__); return rv; }while(0) -#define ereturn1(rv, s, a) do{ fprintf(stderr, "[" __FILE__ ":%i] ereturn: " s "\n", __LINE__, a); return rv; }while(0) -#define ereturn2(rv, s, a, b) do{ fprintf(stderr, "[" __FILE__ ":%i] ereturn: " s "\n", __LINE__, a, b); return rv; }while(0) -#define ereturn3(rv, s, a, b, c) do{ fprintf(stderr, "[" __FILE__ ":%i] ereturn: " s "\n", __LINE__, a, b, c); return rv; }while(0) -#define ereturn4(rv, s, a, b, c, d) do{ fprintf(stderr, "[" __FILE__ ":%i] ereturn: " s "\n", __LINE__, a, b, c, d); return rv; }while(0) -#define ereturn5(rv, s, a, b, c, d, e) do{ fprintf(stderr, "[" __FILE__ ":%i] ereturn: " s "\n", __LINE__, a, b, c, d, e); return rv; }while(0) -#define ereturn6(rv, s, a, b, c, d, e, f) do{ fprintf(stderr, "[" __FILE__ ":%i] ereturn: " s "\n", __LINE__, a, b, c, d, e, f); return rv; }while(0) -#define ereturn7(rv, s, a, b, c, d, e, f, g) do{ fprintf(stderr, "[" __FILE__ ":%i] ereturn: " s "\n", __LINE__, a, b, c, d, e, f, g); return rv; }while(0) -#define ereturn8(rv, s, a, b, c, d, e, f, g, h) do{ fprintf(stderr, "[" __FILE__ ":%i] ereturn: " s "\n", __LINE__, a, b, c, d, e, f, g, h); return rv; }while(0) -#define ereturn9(rv, s, a, b, c, d, e, f, g, h, i) do{ fprintf(stderr, "[" __FILE__ ":%i] ereturn: " s "\n", __LINE__, a, b, c, d, e, f, g, h, i); return rv; }while(0) -#define ereturn10(rv, s, a, b, c, d, e, f, g, h, i, j) do{ fprintf(stderr, "[" __FILE__ ":%i] ereturn: " s "\n", __LINE__, a, b, c, d, e, f, g, h, i, j); return rv; }while(0) -#define ereturn11(rv, s, a, b, c, d, e, f, g, h, i, j, k) do{ fprintf(stderr, "[" __FILE__ ":%i] ereturn: " s "\n", __LINE__, a, b, c, d, e, f, g, h, i, j, k); return 
rv; }while(0) -#define ereturn12(rv, s, a, b, c, d, e, f, g, h, i, j, k, l) do{ fprintf(stderr, "[" __FILE__ ":%i] ereturn: " s "\n", __LINE__, a, b, c, d, e, f, g, h, i, j, k, l); return rv; }while(0) -#define ereturn13(rv, s, a, b, c, d, e, f, g, h, i, j, k, l, m) do{ fprintf(stderr, "[" __FILE__ ":%i] ereturn: " s "\n", __LINE__, a, b, c, d, e, f, g, h, i, j, k, l, m); return rv; }while(0) -#define ereturn14(rv, s, a, b, c, d, e, f, g, h, i, j, k, l, m, n) do{ fprintf(stderr, "[" __FILE__ ":%i] ereturn: " s "\n", __LINE__, a, b, c, d, e, f, g, h, i, j, k, l, m, n); return rv; }while(0) -#define ereturn15(rv, s, a, b, c, d, e, f, g, h, i, j, k, l, m, n, o) do{ fprintf(stderr, "[" __FILE__ ":%i] ereturn: " s "\n", __LINE__, a, b, c, d, e, f, g, h, i, j, k, l, m, n, o); return rv; }while(0) +#define ereturn(rv, s) do{ fprintf(stderr, "\n[" __FILE__ ":%i] ereturn: " s "\n", __LINE__); return rv; }while(0) +#define ereturn1(rv, s, a) do{ fprintf(stderr, "\n[" __FILE__ ":%i] ereturn: " s "\n", __LINE__, a); return rv; }while(0) +#define ereturn2(rv, s, a, b) do{ fprintf(stderr, "\n[" __FILE__ ":%i] ereturn: " s "\n", __LINE__, a, b); return rv; }while(0) +#define ereturn3(rv, s, a, b, c) do{ fprintf(stderr, "\n[" __FILE__ ":%i] ereturn: " s "\n", __LINE__, a, b, c); return rv; }while(0) +#define ereturn4(rv, s, a, b, c, d) do{ fprintf(stderr, "\n[" __FILE__ ":%i] ereturn: " s "\n", __LINE__, a, b, c, d); return rv; }while(0) +#define ereturn5(rv, s, a, b, c, d, e) do{ fprintf(stderr, "\n[" __FILE__ ":%i] ereturn: " s "\n", __LINE__, a, b, c, d, e); return rv; }while(0) +#define ereturn6(rv, s, a, b, c, d, e, f) do{ fprintf(stderr, "\n[" __FILE__ ":%i] ereturn: " s "\n", __LINE__, a, b, c, d, e, f); return rv; }while(0) +#define ereturn7(rv, s, a, b, c, d, e, f, g) do{ fprintf(stderr, "\n[" __FILE__ ":%i] ereturn: " s "\n", __LINE__, a, b, c, d, e, f, g); return rv; }while(0) +#define ereturn8(rv, s, a, b, c, d, e, f, g, h) do{ fprintf(stderr, "\n[" __FILE__ ":%i] 
ereturn: " s "\n", __LINE__, a, b, c, d, e, f, g, h); return rv; }while(0) +#define ereturn9(rv, s, a, b, c, d, e, f, g, h, i) do{ fprintf(stderr, "\n[" __FILE__ ":%i] ereturn: " s "\n", __LINE__, a, b, c, d, e, f, g, h, i); return rv; }while(0) +#define ereturn10(rv, s, a, b, c, d, e, f, g, h, i, j) do{ fprintf(stderr, "\n[" __FILE__ ":%i] ereturn: " s "\n", __LINE__, a, b, c, d, e, f, g, h, i, j); return rv; }while(0) +#define ereturn11(rv, s, a, b, c, d, e, f, g, h, i, j, k) do{ fprintf(stderr, "\n[" __FILE__ ":%i] ereturn: " s "\n", __LINE__, a, b, c, d, e, f, g, h, i, j, k); return rv; }while(0) +#define ereturn12(rv, s, a, b, c, d, e, f, g, h, i, j, k, l) do{ fprintf(stderr, "\n[" __FILE__ ":%i] ereturn: " s "\n", __LINE__, a, b, c, d, e, f, g, h, i, j, k, l); return rv; }while(0) +#define ereturn13(rv, s, a, b, c, d, e, f, g, h, i, j, k, l, m) do{ fprintf(stderr, "\n[" __FILE__ ":%i] ereturn: " s "\n", __LINE__, a, b, c, d, e, f, g, h, i, j, k, l, m); return rv; }while(0) +#define ereturn14(rv, s, a, b, c, d, e, f, g, h, i, j, k, l, m, n) do{ fprintf(stderr, "\n[" __FILE__ ":%i] ereturn: " s "\n", __LINE__, a, b, c, d, e, f, g, h, i, j, k, l, m, n); return rv; }while(0) +#define ereturn15(rv, s, a, b, c, d, e, f, g, h, i, j, k, l, m, n, o) do{ fprintf(stderr, "\n[" __FILE__ ":%i] ereturn: " s "\n", __LINE__, a, b, c, d, e, f, g, h, i, j, k, l, m, n, o); return rv; }while(0) #else #define ereturn(rv, s) return rv #define ereturn1(rv, s, a) return rv Index: include/reactos/libs/libmpg123/decode.h =================================================================== --- include/reactos/libs/libmpg123/decode.h (revision 62563) +++ include/reactos/libs/libmpg123/decode.h (working copy) @@ -53,7 +53,11 @@ int synth_1to1_stereo_altivec(real*, real*, mpg123_handle*); int synth_1to1_x86_64 (real*, int, mpg123_handle*, int); int synth_1to1_stereo_x86_64(real*, real*, mpg123_handle*); +int synth_1to1_avx (real*, int, mpg123_handle*, int); +int synth_1to1_stereo_avx 
(real*, real*, mpg123_handle*); int synth_1to1_arm (real*, int, mpg123_handle*, int); +int synth_1to1_neon (real*, int, mpg123_handle*, int); +int synth_1to1_stereo_neon(real*, real*, mpg123_handle*); /* This is different, special usage in layer3.c only. Hence, the name... and now forget about it. Never use it outside that special portion of code inside layer3.c! */ @@ -60,7 +64,7 @@ int absynth_1to1_i486(real*, int, mpg123_handle*, int); /* These mono/stereo converters use one of the above for the grunt work. */ int synth_1to1_mono (real*, mpg123_handle*); -int synth_1to1_mono2stereo(real*, mpg123_handle*); +int synth_1to1_m2s(real*, mpg123_handle*); /* Sample rate decimation comes in less flavours. */ #ifndef NO_DOWNSAMPLE @@ -68,18 +72,18 @@ int synth_2to1_dither (real*, int, mpg123_handle*, int); int synth_2to1_i386 (real*, int, mpg123_handle*, int); int synth_2to1_mono (real*, mpg123_handle*); -int synth_2to1_mono2stereo(real*, mpg123_handle*); +int synth_2to1_m2s(real*, mpg123_handle*); int synth_4to1 (real *,int, mpg123_handle*, int); int synth_4to1_dither (real *,int, mpg123_handle*, int); int synth_4to1_i386 (real*, int, mpg123_handle*, int); int synth_4to1_mono (real*, mpg123_handle*); -int synth_4to1_mono2stereo(real*, mpg123_handle*); +int synth_4to1_m2s(real*, mpg123_handle*); #endif #ifndef NO_NTOM /* NtoM is really just one implementation. 
*/ int synth_ntom (real *,int, mpg123_handle*, int); int synth_ntom_mono (real *, mpg123_handle *); -int synth_ntom_mono2stereo (real *, mpg123_handle *); +int synth_ntom_m2s (real *, mpg123_handle *); #endif #endif @@ -92,25 +96,25 @@ int synth_1to1_8bit_wrap (real*, int, mpg123_handle*, int); int synth_1to1_8bit_mono (real*, mpg123_handle*); #endif -int synth_1to1_8bit_mono2stereo(real*, mpg123_handle*); +int synth_1to1_8bit_m2s(real*, mpg123_handle*); #ifndef NO_16BIT int synth_1to1_8bit_wrap_mono (real*, mpg123_handle*); -int synth_1to1_8bit_wrap_mono2stereo(real*, mpg123_handle*); +int synth_1to1_8bit_wrap_m2s(real*, mpg123_handle*); #endif #ifndef NO_DOWNSAMPLE int synth_2to1_8bit (real*, int, mpg123_handle*, int); int synth_2to1_8bit_i386 (real*, int, mpg123_handle*, int); int synth_2to1_8bit_mono (real*, mpg123_handle*); -int synth_2to1_8bit_mono2stereo(real*, mpg123_handle*); +int synth_2to1_8bit_m2s(real*, mpg123_handle*); int synth_4to1_8bit (real*, int, mpg123_handle*, int); int synth_4to1_8bit_i386 (real*, int, mpg123_handle*, int); int synth_4to1_8bit_mono (real*, mpg123_handle*); -int synth_4to1_8bit_mono2stereo(real*, mpg123_handle*); +int synth_4to1_8bit_m2s(real*, mpg123_handle*); #endif #ifndef NO_NTOM int synth_ntom_8bit (real*, int, mpg123_handle*, int); int synth_ntom_8bit_mono (real*, mpg123_handle*); -int synth_ntom_8bit_mono2stereo(real*, mpg123_handle*); +int synth_ntom_8bit_m2s(real*, mpg123_handle*); #endif #endif @@ -124,24 +128,28 @@ int synth_1to1_real_stereo_sse (real*, real*, mpg123_handle*); int synth_1to1_real_x86_64 (real*, int, mpg123_handle*, int); int synth_1to1_real_stereo_x86_64(real*, real*, mpg123_handle*); +int synth_1to1_real_avx (real*, int, mpg123_handle*, int); +int synth_1to1_real_stereo_avx (real*, real*, mpg123_handle*); int synth_1to1_real_altivec (real*, int, mpg123_handle*, int); int synth_1to1_real_stereo_altivec(real*, real*, mpg123_handle*); +int synth_1to1_real_neon (real*, int, mpg123_handle*, int); +int 
synth_1to1_real_stereo_neon(real*, real*, mpg123_handle*); int synth_1to1_real_mono (real*, mpg123_handle*); -int synth_1to1_real_mono2stereo(real*, mpg123_handle*); +int synth_1to1_real_m2s(real*, mpg123_handle*); #ifndef NO_DOWNSAMPLE int synth_2to1_real (real*, int, mpg123_handle*, int); int synth_2to1_real_i386 (real*, int, mpg123_handle*, int); int synth_2to1_real_mono (real*, mpg123_handle*); -int synth_2to1_real_mono2stereo(real*, mpg123_handle*); +int synth_2to1_real_m2s(real*, mpg123_handle*); int synth_4to1_real (real*, int, mpg123_handle*, int); int synth_4to1_real_i386 (real*, int, mpg123_handle*, int); int synth_4to1_real_mono (real*, mpg123_handle*); -int synth_4to1_real_mono2stereo(real*, mpg123_handle*); +int synth_4to1_real_m2s(real*, mpg123_handle*); #endif #ifndef NO_NTOM int synth_ntom_real (real*, int, mpg123_handle*, int); int synth_ntom_real_mono (real*, mpg123_handle*); -int synth_ntom_real_mono2stereo(real*, mpg123_handle*); +int synth_ntom_real_m2s(real*, mpg123_handle*); #endif #endif @@ -153,24 +161,28 @@ int synth_1to1_s32_stereo_sse (real*, real*, mpg123_handle*); int synth_1to1_s32_x86_64 (real*, int, mpg123_handle*, int); int synth_1to1_s32_stereo_x86_64(real*, real*, mpg123_handle*); +int synth_1to1_s32_avx (real*, int, mpg123_handle*, int); +int synth_1to1_s32_stereo_avx (real*, real*, mpg123_handle*); int synth_1to1_s32_altivec (real*, int, mpg123_handle*, int); int synth_1to1_s32_stereo_altivec(real*, real*, mpg123_handle*); +int synth_1to1_s32_neon (real*, int, mpg123_handle*, int); +int synth_1to1_s32_stereo_neon(real*, real*, mpg123_handle*); int synth_1to1_s32_mono (real*, mpg123_handle*); -int synth_1to1_s32_mono2stereo(real*, mpg123_handle*); +int synth_1to1_s32_m2s(real*, mpg123_handle*); #ifndef NO_DOWNSAMPLE int synth_2to1_s32 (real*, int, mpg123_handle*, int); int synth_2to1_s32_i386 (real*, int, mpg123_handle*, int); int synth_2to1_s32_mono (real*, mpg123_handle*); -int synth_2to1_s32_mono2stereo(real*, 
mpg123_handle*); +int synth_2to1_s32_m2s(real*, mpg123_handle*); int synth_4to1_s32 (real*, int, mpg123_handle*, int); int synth_4to1_s32_i386 (real*, int, mpg123_handle*, int); int synth_4to1_s32_mono (real*, mpg123_handle*); -int synth_4to1_s32_mono2stereo(real*, mpg123_handle*); +int synth_4to1_s32_m2s(real*, mpg123_handle*); #endif #ifndef NO_NTOM int synth_ntom_s32 (real*, int, mpg123_handle*, int); int synth_ntom_s32_mono (real*, mpg123_handle*); -int synth_ntom_s32_mono2stereo(real*, mpg123_handle*); +int synth_ntom_s32_m2s(real*, mpg123_handle*); #endif #endif @@ -189,6 +201,9 @@ void dct36 (real *,real *,real *,real *,real *); void dct36_3dnow (real *,real *,real *,real *,real *); void dct36_3dnowext(real *,real *,real *,real *,real *); +void dct36_x86_64 (real *,real *,real *,real *,real *); +void dct36_sse (real *,real *,real *,real *,real *); +void dct36_avx (real *,real *,real *,real *,real *); /* Tools for NtoM resampling synth, defined in ntom.c . */ int synth_ntom_set_step(mpg123_handle *fr); /* prepare ntom decoding */ Index: include/reactos/libs/libmpg123/frame.h =================================================================== --- include/reactos/libs/libmpg123/frame.h (revision 62563) +++ include/reactos/libs/libmpg123/frame.h (working copy) @@ -38,16 +38,19 @@ /* the output buffer, used to be pcm_sample, pcm_point and audiobufsize */ struct outbuffer { - unsigned char *data; + unsigned char *data; /* main data pointer, aligned */ unsigned char *p; /* read pointer */ size_t fill; /* fill from read pointer */ - size_t size; /* that's actually more like a safe size, after we have more than that, flush it */ + size_t size; + unsigned char *rdata; /* unaligned base pointer */ }; struct audioformat { - int encoding; + int encoding; /* Final encoding, after post-processing. */ int encsize; /* Size of one sample in bytes, plain int should be fine here... */ + int dec_enc; /* Encoding of decoder synth. 
*/ + int dec_encsize; /* Size of one decoder sample. */ int channels; long rate; }; @@ -77,10 +80,19 @@ long resync_limit; long index_size; /* Long, because: negative values have a meaning. */ long preframes; +#ifndef NO_FEEDER + long feedpool; + long feedbuffer; +#endif }; +enum frame_state_flags +{ + FRAME_ACCURATE = 0x1 /**< 0001 Positions are considered accurate. */ + ,FRAME_FRANKENSTEIN = 0x2 /**< 0010 This stream is concatenated. */ + ,FRAME_FRESH_DECODER = 0x4 /**< 0100 Decoder is freshly initialized. */ +}; - /* There is a lot to condense here... many ints can be merged as flags; though the main space is still consumed by buffers. */ struct mpg123_handle_struct { @@ -149,7 +161,7 @@ #ifdef OPT_MULTI #ifndef NO_LAYER3 -#if (defined OPT_3DNOW || defined OPT_3DNOWEXT) +#if (defined OPT_3DNOW_VINTAGE || defined OPT_3DNOWEXT_VINTAGE || defined OPT_SSE || defined OPT_X86_64 || defined OPT_AVX) void (*the_dct36)(real *,real *,real *,real *,real *); #endif #endif @@ -184,6 +196,7 @@ int down_sample; int header_change; int lay; + long spf; /* cached count of samples per frame */ int (*do_layer)(mpg123_handle *); int error_protection; int bitrate_index; @@ -199,9 +212,10 @@ int freesize; /* free format frame size */ enum mpg123_vbr vbr; /* 1 if variable bitrate was detected */ off_t num; /* frame offset ... */ + off_t input_offset; /* byte offset of this frame in input stream */ off_t playnum; /* playback offset... includes repetitions, reset at seeks */ off_t audio_start; /* The byte offset in the file where audio data begins. */ - char accurate; /* Flag to see if we trust the frame number. */ + int state_flags; char silent_resync; /* Do not complain for the next n resyncs. */ unsigned char* xing_toc; /* The seek TOC from Xing header. */ int freeformat; @@ -237,7 +251,9 @@ unsigned char *bsbuf; unsigned char *bsbufold; int bsnum; + /* That is the header matching the last read frame body. 
*/ unsigned long oldhead; + /* That is the header that is supposedly the first of the stream. */ unsigned long firsthead; int abr_rate; #ifdef FRAME_INDEX @@ -255,6 +271,7 @@ off_t lastframe; /* last frame to decode (for gapless or num_frames limit) */ off_t ignoreframe; /* frames to decode but discard before firstframe */ #ifdef GAPLESS + off_t gapless_frames; /* frame count for the gapless part */ off_t firstoff; /* number of samples to ignore from firstframe */ off_t lastoff; /* number of samples to use from lastframe */ off_t begin_s; /* overall begin offset in samples */ @@ -261,6 +278,7 @@ off_t begin_os; off_t end_s; /* overall end offset in samples */ off_t end_os; + off_t fullend_os; /* gapless_frames translated to output samples */ #endif unsigned int crc; /* Well, I need a safe 16bit type, actually. But wider doesn't hurt. */ struct reader *rd; /* pointer to the reading functions */ @@ -356,13 +374,11 @@ 1152 576 */ -#define spf(fr) ((fr)->lay == 1 ? 384 : ((fr)->lay==2 ? 1152 : ((fr)->lsf || (fr)->mpeg25 ? 576 : 1152))) #ifdef GAPLESS /* well, I take that one for granted... at least layer3 */ #define GAPLESS_DELAY 529 -/* still fine-tuning the "real music" window... see read_frame */ -void frame_gapless_init(mpg123_handle *fr, off_t b, off_t e); +void frame_gapless_init(mpg123_handle *fr, off_t framecount, off_t bskip, off_t eskip); void frame_gapless_realinit(mpg123_handle *fr); void frame_gapless_update(mpg123_handle *mh, off_t total_samples); /*void frame_gapless_position(mpg123_handle* fr); @@ -394,8 +410,4 @@ off_t frame_tell_seek(mpg123_handle *fr); /* Take a copy of the Xing VBR TOC for fuzzy seeking. 
*/ int frame_fill_toc(mpg123_handle *fr, unsigned char* in); - - -/* adjust volume to current outscale and rva values if wanted */ -void do_rva(mpg123_handle *fr); #endif Index: include/reactos/libs/libmpg123/gapless.h =================================================================== --- include/reactos/libs/libmpg123/gapless.h (revision 0) +++ include/reactos/libs/libmpg123/gapless.h (working copy) @@ -0,0 +1,119 @@ +/* + sampleadjust: gapless sample offset math + + copyright 1995-2012 by the mpg123 project - free software under the terms of the LGPL 2.1 + see COPYING and AUTHORS files in distribution or http://mpg123.org + + This is no stand-alone header, precisely to be able to fool it into using fake handle types for testing the math. +*/ + +#include "debug.h" + +#ifdef GAPLESS +/* From internal sample number to external. */ +static off_t sample_adjust(mpg123_handle *mh, off_t x) +{ + off_t s; + if(mh->p.flags & MPG123_GAPLESS) + { + /* It's a bit tricky to do this computation for the padding samples. + They are not there on the outside. */ + if(x > mh->end_os) + { + if(x < mh->fullend_os) + s = mh->end_os - mh->begin_os; + else + s = x - (mh->fullend_os - mh->end_os + mh->begin_os); + } + else + s = x - mh->begin_os; + } + else + s = x; + + return s; +} + +/* from external samples to internal */ +static off_t sample_unadjust(mpg123_handle *mh, off_t x) +{ + off_t s; + if(mh->p.flags & MPG123_GAPLESS) + { + s = x + mh->begin_os; + /* There is a hole; we don't create sample positions in there. + Jump from the end of the gapless track directly to after the padding. */ + if(s >= mh->end_os) + s += mh->fullend_os - mh->end_os; + } + else s = x; + + return s; +} + +/* + Take the buffer after a frame decode (strictly: it is the data from frame fr->num!) and cut samples out. + fr->buffer.fill may then be smaller than before... +*/ +static void frame_buffercheck(mpg123_handle *fr) +{ + /* When we have no accurate position, gapless code does not make sense. 
*/ + if(!(fr->state_flags & FRAME_ACCURATE)) return; + + /* Get a grip on dirty streams that start with a gapless header. + Simply accept all data from frames that are too much, + they are supposedly attached to the stream after the fact. */ + if(fr->gapless_frames > 0 && fr->num >= fr->gapless_frames) return; + + /* Important: We first cut samples from the end, then cut from beginning (including left-shift of the buffer). + This order works also for the case where firstframe == lastframe. */ + + /* The last interesting (planned) frame: Only use some leading samples. + Note a difference from the below: The last frame and offset are unchanged by seeks. + The lastoff keeps being valid. */ + if(fr->lastframe > -1 && fr->num >= fr->lastframe) + { + /* There can be more than one frame of padding at the end, so we ignore the whole frame if we are beyond lastframe. */ + off_t byteoff = (fr->num == fr->lastframe) ? samples_to_bytes(fr, fr->lastoff) : 0; + if((off_t)fr->buffer.fill > byteoff) + { + fr->buffer.fill = byteoff; + } + if(VERBOSE3) fprintf(stderr, "\nNote: Cut frame %"OFF_P" buffer on end of stream to %"OFF_P" samples, fill now %"SIZE_P" bytes.\n", (off_p)fr->num, (off_p)(fr->num == fr->lastframe ? fr->lastoff : 0), (size_p)fr->buffer.fill); + } + + /* The first interesting frame: Skip some leading samples. */ + if(fr->firstoff && fr->num == fr->firstframe) + { + off_t byteoff = samples_to_bytes(fr, fr->firstoff); + if((off_t)fr->buffer.fill > byteoff) + { + fr->buffer.fill -= byteoff; + /* buffer.p != buffer.data only for own buffer */ + debug6("cutting %li samples/%li bytes on begin, own_buffer=%i at %p=%p, buf[1]=%i", + (long)fr->firstoff, (long)byteoff, fr->own_buffer, (void*)fr->buffer.p, (void*)fr->buffer.data, ((short*)fr->buffer.p)[2]); + if(fr->own_buffer) fr->buffer.p = fr->buffer.data + byteoff; + else memmove(fr->buffer.data, fr->buffer.data + byteoff, fr->buffer.fill); + debug3("done cutting, buffer at %p =? 
%p, buf[1]=%i", + (void*)fr->buffer.p, (void*)fr->buffer.data, ((short*)fr->buffer.p)[2]); + } + else fr->buffer.fill = 0; + + if(VERBOSE3) fprintf(stderr, "\nNote: Cut frame %"OFF_P" buffer on beginning of stream by %"OFF_P" samples, fill now %"SIZE_P" bytes.\n", (off_p)fr->num, (off_p)fr->firstoff, (size_p)fr->buffer.fill); + /* We can only reach this frame again by seeking. And on seeking, firstoff will be recomputed. + So it is safe to null it here (and it makes the if() decision abort earlier). */ + fr->firstoff = 0; + } +} + +#define SAMPLE_ADJUST(mh,x) sample_adjust(mh,x) +#define SAMPLE_UNADJUST(mh,x) sample_unadjust(mh,x) +#define FRAME_BUFFERCHECK(mh) frame_buffercheck(mh) + +#else /* no gapless code included */ + +#define SAMPLE_ADJUST(mh,x) (x) +#define SAMPLE_UNADJUST(mh,x) (x) +#define FRAME_BUFFERCHECK(mh) + +#endif Index: include/reactos/libs/libmpg123/getcpuflags.h =================================================================== --- include/reactos/libs/libmpg123/getcpuflags.h (revision 62563) +++ include/reactos/libs/libmpg123/getcpuflags.h (working copy) @@ -12,7 +12,8 @@ /* standard level flags part 1 (ECX)*/ #define FLAG_SSE3 0x00000001 - +#define FLAG_SSSE3 0x00000200 +#define FLAG_AVX 0x1C000000 /* standard level flags part 2 (EDX) */ #define FLAG2_MMX 0x00800000 #define FLAG2_SSE 0x02000000 @@ -22,17 +23,23 @@ #define XFLAG_MMX 0x00800000 #define XFLAG_3DNOW 0x80000000 #define XFLAG_3DNOWEXT 0x40000000 +/* eXtended Control Register 0 */ +#define XCR0FLAG_AVX 0x00000006 + struct cpuflags { +#if defined(OPT_ARM) || defined(OPT_NEON) + unsigned int has_neon; +#else unsigned int id; unsigned int std; unsigned int std2; unsigned int ext; + unsigned int xcr0_lo; +#endif }; -extern struct cpuflags cpu_flags; - unsigned int getcpuflags(struct cpuflags* cf); /* checks the family */ @@ -45,5 +52,9 @@ #define cpu_sse(s) (FLAG2_SSE & s.std2) #define cpu_sse2(s) (FLAG2_SSE2 & s.std2) #define cpu_sse3(s) (FLAG_SSE3 & s.std) +#define cpu_avx(s) 
((FLAG_AVX & s.std) == FLAG_AVX && (XCR0FLAG_AVX & s.xcr0_lo) == XCR0FLAG_AVX) +#define cpu_fast_sse(s) ((((s.id & 0xf00)>>8) == 6 && FLAG_SSSE3 & s.std) /* for Intel/VIA; family 6 CPUs with SSSE3 */ || \ + (((s.id & 0xf00)>>8) == 0xf && (((s.id & 0x0ff00000)>>20) > 0 && ((s.id & 0x0ff00000)>>20) != 5))) /* for AMD; family > 0xF CPUs except Bobcat */ +#define cpu_neon(s) (s.has_neon) #endif Index: include/reactos/libs/libmpg123/huffman.h =================================================================== --- include/reactos/libs/libmpg123/huffman.h (revision 62563) +++ include/reactos/libs/libmpg123/huffman.h (working copy) @@ -1,5 +1,5 @@ /* - huffman.h: huffman tables ... recalcualted to work with optimzed decoder scheme (MH) + huffman.h: huffman tables ... recalculated to work with optimized decoder scheme (MH) copyright ?-2006 by the mpg123 project - free software under the terms of the LGPL 2.1 see COPYING and AUTHORS files in distribution or http://mpg123.org @@ -16,32 +16,32 @@ struct newhuff { unsigned int linbits; - short *table; + const short *table; }; -static short tab0[] = +static const short tab0[] = { 0 }; -static short tab1[] = +static const short tab1[] = { -5, -3, -1, 17, 1, 16, 0 }; -static short tab2[] = +static const short tab2[] = { -15, -11, -9, -5, -3, -1, 34, 2, 18, -1, 33, 32, 17, -1, 1, 16, 0 }; -static short tab3[] = +static const short tab3[] = { -13, -11, -9, -5, -3, -1, 34, 2, 18, -1, 33, 32, 16, 17, -1, 1, 0 }; -static short tab5[] = +static const short tab5[] = { -29, -25, -23, -15, -7, -5, -3, -1, 51, 35, 50, 49, -3, -1, 19, 3, -1, 48, 34, -3, -1, 18, 33, -1, 2, 32, 17, -1, 1, 16, @@ -48,7 +48,7 @@ 0 }; -static short tab6[] = +static const short tab6[] = { -25, -19, -13, -9, -5, -3, -1, 51, 3, 35, -1, 50, 48, -1, 19, 49, -3, -1, 34, 2, 18, -3, -1, 33, 32, 1, -1, 17, -1, 16, @@ -55,7 +55,7 @@ 0 }; -static short tab7[] = +static const short tab7[] = { -69, -65, -57, -39, -29, -17, -11, -7, -3, -1, 85, 69, -1, 84, 83, -1, 53, 68, -3, 
-1, 37, 82, 21, -5, -1, 81, -1, 5, 52, -1, @@ -64,7 +64,7 @@ -5, -1, 33, -1, 2, 32, 17, -1, 1, 16, 0 }; -static short tab8[] = +static const short tab8[] = { -65, -63, -59, -45, -31, -19, -13, -7, -5, -3, -1, 85, 84, 69, 83, -3, -1, 53, 68, 37, -3, -1, 82, 5, 21, -5, -1, 81, -1, 52, @@ -73,7 +73,7 @@ 2, 32, -1, 18, 33, 17, -3, -1, 1, 16, 0 }; -static short tab9[] = +static const short tab9[] = { -63, -53, -41, -29, -19, -11, -5, -3, -1, 85, 69, 53, -1, 83, -1, 84, 5, -3, -1, 68, 37, -1, 82, 21, -3, -1, 81, 52, -1, 67, @@ -82,7 +82,7 @@ 18, -1, 33, 32, -3, -1, 17, 1, -1, 16, 0 }; -static short tab10[] = +static const short tab10[] = { -125,-121,-111, -83, -55, -35, -21, -13, -7, -3, -1, 119, 103, -1, 118, 87, -3, -1, 117, 102, 71, -3, -1, 116, 86, -1, 101, 55, -9, -3, @@ -95,7 +95,7 @@ 2, 32, 17, -1, 1, 16, 0 }; -static short tab11[] = +static const short tab11[] = { -121,-113, -89, -59, -43, -27, -17, -7, -3, -1, 119, 103, -1, 118, 117, -3, -1, 102, 71, -1, 116, -1, 87, 85, -5, -3, -1, 86, 101, 55, @@ -108,7 +108,7 @@ 32, 17, -3, -1, 1, 16, 0 }; -static short tab12[] = +static const short tab12[] = { -115, -99, -73, -45, -27, -17, -9, -5, -3, -1, 119, 103, 118, -1, 87, 117, -3, -1, 102, 71, -1, 116, 101, -3, -1, 86, 55, -3, -1, 115, @@ -121,7 +121,7 @@ 2, 32, 0, 17, -1, 1, 16 }; -static short tab13[] = +static const short tab13[] = { -509,-503,-475,-405,-333,-265,-205,-153,-115, -83, -53, -35, -21, -13, -9, -7, -5, -3, -1, 254, 252, 253, 237, 255, -1, 239, 223, -3, -1, 238, @@ -160,7 +160,7 @@ 0 }; -static short tab15[] = +static const short tab15[] = { -495,-445,-355,-263,-183,-115, -77, -43, -27, -13, -7, -3, -1, 255, 239, -1, 254, 223, -1, 238, -1, 253, 207, -7, -3, -1, 252, 222, -1, 237, @@ -199,7 +199,7 @@ 0 }; -static short tab16[] = +static const short tab16[] = { -509,-503,-461,-323,-103, -37, -27, -15, -7, -3, -1, 239, 254, -1, 223, 253, -3, -1, 207, 252, -1, 191, 251, -5, -1, 175, -1, 250, 159, -3, @@ -238,7 +238,7 @@ 0 }; -static short tab24[] = +static 
const short tab24[] = { -451,-117, -43, -25, -15, -7, -3, -1, 239, 254, -1, 223, 253, -3, -1, 207, 252, -1, 191, 251, -5, -1, 250, -1, 175, 159, -1, 249, 248, -9, @@ -277,7 +277,7 @@ 0 }; -static short tab_c0[] = +static const short tab_c0[] = { -29, -21, -13, -7, -3, -1, 11, 15, -1, 13, 14, -3, -1, 7, 5, 9, -3, -1, 6, 3, -1, 10, 12, -3, -1, 2, 1, -1, 4, 8, @@ -284,7 +284,7 @@ 0 }; -static short tab_c1[] = +static const short tab_c1[] = { -15, -7, -3, -1, 15, 14, -1, 13, 12, -3, -1, 11, 10, -1, 9, 8, -7, -3, -1, 7, 6, -1, 5, 4, -3, -1, 3, 2, -1, 1, @@ -293,7 +293,7 @@ -static struct newhuff ht[] = +static const struct newhuff ht[] = { { /* 0 */ 0 , tab0 } , { /* 2 */ 0 , tab1 } , @@ -330,7 +330,7 @@ { /* 16 */ 13, tab24 } }; -static struct newhuff htc[] = +static const struct newhuff htc[] = { { /* 1 , 1 , */ 0 , tab_c0 } , { /* 1 , 1 , */ 0 , tab_c1 } Index: include/reactos/libs/libmpg123/icy.h =================================================================== --- include/reactos/libs/libmpg123/icy.h (revision 62563) +++ include/reactos/libs/libmpg123/icy.h (working copy) @@ -26,8 +26,11 @@ #else +#undef init_icy #define init_icy(a) +#undef clear_icy #define clear_icy(a) +#undef reset_icy #define reset_icy(a) #endif /* NO_ICY */ Index: include/reactos/libs/libmpg123/id3.h =================================================================== --- include/reactos/libs/libmpg123/id3.h (revision 62563) +++ include/reactos/libs/libmpg123/id3.h (working copy) @@ -13,9 +13,21 @@ #include "frame.h" #ifdef NO_ID3V2 +# ifdef init_id3 +# undef init_id3 +# endif # define init_id3(fr) +# ifdef exit_id3 +# undef exit_id3 +# endif # define exit_id3(fr) +# ifdef reset_id3 +# undef reset_id3 +# endif # define reset_id3(fr) +# ifdef id3_link +# undef id3_link +# endif # define id3_link(fr) #else void init_id3(mpg123_handle *fr); Index: include/reactos/libs/libmpg123/intsym.h =================================================================== --- 
include/reactos/libs/libmpg123/intsym.h (revision 0) +++ include/reactos/libs/libmpg123/intsym.h (working copy) @@ -0,0 +1,272 @@ +#ifndef MPG123_INTMAP_H +#define MPG123_INTMAP_H +/* Mapping of internal mpg123 symbols to something that is less likely to conflict in case of static linking. */ +#define COS9 INT123_COS9 +#define tfcos36 INT123_tfcos36 +#define pnts INT123_pnts +#define safe_realloc INT123_safe_realloc +#define compat_open INT123_compat_open +#define compat_close INT123_compat_close +#define win32_wide_utf8 INT123_win32_wide_utf8 +#define win32_utf8_wide INT123_win32_utf8_wide +#define ntom_set_ntom INT123_ntom_set_ntom +#define synth_1to1 INT123_synth_1to1 +#define synth_1to1_dither INT123_synth_1to1_dither +#define synth_1to1_i386 INT123_synth_1to1_i386 +#define synth_1to1_i586 INT123_synth_1to1_i586 +#define synth_1to1_i586_dither INT123_synth_1to1_i586_dither +#define synth_1to1_mmx INT123_synth_1to1_mmx +#define synth_1to1_3dnow INT123_synth_1to1_3dnow +#define synth_1to1_sse INT123_synth_1to1_sse +#define synth_1to1_stereo_sse INT123_synth_1to1_stereo_sse +#define synth_1to1_3dnowext INT123_synth_1to1_3dnowext +#define synth_1to1_altivec INT123_synth_1to1_altivec +#define synth_1to1_stereo_altivec INT123_synth_1to1_stereo_altivec +#define synth_1to1_x86_64 INT123_synth_1to1_x86_64 +#define synth_1to1_stereo_x86_64 INT123_synth_1to1_stereo_x86_64 +#define synth_1to1_avx INT123_synth_1to1_avx +#define synth_1to1_stereo_avx INT123_synth_1to1_stereo_avx +#define synth_1to1_arm INT123_synth_1to1_arm +#define synth_1to1_neon INT123_synth_1to1_neon +#define synth_1to1_stereo_neon INT123_synth_1to1_stereo_neon +#define absynth_1to1_i486 INT123_absynth_1to1_i486 +#define synth_1to1_mono INT123_synth_1to1_mono +#define synth_1to1_m2s INT123_synth_1to1_m2s +#define synth_2to1 INT123_synth_2to1 +#define synth_2to1_dither INT123_synth_2to1_dither +#define synth_2to1_i386 INT123_synth_2to1_i386 +#define synth_2to1_mono INT123_synth_2to1_mono +#define 
synth_2to1_m2s INT123_synth_2to1_m2s +#define synth_4to1 INT123_synth_4to1 +#define synth_4to1_dither INT123_synth_4to1_dither +#define synth_4to1_i386 INT123_synth_4to1_i386 +#define synth_4to1_mono INT123_synth_4to1_mono +#define synth_4to1_m2s INT123_synth_4to1_m2s +#define synth_ntom INT123_synth_ntom +#define synth_ntom_mono INT123_synth_ntom_mono +#define synth_ntom_m2s INT123_synth_ntom_m2s +#define synth_1to1_8bit INT123_synth_1to1_8bit +#define synth_1to1_8bit_i386 INT123_synth_1to1_8bit_i386 +#define synth_1to1_8bit_wrap INT123_synth_1to1_8bit_wrap +#define synth_1to1_8bit_mono INT123_synth_1to1_8bit_mono +#define synth_1to1_8bit_m2s INT123_synth_1to1_8bit_m2s +#define synth_1to1_8bit_wrap_mono INT123_synth_1to1_8bit_wrap_mono +#define synth_1to1_8bit_wrap_m2s INT123_synth_1to1_8bit_wrap_m2s +#define synth_2to1_8bit INT123_synth_2to1_8bit +#define synth_2to1_8bit_i386 INT123_synth_2to1_8bit_i386 +#define synth_2to1_8bit_mono INT123_synth_2to1_8bit_mono +#define synth_2to1_8bit_m2s INT123_synth_2to1_8bit_m2s +#define synth_4to1_8bit INT123_synth_4to1_8bit +#define synth_4to1_8bit_i386 INT123_synth_4to1_8bit_i386 +#define synth_4to1_8bit_mono INT123_synth_4to1_8bit_mono +#define synth_4to1_8bit_m2s INT123_synth_4to1_8bit_m2s +#define synth_ntom_8bit INT123_synth_ntom_8bit +#define synth_ntom_8bit_mono INT123_synth_ntom_8bit_mono +#define synth_ntom_8bit_m2s INT123_synth_ntom_8bit_m2s +#define synth_1to1_real INT123_synth_1to1_real +#define synth_1to1_real_i386 INT123_synth_1to1_real_i386 +#define synth_1to1_real_sse INT123_synth_1to1_real_sse +#define synth_1to1_real_stereo_sse INT123_synth_1to1_real_stereo_sse +#define synth_1to1_real_x86_64 INT123_synth_1to1_real_x86_64 +#define synth_1to1_real_stereo_x86_64 INT123_synth_1to1_real_stereo_x86_64 +#define synth_1to1_real_avx INT123_synth_1to1_real_avx +#define synth_1to1_real_stereo_avx INT123_synth_1to1_real_stereo_avx +#define synth_1to1_real_altivec INT123_synth_1to1_real_altivec +#define 
synth_1to1_real_stereo_altivec INT123_synth_1to1_real_stereo_altivec +#define synth_1to1_real_neon INT123_synth_1to1_real_neon +#define synth_1to1_real_stereo_neon INT123_synth_1to1_real_stereo_neon +#define synth_1to1_real_mono INT123_synth_1to1_real_mono +#define synth_1to1_real_m2s INT123_synth_1to1_real_m2s +#define synth_2to1_real INT123_synth_2to1_real +#define synth_2to1_real_i386 INT123_synth_2to1_real_i386 +#define synth_2to1_real_mono INT123_synth_2to1_real_mono +#define synth_2to1_real_m2s INT123_synth_2to1_real_m2s +#define synth_4to1_real INT123_synth_4to1_real +#define synth_4to1_real_i386 INT123_synth_4to1_real_i386 +#define synth_4to1_real_mono INT123_synth_4to1_real_mono +#define synth_4to1_real_m2s INT123_synth_4to1_real_m2s +#define synth_ntom_real INT123_synth_ntom_real +#define synth_ntom_real_mono INT123_synth_ntom_real_mono +#define synth_ntom_real_m2s INT123_synth_ntom_real_m2s +#define synth_1to1_s32 INT123_synth_1to1_s32 +#define synth_1to1_s32_i386 INT123_synth_1to1_s32_i386 +#define synth_1to1_s32_sse INT123_synth_1to1_s32_sse +#define synth_1to1_s32_stereo_sse INT123_synth_1to1_s32_stereo_sse +#define synth_1to1_s32_x86_64 INT123_synth_1to1_s32_x86_64 +#define synth_1to1_s32_stereo_x86_64 INT123_synth_1to1_s32_stereo_x86_64 +#define synth_1to1_s32_avx INT123_synth_1to1_s32_avx +#define synth_1to1_s32_stereo_avx INT123_synth_1to1_s32_stereo_avx +#define synth_1to1_s32_altivec INT123_synth_1to1_s32_altivec +#define synth_1to1_s32_stereo_altivec INT123_synth_1to1_s32_stereo_altivec +#define synth_1to1_s32_neon INT123_synth_1to1_s32_neon +#define synth_1to1_s32_stereo_neon INT123_synth_1to1_s32_stereo_neon +#define synth_1to1_s32_mono INT123_synth_1to1_s32_mono +#define synth_1to1_s32_m2s INT123_synth_1to1_s32_m2s +#define synth_2to1_s32 INT123_synth_2to1_s32 +#define synth_2to1_s32_i386 INT123_synth_2to1_s32_i386 +#define synth_2to1_s32_mono INT123_synth_2to1_s32_mono +#define synth_2to1_s32_m2s INT123_synth_2to1_s32_m2s +#define 
synth_4to1_s32 INT123_synth_4to1_s32 +#define synth_4to1_s32_i386 INT123_synth_4to1_s32_i386 +#define synth_4to1_s32_mono INT123_synth_4to1_s32_mono +#define synth_4to1_s32_m2s INT123_synth_4to1_s32_m2s +#define synth_ntom_s32 INT123_synth_ntom_s32 +#define synth_ntom_s32_mono INT123_synth_ntom_s32_mono +#define synth_ntom_s32_m2s INT123_synth_ntom_s32_m2s +#define dct64 INT123_dct64 +#define dct64_i386 INT123_dct64_i386 +#define dct64_altivec INT123_dct64_altivec +#define dct64_i486 INT123_dct64_i486 +#define dct36 INT123_dct36 +#define dct36_3dnow INT123_dct36_3dnow +#define dct36_3dnowext INT123_dct36_3dnowext +#define dct36_sse INT123_dct36_sse +#define dct36_x86_64 INT123_dct36_x86_64 +#define dct36_avx INT123_dct36_avx +#define synth_ntom_set_step INT123_synth_ntom_set_step +#define ntom_val INT123_ntom_val +#define ntom_frame_outsamples INT123_ntom_frame_outsamples +#define ntom_frmouts INT123_ntom_frmouts +#define ntom_ins2outs INT123_ntom_ins2outs +#define ntom_frameoff INT123_ntom_frameoff +#define init_layer3 INT123_init_layer3 +#define init_layer3_gainpow2 INT123_init_layer3_gainpow2 +#define init_layer3_stuff INT123_init_layer3_stuff +#define init_layer12 INT123_init_layer12 +#define init_layer12_table INT123_init_layer12_table +#define init_layer12_stuff INT123_init_layer12_stuff +#define prepare_decode_tables INT123_prepare_decode_tables +#define make_decode_tables INT123_make_decode_tables +#define make_decode_tables_mmx INT123_make_decode_tables_mmx +#define init_layer3_gainpow2_mmx INT123_init_layer3_gainpow2_mmx +#define init_layer12_table_mmx INT123_init_layer12_table_mmx +#define make_conv16to8_table INT123_make_conv16to8_table +#define do_layer3 INT123_do_layer3 +#define do_layer2 INT123_do_layer2 +#define do_layer1 INT123_do_layer1 +#define do_equalizer INT123_do_equalizer +#define dither_table_init INT123_dither_table_init +#define frame_dither_init INT123_frame_dither_init +#define invalidate_format INT123_invalidate_format +#define 
frame_init INT123_frame_init +#define frame_init_par INT123_frame_init_par +#define frame_outbuffer INT123_frame_outbuffer +#define frame_output_format INT123_frame_output_format +#define frame_buffers INT123_frame_buffers +#define frame_reset INT123_frame_reset +#define frame_buffers_reset INT123_frame_buffers_reset +#define frame_exit INT123_frame_exit +#define frame_index_find INT123_frame_index_find +#define frame_index_setup INT123_frame_index_setup +#define do_volume INT123_do_volume +#define do_rva INT123_do_rva +#define frame_gapless_init INT123_frame_gapless_init +#define frame_gapless_realinit INT123_frame_gapless_realinit +#define frame_gapless_update INT123_frame_gapless_update +#define frame_gapless_bytify INT123_frame_gapless_bytify +#define frame_gapless_ignore INT123_frame_gapless_ignore +#define frame_expect_outsamples INT123_frame_expect_outsamples +#define frame_skip INT123_frame_skip +#define frame_ins2outs INT123_frame_ins2outs +#define frame_outs INT123_frame_outs +#define frame_expect_outsampels INT123_frame_expect_outsampels +#define frame_offset INT123_frame_offset +#define frame_set_frameseek INT123_frame_set_frameseek +#define frame_set_seek INT123_frame_set_seek +#define frame_tell_seek INT123_frame_tell_seek +#define frame_fill_toc INT123_frame_fill_toc +#define getbits INT123_getbits +#define getcpuflags INT123_getcpuflags +#define icy2utf8 INT123_icy2utf8 +#define init_icy INT123_init_icy +#define clear_icy INT123_clear_icy +#define reset_icy INT123_reset_icy +#define init_id3 INT123_init_id3 +#define exit_id3 INT123_exit_id3 +#define reset_id3 INT123_reset_id3 +#define id3_link INT123_id3_link +#define parse_new_id3 INT123_parse_new_id3 +#define id3_to_utf8 INT123_id3_to_utf8 +#define fi_init INT123_fi_init +#define fi_exit INT123_fi_exit +#define fi_resize INT123_fi_resize +#define fi_add INT123_fi_add +#define fi_set INT123_fi_set +#define fi_reset INT123_fi_reset +#define double_to_long_rounded INT123_double_to_long_rounded 
+#define scale_rounded INT123_scale_rounded +#define decode_update INT123_decode_update +#define samples_to_bytes INT123_samples_to_bytes +#define bytes_to_samples INT123_bytes_to_samples +#define frame_cpu_opt INT123_frame_cpu_opt +#define set_synth_functions INT123_set_synth_functions +#define dectype INT123_dectype +#define defdec INT123_defdec +#define decclass INT123_decclass +#define check_decoders INT123_check_decoders +#define read_frame_init INT123_read_frame_init +#define frame_bitrate INT123_frame_bitrate +#define frame_freq INT123_frame_freq +#define read_frame_recover INT123_read_frame_recover +#define read_frame INT123_read_frame +#define set_pointer INT123_set_pointer +#define position_info INT123_position_info +#define compute_bpf INT123_compute_bpf +#define time_to_frame INT123_time_to_frame +#define get_songlen INT123_get_songlen +#define open_stream INT123_open_stream +#define open_stream_handle INT123_open_stream_handle +#define open_feed INT123_open_feed +#define feed_more INT123_feed_more +#define feed_forget INT123_feed_forget +#define feed_set_pos INT123_feed_set_pos +#define open_bad INT123_open_bad +#define dct64_3dnow INT123_dct64_3dnow +#define dct64_3dnowext INT123_dct64_3dnowext +#define dct64_mmx INT123_dct64_mmx +#define dct64_MMX INT123_dct64_MMX +#define dct64_sse INT123_dct64_sse +#define dct64_real_sse INT123_dct64_real_sse +#define dct64_x86_64 INT123_dct64_x86_64 +#define dct64_real_x86_64 INT123_dct64_real_x86_64 +#define dct64_avx INT123_dct64_avx +#define dct64_real_avx INT123_dct64_real_avx +#define dct64_neon INT123_dct64_neon +#define dct64_real_neon INT123_dct64_real_neon +#define do_equalizer_3dnow INT123_do_equalizer_3dnow +#define synth_1to1_3dnow_asm INT123_synth_1to1_3dnow_asm +#define synth_1to1_arm_asm INT123_synth_1to1_arm_asm +#define synth_1to1_arm_accurate_asm INT123_synth_1to1_arm_accurate_asm +#define synth_1to1_i586_asm INT123_synth_1to1_i586_asm +#define synth_1to1_i586_asm_dither 
INT123_synth_1to1_i586_asm_dither +#define synth_1to1_MMX INT123_synth_1to1_MMX +#define synth_1to1_sse_accurate_asm INT123_synth_1to1_sse_accurate_asm +#define synth_1to1_real_sse_asm INT123_synth_1to1_real_sse_asm +#define synth_1to1_s32_sse_asm INT123_synth_1to1_s32_sse_asm +#define synth_1to1_s_sse_accurate_asm INT123_synth_1to1_s_sse_accurate_asm +#define synth_1to1_real_s_sse_asm INT123_synth_1to1_real_s_sse_asm +#define synth_1to1_s32_s_sse_asm INT123_synth_1to1_s32_s_sse_asm +#define synth_1to1_s_x86_64_asm INT123_synth_1to1_s_x86_64_asm +#define synth_1to1_s_x86_64_accurate_asm INT123_synth_1to1_s_x86_64_accurate_asm +#define synth_1to1_real_s_x86_64_asm INT123_synth_1to1_real_s_x86_64_asm +#define synth_1to1_s32_s_x86_64_asm INT123_synth_1to1_s32_s_x86_64_asm +#define synth_1to1_x86_64_asm INT123_synth_1to1_x86_64_asm +#define synth_1to1_x86_64_accurate_asm INT123_synth_1to1_x86_64_accurate_asm +#define synth_1to1_real_x86_64_asm INT123_synth_1to1_real_x86_64_asm +#define synth_1to1_s32_x86_64_asm INT123_synth_1to1_s32_x86_64_asm +#define synth_1to1_s_avx_asm INT123_synth_1to1_s_avx_asm +#define synth_1to1_s_avx_accurate_asm INT123_synth_1to1_s_avx_accurate_asm +#define synth_1to1_real_s_avx_asm INT123_synth_1to1_real_s_avx_asm +#define synth_1to1_s32_s_avx_asm INT123_synth_1to1_s32_s_avx_asm +#define synth_1to1_neon_asm INT123_synth_1to1_neon_asm +#define synth_1to1_neon_accurate_asm INT123_synth_1to1_neon_accurate_asm +#define synth_1to1_real_neon_asm INT123_synth_1to1_real_neon_asm +#define synth_1to1_s32_neon_asm INT123_synth_1to1_s32_neon_asm +#define synth_1to1_s_neon_asm INT123_synth_1to1_s_neon_asm +#define synth_1to1_s_neon_accurate_asm INT123_synth_1to1_s_neon_accurate_asm +#define synth_1to1_real_s_neon_asm INT123_synth_1to1_real_s_neon_asm +#define synth_1to1_s32_s_neon_asm INT123_synth_1to1_s32_s_neon_asm +#define costab_mmxsse INT123_costab_mmxsse +#define make_decode_tables_mmx_asm INT123_make_decode_tables_mmx_asm +#define check_neon 
INT123_check_neon +#endif Index: include/reactos/libs/libmpg123/l12_integer_tables.h =================================================================== --- include/reactos/libs/libmpg123/l12_integer_tables.h (revision 62563) +++ include/reactos/libs/libmpg123/l12_integer_tables.h (working copy) @@ -11,7 +11,11 @@ static const real layer12_table[27][64] = { - { + { /* C90 does not like empty initializer. Fill with junk. */ + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20 + , 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38 + , 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56 + , 57, 58, 59, 60, 61, 62, 63, 64 }, { -1431655765,-1136305934,-901886617,-715827883,-568152967,-450943309,-357913941,-284076483, Index: include/reactos/libs/libmpg123/l2tables.h =================================================================== --- include/reactos/libs/libmpg123/l2tables.h (revision 62563) +++ include/reactos/libs/libmpg123/l2tables.h (working copy) @@ -13,7 +13,7 @@ #ifndef _MPG123_L2TABLES_H_ #define _MPG123_L2TABLES_H_ -const struct al_table alloc_0[] = { +static const struct al_table alloc_0[] = { {4,0},{5,3},{3,-3},{4,-7},{5,-15},{6,-31},{7,-63},{8,-127},{9,-255},{10,-511}, {11,-1023},{12,-2047},{13,-4095},{14,-8191},{15,-16383},{16,-32767}, {4,0},{5,3},{3,-3},{4,-7},{5,-15},{6,-31},{7,-63},{8,-127},{9,-255},{10,-511}, @@ -53,7 +53,7 @@ {2,0},{5,3},{7,5},{16,-32767}, {2,0},{5,3},{7,5},{16,-32767} }; -const struct al_table alloc_1[] = { +static const struct al_table alloc_1[] = { {4,0},{5,3},{3,-3},{4,-7},{5,-15},{6,-31},{7,-63},{8,-127},{9,-255},{10,-511}, {11,-1023},{12,-2047},{13,-4095},{14,-8191},{15,-16383},{16,-32767}, {4,0},{5,3},{3,-3},{4,-7},{5,-15},{6,-31},{7,-63},{8,-127},{9,-255},{10,-511}, @@ -96,7 +96,7 @@ {2,0},{5,3},{7,5},{16,-32767}, {2,0},{5,3},{7,5},{16,-32767} }; -const struct al_table alloc_2[] = { +static const struct al_table alloc_2[] = { 
{4,0},{5,3},{7,5},{10,9},{4,-7},{5,-15},{6,-31},{7,-63},{8,-127},{9,-255}, {10,-511},{11,-1023},{12,-2047},{13,-4095},{14,-8191},{15,-16383}, {4,0},{5,3},{7,5},{10,9},{4,-7},{5,-15},{6,-31},{7,-63},{8,-127},{9,-255}, @@ -108,7 +108,7 @@ {3,0},{5,3},{7,5},{10,9},{4,-7},{5,-15},{6,-31},{7,-63}, {3,0},{5,3},{7,5},{10,9},{4,-7},{5,-15},{6,-31},{7,-63} }; -const struct al_table alloc_3[] = { +static const struct al_table alloc_3[] = { {4,0},{5,3},{7,5},{10,9},{4,-7},{5,-15},{6,-31},{7,-63},{8,-127},{9,-255}, {10,-511},{11,-1023},{12,-2047},{13,-4095},{14,-8191},{15,-16383}, {4,0},{5,3},{7,5},{10,9},{4,-7},{5,-15},{6,-31},{7,-63},{8,-127},{9,-255}, @@ -124,7 +124,7 @@ {3,0},{5,3},{7,5},{10,9},{4,-7},{5,-15},{6,-31},{7,-63}, {3,0},{5,3},{7,5},{10,9},{4,-7},{5,-15},{6,-31},{7,-63} }; -const struct al_table alloc_4[] = { +static const struct al_table alloc_4[] = { {4,0},{5,3},{7,5},{3,-3},{10,9},{4,-7},{5,-15},{6,-31},{7,-63},{8,-127}, {9,-255},{10,-511},{11,-1023},{12,-2047},{13,-4095},{14,-8191}, {4,0},{5,3},{7,5},{3,-3},{10,9},{4,-7},{5,-15},{6,-31},{7,-63},{8,-127}, Index: include/reactos/libs/libmpg123/mangle.h =================================================================== --- include/reactos/libs/libmpg123/mangle.h (revision 62563) +++ include/reactos/libs/libmpg123/mangle.h (working copy) @@ -11,6 +11,7 @@ #define __MANGLE_H #include "config.h" +#include "intsym.h" #ifdef CCALIGN #define MOVUAPS movaps @@ -18,18 +19,42 @@ #define MOVUAPS movups #endif +/* + ALIGNX: align to X bytes + This differs per compiler/platform in taking the byte count or an exponent for base 2. + A way out is balign, if the assembler supports it (gas extension). 
+*/ + +#ifdef ASMALIGN_BALIGN + +#define ALIGN4 .balign 4 +#define ALIGN8 .balign 8 +#define ALIGN16 .balign 16 +#define ALIGN32 .balign 32 +#define ALIGN64 .balign 64 + +#else + #ifdef ASMALIGN_EXP #define ALIGN4 .align 2 #define ALIGN8 .align 3 #define ALIGN16 .align 4 #define ALIGN32 .align 5 +#define ALIGN64 .align 6 #else +#ifdef ASMALIGN_BYTE #define ALIGN4 .align 4 #define ALIGN8 .align 8 #define ALIGN16 .align 16 #define ALIGN32 .align 32 +#define ALIGN64 .align 64 +#else +#error "Dunno how assembler alignment works. Please specify." #endif +#endif +#endif + #define MANGLE_MACROCAT_REALLY(a, b) a ## b #define MANGLE_MACROCAT(a, b) MANGLE_MACROCAT_REALLY(a, b) /* Feel free to add more to the list, eg. a.out IMO */ @@ -60,10 +85,18 @@ /* Mark non-executable stack. It's mainly for GNU on Linux... who else does (not) like this? */ #if !defined(__SUNPRO_C) && defined(__linux__) && defined(__ELF__) +#if defined(__arm__) #define NONEXEC_STACK .section .note.GNU-stack,"",%progbits #else +#define NONEXEC_STACK .section .note.GNU-stack,"",@progbits +#endif +#else #define NONEXEC_STACK #endif +#if defined(__x86_64__) && (defined(_WIN64) || defined (__CYGWIN__)) +#define IS_MSABI 1 /* Not using SYSV */ +#endif + #endif /* !__MANGLE_H */ Index: include/reactos/libs/libmpg123/mpeghead.h =================================================================== --- include/reactos/libs/libmpg123/mpeghead.h (revision 0) +++ include/reactos/libs/libmpg123/mpeghead.h (working copy) @@ -0,0 +1,89 @@ +/* + mpeghead: the bits of an MPEG frame header + + copyright ?-2011 by the mpg123 project - free software under the terms of the LGPL 2.1 + see COPYING and AUTHORS files in distribution or http://mpg123.org + initially written by Michael Hipp & Thomas Orgis (from parse.c) +*/ +#ifndef MPG123_MPEGHEAD_H +#define MPG123_MPEGHEAD_H + +/* + Avoid human error, let perl do the work of dissecting an MPEG header into parts. 
+ To be clear: Never edit the following definitions by hand, modify the code block inside this comment and run it through perl instead! + + $head = "AAAAAAAA AAABBCCD EEEEFFGH IIJJKLMM"; + %parts = qw(A sync B version C layer D crc E bitrate F samplerate G padding H private I channel J chanex K copyright L original M emphasis); + for(sort keys %parts) + { + $name = uc($parts{$_}); + $bits = $head; + $bits =~ s/$_/1/g; + $bits =~ s/[^1 ]/0/g; + print "\/\* $bits \*\/\n"; + $bits =~ s/\s//g; + print "#define HDR_$name".(" " x (18-length($name))).sprintf("0x%08x", eval("0b$bits"))."\n"; + $bits =~ m/(0*)$/; + print "#define HDR_${name}_VAL(h)".(" " x (11-length($name)))."(((h)\&HDR_$name) >> ".length($1).")\n"; + } +*/ + +/* 11111111 11100000 00000000 00000000 */ +#define HDR_SYNC 0xffe00000 +#define HDR_SYNC_VAL(h) (((h)&HDR_SYNC) >> 21) +/* 00000000 00011000 00000000 00000000 */ +#define HDR_VERSION 0x00180000 +#define HDR_VERSION_VAL(h) (((h)&HDR_VERSION) >> 19) +/* 00000000 00000110 00000000 00000000 */ +#define HDR_LAYER 0x00060000 +#define HDR_LAYER_VAL(h) (((h)&HDR_LAYER) >> 17) +/* 00000000 00000001 00000000 00000000 */ +#define HDR_CRC 0x00010000 +#define HDR_CRC_VAL(h) (((h)&HDR_CRC) >> 16) +/* 00000000 00000000 11110000 00000000 */ +#define HDR_BITRATE 0x0000f000 +#define HDR_BITRATE_VAL(h) (((h)&HDR_BITRATE) >> 12) +/* 00000000 00000000 00001100 00000000 */ +#define HDR_SAMPLERATE 0x00000c00 +#define HDR_SAMPLERATE_VAL(h) (((h)&HDR_SAMPLERATE) >> 10) +/* 00000000 00000000 00000010 00000000 */ +#define HDR_PADDING 0x00000200 +#define HDR_PADDING_VAL(h) (((h)&HDR_PADDING) >> 9) +/* 00000000 00000000 00000001 00000000 */ +#define HDR_PRIVATE 0x00000100 +#define HDR_PRIVATE_VAL(h) (((h)&HDR_PRIVATE) >> 8) +/* 00000000 00000000 00000000 11000000 */ +#define HDR_CHANNEL 0x000000c0 +#define HDR_CHANNEL_VAL(h) (((h)&HDR_CHANNEL) >> 6) +/* 00000000 00000000 00000000 00110000 */ +#define HDR_CHANEX 0x00000030 +#define HDR_CHANEX_VAL(h) (((h)&HDR_CHANEX) >> 4) +/* 
00000000 00000000 00000000 00001000 */ +#define HDR_COPYRIGHT 0x00000008 +#define HDR_COPYRIGHT_VAL(h) (((h)&HDR_COPYRIGHT) >> 3) +/* 00000000 00000000 00000000 00000100 */ +#define HDR_ORIGINAL 0x00000004 +#define HDR_ORIGINAL_VAL(h) (((h)&HDR_ORIGINAL) >> 2) +/* 00000000 00000000 00000000 00000011 */ +#define HDR_EMPHASIS 0x00000003 +#define HDR_EMPHASIS_VAL(h) (((h)&HDR_EMPHASIS) >> 0) + +/* + A generic mask for telling if a header is somewhat valid for the current stream. + Meaning: Most basic info is not allowed to change. + Checking of channel count needs to be done, too, though. So, + if channel count matches, frames are decoded the same way: frame buffers and decoding + routines can stay the same, especially frame buffers (think spf * channels!). +*/ +#define HDR_CMPMASK (HDR_SYNC|HDR_VERSION|HDR_LAYER|HDR_SAMPLERATE) + +/* A stricter mask, for matching free format headers. */ +#define HDR_SAMEMASK (HDR_SYNC|HDR_VERSION|HDR_LAYER|HDR_BITRATE|HDR_SAMPLERATE|HDR_CHANNEL|HDR_CHANEX) + +/* Free format headers have zero bitrate value (no bit of HDR_BITRATE set); the argument is parenthesized so compound expressions expand correctly. */ +#define HDR_FREE_FORMAT(head) (!((head) & HDR_BITRATE)) + +/* A mask for changed sampling rate (version or rate bits). */ +#define HDR_SAMPMASK (HDR_VERSION|HDR_SAMPLERATE) + +#endif Index: include/reactos/libs/libmpg123/mpg123lib_intern.h =================================================================== --- include/reactos/libs/libmpg123/mpg123lib_intern.h (revision 62563) +++ include/reactos/libs/libmpg123/mpg123lib_intern.h (working copy) @@ -11,34 +11,13 @@ #define MPG123_H_INTERN #define MPG123_RATES 9 -#define MPG123_ENCODINGS 10 +#define MPG123_ENCODINGS 12 #include "config.h" /* Load this before _anything_ */ +#include "intsym.h" /* Prefixing of internal symbols that still are public in a static lib. */ -/* ABI conformance for other compilers. - mpg123 needs 16byte-aligned stack for SSE and friends. - gcc provides that, but others don't necessarily.
*/ -#ifdef ABI_ALIGN_FUN -#ifndef attribute_align_arg -#if defined(__GNUC__) && (__GNUC__ > 4 || __GNUC__ == 4 && __GNUC_MINOR__>1) -# define attribute_align_arg __attribute__((force_align_arg_pointer)) -/* The gcc that can align the stack does not need the check... nor does it work with gcc 4.3+, anyway. */ -#else +#include "abi_align.h" -# define attribute_align_arg -/* Other compilers get code to catch misaligned stack. - Well, except Sun Studio, which accepts the aligned attribute but does not honor it. */ -#if !defined(__SUNPRO_C) -# define NEED_ALIGNCHECK -#endif - -#endif -#endif -#else -#define attribute_align_arg -/* We won't try the align check... */ -#endif - /* export DLL symbols */ #if defined(WIN32) && defined(DYNAMIC_BUILD) #define BUILD_MPG123_DLL @@ -59,19 +38,15 @@ #define memmove(dst,src,size) bcopy(src,dst,size) #endif -/* some stuff has to go back to mpg123.h */ +/* We don't really do long double... there are 3 options for REAL: + float, fixed point (int32_t, with int64_t as the wide type dreal) and double. */ + #ifdef REAL_IS_FLOAT # define real float -# define REAL_SCANF "%f" -# define REAL_PRINTF "%f" -#elif defined(REAL_IS_LONG_DOUBLE) -# define real long double -# define REAL_SCANF "%Lf" -# define REAL_PRINTF "%Lf" #elif defined(REAL_IS_FIXED) -/* Disable some output formats for fixed point decoder... */ -# define real long +# define real int32_t +# define dreal int64_t /* for fixed-point decoders, use pre-calculated tables to avoid expensive floating-point maths @@ -82,14 +57,14 @@ # define REAL_RADIX 24 # define REAL_FACTOR 16777216.0 -static inline long double_to_long_rounded(double x, double scalefac) +static inline int32_t double_to_long_rounded(double x, double scalefac) { x *= scalefac; x += (x > 0) ?
0.5 : -0.5; - return (long)x; + return (int32_t)x; } -static inline long scale_rounded(long x, int shift) +static inline int32_t scale_rounded(int32_t x, int shift) { x += (x >> 31); x >>= (shift - 1); @@ -136,7 +111,7 @@ "srwi %0, %0, %4 \n\t" \ "rlwimi %0, %1, %5, 0, %6 \n\t" \ : "=&r" (_mull), "=&r" (_mulh) \ - : "%r" (_x), "r" (_y), "i" (radix), "i" (32-(radix)), "i" ((radix)-1) \ + : "r" (_x), "r" (_y), "i" (radix), "i" (32-(radix)), "i" ((radix)-1) \ ); \ _mull; \ }) @@ -152,7 +127,7 @@ "slw %1, %1, %2 \n\t" \ "or %0, %0, %1 \n\t" \ : "=&r" (_mull), "=&r" (_mulh), "=&r" (_radix2) \ - : "%r" (_x), "r" (_y), "r" (_radix) \ + : "r" (_x), "r" (_y), "r" (_radix) \ : "cc" \ ); \ _mull; \ @@ -167,7 +142,7 @@ "mov %0, %0, lsr %4 \n\t" \ "orr %0, %0, %1, lsl %5 \n\t" \ : "=&r" (_mull), "=&r" (_mulh) \ - : "%r" (_x), "r" (_y), "M" (radix), "M" (32-(radix)) \ + : "r" (_x), "r" (_y), "M" (radix), "M" (32-(radix)) \ ); \ _mull; \ }) @@ -179,9 +154,10 @@ "smull %0, %1, %3, %4 \n\t" \ "mov %0, %0, lsr %5 \n\t" \ "rsb %2, %5, #32 \n\t" \ - "orr %0, %0, %1, lsl %2 \n\t" \ + "mov %1, %1, lsl %2 \n\t" \ + "orr %0, %0, %1 \n\t" \ : "=&r" (_mull), "=&r" (_mulh), "=&r" (_radix2) \ - : "%r" (_x), "r" (_y), "r" (_radix) \ + : "r" (_x), "r" (_y), "r" (_radix) \ ); \ _mull; \ }) @@ -188,7 +164,7 @@ # endif # endif -/* I just changed the (int) to (long) there... seemed right. */ +/* I just changed the (int) to (real) there... seemed right. 
*/ # define DOUBLE_TO_REAL(x) (double_to_long_rounded(x, REAL_FACTOR)) # define DOUBLE_TO_REAL_15(x) (double_to_long_rounded(x, 32768.0)) # define DOUBLE_TO_REAL_POW43(x) (double_to_long_rounded(x, 8192.0)) @@ -200,17 +176,17 @@ # define REAL_MUL_15(x, y) REAL_MUL_ASM(x, y, 15) # define REAL_MUL_SCALE_LAYER12(x, y) REAL_MUL_ASM(x, y, 15 + 30 - REAL_RADIX) # else -# define REAL_MUL(x, y) (((long long)(x) * (long long)(y)) >> REAL_RADIX) -# define REAL_MUL_15(x, y) (((long long)(x) * (long long)(y)) >> 15) -# define REAL_MUL_SCALE_LAYER12(x, y) (((long long)(x) * (long long)(y)) >> (15 + 30 - REAL_RADIX)) +# define REAL_MUL(x, y) (((dreal)(x) * (dreal)(y)) >> REAL_RADIX) +# define REAL_MUL_15(x, y) (((dreal)(x) * (dreal)(y)) >> 15) +# define REAL_MUL_SCALE_LAYER12(x, y) (((dreal)(x) * (dreal)(y)) >> (15 + 30 - REAL_RADIX)) # endif # ifdef REAL_MUL_SCALE_LAYER3_ASM # define REAL_MUL_SCALE_LAYER3(x, y, z) REAL_MUL_SCALE_LAYER3_ASM(x, y, 13 + gainpow2_scale[z] - REAL_RADIX) # else -# define REAL_MUL_SCALE_LAYER3(x, y, z) (((long long)(x) * (long long)(y)) >> (13 + gainpow2_scale[z] - REAL_RADIX)) +# define REAL_MUL_SCALE_LAYER3(x, y, z) (((dreal)(x) * (dreal)(y)) >> (13 + gainpow2_scale[z] - REAL_RADIX)) # endif -# define REAL_SCALE_LAYER12(x) ((long)((x) >> (30 - REAL_RADIX))) -# define REAL_SCALE_LAYER3(x, y) ((long)((x) >> (gainpow2_scale[y] - REAL_RADIX))) +# define REAL_SCALE_LAYER12(x) ((real)((x) >> (30 - REAL_RADIX))) +# define REAL_SCALE_LAYER3(x, y) ((real)((x) >> (gainpow2_scale[y] - REAL_RADIX))) # ifdef ACCURATE_ROUNDING # define REAL_MUL_SYNTH(x, y) REAL_MUL(x, y) # define REAL_SCALE_DCT64(x) (x) @@ -220,13 +196,12 @@ # define REAL_SCALE_DCT64(x) ((x) >> 8) # define REAL_SCALE_WINDOW(x) scale_rounded(x, 16) # endif -# define REAL_SCANF "%ld" -# define REAL_PRINTF "%ld" #else +/* Just define a symbol to make things clear. + Existing code still uses (not (float or fixed)) for that. 
*/ +# define REAL_IS_DOUBLE # define real double -# define REAL_SCANF "%lf" -# define REAL_PRINTF "%f" #endif #ifndef REAL_IS_FIXED @@ -321,8 +296,12 @@ int decode_update(mpg123_handle *mh); /* residing in format.c */ +off_t decoder_synth_bytes(mpg123_handle *fr , off_t s); off_t samples_to_bytes(mpg123_handle *fr , off_t s); off_t bytes_to_samples(mpg123_handle *fr , off_t b); +off_t outblock_bytes(mpg123_handle *fr, off_t s); +/* Postprocessing format conversion of freshly decoded buffer. */ +void postprocess_buffer(mpg123_handle *fr); /* If networking is enabled and we really mean internal networking, the timeout_read function is available. */ #if defined (NETWORK) && !defined (WANT_WIN32_SOCKETS) Index: include/reactos/libs/libmpg123/newhuffman.h =================================================================== --- include/reactos/libs/libmpg123/newhuffman.h (revision 0) +++ include/reactos/libs/libmpg123/newhuffman.h (working copy) @@ -0,0 +1,824 @@ +/* + newhuffman.h: optimized huffman tables (radix-4 lookup) + + copyright 1995-2013 by the mpg123 project - free software under the terms of the LGPL 2.1 + see COPYING and AUTHORS files in distribution or http://mpg123.org + initially written by Taihei Monma +*/ + + +#ifndef _MPG123_NEWHUFFMAN_H_ +#define _MPG123_NEWHUFFMAN_H_ + +struct newhuff +{ + unsigned int linbits; + const short *table; +}; + +static const short tab0_[] = +{ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, +}; + +static const short tab1_[] = +{ + 0x0311, 0x0311, 0x0301, 0x0301, 0x0210, 0x0210, 0x0210, 0x0210, + 0x0100, 0x0100, 0x0100, 0x0100, 0x0100, 0x0100, 0x0100, 0x0100, +}; + +static const short tab2_[] = +{ + -16, -32, 0x0311, 0x0311, 0x0301, 0x0301, 0x0310, 0x0310, + 0x0100, 0x0100, 0x0100, 0x0100, 0x0100, 0x0100, 0x0100, 0x0100, + 0x0222, 0x0222, 0x0222, 0x0222, 0x0202, 0x0202, 0x0202, 0x0202, + 0x0112, 0x0112, 0x0112, 0x0112, 0x0112, 0x0112, 0x0112, 0x0112, + 0x0121, 0x0121, 0x0121, 0x0121, 0x0121, 0x0121, 0x0121, 0x0121, +
0x0120, 0x0120, 0x0120, 0x0120, 0x0120, 0x0120, 0x0120, 0x0120, +}; + +static const short tab3_[] = +{ + -16, -32, 0x0310, 0x0310, 0x0211, 0x0211, 0x0211, 0x0211, + 0x0201, 0x0201, 0x0201, 0x0201, 0x0200, 0x0200, 0x0200, 0x0200, + 0x0222, 0x0222, 0x0222, 0x0222, 0x0202, 0x0202, 0x0202, 0x0202, + 0x0112, 0x0112, 0x0112, 0x0112, 0x0112, 0x0112, 0x0112, 0x0112, + 0x0121, 0x0121, 0x0121, 0x0121, 0x0121, 0x0121, 0x0121, 0x0121, + 0x0120, 0x0120, 0x0120, 0x0120, 0x0120, 0x0120, 0x0120, 0x0120 +}; + +static const short tab5_[] = +{ + -16, -32, 0x0311, 0x0311, 0x0301, 0x0301, 0x0310, 0x0310, + 0x0100, 0x0100, 0x0100, 0x0100, 0x0100, 0x0100, 0x0100, 0x0100, + 0x0433, 0x0423, 0x0332, 0x0332, 0x0231, 0x0231, 0x0231, 0x0231, + 0x0313, 0x0313, 0x0303, 0x0303, 0x0330, 0x0330, 0x0322, 0x0322, + 0x0212, 0x0212, 0x0212, 0x0212, 0x0221, 0x0221, 0x0221, 0x0221, + 0x0202, 0x0202, 0x0202, 0x0202, 0x0220, 0x0220, 0x0220, 0x0220, +}; + +static const short tab6_[] = +{ + -16, -32, -48, 0x0412, 0x0421, 0x0420, 0x0301, 0x0301, + 0x0211, 0x0211, 0x0211, 0x0211, 0x0310, 0x0310, 0x0300, 0x0300, + 0x0333, 0x0333, 0x0303, 0x0303, 0x0223, 0x0223, 0x0223, 0x0223, + 0x0232, 0x0232, 0x0232, 0x0232, 0x0230, 0x0230, 0x0230, 0x0230, + 0x0113, 0x0113, 0x0113, 0x0113, 0x0113, 0x0113, 0x0113, 0x0113, + 0x0131, 0x0131, 0x0131, 0x0131, 0x0131, 0x0131, 0x0131, 0x0131, + 0x0122, 0x0122, 0x0122, 0x0122, 0x0122, 0x0122, 0x0122, 0x0122, + 0x0102, 0x0102, 0x0102, 0x0102, 0x0102, 0x0102, 0x0102, 0x0102, +}; + +static const short tab7_[] = +{ + -16, -32, -48, 0x0411, 0x0301, 0x0301, 0x0310, 0x0310, + 0x0100, 0x0100, 0x0100, 0x0100, 0x0100, 0x0100, 0x0100, 0x0100, + -48, -64, -80, 0x0415, 0x0451, -96, 0x0450, -112, + 0x0424, 0x0442, 0x0314, 0x0314, 0x0341, 0x0341, 0x0340, 0x0340, + 0x0404, 0x0423, 0x0432, 0x0403, 0x0313, 0x0313, 0x0331, 0x0331, + 0x0330, 0x0330, 0x0322, 0x0322, 0x0212, 0x0212, 0x0212, 0x0212, + 0x0121, 0x0121, 0x0121, 0x0121, 0x0121, 0x0121, 0x0121, 0x0121, + 0x0202, 0x0202, 0x0202, 0x0202, 0x0220, 
0x0220, 0x0220, 0x0220, + 0x0255, 0x0255, 0x0255, 0x0255, 0x0245, 0x0245, 0x0245, 0x0245, + 0x0254, 0x0254, 0x0254, 0x0254, 0x0253, 0x0253, 0x0253, 0x0253, + 0x0135, 0x0135, 0x0135, 0x0135, 0x0135, 0x0135, 0x0135, 0x0135, + 0x0144, 0x0144, 0x0144, 0x0144, 0x0144, 0x0144, 0x0144, 0x0144, + 0x0125, 0x0125, 0x0125, 0x0125, 0x0125, 0x0125, 0x0125, 0x0125, + 0x0152, 0x0152, 0x0152, 0x0152, 0x0152, 0x0152, 0x0152, 0x0152, + 0x0105, 0x0105, 0x0105, 0x0105, 0x0105, 0x0105, 0x0105, 0x0105, + 0x0134, 0x0134, 0x0134, 0x0134, 0x0134, 0x0134, 0x0134, 0x0134, + 0x0143, 0x0143, 0x0143, 0x0143, 0x0143, 0x0143, 0x0143, 0x0143, + 0x0133, 0x0133, 0x0133, 0x0133, 0x0133, 0x0133, 0x0133, 0x0133, +}; + +static const short tab8_[] = +{ + -16, -32, 0x0412, 0x0421, 0x0211, 0x0211, 0x0211, 0x0211, + 0x0301, 0x0301, 0x0310, 0x0310, 0x0200, 0x0200, 0x0200, 0x0200, + -32, -48, -64, 0x0415, 0x0451, -80, -96, 0x0424, + 0x0442, 0x0414, 0x0341, 0x0341, 0x0404, 0x0440, 0x0423, 0x0432, + 0x0413, 0x0431, 0x0403, 0x0430, 0x0222, 0x0222, 0x0222, 0x0222, + 0x0202, 0x0202, 0x0202, 0x0202, 0x0220, 0x0220, 0x0220, 0x0220, + 0x0355, 0x0355, 0x0354, 0x0354, 0x0245, 0x0245, 0x0245, 0x0245, + 0x0153, 0x0153, 0x0153, 0x0153, 0x0153, 0x0153, 0x0153, 0x0153, + 0x0235, 0x0235, 0x0235, 0x0235, 0x0244, 0x0244, 0x0244, 0x0244, + 0x0125, 0x0125, 0x0125, 0x0125, 0x0125, 0x0125, 0x0125, 0x0125, + 0x0152, 0x0152, 0x0152, 0x0152, 0x0152, 0x0152, 0x0152, 0x0152, + 0x0105, 0x0105, 0x0105, 0x0105, 0x0105, 0x0105, 0x0105, 0x0105, + 0x0134, 0x0134, 0x0134, 0x0134, 0x0134, 0x0134, 0x0134, 0x0134, + 0x0143, 0x0143, 0x0143, 0x0143, 0x0143, 0x0143, 0x0143, 0x0143, + 0x0150, 0x0150, 0x0150, 0x0150, 0x0150, 0x0150, 0x0150, 0x0150, + 0x0133, 0x0133, 0x0133, 0x0133, 0x0133, 0x0133, 0x0133, 0x0133, +}; + +static const short tab9_[] = +{ + -16, -32, -48, -64, -80, 0x0412, 0x0421, 0x0420, + 0x0311, 0x0311, 0x0301, 0x0301, 0x0310, 0x0310, 0x0300, 0x0300, + -80, 0x0435, 0x0453, -96, 0x0444, 0x0425, 0x0452, 0x0415, + 0x0351, 0x0351, 0x0334, 
0x0334, 0x0343, 0x0343, 0x0450, 0x0404, + 0x0324, 0x0324, 0x0342, 0x0342, 0x0333, 0x0333, 0x0340, 0x0340, + 0x0214, 0x0214, 0x0214, 0x0214, 0x0241, 0x0241, 0x0241, 0x0241, + 0x0223, 0x0223, 0x0223, 0x0223, 0x0232, 0x0232, 0x0232, 0x0232, + 0x0113, 0x0113, 0x0113, 0x0113, 0x0113, 0x0113, 0x0113, 0x0113, + 0x0131, 0x0131, 0x0131, 0x0131, 0x0131, 0x0131, 0x0131, 0x0131, + 0x0203, 0x0203, 0x0203, 0x0203, 0x0230, 0x0230, 0x0230, 0x0230, + 0x0122, 0x0122, 0x0122, 0x0122, 0x0122, 0x0122, 0x0122, 0x0122, + 0x0102, 0x0102, 0x0102, 0x0102, 0x0102, 0x0102, 0x0102, 0x0102, + 0x0155, 0x0155, 0x0155, 0x0155, 0x0155, 0x0155, 0x0155, 0x0155, + 0x0145, 0x0145, 0x0145, 0x0145, 0x0145, 0x0145, 0x0145, 0x0145, + 0x0154, 0x0154, 0x0154, 0x0154, 0x0154, 0x0154, 0x0154, 0x0154, + 0x0105, 0x0105, 0x0105, 0x0105, 0x0105, 0x0105, 0x0105, 0x0105, +}; + +static const short tab10_[] = +{ + -16, -32, -48, 0x0411, 0x0301, 0x0301, 0x0310, 0x0310, + 0x0100, 0x0100, 0x0100, 0x0100, 0x0100, 0x0100, 0x0100, 0x0100, + -48, -64, -80, -96, -112, -128, -144, 0x0417, + 0x0471, -160, -176, -192, 0x0416, 0x0461, 0x0460, -208, + -208, -224, 0x0414, 0x0441, 0x0440, 0x0423, 0x0432, 0x0403, + 0x0313, 0x0313, 0x0331, 0x0331, 0x0330, 0x0330, 0x0322, 0x0322, + 0x0212, 0x0212, 0x0212, 0x0212, 0x0221, 0x0221, 0x0221, 0x0221, + 0x0202, 0x0202, 0x0202, 0x0202, 0x0220, 0x0220, 0x0220, 0x0220, + 0x0377, 0x0377, 0x0367, 0x0367, 0x0376, 0x0376, 0x0357, 0x0357, + 0x0375, 0x0375, 0x0366, 0x0366, 0x0247, 0x0247, 0x0247, 0x0247, + 0x0274, 0x0274, 0x0274, 0x0274, 0x0256, 0x0256, 0x0256, 0x0256, + 0x0265, 0x0265, 0x0265, 0x0265, 0x0237, 0x0237, 0x0237, 0x0237, + 0x0273, 0x0273, 0x0273, 0x0273, 0x0246, 0x0246, 0x0246, 0x0246, + 0x0355, 0x0355, 0x0354, 0x0354, 0x0263, 0x0263, 0x0263, 0x0263, + 0x0127, 0x0127, 0x0127, 0x0127, 0x0127, 0x0127, 0x0127, 0x0127, + 0x0172, 0x0172, 0x0172, 0x0172, 0x0172, 0x0172, 0x0172, 0x0172, + 0x0264, 0x0264, 0x0264, 0x0264, 0x0207, 0x0207, 0x0207, 0x0207, + 0x0170, 0x0170, 0x0170, 0x0170, 0x0170, 
0x0170, 0x0170, 0x0170, + 0x0162, 0x0162, 0x0162, 0x0162, 0x0162, 0x0162, 0x0162, 0x0162, + 0x0245, 0x0245, 0x0245, 0x0245, 0x0235, 0x0235, 0x0235, 0x0235, + 0x0106, 0x0106, 0x0106, 0x0106, 0x0106, 0x0106, 0x0106, 0x0106, + 0x0253, 0x0253, 0x0253, 0x0253, 0x0244, 0x0244, 0x0244, 0x0244, + 0x0136, 0x0136, 0x0136, 0x0136, 0x0136, 0x0136, 0x0136, 0x0136, + 0x0126, 0x0126, 0x0126, 0x0126, 0x0126, 0x0126, 0x0126, 0x0126, + 0x0225, 0x0225, 0x0225, 0x0225, 0x0252, 0x0252, 0x0252, 0x0252, + 0x0115, 0x0115, 0x0115, 0x0115, 0x0115, 0x0115, 0x0115, 0x0115, + 0x0151, 0x0151, 0x0151, 0x0151, 0x0151, 0x0151, 0x0151, 0x0151, + 0x0234, 0x0234, 0x0234, 0x0234, 0x0243, 0x0243, 0x0243, 0x0243, + 0x0105, 0x0105, 0x0105, 0x0105, 0x0105, 0x0105, 0x0105, 0x0105, + 0x0150, 0x0150, 0x0150, 0x0150, 0x0150, 0x0150, 0x0150, 0x0150, + 0x0124, 0x0124, 0x0124, 0x0124, 0x0124, 0x0124, 0x0124, 0x0124, + 0x0142, 0x0142, 0x0142, 0x0142, 0x0142, 0x0142, 0x0142, 0x0142, + 0x0133, 0x0133, 0x0133, 0x0133, 0x0133, 0x0133, 0x0133, 0x0133, + 0x0104, 0x0104, 0x0104, 0x0104, 0x0104, 0x0104, 0x0104, 0x0104, +}; + +static const short tab11_[] = +{ + -16, -32, -48, -64, 0x0412, -80, 0x0311, 0x0311, + 0x0301, 0x0301, 0x0310, 0x0310, 0x0200, 0x0200, 0x0200, 0x0200, + -80, -96, -112, -128, -144, 0x0427, 0x0472, -160, + 0x0371, 0x0371, 0x0417, 0x0470, 0x0436, 0x0463, 0x0460, -176, + -176, 0x0415, 0x0362, 0x0362, 0x0426, 0x0406, 0x0316, 0x0316, + 0x0361, 0x0361, 0x0451, 0x0434, 0x0450, -192, 0x0424, 0x0442, + 0x0414, 0x0441, 0x0404, 0x0440, 0x0323, 0x0323, 0x0332, 0x0332, + 0x0213, 0x0213, 0x0213, 0x0213, 0x0231, 0x0231, 0x0231, 0x0231, + 0x0303, 0x0303, 0x0330, 0x0330, 0x0222, 0x0222, 0x0222, 0x0222, + 0x0121, 0x0121, 0x0121, 0x0121, 0x0121, 0x0121, 0x0121, 0x0121, + 0x0102, 0x0102, 0x0102, 0x0102, 0x0102, 0x0102, 0x0102, 0x0102, + 0x0120, 0x0120, 0x0120, 0x0120, 0x0120, 0x0120, 0x0120, 0x0120, + 0x0277, 0x0277, 0x0277, 0x0277, 0x0267, 0x0267, 0x0267, 0x0267, + 0x0276, 0x0276, 0x0276, 0x0276, 0x0275, 0x0275, 
0x0275, 0x0275, + 0x0266, 0x0266, 0x0266, 0x0266, 0x0247, 0x0247, 0x0247, 0x0247, + 0x0274, 0x0274, 0x0274, 0x0274, 0x0357, 0x0357, 0x0355, 0x0355, + 0x0256, 0x0256, 0x0256, 0x0256, 0x0265, 0x0265, 0x0265, 0x0265, + 0x0137, 0x0137, 0x0137, 0x0137, 0x0137, 0x0137, 0x0137, 0x0137, + 0x0173, 0x0173, 0x0173, 0x0173, 0x0173, 0x0173, 0x0173, 0x0173, + 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, + 0x0245, 0x0245, 0x0245, 0x0245, 0x0254, 0x0254, 0x0254, 0x0254, + 0x0235, 0x0235, 0x0235, 0x0235, 0x0253, 0x0253, 0x0253, 0x0253, + 0x0164, 0x0164, 0x0164, 0x0164, 0x0164, 0x0164, 0x0164, 0x0164, + 0x0107, 0x0107, 0x0107, 0x0107, 0x0107, 0x0107, 0x0107, 0x0107, + 0x0144, 0x0144, 0x0144, 0x0144, 0x0144, 0x0144, 0x0144, 0x0144, + 0x0125, 0x0125, 0x0125, 0x0125, 0x0125, 0x0125, 0x0125, 0x0125, + 0x0152, 0x0152, 0x0152, 0x0152, 0x0152, 0x0152, 0x0152, 0x0152, + 0x0105, 0x0105, 0x0105, 0x0105, 0x0105, 0x0105, 0x0105, 0x0105, + 0x0143, 0x0143, 0x0143, 0x0143, 0x0143, 0x0143, 0x0143, 0x0143, + 0x0133, 0x0133, 0x0133, 0x0133, 0x0133, 0x0133, 0x0133, 0x0133, +}; + +static const short tab12_[] = +{ + -16, -32, -48, -64, -80, -96, 0x0412, 0x0421, + -112, 0x0400, 0x0311, 0x0311, 0x0301, 0x0301, 0x0310, 0x0310, + -112, -128, -144, -160, 0x0456, 0x0437, -176, 0x0427, + 0x0472, 0x0446, 0x0464, 0x0417, 0x0471, -192, 0x0436, 0x0463, + 0x0445, 0x0454, 0x0444, -192, 0x0326, 0x0326, 0x0362, 0x0362, + 0x0361, 0x0361, 0x0416, 0x0460, 0x0435, 0x0453, 0x0425, 0x0452, + 0x0315, 0x0315, 0x0351, 0x0351, 0x0334, 0x0334, 0x0343, 0x0343, + 0x0450, 0x0404, 0x0324, 0x0324, 0x0342, 0x0342, 0x0314, 0x0314, + 0x0233, 0x0233, 0x0233, 0x0233, 0x0241, 0x0241, 0x0241, 0x0241, + 0x0223, 0x0223, 0x0223, 0x0223, 0x0232, 0x0232, 0x0232, 0x0232, + 0x0340, 0x0340, 0x0303, 0x0303, 0x0230, 0x0230, 0x0230, 0x0230, + 0x0113, 0x0113, 0x0113, 0x0113, 0x0113, 0x0113, 0x0113, 0x0113, + 0x0131, 0x0131, 0x0131, 0x0131, 0x0131, 0x0131, 0x0131, 0x0131, + 0x0122, 0x0122, 0x0122, 0x0122, 0x0122, 0x0122, 0x0122, 
0x0122, + 0x0102, 0x0102, 0x0102, 0x0102, 0x0102, 0x0102, 0x0102, 0x0102, + 0x0120, 0x0120, 0x0120, 0x0120, 0x0120, 0x0120, 0x0120, 0x0120, + 0x0277, 0x0277, 0x0277, 0x0277, 0x0267, 0x0267, 0x0267, 0x0267, + 0x0176, 0x0176, 0x0176, 0x0176, 0x0176, 0x0176, 0x0176, 0x0176, + 0x0157, 0x0157, 0x0157, 0x0157, 0x0157, 0x0157, 0x0157, 0x0157, + 0x0175, 0x0175, 0x0175, 0x0175, 0x0175, 0x0175, 0x0175, 0x0175, + 0x0166, 0x0166, 0x0166, 0x0166, 0x0166, 0x0166, 0x0166, 0x0166, + 0x0147, 0x0147, 0x0147, 0x0147, 0x0147, 0x0147, 0x0147, 0x0147, + 0x0174, 0x0174, 0x0174, 0x0174, 0x0174, 0x0174, 0x0174, 0x0174, + 0x0165, 0x0165, 0x0165, 0x0165, 0x0165, 0x0165, 0x0165, 0x0165, + 0x0173, 0x0173, 0x0173, 0x0173, 0x0173, 0x0173, 0x0173, 0x0173, + 0x0155, 0x0155, 0x0155, 0x0155, 0x0155, 0x0155, 0x0155, 0x0155, + 0x0107, 0x0107, 0x0107, 0x0107, 0x0107, 0x0107, 0x0107, 0x0107, + 0x0170, 0x0170, 0x0170, 0x0170, 0x0170, 0x0170, 0x0170, 0x0170, + 0x0106, 0x0106, 0x0106, 0x0106, 0x0106, 0x0106, 0x0106, 0x0106, + 0x0105, 0x0105, 0x0105, 0x0105, 0x0105, 0x0105, 0x0105, 0x0105, +}; + +static const short tab13_[] = +{ + -16, -32, -48, -64, 0x0411, 0x0401, 0x0310, 0x0310, + 0x0100, 0x0100, 0x0100, 0x0100, 0x0100, 0x0100, 0x0100, 0x0100, + -64, -80, -96, -112, -128, -144, -160, -176, + -192, -208, -224, -240, -256, -272, -288, -304, + -304, -320, -336, -352, 0x0481, -368, -384, -400, + -416, -432, 0x0415, 0x0451, -448, -464, -480, 0x0414, + 0x0341, 0x0341, 0x0404, 0x0440, 0x0423, 0x0432, 0x0313, 0x0313, + 0x0331, 0x0331, 0x0303, 0x0303, 0x0330, 0x0330, 0x0322, 0x0322, + 0x0212, 0x0212, 0x0212, 0x0212, 0x0221, 0x0221, 0x0221, 0x0221, + 0x0202, 0x0202, 0x0202, 0x0202, 0x0220, 0x0220, 0x0220, 0x0220, + -448, -464, -480, -496, -512, -528, -544, -560, + -576, -592, -608, -624, -640, -656, 0x041f, 0x04f1, + 0x04f0, -656, -672, -688, 0x04e2, -704, 0x041e, 0x04e1, + -720, -736, -752, -768, -784, -800, 0x04c6, 0x043d, + -800, 0x042d, 0x04d2, 0x041d, 0x04b7, -816, -832, 0x04c3, + -848, 0x044b, 0x03d1, 
0x03d1, 0x040d, 0x04d0, 0x048a, 0x04a8, + 0x044c, 0x04c4, 0x046b, 0x04b6, 0x033c, 0x033c, 0x032c, 0x032c, + 0x03c2, 0x03c2, 0x035b, 0x035b, 0x04b5, 0x0489, 0x031c, 0x031c, + 0x03c1, 0x03c1, 0x0498, 0x040c, 0x03c0, 0x03c0, 0x04b4, 0x046a, + 0x04a6, 0x0479, 0x033b, 0x033b, 0x03b3, 0x03b3, 0x0488, 0x045a, + 0x032b, 0x032b, 0x04a5, 0x0469, 0x03a4, 0x03a4, 0x0478, 0x0487, + 0x0394, 0x0394, 0x0477, 0x0476, 0x02b2, 0x02b2, 0x02b2, 0x02b2, + 0x021b, 0x021b, 0x021b, 0x021b, 0x02b1, 0x02b1, 0x02b1, 0x02b1, + 0x030b, 0x030b, 0x03b0, 0x03b0, 0x0396, 0x0396, 0x034a, 0x034a, + 0x033a, 0x033a, 0x03a3, 0x03a3, 0x0359, 0x0359, 0x0395, 0x0395, + 0x022a, 0x022a, 0x022a, 0x022a, 0x02a2, 0x02a2, 0x02a2, 0x02a2, + 0x021a, 0x021a, 0x021a, 0x021a, 0x02a1, 0x02a1, 0x02a1, 0x02a1, + 0x030a, 0x030a, 0x0368, 0x0368, 0x02a0, 0x02a0, 0x02a0, 0x02a0, + 0x0386, 0x0386, 0x0349, 0x0349, 0x0293, 0x0293, 0x0293, 0x0293, + 0x0339, 0x0339, 0x0358, 0x0358, 0x0385, 0x0385, 0x0367, 0x0367, + 0x0229, 0x0229, 0x0229, 0x0229, 0x0292, 0x0292, 0x0292, 0x0292, + 0x0357, 0x0357, 0x0375, 0x0375, 0x0238, 0x0238, 0x0238, 0x0238, + 0x0283, 0x0283, 0x0283, 0x0283, 0x0366, 0x0366, 0x0347, 0x0347, + 0x0374, 0x0374, 0x0356, 0x0356, 0x0365, 0x0365, 0x0373, 0x0373, + 0x0119, 0x0119, 0x0119, 0x0119, 0x0119, 0x0119, 0x0119, 0x0119, + 0x0191, 0x0191, 0x0191, 0x0191, 0x0191, 0x0191, 0x0191, 0x0191, + 0x0209, 0x0209, 0x0209, 0x0209, 0x0290, 0x0290, 0x0290, 0x0290, + 0x0248, 0x0248, 0x0248, 0x0248, 0x0284, 0x0284, 0x0284, 0x0284, + 0x0272, 0x0272, 0x0272, 0x0272, 0x0346, 0x0346, 0x0364, 0x0364, + 0x0128, 0x0128, 0x0128, 0x0128, 0x0128, 0x0128, 0x0128, 0x0128, + 0x0182, 0x0182, 0x0182, 0x0182, 0x0182, 0x0182, 0x0182, 0x0182, + 0x0118, 0x0118, 0x0118, 0x0118, 0x0118, 0x0118, 0x0118, 0x0118, + 0x0237, 0x0237, 0x0237, 0x0237, 0x0227, 0x0227, 0x0227, 0x0227, + 0x0117, 0x0117, 0x0117, 0x0117, 0x0117, 0x0117, 0x0117, 0x0117, + 0x0171, 0x0171, 0x0171, 0x0171, 0x0171, 0x0171, 0x0171, 0x0171, + 0x0255, 0x0255, 0x0255, 0x0255, 0x0207, 
0x0207, 0x0207, 0x0207, + 0x0270, 0x0270, 0x0270, 0x0270, 0x0236, 0x0236, 0x0236, 0x0236, + 0x0263, 0x0263, 0x0263, 0x0263, 0x0245, 0x0245, 0x0245, 0x0245, + 0x0254, 0x0254, 0x0254, 0x0254, 0x0226, 0x0226, 0x0226, 0x0226, + 0x0262, 0x0262, 0x0262, 0x0262, 0x0235, 0x0235, 0x0235, 0x0235, + 0x0108, 0x0108, 0x0108, 0x0108, 0x0108, 0x0108, 0x0108, 0x0108, + 0x0180, 0x0180, 0x0180, 0x0180, 0x0180, 0x0180, 0x0180, 0x0180, + 0x0116, 0x0116, 0x0116, 0x0116, 0x0116, 0x0116, 0x0116, 0x0116, + 0x0161, 0x0161, 0x0161, 0x0161, 0x0161, 0x0161, 0x0161, 0x0161, + 0x0106, 0x0106, 0x0106, 0x0106, 0x0106, 0x0106, 0x0106, 0x0106, + 0x0160, 0x0160, 0x0160, 0x0160, 0x0160, 0x0160, 0x0160, 0x0160, + 0x0253, 0x0253, 0x0253, 0x0253, 0x0244, 0x0244, 0x0244, 0x0244, + 0x0125, 0x0125, 0x0125, 0x0125, 0x0125, 0x0125, 0x0125, 0x0125, + 0x0152, 0x0152, 0x0152, 0x0152, 0x0152, 0x0152, 0x0152, 0x0152, + 0x0105, 0x0105, 0x0105, 0x0105, 0x0105, 0x0105, 0x0105, 0x0105, + 0x0134, 0x0134, 0x0134, 0x0134, 0x0134, 0x0134, 0x0134, 0x0134, + 0x0143, 0x0143, 0x0143, 0x0143, 0x0143, 0x0143, 0x0143, 0x0143, + 0x0150, 0x0150, 0x0150, 0x0150, 0x0150, 0x0150, 0x0150, 0x0150, + 0x0124, 0x0124, 0x0124, 0x0124, 0x0124, 0x0124, 0x0124, 0x0124, + 0x0142, 0x0142, 0x0142, 0x0142, 0x0142, 0x0142, 0x0142, 0x0142, + 0x0133, 0x0133, 0x0133, 0x0133, 0x0133, 0x0133, 0x0133, 0x0133, + -448, 0x04ff, 0x04ef, 0x04df, 0x04ee, 0x04cf, 0x04de, 0x04bf, + 0x04fb, 0x04ce, 0x04dc, -464, 0x03ec, 0x03ec, 0x03dd, 0x03dd, + 0x04fa, 0x04cd, 0x03be, 0x03be, 0x03eb, 0x03eb, 0x039f, 0x039f, + 0x03f9, 0x03f9, 0x03ea, 0x03ea, 0x03bd, 0x03bd, 0x03db, 0x03db, + 0x038f, 0x038f, 0x03f8, 0x03f8, 0x03cc, 0x03cc, 0x04ae, 0x049e, + 0x038e, 0x038e, 0x047f, 0x047e, 0x02f7, 0x02f7, 0x02f7, 0x02f7, + 0x02da, 0x02da, 0x02da, 0x02da, 0x03ad, 0x03ad, 0x03bc, 0x03bc, + 0x03cb, 0x03cb, 0x03f6, 0x03f6, 0x026f, 0x026f, 0x026f, 0x026f, + 0x02e8, 0x02e8, 0x02e8, 0x02e8, 0x025f, 0x025f, 0x025f, 0x025f, + 0x029d, 0x029d, 0x029d, 0x029d, 0x02d9, 0x02d9, 0x02d9, 0x02d9, 
+ 0x02f5, 0x02f5, 0x02f5, 0x02f5, 0x02e7, 0x02e7, 0x02e7, 0x02e7, + 0x02ac, 0x02ac, 0x02ac, 0x02ac, 0x02bb, 0x02bb, 0x02bb, 0x02bb, + 0x024f, 0x024f, 0x024f, 0x024f, 0x02f4, 0x02f4, 0x02f4, 0x02f4, + 0x03ca, 0x03ca, 0x03e6, 0x03e6, 0x02f3, 0x02f3, 0x02f3, 0x02f3, + 0x013f, 0x013f, 0x013f, 0x013f, 0x013f, 0x013f, 0x013f, 0x013f, + 0x028d, 0x028d, 0x028d, 0x028d, 0x02d8, 0x02d8, 0x02d8, 0x02d8, + 0x012f, 0x012f, 0x012f, 0x012f, 0x012f, 0x012f, 0x012f, 0x012f, + 0x01f2, 0x01f2, 0x01f2, 0x01f2, 0x01f2, 0x01f2, 0x01f2, 0x01f2, + 0x026e, 0x026e, 0x026e, 0x026e, 0x029c, 0x029c, 0x029c, 0x029c, + 0x010f, 0x010f, 0x010f, 0x010f, 0x010f, 0x010f, 0x010f, 0x010f, + 0x02c9, 0x02c9, 0x02c9, 0x02c9, 0x025e, 0x025e, 0x025e, 0x025e, + 0x01ab, 0x01ab, 0x01ab, 0x01ab, 0x01ab, 0x01ab, 0x01ab, 0x01ab, + 0x027d, 0x027d, 0x027d, 0x027d, 0x02d7, 0x02d7, 0x02d7, 0x02d7, + 0x014e, 0x014e, 0x014e, 0x014e, 0x014e, 0x014e, 0x014e, 0x014e, + 0x02c8, 0x02c8, 0x02c8, 0x02c8, 0x02d6, 0x02d6, 0x02d6, 0x02d6, + 0x013e, 0x013e, 0x013e, 0x013e, 0x013e, 0x013e, 0x013e, 0x013e, + 0x01b9, 0x01b9, 0x01b9, 0x01b9, 0x01b9, 0x01b9, 0x01b9, 0x01b9, + 0x029b, 0x029b, 0x029b, 0x029b, 0x02aa, 0x02aa, 0x02aa, 0x02aa, + 0x01ba, 0x01ba, 0x01ba, 0x01ba, 0x01ba, 0x01ba, 0x01ba, 0x01ba, + 0x01e5, 0x01e5, 0x01e5, 0x01e5, 0x01e5, 0x01e5, 0x01e5, 0x01e5, + 0x01e4, 0x01e4, 0x01e4, 0x01e4, 0x01e4, 0x01e4, 0x01e4, 0x01e4, + 0x018c, 0x018c, 0x018c, 0x018c, 0x018c, 0x018c, 0x018c, 0x018c, + 0x016d, 0x016d, 0x016d, 0x016d, 0x016d, 0x016d, 0x016d, 0x016d, + 0x01e3, 0x01e3, 0x01e3, 0x01e3, 0x01e3, 0x01e3, 0x01e3, 0x01e3, + 0x012e, 0x012e, 0x012e, 0x012e, 0x012e, 0x012e, 0x012e, 0x012e, + 0x010e, 0x010e, 0x010e, 0x010e, 0x010e, 0x010e, 0x010e, 0x010e, + 0x01e0, 0x01e0, 0x01e0, 0x01e0, 0x01e0, 0x01e0, 0x01e0, 0x01e0, + 0x015d, 0x015d, 0x015d, 0x015d, 0x015d, 0x015d, 0x015d, 0x015d, + 0x01d5, 0x01d5, 0x01d5, 0x01d5, 0x01d5, 0x01d5, 0x01d5, 0x01d5, + 0x017c, 0x017c, 0x017c, 0x017c, 0x017c, 0x017c, 0x017c, 0x017c, + 0x01c7, 0x01c7, 
0x01c7, 0x01c7, 0x01c7, 0x01c7, 0x01c7, 0x01c7, + 0x014d, 0x014d, 0x014d, 0x014d, 0x014d, 0x014d, 0x014d, 0x014d, + 0x018b, 0x018b, 0x018b, 0x018b, 0x018b, 0x018b, 0x018b, 0x018b, + 0x01b8, 0x01b8, 0x01b8, 0x01b8, 0x01b8, 0x01b8, 0x01b8, 0x01b8, + 0x01d4, 0x01d4, 0x01d4, 0x01d4, 0x01d4, 0x01d4, 0x01d4, 0x01d4, + 0x019a, 0x019a, 0x019a, 0x019a, 0x019a, 0x019a, 0x019a, 0x019a, + 0x01a9, 0x01a9, 0x01a9, 0x01a9, 0x01a9, 0x01a9, 0x01a9, 0x01a9, + 0x016c, 0x016c, 0x016c, 0x016c, 0x016c, 0x016c, 0x016c, 0x016c, + 0x01d3, 0x01d3, 0x01d3, 0x01d3, 0x01d3, 0x01d3, 0x01d3, 0x01d3, + 0x017b, 0x017b, 0x017b, 0x017b, 0x017b, 0x017b, 0x017b, 0x017b, + 0x015c, 0x015c, 0x015c, 0x015c, 0x015c, 0x015c, 0x015c, 0x015c, + 0x01c5, 0x01c5, 0x01c5, 0x01c5, 0x01c5, 0x01c5, 0x01c5, 0x01c5, + 0x0199, 0x0199, 0x0199, 0x0199, 0x0199, 0x0199, 0x0199, 0x0199, + 0x017a, 0x017a, 0x017a, 0x017a, 0x017a, 0x017a, 0x017a, 0x017a, + 0x01a7, 0x01a7, 0x01a7, 0x01a7, 0x01a7, 0x01a7, 0x01a7, 0x01a7, + 0x0197, 0x0197, 0x0197, 0x0197, 0x0197, 0x0197, 0x0197, 0x0197, + 0x03fe, 0x03fe, 0x03fc, 0x03fc, 0x02fd, 0x02fd, 0x02fd, 0x02fd, + 0x01ed, 0x01ed, 0x01ed, 0x01ed, 0x01ed, 0x01ed, 0x01ed, 0x01ed, + 0x01af, 0x01af, 0x01af, 0x01af, 0x01af, 0x01af, 0x01af, 0x01af, + 0x01e9, 0x01e9, 0x01e9, 0x01e9, 0x01e9, 0x01e9, 0x01e9, 0x01e9, +}; + +static const short tab15_[] = +{ + -16, -32, -48, -64, -80, -96, -112, -128, + -144, -160, 0x0311, 0x0311, 0x0401, 0x0410, 0x0300, 0x0300, + -160, -176, -192, -208, -224, -240, -256, -272, + -288, -304, -320, -336, -352, -368, -384, -400, + -400, -416, -432, -448, -464, -480, -496, -512, + -528, -544, -560, -576, -592, -608, -624, -640, + -640, -656, 0x0491, -672, -688, -704, -720, -736, + 0x0428, 0x0482, 0x0418, 0x0481, -752, -768, -784, -800, + 0x0427, 0x0472, 0x0464, 0x0417, 0x0455, 0x0471, -800, 0x0436, + 0x0463, 0x0445, 0x0454, 0x0426, 0x0462, 0x0416, -816, 0x0435, + 0x0361, 0x0361, 0x0453, 0x0444, 0x0325, 0x0325, 0x0352, 0x0352, + 0x0315, 0x0315, 0x0351, 0x0351, 0x0405, 
0x0450, 0x0334, 0x0334, + 0x0343, 0x0343, 0x0324, 0x0324, 0x0342, 0x0342, 0x0333, 0x0333, + 0x0241, 0x0241, 0x0241, 0x0241, 0x0314, 0x0314, 0x0304, 0x0304, + 0x0223, 0x0223, 0x0223, 0x0223, 0x0232, 0x0232, 0x0232, 0x0232, + 0x0340, 0x0340, 0x0303, 0x0303, 0x0213, 0x0213, 0x0213, 0x0213, + 0x0231, 0x0231, 0x0231, 0x0231, 0x0230, 0x0230, 0x0230, 0x0230, + 0x0122, 0x0122, 0x0122, 0x0122, 0x0122, 0x0122, 0x0122, 0x0122, + 0x0112, 0x0112, 0x0112, 0x0112, 0x0112, 0x0112, 0x0112, 0x0112, + 0x0121, 0x0121, 0x0121, 0x0121, 0x0121, 0x0121, 0x0121, 0x0121, + 0x0102, 0x0102, 0x0102, 0x0102, 0x0102, 0x0102, 0x0102, 0x0102, + 0x0120, 0x0120, 0x0120, 0x0120, 0x0120, 0x0120, 0x0120, 0x0120, + -720, -736, 0x04ee, -752, -768, -784, 0x04fb, -800, + 0x04dd, 0x04af, 0x04fa, 0x04be, 0x04eb, 0x04cd, 0x04dc, 0x049f, + 0x04f9, 0x04ea, 0x04bd, 0x04db, 0x048f, 0x04f8, 0x04cc, 0x049e, + 0x04e9, 0x047f, 0x04f7, 0x04ad, 0x04da, 0x04bc, 0x046f, -800, + 0x03cb, 0x03cb, 0x03f6, 0x03f6, 0x048e, 0x04e8, 0x045f, 0x049d, + 0x03f5, 0x03f5, 0x037e, 0x037e, 0x03e7, 0x03e7, 0x03ac, 0x03ac, + 0x03ca, 0x03ca, 0x03bb, 0x03bb, 0x04d9, 0x048d, 0x034f, 0x034f, + 0x03f4, 0x03f4, 0x033f, 0x033f, 0x03f3, 0x03f3, 0x03d8, 0x03d8, + 0x03e6, 0x03e6, 0x032f, 0x032f, 0x03f2, 0x03f2, 0x046e, 0x04f0, + 0x031f, 0x031f, 0x03f1, 0x03f1, 0x039c, 0x039c, 0x03c9, 0x03c9, + 0x035e, 0x035e, 0x03ab, 0x03ab, 0x03ba, 0x03ba, 0x03e5, 0x03e5, + 0x037d, 0x037d, 0x03d7, 0x03d7, 0x034e, 0x034e, 0x03e4, 0x03e4, + 0x038c, 0x038c, 0x03c8, 0x03c8, 0x033e, 0x033e, 0x036d, 0x036d, + 0x03d6, 0x03d6, 0x03e3, 0x03e3, 0x039b, 0x039b, 0x03b9, 0x03b9, + 0x032e, 0x032e, 0x03aa, 0x03aa, 0x03e2, 0x03e2, 0x031e, 0x031e, + 0x03e1, 0x03e1, 0x040e, 0x04e0, 0x035d, 0x035d, 0x03d5, 0x03d5, + 0x037c, 0x037c, 0x03c7, 0x03c7, 0x034d, 0x034d, 0x038b, 0x038b, + 0x02d4, 0x02d4, 0x02d4, 0x02d4, 0x03b8, 0x03b8, 0x039a, 0x039a, + 0x03a9, 0x03a9, 0x036c, 0x036c, 0x03c6, 0x03c6, 0x033d, 0x033d, + 0x02d3, 0x02d3, 0x02d3, 0x02d3, 0x02d2, 0x02d2, 0x02d2, 0x02d2, + 0x032d, 
0x032d, 0x030d, 0x030d, 0x021d, 0x021d, 0x021d, 0x021d, + 0x027b, 0x027b, 0x027b, 0x027b, 0x02b7, 0x02b7, 0x02b7, 0x02b7, + 0x02d1, 0x02d1, 0x02d1, 0x02d1, 0x035c, 0x035c, 0x03d0, 0x03d0, + 0x02c5, 0x02c5, 0x02c5, 0x02c5, 0x028a, 0x028a, 0x028a, 0x028a, + 0x02a8, 0x02a8, 0x02a8, 0x02a8, 0x024c, 0x024c, 0x024c, 0x024c, + 0x02c4, 0x02c4, 0x02c4, 0x02c4, 0x026b, 0x026b, 0x026b, 0x026b, + 0x02b6, 0x02b6, 0x02b6, 0x02b6, 0x0399, 0x0399, 0x030c, 0x030c, + 0x023c, 0x023c, 0x023c, 0x023c, 0x02c3, 0x02c3, 0x02c3, 0x02c3, + 0x027a, 0x027a, 0x027a, 0x027a, 0x02a7, 0x02a7, 0x02a7, 0x02a7, + 0x02a6, 0x02a6, 0x02a6, 0x02a6, 0x03c0, 0x03c0, 0x030b, 0x030b, + 0x01c2, 0x01c2, 0x01c2, 0x01c2, 0x01c2, 0x01c2, 0x01c2, 0x01c2, + 0x022c, 0x022c, 0x022c, 0x022c, 0x025b, 0x025b, 0x025b, 0x025b, + 0x02b5, 0x02b5, 0x02b5, 0x02b5, 0x021c, 0x021c, 0x021c, 0x021c, + 0x0289, 0x0289, 0x0289, 0x0289, 0x0298, 0x0298, 0x0298, 0x0298, + 0x02c1, 0x02c1, 0x02c1, 0x02c1, 0x024b, 0x024b, 0x024b, 0x024b, + 0x02b4, 0x02b4, 0x02b4, 0x02b4, 0x026a, 0x026a, 0x026a, 0x026a, + 0x023b, 0x023b, 0x023b, 0x023b, 0x0279, 0x0279, 0x0279, 0x0279, + 0x01b3, 0x01b3, 0x01b3, 0x01b3, 0x01b3, 0x01b3, 0x01b3, 0x01b3, + 0x0297, 0x0297, 0x0297, 0x0297, 0x0288, 0x0288, 0x0288, 0x0288, + 0x022b, 0x022b, 0x022b, 0x022b, 0x025a, 0x025a, 0x025a, 0x025a, + 0x01b2, 0x01b2, 0x01b2, 0x01b2, 0x01b2, 0x01b2, 0x01b2, 0x01b2, + 0x02a5, 0x02a5, 0x02a5, 0x02a5, 0x021b, 0x021b, 0x021b, 0x021b, + 0x01b1, 0x01b1, 0x01b1, 0x01b1, 0x01b1, 0x01b1, 0x01b1, 0x01b1, + 0x02b0, 0x02b0, 0x02b0, 0x02b0, 0x0269, 0x0269, 0x0269, 0x0269, + 0x0296, 0x0296, 0x0296, 0x0296, 0x024a, 0x024a, 0x024a, 0x024a, + 0x02a4, 0x02a4, 0x02a4, 0x02a4, 0x0278, 0x0278, 0x0278, 0x0278, + 0x0287, 0x0287, 0x0287, 0x0287, 0x023a, 0x023a, 0x023a, 0x023a, + 0x01a3, 0x01a3, 0x01a3, 0x01a3, 0x01a3, 0x01a3, 0x01a3, 0x01a3, + 0x0159, 0x0159, 0x0159, 0x0159, 0x0159, 0x0159, 0x0159, 0x0159, + 0x0195, 0x0195, 0x0195, 0x0195, 0x0195, 0x0195, 0x0195, 0x0195, + 0x012a, 0x012a, 0x012a, 
0x012a, 0x012a, 0x012a, 0x012a, 0x012a, + 0x01a2, 0x01a2, 0x01a2, 0x01a2, 0x01a2, 0x01a2, 0x01a2, 0x01a2, + 0x011a, 0x011a, 0x011a, 0x011a, 0x011a, 0x011a, 0x011a, 0x011a, + 0x01a1, 0x01a1, 0x01a1, 0x01a1, 0x01a1, 0x01a1, 0x01a1, 0x01a1, + 0x020a, 0x020a, 0x020a, 0x020a, 0x02a0, 0x02a0, 0x02a0, 0x02a0, + 0x0168, 0x0168, 0x0168, 0x0168, 0x0168, 0x0168, 0x0168, 0x0168, + 0x0186, 0x0186, 0x0186, 0x0186, 0x0186, 0x0186, 0x0186, 0x0186, + 0x0149, 0x0149, 0x0149, 0x0149, 0x0149, 0x0149, 0x0149, 0x0149, + 0x0194, 0x0194, 0x0194, 0x0194, 0x0194, 0x0194, 0x0194, 0x0194, + 0x0139, 0x0139, 0x0139, 0x0139, 0x0139, 0x0139, 0x0139, 0x0139, + 0x0193, 0x0193, 0x0193, 0x0193, 0x0193, 0x0193, 0x0193, 0x0193, + 0x0277, 0x0277, 0x0277, 0x0277, 0x0209, 0x0209, 0x0209, 0x0209, + 0x0158, 0x0158, 0x0158, 0x0158, 0x0158, 0x0158, 0x0158, 0x0158, + 0x0185, 0x0185, 0x0185, 0x0185, 0x0185, 0x0185, 0x0185, 0x0185, + 0x0129, 0x0129, 0x0129, 0x0129, 0x0129, 0x0129, 0x0129, 0x0129, + 0x0167, 0x0167, 0x0167, 0x0167, 0x0167, 0x0167, 0x0167, 0x0167, + 0x0176, 0x0176, 0x0176, 0x0176, 0x0176, 0x0176, 0x0176, 0x0176, + 0x0192, 0x0192, 0x0192, 0x0192, 0x0192, 0x0192, 0x0192, 0x0192, + 0x0119, 0x0119, 0x0119, 0x0119, 0x0119, 0x0119, 0x0119, 0x0119, + 0x0190, 0x0190, 0x0190, 0x0190, 0x0190, 0x0190, 0x0190, 0x0190, + 0x0148, 0x0148, 0x0148, 0x0148, 0x0148, 0x0148, 0x0148, 0x0148, + 0x0184, 0x0184, 0x0184, 0x0184, 0x0184, 0x0184, 0x0184, 0x0184, + 0x0157, 0x0157, 0x0157, 0x0157, 0x0157, 0x0157, 0x0157, 0x0157, + 0x0175, 0x0175, 0x0175, 0x0175, 0x0175, 0x0175, 0x0175, 0x0175, + 0x0138, 0x0138, 0x0138, 0x0138, 0x0138, 0x0138, 0x0138, 0x0138, + 0x0183, 0x0183, 0x0183, 0x0183, 0x0183, 0x0183, 0x0183, 0x0183, + 0x0166, 0x0166, 0x0166, 0x0166, 0x0166, 0x0166, 0x0166, 0x0166, + 0x0147, 0x0147, 0x0147, 0x0147, 0x0147, 0x0147, 0x0147, 0x0147, + 0x0174, 0x0174, 0x0174, 0x0174, 0x0174, 0x0174, 0x0174, 0x0174, + 0x0108, 0x0108, 0x0108, 0x0108, 0x0108, 0x0108, 0x0108, 0x0108, + 0x0180, 0x0180, 0x0180, 0x0180, 0x0180, 
0x0180, 0x0180, 0x0180, + 0x0156, 0x0156, 0x0156, 0x0156, 0x0156, 0x0156, 0x0156, 0x0156, + 0x0165, 0x0165, 0x0165, 0x0165, 0x0165, 0x0165, 0x0165, 0x0165, + 0x0137, 0x0137, 0x0137, 0x0137, 0x0137, 0x0137, 0x0137, 0x0137, + 0x0173, 0x0173, 0x0173, 0x0173, 0x0173, 0x0173, 0x0173, 0x0173, + 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, 0x0146, + 0x0107, 0x0107, 0x0107, 0x0107, 0x0107, 0x0107, 0x0107, 0x0107, + 0x0170, 0x0170, 0x0170, 0x0170, 0x0170, 0x0170, 0x0170, 0x0170, + 0x0106, 0x0106, 0x0106, 0x0106, 0x0106, 0x0106, 0x0106, 0x0106, + 0x0160, 0x0160, 0x0160, 0x0160, 0x0160, 0x0160, 0x0160, 0x0160, + 0x01ff, 0x01ff, 0x01ff, 0x01ff, 0x01ff, 0x01ff, 0x01ff, 0x01ff, + 0x01ef, 0x01ef, 0x01ef, 0x01ef, 0x01ef, 0x01ef, 0x01ef, 0x01ef, + 0x01fe, 0x01fe, 0x01fe, 0x01fe, 0x01fe, 0x01fe, 0x01fe, 0x01fe, + 0x01df, 0x01df, 0x01df, 0x01df, 0x01df, 0x01df, 0x01df, 0x01df, + 0x01fd, 0x01fd, 0x01fd, 0x01fd, 0x01fd, 0x01fd, 0x01fd, 0x01fd, + 0x01cf, 0x01cf, 0x01cf, 0x01cf, 0x01cf, 0x01cf, 0x01cf, 0x01cf, + 0x01fc, 0x01fc, 0x01fc, 0x01fc, 0x01fc, 0x01fc, 0x01fc, 0x01fc, + 0x01de, 0x01de, 0x01de, 0x01de, 0x01de, 0x01de, 0x01de, 0x01de, + 0x01ed, 0x01ed, 0x01ed, 0x01ed, 0x01ed, 0x01ed, 0x01ed, 0x01ed, + 0x01bf, 0x01bf, 0x01bf, 0x01bf, 0x01bf, 0x01bf, 0x01bf, 0x01bf, + 0x01ce, 0x01ce, 0x01ce, 0x01ce, 0x01ce, 0x01ce, 0x01ce, 0x01ce, + 0x01ec, 0x01ec, 0x01ec, 0x01ec, 0x01ec, 0x01ec, 0x01ec, 0x01ec, + 0x01ae, 0x01ae, 0x01ae, 0x01ae, 0x01ae, 0x01ae, 0x01ae, 0x01ae, + 0x010f, 0x010f, 0x010f, 0x010f, 0x010f, 0x010f, 0x010f, 0x010f, +}; + +static const short tab16_[] = +{ + -16, -32, -48, -64, 0x0411, 0x0401, 0x0310, 0x0310, + 0x0100, 0x0100, 0x0100, 0x0100, 0x0100, 0x0100, 0x0100, 0x0100, + -64, -80, -96, 0x04ff, -112, -128, -144, 0x04f2, + -160, 0x041f, 0x04f1, -176, -192, -208, -224, -240, + -240, -256, -272, -288, -304, -320, -336, -352, + -368, -384, -400, -416, -432, -448, 0x0451, -464, + -464, -480, -496, 0x0414, 0x0441, -512, 0x0423, 0x0432, + 0x0313, 0x0313, 0x0331, 
0x0331, 0x0403, 0x0430, 0x0322, 0x0322, + 0x0212, 0x0212, 0x0212, 0x0212, 0x0221, 0x0221, 0x0221, 0x0221, + 0x0202, 0x0202, 0x0202, 0x0202, 0x0220, 0x0220, 0x0220, 0x0220, + 0x03ef, 0x03ef, 0x03fe, 0x03fe, 0x03df, 0x03df, 0x03fd, 0x03fd, + 0x03cf, 0x03cf, 0x03fc, 0x03fc, 0x03bf, 0x03bf, 0x03fb, 0x03fb, + 0x02af, 0x02af, 0x02af, 0x02af, 0x03fa, 0x03fa, 0x039f, 0x039f, + 0x03f9, 0x03f9, 0x03f8, 0x03f8, 0x028f, 0x028f, 0x028f, 0x028f, + 0x027f, 0x027f, 0x027f, 0x027f, 0x02f7, 0x02f7, 0x02f7, 0x02f7, + 0x026f, 0x026f, 0x026f, 0x026f, 0x02f6, 0x02f6, 0x02f6, 0x02f6, + 0x025f, 0x025f, 0x025f, 0x025f, 0x02f5, 0x02f5, 0x02f5, 0x02f5, + 0x014f, 0x014f, 0x014f, 0x014f, 0x014f, 0x014f, 0x014f, 0x014f, + 0x01f4, 0x01f4, 0x01f4, 0x01f4, 0x01f4, 0x01f4, 0x01f4, 0x01f4, + 0x01f3, 0x01f3, 0x01f3, 0x01f3, 0x01f3, 0x01f3, 0x01f3, 0x01f3, + 0x01f0, 0x01f0, 0x01f0, 0x01f0, 0x01f0, 0x01f0, 0x01f0, 0x01f0, + 0x023f, 0x023f, 0x023f, 0x023f, -416, -432, -448, -464, + 0x012f, 0x012f, 0x012f, 0x012f, 0x012f, 0x012f, 0x012f, 0x012f, + 0x010f, 0x010f, 0x010f, 0x010f, 0x010f, 0x010f, 0x010f, 0x010f, + -448, -464, -480, -496, -512, -528, -544, -560, + -576, -592, -608, 0x04e3, -624, -640, -656, -672, + -672, -688, -704, 0x040d, -720, -736, -752, 0x043c, + -768, 0x041c, 0x04c0, -784, 0x03e2, 0x03e2, 0x042e, 0x041e, + 0x04d3, 0x042d, 0x04d2, 0x04d1, 0x043b, -784, 0x031d, 0x031d, + 0x04c4, 0x046b, 0x04c3, 0x04a7, 0x032c, 0x032c, 0x04c2, 0x04b5, + 0x04c1, 0x040c, 0x044b, 0x04b4, 0x046a, 0x04a6, 0x03b3, 0x03b3, + 0x045a, 0x04a5, 0x032b, 0x032b, 0x03b2, 0x03b2, 0x031b, 0x031b, + 0x03b1, 0x03b1, 0x040b, 0x04b0, 0x0469, 0x0496, 0x044a, 0x04a4, + 0x0478, 0x0487, 0x03a3, 0x03a3, 0x043a, 0x0459, 0x032a, 0x032a, + 0x0495, 0x0468, 0x03a1, 0x03a1, 0x0486, 0x0477, 0x0394, 0x0394, + 0x0449, 0x0457, 0x0367, 0x0367, 0x02a2, 0x02a2, 0x02a2, 0x02a2, + 0x021a, 0x021a, 0x021a, 0x021a, 0x030a, 0x030a, 0x03a0, 0x03a0, + 0x0339, 0x0339, 0x0393, 0x0393, 0x0358, 0x0358, 0x0385, 0x0385, + 0x0229, 0x0229, 0x0229, 0x0229, 
0x0292, 0x0292, 0x0292, 0x0292, + 0x0376, 0x0376, 0x0309, 0x0309, 0x0219, 0x0219, 0x0219, 0x0219, + 0x0291, 0x0291, 0x0291, 0x0291, 0x0390, 0x0390, 0x0348, 0x0348, + 0x0384, 0x0384, 0x0375, 0x0375, 0x0338, 0x0338, 0x0383, 0x0383, + 0x0366, 0x0366, 0x0328, 0x0328, 0x0282, 0x0282, 0x0282, 0x0282, + 0x0347, 0x0347, 0x0374, 0x0374, 0x0218, 0x0218, 0x0218, 0x0218, + 0x0281, 0x0281, 0x0281, 0x0281, 0x0280, 0x0280, 0x0280, 0x0280, + 0x0308, 0x0308, 0x0356, 0x0356, 0x0237, 0x0237, 0x0237, 0x0237, + 0x0273, 0x0273, 0x0273, 0x0273, 0x0365, 0x0365, 0x0346, 0x0346, + 0x0227, 0x0227, 0x0227, 0x0227, 0x0272, 0x0272, 0x0272, 0x0272, + 0x0364, 0x0364, 0x0355, 0x0355, 0x0207, 0x0207, 0x0207, 0x0207, + 0x0117, 0x0117, 0x0117, 0x0117, 0x0117, 0x0117, 0x0117, 0x0117, + 0x0171, 0x0171, 0x0171, 0x0171, 0x0171, 0x0171, 0x0171, 0x0171, + 0x0270, 0x0270, 0x0270, 0x0270, 0x0236, 0x0236, 0x0236, 0x0236, + 0x0263, 0x0263, 0x0263, 0x0263, 0x0245, 0x0245, 0x0245, 0x0245, + 0x0254, 0x0254, 0x0254, 0x0254, 0x0226, 0x0226, 0x0226, 0x0226, + 0x0162, 0x0162, 0x0162, 0x0162, 0x0162, 0x0162, 0x0162, 0x0162, + 0x0116, 0x0116, 0x0116, 0x0116, 0x0116, 0x0116, 0x0116, 0x0116, + 0x0161, 0x0161, 0x0161, 0x0161, 0x0161, 0x0161, 0x0161, 0x0161, + 0x0206, 0x0206, 0x0206, 0x0206, 0x0260, 0x0260, 0x0260, 0x0260, + 0x0153, 0x0153, 0x0153, 0x0153, 0x0153, 0x0153, 0x0153, 0x0153, + 0x0235, 0x0235, 0x0235, 0x0235, 0x0244, 0x0244, 0x0244, 0x0244, + 0x0125, 0x0125, 0x0125, 0x0125, 0x0125, 0x0125, 0x0125, 0x0125, + 0x0152, 0x0152, 0x0152, 0x0152, 0x0152, 0x0152, 0x0152, 0x0152, + 0x0115, 0x0115, 0x0115, 0x0115, 0x0115, 0x0115, 0x0115, 0x0115, + 0x0105, 0x0105, 0x0105, 0x0105, 0x0105, 0x0105, 0x0105, 0x0105, + 0x0134, 0x0134, 0x0134, 0x0134, 0x0134, 0x0134, 0x0134, 0x0134, + 0x0143, 0x0143, 0x0143, 0x0143, 0x0143, 0x0143, 0x0143, 0x0143, + 0x0150, 0x0150, 0x0150, 0x0150, 0x0150, 0x0150, 0x0150, 0x0150, + 0x0124, 0x0124, 0x0124, 0x0124, 0x0124, 0x0124, 0x0124, 0x0124, + 0x0142, 0x0142, 0x0142, 0x0142, 0x0142, 0x0142, 
0x0142, 0x0142, + 0x0133, 0x0133, 0x0133, 0x0133, 0x0133, 0x0133, 0x0133, 0x0133, + 0x0104, 0x0104, 0x0104, 0x0104, 0x0104, 0x0104, 0x0104, 0x0104, + 0x0140, 0x0140, 0x0140, 0x0140, 0x0140, 0x0140, 0x0140, 0x0140, + 0x04ce, -448, 0x03de, 0x03de, 0x03e9, 0x03e9, 0x04ea, 0x04d9, + 0x02ee, 0x02ee, 0x02ee, 0x02ee, 0x03ed, 0x03ed, 0x03eb, 0x03eb, + 0x02be, 0x02be, 0x02be, 0x02be, 0x02cd, 0x02cd, 0x02cd, 0x02cd, + 0x03dc, 0x03dc, 0x03db, 0x03db, 0x02ae, 0x02ae, 0x02ae, 0x02ae, + 0x02cc, 0x02cc, 0x02cc, 0x02cc, 0x03ad, 0x03ad, 0x03da, 0x03da, + 0x037e, 0x037e, 0x03ac, 0x03ac, 0x02ca, 0x02ca, 0x02ca, 0x02ca, + 0x03c9, 0x03c9, 0x037d, 0x037d, 0x025e, 0x025e, 0x025e, 0x025e, + 0x01bd, 0x01bd, 0x01bd, 0x01bd, 0x01bd, 0x01bd, 0x01bd, 0x01bd, + 0x019e, 0x019e, 0x019e, 0x019e, 0x019e, 0x019e, 0x019e, 0x019e, + 0x02bc, 0x02bc, 0x02bc, 0x02bc, 0x02cb, 0x02cb, 0x02cb, 0x02cb, + 0x028e, 0x028e, 0x028e, 0x028e, 0x02e8, 0x02e8, 0x02e8, 0x02e8, + 0x029d, 0x029d, 0x029d, 0x029d, 0x02e7, 0x02e7, 0x02e7, 0x02e7, + 0x02bb, 0x02bb, 0x02bb, 0x02bb, 0x028d, 0x028d, 0x028d, 0x028d, + 0x02d8, 0x02d8, 0x02d8, 0x02d8, 0x026e, 0x026e, 0x026e, 0x026e, + 0x01e6, 0x01e6, 0x01e6, 0x01e6, 0x01e6, 0x01e6, 0x01e6, 0x01e6, + 0x019c, 0x019c, 0x019c, 0x019c, 0x019c, 0x019c, 0x019c, 0x019c, + 0x02ab, 0x02ab, 0x02ab, 0x02ab, 0x02ba, 0x02ba, 0x02ba, 0x02ba, + 0x02e5, 0x02e5, 0x02e5, 0x02e5, 0x02d7, 0x02d7, 0x02d7, 0x02d7, + 0x014e, 0x014e, 0x014e, 0x014e, 0x014e, 0x014e, 0x014e, 0x014e, + 0x02e4, 0x02e4, 0x02e4, 0x02e4, 0x028c, 0x028c, 0x028c, 0x028c, + 0x01c8, 0x01c8, 0x01c8, 0x01c8, 0x01c8, 0x01c8, 0x01c8, 0x01c8, + 0x013e, 0x013e, 0x013e, 0x013e, 0x013e, 0x013e, 0x013e, 0x013e, + 0x016d, 0x016d, 0x016d, 0x016d, 0x016d, 0x016d, 0x016d, 0x016d, + 0x02d6, 0x02d6, 0x02d6, 0x02d6, 0x029b, 0x029b, 0x029b, 0x029b, + 0x02b9, 0x02b9, 0x02b9, 0x02b9, 0x02aa, 0x02aa, 0x02aa, 0x02aa, + 0x01e1, 0x01e1, 0x01e1, 0x01e1, 0x01e1, 0x01e1, 0x01e1, 0x01e1, + 0x01d4, 0x01d4, 0x01d4, 0x01d4, 0x01d4, 0x01d4, 0x01d4, 0x01d4, + 
0x02b8, 0x02b8, 0x02b8, 0x02b8, 0x02a9, 0x02a9, 0x02a9, 0x02a9, + 0x017b, 0x017b, 0x017b, 0x017b, 0x017b, 0x017b, 0x017b, 0x017b, + 0x02b7, 0x02b7, 0x02b7, 0x02b7, 0x02d0, 0x02d0, 0x02d0, 0x02d0, + 0x010e, 0x010e, 0x010e, 0x010e, 0x010e, 0x010e, 0x010e, 0x010e, + 0x01e0, 0x01e0, 0x01e0, 0x01e0, 0x01e0, 0x01e0, 0x01e0, 0x01e0, + 0x015d, 0x015d, 0x015d, 0x015d, 0x015d, 0x015d, 0x015d, 0x015d, + 0x01d5, 0x01d5, 0x01d5, 0x01d5, 0x01d5, 0x01d5, 0x01d5, 0x01d5, + 0x017c, 0x017c, 0x017c, 0x017c, 0x017c, 0x017c, 0x017c, 0x017c, + 0x01c7, 0x01c7, 0x01c7, 0x01c7, 0x01c7, 0x01c7, 0x01c7, 0x01c7, + 0x014d, 0x014d, 0x014d, 0x014d, 0x014d, 0x014d, 0x014d, 0x014d, + 0x018b, 0x018b, 0x018b, 0x018b, 0x018b, 0x018b, 0x018b, 0x018b, + 0x019a, 0x019a, 0x019a, 0x019a, 0x019a, 0x019a, 0x019a, 0x019a, + 0x016c, 0x016c, 0x016c, 0x016c, 0x016c, 0x016c, 0x016c, 0x016c, + 0x01c6, 0x01c6, 0x01c6, 0x01c6, 0x01c6, 0x01c6, 0x01c6, 0x01c6, + 0x013d, 0x013d, 0x013d, 0x013d, 0x013d, 0x013d, 0x013d, 0x013d, + 0x015c, 0x015c, 0x015c, 0x015c, 0x015c, 0x015c, 0x015c, 0x015c, + 0x01c5, 0x01c5, 0x01c5, 0x01c5, 0x01c5, 0x01c5, 0x01c5, 0x01c5, + 0x018a, 0x018a, 0x018a, 0x018a, 0x018a, 0x018a, 0x018a, 0x018a, + 0x01a8, 0x01a8, 0x01a8, 0x01a8, 0x01a8, 0x01a8, 0x01a8, 0x01a8, + 0x0199, 0x0199, 0x0199, 0x0199, 0x0199, 0x0199, 0x0199, 0x0199, + 0x014c, 0x014c, 0x014c, 0x014c, 0x014c, 0x014c, 0x014c, 0x014c, + 0x01b6, 0x01b6, 0x01b6, 0x01b6, 0x01b6, 0x01b6, 0x01b6, 0x01b6, + 0x017a, 0x017a, 0x017a, 0x017a, 0x017a, 0x017a, 0x017a, 0x017a, + 0x015b, 0x015b, 0x015b, 0x015b, 0x015b, 0x015b, 0x015b, 0x015b, + 0x0189, 0x0189, 0x0189, 0x0189, 0x0189, 0x0189, 0x0189, 0x0189, + 0x0198, 0x0198, 0x0198, 0x0198, 0x0198, 0x0198, 0x0198, 0x0198, + 0x0179, 0x0179, 0x0179, 0x0179, 0x0179, 0x0179, 0x0179, 0x0179, + 0x0197, 0x0197, 0x0197, 0x0197, 0x0197, 0x0197, 0x0197, 0x0197, + 0x0188, 0x0188, 0x0188, 0x0188, 0x0188, 0x0188, 0x0188, 0x0188, + 0x01ec, 0x01ec, 0x01ec, 0x01ec, 0x01ec, 0x01ec, 0x01ec, 0x01ec, + 0x01dd, 0x01dd, 
0x01dd, 0x01dd, 0x01dd, 0x01dd, 0x01dd, 0x01dd, +}; + +static const short tab24_[] = +{ + -16, -32, -48, 0x04ff, -64, -80, -96, -112, + -128, -144, -160, -176, 0x0411, 0x0401, 0x0410, 0x0400, + 0x04ef, 0x04fe, 0x04df, 0x04fd, 0x04cf, 0x04fc, 0x04bf, 0x04fb, + 0x03fa, 0x03fa, 0x04af, 0x049f, 0x03f9, 0x03f9, 0x03f8, 0x03f8, + 0x048f, 0x047f, 0x03f7, 0x03f7, 0x036f, 0x036f, 0x03f6, 0x03f6, + 0x035f, 0x035f, 0x03f5, 0x03f5, 0x034f, 0x034f, 0x03f4, 0x03f4, + 0x033f, 0x033f, 0x03f3, 0x03f3, 0x032f, 0x032f, 0x03f2, 0x03f2, + 0x03f1, 0x03f1, 0x041f, 0x04f0, -144, -160, -176, -192, + -192, -208, -224, -240, -256, -272, -288, -304, + -320, -336, -352, -368, -384, -400, -416, -432, + -432, -448, -464, -480, -496, -512, -528, -544, + -560, -576, -592, -608, -624, -640, -656, -672, + -672, -688, -704, -720, -736, -752, -768, -784, + -800, -816, -832, -848, -864, 0x0473, -880, 0x0472, + 0x0446, 0x0464, 0x0455, 0x0471, 0x0436, 0x0463, 0x0445, 0x0454, + 0x0426, 0x0462, 0x0416, 0x0461, -880, 0x0435, 0x0453, 0x0444, + 0x0425, 0x0452, 0x0415, -880, 0x0351, 0x0351, 0x0434, 0x0443, + 0x0324, 0x0324, 0x0342, 0x0342, 0x0333, 0x0333, 0x0314, 0x0314, + 0x0341, 0x0341, 0x0404, 0x0440, 0x0323, 0x0323, 0x0332, 0x0332, + 0x0213, 0x0213, 0x0213, 0x0213, 0x0231, 0x0231, 0x0231, 0x0231, + 0x0303, 0x0303, 0x0330, 0x0330, 0x0222, 0x0222, 0x0222, 0x0222, + 0x0112, 0x0112, 0x0112, 0x0112, 0x0112, 0x0112, 0x0112, 0x0112, + 0x0121, 0x0121, 0x0121, 0x0121, 0x0121, 0x0121, 0x0121, 0x0121, + 0x0202, 0x0202, 0x0202, 0x0202, 0x0220, 0x0220, 0x0220, 0x0220, + 0x010f, 0x010f, 0x010f, 0x010f, 0x010f, 0x010f, 0x010f, 0x010f, + 0x03ee, 0x03ee, 0x03de, 0x03de, 0x03ed, 0x03ed, 0x03ce, 0x03ce, + 0x03ec, 0x03ec, 0x03dd, 0x03dd, 0x03be, 0x03be, 0x03eb, 0x03eb, + 0x03cd, 0x03cd, 0x03dc, 0x03dc, 0x03ae, 0x03ae, 0x03ea, 0x03ea, + 0x03bd, 0x03bd, 0x03db, 0x03db, 0x03cc, 0x03cc, 0x039e, 0x039e, + 0x03e9, 0x03e9, 0x03ad, 0x03ad, 0x03da, 0x03da, 0x03bc, 0x03bc, + 0x03cb, 0x03cb, 0x038e, 0x038e, 0x03e8, 0x03e8, 0x039d, 
0x039d, + 0x03d9, 0x03d9, 0x037e, 0x037e, 0x03e7, 0x03e7, 0x03ac, 0x03ac, + 0x03ca, 0x03ca, 0x03bb, 0x03bb, 0x038d, 0x038d, 0x03d8, 0x03d8, + 0x040e, 0x04e0, 0x030d, 0x030d, 0x02e6, 0x02e6, 0x02e6, 0x02e6, + 0x036e, 0x036e, 0x039c, 0x039c, 0x02c9, 0x02c9, 0x02c9, 0x02c9, + 0x025e, 0x025e, 0x025e, 0x025e, 0x02ba, 0x02ba, 0x02ba, 0x02ba, + 0x02e5, 0x02e5, 0x02e5, 0x02e5, 0x03ab, 0x03ab, 0x037d, 0x037d, + 0x02d7, 0x02d7, 0x02d7, 0x02d7, 0x02e4, 0x02e4, 0x02e4, 0x02e4, + 0x028c, 0x028c, 0x028c, 0x028c, 0x02c8, 0x02c8, 0x02c8, 0x02c8, + 0x034e, 0x034e, 0x032e, 0x032e, 0x023e, 0x023e, 0x023e, 0x023e, + 0x026d, 0x026d, 0x026d, 0x026d, 0x02d6, 0x02d6, 0x02d6, 0x02d6, + 0x02e3, 0x02e3, 0x02e3, 0x02e3, 0x029b, 0x029b, 0x029b, 0x029b, + 0x02b9, 0x02b9, 0x02b9, 0x02b9, 0x02aa, 0x02aa, 0x02aa, 0x02aa, + 0x02e2, 0x02e2, 0x02e2, 0x02e2, 0x021e, 0x021e, 0x021e, 0x021e, + 0x02e1, 0x02e1, 0x02e1, 0x02e1, 0x025d, 0x025d, 0x025d, 0x025d, + 0x02d5, 0x02d5, 0x02d5, 0x02d5, 0x027c, 0x027c, 0x027c, 0x027c, + 0x02c7, 0x02c7, 0x02c7, 0x02c7, 0x024d, 0x024d, 0x024d, 0x024d, + 0x028b, 0x028b, 0x028b, 0x028b, 0x02b8, 0x02b8, 0x02b8, 0x02b8, + 0x02d4, 0x02d4, 0x02d4, 0x02d4, 0x029a, 0x029a, 0x029a, 0x029a, + 0x02a9, 0x02a9, 0x02a9, 0x02a9, 0x026c, 0x026c, 0x026c, 0x026c, + 0x02c6, 0x02c6, 0x02c6, 0x02c6, 0x023d, 0x023d, 0x023d, 0x023d, + 0x02d3, 0x02d3, 0x02d3, 0x02d3, 0x022d, 0x022d, 0x022d, 0x022d, + 0x02d2, 0x02d2, 0x02d2, 0x02d2, 0x021d, 0x021d, 0x021d, 0x021d, + 0x027b, 0x027b, 0x027b, 0x027b, 0x02b7, 0x02b7, 0x02b7, 0x02b7, + 0x02d1, 0x02d1, 0x02d1, 0x02d1, 0x025c, 0x025c, 0x025c, 0x025c, + 0x02c5, 0x02c5, 0x02c5, 0x02c5, 0x028a, 0x028a, 0x028a, 0x028a, + 0x02a8, 0x02a8, 0x02a8, 0x02a8, 0x0299, 0x0299, 0x0299, 0x0299, + 0x024c, 0x024c, 0x024c, 0x024c, 0x02c4, 0x02c4, 0x02c4, 0x02c4, + 0x026b, 0x026b, 0x026b, 0x026b, 0x02b6, 0x02b6, 0x02b6, 0x02b6, + 0x03d0, 0x03d0, 0x030c, 0x030c, 0x023c, 0x023c, 0x023c, 0x023c, + 0x02c3, 0x02c3, 0x02c3, 0x02c3, 0x027a, 0x027a, 0x027a, 0x027a, + 0x02a7, 
0x02a7, 0x02a7, 0x02a7, 0x022c, 0x022c, 0x022c, 0x022c, + 0x02c2, 0x02c2, 0x02c2, 0x02c2, 0x025b, 0x025b, 0x025b, 0x025b, + 0x02b5, 0x02b5, 0x02b5, 0x02b5, 0x021c, 0x021c, 0x021c, 0x021c, + 0x0289, 0x0289, 0x0289, 0x0289, 0x0298, 0x0298, 0x0298, 0x0298, + 0x02c1, 0x02c1, 0x02c1, 0x02c1, 0x024b, 0x024b, 0x024b, 0x024b, + 0x03c0, 0x03c0, 0x030b, 0x030b, 0x023b, 0x023b, 0x023b, 0x023b, + 0x03b0, 0x03b0, 0x030a, 0x030a, 0x021a, 0x021a, 0x021a, 0x021a, + 0x01b4, 0x01b4, 0x01b4, 0x01b4, 0x01b4, 0x01b4, 0x01b4, 0x01b4, + 0x026a, 0x026a, 0x026a, 0x026a, 0x02a6, 0x02a6, 0x02a6, 0x02a6, + 0x0279, 0x0279, 0x0279, 0x0279, 0x0297, 0x0297, 0x0297, 0x0297, + 0x03a0, 0x03a0, 0x0309, 0x0309, 0x0290, 0x0290, 0x0290, 0x0290, + 0x01b3, 0x01b3, 0x01b3, 0x01b3, 0x01b3, 0x01b3, 0x01b3, 0x01b3, + 0x0188, 0x0188, 0x0188, 0x0188, 0x0188, 0x0188, 0x0188, 0x0188, + 0x022b, 0x022b, 0x022b, 0x022b, 0x025a, 0x025a, 0x025a, 0x025a, + 0x01b2, 0x01b2, 0x01b2, 0x01b2, 0x01b2, 0x01b2, 0x01b2, 0x01b2, + 0x02a5, 0x02a5, 0x02a5, 0x02a5, 0x021b, 0x021b, 0x021b, 0x021b, + 0x02b1, 0x02b1, 0x02b1, 0x02b1, 0x0269, 0x0269, 0x0269, 0x0269, + 0x0196, 0x0196, 0x0196, 0x0196, 0x0196, 0x0196, 0x0196, 0x0196, + 0x01a4, 0x01a4, 0x01a4, 0x01a4, 0x01a4, 0x01a4, 0x01a4, 0x01a4, + 0x024a, 0x024a, 0x024a, 0x024a, 0x0278, 0x0278, 0x0278, 0x0278, + 0x0187, 0x0187, 0x0187, 0x0187, 0x0187, 0x0187, 0x0187, 0x0187, + 0x013a, 0x013a, 0x013a, 0x013a, 0x013a, 0x013a, 0x013a, 0x013a, + 0x01a3, 0x01a3, 0x01a3, 0x01a3, 0x01a3, 0x01a3, 0x01a3, 0x01a3, + 0x0159, 0x0159, 0x0159, 0x0159, 0x0159, 0x0159, 0x0159, 0x0159, + 0x0195, 0x0195, 0x0195, 0x0195, 0x0195, 0x0195, 0x0195, 0x0195, + 0x012a, 0x012a, 0x012a, 0x012a, 0x012a, 0x012a, 0x012a, 0x012a, + 0x01a2, 0x01a2, 0x01a2, 0x01a2, 0x01a2, 0x01a2, 0x01a2, 0x01a2, + 0x01a1, 0x01a1, 0x01a1, 0x01a1, 0x01a1, 0x01a1, 0x01a1, 0x01a1, + 0x0168, 0x0168, 0x0168, 0x0168, 0x0168, 0x0168, 0x0168, 0x0168, + 0x0186, 0x0186, 0x0186, 0x0186, 0x0186, 0x0186, 0x0186, 0x0186, + 0x0177, 0x0177, 0x0177, 
0x0177, 0x0177, 0x0177, 0x0177, 0x0177, + 0x0149, 0x0149, 0x0149, 0x0149, 0x0149, 0x0149, 0x0149, 0x0149, + 0x0194, 0x0194, 0x0194, 0x0194, 0x0194, 0x0194, 0x0194, 0x0194, + 0x0139, 0x0139, 0x0139, 0x0139, 0x0139, 0x0139, 0x0139, 0x0139, + 0x0193, 0x0193, 0x0193, 0x0193, 0x0193, 0x0193, 0x0193, 0x0193, + 0x0158, 0x0158, 0x0158, 0x0158, 0x0158, 0x0158, 0x0158, 0x0158, + 0x0185, 0x0185, 0x0185, 0x0185, 0x0185, 0x0185, 0x0185, 0x0185, + 0x0129, 0x0129, 0x0129, 0x0129, 0x0129, 0x0129, 0x0129, 0x0129, + 0x0167, 0x0167, 0x0167, 0x0167, 0x0167, 0x0167, 0x0167, 0x0167, + 0x0176, 0x0176, 0x0176, 0x0176, 0x0176, 0x0176, 0x0176, 0x0176, + 0x0192, 0x0192, 0x0192, 0x0192, 0x0192, 0x0192, 0x0192, 0x0192, + 0x0119, 0x0119, 0x0119, 0x0119, 0x0119, 0x0119, 0x0119, 0x0119, + 0x0191, 0x0191, 0x0191, 0x0191, 0x0191, 0x0191, 0x0191, 0x0191, + 0x0148, 0x0148, 0x0148, 0x0148, 0x0148, 0x0148, 0x0148, 0x0148, + 0x0184, 0x0184, 0x0184, 0x0184, 0x0184, 0x0184, 0x0184, 0x0184, + 0x0157, 0x0157, 0x0157, 0x0157, 0x0157, 0x0157, 0x0157, 0x0157, + 0x0175, 0x0175, 0x0175, 0x0175, 0x0175, 0x0175, 0x0175, 0x0175, + 0x0138, 0x0138, 0x0138, 0x0138, 0x0138, 0x0138, 0x0138, 0x0138, + 0x0183, 0x0183, 0x0183, 0x0183, 0x0183, 0x0183, 0x0183, 0x0183, + 0x0166, 0x0166, 0x0166, 0x0166, 0x0166, 0x0166, 0x0166, 0x0166, + 0x0128, 0x0128, 0x0128, 0x0128, 0x0128, 0x0128, 0x0128, 0x0128, + 0x0182, 0x0182, 0x0182, 0x0182, 0x0182, 0x0182, 0x0182, 0x0182, + 0x0118, 0x0118, 0x0118, 0x0118, 0x0118, 0x0118, 0x0118, 0x0118, + 0x0147, 0x0147, 0x0147, 0x0147, 0x0147, 0x0147, 0x0147, 0x0147, + 0x0174, 0x0174, 0x0174, 0x0174, 0x0174, 0x0174, 0x0174, 0x0174, + 0x0181, 0x0181, 0x0181, 0x0181, 0x0181, 0x0181, 0x0181, 0x0181, + 0x0208, 0x0208, 0x0208, 0x0208, 0x0280, 0x0280, 0x0280, 0x0280, + 0x0156, 0x0156, 0x0156, 0x0156, 0x0156, 0x0156, 0x0156, 0x0156, + 0x0165, 0x0165, 0x0165, 0x0165, 0x0165, 0x0165, 0x0165, 0x0165, + 0x0117, 0x0117, 0x0117, 0x0117, 0x0117, 0x0117, 0x0117, 0x0117, + 0x0207, 0x0207, 0x0207, 0x0207, 0x0270, 
0x0270, 0x0270, 0x0270, + 0x0137, 0x0137, 0x0137, 0x0137, 0x0137, 0x0137, 0x0137, 0x0137, + 0x0127, 0x0127, 0x0127, 0x0127, 0x0127, 0x0127, 0x0127, 0x0127, + 0x0106, 0x0106, 0x0106, 0x0106, 0x0106, 0x0106, 0x0106, 0x0106, + 0x0160, 0x0160, 0x0160, 0x0160, 0x0160, 0x0160, 0x0160, 0x0160, + 0x0105, 0x0105, 0x0105, 0x0105, 0x0105, 0x0105, 0x0105, 0x0105, + 0x0150, 0x0150, 0x0150, 0x0150, 0x0150, 0x0150, 0x0150, 0x0150, +}; + +static const short tab_c0[] = +{ + -29, -21, -13, -7, -3, -1, 11, 15, -1, 13, 14, -3, -1, 7, 5, + 9, -3, -1, 6, 3, -1, 10, 12, -3, -1, 2, 1, -1, 4, 8, + 0 +}; + +static const short tab_c1[] = +{ + -15, -7, -3, -1, 15, 14, -1, 13, 12, -3, -1, 11, 10, -1, 9, + 8, -7, -3, -1, 7, 6, -1, 5, 4, -3, -1, 3, 2, -1, 1, + 0 +}; + +static const struct newhuff ht[] = +{ + { /* 0 */ 0 , tab0_ } , + { /* 2 */ 0 , tab1_ } , + { /* 3 */ 0 , tab2_ } , + { /* 3 */ 0 , tab3_ } , + { /* 0 */ 0 , tab0_ } , + { /* 4 */ 0 , tab5_ } , + { /* 4 */ 0 , tab6_ } , + { /* 6 */ 0 , tab7_ } , + { /* 6 */ 0 , tab8_ } , + { /* 6 */ 0 , tab9_ } , + { /* 8 */ 0 , tab10_ } , + { /* 8 */ 0 , tab11_ } , + { /* 8 */ 0 , tab12_ } , + { /* 16 */ 0 , tab13_ } , + { /* 0 */ 0 , tab0_ } , + { /* 16 */ 0 , tab15_ } , + + { /* 16 */ 1 , tab16_ } , + { /* 16 */ 2 , tab16_ } , + { /* 16 */ 3 , tab16_ } , + { /* 16 */ 4 , tab16_ } , + { /* 16 */ 6 , tab16_ } , + { /* 16 */ 8 , tab16_ } , + { /* 16 */ 10, tab16_ } , + { /* 16 */ 13, tab16_ } , + { /* 16 */ 4 , tab24_ } , + { /* 16 */ 5 , tab24_ } , + { /* 16 */ 6 , tab24_ } , + { /* 16 */ 7 , tab24_ } , + { /* 16 */ 8 , tab24_ } , + { /* 16 */ 9 , tab24_ } , + { /* 16 */ 11, tab24_ } , + { /* 16 */ 13, tab24_ } +}; + +static const struct newhuff htc[] = +{ + { /* 1 , 1 , */ 0 , tab_c0 } , + { /* 1 , 1 , */ 0 , tab_c1 } +}; + +#endif Index: include/reactos/libs/libmpg123/optimize.h =================================================================== --- include/reactos/libs/libmpg123/optimize.h (revision 62563) +++ 
include/reactos/libs/libmpg123/optimize.h (working copy) @@ -3,7 +3,7 @@ /* optimize: get a grip on the different optimizations - copyright 2007 by the mpg123 project - free software under the terms of the LGPL 2.1 + copyright 2007-2013 by the mpg123 project - free software under the terms of the LGPL 2.1 see COPYING and AUTHORS files in distribution or http://mpg123.org initially written by Thomas Orgis, taking from mpg123.[hc] @@ -16,9 +16,14 @@ OPT_I586_DITHER (Intel Pentium with dithering/noise shaping for enhanced quality) OPT_MMX (Intel Pentium and compatibles with MMX, fast, but not the best accuracy) OPT_3DNOW (AMD 3DNow!, K6-2/3, Athlon, compatibles...) + OPT_3DNOW_VINTAGE OPT_3DNOWEXT (AMD 3DNow! extended, generally Athlon, compatibles...) + OPT_3DNOWEXT_VINTAGE + OPT_SSE + OPT_SSE_VINTAGE OPT_ALTIVEC (Motorola/IBM PPC with AltiVec under MacOSX) OPT_X86_64 (x86-64 / AMD64 / Intel 64) + OPT_AVX or you define OPT_MULTI and give a combination which makes sense (do not include i486, do not mix altivec and x86). @@ -33,13 +38,128 @@ /* Runtime optimization interface now here: */ +/* Nedit inline Perl script to generate decoder list and name mapping in one place + optimize.c defining I_AM_OPTIMIZE to get the names + +perl <<'EOT' +## order is important (autodec first, nodec last) +@names= +( + ['autodec', 'auto'] +,['generic', 'generic'] +,['generic_dither', 'generic_dither'] +,['idrei', 'i386'] +,['ivier', 'i486'] +,['ifuenf', 'i586'] +,['ifuenf_dither', 'i586_dither'] +,['mmx', 'MMX'] +,['dreidnow', '3DNow'] +,['dreidnowext', '3DNowExt'] +,['altivec', 'AltiVec'] +,['sse', 'SSE'] +,['x86_64', 'x86-64'] +,['arm','ARM'] +,['neon','NEON'] +,['avx','AVX'] +,['dreidnow_vintage', '3DNow_vintage'] +,['dreidnowext_vintage', '3DNowExt_vintage'] +,['sse_vintage', 'SSE_vintage'] +,['nodec', 'nodec'] +); + +print "enum optdec\n{\n"; +for my $n (@names) +{ + $name = $n->[0]; + $enum = $name eq 'autodec' ? 
$name = " $name=0" : ",$name"; + print "\t$enum\n" +} +print "};\n"; +print "##ifdef I_AM_OPTIMIZE\n"; +for my $n (@names) +{ + my $key = $n->[0]; + my $val = $n->[1]; + print "static const char dn_$key\[\] = \"$val\";\n"; +} +print "static const char* decname[] =\n{\n"; +for my $n (@names) +{ + my $key = $n->[0]; + print "\t".($key eq 'autodec' ? ' ' : ',')."dn_$key\n"; +} +print "};\n##endif" +EOT +*/ enum optdec -{ /* autodec needs to be =0 and the first, nodec needs to be the last -- for loops! */ - autodec=0, generic, generic_dither, idrei, - ivier, ifuenf, ifuenf_dither, mmx, - dreidnow, dreidnowext, altivec, sse, x86_64, arm, - nodec +{ + autodec=0 + ,generic + ,generic_dither + ,idrei + ,ivier + ,ifuenf + ,ifuenf_dither + ,mmx + ,dreidnow + ,dreidnowext + ,altivec + ,sse + ,x86_64 + ,arm + ,neon + ,avx + ,dreidnow_vintage + ,dreidnowext_vintage + ,sse_vintage + ,nodec }; +#ifdef I_AM_OPTIMIZE +static const char dn_autodec[] = "auto"; +static const char dn_generic[] = "generic"; +static const char dn_generic_dither[] = "generic_dither"; +static const char dn_idrei[] = "i386"; +static const char dn_ivier[] = "i486"; +static const char dn_ifuenf[] = "i586"; +static const char dn_ifuenf_dither[] = "i586_dither"; +static const char dn_mmx[] = "MMX"; +static const char dn_dreidnow[] = "3DNow"; +static const char dn_dreidnowext[] = "3DNowExt"; +static const char dn_altivec[] = "AltiVec"; +static const char dn_sse[] = "SSE"; +static const char dn_x86_64[] = "x86-64"; +static const char dn_arm[] = "ARM"; +static const char dn_neon[] = "NEON"; +static const char dn_avx[] = "AVX"; +static const char dn_dreidnow_vintage[] = "3DNow_vintage"; +static const char dn_dreidnowext_vintage[] = "3DNowExt_vintage"; +static const char dn_sse_vintage[] = "SSE_vintage"; +static const char dn_nodec[] = "nodec"; +static const char* decname[] = +{ + dn_autodec + ,dn_generic + ,dn_generic_dither + ,dn_idrei + ,dn_ivier + ,dn_ifuenf + ,dn_ifuenf_dither + ,dn_mmx + ,dn_dreidnow + 
,dn_dreidnowext + ,dn_altivec + ,dn_sse + ,dn_x86_64 + ,dn_arm + ,dn_neon + ,dn_avx + ,dn_dreidnow_vintage + ,dn_dreidnowext_vintage + ,dn_sse_vintage + ,dn_nodec +}; +#endif + enum optcla { nocla=0, normal, mmxsse }; /* - Set up the table of synth functions for current decoder choice. */ @@ -58,11 +178,7 @@ /* this is included in mpg123.h, which includes config.h */ #ifdef CCALIGN -#ifdef _MSC_VER -#define ALIGNED(a) __declspec(align(a)) -#else #define ALIGNED(a) __attribute__((aligned(a))) -#endif #else #define ALIGNED(a) #endif @@ -71,7 +187,10 @@ #ifdef REAL_IS_FIXED #if (defined OPT_I486) || (defined OPT_I586) || (defined OPT_I586_DITHER) \ - || (defined OPT_MMX) || (defined OPT_SSE) || (defined_OPT_ALTIVEC) \ + || (defined OPT_MMX) || (defined OPT_SSE) || (defined OPT_ALTIVEC) \ - || (defined OPT_3DNOW) || (defined OPT_3DNOWEXT) || (defined OPT_X86_64) || (defined OPT_GENERIC_DITHER) + || (defined OPT_3DNOW) || (defined OPT_3DNOWEXT) || (defined OPT_X86_64) \ + || (defined OPT_3DNOW_VINTAGE) || (defined OPT_3DNOWEXT_VINTAGE) \ + || (defined OPT_SSE_VINTAGE) \ + || (defined OPT_NEON) || (defined OPT_AVX) || (defined OPT_GENERIC_DITHER) #error "Bad decoder choice together with fixed point math!" 
#endif #endif @@ -142,9 +261,19 @@ #define OPT_X86 #ifndef OPT_MULTI # define defopt sse +# define opt_dct36(fr) dct36_sse #endif #endif +#ifdef OPT_SSE_VINTAGE +#define OPT_MMXORSSE +#define OPT_MPLAYER +#define OPT_X86 +#ifndef OPT_MULTI +# define defopt sse +#endif +#endif + #ifdef OPT_3DNOWEXT #define OPT_MMXORSSE #define OPT_MPLAYER @@ -151,6 +280,16 @@ #define OPT_X86 #ifndef OPT_MULTI # define defopt dreidnowext +#endif +#endif + +/* same as above but also using 3DNowExt dct36 */ +#ifdef OPT_3DNOWEXT_VINTAGE +#define OPT_MMXORSSE +#define OPT_MPLAYER +#define OPT_X86 +#ifndef OPT_MULTI +# define defopt dreidnowext_vintage # define opt_dct36(fr) dct36_3dnowext #endif #endif @@ -165,6 +304,14 @@ #define OPT_X86 #ifndef OPT_MULTI # define defopt dreidnow +#endif +#endif + +/* same as above but also using 3DNow dct36 */ +#ifdef OPT_3DNOW_VINTAGE +#define OPT_X86 +#ifndef OPT_MULTI +# define defopt dreidnow_vintage # define opt_dct36(fr) dct36_3dnow #endif #endif @@ -179,9 +326,18 @@ #define OPT_MMXORSSE #ifndef OPT_MULTI # define defopt x86_64 +# define opt_dct36(fr) dct36_x86_64 #endif #endif +#ifdef OPT_AVX +#define OPT_MMXORSSE +#ifndef OPT_MULTI +# define defopt avx +# define opt_dct36(fr) dct36_avx +#endif +#endif + #ifdef OPT_ARM #ifndef OPT_MULTI # define defopt arm @@ -188,6 +344,13 @@ #endif #endif +#ifdef OPT_NEON +#define OPT_MMXORSSE +#ifndef OPT_MULTI +# define defopt neon +#endif +#endif + /* used for multi opt mode and the single 3dnow mode to have the old 3dnow test flag still working */ void check_decoders(void); @@ -201,7 +364,7 @@ # define defopt nodec -# if (defined OPT_3DNOW || defined OPT_3DNOWEXT) +# if (defined OPT_3DNOW_VINTAGE || defined OPT_3DNOWEXT_VINTAGE || defined OPT_SSE || defined OPT_X86_64 || defined OPT_AVX) # define opt_dct36(fr) ((fr)->cpu_opts.the_dct36) # endif Index: include/reactos/libs/libmpg123/reader.h =================================================================== --- include/reactos/libs/libmpg123/reader.h 
(revision 62563) +++ include/reactos/libs/libmpg123/reader.h (working copy) @@ -12,26 +12,44 @@ #include "config.h" #include "mpg123.h" +#ifndef NO_FEEDER struct buffy { unsigned char *data; - long size; - long realsize; + ssize_t size; + ssize_t realsize; struct buffy *next; }; + struct bufferchain { struct buffy* first; /* The beginning of the chain. */ struct buffy* last; /* The end... of the chain. */ - long size; /* Aggregated size of all buffies. */ + ssize_t size; /* Aggregated size of all buffies. */ /* These positions are relative to buffer chain beginning. */ - long pos; /* Position in whole chain. */ - long firstpos; /* The point of return on non-forget() */ + ssize_t pos; /* Position in whole chain. */ + ssize_t firstpos; /* The point of return on non-forget() */ /* The "real" filepos is fileoff + pos. */ off_t fileoff; /* Beginning of chain is at this file offset. */ + size_t bufblock; /* Default (minimal) size of buffers. */ + size_t pool_size; /* Keep that many buffers in storage. */ + size_t pool_fill; /* That many buffers are there. */ + /* A pool of buffers to re-use, if activated. It's a linked list that is worked on from the front. */ + struct buffy *pool; }; +/* Call this before any buffer chain use (even bc_init()). */ +void bc_prepare(struct bufferchain *, size_t pool_size, size_t bufblock); +/* Free persistent data in the buffer chain, after bc_reset(). */ +void bc_cleanup(struct bufferchain *); +/* Change pool size. This does not actually allocate/free anything on itself, just instructs later operations to free less / allocate more buffers. */ +void bc_poolsize(struct bufferchain *, size_t pool_size, size_t bufblock); +/* Return available byte count in the buffer. 
*/ +size_t bc_fill(struct bufferchain *bc); + +#endif + struct reader_data { off_t filelen; /* total file length or total buffer size */ @@ -41,22 +59,24 @@ void *iohandle; int flags; long timeout_sec; - long (*fdread) (mpg123_handle *, void *, size_t); + ssize_t (*fdread) (mpg123_handle *, void *, size_t); /* User can replace the read and lseek functions. The r_* are the stored replacement functions or NULL. */ - long (*r_read) (int fd, void *buf, size_t count); + ssize_t (*r_read) (int fd, void *buf, size_t count); off_t (*r_lseek)(int fd, off_t offset, int whence); /* These are custom I/O routines for opaque user handles. They get picked if there's some iohandle set. */ - long (*r_read_handle) (void *handle, void *buf, size_t count); + ssize_t (*r_read_handle) (void *handle, void *buf, size_t count); off_t (*r_lseek_handle)(void *handle, off_t offset, int whence); /* An optional cleaner for the handle on closing the stream. */ void (*cleanup_handle)(void *handle); /* These two pointers are the actual workers (default map to POSIX read/lseek). */ - long (*read) (int fd, void *buf, size_t count); + ssize_t (*read) (int fd, void *buf, size_t count); off_t (*lseek)(int fd, off_t offset, int whence); /* Buffered readers want that abstracted, set internally. */ - long (*fullread)(mpg123_handle *, unsigned char *, long); + ssize_t (*fullread)(mpg123_handle *, unsigned char *, ssize_t); +#ifndef NO_FEEDER struct bufferchain buffer; /* Not dynamically allocated, these few struct bytes aren't worth the trouble. */ +#endif }; /* start to use off_t to properly do LFS in future ... 
used to be long */ @@ -64,7 +84,7 @@ { int (*init) (mpg123_handle *); void (*close) (mpg123_handle *); - long (*fullread) (mpg123_handle *, unsigned char *, long); + ssize_t (*fullread) (mpg123_handle *, unsigned char *, ssize_t); int (*head_read) (mpg123_handle *, unsigned long *newhead); /* succ: TRUE, else <= 0 (FALSE or READER_MORE) */ int (*head_shift) (mpg123_handle *, unsigned long *head); /* succ: TRUE, else <= 0 (FALSE or READER_MORE) */ off_t (*skip_bytes) (mpg123_handle *, off_t len); /* succ: >=0, else error or READER_MORE */ Index: include/reactos/libs/libmpg123/sample.h =================================================================== --- include/reactos/libs/libmpg123/sample.h (revision 62563) +++ include/reactos/libs/libmpg123/sample.h (working copy) @@ -14,11 +14,11 @@ /* Special case is fixed point math... which does work, but not that nice yet. */ #ifdef REAL_IS_FIXED -static inline short idiv_signed_rounded(long x, int shift) +static inline int16_t idiv_signed_rounded(int32_t x, int shift) { x >>= (shift - 1); x += (x & 1); - return (short)(x >> 1); + return (int16_t)(x >> 1); } # define REAL_PLUS_32767 ( 32767 << 15 ) # define REAL_MINUS_32768 ( -32768 << 15 ) @@ -36,7 +36,7 @@ # if (defined REAL_IS_FLOAT) && (defined IEEE_FLOAT) /* This function is only available for IEEE754 single-precision values This is nearly identical to proper rounding, just -+0.5 is rounded to 0 */ -static inline short ftoi16(float x) +static inline int16_t ftoi16(float x) { union { @@ -44,7 +44,7 @@ int32_t i; } u_fi; u_fi.f = x + 12582912.0f; /* Magic Number: 2^23 + 2^22 */ - return (short)u_fi.i; + return (int16_t)u_fi.i; } # define REAL_TO_SHORT_ACCURATE(x) ftoi16(x) # else @@ -88,6 +88,25 @@ /* The actual storage of a decoded sample is separated in the following macros. We can handle different types, we could also handle dithering here. */ +#ifdef NEWOLD_WRITE_SAMPLE + +/* This is the old new mpg123 WRITE_SAMPLE, fixed for newer GCC by MPlayer folks. 
+ Makes a huge difference on old machines. */ +#if WORDS_BIGENDIAN +#define MANTISSA_OFFSET 1 +#else +#define MANTISSA_OFFSET 0 +#endif +#define WRITE_SHORT_SAMPLE(samples,sum,clip) { \ + union { double dtemp; int itemp[2]; } u; int v; \ + u.dtemp = ((((65536.0 * 65536.0 * 16)+(65536.0 * 0.5))* 65536.0)) + (sum);\ + v = u.itemp[MANTISSA_OFFSET] - 0x80000000; \ + if( v > 32767) { *(samples) = 0x7fff; (clip)++; } \ + else if( v < -32768) { *(samples) = -0x8000; (clip)++; } \ + else { *(samples) = v; } \ +} + +#else /* Macro to produce a short (signed 16bit) output sample from internal representation, which may be float, double or indeed some integer for fixed point handling. */ #define WRITE_SHORT_SAMPLE(samples,sum,clip) \ @@ -94,6 +113,7 @@ if( (sum) > REAL_PLUS_32767) { *(samples) = 0x7fff; (clip)++; } \ else if( (sum) < REAL_MINUS_32768) { *(samples) = -0x8000; (clip)++; } \ else { *(samples) = REAL_TO_SHORT(sum); } +#endif /* Same as above, but always using accurate rounding. Would we want softer clipping here, too? */ #define WRITE_SHORT_SAMPLE_ACCURATE(samples,sum,clip) \ @@ -119,7 +139,7 @@ /* Produce an 8bit sample, via 16bit intermediate. 
*/ #define WRITE_8BIT_SAMPLE(samples,sum,clip) \ { \ - short write_8bit_tmp; \ + int16_t write_8bit_tmp; \ if( (sum) > REAL_PLUS_32767) { write_8bit_tmp = 0x7fff; (clip)++; } \ else if( (sum) < REAL_MINUS_32768) { write_8bit_tmp = -0x8000; (clip)++; } \ else { write_8bit_tmp = REAL_TO_SHORT(sum); } \ Index: include/reactos/libs/libmpg123/synth_sse3d.h =================================================================== --- include/reactos/libs/libmpg123/synth_sse3d.h (revision 62563) +++ include/reactos/libs/libmpg123/synth_sse3d.h (working copy) @@ -63,12 +63,12 @@ decl %ecx movl 20(%ebp),%esi movl (%edx),%eax - jecxz .L01 + jecxz 1f decl %eax andl %ebx,%eax leal 1088(%esi),%esi movl %eax,(%edx) - .L01: +1: leal (%esi,%eax,2),%edx movl %eax,TEMP incl %eax @@ -76,11 +76,11 @@ leal 544(%esi,%eax,2),%ecx incl %ebx testl $1, %eax - jnz .L02 + jnz 2f xchgl %edx,%ecx incl TEMP leal 544(%esi),%esi - .L02: +2: pushl 8(%ebp) pushl %edx pushl %ecx @@ -95,7 +95,7 @@ movl (%esp),%ecx /* restore, but leave value on stack */ shrl $1, %ecx ALIGN16 - .L03: +3: movq (%edx),%mm0 movq 64(%edx),%mm4 pmaddwd (%esi),%mm0 @@ -138,10 +138,10 @@ leal 128(%edx),%edx leal 8(%edi),%edi decl %ecx - jnz .L03 + jnz 3b popl %ecx andl $1, %ecx - jecxz .next_loop + jecxz 4f movq (%edx),%mm0 pmaddwd (%esi),%mm0 movq 8(%edx),%mm1 @@ -163,11 +163,11 @@ leal 32(%esi),%esi leal 64(%edx),%edx leal 4(%edi),%edi - .next_loop: +4: subl $64,%esi movl $7,%ecx ALIGN16 - .L04: +5: movq (%edx),%mm0 movq 64(%edx),%mm4 pmaddwd (%esi),%mm0 @@ -214,7 +214,7 @@ addl $128,%edx leal 8(%edi),%edi decl %ecx - jnz .L04 + jnz 5b movq (%edx),%mm0 pmaddwd (%esi),%mm0 movq 8(%edx),%mm1 Index: include/reactos/libs/libmpg123/synths.h =================================================================== --- include/reactos/libs/libmpg123/synths.h (revision 62563) +++ include/reactos/libs/libmpg123/synths.h (working copy) @@ -10,7 +10,7 @@ typedef int (*func_synth)(real *,int, mpg123_handle *,int ); typedef int 
(*func_synth_mono)(real *, mpg123_handle *); typedef int (*func_synth_stereo)(real *, real *, mpg123_handle *); -enum synth_channel { c_plain=0, c_stereo, c_mono2stereo, c_mono, c_limit }; +enum synth_channel { c_plain=0, c_stereo, c_m2s, c_mono, c_limit }; enum synth_resample { r_none=-1 Index: lib/3rdparty/libmpg123/check_neon.S =================================================================== --- lib/3rdparty/libmpg123/check_neon.S (revision 0) +++ lib/3rdparty/libmpg123/check_neon.S (working copy) @@ -0,0 +1,26 @@ +/* + check_neon: check NEON availability + + copyright 1995-2014 by the mpg123 project - free software under the terms of the LGPL 2.1 + see COPYING and AUTHORS files in distribution or http://mpg123.org + initially written by Taihei Momma +*/ + +#include "mangle.h" + + .code 32 +#ifndef __APPLE__ + .fpu neon +#endif + + .text + .globl ASM_NAME(check_neon) +#ifdef __ELF__ + .type ASM_NAME(check_neon), %function +#endif + ALIGN4 +ASM_NAME(check_neon): + vorr d0, d0, d0 + bx lr + +NONEXEC_STACK Index: lib/3rdparty/libmpg123/compat.c =================================================================== --- lib/3rdparty/libmpg123/compat.c (revision 62563) +++ lib/3rdparty/libmpg123/compat.c (working copy) @@ -17,6 +17,7 @@ #else #include #endif +#include #ifdef WANT_WIN32_UNICODE #include @@ -55,25 +56,26 @@ } #endif -int compat_open(const char *filename, int mode) +int compat_open(const char *filename, int flags) { int ret; #if defined (WANT_WIN32_UNICODE) - const wchar_t *frag = NULL; + wchar_t *frag = NULL; ret = win32_utf8_wide(filename, &frag, NULL); if ((frag == NULL) || (ret == 0)) goto fallback; /* Fallback to plain open when ucs-2 conversion fails */ - ret = _wopen(frag, mode); /*Try _wopen */ + ret = _wopen(frag, flags); /*Try _wopen */ if (ret != -1 ) goto open_ok; /* msdn says -1 means failure */ fallback: #endif -#ifdef __MSVCRT__ /* MSDN says POSIX function is deprecated beginning in Visual C++ 2005 */ - ret = _open (filename, mode); /* Try 
plain old _open(), if it fails, do nothing */ +#if (defined(WIN32) && !defined (__CYGWIN__)) /* MSDN says POSIX function is deprecated beginning in Visual C++ 2005 */ + ret = _open(filename, flags); /* Try plain old _open(), if it fails, do nothing */ #else - ret = open (filename, mode); + /* On UNIX, we always add a default permission mask in case flags|O_CREAT. */ + ret = open(filename, flags, S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|S_IROTH|S_IWOTH); #endif #if defined (WANT_WIN32_UNICODE) @@ -86,7 +88,7 @@ int compat_close(int infd) { -#ifdef __MSVCRT__ /* MSDN says POSIX function is deprecated beginning in Visual C++ 2005 */ +#if (defined(WIN32) && !defined (__CYGWIN__)) /* MSDN says POSIX function is deprecated beginning in Visual C++ 2005 */ return _close(infd); #else return close(infd); @@ -96,55 +98,41 @@ /* Windows Unicode stuff */ #ifdef WANT_WIN32_UNICODE -int win32_wide_utf8(const wchar_t * const wptr, const char **const mbptr, size_t * const buflen) +int win32_wide_utf8(const wchar_t * const wptr, char **mbptr, size_t * buflen) { - size_t len; - char *buf; - int ret; + size_t len; + char *buf; + int ret = 0; - len = WideCharToMultiByte(CP_UTF8, 0, wptr, -1, NULL, 0, NULL, NULL); /* Get utf-8 string length */ - buf = calloc(len, sizeof (char)); /* Can we assume sizeof char always = 1? */ - debug2("win32_wide_utf8 allocated %u bytes at %p", len, buf); + len = WideCharToMultiByte(CP_UTF8, 0, wptr, -1, NULL, 0, NULL, NULL); /* Get utf-8 string length */ + buf = calloc(len + 1, sizeof (char)); /* Can we assume sizeof char always = 1? */ - if(buf != NULL) - { - ret = WideCharToMultiByte(CP_UTF8, 0, wptr, -1, buf, len, NULL, NULL); /*Do actual conversion*/ - *mbptr = buf; /* Set string pointer to allocated buffer */ - if(buflen != NULL) *buflen = len * sizeof (char); /* Give length of allocated memory if needed. 
*/ - - return ret; - } - else - { - if(buflen != NULL) *buflen = 0; - - return 0; - } + if(!buf) len = 0; + else { + if (len != 0) ret = WideCharToMultiByte(CP_UTF8, 0, wptr, -1, buf, len, NULL, NULL); /*Do actual conversion*/ + buf[len] = '\0'; /* Must terminate with NUL, also when len is 0 and nothing was converted */ + } + *mbptr = buf; /* Set string pointer to allocated buffer */ + if(buflen != NULL) *buflen = (len) * sizeof (char); /* Give length of allocated memory if needed. */ + return ret; } -int win32_utf8_wide(const char *const mbptr, const wchar_t ** const wptr, size_t * const buflen) +int win32_utf8_wide(const char *const mbptr, wchar_t **wptr, size_t *buflen) { - size_t len; - wchar_t *buf; - int ret; + size_t len; + wchar_t *buf; + int ret = 0; - len = MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, mbptr, -1, NULL, 0); /* Get converted size */ - buf = calloc(len, sizeof (wchar_t)); /* Allocate memory accordingly */ - debug2("win32_utf8_wide allocated %u bytes at %p", len, buf); + len = MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, mbptr, -1, NULL, 0); /* Get converted size */ + buf = calloc(len + 1, sizeof (wchar_t)); /* Allocate memory accordingly */ - if(buf != NULL) - { - ret = MultiByteToWideChar (CP_UTF8, MB_ERR_INVALID_CHARS, mbptr, -1, buf, len); /* Do conversion */ - *wptr = buf; /* Set string pointer to allocated buffer */ - if (buflen != NULL) *buflen = len * sizeof (wchar_t); /* Give length of allocated memory if needed. */ - - return ret; - } - else - { - if (buflen != NULL) *buflen = 0; - - return 0; - } + if(!buf) len = 0; + else { + if (len != 0) ret = MultiByteToWideChar (CP_UTF8, MB_ERR_INVALID_CHARS, mbptr, -1, buf, len); /* Do conversion */ + buf[len] = L'\0'; /* Must terminate with NUL, also when len is 0 and nothing was converted */ + } + *wptr = buf; /* Set string pointer to allocated buffer */ + if (buflen != NULL) *buflen = len * sizeof (wchar_t); /* Give length of allocated memory if needed. 
*/ + return ret; /* Number of characters written */ } #endif Index: lib/3rdparty/libmpg123/dct36_avx.S =================================================================== --- lib/3rdparty/libmpg123/dct36_avx.S (revision 0) +++ lib/3rdparty/libmpg123/dct36_avx.S (working copy) @@ -0,0 +1,358 @@ +/* + dct36_avx: AVX optimized dct36 for x86-64 + + copyright 1995-2013 by the mpg123 project - free software under the terms of the LGPL 2.1 + see COPYING and AUTHORS files in distribution or http://mpg123.org + initially written by Taihei Monma +*/ + +#include "mangle.h" + +#ifdef IS_MSABI +#define in %rcx +#define out1 %rdx +#define out2 %r8 +#define w %r9 +#define ts %r10 +#define COS9_ %rax +#define tfcos36_ %r11 +#else +#define in %rdi +#define out1 %rsi +#define out2 %rdx +#define w %rcx +#define ts %r8 +#define COS9_ %rax +#define tfcos36_ %r9 +#endif + +/* + void dct36_avx(real *inbuf,real *o1,real *o2,real *wintab,real *tsbuf); +*/ + +#ifndef __APPLE__ + .section .rodata +#else + .data +#endif + ALIGN16 +dct36_avx_COS9: + .long 0x3f5db3d7 + .long 0x3f5db3d7 + .long 0x3f000000 + .long 0x3f000000 + .long 0x3f7c1c5c + .long 0x3f7c1c5c + .long 0x3f708fb2 + .long 0x3f708fb2 + .long 0x3f248dbb + .long 0x3f248dbb + .long 0x3e31d0d4 + .long 0x3e31d0d4 + .long 0x3eaf1d44 + .long 0x3eaf1d44 + .long 0x3f441b7d + .long 0x3f441b7d + ALIGN16 +dct36_avx_tfcos36: + .long 0x3f007d2b + .long 0x3f0483ee + .long 0x3f0d3b7d + .long 0x3f1c4257 + .long 0x40b79454 + .long 0x3ff746ea + .long 0x3f976fd9 + .long 0x3f5f2944 + .long 0x3f3504f3 + ALIGN16 +dct36_avx_sign: + .long 0x80000000,0x80000000,0x80000000,0x80000000 + .text + ALIGN16 + .globl ASM_NAME(dct36_avx) +ASM_NAME(dct36_avx): +#ifdef IS_MSABI + push %rbp + mov %rsp, %rbp + sub $160, %rsp + movaps %xmm6, (%rsp) + movaps %xmm7, 16(%rsp) + movaps %xmm8, 32(%rsp) + movaps %xmm9, 48(%rsp) + movaps %xmm10, 64(%rsp) + movaps %xmm11, 80(%rsp) + movaps %xmm12, 96(%rsp) + movaps %xmm13, 112(%rsp) + movaps %xmm14, 128(%rsp) + movaps %xmm15, 
144(%rsp) + movq 48(%rbp), ts +#endif + lea dct36_avx_COS9(%rip), COS9_ + lea dct36_avx_tfcos36(%rip), tfcos36_ + + xorps %xmm4, %xmm4 + movups (in), %xmm0 + movups 16(in), %xmm1 + movups 32(in), %xmm2 + movups 48(in), %xmm3 + movlps 64(in), %xmm4 + vshufps $0x93, %xmm0, %xmm0, %xmm5 + vshufps $0x93, %xmm1, %xmm1, %xmm6 + vshufps $0x93, %xmm2, %xmm2, %xmm7 + vshufps $0x93, %xmm3, %xmm3, %xmm8 + vshufps $0xe1, %xmm4, %xmm4, %xmm9 + movss %xmm8, %xmm9 #[fg--] + addps %xmm9, %xmm4 #[gh--] + movss %xmm7, %xmm8 + addps %xmm8, %xmm3 #[cdef] + movss %xmm6, %xmm7 + addps %xmm7, %xmm2 #[89ab] + movss %xmm5, %xmm6 + addps %xmm6, %xmm1 #[4567] + xorps %xmm6, %xmm6 + movss %xmm6, %xmm5 + addps %xmm5, %xmm0 #[0123] + + vblendps $0x5, %xmm6, %xmm3, %xmm7 + vshufps $0x4e, %xmm4, %xmm3, %xmm4 + addps %xmm7, %xmm4 + vblendps $0x5, %xmm6, %xmm2, %xmm7 + vshufps $0x4e, %xmm3, %xmm2, %xmm3 + addps %xmm7, %xmm3 + vblendps $0x5, %xmm6, %xmm1, %xmm7 + vshufps $0x4e, %xmm2, %xmm1, %xmm2 + addps %xmm7, %xmm2 + vblendps $0x5, %xmm6, %xmm0, %xmm7 + vshufps $0x4e, %xmm1, %xmm0, %xmm1 + addps %xmm7, %xmm1 + vmovlhps %xmm0, %xmm6, %xmm0 + +/* +xmm0 in[-,-,0,1] +xmm1 in[2,3,4,5] +xmm2 in[6,7,8,9] +xmm3 in[10,11,12,13] +xmm4 in[14,15,16,17] +*/ + + vblendps $0xc, %xmm3, %xmm2, %xmm5 + blendps $0xc, %xmm4, %xmm3 + blendps $0xc, %xmm2, %xmm4 + movaps %xmm5, %xmm2 + +/* +xmm2 in[6,7,12,13] +xmm3 in[10,11,16,17] +xmm4 in[14,15,8,9] +*/ + + movaps (COS9_), %xmm15 + movaps 16(COS9_), %xmm6 + movaps 32(COS9_), %xmm7 + movaps 48(COS9_), %xmm8 + vmulps %xmm2, %xmm15, %xmm5 + addps %xmm0, %xmm5 + +/* +xmm5 [ta33,tb33,ta66,tb66] +xmm6 COS9_[1,1,2,2] +xmm7 COS9_[5,5,8,8] +xmm8 COS9_[7,7,4,4] +xmm15 COS9_[3,3,6,6] +*/ + + vmulps %xmm1, %xmm6, %xmm9 + vmulps %xmm3, %xmm7, %xmm12 + vmulps %xmm4, %xmm8, %xmm13 + addps %xmm5, %xmm9 + addps %xmm13, %xmm12 + addps %xmm9, %xmm12 + + vsubps %xmm3, %xmm1, %xmm13 + vshufps $0xe0, %xmm2, %xmm0, %xmm14 + vsubps %xmm14, %xmm0, %xmm14 + subps %xmm4, %xmm13 + mulps %xmm15, 
%xmm13 + addps %xmm14, %xmm13 + + vmulps %xmm1, %xmm7, %xmm9 + vmulps %xmm3, %xmm8, %xmm15 + vmulps %xmm4, %xmm6, %xmm14 + subps %xmm5, %xmm9 + subps %xmm15, %xmm14 + addps %xmm9, %xmm14 + + mulps %xmm1, %xmm8 + mulps %xmm3, %xmm6 + mulps %xmm4, %xmm7 + subps %xmm5, %xmm8 + subps %xmm7, %xmm6 + vaddps %xmm6, %xmm8, %xmm15 + + movss 32(tfcos36_), %xmm5 + subps %xmm1, %xmm0 + subps %xmm2, %xmm4 + addps %xmm3, %xmm0 + addps %xmm4, %xmm0 + shufps $0xaf, %xmm0, %xmm0 + vmulss %xmm5, %xmm0, %xmm11 + +/* +xmm12 [1a-0,1b-0, 2a-0, 2b-0] +xmm13 [1a-1,1b-1, 2a-1, 2b-1] +xmm14 [1a-2,1b-2,-2a-2,-2b-2] +xmm15 [1a-3,1b-3,-2a-3,-2b-3] +*/ + vunpckhps %xmm13, %xmm12, %xmm5 + vunpcklps %xmm13, %xmm12, %xmm12 + vunpckhps %xmm15, %xmm14, %xmm6 + vunpcklps %xmm15, %xmm14, %xmm14 + xorps dct36_avx_sign(%rip), %xmm6 + +/* +xmm12 [1a-0,1a-1,1b-0,1b-1] +xmm5 [2a-0,2a-1,2b-0,2b-1] +xmm14 [1a-2,1a-3,1b-2,1b-3] +xmm6 [2a-2,2a-3,2b-2,2b-3] +*/ + + vmovlhps %xmm14, %xmm12, %xmm0 + movhlps %xmm12, %xmm14 + vmovlhps %xmm6, %xmm5, %xmm1 + vmovhlps %xmm5, %xmm6, %xmm15 + +/* +xmm0 tmp1a +xmm1 tmp2a +xmm14 tmp1b +xmm15 tmp2b +*/ + + movaps (tfcos36_), %xmm6 + movaps 16(tfcos36_), %xmm7 + vsubps %xmm14, %xmm15, %xmm10 + addps %xmm14, %xmm15 + vsubps %xmm0, %xmm1, %xmm14 + addps %xmm1, %xmm0 + vmulps %xmm6, %xmm15, %xmm1 + mulps %xmm10, %xmm7 + +/* +%xmm0 tmp[0,1,2,3] +%xmm1 tmp[17,16,15,14] +%xmm14 tmp[8,7,6,5] +%xmm7 tmp[9,10,11,12] +%xmm11 tmp[13,-,4,-] +*/ + + movups 108(w), %xmm2 + movups 92(w), %xmm3 + shufps $0x1b, %xmm3, %xmm3 + movups 36(w), %xmm4 + movups 20(w), %xmm5 + shufps $0x1b, %xmm5, %xmm5 + vsubps %xmm1, %xmm0, %xmm6 + addps %xmm1, %xmm0 + mulps %xmm0, %xmm2 + mulps %xmm3, %xmm0 + mulps %xmm6, %xmm4 + mulps %xmm5, %xmm6 + movups 36(out1), %xmm1 + movups 20(out1), %xmm3 + shufps $0x1b, %xmm6, %xmm6 + addps %xmm4, %xmm1 + addps %xmm6, %xmm3 + shufps $0x1b, %xmm0, %xmm0 + movups %xmm2, 36(out2) + movups %xmm0, 20(out2) + movss %xmm1, 32*36(ts) + movss %xmm3, 32*20(ts) + movhlps %xmm1, 
%xmm2 + movhlps %xmm3, %xmm4 + movss %xmm2, 32*44(ts) + movss %xmm4, 32*28(ts) + shufps $0xb1, %xmm1, %xmm1 + shufps $0xb1, %xmm3, %xmm3 + movss %xmm1, 32*40(ts) + movss %xmm3, 32*24(ts) + movhlps %xmm1, %xmm2 + movhlps %xmm3, %xmm4 + movss %xmm2, 32*48(ts) + movss %xmm4, 32*32(ts) + + movhlps %xmm11, %xmm0 + movss 124(w), %xmm2 + movss 88(w), %xmm3 + movss 52(w), %xmm4 + movss 16(w), %xmm5 + movss %xmm0, %xmm6 + addss %xmm11, %xmm0 + subss %xmm11, %xmm6 + mulss %xmm0, %xmm2 + mulss %xmm3, %xmm0 + mulss %xmm6, %xmm4 + mulss %xmm5, %xmm6 + addss 52(out1), %xmm4 + addss 16(out1), %xmm6 + movss %xmm2, 52(out2) + movss %xmm0, 16(out2) + movss %xmm4, 32*52(ts) + movss %xmm6, 32*16(ts) + + movaps %xmm14, %xmm0 + movaps %xmm7, %xmm1 + MOVUAPS 128(w), %xmm2 + movups 72(w), %xmm3 + shufps $0x1b, %xmm2, %xmm2 + movlps 56(w), %xmm4 + movhps 64(w), %xmm4 + MOVUAPS (w), %xmm5 + shufps $0x1b, %xmm4, %xmm4 + vsubps %xmm1, %xmm0, %xmm6 + addps %xmm1, %xmm0 + mulps %xmm0, %xmm2 + mulps %xmm3, %xmm0 + mulps %xmm6, %xmm4 + mulps %xmm5, %xmm6 + movlps 56(out1), %xmm1 + movhps 64(out1), %xmm1 + movups (out1), %xmm3 + shufps $0x1b, %xmm4, %xmm4 + addps %xmm6, %xmm3 + addps %xmm4, %xmm1 + shufps $0x1b, %xmm2, %xmm2 + movups %xmm0, (out2) + movlps %xmm2, 56(out2) + movhps %xmm2, 64(out2) + movss %xmm1, 32*56(ts) + movss %xmm3, (ts) + movhlps %xmm1, %xmm2 + movhlps %xmm3, %xmm4 + movss %xmm2, 32*64(ts) + movss %xmm4, 32*8(ts) + shufps $0xb1, %xmm1, %xmm1 + shufps $0xb1, %xmm3, %xmm3 + movss %xmm1, 32*60(ts) + movss %xmm3, 32*4(ts) + movhlps %xmm1, %xmm2 + movhlps %xmm3, %xmm4 + movss %xmm2, 32*68(ts) + movss %xmm4, 32*12(ts) + +#ifdef IS_MSABI + movaps (%rsp), %xmm6 + movaps 16(%rsp), %xmm7 + movaps 32(%rsp), %xmm8 + movaps 48(%rsp), %xmm9 + movaps 64(%rsp), %xmm10 + movaps 80(%rsp), %xmm11 + movaps 96(%rsp), %xmm12 + movaps 112(%rsp), %xmm13 + movaps 128(%rsp), %xmm14 + movaps 144(%rsp), %xmm15 + mov %rbp, %rsp + pop %rbp +#endif + ret + +NONEXEC_STACK Index: 
lib/3rdparty/libmpg123/dct36_sse.S =================================================================== --- lib/3rdparty/libmpg123/dct36_sse.S (revision 0) +++ lib/3rdparty/libmpg123/dct36_sse.S (working copy) @@ -0,0 +1,389 @@ +/* + dct36_sse: SSE optimized dct36 + + copyright 1995-2013 by the mpg123 project - free software under the terms of the LGPL 2.1 + see COPYING and AUTHORS files in distribution or http://mpg123.org + initially written by Taihei Monma +*/ + +#include "mangle.h" + +#define in %edi +#define out1 %edi +#define out2 %edx +#define w %ecx +#define ts %eax +#define COS9_ %eax +#define tfcos36_ %edx +#define tmp %esi + +/* + void dct36_sse(real *inbuf,real *o1,real *o2,real *wintab,real *tsbuf); +*/ + +#ifndef __APPLE__ + .section .rodata +#else + .data +#endif + ALIGN16 +dct36_sse_COS9: + .long 0x3f5db3d7 + .long 0x3f5db3d7 + .long 0x3f000000 + .long 0x3f000000 + .long 0x3f7c1c5c + .long 0x3f7c1c5c + .long 0x3f708fb2 + .long 0x3f708fb2 + .long 0x3f248dbb + .long 0x3f248dbb + .long 0x3e31d0d4 + .long 0x3e31d0d4 + .long 0x3eaf1d44 + .long 0x3eaf1d44 + .long 0x3f441b7d + .long 0x3f441b7d + ALIGN16 +dct36_sse_tfcos36: + .long 0x3f007d2b + .long 0x3f0483ee + .long 0x3f0d3b7d + .long 0x3f1c4257 + .long 0x40b79454 + .long 0x3ff746ea + .long 0x3f976fd9 + .long 0x3f5f2944 + .long 0x3f3504f3 + ALIGN16 +dct36_sse_mask: + .long 0,0xffffffff,0,0xffffffff + ALIGN16 +dct36_sse_sign: + .long 0x80000000,0x80000000,0x80000000,0x80000000 + .text + ALIGN16 + .globl ASM_NAME(dct36_sse) +ASM_NAME(dct36_sse): + push %ebp + mov %esp, %ebp + and $-16, %esp + sub $80, %esp + push %ebx + push %esi + push %edi + call 1f +1: + pop %ebx + lea dct36_sse_COS9-1b(%ebx), COS9_ + lea dct36_sse_tfcos36-1b(%ebx), tfcos36_ + lea 12(%esp), tmp + movl 8(%ebp), in + + xorps %xmm0, %xmm0 + xorps %xmm5, %xmm5 + movlps 64(in), %xmm5 + movups 48(in), %xmm4 + movups 32(in), %xmm3 + movups 16(in), %xmm2 + movups (in), %xmm1 + movaps %xmm5, %xmm6 + shufps $0xe1, %xmm6, %xmm6 + movaps %xmm4, 
%xmm7 + shufps $0x93, %xmm7, %xmm7 + movss %xmm7, %xmm6 + addps %xmm6, %xmm5 + movaps %xmm3, %xmm6 + shufps $0x93, %xmm6, %xmm6 + movss %xmm6, %xmm7 + addps %xmm7, %xmm4 + movaps %xmm2, %xmm7 + shufps $0x93, %xmm7, %xmm7 + movss %xmm7, %xmm6 + addps %xmm6, %xmm3 + movaps %xmm1, %xmm6 + shufps $0x93, %xmm6, %xmm6 + movss %xmm6, %xmm7 + addps %xmm7, %xmm2 + movss %xmm0, %xmm6 + addps %xmm6, %xmm1 + + movaps dct36_sse_mask-1b(%ebx), %xmm0 + movaps %xmm4, %xmm6 + shufps $0x4e, %xmm5, %xmm4 + movaps %xmm3, %xmm7 + shufps $0x4e, %xmm6, %xmm3 + andps %xmm0, %xmm6 + addps %xmm6, %xmm4 + movaps %xmm2, %xmm6 + shufps $0x4e, %xmm7, %xmm2 + andps %xmm0, %xmm7 + addps %xmm7, %xmm3 + movaps %xmm1, %xmm7 + shufps $0x4e, %xmm6, %xmm1 + andps %xmm0, %xmm6 + addps %xmm6, %xmm2 + movaps %xmm7, %xmm6 + andps %xmm0, %xmm7 + xorps %xmm0, %xmm0 + addps %xmm7, %xmm1 + movlhps %xmm6, %xmm0 + +/* +xmm0 in[-,-,0,1] +xmm1 in[2,3,4,5] +xmm2 in[6,7,8,9] +xmm3 in[10,11,12,13] +xmm4 in[14,15,16,17] +*/ + + movaps %xmm2, %xmm5 + shufps $0xe4, %xmm3, %xmm5 + shufps $0xe4, %xmm4, %xmm3 + shufps $0xe4, %xmm2, %xmm4 + movaps %xmm5, %xmm2 + +/* +xmm2 in[6,7,12,13] +xmm3 in[10,11,16,17] +xmm4 in[14,15,8,9] +*/ + + mulps (COS9_), %xmm5 + addps %xmm0, %xmm5 + + movaps %xmm0, (tmp) + movaps %xmm2, 16(tmp) + +/* +0(tmp) in[-,-,0,1] +xmm5 [ta33,tb33,ta66,tb66] +*/ + + movaps %xmm1, %xmm6 + subps %xmm3, %xmm6 + subps %xmm4, %xmm6 + xorps %xmm7, %xmm7 + shufps $0xe0, %xmm2, %xmm7 + mulps (COS9_), %xmm6 + subps %xmm7, %xmm0 + addps %xmm0, %xmm6 + movaps %xmm6, 48(tmp) + + movaps 16(COS9_), %xmm2 + + movaps %xmm1, %xmm0 + movaps %xmm3, %xmm6 + movaps %xmm4, %xmm7 + mulps %xmm2, %xmm0 + mulps 32(COS9_), %xmm6 + mulps 48(COS9_), %xmm7 + addps %xmm5, %xmm0 + addps %xmm7, %xmm6 + addps %xmm6, %xmm0 + movaps %xmm0, 32(tmp) + + movaps %xmm1, %xmm0 + movaps %xmm3, %xmm6 + movaps %xmm4, %xmm7 + mulps 32(COS9_), %xmm0 + mulps 48(COS9_), %xmm6 + mulps %xmm2, %xmm7 + subps %xmm5, %xmm0 + subps %xmm6, %xmm7 + addps %xmm7, 
%xmm0 + movaps %xmm0, 64(tmp) + + movaps %xmm1, %xmm6 + movaps %xmm4, %xmm7 + mulps 48(COS9_), %xmm6 + mulps %xmm3, %xmm2 + mulps 32(COS9_), %xmm7 + subps %xmm5, %xmm6 + subps %xmm7, %xmm2 + addps %xmm2, %xmm6 + + movaps (tmp), %xmm0 + movss 32(tfcos36_), %xmm5 + subps %xmm1, %xmm0 + subps 16(tmp), %xmm4 + addps %xmm3, %xmm0 + addps %xmm4, %xmm0 + shufps $0xaf, %xmm0, %xmm0 + mulss %xmm5, %xmm0 + movaps %xmm0, (tmp) + + movaps 32(tmp), %xmm0 + movaps 48(tmp), %xmm1 + movaps 64(tmp), %xmm2 + +/* +xmm0 [1a-0,1b-0, 2a-0, 2b-0] +xmm1 [1a-1,1b-1, 2a-1, 2b-1] +xmm2 [1a-2,1b-2,-2a-2,-2b-2] +xmm6 [1a-3,1b-3,-2a-3,-2b-3] +*/ + + movaps %xmm0, %xmm3 + unpcklps %xmm1, %xmm0 + unpckhps %xmm1, %xmm3 + movaps %xmm2, %xmm5 + unpcklps %xmm6, %xmm2 + unpckhps %xmm6, %xmm5 + xorps dct36_sse_sign-1b(%ebx), %xmm5 + +/* +xmm0 [1a-0,1a-1,1b-0,1b-1] +xmm3 [2a-0,2a-1,2b-0,2b-1] +xmm2 [1a-2,1a-3,1b-2,1b-3] +xmm5 [2a-2,2a-3,2b-2,2b-3] +*/ + + movaps %xmm0, %xmm1 + movlhps %xmm2, %xmm0 + movhlps %xmm1, %xmm2 + movaps %xmm3, %xmm4 + movlhps %xmm5, %xmm3 + movhlps %xmm4, %xmm5 + +/* +xmm0 tmp1a +xmm3 tmp2a +xmm2 tmp1b +xmm5 tmp2b +*/ + + movaps (tfcos36_), %xmm6 + movaps 16(tfcos36_), %xmm7 + movaps %xmm5, %xmm1 + addps %xmm2, %xmm5 + subps %xmm2, %xmm1 + movaps %xmm3, %xmm2 + addps %xmm0, %xmm3 + subps %xmm0, %xmm2 + mulps %xmm6, %xmm5 + mulps %xmm1, %xmm7 + + movaps %xmm2, 16(tmp) + +/* +%xmm3 tmp[0,1,2,3] +%xmm5 tmp[17,16,15,14] +16(tmp) tmp[8,7,6,5] +%xmm7 tmp[9,10,11,12] +0(tmp) tmp[13,-,4,-] +*/ + + movl 12(%ebp), out1 + movl 16(%ebp), out2 + movl 20(%ebp), w + movl 24(%ebp), ts + + movaps %xmm3, %xmm0 + movaps %xmm5, %xmm1 + movups 108(w), %xmm2 + movups 92(w), %xmm3 + shufps $0x1b, %xmm3, %xmm3 + movups 36(w), %xmm4 + movups 20(w), %xmm5 + shufps $0x1b, %xmm5, %xmm5 + movaps %xmm0, %xmm6 + addps %xmm1, %xmm0 + subps %xmm1, %xmm6 + mulps %xmm0, %xmm2 + mulps %xmm3, %xmm0 + mulps %xmm6, %xmm4 + mulps %xmm5, %xmm6 + movups 36(out1), %xmm1 + movups 20(out1), %xmm3 + shufps $0x1b, %xmm6, 
%xmm6 + addps %xmm4, %xmm1 + addps %xmm6, %xmm3 + shufps $0x1b, %xmm0, %xmm0 + movups %xmm2, 36(out2) + movups %xmm0, 20(out2) + movss %xmm1, 32*36(ts) + movss %xmm3, 32*20(ts) + movhlps %xmm1, %xmm2 + movhlps %xmm3, %xmm4 + movss %xmm2, 32*44(ts) + movss %xmm4, 32*28(ts) + shufps $0xb1, %xmm1, %xmm1 + shufps $0xb1, %xmm3, %xmm3 + movss %xmm1, 32*40(ts) + movss %xmm3, 32*24(ts) + movhlps %xmm1, %xmm2 + movhlps %xmm3, %xmm4 + movss %xmm2, 32*48(ts) + movss %xmm4, 32*32(ts) + + movss 8(tmp), %xmm0 + movss (tmp), %xmm1 + movss 124(w), %xmm2 + movss 88(w), %xmm3 + movss 52(w), %xmm4 + movss 16(w), %xmm5 + movss %xmm0, %xmm6 + addss %xmm1, %xmm0 + subss %xmm1, %xmm6 + mulss %xmm0, %xmm2 + mulss %xmm3, %xmm0 + mulss %xmm6, %xmm4 + mulss %xmm5, %xmm6 + addss 52(out1), %xmm4 + addss 16(out1), %xmm6 + movss %xmm2, 52(out2) + movss %xmm0, 16(out2) + movss %xmm4, 32*52(ts) + movss %xmm6, 32*16(ts) + + movaps 16(tmp), %xmm0 + movaps %xmm7, %xmm1 + MOVUAPS 128(w), %xmm2 + movups 72(w), %xmm3 + shufps $0x1b, %xmm2, %xmm2 + movlps 56(w), %xmm4 + movhps 64(w), %xmm4 + MOVUAPS (w), %xmm5 + shufps $0x1b, %xmm4, %xmm4 + movaps %xmm0, %xmm6 + addps %xmm1, %xmm0 + subps %xmm1, %xmm6 + mulps %xmm0, %xmm2 + mulps %xmm3, %xmm0 + mulps %xmm6, %xmm4 + mulps %xmm5, %xmm6 + movlps 56(out1), %xmm1 + movhps 64(out1), %xmm1 + movups (out1), %xmm3 + shufps $0x1b, %xmm4, %xmm4 + addps %xmm6, %xmm3 + addps %xmm4, %xmm1 + shufps $0x1b, %xmm2, %xmm2 + movups %xmm0, (out2) + movlps %xmm2, 56(out2) + movhps %xmm2, 64(out2) + movss %xmm1, 32*56(ts) + movss %xmm3, (ts) + movhlps %xmm1, %xmm2 + movhlps %xmm3, %xmm4 + movss %xmm2, 32*64(ts) + movss %xmm4, 32*8(ts) + shufps $0xb1, %xmm1, %xmm1 + shufps $0xb1, %xmm3, %xmm3 + movss %xmm1, 32*60(ts) + movss %xmm3, 32*4(ts) + movhlps %xmm1, %xmm2 + movhlps %xmm3, %xmm4 + movss %xmm2, 32*68(ts) + movss %xmm4, 32*12(ts) + + pop %edi + pop %esi + pop %ebx + mov %ebp, %esp + pop %ebp + + ret + +NONEXEC_STACK Index: lib/3rdparty/libmpg123/dct36_x86_64.S 
=================================================================== --- lib/3rdparty/libmpg123/dct36_x86_64.S (revision 0) +++ lib/3rdparty/libmpg123/dct36_x86_64.S (working copy) @@ -0,0 +1,394 @@ +/* + dct36_x86_64: SSE optimized dct36 for x86-64 + + copyright 1995-2013 by the mpg123 project - free software under the terms of the LGPL 2.1 + see COPYING and AUTHORS files in distribution or http://mpg123.org + initially written by Taihei Monma +*/ + +#include "mangle.h" + +#ifdef IS_MSABI +#define in %rcx +#define out1 %rdx +#define out2 %r8 +#define w %r9 +#define ts %r10 +#define COS9_ %rax +#define tfcos36_ %r11 +#else +#define in %rdi +#define out1 %rsi +#define out2 %rdx +#define w %rcx +#define ts %r8 +#define COS9_ %rax +#define tfcos36_ %r9 +#endif + +/* + void dct36_x86_64(real *inbuf,real *o1,real *o2,real *wintab,real *tsbuf); +*/ + +#ifndef __APPLE__ + .section .rodata +#else + .data +#endif + ALIGN16 +dct36_x86_64_COS9: + .long 0x3f5db3d7 + .long 0x3f5db3d7 + .long 0x3f000000 + .long 0x3f000000 + .long 0x3f7c1c5c + .long 0x3f7c1c5c + .long 0x3f708fb2 + .long 0x3f708fb2 + .long 0x3f248dbb + .long 0x3f248dbb + .long 0x3e31d0d4 + .long 0x3e31d0d4 + .long 0x3eaf1d44 + .long 0x3eaf1d44 + .long 0x3f441b7d + .long 0x3f441b7d + ALIGN16 +dct36_x86_64_tfcos36: + .long 0x3f007d2b + .long 0x3f0483ee + .long 0x3f0d3b7d + .long 0x3f1c4257 + .long 0x40b79454 + .long 0x3ff746ea + .long 0x3f976fd9 + .long 0x3f5f2944 + .long 0x3f3504f3 + ALIGN16 +dct36_x86_64_mask: + .long 0,0xffffffff,0,0xffffffff + ALIGN16 +dct36_x86_64_sign: + .long 0x80000000,0x80000000,0x80000000,0x80000000 + .text + ALIGN16 + .globl ASM_NAME(dct36_x86_64) +ASM_NAME(dct36_x86_64): +#ifdef IS_MSABI + push %rbp + mov %rsp, %rbp + sub $160, %rsp + movaps %xmm6, (%rsp) + movaps %xmm7, 16(%rsp) + movaps %xmm8, 32(%rsp) + movaps %xmm9, 48(%rsp) + movaps %xmm10, 64(%rsp) + movaps %xmm11, 80(%rsp) + movaps %xmm12, 96(%rsp) + movaps %xmm13, 112(%rsp) + movaps %xmm14, 128(%rsp) + movaps %xmm15, 144(%rsp) + 
movq 48(%rbp), ts +#endif + lea dct36_x86_64_COS9(%rip), COS9_ + lea dct36_x86_64_tfcos36(%rip), tfcos36_ + + xorps %xmm5, %xmm5 + movups (in), %xmm1 + movups 16(in), %xmm2 + movups 32(in), %xmm3 + movups 48(in), %xmm4 + movlps 64(in), %xmm5 + xorps %xmm6, %xmm6 + movaps %xmm1, %xmm7 + shufps $0x93, %xmm7, %xmm7 + movaps %xmm2, %xmm8 + shufps $0x93, %xmm8, %xmm8 + movaps %xmm3, %xmm9 + shufps $0x93, %xmm9, %xmm9 + movaps %xmm4, %xmm10 + shufps $0x93, %xmm10, %xmm10 + movaps %xmm5, %xmm11 + shufps $0xe1, %xmm11, %xmm11 + movss %xmm10, %xmm11 + addps %xmm11, %xmm5 + movss %xmm9, %xmm10 + addps %xmm10, %xmm4 + movss %xmm8, %xmm9 + addps %xmm9, %xmm3 + movss %xmm7, %xmm8 + addps %xmm8, %xmm2 + movss %xmm6, %xmm7 + addps %xmm7, %xmm1 + + movaps dct36_x86_64_mask(%rip), %xmm0 + movaps %xmm4, %xmm6 + shufps $0x4e, %xmm5, %xmm4 + movaps %xmm3, %xmm7 + shufps $0x4e, %xmm6, %xmm3 + andps %xmm0, %xmm6 + addps %xmm6, %xmm4 + movaps %xmm2, %xmm6 + shufps $0x4e, %xmm7, %xmm2 + andps %xmm0, %xmm7 + addps %xmm7, %xmm3 + movaps %xmm1, %xmm7 + shufps $0x4e, %xmm6, %xmm1 + andps %xmm0, %xmm6 + addps %xmm6, %xmm2 + movaps %xmm7, %xmm6 + andps %xmm0, %xmm7 + xorps %xmm0, %xmm0 + addps %xmm7, %xmm1 + movlhps %xmm6, %xmm0 + +/* +xmm0 in[-,-,0,1] +xmm1 in[2,3,4,5] +xmm2 in[6,7,8,9] +xmm3 in[10,11,12,13] +xmm4 in[14,15,16,17] +*/ + + movaps %xmm2, %xmm5 + shufps $0xe4, %xmm3, %xmm5 + shufps $0xe4, %xmm4, %xmm3 + shufps $0xe4, %xmm2, %xmm4 + movaps %xmm5, %xmm2 +/* +xmm2 in[6,7,12,13] +xmm3 in[10,11,16,17] +xmm4 in[14,15,8,9] +*/ + + movaps (COS9_), %xmm15 + movaps 16(COS9_), %xmm6 + movaps 32(COS9_), %xmm7 + movaps 48(COS9_), %xmm8 + mulps %xmm15, %xmm5 + addps %xmm0, %xmm5 + +/* +xmm5 [ta33,tb33,ta66,tb66] +xmm6 COS9_[1,1,2,2] +xmm7 COS9_[5,5,8,8] +xmm8 COS9_[7,7,4,4] +xmm15 COS9_[3,3,6,6] +*/ + movaps %xmm6, %xmm9 + movaps %xmm7, %xmm12 + movaps %xmm8, %xmm13 + mulps %xmm1, %xmm9 + mulps %xmm3, %xmm12 + mulps %xmm4, %xmm13 + addps %xmm5, %xmm9 + addps %xmm13, %xmm12 + addps %xmm9, %xmm12 
+ + movaps %xmm1, %xmm13 + subps %xmm3, %xmm13 + movaps %xmm0, %xmm10 + shufps $0xe0, %xmm2, %xmm10 + movaps %xmm0, %xmm14 + subps %xmm10, %xmm14 + subps %xmm4, %xmm13 + mulps %xmm15, %xmm13 + addps %xmm14, %xmm13 + + movaps %xmm7, %xmm9 + movaps %xmm8, %xmm15 + movaps %xmm6, %xmm14 + mulps %xmm1, %xmm9 + mulps %xmm3, %xmm15 + mulps %xmm4, %xmm14 + subps %xmm5, %xmm9 + subps %xmm15, %xmm14 + addps %xmm9, %xmm14 + + mulps %xmm1, %xmm8 + mulps %xmm3, %xmm6 + mulps %xmm4, %xmm7 + subps %xmm5, %xmm8 + subps %xmm7, %xmm6 + addps %xmm6, %xmm8 + movaps %xmm8, %xmm15 + + movss 32(tfcos36_), %xmm5 + subps %xmm1, %xmm0 + subps %xmm2, %xmm4 + addps %xmm3, %xmm0 + addps %xmm4, %xmm0 + shufps $0xaf, %xmm0, %xmm0 + mulss %xmm5, %xmm0 + movaps %xmm0, %xmm11 + +/* +xmm12 [1a-0,1b-0, 2a-0, 2b-0] +xmm13 [1a-1,1b-1, 2a-1, 2b-1] +xmm14 [1a-2,1b-2,-2a-2,-2b-2] +xmm15 [1a-3,1b-3,-2a-3,-2b-3] +*/ + movaps %xmm12, %xmm5 + unpckhps %xmm13, %xmm5 + unpcklps %xmm13, %xmm12 + movaps %xmm14, %xmm6 + unpckhps %xmm15, %xmm6 + unpcklps %xmm15, %xmm14 + xorps dct36_x86_64_sign(%rip), %xmm6 + +/* +xmm12 [1a-0,1a-1,1b-0,1b-1] +xmm5 [2a-0,2a-1,2b-0,2b-1] +xmm14 [1a-2,1a-3,1b-2,1b-3] +xmm6 [2a-2,2a-3,2b-2,2b-3] +*/ + + movaps %xmm12, %xmm0 + movlhps %xmm14, %xmm12 + movhlps %xmm0, %xmm14 + movaps %xmm5, %xmm0 + movlhps %xmm6, %xmm0 + movhlps %xmm5, %xmm6 + movaps %xmm6, %xmm15 + +/* +xmm12 tmp1a +xmm0 tmp2a +xmm14 tmp1b +xmm15 tmp2b +*/ + + movaps (tfcos36_), %xmm6 + movaps 16(tfcos36_), %xmm7 + movaps %xmm15, %xmm10 + addps %xmm14, %xmm15 + subps %xmm14, %xmm10 + movaps %xmm0, %xmm14 + addps %xmm12, %xmm0 + subps %xmm12, %xmm14 + mulps %xmm6, %xmm15 + mulps %xmm10, %xmm7 + +/* +%xmm0 tmp[0,1,2,3] +%xmm15 tmp[17,16,15,14] +%xmm14 tmp[8,7,6,5] +%xmm7 tmp[9,10,11,12] +%xmm11 tmp[13,-,4,-] +*/ + + movaps %xmm15, %xmm1 + movups 108(w), %xmm2 + movups 92(w), %xmm3 + shufps $0x1b, %xmm3, %xmm3 + movups 36(w), %xmm4 + movups 20(w), %xmm5 + shufps $0x1b, %xmm5, %xmm5 + movaps %xmm0, %xmm6 + addps %xmm1, %xmm0 
+ subps %xmm1, %xmm6 + mulps %xmm0, %xmm2 + mulps %xmm3, %xmm0 + mulps %xmm6, %xmm4 + mulps %xmm5, %xmm6 + movups 36(out1), %xmm1 + movups 20(out1), %xmm3 + shufps $0x1b, %xmm6, %xmm6 + addps %xmm4, %xmm1 + addps %xmm6, %xmm3 + shufps $0x1b, %xmm0, %xmm0 + movups %xmm2, 36(out2) + movups %xmm0, 20(out2) + movss %xmm1, 32*36(ts) + movss %xmm3, 32*20(ts) + movhlps %xmm1, %xmm2 + movhlps %xmm3, %xmm4 + movss %xmm2, 32*44(ts) + movss %xmm4, 32*28(ts) + shufps $0xb1, %xmm1, %xmm1 + shufps $0xb1, %xmm3, %xmm3 + movss %xmm1, 32*40(ts) + movss %xmm3, 32*24(ts) + movhlps %xmm1, %xmm2 + movhlps %xmm3, %xmm4 + movss %xmm2, 32*48(ts) + movss %xmm4, 32*32(ts) + + movhlps %xmm11, %xmm0 + movaps %xmm11, %xmm1 + movss 124(w), %xmm2 + movss 88(w), %xmm3 + movss 52(w), %xmm4 + movss 16(w), %xmm5 + movss %xmm0, %xmm6 + addss %xmm1, %xmm0 + subss %xmm1, %xmm6 + mulss %xmm0, %xmm2 + mulss %xmm3, %xmm0 + mulss %xmm6, %xmm4 + mulss %xmm5, %xmm6 + addss 52(out1), %xmm4 + addss 16(out1), %xmm6 + movss %xmm2, 52(out2) + movss %xmm0, 16(out2) + movss %xmm4, 32*52(ts) + movss %xmm6, 32*16(ts) + + movaps %xmm14, %xmm0 + movaps %xmm7, %xmm1 + MOVUAPS 128(w), %xmm2 + movups 72(w), %xmm3 + shufps $0x1b, %xmm2, %xmm2 + movlps 56(w), %xmm4 + movhps 64(w), %xmm4 + MOVUAPS (w), %xmm5 + shufps $0x1b, %xmm4, %xmm4 + movaps %xmm0, %xmm6 + addps %xmm1, %xmm0 + subps %xmm1, %xmm6 + mulps %xmm0, %xmm2 + mulps %xmm3, %xmm0 + mulps %xmm6, %xmm4 + mulps %xmm5, %xmm6 + movlps 56(out1), %xmm1 + movhps 64(out1), %xmm1 + movups (out1), %xmm3 + shufps $0x1b, %xmm4, %xmm4 + addps %xmm6, %xmm3 + addps %xmm4, %xmm1 + shufps $0x1b, %xmm2, %xmm2 + movups %xmm0, (out2) + movlps %xmm2, 56(out2) + movhps %xmm2, 64(out2) + movss %xmm1, 32*56(ts) + movss %xmm3, (ts) + movhlps %xmm1, %xmm2 + movhlps %xmm3, %xmm4 + movss %xmm2, 32*64(ts) + movss %xmm4, 32*8(ts) + shufps $0xb1, %xmm1, %xmm1 + shufps $0xb1, %xmm3, %xmm3 + movss %xmm1, 32*60(ts) + movss %xmm3, 32*4(ts) + movhlps %xmm1, %xmm2 + movhlps %xmm3, %xmm4 + movss %xmm2, 
32*68(ts) + movss %xmm4, 32*12(ts) + +#ifdef IS_MSABI + movaps (%rsp), %xmm6 + movaps 16(%rsp), %xmm7 + movaps 32(%rsp), %xmm8 + movaps 48(%rsp), %xmm9 + movaps 64(%rsp), %xmm10 + movaps 80(%rsp), %xmm11 + movaps 96(%rsp), %xmm12 + movaps 112(%rsp), %xmm13 + movaps 128(%rsp), %xmm14 + movaps 144(%rsp), %xmm15 + mov %rbp, %rsp + pop %rbp +#endif + ret + +NONEXEC_STACK Index: lib/3rdparty/libmpg123/dct64_avx.S =================================================================== --- lib/3rdparty/libmpg123/dct64_avx.S (revision 0) +++ lib/3rdparty/libmpg123/dct64_avx.S (working copy) @@ -0,0 +1,324 @@ +/* + dct64_avx: AVX optimized dct64 for x86-64 + + copyright 1995-2013 by the mpg123 project - free software under the terms of the LGPL 2.1 + see COPYING and AUTHORS files in distribution or http://mpg123.org + initially written by Taihei Monma +*/ + +#include "mangle.h" + +#define samples %rdx +#define costab %rcx +#define out0 %rdi +#define out1 %rsi + +/* + void dct64_avx(short *out0, short *out1, real *samples); +*/ + +#ifndef __APPLE__ + .section .rodata +#else + .data +#endif + ALIGN32 +costab_avx: + .long 1056974725 + .long 1057056395 + .long 1057223771 + .long 1057485416 + .long 1057855544 + .long 1058356026 + .long 1059019886 + .long 1059897405 + .long 1061067246 + .long 1062657950 + .long 1064892987 + .long 1066774581 + .long 1069414683 + .long 1073984175 + .long 1079645762 + .long 1092815430 + .long 1057005197 + .long 1057342072 + .long 1058087743 + .long 1059427869 + .long 1061799040 + .long 1065862217 + .long 1071413542 + .long 1084439708 + .long 1057128951 + .long 1058664893 + .long 1063675095 + .long 1076102863 + .long 1057655764 + .long 1067924853 + .long 1060439283 + .long 0 + .text + ALIGN16 +.globl ASM_NAME(dct64_avx) +ASM_NAME(dct64_avx): +#ifdef IS_MSABI + push %rbp + mov %rsp, %rbp + sub $112, %rsp + movaps %xmm6, (%rsp) + movaps %xmm7, 16(%rsp) + movaps %xmm8, 32(%rsp) + movaps %xmm9, 48(%rsp) + movaps %xmm10, 64(%rsp) + movaps %xmm11, 80(%rsp) +
movaps %xmm12, 96(%rsp) + push %rdi + push %rsi + mov %rcx, %rdi + mov %rdx, %rsi + mov %r8, %rdx +#endif + leaq costab_avx(%rip), costab + + vmovups (samples), %ymm0 # input[0,1,2,3,4,5,6,7] + vmovups 32(samples), %ymm1 # input[8,9,10,11,12,13,14,15] + vperm2f128 $0x23, 64(samples), %ymm2, %ymm2 + vperm2f128 $0x23, 96(samples), %ymm3, %ymm3 + vshufps $0x1b, %ymm2, %ymm2, %ymm2 # input[23,22,21,20,19,18,17,16] + vshufps $0x1b, %ymm3, %ymm3, %ymm3 # input[31,30,29,28,27,26,25,24] + vsubps %ymm2, %ymm1, %ymm6 + vsubps %ymm3, %ymm0, %ymm7 + vaddps %ymm0, %ymm3, %ymm4 # bufs[0,1,2,3,4,5,6,7] + vaddps %ymm1, %ymm2, %ymm5 # bufs[8,9,10,11,12,13,14,15] + vmulps (costab), %ymm7, %ymm7 # bufs[31,30,29,28,27,26,25,24] cos64[0,1,2,3,4,5,6,7] + vmulps 32(costab), %ymm6, %ymm6 # bufs[23,22,21,20,19,18,17,16] cos64[8,9,10,11,12,13,14,15] + + vmovaps 64(costab), %ymm8 # cos32[0,1,2,3,4,5,6,7] + + vshufps $0x1b, %ymm5, %ymm5, %ymm5 + vshufps $0x1b, %ymm6, %ymm6, %ymm6 + vperm2f128 $0x01, %ymm5, %ymm5, %ymm5 # bufs[15,14,13,12,11,10,9,8] + vperm2f128 $0x01, %ymm6, %ymm6, %ymm6 # bufs[16,17,18,19,20,21,22,23] + vsubps %ymm5, %ymm4, %ymm1 + vsubps %ymm6, %ymm7, %ymm3 + vaddps %ymm5, %ymm4, %ymm0 # bufs[32,33,34,35,36,37,38,39] + vaddps %ymm6, %ymm7, %ymm2 # bufs[48,49,50,51,52,53,54,55] + vmulps %ymm1, %ymm8, %ymm1 # bufs[47,46,45,44,43,42,41,40] + vmulps %ymm3, %ymm8, %ymm3 # bufs[63,62,61,60,59,58,57,56] + + vmovaps 96(costab), %ymm8 # cos16[0,1,2,3]:cos8[0,1]:cos4[0]:- + vperm2f128 $0x00, %ymm8, %ymm8, %ymm9 # cos16[0,1,2,3,0,1,2,3] + + vperm2f128 $0x20, %ymm1, %ymm0, %ymm4 # bufs[32,33,34,35,47,46,45,44] + vperm2f128 $0x31, %ymm1, %ymm0, %ymm5 + vshufps $0x1b, %ymm5, %ymm5, %ymm5 # bufs[39,38,37,36,40,41,42,43] + vperm2f128 $0x20, %ymm3, %ymm2, %ymm6 # bufs[48,49,50,51,63,62,61,60] + vperm2f128 $0x31, %ymm3, %ymm2, %ymm7 + vshufps $0x1b, %ymm7, %ymm7, %ymm7 # bufs[55,54,53,52,56,57,58,59] + vsubps %ymm5, %ymm4, %ymm1 + vsubps %ymm7, %ymm6, %ymm3 + vaddps %ymm5, %ymm4, %ymm0 # 
bufs[0,1,2,3,8,9,10,11] + vaddps %ymm7, %ymm6, %ymm2 # bufs[16,17,18,19,24,25,26,27] + vmulps %ymm1, %ymm9, %ymm1 # bufs[7,6,5,4,15,14,13,12] + vmulps %ymm3, %ymm9, %ymm3 # bufs[23,22,21,20,31,30,29,28] + + vperm2f128 $0x11, %ymm8, %ymm8, %ymm8 # cos8[0,1]:cos4[0]:-:cos8[0,1]:cos4[0]:- + vmovddup %ymm8, %ymm9 # cos8[0,1,0,1,0,1,0,1] + + vunpcklps %ymm1, %ymm0, %ymm4 # bufs[0,7,1,6,8,15,9,14] + vunpckhps %ymm1, %ymm0, %ymm5 # bufs[2,5,3,4,10,13,11,12] + vunpcklps %ymm3, %ymm2, %ymm6 # bufs[16,23,17,22,24,31,25,30] + vunpckhps %ymm3, %ymm2, %ymm7 # bufs[18,21,19,20,26,29,27,28] + vshufps $0xd8, %ymm4, %ymm4, %ymm4 # bufs[0,1,7,6,8,9,15,14] + vshufps $0x72, %ymm5, %ymm5, %ymm5 # bufs[3,2,4,5,11,10,12,13] + vshufps $0xd8, %ymm6, %ymm6, %ymm6 # bufs[16,17,23,22,24,25,31,30] + vshufps $0x72, %ymm7, %ymm7, %ymm7 # bufs[19,18,20,21,27,26,28,29] + vsubps %ymm5, %ymm4, %ymm1 + vsubps %ymm7, %ymm6, %ymm3 + vaddps %ymm5, %ymm4, %ymm0 # bufs[32,33,36,37,40,41,44,45] + vaddps %ymm7, %ymm6, %ymm2 # bufs[48,49,52,53,56,57,60,61] + vmulps %ymm1, %ymm9, %ymm1 # bufs[35,34,39,38,43,42,47,46] + vmulps %ymm3, %ymm9, %ymm3 # bufs[51,50,55,54,59,58,63,62] + + vpermilps $0xaa, %ymm8, %ymm8 # cos4[0,0,0,0,0,0,0,0] + + vshufps $0xd8, %ymm0, %ymm0, %ymm0 # bufs[32,36,33,37,40,44,41,45] + vshufps $0xd8, %ymm1, %ymm1, %ymm1 # bufs[35,39,34,38,43,47,42,46] + vshufps $0xd8, %ymm2, %ymm2, %ymm2 # bufs[48,52,49,53,56,60,57,61] + vshufps $0xd8, %ymm3, %ymm3, %ymm3 # bufs[51,55,50,54,59,63,58,62] + vunpcklps %ymm1, %ymm0, %ymm4 # bufs[32,35,36,39,40,43,44,47] + vunpckhps %ymm1, %ymm0, %ymm5 # bufs[33,34,37,38,41,42,45,46] + vunpcklps %ymm3, %ymm2, %ymm6 # bufs[48,51,52,55,56,59,60,63] + vunpckhps %ymm3, %ymm2, %ymm7 # bufs[49,50,53,54,57,58,61,62] + vsubps %ymm5, %ymm4, %ymm1 + vsubps %ymm7, %ymm6, %ymm3 + vaddps %ymm5, %ymm4, %ymm0 # bufs[0,2,4,6,8,10,12,14] + vaddps %ymm7, %ymm6, %ymm2 # bufs[16,18,20,22,24,26,28,30] + vmulps %ymm1, %ymm8, %ymm1 # bufs[1,3,5,7,9,11,13,15] + vmulps %ymm3, %ymm8, 
%ymm3 # bufs[17,19,21,23,25,27,29,31] + + vxorps %ymm8, %ymm8, %ymm8 + vblendps $0xaa, %ymm1, %ymm8, %ymm5 + vblendps $0xaa, %ymm3, %ymm8, %ymm6 + vaddps %ymm5, %ymm0, %ymm0 + vaddps %ymm6, %ymm2, %ymm2 + vunpcklps %ymm1, %ymm0, %ymm4 # bufs[0,1,2,3,8,9,10,11] + vunpckhps %ymm1, %ymm0, %ymm5 # bufs[4,5,6,7,12,13,14,15] + vunpcklps %ymm3, %ymm2, %ymm6 # bufs[16,17,18,19,24,25,26,27] + vunpckhps %ymm3, %ymm2, %ymm7 # bufs[20,21,22,23,28,29,30,31] + + vextractf128 $0x1, %ymm4, %xmm0 # bufs[8,9,10,11] + vextractf128 $0x1, %ymm5, %xmm1 # bufs[12,13,14,15] + vextractf128 $0x1, %ymm6, %xmm2 # bufs[24,25,26,27] + vextractf128 $0x1, %ymm7, %xmm3 # bufs[28,29,30,31] + + vshufps $0x1e, %xmm5, %xmm5, %xmm9 # bufs[6,7,5,4] + vshufps $0x1e, %xmm1, %xmm1, %xmm10 # bufs[14,15,13,12] + vshufps $0x1e, %xmm7, %xmm7, %xmm11 # bufs[22,23,21,20] + vshufps $0x1e, %xmm3, %xmm3, %xmm12 # bufs[30,31,29,28] + vblendps $0x7, %xmm9, %xmm8, %xmm9 # bufs[6,7,5,-] + vblendps $0x7, %xmm10, %xmm8, %xmm10 # bufs[14,15,13,-] + vblendps $0x7, %xmm11, %xmm8, %xmm11 # bufs[22,23,21,-] + vblendps $0x7, %xmm12, %xmm8, %xmm12 # bufs[30,31,29,-] + vaddps %xmm5, %xmm9, %xmm5 + vaddps %xmm1, %xmm10, %xmm1 + vaddps %xmm7, %xmm11, %xmm7 + vaddps %xmm3, %xmm12, %xmm3 + + prefetcht0 512(out0) + + vshufps $0x1e, %xmm0, %xmm0, %xmm9 # bufs[10,11,9,8] + vshufps $0x1e, %xmm2, %xmm2, %xmm10 # bufs[26,27,25,24] + vaddps %xmm1, %xmm0, %xmm0 + vaddps %xmm3, %xmm2, %xmm2 + vblendps $0x7, %xmm9, %xmm8, %xmm9 # bufs[10,11,9,-] + vblendps $0x7, %xmm10, %xmm8, %xmm10 # bufs[26,27,25,-] + vaddps %xmm1, %xmm9, %xmm1 + vaddps %xmm3, %xmm10, %xmm3 + + vzeroupper + prefetcht0 512(out1) + + cvtps2dq %xmm4, %xmm4 + cvtps2dq %xmm0, %xmm0 + cvtps2dq %xmm5, %xmm5 + cvtps2dq %xmm1, %xmm1 + packssdw %xmm5, %xmm4 + packssdw %xmm1, %xmm0 + movq %xmm4, %rcx + pshufd $0x4e, %xmm4, %xmm5 + movq %xmm0, %rdx + pshufd $0x4e, %xmm0, %xmm1 + movq %xmm5, %r8 + movq %xmm1, %r9 + + addq $512, out0 + movq $-64, %rax + + movw %cx, (out0) + movw %dx, 
(out0,%rax,1) + movw %r8w, (out0,%rax,2) + movw %r9w, -64(out0,%rax,2) + leaq (out0,%rax,4), out0 + shr $16, %rcx + shr $16, %rdx + shr $16, %r8 + shr $16, %r9 + movw %cx, (out0,%rax,4) + negq %rax + movw %cx, (out1) + movw %dx, (out1,%rax,1) + movw %r8w, (out1,%rax,2) + movw %r9w, 64(out1,%rax,2) + leaq (out1,%rax,4), out1 + shr $16, %rcx + shr $16, %rdx + shr $16, %r8 + shr $16, %r9 + negq %rax + movw %cx, (out0) + movw %dx, (out0,%rax,1) + movw %r8w, (out0,%rax,2) + movw %r9w, -64(out0,%rax,2) + shr $16, %rcx + shr $16, %rdx + shr $16, %r8 + shr $16, %r9 + negq %rax + movw %cx, (out1) + movw %dx, (out1,%rax,1) + movw %r8w, (out1,%rax,2) + movw %r9w, 64(out1,%rax,2) + + leaq -32(out0,%rax,4), out0 + negq %rax + leaq 32(out1,%rax,4), out1 + + vshufps $0x1e, %xmm6, %xmm6, %xmm0 + vblendps $0x7, %xmm0, %xmm8, %xmm0 + addps %xmm2, %xmm6 + addps %xmm7, %xmm2 + addps %xmm3, %xmm7 + addps %xmm0, %xmm3 + cvtps2dq %xmm6, %xmm6 + cvtps2dq %xmm2, %xmm2 + cvtps2dq %xmm7, %xmm7 + cvtps2dq %xmm3, %xmm3 + packssdw %xmm7, %xmm6 + packssdw %xmm3, %xmm2 + movq %xmm6, %rcx + pshufd $0x4e, %xmm6, %xmm7 + movq %xmm2, %rdx + pshufd $0x4e, %xmm2, %xmm3 + movq %xmm7, %r8 + movq %xmm3, %r9 + + movw %cx, (out0) + movw %dx, (out0,%rax,1) + movw %r8w, (out0,%rax,2) + movw %r9w, -64(out0,%rax,2) + leaq (out0,%rax,4), out0 + shr $16, %rcx + shr $16, %rdx + shr $16, %r8 + shr $16, %r9 + negq %rax + movw %cx, (out1) + movw %dx, (out1,%rax,1) + movw %r8w, (out1,%rax,2) + movw %r9w, 64(out1,%rax,2) + leaq (out1,%rax,4), out1 + shr $16, %rcx + shr $16, %rdx + shr $16, %r8 + shr $16, %r9 + negq %rax + movw %cx, (out0) + movw %dx, (out0,%rax,1) + movw %r8w, (out0,%rax,2) + movw %r9w, -64(out0,%rax,2) + shr $16, %rcx + shr $16, %rdx + shr $16, %r8 + shr $16, %r9 + negq %rax + movw %cx, (out1) + movw %dx, (out1,%rax,1) + movw %r8w, (out1,%rax,2) + movw %r9w, 64(out1,%rax,2) + +#ifdef IS_MSABI + pop %rsi + pop %rdi + movaps (%rsp), %xmm6 + movaps 16(%rsp), %xmm7 + movaps 32(%rsp), %xmm8 + movaps 
48(%rsp), %xmm9 + movaps 64(%rsp), %xmm10 + movaps 80(%rsp), %xmm11 + movaps 96(%rsp), %xmm12 + mov %rbp, %rsp + pop %rbp +#endif + ret + +NONEXEC_STACK Index: lib/3rdparty/libmpg123/dct64_avx_float.S =================================================================== --- lib/3rdparty/libmpg123/dct64_avx_float.S (revision 0) +++ lib/3rdparty/libmpg123/dct64_avx_float.S (working copy) @@ -0,0 +1,294 @@ +/* + dct64_x86_64_float: SSE optimized dct64 for x86-64 (float output version) + + copyright 1995-2013 by the mpg123 project - free software under the terms of the LGPL 2.1 + see COPYING and AUTHORS files in distribution or http://mpg123.org + initially written by Taihei Monma +*/ + +#include "mangle.h" + +#define samples %rdx +#define costab %rcx +#define out0 %rdi +#define out1 %rsi + +/* + void dct64_real_avx(real *out0, real *out1, real *samples); +*/ + +#ifndef __APPLE__ + .section .rodata +#else + .data +#endif + ALIGN32 +costab_avx: + .long 1056974725 + .long 1057056395 + .long 1057223771 + .long 1057485416 + .long 1057855544 + .long 1058356026 + .long 1059019886 + .long 1059897405 + .long 1061067246 + .long 1062657950 + .long 1064892987 + .long 1066774581 + .long 1069414683 + .long 1073984175 + .long 1079645762 + .long 1092815430 + .long 1057005197 + .long 1057342072 + .long 1058087743 + .long 1059427869 + .long 1061799040 + .long 1065862217 + .long 1071413542 + .long 1084439708 + .long 1057128951 + .long 1058664893 + .long 1063675095 + .long 1076102863 + .long 1057655764 + .long 1067924853 + .long 1060439283 + .long 0 + .text + ALIGN16 +.globl ASM_NAME(dct64_real_avx) +ASM_NAME(dct64_real_avx): +#ifdef IS_MSABI + push %rbp + mov %rsp, %rbp + sub $112, %rsp + movaps %xmm6, (%rsp) + movaps %xmm7, 16(%rsp) + movaps %xmm8, 32(%rsp) + movaps %xmm9, 48(%rsp) + movaps %xmm10, 64(%rsp) + movaps %xmm11, 80(%rsp) + movaps %xmm12, 96(%rsp) + push %rdi + push %rsi + mov %rcx, %rdi + mov %rdx, %rsi + mov %r8, %rdx +#endif + leaq costab_avx(%rip), costab + + vmovups 
(samples), %ymm0 # input[0,1,2,3,4,5,6,7] + vmovups 32(samples), %ymm1 # input[8,9,10,11,12,13,14,15] + vperm2f128 $0x23, 64(samples), %ymm2, %ymm2 + vperm2f128 $0x23, 96(samples), %ymm3, %ymm3 + vshufps $0x1b, %ymm2, %ymm2, %ymm2 # input[23,22,21,20,19,18,17,16] + vshufps $0x1b, %ymm3, %ymm3, %ymm3 # input[31,30,29,28,27,26,25,24] + vsubps %ymm2, %ymm1, %ymm6 + vsubps %ymm3, %ymm0, %ymm7 + vaddps %ymm0, %ymm3, %ymm4 # bufs[0,1,2,3,4,5,6,7] + vaddps %ymm1, %ymm2, %ymm5 # bufs[8,9,10,11,12,13,14,15] + vmulps (costab), %ymm7, %ymm7 # bufs[31,30,29,28,27,26,25,24] cos64[0,1,2,3,4,5,6,7] + vmulps 32(costab), %ymm6, %ymm6 # bufs[23,22,21,20,19,18,17,16] cos64[8,9,10,11,12,13,14,15] + + vmovaps 64(costab), %ymm8 # cos32[0,1,2,3,4,5,6,7] + + vshufps $0x1b, %ymm5, %ymm5, %ymm5 + vshufps $0x1b, %ymm6, %ymm6, %ymm6 + vperm2f128 $0x01, %ymm5, %ymm5, %ymm5 # bufs[15,14,13,12,11,10,9,8] + vperm2f128 $0x01, %ymm6, %ymm6, %ymm6 # bufs[16,17,18,19,20,21,22,23] + vsubps %ymm5, %ymm4, %ymm1 + vsubps %ymm6, %ymm7, %ymm3 + vaddps %ymm5, %ymm4, %ymm0 # bufs[32,33,34,35,36,37,38,39] + vaddps %ymm6, %ymm7, %ymm2 # bufs[48,49,50,51,52,53,54,55] + vmulps %ymm1, %ymm8, %ymm1 # bufs[47,46,45,44,43,42,41,40] + vmulps %ymm3, %ymm8, %ymm3 # bufs[63,62,61,60,59,58,57,56] + + vmovaps 96(costab), %ymm8 # cos16[0,1,2,3]:cos8[0,1]:cos4[0]:- + vperm2f128 $0x00, %ymm8, %ymm8, %ymm9 # cos16[0,1,2,3,0,1,2,3] + + vperm2f128 $0x20, %ymm1, %ymm0, %ymm4 # bufs[32,33,34,35,47,46,45,44] + vperm2f128 $0x31, %ymm1, %ymm0, %ymm5 + vshufps $0x1b, %ymm5, %ymm5, %ymm5 # bufs[39,38,37,36,40,41,42,43] + vperm2f128 $0x20, %ymm3, %ymm2, %ymm6 # bufs[48,49,50,51,63,62,61,60] + vperm2f128 $0x31, %ymm3, %ymm2, %ymm7 + vshufps $0x1b, %ymm7, %ymm7, %ymm7 # bufs[55,54,53,52,56,57,58,59] + vsubps %ymm5, %ymm4, %ymm1 + vsubps %ymm7, %ymm6, %ymm3 + vaddps %ymm5, %ymm4, %ymm0 # bufs[0,1,2,3,8,9,10,11] + vaddps %ymm7, %ymm6, %ymm2 # bufs[16,17,18,19,24,25,26,27] + vmulps %ymm1, %ymm9, %ymm1 # bufs[7,6,5,4,15,14,13,12] + vmulps 
%ymm3, %ymm9, %ymm3 # bufs[23,22,21,20,31,30,29,28] + + vperm2f128 $0x11, %ymm8, %ymm8, %ymm8 # cos8[0,1]:cos4[0]:-:cos8[0,1]:cos4[0]:- + vmovddup %ymm8, %ymm9 # cos8[0,1,0,1,0,1,0,1] + + vunpcklps %ymm1, %ymm0, %ymm4 # bufs[0,7,1,6,8,15,9,14] + vunpckhps %ymm1, %ymm0, %ymm5 # bufs[2,5,3,4,10,13,11,12] + vunpcklps %ymm3, %ymm2, %ymm6 # bufs[16,23,17,22,24,31,25,30] + vunpckhps %ymm3, %ymm2, %ymm7 # bufs[18,21,19,20,26,29,27,28] + vshufps $0xd8, %ymm4, %ymm4, %ymm4 # bufs[0,1,7,6,8,9,15,14] + vshufps $0x72, %ymm5, %ymm5, %ymm5 # bufs[3,2,4,5,11,10,12,13] + vshufps $0xd8, %ymm6, %ymm6, %ymm6 # bufs[16,17,23,22,24,25,31,30] + vshufps $0x72, %ymm7, %ymm7, %ymm7 # bufs[19,18,20,21,27,26,28,29] + vsubps %ymm5, %ymm4, %ymm1 + vsubps %ymm7, %ymm6, %ymm3 + vaddps %ymm5, %ymm4, %ymm0 # bufs[32,33,36,37,40,41,44,45] + vaddps %ymm7, %ymm6, %ymm2 # bufs[48,49,52,53,56,57,60,61] + vmulps %ymm1, %ymm9, %ymm1 # bufs[35,34,39,38,43,42,47,46] + vmulps %ymm3, %ymm9, %ymm3 # bufs[51,50,55,54,59,58,63,62] + + vpermilps $0xaa, %ymm8, %ymm8 # cos4[0,0,0,0,0,0,0,0] + + vshufps $0xd8, %ymm0, %ymm0, %ymm0 # bufs[32,36,33,37,40,44,41,45] + vshufps $0xd8, %ymm1, %ymm1, %ymm1 # bufs[35,39,34,38,43,47,42,46] + vshufps $0xd8, %ymm2, %ymm2, %ymm2 # bufs[48,52,49,53,56,60,57,61] + vshufps $0xd8, %ymm3, %ymm3, %ymm3 # bufs[51,55,50,54,59,63,58,62] + vunpcklps %ymm1, %ymm0, %ymm4 # bufs[32,35,36,39,40,43,44,47] + vunpckhps %ymm1, %ymm0, %ymm5 # bufs[33,34,37,38,41,42,45,46] + vunpcklps %ymm3, %ymm2, %ymm6 # bufs[48,51,52,55,56,59,60,63] + vunpckhps %ymm3, %ymm2, %ymm7 # bufs[49,50,53,54,57,58,61,62] + vsubps %ymm5, %ymm4, %ymm1 + vsubps %ymm7, %ymm6, %ymm3 + vaddps %ymm5, %ymm4, %ymm0 # bufs[0,2,4,6,8,10,12,14] + vaddps %ymm7, %ymm6, %ymm2 # bufs[16,18,20,22,24,26,28,30] + vmulps %ymm1, %ymm8, %ymm1 # bufs[1,3,5,7,9,11,13,15] + vmulps %ymm3, %ymm8, %ymm3 # bufs[17,19,21,23,25,27,29,31] + + vxorps %ymm8, %ymm8, %ymm8 + vblendps $0xaa, %ymm1, %ymm8, %ymm5 + vblendps $0xaa, %ymm3, %ymm8, %ymm6 + vaddps 
%ymm5, %ymm0, %ymm0 + vaddps %ymm6, %ymm2, %ymm2 + vunpcklps %ymm1, %ymm0, %ymm4 # bufs[0,1,2,3,8,9,10,11] + vunpckhps %ymm1, %ymm0, %ymm5 # bufs[4,5,6,7,12,13,14,15] + vunpcklps %ymm3, %ymm2, %ymm6 # bufs[16,17,18,19,24,25,26,27] + vunpckhps %ymm3, %ymm2, %ymm7 # bufs[20,21,22,23,28,29,30,31] + + vextractf128 $0x1, %ymm4, %xmm0 # bufs[8,9,10,11] + vextractf128 $0x1, %ymm5, %xmm1 # bufs[12,13,14,15] + vextractf128 $0x1, %ymm6, %xmm2 # bufs[24,25,26,27] + vextractf128 $0x1, %ymm7, %xmm3 # bufs[28,29,30,31] + + vshufps $0x1e, %xmm5, %xmm5, %xmm9 # bufs[6,7,5,4] + vshufps $0x1e, %xmm1, %xmm1, %xmm10 # bufs[14,15,13,12] + vshufps $0x1e, %xmm7, %xmm7, %xmm11 # bufs[22,23,21,20] + vshufps $0x1e, %xmm3, %xmm3, %xmm12 # bufs[30,31,29,28] + vblendps $0x7, %xmm9, %xmm8, %xmm9 # bufs[6,7,5,-] + vblendps $0x7, %xmm10, %xmm8, %xmm10 # bufs[14,15,13,-] + vblendps $0x7, %xmm11, %xmm8, %xmm11 # bufs[22,23,21,-] + vblendps $0x7, %xmm12, %xmm8, %xmm12 # bufs[30,31,29,-] + vaddps %xmm5, %xmm9, %xmm5 + vaddps %xmm1, %xmm10, %xmm1 + vaddps %xmm7, %xmm11, %xmm7 + vaddps %xmm3, %xmm12, %xmm3 + + prefetcht0 1024(out0) + + vshufps $0x1e, %xmm0, %xmm0, %xmm9 # bufs[10,11,9,8] + vshufps $0x1e, %xmm2, %xmm2, %xmm10 # bufs[26,27,25,24] + vaddps %xmm1, %xmm0, %xmm0 + vaddps %xmm3, %xmm2, %xmm2 + vblendps $0x7, %xmm9, %xmm8, %xmm9 # bufs[10,11,9,-] + vblendps $0x7, %xmm10, %xmm8, %xmm10 # bufs[26,27,25,-] + vaddps %xmm1, %xmm9, %xmm1 + vaddps %xmm3, %xmm10, %xmm3 + + vzeroupper + prefetcht0 1024(out1) + + addq $1024, out0 + movq $-128, %rax + movss %xmm4, (out0) + movss %xmm0, (out0,%rax,1) + movss %xmm5, (out0,%rax,2) + movss %xmm1, -128(out0,%rax,2) + leaq (out0,%rax,4), out0 + movhlps %xmm4, %xmm9 + movhlps %xmm0, %xmm10 + movhlps %xmm5, %xmm11 + movhlps %xmm1, %xmm12 + vmovss %xmm9, (out0) + vmovss %xmm10, (out0,%rax,1) + vmovss %xmm11, (out0,%rax,2) + vmovss %xmm12, -128(out0,%rax,2) + leaq (out0,%rax,4), out0 + negq %rax + shufps $0xb1, %xmm4, %xmm4 + shufps $0xb1, %xmm0, %xmm0 + shufps 
$0xb1, %xmm5, %xmm5 + shufps $0xb1, %xmm1, %xmm1 + movss %xmm4, (out0) + movss %xmm4, (out1) + leaq (out1,%rax,1), out1 + movss %xmm0, (out1) + movss %xmm5, (out1,%rax,1) + movss %xmm1, (out1,%rax,2) + leaq (out1,%rax,4), out1 + movhlps %xmm4, %xmm4 + movhlps %xmm0, %xmm0 + movhlps %xmm5, %xmm5 + movhlps %xmm1, %xmm1 + movss %xmm4, -128(out1) + movss %xmm0, (out1) + movss %xmm5, (out1,%rax,1) + movss %xmm1, (out1,%rax,2) + + leaq -64(out0,%rax,8), out0 + negq %rax + vshufps $0x1e, %xmm6, %xmm6, %xmm0 + vblendps $0x7, %xmm0, %xmm8, %xmm0 + addps %xmm2, %xmm6 + addps %xmm7, %xmm2 + addps %xmm3, %xmm7 + addps %xmm0, %xmm3 + movss %xmm6, (out0) + movss %xmm2, (out0,%rax,1) + movss %xmm7, (out0,%rax,2) + movss %xmm3, -128(out0,%rax,2) + leaq (out0,%rax,4), out0 + movhlps %xmm6, %xmm0 + movhlps %xmm2, %xmm1 + movhlps %xmm7, %xmm4 + movhlps %xmm3, %xmm5 + movss %xmm0, (out0) + movss %xmm1, (out0,%rax,1) + movss %xmm4, (out0,%rax,2) + movss %xmm5, -128(out0,%rax,2) + leaq 64(out1,%rax,4), out1 + negq %rax + shufps $0xb1, %xmm6, %xmm6 + shufps $0xb1, %xmm2, %xmm2 + shufps $0xb1, %xmm7, %xmm7 + shufps $0xb1, %xmm3, %xmm3 + movss %xmm6, -128(out1) + movss %xmm2, (out1) + movss %xmm7, (out1,%rax,1) + movss %xmm3, (out1,%rax,2) + leaq (out1,%rax,4), out1 + movhlps %xmm6, %xmm6 + movhlps %xmm2, %xmm2 + movhlps %xmm7, %xmm7 + movhlps %xmm3, %xmm3 + movss %xmm6, -128(out1) + movss %xmm2, (out1) + movss %xmm7, (out1,%rax,1) + movss %xmm3, (out1,%rax,2) + +#ifdef IS_MSABI + pop %rsi + pop %rdi + movaps (%rsp), %xmm6 + movaps 16(%rsp), %xmm7 + movaps 32(%rsp), %xmm8 + movaps 48(%rsp), %xmm9 + movaps 64(%rsp), %xmm10 + movaps 80(%rsp), %xmm11 + movaps 96(%rsp), %xmm12 + mov %rbp, %rsp + pop %rbp +#endif + ret + +NONEXEC_STACK Index: lib/3rdparty/libmpg123/dct64_neon.S =================================================================== --- lib/3rdparty/libmpg123/dct64_neon.S (revision 0) +++ lib/3rdparty/libmpg123/dct64_neon.S (working copy) @@ -0,0 +1,308 @@ +/* + dct64_neon: ARM NEON 
optimized dct64 + + copyright 1995-2010 by the mpg123 project - free software under the terms of the LGPL 2.1 + see COPYING and AUTHORS files in distribution or http://mpg123.org + initially written by Taihei Monma +*/ + +#include "mangle.h" + + .code 32 +#ifndef __APPLE__ + .fpu neon +#endif + + .text + ALIGN16 +costab_arm: + .word 1056974725 + .word 1057056395 + .word 1057223771 + .word 1057485416 + .word 1057855544 + .word 1058356026 + .word 1059019886 + .word 1059897405 + .word 1061067246 + .word 1062657950 + .word 1064892987 + .word 1066774581 + .word 1069414683 + .word 1073984175 + .word 1079645762 + .word 1092815430 + .word 1057005197 + .word 1057342072 + .word 1058087743 + .word 1059427869 + .word 1061799040 + .word 1065862217 + .word 1071413542 + .word 1084439708 + .word 1057128951 + .word 1058664893 + .word 1063675095 + .word 1076102863 + .word 1057655764 + .word 1067924853 + .word 1060439283 + .word 1060439283 + ALIGN4 + .globl ASM_NAME(dct64_neon) +#ifdef __ELF__ + .type ASM_NAME(dct64_neon), %function +#endif +ASM_NAME(dct64_neon): + vpush {q4-q7} + + adr r3, costab_arm + vld1.32 {q0, q1}, [r2]! + vld1.32 {q2, q3}, [r2]! + vld1.32 {q4, q5}, [r2]! + vld1.32 {q6, q7}, [r2] + vld1.32 {q12, q13}, [r3, :128]! + vld1.32 {q14, q15}, [r3, :128]! + + vrev64.32 q4, q4 + vrev64.32 q5, q5 + vrev64.32 q6, q6 + vrev64.32 q7, q7 + vswp d8, d9 + vswp d10, d11 + vswp d12, d13 + vswp d14, d15 + + vsub.f32 q8, q0, q7 + vsub.f32 q9, q1, q6 + vsub.f32 q10, q2, q5 + vsub.f32 q11, q3, q4 + vadd.f32 q0, q0, q7 + vadd.f32 q1, q1, q6 + vadd.f32 q2, q2, q5 + vadd.f32 q3, q3, q4 + vmul.f32 q4, q8, q12 + vmul.f32 q5, q9, q13 + vmul.f32 q6, q10, q14 + vmul.f32 q7, q11, q15 + + vld1.32 {q12, q13}, [r3, :128]! 
+ vld1.32 {q14, q15}, [r3, :128] + + vrev64.32 q2, q2 + vrev64.32 q3, q3 + vrev64.32 q6, q6 + vrev64.32 q7, q7 + vswp d4, d5 + vswp d6, d7 + vswp d12, d13 + vswp d14, d15 + + vsub.f32 q8, q0, q3 + vsub.f32 q9, q1, q2 + vsub.f32 q10, q4, q7 + vsub.f32 q11, q5, q6 + vadd.f32 q0, q0, q3 + vadd.f32 q1, q1, q2 + vadd.f32 q4, q4, q7 + vadd.f32 q5, q5, q6 + vmul.f32 q2, q8, q12 + vmul.f32 q3, q9, q13 + vmul.f32 q6, q10, q12 + vmul.f32 q7, q11, q13 + + vrev64.32 q1, q1 + vrev64.32 q3, q3 + vrev64.32 q5, q5 + vrev64.32 q7, q7 + vswp d2, d3 + vswp d6, d7 + vswp d10, d11 + vswp d14, d15 + + vsub.f32 q8, q0, q1 + vsub.f32 q9, q2, q3 + vsub.f32 q10, q4, q5 + vsub.f32 q11, q6, q7 + vadd.f32 q0, q0, q1 + vadd.f32 q2, q2, q3 + vadd.f32 q4, q4, q5 + vadd.f32 q6, q6, q7 + vmul.f32 q1, q8, q14 + vmul.f32 q3, q9, q14 + vmul.f32 q5, q10, q14 + vmul.f32 q7, q11, q14 + + vdup.32 q12, d31[0] + vmov d31, d30 + + vswp d1, d2 + vswp d5, d6 + vswp d9, d10 + vswp d13, d14 + vrev64.32 q1, q1 + vrev64.32 q3, q3 + vrev64.32 q5, q5 + vrev64.32 q7, q7 + + vsub.f32 q8, q0, q1 + vsub.f32 q9, q2, q3 + vsub.f32 q10, q4, q5 + vsub.f32 q11, q6, q7 + vadd.f32 q0, q0, q1 + vadd.f32 q2, q2, q3 + vadd.f32 q4, q4, q5 + vadd.f32 q6, q6, q7 + vmul.f32 q1, q8, q15 + vmul.f32 q3, q9, q15 + vmul.f32 q5, q10, q15 + vmul.f32 q7, q11, q15 + + vtrn.32 q0, q1 + vtrn.32 q2, q3 + vtrn.32 q4, q5 + vtrn.32 q6, q7 + + vsub.f32 q8, q0, q1 + vsub.f32 q9, q2, q3 + vsub.f32 q10, q4, q5 + vsub.f32 q11, q6, q7 + vadd.f32 q0, q0, q1 + vadd.f32 q2, q2, q3 + vadd.f32 q4, q4, q5 + vadd.f32 q6, q6, q7 + vmul.f32 q1, q8, q12 + vmul.f32 q3, q9, q12 + vmul.f32 q5, q10, q12 + vmul.f32 q7, q11, q12 + + vtrn.32 q0, q1 + vtrn.32 q2, q3 + vtrn.32 q4, q5 + vtrn.32 q6, q7 + vswp d1, d2 + vswp d5, d6 + vswp d9, d10 + vswp d13, d14 + + vshr.u64 d16, d1, #32 + vshr.u64 d17, d3, #32 + vshr.u64 d18, d5, #32 + vshr.u64 d19, d7, #32 + vadd.f32 d1, d1, d16 + vadd.f32 d3, d3, d17 + vadd.f32 d5, d5, d18 + vadd.f32 d7, d7, d19 + vshr.u64 d20, d9, #32 + 
vshr.u64 d21, d11, #32 + vshr.u64 d22, d13, #32 + vshr.u64 d23, d15, #32 + vadd.f32 d9, d9, d20 + vadd.f32 d11, d11, d21 + vadd.f32 d13, d13, d22 + vadd.f32 d15, d15, d23 + + vshr.u64 d16, d2, #32 + vshr.u64 d18, d6, #32 + vshr.u64 d20, d10, #32 + vshr.u64 d22, d14, #32 + vext.8 q8, q1, q8, #8 + vext.8 q9, q3, q9, #8 + vext.8 q10, q5, q10, #8 + vext.8 q11, q7, q11, #8 + vadd.f32 q1, q1, q8 + vadd.f32 q3, q3, q9 + vadd.f32 q5, q5, q10 + vadd.f32 q7, q7, q11 + + vshr.u64 d16, d4, #32 + vshr.u64 d18, d12, #32 + vext.8 q8, q2, q8, #8 + vext.8 q9, q6, q9, #8 + vadd.f32 q2, q2, q3 + vadd.f32 q6, q6, q7 + vadd.f32 q3, q3, q8 + vadd.f32 q7, q7, q9 + + vrev64.32 q8, q4 + vshr.u64 d19, d9, #32 + vext.8 d17, d17, d16, #4 + vswp d9, d10 + vswp d13, d14 + vtrn.32 q4, q5 + vtrn.32 q6, q7 + vmov d16, d9 + vmov d18, d11 + + vadd.f32 q4, q6 + vadd.f32 q5, q7 + vadd.f32 q6, q8 + vadd.f32 q7, q9 + + vmov.i32 q8, #0x4b000000 + vorr.i32 q8, #0x00400000 + vadd.f32 q0, q0, q8 + vadd.f32 q1, q1, q8 + vadd.f32 q2, q2, q8 + vadd.f32 q3, q3, q8 + vadd.f32 q4, q4, q8 + vadd.f32 q5, q5, q8 + vadd.f32 q6, q6, q8 + vadd.f32 q7, q7, q8 + vshl.i32 q0, q0, #10 + vshl.i32 q1, q1, #10 + vshl.i32 q2, q2, #10 + vshl.i32 q3, q3, #10 + vshl.i32 q4, q4, #10 + vshl.i32 q5, q5, #10 + vshl.i32 q6, q6, #10 + vshl.i32 q7, q7, #10 + vqshrn.s32 d0, q0, #10 + vqshrn.s32 d2, q1, #10 + vqshrn.s32 d4, q2, #10 + vqshrn.s32 d6, q3, #10 + vqshrn.s32 d8, q4, #10 + vqshrn.s32 d10, q5, #10 + vqshrn.s32 d12, q6, #10 + vqshrn.s32 d14, q7, #10 + + mov r3, #32 + vst1.16 {d0[1]}, [r0, :16], r3 + vst1.16 {d12[3]}, [r0, :16], r3 + vst1.16 {d6[2]}, [r0, :16], r3 + vst1.16 {d8[3]}, [r0, :16], r3 + vst1.16 {d2[2]}, [r0, :16], r3 + vst1.16 {d12[1]}, [r0, :16], r3 + vst1.16 {d4[2]}, [r0, :16], r3 + vst1.16 {d8[1]}, [r0, :16], r3 + vst1.16 {d0[2]}, [r0, :16], r3 + vst1.16 {d12[2]}, [r0, :16], r3 + vst1.16 {d6[0]}, [r0, :16], r3 + vst1.16 {d8[2]}, [r0, :16], r3 + vst1.16 {d2[0]}, [r0, :16], r3 + vst1.16 {d12[0]}, [r0, :16], r3 + 
vst1.16 {d4[0]}, [r0, :16], r3 + vst1.16 {d8[0]}, [r0, :16], r3 + vst1.16 {d0[0]}, [r0, :16] + + vst1.16 {d0[1]}, [r1, :16], r3 + vst1.16 {d10[0]}, [r1, :16], r3 + vst1.16 {d4[1]}, [r1, :16], r3 + vst1.16 {d14[0]}, [r1, :16], r3 + vst1.16 {d2[1]}, [r1, :16], r3 + vst1.16 {d10[2]}, [r1, :16], r3 + vst1.16 {d6[1]}, [r1, :16], r3 + vst1.16 {d14[2]}, [r1, :16], r3 + vst1.16 {d0[3]}, [r1, :16], r3 + vst1.16 {d10[1]}, [r1, :16], r3 + vst1.16 {d4[3]}, [r1, :16], r3 + vst1.16 {d14[1]}, [r1, :16], r3 + vst1.16 {d2[3]}, [r1, :16], r3 + vst1.16 {d10[3]}, [r1, :16], r3 + vst1.16 {d6[3]}, [r1, :16], r3 + vst1.16 {d14[3]}, [r1, :16] + + vpop {q4-q7} + bx lr + +NONEXEC_STACK Index: lib/3rdparty/libmpg123/dct64_neon_float.S =================================================================== --- lib/3rdparty/libmpg123/dct64_neon_float.S (revision 0) +++ lib/3rdparty/libmpg123/dct64_neon_float.S (working copy) @@ -0,0 +1,281 @@ +/* + dct64_neon_float: ARM NEON optimized dct64 (float output version) + + copyright 1995-2010 by the mpg123 project - free software under the terms of the LGPL 2.1 + see COPYING and AUTHORS files in distribution or http://mpg123.org + initially written by Taihei Monma +*/ + +#include "mangle.h" + + .code 32 +#ifndef __APPLE__ + .fpu neon +#endif + + .text + ALIGN16 +costab_arm: + .word 1056974725 + .word 1057056395 + .word 1057223771 + .word 1057485416 + .word 1057855544 + .word 1058356026 + .word 1059019886 + .word 1059897405 + .word 1061067246 + .word 1062657950 + .word 1064892987 + .word 1066774581 + .word 1069414683 + .word 1073984175 + .word 1079645762 + .word 1092815430 + .word 1057005197 + .word 1057342072 + .word 1058087743 + .word 1059427869 + .word 1061799040 + .word 1065862217 + .word 1071413542 + .word 1084439708 + .word 1057128951 + .word 1058664893 + .word 1063675095 + .word 1076102863 + .word 1057655764 + .word 1067924853 + .word 1060439283 + .word 1060439283 + ALIGN4 + .globl ASM_NAME(dct64_real_neon) +#ifdef __ELF__ + .type 
ASM_NAME(dct64_real_neon), %function +#endif +ASM_NAME(dct64_real_neon): + vpush {q4-q7} + + adr r3, costab_arm + vld1.32 {q0, q1}, [r2]! + vld1.32 {q2, q3}, [r2]! + vld1.32 {q4, q5}, [r2]! + vld1.32 {q6, q7}, [r2] + vld1.32 {q12, q13}, [r3, :128]! + vld1.32 {q14, q15}, [r3, :128]! + + vrev64.32 q4, q4 + vrev64.32 q5, q5 + vrev64.32 q6, q6 + vrev64.32 q7, q7 + vswp d8, d9 + vswp d10, d11 + vswp d12, d13 + vswp d14, d15 + + vsub.f32 q8, q0, q7 + vsub.f32 q9, q1, q6 + vsub.f32 q10, q2, q5 + vsub.f32 q11, q3, q4 + vadd.f32 q0, q0, q7 + vadd.f32 q1, q1, q6 + vadd.f32 q2, q2, q5 + vadd.f32 q3, q3, q4 + vmul.f32 q4, q8, q12 + vmul.f32 q5, q9, q13 + vmul.f32 q6, q10, q14 + vmul.f32 q7, q11, q15 + + vld1.32 {q12, q13}, [r3, :128]! + vld1.32 {q14, q15}, [r3, :128] + + vrev64.32 q2, q2 + vrev64.32 q3, q3 + vrev64.32 q6, q6 + vrev64.32 q7, q7 + vswp d4, d5 + vswp d6, d7 + vswp d12, d13 + vswp d14, d15 + + vsub.f32 q8, q0, q3 + vsub.f32 q9, q1, q2 + vsub.f32 q10, q4, q7 + vsub.f32 q11, q5, q6 + vadd.f32 q0, q0, q3 + vadd.f32 q1, q1, q2 + vadd.f32 q4, q4, q7 + vadd.f32 q5, q5, q6 + vmul.f32 q2, q8, q12 + vmul.f32 q3, q9, q13 + vmul.f32 q6, q10, q12 + vmul.f32 q7, q11, q13 + + vrev64.32 q1, q1 + vrev64.32 q3, q3 + vrev64.32 q5, q5 + vrev64.32 q7, q7 + vswp d2, d3 + vswp d6, d7 + vswp d10, d11 + vswp d14, d15 + + vsub.f32 q8, q0, q1 + vsub.f32 q9, q2, q3 + vsub.f32 q10, q4, q5 + vsub.f32 q11, q6, q7 + vadd.f32 q0, q0, q1 + vadd.f32 q2, q2, q3 + vadd.f32 q4, q4, q5 + vadd.f32 q6, q6, q7 + vmul.f32 q1, q8, q14 + vmul.f32 q3, q9, q14 + vmul.f32 q5, q10, q14 + vmul.f32 q7, q11, q14 + + vdup.32 q12, d31[0] + vmov d31, d30 + + vswp d1, d2 + vswp d5, d6 + vswp d9, d10 + vswp d13, d14 + vrev64.32 q1, q1 + vrev64.32 q3, q3 + vrev64.32 q5, q5 + vrev64.32 q7, q7 + + vsub.f32 q8, q0, q1 + vsub.f32 q9, q2, q3 + vsub.f32 q10, q4, q5 + vsub.f32 q11, q6, q7 + vadd.f32 q0, q0, q1 + vadd.f32 q2, q2, q3 + vadd.f32 q4, q4, q5 + vadd.f32 q6, q6, q7 + vmul.f32 q1, q8, q15 + vmul.f32 q3, q9, q15 + 
vmul.f32 q5, q10, q15 + vmul.f32 q7, q11, q15 + + vtrn.32 q0, q1 + vtrn.32 q2, q3 + vtrn.32 q4, q5 + vtrn.32 q6, q7 + + vsub.f32 q8, q0, q1 + vsub.f32 q9, q2, q3 + vsub.f32 q10, q4, q5 + vsub.f32 q11, q6, q7 + vadd.f32 q0, q0, q1 + vadd.f32 q2, q2, q3 + vadd.f32 q4, q4, q5 + vadd.f32 q6, q6, q7 + vmul.f32 q1, q8, q12 + vmul.f32 q3, q9, q12 + vmul.f32 q5, q10, q12 + vmul.f32 q7, q11, q12 + + vtrn.32 q0, q1 + vtrn.32 q2, q3 + vtrn.32 q4, q5 + vtrn.32 q6, q7 + vswp d1, d2 + vswp d5, d6 + vswp d9, d10 + vswp d13, d14 + + vshr.u64 d16, d1, #32 + vshr.u64 d17, d3, #32 + vshr.u64 d18, d5, #32 + vshr.u64 d19, d7, #32 + vadd.f32 d1, d1, d16 + vadd.f32 d3, d3, d17 + vadd.f32 d5, d5, d18 + vadd.f32 d7, d7, d19 + vshr.u64 d20, d9, #32 + vshr.u64 d21, d11, #32 + vshr.u64 d22, d13, #32 + vshr.u64 d23, d15, #32 + vadd.f32 d9, d9, d20 + vadd.f32 d11, d11, d21 + vadd.f32 d13, d13, d22 + vadd.f32 d15, d15, d23 + + vshr.u64 d16, d2, #32 + vshr.u64 d18, d6, #32 + vshr.u64 d20, d10, #32 + vshr.u64 d22, d14, #32 + vext.8 q8, q1, q8, #8 + vext.8 q9, q3, q9, #8 + vext.8 q10, q5, q10, #8 + vext.8 q11, q7, q11, #8 + vadd.f32 q1, q1, q8 + vadd.f32 q3, q3, q9 + vadd.f32 q5, q5, q10 + vadd.f32 q7, q7, q11 + + vshr.u64 d16, d4, #32 + vshr.u64 d18, d12, #32 + vext.8 q8, q2, q8, #8 + vext.8 q9, q6, q9, #8 + vadd.f32 q2, q2, q3 + vadd.f32 q6, q6, q7 + vadd.f32 q3, q3, q8 + vadd.f32 q7, q7, q9 + + vrev64.32 q8, q4 + vshr.u64 d19, d9, #32 + vext.8 d17, d17, d16, #4 + vswp d9, d10 + vswp d13, d14 + vtrn.32 q4, q5 + vtrn.32 q6, q7 + vmov d16, d9 + vmov d18, d11 + + vadd.f32 q4, q6 + vadd.f32 q5, q7 + vadd.f32 q6, q8 + vadd.f32 q7, q9 + + mov r3, #64 + vst1.32 {d0[1]}, [r0, :32], r3 + vst1.32 {d13[1]}, [r0, :32], r3 + vst1.32 {d7[0]}, [r0, :32], r3 + vst1.32 {d9[1]}, [r0, :32], r3 + vst1.32 {d3[0]}, [r0, :32], r3 + vst1.32 {d12[1]}, [r0, :32], r3 + vst1.32 {d5[0]}, [r0, :32], r3 + vst1.32 {d8[1]}, [r0, :32], r3 + vst1.32 {d1[0]}, [r0, :32], r3 + vst1.32 {d13[0]}, [r0, :32], r3 + vst1.32 {d6[0]}, [r0, 
:32], r3 + vst1.32 {d9[0]}, [r0, :32], r3 + vst1.32 {d2[0]}, [r0, :32], r3 + vst1.32 {d12[0]}, [r0, :32], r3 + vst1.32 {d4[0]}, [r0, :32], r3 + vst1.32 {d8[0]}, [r0, :32], r3 + vst1.32 {d0[0]}, [r0, :32] + + vst1.32 {d0[1]}, [r1, :32], r3 + vst1.32 {d10[0]}, [r1, :32], r3 + vst1.32 {d4[1]}, [r1, :32], r3 + vst1.32 {d14[0]}, [r1, :32], r3 + vst1.32 {d2[1]}, [r1, :32], r3 + vst1.32 {d11[0]}, [r1, :32], r3 + vst1.32 {d6[1]}, [r1, :32], r3 + vst1.32 {d15[0]}, [r1, :32], r3 + vst1.32 {d1[1]}, [r1, :32], r3 + vst1.32 {d10[1]}, [r1, :32], r3 + vst1.32 {d5[1]}, [r1, :32], r3 + vst1.32 {d14[1]}, [r1, :32], r3 + vst1.32 {d3[1]}, [r1, :32], r3 + vst1.32 {d11[1]}, [r1, :32], r3 + vst1.32 {d7[1]}, [r1, :32], r3 + vst1.32 {d15[1]}, [r1, :32] + + vpop {q4-q7} + bx lr + +NONEXEC_STACK Index: lib/3rdparty/libmpg123/dct64_x86_64.S =================================================================== --- lib/3rdparty/libmpg123/dct64_x86_64.S (revision 62563) +++ lib/3rdparty/libmpg123/dct64_x86_64.S (working copy) @@ -8,7 +8,7 @@ #include "mangle.h" -#ifdef _WIN64 +#ifdef IS_MSABI /* short *out0 */ #define ARG0 %r9 /* short *out1 */ @@ -71,7 +71,7 @@ ALIGN16 .globl ASM_NAME(dct64_x86_64) ASM_NAME(dct64_x86_64): -#ifdef _WIN64 /* should save xmm6-15 */ +#ifdef IS_MSABI /* should save xmm6-15 */ movq %rcx, ARG0 subq $168, %rsp /* stack alignment + 10 xmm registers */ movaps %xmm6, (%rsp) @@ -446,7 +446,7 @@ movw %ax, 416(ARG1) movw %cx, 480(ARG1) -#ifdef _WIN64 +#ifdef IS_MSABI movaps (%rsp), %xmm6 movaps 16(%rsp), %xmm7 movaps 32(%rsp), %xmm8 Index: lib/3rdparty/libmpg123/dct64_x86_64_float.S =================================================================== --- lib/3rdparty/libmpg123/dct64_x86_64_float.S (revision 62563) +++ lib/3rdparty/libmpg123/dct64_x86_64_float.S (working copy) @@ -8,7 +8,7 @@ #include "mangle.h" -#ifdef _WIN64 +#ifdef IS_MSABI /* short *out0 */ #define ARG0 %r9 /* short *out1 */ @@ -71,7 +71,7 @@ ALIGN16 .globl ASM_NAME(dct64_real_x86_64) 
ASM_NAME(dct64_real_x86_64): -#ifdef _WIN64 /* should save xmm6-15 */ +#ifdef IS_MSABI /* should save xmm6-15 */ movq %rcx, ARG0 subq $168, %rsp /* stack alignment + 10 xmm registers */ movaps %xmm6, (%rsp) @@ -408,7 +408,7 @@ movss %xmm2, 832(ARG1) movss %xmm3, 960(ARG1) -#ifdef _WIN64 +#ifdef IS_MSABI movaps (%rsp), %xmm6 movaps 16(%rsp), %xmm7 movaps 32(%rsp), %xmm8 Index: lib/3rdparty/libmpg123/dither.c =================================================================== --- lib/3rdparty/libmpg123/dither.c (revision 62563) +++ lib/3rdparty/libmpg123/dither.c (working copy) @@ -10,7 +10,7 @@ #include "compat.h" #include "dither.h" -const uint32_t init_seed = 2463534242UL; +static const uint32_t init_seed = 2463534242UL; #define LAP 100 Index: lib/3rdparty/libmpg123/format.c =================================================================== --- lib/3rdparty/libmpg123/format.c (revision 62563) +++ lib/3rdparty/libmpg123/format.c (working copy) @@ -1,9 +1,28 @@ /* format:routines to deal with audio (output) format - copyright 2008-9 by the mpg123 project - free software under the terms of the LGPL 2.1 + copyright 2008-14 by the mpg123 project - free software under the terms of the LGPL 2.1 see COPYING and AUTHORS files in distribution or http://mpg123.org initially written by Thomas Orgis, starting with parts of the old audio.c, with only faintly manage to show now + + A Major change from mpg123 <= 1.18 is that all encodings are only really + disabled when done so via specific build configuration. Otherwise, the + missing support of decoders to produce a certain format is augmented by + postprocessing that converts the samples. This means happily creating + data with higher resolution from less accurate decoder output. + + The main point is to still offer float encoding when the decoding core uses + a fixed point representation that has only 16 bit output. 
Actually, that's + the only point: A fixed-point build needs to create float from 16 bit, also + 32 or 24 bit from the same source. That's all there is to it: Everything else + is covered by fallback synth functions. It may be a further step to check if + there are cases where conversion in postprocessing works well enough to omit + a certain specialized decoder ... but usually, they are justified by some + special way to get from float to integer to begin with. + + I won't cover the case of faking double output with float/s16 decoders here. + Double precision output is a thing for experimental builds anyway. Mostly + theoretical and without a point. */ #include "mpg123lib_intern.h" @@ -23,8 +42,12 @@ MPG123_ENC_UNSIGNED_16, MPG123_ENC_SIGNED_32, MPG123_ENC_UNSIGNED_32, + MPG123_ENC_SIGNED_24, + MPG123_ENC_UNSIGNED_24, + /* Floating point range, see below. */ MPG123_ENC_FLOAT_32, MPG123_ENC_FLOAT_64, + /* 8 bit range, see below. */ MPG123_ENC_SIGNED_8, MPG123_ENC_UNSIGNED_8, MPG123_ENC_ULAW_8, @@ -31,11 +54,25 @@ MPG123_ENC_ALAW_8 }; -/* Only one type of float is supported. */ -# ifdef REAL_IS_FLOAT +/* Make that match the above table. + And yes, I still don't like this kludgy stuff. */ +/* range[0] <= i < range[1] for forced floating point */ +static const int enc_float_range[2] = { 6, 8 }; +/* same for 8 bit encodings */ +static const int enc_8bit_range[2] = { 8, 12 }; + +/* + Only one type of float is supported. + Actually, double is a very special experimental case not occurring in normal + builds. Might actually get rid of it. + + Remember here: Also with REAL_IS_FIXED, I want to be able to produce float + output (f32) via post-processing. +*/ +# ifdef REAL_IS_DOUBLE +# define MPG123_FLOAT_ENC MPG123_ENC_FLOAT_64 +# else # define MPG123_FLOAT_ENC MPG123_ENC_FLOAT_32 -# else -# define MPG123_FLOAT_ENC MPG123_ENC_FLOAT_64 # endif /* The list of actually possible encodings.
*/ @@ -48,6 +85,8 @@ #ifndef NO_32BIT MPG123_ENC_SIGNED_32, MPG123_ENC_UNSIGNED_32, + MPG123_ENC_SIGNED_24, + MPG123_ENC_UNSIGNED_24, #endif #ifndef NO_REAL MPG123_FLOAT_ENC, @@ -84,6 +123,22 @@ if(number != NULL) *number = sizeof(good_encodings)/sizeof(int); } +int attribute_align_arg mpg123_encsize(int encoding) +{ + if(encoding & MPG123_ENC_8) + return 1; + else if(encoding & MPG123_ENC_16) + return 2; + else if(encoding & MPG123_ENC_24) + return 3; + else if(encoding & MPG123_ENC_32 || encoding == MPG123_ENC_FLOAT_32) + return 4; + else if(encoding == MPG123_ENC_FLOAT_64) + return 8; + else + return 0; +} + /* char audio_caps[NUM_CHANNELS][MPG123_RATES+1][MPG123_ENCODINGS]; */ static int rate2num(mpg123_pars *mp, long r) @@ -126,14 +181,18 @@ { nf->rate = frame_freq(fr)>>fr->p.down_sample; if(cap_fit(fr,nf,f0,f2)) return 1; - nf->rate>>=1; - if(cap_fit(fr,nf,f0,f2)) return 1; - nf->rate>>=1; - if(cap_fit(fr,nf,f0,f2)) return 1; + if(fr->p.flags & MPG123_AUTO_RESAMPLE) + { + nf->rate>>=1; + if(cap_fit(fr,nf,f0,f2)) return 1; + nf->rate>>=1; + if(cap_fit(fr,nf,f0,f2)) return 1; + } #ifndef NO_NTOM /* If nothing worked, try the other rates, only without constrains from user. In case you didn't guess: We enable flexible resampling if we find a working rate. */ - if(!fr->p.force_rate && fr->p.down_sample == 0) + if( fr->p.flags & MPG123_AUTO_RESAMPLE && + !fr->p.force_rate && fr->p.down_sample == 0) { int i; int c = nf->channels-1; @@ -176,13 +235,13 @@ /* All this forcing should be removed in favour of the capabilities table... */ if(p->flags & MPG123_FORCE_8BIT) { - f0 = 6; - f2 = 10; + f0 = enc_8bit_range[0]; + f2 = enc_8bit_range[1]; } if(p->flags & MPG123_FORCE_FLOAT) { - f0 = 4; - f2 = 6; + f0 = enc_float_range[0]; + f2 = enc_float_range[1]; } /* force stereo is stronger */ @@ -254,15 +313,8 @@ fr->af.channels = nf.channels; fr->af.encoding = nf.encoding; /* Cache the size of one sample in bytes, for ease of use. 
*/ - if(fr->af.encoding & MPG123_ENC_8) - fr->af.encsize = 1; - else if(fr->af.encoding & MPG123_ENC_16) - fr->af.encsize = 2; - else if(fr->af.encoding & MPG123_ENC_32 || fr->af.encoding == MPG123_ENC_FLOAT_32) - fr->af.encsize = 4; - else if(fr->af.encoding == MPG123_ENC_FLOAT_64) - fr->af.encsize = 8; - else + fr->af.encsize = mpg123_encsize(fr->af.encoding); + if(fr->af.encsize < 1) { if(NOQUIET) error1("Some unknown encoding??? (%i)", fr->af.encoding); @@ -269,6 +321,36 @@ fr->err = MPG123_BAD_OUTFORMAT; return -1; } + /* Set up the decoder synth format. Might differ. */ +#ifdef NO_SYNTH32 + /* Without high-precision synths, 16 bit signed is the basis for + everything higher than 8 bit. */ + if(fr->af.encsize > 2) + fr->af.dec_enc = MPG123_ENC_SIGNED_16; + else + { +#endif + switch(fr->af.encoding) + { +#ifndef NO_32BIT + case MPG123_ENC_SIGNED_24: + case MPG123_ENC_UNSIGNED_24: + case MPG123_ENC_UNSIGNED_32: + fr->af.dec_enc = MPG123_ENC_SIGNED_32; + break; +#endif +#ifndef NO_16BIT + case MPG123_ENC_UNSIGNED_16: + fr->af.dec_enc = MPG123_ENC_SIGNED_16; + break; +#endif + default: + fr->af.dec_enc = fr->af.encoding; + } +#ifdef NO_SYNTH32 + } +#endif + fr->af.dec_encsize = mpg123_encsize(fr->af.dec_enc); return 1; } } @@ -383,6 +465,13 @@ af->channels = 0; } +/* Number of bytes the decoder produces. */ +off_t decoder_synth_bytes(mpg123_handle *fr, off_t s) +{ + return s * fr->af.dec_encsize * fr->af.channels; +} + +/* Samples/bytes for output buffer after post-processing. */ /* take into account: channels, bytes per sample -- NOT resampling!*/ off_t samples_to_bytes(mpg123_handle *fr , off_t s) { @@ -393,3 +482,213 @@ { return b / fr->af.encsize / fr->af.channels; } + +/* Number of bytes needed for decoding _and_ post-processing. */ +off_t outblock_bytes(mpg123_handle *fr, off_t s) +{ + int encsize = (fr->af.encoding & MPG123_ENC_24) + ? 4 /* Intermediate 32 bit. */ + : (fr->af.encsize > fr->af.dec_encsize + ? 
fr->af.encsize + : fr->af.dec_encsize); + return s * encsize * fr->af.channels; +} + +#ifndef NO_32BIT +/* Remove every fourth byte, facilitating conversion from 32 bit to 24 bit integers. + This has to be aware of endianness, of course. */ +static void chop_fourth_byte(struct outbuffer *buf) +{ + unsigned char *wpos = buf->data; + unsigned char *rpos = buf->data; +#ifdef WORDS_BIGENDIAN + while((size_t) (rpos - buf->data + 4) <= buf->fill) + { + /* Really stupid: Copy, increment. Byte per byte. */ + *wpos = *rpos; + wpos++; rpos++; + *wpos = *rpos; + wpos++; rpos++; + *wpos = *rpos; + wpos++; rpos++; + rpos++; /* Skip the lowest byte (last). */ + } +#else + while((size_t) (rpos - buf->data + 4) <= buf->fill) + { + /* Really stupid: Copy, increment. Byte per byte. */ + rpos++; /* Skip the lowest byte (first). */ + *wpos = *rpos; + wpos++; rpos++; + *wpos = *rpos; + wpos++; rpos++; + *wpos = *rpos; + wpos++; rpos++; + } +#endif + buf->fill = wpos-buf->data; +} + +static void conv_s32_to_u32(struct outbuffer *buf) +{ + size_t i; + int32_t *ssamples = (int32_t*) buf->data; + uint32_t *usamples = (uint32_t*) buf->data; + size_t count = buf->fill/sizeof(int32_t); + + for(i=0; i<count; ++i) + { + /* Different strategy since we don't have a larger type at hand. + Also watch out for silly +-1 fun because integer constants are signed in C90! */ + if(ssamples[i] >= 0) + usamples[i] = (uint32_t)ssamples[i] + 2147483647+1; + /* The smallest value goes zero. */ + else if(ssamples[i] == ((int32_t)-2147483647-1)) + usamples[i] = 0; + /* Now -value is in the positive range of signed int ... so it's a possible value at all. */ + else + usamples[i] = (uint32_t)2147483647+1 - (uint32_t)(-ssamples[i]); + } +} + +#endif + + +/* We always assume that whole numbers are written! + partials will be cut out.
*/ + +static const char *bufsizeerr = "Fatal: Buffer too small for postprocessing!"; + + +#ifndef NO_16BIT + +static void conv_s16_to_u16(struct outbuffer *buf) +{ + size_t i; + int16_t *ssamples = (int16_t*) buf->data; + uint16_t *usamples = (uint16_t*)buf->data; + size_t count = buf->fill/sizeof(int16_t); + + for(i=0; idata; + float *out = (float*) buf->data; + size_t count = buf->fill/sizeof(int16_t); + /* Does that make any sense? In x86, there is an actual instruction to divide + float by integer ... but then, if we have that FPU, we don't really need + fixed point decoder hacks ...? */ + float scale = 1./SHORT_SCALE; + + if(buf->size < count*sizeof(float)) + { + error1("%s", bufsizeerr); + return; + } + + /* Work from the back since output is bigger. */ + for(i=count-1; i>=0; --i) + out[i] = (float)in[i] * scale; + + buf->fill = count*sizeof(float); +} +#endif + +#ifndef NO_32BIT +static void conv_s16_to_s32(struct outbuffer *buf) +{ + ssize_t i; + int16_t *in = (int16_t*) buf->data; + int32_t *out = (int32_t*) buf->data; + size_t count = buf->fill/sizeof(int16_t); + + if(buf->size < count*sizeof(int32_t)) + { + error1("%s", bufsizeerr); + return; + } + + /* Work from the back since output is bigger. */ + for(i=count-1; i>=0; --i) + { + out[i] = in[i]; + /* Could just shift bytes, but would have to mess with sign bit. */ + out[i] *= S32_RESCALE; + } + + buf->fill = count*sizeof(int32_t); +} +#endif +#endif + + +void postprocess_buffer(mpg123_handle *fr) +{ + /* + This caters for the final output formats that are never produced by + decoder synth directly (wide unsigned and 24 bit formats) or that are + missing because of limited decoder precision (16 bit synth but 32 or + 24 bit output). 
+ */ + switch(fr->af.dec_enc) + { +#ifndef NO_32BIT + case MPG123_ENC_SIGNED_32: + switch(fr->af.encoding) + { + case MPG123_ENC_UNSIGNED_32: + conv_s32_to_u32(&fr->buffer); + break; + case MPG123_ENC_UNSIGNED_24: + conv_s32_to_u32(&fr->buffer); + chop_fourth_byte(&fr->buffer); + break; + case MPG123_ENC_SIGNED_24: + chop_fourth_byte(&fr->buffer); + break; + } + break; +#endif +#ifndef NO_16BIT + case MPG123_ENC_SIGNED_16: + switch(fr->af.encoding) + { + case MPG123_ENC_UNSIGNED_16: + conv_s16_to_u16(&fr->buffer); + break; +#ifndef NO_REAL + case MPG123_ENC_FLOAT_32: + conv_s16_to_f32(&fr->buffer); + break; +#endif +#ifndef NO_32BIT + case MPG123_ENC_SIGNED_32: + conv_s16_to_s32(&fr->buffer); + break; + case MPG123_ENC_UNSIGNED_32: + conv_s16_to_s32(&fr->buffer); + conv_s32_to_u32(&fr->buffer); + break; + case MPG123_ENC_UNSIGNED_24: + conv_s16_to_s32(&fr->buffer); + conv_s32_to_u32(&fr->buffer); + chop_fourth_byte(&fr->buffer); + break; + case MPG123_ENC_SIGNED_24: + conv_s16_to_s32(&fr->buffer); + chop_fourth_byte(&fr->buffer); + break; +#endif + } + break; +#endif + } +} Index: lib/3rdparty/libmpg123/frame.c =================================================================== --- lib/3rdparty/libmpg123/frame.c (revision 62563) +++ lib/3rdparty/libmpg123/frame.c (working copy) @@ -1,7 +1,7 @@ /* frame: Heap of routines dealing with the core mpg123 data structure. 
- copyright 2008-2010 by the mpg123 project - free software under the terms of the LGPL 2.1 + copyright 2008-2014 by the mpg123 project - free software under the terms of the LGPL 2.1 see COPYING and AUTHORS files in distribution or http://mpg123.org initially written by Thomas Orgis */ @@ -33,14 +33,14 @@ else return base; } -void frame_default_pars(mpg123_pars *mp) +static void frame_default_pars(mpg123_pars *mp) { mp->outscale = 1.0; + mp->flags = 0; #ifdef GAPLESS - mp->flags = MPG123_GAPLESS; -#else - mp->flags = 0; + mp->flags |= MPG123_GAPLESS; #endif + mp->flags |= MPG123_AUTO_RESAMPLE; #ifndef NO_NTOM mp->force_rate = 0; #endif @@ -59,6 +59,11 @@ #endif mp->preframes = 4; /* That's good for layer 3 ISO compliance bitstream. */ mpg123_fmt_all(mp); + /* Default of keeping some 4K buffers at hand, should cover the "usual" use case (using 16K pipe buffers as role model). */ +#ifndef NO_FEEDER + mp->feedpool = 5; + mp->feedbuffer = 4096; +#endif } void frame_init(mpg123_handle *fr) @@ -68,8 +73,11 @@ void frame_init_par(mpg123_handle *fr, mpg123_pars *mp) { - fr->own_buffer = FALSE; + fr->own_buffer = TRUE; fr->buffer.data = NULL; + fr->buffer.rdata = NULL; + fr->buffer.fill = 0; + fr->buffer.size = 0; fr->rawbuffs = NULL; fr->rawbuffss = 0; fr->rawdecwin = NULL; @@ -111,6 +119,10 @@ if(mp == NULL) frame_default_pars(&fr->p); else memcpy(&fr->p, mp, sizeof(struct mpg123_pars_struct)); +#ifndef NO_FEEDER + bc_prepare(&fr->rdat.buffer, fr->p.feedpool, fr->p.feedbuffer); +#endif + fr->down_sample = 0; /* Initialize to silence harmless errors when debugging. */ frame_fixed_reset(fr); /* Reset only the fixed data, dynamic buffers are not there yet! 
*/ fr->synth = NULL; @@ -163,34 +175,51 @@ int frame_outbuffer(mpg123_handle *fr) { - size_t size = mpg123_safe_buffer()*AUDIOBUFSIZE; - if(!fr->own_buffer) fr->buffer.data = NULL; - if(fr->buffer.data != NULL && fr->buffer.size != size) + size_t size = fr->outblock; + if(!fr->own_buffer) { - free(fr->buffer.data); - fr->buffer.data = NULL; + if(fr->buffer.size < size) + { + fr->err = MPG123_BAD_BUFFER; + if(NOQUIET) error2("have external buffer of size %"SIZE_P", need %"SIZE_P, (size_p)fr->buffer.size, (size_p)size); + + return MPG123_ERR; + } } + + debug1("need frame buffer of %"SIZE_P, (size_p)size); + if(fr->buffer.rdata != NULL && fr->buffer.size != size) + { + free(fr->buffer.rdata); + fr->buffer.rdata = NULL; + } fr->buffer.size = size; - if(fr->buffer.data == NULL) fr->buffer.data = (unsigned char*) malloc(fr->buffer.size); - if(fr->buffer.data == NULL) + fr->buffer.data = NULL; + /* be generous: use 16 byte alignment */ + if(fr->buffer.rdata == NULL) fr->buffer.rdata = (unsigned char*) malloc(fr->buffer.size+15); + if(fr->buffer.rdata == NULL) { fr->err = MPG123_OUT_OF_MEM; - return -1; + return MPG123_ERR; } + fr->buffer.data = aligned_pointer(fr->buffer.rdata, unsigned char*, 16); fr->own_buffer = TRUE; fr->buffer.fill = 0; - return 0; + return MPG123_OK; } int attribute_align_arg mpg123_replace_buffer(mpg123_handle *mh, unsigned char *data, size_t size) { - if(data == NULL || size < mpg123_safe_buffer()) + debug2("replace buffer with %p size %"SIZE_P, data, (size_p)size); + /* Will accept any size, the error comes later... 
*/ + if(data == NULL) { mh->err = MPG123_BAD_BUFFER; return MPG123_ERR; } - if(mh->own_buffer && mh->buffer.data != NULL) free(mh->buffer.data); + if(mh->buffer.rdata != NULL) free(mh->buffer.rdata); mh->own_buffer = FALSE; + mh->buffer.rdata = NULL; mh->buffer.data = data; mh->buffer.size = size; mh->buffer.fill = 0; @@ -313,8 +342,9 @@ #endif #endif #if defined(OPT_ALTIVEC) || defined(OPT_ARM) - if(decwin_size < (512+32)*4) decwin_size = (512+32)*4; - decwin_size += 512*4; + /* sizeof(real) >= 4 ... yes, it could be 8, for example. + We got it intialized to at least (512+32)*sizeof(real).*/ + decwin_size += 512*sizeof(real); #endif /* Hm, that's basically realloc() ... */ if(fr->rawdecwin != NULL && fr->rawdecwins != decwin_size) @@ -406,7 +436,7 @@ /* Wondering: could it be actually _wanted_ to retain buffer contents over different files? (special gapless / cut stuff) */ fr->bsbuf = fr->bsspace[1]; fr->bsbufold = fr->bsbuf; - fr->bitreservoir = 0; /* Not entirely sure if this is the right place for that counter. */ + fr->bitreservoir = 0; frame_decode_buffers_reset(fr); memset(fr->bsspace, 0, 2*(MAXFRAMESIZE+512)); memset(fr->ssave, 0, 34); @@ -415,7 +445,7 @@ return 0; } -void frame_icy_reset(mpg123_handle* fr) +static void frame_icy_reset(mpg123_handle* fr) { #ifndef NO_ICY if(fr->icy.data != NULL) free(fr->icy.data); @@ -425,7 +455,7 @@ #endif } -void frame_free_toc(mpg123_handle *fr) +static void frame_free_toc(mpg123_handle *fr) { if(fr->xing_toc != NULL){ free(fr->xing_toc); fr->xing_toc = NULL; } } @@ -472,10 +502,11 @@ fr->to_decode = FALSE; fr->to_ignore = FALSE; fr->metaflags = 0; - fr->outblock = mpg123_safe_buffer(); + fr->outblock = 0; /* This will be set before decoding! 
*/ fr->num = -1; + fr->input_offset = -1; fr->playnum = -1; - fr->accurate = TRUE; + fr->state_flags = FRAME_ACCURATE; fr->silent_resync = 0; fr->audio_start = 0; fr->clip = 0; @@ -499,11 +530,12 @@ fr->fsizeold = 0; fr->firstframe = 0; fr->ignoreframe = fr->firstframe-fr->p.preframes; + fr->header_change = 0; fr->lastframe = -1; fr->fresh = 1; fr->new_format = 0; #ifdef GAPLESS - frame_gapless_init(fr,0,0); + frame_gapless_init(fr,-1,0,0); fr->lastoff = 0; fr->firstoff = 0; #endif @@ -526,7 +558,7 @@ fr->freeformat_framesize = -1; } -void frame_free_buffers(mpg123_handle *fr) +static void frame_free_buffers(mpg123_handle *fr) { if(fr->rawbuffs != NULL) free(fr->rawbuffs); fr->rawbuffs = NULL; @@ -543,12 +575,12 @@ void frame_exit(mpg123_handle *fr) { - if(fr->own_buffer && fr->buffer.data != NULL) + if(fr->buffer.rdata != NULL) { - debug1("freeing buffer at %p", (void*)fr->buffer.data); - free(fr->buffer.data); + debug1("freeing buffer at %p", (void*)fr->buffer.rdata); + free(fr->buffer.rdata); } - fr->buffer.data = NULL; + fr->buffer.rdata = NULL; frame_free_buffers(fr); frame_free_toc(fr); #ifdef FRAME_INDEX @@ -569,42 +601,23 @@ fr->wrapperclean(fr->wrapperdata); fr->wrapperdata = NULL; } +#ifndef NO_FEEDER + bc_cleanup(&fr->rdat.buffer); +#endif } -int attribute_align_arg mpg123_info(mpg123_handle *mh, struct mpg123_frameinfo *mi) +int attribute_align_arg mpg123_framedata(mpg123_handle *mh, unsigned long *header, unsigned char **bodydata, size_t *bodybytes) { - if(mh == NULL) return MPG123_ERR; - if(mi == NULL) - { - mh->err = MPG123_ERR_NULL; - return MPG123_ERR; - } - mi->version = mh->mpeg25 ? MPG123_2_5 : (mh->lsf ? 
MPG123_2_0 : MPG123_1_0); - mi->layer = mh->lay; - mi->rate = frame_freq(mh); - switch(mh->mode) - { - case 0: mi->mode = MPG123_M_STEREO; break; - case 1: mi->mode = MPG123_M_JOINT; break; - case 2: mi->mode = MPG123_M_DUAL; break; - case 3: mi->mode = MPG123_M_MONO; break; - default: error("That mode cannot be!"); - } - mi->mode_ext = mh->mode_ext; - mi->framesize = mh->framesize+4; /* Include header. */ - mi->flags = 0; - if(mh->error_protection) mi->flags |= MPG123_CRC; - if(mh->copyright) mi->flags |= MPG123_COPYRIGHT; - if(mh->extension) mi->flags |= MPG123_PRIVATE; - if(mh->original) mi->flags |= MPG123_ORIGINAL; - mi->emphasis = mh->emphasis; - mi->bitrate = frame_bitrate(mh); - mi->abr_rate = mh->abr_rate; - mi->vbr = mh->vbr; + if(mh == NULL) return MPG123_ERR; + if(!mh->to_decode) return MPG123_ERR; + + if(header != NULL) *header = mh->oldhead; + if(bodydata != NULL) *bodydata = mh->bsbuf; + if(bodybytes != NULL) *bodybytes = mh->framesize; + return MPG123_OK; } - /* Fuzzy frame offset searching (guessing). When we don't have an accurate position, we may use an inaccurate one. @@ -613,7 +626,7 @@ - guess wildly from mean framesize and offset of first frame / beginning of file. */ -off_t frame_fuzzy_find(mpg123_handle *fr, off_t want_frame, off_t* get_frame) +static off_t frame_fuzzy_find(mpg123_handle *fr, off_t want_frame, off_t* get_frame) { /* Default is to go to the beginning. */ off_t ret = fr->audio_start; @@ -633,7 +646,7 @@ /* Now estimate back what frame we get. */ *get_frame = (off_t) ((double)toc_entry/100. * fr->track_frames); - fr->accurate = FALSE; + fr->state_flags &= ~FRAME_ACCURATE; fr->silent_resync = 1; /* Question: Is the TOC for whole file size (with/without ID3) or the "real" audio data only? ID3v1 info could also matter. */ @@ -642,7 +655,7 @@ else if(fr->mean_framesize > 0) { /* Just guess with mean framesize (may be exact with CBR files). */ /* Query filelen here or not? */ - fr->accurate = FALSE; /* Fuzzy! 
*/ + fr->state_flags &= ~FRAME_ACCURATE; /* Fuzzy! */ fr->silent_resync = 1; *get_frame = want_frame; ret = (off_t) (fr->audio_start+fr->mean_framesize*want_frame); @@ -689,7 +702,7 @@ /* We have index position, that yields frame and byte offsets. */ *get_frame = fi*fr->index.step; gopos = fr->index.data[fi]; - fr->accurate = TRUE; /* When using the frame index, we are accurate. */ + fr->state_flags |= FRAME_ACCURATE; /* When using the frame index, we are accurate. */ } else { @@ -736,7 +749,7 @@ case 1: case 2: # endif - outs = (spf(fr)>>fr->down_sample)*num; + outs = (fr->spf>>fr->down_sample)*num; break; #ifndef NO_NTOM case 3: outs = ntom_frmouts(fr, num); break; @@ -758,7 +771,7 @@ case 1: case 2: # endif - outs = spf(fr)>>fr->down_sample; + outs = fr->spf>>fr->down_sample; break; #ifndef NO_NTOM case 3: outs = ntom_frame_outsamples(fr); break; @@ -778,7 +791,7 @@ case 1: case 2: # endif - num = outs/(spf(fr)>>fr->down_sample); + num = outs/(fr->spf>>fr->down_sample); break; #ifndef NO_NTOM case 3: num = ntom_frameoff(fr, outs); break; @@ -790,14 +803,21 @@ #ifdef GAPLESS /* input in _input_ samples */ -void frame_gapless_init(mpg123_handle *fr, off_t b, off_t e) +void frame_gapless_init(mpg123_handle *fr, off_t framecount, off_t bskip, off_t eskip) { - fr->begin_s = b; - fr->end_s = e; + debug3("frame_gapless_init: given %"OFF_P" frames, skip %"OFF_P" and %"OFF_P, (off_p)framecount, (off_p)bskip, (off_p)eskip); + fr->gapless_frames = framecount; + if(fr->gapless_frames > 0 && bskip >=0 && eskip >= 0) + { + fr->begin_s = bskip+GAPLESS_DELAY; + fr->end_s = framecount*fr->spf-eskip+GAPLESS_DELAY; + } + else fr->begin_s = fr->end_s = 0; /* These will get proper values later, from above plus resampling info. 
*/ fr->begin_os = 0; fr->end_os = 0; - debug2("frame_gapless_init: from %lu to %lu samples", (long unsigned)fr->begin_s, (long unsigned)fr->end_s); + fr->fullend_os = 0; + debug2("frame_gapless_init: from %"OFF_P" to %"OFF_P" samples", (off_p)fr->begin_s, (off_p)fr->end_s); } void frame_gapless_realinit(mpg123_handle *fr) @@ -804,23 +824,28 @@ { fr->begin_os = frame_ins2outs(fr, fr->begin_s); fr->end_os = frame_ins2outs(fr, fr->end_s); - debug2("frame_gapless_realinit: from %lu to %lu samples", (long unsigned)fr->begin_os, (long unsigned)fr->end_os); + if(fr->gapless_frames > 0) + fr->fullend_os = frame_ins2outs(fr, fr->gapless_frames*fr->spf); + else fr->fullend_os = 0; + + debug4("frame_gapless_realinit: from %"OFF_P" to %"OFF_P" samples (%"OFF_P", %"OFF_P")", (off_p)fr->begin_os, (off_p)fr->end_os, (off_p)fr->fullend_os, (off_p)fr->gapless_frames); } -/* When we got a new sample count, update the gaplessness. */ +/* At least note when there is trouble... */ void frame_gapless_update(mpg123_handle *fr, off_t total_samples) { - if(fr->end_s < 1) + off_t gapless_samples = fr->gapless_frames*fr->spf; + debug2("gapless update with new sample count %"OFF_P" as opposed to known %"OFF_P, total_samples, gapless_samples); + if(NOQUIET && total_samples != gapless_samples) + fprintf(stderr, "\nWarning: Real sample count %"OFF_P" differs from given gapless sample count %"OFF_P". Frankenstein stream?\n" + , total_samples, gapless_samples); + + if(gapless_samples > total_samples) { - fr->end_s = total_samples; + if(NOQUIET) error2("End sample count smaller than gapless end! (%"OFF_P" < %"OFF_P"). Disabling gapless mode from now on.", (off_p)total_samples, (off_p)fr->end_s); + /* This invalidates the current position... but what should I do? */ + frame_gapless_init(fr, -1, 0, 0); frame_gapless_realinit(fr); - } - else if(fr->end_s > total_samples) - { - if(NOQUIET) error2("end sample count smaller than gapless end! 
(%"OFF_P" < %"OFF_P").", (off_p)total_samples, (off_p)fr->end_s); - /* Humbly disabling gapless stuff on track end. */ - fr->end_s = 0; - frame_gapless_realinit(fr); fr->lastframe = -1; fr->lastoff = 0; } @@ -840,7 +865,7 @@ return fr->firstframe - preshift; } -/* The frame seek... This is not simply the seek to fe*spf(fr) samples in output because we think of _input_ frames here. +/* The frame seek... This is not simply the seek to fe*fr->spf samples in output because we think of _input_ frames here. Seek to frame offset 1 may be just seek to 200 samples offset in output since the beginning of first frame is delay/padding. Hm, is that right? OK for the padding stuff, but actually, should the decoder delay be better totally hidden or not? With gapless, even the whole frame position could be advanced further than requested (since Homey don't play dat). */ @@ -848,7 +873,7 @@ { fr->firstframe = fe; #ifdef GAPLESS - if(fr->p.flags & MPG123_GAPLESS) + if(fr->p.flags & MPG123_GAPLESS && fr->gapless_frames > 0) { /* Take care of the beginning... */ off_t beg_f = frame_offset(fr, fr->begin_os); @@ -863,7 +888,7 @@ { fr->lastframe = frame_offset(fr,fr->end_os); fr->lastoff = fr->end_os - frame_outs(fr, fr->lastframe); - } else fr->lastoff = 0; + } else {fr->lastframe = -1; fr->lastoff = 0; } } else { fr->firstoff = fr->lastoff = 0; fr->lastframe = -1; } #endif fr->ignoreframe = ignoreframe(fr); @@ -889,6 +914,7 @@ void frame_set_seek(mpg123_handle *fr, off_t sp) { fr->firstframe = frame_offset(fr, sp); + debug1("frame_set_seek: from %"OFF_P, fr->num); #ifndef NO_NTOM if(fr->down_sample == 3) ntom_set_ntom(fr, fr->firstframe); #endif @@ -902,8 +928,6 @@ debug3("frame_set_seek: begin at %li frames, end at %li; ignore from %li", (long) fr->firstframe, (long) fr->lastframe, (long) fr->ignoreframe); #endif - /* Old bit reservoir should be invalid, eh? 
*/ - fr->bitreservoir = 0; } int attribute_align_arg mpg123_volume_change(mpg123_handle *mh, double change) @@ -986,3 +1010,9 @@ return MPG123_OK; } +off_t attribute_align_arg mpg123_framepos(mpg123_handle *mh) +{ + if(mh == NULL) return MPG123_ERR; + + return mh->input_offset; +} Index: lib/3rdparty/libmpg123/getcpuflags.S =================================================================== --- lib/3rdparty/libmpg123/getcpuflags.S (revision 62563) +++ lib/3rdparty/libmpg123/getcpuflags.S (working copy) @@ -51,6 +51,7 @@ /* In principle, I would have to check the CPU's identify first to be sure how to interpret the extended flags. */ /* now get the info, first extended */ movl $0x0, 12(%esi) /* clear value */ + movl $0x0, 16(%esi) /* clear value */ /* only if supported... */ movl $0x80000000, %eax cpuid @@ -68,6 +69,15 @@ movl %eax, (%esi) movl %ecx, 4(%esi) movl %edx, 8(%esi) +/* check if xgetbv instruction is available */ + test $0x04000000, %ecx + jz .Lend + test $0x08000000, %ecx + jz .Lend + xor %ecx, %ecx + .byte 0x0f, 0x01, 0xd0 /* xgetbv instruction */ + movl %eax, 16(%esi) + movl (%esi), %eax jmp .Lend ALIGN4 .Lnocpuid: @@ -77,6 +87,7 @@ movl $0, 4(%esi) movl $0, 8(%esi) movl $0, 12(%esi) + movl $0, 16(%esi) ALIGN4 .Lend: /* return value are the id flags, still stored in %eax */ Index: lib/3rdparty/libmpg123/getcpuflags_arm.c =================================================================== --- lib/3rdparty/libmpg123/getcpuflags_arm.c (revision 0) +++ lib/3rdparty/libmpg123/getcpuflags_arm.c (working copy) @@ -0,0 +1,41 @@ +/* + getcpuflags_arm: get cpuflags for ARM + + copyright 1995-2014 by the mpg123 project - free software under the terms of the LGPL 2.1 + see COPYING and AUTHORS files in distribution or http://mpg123.org + initially written by Taihei Momma +*/ + +#include +#include +#include "mpg123lib_intern.h" +#include "getcpuflags.h" + +extern void check_neon(void); + +static sigjmp_buf jmpbuf; + +static void mpg123_arm_catch_sigill(int sig) +{ + 
siglongjmp(jmpbuf, 1); +} + +unsigned int getcpuflags(struct cpuflags* cf) +{ + struct sigaction act, act_old; + act.sa_handler = mpg123_arm_catch_sigill; + act.sa_flags = SA_RESTART; + sigemptyset(&act.sa_mask); + sigaction(SIGILL, &act, &act_old); + + cf->has_neon = 0; + + if(!sigsetjmp(jmpbuf, 1)) { + check_neon(); + cf->has_neon = 1; + } + + sigaction(SIGILL, &act_old, NULL); + + return 0; +} Index: lib/3rdparty/libmpg123/getcpuflags_x86_64.S =================================================================== --- lib/3rdparty/libmpg123/getcpuflags_x86_64.S (revision 0) +++ lib/3rdparty/libmpg123/getcpuflags_x86_64.S (working copy) @@ -0,0 +1,57 @@ +/* + getcpuflags_x86_64: get cpuflags for x86-64 + + copyright 1995-2013 by the mpg123 project - free software under the terms of the LGPL 2.1 + see COPYING and AUTHORS files in distribution or http://mpg123.org + initially written by Taihei Monma +*/ + +#include "mangle.h" + + .text + ALIGN4 + .globl ASM_NAME(getcpuflags) +ASM_NAME(getcpuflags): + push %rbp + mov %rsp, %rbp + push %rbx + +#ifdef IS_MSABI + push %rdi + mov %rcx, %rdi +#endif + + movl $0, 12(%rdi) + movl $0, 16(%rdi) + + mov $0x80000000, %eax + cpuid + cmp $0x80000001, %eax + jb 1f + mov $0x80000001, %eax + cpuid + movl %edx, 12(%rdi) +1: + mov $0x00000001, %eax + cpuid + movl %eax, (%rdi) + movl %ecx, 4(%rdi) + movl %edx, 8(%rdi) + test $0x04000000, %ecx + jz 2f + test $0x08000000, %ecx + jz 2f + xor %ecx, %ecx + .byte 0x0f, 0x01, 0xd0 /* xgetbv instruction */ + movl %eax, 16(%rdi) + movl (%rdi), %eax +2: +#ifdef IS_MSABI + pop %rdi +#endif + pop %rbx + mov %rbp, %rsp + pop %rbp + ret + +NONEXEC_STACK Index: lib/3rdparty/libmpg123/id3.c =================================================================== --- lib/3rdparty/libmpg123/id3.c (revision 62563) +++ lib/3rdparty/libmpg123/id3.c (working copy) @@ -1,7 +1,7 @@ /* id3: ID3v2.3 and ID3v2.4 parsing (a relevant subset) - copyright 2006-2008 by the mpg123 project - free software under the terms of 
the LGPL 2.1 + copyright 2006-2013 by the mpg123 project - free software under the terms of the LGPL 2.1 see COPYING and AUTHORS files in distribution or http://mpg123.org initially written by Thomas Orgis */ @@ -13,9 +13,9 @@ #ifndef NO_ID3V2 /* Only the main parsing routine will always be there. */ /* We know the usual text frames plus some specifics. */ -#define KNOWN_FRAMES 4 -static const char frame_type[KNOWN_FRAMES][5] = { "COMM", "TXXX", "RVA2", "USLT" }; -enum frame_types { unknown = -2, text = -1, comment, extra, rva2, uslt }; +#define KNOWN_FRAMES 5 +static const char frame_type[KNOWN_FRAMES][5] = { "COMM", "TXXX", "RVA2", "USLT", "APIC" }; +enum frame_types { unknown = -2, text = -1, comment, extra, rva2, uslt, picture }; /* UTF support definitions */ @@ -25,7 +25,7 @@ static void convert_utf16bom(mpg123_string *sb, const unsigned char* source, size_t len, const int noquiet); static void convert_utf8 (mpg123_string *sb, const unsigned char* source, size_t len, const int noquiet); -static const text_converter text_converters[4] = +static const text_converter text_converters[4] = { convert_latin1, /* We always check for (multiple) BOM in 16bit unicode. Without BOM, UTF16 BE is the default. @@ -35,7 +35,7 @@ convert_utf8 }; -const unsigned int encoding_widths[4] = { 1, 2, 2, 1 }; +static const unsigned int encoding_widths[4] = { 1, 2, 2, 1 }; /* the code starts here... */ @@ -59,6 +59,8 @@ fr->id3v2.text = NULL; fr->id3v2.extras = 0; fr->id3v2.extra = NULL; + fr->id3v2.pictures = 0; + fr->id3v2.picture = NULL; } /* Managing of the text, comment and extra lists. */ @@ -77,6 +79,15 @@ txt->lang[2] = 0; } +static void init_mpg123_picture(mpg123_picture *pic) +{ + mpg123_init_string(&pic->mime_type); + mpg123_init_string(&pic->description); + pic->type = 0; + pic->size = 0; + pic->data = NULL; +} + /* Free memory of one element. 
*/ static void free_mpg123_text(mpg123_text *txt) { @@ -84,10 +95,19 @@ mpg123_free_string(&txt->description); } +static void free_mpg123_picture(mpg123_picture * pic) +{ + mpg123_free_string(&pic->mime_type); + mpg123_free_string(&pic->description); + if (pic->data != NULL) + free(pic->data); +} + /* Free memory of whole list. */ #define free_comment(mh) free_id3_text(&((mh)->id3v2.comment_list), &((mh)->id3v2.comments)) #define free_text(mh) free_id3_text(&((mh)->id3v2.text), &((mh)->id3v2.texts)) #define free_extra(mh) free_id3_text(&((mh)->id3v2.extra), &((mh)->id3v2.extras)) +#define free_picture(mh) free_id3_picture(&((mh)->id3v2.picture), &((mh)->id3v2.pictures)) static void free_id3_text(mpg123_text **list, size_t *size) { size_t i; @@ -97,11 +117,21 @@ *list = NULL; *size = 0; } +static void free_id3_picture(mpg123_picture **list, size_t *size) +{ + size_t i; + for(i=0; i<*size; ++i) free_mpg123_picture(&((*list)[i])); + free(*list); + *list = NULL; + *size = 0; +} + /* Add items to the list. */ #define add_comment(mh) add_id3_text(&((mh)->id3v2.comment_list), &((mh)->id3v2.comments)) #define add_text(mh) add_id3_text(&((mh)->id3v2.text), &((mh)->id3v2.texts)) #define add_extra(mh) add_id3_text(&((mh)->id3v2.extra), &((mh)->id3v2.extras)) +#define add_picture(mh) add_id3_picture(&((mh)->id3v2.picture), &((mh)->id3v2.pictures)) static mpg123_text *add_id3_text(mpg123_text **list, size_t *size) { mpg123_text *x = safe_realloc(*list, sizeof(mpg123_text)*(*size+1)); @@ -113,11 +143,24 @@ return &((*list)[*size-1]); /* Return pointer to the added text. */ } +static mpg123_picture *add_id3_picture(mpg123_picture **list, size_t *size) +{ + mpg123_picture *x = safe_realloc(*list, sizeof(mpg123_picture)*(*size+1)); + if(x == NULL) return NULL; /* bad */ + *list = x; + *size += 1; + init_mpg123_picture(&((*list)[*size-1])); + + return &((*list)[*size-1]); /* Return pointer to the added picture. */ +} + + /* Remove the last item. 
*/ #define pop_comment(mh) pop_id3_text(&((mh)->id3v2.comment_list), &((mh)->id3v2.comments)) #define pop_text(mh) pop_id3_text(&((mh)->id3v2.text), &((mh)->id3v2.texts)) #define pop_extra(mh) pop_id3_text(&((mh)->id3v2.extra), &((mh)->id3v2.extras)) +#define pop_picture(mh) pop_id3_picture(&((mh)->id3v2.picture), &((mh)->id3v2.pictures)) static void pop_id3_text(mpg123_text **list, size_t *size) { mpg123_text *x; @@ -136,11 +179,30 @@ *size = 0; } } +static void pop_id3_picture(mpg123_picture **list, size_t *size) +{ + mpg123_picture *x; + if(*size < 1) return; -/* OK, back t the higher level functions. */ + free_mpg123_picture(&((*list)[*size-1])); + if(*size > 1) + { + x = safe_realloc(*list, sizeof(mpg123_picture)*(*size-1)); + if(x != NULL){ *list = x; *size -= 1; } + } + else + { + free(*list); + *list = NULL; + *size = 0; + } +} +/* OK, back to the higher level functions. */ + void exit_id3(mpg123_handle *fr) { + free_picture(fr); free_comment(fr); free_extra(fr); free_text(fr); @@ -186,7 +248,7 @@ ID3v2 standard says that there should be one text frame of specific type per tag, and subsequent tags overwrite old values. So, I always replace the text that may be stored already (perhaps with a list of zero-separated strings, though). 
*/ -void store_id3_text(mpg123_string *sb, char *source, size_t source_size, const int noquiet, const int notranslate) +static void store_id3_text(mpg123_string *sb, unsigned char *source, size_t source_size, const int noquiet, const int notranslate) { if(!source_size) { @@ -209,7 +271,7 @@ return; } - id3_to_utf8(sb, ((unsigned char *)source)[0], (unsigned char*)source+1, source_size-1, noquiet); + id3_to_utf8(sb, source[0], source+1, source_size-1, noquiet); if(sb->fill) debug1("UTF-8 string (the first one): %s", sb->p); else if(noquiet) error("unable to convert string to UTF-8 (out of memory, junk input?)!"); @@ -247,14 +309,14 @@ text_converters[encoding](sb, source, source_size, noquiet); } -char *next_text(char* prev, int encoding, size_t limit) +static unsigned char *next_text(unsigned char* prev, unsigned char encoding, size_t limit) { - char *text = prev; + unsigned char *text = prev; size_t width = encoding_widths[encoding]; /* So I go lengths to find zero or double zero... Remember bug 2834636: Only check for aligned NULLs! */ - while(text-prev < (long)limit) + while(text-prev < (ssize_t)limit) { if(text[0] == 0) { @@ -279,7 +341,7 @@ return text; } -static const char *enc_name(int enc) +static const char *enc_name(unsigned char enc) { switch(enc) { @@ -291,7 +353,7 @@ } } -static void process_text(mpg123_handle *fr, char *realdata, size_t realsize, char *id) +static void process_text(mpg123_handle *fr, unsigned char *realdata, size_t realsize, char *id) { /* Text encoding $xx */ /* The text (encoded) ... 
*/ @@ -307,24 +369,82 @@ if(VERBOSE4) fprintf(stderr, "Note: ID3v2 %c%c%c%c text frame: %s\n", id[0], id[1], id[2], id[3], t->text.p); } +static void process_picture(mpg123_handle *fr, unsigned char *realdata, size_t realsize) +{ + unsigned char encoding = realdata[0]; + mpg123_picture *i = NULL; + unsigned char* workpoint; + if(realsize == 0) + { + debug("Empty id3 data!"); + return; + } + if(VERBOSE4) fprintf(stderr, "Note: Storing picture from APIC frame.\n"); + /* decompose realdata accordingly */ + i = add_picture(fr); + if(i == NULL) + { + if(NOQUIET) error("Unable to attach new picture!"); + return; + } + realdata++; realsize--; + /* get mime type (encoding is always latin-1) */ + workpoint = next_text(realdata, 0, realsize); + if (workpoint == NULL) { + pop_picture(fr); + if (NOQUIET) error("Unable to get mime type for picture; skipping picture."); + return; + } + id3_to_utf8(&i->mime_type, 0, realdata, workpoint - realdata, NOQUIET); + realsize -= workpoint - realdata; + realdata = workpoint; + /* get picture type */ + i->type = realdata[0]; + realdata++; realsize--; + /* get description (encoding is encoding) */ + workpoint = next_text(realdata, encoding, realsize); + if (workpoint == NULL) { + if (NOQUIET) error("Unable to get description for picture; skipping picture."); + pop_picture(fr); + return; + } + id3_to_utf8(&i->description, encoding, realdata, workpoint - realdata, NOQUIET); + realsize -= workpoint - realdata; + if (realsize == 0) { + if (NOQUIET) error("No picture data defined; skipping picture."); + pop_picture(fr); + return; + } + /* store_id3_picture(i, picture, realsize, NOQUIET)) */ + i->data = (unsigned char*)malloc(realsize); + if (i->data == NULL) { + if (NOQUIET) error("Unable to allocate memory for picture; skipping picture"); + pop_picture(fr); + return; + } + memcpy(i->data, workpoint, realsize); + i->size = realsize; + if(VERBOSE4) fprintf(stderr, "Note: ID3v2 APIC picture frame of type: %d\n", i->type); +} + /* Store a new 
comment that perhaps is a RVA / RVA_ALBUM/AUDIOPHILE / RVA_MIX/RADIO one Special gimmik: It also stores USLT to the texts. Stucture is the same as for comments. */ -static void process_comment(mpg123_handle *fr, enum frame_types tt, char *realdata, size_t realsize, int rva_level, char *id) +static void process_comment(mpg123_handle *fr, enum frame_types tt, unsigned char *realdata, size_t realsize, int rva_level, char *id) { /* Text encoding $xx */ /* Language $xx xx xx */ /* Short description (encoded!) $00 (00) */ /* Then the comment text (encoded) ... */ - char encoding = realdata[0]; - char *lang = realdata+1; /* I'll only use the 3 bytes! */ - char *descr = realdata+4; - char *text = NULL; + unsigned char encoding = realdata[0]; + unsigned char *lang = realdata+1; /* I'll only use the 3 bytes! */ + unsigned char *descr = realdata+4; + unsigned char *text = NULL; mpg123_text *xcom = NULL; mpg123_text localcom; /* UTF-8 variant for local processing. */ - if((int)realsize < descr-realdata) + if(realsize < (size_t)(descr-realdata)) { - if(NOQUIET) error1("Invalid frame size of %lu (too small for anything).", (unsigned long)realsize); + if(NOQUIET) error1("Invalid frame size of %"SIZE_P" (too small for anything).", (size_p)realsize); return; } xcom = (tt == uslt ? add_text(fr) : add_comment(fr)); @@ -393,14 +513,14 @@ free_mpg123_text(&localcom); } -void process_extra(mpg123_handle *fr, char* realdata, size_t realsize, int rva_level, char *id) +static void process_extra(mpg123_handle *fr, unsigned char* realdata, size_t realsize, int rva_level, char *id) { /* Text encoding $xx */ /* Description ... $00 (00) */ /* Text ... 
*/ - char encoding = realdata[0]; - char *descr = realdata+1; /* remember, the encoding is descr[-1] */ - char *text; + unsigned char encoding = realdata[0]; + unsigned char *descr = realdata+1; /* remember, the encoding is descr[-1] */ + unsigned char *text; mpg123_text *xex; mpg123_text localex; @@ -485,7 +605,7 @@ Note that not all frames survived to 2.4; the mapping goes to 2.3 . A notable miss is the old RVA frame, which is very unspecific anyway. This function returns -1 when a not known 3 char ID was encountered, 0 otherwise. */ -int promote_framename(mpg123_handle *fr, char *id) /* fr because of VERBOSE macros */ +static int promote_framename(mpg123_handle *fr, char *id) /* fr because of VERBOSE macros */ { size_t i; char *old[] = @@ -536,7 +656,6 @@ unsigned char flags = 0; int ret = 1; int ret2; - unsigned char* tagdata = NULL; unsigned char major = first4bytes & 0xff; debug1("ID3v2: major tag version: %i", major); if(major == 0xff) return 0; /* Invalid... */ @@ -588,10 +707,17 @@ #ifndef NO_ID3V2 if(VERBOSE2) fprintf(stderr,"Note: ID3v2.%i rev %i tag of %lu bytes\n", major, buf[0], length); /* skip if unknown version/scary flags, parse otherwise */ - if((flags & UNKNOWN_FLAGS) || (major > 4) || (major < 2)) + if(fr->p.flags & MPG123_SKIP_ID3V2 || ((flags & UNKNOWN_FLAGS) || (major > 4) || (major < 2))) { - /* going to skip because there are unknown flags set */ - if(NOQUIET) warning2("ID3v2: Won't parse the ID3v2 tag with major version %u and flags 0x%xu - some extra code may be needed", major, flags); + if(NOQUIET) + { + if(fr->p.flags & MPG123_SKIP_ID3V2) + { + if(VERBOSE3) fprintf(stderr, "Note: Skipping ID3v2 tag per user request.\n"); + } + else /* Must be because of scary Tag properties. */ + warning2("ID3v2: Won't parse the ID3v2 tag with major version %u and flags 0x%xu - some extra code may be needed", major, flags); + } #endif if((ret2 = fr->rd->skip_bytes(fr,length)) < 0) /* will not store data in backbuff! 
*/ ret = ret2; @@ -599,6 +725,7 @@ } else { + unsigned char* tagdata = NULL; fr->id3v2.version = major; /* try to interpret that beast */ if((tagdata = (unsigned char*) malloc(length+1)) != NULL) @@ -647,6 +774,7 @@ { /* 4 or 3 bytes id */ strncpy(id, (char*) tagdata+pos, head_part); + id[head_part] = 0; /* terminate for 3 or 4 bytes */ pos += head_part; tagpos += head_part; /* size as 32 bits or 28 bits */ @@ -693,7 +821,7 @@ if(NOQUIET) warning("ID3v2: skipping invalid/unsupported frame"); continue; } - + for(i = 0; i < KNOWN_FRAMES; ++i) if(!strncmp(frame_type[i], id, 4)){ tt = i; break; } @@ -736,10 +864,10 @@ { case comment: case uslt: - process_comment(fr, tt, (char*)realdata, realsize, comment+1, id); + process_comment(fr, tt, realdata, realsize, comment+1, id); break; case extra: /* perhaps foobar2000's work */ - process_extra(fr, (char*)realdata, realsize, extra+1, id); + process_extra(fr, realdata, realsize, extra+1, id); break; case rva2: /* "the" RVA tag */ { @@ -773,8 +901,13 @@ break; /* non-rva metainfo, simply store... */ case text: - process_text(fr, (char*)realdata, realsize, id); + process_text(fr, realdata, realsize, id); break; + case picture: + if (fr->p.flags & MPG123_PICTURE) + process_picture(fr, realdata, realsize); + + break; default: if(NOQUIET) error1("ID3v2: unknown frame type %i", tt); } if((flags & UNSYNC_FLAG) || (fflags & UNSYNC_FFLAG)) free(realdata); @@ -797,7 +930,7 @@ else { /* There are tags with zero length. Strictly not an error, then. */ - if(length > 0 && NOQUIET) error("ID3v2: Duh, not able to read ID3v2 tag data."); + if(length > 0 && NOQUIET && ret2 != MPG123_NEED_MORE) error("ID3v2: Duh, not able to read ID3v2 tag data."); ret = ret2; } tagparse_cleanup: @@ -858,7 +991,7 @@ 1: big endian This modifies source and len to indicate the data _after_ the BOM(s). - Note on nasty data: The last encountered BOM determines the endianess. + Note on nasty data: The last encountered BOM determines the endianness. 
I have seen data with multiple BOMS, namely from "the" id3v2 program. Not nice, but what should I do? */ @@ -905,7 +1038,7 @@ debug1("convert_utf16 with length %lu", (unsigned long)l); bom_endian = check_bom(&s, &l); - debug1("UTF16 endianess check: %i", bom_endian); + debug1("UTF16 endianness check: %i", bom_endian); if(bom_endian == -1) /* little-endian */ { @@ -963,7 +1096,7 @@ *p++ = 0x80 | ((codepoint>>6) & 0x3f); *p++ = 0x80 | (codepoint & 0x3f); } - else if (codepoint < 0x200000) + else if (codepoint < 0x200000) { *p++ = (unsigned char) (0xf0 | codepoint>>18); *p++ = (unsigned char) (0x80 | ((codepoint>>12) & 0x3f)); Index: lib/3rdparty/libmpg123/layer1.c =================================================================== --- lib/3rdparty/libmpg123/layer1.c (revision 62563) +++ lib/3rdparty/libmpg123/layer1.c (working copy) @@ -10,9 +10,32 @@ #include "mpg123lib_intern.h" #include "getbits.h" +#include "debug.h" -void I_step_one(unsigned int balloc[], unsigned int scale_index[2][SBLIMIT],mpg123_handle *fr) +/* + Allocation value is not allowed to be 15. Initially, libmad showed me the + error that mpg123 used to ignore. Then, I found a quote on that in + Shlien, S. (1994): Guide to MPEG-1 Audio Standard. + IEEE Transactions on Broadcasting 40, 4 + + "To avoid conflicts with the synchronization code, code '1111' is defined + to be illegal." 
+*/ +static int check_balloc(mpg123_handle *fr, unsigned int *balloc, unsigned int *end) { + unsigned int *ba; + for(ba=balloc; ba != end; ++ba) + if(*ba == 15) + { + if(NOQUIET) error("Illegal bit allocation value."); + return -1; + } + + return 0; +} + +static int I_step_one(unsigned int balloc[], unsigned int scale_index[2][SBLIMIT],mpg123_handle *fr) +{ unsigned int *ba=balloc; unsigned int *sca = (unsigned int *) scale_index; @@ -21,12 +44,14 @@ int i; int jsbound = fr->jsbound; for(i=0;istereo-1; int sblimit = fr->II_sblimit; @@ -191,7 +191,7 @@ } -void II_step_two(unsigned int *bit_alloc,real fraction[2][4][SBLIMIT],int *scale,mpg123_handle *fr,int x1) +static void II_step_two(unsigned int *bit_alloc,real fraction[2][4][SBLIMIT],int *scale,mpg123_handle *fr,int x1) { int i,j,k,ba; int stereo = fr->stereo; Index: lib/3rdparty/libmpg123/layer3.c =================================================================== --- lib/3rdparty/libmpg123/layer3.c (revision 62563) +++ lib/3rdparty/libmpg123/layer3.c (working copy) @@ -15,10 +15,16 @@ */ #include "mpg123lib_intern.h" +#ifdef USE_NEW_HUFFTABLE +#include "newhuffman.h" +#else #include "huffman.h" +#endif #include "getbits.h" #include "debug.h" + + /* define CUT_SFB21 if you want to cut-off the frequency above 16kHz */ #if 0 #define CUT_SFB21 @@ -31,8 +37,8 @@ /* static one-time calculated tables... or so */ static real ispow[8207]; static real aa_ca[8],aa_cs[8]; -static real win[4][36]; -static real win1[4][36]; +static ALIGNED(16) real win[4][36]; +static ALIGNED(16) real win1[4][36]; real COS9[9]; /* dct36_3dnow wants to use that */ static real COS6_1,COS6_2; real tfcos36[9]; /* dct36_3dnow wants to use that */ @@ -79,14 +85,14 @@ struct bandInfoStruct { - int longIdx[23]; - int longDiff[22]; - int shortIdx[14]; - int shortDiff[13]; + unsigned short longIdx[23]; + unsigned char longDiff[22]; + unsigned short shortIdx[14]; + unsigned char shortDiff[13]; }; /* Techy details about our friendly MPEG data. 
Fairly constant over the years;-) */ -const struct bandInfoStruct bandInfo[9] = +static const struct bandInfoStruct bandInfo[9] = { { /* MPEG 1.0 */ {0,4,8,12,16,20,24,30,36,44,52,62,74, 90,110,134,162,196,238,288,342,418,576}, @@ -270,7 +276,7 @@ const struct bandInfoStruct *bi = &bandInfo[j]; int *mp; int cb,lwin; - const int *bdf; + const unsigned char *bdf; mp = map[j][0] = mapbuf0[j]; bdf = bi->longDiff; @@ -403,7 +409,7 @@ if(si->main_data_begin > fr->bitreservoir) { - if(VERBOSE2) fprintf(stderr, "Note: missing %d bytes in bit reservoir for frame %li\n", (int)(si->main_data_begin - fr->bitreservoir), (long)fr->num); + if(!fr->to_ignore && VERBOSE2) fprintf(stderr, "Note: missing %d bytes in bit reservoir for frame %li\n", (int)(si->main_data_begin - fr->bitreservoir), (long)fr->num); /* overwrite main_data_begin for the really available bit reservoir */ backbits(fr, tab[1]); @@ -494,6 +500,7 @@ if((gr_info->block_type == 2) && (!gr_info->mixed_block_flag) ) r0c = 5; else r0c = 7; + /* r0c+1+r1c+1 == 22, always. */ r1c = 20 - r0c; gr_info->region1start = bandInfo[sfreq].longIdx[r0c+1] >> 1 ; gr_info->region2start = bandInfo[sfreq].longIdx[r0c+1+r1c+1] >> 1; @@ -511,12 +518,12 @@ for (i=0; i<3; i++) gr_info->table_select[i] = getbits_fast(fr, 5); - r0c = getbits_fast(fr, 4); - r1c = getbits_fast(fr, 3); + r0c = getbits_fast(fr, 4); /* 0 .. 15 */ + r1c = getbits_fast(fr, 3); /* 0 .. 
7 */ gr_info->region1start = bandInfo[sfreq].longIdx[r0c+1] >> 1 ; - gr_info->region2start = bandInfo[sfreq].longIdx[r0c+1+r1c+1] >> 1; - if(r0c + r1c + 2 > 22) gr_info->region2start = 576>>1; + /* max(r0c+r1c+2) = 15+7+2 = 24 */ + if(r0c+1+r1c+1 > 22) gr_info->region2start = 576>>1; else gr_info->region2start = bandInfo[sfreq].longIdx[r0c+1+r1c+1] >> 1; gr_info->block_type = 0; @@ -677,8 +684,11 @@ return numbits; } -static const int pretab1[22] = {0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,2,2,3,3,3,2,0}; -static const int pretab2[22] = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; +static unsigned char pretab_choice[2][22] = +{ + {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, + {0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,2,2,3,3,3,2,0} +}; /* Dequantize samples @@ -716,16 +726,6 @@ int bv = gr_info->big_values; int region1 = gr_info->region1start; int region2 = gr_info->region2start; - if(region1 > region2) - { - /* - That's not optimal: it fixes a segfault with fuzzed data, but also apparently triggers where it shouldn't, see bug 1641196. - The benefit of not crashing / having this security risk is bigger than these few frames of a lame-3.70 file that aren't audible anyway. - But still, I want to know if indeed this check or the old lame is at fault. - */ - if(NOQUIET) error("You got some really nasty file there... region1>region2!"); - return 1; - } l3 = ((576>>1)-bv)>>1; /* we may lose the 'odd' bit here !! 
check this later again */ @@ -778,10 +778,10 @@ for(i=0;i<2;i++) { int lp = l[i]; - struct newhuff *h = ht+gr_info->table_select[i]; + const struct newhuff *h = ht+gr_info->table_select[i]; for(;lp;lp--,mc--) { - register int x,y; + register long x,y; if( (!mc) ) { mc = *m++; @@ -806,8 +806,20 @@ } } { - register short *val = h->table; + const short *val = h->table; REFRESH_MASK; +#ifdef USE_NEW_HUFFTABLE + while((y=val[(unsigned long)mask>>(BITSHIFT+4)])<0) + { + val -= y; + num -= 4; + mask <<= 4; + } + num -= (y >> 8); + mask <<= (y >> 8); + x = (y >> 4) & 0xf; + y &= 0xf; +#else while((y=*val++)<0) { if (mask < 0) val -= y; @@ -817,6 +829,7 @@ } x = y >> 4; y &= 0xf; +#endif } if(x == 15 && h->linbits) { @@ -871,8 +884,8 @@ for(;l3 && (part2remain+num > 0);l3--) { - struct newhuff* h; - register short* val; + const struct newhuff* h; + const short* val; register short a; /* This is only a humble hack to prevent a special segfault. @@ -990,7 +1003,7 @@ else { /* decoding with 'long' BandIndex table (block_type != 2) */ - const int *pretab = gr_info->preflag ? 
pretab1 : pretab2; + const unsigned char *pretab = pretab_choice[gr_info->preflag]; int i,max = -1; int cb = 0; int *m = map[sfreq][2]; @@ -1001,11 +1014,11 @@ for(i=0;i<3;i++) { int lp = l[i]; - struct newhuff *h = ht+gr_info->table_select[i]; + const struct newhuff *h = ht+gr_info->table_select[i]; for(;lp;lp--,mc--) { - int x,y; + long x,y; if(!mc) { mc = *m++; @@ -1023,8 +1036,20 @@ } } { - register short *val = h->table; + const short *val = h->table; REFRESH_MASK; +#ifdef USE_NEW_HUFFTABLE + while((y=val[(unsigned long)mask>>(BITSHIFT+4)])<0) + { + val -= y; + num -= 4; + mask <<= 4; + } + num -= (y >> 8); + mask <<= (y >> 8); + x = (y >> 4) & 0xf; + y &= 0xf; +#else while((y=*val++)<0) { if (mask < 0) val -= y; @@ -1034,6 +1059,7 @@ } x = y >> 4; y &= 0xf; +#endif } if(x == 15 && h->linbits) @@ -1087,8 +1113,9 @@ /* short (count1table) values */ for(;l3 && (part2remain+num > 0);l3--) { - struct newhuff *h = htc+gr_info->count1table_select; - register short *val = h->table,a; + const struct newhuff *h = htc+gr_info->count1table_select; + const short *val = h->table; + register short a; REFRESH_MASK; while((a=*val++)<0) Index: lib/3rdparty/libmpg123/lfs_alias.c =================================================================== --- lib/3rdparty/libmpg123/lfs_alias.c (revision 62563) +++ lib/3rdparty/libmpg123/lfs_alias.c (working copy) @@ -1,25 +1,39 @@ /* lfs_alias: Aliases to the small/native API functions with the size of long int as suffix. - copyright 2010 by the mpg123 project - free software under the terms of the LGPL 2.1 + copyright 2010-2013 by the mpg123 project - free software under the terms of the LGPL 2.1 see COPYING and AUTHORS files in distribution or http://mpg123.org initially written by Thomas Orgis - Use case: Client code on Linux/x86-64 that defines _FILE_OFFSET_BITS to 64, which is the only choice on that platform anyway. 
It should be no-op, but prompts the platform-agnostic header of mpg123 to define API calls with the corresponding suffix. - This file provides the names for this case. It's cruft, but glibc does it, too -- so people rely on it. - Oh, and it also caters for the lunatics that define _FILE_OFFSET_BITS=32 on 32 bit platforms. + Use case: Client code on Linux/x86-64 that defines _FILE_OFFSET_BITS to 64, + which is the only choice on that platform anyway. It should be no-op, but + prompts the platform-agnostic header of mpg123 to define API calls with the + corresponding suffix. This file provides the names for this case. It's cruft, + but glibc does it, too -- so people rely on it. + Oh, and it also caters for the lunatics that define _FILE_OFFSET_BITS=32 on + 32 bit platforms. In addition, it's needed for platforms that always have + off_t /= long, and clients still insisting on defining _FILE_OFFSET_BITS. - There is also the strange case that the mpg123 build itself is configured for unnecessary _FILE_OFFSET_BITS == LFS_ALIAS_BITS =^ sizeof(long). In that case, the "native" function will have the suffix and the alias shall be provided without the suffix. + Depending on use case, the aliases map to 32 (small) or 64 bit (large) offset + functions, to the ones from libmpg123 or the ones from lfs_wrap. + + So, two basic cases: + 1. mpg123_bla_32 alias for mpg123_bla (native) + 2. mpg123_bla alias for mpg123_bla_32 (wrapper) + Same for 64 bits. Confusing, I know. It sucks. - So, two basic cases: - 1. mpg123_bla_32 alias for mpg123_bla - 2. mpg123_bla alias for mpg123_bla_32 - Confusing, I know. It sucks. + Note that the mpg123 header is _not_ used here to avoid definition with whacky off_t. + The aliases are always about arguments of native alias_t type. This can be off_t, but + on Linux/x86, this is long int. The off_t declarations in mpg123.h confuse things, + so reproduce definitions for the wrapper functions in that case. 
The definitions are + pulled by an inline Perl script in any case ... no need to copy anything manually! + As a benefit, one can skip undefining possible largefile namings. */ #include "config.h" -/* Hack for Solaris: Some system headers included from compat.h might force _FILE_OFFSET_BITS. Need to follow that here. */ +/* Hack for Solaris: Some system headers included from compat.h might force _FILE_OFFSET_BITS. Need to follow that here. + Also, want it around to have types defined. */ #include "compat.h" #ifndef LFS_ALIAS_BITS @@ -33,10 +47,6 @@ #if _FILE_OFFSET_BITS+0 == LFS_ALIAS_BITS -/* The native functions are actually _with_ suffix, so let the mpg123 header use large file hackery to define the correct interfaces. */ -#include "mpg123.h" -/* Don't forget to undef the function symbols before usage... */ - /* The native functions have suffix, the aliases not. */ #define NATIVE_SUFFIX MACROCAT(_, _FILE_OFFSET_BITS) #define NATIVE_NAME(func) MACROCAT(func, NATIVE_SUFFIX) @@ -44,10 +54,6 @@ #else -/* Native functions are without suffix... */ -#define MPG123_NO_LARGENAME -#include "mpg123.h" - /* The alias functions have suffix, the native ones not. */ #define ALIAS_SUFFIX MACROCAT(_, LFS_ALIAS_BITS) #define ALIAS_NAME(func) MACROCAT(func, ALIAS_SUFFIX) @@ -55,9 +61,14 @@ #endif -/* Now get the rest of the infrastructure on speed, namely attribute_align_arg, to stay safe. */ -#include "mpg123lib_intern.h" +/* Copy of necessary definitions, actually just forward declarations. */ +struct mpg123_handle_struct; +typedef struct mpg123_handle_struct mpg123_handle; + +/* Get attribute_align_arg, to stay safe. 
*/ +#include "abi_align.h" + /* Extract the list of functions we need wrappers for, pregenerating the wrappers for simple cases (inline script for nedit): perl -ne ' @@ -67,9 +78,9 @@ my $name = $2; my $args = $3; next unless ($type =~ /off_t/ or $args =~ /off_t/ or ($name =~ /open/ and $name ne mpg123_open_feed)); - $type =~ s/off_t/long/g; + $type =~ s/off_t/lfs_alias_t/g; my @nargs = (); - $args =~ s/off_t/long/g; + $args =~ s/off_t/lfs_alias_t/g; foreach my $a (split(/,/, $args)) { $a =~ s/^.*\s\**([a-z_]+)$/$1/; @@ -79,9 +90,7 @@ $nargs = "Human: figure me out." if($nargs =~ /\(/); print <err = MPG123_LFS_OVERFLOW; + return MPG123_ERR; + } + return val; +} + #undef mpg123_tell /* off_t mpg123_tell(mpg123_handle *mh); */ long attribute_align_arg mpg123_tell(mpg123_handle *mh) @@ -209,7 +224,7 @@ { long val; off_t largeval; - ALIGNCHECK(mh); + largeval = MPG123_LARGENAME(mpg123_tell)(mh); val = largeval; if(val != largeval) @@ -226,7 +241,7 @@ { long val; off_t largeval; - ALIGNCHECK(mh); + largeval = MPG123_LARGENAME(mpg123_tellframe)(mh); val = largeval; if(val != largeval) @@ -243,7 +258,7 @@ { long val; off_t largeval; - ALIGNCHECK(mh); + largeval = MPG123_LARGENAME(mpg123_tell_stream)(mh); val = largeval; if(val != largeval) @@ -260,7 +275,7 @@ { long val; off_t largeval; - ALIGNCHECK(mh); + largeval = MPG123_LARGENAME(mpg123_seek)(mh, sampleoff, whence); val = largeval; if(val != largeval) @@ -278,7 +293,7 @@ long val; off_t largeioff; off_t largeval; - ALIGNCHECK(mh); + largeval = MPG123_LARGENAME(mpg123_feedseek)(mh, sampleoff, whence, &largeioff); /* Error/message codes are small... 
*/ if(largeval < 0) return (long)largeval; @@ -299,7 +314,7 @@ { long val; off_t largeval; - ALIGNCHECK(mh); + largeval = MPG123_LARGENAME(mpg123_seek_frame)(mh, frameoff, whence); val = largeval; if(val != largeval) @@ -316,7 +331,7 @@ { long val; off_t largeval; - ALIGNCHECK(mh); + largeval = MPG123_LARGENAME(mpg123_timeframe)(mh, sec); val = largeval; if(val != largeval) @@ -340,7 +355,7 @@ off_t largestep; off_t *largeoffsets; struct wrap_data *whd; - ALIGNCHECK(mh); + whd = wrap_get(mh); if(whd == NULL) return MPG123_ERR; @@ -394,7 +409,7 @@ size_t i; struct wrap_data *whd; off_t *indextmp; - ALIGNCHECK(mh); + whd = wrap_get(mh); if(whd == NULL) return MPG123_ERR; @@ -432,7 +447,7 @@ off_t curframe, frameleft; long small_curframe, small_frameleft; int err; - ALIGNCHECK(mh); + err = MPG123_LARGENAME(mpg123_position)(mh, frame_offset, buffered_bytes, &curframe, &frameleft, current_seconds, seconds_left); if(err != MPG123_OK) return err; @@ -458,7 +473,7 @@ { long val; off_t largeval; - ALIGNCHECK(mh); + largeval = MPG123_LARGENAME(mpg123_length)(mh); val = largeval; if(val != largeval) @@ -474,7 +489,6 @@ /* int mpg123_set_filesize(mpg123_handle *mh, off_t size); */ int attribute_align_arg mpg123_set_filesize(mpg123_handle *mh, long size) { - ALIGNCHECK(mh); return MPG123_LARGENAME(mpg123_set_filesize)(mh, size); } @@ -497,7 +511,7 @@ #endif /* Read callback needs nothing special. */ -long wrap_read(void* handle, void *buf, size_t count) +ssize_t wrap_read(void* handle, void *buf, size_t count) { struct wrap_data *ioh = handle; switch(ioh->iotype) @@ -545,7 +559,7 @@ /* Normal reader replacement needs fallback implementations. */ -static long fallback_read(int fd, void *buf, size_t count) +static ssize_t fallback_read(int fd, void *buf, size_t count) { return read(fd, buf, count); } @@ -567,10 +581,10 @@ } /* Reader replacement prepares the hidden handle storage for next mpg123_open_fd() or plain mpg123_open(). 
*/ -int attribute_align_arg mpg123_replace_reader(mpg123_handle *mh, long (*r_read) (int, void *, size_t), long (*r_lseek)(int, long, int) ) +int attribute_align_arg mpg123_replace_reader(mpg123_handle *mh, ssize_t (*r_read) (int, void *, size_t), long (*r_lseek)(int, long, int) ) { struct wrap_data* ioh; - ALIGNCHECK(mh); + if(mh == NULL) return MPG123_ERR; mpg123_close(mh); @@ -598,10 +612,10 @@ return MPG123_OK; } -int attribute_align_arg mpg123_replace_reader_handle(mpg123_handle *mh, long (*r_read) (void*, void *, size_t), long (*r_lseek)(void*, long, int), void (*cleanup)(void*)) +int attribute_align_arg mpg123_replace_reader_handle(mpg123_handle *mh, ssize_t (*r_read) (void*, void *, size_t), long (*r_lseek)(void*, long, int), void (*cleanup)(void*)) { struct wrap_data* ioh; - ALIGNCHECK(mh); + if(mh == NULL) return MPG123_ERR; mpg123_close(mh); @@ -628,7 +642,6 @@ int attribute_align_arg mpg123_open(mpg123_handle *mh, const char *path) { struct wrap_data* ioh; - ALIGNCHECK(mh); if(mh == NULL) return MPG123_ERR; @@ -681,7 +694,6 @@ int attribute_align_arg mpg123_open_fd(mpg123_handle *mh, int fd) { struct wrap_data* ioh; - ALIGNCHECK(mh); if(mh == NULL) return MPG123_ERR; @@ -713,7 +725,6 @@ int attribute_align_arg mpg123_open_handle(mpg123_handle *mh, void *handle) { struct wrap_data* ioh; - ALIGNCHECK(mh); if(mh == NULL) return MPG123_ERR; Index: lib/3rdparty/libmpg123/libmpg123.c =================================================================== --- lib/3rdparty/libmpg123/libmpg123.c (revision 62563) +++ lib/3rdparty/libmpg123/libmpg123.c (working copy) @@ -1,7 +1,7 @@ /* libmpg123: MPEG Audio Decoder library - copyright 1995-2010 by the mpg123 project - free software under the terms of the LGPL 2.1 + copyright 1995-2014 by the mpg123 project - free software under the terms of the LGPL 2.1 see COPYING and AUTHORS files in distribution or http://mpg123.org */ @@ -10,73 +10,14 @@ #include "icy2utf8.h" #include "debug.h" -#ifdef GAPLESS -#define 
SAMPLE_ADJUST(x) ((x) - ((mh->p.flags & MPG123_GAPLESS) ? mh->begin_os : 0)) -#define SAMPLE_UNADJUST(x) ((x) + ((mh->p.flags & MPG123_GAPLESS) ? mh->begin_os : 0)) -#else -#define SAMPLE_ADJUST(x) (x) -#define SAMPLE_UNADJUST(x) (x) -#endif +#include "gapless.h" #define SEEKFRAME(mh) ((mh)->ignoreframe < 0 ? 0 : (mh)->ignoreframe) static int initialized = 0; -#include "aligncheck.h" - -#ifdef GAPLESS -/* - Take the buffer after a frame decode (strictly: it is the data from frame fr->num!) and cut samples out. - fr->buffer.fill may then be smaller than before... -*/ -static void frame_buffercheck(mpg123_handle *fr) -{ - /* When we have no accurate position, gapless code does not make sense. */ - if(!fr->accurate) return; - - /* Important: We first cut samples from the end, then cut from beginning (including left-shift of the buffer). - This order works also for the case where firstframe == lastframe. */ - - /* The last interesting (planned) frame: Only use some leading samples. - Note a difference from the below: The last frame and offset are unchanges by seeks. - The lastoff keeps being valid. */ - if(fr->lastframe > -1 && fr->num >= fr->lastframe) - { - /* There can be more than one frame of padding at the end, so we ignore the whole frame if we are beyond lastframe. */ - off_t byteoff = (fr->num == fr->lastframe) ? samples_to_bytes(fr, fr->lastoff) : 0; - if((off_t)fr->buffer.fill > byteoff) - { - fr->buffer.fill = byteoff; - } - debug1("Cut frame buffer on end of stream, fill now %"SIZE_P" bytes.", (size_p)fr->buffer.fill); - } - - /* The first interesting frame: Skip some leading samples. 
*/ - if(fr->firstoff && fr->num == fr->firstframe) - { - off_t byteoff = samples_to_bytes(fr, fr->firstoff); - if((off_t)fr->buffer.fill > byteoff) - { - fr->buffer.fill -= byteoff; - /* buffer.p != buffer.data only for own buffer */ - debug6("cutting %li samples/%li bytes on begin, own_buffer=%i at %p=%p, buf[1]=%i", - (long)fr->firstoff, (long)byteoff, fr->own_buffer, (void*)fr->buffer.p, (void*)fr->buffer.data, ((short*)fr->buffer.p)[2]); - if(fr->own_buffer) fr->buffer.p = fr->buffer.data + byteoff; - else memmove(fr->buffer.data, fr->buffer.data + byteoff, fr->buffer.fill); - debug3("done cutting, buffer at %p =? %p, buf[1]=%i", - (void*)fr->buffer.p, (void*)fr->buffer.data, ((short*)fr->buffer.p)[2]); - } - else fr->buffer.fill = 0; - /* We can only reach this frame again by seeking. And on seeking, firstoff will be recomputed. - So it is safe to null it here (and it makes the if() decision abort earlier). */ - fr->firstoff = 0; - } -} -#endif - int attribute_align_arg mpg123_init(void) { - ALIGNCHECKK if((sizeof(short) != 2) || (sizeof(long) < 4)) return MPG123_BAD_TYPES; if(initialized) return MPG123_OK; /* no need to initialize twice */ @@ -109,17 +50,7 @@ { mpg123_handle *fr = NULL; int err = MPG123_OK; -#if (defined CCALIGN) && (defined NEED_ALIGNCHECK) && ((defined DEBUG) || (defined CHECK_ALIGN)) -#ifdef CCALIGN - double ALIGNED(16) altest[4]; - if(((size_t)altest) % 16 != 0) - { - error("Stack variable is not aligned! Your combination of compiler/library is dangerous!"); - *error = MPG123_BAD_ALIGN; - return NULL; - } -#endif -#endif + if(initialized) fr = (mpg123_handle*) malloc(sizeof(mpg123_handle)); else err = MPG123_NOT_INITIALIZED; if(fr != NULL) @@ -136,27 +67,7 @@ } if(fr != NULL) { - /* Cleanup that mess! ... use mpg123_decoder / decode_update! */ - if(frame_outbuffer(fr) != 0) - { - err = MPG123_NO_BUFFERS; - frame_exit(fr); - free(fr); - fr = NULL; - } - else - { - /* I smell cleanup here... 
with get_next_frame() */ -/* if(decode_update(fr) != 0) - { - err = fr->err != MPG123_OK ? fr->err : MPG123_BAD_DECODER; - frame_exit(fr); - free(fr); - fr = NULL; - } - else */ - fr->decoder_change = 1; - } + fr->decoder_change = 1; } else if(err == MPG123_OK) err = MPG123_OUT_OF_MEM; @@ -167,7 +78,7 @@ int attribute_align_arg mpg123_decoder(mpg123_handle *mh, const char* decoder) { enum optdec dt = dectype(decoder); - ALIGNCHECK(mh); + if(mh == NULL) return MPG123_ERR; if(dt == nodec) @@ -194,8 +105,7 @@ frame_exit(mh); return MPG123_ERR; } - /* I smell cleanup here... with get_next_frame() */ - decode_update(mh); + /* Do _not_ call decode_update here! That is only allowed after a first MPEG frame has been met. */ mh->decoder_change = 1; return MPG123_OK; } @@ -203,7 +113,7 @@ int attribute_align_arg mpg123_param(mpg123_handle *mh, enum mpg123_parms key, long val, double fval) { int r; - ALIGNCHECK(mh); + if(mh == NULL) return MPG123_ERR; r = mpg123_par(&mh->p, key, val, fval); if(r != MPG123_OK){ mh->err = r; r = MPG123_ERR; } @@ -216,6 +126,11 @@ if(r != MPG123_OK) mh->err = MPG123_INDEX_FAIL; } #endif +#ifndef NO_FEEDER + /* Feeder pool size is applied right away, reader will react to that. 
*/ + if(key == MPG123_FEEDPOOL || key == MPG123_FEEDBUFFER) + bc_poolsize(&mh->rdat.buffer, mh->p.feedpool, mh->p.feedbuffer); +#endif } return r; } @@ -223,7 +138,7 @@ int attribute_align_arg mpg123_par(mpg123_pars *mp, enum mpg123_parms key, long val, double fval) { int ret = MPG123_OK; - ALIGNCHECKK + if(mp == NULL) return MPG123_BAD_PARS; switch(key) { @@ -310,6 +225,22 @@ if(val >= 0) mp->preframes = val; else ret = MPG123_BAD_VALUE; break; + case MPG123_FEEDPOOL: +#ifndef NO_FEEDER + if(val >= 0) mp->feedpool = val; + else ret = MPG123_BAD_VALUE; +#else + ret = MPG123_MISSING_FEATURE; +#endif + break; + case MPG123_FEEDBUFFER: +#ifndef NO_FEEDER + if(val > 0) mp->feedbuffer = val; + else ret = MPG123_BAD_VALUE; +#else + ret = MPG123_MISSING_FEATURE; +#endif + break; default: ret = MPG123_BAD_PARAM; } @@ -319,7 +250,7 @@ int attribute_align_arg mpg123_getparam(mpg123_handle *mh, enum mpg123_parms key, long *val, double *fval) { int r; - ALIGNCHECK(mh); + if(mh == NULL) return MPG123_ERR; r = mpg123_getpar(&mh->p, key, val, fval); if(r != MPG123_OK){ mh->err = r; r = MPG123_ERR; } @@ -329,7 +260,7 @@ int attribute_align_arg mpg123_getpar(mpg123_pars *mp, enum mpg123_parms key, long *val, double *fval) { int ret = 0; - ALIGNCHECKK + if(mp == NULL) return MPG123_BAD_PARS; switch(key) { @@ -385,6 +316,20 @@ case MPG123_PREFRAMES: *val = mp->preframes; break; + case MPG123_FEEDPOOL: +#ifndef NO_FEEDER + *val = mp->feedpool; +#else + ret = MPG123_MISSING_FEATURE; +#endif + break; + case MPG123_FEEDBUFFER: +#ifndef NO_FEEDER + *val = mp->feedbuffer; +#else + ret = MPG123_MISSING_FEATURE; +#endif + break; default: ret = MPG123_BAD_PARAM; } @@ -396,14 +341,37 @@ int ret = MPG123_OK; long theval = 0; double thefval = 0.; - ALIGNCHECK(mh); + if(mh == NULL) return MPG123_ERR; switch(key) { case MPG123_ACCURATE: - theval = mh->accurate; + theval = mh->state_flags & FRAME_ACCURATE; break; + case MPG123_FRANKENSTEIN: + theval = mh->state_flags & FRAME_FRANKENSTEIN; + break; 
+ case MPG123_BUFFERFILL: +#ifndef NO_FEEDER + { + size_t sval = bc_fill(&mh->rdat.buffer); + theval = (long)sval; + if((size_t)theval != sval) + { + mh->err = MPG123_INT_OVERFLOW; + ret = MPG123_ERR; + } + } +#else + mh->err = MPG123_MISSING_FEATURE; + ret = MPG123_ERR; +#endif + break; + case MPG123_FRESH_DECODER: + theval = mh->state_flags & FRAME_FRESH_DECODER; + mh->state_flags &= ~FRAME_FRESH_DECODER; + break; default: mh->err = MPG123_BAD_KEY; ret = MPG123_ERR; @@ -417,7 +385,6 @@ int attribute_align_arg mpg123_eq(mpg123_handle *mh, enum mpg123_channels channel, int band, double val) { - ALIGNCHECK(mh); if(mh == NULL) return MPG123_ERR; if(band < 0 || band > 31){ mh->err = MPG123_BAD_BAND; return MPG123_ERR; } switch(channel) @@ -438,7 +405,7 @@ double attribute_align_arg mpg123_geteq(mpg123_handle *mh, enum mpg123_channels channel, int band) { double ret = 0.; - ALIGNCHECK(mh); + if(mh == NULL) return MPG123_ERR; /* Handle this gracefully. When there is no band, it has no volume. */ @@ -460,7 +427,6 @@ /* plain file access, no http! 
*/ int attribute_align_arg mpg123_open(mpg123_handle *mh, const char *path) { - ALIGNCHECK(mh); if(mh == NULL) return MPG123_ERR; mpg123_close(mh); @@ -469,7 +435,6 @@ int attribute_align_arg mpg123_open_fd(mpg123_handle *mh, int fd) { - ALIGNCHECK(mh); if(mh == NULL) return MPG123_ERR; mpg123_close(mh); @@ -478,7 +443,6 @@ int attribute_align_arg mpg123_open_handle(mpg123_handle *mh, void *iohandle) { - ALIGNCHECK(mh); if(mh == NULL) return MPG123_ERR; mpg123_close(mh); @@ -492,7 +456,6 @@ int attribute_align_arg mpg123_open_feed(mpg123_handle *mh) { - ALIGNCHECK(mh); if(mh == NULL) return MPG123_ERR; mpg123_close(mh); @@ -500,10 +463,9 @@ } int attribute_align_arg mpg123_replace_reader( mpg123_handle *mh, - long (*r_read) (int, void *, size_t), + ssize_t (*r_read) (int, void *, size_t), off_t (*r_lseek)(int, off_t, int) ) { - ALIGNCHECK(mh); if(mh == NULL) return MPG123_ERR; mpg123_close(mh); @@ -513,11 +475,10 @@ } int attribute_align_arg mpg123_replace_reader_handle( mpg123_handle *mh, - long (*r_read) (void*, void *, size_t), + ssize_t (*r_read) (void*, void *, size_t), off_t (*r_lseek)(void*, off_t, int), void (*cleanup)(void*) ) { - ALIGNCHECK(mh); if(mh == NULL) return MPG123_ERR; mpg123_close(mh); @@ -527,11 +488,24 @@ return MPG123_OK; } +/* Update decoding engine for + a) a new choice of decoder + b) a changed native format of the MPEG stream + ... calls are only valid after parsing some MPEG frame! */ int decode_update(mpg123_handle *mh) { long native_rate; int b; - ALIGNCHECK(mh); + + if(mh->num < 0) + { + if(!(mh->p.flags & MPG123_QUIET)) error("decode_update() has been called before reading the first MPEG frame! Internal programming error."); + + mh->err = MPG123_BAD_DECODER_SETUP; + return MPG123_ERR; + } + + mh->state_flags |= FRAME_FRESH_DECODER; native_rate = frame_freq(mh); b = frame_output_format(mh); /* Select the new output format based on given constraints. 
*/ @@ -551,7 +525,7 @@ case 2: mh->down_sample_sblimit = SBLIMIT>>(mh->down_sample); /* With downsampling I get less samples per frame */ - mh->outblock = samples_to_bytes(mh, (spf(mh)>>mh->down_sample)); + mh->outblock = outblock_bytes(mh, (mh->spf>>mh->down_sample)); break; #ifndef NO_NTOM case 3: @@ -563,10 +537,10 @@ mh->down_sample_sblimit /= frame_freq(mh); } else mh->down_sample_sblimit = SBLIMIT; - mh->outblock = mh->af.encsize * mh->af.channels * - ( ( NTOM_MUL-1+spf(mh) + mh->outblock = outblock_bytes(mh, + ( ( NTOM_MUL-1+mh->spf * (((size_t)NTOM_MUL*mh->af.rate)/frame_freq(mh)) - )/NTOM_MUL ); + )/NTOM_MUL )); } break; #endif @@ -580,6 +554,9 @@ else mh->single = (mh->p.flags & MPG123_FORCE_MONO)-1; if(set_synth_functions(mh) != 0) return -1;; + /* The needed size of output buffer may have changed. */ + if(frame_outbuffer(mh) != MPG123_OK) return -1; + do_rva(mh); debug3("done updating decoder structure with native rate %li and af.rate %li and down_sample %i", frame_freq(mh), mh->af.rate, mh->down_sample); @@ -594,13 +571,28 @@ size_t attribute_align_arg mpg123_outblock(mpg123_handle *mh) { - if(mh != NULL) return mh->outblock; + /* Try to be helpful and never return zero output block size. */ + if(mh != NULL && mh->outblock > 0) return mh->outblock; else return mpg123_safe_buffer(); } +/* Read in the next frame we actually want for decoding. + This includes skipping/ignoring frames, in additon to skipping junk in the parser. */ static int get_next_frame(mpg123_handle *mh) { int change = mh->decoder_change; + /* Ensure we got proper decoder for ignoring frames. + Header can be changed from seeking around. But be careful: Only after at + least one frame got read, decoder update makes sense. 
*/ + if(mh->header_change > 1 && mh->num >= 0) + { + change = 1; + mh->header_change = 0; + debug("starting with big header change"); + if(decode_update(mh) < 0) + return MPG123_ERR; + } + do { int b; @@ -625,7 +617,7 @@ else if(b <= 0) { /* More sophisticated error control? */ - if(b==0 || mh->rdat.filepos == mh->rdat.filelen) + if(b==0 || (mh->rdat.filelen >= 0 && mh->rdat.filepos == mh->rdat.filelen)) { /* We simply reached the end. */ mh->track_frames = mh->num + 1; debug("What about updating/checking gapless sample count here?"); @@ -638,6 +630,11 @@ { debug("big header change"); change = 1; + mh->header_change = 0; + /* Need to update decoder structure right away since frame might need to + be decoded on next loop iteration for properly ignoring its output. */ + if(decode_update(mh) < 0) + return MPG123_ERR; } /* Now some accounting: Look at the numbers and decide if we want this frame. */ ++mh->playnum; @@ -655,13 +652,10 @@ else break; } while(1); + /* If we reach this point, we got a new frame ready to be decoded. + All other situations resulted in returns from the loop. */ if(change) { - if(decode_update(mh) < 0) /* dito... */ - return MPG123_ERR; - -debug1("new format: %i", mh->new_format); - mh->decoder_change = 0; if(mh->fresh) { @@ -684,6 +678,7 @@ } /* Assumption: A buffer full of zero samples can be constructed by repetition of this byte. + Oh, and it handles some format conversion. Only to be used by decode_the_frame() ... */ static int zero_byte(mpg123_handle *fr) { @@ -698,9 +693,10 @@ Not part of the api. This just decodes the frame and fills missing bits with zeroes. There can be frames that are broken and thus make do_layer() fail. 
*/ -void decode_the_frame(mpg123_handle *fr) +static void decode_the_frame(mpg123_handle *fr) { - size_t needed_bytes = samples_to_bytes(fr, frame_expect_outsamples(fr)); fr->clip += (fr->do_layer)(fr); + size_t needed_bytes = decoder_synth_bytes(fr, frame_expect_outsamples(fr)); + fr->clip += (fr->do_layer)(fr); /*fprintf(stderr, "frame %"OFF_P": got %"SIZE_P" / %"SIZE_P"\n", fr->num,(size_p)fr->buffer.fill, (size_p)needed_bytes);*/ /* There could be less data than promised. Also, then debugging, we look out for coding errors that could result in _more_ data than expected. */ @@ -735,47 +731,7 @@ } } #endif - /* Handle unsigned output formats via reshifting after decode here. */ -#ifndef NO_32BIT - if(fr->af.encoding == MPG123_ENC_UNSIGNED_32) - { /* 32bit signed -> unsigned */ - size_t i; - int32_t *ssamples; - uint32_t *usamples; - ssamples = (int32_t*)fr->buffer.data; - usamples = (uint32_t*)fr->buffer.data; - debug("converting output to unsigned 32 bit integer"); - for(i=0; i<fr->buffer.fill/sizeof(int32_t); ++i) - { - /* Different strategy since we don't have a larger type at hand. - Also watch out for silly +-1 fun because integer constants are signed in C90! */ - if(ssamples[i] >= 0) - usamples[i] = (uint32_t)ssamples[i] + 2147483647+1; - /* The smalles value goes zero. */ - else if(ssamples[i] == ((int32_t)-2147483647-1)) - usamples[i] = 0; - /* Now -value is in the positive range of signed int ... so it's a possible value at all. 
*/ - else - usamples[i] = (uint32_t)2147483647+1 - (uint32_t)(-ssamples[i]); - } - } -#endif -#ifndef NO_16BIT - if(fr->af.encoding == MPG123_ENC_UNSIGNED_16) - { - size_t i; - short *ssamples; - unsigned short *usamples; - ssamples = (short*)fr->buffer.data; - usamples = (unsigned short*)fr->buffer.data; - debug("converting output to unsigned 16 bit integer"); - for(i=0; i<fr->buffer.fill/sizeof(short); ++i) - { - long tmp = (long)ssamples[i]+32768; - usamples[i] = (unsigned short)tmp; - } - } -#endif + postprocess_buffer(fr); } /* @@ -792,7 +748,6 @@ */ int attribute_align_arg mpg123_framebyframe_decode(mpg123_handle *mh, off_t *num, unsigned char **audio, size_t *bytes) { - ALIGNCHECK(mh); if(bytes == NULL) return MPG123_ERR_NULL; if(audio == NULL) return MPG123_ERR_NULL; if(mh == NULL) return MPG123_BAD_HANDLE; @@ -807,10 +762,7 @@ decode_the_frame(mh); mh->to_decode = mh->to_ignore = FALSE; mh->buffer.p = mh->buffer.data; -#ifdef GAPLESS - /* This checks for individual samples to skip, for gapless mode or sample-accurate seek. */ - frame_buffercheck(mh); -#endif + FRAME_BUFFERCHECK(mh); *audio = mh->buffer.p; *bytes = mh->buffer.fill; return MPG123_OK; @@ -865,7 +817,6 @@ */ int attribute_align_arg mpg123_decode_frame(mpg123_handle *mh, off_t *num, unsigned char **audio, size_t *bytes) { - ALIGNCHECK(mh); if(bytes != NULL) *bytes = 0; if(mh == NULL) return MPG123_ERR; if(mh->buffer.size < mh->outblock) return MPG123_NO_SPACE; @@ -888,10 +839,7 @@ mh->to_decode = mh->to_ignore = FALSE; mh->buffer.p = mh->buffer.data; -#ifdef GAPLESS - /* This checks for individual samples to skip, for gapless mode or sample-accurate seek. 
*/ - frame_buffercheck(mh); -#endif + FRAME_BUFFERCHECK(mh); if(audio != NULL) *audio = mh->buffer.p; if(bytes != NULL) *bytes = mh->buffer.fill; @@ -914,6 +862,7 @@ int attribute_align_arg mpg123_feed(mpg123_handle *mh, const unsigned char *in, size_t size) { if(mh == NULL) return MPG123_ERR; +#ifndef NO_FEEDER if(size > 0) { if(in != NULL) @@ -935,6 +884,10 @@ } } return MPG123_OK; +#else + mh->err = MPG123_MISSING_FEATURE; + return MPG123_ERR; +#endif } /* @@ -955,9 +908,10 @@ { int ret = MPG123_OK; size_t mdone = 0; - ALIGNCHECK(mh); + if(done != NULL) *done = 0; if(mh == NULL) return MPG123_ERR; +#ifndef NO_FEEDER if(inmemsize > 0 && mpg123_feed(mh, inmemory, inmemsize) != MPG123_OK) { ret = MPG123_ERR; @@ -988,9 +942,7 @@ mh->to_decode = mh->to_ignore = FALSE; mh->buffer.p = mh->buffer.data; debug2("decoded frame %li, got %li samples in buffer", (long)mh->num, (long)(mh->buffer.fill / (samples_to_bytes(mh, 1)))); -#ifdef GAPLESS - frame_buffercheck(mh); /* Seek & gapless. */ -#endif + FRAME_BUFFERCHECK(mh); } if(mh->buffer.fill) /* Copy (part of) the decoded data to the caller's buffer. */ { @@ -1014,12 +966,16 @@ decodeend: if(done != NULL) *done = mdone; return ret; +#else + mh->err = MPG123_MISSING_FEATURE; + return MPG123_ERR; +#endif } long attribute_align_arg mpg123_clip(mpg123_handle *mh) { long ret = 0; - ALIGNCHECK(mh); + if(mh != NULL) { ret = mh->clip; @@ -1028,7 +984,8 @@ return ret; } -#define track_need_init(mh) (!(mh)->to_decode && (mh)->fresh) +/* Simples: Track needs initialization if no initial frame has been read yet. */ +#define track_need_init(mh) ((mh)->num < 0) static int init_track(mpg123_handle *mh) { @@ -1041,10 +998,48 @@ return 0; } +int attribute_align_arg mpg123_info(mpg123_handle *mh, struct mpg123_frameinfo *mi) +{ + int b; + + if(mh == NULL) return MPG123_ERR; + if(mi == NULL) + { + mh->err = MPG123_ERR_NULL; + return MPG123_ERR; + } + b = init_track(mh); + if(b < 0) return b; + + mi->version = mh->mpeg25 ? 
MPG123_2_5 : (mh->lsf ? MPG123_2_0 : MPG123_1_0); + mi->layer = mh->lay; + mi->rate = frame_freq(mh); + switch(mh->mode) + { + case 0: mi->mode = MPG123_M_STEREO; break; + case 1: mi->mode = MPG123_M_JOINT; break; + case 2: mi->mode = MPG123_M_DUAL; break; + case 3: mi->mode = MPG123_M_MONO; break; + default: error("That mode cannot be!"); + } + mi->mode_ext = mh->mode_ext; + mi->framesize = mh->framesize+4; /* Include header. */ + mi->flags = 0; + if(mh->error_protection) mi->flags |= MPG123_CRC; + if(mh->copyright) mi->flags |= MPG123_COPYRIGHT; + if(mh->extension) mi->flags |= MPG123_PRIVATE; + if(mh->original) mi->flags |= MPG123_ORIGINAL; + mi->emphasis = mh->emphasis; + mi->bitrate = frame_bitrate(mh); + mi->abr_rate = mh->abr_rate; + mi->vbr = mh->vbr; + return MPG123_OK; +} + int attribute_align_arg mpg123_getformat(mpg123_handle *mh, long *rate, int *channels, int *encoding) { int b; - ALIGNCHECK(mh); + if(mh == NULL) return MPG123_ERR; b = init_track(mh); if(b < 0) return b; @@ -1059,7 +1054,7 @@ off_t attribute_align_arg mpg123_timeframe(mpg123_handle *mh, double seconds) { off_t b; - ALIGNCHECK(mh); + if(mh == NULL) return MPG123_ERR; b = init_track(mh); if(b<0) return b; @@ -1076,7 +1071,6 @@ */ off_t attribute_align_arg mpg123_tell(mpg123_handle *mh) { - ALIGNCHECK(mh); if(mh == NULL) return MPG123_ERR; if(track_need_init(mh)) return 0; /* Now we have all the info at hand. */ @@ -1100,7 +1094,7 @@ pos = frame_outs(mh, mh->num+1) - bytes_to_samples(mh, mh->buffer.fill); } /* Substract padding and delay from the beginning. */ - pos = SAMPLE_ADJUST(pos); + pos = SAMPLE_ADJUST(mh,pos); /* Negative sample offsets are not right, less than nothing is still nothing. */ return pos>0 ? 
pos : 0; } @@ -1108,7 +1102,6 @@ off_t attribute_align_arg mpg123_tellframe(mpg123_handle *mh) { - ALIGNCHECK(mh); if(mh == NULL) return MPG123_ERR; if(mh->num < mh->firstframe) return mh->firstframe; if(mh->to_decode) return mh->num; @@ -1118,7 +1111,6 @@ off_t attribute_align_arg mpg123_tell_stream(mpg123_handle *mh) { - ALIGNCHECK(mh); if(mh == NULL) return MPG123_ERR; /* mh->rd is at least a bad_reader, so no worry. */ return mh->rd->tell(mh); @@ -1156,6 +1148,11 @@ } #endif b = mh->rd->seek_frame(mh, fnum); + if(mh->header_change > 1) + { + if(decode_update(mh) < 0) return MPG123_ERR; + mh->header_change = 0; + } debug1("seek_frame returned: %i", b); if(b<0) return b; /* Only mh->to_ignore is TRUE. */ @@ -1169,7 +1166,7 @@ { int b; off_t pos; - ALIGNCHECK(mh); + pos = mpg123_tell(mh); /* adjusted samples */ /* pos < 0 also can mean that simply a former seek failed at the lower levels. In that case, we only allow absolute seeks. */ @@ -1187,10 +1184,9 @@ /* When we do not know the end already, we can try to find it. */ if(mh->track_frames < 1 && (mh->rdat.flags & READER_SEEKABLE)) mpg123_scan(mh); + if(mh->track_frames > 0) pos = SAMPLE_ADJUST(mh,frame_outs(mh, mh->track_frames)) - sampleoff; #ifdef GAPLESS - if(mh->end_os > 0) pos = SAMPLE_ADJUST(mh->end_os) - sampleoff; -#else - if(mh->track_frames > 0) pos = SAMPLE_ADJUST(frame_outs(mh, mh->track_frames)) - sampleoff; + else if(mh->end_os > 0) pos = SAMPLE_ADJUST(mh,mh->end_os) - sampleoff; #endif else { @@ -1202,7 +1198,7 @@ } if(pos < 0) pos = 0; /* pos now holds the wanted sample offset in adjusted samples */ - frame_set_seek(mh, SAMPLE_UNADJUST(pos)); + frame_set_seek(mh, SAMPLE_UNADJUST(mh,pos)); pos = do_the_seek(mh); if(pos < 0) return pos; @@ -1219,11 +1215,12 @@ { int b; off_t pos; - ALIGNCHECK(mh); + pos = mpg123_tell(mh); /* adjusted samples */ debug3("seek from %li to %li (whence=%i)", (long)pos, (long)sampleoff, whence); /* The special seek error handling does not apply here... 
there is no lowlevel I/O. */ if(pos < 0) return pos; /* mh == NULL is covered in mpg123_tell() */ +#ifndef NO_FEEDER if(input_offset == NULL) { mh->err = MPG123_NULL_POINTER; @@ -1237,10 +1234,9 @@ case SEEK_CUR: pos += sampleoff; break; case SEEK_SET: pos = sampleoff; break; case SEEK_END: + if(mh->track_frames > 0) pos = SAMPLE_ADJUST(mh,frame_outs(mh, mh->track_frames)) - sampleoff; #ifdef GAPLESS - if(mh->end_os >= 0) pos = SAMPLE_ADJUST(mh->end_os) - sampleoff; -#else - if(mh->track_frames > 0) pos = SAMPLE_ADJUST(frame_outs(mh, mh->track_frames)) - sampleoff; + else if(mh->end_os >= 0) pos = SAMPLE_ADJUST(mh,mh->end_os) - sampleoff; #endif else { @@ -1251,7 +1247,7 @@ default: mh->err = MPG123_BAD_WHENCE; return MPG123_ERR; } if(pos < 0) pos = 0; - frame_set_seek(mh, SAMPLE_UNADJUST(pos)); + frame_set_seek(mh, SAMPLE_UNADJUST(mh,pos)); pos = SEEKFRAME(mh); mh->buffer.fill = 0; @@ -1267,6 +1263,10 @@ feedseekend: return mpg123_tell(mh); +#else + mh->err = MPG123_MISSING_FEATURE; + return MPG123_ERR; +#endif } off_t attribute_align_arg mpg123_seek_frame(mpg123_handle *mh, off_t offset, int whence) @@ -1273,7 +1273,7 @@ { int b; off_t pos = 0; - ALIGNCHECK(mh); + if(mh == NULL) return MPG123_ERR; if((b=init_track(mh)) < 0) return b; @@ -1296,8 +1296,7 @@ return MPG123_ERR; } if(pos < 0) pos = 0; - /* Hm, do we need to seek right past the end? */ - else if(mh->track_frames > 0 && pos >= mh->track_frames) pos = mh->track_frames; + /* Not limiting the possible position on end for the chance that there might be more to the stream than announced via track_frames. 
*/ frame_set_frameseek(mh, pos); pos = do_the_seek(mh); @@ -1308,7 +1307,6 @@ int attribute_align_arg mpg123_set_filesize(mpg123_handle *mh, off_t size) { - ALIGNCHECK(mh); if(mh == NULL) return MPG123_ERR; mh->rdat.filelen = size; @@ -1319,17 +1317,17 @@ { int b; off_t length; - ALIGNCHECK(mh); + if(mh == NULL) return MPG123_ERR; b = init_track(mh); if(b<0) return b; if(mh->track_samples > -1) length = mh->track_samples; - else if(mh->track_frames > 0) length = mh->track_frames*spf(mh); + else if(mh->track_frames > 0) length = mh->track_frames*mh->spf; else if(mh->rdat.filelen > 0) /* Let the case of 0 length just fall through. */ { /* A bad estimate. Ignoring tags 'n stuff. */ double bpf = mh->mean_framesize ? mh->mean_framesize : compute_bpf(mh); - length = (off_t)((double)(mh->rdat.filelen)/bpf*spf(mh)); + length = (off_t)((double)(mh->rdat.filelen)/bpf*mh->spf); } else if(mh->rdat.filelen == 0) return mpg123_tell(mh); /* we could be in feeder mode */ else return MPG123_ERR; /* No length info there! 
*/ @@ -1338,15 +1336,7 @@ length = frame_ins2outs(mh, length); debug1("mpg123_length: external sample length: %"OFF_P, (off_p)length); -#ifdef GAPLESS - if(mh->p.flags & MPG123_GAPLESS) - { - debug2("mpg123_length: begin_os = %"OFF_P", end_os = %"OFF_P, (off_p)mh->begin_os, (off_p)mh->end_os); - if(mh->end_os > 0 && length > mh->end_os) length = mh->end_os; - length -= mh->begin_os; - debug1("mpg123_length: after gapless correction: %"OFF_P, (off_p)length); - } -#endif + length = SAMPLE_ADJUST(mh,length); return length; } @@ -1353,9 +1343,10 @@ int attribute_align_arg mpg123_scan(mpg123_handle *mh) { int b; - off_t backframe; - int to_decode, to_ignore; - ALIGNCHECK(mh); + off_t oldpos; + off_t track_frames = 0; + off_t track_samples = 0; + if(mh == NULL) return MPG123_ERR; if(!(mh->rdat.flags & READER_SEEKABLE)){ mh->err = MPG123_NO_SEEK; return MPG123_ERR; } /* Scan through the _whole_ file, since the current position is no count but computed assuming constant samples per frame. */ @@ -1367,30 +1358,28 @@ if(b == MPG123_DONE) return MPG123_OK; else return MPG123_ERR; /* Must be error here, NEED_MORE is not for seekable streams. */ } - backframe = mh->num; - to_decode = mh->to_decode; - to_ignore = mh->to_ignore; + oldpos = mpg123_tell(mh); b = mh->rd->seek_frame(mh, 0); if(b<0 || mh->num != 0) return MPG123_ERR; /* One frame must be there now. */ - mh->track_frames = 1; - mh->track_samples = spf(mh); /* Internal samples. */ - debug("TODO: We should disable gapless code when encountering inconsistent spf(mh)!"); + track_frames = 1; + track_samples = mh->spf; /* Internal samples. */ + debug("TODO: We should disable gapless code when encountering inconsistent mh->spf!"); + debug(" ... at least unset MPG123_ACCURATE."); + /* Do not increment mh->track_frames in the loop as that would confuse Frankenstein detection. 
*/ while(read_frame(mh) == 1) { - ++mh->track_frames; - mh->track_samples += spf(mh); + ++track_frames; + track_samples += mh->spf; } + mh->track_frames = track_frames; + mh->track_samples = track_samples; debug2("Scanning yielded %"OFF_P" track samples, %"OFF_P" frames.", (off_p)mh->track_samples, (off_p)mh->track_frames); #ifdef GAPLESS /* Also, think about usefulness of that extra value track_samples ... it could be used for consistency checking. */ - frame_gapless_update(mh, mh->track_samples); -#endif - b = mh->rd->seek_frame(mh, backframe); - if(b<0 || mh->num != backframe) return MPG123_ERR; - mh->to_decode = to_decode; - mh->to_ignore = to_ignore; - return MPG123_OK; + if(mh->p.flags & MPG123_GAPLESS) frame_gapless_update(mh, mh->track_samples); +#endif + return mpg123_seek(mh, oldpos, SEEK_SET) >= 0 ? MPG123_OK : MPG123_ERR; } int attribute_align_arg mpg123_meta_check(mpg123_handle *mh) @@ -1399,9 +1388,16 @@ else return 0; } +void attribute_align_arg mpg123_meta_free(mpg123_handle *mh) +{ + if(mh == NULL) return; + + reset_id3(mh); + reset_icy(&mh->icy); +} + int attribute_align_arg mpg123_id3(mpg123_handle *mh, mpg123_id3v1 **v1, mpg123_id3v2 **v2) { - ALIGNCHECK(mh); if(v1 != NULL) *v1 = NULL; if(v2 != NULL) *v2 = NULL; if(mh == NULL) return MPG123_ERR; @@ -1425,7 +1421,6 @@ int attribute_align_arg mpg123_icy(mpg123_handle *mh, char **icy_meta) { - ALIGNCHECK(mh); if(mh == NULL) return MPG123_ERR; #ifndef NO_ICY if(icy_meta == NULL) @@ -1448,11 +1443,6 @@ #endif } -/* - Simple utility functions that do not possibly call code with extra alignment requirements do not use the ALIGNCHECK. - I am aware of the chance that the compiler could have introduced such code outside assembly functions, but such a modern compiler (gcc) can also honour attribute_align_arg. 
-*/ - char* attribute_align_arg mpg123_icy2utf8(const char* icy_text) { #ifndef NO_ICY @@ -1527,7 +1517,6 @@ int attribute_align_arg mpg123_index(mpg123_handle *mh, off_t **offsets, off_t *step, size_t *fill) { - ALIGNCHECK(mh); if(mh == NULL) return MPG123_ERR; if(offsets == NULL || step == NULL || fill == NULL) { @@ -1548,7 +1537,6 @@ int attribute_align_arg mpg123_set_index(mpg123_handle *mh, off_t *offsets, off_t step, size_t fill) { - ALIGNCHECK(mh); if(mh == NULL) return MPG123_ERR; #ifdef FRAME_INDEX if(step == 0) @@ -1570,10 +1558,11 @@ int attribute_align_arg mpg123_close(mpg123_handle *mh) { - ALIGNCHECK(mh); if(mh == NULL) return MPG123_ERR; - if(mh->rd != NULL && mh->rd->close != NULL) mh->rd->close(mh); - mh->rd = NULL; + + /* mh->rd is never NULL! */ + if(mh->rd->close != NULL) mh->rd->close(mh); + if(mh->new_format) { debug("Hey, we are closing a track before the new format has been queried..."); @@ -1640,6 +1629,7 @@ ,"Low-level seeking has failed (call to lseek(), usually)." ,"Custom I/O obviously not prepared." ,"Overflow in LFS (large file support) conversion." + ,"Overflow in integer conversion." }; const char* attribute_align_arg mpg123_plain_strerror(int errcode) Index: lib/3rdparty/libmpg123/ntom.c =================================================================== --- lib/3rdparty/libmpg123/ntom.c (revision 62563) +++ lib/3rdparty/libmpg123/ntom.c (working copy) @@ -51,11 +51,11 @@ ntm = NTOM_MUL>>1; /* for frame 0 */ for(f=0; f<frame; ++f) /* for frame > 0 */ { - ntm += spf(fr)*fr->ntom_step; + ntm += fr->spf*fr->ntom_step; ntm -= (ntm/NTOM_MUL)*NTOM_MUL; } #else /* Just make one computation with overall sample offset. */ - ntm = (NTOM_MUL>>1) + spf(fr)*frame*fr->ntom_step; + ntm = (NTOM_MUL>>1) + fr->spf*frame*fr->ntom_step; ntm -= (ntm/NTOM_MUL)*NTOM_MUL; #endif return (unsigned long) ntm; @@ -74,7 +74,7 @@ { /* The do this before decoding the separate channels, so there is only one common ntom value. 
*/ int ntm = fr->ntom_val[0]; - ntm += spf(fr)*fr->ntom_step; + ntm += fr->spf*fr->ntom_step; return ntm/NTOM_MUL; } @@ -90,12 +90,12 @@ if(frame <= 0) return 0; for(f=0; f<frame; ++f) { - ntm += spf(fr)*fr->ntom_step; + ntm += fr->spf*fr->ntom_step; soff += ntm/NTOM_MUL; ntm -= (ntm/NTOM_MUL)*NTOM_MUL; } #else - soff = (ntm + frame*(off_t)spf(fr)*(off_t)fr->ntom_step)/(off_t)NTOM_MUL; + soff = (ntm + frame*(off_t)fr->spf*(off_t)fr->ntom_step)/(off_t)NTOM_MUL; #endif return soff; } @@ -107,7 +107,7 @@ off_t ntm = ntom_val(fr,0); #ifdef SAFE_NTOM { - off_t block = spf(fr); + off_t block = fr->spf; if(ins <= 0) return 0; do { @@ -135,7 +135,7 @@ if(soff <= 0) return 0; for(ioff=0; 1; ++ioff) { - ntm += spf(fr)*fr->ntom_step; + ntm += fr->spf*fr->ntom_step; if(ntm/NTOM_MUL > soff) break; soff -= ntm/NTOM_MUL; ntm -= (ntm/NTOM_MUL)*NTOM_MUL; @@ -143,6 +143,6 @@ return ioff; #else ioff = (soff*(off_t)NTOM_MUL-ntm)/(off_t)fr->ntom_step; - return ioff/(off_t)spf(fr); + return ioff/(off_t)fr->spf; #endif } Index: lib/3rdparty/libmpg123/optimize.c =================================================================== --- lib/3rdparty/libmpg123/optimize.c (revision 62563) +++ lib/3rdparty/libmpg123/optimize.c (working copy) @@ -8,53 +8,17 @@ Currently, this file contains the struct and function to choose an optimization variant and works only when OPT_MULTI is in effect. */ +#define I_AM_OPTIMIZE #include "mpg123lib_intern.h" /* includes optimize.h */ #include "debug.h" -/* Must match the enum dectype! */ - -/* - It SUCKS having to define these names that way, but compile-time intialization of string arrays is a bitch. - GCC doesn't see constant stuff when it's wiggling in front of it! 
- Anyhow: Have a script for that: -names="generic generic_dither i386 i486 i586 i586_dither MMX 3DNow 3DNowExt AltiVec SSE x86-64" -for i in $names; do echo "##define dn_${i/-/_} \"$i\""; done -echo -n "static const char* decname[] = -{ - \"auto\" - " -for i in $names; do echo -n ", dn_${i/-/_}"; done -echo " - , \"nodec\" -};" -*/ -#define dn_generic "generic" -#define dn_generic_dither "generic_dither" -#define dn_i386 "i386" -#define dn_i486 "i486" -#define dn_i586 "i586" -#define dn_i586_dither "i586_dither" -#define dn_MMX "MMX" -#define dn_3DNow "3DNow" -#define dn_3DNowExt "3DNowExt" -#define dn_AltiVec "AltiVec" -#define dn_SSE "SSE" -#define dn_x86_64 "x86-64" -#define dn_ARM "ARM" -static const char* decname[] = -{ - "auto" - , dn_generic, dn_generic_dither, dn_i386, dn_i486, dn_i586, dn_i586_dither, dn_MMX, dn_3DNow, dn_3DNowExt, dn_AltiVec, dn_SSE, dn_x86_64, dn_ARM - , "nodec" -}; - -#if (defined OPT_X86) && (defined OPT_MULTI) +#if ((defined OPT_X86) || (defined OPT_X86_64) || (defined OPT_NEON)) && (defined OPT_MULTI) #include "getcpuflags.h" -struct cpuflags cpu_flags; +static struct cpuflags cpu_flags; #else /* Faking stuff for non-multi builds. The same code for synth function choice is used. Just no runtime dependency of result... */ -char cpu_flags; +#define cpu_flags nothing #define cpu_i586(s) 1 #define cpu_fpu(s) 1 #define cpu_mmx(s) 1 @@ -63,6 +27,8 @@ #define cpu_sse(s) 1 #define cpu_sse2(s) 1 #define cpu_sse3(s) 1 +#define cpu_avx(s) 1 +#define cpu_neon(s) 1 #endif /* Ugly macros to build conditional synth function array values. 
*/ @@ -73,6 +39,8 @@ #define IF8(synth) #endif +#ifndef NO_SYNTH32 + #ifndef NO_REAL #define IFREAL(synth) synth, #else @@ -85,6 +53,13 @@ #define IF32(synth) #endif +#else + +#define IFREAL(synth) +#define IF32(synth) + +#endif + #ifndef NO_16BIT # define OUT_SYNTHS(synth_16, synth_8, synth_real, synth_32) { synth_16, IF8(synth_8) IFREAL(synth_real) IF32(synth_32) } #else @@ -93,7 +68,7 @@ /* The call of left and right plain synth, wrapped. This may be replaced by a direct stereo optimized synth. */ -int synth_stereo_wrap(real *bandPtr_l, real *bandPtr_r, mpg123_handle *fr) +static int synth_stereo_wrap(real *bandPtr_l, real *bandPtr_r, mpg123_handle *fr) { int clip; clip = (fr->synth)(bandPtr_l, 0, fr, 0); @@ -101,7 +76,7 @@ return clip; } -const struct synth_s synth_base = +static const struct synth_s synth_base = { { /* plain */ OUT_SYNTHS(synth_1to1, synth_1to1_8bit, synth_1to1_real, synth_1to1_s32) @@ -124,13 +99,13 @@ # endif }, { /* mono2stereo */ - OUT_SYNTHS(synth_1to1_mono2stereo, synth_1to1_8bit_mono2stereo, synth_1to1_real_mono2stereo, synth_1to1_s32_mono2stereo) + OUT_SYNTHS(synth_1to1_m2s, synth_1to1_8bit_m2s, synth_1to1_real_m2s, synth_1to1_s32_m2s) # ifndef NO_DOWNSAMPLE - ,OUT_SYNTHS(synth_2to1_mono2stereo, synth_2to1_8bit_mono2stereo, synth_2to1_real_mono2stereo, synth_2to1_s32_mono2stereo) - ,OUT_SYNTHS(synth_4to1_mono2stereo, synth_4to1_8bit_mono2stereo, synth_4to1_real_mono2stereo, synth_4to1_s32_mono2stereo) + ,OUT_SYNTHS(synth_2to1_m2s, synth_2to1_8bit_m2s, synth_2to1_real_m2s, synth_2to1_s32_m2s) + ,OUT_SYNTHS(synth_4to1_m2s, synth_4to1_8bit_m2s, synth_4to1_real_m2s, synth_4to1_s32_m2s) # endif # ifndef NO_NTOM - ,OUT_SYNTHS(synth_ntom_mono2stereo, synth_ntom_8bit_mono2stereo, synth_ntom_real_mono2stereo, synth_ntom_s32_mono2stereo) + ,OUT_SYNTHS(synth_ntom_m2s, synth_ntom_8bit_m2s, synth_ntom_real_m2s, synth_ntom_s32_m2s) # endif }, { /* mono*/ @@ -165,10 +140,19 @@ enum optcla decclass(const enum optdec type) { - return (type == mmx || 
type == sse || type == dreidnowext || type == x86_64 ) ? mmxsse : normal; + return + ( + type == mmx + || type == sse + || type == sse_vintage + || type == dreidnowext + || type == dreidnowext_vintage + || type == x86_64 + || type == neon + || type == avx + ) ? mmxsse : normal; } - static int find_synth(func_synth synth, const func_synth synths[r_limit][f_limit]) { enum synth_resample ri; @@ -181,6 +165,25 @@ return FALSE; } + +#if defined(OPT_SSE) || defined(OPT_SSE_VINTAGE) +/* After knowing that it is either vintage or current SSE, + this separates the two. In case of non-OPT_MULTI, only one + of OPT_SSE and OPT_SSE_VINTAGE is active. */ +static enum optdec sse_or_vintage(mpg123_handle *fr) +{ + enum optdec type; + type = sse_vintage; +# ifdef OPT_SSE +# ifdef OPT_MULTI + if(fr->cpu_opts.the_dct36 == dct36_sse) +# endif + type = sse; +# endif + return type; +} +#endif + /* Determine what kind of decoder is actually active This depends on runtime choices which may cause fallback to i386 or generic code. */ static int find_dectype(mpg123_handle *fr) @@ -198,14 +201,35 @@ if(FALSE) ; /* Just to initialize the else if ladder. 
*/ #ifndef NO_16BIT -#ifdef OPT_3DNOWEXT - else if(basic_synth == synth_1to1_3dnowext) type = dreidnowext; +#if defined(OPT_3DNOWEXT) || defined(OPT_3DNOWEXT_VINTAGE) + else if(basic_synth == synth_1to1_3dnowext) + { + type = dreidnowext; +# ifdef OPT_3DNOWEXT_VINTAGE +# ifdef OPT_MULTI + if(fr->cpu_opts.the_dct36 == dct36_3dnowext) +# endif + type = dreidnowext_vintage; +# endif + } #endif -#ifdef OPT_SSE - else if(basic_synth == synth_1to1_sse) type = sse; +#if defined(OPT_SSE) || defined(OPT_SSE_VINTAGE) + else if(basic_synth == synth_1to1_sse) + { + type = sse_or_vintage(fr); + } #endif -#ifdef OPT_3DNOW - else if(basic_synth == synth_1to1_3dnow) type = dreidnow; +#if defined(OPT_3DNOW) || defined(OPT_3DNOW_VINTAGE) + else if(basic_synth == synth_1to1_3dnow) + { + type = dreidnow; +# ifdef OPT_3DNOW_VINTAGE +# ifdef OPT_MULTI + if(fr->cpu_opts.the_dct36 == dct36_3dnow) +# endif + type = dreidnow_vintage; +# endif + } #endif #ifdef OPT_MMX else if(basic_synth == synth_1to1_mmx) type = mmx; @@ -222,9 +246,15 @@ #ifdef OPT_X86_64 else if(basic_synth == synth_1to1_x86_64) type = x86_64; #endif +#ifdef OPT_AVX + else if(basic_synth == synth_1to1_avx) type = avx; +#endif #ifdef OPT_ARM else if(basic_synth == synth_1to1_arm) type = arm; #endif +#ifdef OPT_NEON + else if(basic_synth == synth_1to1_neon) type = neon; +#endif #ifdef OPT_GENERIC_DITHER else if(basic_synth == synth_1to1_dither) type = generic_dither; #endif @@ -239,31 +269,53 @@ #endif #endif /* 16bit */ +#ifndef NO_SYNTH32 + #ifndef NO_REAL -#ifdef OPT_SSE - else if(basic_synth == synth_1to1_real_sse) type = sse; +#if defined(OPT_SSE) || defined(OPT_SSE_VINTAGE) + else if(basic_synth == synth_1to1_real_sse) + { + type = sse_or_vintage(fr); + } #endif #ifdef OPT_X86_64 else if(basic_synth == synth_1to1_real_x86_64) type = x86_64; #endif +#ifdef OPT_AVX + else if(basic_synth == synth_1to1_real_avx) type = avx; +#endif #ifdef OPT_ALTIVEC else if(basic_synth == synth_1to1_real_altivec) type = altivec; #endif 
+#ifdef OPT_NEON + else if(basic_synth == synth_1to1_real_neon) type = neon; +#endif #endif /* real */ #ifndef NO_32BIT -#ifdef OPT_SSE - else if(basic_synth == synth_1to1_s32_sse) type = sse; +#if defined(OPT_SSE) || defined(OPT_SSE_VINTAGE) + else if(basic_synth == synth_1to1_s32_sse) + { + type = sse_or_vintage(fr); + } #endif #ifdef OPT_X86_64 else if(basic_synth == synth_1to1_s32_x86_64) type = x86_64; #endif +#ifdef OPT_AVX + else if(basic_synth == synth_1to1_s32_avx) type = avx; +#endif #ifdef OPT_ALTIVEC else if(basic_synth == synth_1to1_s32_altivec) type = altivec; #endif +#ifdef OPT_NEON + else if(basic_synth == synth_1to1_s32_neon) type = neon; +#endif #endif /* 32bit */ +#endif /* any 32 bit synth */ + #ifdef OPT_X86 else if(find_synth(basic_synth, plain_i386)) type = idrei; @@ -306,19 +358,20 @@ /* Select the basic output format, different from 16bit: 8bit, real. */ if(FALSE){} #ifndef NO_16BIT - else if(fr->af.encoding & MPG123_ENC_16) + else if(fr->af.dec_enc & MPG123_ENC_16) basic_format = f_16; #endif #ifndef NO_8BIT - else if(fr->af.encoding & MPG123_ENC_8) + else if(fr->af.dec_enc & MPG123_ENC_8) basic_format = f_8; #endif #ifndef NO_REAL - else if(fr->af.encoding & MPG123_ENC_FLOAT) + else if(fr->af.dec_enc & MPG123_ENC_FLOAT) basic_format = f_real; #endif #ifndef NO_32BIT - else if(fr->af.encoding & MPG123_ENC_32) + /* 24 bit integer means decoding to 32 bit first. */ + else if(fr->af.dec_enc & MPG123_ENC_32 || fr->af.dec_enc & MPG123_ENC_24) basic_format = f_32; #endif @@ -396,7 +449,10 @@ # endif # ifdef ACCURATE_ROUNDING && fr->cpu_opts.type != sse + && fr->cpu_opts.type != sse_vintage && fr->cpu_opts.type != x86_64 + && fr->cpu_opts.type != neon + && fr->cpu_opts.type != avx # endif ) { @@ -452,28 +508,31 @@ #endif fr->cpu_opts.type = nodec; - /* covers any i386+ cpu; they actually differ only in the synth_1to1 function, mostly... 
*/ -#ifdef OPT_X86 - #ifdef OPT_MULTI #ifndef NO_LAYER3 -#if (defined OPT_3DNOW || defined OPT_3DNOWEXT) +#if (defined OPT_3DNOW_VINTAGE || defined OPT_3DNOWEXT_VINTAGE || defined OPT_SSE || defined OPT_X86_64 || defined OPT_AVX) fr->cpu_opts.the_dct36 = dct36; #endif #endif #endif - + /* covers any i386+ cpu; they actually differ only in the synth_1to1 function, mostly... */ +#ifdef OPT_X86 if(cpu_i586(cpu_flags)) { # ifdef OPT_MULTI debug2("standard flags: 0x%08x\textended flags: 0x%08x", cpu_flags.std, cpu_flags.ext); # endif - #ifdef OPT_SSE +# ifdef OPT_SSE if( !done && (auto_choose || want_dec == sse) && cpu_sse(cpu_flags) && cpu_mmx(cpu_flags) ) { - chosen = "SSE"; + chosen = dn_sse; fr->cpu_opts.type = sse; +#ifdef OPT_MULTI +# ifndef NO_LAYER3 + /* if(cpu_fast_sse(cpu_flags)) */ fr->cpu_opts.the_dct36 = dct36_sse; +# endif +#endif # ifndef NO_16BIT fr->synths.plain[r_1to1][f_16] = synth_1to1_sse; # ifdef ACCURATE_ROUNDING @@ -490,15 +549,52 @@ # endif done = 1; } - #endif +# endif +# ifdef OPT_SSE_VINTAGE + if( !done && (auto_choose || want_dec == sse_vintage) + && cpu_sse(cpu_flags) && cpu_mmx(cpu_flags) ) + { + chosen = dn_sse_vintage; + fr->cpu_opts.type = sse_vintage; +# ifndef NO_16BIT + fr->synths.plain[r_1to1][f_16] = synth_1to1_sse; +# ifdef ACCURATE_ROUNDING + fr->synths.stereo[r_1to1][f_16] = synth_1to1_stereo_sse; +# endif +# endif +# ifndef NO_REAL + fr->synths.plain[r_1to1][f_real] = synth_1to1_real_sse; + fr->synths.stereo[r_1to1][f_real] = synth_1to1_real_stereo_sse; +# endif +# ifndef NO_32BIT + fr->synths.plain[r_1to1][f_32] = synth_1to1_s32_sse; + fr->synths.stereo[r_1to1][f_32] = synth_1to1_s32_stereo_sse; +# endif + done = 1; + } +# endif # ifdef OPT_3DNOWEXT - if( !done && (auto_choose || want_dec == dreidnowext ) + if( !done && (auto_choose || want_dec == dreidnowext) && cpu_3dnow(cpu_flags) && cpu_3dnowext(cpu_flags) && cpu_mmx(cpu_flags) ) { - chosen = "3DNowExt"; + chosen = dn_dreidnowext; fr->cpu_opts.type = dreidnowext; +# ifndef 
NO_16BIT + fr->synths.plain[r_1to1][f_16] = synth_1to1_3dnowext; +# endif + done = 1; + } +# endif +# ifdef OPT_3DNOWEXT_VINTAGE + if( !done && (auto_choose || want_dec == dreidnowext_vintage) + && cpu_3dnow(cpu_flags) + && cpu_3dnowext(cpu_flags) + && cpu_mmx(cpu_flags) ) + { + chosen = dn_dreidnowext_vintage; + fr->cpu_opts.type = dreidnowext_vintage; #ifdef OPT_MULTI # ifndef NO_LAYER3 fr->cpu_opts.the_dct36 = dct36_3dnowext; @@ -509,13 +605,25 @@ # endif done = 1; } - #endif - #ifdef OPT_3DNOW +# endif +# ifdef OPT_3DNOW if( !done && (auto_choose || want_dec == dreidnow) && cpu_3dnow(cpu_flags) && cpu_mmx(cpu_flags) ) { - chosen = "3DNow"; + chosen = dn_dreidnow; fr->cpu_opts.type = dreidnow; +# ifndef NO_16BIT + fr->synths.plain[r_1to1][f_16] = synth_1to1_3dnow; +# endif + done = 1; + } +# endif +# ifdef OPT_3DNOW_VINTAGE + if( !done && (auto_choose || want_dec == dreidnow_vintage) + && cpu_3dnow(cpu_flags) && cpu_mmx(cpu_flags) ) + { + chosen = dn_dreidnow_vintage; + fr->cpu_opts.type = dreidnow_vintage; #ifdef OPT_MULTI # ifndef NO_LAYER3 fr->cpu_opts.the_dct36 = dct36_3dnow; @@ -526,12 +634,12 @@ # endif done = 1; } - #endif +# endif #ifdef OPT_MMX if( !done && (auto_choose || want_dec == mmx) && cpu_mmx(cpu_flags) ) { - chosen = "MMX"; + chosen = dn_mmx; fr->cpu_opts.type = mmx; # ifndef NO_16BIT fr->synths.plain[r_1to1][f_16] = synth_1to1_mmx; @@ -572,7 +680,7 @@ But still... here it is... maybe for real use in future. 
*/ if(!done && (auto_choose || want_dec == ivier)) { - chosen = "i486"; + chosen = dn_ivier; fr->cpu_opts.type = ivier; done = 1; } @@ -580,7 +688,7 @@ #ifdef OPT_I386 if(!done && (auto_choose || want_dec == idrei)) { - chosen = "i386"; + chosen = dn_idrei; fr->cpu_opts.type = idrei; done = 1; } @@ -601,7 +709,7 @@ { fr->synths.plain[r_1to1][f_8] = synth_1to1_8bit_wrap; fr->synths.mono[r_1to1][f_8] = synth_1to1_8bit_wrap_mono; - fr->synths.mono2stereo[r_1to1][f_8] = synth_1to1_8bit_wrap_mono2stereo; + fr->synths.mono2stereo[r_1to1][f_8] = synth_1to1_8bit_wrap_m2s; } # endif # endif @@ -615,11 +723,42 @@ #endif /* OPT_X86 */ +#ifdef OPT_AVX + if(!done && (auto_choose || want_dec == avx) && cpu_avx(cpu_flags)) + { + chosen = "x86-64 (AVX)"; + fr->cpu_opts.type = avx; +#ifdef OPT_MULTI +# ifndef NO_LAYER3 + fr->cpu_opts.the_dct36 = dct36_avx; +# endif +#endif +# ifndef NO_16BIT + fr->synths.plain[r_1to1][f_16] = synth_1to1_avx; + fr->synths.stereo[r_1to1][f_16] = synth_1to1_stereo_avx; +# endif +# ifndef NO_REAL + fr->synths.plain[r_1to1][f_real] = synth_1to1_real_avx; + fr->synths.stereo[r_1to1][f_real] = synth_1to1_real_stereo_avx; +# endif +# ifndef NO_32BIT + fr->synths.plain[r_1to1][f_32] = synth_1to1_s32_avx; + fr->synths.stereo[r_1to1][f_32] = synth_1to1_s32_stereo_avx; +# endif + done = 1; + } +#endif + #ifdef OPT_X86_64 if(!done && (auto_choose || want_dec == x86_64)) { chosen = "x86-64 (SSE)"; fr->cpu_opts.type = x86_64; +#ifdef OPT_MULTI +# ifndef NO_LAYER3 + fr->cpu_opts.the_dct36 = dct36_x86_64; +# endif +#endif # ifndef NO_16BIT fr->synths.plain[r_1to1][f_16] = synth_1to1_x86_64; fr->synths.stereo[r_1to1][f_16] = synth_1to1_stereo_x86_64; @@ -636,27 +775,10 @@ } #endif -#ifdef OPT_GENERIC_DITHER - if(!done && (auto_choose || want_dec == generic_dither)) - { - chosen = "dithered generic"; - fr->cpu_opts.type = generic_dither; - dithered = TRUE; -# ifndef NO_16BIT - fr->synths.plain[r_1to1][f_16] = synth_1to1_dither; -# ifndef NO_DOWNSAMPLE - 
fr->synths.plain[r_2to1][f_16] = synth_2to1_dither; - fr->synths.plain[r_4to1][f_16] = synth_4to1_dither; -# endif -# endif - done = 1; - } -#endif - # ifdef OPT_ALTIVEC if(!done && (auto_choose || want_dec == altivec)) { - chosen = "AltiVec"; + chosen = dn_altivec; fr->cpu_opts.type = altivec; # ifndef NO_16BIT fr->synths.plain[r_1to1][f_16] = synth_1to1_altivec; @@ -674,10 +796,31 @@ } # endif +# ifdef OPT_NEON + if(!done && (auto_choose || want_dec == neon) && cpu_neon(cpu_flags)) + { + chosen = dn_neon; + fr->cpu_opts.type = neon; +# ifndef NO_16BIT + fr->synths.plain[r_1to1][f_16] = synth_1to1_neon; + fr->synths.stereo[r_1to1][f_16] = synth_1to1_stereo_neon; +# endif +# ifndef NO_REAL + fr->synths.plain[r_1to1][f_real] = synth_1to1_real_neon; + fr->synths.stereo[r_1to1][f_real] = synth_1to1_real_stereo_neon; +# endif +# ifndef NO_32BIT + fr->synths.plain[r_1to1][f_32] = synth_1to1_s32_neon; + fr->synths.stereo[r_1to1][f_32] = synth_1to1_s32_stereo_neon; +# endif + done = 1; + } +# endif + # ifdef OPT_ARM if(!done && (auto_choose || want_dec == arm)) { - chosen = "ARM"; + chosen = dn_arm; fr->cpu_opts.type = arm; # ifndef NO_16BIT fr->synths.plain[r_1to1][f_16] = synth_1to1_arm; @@ -689,12 +832,29 @@ # ifdef OPT_GENERIC if(!done && (auto_choose || want_dec == generic)) { - chosen = "generic"; + chosen = dn_generic; fr->cpu_opts.type = generic; done = 1; } # endif +#ifdef OPT_GENERIC_DITHER + if(!done && (auto_choose || want_dec == generic_dither)) + { + chosen = "dithered generic"; + fr->cpu_opts.type = generic_dither; + dithered = TRUE; +# ifndef NO_16BIT + fr->synths.plain[r_1to1][f_16] = synth_1to1_dither; +# ifndef NO_DOWNSAMPLE + fr->synths.plain[r_2to1][f_16] = synth_2to1_dither; + fr->synths.plain[r_4to1][f_16] = synth_4to1_dither; +# endif +# endif + done = 1; + } +#endif + fr->cpu_opts.class = decclass(fr->cpu_opts.type); # ifndef NO_8BIT @@ -706,7 +866,7 @@ { fr->synths.plain[r_1to1][f_8] = synth_1to1_8bit_wrap; fr->synths.mono[r_1to1][f_8] = 
synth_1to1_8bit_wrap_mono; - fr->synths.mono2stereo[r_1to1][f_8] = synth_1to1_8bit_wrap_mono2stereo; + fr->synths.mono2stereo[r_1to1][f_8] = synth_1to1_8bit_wrap_m2s; } # endif # endif @@ -756,12 +916,21 @@ #ifdef OPT_SSE NULL, #endif + #ifdef OPT_SSE_VINTAGE + NULL, + #endif #ifdef OPT_3DNOWEXT NULL, #endif + #ifdef OPT_3DNOWEXT_VINTAGE + NULL, + #endif #ifdef OPT_3DNOW NULL, #endif + #ifdef OPT_3DNOW_VINTAGE + NULL, + #endif #ifdef OPT_MMX NULL, #endif @@ -780,6 +949,9 @@ #ifdef OPT_ALTIVEC NULL, #endif + #ifdef OPT_AVX + NULL, + #endif #ifdef OPT_X86_64 NULL, #endif @@ -786,6 +958,9 @@ #ifdef OPT_ARM NULL, #endif + #ifdef OPT_NEON + NULL, + #endif #ifdef OPT_GENERIC_FLOAT NULL, #endif @@ -802,38 +977,53 @@ static const char *mpg123_decoder_list[] = { #ifdef OPT_SSE - dn_SSE, + dn_sse, #endif + #ifdef OPT_SSE_VINTAGE + dn_sse_vintage, + #endif #ifdef OPT_3DNOWEXT - dn_3DNowExt, + dn_dreidnowext, #endif + #ifdef OPT_3DNOWEXT_VINTAGE + dn_dreidnowext_vintage, + #endif #ifdef OPT_3DNOW - dn_3DNow, + dn_dreidnow, #endif + #ifdef OPT_3DNOW_VINTAGE + dn_dreidnow_vintage, + #endif #ifdef OPT_MMX - dn_MMX, + dn_mmx, #endif #ifdef OPT_I586 - dn_i586, + dn_ifuenf, #endif #ifdef OPT_I586_DITHER - dn_i586_dither, + dn_ifuenf_dither, #endif #ifdef OPT_I486 - dn_i486, + dn_ivier, #endif #ifdef OPT_I386 - dn_i386, + dn_idrei, #endif #ifdef OPT_ALTIVEC - dn_AltiVec, + dn_altivec, #endif + #ifdef OPT_AVX + dn_avx, + #endif #ifdef OPT_X86_64 dn_x86_64, #endif #ifdef OPT_ARM - dn_ARM, + dn_arm, #endif + #ifdef OPT_NEON + dn_neon, + #endif #ifdef OPT_GENERIC dn_generic, #endif @@ -850,54 +1040,71 @@ return; #else const char **d = mpg123_supported_decoder_list; +#if (defined OPT_X86) || (defined OPT_X86_64) || (defined OPT_NEON) + getcpuflags(&cpu_flags); +#endif #ifdef OPT_X86 - getcpuflags(&cpu_flags); if(cpu_i586(cpu_flags)) { /* not yet: if(cpu_sse2(cpu_flags)) printf(" SSE2"); if(cpu_sse3(cpu_flags)) printf(" SSE3"); */ #ifdef OPT_SSE - if(cpu_sse(cpu_flags)) *(d++) = 
decname[sse]; + if(cpu_sse(cpu_flags)) *(d++) = dn_sse; #endif +#ifdef OPT_SSE_VINTAGE + if(cpu_sse(cpu_flags)) *(d++) = dn_sse_vintage; +#endif #ifdef OPT_3DNOWEXT - if(cpu_3dnowext(cpu_flags)) *(d++) = decname[dreidnowext]; + if(cpu_3dnowext(cpu_flags)) *(d++) = dn_dreidnowext; #endif +#ifdef OPT_3DNOWEXT_VINTAGE + if(cpu_3dnowext(cpu_flags)) *(d++) = dn_dreidnowext_vintage; +#endif #ifdef OPT_3DNOW - if(cpu_3dnow(cpu_flags)) *(d++) = decname[dreidnow]; + if(cpu_3dnow(cpu_flags)) *(d++) = dn_dreidnow; #endif +#ifdef OPT_3DNOW_VINTAGE + if(cpu_3dnow(cpu_flags)) *(d++) = dn_dreidnow_vintage; +#endif #ifdef OPT_MMX - if(cpu_mmx(cpu_flags)) *(d++) = decname[mmx]; + if(cpu_mmx(cpu_flags)) *(d++) = dn_mmx; #endif #ifdef OPT_I586 - *(d++) = decname[ifuenf]; + *(d++) = dn_ifuenf; #endif #ifdef OPT_I586_DITHER - *(d++) = decname[ifuenf_dither]; + *(d++) = dn_ifuenf_dither; #endif } #endif /* just assume that the i486 built is run on a i486 cpu... */ #ifdef OPT_I486 - *(d++) = decname[ivier]; + *(d++) = dn_ivier; #endif #ifdef OPT_ALTIVEC - *(d++) = decname[altivec]; + *(d++) = dn_altivec; #endif /* every supported x86 can do i386, any cpu can do generic */ #ifdef OPT_I386 - *(d++) = decname[idrei]; + *(d++) = dn_idrei; #endif +#ifdef OPT_AVX + if(cpu_avx(cpu_flags)) *(d++) = dn_avx; +#endif #ifdef OPT_X86_64 - *(d++) = decname[x86_64]; + *(d++) = dn_x86_64; #endif #ifdef OPT_ARM - *(d++) = decname[arm]; + *(d++) = dn_arm; #endif +#ifdef OPT_NEON + if(cpu_neon(cpu_flags)) *(d++) = dn_neon; +#endif #ifdef OPT_GENERIC - *(d++) = decname[generic]; + *(d++) = dn_generic; #endif #ifdef OPT_GENERIC_DITHER - *(d++) = decname[generic_dither]; + *(d++) = dn_generic_dither; #endif #endif /* ndef OPT_MULTI */ } Index: lib/3rdparty/libmpg123/parse.c =================================================================== --- lib/3rdparty/libmpg123/parse.c (revision 62563) +++ lib/3rdparty/libmpg123/parse.c (working copy) @@ -1,7 +1,7 @@ /* parse: spawned from common; clustering around 
stream/frame parsing - copyright ?-2009 by the mpg123 project - free software under the terms of the LGPL 2.1 + copyright ?-2014 by the mpg123 project - free software under the terms of the LGPL 2.1 see COPYING and AUTHORS files in distribution or http://mpg123.org initially written by Michael Hipp & Thomas Orgis */ @@ -28,44 +28,24 @@ #endif #define TRACK_MAX_FRAMES ULONG_MAX/4/1152 +#include "mpeghead.h" + #include "debug.h" #define bsbufid(fr) (fr)->bsbuf==(fr)->bsspace[0] ? 0 : ((fr)->bsbuf==fr->bsspace[1] ? 1 : ( (fr)->bsbuf==(fr)->bsspace[0]+512 ? 2 : ((fr)->bsbuf==fr->bsspace[1]+512 ? 3 : -1) ) ) -/* - AAAAAAAA AAABBCCD EEEEFFGH IIJJKLMM - A: sync - B: mpeg version - C: layer - D: CRC - E: bitrate - F:sampling rate - G: padding - H: private - I: channel mode - J: mode ext - K: copyright - L: original - M: emphasis +/* PARSE_GOOD and PARSE_BAD have to be 1 and 0 (TRUE and FALSE), others can vary. */ +enum parse_codes +{ + PARSE_MORE = MPG123_NEED_MORE + ,PARSE_ERR = MPG123_ERR + ,PARSE_END = 10 /* No more audio data to find. */ + ,PARSE_GOOD = 1 /* Everything's fine. */ + ,PARSE_BAD = 0 /* Not fine (invalid data). */ + ,PARSE_RESYNC = 2 /* Header not good, go into resync. */ + ,PARSE_AGAIN = 3 /* Really start over, throw away and read a new header, again. */ +}; - old compare mask 0xfffffd00: - 11111111 11111111 11111101 00000000 - - means: everything must match excluding padding and channel mode, ext mode, ... - But a vbr stream's headers will differ in bitrate! - We are already strict in allowing only frames of same type in stream, we should at least watch out for VBR while being strict. - - So a better mask is: - 11111111 11111111 00001101 00000000 - - Even more, I'll allow varying crc bit. 
- 11111111 11111110 00001101 00000000 - - (still unsure about this private bit) -*/ -#define HDRCMPMASK 0xfffe0d00 -#define HDRSAMPMASK 0xc00 /* 1100 00000000, FF bits (sample rate) */ - /* bitrates for [mpeg1/2][layer] */ static const int tabsel_123[2][3][16] = { @@ -81,11 +61,15 @@ } }; -const long freqs[9] = { 44100, 48000, 32000, 22050, 24000, 16000 , 11025 , 12000 , 8000 }; +static const long freqs[9] = { 44100, 48000, 32000, 22050, 24000, 16000 , 11025 , 12000 , 8000 }; -static int decode_header(mpg123_handle *fr,unsigned long newhead); +static int decode_header(mpg123_handle *fr,unsigned long newhead, int *freeformat_count); +static int skip_junk(mpg123_handle *fr, unsigned long *newheadp, long *headcount); +static int do_readahead(mpg123_handle *fr, unsigned long newhead); +static int wetwork(mpg123_handle *fr, unsigned long *newheadp); /* These two are to be replaced by one function that gives all the frame parameters (for outsiders).*/ +/* Those functions are unsafe regarding bad arguments (inside the mpg123_handle), but just returning anything would also be unsafe, the caller code has to be trusted. */ int frame_bitrate(mpg123_handle *fr) { @@ -97,24 +81,21 @@ return freqs[fr->sampling_frequency]; } -#define free_format_header(head) ( ((head & 0xffe00000) == 0xffe00000) && ((head>>17)&3) && (((head>>12)&0xf) == 0x0) && (((head>>10)&0x3) != 0x3 )) - /* compiler is smart enought to inline this one or should I really do it as macro...? 
*/ -int head_check(unsigned long head) +static int head_check(unsigned long head) { if ( - /* first 11 bits are set to 1 for frame sync */ - ((head & 0xffe00000) != 0xffe00000) + ((head & HDR_SYNC) != HDR_SYNC) || /* layer: 01,10,11 is 1,2,3; 00 is reserved */ - (!((head>>17)&3)) + (!(HDR_LAYER_VAL(head))) || /* 1111 means bad bitrate */ - (((head>>12)&0xf) == 0xf) + (HDR_BITRATE_VAL(head) == 0xf) || /* sampling freq: 11 is reserved */ - (((head>>10)&0x3) == 0x3 ) + (HDR_SAMPLERATE_VAL(head) == 0x3) /* here used to be a mpeg 2.5 check... re-enabled 2.5 decoding due to lack of evidence that it is really not good */ ) { @@ -139,14 +120,10 @@ I hope that ensuring all zeros until tag start is enough. */ int lame_offset = (fr->stereo == 2) ? (fr->lsf ? 17 : 32 ) : (fr->lsf ? 9 : 17); - /* At least skip the decoder delay. */ -#ifdef GAPLESS - if(fr->p.flags & MPG123_GAPLESS) - { - if(fr->begin_s == 0) frame_gapless_init(fr, GAPLESS_DELAY, 0); - } -#endif + if(fr->p.flags & MPG123_IGNORE_INFOFRAME) return 0; + + /* Note: CRC or not, that does not matter here. */ if(fr->framesize >= 120+lame_offset) /* traditional Xing header is 120 bytes */ { int i; @@ -203,23 +180,13 @@ } else { - /* - In theory, one should use that value for skipping... - When I know the exact number of samples I could simply count in flush_output, - but that's problematic with seeking and such. - I still miss the real solution for detecting the end. - */ fr->track_frames = (off_t) make_long(fr->bsbuf, lame_offset); if(fr->track_frames > TRACK_MAX_FRAMES) fr->track_frames = 0; /* endless stream? */ - #ifdef GAPLESS - /* if no further info there, remove/add at least the decoder delay */ +#ifdef GAPLESS + /* All or nothing: Only if encoder delay/padding is known we'll cut samples for gapless. 
*/ if(fr->p.flags & MPG123_GAPLESS) - { - off_t length = fr->track_frames * spf(fr); - if(length > 1) - frame_gapless_init(fr, GAPLESS_DELAY, length+GAPLESS_DELAY); - } - #endif + frame_gapless_init(fr, fr->track_frames, 0, 0); +#endif if(VERBOSE3) fprintf(stderr, "Note: Xing: %lu frames\n", (long unsigned)fr->track_frames); } @@ -274,7 +241,6 @@ lame_offset += 4; } /* I guess that either 0 or LAME extra data follows */ - /* there may this crc16 be floating around... (?) */ if(fr->bsbuf[lame_offset] != 0) { unsigned char lame_vbr; @@ -287,8 +253,23 @@ if(VERBOSE3) fprintf(stderr, "Note: Info: Encoder: %s\n", nb); if(!strncmp("LAME", nb, 4)) { - gain_offset = 6; - debug("TODO: finish lame detetcion..."); + /* Lame versions before 3.95.1 used 83 dB reference level, later versions 89 dB. + We stick with 89 dB as being "normal", adding 6 dB. */ + unsigned int major, minor; + char rest[6]; + rest[0] = 0; + if(sscanf(nb+4, "%u.%u%s", &major, &minor, rest) >= 2) + { + debug3("LAME: %u/%u/%s", major, minor, rest); + /* We cannot detect LAME 3.95 reliably (same version string as 3.95.1), so this is a blind spot. + Everything < 3.95 is safe, though. */ + if(major < 3 || (major == 3 && minor < 95)) /* || (major == 3 && minor == 95 && rest[0] == 0)) */ + { + gain_offset = 6; + if(VERBOSE3) fprintf(stderr, "Note: Info: Old LAME detected, using ReplayGain preamp of %f dB.\n", gain_offset); + } + } + else if(VERBOSE3) fprintf(stderr, "Note: Info: Cannot determine LAME version.\n"); } lame_offset += 9; /* the 4 big bits are tag revision, the small bits vbr method */ @@ -348,7 +329,10 @@ else if(gt == 2) gt = 1; /* audiophile */ else continue; /* get the 9 bits into a number, divide by 10, multiply sign... happy bit banging */ - replay_gain[0] = (float) ((fr->bsbuf[lame_offset] & 0x2) ? -0.1 : 0.1) * (make_short(fr->bsbuf, lame_offset) & 0x1f); + replay_gain[gt] = (float) ((fr->bsbuf[lame_offset] & 0x2) ? 
-0.1 : 0.1) * (make_short(fr->bsbuf, lame_offset) & 0x1ff); + /* If this is an automatic value from LAME (or whatever), the automatic gain offset applies. + If a user or whoever set the value, do not touch it! 011 is automatic origin. */ + if(origin == 3) replay_gain[gt] += gain_offset; } lame_offset += 2; } @@ -381,13 +365,9 @@ #ifdef GAPLESS if(fr->p.flags & MPG123_GAPLESS) { - off_t length = fr->track_frames * spf(fr); - off_t skipbegin = GAPLESS_DELAY + ((((int) fr->bsbuf[lame_offset]) << 4) | (((int) fr->bsbuf[lame_offset+1]) >> 4)); - off_t skipend = -GAPLESS_DELAY + (((((int) fr->bsbuf[lame_offset+1]) << 8) | (((int) fr->bsbuf[lame_offset+2]))) & 0xfff); - debug3("preparing gapless mode for layer3: length %lu, skipbegin %lu, skipend %lu", - (long unsigned)length, (long unsigned)skipbegin, (long unsigned)skipend); - if(length > 1) - frame_gapless_init(fr, skipbegin, (skipend < length) ? length-skipend : length); + off_t skipbegin = ((((int) fr->bsbuf[lame_offset]) << 4) | (((int) fr->bsbuf[lame_offset+1]) >> 4)); + off_t skipend = (((((int) fr->bsbuf[lame_offset+1]) << 8) | (((int) fr->bsbuf[lame_offset+2]))) & 0xfff); + frame_gapless_init(fr, fr->track_frames, skipbegin, skipend); } #endif } @@ -404,30 +384,29 @@ /* Just tell if the header is some mono. */ static int header_mono(unsigned long newhead) { - return ((newhead>>6)&0x3) == MPG_MD_MONO ? TRUE : FALSE; + return HDR_CHANNEL_VAL(newhead) == MPG_MD_MONO ? TRUE : FALSE; } -/* - That's a big one: read the next frame. 1 is success, <= 0 is some error - Special error READER_MORE means: Please feed more data and try again. 
-*/ -int read_frame(mpg123_handle *fr) +/* true if the two headers will work with the same decoding routines */ +static int head_compatible(unsigned long fred, unsigned long bret) { - /* TODO: rework this thing */ - unsigned long newhead; - off_t framepos; - int ret; - /* stuff that needs resetting if complete frame reading fails */ - int oldsize = fr->framesize; - int oldphase = fr->halfphase; + return ( (fred & HDR_CMPMASK) == (bret & HDR_CMPMASK) + && header_mono(fred) == header_mono(bret) ); +} - /* The counter for the search-first-header loop. - It is persistent outside the loop to prevent seemingly endless loops - when repeatedly headers are found that do not have valid followup headers. */ - int headcount = 0; +static void halfspeed_prepare(mpg123_handle *fr) +{ + /* save for repetition */ + if(fr->p.halfspeed && fr->lay == 3) + { + debug("halfspeed - reusing old bsbuf "); + memcpy (fr->ssave, fr->bsbuf, fr->ssize); + } +} - fr->fsizeold=fr->framesize; /* for Layer3 */ - +/* If this returns 1, the next frame is the repetition. */ +static int halfspeed_do(mpg123_handle *fr) +{ /* Speed-down hack: Play it again, Sam (the frame, I mean). */ if (fr->p.halfspeed) { @@ -447,7 +426,46 @@ fr->halfphase = fr->p.halfspeed - 1; } } + return 0; +} +/* + Temporary macro until we got this worked out. + Idea is to filter out special return values that shall trigger direct jumps to end / resync / read again. + Particularly, the generic ret==PARSE_BAD==0 and ret==PARSE_GOOD==1 are not affected. +*/ +#define JUMP_CONCLUSION(ret) \ +{ \ +if(ret < 0){ debug1("%s", ret == MPG123_NEED_MORE ? "need more" : "read error"); goto read_frame_bad; } \ +else if(ret == PARSE_AGAIN) goto read_again; \ +else if(ret == PARSE_RESYNC) goto init_resync; \ +else if(ret == PARSE_END){ ret=0; goto read_frame_bad; } \ +} + +/* + That's a big one: read the next frame. 1 is success, <= 0 is some error + Special error READER_MORE means: Please feed more data and try again. 
+*/ +int read_frame(mpg123_handle *fr) +{ + /* TODO: rework this thing */ + int freeformat_count = 0; + unsigned long newhead; + off_t framepos; + int ret; + /* stuff that needs resetting if complete frame reading fails */ + int oldsize = fr->framesize; + int oldphase = fr->halfphase; + + /* The counter for the search-first-header loop. + It is persistent outside the loop to prevent seemingly endless loops + when repeatedly headers are found that do not have valid followup headers. */ + long headcount = 0; + + fr->fsizeold=fr->framesize; /* for Layer3 */ + + if(halfspeed_do(fr) == 1) return 1; + read_again: /* In case we are looping to find a valid frame, discard any buffered data before the current position. This is essential to prevent endless looping, always going back to the beginning when feeder buffer is exhausted. */ @@ -454,269 +472,40 @@ if(fr->rd->forget != NULL) fr->rd->forget(fr); debug2("trying to get frame %"OFF_P" at %"OFF_P, (off_p)fr->num+1, (off_p)fr->rd->tell(fr)); - if((ret = fr->rd->head_read(fr,&newhead)) <= 0){ debug("need more?"); goto read_frame_bad;} + if((ret = fr->rd->head_read(fr,&newhead)) <= 0){ debug1("need more? (%i)", ret); goto read_frame_bad;} init_resync: - fr->header_change = 2; /* output format change is possible... */ - if(fr->oldhead) /* check a following header for change */ +#ifdef SKIP_JUNK + if(!fr->firsthead && !head_check(newhead)) { - if(fr->oldhead == newhead) fr->header_change = 0; - else - /* If they have the same sample rate. Note that only is _not_ the case for the first header, as we enforce sample rate match for following frames. - So, during one stream, only change of stereoness is possible and indicated by header_change == 2. */ - if((fr->oldhead & HDRSAMPMASK) == (newhead & HDRSAMPMASK)) - { - /* Now if both channel modes are mono or both stereo, it's no big deal. 
*/ - if( header_mono(fr->oldhead) == header_mono(newhead)) - fr->header_change = 1; - } + ret = skip_junk(fr, &newhead, &headcount); + JUMP_CONCLUSION(ret); } - -#ifdef SKIP_JUNK - /* watch out for junk/tags on beginning of stream by invalid header */ - if(!fr->firsthead && !head_check(newhead)) { - - /* check for id3v2; first three bytes (of 4) are "ID3" */ - if((newhead & (unsigned long) 0xffffff00) == (unsigned long) 0x49443300) - { - int id3ret = 0; - id3ret = parse_new_id3(fr, newhead); - if (id3ret < 0){ debug("need more?"); ret = id3ret; goto read_frame_bad; } -#ifndef NO_ID3V2 - else if(id3ret > 0){ debug("got ID3v2"); fr->metaflags |= MPG123_NEW_ID3|MPG123_ID3; } - else debug("no useful ID3v2"); #endif - fr->oldhead = 0; - goto read_again; /* Also in case of invalid ID3 tag (ret==0), try to get on track again. */ - } - else if(VERBOSE2 && fr->silent_resync == 0) fprintf(stderr,"Note: Junk at the beginning (0x%08lx)\n",newhead); + ret = head_check(newhead); + if(ret) ret = decode_header(fr, newhead, &freeformat_count); - /* I even saw RIFF headers at the beginning of MPEG streams ;( */ - if(newhead == ('R'<<24)+('I'<<16)+('F'<<8)+'F') { - if(VERBOSE2 && fr->silent_resync == 0) fprintf(stderr, "Note: Looks like a RIFF header.\n"); - - if((ret=fr->rd->head_read(fr,&newhead))<=0){ debug("need more?"); goto read_frame_bad; } - - while(newhead != ('d'<<24)+('a'<<16)+('t'<<8)+'a') - { - if((ret=fr->rd->head_shift(fr,&newhead))<=0){ debug("need more?"); goto read_frame_bad; } - } - if((ret=fr->rd->head_read(fr,&newhead))<=0){ debug("need more?"); goto read_frame_bad; } - - if(VERBOSE2 && fr->silent_resync == 0) fprintf(stderr,"Note: Skipped RIFF header!\n"); - - fr->oldhead = 0; - goto read_again; - } - /* unhandled junk... just continue search for a header */ - /* step in byte steps through next 64K */ - debug("searching for header..."); - - ret = 0; /* We will check the value after the loop. 
*/ - for(; headcount<65536; headcount++) - { - if((ret=fr->rd->head_shift(fr,&newhead))<=0){ debug("need more?"); goto read_frame_bad; } - /* if(head_check(newhead)) */ - if(head_check(newhead) && (ret=decode_header(fr, newhead))) - break; - } - if(ret<0){ debug("need more?"); goto read_frame_bad; } - - if(headcount == 65536) - { - if(NOQUIET) error("Giving up searching valid MPEG header after (over) 64K of junk."); - return 0; - } - else debug1("hopefully found one at %"OFF_P, (off_p)fr->rd->tell(fr)); - /* - * should we additionaly check, whether a new frame starts at - * the next expected position? (some kind of read ahead) - * We could implement this easily, at least for files. - */ + JUMP_CONCLUSION(ret); /* That only continues for ret == PARSE_BAD or PARSE_GOOD. */ + if(ret == PARSE_BAD) + { /* Header was not good. */ + ret = wetwork(fr, &newhead); /* Messy stuff, handle junk, resync ... */ + JUMP_CONCLUSION(ret); + /* Normally, we jumped already. If for some reason everything's fine to continue, do continue. */ + if(ret != PARSE_GOOD) goto read_frame_bad; } -#endif - /* first attempt of read ahead check to find the real first header; cannot believe what junk is out there! 
*/ - if(!fr->firsthead && fr->rdat.flags & (READER_SEEKABLE|READER_BUFFERED) && head_check(newhead) && (ret=decode_header(fr, newhead))) + if(!fr->firsthead) { - unsigned long nexthead = 0; - int hd = 0; - off_t start = fr->rd->tell(fr); - if(ret<0){ debug("need more?"); goto read_frame_bad; } - - debug2("doing ahead check with BPF %d at %"OFF_P, fr->framesize+4, (off_p)start); - /* step framesize bytes forward and read next possible header*/ - if((ret=fr->rd->skip_bytes(fr, fr->framesize))<0) - { - if(ret==READER_ERROR && NOQUIET) error("cannot seek!"); - goto read_frame_bad; - } - hd = fr->rd->head_read(fr,&nexthead); - if(hd==MPG123_NEED_MORE){ debug("need more?"); ret = hd; goto read_frame_bad; } - if((ret=fr->rd->back_bytes(fr, fr->rd->tell(fr)-start))<0) - { - if(ret==READER_ERROR && NOQUIET) error("cannot seek!"); - else debug("need more?"); - goto read_frame_bad; - } - debug1("After fetching next header, at %"OFF_P, (off_p)fr->rd->tell(fr)); - if(!hd) - { - if(NOQUIET) warning("cannot read next header, a one-frame stream? Duh..."); - } - else - { - debug2("does next header 0x%08lx match first 0x%08lx?", nexthead, newhead); - /* not allowing free format yet */ - if(!head_check(nexthead) || (nexthead & HDRCMPMASK) != (newhead & HDRCMPMASK)) - { - debug("No, the header was not valid, start from beginning..."); - fr->oldhead = 0; /* start over */ - /* try next byte for valid header */ - if((ret=fr->rd->back_bytes(fr, 3))<0) - { - if(NOQUIET) error("cannot seek!"); - else debug("need more?"); - goto read_frame_bad; - } - goto read_again; - } - } + ret = do_readahead(fr, newhead); + /* readahead can fail with NEED_MORE, in which case we must also make the just read header available again for next go */ + if(ret < 0) fr->rd->back_bytes(fr, 4); + JUMP_CONCLUSION(ret); } - /* why has this head check been avoided here before? 
*/ - if(!head_check(newhead)) - { - /* and those ugly ID3 tags */ - if((newhead & 0xffffff00) == ('T'<<24)+('A'<<16)+('G'<<8)) - { - fr->id3buf[0] = (unsigned char) ((newhead >> 24) & 0xff); - fr->id3buf[1] = (unsigned char) ((newhead >> 16) & 0xff); - fr->id3buf[2] = (unsigned char) ((newhead >> 8) & 0xff); - fr->id3buf[3] = (unsigned char) ( newhead & 0xff); - if((ret=fr->rd->fullread(fr,fr->id3buf+4,124)) < 0){ debug("need more?"); goto read_frame_bad; } - fr->metaflags |= MPG123_NEW_ID3|MPG123_ID3; - fr->rdat.flags |= READER_ID3TAG; /* that marks id3v1 */ - if (VERBOSE3) fprintf(stderr,"Note: Skipped ID3v1 tag.\n"); - goto read_again; - } - /* duplicated code from above! */ - /* check for id3v2; first three bytes (of 4) are "ID3" */ - if((newhead & (unsigned long) 0xffffff00) == (unsigned long) 0x49443300) - { - int id3length = 0; - id3length = parse_new_id3(fr, newhead); - if(id3length < 0){ debug("need more?"); ret = id3length; goto read_frame_bad; } + /* Now we should have our valid header and proceed to reading the frame. */ - fr->metaflags |= MPG123_NEW_ID3|MPG123_ID3; - goto read_again; - } - else if(NOQUIET && fr->silent_resync == 0) - { - fprintf(stderr,"Note: Illegal Audio-MPEG-Header 0x%08lx at offset %"OFF_P".\n", - newhead, (off_p)fr->rd->tell(fr)-4); - } - - if(NOQUIET && (newhead & 0xffffff00) == ('b'<<24)+('m'<<16)+('p'<<8)) fprintf(stderr,"Note: Could be a BMP album art.\n"); - /* Do resync if not forbidden by flag. - I used to have a check for not-icy-meta here, but concluded that the desync issues came from a reader bug, not the stream. */ - if( !(fr->p.flags & MPG123_NO_RESYNC) ) - { - long try = 0; - long limit = fr->p.resync_limit; - - /* If a resync is needed the bitreservoir of previous frames is no longer valid */ - fr->bitreservoir = 0; - - /* TODO: make this more robust, I'd like to cat two mp3 fragments together (in a dirty way) and still have mpg123 beign able to decode all it somehow. 
*/ - if(NOQUIET && fr->silent_resync == 0) fprintf(stderr, "Note: Trying to resync...\n"); - /* Read more bytes until we find something that looks - reasonably like a valid header. This is not a - perfect strategy, but it should get us back on the - track within a short time (and hopefully without - too much distortion in the audio output). */ - do - { - ++try; - if(limit >= 0 && try >= limit) break; - - if((ret=fr->rd->head_shift(fr,&newhead)) <= 0) - { - debug("need more?"); - if(NOQUIET) fprintf (stderr, "Note: Hit end of (available) data during resync.\n"); - - goto read_frame_bad; - } - if(VERBOSE3) debug3("resync try %li at %"OFF_P", got newhead 0x%08lx", try, (off_p)fr->rd->tell(fr), newhead); - - if(!fr->oldhead) - { - debug("going to init_resync..."); - goto init_resync; /* "considered harmful", eh? */ - } - /* we should perhaps collect a list of valid headers that occured in file... there can be more */ - /* Michael's new resync routine seems to work better with the one frame readahead (and some input buffering?) */ - } while - ( - !head_check(newhead) /* Simply check for any valid header... we have the readahead to get it straight now(?) */ - /* (newhead & HDRCMPMASK) != (fr->oldhead & HDRCMPMASK) - && (newhead & HDRCMPMASK) != (fr->firsthead & HDRCMPMASK)*/ - ); - /* too many false positives - }while (!(head_check(newhead) && decode_header(fr, newhead))); */ - if(NOQUIET && fr->silent_resync == 0) fprintf (stderr, "Note: Skipped %li bytes in input.\n", try); - - if(limit >= 0 && try >= limit) - { - if(NOQUIET) - error1("Giving up resync after %li bytes - your stream is not nice... (maybe increasing resync limit could help).", try); - - fr->err = MPG123_RESYNC_FAIL; - return READER_ERROR; - } - else - { - debug1("Found valid header 0x%lx... 
unsetting firsthead to reinit stream.", newhead); - fr->firsthead = 0; - goto init_resync; - } - } - else - { - if(NOQUIET) error("not attempting to resync..."); - - fr->err = MPG123_OUT_OF_SYNC; - return READER_ERROR; - } - } - - /* Man, that code looks awfully redundant... - I need to untangle the spaghetti here in a future version. */ - if(!fr->firsthead) - { - ret=decode_header(fr,newhead); - if(ret == 0) - { - if(NOQUIET) error("decode header failed before first valid one, going to read again"); - - goto read_again; - } - else if(ret < 0){ debug("need more?"); goto read_frame_bad; } - } - else - { - ret=decode_header(fr,newhead); - if(ret == 0) - { - if(NOQUIET) error("decode header failed - goto resync"); - /* return 0; */ - goto init_resync; - } - else if(ret < 0){ debug("need more?"); goto read_frame_bad; } - } - /* if filepos is invalid, so is framepos */ framepos = fr->rd->tell(fr) - 4; /* flip/init buffer for Layer 3 */ @@ -770,19 +559,29 @@ ++fr->num; /* 0 for first frame! */ debug4("Frame %"OFF_P" %08lx %i, next filepos=%"OFF_P, (off_p)fr->num, newhead, fr->framesize, (off_p)fr->rd->tell(fr)); - - /* save for repetition */ - if(fr->p.halfspeed && fr->lay == 3) + if(!(fr->state_flags & FRAME_FRANKENSTEIN) && ( + (fr->track_frames > 0 && fr->num >= fr->track_frames) +#ifdef GAPLESS + || (fr->gapless_frames > 0 && fr->num >= fr->gapless_frames) +#endif + )) { - debug("halfspeed - reusing old bsbuf "); - memcpy (fr->ssave, fr->bsbuf, fr->ssize); + fr->state_flags |= FRAME_FRANKENSTEIN; + if(NOQUIET) fprintf(stderr, "\nWarning: Encountered more data after announced end of track (frame %"OFF_P"/%"OFF_P"). Frankenstein!\n", (off_p)fr->num, +#ifdef GAPLESS + fr->gapless_frames > 0 ? (off_p)fr->gapless_frames : +#endif + (off_p)fr->track_frames); } + halfspeed_prepare(fr); + /* index the position */ + fr->input_offset = framepos; #ifdef FRAME_INDEX /* Keep track of true frame positions in our frame index. 
but only do so when we are sure that the frame number is accurate... */ - if(fr->accurate && FI_NEXT(fr->index, fr->num)) + if((fr->state_flags & FRAME_ACCURATE) && FI_NEXT(fr->index, fr->num)) fi_add(&fr->index, framepos); #endif @@ -793,9 +592,46 @@ fr->to_decode = fr->to_ignore = TRUE; if(fr->error_protection) fr->crc = getbits(fr, 16); /* skip crc */ + /* + Let's check for header change after deciding that the new one is good + and actually having read a frame. + + header_change > 1: decoder structure has to be updated + Preserve header_change value from previous runs if it is serious. + If we still have a big change pending, it should be dealt with outside, + fr->header_change set to zero afterwards. + */ + if(fr->header_change < 2) + { + fr->header_change = 2; /* output format change is possible... */ + if(fr->oldhead) /* check a following header for change */ + { + if(fr->oldhead == newhead) fr->header_change = 0; + else + /* Headers that match in this test behave the same for the outside world. + namely: same decoding routines, same amount of decoded data. */ + if(head_compatible(fr->oldhead, newhead)) + fr->header_change = 1; + else + { + fr->state_flags |= FRAME_FRANKENSTEIN; + if(NOQUIET) + fprintf(stderr, "\nWarning: Big change (MPEG version, layer, rate). Frankenstein stream?\n"); + } + } + else if(fr->firsthead && !head_compatible(fr->firsthead, newhead)) + { + fr->state_flags |= FRAME_FRANKENSTEIN; + if(NOQUIET) + fprintf(stderr, "\nWarning: Big change from first (MPEG version, layer, rate). Frankenstein stream?\n"); + } + } + + fr->oldhead = newhead; + return 1; read_frame_bad: - /* Also if we searched for valid data in vain, we can forget skipped data. + /* Also if we searched for valid data in vain, we can forget skipped data. Otherwise, the feeder would hold every dead old byte in memory until the first valid frame! 
*/ if(fr->rd->forget != NULL) fr->rd->forget(fr); @@ -811,9 +647,9 @@ /* * read ahead and find the next MPEG header, to guess framesize * return value: success code - * 1: found a valid frame size (stored in the handle). + * PARSE_GOOD: found a valid frame size (stored in the handle). * <0: error codes, possibly from feeder buffer (NEED_MORE) - * 0: cannot get the framesize for some reason and shall silentry try the next possible header (if this is no free format stream after all...) + * PARSE_BAD: cannot get the framesize for some reason and shall silently try the next possible header (if this is no free format stream after all...) */ static int guess_freeformat_framesize(mpg123_handle *fr) { @@ -824,48 +660,26 @@ { if(NOQUIET) error("Cannot look for freeformat frame size with non-seekable and non-buffered stream!"); - return 0; + return PARSE_BAD; } if((ret=fr->rd->head_read(fr,&head))<=0) return ret; /* We are already 4 bytes into it */ -/* fix that limit to be absolute for the first header search! */ - for(i=4;i<65536;i++) { - if((ret=fr->rd->head_shift(fr,&head))<=0) + for(i=4;i<MAXFRAMESIZE+4;i++) + { + if((ret=fr->rd->head_shift(fr,&head))<=0) return ret; + + /* No head_check needed, the mask contains all relevant bits. */ + if((head & HDR_SAMEMASK) == (fr->oldhead & HDR_SAMEMASK)) { - return ret; + fr->rd->back_bytes(fr,i+1); + fr->framesize = i-3; + return PARSE_GOOD; /* Success! */ } - if(head_check(head)) - { - int sampling_frequency,mpeg25,lsf; - - if(head & (1<<20)) - { - lsf = (head & (1<<19)) ? 0x0 : 0x1; - mpeg25 = 0; - } - else - { - lsf = 1; - mpeg25 = 1; - } - - if(mpeg25) - sampling_frequency = 6 + ((head>>10)&0x3); - else - sampling_frequency = ((head>>10)&0x3) + (lsf*3); - - if((lsf==fr->lsf) && (mpeg25==fr->mpeg25) && (sampling_frequency == fr->sampling_frequency)) - { - fr->rd->back_bytes(fr,i+1); - fr->framesize = i-3; - return 1; /* Success! 
*/ - } - } } fr->rd->back_bytes(fr,i); - return 0; + return PARSE_BAD; } @@ -876,64 +690,51 @@ * 1: success * 0: no valid header * <0: some error + * You are required to do a head_check() before calling! */ -static int decode_header(mpg123_handle *fr,unsigned long newhead) +static int decode_header(mpg123_handle *fr,unsigned long newhead, int *freeformat_count) { +#ifdef DEBUG /* Do not waste cycles checking the header twice all the time. */ if(!head_check(newhead)) { - if(NOQUIET) error("tried to decode obviously invalid header"); + error1("trying to decode obviously invalid header 0x%08lx", newhead); + } +#endif + /* For some reason, the layer and sampling freq settings used to be wrapped + in a weird conditional including MPG123_NO_RESYNC. What was I thinking? + This information has to be consistent. */ + fr->lay = 4 - HDR_LAYER_VAL(newhead); - return 0; - } - if( newhead & (1<<20) ) + if(HDR_VERSION_VAL(newhead) & 0x2) { - fr->lsf = (newhead & (1<<19)) ? 0x0 : 0x1; + fr->lsf = (HDR_VERSION_VAL(newhead) & 0x1) ? 0 : 1; fr->mpeg25 = 0; + fr->sampling_frequency = HDR_SAMPLERATE_VAL(newhead) + (fr->lsf*3); } else { fr->lsf = 1; fr->mpeg25 = 1; + fr->sampling_frequency = 6 + HDR_SAMPLERATE_VAL(newhead); } - if( (fr->p.flags & MPG123_NO_RESYNC) || !fr->oldhead - || (((fr->oldhead>>19)&0x3) ^ ((newhead>>19)&0x3)) ) - { - /* If "tryresync" is false, assume that certain - parameters do not change within the stream! - Force an update if lsf or mpeg25 settings - have changed. */ - fr->lay = 4-((newhead>>17)&3); - if( ((newhead>>10)&0x3) == 0x3) - { - if(NOQUIET) error("Stream error"); - - return 0; /* exit() here really is too much, isn't it? 
*/ - } - if(fr->mpeg25) - fr->sampling_frequency = 6 + ((newhead>>10)&0x3); - else - fr->sampling_frequency = ((newhead>>10)&0x3) + (fr->lsf*3); - } - #ifdef DEBUG - if((((newhead>>16)&0x1)^0x1) != fr->error_protection) debug("changed crc bit!"); + /* seen a file where this varies (old lame tag without crc, track with crc) */ + if((HDR_CRC_VAL(newhead)^0x1) != fr->error_protection) debug("changed crc bit!"); #endif - fr->error_protection = ((newhead>>16)&0x1)^0x1; /* seen a file where this varies (old lame tag without crc, track with crc) */ - fr->bitrate_index = ((newhead>>12)&0xf); - fr->padding = ((newhead>>9)&0x1); - fr->extension = ((newhead>>8)&0x1); - fr->mode = ((newhead>>6)&0x3); - fr->mode_ext = ((newhead>>4)&0x3); - fr->copyright = ((newhead>>3)&0x1); - fr->original = ((newhead>>2)&0x1); - fr->emphasis = newhead & 0x3; - fr->freeformat = free_format_header(newhead); + fr->error_protection = HDR_CRC_VAL(newhead)^0x1; + fr->bitrate_index = HDR_BITRATE_VAL(newhead); + fr->padding = HDR_PADDING_VAL(newhead); + fr->extension = HDR_PRIVATE_VAL(newhead); + fr->mode = HDR_CHANNEL_VAL(newhead); + fr->mode_ext = HDR_CHANEX_VAL(newhead); + fr->copyright = HDR_COPYRIGHT_VAL(newhead); + fr->original = HDR_ORIGINAL_VAL(newhead); + fr->emphasis = HDR_EMPHASIS_VAL(newhead); + fr->freeformat = !(newhead & HDR_BITRATE); - fr->stereo = (fr->mode == MPG_MD_MONO) ? 1 : 2; + fr->stereo = (fr->mode == MPG_MD_MONO) ? 1 : 2; - fr->oldhead = newhead; - /* we can't use tabsel_123 for freeformat, so trying to guess framesize... */ if(fr->freeformat) { @@ -941,8 +742,14 @@ if(fr->freeformat_framesize < 0) { int ret; + *freeformat_count += 1; + if(*freeformat_count > 5) + { + if(VERBOSE3) error("You fooled me too often. 
Refusing to guess free format frame size _again_."); + return PARSE_BAD; + } ret = guess_freeformat_framesize(fr); - if(ret>0) + if(ret == PARSE_GOOD) { fr->freeformat_framesize = fr->framesize - fr->padding; if(VERBOSE2) @@ -952,8 +759,9 @@ { if(ret == MPG123_NEED_MORE) debug("Need more data to guess free format frame size."); - else + else if(VERBOSE3) error("Encountered free format header, but failed to guess frame size."); + return ret; } } @@ -968,6 +776,7 @@ { #ifndef NO_LAYER1 case 1: + fr->spf = 384; fr->do_layer = do_layer1; if(!fr->freeformat) { @@ -979,6 +788,7 @@ #endif #ifndef NO_LAYER2 case 2: + fr->spf = 1152; fr->do_layer = do_layer2; if(!fr->freeformat) { @@ -991,6 +801,7 @@ #endif #ifndef NO_LAYER3 case 3: + fr->spf = fr->lsf ? 576 : 1152; /* MPEG 2.5 implies LSF.*/ fr->do_layer = do_layer3; if(fr->lsf) fr->ssize = (fr->stereo == 1) ? 9 : 17; @@ -1011,15 +822,15 @@ default: if(NOQUIET) error1("Layer type %i not supported in this build!", fr->lay); - return 0; + return PARSE_BAD; } if (fr->framesize > MAXFRAMESIZE) { if(NOQUIET) error1("Frame size too big: %d", fr->framesize+4-fr->padding); - return (0); + return PARSE_BAD; } - return 1; + return PARSE_GOOD; } void set_pointer(mpg123_handle *fr, long backstep) @@ -1057,11 +868,18 @@ return bpf; } +int attribute_align_arg mpg123_spf(mpg123_handle *mh) +{ + if(mh == NULL) return MPG123_ERR; + + return mh->firsthead ? mh->spf : MPG123_ERR; +} + double attribute_align_arg mpg123_tpf(mpg123_handle *fr) { static int bs[4] = { 0,384,1152,1152 }; double tpf; - if(fr == NULL) return -1; + if(fr == NULL || !fr->firsthead) return -1; tpf = (double) bs[fr->lay]; tpf /= freqs[fr->sampling_frequency] << (fr->lsf); @@ -1150,3 +968,235 @@ tpf = mpg123_tpf(fr); return (int) (no*tpf); } + +/* first attempt of read ahead check to find the real first header; cannot believe what junk is out there! 
*/ +static int do_readahead(mpg123_handle *fr, unsigned long newhead) +{ + unsigned long nexthead = 0; + int hd = 0; + off_t start, oret; + int ret; + + if( ! (!fr->firsthead && fr->rdat.flags & (READER_SEEKABLE|READER_BUFFERED)) ) + return PARSE_GOOD; + + start = fr->rd->tell(fr); + + debug2("doing ahead check with BPF %d at %"OFF_P, fr->framesize+4, (off_p)start); + /* step framesize bytes forward and read next possible header*/ + if((oret=fr->rd->skip_bytes(fr, fr->framesize))<0) + { + if(oret==READER_ERROR && NOQUIET) error("cannot seek!"); + + return oret == MPG123_NEED_MORE ? PARSE_MORE : PARSE_ERR; + } + + /* Read header, seek back. */ + hd = fr->rd->head_read(fr,&nexthead); + if( fr->rd->back_bytes(fr, fr->rd->tell(fr)-start) < 0 ) + { + if(NOQUIET) error("Cannot seek back!"); + + return PARSE_ERR; + } + if(hd == MPG123_NEED_MORE) return PARSE_MORE; + + debug1("After fetching next header, at %"OFF_P, (off_p)fr->rd->tell(fr)); + if(!hd) + { + if(NOQUIET) warning("Cannot read next header, a one-frame stream? Duh..."); + return PARSE_END; + } + + debug2("does next header 0x%08lx match first 0x%08lx?", nexthead, newhead); + if(!head_check(nexthead) || !head_compatible(newhead, nexthead)) + { + debug("No, the header was not valid, start from beginning..."); + fr->oldhead = 0; /* start over */ + /* try next byte for valid header */ + if((ret=fr->rd->back_bytes(fr, 3))<0) + { + if(NOQUIET) error("Cannot seek 3 bytes back!"); + + return PARSE_ERR; + } + return PARSE_AGAIN; + } + else return PARSE_GOOD; +} + +static int handle_id3v2(mpg123_handle *fr, unsigned long newhead) +{ + int ret; + fr->oldhead = 0; /* Think about that. Used to be present only for skipping of junk, not resync-style wetwork. 
*/ + ret = parse_new_id3(fr, newhead); + if (ret < 0) return ret; +#ifndef NO_ID3V2 + else if(ret > 0){ debug("got ID3v2"); fr->metaflags |= MPG123_NEW_ID3|MPG123_ID3; } + else debug("no useful ID3v2"); +#endif + return PARSE_AGAIN; +} + +/* watch out for junk/tags on beginning of stream by invalid header */ +static int skip_junk(mpg123_handle *fr, unsigned long *newheadp, long *headcount) +{ + int ret; + int freeformat_count = 0; + long limit = 65536; + unsigned long newhead = *newheadp; + /* check for id3v2; first three bytes (of 4) are "ID3" */ + if((newhead & (unsigned long) 0xffffff00) == (unsigned long) 0x49443300) + { + return handle_id3v2(fr, newhead); + } + else if(VERBOSE2 && fr->silent_resync == 0) fprintf(stderr,"Note: Junk at the beginning (0x%08lx)\n",newhead); + + /* I even saw RIFF headers at the beginning of MPEG streams ;( */ + if(newhead == ('R'<<24)+('I'<<16)+('F'<<8)+'F') + { + if(VERBOSE2 && fr->silent_resync == 0) fprintf(stderr, "Note: Looks like a RIFF header.\n"); + + if((ret=fr->rd->head_read(fr,&newhead))<=0) return ret; + + while(newhead != ('d'<<24)+('a'<<16)+('t'<<8)+'a') + { + if((ret=fr->rd->head_shift(fr,&newhead))<=0) return ret; + } + if((ret=fr->rd->head_read(fr,&newhead))<=0) return ret; + + if(VERBOSE2 && fr->silent_resync == 0) fprintf(stderr,"Note: Skipped RIFF header!\n"); + + fr->oldhead = 0; + *newheadp = newhead; + return PARSE_AGAIN; + } + + /* + Unhandled junk... just continue search for a header, stepping in single bytes through next 64K. + This is rather identical to the resync loop. + */ + debug("searching for header..."); + *newheadp = 0; /* Invalidate the external value. */ + ret = 0; /* We will check the value after the loop. */ + + /* We prepare for at least the 64K bytes as usual, unless + user explicitly wanted more (even infinity). Never less. 
*/ + if(fr->p.resync_limit < 0 || fr->p.resync_limit > limit) + limit = fr->p.resync_limit; + + do + { + ++(*headcount); + if(limit >= 0 && *headcount >= limit) break; + + if((ret=fr->rd->head_shift(fr,&newhead))<=0) return ret; + + if(head_check(newhead) && (ret=decode_header(fr, newhead, &freeformat_count))) break; + } while(1); + if(ret<0) return ret; + + if(limit >= 0 && *headcount >= limit) + { + if(NOQUIET) error1("Giving up searching valid MPEG header after %li bytes of junk.", *headcount); + return PARSE_END; + } + else debug1("hopefully found one at %"OFF_P, (off_p)fr->rd->tell(fr)); + + /* If the new header ist good, it is already decoded. */ + *newheadp = newhead; + return PARSE_GOOD; +} + +/* The newhead is bad, so let's check if it is something special, otherwise just resync. */ +static int wetwork(mpg123_handle *fr, unsigned long *newheadp) +{ + int ret = PARSE_ERR; + unsigned long newhead = *newheadp; + *newheadp = 0; + + /* Classic ID3 tags. Read, then start parsing again. */ + if((newhead & 0xffffff00) == ('T'<<24)+('A'<<16)+('G'<<8)) + { + fr->id3buf[0] = (unsigned char) ((newhead >> 24) & 0xff); + fr->id3buf[1] = (unsigned char) ((newhead >> 16) & 0xff); + fr->id3buf[2] = (unsigned char) ((newhead >> 8) & 0xff); + fr->id3buf[3] = (unsigned char) ( newhead & 0xff); + + if((ret=fr->rd->fullread(fr,fr->id3buf+4,124)) < 0) return ret; + + fr->metaflags |= MPG123_NEW_ID3|MPG123_ID3; + fr->rdat.flags |= READER_ID3TAG; /* that marks id3v1 */ + if(VERBOSE3) fprintf(stderr,"Note: Skipped ID3v1 tag.\n"); + + return PARSE_AGAIN; + } + /* This is similar to initial junk skipping code... 
*/ + /* Check for id3v2; first three bytes (of 4) are "ID3" */ + if((newhead & (unsigned long) 0xffffff00) == (unsigned long) 0x49443300) + { + return handle_id3v2(fr, newhead); + } + else if(NOQUIET && fr->silent_resync == 0) + { + fprintf(stderr,"Note: Illegal Audio-MPEG-Header 0x%08lx at offset %"OFF_P".\n", + newhead, (off_p)fr->rd->tell(fr)-4); + } + + /* Now we got something bad at hand, try to recover. */ + + if(NOQUIET && (newhead & 0xffffff00) == ('b'<<24)+('m'<<16)+('p'<<8)) fprintf(stderr,"Note: Could be a BMP album art.\n"); + + if( !(fr->p.flags & MPG123_NO_RESYNC) ) + { + long try = 0; + long limit = fr->p.resync_limit; + + /* If a resync is needed the bitreservoir of previous frames is no longer valid */ + fr->bitreservoir = 0; + + if(NOQUIET && fr->silent_resync == 0) fprintf(stderr, "Note: Trying to resync...\n"); + + do /* ... shift the header with additional single bytes until be found something that could be a header. */ + { + ++try; + if(limit >= 0 && try >= limit) break; + + if((ret=fr->rd->head_shift(fr,&newhead)) <= 0) + { + *newheadp = newhead; + if(NOQUIET) fprintf (stderr, "Note: Hit end of (available) data during resync.\n"); + + return ret ? ret : PARSE_END; + } + if(VERBOSE3) debug3("resync try %li at %"OFF_P", got newhead 0x%08lx", try, (off_p)fr->rd->tell(fr), newhead); + } while(!head_check(newhead)); + + *newheadp = newhead; + if(NOQUIET && fr->silent_resync == 0) fprintf (stderr, "Note: Skipped %li bytes in input.\n", try); + + /* Now we either got something that could be a header, or we gave up. */ + if(limit >= 0 && try >= limit) + { + if(NOQUIET) + error1("Giving up resync after %li bytes - your stream is not nice... (maybe increasing resync limit could help).", try); + + fr->err = MPG123_RESYNC_FAIL; + return PARSE_ERR; + } + else + { + debug1("Found possibly valid header 0x%lx... 
unsetting oldhead to reinit stream.", newhead); + fr->oldhead = 0; + return PARSE_RESYNC; + } + } + else + { + if(NOQUIET) error("not attempting to resync..."); + + fr->err = MPG123_OUT_OF_SYNC; + return PARSE_ERR; + } + /* Control never goes here... we return before that. */ +} Index: lib/3rdparty/libmpg123/readers.c =================================================================== --- lib/3rdparty/libmpg123/readers.c (revision 62563) +++ lib/3rdparty/libmpg123/readers.c (working copy) @@ -28,37 +28,35 @@ static int default_init(mpg123_handle *fr); static off_t get_fileinfo(mpg123_handle *); -static long posix_read(int fd, void *buf, size_t count){ return read(fd, buf, count); } +static ssize_t posix_read(int fd, void *buf, size_t count){ return read(fd, buf, count); } static off_t posix_lseek(int fd, off_t offset, int whence){ return lseek(fd, offset, whence); } +static off_t nix_lseek(int fd, off_t offset, int whence){ return -1; } -static long plain_fullread(mpg123_handle *fr,unsigned char *buf, long count); +static ssize_t plain_fullread(mpg123_handle *fr,unsigned char *buf, ssize_t count); /* Wrapper to decide between descriptor-based and external handle-based I/O. */ static off_t io_seek(struct reader_data *rdat, off_t offset, int whence); -static long io_read(struct reader_data *rdat, void *buf, size_t count); +static ssize_t io_read(struct reader_data *rdat, void *buf, size_t count); #ifndef NO_FEEDER /* Bufferchain methods. 
*/ static void bc_init(struct bufferchain *bc); static void bc_reset(struct bufferchain *bc); -static int bc_append(struct bufferchain *bc, long size); +static int bc_append(struct bufferchain *bc, ssize_t size); #if 0 static void bc_drop(struct bufferchain *bc); #endif -static int bc_add(struct bufferchain *bc, const unsigned char *data, long size); -static long bc_give(struct bufferchain *bc, unsigned char *out, long size); -static long bc_skip(struct bufferchain *bc, long count); -static long bc_seekback(struct bufferchain *bc, long count); +static int bc_add(struct bufferchain *bc, const unsigned char *data, ssize_t size); +static ssize_t bc_give(struct bufferchain *bc, unsigned char *out, ssize_t size); +static ssize_t bc_skip(struct bufferchain *bc, ssize_t count); +static ssize_t bc_seekback(struct bufferchain *bc, ssize_t count); static void bc_forget(struct bufferchain *bc); -#else -#define bc_init(a) -#define bc_reset(a) #endif /* A normal read and a read with timeout. */ -static long plain_read(mpg123_handle *fr, void *buf, size_t count) +static ssize_t plain_read(mpg123_handle *fr, void *buf, size_t count) { - long ret = io_read(&fr->rdat, buf, count); + ssize_t ret = io_read(&fr->rdat, buf, count); if(VERBOSE3) debug2("read %li bytes of %li", (long)ret, (long)count); return ret; } @@ -67,10 +65,10 @@ /* Wait for data becoming available, allowing soft-broken network connection to die This is needed for Shoutcast servers that have forgotten about us while connection was temporarily down. 
*/ -static long timeout_read(mpg123_handle *fr, void *buf, size_t count) +static ssize_t timeout_read(mpg123_handle *fr, void *buf, size_t count) { struct timeval tv; - long ret = 0; + ssize_t ret = 0; fd_set fds; tv.tv_sec = fr->rdat.timeout_sec; tv.tv_usec = 0; @@ -90,9 +88,9 @@ #ifndef NO_ICY /* stream based operation with icy meta data*/ -static long icy_fullread(mpg123_handle *fr, unsigned char *buf, long count) +static ssize_t icy_fullread(mpg123_handle *fr, unsigned char *buf, ssize_t count) { - long ret,cnt; + ssize_t ret,cnt; cnt = 0; if(fr->rdat.flags & READER_SEEKABLE) { @@ -115,7 +113,7 @@ { unsigned char temp_buff; size_t meta_size; - long cut_pos; + ssize_t cut_pos; /* we are near icy-metaint boundary, read up to the boundary */ if(fr->icy.next > 0) @@ -154,10 +152,11 @@ { /* we have got some metadata */ char *meta_buff; + /* TODO: Get rid of this malloc ... perhaps hooking into the reader buffer pool? */ meta_buff = malloc(meta_size+1); if(meta_buff != NULL) { - long left = meta_size; + ssize_t left = meta_size; while(left > 0) { ret = fr->rdat.fdread(fr,meta_buff+meta_size-left,left); @@ -199,10 +198,13 @@ #endif /* NO_ICY */ /* stream based operation */ -static long plain_fullread(mpg123_handle *fr,unsigned char *buf, long count) +static ssize_t plain_fullread(mpg123_handle *fr,unsigned char *buf, ssize_t count) { - long ret,cnt=0; + ssize_t ret,cnt=0; +#ifdef EXTRA_DEBUG + debug1("plain fullread of %"SSIZE_P, (size_p)count); +#endif /* There used to be a check for expected file end here (length value or ID3 flag). This is not needed: @@ -240,7 +242,9 @@ fr->rdat.filept = 0; +#ifndef NO_FEEDER if(fr->rdat.flags & READER_BUFFERED) bc_reset(&fr->rdat.buffer); +#endif if(fr->rdat.flags & READER_HANDLEIO) { if(fr->rdat.cleanup_handle != NULL) fr->rdat.cleanup_handle(fr->rdat.iohandle); @@ -335,10 +339,10 @@ else if(len >= 0) { unsigned char buf[1024]; /* ThOr: Compaq cxx complained and it makes sense to me... or should one do a cast? What for? 
*/ - long ret; + ssize_t ret; while (len > 0) { - long num = len < (off_t)sizeof(buf) ? (long)len : (long)sizeof(buf); + ssize_t num = len < (off_t)sizeof(buf) ? (ssize_t)len : (ssize_t)sizeof(buf); ret = fr->rd->fullread(fr, buf, num); if (ret < 0) return ret; else if(ret == 0) break; /* EOF... an error? interface defined to tell the actual position... */ @@ -346,6 +350,7 @@ } return fr->rd->tell(fr); } +#ifndef NO_FEEDER else if(fr->rdat.flags & READER_BUFFERED) { /* Perhaps we _can_ go a bit back. */ if(fr->rdat.buffer.pos >= -len) @@ -359,6 +364,7 @@ return READER_ERROR; } } +#endif else { fr->err = MPG123_NO_SEEK; @@ -393,8 +399,10 @@ static off_t generic_tell(mpg123_handle *fr) { +#ifndef NO_FEEDER if(fr->rdat.flags & READER_BUFFERED) fr->rdat.filepos = fr->rdat.buffer.fileoff+fr->rdat.buffer.pos; +#endif return fr->rdat.filepos; } @@ -403,7 +411,13 @@ static void stream_rewind(mpg123_handle *fr) { if(fr->rdat.flags & READER_SEEKABLE) - fr->rdat.buffer.fileoff = fr->rdat.filepos = stream_lseek(fr,0,SEEK_SET); + { + fr->rdat.filepos = stream_lseek(fr,0,SEEK_SET); +#ifndef NO_FEEDER + fr->rdat.buffer.fileoff = fr->rdat.filepos; +#endif + } +#ifndef NO_FEEDER if(fr->rdat.flags & READER_BUFFERED) { fr->rdat.buffer.pos = 0; @@ -410,6 +424,7 @@ fr->rdat.buffer.firstpos = 0; fr->rdat.filepos = fr->rdat.buffer.fileoff; } +#endif } /* @@ -436,12 +451,136 @@ return len; } -/* Let's work in nice 4K blocks, that may be nicely reusable (by malloc(), even). */ -#define BUFFBLOCK 4096 - #ifndef NO_FEEDER /* Methods for the buffer chain, mainly used for feed reader, but not just that. */ + +static struct buffy* buffy_new(size_t size, size_t minsize) +{ + struct buffy *newbuf; + newbuf = malloc(sizeof(struct buffy)); + if(newbuf == NULL) return NULL; + + newbuf->realsize = size > minsize ? 
size : minsize; + newbuf->data = malloc(newbuf->realsize); + if(newbuf->data == NULL) + { + free(newbuf); + return NULL; + } + newbuf->size = 0; + newbuf->next = NULL; + return newbuf; +} + +static void buffy_del(struct buffy* buf) +{ + if(buf) + { + free(buf->data); + free(buf); + } +} + +/* Delete this buffy and all following buffies. */ +static void buffy_del_chain(struct buffy* buf) +{ + while(buf) + { + struct buffy* next = buf->next; + buffy_del(buf); + buf = next; + } +} + +void bc_prepare(struct bufferchain *bc, size_t pool_size, size_t bufblock) +{ + bc_poolsize(bc, pool_size, bufblock); + bc->pool = NULL; + bc->pool_fill = 0; + bc_init(bc); /* Ensure that members are zeroed for read-only use. */ +} + +size_t bc_fill(struct bufferchain *bc) +{ + return (size_t)(bc->size - bc->pos); +} + +void bc_poolsize(struct bufferchain *bc, size_t pool_size, size_t bufblock) +{ + bc->pool_size = pool_size; + bc->bufblock = bufblock; +} + +void bc_cleanup(struct bufferchain *bc) +{ + buffy_del_chain(bc->pool); + bc->pool = NULL; + bc->pool_fill = 0; +} + +/* Fetch a buffer from the pool (if possible) or create one. */ +static struct buffy* bc_alloc(struct bufferchain *bc, size_t size) +{ + /* Easy route: Just try the first available buffer. + Size does not matter, it's only a hint for creation of new buffers. */ + if(bc->pool) + { + struct buffy *buf = bc->pool; + bc->pool = buf->next; + buf->next = NULL; /* That shall be set to a sensible value later. */ + buf->size = 0; + --bc->pool_fill; + debug2("bc_alloc: picked %p from pool (fill now %"SIZE_P")", (void*)buf, (size_p)bc->pool_fill); + return buf; + } + else return buffy_new(size, bc->bufblock); +} + +/* Either stuff the buffer back into the pool or free it for good. 
*/ +static void bc_free(struct bufferchain *bc, struct buffy* buf) +{ + if(!buf) return; + + if(bc->pool_fill < bc->pool_size) + { + buf->next = bc->pool; + bc->pool = buf; + ++bc->pool_fill; + } + else buffy_del(buf); +} + +/* Make the buffer count in the pool match the pool size. */ +static int bc_fill_pool(struct bufferchain *bc) +{ + /* Remove superfluous ones. */ + while(bc->pool_fill > bc->pool_size) + { + /* Lazyness: Just work on the front. */ + struct buffy* buf = bc->pool; + bc->pool = buf->next; + buffy_del(buf); + --bc->pool_fill; + } + + /* Add missing ones. */ + while(bc->pool_fill < bc->pool_size) + { + /* Again, just work on the front. */ + struct buffy* buf; + buf = buffy_new(0, bc->bufblock); /* Use default block size. */ + if(!buf) return -1; + + buf->next = bc->pool; + bc->pool = buf; + ++bc->pool_fill; + } + + return 0; +} + + static void bc_init(struct bufferchain *bc) { bc->first = NULL; @@ -454,101 +593,68 @@ static void bc_reset(struct bufferchain *bc) { - /* free the buffer chain */ - struct buffy *b = bc->first; - while(b != NULL) + /* Free current chain, possibly stuffing back into the pool. */ + while(bc->first) { - struct buffy *n = b->next; - free(b->data); - free(b); - b = n; + struct buffy* buf = bc->first; + bc->first = buf->next; + bc_free(bc, buf); } + bc_fill_pool(bc); /* Ignoring an error here... */ bc_init(bc); } /* Create a new buffy at the end to be filled. */ -static int bc_append(struct bufferchain *bc, long size) +static int bc_append(struct bufferchain *bc, ssize_t size) { struct buffy *newbuf; if(size < 1) return -1; - newbuf = malloc(sizeof(struct buffy)); + newbuf = bc_alloc(bc, size); if(newbuf == NULL) return -2; - newbuf->realsize = size > BUFFBLOCK ? 
size : BUFFBLOCK; - newbuf->data = malloc(newbuf->realsize); - if(newbuf->data == NULL) - { - free(newbuf); - return -3; - } - newbuf->size = size; - newbuf->next = NULL; if(bc->last != NULL) bc->last->next = newbuf; else if(bc->first == NULL) bc->first = newbuf; bc->last = newbuf; - bc->size += size; + debug3("bc_append: new last buffer %p with %"SSIZE_P" B (really %"SSIZE_P")", (void*)bc->last, (ssize_p)bc->last->size, (ssize_p)bc->last->realsize); return 0; } -#if 0 -/* Drop the last one (again). - This is not optimal but should happen on error situations only, anyway. */ -static void bc_drop(struct bufferchain *bc) -{ - struct buffy *cur = bc->first; - if(bc->first == NULL || bc->last == NULL) return; - /* Special case: only one buffer there. */ - if(cur->next == NULL) - { - free(cur->data); - free(cur); - bc->first = bc->last = NULL; - bc->size = 0; - return; - } - /* Find the pre-last buffy. If chain is consistent, this _will_ succeed. */ - while(cur->next != bc->last){ cur = cur->next; } - - bc->size -= bc->last->size; - free(bc->last->data); - free(bc->last); - cur->next = NULL; - bc->last = cur; -} -#endif - /* Append a new buffer and copy content to it. */ -static int bc_add(struct bufferchain *bc, const unsigned char *data, long size) +static int bc_add(struct bufferchain *bc, const unsigned char *data, ssize_t size) { int ret = 0; - long part = 0; + ssize_t part = 0; debug2("bc_add: adding %"SSIZE_P" bytes at %"OFF_P, (ssize_p)size, (off_p)(bc->fileoff+bc->size)); if(size >=4) debug4("first bytes: %02x %02x %02x %02x", data[0], data[1], data[2], data[3]); - /* Try to fill up the last buffer block. */ - if(bc->last != NULL && bc->last->size < bc->last->realsize) + while(size > 0) { - part = bc->last->realsize - bc->last->size; - if(part > size) part = size; + /* Try to fill up the last buffer block. 
*/ + if(bc->last != NULL && bc->last->size < bc->last->realsize) + { + part = bc->last->realsize - bc->last->size; + if(part > size) part = size; - memcpy(bc->last->data+bc->last->size, data, part); - bc->last->size += part; - size -= part; - bc->size += part; + debug2("bc_add: adding %"SSIZE_P" B to existing block %p", (ssize_p)part, (void*)bc->last); + memcpy(bc->last->data+bc->last->size, data, part); + bc->last->size += part; + size -= part; + bc->size += part; + data += part; + } + + /* If there is still data left, put it into a new buffer block. */ + if(size > 0 && (ret = bc_append(bc, size)) != 0) + break; } - - /* If there is still data left, put it into a new buffer block. */ - if(size > 0 && (ret = bc_append(bc, size)) == 0) - memcpy(bc->last->data, data+part, size); - return ret; } /* Common handler for "You want more than I can give." situation. */ -static long bc_need_more(struct bufferchain *bc) +static ssize_t bc_need_more(struct bufferchain *bc) { debug3("hit end, back to beginning (%li - %li < %li)", (long)bc->size, (long)bc->pos, (long)bc->size); /* go back to firstpos, undo the previous reads */ @@ -557,11 +663,11 @@ } /* Give some data, advancing position but not forgetting yet. */ -static long bc_give(struct bufferchain *bc, unsigned char *out, long size) +static ssize_t bc_give(struct bufferchain *bc, unsigned char *out, ssize_t size) { struct buffy *b = bc->first; - long gotcount = 0; - long offset = 0; + ssize_t gotcount = 0; + ssize_t offset = 0; if(bc->size - bc->pos < size) return bc_need_more(bc); /* find the current buffer */ @@ -573,8 +679,8 @@ /* now start copying from there */ while(gotcount < size && (b != NULL)) { - long loff = bc->pos - offset; - long chunk = size - gotcount; /* amount of bytes to get from here... */ + ssize_t loff = bc->pos - offset; + ssize_t chunk = size - gotcount; /* amount of bytes to get from here... 
*/ if(chunk > b->size - loff) chunk = b->size - loff; #ifdef EXTRA_DEBUG @@ -596,7 +702,7 @@ /* Skip some bytes and return the new position. The buffers are still there, just the read pointer is moved! */ -static long bc_skip(struct bufferchain *bc, long count) +static ssize_t bc_skip(struct bufferchain *bc, ssize_t count) { if(count >= 0) { @@ -606,7 +712,7 @@ else return READER_ERROR; } -static long bc_seekback(struct bufferchain *bc, long count) +static ssize_t bc_seekback(struct bufferchain *bc, ssize_t count) { if(count >= 0 && count <= bc->pos) return bc->pos -= count; else return READER_ERROR; @@ -631,8 +737,7 @@ debug5("bc_forget: forgot %p with %lu, pos=%li, size=%li, fileoff=%li", (void*)b->data, (long)b->size, (long)bc->pos, (long)bc->size, (long)bc->fileoff); - free(b->data); - free(b); + bc_free(bc, b); b = n; } bc->first = b; @@ -644,6 +749,7 @@ static int feed_init(mpg123_handle *fr) { bc_init(&fr->rdat.buffer); + bc_fill_pool(&fr->rdat.buffer); fr->rdat.filelen = 0; fr->rdat.filepos = 0; fr->rdat.flags |= READER_BUFFERED; @@ -667,9 +773,9 @@ return ret; } -static long feed_read(mpg123_handle *fr, unsigned char *out, long count) +static ssize_t feed_read(mpg123_handle *fr, unsigned char *out, ssize_t count) { - long gotcount = bc_give(&fr->rdat.buffer, out, count); + ssize_t gotcount = bc_give(&fr->rdat.buffer, out, count); if(gotcount >= 0 && gotcount != count) return READER_ERROR; else return gotcount; } @@ -678,7 +784,7 @@ static off_t feed_skip_bytes(mpg123_handle *fr,off_t len) { /* This is either the new buffer offset or some negative error value. */ - off_t res = bc_skip(&fr->rdat.buffer, (long)len); + off_t res = bc_skip(&fr->rdat.buffer, (ssize_t)len); if(res < 0) return res; return fr->rdat.buffer.fileoff+res; @@ -687,7 +793,7 @@ static int feed_back_bytes(mpg123_handle *fr, off_t bytes) { if(bytes >=0) - return bc_seekback(&fr->rdat.buffer, (long)bytes) >= 0 ? 0 : READER_ERROR; + return bc_seekback(&fr->rdat.buffer, (ssize_t)bytes) >= 0 ? 
0 : READER_ERROR; else return feed_skip_bytes(fr, -bytes) >= 0 ? 0 : READER_ERROR; } @@ -706,7 +812,7 @@ struct bufferchain *bc = &fr->rdat.buffer; if(pos >= bc->fileoff && pos-bc->fileoff < bc->size) { /* We have the position! */ - bc->pos = (long)(pos - bc->fileoff); + bc->pos = (ssize_t)(pos - bc->fileoff); debug1("feed_set_pos inside, next feed from %"OFF_P, (off_p)(bc->fileoff+bc->size)); return bc->fileoff+bc->size; /* Next input after end of buffer... */ } @@ -721,18 +827,18 @@ /* The specific stuff for buffered stream reader. */ -static long buffered_fullread(mpg123_handle *fr, unsigned char *out, long count) +static ssize_t buffered_fullread(mpg123_handle *fr, unsigned char *out, ssize_t count) { struct bufferchain *bc = &fr->rdat.buffer; - long gotcount; + ssize_t gotcount; if(bc->size - bc->pos < count) { /* Add more stuff to buffer. If hitting end of file, adjust count. */ - unsigned char readbuf[BUFFBLOCK]; - long need = count - (bc->size-bc->pos); + unsigned char readbuf[4096]; + ssize_t need = count - (bc->size-bc->pos); while(need>0) { int ret; - long got = fr->rdat.fullread(fr, readbuf, BUFFBLOCK); + ssize_t got = fr->rdat.fullread(fr, readbuf, sizeof(readbuf)); if(got < 0) { if(NOQUIET) error("buffer reading"); @@ -747,7 +853,7 @@ } need -= got; /* May underflow here... */ - if(got < BUFFBLOCK) /* That naturally catches got == 0, too. */ + if(got < sizeof(readbuf)) /* That naturally catches got == 0, too. */ { if(VERBOSE3) fprintf(stderr, "Note: Input data end.\n"); break; /* End. 
*/ @@ -781,17 +887,17 @@ */ #define bugger_off { mh->err = MPG123_NO_READER; return MPG123_ERR; } -int bad_init(mpg123_handle *mh) bugger_off -void bad_close(mpg123_handle *mh){} -long bad_fullread(mpg123_handle *mh, unsigned char *data, long count) bugger_off -int bad_head_read(mpg123_handle *mh, unsigned long *newhead) bugger_off -int bad_head_shift(mpg123_handle *mh, unsigned long *head) bugger_off -off_t bad_skip_bytes(mpg123_handle *mh, off_t len) bugger_off -int bad_read_frame_body(mpg123_handle *mh, unsigned char *data, int size) bugger_off -int bad_back_bytes(mpg123_handle *mh, off_t bytes) bugger_off -int bad_seek_frame(mpg123_handle *mh, off_t num) bugger_off -off_t bad_tell(mpg123_handle *mh) bugger_off -void bad_rewind(mpg123_handle *mh){} +static int bad_init(mpg123_handle *mh) bugger_off +static void bad_close(mpg123_handle *mh){} +static ssize_t bad_fullread(mpg123_handle *mh, unsigned char *data, ssize_t count) bugger_off +static int bad_head_read(mpg123_handle *mh, unsigned long *newhead) bugger_off +static int bad_head_shift(mpg123_handle *mh, unsigned long *head) bugger_off +static off_t bad_skip_bytes(mpg123_handle *mh, off_t len) bugger_off +static int bad_read_frame_body(mpg123_handle *mh, unsigned char *data, int size) bugger_off +static int bad_back_bytes(mpg123_handle *mh, off_t bytes) bugger_off +static int bad_seek_frame(mpg123_handle *mh, off_t num) bugger_off +static off_t bad_tell(mpg123_handle *mh) bugger_off +static void bad_rewind(mpg123_handle *mh){} #undef bugger_off #define READER_STREAM 0 @@ -799,7 +905,7 @@ #define READER_FEED 2 #define READER_BUF_STREAM 3 #define READER_BUF_ICY_STREAM 4 -struct reader readers[] = +static struct reader readers[] = { { /* READER_STREAM */ default_init, @@ -898,7 +1004,7 @@ #endif }; -struct reader bad_reader = +static struct reader bad_reader = { bad_init, bad_close, @@ -938,8 +1044,18 @@ fr->rdat.read = fr->rdat.r_read != NULL ? 
fr->rdat.r_read : posix_read; fr->rdat.lseek = fr->rdat.r_lseek != NULL ? fr->rdat.r_lseek : posix_lseek; +#ifndef NO_ICY + /* ICY streams of any sort shall not be seekable. */ + if(fr->p.icy_interval > 0) fr->rdat.lseek = nix_lseek; +#endif + fr->rdat.filelen = get_fileinfo(fr); fr->rdat.filepos = 0; + /* + Don't enable seeking on ICY streams, just plain normal files. + This check is necessary since the client can enforce ICY parsing on files that would otherwise be seekable. + It is a task for the future to make the ICY parsing safe with seeks ... or not. + */ if(fr->rdat.filelen >= 0) { fr->rdat.flags |= READER_SEEKABLE; @@ -985,12 +1101,16 @@ void open_bad(mpg123_handle *mh) { + debug("open_bad"); #ifndef NO_ICY clear_icy(&mh->icy); #endif mh->rd = &bad_reader; mh->rdat.flags = 0; +#ifndef NO_FEEDER bc_init(&mh->rdat.buffer); +#endif + mh->rdat.filelen = -1; } int open_feed(mpg123_handle *fr) @@ -1013,6 +1133,8 @@ fr->rd = &readers[READER_FEED]; fr->rdat.flags = 0; if(fr->rd->init(fr) < 0) return -1; + + debug("feed reader init successful"); return 0; #endif /* NO_FEEDER */ } @@ -1098,7 +1220,7 @@ return rdat->lseek(rdat->filept, offset, whence); } -static long io_read(struct reader_data *rdat, void *buf, size_t count) +static ssize_t io_read(struct reader_data *rdat, void *buf, size_t count) { if(rdat->flags & READER_HANDLEIO) { Index: lib/3rdparty/libmpg123/stringbuf.c =================================================================== --- lib/3rdparty/libmpg123/stringbuf.c (revision 62563) +++ lib/3rdparty/libmpg123/stringbuf.c (working copy) @@ -1,7 +1,7 @@ /* stringbuf: mimicking a bit of C++ to more safely handle strings - copyright 2006-8 by the mpg123 project - free software under the terms of the LGPL 2.1 + copyright 2006-10 by the mpg123 project - free software under the terms of the LGPL 2.1 see COPYING and AUTHORS files in distribution or http://mpg123.org initially written by Thomas Orgis */ @@ -129,3 +129,58 @@ sb->fill = 0; return 
mpg123_add_string(sb, stuff); } + +size_t attribute_align_arg mpg123_strlen(mpg123_string *sb, int utf8) +{ + size_t i; + size_t bytelen; + + /* Notions of empty string. If there's only a single character, it has to be the trailing zero, and if the first is the trailing zero anyway, we got empty. */ + if(sb->fill < 2 || sb->p[0] == 0) return 0; + + /* Find the first non-null character from the back. + We already established that the first character is non-null + That at fill-2 has to be null, though. */ + for(i=sb->fill-2; i>0; --i) + if(sb->p[i] != 0) break; + + /* For simple byte strings, we are done now. */ + bytelen = i+1; + + if(!utf8) return bytelen; + else + { + /* Work out the actual count of UTF8 bytes. + This employs no particular encoding error checking. */ + size_t len = 0; + for(i=0; i<bytelen; ++i) + { + /* Every byte that is not a continuation byte (10xx xxxx, i.e. byte & 0xc0 == 0x80) stands for a character. */ + if((sb->p[i] & 0xc0) != 0x80) len++; + } + return len; + } +} + +int attribute_align_arg mpg123_chomp_string(mpg123_string *sb) +{ + ssize_t i; + if(!sb || !sb->fill) return 0; + + /* Ensure that it is zero-terminated. */ + sb->p[sb->fill-1] = 0; + for(i=sb->fill-2; i>=0; --i) + { + char *c = sb->p+i; + /* Stop at the first proper character. */ + if(*c && *c != '\r' && *c != '\n') break; + else *c = 0; + } + /* initial fill at least 1, so i at least -1, + +2 means nothing happened for fill=1 . + With i=0, we got one non-null character, fill shall be 2 + to accomodate the trailing zero. */ + sb->fill = (size_t)i+2; + + return 1; +} Index: lib/3rdparty/libmpg123/synth.c =================================================================== --- lib/3rdparty/libmpg123/synth.c (revision 62563) +++ lib/3rdparty/libmpg123/synth.c (working copy) @@ -14,7 +14,7 @@ Part 1: All synth functions that produce signed short. That is: - synth_1to1 with cpu-specific variants (synth_1to1_i386, synth_1to1_i586 ...) - - synth_1to1_mono and synth_1to1_mono2stereo; which use fr->synths.plain[r_1to1][f_16]. + - synth_1to1_mono and synth_1to1_m2s; which use fr->synths.plain[r_1to1][f_16].
Nearly every decoder variant has it's own synth_1to1, while the mono conversion is shared. */ @@ -31,7 +31,7 @@ /* Mono-related synths; they wrap over _some_ synth_1to1. */ #define SYNTH_NAME fr->synths.plain[r_1to1][f_16] #define MONO_NAME synth_1to1_mono -#define MONO2STEREO_NAME synth_1to1_mono2stereo +#define MONO2STEREO_NAME synth_1to1_m2s #include "synth_mono.h" #undef SYNTH_NAME #undef MONO_NAME @@ -105,7 +105,7 @@ } #endif -#ifdef OPT_3DNOW +#if defined(OPT_3DNOW) || defined(OPT_3DNOW_VINTAGE) /* Those are defined in assembler. */ void do_equalizer_3dnow(real *bandPtr,int channel, real equalizer[2][32]); int synth_1to1_3dnow_asm(real *bandPtr, int channel, unsigned char *out, unsigned char *buffs, int *bo, real *decwin); @@ -139,11 +139,11 @@ } #endif -#ifdef OPT_SSE +#if defined(OPT_SSE) || defined(OPT_SSE_VINTAGE) #ifdef ACCURATE_ROUNDING /* This is defined in assembler. */ int synth_1to1_sse_accurate_asm(real *window, real *b0, short *samples, int bo1); -int synth_1to1_stereo_sse_accurate_asm(real *window, real *b0l, real *b0r, short *samples, int bo1); +int synth_1to1_s_sse_accurate_asm(real *window, real *b0l, real *b0r, short *samples, int bo1); void dct64_real_sse(real *out0, real *out1, real *samples); /* This is just a hull to use the mpg123 handle. */ int synth_1to1_sse(real *bandPtr,int channel, mpg123_handle *fr, int final) @@ -223,7 +223,7 @@ dct64_real_sse(bufr[0]+fr->bo,bufr[1]+fr->bo+1,bandPtr_r); } - clip = synth_1to1_stereo_sse_accurate_asm(fr->decwin, b0l, b0r, samples, bo1); + clip = synth_1to1_s_sse_accurate_asm(fr->decwin, b0l, b0r, samples, bo1); fr->buffer.fill += 128; @@ -244,7 +244,7 @@ #endif #endif -#ifdef OPT_3DNOWEXT +#if defined(OPT_3DNOWEXT) || defined(OPT_3DNOWEXT_VINTAGE) /* This is defined in assembler. */ void synth_1to1_3dnowext_asm(real *bandPtr, int channel, short *samples, short *buffs, int *bo, real *decwin); /* This is just a hull to use the mpg123 handle. 
*/ @@ -262,7 +262,7 @@ #ifdef ACCURATE_ROUNDING /* Assembler routines. */ int synth_1to1_x86_64_accurate_asm(real *window, real *b0, short *samples, int bo1); -int synth_1to1_stereo_x86_64_accurate_asm(real *window, real *b0l, real *b0r, short *samples, int bo1); +int synth_1to1_s_x86_64_accurate_asm(real *window, real *b0l, real *b0r, short *samples, int bo1); void dct64_real_x86_64(real *out0, real *out1, real *samples); /* Hull for C mpg123 API */ int synth_1to1_x86_64(real *bandPtr,int channel, mpg123_handle *fr, int final) @@ -343,7 +343,7 @@ dct64_real_x86_64(bufr[0]+fr->bo,bufr[1]+fr->bo+1,bandPtr_r); } - clip = synth_1to1_stereo_x86_64_accurate_asm(fr->decwin, b0l, b0r, samples, bo1); + clip = synth_1to1_s_x86_64_accurate_asm(fr->decwin, b0l, b0r, samples, bo1); fr->buffer.fill += 128; @@ -352,7 +352,7 @@ #else /* This is defined in assembler. */ int synth_1to1_x86_64_asm(short *window, short *b0, short *samples, int bo1); -int synth_1to1_stereo_x86_64_asm(short *window, short *b0l, short *b0r, short *samples, int bo1); +int synth_1to1_s_x86_64_asm(short *window, short *b0l, short *b0r, short *samples, int bo1); void dct64_x86_64(short *out0, short *out1, real *samples); /* This is just a hull to use the mpg123 handle. */ int synth_1to1_x86_64(real *bandPtr,int channel, mpg123_handle *fr, int final) @@ -431,7 +431,7 @@ dct64_x86_64(bufr[0]+fr->bo,bufr[1]+fr->bo+1,bandPtr_r); } - clip = synth_1to1_stereo_x86_64_asm((short *)fr->decwins, b0l, b0r, samples, bo1); + clip = synth_1to1_s_x86_64_asm((short *)fr->decwins, b0l, b0r, samples, bo1); fr->buffer.fill += 128; @@ -440,6 +440,192 @@ #endif #endif +#ifdef OPT_AVX +#ifdef ACCURATE_ROUNDING +/* Assembler routines. 
*/ +#ifndef OPT_X86_64 +int synth_1to1_x86_64_accurate_asm(real *window, real *b0, short *samples, int bo1); +#endif +int synth_1to1_s_avx_accurate_asm(real *window, real *b0l, real *b0r, short *samples, int bo1); +void dct64_real_avx(real *out0, real *out1, real *samples); +/* Hull for C mpg123 API */ +int synth_1to1_avx(real *bandPtr,int channel, mpg123_handle *fr, int final) +{ + short *samples = (short *) (fr->buffer.data+fr->buffer.fill); + + real *b0, **buf; + int bo1; + int clip; + + if(fr->have_eq_settings) do_equalizer(bandPtr,channel,fr->equalizer); + + if(!channel) + { + fr->bo--; + fr->bo &= 0xf; + buf = fr->real_buffs[0]; + } + else + { + samples++; + buf = fr->real_buffs[1]; + } + + if(fr->bo & 0x1) + { + b0 = buf[0]; + bo1 = fr->bo; + dct64_real_avx(buf[1]+((fr->bo+1)&0xf),buf[0]+fr->bo,bandPtr); + } + else + { + b0 = buf[1]; + bo1 = fr->bo+1; + dct64_real_avx(buf[0]+fr->bo,buf[1]+fr->bo+1,bandPtr); + } + + clip = synth_1to1_x86_64_accurate_asm(fr->decwin, b0, samples, bo1); + + if(final) fr->buffer.fill += 128; + + return clip; +} + +int synth_1to1_stereo_avx(real *bandPtr_l, real *bandPtr_r, mpg123_handle *fr) +{ + short *samples = (short *) (fr->buffer.data+fr->buffer.fill); + + real *b0l, *b0r, **bufl, **bufr; + int bo1; + int clip; + + if(fr->have_eq_settings) + { + do_equalizer(bandPtr_l,0,fr->equalizer); + do_equalizer(bandPtr_r,1,fr->equalizer); + } + + fr->bo--; + fr->bo &= 0xf; + bufl = fr->real_buffs[0]; + bufr = fr->real_buffs[1]; + + if(fr->bo & 0x1) + { + b0l = bufl[0]; + b0r = bufr[0]; + bo1 = fr->bo; + dct64_real_avx(bufl[1]+((fr->bo+1)&0xf),bufl[0]+fr->bo,bandPtr_l); + dct64_real_avx(bufr[1]+((fr->bo+1)&0xf),bufr[0]+fr->bo,bandPtr_r); + } + else + { + b0l = bufl[1]; + b0r = bufr[1]; + bo1 = fr->bo+1; + dct64_real_avx(bufl[0]+fr->bo,bufl[1]+fr->bo+1,bandPtr_l); + dct64_real_avx(bufr[0]+fr->bo,bufr[1]+fr->bo+1,bandPtr_r); + } + + clip = synth_1to1_s_avx_accurate_asm(fr->decwin, b0l, b0r, samples, bo1); + + fr->buffer.fill += 128; + + 
return clip; +} +#else +/* This is defined in assembler. */ +#ifndef OPT_X86_64 +int synth_1to1_x86_64_asm(short *window, short *b0, short *samples, int bo1); +#endif +int synth_1to1_s_avx_asm(short *window, short *b0l, short *b0r, short *samples, int bo1); +void dct64_avx(short *out0, short *out1, real *samples); +/* This is just a hull to use the mpg123 handle. */ +int synth_1to1_avx(real *bandPtr,int channel, mpg123_handle *fr, int final) +{ + short *samples = (short *) (fr->buffer.data+fr->buffer.fill); + short *b0, **buf; + int clip; + int bo1; + + if(fr->have_eq_settings) do_equalizer(bandPtr,channel,fr->equalizer); + + if(!channel) + { + fr->bo--; + fr->bo &= 0xf; + buf = fr->short_buffs[0]; + } + else + { + samples++; + buf = fr->short_buffs[1]; + } + + if(fr->bo & 0x1) + { + b0 = buf[0]; + bo1 = fr->bo; + dct64_avx(buf[1]+((fr->bo+1)&0xf),buf[0]+fr->bo,bandPtr); + } + else + { + b0 = buf[1]; + bo1 = fr->bo+1; + dct64_avx(buf[0]+fr->bo,buf[1]+fr->bo+1,bandPtr); + } + + clip = synth_1to1_x86_64_asm((short *)fr->decwins, b0, samples, bo1); + + if(final) fr->buffer.fill += 128; + + return clip; +} + +int synth_1to1_stereo_avx(real *bandPtr_l,real *bandPtr_r, mpg123_handle *fr) +{ + short *samples = (short *) (fr->buffer.data+fr->buffer.fill); + short *b0l, *b0r, **bufl, **bufr; + int clip; + int bo1; + + if(fr->have_eq_settings) + { + do_equalizer(bandPtr_l,0,fr->equalizer); + do_equalizer(bandPtr_r,1,fr->equalizer); + } + + fr->bo--; + fr->bo &= 0xf; + bufl = fr->short_buffs[0]; + bufr = fr->short_buffs[1]; + + if(fr->bo & 0x1) + { + b0l = bufl[0]; + b0r = bufr[0]; + bo1 = fr->bo; + dct64_avx(bufl[1]+((fr->bo+1)&0xf),bufl[0]+fr->bo,bandPtr_l); + dct64_avx(bufr[1]+((fr->bo+1)&0xf),bufr[0]+fr->bo,bandPtr_r); + } + else + { + b0l = bufl[1]; + b0r = bufr[1]; + bo1 = fr->bo+1; + dct64_avx(bufl[0]+fr->bo,bufl[1]+fr->bo+1,bandPtr_l); + dct64_avx(bufr[0]+fr->bo,bufr[1]+fr->bo+1,bandPtr_r); + } + + clip = synth_1to1_s_avx_asm((short *)fr->decwins, b0l, b0r, samples, 
bo1); + + fr->buffer.fill += 128; + + return clip; +} +#endif +#endif + #ifdef OPT_ARM #ifdef ACCURATE_ROUNDING /* Assembler routines. */ @@ -534,6 +720,188 @@ #endif #endif +#ifdef OPT_NEON +#ifdef ACCURATE_ROUNDING +/* This is defined in assembler. */ +int synth_1to1_neon_accurate_asm(real *window, real *b0, short *samples, int bo1); +int synth_1to1_s_neon_accurate_asm(real *window, real *b0l, real *b0r, short *samples, int bo1); +void dct64_real_neon(real *out0, real *out1, real *samples); +/* Hull for C mpg123 API */ +int synth_1to1_neon(real *bandPtr,int channel, mpg123_handle *fr, int final) +{ + short *samples = (short *) (fr->buffer.data+fr->buffer.fill); + + real *b0, **buf; + int bo1; + int clip; + + if(fr->have_eq_settings) do_equalizer(bandPtr,channel,fr->equalizer); + + if(!channel) + { + fr->bo--; + fr->bo &= 0xf; + buf = fr->real_buffs[0]; + } + else + { + samples++; + buf = fr->real_buffs[1]; + } + + if(fr->bo & 0x1) + { + b0 = buf[0]; + bo1 = fr->bo; + dct64_real_neon(buf[1]+((fr->bo+1)&0xf),buf[0]+fr->bo,bandPtr); + } + else + { + b0 = buf[1]; + bo1 = fr->bo+1; + dct64_real_neon(buf[0]+fr->bo,buf[1]+fr->bo+1,bandPtr); + } + + clip = synth_1to1_neon_accurate_asm(fr->decwin, b0, samples, bo1); + + if(final) fr->buffer.fill += 128; + + return clip; +} + +int synth_1to1_stereo_neon(real *bandPtr_l, real *bandPtr_r, mpg123_handle *fr) +{ + short *samples = (short *) (fr->buffer.data+fr->buffer.fill); + + real *b0l, *b0r, **bufl, **bufr; + int bo1; + int clip; + + if(fr->have_eq_settings) + { + do_equalizer(bandPtr_l,0,fr->equalizer); + do_equalizer(bandPtr_r,1,fr->equalizer); + } + + fr->bo--; + fr->bo &= 0xf; + bufl = fr->real_buffs[0]; + bufr = fr->real_buffs[1]; + + if(fr->bo & 0x1) + { + b0l = bufl[0]; + b0r = bufr[0]; + bo1 = fr->bo; + dct64_real_neon(bufl[1]+((fr->bo+1)&0xf),bufl[0]+fr->bo,bandPtr_l); + dct64_real_neon(bufr[1]+((fr->bo+1)&0xf),bufr[0]+fr->bo,bandPtr_r); + } + else + { + b0l = bufl[1]; + b0r = bufr[1]; + bo1 = fr->bo+1; + 
dct64_real_neon(bufl[0]+fr->bo,bufl[1]+fr->bo+1,bandPtr_l); + dct64_real_neon(bufr[0]+fr->bo,bufr[1]+fr->bo+1,bandPtr_r); + } + + clip = synth_1to1_s_neon_accurate_asm(fr->decwin, b0l, b0r, samples, bo1); + + fr->buffer.fill += 128; + + return clip; +} +#else +/* This is defined in assembler. */ +int synth_1to1_neon_asm(short *window, short *b0, short *samples, int bo1); +int synth_1to1_s_neon_asm(short *window, short *b0l, short *b0r, short *samples, int bo1); +void dct64_neon(short *out0, short *out1, real *samples); +/* Hull for C mpg123 API */ +int synth_1to1_neon(real *bandPtr,int channel, mpg123_handle *fr, int final) +{ + short *samples = (short *) (fr->buffer.data+fr->buffer.fill); + short *b0, **buf; + int clip; + int bo1; + + if(fr->have_eq_settings) do_equalizer(bandPtr,channel,fr->equalizer); + + if(!channel) + { + fr->bo--; + fr->bo &= 0xf; + buf = fr->short_buffs[0]; + } + else + { + samples++; + buf = fr->short_buffs[1]; + } + + if(fr->bo & 0x1) + { + b0 = buf[0]; + bo1 = fr->bo; + dct64_neon(buf[1]+((fr->bo+1)&0xf),buf[0]+fr->bo,bandPtr); + } + else + { + b0 = buf[1]; + bo1 = fr->bo+1; + dct64_neon(buf[0]+fr->bo,buf[1]+fr->bo+1,bandPtr); + } + + clip = synth_1to1_neon_asm((short *)fr->decwins, b0, samples, bo1); + + if(final) fr->buffer.fill += 128; + + return clip; +} + +int synth_1to1_stereo_neon(real *bandPtr_l,real *bandPtr_r, mpg123_handle *fr) +{ + short *samples = (short *) (fr->buffer.data+fr->buffer.fill); + short *b0l, *b0r, **bufl, **bufr; + int clip; + int bo1; + + if(fr->have_eq_settings) + { + do_equalizer(bandPtr_l,0,fr->equalizer); + do_equalizer(bandPtr_r,1,fr->equalizer); + } + + fr->bo--; + fr->bo &= 0xf; + bufl = fr->short_buffs[0]; + bufr = fr->short_buffs[1]; + + if(fr->bo & 0x1) + { + b0l = bufl[0]; + b0r = bufr[0]; + bo1 = fr->bo; + dct64_neon(bufl[1]+((fr->bo+1)&0xf),bufl[0]+fr->bo,bandPtr_l); + dct64_neon(bufr[1]+((fr->bo+1)&0xf),bufr[0]+fr->bo,bandPtr_r); + } + else + { + b0l = bufl[1]; + b0r = bufr[1]; + bo1 = fr->bo+1; + 
dct64_neon(bufl[0]+fr->bo,bufl[1]+fr->bo+1,bandPtr_l); + dct64_neon(bufr[0]+fr->bo,bufr[1]+fr->bo+1,bandPtr_r); + } + + clip = synth_1to1_s_neon_asm((short *)fr->decwins, b0l, b0r, samples, bo1); + + fr->buffer.fill += 128; + + return clip; +} +#endif +#endif + #ifndef NO_DOWNSAMPLE /* @@ -556,7 +924,7 @@ #define SYNTH_NAME fr->synths.plain[r_2to1][f_16] #define MONO_NAME synth_2to1_mono -#define MONO2STEREO_NAME synth_2to1_mono2stereo +#define MONO2STEREO_NAME synth_2to1_m2s #include "synth_mono.h" #undef SYNTH_NAME #undef MONO_NAME @@ -593,7 +961,7 @@ #define SYNTH_NAME fr->synths.plain[r_4to1][f_16] /* This is just for the _i386 one... gotta check if it is really useful... */ #define MONO_NAME synth_4to1_mono -#define MONO2STEREO_NAME synth_4to1_mono2stereo +#define MONO2STEREO_NAME synth_4to1_m2s #include "synth_mono.h" #undef SYNTH_NAME #undef MONO_NAME @@ -621,7 +989,7 @@ /* These are all in one header, there's no flexibility to gain. */ #define SYNTH_NAME synth_ntom #define MONO_NAME synth_ntom_mono -#define MONO2STEREO_NAME synth_ntom_mono2stereo +#define MONO2STEREO_NAME synth_ntom_m2s #include "synth_ntom.h" #undef SYNTH_NAME #undef MONO_NAME Index: lib/3rdparty/libmpg123/synth_8bit.c =================================================================== --- lib/3rdparty/libmpg123/synth_8bit.c (revision 62563) +++ lib/3rdparty/libmpg123/synth_8bit.c (working copy) @@ -29,7 +29,7 @@ /* Mono-related synths; they wrap over _some_ synth_1to1_8bit (could be generic, could be i386). 
*/ #define SYNTH_NAME fr->synths.plain[r_1to1][f_8] #define MONO_NAME synth_1to1_8bit_mono -#define MONO2STEREO_NAME synth_1to1_8bit_mono2stereo +#define MONO2STEREO_NAME synth_1to1_8bit_m2s #include "synth_mono.h" #undef SYNTH_NAME #undef MONO_NAME @@ -50,7 +50,7 @@ #define BASE_SYNTH_NAME fr->synths.plain[r_1to1][f_16] #define SYNTH_NAME synth_1to1_8bit_wrap #define MONO_NAME synth_1to1_8bit_wrap_mono -#define MONO2STEREO_NAME synth_1to1_8bit_wrap_mono2stereo +#define MONO2STEREO_NAME synth_1to1_8bit_wrap_m2s #include "synth_8bit.h" #undef BASE_SYNTH_NAME #undef SYNTH_NAME @@ -73,7 +73,7 @@ /* Mono-related synths; they wrap over _some_ synth_2to1_8bit (could be generic, could be i386). */ #define SYNTH_NAME fr->synths.plain[r_2to1][f_8] #define MONO_NAME synth_2to1_8bit_mono -#define MONO2STEREO_NAME synth_2to1_8bit_mono2stereo +#define MONO2STEREO_NAME synth_2to1_8bit_m2s #include "synth_mono.h" #undef SYNTH_NAME #undef MONO_NAME @@ -102,7 +102,7 @@ /* Mono-related synths; they wrap over _some_ synth_4to1_8bit (could be generic, could be i386). */ #define SYNTH_NAME fr->synths.plain[r_4to1][f_8] #define MONO_NAME synth_4to1_8bit_mono -#define MONO2STEREO_NAME synth_4to1_8bit_mono2stereo +#define MONO2STEREO_NAME synth_4to1_8bit_m2s #include "synth_mono.h" #undef SYNTH_NAME #undef MONO_NAME @@ -130,7 +130,7 @@ /* These are all in one header, there's no flexibility to gain. 
*/ #define SYNTH_NAME synth_ntom_8bit #define MONO_NAME synth_ntom_8bit_mono -#define MONO2STEREO_NAME synth_ntom_8bit_mono2stereo +#define MONO2STEREO_NAME synth_ntom_8bit_m2s #include "synth_ntom.h" #undef SYNTH_NAME #undef MONO_NAME Index: lib/3rdparty/libmpg123/synth_arm.S =================================================================== --- lib/3rdparty/libmpg123/synth_arm.S (revision 62563) +++ lib/3rdparty/libmpg123/synth_arm.S (working copy) @@ -19,9 +19,14 @@ return value: number of clipped samples */ + .code 32 + .text ALIGN4 .globl ASM_NAME(synth_1to1_arm_asm) +#ifdef __ELF__ + .type ASM_NAME(synth_1to1_arm_asm), %function +#endif ASM_NAME(synth_1to1_arm_asm): stmfd sp!, {r4, r5, r6, r7, r8, r9, r10, lr} @@ -35,7 +40,7 @@ ldr r5, [WINDOW], #4 ldr r6, [B0], #4 -.Loop_start_1: +1: ldr r8, [WINDOW], #4 ldr r9, [B0], #4 mul r7, r5, r6 @@ -98,7 +103,7 @@ strh r7, [SAMPLES], #4 subs r3, r3, #1 - bne .Loop_start_1 + bne 1b add WINDOW, WINDOW, #4 add B0, B0, #4 @@ -142,7 +147,7 @@ mov r3, #14 -.Loop_start_2: +1: ldr r8, [WINDOW], #4 ldr r9, [B0], #4 mul r7, r5, r6 @@ -205,7 +210,7 @@ strh r7, [SAMPLES], #4 subs r3, r3, #1 - bne .Loop_start_2 + bne 1b ldr r8, [WINDOW], #4 ldr r9, [B0], #4 @@ -269,3 +274,5 @@ mov r0, REG_CLIP ldmfd sp!, {r4, r5, r6, r7, r8, r9, r10, pc} + +NONEXEC_STACK Index: lib/3rdparty/libmpg123/synth_arm_accurate.S =================================================================== --- lib/3rdparty/libmpg123/synth_arm_accurate.S (revision 62563) +++ lib/3rdparty/libmpg123/synth_arm_accurate.S (working copy) @@ -19,9 +19,14 @@ return value: number of clipped samples */ + .code 32 + .text ALIGN4 .globl ASM_NAME(synth_1to1_arm_accurate_asm) +#ifdef __ELF__ + .type ASM_NAME(synth_1to1_arm_accurate_asm), %function +#endif ASM_NAME(synth_1to1_arm_accurate_asm): stmfd sp!, {r4, r5, r6, r7, r8, r9, r10, r11, lr} @@ -35,7 +40,7 @@ ldr r5, [WINDOW], #4 ldr r6, [B0], #4 -.Loop_start_1: +1: ldr r9, [WINDOW], #4 ldr r10, [B0], #4 smull r8, r7, r5, r6 @@ 
-102,7 +107,7 @@ strh r8, [SAMPLES], #4 subs r3, r3, #1 - bne .Loop_start_1 + bne 1b add WINDOW, WINDOW, #4 add B0, B0, #4 @@ -150,7 +155,7 @@ mov r3, #14 -.Loop_start_2: +1: ldr r9, [WINDOW], #4 ldr r10, [B0], #4 smull r8, r7, r5, r6 @@ -217,7 +222,7 @@ strh r8, [SAMPLES], #4 subs r3, r3, #1 - bne .Loop_start_2 + bne 1b ldr r9, [WINDOW], #4 ldr r10, [B0], #4 @@ -285,3 +290,5 @@ mov r0, REG_CLIP ldmfd sp!, {r4, r5, r6, r7, r8, r9, r10, r11, pc} + +NONEXEC_STACK Index: lib/3rdparty/libmpg123/synth_mmx.S =================================================================== --- lib/3rdparty/libmpg123/synth_mmx.S (revision 62563) +++ lib/3rdparty/libmpg123/synth_mmx.S (working copy) @@ -31,12 +31,12 @@ decl %ecx movl 32(%esp),%esi movl (%edx),%eax - jecxz .L1 + jecxz 1f decl %eax andl %ebx,%eax leal 1088(%esi),%esi movl %eax,(%edx) -.L1: +1: leal (%esi,%eax,2),%edx movl %eax,%ebp incl %eax @@ -45,11 +45,11 @@ leal 544(%esi,%eax,2),%ecx incl %ebx testl $1, %eax - jnz .L2 + jnz 2f xchgl %edx,%ecx incl %ebp leal 544(%esi),%esi -.L2: +2: pushl %edx pushl %ecx call ASM_NAME(dct64_MMX) @@ -61,7 +61,7 @@ movl 44(%esp),%eax /* decwins */ leal (%eax,%ebx,2), %edx popl %eax -.L3: +3: movq (%edx),%mm0 pmaddwd (%esi),%mm0 movq 8(%edx),%mm1 @@ -84,12 +84,12 @@ leal 32(%esi),%esi leal 64(%edx),%edx leal 4(%edi),%edi - loop .L3 + loop 3b subl $64,%esi movl $15,%ecx -.L4: +4: movq (%edx),%mm0 pmaddwd (%esi),%mm0 movq 8(%edx),%mm1 @@ -114,7 +114,7 @@ subl $32,%esi addl $64,%edx leal 4(%edi),%edi - loop .L4 + loop 4b emms popl %ebx popl %esi Index: lib/3rdparty/libmpg123/synth_neon.S =================================================================== --- lib/3rdparty/libmpg123/synth_neon.S (revision 0) +++ lib/3rdparty/libmpg123/synth_neon.S (working copy) @@ -0,0 +1,133 @@ +/* + synth_neon: ARM NEON optimized synth + + copyright 1995-2010 by the mpg123 project - free software under the terms of the LGPL 2.1 + see COPYING and AUTHORS files in distribution or http://mpg123.org + initially 
written by Taihei Monma +*/ + +#include "mangle.h" + +#define WINDOW r0 +#define B0 r1 +#define SAMPLES r2 + +/* + int synth_1to1_neon_asm(short *window, short *b0, short *samples, int bo1); + return value: number of clipped samples +*/ + + .code 32 +#ifndef __APPLE__ + .fpu neon +#endif + + .text + .globl ASM_NAME(synth_1to1_neon_asm) +#ifdef __ELF__ + .type ASM_NAME(synth_1to1_neon_asm), %function +#endif + ALIGN4 +ASM_NAME(synth_1to1_neon_asm): + push {r4-r5, lr} + vpush {q4-q7} + + add WINDOW, WINDOW, #32 + sub WINDOW, WINDOW, r3, lsl #1 + + mov r3, #4 + mov r4, #64 +1: + vld1.16 {d0-d3}, [WINDOW], r4 + vld1.16 {d4-d7}, [B0, :128]! + vld1.16 {d8-d11}, [WINDOW], r4 + vswp d1, d4 + vld1.16 {d12-d15}, [B0, :128]! + vld1.16 {d16-d19}, [WINDOW], r4 + vld1.16 {d20-d23}, [B0, :128]! + vswp d9, d12 + vld1.16 {d24-d27}, [WINDOW], r4 + vld1.16 {d28-d31}, [B0, :128]! + vswp d17, d20 + vswp d25, d28 + vmull.s16 q0, d0, d1 + vmull.s16 q4, d8, d9 + vmull.s16 q8, d16, d17 + vmull.s16 q12, d24, d25 + vmlal.s16 q0, d4, d5 + vmlal.s16 q4, d12, d13 + vmlal.s16 q8, d20, d21 + vmlal.s16 q12, d28, d29 + vmlal.s16 q0, d2, d6 + vmlal.s16 q4, d10, d14 + vmlal.s16 q8, d18, d22 + vmlal.s16 q12, d26, d30 + vmlal.s16 q0, d3, d7 + vmlal.s16 q4, d11, d15 + vmlal.s16 q8, d19, d23 + vmlal.s16 q12, d27, d31 + vpadd.i32 d0, d0, d1 + vpadd.i32 d8, d8, d9 + vpadd.i32 d16, d16, d17 + vpadd.i32 d24, d24, d25 + vpadd.i32 d0, d0, d8 + vpadd.i32 d1, d16, d24 + + vld2.16 {d2,d3}, [SAMPLES] + vqshrn.s32 d1, q0, #13 + vst2.16 {d1,d3}, [SAMPLES]! 
+ + subs r3, r3, #1 + bne 1b + + mov r3, #4 + mov r5, #-32 +1: + vld1.16 {d0-d3}, [WINDOW], r4 + vld1.16 {d4-d7}, [B0, :128], r5 + vld1.16 {d8-d11}, [WINDOW], r4 + vswp d1, d4 + vld1.16 {d12-d15}, [B0, :128], r5 + vld1.16 {d16-d19}, [WINDOW], r4 + vld1.16 {d20-d23}, [B0, :128], r5 + vswp d9, d12 + vld1.16 {d24-d27}, [WINDOW], r4 + vld1.16 {d28-d31}, [B0, :128], r5 + vswp d17, d20 + vswp d25, d28 + vmull.s16 q0, d0, d1 + vmull.s16 q4, d8, d9 + vmull.s16 q8, d16, d17 + vmull.s16 q12, d24, d25 + vmlal.s16 q0, d4, d5 + vmlal.s16 q4, d12, d13 + vmlal.s16 q8, d20, d21 + vmlal.s16 q12, d28, d29 + vmlal.s16 q0, d2, d6 + vmlal.s16 q4, d10, d14 + vmlal.s16 q8, d18, d22 + vmlal.s16 q12, d26, d30 + vmlal.s16 q0, d3, d7 + vmlal.s16 q4, d11, d15 + vmlal.s16 q8, d19, d23 + vmlal.s16 q12, d27, d31 + vpadd.i32 d0, d0, d1 + vpadd.i32 d8, d8, d9 + vpadd.i32 d16, d16, d17 + vpadd.i32 d24, d24, d25 + vpadd.i32 d0, d0, d8 + vpadd.i32 d1, d16, d24 + + vld2.16 {d2,d3}, [SAMPLES] + vqshrn.s32 d1, q0, #13 + vst2.16 {d1,d3}, [SAMPLES]! 
+ + subs r3, r3, #1 + bne 1b + + mov r0, #0 + + vpop {q4-q7} + pop {r4-r5, pc} + +NONEXEC_STACK Index: lib/3rdparty/libmpg123/synth_neon_accurate.S =================================================================== --- lib/3rdparty/libmpg123/synth_neon_accurate.S (revision 0) +++ lib/3rdparty/libmpg123/synth_neon_accurate.S (working copy) @@ -0,0 +1,183 @@ +/* + synth_neon_accurate: ARM NEON optimized synth (MPEG compliant 16-bit output version) + + copyright 1995-2010 by the mpg123 project - free software under the terms of the LGPL 2.1 + see COPYING and AUTHORS files in distribution or http://mpg123.org + initially written by Taihei Monma +*/ + +#include "mangle.h" + +#define WINDOW r0 +#define B0 r1 +#define SAMPLES r2 + +/* + int synth_1to1_real_neon_accurate_asm(real *window, real *b0, real *samples, int bo1); + return value: number of clipped samples (0) +*/ + + .code 32 +#ifndef __APPLE__ + .fpu neon +#endif + + .text + .globl ASM_NAME(synth_1to1_neon_accurate_asm) +#ifdef __ELF__ + .type ASM_NAME(synth_1to1_neon_accurate_asm), %function +#endif +ASM_NAME(synth_1to1_neon_accurate_asm): + push {r4-r6, lr} + vpush {q4-q7} + mov r6, sp + sub sp, sp, #16 + bic sp, #0xff + + add WINDOW, WINDOW, #64 + sub WINDOW, WINDOW, r3, lsl #2 + + mov r3, #4 + mov r4, #128 + mov r5, #64 +1: + vld1.32 {q0,q1}, [WINDOW], r4 + vld1.32 {q2,q3}, [WINDOW], r4 + vld1.32 {q4,q5}, [WINDOW], r4 + vld1.32 {q6,q7}, [WINDOW] + sub WINDOW, WINDOW, #352 + vld1.32 {q8,q9}, [B0, :128], r5 + vld1.32 {q10,q11}, [B0, :128], r5 + vld1.32 {q12,q13}, [B0, :128], r5 + vld1.32 {q14,q15}, [B0, :128] + vswp q1, q4 + vswp q3, q6 + sub B0, B0, #160 + vmul.f32 q0, q0, q8 + vmul.f32 q2, q2, q10 + vmul.f32 q1, q1, q12 + vmul.f32 q3, q3, q14 + vmla.f32 q0, q4, q9 + vmla.f32 q2, q6, q11 + vmla.f32 q1, q5, q13 + vmla.f32 q3, q7, q15 + vld1.32 {q4,q5}, [WINDOW], r4 + vld1.32 {q6,q7}, [WINDOW], r4 + vld1.32 {q8,q9}, [WINDOW], r4 + vld1.32 {q10,q11}, [B0, :128], r5 + vld1.32 {q12,q13}, [B0, :128], r5 + vld1.32 
{q14,q15}, [B0, :128], r5 + vswp q5, q6 + vswp q11, q12 + vmla.f32 q0, q4, q10 + vmla.f32 q2, q5, q11 + vmla.f32 q1, q8, q14 + vld1.32 {q4,q5}, [WINDOW] + vld1.32 {q10,q11}, [B0, :128]! + add WINDOW, WINDOW, #96 + vmla.f32 q3, q4, q10 + vmla.f32 q0, q6, q12 + vmla.f32 q2, q7, q13 + vmla.f32 q1, q9, q15 + vmla.f32 q3, q5, q11 + vmov.i32 q4, #0x4b000000 + vmvn.i32 q5, #0xb9000000 + vorr.i32 q4, #0x00400000 + vpadd.f32 d0, d0, d1 + vpadd.f32 d4, d4, d5 + vpadd.f32 d2, d2, d3 + vpadd.f32 d6, d6, d7 + vld1.32 {q6}, [sp, :128] + vpadd.f32 d0, d0, d4 + vpadd.f32 d1, d2, d6 + + vadd.f32 q3, q0, q4 + vacgt.f32 q5, q0, q5 + vld2.16 {d4,d5}, [SAMPLES] + vshl.i32 q3, q3, #10 + vqshrn.s32 d3, q3, #10 + vshr.u32 q5, q5, #31 + vst2.16 {d3,d5}, [SAMPLES]! + vadd.i32 q5, q5, q6 + vst1.32 {q5}, [sp, :128] + + subs r3, r3, #1 + bne 1b + + mov r3, #4 + mov r5, #-64 +1: + vld1.32 {q0,q1}, [WINDOW], r4 + vld1.32 {q2,q3}, [WINDOW], r4 + vld1.32 {q4,q5}, [WINDOW], r4 + vld1.32 {q6,q7}, [WINDOW] + sub WINDOW, WINDOW, #352 + vld1.32 {q8,q9}, [B0, :128], r5 + vld1.32 {q10,q11}, [B0, :128], r5 + vld1.32 {q12,q13}, [B0, :128], r5 + vld1.32 {q14,q15}, [B0, :128] + vswp q1, q4 + vswp q3, q6 + add B0, B0, #224 + vmul.f32 q0, q0, q8 + vmul.f32 q2, q2, q10 + vmul.f32 q1, q1, q12 + vmul.f32 q3, q3, q14 + vmla.f32 q0, q4, q9 + vmla.f32 q2, q6, q11 + vmla.f32 q1, q5, q13 + vmla.f32 q3, q7, q15 + vld1.32 {q4,q5}, [WINDOW], r4 + vld1.32 {q6,q7}, [WINDOW], r4 + vld1.32 {q8,q9}, [WINDOW], r4 + vld1.32 {q10,q11}, [B0, :128], r5 + vld1.32 {q12,q13}, [B0, :128], r5 + vld1.32 {q14,q15}, [B0, :128], r5 + vswp q5, q6 + vswp q11, q12 + vmla.f32 q0, q4, q10 + vmla.f32 q2, q5, q11 + vmla.f32 q1, q8, q14 + vld1.32 {q4,q5}, [WINDOW] + vld1.32 {q10,q11}, [B0, :128] + add WINDOW, WINDOW, #96 + sub B0, B0, #96 + vmla.f32 q3, q4, q10 + vmla.f32 q0, q6, q12 + vmla.f32 q2, q7, q13 + vmla.f32 q1, q9, q15 + vmla.f32 q3, q5, q11 + vmov.i32 q4, #0x4b000000 + vmvn.i32 q5, #0xb9000000 + vorr.i32 q4, #0x00400000 + vpadd.f32 d0, 
d0, d1 + vpadd.f32 d4, d4, d5 + vpadd.f32 d2, d2, d3 + vpadd.f32 d6, d6, d7 + vld1.32 {q6}, [sp, :128] + vpadd.f32 d0, d0, d4 + vpadd.f32 d1, d2, d6 + + vadd.f32 q3, q0, q4 + vacgt.f32 q5, q0, q5 + vld2.16 {d4,d5}, [SAMPLES] + vshl.i32 q3, q3, #10 + vqshrn.s32 d3, q3, #10 + vshr.u32 q5, q5, #31 + vst2.16 {d3,d5}, [SAMPLES]! + vadd.i32 q5, q5, q6 + vst1.32 {q5}, [sp, :128] + + subs r3, r3, #1 + bne 1b + + vld1.32 {q0}, [sp, :128] + vpadd.i32 d0, d0, d1 + vpadd.i32 d0, d0, d0 + vmov.32 r0, d0[0] + + mov sp, r6 + vpop {q4-q7} + pop {r4-r6, pc} + +NONEXEC_STACK Index: lib/3rdparty/libmpg123/synth_neon_float.S =================================================================== --- lib/3rdparty/libmpg123/synth_neon_float.S (revision 0) +++ lib/3rdparty/libmpg123/synth_neon_float.S (working copy) @@ -0,0 +1,159 @@ +/* + synth_neon_float: ARM NEON optimized synth (float output version) + + copyright 1995-2010 by the mpg123 project - free software under the terms of the LGPL 2.1 + see COPYING and AUTHORS files in distribution or http://mpg123.org + initially written by Taihei Monma +*/ + +#include "mangle.h" + +#define WINDOW r0 +#define B0 r1 +#define SAMPLES r2 + +/* + int synth_1to1_real_neon_asm(real *window, real *b0, real *samples, int bo1); + return value: number of clipped samples (0) +*/ + + .code 32 +#ifndef __APPLE__ + .fpu neon +#endif + + .text + .globl ASM_NAME(synth_1to1_real_neon_asm) +#ifdef __ELF__ + .type ASM_NAME(synth_1to1_real_neon_asm), %function +#endif + ALIGN4 +ASM_NAME(synth_1to1_real_neon_asm): + push {r4-r5, lr} + vpush {q4-q7} + + add WINDOW, WINDOW, #64 + sub WINDOW, WINDOW, r3, lsl #2 + + mov r3, #4 + mov r4, #128 + mov r5, #64 +1: + vld1.32 {q0,q1}, [WINDOW], r4 + vld1.32 {q2,q3}, [WINDOW], r4 + vld1.32 {q4,q5}, [WINDOW], r4 + vld1.32 {q6,q7}, [WINDOW] + sub WINDOW, WINDOW, #352 + vld1.32 {q8,q9}, [B0, :128], r5 + vld1.32 {q10,q11}, [B0, :128], r5 + vld1.32 {q12,q13}, [B0, :128], r5 + vld1.32 {q14,q15}, [B0, :128] + vswp q1, q4 + vswp q3, q6 
+ sub B0, B0, #160 + vmul.f32 q0, q0, q8 + vmul.f32 q2, q2, q10 + vmul.f32 q1, q1, q12 + vmul.f32 q3, q3, q14 + vmla.f32 q0, q4, q9 + vmla.f32 q2, q6, q11 + vmla.f32 q1, q5, q13 + vmla.f32 q3, q7, q15 + vld1.32 {q4,q5}, [WINDOW], r4 + vld1.32 {q6,q7}, [WINDOW], r4 + vld1.32 {q8,q9}, [WINDOW], r4 + vld1.32 {q10,q11}, [B0, :128], r5 + vld1.32 {q12,q13}, [B0, :128], r5 + vld1.32 {q14,q15}, [B0, :128], r5 + vswp q5, q6 + vswp q11, q12 + vmla.f32 q0, q4, q10 + vmla.f32 q2, q5, q11 + vmla.f32 q1, q8, q14 + vld1.32 {q4,q5}, [WINDOW] + vld1.32 {q10,q11}, [B0, :128]! + add WINDOW, WINDOW, #96 + vmla.f32 q3, q4, q10 + vmla.f32 q0, q6, q12 + vmla.f32 q2, q7, q13 + vmla.f32 q1, q9, q15 + vmla.f32 q3, q5, q11 + vld2.32 {q4,q5}, [SAMPLES] + vpadd.f32 d0, d0, d1 + vpadd.f32 d4, d4, d5 + vpadd.f32 d2, d2, d3 + vpadd.f32 d6, d6, d7 + vpadd.f32 d0, d0, d4 + vpadd.f32 d1, d2, d6 + + vmov.i32 q1, #0x38000000 + vmul.f32 q4, q0, q1 + vst2.32 {q4,q5}, [SAMPLES]! + + subs r3, r3, #1 + bne 1b + + mov r3, #4 + mov r5, #-64 +1: + vld1.32 {q0,q1}, [WINDOW], r4 + vld1.32 {q2,q3}, [WINDOW], r4 + vld1.32 {q4,q5}, [WINDOW], r4 + vld1.32 {q6,q7}, [WINDOW] + sub WINDOW, WINDOW, #352 + vld1.32 {q8,q9}, [B0, :128], r5 + vld1.32 {q10,q11}, [B0, :128], r5 + vld1.32 {q12,q13}, [B0, :128], r5 + vld1.32 {q14,q15}, [B0, :128] + vswp q1, q4 + vswp q3, q6 + add B0, B0, #224 + vmul.f32 q0, q0, q8 + vmul.f32 q2, q2, q10 + vmul.f32 q1, q1, q12 + vmul.f32 q3, q3, q14 + vmla.f32 q0, q4, q9 + vmla.f32 q2, q6, q11 + vmla.f32 q1, q5, q13 + vmla.f32 q3, q7, q15 + vld1.32 {q4,q5}, [WINDOW], r4 + vld1.32 {q6,q7}, [WINDOW], r4 + vld1.32 {q8,q9}, [WINDOW], r4 + vld1.32 {q10,q11}, [B0, :128], r5 + vld1.32 {q12,q13}, [B0, :128], r5 + vld1.32 {q14,q15}, [B0, :128], r5 + vswp q5, q6 + vswp q11, q12 + vmla.f32 q0, q4, q10 + vmla.f32 q2, q5, q11 + vmla.f32 q1, q8, q14 + vld1.32 {q4,q5}, [WINDOW] + vld1.32 {q10,q11}, [B0, :128] + add WINDOW, WINDOW, #96 + sub B0, B0, #96 + vmla.f32 q3, q4, q10 + vmla.f32 q0, q6, q12 + vmla.f32 
q2, q7, q13 + vmla.f32 q1, q9, q15 + vmla.f32 q3, q5, q11 + vld2.32 {q4,q5}, [SAMPLES] + vpadd.f32 d0, d0, d1 + vpadd.f32 d4, d4, d5 + vpadd.f32 d2, d2, d3 + vpadd.f32 d6, d6, d7 + vpadd.f32 d0, d0, d4 + vpadd.f32 d1, d2, d6 + + vmov.i32 q1, #0x38000000 + vmul.f32 q4, q0, q1 + vst2.32 {q4,q5}, [SAMPLES]! + + subs r3, r3, #1 + bne 1b + + mov r0, #0 + + vpop {q4-q7} + pop {r4-r5, pc} + +NONEXEC_STACK Index: lib/3rdparty/libmpg123/synth_neon_s32.S =================================================================== --- lib/3rdparty/libmpg123/synth_neon_s32.S (revision 0) +++ lib/3rdparty/libmpg123/synth_neon_s32.S (working copy) @@ -0,0 +1,178 @@ +/* + synth_neon_s32: ARM NEON optimized synth (32-bit output version) + + copyright 1995-2010 by the mpg123 project - free software under the terms of the LGPL 2.1 + see COPYING and AUTHORS files in distribution or http://mpg123.org + initially written by Taihei Monma +*/ + +#include "mangle.h" + +#define WINDOW r0 +#define B0 r1 +#define SAMPLES r2 + +/* + int synth_1to1_s32_neon_asm(real *window, real *b0, int *samples, int bo1); + return value: number of clipped samples (0) +*/ + + .code 32 +#ifndef __APPLE__ + .fpu neon +#endif + + .text + .globl ASM_NAME(synth_1to1_s32_neon_asm) +#ifdef __ELF__ + .type ASM_NAME(synth_1to1_s32_neon_asm), %function +#endif + ALIGN4 +ASM_NAME(synth_1to1_s32_neon_asm): + push {r4-r6, lr} + vpush {q4-q7} + mov r6, sp + sub sp, sp, #16 + bic sp, #0xff + + add WINDOW, WINDOW, #64 + sub WINDOW, WINDOW, r3, lsl #2 + + mov r3, #4 + mov r4, #128 + mov r5, #64 +1: + vld1.32 {q0,q1}, [WINDOW], r4 + vld1.32 {q2,q3}, [WINDOW], r4 + vld1.32 {q4,q5}, [WINDOW], r4 + vld1.32 {q6,q7}, [WINDOW] + sub WINDOW, WINDOW, #352 + vld1.32 {q8,q9}, [B0, :128], r5 + vld1.32 {q10,q11}, [B0, :128], r5 + vld1.32 {q12,q13}, [B0, :128], r5 + vld1.32 {q14,q15}, [B0, :128] + vswp q1, q4 + vswp q3, q6 + sub B0, B0, #160 + vmul.f32 q0, q0, q8 + vmul.f32 q2, q2, q10 + vmul.f32 q1, q1, q12 + vmul.f32 q3, q3, q14 + vmla.f32 q0, 
q4, q9 + vmla.f32 q2, q6, q11 + vmla.f32 q1, q5, q13 + vmla.f32 q3, q7, q15 + vld1.32 {q4,q5}, [WINDOW], r4 + vld1.32 {q6,q7}, [WINDOW], r4 + vld1.32 {q8,q9}, [WINDOW], r4 + vld1.32 {q10,q11}, [B0, :128], r5 + vld1.32 {q12,q13}, [B0, :128], r5 + vld1.32 {q14,q15}, [B0, :128], r5 + vswp q5, q6 + vswp q11, q12 + vmla.f32 q0, q4, q10 + vmla.f32 q2, q5, q11 + vmla.f32 q1, q8, q14 + vld1.32 {q4,q5}, [WINDOW] + vld1.32 {q10,q11}, [B0, :128]! + add WINDOW, WINDOW, #96 + vmla.f32 q3, q4, q10 + vmla.f32 q0, q6, q12 + vmla.f32 q2, q7, q13 + vmla.f32 q1, q9, q15 + vmla.f32 q3, q5, q11 + vmvn.i32 q5, #0xb9000000 + vpadd.f32 d0, d0, d1 + vpadd.f32 d4, d4, d5 + vpadd.f32 d2, d2, d3 + vpadd.f32 d6, d6, d7 + vld1.32 {q6}, [sp, :128] + vpadd.f32 d0, d0, d4 + vpadd.f32 d1, d2, d6 + + vcvt.s32.f32 q3, q0, #16 + vacgt.f32 q5, q0, q5 + vld2.32 {q1,q2}, [SAMPLES] + vshr.u32 q5, q5, #31 + vmov q1, q3 + vst2.32 {q1,q2}, [SAMPLES]! + vadd.i32 q5, q5, q6 + vst1.32 {q5}, [sp, :128] + + subs r3, r3, #1 + bne 1b + + mov r3, #4 + mov r5, #-64 +1: + vld1.32 {q0,q1}, [WINDOW], r4 + vld1.32 {q2,q3}, [WINDOW], r4 + vld1.32 {q4,q5}, [WINDOW], r4 + vld1.32 {q6,q7}, [WINDOW] + sub WINDOW, WINDOW, #352 + vld1.32 {q8,q9}, [B0, :128], r5 + vld1.32 {q10,q11}, [B0, :128], r5 + vld1.32 {q12,q13}, [B0, :128], r5 + vld1.32 {q14,q15}, [B0, :128] + vswp q1, q4 + vswp q3, q6 + add B0, B0, #224 + vmul.f32 q0, q0, q8 + vmul.f32 q2, q2, q10 + vmul.f32 q1, q1, q12 + vmul.f32 q3, q3, q14 + vmla.f32 q0, q4, q9 + vmla.f32 q2, q6, q11 + vmla.f32 q1, q5, q13 + vmla.f32 q3, q7, q15 + vld1.32 {q4,q5}, [WINDOW], r4 + vld1.32 {q6,q7}, [WINDOW], r4 + vld1.32 {q8,q9}, [WINDOW], r4 + vld1.32 {q10,q11}, [B0, :128], r5 + vld1.32 {q12,q13}, [B0, :128], r5 + vld1.32 {q14,q15}, [B0, :128], r5 + vswp q5, q6 + vswp q11, q12 + vmla.f32 q0, q4, q10 + vmla.f32 q2, q5, q11 + vmla.f32 q1, q8, q14 + vld1.32 {q4,q5}, [WINDOW] + vld1.32 {q10,q11}, [B0, :128] + add WINDOW, WINDOW, #96 + sub B0, B0, #96 + vmla.f32 q3, q4, q10 + vmla.f32 q0, q6, 
q12 + vmla.f32 q2, q7, q13 + vmla.f32 q1, q9, q15 + vmla.f32 q3, q5, q11 + vmvn.i32 q5, #0xb9000000 + vpadd.f32 d0, d0, d1 + vpadd.f32 d4, d4, d5 + vpadd.f32 d2, d2, d3 + vpadd.f32 d6, d6, d7 + vld1.32 {q6}, [sp, :128] + vpadd.f32 d0, d0, d4 + vpadd.f32 d1, d2, d6 + + vcvt.s32.f32 q3, q0, #16 + vacgt.f32 q5, q0, q5 + vld2.32 {q1,q2}, [SAMPLES] + vshr.u32 q5, q5, #31 + vmov q1, q3 + vst2.32 {q1,q2}, [SAMPLES]! + vadd.i32 q5, q5, q6 + vst1.32 {q5}, [sp, :128] + + subs r3, r3, #1 + bne 1b + + vld1.32 {q0}, [sp, :128] + vpadd.i32 d0, d0, d1 + vpadd.i32 d0, d0, d0 + vmov.32 r0, d0[0] + + mov sp, r6 + vpop {q4-q7} + pop {r4-r6, pc} + +NONEXEC_STACK Index: lib/3rdparty/libmpg123/synth_real.c =================================================================== --- lib/3rdparty/libmpg123/synth_real.c (revision 62563) +++ lib/3rdparty/libmpg123/synth_real.c (working copy) @@ -32,7 +32,7 @@ /* Mono-related synths; they wrap over _some_ synth_1to1_real (could be generic, could be i386). */ #define SYNTH_NAME fr->synths.plain[r_1to1][f_real] #define MONO_NAME synth_1to1_real_mono -#define MONO2STEREO_NAME synth_1to1_real_mono2stereo +#define MONO2STEREO_NAME synth_1to1_real_m2s #include "synth_mono.h" #undef SYNTH_NAME #undef MONO_NAME @@ -53,7 +53,7 @@ #ifdef OPT_X86_64 /* Assembler routines. 
*/ int synth_1to1_real_x86_64_asm(real *window, real *b0, real *samples, int bo1); -int synth_1to1_real_stereo_x86_64_asm(real *window, real *b0l, real *b0r, real *samples, int bo1); +int synth_1to1_real_s_x86_64_asm(real *window, real *b0l, real *b0r, real *samples, int bo1); void dct64_real_x86_64(real *out0, real *out1, real *samples); /* Hull for C mpg123 API */ int synth_1to1_real_x86_64(real *bandPtr,int channel, mpg123_handle *fr, int final) @@ -132,7 +132,7 @@ dct64_real_x86_64(bufr[0]+fr->bo,bufr[1]+fr->bo+1,bandPtr_r); } - synth_1to1_real_stereo_x86_64_asm(fr->decwin, b0l, b0r, samples, bo1); + synth_1to1_real_s_x86_64_asm(fr->decwin, b0l, b0r, samples, bo1); fr->buffer.fill += 256; @@ -140,10 +140,102 @@ } #endif -#ifdef OPT_SSE +#ifdef OPT_AVX /* Assembler routines. */ +#ifndef OPT_X86_64 +int synth_1to1_real_x86_64_asm(real *window, real *b0, real *samples, int bo1); +#endif +int synth_1to1_real_s_avx_asm(real *window, real *b0l, real *b0r, real *samples, int bo1); +void dct64_real_avx(real *out0, real *out1, real *samples); +/* Hull for C mpg123 API */ +int synth_1to1_real_avx(real *bandPtr,int channel, mpg123_handle *fr, int final) +{ + real *samples = (real *) (fr->buffer.data+fr->buffer.fill); + + real *b0, **buf; + int bo1; + + if(fr->have_eq_settings) do_equalizer(bandPtr,channel,fr->equalizer); + + if(!channel) + { + fr->bo--; + fr->bo &= 0xf; + buf = fr->real_buffs[0]; + } + else + { + samples++; + buf = fr->real_buffs[1]; + } + + if(fr->bo & 0x1) + { + b0 = buf[0]; + bo1 = fr->bo; + dct64_real_avx(buf[1]+((fr->bo+1)&0xf),buf[0]+fr->bo,bandPtr); + } + else + { + b0 = buf[1]; + bo1 = fr->bo+1; + dct64_real_avx(buf[0]+fr->bo,buf[1]+fr->bo+1,bandPtr); + } + + synth_1to1_real_x86_64_asm(fr->decwin, b0, samples, bo1); + + if(final) fr->buffer.fill += 256; + + return 0; +} + +int synth_1to1_real_stereo_avx(real *bandPtr_l, real *bandPtr_r, mpg123_handle *fr) +{ + real *samples = (real *) (fr->buffer.data+fr->buffer.fill); + + real *b0l, *b0r, **bufl, 
**bufr; + int bo1; + + if(fr->have_eq_settings) + { + do_equalizer(bandPtr_l,0,fr->equalizer); + do_equalizer(bandPtr_r,1,fr->equalizer); + } + + fr->bo--; + fr->bo &= 0xf; + bufl = fr->real_buffs[0]; + bufr = fr->real_buffs[1]; + + if(fr->bo & 0x1) + { + b0l = bufl[0]; + b0r = bufr[0]; + bo1 = fr->bo; + dct64_real_avx(bufl[1]+((fr->bo+1)&0xf),bufl[0]+fr->bo,bandPtr_l); + dct64_real_avx(bufr[1]+((fr->bo+1)&0xf),bufr[0]+fr->bo,bandPtr_r); + } + else + { + b0l = bufl[1]; + b0r = bufr[1]; + bo1 = fr->bo+1; + dct64_real_avx(bufl[0]+fr->bo,bufl[1]+fr->bo+1,bandPtr_l); + dct64_real_avx(bufr[0]+fr->bo,bufr[1]+fr->bo+1,bandPtr_r); + } + + synth_1to1_real_s_avx_asm(fr->decwin, b0l, b0r, samples, bo1); + + fr->buffer.fill += 256; + + return 0; +} +#endif + +#if defined(OPT_SSE) || defined(OPT_SSE_VINTAGE) +/* Assembler routines. */ int synth_1to1_real_sse_asm(real *window, real *b0, real *samples, int bo1); -int synth_1to1_real_stereo_sse_asm(real *window, real *b0l, real *b0r, real *samples, int bo1); +int synth_1to1_real_s_sse_asm(real *window, real *b0l, real *b0r, real *samples, int bo1); void dct64_real_sse(real *out0, real *out1, real *samples); /* Hull for C mpg123 API */ int synth_1to1_real_sse(real *bandPtr,int channel, mpg123_handle *fr, int final) @@ -222,7 +314,7 @@ dct64_real_sse(bufr[0]+fr->bo,bufr[1]+fr->bo+1,bandPtr_r); } - synth_1to1_real_stereo_sse_asm(fr->decwin, b0l, b0r, samples, bo1); + synth_1to1_real_s_sse_asm(fr->decwin, b0l, b0r, samples, bo1); fr->buffer.fill += 256; @@ -230,6 +322,95 @@ } #endif +#ifdef OPT_NEON +/* Assembler routines. 
*/ +int synth_1to1_real_neon_asm(real *window, real *b0, real *samples, int bo1); +int synth_1to1_real_s_neon_asm(real *window, real *b0l, real *b0r, real *samples, int bo1); +void dct64_real_neon(real *out0, real *out1, real *samples); +/* Hull for C mpg123 API */ +int synth_1to1_real_neon(real *bandPtr,int channel, mpg123_handle *fr, int final) +{ + real *samples = (real *) (fr->buffer.data+fr->buffer.fill); + + real *b0, **buf; + int bo1; + + if(fr->have_eq_settings) do_equalizer(bandPtr,channel,fr->equalizer); + + if(!channel) + { + fr->bo--; + fr->bo &= 0xf; + buf = fr->real_buffs[0]; + } + else + { + samples++; + buf = fr->real_buffs[1]; + } + + if(fr->bo & 0x1) + { + b0 = buf[0]; + bo1 = fr->bo; + dct64_real_neon(buf[1]+((fr->bo+1)&0xf),buf[0]+fr->bo,bandPtr); + } + else + { + b0 = buf[1]; + bo1 = fr->bo+1; + dct64_real_neon(buf[0]+fr->bo,buf[1]+fr->bo+1,bandPtr); + } + + synth_1to1_real_neon_asm(fr->decwin, b0, samples, bo1); + + if(final) fr->buffer.fill += 256; + + return 0; +} +int synth_1to1_real_stereo_neon(real *bandPtr_l, real *bandPtr_r, mpg123_handle *fr) +{ + real *samples = (real *) (fr->buffer.data+fr->buffer.fill); + + real *b0l, *b0r, **bufl, **bufr; + int bo1; + + if(fr->have_eq_settings) + { + do_equalizer(bandPtr_l,0,fr->equalizer); + do_equalizer(bandPtr_r,1,fr->equalizer); + } + + fr->bo--; + fr->bo &= 0xf; + bufl = fr->real_buffs[0]; + bufr = fr->real_buffs[1]; + + if(fr->bo & 0x1) + { + b0l = bufl[0]; + b0r = bufr[0]; + bo1 = fr->bo; + dct64_real_neon(bufl[1]+((fr->bo+1)&0xf),bufl[0]+fr->bo,bandPtr_l); + dct64_real_neon(bufr[1]+((fr->bo+1)&0xf),bufr[0]+fr->bo,bandPtr_r); + } + else + { + b0l = bufl[1]; + b0r = bufr[1]; + bo1 = fr->bo+1; + dct64_real_neon(bufl[0]+fr->bo,bufl[1]+fr->bo+1,bandPtr_l); + dct64_real_neon(bufr[0]+fr->bo,bufr[1]+fr->bo+1,bandPtr_r); + } + + synth_1to1_real_s_neon_asm(fr->decwin, b0l, b0r, samples, bo1); + + fr->buffer.fill += 256; + + return 0; +} +#endif + #ifndef NO_DOWNSAMPLE /* @@ -244,7 +425,7 @@ /* 
Mono-related synths; they wrap over _some_ synth_2to1_real (could be generic, could be i386). */ #define SYNTH_NAME fr->synths.plain[r_2to1][f_real] #define MONO_NAME synth_2to1_real_mono -#define MONO2STEREO_NAME synth_2to1_real_mono2stereo +#define MONO2STEREO_NAME synth_2to1_real_m2s #include "synth_mono.h" #undef SYNTH_NAME #undef MONO_NAME @@ -273,7 +454,7 @@ /* Mono-related synths; they wrap over _some_ synth_4to1_real (could be generic, could be i386). */ #define SYNTH_NAME fr->synths.plain[r_4to1][f_real] #define MONO_NAME synth_4to1_real_mono -#define MONO2STEREO_NAME synth_4to1_real_mono2stereo +#define MONO2STEREO_NAME synth_4to1_real_m2s #include "synth_mono.h" #undef SYNTH_NAME #undef MONO_NAME @@ -301,7 +482,7 @@ /* These are all in one header, there's no flexibility to gain. */ #define SYNTH_NAME synth_ntom_real #define MONO_NAME synth_ntom_real_mono -#define MONO2STEREO_NAME synth_ntom_real_mono2stereo +#define MONO2STEREO_NAME synth_ntom_real_m2s #include "synth_ntom.h" #undef SYNTH_NAME #undef MONO_NAME Index: lib/3rdparty/libmpg123/synth_s32.c =================================================================== --- lib/3rdparty/libmpg123/synth_s32.c (revision 62563) +++ lib/3rdparty/libmpg123/synth_s32.c (working copy) @@ -31,7 +31,7 @@ /* Mono-related synths; they wrap over _some_ synth_1to1_s32 (could be generic, could be i386). */ #define SYNTH_NAME fr->synths.plain[r_1to1][f_32] #define MONO_NAME synth_1to1_s32_mono -#define MONO2STEREO_NAME synth_1to1_s32_mono2stereo +#define MONO2STEREO_NAME synth_1to1_s32_m2s #include "synth_mono.h" #undef SYNTH_NAME #undef MONO_NAME @@ -49,7 +49,7 @@ #ifdef OPT_X86_64 /* Assembler routines. 
*/ int synth_1to1_s32_x86_64_asm(real *window, real *b0, int32_t *samples, int bo1); -int synth_1to1_s32_stereo_x86_64_asm(real *window, real *b0l, real *b0r, int32_t *samples, int bo1); +int synth_1to1_s32_s_x86_64_asm(real *window, real *b0l, real *b0r, int32_t *samples, int bo1); void dct64_real_x86_64(real *out0, real *out1, real *samples); /* Hull for C mpg123 API */ int synth_1to1_s32_x86_64(real *bandPtr,int channel, mpg123_handle *fr, int final) @@ -131,7 +131,7 @@ dct64_real_x86_64(bufr[0]+fr->bo,bufr[1]+fr->bo+1,bandPtr_r); } - clip = synth_1to1_s32_stereo_x86_64_asm(fr->decwin, b0l, b0r, samples, bo1); + clip = synth_1to1_s32_s_x86_64_asm(fr->decwin, b0l, b0r, samples, bo1); fr->buffer.fill += 256; @@ -139,10 +139,105 @@ } #endif -#ifdef OPT_SSE +#ifdef OPT_AVX /* Assembler routines. */ +#ifndef OPT_X86_64 /* the mono AVX hull calls the x86-64 synth; declare it here only if the OPT_X86_64 section did not */ +int synth_1to1_s32_x86_64_asm(real *window, real *b0, int32_t *samples, int bo1); +#endif +int synth_1to1_s32_s_avx_asm(real *window, real *b0l, real *b0r, int32_t *samples, int bo1); +void dct64_real_avx(real *out0, real *out1, real *samples); +/* Hull for C mpg123 API */ +int synth_1to1_s32_avx(real *bandPtr,int channel, mpg123_handle *fr, int final) +{ + int32_t *samples = (int32_t *) (fr->buffer.data+fr->buffer.fill); + + real *b0, **buf; + int bo1; + int clip; + + if(fr->have_eq_settings) do_equalizer(bandPtr,channel,fr->equalizer); + + if(!channel) + { + fr->bo--; + fr->bo &= 0xf; + buf = fr->real_buffs[0]; + } + else + { + samples++; + buf = fr->real_buffs[1]; + } + + if(fr->bo & 0x1) + { + b0 = buf[0]; + bo1 = fr->bo; + dct64_real_avx(buf[1]+((fr->bo+1)&0xf),buf[0]+fr->bo,bandPtr); + } + else + { + b0 = buf[1]; + bo1 = fr->bo+1; + dct64_real_avx(buf[0]+fr->bo,buf[1]+fr->bo+1,bandPtr); + } + + clip = synth_1to1_s32_x86_64_asm(fr->decwin, b0, samples, bo1); + + if(final) fr->buffer.fill += 256; + + return clip; +} + + +int synth_1to1_s32_stereo_avx(real *bandPtr_l, real *bandPtr_r, mpg123_handle *fr) +{ + int32_t *samples = (int32_t *) 
(fr->buffer.data+fr->buffer.fill); + + real *b0l, *b0r, **bufl, **bufr; + int bo1; + int clip; + + if(fr->have_eq_settings) + { + do_equalizer(bandPtr_l,0,fr->equalizer); + do_equalizer(bandPtr_r,1,fr->equalizer); + } + + fr->bo--; + fr->bo &= 0xf; + bufl = fr->real_buffs[0]; + bufr = fr->real_buffs[1]; + + if(fr->bo & 0x1) + { + b0l = bufl[0]; + b0r = bufr[0]; + bo1 = fr->bo; + dct64_real_avx(bufl[1]+((fr->bo+1)&0xf),bufl[0]+fr->bo,bandPtr_l); + dct64_real_avx(bufr[1]+((fr->bo+1)&0xf),bufr[0]+fr->bo,bandPtr_r); + } + else + { + b0l = bufl[1]; + b0r = bufr[1]; + bo1 = fr->bo+1; + dct64_real_avx(bufl[0]+fr->bo,bufl[1]+fr->bo+1,bandPtr_l); + dct64_real_avx(bufr[0]+fr->bo,bufr[1]+fr->bo+1,bandPtr_r); + } + + clip = synth_1to1_s32_s_avx_asm(fr->decwin, b0l, b0r, samples, bo1); + + fr->buffer.fill += 256; + + return clip; +} +#endif + +#if defined(OPT_SSE) || defined(OPT_SSE_VINTAGE) +/* Assembler routines. */ int synth_1to1_s32_sse_asm(real *window, real *b0, int32_t *samples, int bo1); -int synth_1to1_s32_stereo_sse_asm(real *window, real *b0l, real *b0r, int32_t *samples, int bo1); +int synth_1to1_s32_s_sse_asm(real *window, real *b0l, real *b0r, int32_t *samples, int bo1); void dct64_real_sse(real *out0, real *out1, real *samples); /* Hull for C mpg123 API */ int synth_1to1_s32_sse(real *bandPtr,int channel, mpg123_handle *fr, int final) @@ -224,7 +319,7 @@ dct64_real_sse(bufr[0]+fr->bo,bufr[1]+fr->bo+1,bandPtr_r); } - clip = synth_1to1_s32_stereo_sse_asm(fr->decwin, b0l, b0r, samples, bo1); + clip = synth_1to1_s32_s_sse_asm(fr->decwin, b0l, b0r, samples, bo1); fr->buffer.fill += 256; @@ -232,6 +327,98 @@ } #endif +#ifdef OPT_NEON +/* Assembler routines. 
*/ +int synth_1to1_s32_neon_asm(real *window, real *b0, int32_t *samples, int bo1); +int synth_1to1_s32_s_neon_asm(real *window, real *b0l, real *b0r, int32_t *samples, int bo1); +void dct64_real_neon(real *out0, real *out1, real *samples); +/* Hull for C mpg123 API */ +int synth_1to1_s32_neon(real *bandPtr,int channel, mpg123_handle *fr, int final) +{ + int32_t *samples = (int32_t *) (fr->buffer.data+fr->buffer.fill); + + real *b0, **buf; + int bo1; + int clip; + + if(fr->have_eq_settings) do_equalizer(bandPtr,channel,fr->equalizer); + + if(!channel) + { + fr->bo--; + fr->bo &= 0xf; + buf = fr->real_buffs[0]; + } + else + { + samples++; + buf = fr->real_buffs[1]; + } + + if(fr->bo & 0x1) + { + b0 = buf[0]; + bo1 = fr->bo; + dct64_real_neon(buf[1]+((fr->bo+1)&0xf),buf[0]+fr->bo,bandPtr); + } + else + { + b0 = buf[1]; + bo1 = fr->bo+1; + dct64_real_neon(buf[0]+fr->bo,buf[1]+fr->bo+1,bandPtr); + } + + clip = synth_1to1_s32_neon_asm(fr->decwin, b0, samples, bo1); + + if(final) fr->buffer.fill += 256; + + return clip; +} + +int synth_1to1_s32_stereo_neon(real *bandPtr_l, real *bandPtr_r, mpg123_handle *fr) +{ + int32_t *samples = (int32_t *) (fr->buffer.data+fr->buffer.fill); + + real *b0l, *b0r, **bufl, **bufr; + int bo1; + int clip; + + if(fr->have_eq_settings) + { + do_equalizer(bandPtr_l,0,fr->equalizer); + do_equalizer(bandPtr_r,1,fr->equalizer); + } + + fr->bo--; + fr->bo &= 0xf; + bufl = fr->real_buffs[0]; + bufr = fr->real_buffs[1]; + + if(fr->bo & 0x1) + { + b0l = bufl[0]; + b0r = bufr[0]; + bo1 = fr->bo; + dct64_real_neon(bufl[1]+((fr->bo+1)&0xf),bufl[0]+fr->bo,bandPtr_l); + dct64_real_neon(bufr[1]+((fr->bo+1)&0xf),bufr[0]+fr->bo,bandPtr_r); + } + else + { + b0l = bufl[1]; + b0r = bufr[1]; + bo1 = fr->bo+1; + dct64_real_neon(bufl[0]+fr->bo,bufl[1]+fr->bo+1,bandPtr_l); + dct64_real_neon(bufr[0]+fr->bo,bufr[1]+fr->bo+1,bandPtr_r); + } + + clip = synth_1to1_s32_s_neon_asm(fr->decwin, b0l, b0r, samples, bo1); + + fr->buffer.fill += 256; + + return clip; +} 
+#endif + #undef BLOCK #ifndef NO_DOWNSAMPLE @@ -248,7 +435,7 @@ /* Mono-related synths; they wrap over _some_ synth_2to1_s32 (could be generic, could be i386). */ #define SYNTH_NAME fr->synths.plain[r_2to1][f_32] #define MONO_NAME synth_2to1_s32_mono -#define MONO2STEREO_NAME synth_2to1_s32_mono2stereo +#define MONO2STEREO_NAME synth_2to1_s32_m2s #include "synth_mono.h" #undef SYNTH_NAME #undef MONO_NAME @@ -277,7 +464,7 @@ /* Mono-related synths; they wrap over _some_ synth_4to1_s32 (could be generic, could be i386). */ #define SYNTH_NAME fr->synths.plain[r_4to1][f_32] #define MONO_NAME synth_4to1_s32_mono -#define MONO2STEREO_NAME synth_4to1_s32_mono2stereo +#define MONO2STEREO_NAME synth_4to1_s32_m2s #include "synth_mono.h" #undef SYNTH_NAME #undef MONO_NAME @@ -305,7 +492,7 @@ /* These are all in one header, there's no flexibility to gain. */ #define SYNTH_NAME synth_ntom_s32 #define MONO_NAME synth_ntom_s32_mono -#define MONO2STEREO_NAME synth_ntom_s32_mono2stereo +#define MONO2STEREO_NAME synth_ntom_s32_m2s #include "synth_ntom.h" #undef SYNTH_NAME #undef MONO_NAME Index: lib/3rdparty/libmpg123/synth_sse_accurate.S =================================================================== --- lib/3rdparty/libmpg123/synth_sse_accurate.S (revision 62563) +++ lib/3rdparty/libmpg123/synth_sse_accurate.S (working copy) @@ -60,7 +60,7 @@ movl $4, %ecx ALIGN16 -Loop_start_1: +1: movups (WINDOW), %xmm0 movups 16(WINDOW), %xmm1 movups 32(WINDOW), %xmm2 @@ -164,12 +164,12 @@ leal 16(SAMPLES), SAMPLES decl %ecx - jnz Loop_start_1 + jnz 1b movl $4, %ecx ALIGN16 -Loop_start_2: +1: movups (WINDOW), %xmm0 movups 16(WINDOW), %xmm1 movups 32(WINDOW), %xmm2 @@ -273,7 +273,7 @@ leal 16(SAMPLES), SAMPLES decl %ecx - jnz Loop_start_2 + jnz 1b pshufw $0xee, MMREG_CLIP, %mm0 paddw MMREG_CLIP, %mm0 Index: lib/3rdparty/libmpg123/synth_sse_float.S =================================================================== --- lib/3rdparty/libmpg123/synth_sse_float.S (revision 62563) +++ 
lib/3rdparty/libmpg123/synth_sse_float.S (working copy) @@ -52,7 +52,7 @@ movl $4, %ecx ALIGN16 -Loop_start_1: +1: movups (WINDOW), %xmm0 movups 16(WINDOW), %xmm1 movups 32(WINDOW), %xmm2 @@ -137,12 +137,12 @@ leal 32(SAMPLES), SAMPLES decl %ecx - jnz Loop_start_1 + jnz 1b movl $4, %ecx ALIGN16 -Loop_start_2: +1: movups (WINDOW), %xmm0 movups 16(WINDOW), %xmm1 movups 32(WINDOW), %xmm2 @@ -227,7 +227,7 @@ leal 32(SAMPLES), SAMPLES decl %ecx - jnz Loop_start_2 + jnz 1b xorl %eax, %eax Index: lib/3rdparty/libmpg123/synth_sse_s32.S =================================================================== --- lib/3rdparty/libmpg123/synth_sse_s32.S (revision 62563) +++ lib/3rdparty/libmpg123/synth_sse_s32.S (working copy) @@ -66,7 +66,7 @@ movl $4, %ecx ALIGN16 -Loop_start_1: +1: movups (WINDOW), %xmm0 movups 16(WINDOW), %xmm1 movups 32(WINDOW), %xmm2 @@ -173,12 +173,12 @@ leal 32(SAMPLES), SAMPLES decl %ecx - jnz Loop_start_1 + jnz 1b movl $4, %ecx ALIGN16 -Loop_start_2: +1: movups (WINDOW), %xmm0 movups 16(WINDOW), %xmm1 movups 32(WINDOW), %xmm2 @@ -285,7 +285,7 @@ leal 32(SAMPLES), SAMPLES decl %ecx - jnz Loop_start_2 + jnz 1b pshufw $0xee, MMREG_CLIP, %mm0 paddw MMREG_CLIP, %mm0 Index: lib/3rdparty/libmpg123/synth_stereo_avx.S =================================================================== --- lib/3rdparty/libmpg123/synth_stereo_avx.S (revision 0) +++ lib/3rdparty/libmpg123/synth_stereo_avx.S (working copy) @@ -0,0 +1,256 @@ +/* + synth_stereo_avx: AVX optimized synth for x86-64 (stereo specific version) + + copyright 1995-2013 by the mpg123 project - free software under the terms of the LGPL 2.1 + see COPYING and AUTHORS files in distribution or http://mpg123.org + initially written by Taihei Monma +*/ + +#include "mangle.h" + +#ifdef IS_MSABI +/* short *window; */ +#define WINDOW %r10 +/* short *b0l; */ +#define B0L %rdx +/* short *b0r; */ +#define B0R %r8 +/* short *samples; */ +#define SAMPLES %r9 +#else +/* short *window; */ +#define WINDOW %rdi +/* short *b0l; */ 
+#define B0L %rsi +/* short *b0r; */ +#define B0R %rdx +/* short *samples; */ +#define SAMPLES %r9 +#endif + +/* + int synth_1to1_s_avx_asm(short *window, short *b0l, short *b0r, short *samples, int bo1); + return value: number of clipped samples +*/ + + ALIGN16 +.globl ASM_NAME(synth_1to1_s_avx_asm) +ASM_NAME(synth_1to1_s_avx_asm): +#ifdef IS_MSABI /* should save xmm6-15 */ + push %rbp + mov %rsp, %rbp + sub $144, %rsp + movaps %xmm6, (%rsp) + movaps %xmm7, 16(%rsp) + movaps %xmm8, 32(%rsp) + movaps %xmm9, 48(%rsp) + movaps %xmm10, 64(%rsp) + movaps %xmm11, 80(%rsp) + movaps %xmm12, 96(%rsp) + movaps %xmm13, 112(%rsp) + movaps %xmm14, 128(%rsp) + movl 48(%rbp), %eax /* 5th argument; placed after 32-byte shadow space */ +#endif + +#ifdef IS_MSABI + shl $1, %eax + mov %rcx, WINDOW +#else + mov %r8d, %eax + shl $1, %eax + movq %rcx, SAMPLES +#endif + add $32, WINDOW + sub %rax, WINDOW + + mov $64, %rax + movl $4, %ecx + vpxor %xmm14, %xmm14, %xmm14 + + ALIGN16 +1: + movups (WINDOW), %xmm8 + movups 16(WINDOW), %xmm9 + movups (WINDOW,%rax), %xmm10 + movups 16(WINDOW,%rax), %xmm11 + vpmaddwd (B0L), %xmm8, %xmm0 + vpmaddwd 16(B0L), %xmm9, %xmm1 + vpmaddwd (B0R), %xmm8, %xmm2 + vpmaddwd 16(B0R), %xmm9, %xmm3 + vpmaddwd 32(B0L), %xmm10, %xmm4 + vpmaddwd 48(B0L), %xmm11, %xmm5 + vpmaddwd 32(B0R), %xmm10, %xmm6 + vpmaddwd 48(B0R), %xmm11, %xmm7 + vpaddd %xmm1, %xmm0, %xmm8 + vpaddd %xmm3, %xmm2, %xmm0 + vpaddd %xmm5, %xmm4, %xmm9 + vpaddd %xmm7, %xmm6, %xmm1 + lea (WINDOW,%rax,2), WINDOW + add %rax, B0L + add %rax, B0R + + movups (WINDOW), %xmm10 + movups 16(WINDOW), %xmm11 + movups (WINDOW,%rax), %xmm12 + movups 16(WINDOW,%rax), %xmm13 + vpmaddwd (B0L), %xmm10, %xmm2 + vpmaddwd 16(B0L), %xmm11, %xmm3 + vpmaddwd (B0R), %xmm10, %xmm4 + vpmaddwd 16(B0R), %xmm11, %xmm5 + vpmaddwd 32(B0L), %xmm12, %xmm6 + vpmaddwd 48(B0L), %xmm13, %xmm10 + vpmaddwd 32(B0R), %xmm12, %xmm7 + vpmaddwd 48(B0R), %xmm13, %xmm11 + vpaddd %xmm3, %xmm2, %xmm2 + vpaddd %xmm5, %xmm4, %xmm3 + vpaddd %xmm6, 
%xmm10, %xmm4 + vpaddd %xmm7, %xmm11, %xmm5 + lea (WINDOW,%rax,2), WINDOW + add %rax, B0L + add %rax, B0R + + vpunpckldq %xmm0, %xmm8, %xmm6 + vpunpckhdq %xmm0, %xmm8, %xmm0 + vpunpckldq %xmm1, %xmm9, %xmm7 + vpunpckhdq %xmm1, %xmm9, %xmm1 + vpaddd %xmm6, %xmm0, %xmm0 + vpaddd %xmm7, %xmm1, %xmm1 + vpunpckldq %xmm3, %xmm2, %xmm6 + vpunpckhdq %xmm3, %xmm2, %xmm2 + vpunpckldq %xmm5, %xmm4, %xmm7 + vpunpckhdq %xmm5, %xmm4, %xmm3 + vpaddd %xmm6, %xmm2, %xmm2 + vpaddd %xmm7, %xmm3, %xmm3 + + vpunpcklqdq %xmm1, %xmm0, %xmm4 + vpunpckhqdq %xmm1, %xmm0, %xmm0 + vpunpcklqdq %xmm3, %xmm2, %xmm5 + vpunpckhqdq %xmm3, %xmm2, %xmm1 + vpaddd %xmm0, %xmm4, %xmm0 + vpaddd %xmm1, %xmm5, %xmm1 + vpsrad $13, %xmm0, %xmm0 + vpsrad $13, %xmm1, %xmm1 + vpackssdw %xmm1, %xmm0, %xmm2 + vpcmpeqd %xmm3, %xmm3, %xmm3 + vpslld $16, %xmm0, %xmm0 + vpslld $16, %xmm1, %xmm1 + vpsrld $16, %xmm0, %xmm0 + vpsrld $16, %xmm1, %xmm1 + vpackusdw %xmm1, %xmm0, %xmm0 + vpcmpeqw %xmm2, %xmm0, %xmm0 + vpxor %xmm3, %xmm0, %xmm0 + vpaddw %xmm0, %xmm14, %xmm14 + + movups %xmm2, (SAMPLES) + add $16, SAMPLES + dec %ecx + jnz 1b + + movl $4, %ecx + + ALIGN16 +1: + movups (WINDOW), %xmm8 + movups 16(WINDOW), %xmm9 + movups (WINDOW,%rax), %xmm10 + movups 16(WINDOW,%rax), %xmm11 + vpmaddwd (B0L), %xmm8, %xmm0 + vpmaddwd 16(B0L), %xmm9, %xmm1 + vpmaddwd (B0R), %xmm8, %xmm2 + vpmaddwd 16(B0R), %xmm9, %xmm3 + vpmaddwd -32(B0L), %xmm10, %xmm4 + vpmaddwd -16(B0L), %xmm11, %xmm5 + vpmaddwd -32(B0R), %xmm10, %xmm6 + vpmaddwd -16(B0R), %xmm11, %xmm7 + vpaddd %xmm1, %xmm0, %xmm8 + vpaddd %xmm3, %xmm2, %xmm0 + vpaddd %xmm5, %xmm4, %xmm9 + vpaddd %xmm7, %xmm6, %xmm1 + lea (WINDOW,%rax,2), WINDOW + sub %rax, B0L + sub %rax, B0R + + movups (WINDOW), %xmm10 + movups 16(WINDOW), %xmm11 + movups (WINDOW,%rax), %xmm12 + movups 16(WINDOW,%rax), %xmm13 + vpmaddwd (B0L), %xmm10, %xmm2 + vpmaddwd 16(B0L), %xmm11, %xmm3 + vpmaddwd (B0R), %xmm10, %xmm4 + vpmaddwd 16(B0R), %xmm11, %xmm5 + vpmaddwd -32(B0L), %xmm12, %xmm6 + vpmaddwd 
-16(B0L), %xmm13, %xmm10 + vpmaddwd -32(B0R), %xmm12, %xmm7 + vpmaddwd -16(B0R), %xmm13, %xmm11 + vpaddd %xmm3, %xmm2, %xmm2 + vpaddd %xmm5, %xmm4, %xmm3 + vpaddd %xmm6, %xmm10, %xmm4 + vpaddd %xmm7, %xmm11, %xmm5 + lea (WINDOW,%rax,2), WINDOW + sub %rax, B0L + sub %rax, B0R + + vpunpckldq %xmm0, %xmm8, %xmm6 + vpunpckhdq %xmm0, %xmm8, %xmm0 + vpunpckldq %xmm1, %xmm9, %xmm7 + vpunpckhdq %xmm1, %xmm9, %xmm1 + vpaddd %xmm6, %xmm0, %xmm0 + vpaddd %xmm7, %xmm1, %xmm1 + vpunpckldq %xmm3, %xmm2, %xmm6 + vpunpckhdq %xmm3, %xmm2, %xmm2 + vpunpckldq %xmm5, %xmm4, %xmm7 + vpunpckhdq %xmm5, %xmm4, %xmm3 + vpaddd %xmm6, %xmm2, %xmm2 + vpaddd %xmm7, %xmm3, %xmm3 + + vpunpcklqdq %xmm1, %xmm0, %xmm4 + vpunpckhqdq %xmm1, %xmm0, %xmm0 + vpunpcklqdq %xmm3, %xmm2, %xmm5 + vpunpckhqdq %xmm3, %xmm2, %xmm1 + vpaddd %xmm0, %xmm4, %xmm0 + vpaddd %xmm1, %xmm5, %xmm1 + vpsrad $13, %xmm0, %xmm0 + vpsrad $13, %xmm1, %xmm1 + vpackssdw %xmm1, %xmm0, %xmm2 + vpcmpeqd %xmm3, %xmm3, %xmm3 + vpslld $16, %xmm0, %xmm0 + vpslld $16, %xmm1, %xmm1 + vpsrld $16, %xmm0, %xmm0 + vpsrld $16, %xmm1, %xmm1 + vpackusdw %xmm1, %xmm0, %xmm0 + vpcmpeqw %xmm2, %xmm0, %xmm0 + vpxor %xmm3, %xmm0, %xmm0 + vpaddw %xmm0, %xmm14, %xmm14 + + movups %xmm2, (SAMPLES) + add $16, SAMPLES + dec %ecx + jnz 1b + + pxor %xmm1, %xmm1 + psubw %xmm14, %xmm1 + pshufd $0x4e, %xmm1, %xmm0 + paddw %xmm1, %xmm0 + pshuflw $0x4e, %xmm0, %xmm1 + paddw %xmm1, %xmm0 + pshuflw $0x11, %xmm0, %xmm1 + paddw %xmm1, %xmm0 + movd %xmm0, %eax + and $0x7f, %eax + +#ifdef IS_MSABI + movaps (%rsp), %xmm6 + movaps 16(%rsp), %xmm7 + movaps 32(%rsp), %xmm8 + movaps 48(%rsp), %xmm9 + movaps 64(%rsp), %xmm10 + movaps 80(%rsp), %xmm11 + movaps 96(%rsp), %xmm12 + movaps 112(%rsp), %xmm13 + movaps 128(%rsp), %xmm14 + mov %rbp, %rsp + pop %rbp +#endif + ret + +NONEXEC_STACK Index: lib/3rdparty/libmpg123/synth_stereo_avx_accurate.S =================================================================== --- lib/3rdparty/libmpg123/synth_stereo_avx_accurate.S (revision 
0) +++ lib/3rdparty/libmpg123/synth_stereo_avx_accurate.S (working copy) @@ -0,0 +1,286 @@ +/* + synth_stereo_avx_accurate: AVX optimized synth for x86-64 (stereo specific, MPEG-compliant 16bit output version) + + copyright 1995-2013 by the mpg123 project - free software under the terms of the LGPL 2.1 + see COPYING and AUTHORS files in distribution or http://mpg123.org + initially written by Taihei Monma +*/ + +#include "mangle.h" + +#ifdef IS_MSABI +/* real *window; */ +#define WINDOW %r10 +/* real *b0l; */ +#define B0L %rdx +/* real *b0r; */ +#define B0R %r8 +/* real *samples; */ +#define SAMPLES %r9 +#else +/* real *window; */ +#define WINDOW %rdi +/* real *b0l; */ +#define B0L %rsi +/* real *b0r; */ +#define B0R %rdx +/* real *samples; */ +#define SAMPLES %r9 +#endif + +/* + int synth_1to1_s_avx_accurate_asm(real *window, real *b0l, real *b0r, real *samples, int bo1); + return value: number of clipped samples +*/ + +#ifndef __APPLE__ + .section .rodata +#else + .data +#endif + ALIGN32 +maxmin_avx: + .long 1191182335 + .long 1191182335 + .long 1191182335 + .long 1191182335 + .long 1191182335 + .long 1191182335 + .long 1191182335 + .long 1191182335 + .long -956301312 + .long -956301312 + .long -956301312 + .long -956301312 + .long -956301312 + .long -956301312 + .long -956301312 + .long -956301312 + .text + ALIGN16 + .globl ASM_NAME(synth_1to1_s_avx_accurate_asm) +ASM_NAME(synth_1to1_s_avx_accurate_asm): +#ifdef IS_MSABI /* should save xmm6-15 */ + push %rbp + mov %rsp, %rbp + sub $144, %rsp + movaps %xmm6, (%rsp) + movaps %xmm7, 16(%rsp) + movaps %xmm8, 32(%rsp) + movaps %xmm9, 48(%rsp) + movaps %xmm10, 64(%rsp) + movaps %xmm11, 80(%rsp) + movaps %xmm12, 96(%rsp) + movaps %xmm13, 112(%rsp) + movaps %xmm14, 128(%rsp) + movl 48(%rbp), %eax /* 5th argument; placed after 32-byte shadow space */ +#endif + +#ifdef IS_MSABI + shl $2, %eax + mov %rcx, WINDOW +#else + mov %r8d, %eax + shl $2, %eax + mov %rcx, SAMPLES +#endif + add $64, WINDOW + sub %rax, WINDOW + + mov 
$128, %rax + mov $4, %ecx + vpxor %xmm14, %xmm14, %xmm14 + + ALIGN16 +1: + vmovups (WINDOW), %ymm8 + vmovups 32(WINDOW), %ymm9 + vmovups (WINDOW,%rax), %ymm10 + vmovups 32(WINDOW,%rax), %ymm11 + vmulps (B0L), %ymm8, %ymm0 + vmulps 32(B0L), %ymm9, %ymm1 + vmulps (B0R), %ymm8, %ymm2 + vmulps 32(B0R), %ymm9, %ymm3 + vmulps 64(B0L), %ymm10, %ymm4 + vmulps 96(B0L), %ymm11, %ymm5 + vmulps 64(B0R), %ymm10, %ymm6 + vmulps 96(B0R), %ymm11, %ymm7 + vaddps %ymm1, %ymm0, %ymm8 + vaddps %ymm3, %ymm2, %ymm0 + vaddps %ymm5, %ymm4, %ymm9 + vaddps %ymm7, %ymm6, %ymm1 + lea (WINDOW,%rax,2), WINDOW + add %rax, B0L + add %rax, B0R + + vmovups (WINDOW), %ymm10 + vmovups 32(WINDOW), %ymm11 + vmovups (WINDOW,%rax), %ymm12 + vmovups 32(WINDOW,%rax), %ymm13 + vmulps (B0L), %ymm10, %ymm2 + vmulps 32(B0L), %ymm11, %ymm3 + vmulps (B0R), %ymm10, %ymm4 + vmulps 32(B0R), %ymm11, %ymm5 + vmulps 64(B0L), %ymm12, %ymm6 + vmulps 96(B0L), %ymm13, %ymm10 + vmulps 64(B0R), %ymm12, %ymm7 + vmulps 96(B0R), %ymm13, %ymm11 + vaddps %ymm3, %ymm2, %ymm2 + vaddps %ymm5, %ymm4, %ymm3 + vaddps %ymm6, %ymm10, %ymm4 + vaddps %ymm7, %ymm11, %ymm5 + lea (WINDOW,%rax,2), WINDOW + add %rax, B0L + add %rax, B0R + + vunpcklps %ymm0, %ymm8, %ymm6 + vunpckhps %ymm0, %ymm8, %ymm0 + vunpcklps %ymm1, %ymm9, %ymm7 + vunpckhps %ymm1, %ymm9, %ymm1 + vaddps %ymm6, %ymm0, %ymm0 + vaddps %ymm7, %ymm1, %ymm1 + vunpcklps %ymm3, %ymm2, %ymm6 + vunpckhps %ymm3, %ymm2, %ymm2 + vunpcklps %ymm5, %ymm4, %ymm7 + vunpckhps %ymm5, %ymm4, %ymm3 + vaddps %ymm6, %ymm2, %ymm2 + vaddps %ymm7, %ymm3, %ymm3 + + vunpcklpd %ymm1, %ymm0, %ymm4 + vunpckhpd %ymm1, %ymm0, %ymm0 + vunpcklpd %ymm3, %ymm2, %ymm5 + vunpckhpd %ymm3, %ymm2, %ymm1 + vsubps %ymm0, %ymm4, %ymm0 + vsubps %ymm1, %ymm5, %ymm1 + vperm2f128 $0x20, %ymm1, %ymm0, %ymm2 + vperm2f128 $0x31, %ymm1, %ymm0, %ymm3 + vaddps %ymm3, %ymm2, %ymm0 + vcmpnleps maxmin_avx(%rip), %ymm0, %ymm1 + vcmpltps 32+maxmin_avx(%rip), %ymm0, %ymm2 + vextractf128 $0x1, %ymm1, %xmm3 + vextractf128 $0x1, %ymm2, 
%xmm4 + vpackssdw %xmm2, %xmm1, %xmm1 + vpackssdw %xmm4, %xmm3, %xmm3 + vpaddw %xmm3, %xmm1, %xmm1 + vpaddw %xmm1, %xmm14, %xmm14 + vcvtps2dq %ymm0, %ymm0 + vextractf128 $0x1, %ymm0, %xmm1 + vpackssdw %xmm1, %xmm0, %xmm0 + + vmovups %xmm0, (SAMPLES) + add $16, SAMPLES + dec %ecx + jnz 1b + + mov $4, %ecx + + ALIGN16 +1: + vmovups (WINDOW), %ymm8 + vmovups 32(WINDOW), %ymm9 + vmovups (WINDOW,%rax), %ymm10 + vmovups 32(WINDOW,%rax), %ymm11 + vmulps (B0L), %ymm8, %ymm0 + vmulps 32(B0L), %ymm9, %ymm1 + vmulps (B0R), %ymm8, %ymm2 + vmulps 32(B0R), %ymm9, %ymm3 + vmulps -64(B0L), %ymm10, %ymm4 + vmulps -32(B0L), %ymm11, %ymm5 + vmulps -64(B0R), %ymm10, %ymm6 + vmulps -32(B0R), %ymm11, %ymm7 + vaddps %ymm1, %ymm0, %ymm8 + vaddps %ymm3, %ymm2, %ymm0 + vaddps %ymm5, %ymm4, %ymm9 + vaddps %ymm7, %ymm6, %ymm1 + lea (WINDOW,%rax,2), WINDOW + sub %rax, B0L + sub %rax, B0R + + vmovups (WINDOW), %ymm10 + vmovups 32(WINDOW), %ymm11 + vmovups (WINDOW,%rax), %ymm12 + vmovups 32(WINDOW,%rax), %ymm13 + vmulps (B0L), %ymm10, %ymm2 + vmulps 32(B0L), %ymm11, %ymm3 + vmulps (B0R), %ymm10, %ymm4 + vmulps 32(B0R), %ymm11, %ymm5 + vmulps -64(B0L), %ymm12, %ymm6 + vmulps -32(B0L), %ymm13, %ymm10 + vmulps -64(B0R), %ymm12, %ymm7 + vmulps -32(B0R), %ymm13, %ymm11 + vaddps %ymm3, %ymm2, %ymm2 + vaddps %ymm5, %ymm4, %ymm3 + vaddps %ymm6, %ymm10, %ymm4 + vaddps %ymm7, %ymm11, %ymm5 + lea (WINDOW,%rax,2), WINDOW + sub %rax, B0L + sub %rax, B0R + + vunpcklps %ymm0, %ymm8, %ymm6 + vunpckhps %ymm0, %ymm8, %ymm0 + vunpcklps %ymm1, %ymm9, %ymm7 + vunpckhps %ymm1, %ymm9, %ymm1 + vaddps %ymm6, %ymm0, %ymm0 + vaddps %ymm7, %ymm1, %ymm1 + vunpcklps %ymm3, %ymm2, %ymm6 + vunpckhps %ymm3, %ymm2, %ymm2 + vunpcklps %ymm5, %ymm4, %ymm7 + vunpckhps %ymm5, %ymm4, %ymm3 + vaddps %ymm6, %ymm2, %ymm2 + vaddps %ymm7, %ymm3, %ymm3 + + vunpcklpd %ymm1, %ymm0, %ymm4 + vunpckhpd %ymm1, %ymm0, %ymm0 + vunpcklpd %ymm3, %ymm2, %ymm5 + vunpckhpd %ymm3, %ymm2, %ymm1 + vaddps %ymm0, %ymm4, %ymm0 + vaddps %ymm1, %ymm5, %ymm1 + 
vperm2f128 $0x20, %ymm1, %ymm0, %ymm2 + vperm2f128 $0x31, %ymm1, %ymm0, %ymm3 + vaddps %ymm3, %ymm2, %ymm0 + vcmpnleps maxmin_avx(%rip), %ymm0, %ymm1 + vcmpltps 32+maxmin_avx(%rip), %ymm0, %ymm2 + vextractf128 $0x1, %ymm1, %xmm3 + vextractf128 $0x1, %ymm2, %xmm4 + vpackssdw %xmm2, %xmm1, %xmm1 + vpackssdw %xmm4, %xmm3, %xmm3 + vpaddw %xmm3, %xmm1, %xmm1 + vpaddw %xmm1, %xmm14, %xmm14 + vcvtps2dq %ymm0, %ymm0 + vextractf128 $0x1, %ymm0, %xmm1 + vpackssdw %xmm1, %xmm0, %xmm0 + + vmovups %xmm0, (SAMPLES) + add $16, SAMPLES + dec %ecx + jnz 1b + + vzeroupper + + pxor %xmm1, %xmm1 + psubw %xmm14, %xmm1 + pshufd $0x4e, %xmm1, %xmm0 + paddw %xmm1, %xmm0 + pshuflw $0x4e, %xmm0, %xmm1 + paddw %xmm1, %xmm0 + pshuflw $0x11, %xmm0, %xmm1 + paddw %xmm1, %xmm0 + movd %xmm0, %eax + and $0x7f, %eax + +#ifdef IS_MSABI + movaps (%rsp), %xmm6 + movaps 16(%rsp), %xmm7 + movaps 32(%rsp), %xmm8 + movaps 48(%rsp), %xmm9 + movaps 64(%rsp), %xmm10 + movaps 80(%rsp), %xmm11 + movaps 96(%rsp), %xmm12 + movaps 112(%rsp), %xmm13 + movaps 128(%rsp), %xmm14 + mov %rbp, %rsp + pop %rbp +#endif + ret + +NONEXEC_STACK Index: lib/3rdparty/libmpg123/synth_stereo_avx_float.S =================================================================== --- lib/3rdparty/libmpg123/synth_stereo_avx_float.S (revision 0) +++ lib/3rdparty/libmpg123/synth_stereo_avx_float.S (working copy) @@ -0,0 +1,243 @@ +/* + synth_stereo_avx_float: AVX optimized synth for x86-64 (stereo specific, float output version) + + copyright 1995-2013 by the mpg123 project - free software under the terms of the LGPL 2.1 + see COPYING and AUTHORS files in distribution or http://mpg123.org + initially written by Taihei Monma +*/ + +#include "mangle.h" + +#ifdef IS_MSABI +/* real *window; */ +#define WINDOW %r10 +/* real *b0l; */ +#define B0L %rdx +/* real *b0r; */ +#define B0R %r8 +/* real *samples; */ +#define SAMPLES %r9 +#else +/* real *window; */ +#define WINDOW %rdi +/* real *b0l; */ +#define B0L %rsi +/* real *b0r; */ +#define B0R %rdx
+/* real *samples; */ +#define SAMPLES %r9 +#endif + +/* + int synth_1to1_real_s_avx_asm(real *window, real *b0l, real *b0r, real *samples, int bo1); + return value: number of clipped samples (0); float output is not clipped, eax is cleared before ret. Each of the two 4-iteration loops below stores one 32-byte vector per iteration to samples. +*/ + +#ifndef __APPLE__ + .section .rodata +#else + .data +#endif + ALIGN16 +scale_avx: + .long 939524096 /* 0x38000000, i.e. 2^-15 as an IEEE-754 single */ + .text + ALIGN16 + .globl ASM_NAME(synth_1to1_real_s_avx_asm) +ASM_NAME(synth_1to1_real_s_avx_asm): +#ifdef IS_MSABI /* should save xmm6-15; only xmm6-14 are used below, so nine registers are spilled */ + push %rbp + mov %rsp, %rbp + sub $144, %rsp /* nine 16-byte save slots */ + movaps %xmm6, (%rsp) + movaps %xmm7, 16(%rsp) + movaps %xmm8, 32(%rsp) + movaps %xmm9, 48(%rsp) + movaps %xmm10, 64(%rsp) + movaps %xmm11, 80(%rsp) + movaps %xmm12, 96(%rsp) + movaps %xmm13, 112(%rsp) + movaps %xmm14, 128(%rsp) + movl 48(%rbp), %eax /* 5th argument; placed after 32-byte shadow space */ +#endif + + vbroadcastss scale_avx(%rip), %ymm14 /* scale factor replicated into all 8 lanes */ + +#ifdef IS_MSABI + shl $2, %eax /* bo1 * 4 bytes */ + mov %rcx, WINDOW /* move the argument out of rcx: ecx is the loop counter below */ +#else + mov %r8d, %eax /* bo1 */ + shl $2, %eax /* bo1 * 4 bytes */ + mov %rcx, SAMPLES /* move the argument out of rcx: ecx is the loop counter below */ +#endif + add $64, WINDOW + sub %rax, WINDOW /* WINDOW now points 64 - 4*bo1 bytes past its input value */ + + mov $128, %rax /* byte stride used to step WINDOW, B0L and B0R */ + mov $4, %ecx /* first loop: 4 iterations, B0L/B0R step upwards */ + + ALIGN16 +1: + vmovups (WINDOW), %ymm8 + vmovups 32(WINDOW), %ymm9 + vmovups (WINDOW,%rax), %ymm10 + vmovups 32(WINDOW,%rax), %ymm11 + vmulps (B0L), %ymm8, %ymm0 + vmulps 32(B0L), %ymm9, %ymm1 + vmulps (B0R), %ymm8, %ymm2 + vmulps 32(B0R), %ymm9, %ymm3 + vmulps 64(B0L), %ymm10, %ymm4 + vmulps 96(B0L), %ymm11, %ymm5 + vmulps 64(B0R), %ymm10, %ymm6 + vmulps 96(B0R), %ymm11, %ymm7 + vaddps %ymm1, %ymm0, %ymm8 + vaddps %ymm3, %ymm2, %ymm0 + vaddps %ymm5, %ymm4, %ymm9 + vaddps %ymm7, %ymm6, %ymm1 + lea (WINDOW,%rax,2), WINDOW + add %rax, B0L + add %rax, B0R + + vmovups (WINDOW), %ymm10 + vmovups 32(WINDOW), %ymm11 + vmovups (WINDOW,%rax), %ymm12 + vmovups 32(WINDOW,%rax), %ymm13 + vmulps (B0L), %ymm10, %ymm2 + vmulps 32(B0L), %ymm11, %ymm3 + vmulps (B0R), %ymm10, %ymm4 + vmulps 32(B0R), %ymm11, %ymm5 + vmulps 64(B0L), %ymm12, %ymm6 + vmulps 96(B0L), %ymm13, %ymm10 + vmulps 64(B0R), %ymm12, %ymm7 + vmulps 96(B0R), %ymm13, %ymm11 + vaddps %ymm3,
%ymm2, %ymm2 + vaddps %ymm5, %ymm4, %ymm3 + vaddps %ymm6, %ymm10, %ymm4 + vaddps %ymm7, %ymm11, %ymm5 + lea (WINDOW,%rax,2), WINDOW + add %rax, B0L + add %rax, B0R + + vunpcklps %ymm0, %ymm8, %ymm6 + vunpckhps %ymm0, %ymm8, %ymm0 + vunpcklps %ymm1, %ymm9, %ymm7 + vunpckhps %ymm1, %ymm9, %ymm1 + vaddps %ymm6, %ymm0, %ymm0 + vaddps %ymm7, %ymm1, %ymm1 + vunpcklps %ymm3, %ymm2, %ymm6 + vunpckhps %ymm3, %ymm2, %ymm2 + vunpcklps %ymm5, %ymm4, %ymm7 + vunpckhps %ymm5, %ymm4, %ymm3 + vaddps %ymm6, %ymm2, %ymm2 + vaddps %ymm7, %ymm3, %ymm3 + + vunpcklpd %ymm1, %ymm0, %ymm4 + vunpckhpd %ymm1, %ymm0, %ymm0 + vunpcklpd %ymm3, %ymm2, %ymm5 + vunpckhpd %ymm3, %ymm2, %ymm1 + vsubps %ymm0, %ymm4, %ymm0 + vsubps %ymm1, %ymm5, %ymm1 + vperm2f128 $0x20, %ymm1, %ymm0, %ymm2 + vperm2f128 $0x31, %ymm1, %ymm0, %ymm3 + vaddps %ymm3, %ymm2, %ymm0 + vmulps %ymm14, %ymm0, %ymm0 /* apply the scale_avx factor */ + + vmovups %ymm0, (SAMPLES) + add $32, SAMPLES + dec %ecx + jnz 1b + + mov $4, %ecx /* second loop: 4 iterations, B0L/B0R step downwards */ + + ALIGN16 +1: + vmovups (WINDOW), %ymm8 + vmovups 32(WINDOW), %ymm9 + vmovups (WINDOW,%rax), %ymm10 + vmovups 32(WINDOW,%rax), %ymm11 + vmulps (B0L), %ymm8, %ymm0 + vmulps 32(B0L), %ymm9, %ymm1 + vmulps (B0R), %ymm8, %ymm2 + vmulps 32(B0R), %ymm9, %ymm3 + vmulps -64(B0L), %ymm10, %ymm4 + vmulps -32(B0L), %ymm11, %ymm5 + vmulps -64(B0R), %ymm10, %ymm6 + vmulps -32(B0R), %ymm11, %ymm7 + vaddps %ymm1, %ymm0, %ymm8 + vaddps %ymm3, %ymm2, %ymm0 + vaddps %ymm5, %ymm4, %ymm9 + vaddps %ymm7, %ymm6, %ymm1 + lea (WINDOW,%rax,2), WINDOW + sub %rax, B0L + sub %rax, B0R + + vmovups (WINDOW), %ymm10 + vmovups 32(WINDOW), %ymm11 + vmovups (WINDOW,%rax), %ymm12 + vmovups 32(WINDOW,%rax), %ymm13 + vmulps (B0L), %ymm10, %ymm2 + vmulps 32(B0L), %ymm11, %ymm3 + vmulps (B0R), %ymm10, %ymm4 + vmulps 32(B0R), %ymm11, %ymm5 + vmulps -64(B0L), %ymm12, %ymm6 + vmulps -32(B0L), %ymm13, %ymm10 + vmulps -64(B0R), %ymm12, %ymm7 + vmulps -32(B0R), %ymm13, %ymm11 + vaddps %ymm3, %ymm2, %ymm2 + vaddps %ymm5, %ymm4, %ymm3 + vaddps %ymm6, %ymm10, %ymm4 + vaddps
%ymm7, %ymm11, %ymm5 + lea (WINDOW,%rax,2), WINDOW + sub %rax, B0L + sub %rax, B0R + + vunpcklps %ymm0, %ymm8, %ymm6 + vunpckhps %ymm0, %ymm8, %ymm0 + vunpcklps %ymm1, %ymm9, %ymm7 + vunpckhps %ymm1, %ymm9, %ymm1 + vaddps %ymm6, %ymm0, %ymm0 + vaddps %ymm7, %ymm1, %ymm1 + vunpcklps %ymm3, %ymm2, %ymm6 + vunpckhps %ymm3, %ymm2, %ymm2 + vunpcklps %ymm5, %ymm4, %ymm7 + vunpckhps %ymm5, %ymm4, %ymm3 + vaddps %ymm6, %ymm2, %ymm2 + vaddps %ymm7, %ymm3, %ymm3 + + vunpcklpd %ymm1, %ymm0, %ymm4 + vunpckhpd %ymm1, %ymm0, %ymm0 + vunpcklpd %ymm3, %ymm2, %ymm5 + vunpckhpd %ymm3, %ymm2, %ymm1 + vaddps %ymm0, %ymm4, %ymm0 + vaddps %ymm1, %ymm5, %ymm1 + vperm2f128 $0x20, %ymm1, %ymm0, %ymm2 + vperm2f128 $0x31, %ymm1, %ymm0, %ymm3 + vaddps %ymm3, %ymm2, %ymm0 + vmulps %ymm14, %ymm0, %ymm0 /* apply the scale_avx factor */ + + vmovups %ymm0, (SAMPLES) + add $32, SAMPLES + dec %ecx + jnz 1b + + vzeroupper + + xor %eax, %eax /* return 0 clipped samples */ + +#ifdef IS_MSABI + movaps (%rsp), %xmm6 + movaps 16(%rsp), %xmm7 + movaps 32(%rsp), %xmm8 + movaps 48(%rsp), %xmm9 + movaps 64(%rsp), %xmm10 + movaps 80(%rsp), %xmm11 + movaps 96(%rsp), %xmm12 + movaps 112(%rsp), %xmm13 + movaps 128(%rsp), %xmm14 + mov %rbp, %rsp + pop %rbp +#endif + ret + +NONEXEC_STACK Index: lib/3rdparty/libmpg123/synth_stereo_avx_s32.S =================================================================== --- lib/3rdparty/libmpg123/synth_stereo_avx_s32.S (revision 0) +++ lib/3rdparty/libmpg123/synth_stereo_avx_s32.S (working copy) @@ -0,0 +1,292 @@ +/* + synth_stereo_avx_s32: AVX optimized synth for x86-64 (stereo specific, s32 output version) + + copyright 1995-2013 by the mpg123 project - free software under the terms of the LGPL 2.1 + see COPYING and AUTHORS files in distribution or http://mpg123.org + initially written by Taihei Monma +*/ + +#include "mangle.h" + +#ifdef IS_MSABI +/* real *window; */ +#define WINDOW %r10 +/* real *b0l; */ +#define B0L %rdx +/* real *b0r; */ +#define B0R %r8 +/* real *samples; */ +#define SAMPLES %r9 +#else +/* real *window; */ +#define
WINDOW %rdi +/* real *b0l; */ +#define B0L %rsi +/* real *b0r; */ +#define B0R %rdx +/* real *samples; */ +#define SAMPLES %r9 +#endif + +/* + int synth_1to1_s32_s_avx_asm(real *window, real *b0l, real *b0r, real *samples, int bo1); + return value: number of clipped samples +*/ + +#ifndef __APPLE__ + .section .rodata +#else + .data +#endif + ALIGN32 +maxmin_avx: + .long 1191182335 + .long 1191182335 + .long 1191182335 + .long 1191182335 + .long 1191182335 + .long 1191182335 + .long 1191182335 + .long 1191182335 + .long -956301312 + .long -956301312 + .long -956301312 + .long -956301312 + .long -956301312 + .long -956301312 + .long -956301312 + .long -956301312 +scale_avx: + .long 1199570944 + .text + ALIGN16 + .globl ASM_NAME(synth_1to1_s32_s_avx_asm) +ASM_NAME(synth_1to1_s32_s_avx_asm): +#ifdef IS_MSABI /* should save xmm6-15 */ + push %rbp + mov %rsp, %rbp + sub $160, %rsp + movaps %xmm6, (%rsp) + movaps %xmm7, 16(%rsp) + movaps %xmm8, 32(%rsp) + movaps %xmm9, 48(%rsp) + movaps %xmm10, 64(%rsp) + movaps %xmm11, 80(%rsp) + movaps %xmm12, 96(%rsp) + movaps %xmm13, 112(%rsp) + movaps %xmm14, 128(%rsp) + movaps %xmm15, 144(%rsp) + movl 48(%rbp), %eax /* 5th argument; placed after 32-byte shadow space */ +#endif + + vbroadcastss scale_avx(%rip), %ymm14 + +#ifdef IS_MSABI + shl $2, %eax + mov %rcx, WINDOW +#else + mov %r8d, %eax + shl $2, %eax + mov %rcx, SAMPLES +#endif + add $64, WINDOW + sub %rax, WINDOW + + mov $128, %rax + mov $4, %ecx + vpxor %xmm15, %xmm15, %xmm15 + + ALIGN16 +1: + vmovups (WINDOW), %ymm8 + vmovups 32(WINDOW), %ymm9 + vmovups (WINDOW,%rax), %ymm10 + vmovups 32(WINDOW,%rax), %ymm11 + vmulps (B0L), %ymm8, %ymm0 + vmulps 32(B0L), %ymm9, %ymm1 + vmulps (B0R), %ymm8, %ymm2 + vmulps 32(B0R), %ymm9, %ymm3 + vmulps 64(B0L), %ymm10, %ymm4 + vmulps 96(B0L), %ymm11, %ymm5 + vmulps 64(B0R), %ymm10, %ymm6 + vmulps 96(B0R), %ymm11, %ymm7 + vaddps %ymm1, %ymm0, %ymm8 + vaddps %ymm3, %ymm2, %ymm0 + vaddps %ymm5, %ymm4, %ymm9 + vaddps %ymm7, %ymm6, %ymm1 + lea 
(WINDOW,%rax,2), WINDOW + add %rax, B0L + add %rax, B0R + + vmovups (WINDOW), %ymm10 + vmovups 32(WINDOW), %ymm11 + vmovups (WINDOW,%rax), %ymm12 + vmovups 32(WINDOW,%rax), %ymm13 + vmulps (B0L), %ymm10, %ymm2 + vmulps 32(B0L), %ymm11, %ymm3 + vmulps (B0R), %ymm10, %ymm4 + vmulps 32(B0R), %ymm11, %ymm5 + vmulps 64(B0L), %ymm12, %ymm6 + vmulps 96(B0L), %ymm13, %ymm10 + vmulps 64(B0R), %ymm12, %ymm7 + vmulps 96(B0R), %ymm13, %ymm11 + vaddps %ymm3, %ymm2, %ymm2 + vaddps %ymm5, %ymm4, %ymm3 + vaddps %ymm6, %ymm10, %ymm4 + vaddps %ymm7, %ymm11, %ymm5 + lea (WINDOW,%rax,2), WINDOW + add %rax, B0L + add %rax, B0R + + vunpcklps %ymm0, %ymm8, %ymm6 + vunpckhps %ymm0, %ymm8, %ymm0 + vunpcklps %ymm1, %ymm9, %ymm7 + vunpckhps %ymm1, %ymm9, %ymm1 + vaddps %ymm6, %ymm0, %ymm0 + vaddps %ymm7, %ymm1, %ymm1 + vunpcklps %ymm3, %ymm2, %ymm6 + vunpckhps %ymm3, %ymm2, %ymm2 + vunpcklps %ymm5, %ymm4, %ymm7 + vunpckhps %ymm5, %ymm4, %ymm3 + vaddps %ymm6, %ymm2, %ymm2 + vaddps %ymm7, %ymm3, %ymm3 + + vunpcklpd %ymm1, %ymm0, %ymm4 + vunpckhpd %ymm1, %ymm0, %ymm0 + vunpcklpd %ymm3, %ymm2, %ymm5 + vunpckhpd %ymm3, %ymm2, %ymm1 + vsubps %ymm0, %ymm4, %ymm0 + vsubps %ymm1, %ymm5, %ymm1 + vperm2f128 $0x20, %ymm1, %ymm0, %ymm2 + vperm2f128 $0x31, %ymm1, %ymm0, %ymm3 + vaddps %ymm3, %ymm2, %ymm0 + vcmpnleps maxmin_avx(%rip), %ymm0, %ymm1 + vcmpltps 32+maxmin_avx(%rip), %ymm0, %ymm2 + vmulps %ymm14, %ymm0, %ymm0 + vextractf128 $0x1, %ymm1, %xmm3 + vextractf128 $0x1, %ymm2, %xmm4 + vpackssdw %xmm2, %xmm1, %xmm5 + vpackssdw %xmm4, %xmm3, %xmm3 + vcvtps2dq %ymm0, %ymm0 + vpaddw %xmm3, %xmm5, %xmm5 + vpaddw %xmm5, %xmm15, %xmm15 + vxorps %ymm1, %ymm0, %ymm0 + + vmovups %ymm0, (SAMPLES) + add $32, SAMPLES + dec %ecx + jnz 1b + + mov $4, %ecx + + ALIGN16 +1: + vmovups (WINDOW), %ymm8 + vmovups 32(WINDOW), %ymm9 + vmovups (WINDOW,%rax), %ymm10 + vmovups 32(WINDOW,%rax), %ymm11 + vmulps (B0L), %ymm8, %ymm0 + vmulps 32(B0L), %ymm9, %ymm1 + vmulps (B0R), %ymm8, %ymm2 + vmulps 32(B0R), %ymm9, %ymm3 + vmulps 
-64(B0L), %ymm10, %ymm4 + vmulps -32(B0L), %ymm11, %ymm5 + vmulps -64(B0R), %ymm10, %ymm6 + vmulps -32(B0R), %ymm11, %ymm7 + vaddps %ymm1, %ymm0, %ymm8 + vaddps %ymm3, %ymm2, %ymm0 + vaddps %ymm5, %ymm4, %ymm9 + vaddps %ymm7, %ymm6, %ymm1 + lea (WINDOW,%rax,2), WINDOW + sub %rax, B0L + sub %rax, B0R + + vmovups (WINDOW), %ymm10 + vmovups 32(WINDOW), %ymm11 + vmovups (WINDOW,%rax), %ymm12 + vmovups 32(WINDOW,%rax), %ymm13 + vmulps (B0L), %ymm10, %ymm2 + vmulps 32(B0L), %ymm11, %ymm3 + vmulps (B0R), %ymm10, %ymm4 + vmulps 32(B0R), %ymm11, %ymm5 + vmulps -64(B0L), %ymm12, %ymm6 + vmulps -32(B0L), %ymm13, %ymm10 + vmulps -64(B0R), %ymm12, %ymm7 + vmulps -32(B0R), %ymm13, %ymm11 + vaddps %ymm3, %ymm2, %ymm2 + vaddps %ymm5, %ymm4, %ymm3 + vaddps %ymm6, %ymm10, %ymm4 + vaddps %ymm7, %ymm11, %ymm5 + lea (WINDOW,%rax,2), WINDOW + sub %rax, B0L + sub %rax, B0R + + vunpcklps %ymm0, %ymm8, %ymm6 + vunpckhps %ymm0, %ymm8, %ymm0 + vunpcklps %ymm1, %ymm9, %ymm7 + vunpckhps %ymm1, %ymm9, %ymm1 + vaddps %ymm6, %ymm0, %ymm0 + vaddps %ymm7, %ymm1, %ymm1 + vunpcklps %ymm3, %ymm2, %ymm6 + vunpckhps %ymm3, %ymm2, %ymm2 + vunpcklps %ymm5, %ymm4, %ymm7 + vunpckhps %ymm5, %ymm4, %ymm3 + vaddps %ymm6, %ymm2, %ymm2 + vaddps %ymm7, %ymm3, %ymm3 + + vunpcklpd %ymm1, %ymm0, %ymm4 + vunpckhpd %ymm1, %ymm0, %ymm0 + vunpcklpd %ymm3, %ymm2, %ymm5 + vunpckhpd %ymm3, %ymm2, %ymm1 + vaddps %ymm0, %ymm4, %ymm0 + vaddps %ymm1, %ymm5, %ymm1 + vperm2f128 $0x20, %ymm1, %ymm0, %ymm2 + vperm2f128 $0x31, %ymm1, %ymm0, %ymm3 + vaddps %ymm3, %ymm2, %ymm0 + vcmpnleps maxmin_avx(%rip), %ymm0, %ymm1 + vcmpltps 32+maxmin_avx(%rip), %ymm0, %ymm2 + vmulps %ymm14, %ymm0, %ymm0 + vextractf128 $0x1, %ymm1, %xmm3 + vextractf128 $0x1, %ymm2, %xmm4 + vpackssdw %xmm2, %xmm1, %xmm5 + vpackssdw %xmm4, %xmm3, %xmm3 + vcvtps2dq %ymm0, %ymm0 + vpaddw %xmm3, %xmm5, %xmm5 + vpaddw %xmm5, %xmm15, %xmm15 + vxorps %ymm1, %ymm0, %ymm0 + + vmovups %ymm0, (SAMPLES) + add $32, SAMPLES + dec %ecx + jnz 1b + + vzeroupper + + pxor %xmm1, 
%xmm1 + psubw %xmm15, %xmm1 + pshufd $0x4e, %xmm1, %xmm0 + paddw %xmm1, %xmm0 + pshuflw $0x4e, %xmm0, %xmm1 + paddw %xmm1, %xmm0 + pshuflw $0x11, %xmm0, %xmm1 + paddw %xmm1, %xmm0 + movd %xmm0, %eax + and $0x7f, %eax + +#ifdef IS_MSABI + movaps (%rsp), %xmm6 + movaps 16(%rsp), %xmm7 + movaps 32(%rsp), %xmm8 + movaps 48(%rsp), %xmm9 + movaps 64(%rsp), %xmm10 + movaps 80(%rsp), %xmm11 + movaps 96(%rsp), %xmm12 + movaps 112(%rsp), %xmm13 + movaps 128(%rsp), %xmm14 + movaps 144(%rsp), %xmm15 + mov %rbp, %rsp + pop %rbp +#endif + ret + +NONEXEC_STACK Index: lib/3rdparty/libmpg123/synth_stereo_neon.S =================================================================== --- lib/3rdparty/libmpg123/synth_stereo_neon.S (revision 0) +++ lib/3rdparty/libmpg123/synth_stereo_neon.S (working copy) @@ -0,0 +1,185 @@ +/* + synth_stereo_neon: ARM NEON optimized synth (stereo specific version) + + copyright 1995-2010 by the mpg123 project - free software under the terms of the LGPL 2.1 + see COPYING and AUTHORS files in distribution or http://mpg123.org + initially written by Taihei Monma +*/ + +#include "mangle.h" + +#define WINDOW r0 +#define B0L r1 +#define B0R r2 +#define SAMPLES r3 + +/* + int synth_1to1_s_neon_asm(short *window, short *b0l, short *b0r, short *samples, int bo1); + return value: number of clipped samples +*/ + + .code 32 +#ifndef __APPLE__ + .fpu neon +#endif + + .text + .globl ASM_NAME(synth_1to1_s_neon_asm) +#ifdef __ELF__ + .type ASM_NAME(synth_1to1_s_neon_asm), %function +#endif + ALIGN4 +ASM_NAME(synth_1to1_s_neon_asm): + push {r4-r6, lr} + vpush {q4-q7} + + ldr r4, [sp, #80] + add WINDOW, WINDOW, #32 + sub WINDOW, WINDOW, r4, lsl #1 + + mov r4, #4 + mov r5, #64 +1: + vld1.16 {d0-d3}, [WINDOW], r5 + vld1.16 {d4-d7}, [WINDOW], r5 + vld1.16 {d8-d11}, [B0L, :128]! + vld1.16 {d12-d15}, [B0R, :128]! + vld1.16 {d16-d19}, [B0L, :128]! + vld1.16 {d20-d23}, [B0R, :128]! 
+ + vmull.s16 q12, d0, d8 + vmull.s16 q13, d0, d12 + vmull.s16 q14, d4, d16 + vmull.s16 q15, d4, d20 + vmlal.s16 q12, d1, d9 + vmlal.s16 q13, d1, d13 + vmlal.s16 q14, d5, d17 + vmlal.s16 q15, d5, d21 + vmlal.s16 q12, d2, d10 + vmlal.s16 q13, d2, d14 + vmlal.s16 q14, d6, d18 + vmlal.s16 q15, d6, d22 + vmlal.s16 q12, d3, d11 + vmlal.s16 q13, d3, d15 + vmlal.s16 q14, d7, d19 + vmlal.s16 q15, d7, d23 + vpadd.i32 d24, d24, d25 + vpadd.i32 d26, d26, d27 + vpadd.i32 d28, d28, d29 + vpadd.i32 d30, d30, d31 + vpadd.i32 d24, d24, d26 + vpadd.i32 d25, d28, d30 + vqshrn.s32 d0, q12, #13 + vst1.16 {d0}, [SAMPLES]! + + vld1.16 {d0-d3}, [WINDOW], r5 + vld1.16 {d4-d7}, [WINDOW], r5 + vld1.16 {d8-d11}, [B0L, :128]! + vld1.16 {d12-d15}, [B0R, :128]! + vld1.16 {d16-d19}, [B0L, :128]! + vld1.16 {d20-d23}, [B0R, :128]! + + vmull.s16 q12, d0, d8 + vmull.s16 q13, d0, d12 + vmull.s16 q14, d4, d16 + vmull.s16 q15, d4, d20 + vmlal.s16 q12, d1, d9 + vmlal.s16 q13, d1, d13 + vmlal.s16 q14, d5, d17 + vmlal.s16 q15, d5, d21 + vmlal.s16 q12, d2, d10 + vmlal.s16 q13, d2, d14 + vmlal.s16 q14, d6, d18 + vmlal.s16 q15, d6, d22 + vmlal.s16 q12, d3, d11 + vmlal.s16 q13, d3, d15 + vmlal.s16 q14, d7, d19 + vmlal.s16 q15, d7, d23 + vpadd.i32 d24, d24, d25 + vpadd.i32 d26, d26, d27 + vpadd.i32 d28, d28, d29 + vpadd.i32 d30, d30, d31 + vpadd.i32 d24, d24, d26 + vpadd.i32 d25, d28, d30 + vqshrn.s32 d0, q12, #13 + vst1.16 {d0}, [SAMPLES]! 
+ + subs r4, r4, #1 + bne 1b + + mov r4, #4 + mov r6, #-32 +1: + vld1.16 {d0-d3}, [WINDOW], r5 + vld1.16 {d4-d7}, [WINDOW], r5 + vld1.16 {d8-d11}, [B0L, :128], r6 + vld1.16 {d12-d15}, [B0R, :128], r6 + vld1.16 {d16-d19}, [B0L, :128], r6 + vld1.16 {d20-d23}, [B0R, :128], r6 + + vmull.s16 q12, d0, d8 + vmull.s16 q13, d0, d12 + vmull.s16 q14, d4, d16 + vmull.s16 q15, d4, d20 + vmlal.s16 q12, d1, d9 + vmlal.s16 q13, d1, d13 + vmlal.s16 q14, d5, d17 + vmlal.s16 q15, d5, d21 + vmlal.s16 q12, d2, d10 + vmlal.s16 q13, d2, d14 + vmlal.s16 q14, d6, d18 + vmlal.s16 q15, d6, d22 + vmlal.s16 q12, d3, d11 + vmlal.s16 q13, d3, d15 + vmlal.s16 q14, d7, d19 + vmlal.s16 q15, d7, d23 + vpadd.i32 d24, d24, d25 + vpadd.i32 d26, d26, d27 + vpadd.i32 d28, d28, d29 + vpadd.i32 d30, d30, d31 + vpadd.i32 d24, d24, d26 + vpadd.i32 d25, d28, d30 + vqshrn.s32 d0, q12, #13 + vst1.16 {d0}, [SAMPLES]! + + vld1.16 {d0-d3}, [WINDOW], r5 + vld1.16 {d4-d7}, [WINDOW], r5 + vld1.16 {d8-d11}, [B0L, :128], r6 + vld1.16 {d12-d15}, [B0R, :128], r6 + vld1.16 {d16-d19}, [B0L, :128], r6 + vld1.16 {d20-d23}, [B0R, :128], r6 + + vmull.s16 q12, d0, d8 + vmull.s16 q13, d0, d12 + vmull.s16 q14, d4, d16 + vmull.s16 q15, d4, d20 + vmlal.s16 q12, d1, d9 + vmlal.s16 q13, d1, d13 + vmlal.s16 q14, d5, d17 + vmlal.s16 q15, d5, d21 + vmlal.s16 q12, d2, d10 + vmlal.s16 q13, d2, d14 + vmlal.s16 q14, d6, d18 + vmlal.s16 q15, d6, d22 + vmlal.s16 q12, d3, d11 + vmlal.s16 q13, d3, d15 + vmlal.s16 q14, d7, d19 + vmlal.s16 q15, d7, d23 + vpadd.i32 d24, d24, d25 + vpadd.i32 d26, d26, d27 + vpadd.i32 d28, d28, d29 + vpadd.i32 d30, d30, d31 + vpadd.i32 d24, d24, d26 + vpadd.i32 d25, d28, d30 + vqshrn.s32 d0, q12, #13 + vst1.16 {d0}, [SAMPLES]! 
+ + subs r4, r4, #1 + bne 1b + + mov r0, #0 + + vpop {q4-q7} + pop {r4-r6, pc} + +NONEXEC_STACK Index: lib/3rdparty/libmpg123/synth_stereo_neon_accurate.S =================================================================== --- lib/3rdparty/libmpg123/synth_stereo_neon_accurate.S (revision 0) +++ lib/3rdparty/libmpg123/synth_stereo_neon_accurate.S (working copy) @@ -0,0 +1,274 @@ +/* + synth_stereo_neon_accurate: ARM NEON optimized synth (stereo specific, MPEG compliant 16-bit output version) + + copyright 1995-2010 by the mpg123 project - free software under the terms of the LGPL 2.1 + see COPYING and AUTHORS files in distribution or http://mpg123.org + initially written by Taihei Monma +*/ + +#include "mangle.h" + +#define WINDOW r0 +#define B0L r1 +#define B0R r2 +#define SAMPLES r3 + +/* + int synth_1to1_s_neon_accurate_asm(real *window, real *b0l, real *b0r, real *samples, int bo1); + return value: number of clipped samples; counted per lane in a 16-byte stack scratch slot that is zeroed on entry and summed into r0 before returning. Output is stored to samples as 16-bit values (vst1.16). +*/ + + .code 32 +#ifndef __APPLE__ + .fpu neon +#endif + + .text + .globl ASM_NAME(synth_1to1_s_neon_accurate_asm) +#ifdef __ELF__ + .type ASM_NAME(synth_1to1_s_neon_accurate_asm), %function +#endif +ASM_NAME(synth_1to1_s_neon_accurate_asm): + push {r4-r7, lr} + vpush {q4-q7} + ldr r4, [sp, #84] /* bo1: first stack argument, above the 20+64 bytes just pushed */ + mov r7, sp /* remember sp; restored before vpop */ + sub sp, sp, #16 + bic sp, #0xff /* aligned 16-byte scratch slot for the clip counters */ + vmov.i32 q15, #0 /* counters must start at zero: the loops only load, add and store them (q15 is rewritten before its next use) */ + vst1.32 {q15}, [sp, :128] + + add WINDOW, WINDOW, #64 + sub WINDOW, WINDOW, r4, lsl #2 /* WINDOW += 64 - 4*bo1 bytes */ + + mov r4, #4 /* first loop: 4 iterations */ + mov r5, #128 + mov r6, #64 +1: + vld1.32 {q0,q1}, [WINDOW], r5 + vld1.32 {q2,q3}, [WINDOW] + vld1.32 {q4,q5}, [B0L, :128], r6 + vld1.32 {q6,q7}, [B0R, :128], r6 + vld1.32 {q8,q9}, [B0L, :128] + vld1.32 {q10,q11}, [B0R, :128] + vmul.f32 q12, q0, q4 + vmul.f32 q13, q0, q6 + vmul.f32 q14, q2, q8 + vmul.f32 q15, q2, q10 + vmla.f32 q12, q1, q5 + vmla.f32 q13, q1, q7 + vmla.f32 q14, q3, q9 + vmla.f32 q15, q3, q11 + sub WINDOW, WINDOW, #96 + sub B0L, B0L, #32 + sub B0R, B0R, #32 + vld1.32 {q0,q1}, [WINDOW], r5 + vld1.32 {q2,q3}, [WINDOW] + vld1.32 {q4,q5}, [B0L, :128], r6 + vld1.32 {q6,q7}, [B0R, :128], r6 + vld1.32 {q8,q9}, [B0L,
:128]! + vld1.32 {q10,q11}, [B0R, :128]! + vmla.f32 q12, q0, q4 + vmla.f32 q13, q0, q6 + vmla.f32 q14, q2, q8 + vmla.f32 q15, q2, q10 + add WINDOW, WINDOW, #96 + vmla.f32 q12, q1, q5 + vmla.f32 q13, q1, q7 + vmla.f32 q14, q3, q9 + vmla.f32 q15, q3, q11 + + vld1.32 {q0,q1}, [WINDOW], r5 + vld1.32 {q2,q3}, [WINDOW] + vld1.32 {q4,q5}, [B0L, :128], r6 + vld1.32 {q6,q7}, [B0R, :128], r6 + vld1.32 {q8,q9}, [B0L, :128] + vld1.32 {q10,q11}, [B0R, :128] + vpadd.f32 d24, d24, d25 + vpadd.f32 d25, d26, d27 + vpadd.f32 d26, d28, d29 + vpadd.f32 d27, d30, d31 + vmov.i32 q15, #0x4b000000 + vmvn.i32 q14, #0xb9000000 /* ~0xb9000000 = 0x46ffffff: clip threshold as a float bit pattern */ + vorr.i32 q15, #0x00400000 + vpadd.f32 d24, d24, d25 + vpadd.f32 d25, d26, d27 + vacgt.f32 q14, q12, q14 /* all-ones in lanes whose magnitude exceeds the threshold */ + vadd.f32 q13, q12, q15 + vld1.32 {q15}, [sp, :128] /* running clip counters */ + vshr.u32 q14, q14, #31 /* reduce the mask to 0 or 1 per lane */ + vshl.i32 q13, q13, #10 + vadd.i32 q14, q14, q15 + vqshrn.s32 d26, q13, #10 + vst1.32 {q14}, [sp, :128] /* write updated counters back */ + vst1.16 {d26}, [SAMPLES]! + vmul.f32 q12, q0, q4 + vmul.f32 q13, q0, q6 + vmul.f32 q14, q2, q8 + vmul.f32 q15, q2, q10 + vmla.f32 q12, q1, q5 + vmla.f32 q13, q1, q7 + vmla.f32 q14, q3, q9 + vmla.f32 q15, q3, q11 + sub WINDOW, WINDOW, #96 + sub B0L, B0L, #32 + sub B0R, B0R, #32 + vld1.32 {q0,q1}, [WINDOW], r5 + vld1.32 {q2,q3}, [WINDOW] + vld1.32 {q4,q5}, [B0L, :128], r6 + vld1.32 {q6,q7}, [B0R, :128], r6 + vld1.32 {q8,q9}, [B0L, :128]! + vld1.32 {q10,q11}, [B0R, :128]!
+ vmla.f32 q12, q0, q4 + vmla.f32 q13, q0, q6 + vmla.f32 q14, q2, q8 + vmla.f32 q15, q2, q10 + add WINDOW, WINDOW, #96 + vmla.f32 q12, q1, q5 + vmla.f32 q13, q1, q7 + vmla.f32 q14, q3, q9 + vmla.f32 q15, q3, q11 + vpadd.f32 d24, d24, d25 + vpadd.f32 d25, d26, d27 + vpadd.f32 d26, d28, d29 + vpadd.f32 d27, d30, d31 + vmov.i32 q15, #0x4b000000 + vmvn.i32 q14, #0xb9000000 + vorr.i32 q15, #0x00400000 + vpadd.f32 d24, d24, d25 + vpadd.f32 d25, d26, d27 + vacgt.f32 q14, q12, q14 + vadd.f32 q13, q12, q15 + vld1.32 {q15}, [sp, :128] + vshr.u32 q14, q14, #31 + vshl.i32 q13, q13, #10 + vadd.i32 q14, q14, q15 + vqshrn.s32 d26, q13, #10 + vst1.32 {q14}, [sp, :128] + vst1.16 {d26}, [SAMPLES]! + + subs r4, r4, #1 + bne 1b + + mov r4, #4 /* second loop: 4 iterations with a negative B0L/B0R stride */ + mov r6, #-64 +1: + vld1.32 {q0,q1}, [WINDOW], r5 + vld1.32 {q2,q3}, [WINDOW] + vld1.32 {q4,q5}, [B0L, :128], r6 + vld1.32 {q6,q7}, [B0R, :128], r6 + vld1.32 {q8,q9}, [B0L, :128] + vld1.32 {q10,q11}, [B0R, :128] + vmul.f32 q12, q0, q4 + vmul.f32 q13, q0, q6 + vmul.f32 q14, q2, q8 + vmul.f32 q15, q2, q10 + vmla.f32 q12, q1, q5 + vmla.f32 q13, q1, q7 + vmla.f32 q14, q3, q9 + vmla.f32 q15, q3, q11 + sub WINDOW, WINDOW, #96 + add B0L, B0L, #96 + add B0R, B0R, #96 + vld1.32 {q0,q1}, [WINDOW], r5 + vld1.32 {q2,q3}, [WINDOW] + vld1.32 {q4,q5}, [B0L, :128], r6 + vld1.32 {q6,q7}, [B0R, :128], r6 + vld1.32 {q8,q9}, [B0L, :128] + vld1.32 {q10,q11}, [B0R, :128] + vmla.f32 q12, q0, q4 + vmla.f32 q13, q0, q6 + vmla.f32 q14, q2, q8 + vmla.f32 q15, q2, q10 + add WINDOW, WINDOW, #96 + sub B0L, B0L, #96 + sub B0R, B0R, #96 + vmla.f32 q12, q1, q5 + vmla.f32 q13, q1, q7 + vmla.f32 q14, q3, q9 + vmla.f32 q15, q3, q11 + + vld1.32 {q0,q1}, [WINDOW], r5 + vld1.32 {q2,q3}, [WINDOW] + vld1.32 {q4,q5}, [B0L, :128], r6 + vld1.32 {q6,q7}, [B0R, :128], r6 + vld1.32 {q8,q9}, [B0L, :128] + vld1.32 {q10,q11}, [B0R, :128] + vpadd.f32 d24, d24, d25 + vpadd.f32 d25, d26, d27 + vpadd.f32 d26, d28, d29 + vpadd.f32 d27, d30, d31 + vmov.i32 q15, #0x4b000000 + vmvn.i32 q14, #0xb9000000
+ vorr.i32 q15, #0x00400000 + vpadd.f32 d24, d24, d25 + vpadd.f32 d25, d26, d27 + vacgt.f32 q14, q12, q14 + vadd.f32 q13, q12, q15 + vld1.32 {q15}, [sp, :128] + vshr.u32 q14, q14, #31 + vshl.i32 q13, q13, #10 + vadd.i32 q14, q14, q15 + vqshrn.s32 d26, q13, #10 + vst1.32 {q14}, [sp, :128] + vst1.16 {d26}, [SAMPLES]! + vmul.f32 q12, q0, q4 + vmul.f32 q13, q0, q6 + vmul.f32 q14, q2, q8 + vmul.f32 q15, q2, q10 + vmla.f32 q12, q1, q5 + vmla.f32 q13, q1, q7 + vmla.f32 q14, q3, q9 + vmla.f32 q15, q3, q11 + sub WINDOW, WINDOW, #96 + add B0L, B0L, #96 + add B0R, B0R, #96 + vld1.32 {q0,q1}, [WINDOW], r5 + vld1.32 {q2,q3}, [WINDOW] + vld1.32 {q4,q5}, [B0L, :128], r6 + vld1.32 {q6,q7}, [B0R, :128], r6 + vld1.32 {q8,q9}, [B0L, :128] + vld1.32 {q10,q11}, [B0R, :128] + vmla.f32 q12, q0, q4 + vmla.f32 q13, q0, q6 + vmla.f32 q14, q2, q8 + vmla.f32 q15, q2, q10 + add WINDOW, WINDOW, #96 + sub B0L, B0L, #96 + sub B0R, B0R, #96 + vmla.f32 q12, q1, q5 + vmla.f32 q13, q1, q7 + vmla.f32 q14, q3, q9 + vmla.f32 q15, q3, q11 + vpadd.f32 d24, d24, d25 + vpadd.f32 d25, d26, d27 + vpadd.f32 d26, d28, d29 + vpadd.f32 d27, d30, d31 + vmov.i32 q15, #0x4b000000 + vmvn.i32 q14, #0xb9000000 + vorr.i32 q15, #0x00400000 + vpadd.f32 d24, d24, d25 + vpadd.f32 d25, d26, d27 + vacgt.f32 q14, q12, q14 + vadd.f32 q13, q12, q15 + vld1.32 {q15}, [sp, :128] + vshr.u32 q14, q14, #31 + vshl.i32 q13, q13, #10 + vadd.i32 q14, q14, q15 + vqshrn.s32 d26, q13, #10 + vst1.32 {q14}, [sp, :128] + vst1.16 {d26}, [SAMPLES]!
+ + subs r4, r4, #1 + bne 1b + + vld1.32 {q0}, [sp, :128] /* fold the four per-lane clip counters into r0 */ + vpadd.i32 d0, d0, d1 + vpadd.i32 d0, d0, d0 + vmov.32 r0, d0[0] + + mov sp, r7 /* drop the scratch slot */ + vpop {q4-q7} + pop {r4-r7, pc} + +NONEXEC_STACK Index: lib/3rdparty/libmpg123/synth_stereo_neon_float.S =================================================================== --- lib/3rdparty/libmpg123/synth_stereo_neon_float.S (revision 0) +++ lib/3rdparty/libmpg123/synth_stereo_neon_float.S (working copy) @@ -0,0 +1,230 @@ +/* + synth_stereo_neon_float: ARM NEON optimized synth (stereo specific, float output version) + + copyright 1995-2010 by the mpg123 project - free software under the terms of the LGPL 2.1 + see COPYING and AUTHORS files in distribution or http://mpg123.org + initially written by Taihei Monma +*/ + +#include "mangle.h" + +#define WINDOW r0 +#define B0L r1 +#define B0R r2 +#define SAMPLES r3 + +/* + int synth_1to1_real_s_neon_asm(real *window, real *b0l, real *b0r, real *samples, int bo1); + return value: number of clipped samples (0) +*/ + + .code 32 +#ifndef __APPLE__ + .fpu neon +#endif + + .text + .globl ASM_NAME(synth_1to1_real_s_neon_asm) +#ifdef __ELF__ + .type ASM_NAME(synth_1to1_real_s_neon_asm), %function +#endif + ALIGN4 +ASM_NAME(synth_1to1_real_s_neon_asm): + push {r4-r6, lr} + vpush {q4-q7} + + ldr r4, [sp, #80] + add WINDOW, WINDOW, #64 + sub WINDOW, WINDOW, r4, lsl #2 + + mov r4, #4 + mov r5, #128 + mov r6, #64 +1: + vld1.32 {q0,q1}, [WINDOW], r5 + vld1.32 {q2,q3}, [WINDOW] + vld1.32 {q4,q5}, [B0L, :128], r6 + vld1.32 {q6,q7}, [B0R, :128], r6 + vld1.32 {q8,q9}, [B0L, :128] + vld1.32 {q10,q11}, [B0R, :128] + vmul.f32 q12, q0, q4 + vmul.f32 q13, q0, q6 + vmul.f32 q14, q2, q8 + vmul.f32 q15, q2, q10 + vmla.f32 q12, q1, q5 + vmla.f32 q13, q1, q7 + vmla.f32 q14, q3, q9 + vmla.f32 q15, q3, q11 + sub WINDOW, WINDOW, #96 + sub B0L, B0L, #32 + sub B0R, B0R, #32 + vld1.32 {q0,q1}, [WINDOW], r5 + vld1.32 {q2,q3}, [WINDOW] + vld1.32 {q4,q5}, [B0L, :128], r6 + vld1.32 {q6,q7}, [B0R, :128], r6 + vld1.32
{q8,q9}, [B0L, :128]! + vld1.32 {q10,q11}, [B0R, :128]! + vmla.f32 q12, q0, q4 + vmla.f32 q13, q0, q6 + vmla.f32 q14, q2, q8 + vmla.f32 q15, q2, q10 + add WINDOW, WINDOW, #96 + vmla.f32 q12, q1, q5 + vmla.f32 q13, q1, q7 + vmla.f32 q14, q3, q9 + vmla.f32 q15, q3, q11 + + vld1.32 {q0,q1}, [WINDOW], r5 + vld1.32 {q2,q3}, [WINDOW] + vld1.32 {q4,q5}, [B0L, :128], r6 + vld1.32 {q6,q7}, [B0R, :128], r6 + vld1.32 {q8,q9}, [B0L, :128] + vld1.32 {q10,q11}, [B0R, :128] + vpadd.f32 d24, d24, d25 + vpadd.f32 d25, d26, d27 + vpadd.f32 d26, d28, d29 + vpadd.f32 d27, d30, d31 + vmov.i32 q14, #0x38000000 + vpadd.f32 d24, d24, d25 + vpadd.f32 d25, d26, d27 + vmul.f32 q15, q12, q14 + vmul.f32 q12, q0, q4 + vmul.f32 q13, q0, q6 + vmul.f32 q14, q2, q8 + vst1.32 {q15}, [SAMPLES]! + vmul.f32 q15, q2, q10 + vmla.f32 q12, q1, q5 + vmla.f32 q13, q1, q7 + vmla.f32 q14, q3, q9 + vmla.f32 q15, q3, q11 + sub WINDOW, WINDOW, #96 + sub B0L, B0L, #32 + sub B0R, B0R, #32 + vld1.32 {q0,q1}, [WINDOW], r5 + vld1.32 {q2,q3}, [WINDOW] + vld1.32 {q4,q5}, [B0L, :128], r6 + vld1.32 {q6,q7}, [B0R, :128], r6 + vld1.32 {q8,q9}, [B0L, :128]! + vld1.32 {q10,q11}, [B0R, :128]! + vmla.f32 q12, q0, q4 + vmla.f32 q13, q0, q6 + vmla.f32 q14, q2, q8 + vmla.f32 q15, q2, q10 + add WINDOW, WINDOW, #96 + vmla.f32 q12, q1, q5 + vmla.f32 q13, q1, q7 + vmla.f32 q14, q3, q9 + vmla.f32 q15, q3, q11 + vpadd.f32 d24, d24, d25 + vpadd.f32 d25, d26, d27 + vpadd.f32 d26, d28, d29 + vpadd.f32 d27, d30, d31 + vmov.i32 q14, #0x38000000 + vpadd.f32 d24, d24, d25 + vpadd.f32 d25, d26, d27 + vmul.f32 q12, q12, q14 + vst1.32 {q12}, [SAMPLES]! 
+ + subs r4, r4, #1 + bne 1b + + mov r4, #4 + mov r6, #-64 +1: + vld1.32 {q0,q1}, [WINDOW], r5 + vld1.32 {q2,q3}, [WINDOW] + vld1.32 {q4,q5}, [B0L, :128], r6 + vld1.32 {q6,q7}, [B0R, :128], r6 + vld1.32 {q8,q9}, [B0L, :128] + vld1.32 {q10,q11}, [B0R, :128] + vmul.f32 q12, q0, q4 + vmul.f32 q13, q0, q6 + vmul.f32 q14, q2, q8 + vmul.f32 q15, q2, q10 + vmla.f32 q12, q1, q5 + vmla.f32 q13, q1, q7 + vmla.f32 q14, q3, q9 + vmla.f32 q15, q3, q11 + sub WINDOW, WINDOW, #96 + add B0L, B0L, #96 + add B0R, B0R, #96 + vld1.32 {q0,q1}, [WINDOW], r5 + vld1.32 {q2,q3}, [WINDOW] + vld1.32 {q4,q5}, [B0L, :128], r6 + vld1.32 {q6,q7}, [B0R, :128], r6 + vld1.32 {q8,q9}, [B0L, :128] + vld1.32 {q10,q11}, [B0R, :128] + vmla.f32 q12, q0, q4 + vmla.f32 q13, q0, q6 + vmla.f32 q14, q2, q8 + vmla.f32 q15, q2, q10 + add WINDOW, WINDOW, #96 + sub B0L, B0L, #96 + sub B0R, B0R, #96 + vmla.f32 q12, q1, q5 + vmla.f32 q13, q1, q7 + vmla.f32 q14, q3, q9 + vmla.f32 q15, q3, q11 + + vld1.32 {q0,q1}, [WINDOW], r5 + vld1.32 {q2,q3}, [WINDOW] + vld1.32 {q4,q5}, [B0L, :128], r6 + vld1.32 {q6,q7}, [B0R, :128], r6 + vld1.32 {q8,q9}, [B0L, :128] + vld1.32 {q10,q11}, [B0R, :128] + vpadd.f32 d24, d24, d25 + vpadd.f32 d25, d26, d27 + vpadd.f32 d26, d28, d29 + vpadd.f32 d27, d30, d31 + vmov.i32 q14, #0x38000000 + vpadd.f32 d24, d24, d25 + vpadd.f32 d25, d26, d27 + vmul.f32 q15, q12, q14 + vmul.f32 q12, q0, q4 + vmul.f32 q13, q0, q6 + vmul.f32 q14, q2, q8 + vst1.32 {q15}, [SAMPLES]! 
+ vmul.f32 q15, q2, q10 + vmla.f32 q12, q1, q5 + vmla.f32 q13, q1, q7 + vmla.f32 q14, q3, q9 + vmla.f32 q15, q3, q11 + sub WINDOW, WINDOW, #96 + add B0L, B0L, #96 + add B0R, B0R, #96 + vld1.32 {q0,q1}, [WINDOW], r5 + vld1.32 {q2,q3}, [WINDOW] + vld1.32 {q4,q5}, [B0L, :128], r6 + vld1.32 {q6,q7}, [B0R, :128], r6 + vld1.32 {q8,q9}, [B0L, :128] + vld1.32 {q10,q11}, [B0R, :128] + vmla.f32 q12, q0, q4 + vmla.f32 q13, q0, q6 + vmla.f32 q14, q2, q8 + vmla.f32 q15, q2, q10 + add WINDOW, WINDOW, #96 + sub B0L, B0L, #96 + sub B0R, B0R, #96 + vmla.f32 q12, q1, q5 + vmla.f32 q13, q1, q7 + vmla.f32 q14, q3, q9 + vmla.f32 q15, q3, q11 + vpadd.f32 d24, d24, d25 + vpadd.f32 d25, d26, d27 + vpadd.f32 d26, d28, d29 + vpadd.f32 d27, d30, d31 + vmov.i32 q14, #0x38000000 + vpadd.f32 d24, d24, d25 + vpadd.f32 d25, d26, d27 + vmul.f32 q12, q12, q14 + vst1.32 {q12}, [SAMPLES]! + + subs r4, r4, #1 + bne 1b + + mov r0, #0 + + vpop {q4-q7} + pop {r4-r6, pc} + +NONEXEC_STACK Index: lib/3rdparty/libmpg123/synth_stereo_neon_s32.S =================================================================== --- lib/3rdparty/libmpg123/synth_stereo_neon_s32.S (revision 0) +++ lib/3rdparty/libmpg123/synth_stereo_neon_s32.S (working copy) @@ -0,0 +1,257 @@ +/* + synth_stereo_neon_s32: ARM NEON optimized synth (stereo specific, 32-bit output version) + + copyright 1995-2010 by the mpg123 project - free software under the terms of the LGPL 2.1 + see COPYING and AUTHORS files in distribution or http://mpg123.org + initially written by Taihei Monma +*/ + +#include "mangle.h" + +#define WINDOW r0 +#define B0L r1 +#define B0R r2 +#define SAMPLES r3 + +/* + int synth_1to1_s32_s_neon_asm(real *window, real *b0l, real *b0r, real *samples, int bo1); + return value: number of clipped samples +*/ + + .code 32 +#ifndef __APPLE__ + .fpu neon +#endif + + .text + .globl ASM_NAME(synth_1to1_s32_s_neon_asm) +#ifdef __ELF__ + .type ASM_NAME(synth_1to1_s32_s_neon_asm), %function +#endif + ALIGN4 
+ASM_NAME(synth_1to1_s32_s_neon_asm): /* Stereo synth with 32-bit integer output: multiply-accumulates WINDOW against B0L and B0R, writes the converted sums to SAMPLES and returns in r0 the sum of the four counters kept at [sp]. */ + push {r4-r7, lr} /* r4-r7 and q4-q7 are overwritten below, so save them first */ + vpush {q4-q7} + ldr r4, [sp, #84] /* word just above the 20 bytes pushed and the 64 bytes vpushed; presumably the stack-passed bo1 argument */ + mov r7, sp /* remember sp; it is restored from r7 at exit */ + sub sp, sp, #16 + bic sp, #0xff /* reserve scratch and round sp down to a 256-byte boundary; the 16 bytes at [sp] hold four 32-bit counters. NOTE(review): no store to [sp] is visible before the first vld1.32 {q15}, [sp, :128] below, so the counters appear to start from stale stack contents; confirm and zero them if so */ + + add WINDOW, WINDOW, #64 + sub WINDOW, WINDOW, r4, lsl #2 /* WINDOW = WINDOW + 64 - 4*r4 bytes */ + + mov r4, #4 /* loop counter: 4 passes, each storing two q registers (8 words) to SAMPLES */ + mov r5, #128 /* post-increment applied to WINDOW loads */ + mov r6, #64 /* post-increment applied to B0L and B0R loads */ +1: + vld1.32 {q0,q1}, [WINDOW], r5 + vld1.32 {q2,q3}, [WINDOW] + vld1.32 {q4,q5}, [B0L, :128], r6 + vld1.32 {q6,q7}, [B0R, :128], r6 + vld1.32 {q8,q9}, [B0L, :128] + vld1.32 {q10,q11}, [B0R, :128] + vmul.f32 q12, q0, q4 /* q12/q14 accumulate WINDOW*B0L products, q13/q15 accumulate WINDOW*B0R products */ + vmul.f32 q13, q0, q6 + vmul.f32 q14, q2, q8 + vmul.f32 q15, q2, q10 + vmla.f32 q12, q1, q5 + vmla.f32 q13, q1, q7 + vmla.f32 q14, q3, q9 + vmla.f32 q15, q3, q11 + sub WINDOW, WINDOW, #96 + sub B0L, B0L, #32 + sub B0R, B0R, #32 + vld1.32 {q0,q1}, [WINDOW], r5 + vld1.32 {q2,q3}, [WINDOW] + vld1.32 {q4,q5}, [B0L, :128], r6 + vld1.32 {q6,q7}, [B0R, :128], r6 + vld1.32 {q8,q9}, [B0L, :128]! + vld1.32 {q10,q11}, [B0R, :128]! + vmla.f32 q12, q0, q4 + vmla.f32 q13, q0, q6 + vmla.f32 q14, q2, q8 + vmla.f32 q15, q2, q10 + add WINDOW, WINDOW, #96 + vmla.f32 q12, q1, q5 + vmla.f32 q13, q1, q7 + vmla.f32 q14, q3, q9 + vmla.f32 q15, q3, q11 + + vld1.32 {q0,q1}, [WINDOW], r5 /* these loads already fetch the operands used after the store below */ + vld1.32 {q2,q3}, [WINDOW] + vld1.32 {q4,q5}, [B0L, :128], r6 + vld1.32 {q6,q7}, [B0R, :128], r6 + vld1.32 {q8,q9}, [B0L, :128] + vld1.32 {q10,q11}, [B0R, :128] + vpadd.f32 d24, d24, d25 /* pairwise adds: begin folding each of q12-q15 to a single total */ + vpadd.f32 d25, d26, d27 + vpadd.f32 d26, d28, d29 + vpadd.f32 d27, d30, d31 + vmvn.i32 q14, #0xb9000000 /* every lane = 0x46ffffff, the largest float below 32768.0 */ + vld1.32 {q15}, [sp, :128] /* current counters */ + vpadd.f32 d24, d24, d25 + vpadd.f32 d25, d26, d27 /* q12 now holds the four totals of the former q12, q13, q14, q15 */ + vacgt.f32 q14, q12, q14 /* lanes whose magnitude exceeds the threshold become all-ones */ + vcvt.s32.f32 q13, q12, #16 /* float to signed fixed point with 16 fractional bits */ + vshr.u32 q14, q14, #31 /* all-ones mask becomes 1, zero stays 0 */ + vst1.32 {q13}, [SAMPLES]! 
+ vadd.i32 q14, q14, q15 /* add this batch to the counters and write them back */ + vst1.32 {q14}, [sp, :128] + vmul.f32 q12, q0, q4 /* second half of the pass: same sequence as above on the operands loaded earlier */ + vmul.f32 q13, q0, q6 + vmul.f32 q14, q2, q8 + vmul.f32 q15, q2, q10 + vmla.f32 q12, q1, q5 + vmla.f32 q13, q1, q7 + vmla.f32 q14, q3, q9 + vmla.f32 q15, q3, q11 + sub WINDOW, WINDOW, #96 + sub B0L, B0L, #32 + sub B0R, B0R, #32 + vld1.32 {q0,q1}, [WINDOW], r5 + vld1.32 {q2,q3}, [WINDOW] + vld1.32 {q4,q5}, [B0L, :128], r6 + vld1.32 {q6,q7}, [B0R, :128], r6 + vld1.32 {q8,q9}, [B0L, :128]! + vld1.32 {q10,q11}, [B0R, :128]! + vmla.f32 q12, q0, q4 + vmla.f32 q13, q0, q6 + vmla.f32 q14, q2, q8 + vmla.f32 q15, q2, q10 + add WINDOW, WINDOW, #96 + vmla.f32 q12, q1, q5 + vmla.f32 q13, q1, q7 + vmla.f32 q14, q3, q9 + vmla.f32 q15, q3, q11 + vpadd.f32 d24, d24, d25 + vpadd.f32 d25, d26, d27 + vpadd.f32 d26, d28, d29 + vpadd.f32 d27, d30, d31 + vmvn.i32 q14, #0xb9000000 + vld1.32 {q15}, [sp, :128] + vpadd.f32 d24, d24, d25 + vpadd.f32 d25, d26, d27 + vacgt.f32 q14, q12, q14 + vcvt.s32.f32 q13, q12, #16 + vshr.u32 q14, q14, #31 + vst1.32 {q13}, [SAMPLES]! 
+ vadd.i32 q14, q14, q15 + vst1.32 {q14}, [sp, :128] + + subs r4, r4, #1 + bne 1b + + mov r4, #4 /* second loop: 4 more passes */ + mov r6, #-64 /* B0L and B0R loads now post-increment by a negative amount */ +1: + vld1.32 {q0,q1}, [WINDOW], r5 + vld1.32 {q2,q3}, [WINDOW] + vld1.32 {q4,q5}, [B0L, :128], r6 + vld1.32 {q6,q7}, [B0R, :128], r6 + vld1.32 {q8,q9}, [B0L, :128] + vld1.32 {q10,q11}, [B0R, :128] + vmul.f32 q12, q0, q4 + vmul.f32 q13, q0, q6 + vmul.f32 q14, q2, q8 + vmul.f32 q15, q2, q10 + vmla.f32 q12, q1, q5 + vmla.f32 q13, q1, q7 + vmla.f32 q14, q3, q9 + vmla.f32 q15, q3, q11 + sub WINDOW, WINDOW, #96 + add B0L, B0L, #96 + add B0R, B0R, #96 + vld1.32 {q0,q1}, [WINDOW], r5 + vld1.32 {q2,q3}, [WINDOW] + vld1.32 {q4,q5}, [B0L, :128], r6 + vld1.32 {q6,q7}, [B0R, :128], r6 + vld1.32 {q8,q9}, [B0L, :128] + vld1.32 {q10,q11}, [B0R, :128] + vmla.f32 q12, q0, q4 + vmla.f32 q13, q0, q6 + vmla.f32 q14, q2, q8 + vmla.f32 q15, q2, q10 + add WINDOW, WINDOW, #96 + sub B0L, B0L, #96 + sub B0R, B0R, #96 + vmla.f32 q12, q1, q5 + vmla.f32 q13, q1, q7 + vmla.f32 q14, q3, q9 + vmla.f32 q15, q3, q11 + + vld1.32 {q0,q1}, [WINDOW], r5 + vld1.32 {q2,q3}, [WINDOW] + vld1.32 {q4,q5}, [B0L, :128], r6 + vld1.32 {q6,q7}, [B0R, :128], r6 + vld1.32 {q8,q9}, [B0L, :128] + vld1.32 {q10,q11}, [B0R, :128] + vpadd.f32 d24, d24, d25 + vpadd.f32 d25, d26, d27 + vpadd.f32 d26, d28, d29 + vpadd.f32 d27, d30, d31 + vmvn.i32 q14, #0xb9000000 + vld1.32 {q15}, [sp, :128] + vpadd.f32 d24, d24, d25 + vpadd.f32 d25, d26, d27 + vacgt.f32 q14, q12, q14 + vcvt.s32.f32 q13, q12, #16 + vshr.u32 q14, q14, #31 + vst1.32 {q13}, [SAMPLES]! 
+ vadd.i32 q14, q14, q15 + vst1.32 {q14}, [sp, :128] + vmul.f32 q12, q0, q4 + vmul.f32 q13, q0, q6 + vmul.f32 q14, q2, q8 + vmul.f32 q15, q2, q10 + vmla.f32 q12, q1, q5 + vmla.f32 q13, q1, q7 + vmla.f32 q14, q3, q9 + vmla.f32 q15, q3, q11 + sub WINDOW, WINDOW, #96 + add B0L, B0L, #96 + add B0R, B0R, #96 + vld1.32 {q0,q1}, [WINDOW], r5 + vld1.32 {q2,q3}, [WINDOW] + vld1.32 {q4,q5}, [B0L, :128], r6 + vld1.32 {q6,q7}, [B0R, :128], r6 + vld1.32 {q8,q9}, [B0L, :128] + vld1.32 {q10,q11}, [B0R, :128] + vmla.f32 q12, q0, q4 + vmla.f32 q13, q0, q6 + vmla.f32 q14, q2, q8 + vmla.f32 q15, q2, q10 + add WINDOW, WINDOW, #96 + sub B0L, B0L, #96 + sub B0R, B0R, #96 + vmla.f32 q12, q1, q5 + vmla.f32 q13, q1, q7 + vmla.f32 q14, q3, q9 + vmla.f32 q15, q3, q11 + vpadd.f32 d24, d24, d25 + vpadd.f32 d25, d26, d27 + vpadd.f32 d26, d28, d29 + vpadd.f32 d27, d30, d31 + vmvn.i32 q14, #0xb9000000 + vld1.32 {q15}, [sp, :128] + vpadd.f32 d24, d24, d25 + vpadd.f32 d25, d26, d27 + vacgt.f32 q14, q12, q14 + vcvt.s32.f32 q13, q12, #16 + vshr.u32 q14, q14, #31 + vst1.32 {q13}, [SAMPLES]! 
+ vadd.i32 q14, q14, q15 + vst1.32 {q14}, [sp, :128] + + subs r4, r4, #1 + bne 1b + + vld1.32 {q0}, [sp, :128] /* fetch the four counters */ + vpadd.i32 d0, d0, d1 + vpadd.i32 d0, d0, d0 /* d0[0] = sum of all four lanes */ + vmov.32 r0, d0[0] /* return value */ + + mov sp, r7 /* drop the aligned scratch area before restoring registers */ + vpop {q4-q7} + pop {r4-r7, pc} + +NONEXEC_STACK Index: lib/3rdparty/libmpg123/synth_stereo_sse_accurate.S =================================================================== --- lib/3rdparty/libmpg123/synth_stereo_sse_accurate.S (revision 62563) +++ lib/3rdparty/libmpg123/synth_stereo_sse_accurate.S (working copy) @@ -21,7 +21,7 @@ #define MMREG_CLIP %mm7 /* - int synth_1to1_stereo_sse_accurate_asm(real *window, real *b0l, real *b0r, short *samples, int bo1); + int synth_1to1_s_sse_accurate_asm(real *window, real *b0l, real *b0r, short *samples, int bo1); return value: number of clipped samples */ @@ -42,8 +42,8 @@ .long -956301312 .text ALIGN16 -.globl ASM_NAME(synth_1to1_stereo_sse_accurate_asm) -ASM_NAME(synth_1to1_stereo_sse_accurate_asm): +.globl ASM_NAME(synth_1to1_s_sse_accurate_asm) +ASM_NAME(synth_1to1_s_sse_accurate_asm): pushl %ebp movl %esp, %ebp andl $-16, %esp @@ -67,7 +67,7 @@ movl $4, %ecx ALIGN16 -Loop_start_1: +1: movups (WINDOW), %xmm0 movups 16(WINDOW), %xmm1 movups 32(WINDOW), %xmm2 @@ -274,12 +274,12 @@ leal 16(SAMPLES), SAMPLES decl %ecx - jnz Loop_start_1 + jnz 1b movl $4, %ecx ALIGN16 -Loop_start_2: +1: movups (WINDOW), %xmm0 movups 16(WINDOW), %xmm1 movups 32(WINDOW), %xmm2 @@ -486,7 +486,7 @@ leal 16(SAMPLES), SAMPLES decl %ecx - jnz Loop_start_2 + jnz 1b pshufw $0xee, MMREG_CLIP, %mm0 paddw MMREG_CLIP, %mm0 Index: lib/3rdparty/libmpg123/synth_stereo_sse_float.S =================================================================== --- lib/3rdparty/libmpg123/synth_stereo_sse_float.S (revision 62563) +++ lib/3rdparty/libmpg123/synth_stereo_sse_float.S (working copy) @@ -20,7 +20,7 @@ #define TEMP(n) (12+16*n)(%esp) /* - int synth_1to1_real_stereo_sse_asm(real *window, real *b0l, real *b0r, real *samples, int bo1); + int synth_1to1_real_s_sse_asm(real 
*window, real *b0l, real *b0r, real *samples, int bo1); return value: number of clipped samples (0) */ @@ -37,8 +37,8 @@ .long 939524096 .text ALIGN16 -.globl ASM_NAME(synth_1to1_real_stereo_sse_asm) -ASM_NAME(synth_1to1_real_stereo_sse_asm): +.globl ASM_NAME(synth_1to1_real_s_sse_asm) +ASM_NAME(synth_1to1_real_s_sse_asm): pushl %ebp movl %esp, %ebp andl $-16, %esp @@ -60,7 +60,7 @@ movl $4, %ecx ALIGN16 -Loop_start_1: +1: movups (WINDOW), %xmm0 movups 16(WINDOW), %xmm1 movups 32(WINDOW), %xmm2 @@ -228,12 +228,12 @@ leal 32(SAMPLES), SAMPLES decl %ecx - jnz Loop_start_1 + jnz 1b movl $4, %ecx ALIGN16 -Loop_start_2: +1: movups (WINDOW), %xmm0 movups 16(WINDOW), %xmm1 movups 32(WINDOW), %xmm2 @@ -401,7 +401,7 @@ leal 32(SAMPLES), SAMPLES decl %ecx - jnz Loop_start_2 + jnz 1b xorl %eax, %eax Index: lib/3rdparty/libmpg123/synth_stereo_sse_s32.S =================================================================== --- lib/3rdparty/libmpg123/synth_stereo_sse_s32.S (revision 62563) +++ lib/3rdparty/libmpg123/synth_stereo_sse_s32.S (working copy) @@ -21,7 +21,7 @@ #define MMREG_CLIP %mm7 /* - int synth_1to1_s32_stereo_sse_asm(real *window, real *b0l, real *b0r, int32_t *samples, int bo1); + int synth_1to1_s32_s_sse_asm(real *window, real *b0l, real *b0r, int32_t *samples, int bo1); return value: number of clipped samples */ @@ -48,8 +48,8 @@ .long -956301312 .text ALIGN16 -.globl ASM_NAME(synth_1to1_s32_stereo_sse_asm) -ASM_NAME(synth_1to1_s32_stereo_sse_asm): +.globl ASM_NAME(synth_1to1_s32_s_sse_asm) +ASM_NAME(synth_1to1_s32_s_sse_asm): pushl %ebp movl %esp, %ebp andl $-16, %esp @@ -73,7 +73,7 @@ movl $4, %ecx ALIGN16 -Loop_start_1: +1: movups (WINDOW), %xmm0 movups 16(WINDOW), %xmm1 movups 32(WINDOW), %xmm2 @@ -293,12 +293,12 @@ leal 32(SAMPLES), SAMPLES decl %ecx - jnz Loop_start_1 + jnz 1b movl $4, %ecx ALIGN16 -Loop_start_2: +1: movups (WINDOW), %xmm0 movups 16(WINDOW), %xmm1 movups 32(WINDOW), %xmm2 @@ -518,7 +518,7 @@ leal 32(SAMPLES), SAMPLES decl %ecx - jnz 
Loop_start_2 + jnz 1b pshufw $0xee, MMREG_CLIP, %mm0 paddw MMREG_CLIP, %mm0 Index: lib/3rdparty/libmpg123/synth_stereo_x86_64.S =================================================================== --- lib/3rdparty/libmpg123/synth_stereo_x86_64.S (revision 62563) +++ lib/3rdparty/libmpg123/synth_stereo_x86_64.S (working copy) @@ -8,7 +8,7 @@ #include "mangle.h" -#ifdef _WIN64 +#ifdef IS_MSABI /* short *window; */ #define WINDOW %r10 /* short *b0l; */ @@ -34,7 +34,7 @@ #define XMMREG_FULL %xmm12 /* {0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF} */ /* - int synth_1to1_stereo_x86_64_asm(short *window, short *b0l, short *b0r, short *samples, int bo1); + int synth_1to1_s_x86_64_asm(short *window, short *b0l, short *b0r, short *samples, int bo1); return value: number of clipped samples */ @@ -55,9 +55,9 @@ .long -32769 .text ALIGN16 -.globl ASM_NAME(synth_1to1_stereo_x86_64_asm) -ASM_NAME(synth_1to1_stereo_x86_64_asm): -#ifdef _WIN64 /* should save xmm6-15 */ +.globl ASM_NAME(synth_1to1_s_x86_64_asm) +ASM_NAME(synth_1to1_s_x86_64_asm): +#ifdef IS_MSABI /* should save xmm6-15 */ movl 40(%rsp), %eax /* 5th argument; placed after 32-byte shadow space */ subq $168, %rsp /* stack alignment + 10 xmm registers */ movaps %xmm6, (%rsp) @@ -72,7 +72,7 @@ movaps %xmm15, 144(%rsp) #endif -#ifdef _WIN64 +#ifdef IS_MSABI shlq $32, %rax shrq $31, %rax movq %rcx, %r10 @@ -94,7 +94,7 @@ movl $4, %ecx ALIGN16 -Loop_start_1: +1: movups (WINDOW), %xmm0 movups 16(WINDOW), %xmm1 movups 64(WINDOW), %xmm2 @@ -196,12 +196,12 @@ leaq 16(SAMPLES), SAMPLES decl %ecx - jnz Loop_start_1 + jnz 1b movl $4, %ecx ALIGN16 -Loop_start_2: +1: movups (WINDOW), %xmm0 movups 16(WINDOW), %xmm1 movups 64(WINDOW), %xmm2 @@ -303,7 +303,7 @@ leaq 16(SAMPLES), SAMPLES decl %ecx - jnz Loop_start_2 + jnz 1b movhlps XMMREG_CLIP, %xmm0 paddw XMMREG_CLIP, %xmm0 @@ -317,7 +317,7 @@ movd %xmm0, %eax andl $0xffff, %eax -#ifdef _WIN64 +#ifdef IS_MSABI movaps (%rsp), %xmm6 movaps 16(%rsp), %xmm7 movaps 32(%rsp), %xmm8 Index: 
lib/3rdparty/libmpg123/synth_stereo_x86_64_accurate.S =================================================================== --- lib/3rdparty/libmpg123/synth_stereo_x86_64_accurate.S (revision 62563) +++ lib/3rdparty/libmpg123/synth_stereo_x86_64_accurate.S (working copy) @@ -8,7 +8,7 @@ #include "mangle.h" -#ifdef _WIN64 +#ifdef IS_MSABI /* short *window; */ #define WINDOW %rsi /* short *b0l; */ @@ -33,7 +33,7 @@ #define TEMP_CLIP (%rsp) /* - int synth_1to1_stereo_x86_64_accurate_asm(real *window, real *b0l, real *b0r, short *samples, int bo1); + int synth_1to1_s_x86_64_accurate_asm(real *window, real *b0l, real *b0r, short *samples, int bo1); return value: number of clipped samples */ @@ -54,9 +54,9 @@ .long -956301312 .text ALIGN16 -.globl ASM_NAME(synth_1to1_stereo_x86_64_accurate_asm) -ASM_NAME(synth_1to1_stereo_x86_64_accurate_asm): -#ifdef _WIN64 /* should save xmm6-15 */ +.globl ASM_NAME(synth_1to1_s_x86_64_accurate_asm) +ASM_NAME(synth_1to1_s_x86_64_accurate_asm): +#ifdef IS_MSABI /* should save xmm6-15 */ movl 40(%rsp), %eax /* 5th argument; placed after 32-byte shadow space */ pushq %rsi subq $176, %rsp /* 10 xmm registers + temp */ @@ -79,7 +79,7 @@ xorps %xmm0, %xmm0 movaps %xmm0, TEMP_CLIP -#ifdef _WIN64 +#ifdef IS_MSABI shlq $32, %rax shrq $30, %rax movq %rcx, %rbx @@ -95,7 +95,7 @@ movl $4, %ecx ALIGN16 -Loop_start_1: +1: movups (WINDOW), %xmm8 movups 16(WINDOW), %xmm1 movups 32(WINDOW), %xmm2 @@ -254,12 +254,12 @@ leaq 16(SAMPLES), SAMPLES decl %ecx - jnz Loop_start_1 + jnz 1b movl $4, %ecx ALIGN16 -Loop_start_2: +1: movups (WINDOW), %xmm8 movups 16(WINDOW), %xmm1 movups 32(WINDOW), %xmm2 @@ -418,7 +418,7 @@ leaq 16(SAMPLES), SAMPLES decl %ecx - jnz Loop_start_2 + jnz 1b movaps TEMP_CLIP, %xmm4 movhlps %xmm4, %xmm0 @@ -433,7 +433,7 @@ movd %xmm0, %eax andl $0xffff, %eax -#ifdef _WIN64 +#ifdef IS_MSABI movaps (%rsp), %xmm6 movaps 16(%rsp), %xmm7 movaps 32(%rsp), %xmm8 Index: lib/3rdparty/libmpg123/synth_stereo_x86_64_float.S 
=================================================================== --- lib/3rdparty/libmpg123/synth_stereo_x86_64_float.S (revision 62563) +++ lib/3rdparty/libmpg123/synth_stereo_x86_64_float.S (working copy) @@ -8,7 +8,7 @@ #include "mangle.h" -#ifdef _WIN64 +#ifdef IS_MSABI /* short *window; */ #define WINDOW %r10 /* short *b0l; */ @@ -31,7 +31,7 @@ #define XMMREG_SCALE (%r11) /* {1/32768.0, 1/32768.0, 1/32768.0, 1/32768.0} */ /* - int synth_1to1_real_stereo_x86_64_asm(real *window, real *b0l, real *b0r, real *samples, int bo1); + int synth_1to1_real_s_x86_64_asm(real *window, real *b0l, real *b0r, real *samples, int bo1); return value: number of clipped samples (0) */ @@ -48,9 +48,9 @@ .long 939524096 .text ALIGN16 -.globl ASM_NAME(synth_1to1_real_stereo_x86_64_asm) -ASM_NAME(synth_1to1_real_stereo_x86_64_asm): -#ifdef _WIN64 /* should save xmm6-15 */ +.globl ASM_NAME(synth_1to1_real_s_x86_64_asm) +ASM_NAME(synth_1to1_real_s_x86_64_asm): +#ifdef IS_MSABI /* should save xmm6-15 */ movl 40(%rsp), %eax /* 5th argument; placed after 32-byte shadow space */ subq $168, %rsp /* stack alignment + 10 xmm registers */ movaps %xmm6, (%rsp) @@ -67,7 +67,7 @@ leaq ASM_NAME(scale_x86_64)(%rip), %r11 -#ifdef _WIN64 +#ifdef IS_MSABI shlq $32, %rax shrq $30, %rax movq %rcx, %r10 @@ -83,7 +83,7 @@ movl $4, %ecx ALIGN16 -Loop_start_1: +1: movups (WINDOW), %xmm8 movups 16(WINDOW), %xmm1 movups 32(WINDOW), %xmm2 @@ -226,12 +226,12 @@ leaq 32(SAMPLES), SAMPLES decl %ecx - jnz Loop_start_1 + jnz 1b movl $4, %ecx ALIGN16 -Loop_start_2: +1: movups (WINDOW), %xmm8 movups 16(WINDOW), %xmm1 movups 32(WINDOW), %xmm2 @@ -374,11 +374,11 @@ leaq 32(SAMPLES), SAMPLES decl %ecx - jnz Loop_start_2 + jnz 1b xorl %eax, %eax -#ifdef _WIN64 +#ifdef IS_MSABI movaps (%rsp), %xmm6 movaps 16(%rsp), %xmm7 movaps 32(%rsp), %xmm8 Index: lib/3rdparty/libmpg123/synth_stereo_x86_64_s32.S =================================================================== --- lib/3rdparty/libmpg123/synth_stereo_x86_64_s32.S 
(revision 62563) +++ lib/3rdparty/libmpg123/synth_stereo_x86_64_s32.S (working copy) @@ -8,7 +8,7 @@ #include "mangle.h" -#ifdef _WIN64 +#ifdef IS_MSABI /* short *window; */ #define WINDOW %rsi /* short *b0l; */ @@ -34,7 +34,7 @@ #define TEMP_CLIP (%rsp) /* - int synth_1to1_s32_stereo_x86_64_asm(real *window, real *b0l, real *b0r, int32_t *samples, int bo1); + int synth_1to1_s32_s_x86_64_asm(real *window, real *b0l, real *b0r, int32_t *samples, int bo1); return value: number of clipped samples */ @@ -61,9 +61,9 @@ .long -956301312 .text ALIGN16 -.globl ASM_NAME(synth_1to1_s32_stereo_x86_64_asm) -ASM_NAME(synth_1to1_s32_stereo_x86_64_asm): -#ifdef _WIN64 /* should save xmm6-15 */ +.globl ASM_NAME(synth_1to1_s32_s_x86_64_asm) +ASM_NAME(synth_1to1_s32_s_x86_64_asm): +#ifdef IS_MSABI /* should save xmm6-15 */ movl 40(%rsp), %eax /* 5th argument; placed after 32-byte shadow space */ pushq %rsi pushq %rdi @@ -82,7 +82,7 @@ subq $24, %rsp /* stack alignment + temp */ #endif -#ifdef _WIN64 +#ifdef IS_MSABI shlq $32, %rax shrq $30, %rax movq %rcx, %rsi @@ -105,7 +105,7 @@ movl $4, %ecx ALIGN16 -Loop_start_1: +1: movups (WINDOW), %xmm8 movups 16(WINDOW), %xmm1 movups 32(WINDOW), %xmm2 @@ -268,12 +268,12 @@ leaq 32(SAMPLES), SAMPLES decl %ecx - jnz Loop_start_1 + jnz 1b movl $4, %ecx ALIGN16 -Loop_start_2: +1: movups (WINDOW), %xmm8 movups 16(WINDOW), %xmm1 movups 32(WINDOW), %xmm2 @@ -436,7 +436,7 @@ leaq 32(SAMPLES), SAMPLES decl %ecx - jnz Loop_start_2 + jnz 1b movaps TEMP_CLIP, %xmm4 movhlps %xmm4, %xmm0 @@ -451,7 +451,7 @@ movd %xmm0, %eax andl $0xffff, %eax -#ifdef _WIN64 +#ifdef IS_MSABI movaps (%rsp), %xmm6 movaps 16(%rsp), %xmm7 movaps 32(%rsp), %xmm8 Index: lib/3rdparty/libmpg123/synth_x86_64.S =================================================================== --- lib/3rdparty/libmpg123/synth_x86_64.S (revision 62563) +++ lib/3rdparty/libmpg123/synth_x86_64.S (working copy) @@ -8,7 +8,7 @@ #include "mangle.h" -#ifdef _WIN64 +#ifdef IS_MSABI /* short *window; */ 
#define ARG0 %r10 /* short *b0; */ @@ -57,7 +57,7 @@ ALIGN16 .globl ASM_NAME(synth_1to1_x86_64_asm) ASM_NAME(synth_1to1_x86_64_asm): -#ifdef _WIN64 /* should save xmm6-15 */ +#ifdef IS_MSABI /* should save xmm6-15 */ movq %rcx, ARG0 subq $104, %rsp /* stack alignment + 6 xmm registers */ movaps %xmm6, (%rsp) @@ -82,7 +82,7 @@ movl $4, %ecx ALIGN16 -Loop_start_1: +1: movups (ARG0), %xmm0 movups 16(ARG0), %xmm1 movups 64(ARG0), %xmm2 @@ -148,12 +148,12 @@ leaq 16(ARG2), ARG2 decl %ecx - jnz Loop_start_1 + jnz 1b movl $4, %ecx ALIGN16 -Loop_start_2: +1: movups (ARG0), %xmm0 movups 16(ARG0), %xmm1 movups 64(ARG0), %xmm2 @@ -219,7 +219,7 @@ leaq 16(ARG2), ARG2 decl %ecx - jnz Loop_start_2 + jnz 1b pshuflw $0xee, XMMREG_CLIP, %xmm0 movhlps XMMREG_CLIP, %xmm1 @@ -230,7 +230,7 @@ movd XMMREG_CLIP, %eax -#ifdef _WIN64 +#ifdef IS_MSABI movaps (%rsp), %xmm6 movaps 16(%rsp), %xmm7 movaps 32(%rsp), %xmm12 Index: lib/3rdparty/libmpg123/synth_x86_64_accurate.S =================================================================== --- lib/3rdparty/libmpg123/synth_x86_64_accurate.S (revision 62563) +++ lib/3rdparty/libmpg123/synth_x86_64_accurate.S (working copy) @@ -8,7 +8,7 @@ #include "mangle.h" -#ifdef _WIN64 +#ifdef IS_MSABI /* short *window; */ #define ARG0 %r10 /* short *b0; */ @@ -56,7 +56,7 @@ ALIGN16 .globl ASM_NAME(synth_1to1_x86_64_accurate_asm) ASM_NAME(synth_1to1_x86_64_accurate_asm): -#ifdef _WIN64 /* should save xmm6-15 */ +#ifdef IS_MSABI /* should save xmm6-15 */ movq %rcx, ARG0 subq $152, %rsp /* stack alignment + 9 xmm registers */ movaps %xmm6, (%rsp) @@ -84,7 +84,7 @@ movl $4, %ecx ALIGN16 -Loop_start_1: +1: movups (ARG0), %xmm8 movups 16(ARG0), %xmm1 movups 32(ARG0), %xmm2 @@ -176,12 +176,12 @@ leaq 16(ARG2), ARG2 decl %ecx - jnz Loop_start_1 + jnz 1b movl $4, %ecx ALIGN16 -Loop_start_2: +1: movups (ARG0), %xmm8 movups 16(ARG0), %xmm1 movups 32(ARG0), %xmm2 @@ -273,7 +273,7 @@ leaq 16(ARG2), ARG2 decl %ecx - jnz Loop_start_2 + jnz 1b pshuflw $0xee, XMMREG_CLIP, 
%xmm0 movhlps XMMREG_CLIP, %xmm1 @@ -284,7 +284,7 @@ movd XMMREG_CLIP, %eax -#ifdef _WIN64 +#ifdef IS_MSABI movaps (%rsp), %xmm6 movaps 16(%rsp), %xmm7 movaps 32(%rsp), %xmm8 Index: lib/3rdparty/libmpg123/synth_x86_64_float.S =================================================================== --- lib/3rdparty/libmpg123/synth_x86_64_float.S (revision 62563) +++ lib/3rdparty/libmpg123/synth_x86_64_float.S (working copy) @@ -8,7 +8,7 @@ #include "mangle.h" -#ifdef _WIN64 +#ifdef IS_MSABI /* short *window; */ #define ARG0 %r10 /* short *b0; */ @@ -50,7 +50,7 @@ ALIGN16 .globl ASM_NAME(synth_1to1_real_x86_64_asm) ASM_NAME(synth_1to1_real_x86_64_asm): -#ifdef _WIN64 /* should save xmm6-15 */ +#ifdef IS_MSABI /* should save xmm6-15 */ movq %rcx, ARG0 subq $120, %rsp /* stack alignment + 7 xmm registers */ movaps %xmm6, (%rsp) @@ -73,7 +73,7 @@ movl $4, %ecx ALIGN16 -Loop_start_1: +1: movups (ARG0), %xmm8 movups 16(ARG0), %xmm1 movups 32(ARG0), %xmm2 @@ -154,12 +154,12 @@ leaq 32(ARG2), ARG2 decl %ecx - jnz Loop_start_1 + jnz 1b movl $4, %ecx ALIGN16 -Loop_start_2: +1: movups (ARG0), %xmm8 movups 16(ARG0), %xmm1 movups 32(ARG0), %xmm2 @@ -240,11 +240,11 @@ leaq 32(ARG2), ARG2 decl %ecx - jnz Loop_start_2 + jnz 1b xorl %eax, %eax -#ifdef _WIN64 +#ifdef IS_MSABI movaps (%rsp), %xmm6 movaps 16(%rsp), %xmm7 movaps 32(%rsp), %xmm8 Index: lib/3rdparty/libmpg123/synth_x86_64_s32.S =================================================================== --- lib/3rdparty/libmpg123/synth_x86_64_s32.S (revision 62563) +++ lib/3rdparty/libmpg123/synth_x86_64_s32.S (working copy) @@ -8,7 +8,7 @@ #include "mangle.h" -#ifdef _WIN64 +#ifdef IS_MSABI /* short *window; */ #define ARG0 %r10 /* short *b0; */ @@ -63,7 +63,7 @@ ALIGN16 .globl ASM_NAME(synth_1to1_s32_x86_64_asm) ASM_NAME(synth_1to1_s32_x86_64_asm): -#ifdef _WIN64 /* should save xmm6-15 */ +#ifdef IS_MSABI /* should save xmm6-15 */ movq %rcx, ARG0 subq $168, %rsp /* stack alignment + 10 xmm registers */ movaps %xmm6, (%rsp) @@ -94,7 
+94,7 @@ movl $4, %ecx ALIGN16 -Loop_start_1: +1: movups (ARG0), %xmm8 movups 16(ARG0), %xmm1 movups 32(ARG0), %xmm2 @@ -186,12 +186,12 @@ leaq 32(ARG2), ARG2 decl %ecx - jnz Loop_start_1 + jnz 1b movl $4, %ecx ALIGN16 -Loop_start_2: +1: movups (ARG0), %xmm8 movups 16(ARG0), %xmm1 movups 32(ARG0), %xmm2 @@ -283,7 +283,7 @@ leaq 32(ARG2), ARG2 decl %ecx - jnz Loop_start_2 + jnz 1b pshuflw $0xee, XMMREG_CLIP, %xmm0 movhlps XMMREG_CLIP, %xmm1 @@ -294,7 +294,7 @@ movd XMMREG_CLIP, %eax -#ifdef _WIN64 +#ifdef IS_MSABI movaps (%rsp), %xmm6 movaps 16(%rsp), %xmm7 movaps 32(%rsp), %xmm8 Index: lib/3rdparty/libmpg123/tabinit.c =================================================================== --- lib/3rdparty/libmpg123/tabinit.c (revision 62563) +++ lib/3rdparty/libmpg123/tabinit.c (working copy) @@ -90,7 +90,7 @@ } #ifdef OPT_MMXORSSE -#ifndef OPT_X86_64 +#if !defined(OPT_X86_64) && !defined(OPT_NEON) && !defined(OPT_AVX) void make_decode_tables_mmx_asm(long scaleval, float* decwin_mmx, float *decwins); void make_decode_tables_mmx(mpg123_handle *fr) { @@ -167,11 +167,15 @@ { int i,j; int idx = 0; + double scaleval; +#ifdef REAL_IS_FIXED + real scaleval_long; +#endif /* Scale is always based on 1.0 . */ - double scaleval = -0.5*(fr->lastscale < 0 ? fr->p.outscale : fr->lastscale); + scaleval = -0.5*(fr->lastscale < 0 ? 
fr->p.outscale : fr->lastscale); debug1("decode tables with scaleval %g", scaleval); #ifdef REAL_IS_FIXED - long scaleval_long = DOUBLE_TO_REAL_15(scaleval); + scaleval_long = DOUBLE_TO_REAL_15(scaleval); #endif for(i=0,j=0;i<256;i++,j++,idx+=32) { @@ -210,8 +214,14 @@ scaleval = - scaleval; #endif } -#if defined(OPT_X86_64) || defined(OPT_ALTIVEC) || defined(OPT_SSE) || defined(OPT_ARM) - if(fr->cpu_opts.type == x86_64 || fr->cpu_opts.type == altivec || fr->cpu_opts.type == sse || fr->cpu_opts.type == arm) +#if defined(OPT_X86_64) || defined(OPT_ALTIVEC) || defined(OPT_SSE) || defined(OPT_SSE_VINTAGE) || defined(OPT_ARM) || defined(OPT_NEON) || defined(OPT_AVX) + if( fr->cpu_opts.type == x86_64 + || fr->cpu_opts.type == altivec + || fr->cpu_opts.type == sse + || fr->cpu_opts.type == sse_vintage + || fr->cpu_opts.type == arm + || fr->cpu_opts.type == neon + || fr->cpu_opts.type == avx ) { /* for float SSE / AltiVec / ARM decoder */ for(i=512; i<512+32; i++) { @@ -221,6 +231,15 @@ { fr->decwin[512+32+i] = -fr->decwin[511-i]; } +#ifdef OPT_NEON + if(fr->cpu_opts.type == neon) + { + for(i=0; i<512; i+=2) + { + fr->decwin[i] = -fr->decwin[i]; + } + } +#endif } #endif debug("decode tables done"); @@ -230,7 +249,7 @@ int make_conv16to8_table(mpg123_handle *fr) { int i; - int mode = fr->af.encoding; + int mode = fr->af.dec_enc; /* * ????: 8.0 is right but on SB cards '2.0' is a better value ??? @@ -247,38 +266,94 @@ fr->conv16to8 = fr->conv16to8_buf + 4096; } - if(fr->af.encoding == MPG123_ENC_ULAW_8){ - double m=127.0 / log(256.0); - int c1; + switch(mode) + { + case MPG123_ENC_ULAW_8: + { + double m=127.0 / log(256.0); + int c1; - for(i=-4096;i<4096;i++) { -/* dunno whether this is a valid transformation rule ?!?!? 
*/ - if(i < 0) - c1 = 127 - (int) (log( 1.0 - 255.0 * (double) i*mul / 32768.0 ) * m); - else - c1 = 255 - (int) (log( 1.0 + 255.0 * (double) i*mul / 32768.0 ) * m); - if((c1 < 0 || c1 > 255) && NOQUIET) error2("Converror %d %d",i,c1); + for(i=-4096;i<4096;i++) + { + /* dunno whether this is a valid transformation rule ?!?!? */ + if(i < 0) + c1 = 127 - (int) (log( 1.0 - 255.0 * (double) i*mul / 32768.0 ) * m); + else + c1 = 255 - (int) (log( 1.0 + 255.0 * (double) i*mul / 32768.0 ) * m); + if(c1 < 0 || c1 > 255) + { + if(NOQUIET) error2("Converror %d %d",i,c1); + return -1; + } + if(c1 == 0) + c1 = 2; + fr->conv16to8[i] = (unsigned char) c1; + } + } + break; + case MPG123_ENC_SIGNED_8: + for(i=-4096;i<4096;i++) + fr->conv16to8[i] = i>>5; + break; + case MPG123_ENC_UNSIGNED_8: + for(i=-4096;i<4096;i++) + fr->conv16to8[i] = (i>>5)+128; + break; + case MPG123_ENC_ALAW_8: + { + /* + Let's believe Wikipedia (http://en.wikipedia.org/wiki/G.711) that this + is the correct table: - if(c1 == 0) - c1 = 2; - fr->conv16to8[i] = (unsigned char) c1; - } - } - else if(mode == MPG123_ENC_SIGNED_8) { - for(i=-4096;i<4096;i++) { - fr->conv16to8[i] = i>>5; - } - } - else if(mode == MPG123_ENC_UNSIGNED_8) { - for(i=-4096;i<4096;i++) { - fr->conv16to8[i] = (i>>5)+128; - } - } - else { - for(i=-4096;i<4096;i++) { - fr->conv16to8[i] = 0; - } - } + s0000000wxyza... n000wxyz [0-31] -> [0-15] + s0000001wxyza... n001wxyz [32-63] -> [16-31] + s000001wxyzab... n010wxyz [64-127] -> [32-47] + s00001wxyzabc... n011wxyz [128-255] -> [48-63] + s0001wxyzabcd... n100wxyz [256-511] -> [64-79] + s001wxyzabcde... n101wxyz [512-1023] -> [80-95] + s01wxyzabcdef... n110wxyz [1024-2047] -> [96-111] + s1wxyzabcdefg... n111wxyz [2048-4095] -> [112-127] + + Let's extend to -4096, too. + Also, bytes are xored with 0x55 for transmission. 
+ + Since it sounds OK, I assume it is fine;-) + */ + for(i=0; i<64; ++i) + fr->conv16to8[i] = ((unsigned int)i)>>1; + for(i=64; i<128; ++i) + fr->conv16to8[i] = ((((unsigned int)i)>>2) & 0xf) | (2<<4); + for(i=128; i<256; ++i) + fr->conv16to8[i] = ((((unsigned int)i)>>3) & 0xf) | (3<<4); + for(i=256; i<512; ++i) + fr->conv16to8[i] = ((((unsigned int)i)>>4) & 0xf) | (4<<4); + for(i=512; i<1024; ++i) + fr->conv16to8[i] = ((((unsigned int)i)>>5) & 0xf) | (5<<4); + for(i=1024; i<2048; ++i) + fr->conv16to8[i] = ((((unsigned int)i)>>6) & 0xf) | (6<<4); + for(i=2048; i<4096; ++i) + fr->conv16to8[i] = ((((unsigned int)i)>>7) & 0xf) | (7<<4); + + for(i=-4095; i<0; ++i) + fr->conv16to8[i] = fr->conv16to8[-i] | 0x80; + + fr->conv16to8[-4096] = fr->conv16to8[-4095]; + + for(i=-4096;i<4096;i++) + { + /* fr->conv16to8[i] = - i>>5; */ + /* fprintf(stderr, "table %i %i\n", i<conv16to8[i]); */ + fr->conv16to8[i] ^= 0x55; + } + } + break; + default: + fr->err = MPG123_ERR_16TO8TABLE; + if(NOQUIET) error("Unknown 8 bit encoding choice."); + return -1; + break; + } + return 0; } #endif Index: lib/3rdparty/libmpg123/tabinit_mmx.S =================================================================== --- lib/3rdparty/libmpg123/tabinit_mmx.S (revision 62563) +++ lib/3rdparty/libmpg123/tabinit_mmx.S (working copy) @@ -45,39 +45,39 @@ .long 1060439283 ALIGN32 intwinbase: - .value 0, -1, -1, -1, -1, -1, -1, -2 - .value -2, -2, -2, -3, -3, -4, -4, -5 - .value -5, -6, -7, -7, -8, -9, -10, -11 - .value -13, -14, -16, -17, -19, -21, -24, -26 - .value -29, -31, -35, -38, -41, -45, -49, -53 - .value -58, -63, -68, -73, -79, -85, -91, -97 - .value -104, -111, -117, -125, -132, -139, -147, -154 - .value -161, -169, -176, -183, -190, -196, -202, -208 - .value -213, -218, -222, -225, -227, -228, -228, -227 - .value -224, -221, -215, -208, -200, -189, -177, -163 - .value -146, -127, -106, -83, -57, -29, 2, 36 - .value 72, 111, 153, 197, 244, 294, 347, 401 - .value 459, 519, 581, 645, 711, 779, 848, 919 
- .value 991, 1064, 1137, 1210, 1283, 1356, 1428, 1498 - .value 1567, 1634, 1698, 1759, 1817, 1870, 1919, 1962 - .value 2001, 2032, 2057, 2075, 2085, 2087, 2080, 2063 - .value 2037, 2000, 1952, 1893, 1822, 1739, 1644, 1535 - .value 1414, 1280, 1131, 970, 794, 605, 402, 185 - .value -45, -288, -545, -814, -1095, -1388, -1692, -2006 - .value -2330, -2663, -3004, -3351, -3705, -4063, -4425, -4788 - .value -5153, -5517, -5879, -6237, -6589, -6935, -7271, -7597 - .value -7910, -8209, -8491, -8755, -8998, -9219, -9416, -9585 - .value -9727, -9838, -9916, -9959, -9966, -9935, -9863, -9750 - .value -9592, -9389, -9139, -8840, -8492, -8092, -7640, -7134 - .value -6574, -5959, -5288, -4561, -3776, -2935, -2037, -1082 - .value -70, 998, 2122, 3300, 4533, 5818, 7154, 8540 - .value 9975, 11455, 12980, 14548, 16155, 17799, 19478, 21189 - .value 22929, 24694, 26482, 28289, 30112, 31947,-26209,-24360 - .value -22511,-20664,-18824,-16994,-15179,-13383,-11610, -9863 - .value -8147, -6466, -4822, -3222, -1667, -162, 1289, 2684 - .value 4019, 5290, 6494, 7629, 8692, 9679, 10590, 11420 - .value 12169, 12835, 13415, 13908, 14313, 14630, 14856, 14992 - .value 15038 + .short 0, -1, -1, -1, -1, -1, -1, -2 + .short -2, -2, -2, -3, -3, -4, -4, -5 + .short -5, -6, -7, -7, -8, -9, -10, -11 + .short -13, -14, -16, -17, -19, -21, -24, -26 + .short -29, -31, -35, -38, -41, -45, -49, -53 + .short -58, -63, -68, -73, -79, -85, -91, -97 + .short -104, -111, -117, -125, -132, -139, -147, -154 + .short -161, -169, -176, -183, -190, -196, -202, -208 + .short -213, -218, -222, -225, -227, -228, -228, -227 + .short -224, -221, -215, -208, -200, -189, -177, -163 + .short -146, -127, -106, -83, -57, -29, 2, 36 + .short 72, 111, 153, 197, 244, 294, 347, 401 + .short 459, 519, 581, 645, 711, 779, 848, 919 + .short 991, 1064, 1137, 1210, 1283, 1356, 1428, 1498 + .short 1567, 1634, 1698, 1759, 1817, 1870, 1919, 1962 + .short 2001, 2032, 2057, 2075, 2085, 2087, 2080, 2063 + .short 2037, 2000, 1952, 1893, 1822, 
1739, 1644, 1535 + .short 1414, 1280, 1131, 970, 794, 605, 402, 185 + .short -45, -288, -545, -814, -1095, -1388, -1692, -2006 + .short -2330, -2663, -3004, -3351, -3705, -4063, -4425, -4788 + .short -5153, -5517, -5879, -6237, -6589, -6935, -7271, -7597 + .short -7910, -8209, -8491, -8755, -8998, -9219, -9416, -9585 + .short -9727, -9838, -9916, -9959, -9966, -9935, -9863, -9750 + .short -9592, -9389, -9139, -8840, -8492, -8092, -7640, -7134 + .short -6574, -5959, -5288, -4561, -3776, -2935, -2037, -1082 + .short -70, 998, 2122, 3300, 4533, 5818, 7154, 8540 + .short 9975, 11455, 12980, 14548, 16155, 17799, 19478, 21189 + .short 22929, 24694, 26482, 28289, 30112, 31947,-26209,-24360 + .short -22511,-20664,-18824,-16994,-15179,-13383,-11610, -9863 + .short -8147, -6466, -4822, -3222, -1667, -162, 1289, 2684 + .short 4019, 5290, 6494, 7629, 8692, 9679, 10590, 11420 + .short 12169, 12835, 13415, 13908, 14313, 14630, 14856, 14992 + .short 15038 intwindiv: .long 0x47800000 # 65536.0 @@ -119,11 +119,11 @@ /* stack: 20=scaleval 24=decwin_mmx, 28=decwins */ .L02: leal -1(%esi),%edx - and %ebx,%edx - cmp $31,%edx + andl %ebx,%edx + cmpl $31,%edx jnz .L03 addl $-1023,%ecx - test %esi,%ebx + testl %esi,%ebx jz .L03 negl 20(%esp) .L03: @@ -132,7 +132,7 @@ incl %ebx cmpl $intwinbase,%edi jz .L04 - cmp $256,%ebx + cmpl $256,%ebx jnz .L00 negl (%esp) jmp .L00 @@ -182,11 +182,11 @@ popl %ebx /* that has to match the pushl before */ .L11: leal -1(%esi),%edx - and %ebx,%edx - cmp $31,%edx + andl %ebx,%edx + cmpl $31,%edx jnz .L12 addl $-1023,%ecx - test %esi,%ebx + testl %esi,%ebx jz .L12 negl 20(%esp) .L12: @@ -195,7 +195,7 @@ incl %ebx cmpl $intwinbase,%edi jz .L13 - cmp $256,%ebx + cmpl $256,%ebx jnz .L05 negl (%esp) jmp .L05