Index: include/reactos/libs/libmpg123/abi_align.h =================================================================== --- include/reactos/libs/libmpg123/abi_align.h (revision 0) +++ include/reactos/libs/libmpg123/abi_align.h (working copy) @@ -0,0 +1,39 @@ +/* + mpg123lib_intern: Common non-public stuff for libmpg123 + + copyright 1995-2008 by the mpg123 project - free software under the terms of the LGPL 2.1 + see COPYING and AUTHORS files in distribution or http://mpg123.org + + derived from the old mpg123.h +*/ + +#ifndef MPG123_H_ABI_ALIGN +#define MPG123_H_ABI_ALIGN + +#include "config.h" + +/* ABI conformance for other compilers. + mpg123 needs 16byte-aligned stack for SSE and friends. + gcc provides that, but others don't necessarily. */ +#ifdef ABI_ALIGN_FUN +#ifndef attribute_align_arg +#if defined(__GNUC__) && (__GNUC__ > 4 || __GNUC__ == 4 && __GNUC_MINOR__>1) +# define attribute_align_arg __attribute__((force_align_arg_pointer)) +/* The gcc that can align the stack does not need the check... nor does it work with gcc 4.3+, anyway. */ +#else + +# define attribute_align_arg +/* Other compilers get code to catch misaligned stack. + Well, except Sun Studio, which accepts the aligned attribute but does not honor it. */ +#if !defined(__SUNPRO_C) +# define NEED_ALIGNCHECK +#endif + +#endif +#endif +#else +#define attribute_align_arg +/* We won't try the align check... */ +#endif + +#endif Index: include/reactos/libs/libmpg123/check_neon.S =================================================================== --- include/reactos/libs/libmpg123/check_neon.S (revision 0) +++ include/reactos/libs/libmpg123/check_neon.S (working copy) @@ -0,0 +1,33 @@ +/* + check_neon: check NEON availability + + copyright 1995-2014 by the mpg123 project - free software under the terms of the LGPL 2.1 + see COPYING and AUTHORS files in distribution or http://mpg123.org + initially written by Taihei Momma +*/ + +#include "mangle.h" + +#ifndef __aarch64__ + .code 32 +#ifndef __APPLE__ + .fpu neon +#endif +#endif + + .text + .globl ASM_NAME(check_neon) +#ifdef __ELF__ + .type ASM_NAME(check_neon), %function +#endif + ALIGN4 +ASM_NAME(check_neon): +#ifdef __aarch64__ + orr v0.16b, v0.16b, v0.16b + ret +#else + vorr d0, d0, d0 + bx lr +#endif + +NONEXEC_STACK Index: include/reactos/libs/libmpg123/compat.c =================================================================== --- include/reactos/libs/libmpg123/compat.c (revision 0) +++ include/reactos/libs/libmpg123/compat.c (working copy) @@ -0,0 +1,138 @@ +/* + compat: Some compatibility functions. + + The mpg123 code is determined to keep it's legacy. A legacy of old, old UNIX. + So anything possibly somewhat advanced should be considered to be put here, with proper #ifdef;-) + + copyright 2007-8 by the mpg123 project - free software under the terms of the LGPL 2.1 + see COPYING and AUTHORS files in distribution or http://mpg123.org + initially written by Thomas Orgis, Windows Unicode stuff by JonY. +*/ + +#include "config.h" +#include "compat.h" + +#ifdef _MSC_VER +#include +#else +#include +#endif +#include + +#ifdef WANT_WIN32_UNICODE +#include +#include +#include +#endif + +#include "debug.h" + +/* A safe realloc also for very old systems where realloc(NULL, size) returns NULL. */ +void *safe_realloc(void *ptr, size_t size) +{ + if(ptr == NULL) return malloc(size); + else return realloc(ptr, size); +} + +#ifndef HAVE_STRERROR +const char *strerror(int errnum) +{ + extern int sys_nerr; + extern char *sys_errlist[]; + + return (errnum < sys_nerr) ? 
sys_errlist[errnum] : ""; +} +#endif + +#ifndef HAVE_STRDUP +char *strdup(const char *src) +{ + char *dest; + + if (!(dest = (char *) malloc(strlen(src)+1))) + return NULL; + else + return strcpy(dest, src); +} +#endif + +int compat_open(const char *filename, int flags) +{ + int ret; +#if defined (WANT_WIN32_UNICODE) + wchar_t *frag = NULL; + + ret = win32_utf8_wide(filename, &frag, NULL); + if ((frag == NULL) || (ret == 0)) goto fallback; /* Fallback to plain open when ucs-2 conversion fails */ + + ret = _wopen(frag, flags); /*Try _wopen */ + if (ret != -1 ) goto open_ok; /* msdn says -1 means failure */ + +fallback: +#endif + +#if (defined(WIN32) && !defined (__CYGWIN__)) /* MSDN says POSIX function is deprecated beginning in Visual C++ 2005 */ + ret = _open(filename, flags); /* Try plain old _open(), if it fails, do nothing */ +#else + /* On UNIX, we always add a default permission mask in case flags|O_CREAT. */ + ret = open(filename, flags, S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|S_IROTH|S_IWOTH); +#endif + +#if defined (WANT_WIN32_UNICODE) +open_ok: + free ((void *)frag); /* Freeing a NULL should be OK */ +#endif + + return ret; +} + +int compat_close(int infd) +{ +#if (defined(WIN32) && !defined (__CYGWIN__)) /* MSDN says POSIX function is deprecated beginning in Visual C++ 2005 */ + return _close(infd); +#else + return close(infd); +#endif +} + +/* Windows Unicode stuff */ + +#ifdef WANT_WIN32_UNICODE +int win32_wide_utf8(const wchar_t * const wptr, char **mbptr, size_t * buflen) +{ + size_t len; + char *buf; + int ret = 0; + + len = WideCharToMultiByte(CP_UTF8, 0, wptr, -1, NULL, 0, NULL, NULL); /* Get utf-8 string length */ + buf = calloc(len + 1, sizeof (char)); /* Can we assume sizeof char always = 1? */ + + if(!buf) len = 0; + else { + if (len != 0) ret = WideCharToMultiByte(CP_UTF8, 0, wptr, -1, buf, len, NULL, NULL); /*Do actual conversion*/ + buf[len] = '0'; /* Must terminate */ + } + *mbptr = buf; /* Set string pointer to allocated buffer */ + if(buflen != NULL) *buflen = (len) * sizeof (char); /* Give length of allocated memory if needed. */ + return ret; +} + +int win32_utf8_wide(const char *const mbptr, wchar_t **wptr, size_t *buflen) +{ + size_t len; + wchar_t *buf; + int ret = 0; + + len = MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, mbptr, -1, NULL, 0); /* Get converted size */ + buf = calloc(len + 1, sizeof (wchar_t)); /* Allocate memory accordingly */ + + if(!buf) len = 0; + else { + if (len != 0) ret = MultiByteToWideChar (CP_UTF8, MB_ERR_INVALID_CHARS, mbptr, -1, buf, len); /* Do conversion */ + buf[len] = L'0'; /* Must terminate */ + } + *wptr = buf; /* Set string pointer to allocated buffer */ + if (buflen != NULL) *buflen = len * sizeof (wchar_t); /* Give length of allocated memory if needed. */ + return ret; /* Number of characters written */ +} +#endif Index: include/reactos/libs/libmpg123/compat.h =================================================================== --- include/reactos/libs/libmpg123/compat.h (revision 63976) +++ include/reactos/libs/libmpg123/compat.h (working copy) @@ -15,6 +15,7 @@ #define MPG123_COMPAT_H #include "config.h" +#include "intsym.h" #ifdef HAVE_STDLIB_H /* realloc, size_t */ @@ -75,6 +76,9 @@ #include #endif +/* compat_open makes little sense without */ +#include + /* To parse big numbers... */ #ifdef HAVE_ATOLL #define atobigint atoll @@ -82,7 +86,7 @@ #define atobigint atol #endif -// typedef unsigned char byte; +typedef unsigned char byte; /* A safe realloc also for very old systems where realloc(NULL, size) returns NULL. 
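   A minimal usage sketch of the function declared below (illustrative only; the
   behaviour matches the compat.c definition added above):

       char *buf = safe_realloc(NULL, 16);  // behaves like malloc(16) even where realloc(NULL, n) fails
       buf = safe_realloc(buf, 32);         // ordinary realloc once the pointer is non-NULL
       free(buf);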
*/ void *safe_realloc(void *ptr, size_t size); @@ -128,7 +132,7 @@ * @param[in] mbptr Pointer to multibyte string. * @return file descriptor (>=0) or error code. */ -int compat_open(const char *filename, int mode); +int compat_open(const char *filename, int flags); /** * Closing a file handle can be platform specific. @@ -152,7 +156,7 @@ * * WideCharToMultiByte - http://msdn.microsoft.com/en-us/library/dd374130(VS.85).aspx */ -int win32_wide_utf8 (const wchar_t * const wptr, const char **const mbptr, size_t * const buflen); +int win32_wide_utf8(const wchar_t * const wptr, char **mbptr, size_t * buflen); /** * win32_mbc2uni @@ -166,7 +170,7 @@ * MultiByteToWideChar - http://msdn.microsoft.com/en-us/library/dd319072(VS.85).aspx */ -int win32_utf8_wide (const char *const mbptr, const wchar_t ** const wptr, size_t * const buflen); +int win32_utf8_wide(const char *const mbptr, wchar_t **wptr, size_t *buflen); #endif /* That one comes from Tellie on OS/2, needed in resolver. */ @@ -174,4 +178,6 @@ typedef int socklen_t; #endif +#include "true.h" + #endif Index: include/reactos/libs/libmpg123/dct36_3dnow.S =================================================================== --- include/reactos/libs/libmpg123/dct36_3dnow.S (revision 0) +++ include/reactos/libs/libmpg123/dct36_3dnow.S (working copy) @@ -0,0 +1,505 @@ +/* + dct64_3dnow.s: Replacement of dct36() with AMD's 3DNow! SIMD operations support + + copyright ?-2006 by the mpg123 project - free software under the terms of the LGPL 2.1 + see COPYING and AUTHORS files in distribution or http://mpg123.org + initially written by Syuuhei Kashiyama + + This code based 'dct36_3dnow.s' by Syuuhei Kashiyama + ,only two types of changes have been made: + + - remove PREFETCH instruction for speedup + - change function name for support 3DNow! automatic detect + + You can find Kashiyama's original 3dnow! support patch + (for mpg123-0.59o) at + http://user.ecc.u-tokyo.ac.jp/~g810370/linux-simd/ (Japanese). + + by KIMURA Takuhiro - until 31.Mar.1999 + - after 1.Apr.1999 + + Replacement of dct36() with AMD's 3DNow! SIMD operations support + + Syuuhei Kashiyama + + The author of this program disclaim whole expressed or implied + warranties with regard to this program, and in no event shall the + author of this program liable to whatever resulted from the use of + this program. Use it at your own risk. 
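  For reference, the routine below implements the same C-level interface as the other
  dct36 variants added in this patch (compare the prototype comment in dct36_sse.S and
  dct36_x86_64.S):

      void dct36_3dnow(real *inbuf, real *o1, real *o2, real *wintab, real *tsbuf);

  The five cdecl stack arguments are what the prologue loads from 8(%ebp) through
  24(%ebp) into %eax, %esi, %ecx, %edx and %ebx.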
+*/ + +#include "mangle.h" + + .globl ASM_NAME(dct36_3dnow) +/* .type ASM_NAME(dct36_3dnow),@function */ +ASM_NAME(dct36_3dnow): + pushl %ebp + movl %esp,%ebp + subl $120,%esp + pushl %esi + pushl %ebx + movl 8(%ebp),%eax + movl 12(%ebp),%esi + movl 16(%ebp),%ecx + movl 20(%ebp),%edx + movl 24(%ebp),%ebx + leal -128(%ebp),%esp + + femms + movq (%eax),%mm0 + movq 4(%eax),%mm1 + pfadd %mm1,%mm0 + movq %mm0,4(%eax) + psrlq $32,%mm1 + movq 12(%eax),%mm2 + punpckldq %mm2,%mm1 + pfadd %mm2,%mm1 + movq %mm1,12(%eax) + psrlq $32,%mm2 + movq 20(%eax),%mm3 + punpckldq %mm3,%mm2 + pfadd %mm3,%mm2 + movq %mm2,20(%eax) + psrlq $32,%mm3 + movq 28(%eax),%mm4 + punpckldq %mm4,%mm3 + pfadd %mm4,%mm3 + movq %mm3,28(%eax) + psrlq $32,%mm4 + movq 36(%eax),%mm5 + punpckldq %mm5,%mm4 + pfadd %mm5,%mm4 + movq %mm4,36(%eax) + psrlq $32,%mm5 + movq 44(%eax),%mm6 + punpckldq %mm6,%mm5 + pfadd %mm6,%mm5 + movq %mm5,44(%eax) + psrlq $32,%mm6 + movq 52(%eax),%mm7 + punpckldq %mm7,%mm6 + pfadd %mm7,%mm6 + movq %mm6,52(%eax) + psrlq $32,%mm7 + movq 60(%eax),%mm0 + punpckldq %mm0,%mm7 + pfadd %mm0,%mm7 + movq %mm7,60(%eax) + psrlq $32,%mm0 + movd 68(%eax),%mm1 + pfadd %mm1,%mm0 + movd %mm0,68(%eax) + movd 4(%eax),%mm0 + movd 12(%eax),%mm1 + punpckldq %mm1,%mm0 + punpckldq 20(%eax),%mm1 + pfadd %mm1,%mm0 + movd %mm0,12(%eax) + psrlq $32,%mm0 + movd %mm0,20(%eax) + psrlq $32,%mm1 + movd 28(%eax),%mm2 + punpckldq %mm2,%mm1 + punpckldq 36(%eax),%mm2 + pfadd %mm2,%mm1 + movd %mm1,28(%eax) + psrlq $32,%mm1 + movd %mm1,36(%eax) + psrlq $32,%mm2 + movd 44(%eax),%mm3 + punpckldq %mm3,%mm2 + punpckldq 52(%eax),%mm3 + pfadd %mm3,%mm2 + movd %mm2,44(%eax) + psrlq $32,%mm2 + movd %mm2,52(%eax) + psrlq $32,%mm3 + movd 60(%eax),%mm4 + punpckldq %mm4,%mm3 + punpckldq 68(%eax),%mm4 + pfadd %mm4,%mm3 + movd %mm3,60(%eax) + psrlq $32,%mm3 + movd %mm3,68(%eax) + + movq 24(%eax),%mm0 + movq 48(%eax),%mm1 + movd ASM_NAME(COS9)+12,%mm2 + punpckldq %mm2,%mm2 + movd ASM_NAME(COS9)+24,%mm3 + punpckldq %mm3,%mm3 + pfmul %mm2,%mm0 + pfmul %mm3,%mm1 + pushl %eax + movl $1,%eax + movd %eax,%mm7 + pi2fd %mm7,%mm7 + popl %eax + movq 8(%eax),%mm2 + movd ASM_NAME(COS9)+4,%mm3 + punpckldq %mm3,%mm3 + pfmul %mm3,%mm2 + pfadd %mm0,%mm2 + movq 40(%eax),%mm3 + movd ASM_NAME(COS9)+20,%mm4 + punpckldq %mm4,%mm4 + pfmul %mm4,%mm3 + pfadd %mm3,%mm2 + movq 56(%eax),%mm3 + movd ASM_NAME(COS9)+28,%mm4 + punpckldq %mm4,%mm4 + pfmul %mm4,%mm3 + pfadd %mm3,%mm2 + movq (%eax),%mm3 + movq 16(%eax),%mm4 + movd ASM_NAME(COS9)+8,%mm5 + punpckldq %mm5,%mm5 + pfmul %mm5,%mm4 + pfadd %mm4,%mm3 + movq 32(%eax),%mm4 + movd ASM_NAME(COS9)+16,%mm5 + punpckldq %mm5,%mm5 + pfmul %mm5,%mm4 + pfadd %mm4,%mm3 + pfadd %mm1,%mm3 + movq 64(%eax),%mm4 + movd ASM_NAME(COS9)+32,%mm5 + punpckldq %mm5,%mm5 + pfmul %mm5,%mm4 + pfadd %mm4,%mm3 + movq %mm2,%mm4 + pfadd %mm3,%mm4 + movq %mm7,%mm5 + punpckldq ASM_NAME(tfcos36)+0,%mm5 + pfmul %mm5,%mm4 + movq %mm4,%mm5 + pfacc %mm5,%mm5 + movd 108(%edx),%mm6 + punpckldq 104(%edx),%mm6 + pfmul %mm6,%mm5 + movd %mm5,36(%ecx) + psrlq $32,%mm5 + movd %mm5,32(%ecx) + movq %mm4,%mm6 + punpckldq %mm6,%mm5 + pfsub %mm6,%mm5 + punpckhdq %mm5,%mm5 + movd 32(%edx),%mm6 + punpckldq 36(%edx),%mm6 + pfmul %mm6,%mm5 + movd 32(%esi),%mm6 + punpckldq 36(%esi),%mm6 + pfadd %mm6,%mm5 + movd %mm5,1024(%ebx) + psrlq $32,%mm5 + movd %mm5,1152(%ebx) + movq %mm3,%mm4 + pfsub %mm2,%mm4 + movq %mm7,%mm5 + punpckldq ASM_NAME(tfcos36)+32,%mm5 + pfmul %mm5,%mm4 + movq %mm4,%mm5 + pfacc %mm5,%mm5 + movd 140(%edx),%mm6 + punpckldq 72(%edx),%mm6 + pfmul %mm6,%mm5 + movd 
%mm5,68(%ecx) + psrlq $32,%mm5 + movd %mm5,0(%ecx) + movq %mm4,%mm6 + punpckldq %mm6,%mm5 + pfsub %mm6,%mm5 + punpckhdq %mm5,%mm5 + movd 0(%edx),%mm6 + punpckldq 68(%edx),%mm6 + pfmul %mm6,%mm5 + movd 0(%esi),%mm6 + punpckldq 68(%esi),%mm6 + pfadd %mm6,%mm5 + movd %mm5,0(%ebx) + psrlq $32,%mm5 + movd %mm5,2176(%ebx) + movq 8(%eax),%mm2 + movq 40(%eax),%mm3 + pfsub %mm3,%mm2 + movq 56(%eax),%mm3 + pfsub %mm3,%mm2 + movd ASM_NAME(COS9)+12,%mm3 + punpckldq %mm3,%mm3 + pfmul %mm3,%mm2 + movq 16(%eax),%mm3 + movq 32(%eax),%mm4 + pfsub %mm4,%mm3 + movq 64(%eax),%mm4 + pfsub %mm4,%mm3 + movd ASM_NAME(COS9)+24,%mm4 + punpckldq %mm4,%mm4 + pfmul %mm4,%mm3 + movq 48(%eax),%mm4 + pfsub %mm4,%mm3 + movq (%eax),%mm4 + pfadd %mm4,%mm3 + movq %mm2,%mm4 + pfadd %mm3,%mm4 + movq %mm7,%mm5 + punpckldq ASM_NAME(tfcos36)+4,%mm5 + pfmul %mm5,%mm4 + movq %mm4,%mm5 + pfacc %mm5,%mm5 + movd 112(%edx),%mm6 + punpckldq 100(%edx),%mm6 + pfmul %mm6,%mm5 + movd %mm5,40(%ecx) + psrlq $32,%mm5 + movd %mm5,28(%ecx) + movq %mm4,%mm6 + punpckldq %mm6,%mm5 + pfsub %mm6,%mm5 + punpckhdq %mm5,%mm5 + movd 28(%edx),%mm6 + punpckldq 40(%edx),%mm6 + pfmul %mm6,%mm5 + movd 28(%esi),%mm6 + punpckldq 40(%esi),%mm6 + pfadd %mm6,%mm5 + movd %mm5,896(%ebx) + psrlq $32,%mm5 + movd %mm5,1280(%ebx) + movq %mm3,%mm4 + pfsub %mm2,%mm4 + movq %mm7,%mm5 + punpckldq ASM_NAME(tfcos36)+28,%mm5 + pfmul %mm5,%mm4 + movq %mm4,%mm5 + pfacc %mm5,%mm5 + movd 136(%edx),%mm6 + punpckldq 76(%edx),%mm6 + pfmul %mm6,%mm5 + movd %mm5,64(%ecx) + psrlq $32,%mm5 + movd %mm5,4(%ecx) + movq %mm4,%mm6 + punpckldq %mm6,%mm5 + pfsub %mm6,%mm5 + punpckhdq %mm5,%mm5 + movd 4(%edx),%mm6 + punpckldq 64(%edx),%mm6 + pfmul %mm6,%mm5 + movd 4(%esi),%mm6 + punpckldq 64(%esi),%mm6 + pfadd %mm6,%mm5 + movd %mm5,128(%ebx) + psrlq $32,%mm5 + movd %mm5,2048(%ebx) + + movq 8(%eax),%mm2 + movd ASM_NAME(COS9)+20,%mm3 + punpckldq %mm3,%mm3 + pfmul %mm3,%mm2 + pfsub %mm0,%mm2 + movq 40(%eax),%mm3 + movd ASM_NAME(COS9)+28,%mm4 + punpckldq %mm4,%mm4 + pfmul %mm4,%mm3 + pfsub %mm3,%mm2 + movq 56(%eax),%mm3 + movd ASM_NAME(COS9)+4,%mm4 + punpckldq %mm4,%mm4 + pfmul %mm4,%mm3 + pfadd %mm3,%mm2 + movq (%eax),%mm3 + movq 16(%eax),%mm4 + movd ASM_NAME(COS9)+32,%mm5 + punpckldq %mm5,%mm5 + pfmul %mm5,%mm4 + pfsub %mm4,%mm3 + movq 32(%eax),%mm4 + movd ASM_NAME(COS9)+8,%mm5 + punpckldq %mm5,%mm5 + pfmul %mm5,%mm4 + pfsub %mm4,%mm3 + pfadd %mm1,%mm3 + movq 64(%eax),%mm4 + movd ASM_NAME(COS9)+16,%mm5 + punpckldq %mm5,%mm5 + pfmul %mm5,%mm4 + pfadd %mm4,%mm3 + movq %mm2,%mm4 + pfadd %mm3,%mm4 + movq %mm7,%mm5 + punpckldq ASM_NAME(tfcos36)+8,%mm5 + pfmul %mm5,%mm4 + movq %mm4,%mm5 + pfacc %mm5,%mm5 + movd 116(%edx),%mm6 + punpckldq 96(%edx),%mm6 + pfmul %mm6,%mm5 + movd %mm5,44(%ecx) + psrlq $32,%mm5 + movd %mm5,24(%ecx) + movq %mm4,%mm6 + punpckldq %mm6,%mm5 + pfsub %mm6,%mm5 + punpckhdq %mm5,%mm5 + movd 24(%edx),%mm6 + punpckldq 44(%edx),%mm6 + pfmul %mm6,%mm5 + movd 24(%esi),%mm6 + punpckldq 44(%esi),%mm6 + pfadd %mm6,%mm5 + movd %mm5,768(%ebx) + psrlq $32,%mm5 + movd %mm5,1408(%ebx) + movq %mm3,%mm4 + pfsub %mm2,%mm4 + movq %mm7,%mm5 + punpckldq ASM_NAME(tfcos36)+24,%mm5 + pfmul %mm5,%mm4 + movq %mm4,%mm5 + pfacc %mm5,%mm5 + movd 132(%edx),%mm6 + punpckldq 80(%edx),%mm6 + pfmul %mm6,%mm5 + movd %mm5,60(%ecx) + psrlq $32,%mm5 + movd %mm5,8(%ecx) + movq %mm4,%mm6 + punpckldq %mm6,%mm5 + pfsub %mm6,%mm5 + punpckhdq %mm5,%mm5 + movd 8(%edx),%mm6 + punpckldq 60(%edx),%mm6 + pfmul %mm6,%mm5 + movd 8(%esi),%mm6 + punpckldq 60(%esi),%mm6 + pfadd %mm6,%mm5 + movd %mm5,256(%ebx) + psrlq $32,%mm5 + movd 
%mm5,1920(%ebx) + movq 8(%eax),%mm2 + movd ASM_NAME(COS9)+28,%mm3 + punpckldq %mm3,%mm3 + pfmul %mm3,%mm2 + pfsub %mm0,%mm2 + movq 40(%eax),%mm3 + movd ASM_NAME(COS9)+4,%mm4 + punpckldq %mm4,%mm4 + pfmul %mm4,%mm3 + pfadd %mm3,%mm2 + movq 56(%eax),%mm3 + movd ASM_NAME(COS9)+20,%mm4 + punpckldq %mm4,%mm4 + pfmul %mm4,%mm3 + pfsub %mm3,%mm2 + movq (%eax),%mm3 + movq 16(%eax),%mm4 + movd ASM_NAME(COS9)+16,%mm5 + punpckldq %mm5,%mm5 + pfmul %mm5,%mm4 + pfsub %mm4,%mm3 + movq 32(%eax),%mm4 + movd ASM_NAME(COS9)+32,%mm5 + punpckldq %mm5,%mm5 + pfmul %mm5,%mm4 + pfadd %mm4,%mm3 + pfadd %mm1,%mm3 + movq 64(%eax),%mm4 + movd ASM_NAME(COS9)+8,%mm5 + punpckldq %mm5,%mm5 + pfmul %mm5,%mm4 + pfsub %mm4,%mm3 + movq %mm2,%mm4 + pfadd %mm3,%mm4 + movq %mm7,%mm5 + punpckldq ASM_NAME(tfcos36)+12,%mm5 + pfmul %mm5,%mm4 + movq %mm4,%mm5 + pfacc %mm5,%mm5 + movd 120(%edx),%mm6 + punpckldq 92(%edx),%mm6 + pfmul %mm6,%mm5 + movd %mm5,48(%ecx) + psrlq $32,%mm5 + movd %mm5,20(%ecx) + movq %mm4,%mm6 + punpckldq %mm6,%mm5 + pfsub %mm6,%mm5 + punpckhdq %mm5,%mm5 + movd 20(%edx),%mm6 + punpckldq 48(%edx),%mm6 + pfmul %mm6,%mm5 + movd 20(%esi),%mm6 + punpckldq 48(%esi),%mm6 + pfadd %mm6,%mm5 + movd %mm5,640(%ebx) + psrlq $32,%mm5 + movd %mm5,1536(%ebx) + movq %mm3,%mm4 + pfsub %mm2,%mm4 + movq %mm7,%mm5 + punpckldq ASM_NAME(tfcos36)+20,%mm5 + pfmul %mm5,%mm4 + movq %mm4,%mm5 + pfacc %mm5,%mm5 + movd 128(%edx),%mm6 + punpckldq 84(%edx),%mm6 + pfmul %mm6,%mm5 + movd %mm5,56(%ecx) + psrlq $32,%mm5 + movd %mm5,12(%ecx) + movq %mm4,%mm6 + punpckldq %mm6,%mm5 + pfsub %mm6,%mm5 + punpckhdq %mm5,%mm5 + movd 12(%edx),%mm6 + punpckldq 56(%edx),%mm6 + pfmul %mm6,%mm5 + movd 12(%esi),%mm6 + punpckldq 56(%esi),%mm6 + pfadd %mm6,%mm5 + movd %mm5,384(%ebx) + psrlq $32,%mm5 + movd %mm5,1792(%ebx) + + movq (%eax),%mm4 + movq 16(%eax),%mm3 + pfsub %mm3,%mm4 + movq 32(%eax),%mm3 + pfadd %mm3,%mm4 + movq 48(%eax),%mm3 + pfsub %mm3,%mm4 + movq 64(%eax),%mm3 + pfadd %mm3,%mm4 + movq %mm7,%mm5 + punpckldq ASM_NAME(tfcos36)+16,%mm5 + pfmul %mm5,%mm4 + movq %mm4,%mm5 + pfacc %mm5,%mm5 + movd 124(%edx),%mm6 + punpckldq 88(%edx),%mm6 + pfmul %mm6,%mm5 + movd %mm5,52(%ecx) + psrlq $32,%mm5 + movd %mm5,16(%ecx) + movq %mm4,%mm6 + punpckldq %mm6,%mm5 + pfsub %mm6,%mm5 + punpckhdq %mm5,%mm5 + movd 16(%edx),%mm6 + punpckldq 52(%edx),%mm6 + pfmul %mm6,%mm5 + movd 16(%esi),%mm6 + punpckldq 52(%esi),%mm6 + pfadd %mm6,%mm5 + movd %mm5,512(%ebx) + psrlq $32,%mm5 + movd %mm5,1664(%ebx) + + femms + popl %ebx + popl %esi + movl %ebp,%esp + popl %ebp + ret + +NONEXEC_STACK Index: include/reactos/libs/libmpg123/dct36_3dnowext.S =================================================================== --- include/reactos/libs/libmpg123/dct36_3dnowext.S (revision 0) +++ include/reactos/libs/libmpg123/dct36_3dnowext.S (working copy) @@ -0,0 +1,512 @@ +/* + dct36_3dnowext: extended 3DNow optimized DCT36 + + copyright ?-2007 by the mpg123 project - free software under the terms of the LGPL 2.1 + see COPYING and AUTHORS files in distribution or http://mpg123.org + + Transformed back into standalone asm, with help of + gcc -S -DHAVE_CONFIG_H -I. -march=k6-3 -O3 -Wall -pedantic -fno-strict-aliasing -DREAL_IS_FLOAT -c -o dct36_3dnowext.{S,c} + + MPlayer comment follows. +*/ + +/* + * dct36_3dnow.c - 3DNow! optimized dct36() + * + * This code based 'dct36_3dnow.s' by Syuuhei Kashiyama + * , only two types of changes have been made: + * + * - removed PREFETCH instruction for speedup + * - changed function name for support 3DNow! 
automatic detection + * + * You can find Kashiyama's original 3dnow! support patch + * (for mpg123-0.59o) at + * http://user.ecc.u-tokyo.ac.jp/~g810370/linux-simd/ (Japanese). + * + * by KIMURA Takuhiro - until 31.Mar.1999 + * - after 1.Apr.1999 + * + * Modified for use with MPlayer, for details see the changelog at + * http://svn.mplayerhq.hu/mplayer/trunk/ + * $Id: dct36_3dnow.c 18786 2006-06-22 13:34:00Z diego $ + * + * Original disclaimer: + * The author of this program disclaim whole expressed or implied + * warranties with regard to this program, and in no event shall the + * author of this program liable to whatever resulted from the use of + * this program. Use it at your own risk. + * + * 2003/06/21: Moved to GCC inline assembly - Alex Beregszaszi + */ + +#include "mangle.h" + + .text + ALIGN32 +.globl ASM_NAME(dct36_3dnowext) + /* .type ASM_NAME(dct36_3dnowext), @function */ +ASM_NAME(dct36_3dnowext): + pushl %ebp + movl %esp, %ebp + pushl %esi + pushl %ebx + movl 8(%ebp), %eax + movl 12(%ebp), %esi + movl 16(%ebp), %ecx + movl 20(%ebp), %edx + movl 24(%ebp), %ebx +/* APP */ + movq (%eax),%mm0 + movq 4(%eax),%mm1 + pfadd %mm1,%mm0 + movq %mm0,4(%eax) + psrlq $32,%mm1 + movq 12(%eax),%mm2 + punpckldq %mm2,%mm1 + pfadd %mm2,%mm1 + movq %mm1,12(%eax) + psrlq $32,%mm2 + movq 20(%eax),%mm3 + punpckldq %mm3,%mm2 + pfadd %mm3,%mm2 + movq %mm2,20(%eax) + psrlq $32,%mm3 + movq 28(%eax),%mm4 + punpckldq %mm4,%mm3 + pfadd %mm4,%mm3 + movq %mm3,28(%eax) + psrlq $32,%mm4 + movq 36(%eax),%mm5 + punpckldq %mm5,%mm4 + pfadd %mm5,%mm4 + movq %mm4,36(%eax) + psrlq $32,%mm5 + movq 44(%eax),%mm6 + punpckldq %mm6,%mm5 + pfadd %mm6,%mm5 + movq %mm5,44(%eax) + psrlq $32,%mm6 + movq 52(%eax),%mm7 + punpckldq %mm7,%mm6 + pfadd %mm7,%mm6 + movq %mm6,52(%eax) + psrlq $32,%mm7 + movq 60(%eax),%mm0 + punpckldq %mm0,%mm7 + pfadd %mm0,%mm7 + movq %mm7,60(%eax) + psrlq $32,%mm0 + movd 68(%eax),%mm1 + pfadd %mm1,%mm0 + movd %mm0,68(%eax) + movd 4(%eax),%mm0 + movd 12(%eax),%mm1 + punpckldq %mm1,%mm0 + punpckldq 20(%eax),%mm1 + pfadd %mm1,%mm0 + movd %mm0,12(%eax) + psrlq $32,%mm0 + movd %mm0,20(%eax) + psrlq $32,%mm1 + movd 28(%eax),%mm2 + punpckldq %mm2,%mm1 + punpckldq 36(%eax),%mm2 + pfadd %mm2,%mm1 + movd %mm1,28(%eax) + psrlq $32,%mm1 + movd %mm1,36(%eax) + psrlq $32,%mm2 + movd 44(%eax),%mm3 + punpckldq %mm3,%mm2 + punpckldq 52(%eax),%mm3 + pfadd %mm3,%mm2 + movd %mm2,44(%eax) + psrlq $32,%mm2 + movd %mm2,52(%eax) + psrlq $32,%mm3 + movd 60(%eax),%mm4 + punpckldq %mm4,%mm3 + punpckldq 68(%eax),%mm4 + pfadd %mm4,%mm3 + movd %mm3,60(%eax) + psrlq $32,%mm3 + movd %mm3,68(%eax) + movq 24(%eax),%mm0 + movq 48(%eax),%mm1 + movd ASM_NAME(COS9)+12,%mm2 + punpckldq %mm2,%mm2 + movd ASM_NAME(COS9)+24,%mm3 + punpckldq %mm3,%mm3 + pfmul %mm2,%mm0 + pfmul %mm3,%mm1 + pushl %eax + movl $1,%eax + movd %eax,%mm7 + pi2fd %mm7,%mm7 + popl %eax + movq 8(%eax),%mm2 + movd ASM_NAME(COS9)+4,%mm3 + punpckldq %mm3,%mm3 + pfmul %mm3,%mm2 + pfadd %mm0,%mm2 + movq 40(%eax),%mm3 + movd ASM_NAME(COS9)+20,%mm4 + punpckldq %mm4,%mm4 + pfmul %mm4,%mm3 + pfadd %mm3,%mm2 + movq 56(%eax),%mm3 + movd ASM_NAME(COS9)+28,%mm4 + punpckldq %mm4,%mm4 + pfmul %mm4,%mm3 + pfadd %mm3,%mm2 + movq (%eax),%mm3 + movq 16(%eax),%mm4 + movd ASM_NAME(COS9)+8,%mm5 + punpckldq %mm5,%mm5 + pfmul %mm5,%mm4 + pfadd %mm4,%mm3 + movq 32(%eax),%mm4 + movd ASM_NAME(COS9)+16,%mm5 + punpckldq %mm5,%mm5 + pfmul %mm5,%mm4 + pfadd %mm4,%mm3 + pfadd %mm1,%mm3 + movq 64(%eax),%mm4 + movd ASM_NAME(COS9)+32,%mm5 + punpckldq %mm5,%mm5 + pfmul %mm5,%mm4 + pfadd %mm4,%mm3 + 
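/* Note on the constant pair built a few lines above: "movl $1 / movd / pi2fd" leaves
   %mm7 = {1.0f, 0.0f}; "punpckldq ASM_NAME(tfcos36)+..., %mm5" then forms
   %mm5 = {1.0f, tfcos36[k]}, so the single pfmul that follows scales only the upper
   lane by the cosine and passes the lower lane through.  Rough C equivalent (lane
   names sum0/sum1 are illustrative, k is whichever table entry the offset selects):

       float pair[2] = { 1.0f, tfcos36[k] };
       sum0 *= pair[0];   // unchanged
       sum1 *= pair[1];   // scaled by the cosine
*/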
movq %mm2,%mm4 + pfadd %mm3,%mm4 + movq %mm7,%mm5 + punpckldq ASM_NAME(tfcos36)+0,%mm5 + pfmul %mm5,%mm4 + movq %mm4,%mm5 + pfacc %mm5,%mm5 + movd 108(%edx),%mm6 + punpckldq 104(%edx),%mm6 + pfmul %mm6,%mm5 + pswapd %mm5,%mm5 + movq %mm5,32(%ecx) + movq %mm4,%mm6 + punpckldq %mm6,%mm5 + pfsub %mm6,%mm5 + punpckhdq %mm5,%mm5 + movd 32(%edx),%mm6 + punpckldq 36(%edx),%mm6 + pfmul %mm6,%mm5 + movd 32(%esi),%mm6 + punpckldq 36(%esi),%mm6 + pfadd %mm6,%mm5 + movd %mm5,1024(%ebx) + psrlq $32,%mm5 + movd %mm5,1152(%ebx) + movq %mm3,%mm4 + pfsub %mm2,%mm4 + movq %mm7,%mm5 + punpckldq ASM_NAME(tfcos36)+32,%mm5 + pfmul %mm5,%mm4 + movq %mm4,%mm5 + pfacc %mm5,%mm5 + movd 140(%edx),%mm6 + punpckldq 72(%edx),%mm6 + pfmul %mm6,%mm5 + movd %mm5,68(%ecx) + psrlq $32,%mm5 + movd %mm5,0(%ecx) + movq %mm4,%mm6 + punpckldq %mm6,%mm5 + pfsub %mm6,%mm5 + punpckhdq %mm5,%mm5 + movd 0(%edx),%mm6 + punpckldq 68(%edx),%mm6 + pfmul %mm6,%mm5 + movd 0(%esi),%mm6 + punpckldq 68(%esi),%mm6 + pfadd %mm6,%mm5 + movd %mm5,0(%ebx) + psrlq $32,%mm5 + movd %mm5,2176(%ebx) + movq 8(%eax),%mm2 + movq 40(%eax),%mm3 + pfsub %mm3,%mm2 + movq 56(%eax),%mm3 + pfsub %mm3,%mm2 + movd ASM_NAME(COS9)+12,%mm3 + punpckldq %mm3,%mm3 + pfmul %mm3,%mm2 + movq 16(%eax),%mm3 + movq 32(%eax),%mm4 + pfsub %mm4,%mm3 + movq 64(%eax),%mm4 + pfsub %mm4,%mm3 + movd ASM_NAME(COS9)+24,%mm4 + punpckldq %mm4,%mm4 + pfmul %mm4,%mm3 + movq 48(%eax),%mm4 + pfsub %mm4,%mm3 + movq (%eax),%mm4 + pfadd %mm4,%mm3 + movq %mm2,%mm4 + pfadd %mm3,%mm4 + movq %mm7,%mm5 + punpckldq ASM_NAME(tfcos36)+4,%mm5 + pfmul %mm5,%mm4 + movq %mm4,%mm5 + pfacc %mm5,%mm5 + movd 112(%edx),%mm6 + punpckldq 100(%edx),%mm6 + pfmul %mm6,%mm5 + movd %mm5,40(%ecx) + psrlq $32,%mm5 + movd %mm5,28(%ecx) + movq %mm4,%mm6 + punpckldq %mm6,%mm5 + pfsub %mm6,%mm5 + punpckhdq %mm5,%mm5 + movd 28(%edx),%mm6 + punpckldq 40(%edx),%mm6 + pfmul %mm6,%mm5 + movd 28(%esi),%mm6 + punpckldq 40(%esi),%mm6 + pfadd %mm6,%mm5 + movd %mm5,896(%ebx) + psrlq $32,%mm5 + movd %mm5,1280(%ebx) + movq %mm3,%mm4 + pfsub %mm2,%mm4 + movq %mm7,%mm5 + punpckldq ASM_NAME(tfcos36)+28,%mm5 + pfmul %mm5,%mm4 + movq %mm4,%mm5 + pfacc %mm5,%mm5 + movd 136(%edx),%mm6 + punpckldq 76(%edx),%mm6 + pfmul %mm6,%mm5 + movd %mm5,64(%ecx) + psrlq $32,%mm5 + movd %mm5,4(%ecx) + movq %mm4,%mm6 + punpckldq %mm6,%mm5 + pfsub %mm6,%mm5 + punpckhdq %mm5,%mm5 + movd 4(%edx),%mm6 + punpckldq 64(%edx),%mm6 + pfmul %mm6,%mm5 + movd 4(%esi),%mm6 + punpckldq 64(%esi),%mm6 + pfadd %mm6,%mm5 + movd %mm5,128(%ebx) + psrlq $32,%mm5 + movd %mm5,2048(%ebx) + movq 8(%eax),%mm2 + movd ASM_NAME(COS9)+20,%mm3 + punpckldq %mm3,%mm3 + pfmul %mm3,%mm2 + pfsub %mm0,%mm2 + movq 40(%eax),%mm3 + movd ASM_NAME(COS9)+28,%mm4 + punpckldq %mm4,%mm4 + pfmul %mm4,%mm3 + pfsub %mm3,%mm2 + movq 56(%eax),%mm3 + movd ASM_NAME(COS9)+4,%mm4 + punpckldq %mm4,%mm4 + pfmul %mm4,%mm3 + pfadd %mm3,%mm2 + movq (%eax),%mm3 + movq 16(%eax),%mm4 + movd ASM_NAME(COS9)+32,%mm5 + punpckldq %mm5,%mm5 + pfmul %mm5,%mm4 + pfsub %mm4,%mm3 + movq 32(%eax),%mm4 + movd ASM_NAME(COS9)+8,%mm5 + punpckldq %mm5,%mm5 + pfmul %mm5,%mm4 + pfsub %mm4,%mm3 + pfadd %mm1,%mm3 + movq 64(%eax),%mm4 + movd ASM_NAME(COS9)+16,%mm5 + punpckldq %mm5,%mm5 + pfmul %mm5,%mm4 + pfadd %mm4,%mm3 + movq %mm2,%mm4 + pfadd %mm3,%mm4 + movq %mm7,%mm5 + punpckldq ASM_NAME(tfcos36)+8,%mm5 + pfmul %mm5,%mm4 + movq %mm4,%mm5 + pfacc %mm5,%mm5 + movd 116(%edx),%mm6 + punpckldq 96(%edx),%mm6 + pfmul %mm6,%mm5 + movd %mm5,44(%ecx) + psrlq $32,%mm5 + movd %mm5,24(%ecx) + movq %mm4,%mm6 + punpckldq %mm6,%mm5 + pfsub %mm6,%mm5 + 
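/* pfacc is the 3DNow! horizontal add used throughout this routine: with %mm5 = {a, b},
   "pfacc %mm5,%mm5" yields {a+b, a+b}.  Scalar sketch:

       float s = a + b;   // both lanes of the destination end up holding s
*/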
punpckhdq %mm5,%mm5 + movd 24(%edx),%mm6 + punpckldq 44(%edx),%mm6 + pfmul %mm6,%mm5 + movd 24(%esi),%mm6 + punpckldq 44(%esi),%mm6 + pfadd %mm6,%mm5 + movd %mm5,768(%ebx) + psrlq $32,%mm5 + movd %mm5,1408(%ebx) + movq %mm3,%mm4 + pfsub %mm2,%mm4 + movq %mm7,%mm5 + punpckldq ASM_NAME(tfcos36)+24,%mm5 + pfmul %mm5,%mm4 + movq %mm4,%mm5 + pfacc %mm5,%mm5 + movd 132(%edx),%mm6 + punpckldq 80(%edx),%mm6 + pfmul %mm6,%mm5 + movd %mm5,60(%ecx) + psrlq $32,%mm5 + movd %mm5,8(%ecx) + movq %mm4,%mm6 + punpckldq %mm6,%mm5 + pfsub %mm6,%mm5 + punpckhdq %mm5,%mm5 + movd 8(%edx),%mm6 + punpckldq 60(%edx),%mm6 + pfmul %mm6,%mm5 + movd 8(%esi),%mm6 + punpckldq 60(%esi),%mm6 + pfadd %mm6,%mm5 + movd %mm5,256(%ebx) + psrlq $32,%mm5 + movd %mm5,1920(%ebx) + movq 8(%eax),%mm2 + movd ASM_NAME(COS9)+28,%mm3 + punpckldq %mm3,%mm3 + pfmul %mm3,%mm2 + pfsub %mm0,%mm2 + movq 40(%eax),%mm3 + movd ASM_NAME(COS9)+4,%mm4 + punpckldq %mm4,%mm4 + pfmul %mm4,%mm3 + pfadd %mm3,%mm2 + movq 56(%eax),%mm3 + movd ASM_NAME(COS9)+20,%mm4 + punpckldq %mm4,%mm4 + pfmul %mm4,%mm3 + pfsub %mm3,%mm2 + movq (%eax),%mm3 + movq 16(%eax),%mm4 + movd ASM_NAME(COS9)+16,%mm5 + punpckldq %mm5,%mm5 + pfmul %mm5,%mm4 + pfsub %mm4,%mm3 + movq 32(%eax),%mm4 + movd ASM_NAME(COS9)+32,%mm5 + punpckldq %mm5,%mm5 + pfmul %mm5,%mm4 + pfadd %mm4,%mm3 + pfadd %mm1,%mm3 + movq 64(%eax),%mm4 + movd ASM_NAME(COS9)+8,%mm5 + punpckldq %mm5,%mm5 + pfmul %mm5,%mm4 + pfsub %mm4,%mm3 + movq %mm2,%mm4 + pfadd %mm3,%mm4 + movq %mm7,%mm5 + punpckldq ASM_NAME(tfcos36)+12,%mm5 + pfmul %mm5,%mm4 + movq %mm4,%mm5 + pfacc %mm5,%mm5 + movd 120(%edx),%mm6 + punpckldq 92(%edx),%mm6 + pfmul %mm6,%mm5 + movd %mm5,48(%ecx) + psrlq $32,%mm5 + movd %mm5,20(%ecx) + movq %mm4,%mm6 + punpckldq %mm6,%mm5 + pfsub %mm6,%mm5 + punpckhdq %mm5,%mm5 + movd 20(%edx),%mm6 + punpckldq 48(%edx),%mm6 + pfmul %mm6,%mm5 + movd 20(%esi),%mm6 + punpckldq 48(%esi),%mm6 + pfadd %mm6,%mm5 + movd %mm5,640(%ebx) + psrlq $32,%mm5 + movd %mm5,1536(%ebx) + movq %mm3,%mm4 + pfsub %mm2,%mm4 + movq %mm7,%mm5 + punpckldq ASM_NAME(tfcos36)+20,%mm5 + pfmul %mm5,%mm4 + movq %mm4,%mm5 + pfacc %mm5,%mm5 + movd 128(%edx),%mm6 + punpckldq 84(%edx),%mm6 + pfmul %mm6,%mm5 + movd %mm5,56(%ecx) + psrlq $32,%mm5 + movd %mm5,12(%ecx) + movq %mm4,%mm6 + punpckldq %mm6,%mm5 + pfsub %mm6,%mm5 + punpckhdq %mm5,%mm5 + movd 12(%edx),%mm6 + punpckldq 56(%edx),%mm6 + pfmul %mm6,%mm5 + movd 12(%esi),%mm6 + punpckldq 56(%esi),%mm6 + pfadd %mm6,%mm5 + movd %mm5,384(%ebx) + psrlq $32,%mm5 + movd %mm5,1792(%ebx) + movq (%eax),%mm4 + movq 16(%eax),%mm3 + pfsub %mm3,%mm4 + movq 32(%eax),%mm3 + pfadd %mm3,%mm4 + movq 48(%eax),%mm3 + pfsub %mm3,%mm4 + movq 64(%eax),%mm3 + pfadd %mm3,%mm4 + movq %mm7,%mm5 + punpckldq ASM_NAME(tfcos36)+16,%mm5 + pfmul %mm5,%mm4 + movq %mm4,%mm5 + pfacc %mm5,%mm5 + movd 124(%edx),%mm6 + punpckldq 88(%edx),%mm6 + pfmul %mm6,%mm5 + movd %mm5,52(%ecx) + psrlq $32,%mm5 + movd %mm5,16(%ecx) + movq %mm4,%mm6 + punpckldq %mm6,%mm5 + pfsub %mm6,%mm5 + punpckhdq %mm5,%mm5 + movd 16(%edx),%mm6 + punpckldq 52(%edx),%mm6 + pfmul %mm6,%mm5 + movd 16(%esi),%mm6 + punpckldq 52(%esi),%mm6 + pfadd %mm6,%mm5 + movd %mm5,512(%ebx) + psrlq $32,%mm5 + movd %mm5,1664(%ebx) + femms + +/* NO_APP */ + popl %ebx + popl %esi + leave + ret + /* .size ASM_NAME(dct36_3dnowext), .-ASM_NAME(dct36_3dnowext) */ + +NONEXEC_STACK Index: include/reactos/libs/libmpg123/dct36_avx.S =================================================================== --- include/reactos/libs/libmpg123/dct36_avx.S (revision 0) +++ 
include/reactos/libs/libmpg123/dct36_avx.S (working copy) @@ -0,0 +1,358 @@ +/* + dct36_avx: AVX optimized dct36 for x86-64 + + copyright 1995-2013 by the mpg123 project - free software under the terms of the LGPL 2.1 + see COPYING and AUTHORS files in distribution or http://mpg123.org + initially written by Taihei Monma +*/ + +#include "mangle.h" + +#ifdef IS_MSABI +#define in %rcx +#define out1 %rdx +#define out2 %r8 +#define w %r9 +#define ts %r10 +#define COS9_ %rax +#define tfcos36_ %r11 +#else +#define in %rdi +#define out1 %rsi +#define out2 %rdx +#define w %rcx +#define ts %r8 +#define COS9_ %rax +#define tfcos36_ %r9 +#endif + +/* + void dct36_avx(real *inbuf,real *o1,real *o2,real *wintab,real *tsbuf); +*/ + +#ifndef __APPLE__ + .section .rodata +#else + .data +#endif + ALIGN16 +dct36_avx_COS9: + .long 0x3f5db3d7 + .long 0x3f5db3d7 + .long 0x3f000000 + .long 0x3f000000 + .long 0x3f7c1c5c + .long 0x3f7c1c5c + .long 0x3f708fb2 + .long 0x3f708fb2 + .long 0x3f248dbb + .long 0x3f248dbb + .long 0x3e31d0d4 + .long 0x3e31d0d4 + .long 0x3eaf1d44 + .long 0x3eaf1d44 + .long 0x3f441b7d + .long 0x3f441b7d + ALIGN16 +dct36_avx_tfcos36: + .long 0x3f007d2b + .long 0x3f0483ee + .long 0x3f0d3b7d + .long 0x3f1c4257 + .long 0x40b79454 + .long 0x3ff746ea + .long 0x3f976fd9 + .long 0x3f5f2944 + .long 0x3f3504f3 + ALIGN16 +dct36_avx_sign: + .long 0x80000000,0x80000000,0x80000000,0x80000000 + .text + ALIGN16 + .globl ASM_NAME(dct36_avx) +ASM_NAME(dct36_avx): +#ifdef IS_MSABI + push %rbp + mov %rsp, %rbp + sub $160, %rsp + movaps %xmm6, (%rsp) + movaps %xmm7, 16(%rsp) + movaps %xmm8, 32(%rsp) + movaps %xmm9, 48(%rsp) + movaps %xmm10, 64(%rsp) + movaps %xmm11, 80(%rsp) + movaps %xmm12, 96(%rsp) + movaps %xmm13, 112(%rsp) + movaps %xmm14, 128(%rsp) + movaps %xmm15, 144(%rsp) + movq 48(%rbp), ts +#endif + lea dct36_avx_COS9(%rip), COS9_ + lea dct36_avx_tfcos36(%rip), tfcos36_ + + xorps %xmm4, %xmm4 + movups (in), %xmm0 + movups 16(in), %xmm1 + movups 32(in), %xmm2 + movups 48(in), %xmm3 + movlps 64(in), %xmm4 + vshufps $0x93, %xmm0, %xmm0, %xmm5 + vshufps $0x93, %xmm1, %xmm1, %xmm6 + vshufps $0x93, %xmm2, %xmm2, %xmm7 + vshufps $0x93, %xmm3, %xmm3, %xmm8 + vshufps $0xe1, %xmm4, %xmm4, %xmm9 + movss %xmm8, %xmm9 #[fg--] + addps %xmm9, %xmm4 #[gh--] + movss %xmm7, %xmm8 + addps %xmm8, %xmm3 #[cdef] + movss %xmm6, %xmm7 + addps %xmm7, %xmm2 #[89ab] + movss %xmm5, %xmm6 + addps %xmm6, %xmm1 #[4567] + xorps %xmm6, %xmm6 + movss %xmm6, %xmm5 + addps %xmm5, %xmm0 #[0123] + + vblendps $0x5, %xmm6, %xmm3, %xmm7 + vshufps $0x4e, %xmm4, %xmm3, %xmm4 + addps %xmm7, %xmm4 + vblendps $0x5, %xmm6, %xmm2, %xmm7 + vshufps $0x4e, %xmm3, %xmm2, %xmm3 + addps %xmm7, %xmm3 + vblendps $0x5, %xmm6, %xmm1, %xmm7 + vshufps $0x4e, %xmm2, %xmm1, %xmm2 + addps %xmm7, %xmm2 + vblendps $0x5, %xmm6, %xmm0, %xmm7 + vshufps $0x4e, %xmm1, %xmm0, %xmm1 + addps %xmm7, %xmm1 + vmovlhps %xmm0, %xmm6, %xmm0 + +/* +xmm0 in[-,-,0,1] +xmm1 in[2,3,4,5] +xmm2 in[6,7,8,9] +xmm3 in[10,11,12,13] +xmm4 in[14,15,16,17] +*/ + + vblendps $0xc, %xmm3, %xmm2, %xmm5 + blendps $0xc, %xmm4, %xmm3 + blendps $0xc, %xmm2, %xmm4 + movaps %xmm5, %xmm2 + +/* +xmm2 in[6,7,12,13] +xmm3 in[10,11,16,17] +xmm4 in[14,15,8,9] +*/ + + movaps (COS9_), %xmm15 + movaps 16(COS9_), %xmm6 + movaps 32(COS9_), %xmm7 + movaps 48(COS9_), %xmm8 + vmulps %xmm2, %xmm15, %xmm5 + addps %xmm0, %xmm5 + +/* +xmm5 [ta33,tb33,ta66,tb66] +xmm6 COS9_[1,1,2,2] +xmm7 COS9_[5,5,8,8] +xmm8 COS9_[7,7,4,4] +xmm15 COS9_[3,3,6,6] +*/ + + vmulps %xmm1, %xmm6, %xmm9 + vmulps %xmm3, %xmm7, %xmm12 + vmulps 
%xmm4, %xmm8, %xmm13 + addps %xmm5, %xmm9 + addps %xmm13, %xmm12 + addps %xmm9, %xmm12 + + vsubps %xmm3, %xmm1, %xmm13 + vshufps $0xe0, %xmm2, %xmm0, %xmm14 + vsubps %xmm14, %xmm0, %xmm14 + subps %xmm4, %xmm13 + mulps %xmm15, %xmm13 + addps %xmm14, %xmm13 + + vmulps %xmm1, %xmm7, %xmm9 + vmulps %xmm3, %xmm8, %xmm15 + vmulps %xmm4, %xmm6, %xmm14 + subps %xmm5, %xmm9 + subps %xmm15, %xmm14 + addps %xmm9, %xmm14 + + mulps %xmm1, %xmm8 + mulps %xmm3, %xmm6 + mulps %xmm4, %xmm7 + subps %xmm5, %xmm8 + subps %xmm7, %xmm6 + vaddps %xmm6, %xmm8, %xmm15 + + movss 32(tfcos36_), %xmm5 + subps %xmm1, %xmm0 + subps %xmm2, %xmm4 + addps %xmm3, %xmm0 + addps %xmm4, %xmm0 + shufps $0xaf, %xmm0, %xmm0 + vmulss %xmm5, %xmm0, %xmm11 + +/* +xmm12 [1a-0,1b-0, 2a-0, 2b-0] +xmm13 [1a-1,1b-1, 2a-1, 2b-1] +xmm14 [1a-2,1b-2,-2a-2,-2b-2] +xmm15 [1a-3,1b-3,-2a-3,-2b-3] +*/ + vunpckhps %xmm13, %xmm12, %xmm5 + vunpcklps %xmm13, %xmm12, %xmm12 + vunpckhps %xmm15, %xmm14, %xmm6 + vunpcklps %xmm15, %xmm14, %xmm14 + xorps dct36_avx_sign(%rip), %xmm6 + +/* +xmm12 [1a-0,1a-1,1b-0,1b-1] +xmm5 [2a-0,2a-1,2b-0,2b-1] +xmm14 [1a-2,1a-3,1b-2,1b-3] +xmm6 [2a-2,2a-3,2b-2,2b-3] +*/ + + vmovlhps %xmm14, %xmm12, %xmm0 + movhlps %xmm12, %xmm14 + vmovlhps %xmm6, %xmm5, %xmm1 + vmovhlps %xmm5, %xmm6, %xmm15 + +/* +xmm0 tmp1a +xmm1 tmp2a +xmm14 tmp1b +xmm15 tmp2b +*/ + + movaps (tfcos36_), %xmm6 + movaps 16(tfcos36_), %xmm7 + vsubps %xmm14, %xmm15, %xmm10 + addps %xmm14, %xmm15 + vsubps %xmm0, %xmm1, %xmm14 + addps %xmm1, %xmm0 + vmulps %xmm6, %xmm15, %xmm1 + mulps %xmm10, %xmm7 + +/* +%xmm0 tmp[0,1,2,3] +%xmm1 tmp[17,16,15,14] +%xmm14 tmp[8,7,6,5] +%xmm7 tmp[9,10,11,12] +%xmm11 tmp[13,-,4,-] +*/ + + movups 108(w), %xmm2 + movups 92(w), %xmm3 + shufps $0x1b, %xmm3, %xmm3 + movups 36(w), %xmm4 + movups 20(w), %xmm5 + shufps $0x1b, %xmm5, %xmm5 + vsubps %xmm1, %xmm0, %xmm6 + addps %xmm1, %xmm0 + mulps %xmm0, %xmm2 + mulps %xmm3, %xmm0 + mulps %xmm6, %xmm4 + mulps %xmm5, %xmm6 + movups 36(out1), %xmm1 + movups 20(out1), %xmm3 + shufps $0x1b, %xmm6, %xmm6 + addps %xmm4, %xmm1 + addps %xmm6, %xmm3 + shufps $0x1b, %xmm0, %xmm0 + movups %xmm2, 36(out2) + movups %xmm0, 20(out2) + movss %xmm1, 32*36(ts) + movss %xmm3, 32*20(ts) + movhlps %xmm1, %xmm2 + movhlps %xmm3, %xmm4 + movss %xmm2, 32*44(ts) + movss %xmm4, 32*28(ts) + shufps $0xb1, %xmm1, %xmm1 + shufps $0xb1, %xmm3, %xmm3 + movss %xmm1, 32*40(ts) + movss %xmm3, 32*24(ts) + movhlps %xmm1, %xmm2 + movhlps %xmm3, %xmm4 + movss %xmm2, 32*48(ts) + movss %xmm4, 32*32(ts) + + movhlps %xmm11, %xmm0 + movss 124(w), %xmm2 + movss 88(w), %xmm3 + movss 52(w), %xmm4 + movss 16(w), %xmm5 + movss %xmm0, %xmm6 + addss %xmm11, %xmm0 + subss %xmm11, %xmm6 + mulss %xmm0, %xmm2 + mulss %xmm3, %xmm0 + mulss %xmm6, %xmm4 + mulss %xmm5, %xmm6 + addss 52(out1), %xmm4 + addss 16(out1), %xmm6 + movss %xmm2, 52(out2) + movss %xmm0, 16(out2) + movss %xmm4, 32*52(ts) + movss %xmm6, 32*16(ts) + + movaps %xmm14, %xmm0 + movaps %xmm7, %xmm1 + MOVUAPS 128(w), %xmm2 + movups 72(w), %xmm3 + shufps $0x1b, %xmm2, %xmm2 + movlps 56(w), %xmm4 + movhps 64(w), %xmm4 + MOVUAPS (w), %xmm5 + shufps $0x1b, %xmm4, %xmm4 + vsubps %xmm1, %xmm0, %xmm6 + addps %xmm1, %xmm0 + mulps %xmm0, %xmm2 + mulps %xmm3, %xmm0 + mulps %xmm6, %xmm4 + mulps %xmm5, %xmm6 + movlps 56(out1), %xmm1 + movhps 64(out1), %xmm1 + movups (out1), %xmm3 + shufps $0x1b, %xmm4, %xmm4 + addps %xmm6, %xmm3 + addps %xmm4, %xmm1 + shufps $0x1b, %xmm2, %xmm2 + movups %xmm0, (out2) + movlps %xmm2, 56(out2) + movhps %xmm2, 64(out2) + movss %xmm1, 32*56(ts) + movss %xmm3, (ts) 
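/* Output layout (sketch, variable names illustrative): the scattered stores above write
   one sample per 32-float row of the time-sample buffer, i.e. in C terms roughly

       tsbuf[32 * n] = prev[n] + tmp[n] * wintab[n];   // n = 0..17

   so a displacement such as 32*36(ts) is byte offset 4*32*9, i.e. tsbuf[32*9]. */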
+ movhlps %xmm1, %xmm2 + movhlps %xmm3, %xmm4 + movss %xmm2, 32*64(ts) + movss %xmm4, 32*8(ts) + shufps $0xb1, %xmm1, %xmm1 + shufps $0xb1, %xmm3, %xmm3 + movss %xmm1, 32*60(ts) + movss %xmm3, 32*4(ts) + movhlps %xmm1, %xmm2 + movhlps %xmm3, %xmm4 + movss %xmm2, 32*68(ts) + movss %xmm4, 32*12(ts) + +#ifdef IS_MSABI + movaps (%rsp), %xmm6 + movaps 16(%rsp), %xmm7 + movaps 32(%rsp), %xmm8 + movaps 48(%rsp), %xmm9 + movaps 64(%rsp), %xmm10 + movaps 80(%rsp), %xmm11 + movaps 96(%rsp), %xmm12 + movaps 112(%rsp), %xmm13 + movaps 128(%rsp), %xmm14 + movaps 144(%rsp), %xmm15 + mov %rbp, %rsp + pop %rbp +#endif + ret + +NONEXEC_STACK Index: include/reactos/libs/libmpg123/dct36_neon.S =================================================================== --- include/reactos/libs/libmpg123/dct36_neon.S (revision 0) +++ include/reactos/libs/libmpg123/dct36_neon.S (working copy) @@ -0,0 +1,281 @@ +/* + dct36_neon: ARM NEON optimized dct36 + + copyright 1995-2014 by the mpg123 project - free software under the terms of the LGPL 2.1 + see COPYING and AUTHORS files in distribution or http://mpg123.org + initially written by Taihei Monma +*/ + + +#include "mangle.h" + + .code 32 +#ifndef __APPLE__ + .fpu neon +#endif + + .text + ALIGN16 +dct36_neon_COS9: + .word 0x3f5db3d7 + .word 0x3f5db3d7 + .word 0x3f000000 + .word 0x3f000000 + .word 0x3f7c1c5c + .word 0x3f7c1c5c + .word 0x3f708fb2 + .word 0x3f708fb2 + .word 0x3f248dbb + .word 0x3f248dbb + .word 0x3e31d0d4 + .word 0x3e31d0d4 + .word 0x3eaf1d44 + .word 0x3eaf1d44 + .word 0x3f441b7d + .word 0x3f441b7d + .word 0x3f007d2b + .word 0x3f0483ee + .word 0x3f0d3b7d + .word 0x3f1c4257 + .word 0x40b79454 + .word 0x3ff746ea + .word 0x3f976fd9 + .word 0x3f5f2944 + .word 0x3f800000 + .word 0x3f3504f3 + + ALIGN4 + .globl ASM_NAME(dct36_neon) +#ifdef __ELF__ + .type ASM_NAME(dct36_neon), %function +#endif +ASM_NAME(dct36_neon): + push {r4-r5, lr} + vpush {q4-q7} + ldr r4, [sp, #76] + adr r5, dct36_neon_COS9 + + vceq.i32 q14, q14, q14 + veor q15, q15, q15 + vshl.i64 q14, q14, #32 + vld1.32 {q0, q1}, [r0]! + vld1.32 {q2, q3}, [r0]! + vld1.32 {d8}, [r0] + + vext.8 q5, q15, q0, #12 + vext.8 q6, q0, q1, #12 + vext.8 q7, q1, q2, #12 + vext.8 q8, q2, q3, #12 + vext.8 d18, d7, d8, #4 + vadd.f32 q0, q0, q5 + vadd.f32 q1, q1, q6 + vadd.f32 q2, q2, q7 + vadd.f32 q3, q3, q8 + vadd.f32 d8, d8, d18 + + vext.8 q6, q0, q1, #8 + vext.8 q7, q1, q2, #8 + vext.8 q8, q2, q3, #8 + vext.8 q9, q3, q4, #8 + vand q10, q0, q14 + vext.8 q0, q15, q0, #8 + vand q11, q1, q14 + vand q12, q2, q14 + vand q13, q3, q14 + vadd.f32 q1, q10, q6 + vadd.f32 q2, q11, q7 + vadd.f32 q3, q12, q8 + vadd.f32 q4, q13, q9 + +/* +q0 in[-,-,0,1] +q1 in[2,3,4,5] +q2 in[6,7,8,9] +q3 in[10,11,12,13] +q4 in[14,15,16,17] +*/ + + vswp d5, d7 + vswp d7, d9 + +/* +q2 in[6,7,12,13] +q3 in[10,11,16,17] +q4 in[14,15,8,9] +*/ + + vld1.32 {q5, q6}, [r5, :128]! + vld1.32 {q7, q8}, [r5, :128]! 
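/* The .word values in dct36_neon_COS9 above are IEEE-754 single-precision bit patterns,
   e.g. 0x3f800000 = 1.0f, 0x3f000000 = 0.5f, 0x3f5db3d7 ~ 0.8660254f (cos(pi/6)) and
   0x3f3504f3 ~ 0.70710678f (1/sqrt(2)).  A small C check (sketch, needs <stdint.h> and
   <string.h>):

       uint32_t bits = 0x3f5db3d7;
       float f;
       memcpy(&f, &bits, sizeof f);   // f ~ 0.86602540f
*/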
+ vmov q9, q0 + vmla.f32 q9, q2, q5 + +/* +q6 COS9_[1,1,2,2] +q7 COS9_[5,5,8,8] +q8 COS9_[7,7,4,4] +q5 COS9_[3,3,6,6] +q9 [ta33,tb33,ta66,tb66] +*/ + + vmov q10, q9 + vmov d26, d0 + vmov d27, d5 + vmul.f32 q12, q1, q6 + vsub.f32 q11, q1, q3 + vmla.f32 q10, q3, q7 + vsub.f32 q13, q0, q13 + vmla.f32 q12, q4, q8 + vsub.f32 q11, q11, q4 + vmul.f32 q14, q1, q7 + vmul.f32 q15, q1, q8 + vadd.f32 q12, q12, q10 + vmov q10, q9 + vmla.f32 q13, q11, q5 + vmla.f32 q10, q3, q8 + vmla.f32 q14, q4, q6 + vmla.f32 q9, q4, q7 + vmla.f32 q15, q3, q6 + vsub.f32 q14, q14, q10 + vsub.f32 q15, q15, q9 + +/* +q12 [1a-0,1b-0, 2a-0, 2b-0] +q13 [1a-1,1b-1, 2a-1, 2b-1] +q14 [1a-2,1b-2,-2a-2,-2b-2] +q15 [1a-3,1b-3,-2a-3,-2b-3] +*/ + + vzip.32 q12, q13 + vzip.32 q14, q15 + vneg.f32 q15, q15 + +/* +q12 [1a-0,1a-1,1b-0,1b-1] +q13 [2a-0,2a-1,2b-0,2b-1] +q14 [1a-2,1a-3,1b-2,1b-3] +q15 [2a-2,2a-3,2b-2,2b-3] +*/ + + vswp d25, d28 + vswp d27, d30 + +/* +q12 tmp1a +q13 tmp2a +q14 tmp1b +q15 tmp2b +*/ + vsub.f32 d1, d1, d3 + vsub.f32 d9, d9, d5 + vld1.32 {q5, q6}, [r5, :128]! + vld1.32 {d0}, [r5, :64] + vadd.f32 q10, q14, q15 + vsub.f32 q8, q15, q14 + vadd.f32 d1, d1, d7 + vadd.f32 q9, q12, q13 + vsub.f32 q7, q13, q12 + vadd.f32 d1, d1, d9 + vmul.f32 q10, q10, q5 + vmul.f32 q8, q8, q6 + vmul.f32 d0, d1, d0 + +/* +q9 tmp[0,1,2,3] +q10 tmp[17,16,15,14] +q7 tmp[8,7,6,5] +q8 tmp[9,10,11,12] +d0 tmp[4,13] +*/ + + add r0, r4, #640 + add r5, r3, #20 + vld1.32 {q1,q2}, [r5] + add r5, r3, #92 + vld1.32 {q3,q4}, [r5] + add r5, r1, #20 + vld1.32 {q5,q6}, [r5] + vadd.f32 q11, q9, q10 + vsub.f32 q12, q9, q10 + vmul.f32 q10, q11, q4 + vmla.f32 q6, q12, q2 + vrev64.32 q11, q11 + vrev64.32 q12, q12 + vswp d22, d23 + vswp d24, d25 + vmul.f32 q9, q11, q3 + vmla.f32 q5, q12, q1 + add r5, r2, #20 + vst1.32 {q9,q10}, [r5] + mov r5, #128 + vst1.32 {d10[0]}, [r0], r5 + vst1.32 {d10[1]}, [r0], r5 + vst1.32 {d11[0]}, [r0], r5 + vst1.32 {d11[1]}, [r0], r5 + vst1.32 {d12[0]}, [r0], r5 + vst1.32 {d12[1]}, [r0], r5 + vst1.32 {d13[0]}, [r0], r5 + vst1.32 {d13[1]}, [r0], r5 + + add r0, r4, #1792 + add r5, r3, #56 + vld1.32 {q1}, [r3] + vld1.32 {q2,q3}, [r5] + add r5, r3, #128 + vld1.32 {q4}, [r5] + add r5, r1, #56 + vld1.32 {q5}, [r1] + vld1.32 {q6}, [r5] + vadd.f32 q9, q7, q8 + vsub.f32 q10, q7, q8 + vmul.f32 q7, q9, q3 + vmla.f32 q5, q10, q1 + vrev64.32 q9, q9 + vrev64.32 q10, q10 + vswp d18, d19 + vswp d20, d21 + vmul.f32 q8, q9, q4 + vmla.f32 q6, q10, q2 + add r5, r2, #56 + vst1.32 {q7}, [r2] + vst1.32 {q8}, [r5] + mov r5, #128 + vst1.32 {d10[0]}, [r4], r5 + vst1.32 {d10[1]}, [r4], r5 + vst1.32 {d11[0]}, [r4], r5 + vst1.32 {d11[1]}, [r4], r5 + vst1.32 {d12[0]}, [r0], r5 + vst1.32 {d12[1]}, [r0], r5 + vst1.32 {d13[0]}, [r0], r5 + vst1.32 {d13[1]}, [r0], r5 + + vtrn.32 d0, d1 + add r5, r3, #16 + vld1.32 {d2}, [r5] + add r5, r3, #52 + vld1.32 {d3}, [r5] + add r5, r3, #88 + vld1.32 {d4}, [r5] + add r3, r3, #124 + vld1.32 {d5}, [r3] + add r5, r1, #16 + vld1.32 {d6}, [r5] + add r1, r1, #52 + vld1.32 {d7}, [r1] + vadd.f32 d8, d0, d1 + vsub.f32 d9, d0, d1 + vmul.f32 d4, d8, d4 + vmul.f32 d5, d8, d5 + vmla.f32 d6, d9, d2 + vmla.f32 d7, d9, d3 + add r2, r2, #16 + vst1.32 {d4[0]}, [r2] + add r2, r2, #36 + vst1.32 {d5[0]}, [r2] + vst1.32 {d6[0]}, [r4] + add r4, r4, #1152 + vst1.32 {d7[0]}, [r4] + + vpop {q4-q7} + pop {r4-r5, pc} + +NONEXEC_STACK Index: include/reactos/libs/libmpg123/dct36_neon64.S =================================================================== --- include/reactos/libs/libmpg123/dct36_neon64.S (revision 0) +++ 
include/reactos/libs/libmpg123/dct36_neon64.S (working copy) @@ -0,0 +1,249 @@ +/* + dct36_neon64: NEON optimized dct36 for AArch64 + + copyright 1995-2014 by the mpg123 project - free software under the terms of the LGPL 2.1 + see COPYING and AUTHORS files in distribution or http://mpg123.org + initially written by Taihei Monma +*/ + +#include "mangle.h" + +#ifndef __APPLE__ + .section .rodata +#else + .data +#endif + ALIGN16 +dct36_aarch64_COS9: + .word 0x3f5db3d7 + .word 0x3f5db3d7 + .word 0x3f000000 + .word 0x3f000000 + .word 0x3f7c1c5c + .word 0x3f7c1c5c + .word 0x3f708fb2 + .word 0x3f708fb2 + .word 0x3f248dbb + .word 0x3f248dbb + .word 0x3e31d0d4 + .word 0x3e31d0d4 + .word 0x3eaf1d44 + .word 0x3eaf1d44 + .word 0x3f441b7d + .word 0x3f441b7d + .word 0x3f007d2b + .word 0x3f0483ee + .word 0x3f0d3b7d + .word 0x3f1c4257 + .word 0x40b79454 + .word 0x3ff746ea + .word 0x3f976fd9 + .word 0x3f5f2944 + .word 0x3f800000 + .word 0x3f3504f3 + + .text + ALIGN4 + .globl ASM_NAME(dct36_neon64) +#ifdef __ELF__ + .type ASM_NAME(dct36_neon64), %function +#endif +ASM_NAME(dct36_neon64): + adrp x5, AARCH64_PCREL_HI(dct36_aarch64_COS9) + add x5, x5, AARCH64_PCREL_LO(dct36_aarch64_COS9) + cmeq v28.16b, v28.16b, v28.16b + eor v29.16b, v29.16b, v29.16b + shl v28.2d, v28.2d, #32 + ld1 {v0.4s,v1.4s,v2.4s,v3.4s}, [x0], #64 + ld1 {v4.2s}, [x0] + + ext v16.16b, v29.16b, v0.16b, #12 + ext v17.16b, v0.16b, v1.16b, #12 + ext v18.16b, v1.16b, v2.16b, #12 + ext v19.16b, v2.16b, v3.16b, #12 + ext v20.16b, v3.16b, v4.16b, #12 + fadd v0.4s, v0.4s, v16.4s + fadd v1.4s, v1.4s, v17.4s + fadd v2.4s, v2.4s, v18.4s + fadd v3.4s, v3.4s, v19.4s + fadd v4.2s, v4.2s, v20.2s + + ext v16.16b, v0.16b, v1.16b, #8 + ext v17.16b, v1.16b, v2.16b, #8 + ext v18.16b, v2.16b, v3.16b, #8 + ext v19.16b, v3.16b, v4.16b, #8 + and v20.16b, v0.16b, v28.16b + ext v0.16b, v29.16b, v0.16b, #8 + and v21.16b, v1.16b, v28.16b + and v22.16b, v2.16b, v28.16b + and v23.16b, v3.16b, v28.16b + fadd v1.4s, v20.4s, v16.4s + fadd v2.4s, v21.4s, v17.4s + fadd v3.4s, v22.4s, v18.4s + fadd v4.4s, v23.4s, v19.4s + +/* +v0 in[-,-,0,1] +v1 in[2,3,4,5] +v2 in[6,7,8,9] +v3 in[10,11,12,13] +v4 in[14,15,16,17] +*/ + + orr v5.16b, v2.16b, v2.16b + ins v2.d[1], v3.d[1] + ins v3.d[1], v4.d[1] + ins v4.d[1], v5.d[1] + +/* +v2 in[6,7,12,13] +v3 in[10,11,16,17] +v4 in[14,15,8,9] +*/ + + ld1 {v16.4s,v17.4s,v18.4s,v19.4s}, [x5], #64 + orr v20.16b, v0.16b, v0.16b + fmla v20.4s, v2.4s, v16.4s + +/* +v17 COS9_[1,1,2,2] +v18 COS9_[5,5,8,8] +v19 COS9_[7,7,4,4] +v16 COS9_[3,3,6,6] +v20 [ta33,tb33,ta66,tb66] +*/ + + orr v21.16b, v20.16b, v20.16b + orr v23.16b, v20.16b, v20.16b + zip2 v25.2d, v29.2d, v2.2d + fsub v22.4s, v1.4s, v3.4s + fmul v24.4s, v1.4s, v17.4s + fmul v26.4s, v1.4s, v18.4s + fmul v27.4s, v1.4s, v19.4s + fmla v21.4s, v3.4s, v18.4s + fmla v23.4s, v3.4s, v19.4s + fmla v20.4s, v4.4s, v18.4s + fsub v25.4s, v0.4s, v25.4s + fsub v22.4s, v22.4s, v4.4s + fmla v24.4s, v4.4s, v19.4s + fmla v26.4s, v4.4s, v17.4s + fmla v27.4s, v3.4s, v17.4s + fmla v25.4s, v22.4s, v16.4s + fadd v24.4s, v24.4s, v21.4s + fsub v26.4s, v26.4s, v23.4s + fsub v27.4s, v27.4s, v20.4s + + zip1 v16.4s, v24.4s, v25.4s + zip2 v17.4s, v24.4s, v25.4s + zip1 v18.4s, v26.4s, v27.4s + zip2 v19.4s, v26.4s, v27.4s + fneg v19.4s, v19.4s + zip1 v20.2d, v16.2d, v18.2d + zip1 v21.2d, v17.2d, v19.2d + zip2 v22.2d, v16.2d, v18.2d + zip2 v23.2d, v17.2d, v19.2d + + ld1 {v5.4s,v6.4s}, [x5], #32 + ld1 {v7.2s}, [x5] + fsub v0.4s, v0.4s, v1.4s + fsub v4.4s, v4.4s, v2.4s + fadd v17.4s, v22.4s, v23.4s + fsub v19.4s, v23.4s, v22.4s + 
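/* Register-save note (AAPCS64): only the low 64 bits of v8-v15 are callee-saved on
   AArch64; this routine deliberately stays within v0-v7 and v16-v29, so unlike the
   32-bit dct36_neon above (which does "vpush {q4-q7}") no SIMD registers need to be
   spilled here. */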
fadd v0.4s, v0.4s, v3.4s + fadd v16.4s, v20.4s, v21.4s + fsub v18.4s, v21.4s, v20.4s + fadd v0.4s, v0.4s, v4.4s + fmul v17.4s, v17.4s, v5.4s + fmul v19.4s, v19.4s, v6.4s + AARCH64_DUP_2D(v0, v0, 1) + fmul v0.2s, v0.2s, v7.2s + +/* +v16 tmp[0,1,2,3] +v17 tmp[17,16,15,14] +v18 tmp[8,7,6,5] +v19 tmp[9,10,11,12] +v0 tmp[4,13] +*/ + + add x0, x4, #640 + add x5, x3, #20 + add x6, x3, #92 + add x7, x1, #20 + ld1 {v1.4s,v2.4s}, [x5] + ld1 {v3.4s,v4.4s}, [x6] + ld1 {v5.4s,v6.4s}, [x7] + fadd v20.4s, v16.4s, v17.4s + fsub v21.4s, v16.4s, v17.4s + fmul v4.4s, v20.4s, v4.4s + fmla v6.4s, v21.4s, v2.4s + rev64 v20.4s, v20.4s + rev64 v21.4s, v21.4s + ext v20.16b, v20.16b, v20.16b, #8 + ext v21.16b, v21.16b, v21.16b, #8 + fmul v3.4s, v20.4s, v3.4s + fmla v5.4s, v21.4s, v1.4s + add x5, x2, #20 + mov x9, #128 + st1 {v3.4s,v4.4s}, [x5] + st1 {v5.s}[0], [x0], x9 + st1 {v5.s}[1], [x0], x9 + st1 {v5.s}[2], [x0], x9 + st1 {v5.s}[3], [x0], x9 + st1 {v6.s}[0], [x0], x9 + st1 {v6.s}[1], [x0], x9 + st1 {v6.s}[2], [x0], x9 + st1 {v6.s}[3], [x0], x9 + + add x0, x4, #1792 + add x5, x3, #56 + add x6, x3, #128 + add x7, x1, #56 + ld1 {v1.4s}, [x3] + ld1 {v2.4s,v3.4s}, [x5] + ld1 {v4.4s}, [x6] + ld1 {v5.4s}, [x1] + ld1 {v6.4s}, [x7] + fadd v20.4s, v18.4s, v19.4s + fsub v21.4s, v18.4s, v19.4s + fmul v3.4s, v20.4s, v3.4s + fmla v5.4s, v21.4s, v1.4s + rev64 v20.4s, v20.4s + rev64 v21.4s, v21.4s + ext v20.16b, v20.16b, v20.16b, #8 + ext v21.16b, v21.16b, v21.16b, #8 + fmul v4.4s, v20.4s, v4.4s + fmla v6.4s, v21.4s, v2.4s + add x5, x2, #56 + st1 {v3.4s}, [x2] + st1 {v4.4s}, [x5] + st1 {v5.s}[0], [x4], x9 + st1 {v5.s}[1], [x4], x9 + st1 {v5.s}[2], [x4], x9 + st1 {v5.s}[3], [x4], x9 + st1 {v6.s}[0], [x0], x9 + st1 {v6.s}[1], [x0], x9 + st1 {v6.s}[2], [x0], x9 + st1 {v6.s}[3], [x0], x9 + + ins v1.s[0], v0.s[1] + ldr s2, [x3, #16] + ldr s3, [x3, #52] + ldr s4, [x3, #88] + ldr s5, [x3, #124] + ldr s6, [x1, #16] + ldr s7, [x1, #52] + fadd s16, s0, s1 + fsub s17, s0, s1 + fmul s4, s16, s4 + fmul s5, s16, s5 + fmadd s6, s17, s2, s6 + fmadd s7, s17, s3, s7 + str s4, [x2, #16] + str s5, [x2, #52] + str s6, [x4] + str s7, [x4, #1152] + + ret + +NONEXEC_STACK Index: include/reactos/libs/libmpg123/dct36_sse.S =================================================================== --- include/reactos/libs/libmpg123/dct36_sse.S (revision 0) +++ include/reactos/libs/libmpg123/dct36_sse.S (working copy) @@ -0,0 +1,389 @@ +/* + dct36_sse: SSE optimized dct36 + + copyright 1995-2013 by the mpg123 project - free software under the terms of the LGPL 2.1 + see COPYING and AUTHORS files in distribution or http://mpg123.org + initially written by Taihei Monma +*/ + +#include "mangle.h" + +#define in %edi +#define out1 %edi +#define out2 %edx +#define w %ecx +#define ts %eax +#define COS9_ %eax +#define tfcos36_ %edx +#define tmp %esi + +/* + void dct36_sse(real *inbuf,real *o1,real *o2,real *wintab,real *tsbuf); +*/ + +#ifndef __APPLE__ + .section .rodata +#else + .data +#endif + ALIGN16 +dct36_sse_COS9: + .long 0x3f5db3d7 + .long 0x3f5db3d7 + .long 0x3f000000 + .long 0x3f000000 + .long 0x3f7c1c5c + .long 0x3f7c1c5c + .long 0x3f708fb2 + .long 0x3f708fb2 + .long 0x3f248dbb + .long 0x3f248dbb + .long 0x3e31d0d4 + .long 0x3e31d0d4 + .long 0x3eaf1d44 + .long 0x3eaf1d44 + .long 0x3f441b7d + .long 0x3f441b7d + ALIGN16 +dct36_sse_tfcos36: + .long 0x3f007d2b + .long 0x3f0483ee + .long 0x3f0d3b7d + .long 0x3f1c4257 + .long 0x40b79454 + .long 0x3ff746ea + .long 0x3f976fd9 + .long 0x3f5f2944 + .long 0x3f3504f3 + ALIGN16 +dct36_sse_mask: + .long 
0,0xffffffff,0,0xffffffff + ALIGN16 +dct36_sse_sign: + .long 0x80000000,0x80000000,0x80000000,0x80000000 + .text + ALIGN16 + .globl ASM_NAME(dct36_sse) +ASM_NAME(dct36_sse): + push %ebp + mov %esp, %ebp + and $-16, %esp + sub $80, %esp + push %ebx + push %esi + push %edi + call 1f +1: + pop %ebx + lea dct36_sse_COS9-1b(%ebx), COS9_ + lea dct36_sse_tfcos36-1b(%ebx), tfcos36_ + lea 12(%esp), tmp + movl 8(%ebp), in + + xorps %xmm0, %xmm0 + xorps %xmm5, %xmm5 + movlps 64(in), %xmm5 + movups 48(in), %xmm4 + movups 32(in), %xmm3 + movups 16(in), %xmm2 + movups (in), %xmm1 + movaps %xmm5, %xmm6 + shufps $0xe1, %xmm6, %xmm6 + movaps %xmm4, %xmm7 + shufps $0x93, %xmm7, %xmm7 + movss %xmm7, %xmm6 + addps %xmm6, %xmm5 + movaps %xmm3, %xmm6 + shufps $0x93, %xmm6, %xmm6 + movss %xmm6, %xmm7 + addps %xmm7, %xmm4 + movaps %xmm2, %xmm7 + shufps $0x93, %xmm7, %xmm7 + movss %xmm7, %xmm6 + addps %xmm6, %xmm3 + movaps %xmm1, %xmm6 + shufps $0x93, %xmm6, %xmm6 + movss %xmm6, %xmm7 + addps %xmm7, %xmm2 + movss %xmm0, %xmm6 + addps %xmm6, %xmm1 + + movaps dct36_sse_mask-1b(%ebx), %xmm0 + movaps %xmm4, %xmm6 + shufps $0x4e, %xmm5, %xmm4 + movaps %xmm3, %xmm7 + shufps $0x4e, %xmm6, %xmm3 + andps %xmm0, %xmm6 + addps %xmm6, %xmm4 + movaps %xmm2, %xmm6 + shufps $0x4e, %xmm7, %xmm2 + andps %xmm0, %xmm7 + addps %xmm7, %xmm3 + movaps %xmm1, %xmm7 + shufps $0x4e, %xmm6, %xmm1 + andps %xmm0, %xmm6 + addps %xmm6, %xmm2 + movaps %xmm7, %xmm6 + andps %xmm0, %xmm7 + xorps %xmm0, %xmm0 + addps %xmm7, %xmm1 + movlhps %xmm6, %xmm0 + +/* +xmm0 in[-,-,0,1] +xmm1 in[2,3,4,5] +xmm2 in[6,7,8,9] +xmm3 in[10,11,12,13] +xmm4 in[14,15,16,17] +*/ + + movaps %xmm2, %xmm5 + shufps $0xe4, %xmm3, %xmm5 + shufps $0xe4, %xmm4, %xmm3 + shufps $0xe4, %xmm2, %xmm4 + movaps %xmm5, %xmm2 + +/* +xmm2 in[6,7,12,13] +xmm3 in[10,11,16,17] +xmm4 in[14,15,8,9] +*/ + + mulps (COS9_), %xmm5 + addps %xmm0, %xmm5 + + movaps %xmm0, (tmp) + movaps %xmm2, 16(tmp) + +/* +0(tmp) in[-,-,0,1] +xmm5 [ta33,tb33,ta66,tb66] +*/ + + movaps %xmm1, %xmm6 + subps %xmm3, %xmm6 + subps %xmm4, %xmm6 + xorps %xmm7, %xmm7 + shufps $0xe0, %xmm2, %xmm7 + mulps (COS9_), %xmm6 + subps %xmm7, %xmm0 + addps %xmm0, %xmm6 + movaps %xmm6, 48(tmp) + + movaps 16(COS9_), %xmm2 + + movaps %xmm1, %xmm0 + movaps %xmm3, %xmm6 + movaps %xmm4, %xmm7 + mulps %xmm2, %xmm0 + mulps 32(COS9_), %xmm6 + mulps 48(COS9_), %xmm7 + addps %xmm5, %xmm0 + addps %xmm7, %xmm6 + addps %xmm6, %xmm0 + movaps %xmm0, 32(tmp) + + movaps %xmm1, %xmm0 + movaps %xmm3, %xmm6 + movaps %xmm4, %xmm7 + mulps 32(COS9_), %xmm0 + mulps 48(COS9_), %xmm6 + mulps %xmm2, %xmm7 + subps %xmm5, %xmm0 + subps %xmm6, %xmm7 + addps %xmm7, %xmm0 + movaps %xmm0, 64(tmp) + + movaps %xmm1, %xmm6 + movaps %xmm4, %xmm7 + mulps 48(COS9_), %xmm6 + mulps %xmm3, %xmm2 + mulps 32(COS9_), %xmm7 + subps %xmm5, %xmm6 + subps %xmm7, %xmm2 + addps %xmm2, %xmm6 + + movaps (tmp), %xmm0 + movss 32(tfcos36_), %xmm5 + subps %xmm1, %xmm0 + subps 16(tmp), %xmm4 + addps %xmm3, %xmm0 + addps %xmm4, %xmm0 + shufps $0xaf, %xmm0, %xmm0 + mulss %xmm5, %xmm0 + movaps %xmm0, (tmp) + + movaps 32(tmp), %xmm0 + movaps 48(tmp), %xmm1 + movaps 64(tmp), %xmm2 + +/* +xmm0 [1a-0,1b-0, 2a-0, 2b-0] +xmm1 [1a-1,1b-1, 2a-1, 2b-1] +xmm2 [1a-2,1b-2,-2a-2,-2b-2] +xmm6 [1a-3,1b-3,-2a-3,-2b-3] +*/ + + movaps %xmm0, %xmm3 + unpcklps %xmm1, %xmm0 + unpckhps %xmm1, %xmm3 + movaps %xmm2, %xmm5 + unpcklps %xmm6, %xmm2 + unpckhps %xmm6, %xmm5 + xorps dct36_sse_sign-1b(%ebx), %xmm5 + +/* +xmm0 [1a-0,1a-1,1b-0,1b-1] +xmm3 [2a-0,2a-1,2b-0,2b-1] +xmm2 [1a-2,1a-3,1b-2,1b-3] +xmm5 
[2a-2,2a-3,2b-2,2b-3] +*/ + + movaps %xmm0, %xmm1 + movlhps %xmm2, %xmm0 + movhlps %xmm1, %xmm2 + movaps %xmm3, %xmm4 + movlhps %xmm5, %xmm3 + movhlps %xmm4, %xmm5 + +/* +xmm0 tmp1a +xmm3 tmp2a +xmm2 tmp1b +xmm5 tmp2b +*/ + + movaps (tfcos36_), %xmm6 + movaps 16(tfcos36_), %xmm7 + movaps %xmm5, %xmm1 + addps %xmm2, %xmm5 + subps %xmm2, %xmm1 + movaps %xmm3, %xmm2 + addps %xmm0, %xmm3 + subps %xmm0, %xmm2 + mulps %xmm6, %xmm5 + mulps %xmm1, %xmm7 + + movaps %xmm2, 16(tmp) + +/* +%xmm3 tmp[0,1,2,3] +%xmm5 tmp[17,16,15,14] +16(tmp) tmp[8,7,6,5] +%xmm7 tmp[9,10,11,12] +0(tmp) tmp[13,-,4,-] +*/ + + movl 12(%ebp), out1 + movl 16(%ebp), out2 + movl 20(%ebp), w + movl 24(%ebp), ts + + movaps %xmm3, %xmm0 + movaps %xmm5, %xmm1 + movups 108(w), %xmm2 + movups 92(w), %xmm3 + shufps $0x1b, %xmm3, %xmm3 + movups 36(w), %xmm4 + movups 20(w), %xmm5 + shufps $0x1b, %xmm5, %xmm5 + movaps %xmm0, %xmm6 + addps %xmm1, %xmm0 + subps %xmm1, %xmm6 + mulps %xmm0, %xmm2 + mulps %xmm3, %xmm0 + mulps %xmm6, %xmm4 + mulps %xmm5, %xmm6 + movups 36(out1), %xmm1 + movups 20(out1), %xmm3 + shufps $0x1b, %xmm6, %xmm6 + addps %xmm4, %xmm1 + addps %xmm6, %xmm3 + shufps $0x1b, %xmm0, %xmm0 + movups %xmm2, 36(out2) + movups %xmm0, 20(out2) + movss %xmm1, 32*36(ts) + movss %xmm3, 32*20(ts) + movhlps %xmm1, %xmm2 + movhlps %xmm3, %xmm4 + movss %xmm2, 32*44(ts) + movss %xmm4, 32*28(ts) + shufps $0xb1, %xmm1, %xmm1 + shufps $0xb1, %xmm3, %xmm3 + movss %xmm1, 32*40(ts) + movss %xmm3, 32*24(ts) + movhlps %xmm1, %xmm2 + movhlps %xmm3, %xmm4 + movss %xmm2, 32*48(ts) + movss %xmm4, 32*32(ts) + + movss 8(tmp), %xmm0 + movss (tmp), %xmm1 + movss 124(w), %xmm2 + movss 88(w), %xmm3 + movss 52(w), %xmm4 + movss 16(w), %xmm5 + movss %xmm0, %xmm6 + addss %xmm1, %xmm0 + subss %xmm1, %xmm6 + mulss %xmm0, %xmm2 + mulss %xmm3, %xmm0 + mulss %xmm6, %xmm4 + mulss %xmm5, %xmm6 + addss 52(out1), %xmm4 + addss 16(out1), %xmm6 + movss %xmm2, 52(out2) + movss %xmm0, 16(out2) + movss %xmm4, 32*52(ts) + movss %xmm6, 32*16(ts) + + movaps 16(tmp), %xmm0 + movaps %xmm7, %xmm1 + MOVUAPS 128(w), %xmm2 + movups 72(w), %xmm3 + shufps $0x1b, %xmm2, %xmm2 + movlps 56(w), %xmm4 + movhps 64(w), %xmm4 + MOVUAPS (w), %xmm5 + shufps $0x1b, %xmm4, %xmm4 + movaps %xmm0, %xmm6 + addps %xmm1, %xmm0 + subps %xmm1, %xmm6 + mulps %xmm0, %xmm2 + mulps %xmm3, %xmm0 + mulps %xmm6, %xmm4 + mulps %xmm5, %xmm6 + movlps 56(out1), %xmm1 + movhps 64(out1), %xmm1 + movups (out1), %xmm3 + shufps $0x1b, %xmm4, %xmm4 + addps %xmm6, %xmm3 + addps %xmm4, %xmm1 + shufps $0x1b, %xmm2, %xmm2 + movups %xmm0, (out2) + movlps %xmm2, 56(out2) + movhps %xmm2, 64(out2) + movss %xmm1, 32*56(ts) + movss %xmm3, (ts) + movhlps %xmm1, %xmm2 + movhlps %xmm3, %xmm4 + movss %xmm2, 32*64(ts) + movss %xmm4, 32*8(ts) + shufps $0xb1, %xmm1, %xmm1 + shufps $0xb1, %xmm3, %xmm3 + movss %xmm1, 32*60(ts) + movss %xmm3, 32*4(ts) + movhlps %xmm1, %xmm2 + movhlps %xmm3, %xmm4 + movss %xmm2, 32*68(ts) + movss %xmm4, 32*12(ts) + + pop %edi + pop %esi + pop %ebx + mov %ebp, %esp + pop %ebp + + ret + +NONEXEC_STACK Index: include/reactos/libs/libmpg123/dct36_x86_64.S =================================================================== --- include/reactos/libs/libmpg123/dct36_x86_64.S (revision 0) +++ include/reactos/libs/libmpg123/dct36_x86_64.S (working copy) @@ -0,0 +1,394 @@ +/* + dct36_x86_64: SSE optimized dct36 for x86-64 + + copyright 1995-2013 by the mpg123 project - free software under the terms of the LGPL 2.1 + see COPYING and AUTHORS files in distribution or http://mpg123.org + initially written by Taihei Monma 
+*/ + +#include "mangle.h" + +#ifdef IS_MSABI +#define in %rcx +#define out1 %rdx +#define out2 %r8 +#define w %r9 +#define ts %r10 +#define COS9_ %rax +#define tfcos36_ %r11 +#else +#define in %rdi +#define out1 %rsi +#define out2 %rdx +#define w %rcx +#define ts %r8 +#define COS9_ %rax +#define tfcos36_ %r9 +#endif + +/* + void dct36_x86_64(real *inbuf,real *o1,real *o2,real *wintab,real *tsbuf); +*/ + +#ifndef __APPLE__ + .section .rodata +#else + .data +#endif + ALIGN16 +dct36_x86_64_COS9: + .long 0x3f5db3d7 + .long 0x3f5db3d7 + .long 0x3f000000 + .long 0x3f000000 + .long 0x3f7c1c5c + .long 0x3f7c1c5c + .long 0x3f708fb2 + .long 0x3f708fb2 + .long 0x3f248dbb + .long 0x3f248dbb + .long 0x3e31d0d4 + .long 0x3e31d0d4 + .long 0x3eaf1d44 + .long 0x3eaf1d44 + .long 0x3f441b7d + .long 0x3f441b7d + ALIGN16 +dct36_x86_64_tfcos36: + .long 0x3f007d2b + .long 0x3f0483ee + .long 0x3f0d3b7d + .long 0x3f1c4257 + .long 0x40b79454 + .long 0x3ff746ea + .long 0x3f976fd9 + .long 0x3f5f2944 + .long 0x3f3504f3 + ALIGN16 +dct36_x86_64_mask: + .long 0,0xffffffff,0,0xffffffff + ALIGN16 +dct36_x86_64_sign: + .long 0x80000000,0x80000000,0x80000000,0x80000000 + .text + ALIGN16 + .globl ASM_NAME(dct36_x86_64) +ASM_NAME(dct36_x86_64): +#ifdef IS_MSABI + push %rbp + mov %rsp, %rbp + sub $160, %rsp + movaps %xmm6, (%rsp) + movaps %xmm7, 16(%rsp) + movaps %xmm8, 32(%rsp) + movaps %xmm9, 48(%rsp) + movaps %xmm10, 64(%rsp) + movaps %xmm11, 80(%rsp) + movaps %xmm12, 96(%rsp) + movaps %xmm13, 112(%rsp) + movaps %xmm14, 128(%rsp) + movaps %xmm15, 144(%rsp) + movq 48(%rbp), ts +#endif + lea dct36_x86_64_COS9(%rip), COS9_ + lea dct36_x86_64_tfcos36(%rip), tfcos36_ + + xorps %xmm5, %xmm5 + movups (in), %xmm1 + movups 16(in), %xmm2 + movups 32(in), %xmm3 + movups 48(in), %xmm4 + movlps 64(in), %xmm5 + xorps %xmm6, %xmm6 + movaps %xmm1, %xmm7 + shufps $0x93, %xmm7, %xmm7 + movaps %xmm2, %xmm8 + shufps $0x93, %xmm8, %xmm8 + movaps %xmm3, %xmm9 + shufps $0x93, %xmm9, %xmm9 + movaps %xmm4, %xmm10 + shufps $0x93, %xmm10, %xmm10 + movaps %xmm5, %xmm11 + shufps $0xe1, %xmm11, %xmm11 + movss %xmm10, %xmm11 + addps %xmm11, %xmm5 + movss %xmm9, %xmm10 + addps %xmm10, %xmm4 + movss %xmm8, %xmm9 + addps %xmm9, %xmm3 + movss %xmm7, %xmm8 + addps %xmm8, %xmm2 + movss %xmm6, %xmm7 + addps %xmm7, %xmm1 + + movaps dct36_x86_64_mask(%rip), %xmm0 + movaps %xmm4, %xmm6 + shufps $0x4e, %xmm5, %xmm4 + movaps %xmm3, %xmm7 + shufps $0x4e, %xmm6, %xmm3 + andps %xmm0, %xmm6 + addps %xmm6, %xmm4 + movaps %xmm2, %xmm6 + shufps $0x4e, %xmm7, %xmm2 + andps %xmm0, %xmm7 + addps %xmm7, %xmm3 + movaps %xmm1, %xmm7 + shufps $0x4e, %xmm6, %xmm1 + andps %xmm0, %xmm6 + addps %xmm6, %xmm2 + movaps %xmm7, %xmm6 + andps %xmm0, %xmm7 + xorps %xmm0, %xmm0 + addps %xmm7, %xmm1 + movlhps %xmm6, %xmm0 + +/* +xmm0 in[-,-,0,1] +xmm1 in[2,3,4,5] +xmm2 in[6,7,8,9] +xmm3 in[10,11,12,13] +xmm4 in[14,15,16,17] +*/ + + movaps %xmm2, %xmm5 + shufps $0xe4, %xmm3, %xmm5 + shufps $0xe4, %xmm4, %xmm3 + shufps $0xe4, %xmm2, %xmm4 + movaps %xmm5, %xmm2 +/* +xmm2 in[6,7,12,13] +xmm3 in[10,11,16,17] +xmm4 in[14,15,8,9] +*/ + + movaps (COS9_), %xmm15 + movaps 16(COS9_), %xmm6 + movaps 32(COS9_), %xmm7 + movaps 48(COS9_), %xmm8 + mulps %xmm15, %xmm5 + addps %xmm0, %xmm5 + +/* +xmm5 [ta33,tb33,ta66,tb66] +xmm6 COS9_[1,1,2,2] +xmm7 COS9_[5,5,8,8] +xmm8 COS9_[7,7,4,4] +xmm15 COS9_[3,3,6,6] +*/ + movaps %xmm6, %xmm9 + movaps %xmm7, %xmm12 + movaps %xmm8, %xmm13 + mulps %xmm1, %xmm9 + mulps %xmm3, %xmm12 + mulps %xmm4, %xmm13 + addps %xmm5, %xmm9 + addps %xmm13, %xmm12 + addps %xmm9, %xmm12 + + 
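+
+/* %xmm12 now holds the first group of cosine-weighted partial sums
+   (the "1a-0 .. 2b-0" entries of the register map further below). */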
movaps %xmm1, %xmm13 + subps %xmm3, %xmm13 + movaps %xmm0, %xmm10 + shufps $0xe0, %xmm2, %xmm10 + movaps %xmm0, %xmm14 + subps %xmm10, %xmm14 + subps %xmm4, %xmm13 + mulps %xmm15, %xmm13 + addps %xmm14, %xmm13 + + movaps %xmm7, %xmm9 + movaps %xmm8, %xmm15 + movaps %xmm6, %xmm14 + mulps %xmm1, %xmm9 + mulps %xmm3, %xmm15 + mulps %xmm4, %xmm14 + subps %xmm5, %xmm9 + subps %xmm15, %xmm14 + addps %xmm9, %xmm14 + + mulps %xmm1, %xmm8 + mulps %xmm3, %xmm6 + mulps %xmm4, %xmm7 + subps %xmm5, %xmm8 + subps %xmm7, %xmm6 + addps %xmm6, %xmm8 + movaps %xmm8, %xmm15 + + movss 32(tfcos36_), %xmm5 + subps %xmm1, %xmm0 + subps %xmm2, %xmm4 + addps %xmm3, %xmm0 + addps %xmm4, %xmm0 + shufps $0xaf, %xmm0, %xmm0 + mulss %xmm5, %xmm0 + movaps %xmm0, %xmm11 + +/* +xmm12 [1a-0,1b-0, 2a-0, 2b-0] +xmm13 [1a-1,1b-1, 2a-1, 2b-1] +xmm14 [1a-2,1b-2,-2a-2,-2b-2] +xmm15 [1a-3,1b-3,-2a-3,-2b-3] +*/ + movaps %xmm12, %xmm5 + unpckhps %xmm13, %xmm5 + unpcklps %xmm13, %xmm12 + movaps %xmm14, %xmm6 + unpckhps %xmm15, %xmm6 + unpcklps %xmm15, %xmm14 + xorps dct36_x86_64_sign(%rip), %xmm6 + +/* +xmm12 [1a-0,1a-1,1b-0,1b-1] +xmm5 [2a-0,2a-1,2b-0,2b-1] +xmm14 [1a-2,1a-3,1b-2,1b-3] +xmm6 [2a-2,2a-3,2b-2,2b-3] +*/ + + movaps %xmm12, %xmm0 + movlhps %xmm14, %xmm12 + movhlps %xmm0, %xmm14 + movaps %xmm5, %xmm0 + movlhps %xmm6, %xmm0 + movhlps %xmm5, %xmm6 + movaps %xmm6, %xmm15 + +/* +xmm12 tmp1a +xmm0 tmp2a +xmm14 tmp1b +xmm15 tmp2b +*/ + + movaps (tfcos36_), %xmm6 + movaps 16(tfcos36_), %xmm7 + movaps %xmm15, %xmm10 + addps %xmm14, %xmm15 + subps %xmm14, %xmm10 + movaps %xmm0, %xmm14 + addps %xmm12, %xmm0 + subps %xmm12, %xmm14 + mulps %xmm6, %xmm15 + mulps %xmm10, %xmm7 + +/* +%xmm0 tmp[0,1,2,3] +%xmm15 tmp[17,16,15,14] +%xmm14 tmp[8,7,6,5] +%xmm7 tmp[9,10,11,12] +%xmm11 tmp[13,-,4,-] +*/ + + movaps %xmm15, %xmm1 + movups 108(w), %xmm2 + movups 92(w), %xmm3 + shufps $0x1b, %xmm3, %xmm3 + movups 36(w), %xmm4 + movups 20(w), %xmm5 + shufps $0x1b, %xmm5, %xmm5 + movaps %xmm0, %xmm6 + addps %xmm1, %xmm0 + subps %xmm1, %xmm6 + mulps %xmm0, %xmm2 + mulps %xmm3, %xmm0 + mulps %xmm6, %xmm4 + mulps %xmm5, %xmm6 + movups 36(out1), %xmm1 + movups 20(out1), %xmm3 + shufps $0x1b, %xmm6, %xmm6 + addps %xmm4, %xmm1 + addps %xmm6, %xmm3 + shufps $0x1b, %xmm0, %xmm0 + movups %xmm2, 36(out2) + movups %xmm0, 20(out2) + movss %xmm1, 32*36(ts) + movss %xmm3, 32*20(ts) + movhlps %xmm1, %xmm2 + movhlps %xmm3, %xmm4 + movss %xmm2, 32*44(ts) + movss %xmm4, 32*28(ts) + shufps $0xb1, %xmm1, %xmm1 + shufps $0xb1, %xmm3, %xmm3 + movss %xmm1, 32*40(ts) + movss %xmm3, 32*24(ts) + movhlps %xmm1, %xmm2 + movhlps %xmm3, %xmm4 + movss %xmm2, 32*48(ts) + movss %xmm4, 32*32(ts) + + movhlps %xmm11, %xmm0 + movaps %xmm11, %xmm1 + movss 124(w), %xmm2 + movss 88(w), %xmm3 + movss 52(w), %xmm4 + movss 16(w), %xmm5 + movss %xmm0, %xmm6 + addss %xmm1, %xmm0 + subss %xmm1, %xmm6 + mulss %xmm0, %xmm2 + mulss %xmm3, %xmm0 + mulss %xmm6, %xmm4 + mulss %xmm5, %xmm6 + addss 52(out1), %xmm4 + addss 16(out1), %xmm6 + movss %xmm2, 52(out2) + movss %xmm0, 16(out2) + movss %xmm4, 32*52(ts) + movss %xmm6, 32*16(ts) + + movaps %xmm14, %xmm0 + movaps %xmm7, %xmm1 + MOVUAPS 128(w), %xmm2 + movups 72(w), %xmm3 + shufps $0x1b, %xmm2, %xmm2 + movlps 56(w), %xmm4 + movhps 64(w), %xmm4 + MOVUAPS (w), %xmm5 + shufps $0x1b, %xmm4, %xmm4 + movaps %xmm0, %xmm6 + addps %xmm1, %xmm0 + subps %xmm1, %xmm6 + mulps %xmm0, %xmm2 + mulps %xmm3, %xmm0 + mulps %xmm6, %xmm4 + mulps %xmm5, %xmm6 + movlps 56(out1), %xmm1 + movhps 64(out1), %xmm1 + movups (out1), %xmm3 + shufps $0x1b, %xmm4, %xmm4 + addps 
%xmm6, %xmm3 + addps %xmm4, %xmm1 + shufps $0x1b, %xmm2, %xmm2 + movups %xmm0, (out2) + movlps %xmm2, 56(out2) + movhps %xmm2, 64(out2) + movss %xmm1, 32*56(ts) + movss %xmm3, (ts) + movhlps %xmm1, %xmm2 + movhlps %xmm3, %xmm4 + movss %xmm2, 32*64(ts) + movss %xmm4, 32*8(ts) + shufps $0xb1, %xmm1, %xmm1 + shufps $0xb1, %xmm3, %xmm3 + movss %xmm1, 32*60(ts) + movss %xmm3, 32*4(ts) + movhlps %xmm1, %xmm2 + movhlps %xmm3, %xmm4 + movss %xmm2, 32*68(ts) + movss %xmm4, 32*12(ts) + +#ifdef IS_MSABI + movaps (%rsp), %xmm6 + movaps 16(%rsp), %xmm7 + movaps 32(%rsp), %xmm8 + movaps 48(%rsp), %xmm9 + movaps 64(%rsp), %xmm10 + movaps 80(%rsp), %xmm11 + movaps 96(%rsp), %xmm12 + movaps 112(%rsp), %xmm13 + movaps 128(%rsp), %xmm14 + movaps 144(%rsp), %xmm15 + mov %rbp, %rsp + pop %rbp +#endif + ret + +NONEXEC_STACK Index: include/reactos/libs/libmpg123/dct64.c =================================================================== --- include/reactos/libs/libmpg123/dct64.c (revision 0) +++ include/reactos/libs/libmpg123/dct64.c (working copy) @@ -0,0 +1,174 @@ +/* + dct64.c: DCT64, the plain C version + + copyright ?-2006 by the mpg123 project - free software under the terms of the LGPL 2.1 + see COPYING and AUTHORS files in distribution or http://mpg123.org + initially written by Michael Hipp +*/ + +/* + * Discrete Cosine Tansform (DCT) for subband synthesis + * + * -funroll-loops (for gcc) will remove the loops for better performance + * using loops in the source-code enhances readabillity + * + * + * TODO: write an optimized version for the down-sampling modes + * (in these modes the bands 16-31 (2:1) or 8-31 (4:1) are zero + */ + +#include "mpg123lib_intern.h" + +void dct64(real *out0,real *out1,real *samples) +{ + real bufs[64]; + + { + register int i,j; + register real *b1,*b2,*bs,*costab; + + b1 = samples; + bs = bufs; + costab = pnts[0]+16; + b2 = b1 + 32; + + for(i=15;i>=0;i--) + *bs++ = (*b1++ + *--b2); + for(i=15;i>=0;i--) + *bs++ = REAL_MUL((*--b2 - *b1++), *--costab); + + b1 = bufs; + costab = pnts[1]+8; + b2 = b1 + 16; + + { + for(i=7;i>=0;i--) + *bs++ = (*b1++ + *--b2); + for(i=7;i>=0;i--) + *bs++ = REAL_MUL((*--b2 - *b1++), *--costab); + b2 += 32; + costab += 8; + for(i=7;i>=0;i--) + *bs++ = (*b1++ + *--b2); + for(i=7;i>=0;i--) + *bs++ = REAL_MUL((*b1++ - *--b2), *--costab); + b2 += 32; + } + + bs = bufs; + costab = pnts[2]; + b2 = b1 + 8; + + for(j=2;j;j--) + { + for(i=3;i>=0;i--) + *bs++ = (*b1++ + *--b2); + for(i=3;i>=0;i--) + *bs++ = REAL_MUL((*--b2 - *b1++), costab[i]); + b2 += 16; + for(i=3;i>=0;i--) + *bs++ = (*b1++ + *--b2); + for(i=3;i>=0;i--) + *bs++ = REAL_MUL((*b1++ - *--b2), costab[i]); + b2 += 16; + } + + b1 = bufs; + costab = pnts[3]; + b2 = b1 + 4; + + for(j=4;j;j--) + { + *bs++ = (*b1++ + *--b2); + *bs++ = (*b1++ + *--b2); + *bs++ = REAL_MUL((*--b2 - *b1++), costab[1]); + *bs++ = REAL_MUL((*--b2 - *b1++), costab[0]); + b2 += 8; + *bs++ = (*b1++ + *--b2); + *bs++ = (*b1++ + *--b2); + *bs++ = REAL_MUL((*b1++ - *--b2), costab[1]); + *bs++ = REAL_MUL((*b1++ - *--b2), costab[0]); + b2 += 8; + } + bs = bufs; + costab = pnts[4]; + + for(j=8;j;j--) + { + real v0,v1; + v0=*b1++; v1 = *b1++; + *bs++ = (v0 + v1); + *bs++ = REAL_MUL((v0 - v1), (*costab)); + v0=*b1++; v1 = *b1++; + *bs++ = (v0 + v1); + *bs++ = REAL_MUL((v1 - v0), (*costab)); + } + + } + + + { + register real *b1; + register int i; + + for(b1=bufs,i=8;i;i--,b1+=4) + b1[2] += b1[3]; + + for(b1=bufs,i=4;i;i--,b1+=8) + { + b1[4] += b1[6]; + b1[6] += b1[5]; + b1[5] += b1[7]; + } + + for(b1=bufs,i=2;i;i--,b1+=16) + { + b1[8] 
+= b1[12]; + b1[12] += b1[10]; + b1[10] += b1[14]; + b1[14] += b1[9]; + b1[9] += b1[13]; + b1[13] += b1[11]; + b1[11] += b1[15]; + } + } + + + out0[0x10*16] = REAL_SCALE_DCT64(bufs[0]); + out0[0x10*15] = REAL_SCALE_DCT64(bufs[16+0] + bufs[16+8]); + out0[0x10*14] = REAL_SCALE_DCT64(bufs[8]); + out0[0x10*13] = REAL_SCALE_DCT64(bufs[16+8] + bufs[16+4]); + out0[0x10*12] = REAL_SCALE_DCT64(bufs[4]); + out0[0x10*11] = REAL_SCALE_DCT64(bufs[16+4] + bufs[16+12]); + out0[0x10*10] = REAL_SCALE_DCT64(bufs[12]); + out0[0x10* 9] = REAL_SCALE_DCT64(bufs[16+12] + bufs[16+2]); + out0[0x10* 8] = REAL_SCALE_DCT64(bufs[2]); + out0[0x10* 7] = REAL_SCALE_DCT64(bufs[16+2] + bufs[16+10]); + out0[0x10* 6] = REAL_SCALE_DCT64(bufs[10]); + out0[0x10* 5] = REAL_SCALE_DCT64(bufs[16+10] + bufs[16+6]); + out0[0x10* 4] = REAL_SCALE_DCT64(bufs[6]); + out0[0x10* 3] = REAL_SCALE_DCT64(bufs[16+6] + bufs[16+14]); + out0[0x10* 2] = REAL_SCALE_DCT64(bufs[14]); + out0[0x10* 1] = REAL_SCALE_DCT64(bufs[16+14] + bufs[16+1]); + out0[0x10* 0] = REAL_SCALE_DCT64(bufs[1]); + + out1[0x10* 0] = REAL_SCALE_DCT64(bufs[1]); + out1[0x10* 1] = REAL_SCALE_DCT64(bufs[16+1] + bufs[16+9]); + out1[0x10* 2] = REAL_SCALE_DCT64(bufs[9]); + out1[0x10* 3] = REAL_SCALE_DCT64(bufs[16+9] + bufs[16+5]); + out1[0x10* 4] = REAL_SCALE_DCT64(bufs[5]); + out1[0x10* 5] = REAL_SCALE_DCT64(bufs[16+5] + bufs[16+13]); + out1[0x10* 6] = REAL_SCALE_DCT64(bufs[13]); + out1[0x10* 7] = REAL_SCALE_DCT64(bufs[16+13] + bufs[16+3]); + out1[0x10* 8] = REAL_SCALE_DCT64(bufs[3]); + out1[0x10* 9] = REAL_SCALE_DCT64(bufs[16+3] + bufs[16+11]); + out1[0x10*10] = REAL_SCALE_DCT64(bufs[11]); + out1[0x10*11] = REAL_SCALE_DCT64(bufs[16+11] + bufs[16+7]); + out1[0x10*12] = REAL_SCALE_DCT64(bufs[7]); + out1[0x10*13] = REAL_SCALE_DCT64(bufs[16+7] + bufs[16+15]); + out1[0x10*14] = REAL_SCALE_DCT64(bufs[15]); + out1[0x10*15] = REAL_SCALE_DCT64(bufs[16+15]); + +} + + Index: include/reactos/libs/libmpg123/dct64_3dnow.S =================================================================== --- include/reactos/libs/libmpg123/dct64_3dnow.S (revision 0) +++ include/reactos/libs/libmpg123/dct64_3dnow.S (working copy) @@ -0,0 +1,712 @@ +/* + dct64_3dnow.s: Replacement of dct64() with AMD's 3DNow! SIMD operations support + + copyright ?-2006 by the mpg123 project - free software under the terms of the LGPL 2.1 + see COPYING and AUTHORS files in distribution or http://mpg123.org + initially written by Syuuhei Kashiyama + + Original "license" statement: + The author of this program disclaim whole expressed or implied + warranties with regard to this program, and in no event shall the + author of this program liable to whatever resulted from the use of + this program. Use it at your own risk. 
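+
+  Same calling convention as the plain C dct64(out0, out1, samples) added
+  earlier in this patch: the 512/1024-byte displacements on %ebp/%edx below
+  correspond to the out0[0x10*N]/out1[0x10*N] stores of the C version
+  (0x10 reals = 64 bytes).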
+*/ + +#include "mangle.h" + + .globl ASM_NAME(dct64_3dnow) +/* .type ASM_NAME(dct64_3dnow),@function */ +ASM_NAME(dct64_3dnow): + subl $256,%esp + pushl %ebp + pushl %edi + pushl %esi + pushl %ebx + leal 16(%esp),%ebx + movl 284(%esp),%edi + movl 276(%esp),%ebp + movl 280(%esp),%edx + leal 128(%ebx),%esi + + /* femms */ + + /* 1 */ + movl ASM_NAME(pnts),%eax + movq 0(%edi),%mm0 + movq %mm0,%mm1 + movd 124(%edi),%mm2 + punpckldq 120(%edi),%mm2 + movq 0(%eax),%mm3 + pfadd %mm2,%mm0 + movq %mm0,0(%ebx) + pfsub %mm2,%mm1 + pfmul %mm3,%mm1 + movd %mm1,124(%ebx) + psrlq $32,%mm1 + movd %mm1,120(%ebx) + movq 8(%edi),%mm4 + movq %mm4,%mm5 + movd 116(%edi),%mm6 + punpckldq 112(%edi),%mm6 + movq 8(%eax),%mm7 + pfadd %mm6,%mm4 + movq %mm4,8(%ebx) + pfsub %mm6,%mm5 + pfmul %mm7,%mm5 + movd %mm5,116(%ebx) + psrlq $32,%mm5 + movd %mm5,112(%ebx) + movq 16(%edi),%mm0 + movq %mm0,%mm1 + movd 108(%edi),%mm2 + punpckldq 104(%edi),%mm2 + movq 16(%eax),%mm3 + pfadd %mm2,%mm0 + movq %mm0,16(%ebx) + pfsub %mm2,%mm1 + pfmul %mm3,%mm1 + movd %mm1,108(%ebx) + psrlq $32,%mm1 + movd %mm1,104(%ebx) + movq 24(%edi),%mm4 + movq %mm4,%mm5 + movd 100(%edi),%mm6 + punpckldq 96(%edi),%mm6 + movq 24(%eax),%mm7 + pfadd %mm6,%mm4 + movq %mm4,24(%ebx) + pfsub %mm6,%mm5 + pfmul %mm7,%mm5 + movd %mm5,100(%ebx) + psrlq $32,%mm5 + movd %mm5,96(%ebx) + movq 32(%edi),%mm0 + movq %mm0,%mm1 + movd 92(%edi),%mm2 + punpckldq 88(%edi),%mm2 + movq 32(%eax),%mm3 + pfadd %mm2,%mm0 + movq %mm0,32(%ebx) + pfsub %mm2,%mm1 + pfmul %mm3,%mm1 + movd %mm1,92(%ebx) + psrlq $32,%mm1 + movd %mm1,88(%ebx) + movq 40(%edi),%mm4 + movq %mm4,%mm5 + movd 84(%edi),%mm6 + punpckldq 80(%edi),%mm6 + movq 40(%eax),%mm7 + pfadd %mm6,%mm4 + movq %mm4,40(%ebx) + pfsub %mm6,%mm5 + pfmul %mm7,%mm5 + movd %mm5,84(%ebx) + psrlq $32,%mm5 + movd %mm5,80(%ebx) + movq 48(%edi),%mm0 + movq %mm0,%mm1 + movd 76(%edi),%mm2 + punpckldq 72(%edi),%mm2 + movq 48(%eax),%mm3 + pfadd %mm2,%mm0 + movq %mm0,48(%ebx) + pfsub %mm2,%mm1 + pfmul %mm3,%mm1 + movd %mm1,76(%ebx) + psrlq $32,%mm1 + movd %mm1,72(%ebx) + movq 56(%edi),%mm4 + movq %mm4,%mm5 + movd 68(%edi),%mm6 + punpckldq 64(%edi),%mm6 + movq 56(%eax),%mm7 + pfadd %mm6,%mm4 + movq %mm4,56(%ebx) + pfsub %mm6,%mm5 + pfmul %mm7,%mm5 + movd %mm5,68(%ebx) + psrlq $32,%mm5 + movd %mm5,64(%ebx) + + /* 2 */ + movl ASM_NAME(pnts)+4,%eax + /* 0,14 */ + movq 0(%ebx),%mm0 + movq %mm0,%mm1 + movd 60(%ebx),%mm2 + punpckldq 56(%ebx),%mm2 + movq 0(%eax),%mm3 + pfadd %mm2,%mm0 + movq %mm0,0(%esi) + pfsub %mm2,%mm1 + pfmul %mm3,%mm1 + movd %mm1,60(%esi) + psrlq $32,%mm1 + movd %mm1,56(%esi) + /* 16,30 */ + movq 64(%ebx),%mm0 + movq %mm0,%mm1 + movd 124(%ebx),%mm2 + punpckldq 120(%ebx),%mm2 + pfadd %mm2,%mm0 + movq %mm0,64(%esi) + pfsubr %mm2,%mm1 + pfmul %mm3,%mm1 + movd %mm1,124(%esi) + psrlq $32,%mm1 + movd %mm1,120(%esi) + /* 2,12 */ + movq 8(%ebx),%mm4 + movq %mm4,%mm5 + movd 52(%ebx),%mm6 + punpckldq 48(%ebx),%mm6 + movq 8(%eax),%mm7 + pfadd %mm6,%mm4 + movq %mm4,8(%esi) + pfsub %mm6,%mm5 + pfmul %mm7,%mm5 + movd %mm5,52(%esi) + psrlq $32,%mm5 + movd %mm5,48(%esi) + /* 18,28 */ + movq 72(%ebx),%mm4 + movq %mm4,%mm5 + movd 116(%ebx),%mm6 + punpckldq 112(%ebx),%mm6 + pfadd %mm6,%mm4 + movq %mm4,72(%esi) + pfsubr %mm6,%mm5 + pfmul %mm7,%mm5 + movd %mm5,116(%esi) + psrlq $32,%mm5 + movd %mm5,112(%esi) + /* 4,10 */ + movq 16(%ebx),%mm0 + movq %mm0,%mm1 + movd 44(%ebx),%mm2 + punpckldq 40(%ebx),%mm2 + movq 16(%eax),%mm3 + pfadd %mm2,%mm0 + movq %mm0,16(%esi) + pfsub %mm2,%mm1 + pfmul %mm3,%mm1 + movd %mm1,44(%esi) + psrlq $32,%mm1 + movd 
%mm1,40(%esi) + /* 20,26 */ + movq 80(%ebx),%mm0 + movq %mm0,%mm1 + movd 108(%ebx),%mm2 + punpckldq 104(%ebx),%mm2 + pfadd %mm2,%mm0 + movq %mm0,80(%esi) + pfsubr %mm2,%mm1 + pfmul %mm3,%mm1 + movd %mm1,108(%esi) + psrlq $32,%mm1 + movd %mm1,104(%esi) + /* 6,8 */ + movq 24(%ebx),%mm4 + movq %mm4,%mm5 + movd 36(%ebx),%mm6 + punpckldq 32(%ebx),%mm6 + movq 24(%eax),%mm7 + pfadd %mm6,%mm4 + movq %mm4,24(%esi) + pfsub %mm6,%mm5 + pfmul %mm7,%mm5 + movd %mm5,36(%esi) + psrlq $32,%mm5 + movd %mm5,32(%esi) + /* 22,24 */ + movq 88(%ebx),%mm4 + movq %mm4,%mm5 + movd 100(%ebx),%mm6 + punpckldq 96(%ebx),%mm6 + pfadd %mm6,%mm4 + movq %mm4,88(%esi) + pfsubr %mm6,%mm5 + pfmul %mm7,%mm5 + movd %mm5,100(%esi) + psrlq $32,%mm5 + movd %mm5,96(%esi) + + /* 3 */ + movl ASM_NAME(pnts)+8,%eax + movq 0(%eax),%mm0 + movq 8(%eax),%mm1 + /* 0,6 */ + movq 0(%esi),%mm2 + movq %mm2,%mm3 + movd 28(%esi),%mm4 + punpckldq 24(%esi),%mm4 + pfadd %mm4,%mm2 + pfsub %mm4,%mm3 + pfmul %mm0,%mm3 + movq %mm2,0(%ebx) + movd %mm3,28(%ebx) + psrlq $32,%mm3 + movd %mm3,24(%ebx) + /* 2,4 */ + movq 8(%esi),%mm5 + movq %mm5,%mm6 + movd 20(%esi),%mm7 + punpckldq 16(%esi),%mm7 + pfadd %mm7,%mm5 + pfsub %mm7,%mm6 + pfmul %mm1,%mm6 + movq %mm5,8(%ebx) + movd %mm6,20(%ebx) + psrlq $32,%mm6 + movd %mm6,16(%ebx) + /* 8,14 */ + movq 32(%esi),%mm2 + movq %mm2,%mm3 + movd 60(%esi),%mm4 + punpckldq 56(%esi),%mm4 + pfadd %mm4,%mm2 + pfsubr %mm4,%mm3 + pfmul %mm0,%mm3 + movq %mm2,32(%ebx) + movd %mm3,60(%ebx) + psrlq $32,%mm3 + movd %mm3,56(%ebx) + /* 10,12 */ + movq 40(%esi),%mm5 + movq %mm5,%mm6 + movd 52(%esi),%mm7 + punpckldq 48(%esi),%mm7 + pfadd %mm7,%mm5 + pfsubr %mm7,%mm6 + pfmul %mm1,%mm6 + movq %mm5,40(%ebx) + movd %mm6,52(%ebx) + psrlq $32,%mm6 + movd %mm6,48(%ebx) + /* 16,22 */ + movq 64(%esi),%mm2 + movq %mm2,%mm3 + movd 92(%esi),%mm4 + punpckldq 88(%esi),%mm4 + pfadd %mm4,%mm2 + pfsub %mm4,%mm3 + pfmul %mm0,%mm3 + movq %mm2,64(%ebx) + movd %mm3,92(%ebx) + psrlq $32,%mm3 + movd %mm3,88(%ebx) + /* 18,20 */ + movq 72(%esi),%mm5 + movq %mm5,%mm6 + movd 84(%esi),%mm7 + punpckldq 80(%esi),%mm7 + pfadd %mm7,%mm5 + pfsub %mm7,%mm6 + pfmul %mm1,%mm6 + movq %mm5,72(%ebx) + movd %mm6,84(%ebx) + psrlq $32,%mm6 + movd %mm6,80(%ebx) + /* 24,30 */ + movq 96(%esi),%mm2 + movq %mm2,%mm3 + movd 124(%esi),%mm4 + punpckldq 120(%esi),%mm4 + pfadd %mm4,%mm2 + pfsubr %mm4,%mm3 + pfmul %mm0,%mm3 + movq %mm2,96(%ebx) + movd %mm3,124(%ebx) + psrlq $32,%mm3 + movd %mm3,120(%ebx) + /* 26,28 */ + movq 104(%esi),%mm5 + movq %mm5,%mm6 + movd 116(%esi),%mm7 + punpckldq 112(%esi),%mm7 + pfadd %mm7,%mm5 + pfsubr %mm7,%mm6 + pfmul %mm1,%mm6 + movq %mm5,104(%ebx) + movd %mm6,116(%ebx) + psrlq $32,%mm6 + movd %mm6,112(%ebx) + + /* 4 */ + movl ASM_NAME(pnts)+12,%eax + movq 0(%eax),%mm0 + /* 0 */ + movq 0(%ebx),%mm1 + movq %mm1,%mm2 + movd 12(%ebx),%mm3 + punpckldq 8(%ebx),%mm3 + pfadd %mm3,%mm1 + pfsub %mm3,%mm2 + pfmul %mm0,%mm2 + movq %mm1,0(%esi) + movd %mm2,12(%esi) + psrlq $32,%mm2 + movd %mm2,8(%esi) + /* 4 */ + movq 16(%ebx),%mm4 + movq %mm4,%mm5 + movd 28(%ebx),%mm6 + punpckldq 24(%ebx),%mm6 + pfadd %mm6,%mm4 + pfsubr %mm6,%mm5 + pfmul %mm0,%mm5 + movq %mm4,16(%esi) + movd %mm5,28(%esi) + psrlq $32,%mm5 + movd %mm5,24(%esi) + /* 8 */ + movq 32(%ebx),%mm1 + movq %mm1,%mm2 + movd 44(%ebx),%mm3 + punpckldq 40(%ebx),%mm3 + pfadd %mm3,%mm1 + pfsub %mm3,%mm2 + pfmul %mm0,%mm2 + movq %mm1,32(%esi) + movd %mm2,44(%esi) + psrlq $32,%mm2 + movd %mm2,40(%esi) + /* 12 */ + movq 48(%ebx),%mm4 + movq %mm4,%mm5 + movd 60(%ebx),%mm6 + punpckldq 56(%ebx),%mm6 + pfadd %mm6,%mm4 + 
pfsubr %mm6,%mm5 + pfmul %mm0,%mm5 + movq %mm4,48(%esi) + movd %mm5,60(%esi) + psrlq $32,%mm5 + movd %mm5,56(%esi) + /* 16 */ + movq 64(%ebx),%mm1 + movq %mm1,%mm2 + movd 76(%ebx),%mm3 + punpckldq 72(%ebx),%mm3 + pfadd %mm3,%mm1 + pfsub %mm3,%mm2 + pfmul %mm0,%mm2 + movq %mm1,64(%esi) + movd %mm2,76(%esi) + psrlq $32,%mm2 + movd %mm2,72(%esi) + /* 20 */ + movq 80(%ebx),%mm4 + movq %mm4,%mm5 + movd 92(%ebx),%mm6 + punpckldq 88(%ebx),%mm6 + pfadd %mm6,%mm4 + pfsubr %mm6,%mm5 + pfmul %mm0,%mm5 + movq %mm4,80(%esi) + movd %mm5,92(%esi) + psrlq $32,%mm5 + movd %mm5,88(%esi) + /* 24 */ + movq 96(%ebx),%mm1 + movq %mm1,%mm2 + movd 108(%ebx),%mm3 + punpckldq 104(%ebx),%mm3 + pfadd %mm3,%mm1 + pfsub %mm3,%mm2 + pfmul %mm0,%mm2 + movq %mm1,96(%esi) + movd %mm2,108(%esi) + psrlq $32,%mm2 + movd %mm2,104(%esi) + /* 28 */ + movq 112(%ebx),%mm4 + movq %mm4,%mm5 + movd 124(%ebx),%mm6 + punpckldq 120(%ebx),%mm6 + pfadd %mm6,%mm4 + pfsubr %mm6,%mm5 + pfmul %mm0,%mm5 + movq %mm4,112(%esi) + movd %mm5,124(%esi) + psrlq $32,%mm5 + movd %mm5,120(%esi) + + /* 5 */ + movl $-1,%eax + movd %eax,%mm1 + movl $1,%eax + /* L | H */ + movd %eax,%mm0 + punpckldq %mm1,%mm0 + /* 1.0 | -1.0 */ + pi2fd %mm0,%mm0 + movd %eax,%mm1 + pi2fd %mm1,%mm1 + movl ASM_NAME(pnts)+16,%eax + movd 0(%eax),%mm2 + /* 1.0 | cos0 */ + punpckldq %mm2,%mm1 + /* 0 */ + movq 0(%esi),%mm2 + movq %mm2,%mm3 + pfmul %mm0,%mm3 + pfacc %mm3,%mm2 + pfmul %mm1,%mm2 + movq %mm2,0(%ebx) + movq 8(%esi),%mm4 + movq %mm4,%mm5 + pfmul %mm0,%mm5 + pfacc %mm5,%mm4 + pfmul %mm0,%mm4 + pfmul %mm1,%mm4 + movq %mm4,%mm5 + psrlq $32,%mm5 + pfacc %mm5,%mm4 + movq %mm4,8(%ebx) + /* 4 */ + movq 16(%esi),%mm2 + movq %mm2,%mm3 + pfmul %mm0,%mm3 + pfacc %mm3,%mm2 + pfmul %mm1,%mm2 + movq 24(%esi),%mm4 + movq %mm4,%mm5 + pfmul %mm0,%mm5 + pfacc %mm5,%mm4 + pfmul %mm0,%mm4 + pfmul %mm1,%mm4 + movq %mm4,%mm5 + psrlq $32,%mm5 + pfacc %mm5,%mm4 + movq %mm2,%mm3 + psrlq $32,%mm3 + pfadd %mm4,%mm2 + pfadd %mm3,%mm4 + movq %mm2,16(%ebx) + movq %mm4,24(%ebx) + /* 8 */ + movq 32(%esi),%mm2 + movq %mm2,%mm3 + pfmul %mm0,%mm3 + pfacc %mm3,%mm2 + pfmul %mm1,%mm2 + movq %mm2,32(%ebx) + movq 40(%esi),%mm4 + movq %mm4,%mm5 + pfmul %mm0,%mm5 + pfacc %mm5,%mm4 + pfmul %mm0,%mm4 + pfmul %mm1,%mm4 + movq %mm4,%mm5 + psrlq $32,%mm5 + pfacc %mm5,%mm4 + movq %mm4,40(%ebx) + /* 12 */ + movq 48(%esi),%mm2 + movq %mm2,%mm3 + pfmul %mm0,%mm3 + pfacc %mm3,%mm2 + pfmul %mm1,%mm2 + movq 56(%esi),%mm4 + movq %mm4,%mm5 + pfmul %mm0,%mm5 + pfacc %mm5,%mm4 + pfmul %mm0,%mm4 + pfmul %mm1,%mm4 + movq %mm4,%mm5 + psrlq $32,%mm5 + pfacc %mm5,%mm4 + movq %mm2,%mm3 + psrlq $32,%mm3 + pfadd %mm4,%mm2 + pfadd %mm3,%mm4 + movq %mm2,48(%ebx) + movq %mm4,56(%ebx) + /* 16 */ + movq 64(%esi),%mm2 + movq %mm2,%mm3 + pfmul %mm0,%mm3 + pfacc %mm3,%mm2 + pfmul %mm1,%mm2 + movq %mm2,64(%ebx) + movq 72(%esi),%mm4 + movq %mm4,%mm5 + pfmul %mm0,%mm5 + pfacc %mm5,%mm4 + pfmul %mm0,%mm4 + pfmul %mm1,%mm4 + movq %mm4,%mm5 + psrlq $32,%mm5 + pfacc %mm5,%mm4 + movq %mm4,72(%ebx) + /* 20 */ + movq 80(%esi),%mm2 + movq %mm2,%mm3 + pfmul %mm0,%mm3 + pfacc %mm3,%mm2 + pfmul %mm1,%mm2 + movq 88(%esi),%mm4 + movq %mm4,%mm5 + pfmul %mm0,%mm5 + pfacc %mm5,%mm4 + pfmul %mm0,%mm4 + pfmul %mm1,%mm4 + movq %mm4,%mm5 + psrlq $32,%mm5 + pfacc %mm5,%mm4 + movq %mm2,%mm3 + psrlq $32,%mm3 + pfadd %mm4,%mm2 + pfadd %mm3,%mm4 + movq %mm2,80(%ebx) + movq %mm4,88(%ebx) + /* 24 */ + movq 96(%esi),%mm2 + movq %mm2,%mm3 + pfmul %mm0,%mm3 + pfacc %mm3,%mm2 + pfmul %mm1,%mm2 + movq %mm2,96(%ebx) + movq 104(%esi),%mm4 + movq %mm4,%mm5 + pfmul %mm0,%mm5 + 
pfacc %mm5,%mm4 + pfmul %mm0,%mm4 + pfmul %mm1,%mm4 + movq %mm4,%mm5 + psrlq $32,%mm5 + pfacc %mm5,%mm4 + movq %mm4,104(%ebx) + /* 28 */ + movq 112(%esi),%mm2 + movq %mm2,%mm3 + pfmul %mm0,%mm3 + pfacc %mm3,%mm2 + pfmul %mm1,%mm2 + movq 120(%esi),%mm4 + movq %mm4,%mm5 + pfmul %mm0,%mm5 + pfacc %mm5,%mm4 + pfmul %mm0,%mm4 + pfmul %mm1,%mm4 + movq %mm4,%mm5 + psrlq $32,%mm5 + pfacc %mm5,%mm4 + movq %mm2,%mm3 + psrlq $32,%mm3 + pfadd %mm4,%mm2 + pfadd %mm3,%mm4 + movq %mm2,112(%ebx) + movq %mm4,120(%ebx) + + /* Phase6 */ + movl 0(%ebx),%eax + movl %eax,1024(%ebp) + movl 4(%ebx),%eax + movl %eax,0(%ebp) + movl %eax,0(%edx) + movl 8(%ebx),%eax + movl %eax,512(%ebp) + movl 12(%ebx),%eax + movl %eax,512(%edx) + + movl 16(%ebx),%eax + movl %eax,768(%ebp) + movl 20(%ebx),%eax + movl %eax,256(%edx) + + movl 24(%ebx),%eax + movl %eax,256(%ebp) + movl 28(%ebx),%eax + movl %eax,768(%edx) + + movq 32(%ebx),%mm0 + movq 48(%ebx),%mm1 + pfadd %mm1,%mm0 + movd %mm0,896(%ebp) + psrlq $32,%mm0 + movd %mm0,128(%edx) + movq 40(%ebx),%mm2 + pfadd %mm2,%mm1 + movd %mm1,640(%ebp) + psrlq $32,%mm1 + movd %mm1,384(%edx) + + movq 56(%ebx),%mm3 + pfadd %mm3,%mm2 + movd %mm2,384(%ebp) + psrlq $32,%mm2 + movd %mm2,640(%edx) + + movd 36(%ebx),%mm4 + pfadd %mm4,%mm3 + movd %mm3,128(%ebp) + psrlq $32,%mm3 + movd %mm3,896(%edx) + movq 96(%ebx),%mm0 + movq 64(%ebx),%mm1 + + movq 112(%ebx),%mm2 + pfadd %mm2,%mm0 + movq %mm0,%mm3 + pfadd %mm1,%mm3 + movd %mm3,960(%ebp) + psrlq $32,%mm3 + movd %mm3,64(%edx) + movq 80(%ebx),%mm1 + pfadd %mm1,%mm0 + movd %mm0,832(%ebp) + psrlq $32,%mm0 + movd %mm0,192(%edx) + movq 104(%ebx),%mm3 + pfadd %mm3,%mm2 + movq %mm2,%mm4 + pfadd %mm1,%mm4 + movd %mm4,704(%ebp) + psrlq $32,%mm4 + movd %mm4,320(%edx) + movq 72(%ebx),%mm1 + pfadd %mm1,%mm2 + movd %mm2,576(%ebp) + psrlq $32,%mm2 + movd %mm2,448(%edx) + + movq 120(%ebx),%mm4 + pfadd %mm4,%mm3 + movq %mm3,%mm5 + pfadd %mm1,%mm5 + movd %mm5,448(%ebp) + psrlq $32,%mm5 + movd %mm5,576(%edx) + movq 88(%ebx),%mm1 + pfadd %mm1,%mm3 + movd %mm3,320(%ebp) + psrlq $32,%mm3 + movd %mm3,704(%edx) + + movd 100(%ebx),%mm5 + pfadd %mm5,%mm4 + movq %mm4,%mm6 + pfadd %mm1,%mm6 + movd %mm6,192(%ebp) + psrlq $32,%mm6 + movd %mm6,832(%edx) + movd 68(%ebx),%mm1 + pfadd %mm1,%mm4 + movd %mm4,64(%ebp) + psrlq $32,%mm4 + movd %mm4,960(%edx) + + /* femms */ + + popl %ebx + popl %esi + popl %edi + popl %ebp + addl $256,%esp + + ret + +NONEXEC_STACK Index: include/reactos/libs/libmpg123/dct64_3dnowext.S =================================================================== --- include/reactos/libs/libmpg123/dct64_3dnowext.S (revision 0) +++ include/reactos/libs/libmpg123/dct64_3dnowext.S (working copy) @@ -0,0 +1,714 @@ +/* + dct64_3dnowext: extended 3DNow optimized DCT64 + + copyright ?-2007 by the mpg123 project - free software under the terms of the LGPL 2.1 + see COPYING and AUTHORS files in distribution or http://mpg123.org + + Transformed back into standalone asm, with help of + gcc -S -DHAVE_CONFIG_H -I. -march=k6-3 -O3 -Wall -pedantic -fno-strict-aliasing -DREAL_IS_FLOAT -c -o dct64_3dnowext.{S,c} + + MPlayer comment follows. +*/ + +/* +* This code was taken from http://www.mpg123.org +* See ChangeLog of mpg123-0.59s-pre.1 for detail +* Applied to mplayer by Nick Kurshev +* Partial 3dnowex-DSP! optimization by Nick Kurshev +* +* TODO: optimize scalar 3dnow! 
code +* Warning: Phases 7 & 8 are not tested +*/ + +#include "mangle.h" + + .data + ALIGN4 + /* .type plus_1f, @object + .size plus_1f, 4 */ +plus_1f: + .long 1065353216 + ALIGN8 + /* .type x_plus_minus_3dnow, @object + .size x_plus_minus_3dnow, 8 */ +x_plus_minus_3dnow: + .long 0 + .long -2147483648 + + .text + ALIGN32 +.globl ASM_NAME(dct64_3dnowext) + /* .type ASM_NAME(dct64_3dnowext), @function */ +ASM_NAME(dct64_3dnowext): + pushl %ebp + movl %esp, %ebp + pushl %edi + pushl %esi + pushl %ebx + subl $256, %esp +/* APP */ + movl 16(%ebp),%eax + leal 128+-268(%ebp),%edx + movl 8(%ebp),%esi + movl 12(%ebp),%edi + movl ASM_VALUE(costab_mmxsse),%ebx + leal -268(%ebp),%ecx + movq (%eax), %mm0 + movq 8(%eax), %mm4 + movq %mm0, %mm3 + movq %mm4, %mm7 + pswapd 120(%eax), %mm1 + pswapd 112(%eax), %mm5 + pfadd %mm1, %mm0 + pfadd %mm5, %mm4 + movq %mm0, (%edx) + movq %mm4, 8(%edx) + pfsub %mm1, %mm3 + pfsub %mm5, %mm7 + pfmul (%ebx), %mm3 + pfmul 8(%ebx), %mm7 + pswapd %mm3, %mm3 + pswapd %mm7, %mm7 + movq %mm3, 120(%edx) + movq %mm7, 112(%edx) + movq 16(%eax), %mm0 + movq 24(%eax), %mm4 + movq %mm0, %mm3 + movq %mm4, %mm7 + pswapd 104(%eax), %mm1 + pswapd 96(%eax), %mm5 + pfadd %mm1, %mm0 + pfadd %mm5, %mm4 + movq %mm0, 16(%edx) + movq %mm4, 24(%edx) + pfsub %mm1, %mm3 + pfsub %mm5, %mm7 + pfmul 16(%ebx), %mm3 + pfmul 24(%ebx), %mm7 + pswapd %mm3, %mm3 + pswapd %mm7, %mm7 + movq %mm3, 104(%edx) + movq %mm7, 96(%edx) + movq 32(%eax), %mm0 + movq 40(%eax), %mm4 + movq %mm0, %mm3 + movq %mm4, %mm7 + pswapd 88(%eax), %mm1 + pswapd 80(%eax), %mm5 + pfadd %mm1, %mm0 + pfadd %mm5, %mm4 + movq %mm0, 32(%edx) + movq %mm4, 40(%edx) + pfsub %mm1, %mm3 + pfsub %mm5, %mm7 + pfmul 32(%ebx), %mm3 + pfmul 40(%ebx), %mm7 + pswapd %mm3, %mm3 + pswapd %mm7, %mm7 + movq %mm3, 88(%edx) + movq %mm7, 80(%edx) + movq 48(%eax), %mm0 + movq 56(%eax), %mm4 + movq %mm0, %mm3 + movq %mm4, %mm7 + pswapd 72(%eax), %mm1 + pswapd 64(%eax), %mm5 + pfadd %mm1, %mm0 + pfadd %mm5, %mm4 + movq %mm0, 48(%edx) + movq %mm4, 56(%edx) + pfsub %mm1, %mm3 + pfsub %mm5, %mm7 + pfmul 48(%ebx), %mm3 + pfmul 56(%ebx), %mm7 + pswapd %mm3, %mm3 + pswapd %mm7, %mm7 + movq %mm3, 72(%edx) + movq %mm7, 64(%edx) + movq (%edx), %mm0 + movq 8(%edx), %mm4 + movq %mm0, %mm3 + movq %mm4, %mm7 + pswapd 56(%edx), %mm1 + pswapd 48(%edx), %mm5 + pfadd %mm1, %mm0 + pfadd %mm5, %mm4 + movq %mm0, (%ecx) + movq %mm4, 8(%ecx) + pfsub %mm1, %mm3 + pfsub %mm5, %mm7 + pfmul 64(%ebx), %mm3 + pfmul 72(%ebx), %mm7 + pswapd %mm3, %mm3 + pswapd %mm7, %mm7 + movq %mm3, 56(%ecx) + movq %mm7, 48(%ecx) + movq 16(%edx), %mm0 + movq 24(%edx), %mm4 + movq %mm0, %mm3 + movq %mm4, %mm7 + pswapd 40(%edx), %mm1 + pswapd 32(%edx), %mm5 + pfadd %mm1, %mm0 + pfadd %mm5, %mm4 + movq %mm0, 16(%ecx) + movq %mm4, 24(%ecx) + pfsub %mm1, %mm3 + pfsub %mm5, %mm7 + pfmul 80(%ebx), %mm3 + pfmul 88(%ebx), %mm7 + pswapd %mm3, %mm3 + pswapd %mm7, %mm7 + movq %mm3, 40(%ecx) + movq %mm7, 32(%ecx) + movq 64(%edx), %mm0 + movq 72(%edx), %mm4 + movq %mm0, %mm3 + movq %mm4, %mm7 + pswapd 120(%edx), %mm1 + pswapd 112(%edx), %mm5 + pfadd %mm1, %mm0 + pfadd %mm5, %mm4 + movq %mm0, 64(%ecx) + movq %mm4, 72(%ecx) + pfsubr %mm1, %mm3 + pfsubr %mm5, %mm7 + pfmul 64(%ebx), %mm3 + pfmul 72(%ebx), %mm7 + pswapd %mm3, %mm3 + pswapd %mm7, %mm7 + movq %mm3, 120(%ecx) + movq %mm7, 112(%ecx) + movq 80(%edx), %mm0 + movq 88(%edx), %mm4 + movq %mm0, %mm3 + movq %mm4, %mm7 + pswapd 104(%edx), %mm1 + pswapd 96(%edx), %mm5 + pfadd %mm1, %mm0 + pfadd %mm5, %mm4 + movq %mm0, 80(%ecx) + movq %mm4, 88(%ecx) + pfsubr %mm1, %mm3 
+ pfsubr %mm5, %mm7 + pfmul 80(%ebx), %mm3 + pfmul 88(%ebx), %mm7 + pswapd %mm3, %mm3 + pswapd %mm7, %mm7 + movq %mm3, 104(%ecx) + movq %mm7, 96(%ecx) + movq 96(%ebx), %mm2 + movq 104(%ebx), %mm6 + movq (%ecx), %mm0 + movq 8(%ecx), %mm4 + movq %mm0, %mm3 + movq %mm4, %mm7 + pswapd 24(%ecx), %mm1 + pswapd 16(%ecx), %mm5 + pfadd %mm1, %mm0 + pfadd %mm5, %mm4 + movq %mm0, (%edx) + movq %mm4, 8(%edx) + pfsub %mm1, %mm3 + pfsub %mm5, %mm7 + pfmul %mm2, %mm3 + pfmul %mm6, %mm7 + pswapd %mm3, %mm3 + pswapd %mm7, %mm7 + movq %mm3, 24(%edx) + movq %mm7, 16(%edx) + movq 32(%ecx), %mm0 + movq 40(%ecx), %mm4 + movq %mm0, %mm3 + movq %mm4, %mm7 + pswapd 56(%ecx), %mm1 + pswapd 48(%ecx), %mm5 + pfadd %mm1, %mm0 + pfadd %mm5, %mm4 + movq %mm0, 32(%edx) + movq %mm4, 40(%edx) + pfsubr %mm1, %mm3 + pfsubr %mm5, %mm7 + pfmul %mm2, %mm3 + pfmul %mm6, %mm7 + pswapd %mm3, %mm3 + pswapd %mm7, %mm7 + movq %mm3, 56(%edx) + movq %mm7, 48(%edx) + movq 64(%ecx), %mm0 + movq 72(%ecx), %mm4 + movq %mm0, %mm3 + movq %mm4, %mm7 + pswapd 88(%ecx), %mm1 + pswapd 80(%ecx), %mm5 + pfadd %mm1, %mm0 + pfadd %mm5, %mm4 + movq %mm0, 64(%edx) + movq %mm4, 72(%edx) + pfsub %mm1, %mm3 + pfsub %mm5, %mm7 + pfmul %mm2, %mm3 + pfmul %mm6, %mm7 + pswapd %mm3, %mm3 + pswapd %mm7, %mm7 + movq %mm3, 88(%edx) + movq %mm7, 80(%edx) + movq 96(%ecx), %mm0 + movq 104(%ecx), %mm4 + movq %mm0, %mm3 + movq %mm4, %mm7 + pswapd 120(%ecx), %mm1 + pswapd 112(%ecx), %mm5 + pfadd %mm1, %mm0 + pfadd %mm5, %mm4 + movq %mm0, 96(%edx) + movq %mm4, 104(%edx) + pfsubr %mm1, %mm3 + pfsubr %mm5, %mm7 + pfmul %mm2, %mm3 + pfmul %mm6, %mm7 + pswapd %mm3, %mm3 + pswapd %mm7, %mm7 + movq %mm3, 120(%edx) + movq %mm7, 112(%edx) + movq 112(%ebx), %mm2 + movq (%edx), %mm0 + movq 16(%edx), %mm4 + movq %mm0, %mm3 + movq %mm4, %mm7 + pswapd 8(%edx), %mm1 + pswapd 24(%edx), %mm5 + pfadd %mm1, %mm0 + pfadd %mm5, %mm4 + movq %mm0, (%ecx) + movq %mm4, 16(%ecx) + pfsub %mm1, %mm3 + pfsubr %mm5, %mm7 + pfmul %mm2, %mm3 + pfmul %mm2, %mm7 + pswapd %mm3, %mm3 + pswapd %mm7, %mm7 + movq %mm3, 8(%ecx) + movq %mm7, 24(%ecx) + movq 32(%edx), %mm0 + movq 48(%edx), %mm4 + movq %mm0, %mm3 + movq %mm4, %mm7 + pswapd 40(%edx), %mm1 + pswapd 56(%edx), %mm5 + pfadd %mm1, %mm0 + pfadd %mm5, %mm4 + movq %mm0, 32(%ecx) + movq %mm4, 48(%ecx) + pfsub %mm1, %mm3 + pfsubr %mm5, %mm7 + pfmul %mm2, %mm3 + pfmul %mm2, %mm7 + pswapd %mm3, %mm3 + pswapd %mm7, %mm7 + movq %mm3, 40(%ecx) + movq %mm7, 56(%ecx) + movq 64(%edx), %mm0 + movq 80(%edx), %mm4 + movq %mm0, %mm3 + movq %mm4, %mm7 + pswapd 72(%edx), %mm1 + pswapd 88(%edx), %mm5 + pfadd %mm1, %mm0 + pfadd %mm5, %mm4 + movq %mm0, 64(%ecx) + movq %mm4, 80(%ecx) + pfsub %mm1, %mm3 + pfsubr %mm5, %mm7 + pfmul %mm2, %mm3 + pfmul %mm2, %mm7 + pswapd %mm3, %mm3 + pswapd %mm7, %mm7 + movq %mm3, 72(%ecx) + movq %mm7, 88(%ecx) + movq 96(%edx), %mm0 + movq 112(%edx), %mm4 + movq %mm0, %mm3 + movq %mm4, %mm7 + pswapd 104(%edx), %mm1 + pswapd 120(%edx), %mm5 + pfadd %mm1, %mm0 + pfadd %mm5, %mm4 + movq %mm0, 96(%ecx) + movq %mm4, 112(%ecx) + pfsub %mm1, %mm3 + pfsubr %mm5, %mm7 + pfmul %mm2, %mm3 + pfmul %mm2, %mm7 + pswapd %mm3, %mm3 + pswapd %mm7, %mm7 + movq %mm3, 104(%ecx) + movq %mm7, 120(%ecx) + movd plus_1f, %mm6 + punpckldq 120(%ebx), %mm6 + movq x_plus_minus_3dnow, %mm7 + movq 32(%ecx), %mm0 + movq 64(%ecx), %mm2 + movq %mm0, %mm1 + movq %mm2, %mm3 + pxor %mm7, %mm1 + pxor %mm7, %mm3 + pfacc %mm1, %mm0 + pfacc %mm3, %mm2 + pfmul %mm6, %mm0 + pfmul %mm6, %mm2 + movq %mm0, 32(%edx) + movq %mm2, 64(%edx) + movd 44(%ecx), %mm0 + movd 40(%ecx), %mm2 + movd 
120(%ebx), %mm3 + punpckldq 76(%ecx), %mm0 + punpckldq 72(%ecx), %mm2 + punpckldq %mm3, %mm3 + movq %mm0, %mm4 + movq %mm2, %mm5 + pfsub %mm2, %mm0 + pfmul %mm3, %mm0 + movq %mm0, %mm1 + pfadd %mm5, %mm0 + pfadd %mm4, %mm0 + movq %mm0, %mm2 + punpckldq %mm1, %mm0 + punpckhdq %mm1, %mm2 + movq %mm0, 40(%edx) + movq %mm2, 72(%edx) + movd 48(%ecx), %mm3 + movd 60(%ecx), %mm2 + pfsub 52(%ecx), %mm3 + pfsub 56(%ecx), %mm2 + pfmul 120(%ebx), %mm3 + pfmul 120(%ebx), %mm2 + movq %mm2, %mm1 + pfadd 56(%ecx), %mm1 + pfadd 60(%ecx), %mm1 + movq %mm1, %mm0 + pfadd 48(%ecx), %mm0 + pfadd 52(%ecx), %mm0 + pfadd %mm3, %mm1 + punpckldq %mm2, %mm1 + pfadd %mm3, %mm2 + punpckldq %mm2, %mm0 + movq %mm1, 56(%edx) + movq %mm0, 48(%edx) + movd 92(%ecx), %mm1 + pfsub 88(%ecx), %mm1 + pfmul 120(%ebx), %mm1 + movd %mm1, 92(%edx) + pfadd 92(%ecx), %mm1 + pfadd 88(%ecx), %mm1 + movq %mm1, %mm0 + pfadd 80(%ecx), %mm0 + pfadd 84(%ecx), %mm0 + movd %mm0, 80(%edx) + movd 80(%ecx), %mm0 + pfsub 84(%ecx), %mm0 + pfmul 120(%ebx), %mm0 + pfadd %mm0, %mm1 + pfadd 92(%edx), %mm0 + punpckldq %mm1, %mm0 + movq %mm0, 84(%edx) + movq 96(%ecx), %mm0 + movq %mm0, %mm1 + pxor %mm7, %mm1 + pfacc %mm1, %mm0 + pfmul %mm6, %mm0 + movq %mm0, 96(%edx) + movd 108(%ecx), %mm0 + pfsub 104(%ecx), %mm0 + pfmul 120(%ebx), %mm0 + movd %mm0, 108(%edx) + pfadd 104(%ecx), %mm0 + pfadd 108(%ecx), %mm0 + movd %mm0, 104(%edx) + movd 124(%ecx), %mm1 + pfsub 120(%ecx), %mm1 + pfmul 120(%ebx), %mm1 + movd %mm1, 124(%edx) + pfadd 120(%ecx), %mm1 + pfadd 124(%ecx), %mm1 + movq %mm1, %mm0 + pfadd 112(%ecx), %mm0 + pfadd 116(%ecx), %mm0 + movd %mm0, 112(%edx) + movd 112(%ecx), %mm0 + pfsub 116(%ecx), %mm0 + pfmul 120(%ebx), %mm0 + pfadd %mm0,%mm1 + pfadd 124(%edx), %mm0 + punpckldq %mm1, %mm0 + movq %mm0, 116(%edx) + jnz .L01 + movd (%ecx), %mm0 + pfadd 4(%ecx), %mm0 + movd %mm0, 1024(%esi) + movd (%ecx), %mm0 + pfsub 4(%ecx), %mm0 + pfmul 120(%ebx), %mm0 + movd %mm0, (%esi) + movd %mm0, (%edi) + movd 12(%ecx), %mm0 + pfsub 8(%ecx), %mm0 + pfmul 120(%ebx), %mm0 + movd %mm0, 512(%edi) + pfadd 12(%ecx), %mm0 + pfadd 8(%ecx), %mm0 + movd %mm0, 512(%esi) + movd 16(%ecx), %mm0 + pfsub 20(%ecx), %mm0 + pfmul 120(%ebx), %mm0 + movq %mm0, %mm3 + movd 28(%ecx), %mm0 + pfsub 24(%ecx), %mm0 + pfmul 120(%ebx), %mm0 + movd %mm0, 768(%edi) + movq %mm0, %mm2 + pfadd 24(%ecx), %mm0 + pfadd 28(%ecx), %mm0 + movq %mm0, %mm1 + pfadd 16(%ecx), %mm0 + pfadd 20(%ecx), %mm0 + movd %mm0, 768(%esi) + pfadd %mm3, %mm1 + movd %mm1, 256(%esi) + pfadd %mm3, %mm2 + movd %mm2, 256(%edi) + movq 32(%edx), %mm0 + movq 48(%edx), %mm1 + pfadd 48(%edx), %mm0 + pfadd 40(%edx), %mm1 + movd %mm0, 896(%esi) + movd %mm1, 640(%esi) + psrlq $32, %mm0 + psrlq $32, %mm1 + movd %mm0, 128(%edi) + movd %mm1, 384(%edi) + movd 40(%edx), %mm0 + pfadd 56(%edx), %mm0 + movd %mm0, 384(%esi) + movd 56(%edx), %mm0 + pfadd 36(%edx), %mm0 + movd %mm0, 128(%esi) + movd 60(%edx), %mm0 + movd %mm0, 896(%edi) + pfadd 44(%edx), %mm0 + movd %mm0, 640(%edi) + movq 96(%edx), %mm0 + movq 112(%edx), %mm2 + movq 104(%edx), %mm4 + pfadd 112(%edx), %mm0 + pfadd 104(%edx), %mm2 + pfadd 120(%edx), %mm4 + movq %mm0, %mm1 + movq %mm2, %mm3 + movq %mm4, %mm5 + pfadd 64(%edx), %mm0 + pfadd 80(%edx), %mm2 + pfadd 72(%edx), %mm4 + movd %mm0, 960(%esi) + movd %mm2, 704(%esi) + movd %mm4, 448(%esi) + psrlq $32, %mm0 + psrlq $32, %mm2 + psrlq $32, %mm4 + movd %mm0, 64(%edi) + movd %mm2, 320(%edi) + movd %mm4, 576(%edi) + pfadd 80(%edx), %mm1 + pfadd 72(%edx), %mm3 + pfadd 88(%edx), %mm5 + movd %mm1, 832(%esi) + movd %mm3, 576(%esi) + movd 
%mm5, 320(%esi) + psrlq $32, %mm1 + psrlq $32, %mm3 + psrlq $32, %mm5 + movd %mm1, 192(%edi) + movd %mm3, 448(%edi) + movd %mm5, 704(%edi) + movd 120(%edx), %mm0 + pfadd 100(%edx), %mm0 + movq %mm0, %mm1 + pfadd 88(%edx), %mm0 + movd %mm0, 192(%esi) + pfadd 68(%edx), %mm1 + movd %mm1, 64(%esi) + movd 124(%edx), %mm0 + movd %mm0, 960(%edi) + pfadd 92(%edx), %mm0 + movd %mm0, 832(%edi) + jmp .L_bye +.L01: + movq (%ecx), %mm0 + movq %mm0, %mm1 + pxor %mm7, %mm1 + pfacc %mm1, %mm0 + pfmul %mm6, %mm0 + pf2iw %mm0, %mm0 + movd %mm0, %eax + movw %ax, 512(%esi) + psrlq $32, %mm0 + movd %mm0, %eax + movw %ax, (%esi) + movd 12(%ecx), %mm0 + pfsub 8(%ecx), %mm0 + pfmul 120(%ebx), %mm0 + pf2iw %mm0, %mm7 + movd %mm7, %eax + movw %ax, 256(%edi) + pfadd 12(%ecx), %mm0 + pfadd 8(%ecx), %mm0 + pf2iw %mm0, %mm0 + movd %mm0, %eax + movw %ax, 256(%esi) + movd 16(%ecx), %mm3 + pfsub 20(%ecx), %mm3 + pfmul 120(%ebx), %mm3 + movq %mm3, %mm2 + movd 28(%ecx), %mm2 + pfsub 24(%ecx), %mm2 + pfmul 120(%ebx), %mm2 + movq %mm2, %mm1 + pf2iw %mm2, %mm7 + movd %mm7, %eax + movw %ax, 384(%edi) + pfadd 24(%ecx), %mm1 + pfadd 28(%ecx), %mm1 + movq %mm1, %mm0 + pfadd 16(%ecx), %mm0 + pfadd 20(%ecx), %mm0 + pf2iw %mm0, %mm0 + movd %mm0, %eax + movw %ax, 384(%esi) + pfadd %mm3, %mm1 + pf2iw %mm1, %mm1 + movd %mm1, %eax + movw %ax, 128(%esi) + pfadd %mm3, %mm2 + pf2iw %mm2, %mm2 + movd %mm2, %eax + movw %ax, 128(%edi) + movq 32(%edx), %mm0 + movq 48(%edx), %mm1 + pfadd 48(%edx), %mm0 + pfadd 40(%edx), %mm1 + pf2iw %mm0, %mm0 + pf2iw %mm1, %mm1 + movd %mm0, %eax + movd %mm1, %ecx + movw %ax, 448(%esi) + movw %cx, 320(%esi) + psrlq $32, %mm0 + psrlq $32, %mm1 + movd %mm0, %eax + movd %mm1, %ecx + movw %ax, 64(%edi) + movw %cx, 192(%edi) + movd 40(%edx), %mm3 + movd 56(%edx), %mm4 + movd 60(%edx), %mm0 + movd 44(%edx), %mm2 + movd 120(%edx), %mm5 + punpckldq %mm4, %mm3 + punpckldq 124(%edx), %mm0 + pfadd 100(%edx), %mm5 + punpckldq 36(%edx), %mm4 + punpckldq 92(%edx), %mm2 + movq %mm5, %mm6 + pfadd %mm4, %mm3 + pf2iw %mm0, %mm1 + pf2iw %mm3, %mm3 + pfadd 88(%edx), %mm5 + movd %mm1, %eax + movd %mm3, %ecx + movw %ax, 448(%edi) + movw %cx, 192(%esi) + pf2iw %mm5, %mm5 + psrlq $32, %mm1 + psrlq $32, %mm3 + movd %mm5, %ebx + movd %mm1, %eax + movd %mm3, %ecx + movw %bx, 96(%esi) + movw %ax, 480(%edi) + movw %cx, 64(%esi) + pfadd %mm2, %mm0 + pf2iw %mm0, %mm0 + movd %mm0, %eax + pfadd 68(%edx), %mm6 + movw %ax, 320(%edi) + psrlq $32, %mm0 + pf2iw %mm6, %mm6 + movd %mm0, %eax + movd %mm6, %ebx + movw %ax, 416(%edi) + movw %bx, 32(%esi) + movq 96(%edx), %mm0 + movq 112(%edx), %mm2 + movq 104(%edx), %mm4 + pfadd %mm2, %mm0 + pfadd %mm4, %mm2 + pfadd 120(%edx), %mm4 + movq %mm0, %mm1 + movq %mm2, %mm3 + movq %mm4, %mm5 + pfadd 64(%edx), %mm0 + pfadd 80(%edx), %mm2 + pfadd 72(%edx), %mm4 + pf2iw %mm0, %mm0 + pf2iw %mm2, %mm2 + pf2iw %mm4, %mm4 + movd %mm0, %eax + movd %mm2, %ecx + movd %mm4, %ebx + movw %ax, 480(%esi) + movw %cx, 352(%esi) + movw %bx, 224(%esi) + psrlq $32, %mm0 + psrlq $32, %mm2 + psrlq $32, %mm4 + movd %mm0, %eax + movd %mm2, %ecx + movd %mm4, %ebx + movw %ax, 32(%edi) + movw %cx, 160(%edi) + movw %bx, 288(%edi) + pfadd 80(%edx), %mm1 + pfadd 72(%edx), %mm3 + pfadd 88(%edx), %mm5 + pf2iw %mm1, %mm1 + pf2iw %mm3, %mm3 + pf2iw %mm5, %mm5 + movd %mm1, %eax + movd %mm3, %ecx + movd %mm5, %ebx + movw %ax, 416(%esi) + movw %cx, 288(%esi) + movw %bx, 160(%esi) + psrlq $32, %mm1 + psrlq $32, %mm3 + psrlq $32, %mm5 + movd %mm1, %eax + movd %mm3, %ecx + movd %mm5, %ebx + movw %ax, 96(%edi) + movw %cx, 224(%edi) + movw %bx, 
352(%edi) + movsw +.L_bye: + femms + +/* NO_APP */ + addl $256, %esp + popl %ebx + popl %esi + popl %edi + leave + ret + /* .size ASM_NAME(dct64_3dnowext), .-ASM_NAME(dct64_3dnowext) */ + +NONEXEC_STACK Index: include/reactos/libs/libmpg123/dct64_altivec.c =================================================================== --- include/reactos/libs/libmpg123/dct64_altivec.c (revision 0) +++ include/reactos/libs/libmpg123/dct64_altivec.c (working copy) @@ -0,0 +1,315 @@ +/* + dct64_altivec.c: Discrete Cosine Tansform (DCT) for Altivec + + copyright ?-2006 by the mpg123 project - free software under the terms of the LGPL 2.1 + see COPYING and AUTHORS files in distribution or http://mpg123.org + initially written by Michael Hipp + altivec optimization by tmkk +*/ + +/* + * Discrete Cosine Tansform (DCT) for subband synthesis + * + * -funroll-loops (for gcc) will remove the loops for better performance + * using loops in the source-code enhances readabillity + * + * + * TODO: write an optimized version for the down-sampling modes + * (in these modes the bands 16-31 (2:1) or 8-31 (4:1) are zero + */ + +#include "mpg123lib_intern.h" + +#ifndef __APPLE__ +#include +#endif + +void dct64_altivec(real *out0,real *out1,real *samples) +{ + ALIGNED(16) real bufs[32]; + + { + register real *b1,*costab; + + vector unsigned char vinvert,vperm1,vperm2,vperm3,vperm4; + vector float v1,v2,v3,v4,v5,v6,v7,v8; + vector float vbs1,vbs2,vbs3,vbs4,vbs5,vbs6,vbs7,vbs8; + vector float vbs9,vbs10,vbs11,vbs12,vbs13,vbs14,vbs15,vbs16; + vector float vzero; + b1 = samples; + costab = pnts[0]; + + vzero = vec_xor(vzero,vzero); +#ifdef __APPLE__ + vinvert = (vector unsigned char)(12,13,14,15,8,9,10,11,4,5,6,7,0,1,2,3); +#else + vinvert = (vector unsigned char){12,13,14,15,8,9,10,11,4,5,6,7,0,1,2,3}; +#endif + vperm1 = vec_lvsl(0,b1); + vperm2 = vec_perm(vperm1,vperm1,vinvert); + + v1 = vec_ld(0,b1); + v2 = vec_ld(16,b1); + v3 = vec_ld(112,b1); + v4 = vec_ld(127,b1); + v5 = vec_perm(v1,v2,vperm1); /* b1[0,1,2,3] */ + v6 = vec_perm(v3,v4,vperm2); /* b1[31,30,29,28] */ + + vbs1 = vec_add(v5,v6); + vbs8 = vec_sub(v5,v6); + + v1 = vec_ld(32,b1); + v4 = vec_ld(96,b1); + v5 = vec_perm(v2,v1,vperm1); /* b1[4,5,6,7] */ + v6 = vec_perm(v4,v3,vperm2); /* b1[27,26,25,24] */ + + vbs2 = vec_add(v5,v6); + vbs7 = vec_sub(v5,v6); + + v2 = vec_ld(48,b1); + v3 = vec_ld(80,b1); + v5 = vec_perm(v1,v2,vperm1); /* b1[8,9,10,11] */ + v6 = vec_perm(v3,v4,vperm2); /* b1[23,22,21,20] */ + + vbs3 = vec_add(v5,v6); + vbs6 = vec_sub(v5,v6); + + v1 = vec_ld(64,b1); + v5 = vec_perm(v2,v1,vperm1); /* b1[12,13,14,15] */ + v6 = vec_perm(v1,v3,vperm2); /* b1[19,18,17,16] */ + + vbs4 = vec_add(v5,v6); + vbs5 = vec_sub(v5,v6); + + v1 = vec_ld(0,costab); + vbs8 = vec_madd(vbs8,v1,vzero); + v2 = vec_ld(16,costab); + vbs7 = vec_madd(vbs7,v2,vzero); + v3 = vec_ld(32,costab); + vbs6 = vec_madd(vbs6,v3,vzero); + v4 = vec_ld(48,costab); + vbs5 = vec_madd(vbs5,v4,vzero); + vbs6 = vec_perm(vbs6,vbs6,vinvert); + vbs5 = vec_perm(vbs5,vbs5,vinvert); + + + costab = pnts[1]; + + v1 = vec_perm(vbs4,vbs4,vinvert); + vbs9 = vec_add(vbs1,v1); + v3 = vec_sub(vbs1,v1); + v5 = vec_ld(0,costab); + v2 = vec_perm(vbs3,vbs3,vinvert); + vbs10 = vec_add(vbs2,v2); + v4 = vec_sub(vbs2,v2); + v6 = vec_ld(16,costab); + vbs12 = vec_madd(v3,v5,vzero); + vbs11 = vec_madd(v4,v6,vzero); + + v7 = vec_sub(vbs7,vbs6); + v8 = vec_sub(vbs8,vbs5); + vbs13 = vec_add(vbs5,vbs8); + vbs14 = vec_add(vbs6,vbs7); + vbs15 = vec_madd(v7,v6,vzero); + vbs16 = vec_madd(v8,v5,vzero); + + + costab = pnts[2]; + + v1 
= vec_perm(vbs10,vbs10,vinvert); + v5 = vec_perm(vbs14,vbs14,vinvert); + vbs1 = vec_add(v1,vbs9); + vbs5 = vec_add(v5,vbs13); + v2 = vec_sub(vbs9,v1); + v6 = vec_sub(vbs13,v5); + v3 = vec_ld(0,costab); + vbs11 = vec_perm(vbs11,vbs11,vinvert); + vbs15 = vec_perm(vbs15,vbs15,vinvert); + vbs3 = vec_add(vbs11,vbs12); + vbs7 = vec_add(vbs15,vbs16); + v4 = vec_sub(vbs12,vbs11); + v7 = vec_sub(vbs16,vbs15); + vbs2 = vec_madd(v2,v3,vzero); + vbs4 = vec_madd(v4,v3,vzero); + vbs6 = vec_madd(v6,v3,vzero); + vbs8 = vec_madd(v7,v3,vzero); + + vbs2 = vec_perm(vbs2,vbs2,vinvert); + vbs4 = vec_perm(vbs4,vbs4,vinvert); + vbs6 = vec_perm(vbs6,vbs6,vinvert); + vbs8 = vec_perm(vbs8,vbs8,vinvert); + + + costab = pnts[3]; + +#ifdef __APPLE__ + vperm1 = (vector unsigned char)(0,1,2,3,4,5,6,7,16,17,18,19,20,21,22,23); + vperm2 = (vector unsigned char)(12,13,14,15,8,9,10,11,28,29,30,31,24,25,26,27); + vperm3 = (vector unsigned char)(0,1,2,3,4,5,6,7,20,21,22,23,16,17,18,19); +#else + vperm1 = (vector unsigned char){0,1,2,3,4,5,6,7,16,17,18,19,20,21,22,23}; + vperm2 = (vector unsigned char){12,13,14,15,8,9,10,11,28,29,30,31,24,25,26,27}; + vperm3 = (vector unsigned char){0,1,2,3,4,5,6,7,20,21,22,23,16,17,18,19}; +#endif + vperm4 = vec_add(vperm3,vec_splat_u8(8)); + + v1 = vec_ld(0,costab); + v2 = vec_splat(v1,0); + v3 = vec_splat(v1,1); + v1 = vec_mergeh(v2,v3); + + v2 = vec_perm(vbs1,vbs3,vperm1); + v3 = vec_perm(vbs2,vbs4,vperm1); + v4 = vec_perm(vbs1,vbs3,vperm2); + v5 = vec_perm(vbs2,vbs4,vperm2); + v6 = vec_sub(v2,v4); + v7 = vec_sub(v3,v5); + v2 = vec_add(v2,v4); + v3 = vec_add(v3,v5); + v4 = vec_madd(v6,v1,vzero); + v5 = vec_nmsub(v7,v1,vzero); + vbs9 = vec_perm(v2,v4,vperm3); + vbs11 = vec_perm(v2,v4,vperm4); + vbs10 = vec_perm(v3,v5,vperm3); + vbs12 = vec_perm(v3,v5,vperm4); + + v2 = vec_perm(vbs5,vbs7,vperm1); + v3 = vec_perm(vbs6,vbs8,vperm1); + v4 = vec_perm(vbs5,vbs7,vperm2); + v5 = vec_perm(vbs6,vbs8,vperm2); + v6 = vec_sub(v2,v4); + v7 = vec_sub(v3,v5); + v2 = vec_add(v2,v4); + v3 = vec_add(v3,v5); + v4 = vec_madd(v6,v1,vzero); + v5 = vec_nmsub(v7,v1,vzero); + vbs13 = vec_perm(v2,v4,vperm3); + vbs15 = vec_perm(v2,v4,vperm4); + vbs14 = vec_perm(v3,v5,vperm3); + vbs16 = vec_perm(v3,v5,vperm4); + + + costab = pnts[4]; + + v1 = vec_lde(0,costab); +#ifdef __APPLE__ + v2 = (vector float)(1.0f,-1.0f,1.0f,-1.0f); +#else + v2 = (vector float){1.0f,-1.0f,1.0f,-1.0f}; +#endif + v3 = vec_splat(v1,0); + v1 = vec_madd(v2,v3,vzero); + + v2 = vec_mergeh(vbs9,vbs10); + v3 = vec_mergel(vbs9,vbs10); + v4 = vec_mergeh(vbs11,vbs12); + v5 = vec_mergel(vbs11,vbs12); + v6 = vec_mergeh(v2,v3); + v7 = vec_mergel(v2,v3); + v2 = vec_mergeh(v4,v5); + v3 = vec_mergel(v4,v5); + v4 = vec_sub(v6,v7); + v5 = vec_sub(v2,v3); + v6 = vec_add(v6,v7); + v7 = vec_add(v2,v3); + v2 = vec_madd(v4,v1,vzero); + v3 = vec_madd(v5,v1,vzero); + vbs1 = vec_mergeh(v6,v2); + vbs2 = vec_mergel(v6,v2); + vbs3 = vec_mergeh(v7,v3); + vbs4 = vec_mergel(v7,v3); + + v2 = vec_mergeh(vbs13,vbs14); + v3 = vec_mergel(vbs13,vbs14); + v4 = vec_mergeh(vbs15,vbs16); + v5 = vec_mergel(vbs15,vbs16); + v6 = vec_mergeh(v2,v3); + v7 = vec_mergel(v2,v3); + v2 = vec_mergeh(v4,v5); + v3 = vec_mergel(v4,v5); + v4 = vec_sub(v6,v7); + v5 = vec_sub(v2,v3); + v6 = vec_add(v6,v7); + v7 = vec_add(v2,v3); + v2 = vec_madd(v4,v1,vzero); + v3 = vec_madd(v5,v1,vzero); + vbs5 = vec_mergeh(v6,v2); + vbs6 = vec_mergel(v6,v2); + vbs7 = vec_mergeh(v7,v3); + vbs8 = vec_mergel(v7,v3); + + vec_st(vbs1,0,bufs); + vec_st(vbs2,16,bufs); + vec_st(vbs3,32,bufs); + vec_st(vbs4,48,bufs); + 
vec_st(vbs5,64,bufs); + vec_st(vbs6,80,bufs); + vec_st(vbs7,96,bufs); + vec_st(vbs8,112,bufs); + } + + { + register real *b1; + register int i; + + for(b1=bufs,i=8;i;i--,b1+=4) + b1[2] += b1[3]; + + for(b1=bufs,i=4;i;i--,b1+=8) + { + b1[4] += b1[6]; + b1[6] += b1[5]; + b1[5] += b1[7]; + } + + for(b1=bufs,i=2;i;i--,b1+=16) + { + b1[8] += b1[12]; + b1[12] += b1[10]; + b1[10] += b1[14]; + b1[14] += b1[9]; + b1[9] += b1[13]; + b1[13] += b1[11]; + b1[11] += b1[15]; + } + } + + + out0[0x10*16] = bufs[0]; + out0[0x10*15] = bufs[16+0] + bufs[16+8]; + out0[0x10*14] = bufs[8]; + out0[0x10*13] = bufs[16+8] + bufs[16+4]; + out0[0x10*12] = bufs[4]; + out0[0x10*11] = bufs[16+4] + bufs[16+12]; + out0[0x10*10] = bufs[12]; + out0[0x10* 9] = bufs[16+12] + bufs[16+2]; + out0[0x10* 8] = bufs[2]; + out0[0x10* 7] = bufs[16+2] + bufs[16+10]; + out0[0x10* 6] = bufs[10]; + out0[0x10* 5] = bufs[16+10] + bufs[16+6]; + out0[0x10* 4] = bufs[6]; + out0[0x10* 3] = bufs[16+6] + bufs[16+14]; + out0[0x10* 2] = bufs[14]; + out0[0x10* 1] = bufs[16+14] + bufs[16+1]; + out0[0x10* 0] = bufs[1]; + + out1[0x10* 0] = bufs[1]; + out1[0x10* 1] = bufs[16+1] + bufs[16+9]; + out1[0x10* 2] = bufs[9]; + out1[0x10* 3] = bufs[16+9] + bufs[16+5]; + out1[0x10* 4] = bufs[5]; + out1[0x10* 5] = bufs[16+5] + bufs[16+13]; + out1[0x10* 6] = bufs[13]; + out1[0x10* 7] = bufs[16+13] + bufs[16+3]; + out1[0x10* 8] = bufs[3]; + out1[0x10* 9] = bufs[16+3] + bufs[16+11]; + out1[0x10*10] = bufs[11]; + out1[0x10*11] = bufs[16+11] + bufs[16+7]; + out1[0x10*12] = bufs[7]; + out1[0x10*13] = bufs[16+7] + bufs[16+15]; + out1[0x10*14] = bufs[15]; + out1[0x10*15] = bufs[16+15]; + +} + + Index: include/reactos/libs/libmpg123/dct64_avx.S =================================================================== --- include/reactos/libs/libmpg123/dct64_avx.S (revision 0) +++ include/reactos/libs/libmpg123/dct64_avx.S (working copy) @@ -0,0 +1,324 @@ +/* + dct36_sse: AVX optimized dct64 for x86-64 + + copyright 1995-2013 by the mpg123 project - free software under the terms of the LGPL 2.1 + see COPYING and AUTHORS files in distribution or http://mpg123.org + initially written by Taihei Monma +*/ + +#include "mangle.h" + +#define samples %rdx +#define costab %rcx +#define out0 %rdi +#define out1 %rsi + +/* + void dct64_avx(short *out0, short *out1, real *samples); +*/ + +#ifndef __APPLE__ + .section .rodata +#else + .data +#endif + ALIGN32 +costab_avx: + .long 1056974725 + .long 1057056395 + .long 1057223771 + .long 1057485416 + .long 1057855544 + .long 1058356026 + .long 1059019886 + .long 1059897405 + .long 1061067246 + .long 1062657950 + .long 1064892987 + .long 1066774581 + .long 1069414683 + .long 1073984175 + .long 1079645762 + .long 1092815430 + .long 1057005197 + .long 1057342072 + .long 1058087743 + .long 1059427869 + .long 1061799040 + .long 1065862217 + .long 1071413542 + .long 1084439708 + .long 1057128951 + .long 1058664893 + .long 1063675095 + .long 1076102863 + .long 1057655764 + .long 1067924853 + .long 1060439283 + .long 0 + .text + ALIGN16 +.globl ASM_NAME(dct64_avx) +ASM_NAME(dct64_avx): +#ifdef IS_MSABI + push %rbp + mov %rsp, %rbp + sub $112, %rsp + movaps %xmm6, (%rsp) + movaps %xmm7, 16(%rsp) + movaps %xmm8, 32(%rsp) + movaps %xmm9, 48(%rsp) + movaps %xmm10, 64(%rsp) + movaps %xmm11, 80(%rsp) + movaps %xmm12, 96(%rsp) + push %rdi + push %rsi + mov %rcx, %rdi + mov %rdx, %rsi + mov %r8, %rdx +#endif + leaq costab_avx(%rip), costab + + vmovups (samples), %ymm0 # input[0,1,2,3,4,5,6,7] + vmovups 32(samples), %ymm1 # input[8,9,10,11,12,13,14,15] + 
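+	# The next loads fetch input[16..31]; vperm2f128 $0x23 plus the $0x1b
+	# shuffles reverse them into input[23..16]/input[31..24], so the add/sub
+	# pairs that follow implement the first (*b1++ + *--b2) butterfly of the
+	# plain C dct64() added earlier in this patch.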
vperm2f128 $0x23, 64(samples), %ymm2, %ymm2 + vperm2f128 $0x23, 96(samples), %ymm3, %ymm3 + vshufps $0x1b, %ymm2, %ymm2, %ymm2 # input[23,22,21,20,19,18,17,16] + vshufps $0x1b, %ymm3, %ymm3, %ymm3 # input[31,30,29,28,27,26,25,24] + vsubps %ymm2, %ymm1, %ymm6 + vsubps %ymm3, %ymm0, %ymm7 + vaddps %ymm0, %ymm3, %ymm4 # bufs[0,1,2,3,4,5,6,7] + vaddps %ymm1, %ymm2, %ymm5 # bufs[8,9,10,11,12,13,14,15] + vmulps (costab), %ymm7, %ymm7 # bufs[31,30,29,28,27,26,25,24] cos64[0,1,2,3,4,5,6,7] + vmulps 32(costab), %ymm6, %ymm6 # bufs[23,22,21,20,19,18,17,16] cos64[8,9,10,11,12,13,14,15] + + vmovaps 64(costab), %ymm8 # cos32[0,1,2,3,4,5,6,7] + + vshufps $0x1b, %ymm5, %ymm5, %ymm5 + vshufps $0x1b, %ymm6, %ymm6, %ymm6 + vperm2f128 $0x01, %ymm5, %ymm5, %ymm5 # bufs[15,14,13,12,11,10,9,8] + vperm2f128 $0x01, %ymm6, %ymm6, %ymm6 # bufs[16,17,18,19,20,21,22,23] + vsubps %ymm5, %ymm4, %ymm1 + vsubps %ymm6, %ymm7, %ymm3 + vaddps %ymm5, %ymm4, %ymm0 # bufs[32,33,34,35,36,37,38,39] + vaddps %ymm6, %ymm7, %ymm2 # bufs[48,49,50,51,52,53,54,55] + vmulps %ymm1, %ymm8, %ymm1 # bufs[47,46,45,44,43,42,41,40] + vmulps %ymm3, %ymm8, %ymm3 # bufs[63,62,61,60,59,58,57,56] + + vmovaps 96(costab), %ymm8 # cos16[0,1,2,3]:cos8[0,1]:cos4[0]:- + vperm2f128 $0x00, %ymm8, %ymm8, %ymm9 # cos16[0,1,2,3,0,1,2,3] + + vperm2f128 $0x20, %ymm1, %ymm0, %ymm4 # bufs[32,33,34,35,47,46,45,44] + vperm2f128 $0x31, %ymm1, %ymm0, %ymm5 + vshufps $0x1b, %ymm5, %ymm5, %ymm5 # bufs[39,38,37,36,40,41,42,43] + vperm2f128 $0x20, %ymm3, %ymm2, %ymm6 # bufs[48,49,50,51,63,62,61,60] + vperm2f128 $0x31, %ymm3, %ymm2, %ymm7 + vshufps $0x1b, %ymm7, %ymm7, %ymm7 # bufs[55,54,53,52,56,57,58,59] + vsubps %ymm5, %ymm4, %ymm1 + vsubps %ymm7, %ymm6, %ymm3 + vaddps %ymm5, %ymm4, %ymm0 # bufs[0,1,2,3,8,9,10,11] + vaddps %ymm7, %ymm6, %ymm2 # bufs[16,17,18,19,24,25,26,27] + vmulps %ymm1, %ymm9, %ymm1 # bufs[7,6,5,4,15,14,13,12] + vmulps %ymm3, %ymm9, %ymm3 # bufs[23,22,21,20,31,30,29,28] + + vperm2f128 $0x11, %ymm8, %ymm8, %ymm8 # cos8[0,1]:cos4[0]:-:cos8[0,1]:cos4[0]:- + vmovddup %ymm8, %ymm9 # cos8[0,1,0,1,0,1,0,1] + + vunpcklps %ymm1, %ymm0, %ymm4 # bufs[0,7,1,6,8,15,9,14] + vunpckhps %ymm1, %ymm0, %ymm5 # bufs[2,5,3,4,10,13,11,12] + vunpcklps %ymm3, %ymm2, %ymm6 # bufs[16,23,17,22,24,31,25,30] + vunpckhps %ymm3, %ymm2, %ymm7 # bufs[18,21,19,20,26,29,27,28] + vshufps $0xd8, %ymm4, %ymm4, %ymm4 # bufs[0,1,7,6,8,9,15,14] + vshufps $0x72, %ymm5, %ymm5, %ymm5 # bufs[3,2,4,5,11,10,12,13] + vshufps $0xd8, %ymm6, %ymm6, %ymm6 # bufs[16,17,23,22,24,25,31,30] + vshufps $0x72, %ymm7, %ymm7, %ymm7 # bufs[19,18,20,21,27,26,28,29] + vsubps %ymm5, %ymm4, %ymm1 + vsubps %ymm7, %ymm6, %ymm3 + vaddps %ymm5, %ymm4, %ymm0 # bufs[32,33,36,37,40,41,44,45] + vaddps %ymm7, %ymm6, %ymm2 # bufs[48,49,52,53,56,57,60,61] + vmulps %ymm1, %ymm9, %ymm1 # bufs[35,34,39,38,43,42,47,46] + vmulps %ymm3, %ymm9, %ymm3 # bufs[51,50,55,54,59,58,63,62] + + vpermilps $0xaa, %ymm8, %ymm8 # cos4[0,0,0,0,0,0,0,0] + + vshufps $0xd8, %ymm0, %ymm0, %ymm0 # bufs[32,36,33,37,40,44,41,45] + vshufps $0xd8, %ymm1, %ymm1, %ymm1 # bufs[35,39,34,38,43,47,42,46] + vshufps $0xd8, %ymm2, %ymm2, %ymm2 # bufs[48,52,49,53,56,60,57,61] + vshufps $0xd8, %ymm3, %ymm3, %ymm3 # bufs[51,55,50,54,59,63,58,62] + vunpcklps %ymm1, %ymm0, %ymm4 # bufs[32,35,36,39,40,43,44,47] + vunpckhps %ymm1, %ymm0, %ymm5 # bufs[33,34,37,38,41,42,45,46] + vunpcklps %ymm3, %ymm2, %ymm6 # bufs[48,51,52,55,56,59,60,63] + vunpckhps %ymm3, %ymm2, %ymm7 # bufs[49,50,53,54,57,58,61,62] + vsubps %ymm5, %ymm4, %ymm1 + vsubps %ymm7, %ymm6, %ymm3 + vaddps 
%ymm5, %ymm4, %ymm0 # bufs[0,2,4,6,8,10,12,14] + vaddps %ymm7, %ymm6, %ymm2 # bufs[16,18,20,22,24,26,28,30] + vmulps %ymm1, %ymm8, %ymm1 # bufs[1,3,5,7,9,11,13,15] + vmulps %ymm3, %ymm8, %ymm3 # bufs[17,19,21,23,25,27,29,31] + + vxorps %ymm8, %ymm8, %ymm8 + vblendps $0xaa, %ymm1, %ymm8, %ymm5 + vblendps $0xaa, %ymm3, %ymm8, %ymm6 + vaddps %ymm5, %ymm0, %ymm0 + vaddps %ymm6, %ymm2, %ymm2 + vunpcklps %ymm1, %ymm0, %ymm4 # bufs[0,1,2,3,8,9,10,11] + vunpckhps %ymm1, %ymm0, %ymm5 # bufs[4,5,6,7,12,13,14,15] + vunpcklps %ymm3, %ymm2, %ymm6 # bufs[16,17,18,19,24,25,26,27] + vunpckhps %ymm3, %ymm2, %ymm7 # bufs[20,21,22,23,28,29,30,31] + + vextractf128 $0x1, %ymm4, %xmm0 # bufs[8,9,10,11] + vextractf128 $0x1, %ymm5, %xmm1 # bufs[12,13,14,15] + vextractf128 $0x1, %ymm6, %xmm2 # bufs[24,25,26,27] + vextractf128 $0x1, %ymm7, %xmm3 # bufs[28,29,30,31] + + vshufps $0x1e, %xmm5, %xmm5, %xmm9 # bufs[6,7,5,4] + vshufps $0x1e, %xmm1, %xmm1, %xmm10 # bufs[14,15,13,12] + vshufps $0x1e, %xmm7, %xmm7, %xmm11 # bufs[22,23,21,20] + vshufps $0x1e, %xmm3, %xmm3, %xmm12 # bufs[30,31,29,28] + vblendps $0x7, %xmm9, %xmm8, %xmm9 # bufs[6,7,5,-] + vblendps $0x7, %xmm10, %xmm8, %xmm10 # bufs[14,15,13,-] + vblendps $0x7, %xmm11, %xmm8, %xmm11 # bufs[22,23,21,-] + vblendps $0x7, %xmm12, %xmm8, %xmm12 # bufs[30,31,29,-] + vaddps %xmm5, %xmm9, %xmm5 + vaddps %xmm1, %xmm10, %xmm1 + vaddps %xmm7, %xmm11, %xmm7 + vaddps %xmm3, %xmm12, %xmm3 + + prefetcht0 512(out0) + + vshufps $0x1e, %xmm0, %xmm0, %xmm9 # bufs[10,11,9,8] + vshufps $0x1e, %xmm2, %xmm2, %xmm10 # bufs[26,27,25,24] + vaddps %xmm1, %xmm0, %xmm0 + vaddps %xmm3, %xmm2, %xmm2 + vblendps $0x7, %xmm9, %xmm8, %xmm9 # bufs[10,11,9,-] + vblendps $0x7, %xmm10, %xmm8, %xmm10 # bufs[26,27,25,-] + vaddps %xmm1, %xmm9, %xmm1 + vaddps %xmm3, %xmm10, %xmm3 + + vzeroupper + prefetcht0 512(out1) + + cvtps2dq %xmm4, %xmm4 + cvtps2dq %xmm0, %xmm0 + cvtps2dq %xmm5, %xmm5 + cvtps2dq %xmm1, %xmm1 + packssdw %xmm5, %xmm4 + packssdw %xmm1, %xmm0 + movq %xmm4, %rcx + pshufd $0x4e, %xmm4, %xmm5 + movq %xmm0, %rdx + pshufd $0x4e, %xmm0, %xmm1 + movq %xmm5, %r8 + movq %xmm1, %r9 + + addq $512, out0 + movq $-64, %rax + + movw %cx, (out0) + movw %dx, (out0,%rax,1) + movw %r8w, (out0,%rax,2) + movw %r9w, -64(out0,%rax,2) + leaq (out0,%rax,4), out0 + shr $16, %rcx + shr $16, %rdx + shr $16, %r8 + shr $16, %r9 + movw %cx, (out0,%rax,4) + negq %rax + movw %cx, (out1) + movw %dx, (out1,%rax,1) + movw %r8w, (out1,%rax,2) + movw %r9w, 64(out1,%rax,2) + leaq (out1,%rax,4), out1 + shr $16, %rcx + shr $16, %rdx + shr $16, %r8 + shr $16, %r9 + negq %rax + movw %cx, (out0) + movw %dx, (out0,%rax,1) + movw %r8w, (out0,%rax,2) + movw %r9w, -64(out0,%rax,2) + shr $16, %rcx + shr $16, %rdx + shr $16, %r8 + shr $16, %r9 + negq %rax + movw %cx, (out1) + movw %dx, (out1,%rax,1) + movw %r8w, (out1,%rax,2) + movw %r9w, 64(out1,%rax,2) + + leaq -32(out0,%rax,4), out0 + negq %rax + leaq 32(out1,%rax,4), out1 + + vshufps $0x1e, %xmm6, %xmm6, %xmm0 + vblendps $0x7, %xmm0, %xmm8, %xmm0 + addps %xmm2, %xmm6 + addps %xmm7, %xmm2 + addps %xmm3, %xmm7 + addps %xmm0, %xmm3 + cvtps2dq %xmm6, %xmm6 + cvtps2dq %xmm2, %xmm2 + cvtps2dq %xmm7, %xmm7 + cvtps2dq %xmm3, %xmm3 + packssdw %xmm7, %xmm6 + packssdw %xmm3, %xmm2 + movq %xmm6, %rcx + pshufd $0x4e, %xmm6, %xmm7 + movq %xmm2, %rdx + pshufd $0x4e, %xmm2, %xmm3 + movq %xmm7, %r8 + movq %xmm3, %r9 + + movw %cx, (out0) + movw %dx, (out0,%rax,1) + movw %r8w, (out0,%rax,2) + movw %r9w, -64(out0,%rax,2) + leaq (out0,%rax,4), out0 + shr $16, %rcx + shr $16, %rdx + shr $16, %r8 + shr 
$16, %r9 + negq %rax + movw %cx, (out1) + movw %dx, (out1,%rax,1) + movw %r8w, (out1,%rax,2) + movw %r9w, 64(out1,%rax,2) + leaq (out1,%rax,4), out1 + shr $16, %rcx + shr $16, %rdx + shr $16, %r8 + shr $16, %r9 + negq %rax + movw %cx, (out0) + movw %dx, (out0,%rax,1) + movw %r8w, (out0,%rax,2) + movw %r9w, -64(out0,%rax,2) + shr $16, %rcx + shr $16, %rdx + shr $16, %r8 + shr $16, %r9 + negq %rax + movw %cx, (out1) + movw %dx, (out1,%rax,1) + movw %r8w, (out1,%rax,2) + movw %r9w, 64(out1,%rax,2) + +#ifdef IS_MSABI + pop %rsi + pop %rdi + movaps (%rsp), %xmm6 + movaps 16(%rsp), %xmm7 + movaps 32(%rsp), %xmm8 + movaps 48(%rsp), %xmm9 + movaps 64(%rsp), %xmm10 + movaps 80(%rsp), %xmm11 + movaps 96(%rsp), %xmm12 + mov %rbp, %rsp + pop %rbp +#endif + ret + +NONEXEC_STACK Index: include/reactos/libs/libmpg123/dct64_avx_float.S =================================================================== --- include/reactos/libs/libmpg123/dct64_avx_float.S (revision 0) +++ include/reactos/libs/libmpg123/dct64_avx_float.S (working copy) @@ -0,0 +1,294 @@ +/* + dct64_x86_64_float: SSE optimized dct64 for x86-64 (float output version) + + copyright 1995-2013 by the mpg123 project - free software under the terms of the LGPL 2.1 + see COPYING and AUTHORS files in distribution or http://mpg123.org + initially written by Taihei Monma +*/ + +#include "mangle.h" + +#define samples %rdx +#define costab %rcx +#define out0 %rdi +#define out1 %rsi + +/* + void dct64_real_avx(real *out0, real *out1, real *samples); +*/ + +#ifndef __APPLE__ + .section .rodata +#else + .data +#endif + ALIGN32 +costab_avx: + .long 1056974725 + .long 1057056395 + .long 1057223771 + .long 1057485416 + .long 1057855544 + .long 1058356026 + .long 1059019886 + .long 1059897405 + .long 1061067246 + .long 1062657950 + .long 1064892987 + .long 1066774581 + .long 1069414683 + .long 1073984175 + .long 1079645762 + .long 1092815430 + .long 1057005197 + .long 1057342072 + .long 1058087743 + .long 1059427869 + .long 1061799040 + .long 1065862217 + .long 1071413542 + .long 1084439708 + .long 1057128951 + .long 1058664893 + .long 1063675095 + .long 1076102863 + .long 1057655764 + .long 1067924853 + .long 1060439283 + .long 0 + .text + ALIGN16 +.globl ASM_NAME(dct64_real_avx) +ASM_NAME(dct64_real_avx): +#ifdef IS_MSABI + push %rbp + mov %rsp, %rbp + sub $112, %rsp + movaps %xmm6, (%rsp) + movaps %xmm7, 16(%rsp) + movaps %xmm8, 32(%rsp) + movaps %xmm9, 48(%rsp) + movaps %xmm10, 64(%rsp) + movaps %xmm11, 80(%rsp) + movaps %xmm12, 96(%rsp) + push %rdi + push %rsi + mov %rcx, %rdi + mov %rdx, %rsi + mov %r8, %rdx +#endif + leaq costab_avx(%rip), costab + + vmovups (samples), %ymm0 # input[0,1,2,3,4,5,6,7] + vmovups 32(samples), %ymm1 # input[8,9,10,11,12,13,14,15] + vperm2f128 $0x23, 64(samples), %ymm2, %ymm2 + vperm2f128 $0x23, 96(samples), %ymm3, %ymm3 + vshufps $0x1b, %ymm2, %ymm2, %ymm2 # input[23,22,21,20,19,18,17,16] + vshufps $0x1b, %ymm3, %ymm3, %ymm3 # input[31,30,29,28,27,26,25,24] + vsubps %ymm2, %ymm1, %ymm6 + vsubps %ymm3, %ymm0, %ymm7 + vaddps %ymm0, %ymm3, %ymm4 # bufs[0,1,2,3,4,5,6,7] + vaddps %ymm1, %ymm2, %ymm5 # bufs[8,9,10,11,12,13,14,15] + vmulps (costab), %ymm7, %ymm7 # bufs[31,30,29,28,27,26,25,24] cos64[0,1,2,3,4,5,6,7] + vmulps 32(costab), %ymm6, %ymm6 # bufs[23,22,21,20,19,18,17,16] cos64[8,9,10,11,12,13,14,15] + + vmovaps 64(costab), %ymm8 # cos32[0,1,2,3,4,5,6,7] + + vshufps $0x1b, %ymm5, %ymm5, %ymm5 + vshufps $0x1b, %ymm6, %ymm6, %ymm6 + vperm2f128 $0x01, %ymm5, %ymm5, %ymm5 # bufs[15,14,13,12,11,10,9,8] + vperm2f128 $0x01, %ymm6, 
%ymm6, %ymm6 # bufs[16,17,18,19,20,21,22,23] + vsubps %ymm5, %ymm4, %ymm1 + vsubps %ymm6, %ymm7, %ymm3 + vaddps %ymm5, %ymm4, %ymm0 # bufs[32,33,34,35,36,37,38,39] + vaddps %ymm6, %ymm7, %ymm2 # bufs[48,49,50,51,52,53,54,55] + vmulps %ymm1, %ymm8, %ymm1 # bufs[47,46,45,44,43,42,41,40] + vmulps %ymm3, %ymm8, %ymm3 # bufs[63,62,61,60,59,58,57,56] + + vmovaps 96(costab), %ymm8 # cos16[0,1,2,3]:cos8[0,1]:cos4[0]:- + vperm2f128 $0x00, %ymm8, %ymm8, %ymm9 # cos16[0,1,2,3,0,1,2,3] + + vperm2f128 $0x20, %ymm1, %ymm0, %ymm4 # bufs[32,33,34,35,47,46,45,44] + vperm2f128 $0x31, %ymm1, %ymm0, %ymm5 + vshufps $0x1b, %ymm5, %ymm5, %ymm5 # bufs[39,38,37,36,40,41,42,43] + vperm2f128 $0x20, %ymm3, %ymm2, %ymm6 # bufs[48,49,50,51,63,62,61,60] + vperm2f128 $0x31, %ymm3, %ymm2, %ymm7 + vshufps $0x1b, %ymm7, %ymm7, %ymm7 # bufs[55,54,53,52,56,57,58,59] + vsubps %ymm5, %ymm4, %ymm1 + vsubps %ymm7, %ymm6, %ymm3 + vaddps %ymm5, %ymm4, %ymm0 # bufs[0,1,2,3,8,9,10,11] + vaddps %ymm7, %ymm6, %ymm2 # bufs[16,17,18,19,24,25,26,27] + vmulps %ymm1, %ymm9, %ymm1 # bufs[7,6,5,4,15,14,13,12] + vmulps %ymm3, %ymm9, %ymm3 # bufs[23,22,21,20,31,30,29,28] + + vperm2f128 $0x11, %ymm8, %ymm8, %ymm8 # cos8[0,1]:cos4[0]:-:cos8[0,1]:cos4[0]:- + vmovddup %ymm8, %ymm9 # cos8[0,1,0,1,0,1,0,1] + + vunpcklps %ymm1, %ymm0, %ymm4 # bufs[0,7,1,6,8,15,9,14] + vunpckhps %ymm1, %ymm0, %ymm5 # bufs[2,5,3,4,10,13,11,12] + vunpcklps %ymm3, %ymm2, %ymm6 # bufs[16,23,17,22,24,31,25,30] + vunpckhps %ymm3, %ymm2, %ymm7 # bufs[18,21,19,20,26,29,27,28] + vshufps $0xd8, %ymm4, %ymm4, %ymm4 # bufs[0,1,7,6,8,9,15,14] + vshufps $0x72, %ymm5, %ymm5, %ymm5 # bufs[3,2,4,5,11,10,12,13] + vshufps $0xd8, %ymm6, %ymm6, %ymm6 # bufs[16,17,23,22,24,25,31,30] + vshufps $0x72, %ymm7, %ymm7, %ymm7 # bufs[19,18,20,21,27,26,28,29] + vsubps %ymm5, %ymm4, %ymm1 + vsubps %ymm7, %ymm6, %ymm3 + vaddps %ymm5, %ymm4, %ymm0 # bufs[32,33,36,37,40,41,44,45] + vaddps %ymm7, %ymm6, %ymm2 # bufs[48,49,52,53,56,57,60,61] + vmulps %ymm1, %ymm9, %ymm1 # bufs[35,34,39,38,43,42,47,46] + vmulps %ymm3, %ymm9, %ymm3 # bufs[51,50,55,54,59,58,63,62] + + vpermilps $0xaa, %ymm8, %ymm8 # cos4[0,0,0,0,0,0,0,0] + + vshufps $0xd8, %ymm0, %ymm0, %ymm0 # bufs[32,36,33,37,40,44,41,45] + vshufps $0xd8, %ymm1, %ymm1, %ymm1 # bufs[35,39,34,38,43,47,42,46] + vshufps $0xd8, %ymm2, %ymm2, %ymm2 # bufs[48,52,49,53,56,60,57,61] + vshufps $0xd8, %ymm3, %ymm3, %ymm3 # bufs[51,55,50,54,59,63,58,62] + vunpcklps %ymm1, %ymm0, %ymm4 # bufs[32,35,36,39,40,43,44,47] + vunpckhps %ymm1, %ymm0, %ymm5 # bufs[33,34,37,38,41,42,45,46] + vunpcklps %ymm3, %ymm2, %ymm6 # bufs[48,51,52,55,56,59,60,63] + vunpckhps %ymm3, %ymm2, %ymm7 # bufs[49,50,53,54,57,58,61,62] + vsubps %ymm5, %ymm4, %ymm1 + vsubps %ymm7, %ymm6, %ymm3 + vaddps %ymm5, %ymm4, %ymm0 # bufs[0,2,4,6,8,10,12,14] + vaddps %ymm7, %ymm6, %ymm2 # bufs[16,18,20,22,24,26,28,30] + vmulps %ymm1, %ymm8, %ymm1 # bufs[1,3,5,7,9,11,13,15] + vmulps %ymm3, %ymm8, %ymm3 # bufs[17,19,21,23,25,27,29,31] + + vxorps %ymm8, %ymm8, %ymm8 + vblendps $0xaa, %ymm1, %ymm8, %ymm5 + vblendps $0xaa, %ymm3, %ymm8, %ymm6 + vaddps %ymm5, %ymm0, %ymm0 + vaddps %ymm6, %ymm2, %ymm2 + vunpcklps %ymm1, %ymm0, %ymm4 # bufs[0,1,2,3,8,9,10,11] + vunpckhps %ymm1, %ymm0, %ymm5 # bufs[4,5,6,7,12,13,14,15] + vunpcklps %ymm3, %ymm2, %ymm6 # bufs[16,17,18,19,24,25,26,27] + vunpckhps %ymm3, %ymm2, %ymm7 # bufs[20,21,22,23,28,29,30,31] + + vextractf128 $0x1, %ymm4, %xmm0 # bufs[8,9,10,11] + vextractf128 $0x1, %ymm5, %xmm1 # bufs[12,13,14,15] + vextractf128 $0x1, %ymm6, %xmm2 # bufs[24,25,26,27] + vextractf128 
$0x1, %ymm7, %xmm3 # bufs[28,29,30,31] + + vshufps $0x1e, %xmm5, %xmm5, %xmm9 # bufs[6,7,5,4] + vshufps $0x1e, %xmm1, %xmm1, %xmm10 # bufs[14,15,13,12] + vshufps $0x1e, %xmm7, %xmm7, %xmm11 # bufs[22,23,21,20] + vshufps $0x1e, %xmm3, %xmm3, %xmm12 # bufs[30,31,29,28] + vblendps $0x7, %xmm9, %xmm8, %xmm9 # bufs[6,7,5,-] + vblendps $0x7, %xmm10, %xmm8, %xmm10 # bufs[14,15,13,-] + vblendps $0x7, %xmm11, %xmm8, %xmm11 # bufs[22,23,21,-] + vblendps $0x7, %xmm12, %xmm8, %xmm12 # bufs[30,31,29,-] + vaddps %xmm5, %xmm9, %xmm5 + vaddps %xmm1, %xmm10, %xmm1 + vaddps %xmm7, %xmm11, %xmm7 + vaddps %xmm3, %xmm12, %xmm3 + + prefetcht0 1024(out0) + + vshufps $0x1e, %xmm0, %xmm0, %xmm9 # bufs[10,11,9,8] + vshufps $0x1e, %xmm2, %xmm2, %xmm10 # bufs[26,27,25,24] + vaddps %xmm1, %xmm0, %xmm0 + vaddps %xmm3, %xmm2, %xmm2 + vblendps $0x7, %xmm9, %xmm8, %xmm9 # bufs[10,11,9,-] + vblendps $0x7, %xmm10, %xmm8, %xmm10 # bufs[26,27,25,-] + vaddps %xmm1, %xmm9, %xmm1 + vaddps %xmm3, %xmm10, %xmm3 + + vzeroupper + prefetcht0 1024(out1) + + addq $1024, out0 + movq $-128, %rax + movss %xmm4, (out0) + movss %xmm0, (out0,%rax,1) + movss %xmm5, (out0,%rax,2) + movss %xmm1, -128(out0,%rax,2) + leaq (out0,%rax,4), out0 + movhlps %xmm4, %xmm9 + movhlps %xmm0, %xmm10 + movhlps %xmm5, %xmm11 + movhlps %xmm1, %xmm12 + vmovss %xmm9, (out0) + vmovss %xmm10, (out0,%rax,1) + vmovss %xmm11, (out0,%rax,2) + vmovss %xmm12, -128(out0,%rax,2) + leaq (out0,%rax,4), out0 + negq %rax + shufps $0xb1, %xmm4, %xmm4 + shufps $0xb1, %xmm0, %xmm0 + shufps $0xb1, %xmm5, %xmm5 + shufps $0xb1, %xmm1, %xmm1 + movss %xmm4, (out0) + movss %xmm4, (out1) + leaq (out1,%rax,1), out1 + movss %xmm0, (out1) + movss %xmm5, (out1,%rax,1) + movss %xmm1, (out1,%rax,2) + leaq (out1,%rax,4), out1 + movhlps %xmm4, %xmm4 + movhlps %xmm0, %xmm0 + movhlps %xmm5, %xmm5 + movhlps %xmm1, %xmm1 + movss %xmm4, -128(out1) + movss %xmm0, (out1) + movss %xmm5, (out1,%rax,1) + movss %xmm1, (out1,%rax,2) + + leaq -64(out0,%rax,8), out0 + negq %rax + vshufps $0x1e, %xmm6, %xmm6, %xmm0 + vblendps $0x7, %xmm0, %xmm8, %xmm0 + addps %xmm2, %xmm6 + addps %xmm7, %xmm2 + addps %xmm3, %xmm7 + addps %xmm0, %xmm3 + movss %xmm6, (out0) + movss %xmm2, (out0,%rax,1) + movss %xmm7, (out0,%rax,2) + movss %xmm3, -128(out0,%rax,2) + leaq (out0,%rax,4), out0 + movhlps %xmm6, %xmm0 + movhlps %xmm2, %xmm1 + movhlps %xmm7, %xmm4 + movhlps %xmm3, %xmm5 + movss %xmm0, (out0) + movss %xmm1, (out0,%rax,1) + movss %xmm4, (out0,%rax,2) + movss %xmm5, -128(out0,%rax,2) + leaq 64(out1,%rax,4), out1 + negq %rax + shufps $0xb1, %xmm6, %xmm6 + shufps $0xb1, %xmm2, %xmm2 + shufps $0xb1, %xmm7, %xmm7 + shufps $0xb1, %xmm3, %xmm3 + movss %xmm6, -128(out1) + movss %xmm2, (out1) + movss %xmm7, (out1,%rax,1) + movss %xmm3, (out1,%rax,2) + leaq (out1,%rax,4), out1 + movhlps %xmm6, %xmm6 + movhlps %xmm2, %xmm2 + movhlps %xmm7, %xmm7 + movhlps %xmm3, %xmm3 + movss %xmm6, -128(out1) + movss %xmm2, (out1) + movss %xmm7, (out1,%rax,1) + movss %xmm3, (out1,%rax,2) + +#ifdef IS_MSABI + pop %rsi + pop %rdi + movaps (%rsp), %xmm6 + movaps 16(%rsp), %xmm7 + movaps 32(%rsp), %xmm8 + movaps 48(%rsp), %xmm9 + movaps 64(%rsp), %xmm10 + movaps 80(%rsp), %xmm11 + movaps 96(%rsp), %xmm12 + mov %rbp, %rsp + pop %rbp +#endif + ret + +NONEXEC_STACK Index: include/reactos/libs/libmpg123/dct64_i386.c =================================================================== --- include/reactos/libs/libmpg123/dct64_i386.c (revision 0) +++ include/reactos/libs/libmpg123/dct64_i386.c (working copy) @@ -0,0 +1,336 @@ +/* + dct64_i386.c: DCT64, a C 
variant for i386 + + copyright ?-2006 by the mpg123 project - free software under the terms of the LGPL 2.1 + see COPYING and AUTHORS files in distribution or http://mpg123.org + initially written by Michael Hipp +*/ + +/* + * Discrete Cosine Tansform (DCT) for subband synthesis + * optimized for machines with no auto-increment. + * The performance is highly compiler dependend. Maybe + * the dct64.c version for 'normal' processor may be faster + * even for Intel processors. + */ + +#include "mpg123lib_intern.h" + +static void dct64_1(real *out0,real *out1,real *b1,real *b2,real *samples) +{ + { + register real *costab = pnts[0]; + + b1[0x00] = samples[0x00] + samples[0x1F]; + b1[0x01] = samples[0x01] + samples[0x1E]; + b1[0x1F] = REAL_MUL(samples[0x00] - samples[0x1F], costab[0x0]); + b1[0x1E] = REAL_MUL(samples[0x01] - samples[0x1E], costab[0x1]); + + b1[0x02] = samples[0x02] + samples[0x1D]; + b1[0x03] = samples[0x03] + samples[0x1C]; + b1[0x1D] = REAL_MUL(samples[0x02] - samples[0x1D], costab[0x2]); + b1[0x1C] = REAL_MUL(samples[0x03] - samples[0x1C], costab[0x3]); + + b1[0x04] = samples[0x04] + samples[0x1B]; + b1[0x05] = samples[0x05] + samples[0x1A]; + b1[0x1B] = REAL_MUL(samples[0x04] - samples[0x1B], costab[0x4]); + b1[0x1A] = REAL_MUL(samples[0x05] - samples[0x1A], costab[0x5]); + + b1[0x06] = samples[0x06] + samples[0x19]; + b1[0x07] = samples[0x07] + samples[0x18]; + b1[0x19] = REAL_MUL(samples[0x06] - samples[0x19], costab[0x6]); + b1[0x18] = REAL_MUL(samples[0x07] - samples[0x18], costab[0x7]); + + b1[0x08] = samples[0x08] + samples[0x17]; + b1[0x09] = samples[0x09] + samples[0x16]; + b1[0x17] = REAL_MUL(samples[0x08] - samples[0x17], costab[0x8]); + b1[0x16] = REAL_MUL(samples[0x09] - samples[0x16], costab[0x9]); + + b1[0x0A] = samples[0x0A] + samples[0x15]; + b1[0x0B] = samples[0x0B] + samples[0x14]; + b1[0x15] = REAL_MUL(samples[0x0A] - samples[0x15], costab[0xA]); + b1[0x14] = REAL_MUL(samples[0x0B] - samples[0x14], costab[0xB]); + + b1[0x0C] = samples[0x0C] + samples[0x13]; + b1[0x0D] = samples[0x0D] + samples[0x12]; + b1[0x13] = REAL_MUL(samples[0x0C] - samples[0x13], costab[0xC]); + b1[0x12] = REAL_MUL(samples[0x0D] - samples[0x12], costab[0xD]); + + b1[0x0E] = samples[0x0E] + samples[0x11]; + b1[0x0F] = samples[0x0F] + samples[0x10]; + b1[0x11] = REAL_MUL(samples[0x0E] - samples[0x11], costab[0xE]); + b1[0x10] = REAL_MUL(samples[0x0F] - samples[0x10], costab[0xF]); + + } + + + { + register real *costab = pnts[1]; + + b2[0x00] = b1[0x00] + b1[0x0F]; + b2[0x01] = b1[0x01] + b1[0x0E]; + b2[0x0F] = REAL_MUL(b1[0x00] - b1[0x0F], costab[0]); + b2[0x0E] = REAL_MUL(b1[0x01] - b1[0x0E], costab[1]); + + b2[0x02] = b1[0x02] + b1[0x0D]; + b2[0x03] = b1[0x03] + b1[0x0C]; + b2[0x0D] = REAL_MUL(b1[0x02] - b1[0x0D], costab[2]); + b2[0x0C] = REAL_MUL(b1[0x03] - b1[0x0C], costab[3]); + + b2[0x04] = b1[0x04] + b1[0x0B]; + b2[0x05] = b1[0x05] + b1[0x0A]; + b2[0x0B] = REAL_MUL(b1[0x04] - b1[0x0B], costab[4]); + b2[0x0A] = REAL_MUL(b1[0x05] - b1[0x0A], costab[5]); + + b2[0x06] = b1[0x06] + b1[0x09]; + b2[0x07] = b1[0x07] + b1[0x08]; + b2[0x09] = REAL_MUL(b1[0x06] - b1[0x09], costab[6]); + b2[0x08] = REAL_MUL(b1[0x07] - b1[0x08], costab[7]); + + /* */ + + b2[0x10] = b1[0x10] + b1[0x1F]; + b2[0x11] = b1[0x11] + b1[0x1E]; + b2[0x1F] = REAL_MUL(b1[0x1F] - b1[0x10], costab[0]); + b2[0x1E] = REAL_MUL(b1[0x1E] - b1[0x11], costab[1]); + + b2[0x12] = b1[0x12] + b1[0x1D]; + b2[0x13] = b1[0x13] + b1[0x1C]; + b2[0x1D] = REAL_MUL(b1[0x1D] - b1[0x12], costab[2]); + b2[0x1C] = REAL_MUL(b1[0x1C] - 
b1[0x13], costab[3]); + + b2[0x14] = b1[0x14] + b1[0x1B]; + b2[0x15] = b1[0x15] + b1[0x1A]; + b2[0x1B] = REAL_MUL(b1[0x1B] - b1[0x14], costab[4]); + b2[0x1A] = REAL_MUL(b1[0x1A] - b1[0x15], costab[5]); + + b2[0x16] = b1[0x16] + b1[0x19]; + b2[0x17] = b1[0x17] + b1[0x18]; + b2[0x19] = REAL_MUL(b1[0x19] - b1[0x16], costab[6]); + b2[0x18] = REAL_MUL(b1[0x18] - b1[0x17], costab[7]); + } + + { + register real *costab = pnts[2]; + + b1[0x00] = b2[0x00] + b2[0x07]; + b1[0x07] = REAL_MUL(b2[0x00] - b2[0x07], costab[0]); + b1[0x01] = b2[0x01] + b2[0x06]; + b1[0x06] = REAL_MUL(b2[0x01] - b2[0x06], costab[1]); + b1[0x02] = b2[0x02] + b2[0x05]; + b1[0x05] = REAL_MUL(b2[0x02] - b2[0x05], costab[2]); + b1[0x03] = b2[0x03] + b2[0x04]; + b1[0x04] = REAL_MUL(b2[0x03] - b2[0x04], costab[3]); + + b1[0x08] = b2[0x08] + b2[0x0F]; + b1[0x0F] = REAL_MUL(b2[0x0F] - b2[0x08], costab[0]); + b1[0x09] = b2[0x09] + b2[0x0E]; + b1[0x0E] = REAL_MUL(b2[0x0E] - b2[0x09], costab[1]); + b1[0x0A] = b2[0x0A] + b2[0x0D]; + b1[0x0D] = REAL_MUL(b2[0x0D] - b2[0x0A], costab[2]); + b1[0x0B] = b2[0x0B] + b2[0x0C]; + b1[0x0C] = REAL_MUL(b2[0x0C] - b2[0x0B], costab[3]); + + b1[0x10] = b2[0x10] + b2[0x17]; + b1[0x17] = REAL_MUL(b2[0x10] - b2[0x17], costab[0]); + b1[0x11] = b2[0x11] + b2[0x16]; + b1[0x16] = REAL_MUL(b2[0x11] - b2[0x16], costab[1]); + b1[0x12] = b2[0x12] + b2[0x15]; + b1[0x15] = REAL_MUL(b2[0x12] - b2[0x15], costab[2]); + b1[0x13] = b2[0x13] + b2[0x14]; + b1[0x14] = REAL_MUL(b2[0x13] - b2[0x14], costab[3]); + + b1[0x18] = b2[0x18] + b2[0x1F]; + b1[0x1F] = REAL_MUL(b2[0x1F] - b2[0x18], costab[0]); + b1[0x19] = b2[0x19] + b2[0x1E]; + b1[0x1E] = REAL_MUL(b2[0x1E] - b2[0x19], costab[1]); + b1[0x1A] = b2[0x1A] + b2[0x1D]; + b1[0x1D] = REAL_MUL(b2[0x1D] - b2[0x1A], costab[2]); + b1[0x1B] = b2[0x1B] + b2[0x1C]; + b1[0x1C] = REAL_MUL(b2[0x1C] - b2[0x1B], costab[3]); + } + + { + register real const cos0 = pnts[3][0]; + register real const cos1 = pnts[3][1]; + + b2[0x00] = b1[0x00] + b1[0x03]; + b2[0x03] = REAL_MUL(b1[0x00] - b1[0x03], cos0); + b2[0x01] = b1[0x01] + b1[0x02]; + b2[0x02] = REAL_MUL(b1[0x01] - b1[0x02], cos1); + + b2[0x04] = b1[0x04] + b1[0x07]; + b2[0x07] = REAL_MUL(b1[0x07] - b1[0x04], cos0); + b2[0x05] = b1[0x05] + b1[0x06]; + b2[0x06] = REAL_MUL(b1[0x06] - b1[0x05], cos1); + + b2[0x08] = b1[0x08] + b1[0x0B]; + b2[0x0B] = REAL_MUL(b1[0x08] - b1[0x0B], cos0); + b2[0x09] = b1[0x09] + b1[0x0A]; + b2[0x0A] = REAL_MUL(b1[0x09] - b1[0x0A], cos1); + + b2[0x0C] = b1[0x0C] + b1[0x0F]; + b2[0x0F] = REAL_MUL(b1[0x0F] - b1[0x0C], cos0); + b2[0x0D] = b1[0x0D] + b1[0x0E]; + b2[0x0E] = REAL_MUL(b1[0x0E] - b1[0x0D], cos1); + + b2[0x10] = b1[0x10] + b1[0x13]; + b2[0x13] = REAL_MUL(b1[0x10] - b1[0x13], cos0); + b2[0x11] = b1[0x11] + b1[0x12]; + b2[0x12] = REAL_MUL(b1[0x11] - b1[0x12], cos1); + + b2[0x14] = b1[0x14] + b1[0x17]; + b2[0x17] = REAL_MUL(b1[0x17] - b1[0x14], cos0); + b2[0x15] = b1[0x15] + b1[0x16]; + b2[0x16] = REAL_MUL(b1[0x16] - b1[0x15], cos1); + + b2[0x18] = b1[0x18] + b1[0x1B]; + b2[0x1B] = REAL_MUL(b1[0x18] - b1[0x1B], cos0); + b2[0x19] = b1[0x19] + b1[0x1A]; + b2[0x1A] = REAL_MUL(b1[0x19] - b1[0x1A], cos1); + + b2[0x1C] = b1[0x1C] + b1[0x1F]; + b2[0x1F] = REAL_MUL(b1[0x1F] - b1[0x1C], cos0); + b2[0x1D] = b1[0x1D] + b1[0x1E]; + b2[0x1E] = REAL_MUL(b1[0x1E] - b1[0x1D], cos1); + } + + { + register real const cos0 = pnts[4][0]; + + b1[0x00] = b2[0x00] + b2[0x01]; + b1[0x01] = REAL_MUL(b2[0x00] - b2[0x01], cos0); + b1[0x02] = b2[0x02] + b2[0x03]; + b1[0x03] = REAL_MUL(b2[0x03] - b2[0x02], cos0); + b1[0x02] += 
b1[0x03]; + + b1[0x04] = b2[0x04] + b2[0x05]; + b1[0x05] = REAL_MUL(b2[0x04] - b2[0x05], cos0); + b1[0x06] = b2[0x06] + b2[0x07]; + b1[0x07] = REAL_MUL(b2[0x07] - b2[0x06], cos0); + b1[0x06] += b1[0x07]; + b1[0x04] += b1[0x06]; + b1[0x06] += b1[0x05]; + b1[0x05] += b1[0x07]; + + b1[0x08] = b2[0x08] + b2[0x09]; + b1[0x09] = REAL_MUL(b2[0x08] - b2[0x09], cos0); + b1[0x0A] = b2[0x0A] + b2[0x0B]; + b1[0x0B] = REAL_MUL(b2[0x0B] - b2[0x0A], cos0); + b1[0x0A] += b1[0x0B]; + + b1[0x0C] = b2[0x0C] + b2[0x0D]; + b1[0x0D] = REAL_MUL(b2[0x0C] - b2[0x0D], cos0); + b1[0x0E] = b2[0x0E] + b2[0x0F]; + b1[0x0F] = REAL_MUL(b2[0x0F] - b2[0x0E], cos0); + b1[0x0E] += b1[0x0F]; + b1[0x0C] += b1[0x0E]; + b1[0x0E] += b1[0x0D]; + b1[0x0D] += b1[0x0F]; + + b1[0x10] = b2[0x10] + b2[0x11]; + b1[0x11] = REAL_MUL(b2[0x10] - b2[0x11], cos0); + b1[0x12] = b2[0x12] + b2[0x13]; + b1[0x13] = REAL_MUL(b2[0x13] - b2[0x12], cos0); + b1[0x12] += b1[0x13]; + + b1[0x14] = b2[0x14] + b2[0x15]; + b1[0x15] = REAL_MUL(b2[0x14] - b2[0x15], cos0); + b1[0x16] = b2[0x16] + b2[0x17]; + b1[0x17] = REAL_MUL(b2[0x17] - b2[0x16], cos0); + b1[0x16] += b1[0x17]; + b1[0x14] += b1[0x16]; + b1[0x16] += b1[0x15]; + b1[0x15] += b1[0x17]; + + b1[0x18] = b2[0x18] + b2[0x19]; + b1[0x19] = REAL_MUL(b2[0x18] - b2[0x19], cos0); + b1[0x1A] = b2[0x1A] + b2[0x1B]; + b1[0x1B] = REAL_MUL(b2[0x1B] - b2[0x1A], cos0); + b1[0x1A] += b1[0x1B]; + + b1[0x1C] = b2[0x1C] + b2[0x1D]; + b1[0x1D] = REAL_MUL(b2[0x1C] - b2[0x1D], cos0); + b1[0x1E] = b2[0x1E] + b2[0x1F]; + b1[0x1F] = REAL_MUL(b2[0x1F] - b2[0x1E], cos0); + b1[0x1E] += b1[0x1F]; + b1[0x1C] += b1[0x1E]; + b1[0x1E] += b1[0x1D]; + b1[0x1D] += b1[0x1F]; + } + + out0[0x10*16] = REAL_SCALE_DCT64(b1[0x00]); + out0[0x10*12] = REAL_SCALE_DCT64(b1[0x04]); + out0[0x10* 8] = REAL_SCALE_DCT64(b1[0x02]); + out0[0x10* 4] = REAL_SCALE_DCT64(b1[0x06]); + out0[0x10* 0] = REAL_SCALE_DCT64(b1[0x01]); + out1[0x10* 0] = REAL_SCALE_DCT64(b1[0x01]); + out1[0x10* 4] = REAL_SCALE_DCT64(b1[0x05]); + out1[0x10* 8] = REAL_SCALE_DCT64(b1[0x03]); + out1[0x10*12] = REAL_SCALE_DCT64(b1[0x07]); + +#if 1 + out0[0x10*14] = REAL_SCALE_DCT64(b1[0x08] + b1[0x0C]); + out0[0x10*10] = REAL_SCALE_DCT64(b1[0x0C] + b1[0x0a]); + out0[0x10* 6] = REAL_SCALE_DCT64(b1[0x0A] + b1[0x0E]); + out0[0x10* 2] = REAL_SCALE_DCT64(b1[0x0E] + b1[0x09]); + out1[0x10* 2] = REAL_SCALE_DCT64(b1[0x09] + b1[0x0D]); + out1[0x10* 6] = REAL_SCALE_DCT64(b1[0x0D] + b1[0x0B]); + out1[0x10*10] = REAL_SCALE_DCT64(b1[0x0B] + b1[0x0F]); + out1[0x10*14] = REAL_SCALE_DCT64(b1[0x0F]); +#else + b1[0x08] += b1[0x0C]; + out0[0x10*14] = REAL_SCALE_DCT64(b1[0x08]); + b1[0x0C] += b1[0x0a]; + out0[0x10*10] = REAL_SCALE_DCT64(b1[0x0C]); + b1[0x0A] += b1[0x0E]; + out0[0x10* 6] = REAL_SCALE_DCT64(b1[0x0A]); + b1[0x0E] += b1[0x09]; + out0[0x10* 2] = REAL_SCALE_DCT64(b1[0x0E]); + b1[0x09] += b1[0x0D]; + out1[0x10* 2] = REAL_SCALE_DCT64(b1[0x09]); + b1[0x0D] += b1[0x0B]; + out1[0x10* 6] = REAL_SCALE_DCT64(b1[0x0D]); + b1[0x0B] += b1[0x0F]; + out1[0x10*10] = REAL_SCALE_DCT64(b1[0x0B]); + out1[0x10*14] = REAL_SCALE_DCT64(b1[0x0F]); +#endif + + { + real tmp; + tmp = b1[0x18] + b1[0x1C]; + out0[0x10*15] = REAL_SCALE_DCT64(tmp + b1[0x10]); + out0[0x10*13] = REAL_SCALE_DCT64(tmp + b1[0x14]); + tmp = b1[0x1C] + b1[0x1A]; + out0[0x10*11] = REAL_SCALE_DCT64(tmp + b1[0x14]); + out0[0x10* 9] = REAL_SCALE_DCT64(tmp + b1[0x12]); + tmp = b1[0x1A] + b1[0x1E]; + out0[0x10* 7] = REAL_SCALE_DCT64(tmp + b1[0x12]); + out0[0x10* 5] = REAL_SCALE_DCT64(tmp + b1[0x16]); + tmp = b1[0x1E] + b1[0x19]; + out0[0x10* 3] = 
REAL_SCALE_DCT64(tmp + b1[0x16]); + out0[0x10* 1] = REAL_SCALE_DCT64(tmp + b1[0x11]); + tmp = b1[0x19] + b1[0x1D]; + out1[0x10* 1] = REAL_SCALE_DCT64(tmp + b1[0x11]); + out1[0x10* 3] = REAL_SCALE_DCT64(tmp + b1[0x15]); + tmp = b1[0x1D] + b1[0x1B]; + out1[0x10* 5] = REAL_SCALE_DCT64(tmp + b1[0x15]); + out1[0x10* 7] = REAL_SCALE_DCT64(tmp + b1[0x13]); + tmp = b1[0x1B] + b1[0x1F]; + out1[0x10* 9] = REAL_SCALE_DCT64(tmp + b1[0x13]); + out1[0x10*11] = REAL_SCALE_DCT64(tmp + b1[0x17]); + out1[0x10*13] = REAL_SCALE_DCT64(b1[0x17] + b1[0x1F]); + out1[0x10*15] = REAL_SCALE_DCT64(b1[0x1F]); + } +} + +/* + * the call via dct64 is a trick to force GCC to use + * (new) registers for the b1,b2 pointer to the bufs[xx] field + */ +void dct64_i386(real *a,real *b,real *c) +{ + real bufs[0x40]; + dct64_1(a,b,bufs,bufs+0x20,c); +} + Index: include/reactos/libs/libmpg123/dct64_i486.c =================================================================== --- include/reactos/libs/libmpg123/dct64_i486.c (revision 0) +++ include/reactos/libs/libmpg123/dct64_i486.c (working copy) @@ -0,0 +1,342 @@ +/* + dct64_i486.c: DCT64, a plain C variant for i486 + + copyright 1998-2006 by the mpg123 project - free software under the terms of the LGPL 2.1 + see COPYING and AUTHORS files in distribution or http://mpg123.org + initially written by Fabrice Bellard +*/ + +/* Discrete Cosine Tansform (DCT) for subband synthesis. + * + * This code is optimized for 80486. It should be compiled with gcc + * 2.7.2 or higher. + * + * Note: This code does not give the necessary accuracy. Moreover, no + * overflow test are done. + * + * (c) 1998 Fabrice Bellard. + */ + +#include "mpg123lib_intern.h" + +#define COS_0_0 16403 +#define COS_0_1 16563 +#define COS_0_2 16890 +#define COS_0_3 17401 +#define COS_0_4 18124 +#define COS_0_5 19101 +#define COS_0_6 20398 +#define COS_0_7 22112 +#define COS_0_8 24396 +#define COS_0_9 27503 +#define COS_0_10 31869 +#define COS_0_11 38320 +#define COS_0_12 48633 +#define COS_0_13 67429 +#define COS_0_14 111660 +#define COS_0_15 333906 +#define COS_1_0 16463 +#define COS_1_1 17121 +#define COS_1_2 18577 +#define COS_1_3 21195 +#define COS_1_4 25826 +#define COS_1_5 34756 +#define COS_1_6 56441 +#define COS_1_7 167154 +#define COS_2_0 16704 +#define COS_2_1 19704 +#define COS_2_2 29490 +#define COS_2_3 83981 +#define COS_3_0 17733 +#define COS_3_1 42813 +#define COS_4_0 23170 + +#define SETOUT(out,n,expr) out[FIR_BUFFER_SIZE*(n)]=(expr) +#define MULL(a,b) (((long long)(a)*(long long)(b)) >> 15) +#define MUL(a,b) \ +(\ + ((!(b & 0x3F)) ? (((a)*(b >> 6)) >> 9) :\ + ((!(b & 0x1F)) ? (((a)*(b >> 5)) >> 10) :\ + ((!(b & 0x0F)) ? (((a)*(b >> 4)) >> 11) :\ + ((!(b & 0x07)) ? (((a)*(b >> 3)) >> 12) :\ + ((!(b & 0x03)) ? (((a)*(b >> 2)) >> 13) :\ + ((!(b & 0x01)) ? 
(((a)*(b >> 1)) >> 14) :\ + (((a)*(b )) >> 15)))))))) + + +void dct64_1_486(int *out0,int *out1,int *b1,int *b2) +{ + b1[0x00] = b2[0x00] + b2[0x1F]; + b1[0x1F] = MUL((b2[0x00] - b2[0x1F]),COS_0_0); + + b1[0x01] = b2[0x01] + b2[0x1E]; + b1[0x1E] = MUL((b2[0x01] - b2[0x1E]),COS_0_1); + + b1[0x02] = b2[0x02] + b2[0x1D]; + b1[0x1D] = MUL((b2[0x02] - b2[0x1D]),COS_0_2); + + b1[0x03] = b2[0x03] + b2[0x1C]; + b1[0x1C] = MUL((b2[0x03] - b2[0x1C]),COS_0_3); + + b1[0x04] = b2[0x04] + b2[0x1B]; + b1[0x1B] = MUL((b2[0x04] - b2[0x1B]),COS_0_4); + + b1[0x05] = b2[0x05] + b2[0x1A]; + b1[0x1A] = MUL((b2[0x05] - b2[0x1A]),COS_0_5); + + b1[0x06] = b2[0x06] + b2[0x19]; + b1[0x19] = MUL((b2[0x06] - b2[0x19]),COS_0_6); + + b1[0x07] = b2[0x07] + b2[0x18]; + b1[0x18] = MUL((b2[0x07] - b2[0x18]),COS_0_7); + + b1[0x08] = b2[0x08] + b2[0x17]; + b1[0x17] = MUL((b2[0x08] - b2[0x17]),COS_0_8); + + b1[0x09] = b2[0x09] + b2[0x16]; + b1[0x16] = MUL((b2[0x09] - b2[0x16]),COS_0_9); + + b1[0x0A] = b2[0x0A] + b2[0x15]; + b1[0x15] = MUL((b2[0x0A] - b2[0x15]),COS_0_10); + + b1[0x0B] = b2[0x0B] + b2[0x14]; + b1[0x14] = MUL((b2[0x0B] - b2[0x14]),COS_0_11); + + b1[0x0C] = b2[0x0C] + b2[0x13]; + b1[0x13] = MUL((b2[0x0C] - b2[0x13]),COS_0_12); + + b1[0x0D] = b2[0x0D] + b2[0x12]; + b1[0x12] = MULL((b2[0x0D] - b2[0x12]),COS_0_13); + + b1[0x0E] = b2[0x0E] + b2[0x11]; + b1[0x11] = MULL((b2[0x0E] - b2[0x11]),COS_0_14); + + b1[0x0F] = b2[0x0F] + b2[0x10]; + b1[0x10] = MULL((b2[0x0F] - b2[0x10]),COS_0_15); + + + b2[0x00] = b1[0x00] + b1[0x0F]; + b2[0x0F] = MUL((b1[0x00] - b1[0x0F]),COS_1_0); + b2[0x01] = b1[0x01] + b1[0x0E]; + b2[0x0E] = MUL((b1[0x01] - b1[0x0E]),COS_1_1); + b2[0x02] = b1[0x02] + b1[0x0D]; + b2[0x0D] = MUL((b1[0x02] - b1[0x0D]),COS_1_2); + b2[0x03] = b1[0x03] + b1[0x0C]; + b2[0x0C] = MUL((b1[0x03] - b1[0x0C]),COS_1_3); + b2[0x04] = b1[0x04] + b1[0x0B]; + b2[0x0B] = MUL((b1[0x04] - b1[0x0B]),COS_1_4); + b2[0x05] = b1[0x05] + b1[0x0A]; + b2[0x0A] = MUL((b1[0x05] - b1[0x0A]),COS_1_5); + b2[0x06] = b1[0x06] + b1[0x09]; + b2[0x09] = MUL((b1[0x06] - b1[0x09]),COS_1_6); + b2[0x07] = b1[0x07] + b1[0x08]; + b2[0x08] = MULL((b1[0x07] - b1[0x08]),COS_1_7); + + b2[0x10] = b1[0x10] + b1[0x1F]; + b2[0x1F] = MUL((b1[0x1F] - b1[0x10]),COS_1_0); + b2[0x11] = b1[0x11] + b1[0x1E]; + b2[0x1E] = MUL((b1[0x1E] - b1[0x11]),COS_1_1); + b2[0x12] = b1[0x12] + b1[0x1D]; + b2[0x1D] = MUL((b1[0x1D] - b1[0x12]),COS_1_2); + b2[0x13] = b1[0x13] + b1[0x1C]; + b2[0x1C] = MUL((b1[0x1C] - b1[0x13]),COS_1_3); + b2[0x14] = b1[0x14] + b1[0x1B]; + b2[0x1B] = MUL((b1[0x1B] - b1[0x14]),COS_1_4); + b2[0x15] = b1[0x15] + b1[0x1A]; + b2[0x1A] = MUL((b1[0x1A] - b1[0x15]),COS_1_5); + b2[0x16] = b1[0x16] + b1[0x19]; + b2[0x19] = MUL((b1[0x19] - b1[0x16]),COS_1_6); + b2[0x17] = b1[0x17] + b1[0x18]; + b2[0x18] = MULL((b1[0x18] - b1[0x17]),COS_1_7); + + + b1[0x00] = b2[0x00] + b2[0x07]; + b1[0x07] = MUL((b2[0x00] - b2[0x07]),COS_2_0); + b1[0x01] = b2[0x01] + b2[0x06]; + b1[0x06] = MUL((b2[0x01] - b2[0x06]),COS_2_1); + b1[0x02] = b2[0x02] + b2[0x05]; + b1[0x05] = MUL((b2[0x02] - b2[0x05]),COS_2_2); + b1[0x03] = b2[0x03] + b2[0x04]; + b1[0x04] = MULL((b2[0x03] - b2[0x04]),COS_2_3); + + b1[0x08] = b2[0x08] + b2[0x0F]; + b1[0x0F] = MUL((b2[0x0F] - b2[0x08]),COS_2_0); + b1[0x09] = b2[0x09] + b2[0x0E]; + b1[0x0E] = MUL((b2[0x0E] - b2[0x09]),COS_2_1); + b1[0x0A] = b2[0x0A] + b2[0x0D]; + b1[0x0D] = MUL((b2[0x0D] - b2[0x0A]),COS_2_2); + b1[0x0B] = b2[0x0B] + b2[0x0C]; + b1[0x0C] = MULL((b2[0x0C] - b2[0x0B]),COS_2_3); + + b1[0x10] = b2[0x10] + b2[0x17]; + b1[0x17] = MUL((b2[0x10] 
- b2[0x17]),COS_2_0); + b1[0x11] = b2[0x11] + b2[0x16]; + b1[0x16] = MUL((b2[0x11] - b2[0x16]),COS_2_1); + b1[0x12] = b2[0x12] + b2[0x15]; + b1[0x15] = MUL((b2[0x12] - b2[0x15]),COS_2_2); + b1[0x13] = b2[0x13] + b2[0x14]; + b1[0x14] = MULL((b2[0x13] - b2[0x14]),COS_2_3); + + b1[0x18] = b2[0x18] + b2[0x1F]; + b1[0x1F] = MUL((b2[0x1F] - b2[0x18]),COS_2_0); + b1[0x19] = b2[0x19] + b2[0x1E]; + b1[0x1E] = MUL((b2[0x1E] - b2[0x19]),COS_2_1); + b1[0x1A] = b2[0x1A] + b2[0x1D]; + b1[0x1D] = MUL((b2[0x1D] - b2[0x1A]),COS_2_2); + b1[0x1B] = b2[0x1B] + b2[0x1C]; + b1[0x1C] = MULL((b2[0x1C] - b2[0x1B]),COS_2_3); + + + b2[0x00] = b1[0x00] + b1[0x03]; + b2[0x03] = MUL((b1[0x00] - b1[0x03]),COS_3_0); + b2[0x01] = b1[0x01] + b1[0x02]; + b2[0x02] = MUL((b1[0x01] - b1[0x02]),COS_3_1); + + b2[0x04] = b1[0x04] + b1[0x07]; + b2[0x07] = MUL((b1[0x07] - b1[0x04]),COS_3_0); + b2[0x05] = b1[0x05] + b1[0x06]; + b2[0x06] = MUL((b1[0x06] - b1[0x05]),COS_3_1); + + b2[0x08] = b1[0x08] + b1[0x0B]; + b2[0x0B] = MUL((b1[0x08] - b1[0x0B]),COS_3_0); + b2[0x09] = b1[0x09] + b1[0x0A]; + b2[0x0A] = MUL((b1[0x09] - b1[0x0A]),COS_3_1); + + b2[0x0C] = b1[0x0C] + b1[0x0F]; + b2[0x0F] = MUL((b1[0x0F] - b1[0x0C]),COS_3_0); + b2[0x0D] = b1[0x0D] + b1[0x0E]; + b2[0x0E] = MUL((b1[0x0E] - b1[0x0D]),COS_3_1); + + b2[0x10] = b1[0x10] + b1[0x13]; + b2[0x13] = MUL((b1[0x10] - b1[0x13]),COS_3_0); + b2[0x11] = b1[0x11] + b1[0x12]; + b2[0x12] = MUL((b1[0x11] - b1[0x12]),COS_3_1); + + b2[0x14] = b1[0x14] + b1[0x17]; + b2[0x17] = MUL((b1[0x17] - b1[0x14]),COS_3_0); + b2[0x15] = b1[0x15] + b1[0x16]; + b2[0x16] = MUL((b1[0x16] - b1[0x15]),COS_3_1); + + b2[0x18] = b1[0x18] + b1[0x1B]; + b2[0x1B] = MUL((b1[0x18] - b1[0x1B]),COS_3_0); + b2[0x19] = b1[0x19] + b1[0x1A]; + b2[0x1A] = MUL((b1[0x19] - b1[0x1A]),COS_3_1); + + b2[0x1C] = b1[0x1C] + b1[0x1F]; + b2[0x1F] = MUL((b1[0x1F] - b1[0x1C]),COS_3_0); + b2[0x1D] = b1[0x1D] + b1[0x1E]; + b2[0x1E] = MUL((b1[0x1E] - b1[0x1D]),COS_3_1); + + { + int i; + for(i=0;i<32;i+=4) { + b1[i+0x00] = b2[i+0x00] + b2[i+0x01]; + b1[i+0x01] = MUL((b2[i+0x00] - b2[i+0x01]),COS_4_0); + b1[i+0x02] = b2[i+0x02] + b2[i+0x03]; + b1[i+0x03] = MUL((b2[i+0x03] - b2[i+0x02]),COS_4_0); + } + } + + b1[0x02] += b1[0x03]; + b1[0x06] += b1[0x07]; + b1[0x04] += b1[0x06]; + b1[0x06] += b1[0x05]; + b1[0x05] += b1[0x07]; + + b1[0x0A] += b1[0x0B]; + b1[0x0E] += b1[0x0F]; + b1[0x0C] += b1[0x0E]; + b1[0x0E] += b1[0x0D]; + b1[0x0D] += b1[0x0F]; + + b1[0x12] += b1[0x13]; + b1[0x16] += b1[0x17]; + b1[0x14] += b1[0x16]; + b1[0x16] += b1[0x15]; + b1[0x15] += b1[0x17]; + + b1[0x1A] += b1[0x1B]; + b1[0x1E] += b1[0x1F]; + b1[0x1C] += b1[0x1E]; + b1[0x1E] += b1[0x1D]; + b1[0x1D] += b1[0x1F]; + + SETOUT(out0,16,b1[0x00]); + SETOUT(out0,12,b1[0x04]); + SETOUT(out0, 8,b1[0x02]); + SETOUT(out0, 4,b1[0x06]); + SETOUT(out0, 0,b1[0x01]); + SETOUT(out1, 0,b1[0x01]); + SETOUT(out1, 4,b1[0x05]); + SETOUT(out1, 8,b1[0x03]); + SETOUT(out1,12,b1[0x07]); + + b1[0x08] += b1[0x0C]; + SETOUT(out0,14,b1[0x08]); + b1[0x0C] += b1[0x0a]; + SETOUT(out0,10,b1[0x0C]); + b1[0x0A] += b1[0x0E]; + SETOUT(out0, 6,b1[0x0A]); + b1[0x0E] += b1[0x09]; + SETOUT(out0, 2,b1[0x0E]); + b1[0x09] += b1[0x0D]; + SETOUT(out1, 2,b1[0x09]); + b1[0x0D] += b1[0x0B]; + SETOUT(out1, 6,b1[0x0D]); + b1[0x0B] += b1[0x0F]; + SETOUT(out1,10,b1[0x0B]); + SETOUT(out1,14,b1[0x0F]); + + b1[0x18] += b1[0x1C]; + SETOUT(out0,15,b1[0x10] + b1[0x18]); + SETOUT(out0,13,b1[0x18] + b1[0x14]); + b1[0x1C] += b1[0x1a]; + SETOUT(out0,11,b1[0x14] + b1[0x1C]); + SETOUT(out0, 9,b1[0x1C] + b1[0x12]); + b1[0x1A] += b1[0x1E]; 
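	/* Editor's note, not part of the imported source: the "+=" statements in
	   this block fold the pairwise running sums of the odd-index results into
	   b1[] so that each SETOUT() can emit a finished value.  Under the SETOUT
	   definition above, one such pair expands to roughly this sketch:

	       int tmp = b1[0x18] + b1[0x1C];
	       out0[FIR_BUFFER_SIZE * 15] = b1[0x10] + tmp;
	       out0[FIR_BUFFER_SIZE * 13] = tmp + b1[0x14];

	   which mirrors the "tmp" formulation of the float dct64_i386.c variant
	   earlier in this patch; only the integer buffer and the FIR_BUFFER_SIZE
	   output stride differ. */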
+ SETOUT(out0, 7,b1[0x12] + b1[0x1A]); + SETOUT(out0, 5,b1[0x1A] + b1[0x16]); + b1[0x1E] += b1[0x19]; + SETOUT(out0, 3,b1[0x16] + b1[0x1E]); + SETOUT(out0, 1,b1[0x1E] + b1[0x11]); + b1[0x19] += b1[0x1D]; + SETOUT(out1, 1,b1[0x11] + b1[0x19]); + SETOUT(out1, 3,b1[0x19] + b1[0x15]); + b1[0x1D] += b1[0x1B]; + SETOUT(out1, 5,b1[0x15] + b1[0x1D]); + SETOUT(out1, 7,b1[0x1D] + b1[0x13]); + b1[0x1B] += b1[0x1F]; + SETOUT(out1, 9,b1[0x13] + b1[0x1B]); + SETOUT(out1,11,b1[0x1B] + b1[0x17]); + SETOUT(out1,13,b1[0x17] + b1[0x1F]); + SETOUT(out1,15,b1[0x1F]); +} + + +/* + * the call via dct64 is a trick to force GCC to use + * (new) registers for the b1,b2 pointer to the bufs[xx] field + */ +void dct64_i486(int *a,int *b,real *samples) +{ + int bufs[64]; + int i; + +#ifdef REAL_IS_FIXED +#define TOINT(a) ((a) * 32768 / (int)REAL_FACTOR) + + for(i=0;i<32;i++) { + bufs[i]=TOINT(samples[i]); + } +#else + int *p = bufs; + register double const scale = ((65536.0 * 32) + 1) * 65536.0; + + for(i=0;i<32;i++) { + *((double *) (p++)) = scale + *samples++; /* beware on bufs overrun: 8B store from x87 */ + } +#endif + + dct64_1_486(a,b,bufs+32,bufs); +} + Index: include/reactos/libs/libmpg123/dct64_mmx.S =================================================================== --- include/reactos/libs/libmpg123/dct64_mmx.S (revision 0) +++ include/reactos/libs/libmpg123/dct64_mmx.S (working copy) @@ -0,0 +1,811 @@ +/* + dct64_mmx.s: MMX optimized DCT64 + + copyright ?-2006 by the mpg123 project - free software under the terms of the LGPL 2.1 + see COPYING and AUTHORS files in distribution or http://mpg123.org + initially written by the mysterious higway (apparently) +*/ + +#include "mangle.h" + +.text + + ALIGN32 +.globl ASM_NAME(dct64_mmx) +ASM_NAME(dct64_mmx): + + xorl %ecx,%ecx +.globl ASM_NAME(dct64_MMX) +ASM_NAME(dct64_MMX): + pushl %ebx + pushl %esi + pushl %edi + subl $256,%esp + movl 280(%esp),%eax + flds (%eax) + leal 128(%esp),%edx + fadds 124(%eax) + movl 272(%esp),%esi + fstps (%edx) + movl 276(%esp),%edi + flds 4(%eax) + movl ASM_VALUE(costab_mmxsse),%ebx + fadds 120(%eax) + orl %ecx,%ecx + fstps 4(%edx) + flds (%eax) + movl %esp,%ecx + fsubs 124(%eax) + fmuls (%ebx) + fstps 124(%edx) + flds 4(%eax) + fsubs 120(%eax) + fmuls 4(%ebx) + fstps 120(%edx) + flds 8(%eax) + fadds 116(%eax) + fstps 8(%edx) + flds 12(%eax) + fadds 112(%eax) + fstps 12(%edx) + flds 8(%eax) + fsubs 116(%eax) + fmuls 8(%ebx) + fstps 116(%edx) + flds 12(%eax) + fsubs 112(%eax) + fmuls 12(%ebx) + fstps 112(%edx) + flds 16(%eax) + fadds 108(%eax) + fstps 16(%edx) + flds 20(%eax) + fadds 104(%eax) + fstps 20(%edx) + flds 16(%eax) + fsubs 108(%eax) + fmuls 16(%ebx) + fstps 108(%edx) + flds 20(%eax) + fsubs 104(%eax) + fmuls 20(%ebx) + fstps 104(%edx) + flds 24(%eax) + fadds 100(%eax) + fstps 24(%edx) + flds 28(%eax) + fadds 96(%eax) + fstps 28(%edx) + flds 24(%eax) + fsubs 100(%eax) + fmuls 24(%ebx) + fstps 100(%edx) + flds 28(%eax) + fsubs 96(%eax) + fmuls 28(%ebx) + fstps 96(%edx) + flds 32(%eax) + fadds 92(%eax) + fstps 32(%edx) + flds 36(%eax) + fadds 88(%eax) + fstps 36(%edx) + flds 32(%eax) + fsubs 92(%eax) + fmuls 32(%ebx) + fstps 92(%edx) + flds 36(%eax) + fsubs 88(%eax) + fmuls 36(%ebx) + fstps 88(%edx) + flds 40(%eax) + fadds 84(%eax) + fstps 40(%edx) + flds 44(%eax) + fadds 80(%eax) + fstps 44(%edx) + flds 40(%eax) + fsubs 84(%eax) + fmuls 40(%ebx) + fstps 84(%edx) + flds 44(%eax) + fsubs 80(%eax) + fmuls 44(%ebx) + fstps 80(%edx) + flds 48(%eax) + fadds 76(%eax) + fstps 48(%edx) + flds 52(%eax) + fadds 72(%eax) + fstps 52(%edx) 
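/*
   Editor's note, not part of the imported source: despite the file name this
   routine appears to be plain x87 FPU code (the MMX instructions live in the
   accompanying synth, not in the DCT itself).  With %eax = samples,
   %edx = scratch buffer and %ebx = costab_mmxsse, each
   flds/fadds/fstps + flds/fsubs/fmuls/fstps group in this pass is one
   butterfly of the first DCT64 stage; offset 4*i addresses float element i,
   so a rough C equivalent of one group (an illustration only) is:

       bufs[i]      = samples[i] + samples[31 - i];
       bufs[31 - i] = (samples[i] - samples[31 - i]) * costab[i];

   i.e. the same first stage as dct64_1() in dct64_i386.c above.
*/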
+ flds 48(%eax) + fsubs 76(%eax) + fmuls 48(%ebx) + fstps 76(%edx) + flds 52(%eax) + fsubs 72(%eax) + fmuls 52(%ebx) + fstps 72(%edx) + flds 56(%eax) + fadds 68(%eax) + fstps 56(%edx) + flds 60(%eax) + fadds 64(%eax) + fstps 60(%edx) + flds 56(%eax) + fsubs 68(%eax) + fmuls 56(%ebx) + fstps 68(%edx) + flds 60(%eax) + fsubs 64(%eax) + fmuls 60(%ebx) + fstps 64(%edx) + + flds (%edx) + fadds 60(%edx) + fstps (%ecx) + flds 4(%edx) + fadds 56(%edx) + fstps 4(%ecx) + flds (%edx) + fsubs 60(%edx) + fmuls 64(%ebx) + fstps 60(%ecx) + flds 4(%edx) + fsubs 56(%edx) + fmuls 68(%ebx) + fstps 56(%ecx) + flds 8(%edx) + fadds 52(%edx) + fstps 8(%ecx) + flds 12(%edx) + fadds 48(%edx) + fstps 12(%ecx) + flds 8(%edx) + fsubs 52(%edx) + fmuls 72(%ebx) + fstps 52(%ecx) + flds 12(%edx) + fsubs 48(%edx) + fmuls 76(%ebx) + fstps 48(%ecx) + flds 16(%edx) + fadds 44(%edx) + fstps 16(%ecx) + flds 20(%edx) + fadds 40(%edx) + fstps 20(%ecx) + flds 16(%edx) + fsubs 44(%edx) + fmuls 80(%ebx) + fstps 44(%ecx) + flds 20(%edx) + fsubs 40(%edx) + fmuls 84(%ebx) + fstps 40(%ecx) + flds 24(%edx) + fadds 36(%edx) + fstps 24(%ecx) + flds 28(%edx) + fadds 32(%edx) + fstps 28(%ecx) + flds 24(%edx) + fsubs 36(%edx) + fmuls 88(%ebx) + fstps 36(%ecx) + flds 28(%edx) + fsubs 32(%edx) + fmuls 92(%ebx) + fstps 32(%ecx) + + flds 64(%edx) + fadds 124(%edx) + fstps 64(%ecx) + flds 68(%edx) + fadds 120(%edx) + fstps 68(%ecx) + flds 124(%edx) + fsubs 64(%edx) + fmuls 64(%ebx) + fstps 124(%ecx) + flds 120(%edx) + fsubs 68(%edx) + fmuls 68(%ebx) + fstps 120(%ecx) + flds 72(%edx) + fadds 116(%edx) + fstps 72(%ecx) + flds 76(%edx) + fadds 112(%edx) + fstps 76(%ecx) + flds 116(%edx) + fsubs 72(%edx) + fmuls 72(%ebx) + fstps 116(%ecx) + flds 112(%edx) + fsubs 76(%edx) + fmuls 76(%ebx) + fstps 112(%ecx) + flds 80(%edx) + fadds 108(%edx) + fstps 80(%ecx) + flds 84(%edx) + fadds 104(%edx) + fstps 84(%ecx) + flds 108(%edx) + fsubs 80(%edx) + fmuls 80(%ebx) + fstps 108(%ecx) + flds 104(%edx) + fsubs 84(%edx) + fmuls 84(%ebx) + fstps 104(%ecx) + flds 88(%edx) + fadds 100(%edx) + fstps 88(%ecx) + flds 92(%edx) + fadds 96(%edx) + fstps 92(%ecx) + flds 100(%edx) + fsubs 88(%edx) + fmuls 88(%ebx) + fstps 100(%ecx) + flds 96(%edx) + fsubs 92(%edx) + fmuls 92(%ebx) + fstps 96(%ecx) + + flds (%ecx) + fadds 28(%ecx) + fstps (%edx) + flds (%ecx) + fsubs 28(%ecx) + fmuls 96(%ebx) + fstps 28(%edx) + flds 4(%ecx) + fadds 24(%ecx) + fstps 4(%edx) + flds 4(%ecx) + fsubs 24(%ecx) + fmuls 100(%ebx) + fstps 24(%edx) + flds 8(%ecx) + fadds 20(%ecx) + fstps 8(%edx) + flds 8(%ecx) + fsubs 20(%ecx) + fmuls 104(%ebx) + fstps 20(%edx) + flds 12(%ecx) + fadds 16(%ecx) + fstps 12(%edx) + flds 12(%ecx) + fsubs 16(%ecx) + fmuls 108(%ebx) + fstps 16(%edx) + flds 32(%ecx) + fadds 60(%ecx) + fstps 32(%edx) + flds 60(%ecx) + fsubs 32(%ecx) + fmuls 96(%ebx) + fstps 60(%edx) + flds 36(%ecx) + fadds 56(%ecx) + fstps 36(%edx) + flds 56(%ecx) + fsubs 36(%ecx) + fmuls 100(%ebx) + fstps 56(%edx) + flds 40(%ecx) + fadds 52(%ecx) + fstps 40(%edx) + flds 52(%ecx) + fsubs 40(%ecx) + fmuls 104(%ebx) + fstps 52(%edx) + flds 44(%ecx) + fadds 48(%ecx) + fstps 44(%edx) + flds 48(%ecx) + fsubs 44(%ecx) + fmuls 108(%ebx) + fstps 48(%edx) + flds 64(%ecx) + fadds 92(%ecx) + fstps 64(%edx) + flds 64(%ecx) + fsubs 92(%ecx) + fmuls 96(%ebx) + fstps 92(%edx) + flds 68(%ecx) + fadds 88(%ecx) + fstps 68(%edx) + flds 68(%ecx) + fsubs 88(%ecx) + fmuls 100(%ebx) + fstps 88(%edx) + flds 72(%ecx) + fadds 84(%ecx) + fstps 72(%edx) + flds 72(%ecx) + fsubs 84(%ecx) + fmuls 104(%ebx) + fstps 84(%edx) + flds 76(%ecx) 
+ fadds 80(%ecx) + fstps 76(%edx) + flds 76(%ecx) + fsubs 80(%ecx) + fmuls 108(%ebx) + fstps 80(%edx) + flds 96(%ecx) + fadds 124(%ecx) + fstps 96(%edx) + flds 124(%ecx) + fsubs 96(%ecx) + fmuls 96(%ebx) + fstps 124(%edx) + flds 100(%ecx) + fadds 120(%ecx) + fstps 100(%edx) + flds 120(%ecx) + fsubs 100(%ecx) + fmuls 100(%ebx) + fstps 120(%edx) + flds 104(%ecx) + fadds 116(%ecx) + fstps 104(%edx) + flds 116(%ecx) + fsubs 104(%ecx) + fmuls 104(%ebx) + fstps 116(%edx) + flds 108(%ecx) + fadds 112(%ecx) + fstps 108(%edx) + flds 112(%ecx) + fsubs 108(%ecx) + fmuls 108(%ebx) + fstps 112(%edx) + flds (%edx) + fadds 12(%edx) + fstps (%ecx) + flds (%edx) + fsubs 12(%edx) + fmuls 112(%ebx) + fstps 12(%ecx) + flds 4(%edx) + fadds 8(%edx) + fstps 4(%ecx) + flds 4(%edx) + fsubs 8(%edx) + fmuls 116(%ebx) + fstps 8(%ecx) + flds 16(%edx) + fadds 28(%edx) + fstps 16(%ecx) + flds 28(%edx) + fsubs 16(%edx) + fmuls 112(%ebx) + fstps 28(%ecx) + flds 20(%edx) + fadds 24(%edx) + fstps 20(%ecx) + flds 24(%edx) + fsubs 20(%edx) + fmuls 116(%ebx) + fstps 24(%ecx) + flds 32(%edx) + fadds 44(%edx) + fstps 32(%ecx) + flds 32(%edx) + fsubs 44(%edx) + fmuls 112(%ebx) + fstps 44(%ecx) + flds 36(%edx) + fadds 40(%edx) + fstps 36(%ecx) + flds 36(%edx) + fsubs 40(%edx) + fmuls 116(%ebx) + fstps 40(%ecx) + flds 48(%edx) + fadds 60(%edx) + fstps 48(%ecx) + flds 60(%edx) + fsubs 48(%edx) + fmuls 112(%ebx) + fstps 60(%ecx) + flds 52(%edx) + fadds 56(%edx) + fstps 52(%ecx) + flds 56(%edx) + fsubs 52(%edx) + fmuls 116(%ebx) + fstps 56(%ecx) + flds 64(%edx) + fadds 76(%edx) + fstps 64(%ecx) + flds 64(%edx) + fsubs 76(%edx) + fmuls 112(%ebx) + fstps 76(%ecx) + flds 68(%edx) + fadds 72(%edx) + fstps 68(%ecx) + flds 68(%edx) + fsubs 72(%edx) + fmuls 116(%ebx) + fstps 72(%ecx) + flds 80(%edx) + fadds 92(%edx) + fstps 80(%ecx) + flds 92(%edx) + fsubs 80(%edx) + fmuls 112(%ebx) + fstps 92(%ecx) + flds 84(%edx) + fadds 88(%edx) + fstps 84(%ecx) + flds 88(%edx) + fsubs 84(%edx) + fmuls 116(%ebx) + fstps 88(%ecx) + flds 96(%edx) + fadds 108(%edx) + fstps 96(%ecx) + flds 96(%edx) + fsubs 108(%edx) + fmuls 112(%ebx) + fstps 108(%ecx) + flds 100(%edx) + fadds 104(%edx) + fstps 100(%ecx) + flds 100(%edx) + fsubs 104(%edx) + fmuls 116(%ebx) + fstps 104(%ecx) + flds 112(%edx) + fadds 124(%edx) + fstps 112(%ecx) + flds 124(%edx) + fsubs 112(%edx) + fmuls 112(%ebx) + fstps 124(%ecx) + flds 116(%edx) + fadds 120(%edx) + fstps 116(%ecx) + flds 120(%edx) + fsubs 116(%edx) + fmuls 116(%ebx) + fstps 120(%ecx) + + flds 32(%ecx) + fadds 36(%ecx) + fstps 32(%edx) + flds 32(%ecx) + fsubs 36(%ecx) + fmuls 120(%ebx) + fstps 36(%edx) + flds 44(%ecx) + fsubs 40(%ecx) + fmuls 120(%ebx) + fsts 44(%edx) + fadds 40(%ecx) + fadds 44(%ecx) + fstps 40(%edx) + flds 48(%ecx) + fsubs 52(%ecx) + fmuls 120(%ebx) + flds 60(%ecx) + fsubs 56(%ecx) + fmuls 120(%ebx) + fld %st(0) + fadds 56(%ecx) + fadds 60(%ecx) + fld %st(0) + fadds 48(%ecx) + fadds 52(%ecx) + fstps 48(%edx) + fadd %st(2) + fstps 56(%edx) + fsts 60(%edx) + faddp %st(1) + fstps 52(%edx) + flds 64(%ecx) + fadds 68(%ecx) + fstps 64(%edx) + flds 64(%ecx) + fsubs 68(%ecx) + fmuls 120(%ebx) + fstps 68(%edx) + flds 76(%ecx) + fsubs 72(%ecx) + fmuls 120(%ebx) + fsts 76(%edx) + fadds 72(%ecx) + fadds 76(%ecx) + fstps 72(%edx) + flds 92(%ecx) + fsubs 88(%ecx) + fmuls 120(%ebx) + fsts 92(%edx) + fadds 92(%ecx) + fadds 88(%ecx) + fld %st(0) + fadds 80(%ecx) + fadds 84(%ecx) + fstps 80(%edx) + flds 80(%ecx) + fsubs 84(%ecx) + fmuls 120(%ebx) + fadd %st(0), %st(1) + fadds 92(%edx) + fstps 84(%edx) + fstps 88(%edx) + flds 
96(%ecx) + fadds 100(%ecx) + fstps 96(%edx) + flds 96(%ecx) + fsubs 100(%ecx) + fmuls 120(%ebx) + fstps 100(%edx) + flds 108(%ecx) + fsubs 104(%ecx) + fmuls 120(%ebx) + fsts 108(%edx) + fadds 104(%ecx) + fadds 108(%ecx) + fstps 104(%edx) + flds 124(%ecx) + fsubs 120(%ecx) + fmuls 120(%ebx) + fsts 124(%edx) + fadds 120(%ecx) + fadds 124(%ecx) + fld %st(0) + fadds 112(%ecx) + fadds 116(%ecx) + fstps 112(%edx) + flds 112(%ecx) + fsubs 116(%ecx) + fmuls 120(%ebx) + fadd %st(0),%st(1) + fadds 124(%edx) + fstps 116(%edx) + fstps 120(%edx) + jnz .L01 + + flds (%ecx) + fadds 4(%ecx) + fstps 1024(%esi) + flds (%ecx) + fsubs 4(%ecx) + fmuls 120(%ebx) + fsts (%esi) + fstps (%edi) + flds 12(%ecx) + fsubs 8(%ecx) + fmuls 120(%ebx) + fsts 512(%edi) + fadds 12(%ecx) + fadds 8(%ecx) + fstps 512(%esi) + flds 16(%ecx) + fsubs 20(%ecx) + fmuls 120(%ebx) + flds 28(%ecx) + fsubs 24(%ecx) + fmuls 120(%ebx) + fsts 768(%edi) + fld %st(0) + fadds 24(%ecx) + fadds 28(%ecx) + fld %st(0) + fadds 16(%ecx) + fadds 20(%ecx) + fstps 768(%esi) + fadd %st(2) + fstps 256(%esi) + faddp %st(1) + fstps 256(%edi) + + flds 32(%edx) + fadds 48(%edx) + fstps 896(%esi) + flds 48(%edx) + fadds 40(%edx) + fstps 640(%esi) + flds 40(%edx) + fadds 56(%edx) + fstps 384(%esi) + flds 56(%edx) + fadds 36(%edx) + fstps 128(%esi) + flds 36(%edx) + fadds 52(%edx) + fstps 128(%edi) + flds 52(%edx) + fadds 44(%edx) + fstps 384(%edi) + flds 60(%edx) + fsts 896(%edi) + fadds 44(%edx) + fstps 640(%edi) + flds 96(%edx) + fadds 112(%edx) + fld %st(0) + fadds 64(%edx) + fstps 960(%esi) + fadds 80(%edx) + fstps 832(%esi) + flds 112(%edx) + fadds 104(%edx) + fld %st(0) + fadds 80(%edx) + fstps 704(%esi) + fadds 72(%edx) + fstps 576(%esi) + flds 104(%edx) + fadds 120(%edx) + fld %st(0) + fadds 72(%edx) + fstps 448(%esi) + fadds 88(%edx) + fstps 320(%esi) + flds 120(%edx) + fadds 100(%edx) + fld %st(0) + fadds 88(%edx) + fstps 192(%esi) + fadds 68(%edx) + fstps 64(%esi) + flds 100(%edx) + fadds 116(%edx) + fld %st(0) + fadds 68(%edx) + fstps 64(%edi) + fadds 84(%edx) + fstps 192(%edi) + flds 116(%edx) + fadds 108(%edx) + fld %st(0) + fadds 84(%edx) + fstps 320(%edi) + fadds 76(%edx) + fstps 448(%edi) + flds 108(%edx) + fadds 124(%edx) + fld %st(0) + fadds 76(%edx) + fstps 576(%edi) + fadds 92(%edx) + fstps 704(%edi) + flds 124(%edx) + fsts 960(%edi) + fadds 92(%edx) + fstps 832(%edi) + addl $256,%esp + popl %edi + popl %esi + popl %ebx + ret +.L01: + flds (%ecx) + fadds 4(%ecx) + fistps 512(%esi) + flds (%ecx) + fsubs 4(%ecx) + fmuls 120(%ebx) + + fistps (%esi) + + flds 12(%ecx) + fsubs 8(%ecx) + fmuls 120(%ebx) + fists 256(%edi) + fadds 12(%ecx) + fadds 8(%ecx) + fistps 256(%esi) + flds 16(%ecx) + fsubs 20(%ecx) + fmuls 120(%ebx) + flds 28(%ecx) + fsubs 24(%ecx) + fmuls 120(%ebx) + fists 384(%edi) + fld %st(0) + fadds 24(%ecx) + fadds 28(%ecx) + fld %st(0) + fadds 16(%ecx) + fadds 20(%ecx) + fistps 384(%esi) + fadd %st(2) + fistps 128(%esi) + faddp %st(1) + fistps 128(%edi) + + flds 32(%edx) + fadds 48(%edx) + fistps 448(%esi) + flds 48(%edx) + fadds 40(%edx) + fistps 320(%esi) + flds 40(%edx) + fadds 56(%edx) + fistps 192(%esi) + flds 56(%edx) + fadds 36(%edx) + fistps 64(%esi) + flds 36(%edx) + fadds 52(%edx) + fistps 64(%edi) + flds 52(%edx) + fadds 44(%edx) + fistps 192(%edi) + flds 60(%edx) + fists 448(%edi) + fadds 44(%edx) + fistps 320(%edi) + flds 96(%edx) + fadds 112(%edx) + fld %st(0) + fadds 64(%edx) + fistps 480(%esi) + fadds 80(%edx) + fistps 416(%esi) + flds 112(%edx) + fadds 104(%edx) + fld %st(0) + fadds 80(%edx) + fistps 352(%esi) + 
fadds 72(%edx) + fistps 288(%esi) + flds 104(%edx) + fadds 120(%edx) + fld %st(0) + fadds 72(%edx) + fistps 224(%esi) + fadds 88(%edx) + fistps 160(%esi) + flds 120(%edx) + fadds 100(%edx) + fld %st(0) + fadds 88(%edx) + fistps 96(%esi) + fadds 68(%edx) + fistps 32(%esi) + flds 100(%edx) + fadds 116(%edx) + fld %st(0) + fadds 68(%edx) + fistps 32(%edi) + fadds 84(%edx) + fistps 96(%edi) + flds 116(%edx) + fadds 108(%edx) + fld %st(0) + fadds 84(%edx) + fistps 160(%edi) + fadds 76(%edx) + fistps 224(%edi) + flds 108(%edx) + fadds 124(%edx) + fld %st(0) + fadds 76(%edx) + fistps 288(%edi) + fadds 92(%edx) + fistps 352(%edi) + flds 124(%edx) + fists 480(%edi) + fadds 92(%edx) + fistps 416(%edi) + movsw + addl $256,%esp + popl %edi + popl %esi + popl %ebx + ret + +NONEXEC_STACK Index: include/reactos/libs/libmpg123/dct64_neon.S =================================================================== --- include/reactos/libs/libmpg123/dct64_neon.S (revision 0) +++ include/reactos/libs/libmpg123/dct64_neon.S (working copy) @@ -0,0 +1,308 @@ +/* + dct64_neon: ARM NEON optimized dct64 + + copyright 1995-2010 by the mpg123 project - free software under the terms of the LGPL 2.1 + see COPYING and AUTHORS files in distribution or http://mpg123.org + initially written by Taihei Monma +*/ + +#include "mangle.h" + + .code 32 +#ifndef __APPLE__ + .fpu neon +#endif + + .text + ALIGN16 +costab_arm: + .word 1056974725 + .word 1057056395 + .word 1057223771 + .word 1057485416 + .word 1057855544 + .word 1058356026 + .word 1059019886 + .word 1059897405 + .word 1061067246 + .word 1062657950 + .word 1064892987 + .word 1066774581 + .word 1069414683 + .word 1073984175 + .word 1079645762 + .word 1092815430 + .word 1057005197 + .word 1057342072 + .word 1058087743 + .word 1059427869 + .word 1061799040 + .word 1065862217 + .word 1071413542 + .word 1084439708 + .word 1057128951 + .word 1058664893 + .word 1063675095 + .word 1076102863 + .word 1057655764 + .word 1067924853 + .word 1060439283 + .word 1060439283 + ALIGN4 + .globl ASM_NAME(dct64_neon) +#ifdef __ELF__ + .type ASM_NAME(dct64_neon), %function +#endif +ASM_NAME(dct64_neon): + vpush {q4-q7} + + adr r3, costab_arm + vld1.32 {q0, q1}, [r2]! + vld1.32 {q2, q3}, [r2]! + vld1.32 {q4, q5}, [r2]! + vld1.32 {q6, q7}, [r2] + vld1.32 {q12, q13}, [r3, :128]! + vld1.32 {q14, q15}, [r3, :128]! + + vrev64.32 q4, q4 + vrev64.32 q5, q5 + vrev64.32 q6, q6 + vrev64.32 q7, q7 + vswp d8, d9 + vswp d10, d11 + vswp d12, d13 + vswp d14, d15 + + vsub.f32 q8, q0, q7 + vsub.f32 q9, q1, q6 + vsub.f32 q10, q2, q5 + vsub.f32 q11, q3, q4 + vadd.f32 q0, q0, q7 + vadd.f32 q1, q1, q6 + vadd.f32 q2, q2, q5 + vadd.f32 q3, q3, q4 + vmul.f32 q4, q8, q12 + vmul.f32 q5, q9, q13 + vmul.f32 q6, q10, q14 + vmul.f32 q7, q11, q15 + + vld1.32 {q12, q13}, [r3, :128]! 
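/*
   Editor's note, not part of the imported source: the costab_arm words loaded
   into q12..q15 above (and the next two quads loaded here) are IEEE-754 bit
   patterns of the usual DCT64 cosine factors, not integers in their own right.
   A minimal C sketch of the reinterpretation, for illustration only:

       #include <math.h>
       #include <stdint.h>
       union { uint32_t u; float f; } c = { 1056974725u };
       // c.f ~= 0.500603f ~= 1.0f / (2.0f * cosf((float)M_PI / 64.0f))

   so the vmul.f32 instructions below scale the butterfly differences by the
   same cosine tables the portable C dct64 reaches through pnts[].
*/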
+ vld1.32 {q14, q15}, [r3, :128] + + vrev64.32 q2, q2 + vrev64.32 q3, q3 + vrev64.32 q6, q6 + vrev64.32 q7, q7 + vswp d4, d5 + vswp d6, d7 + vswp d12, d13 + vswp d14, d15 + + vsub.f32 q8, q0, q3 + vsub.f32 q9, q1, q2 + vsub.f32 q10, q4, q7 + vsub.f32 q11, q5, q6 + vadd.f32 q0, q0, q3 + vadd.f32 q1, q1, q2 + vadd.f32 q4, q4, q7 + vadd.f32 q5, q5, q6 + vmul.f32 q2, q8, q12 + vmul.f32 q3, q9, q13 + vmul.f32 q6, q10, q12 + vmul.f32 q7, q11, q13 + + vrev64.32 q1, q1 + vrev64.32 q3, q3 + vrev64.32 q5, q5 + vrev64.32 q7, q7 + vswp d2, d3 + vswp d6, d7 + vswp d10, d11 + vswp d14, d15 + + vsub.f32 q8, q0, q1 + vsub.f32 q9, q2, q3 + vsub.f32 q10, q4, q5 + vsub.f32 q11, q6, q7 + vadd.f32 q0, q0, q1 + vadd.f32 q2, q2, q3 + vadd.f32 q4, q4, q5 + vadd.f32 q6, q6, q7 + vmul.f32 q1, q8, q14 + vmul.f32 q3, q9, q14 + vmul.f32 q5, q10, q14 + vmul.f32 q7, q11, q14 + + vdup.32 q12, d31[0] + vmov d31, d30 + + vswp d1, d2 + vswp d5, d6 + vswp d9, d10 + vswp d13, d14 + vrev64.32 q1, q1 + vrev64.32 q3, q3 + vrev64.32 q5, q5 + vrev64.32 q7, q7 + + vsub.f32 q8, q0, q1 + vsub.f32 q9, q2, q3 + vsub.f32 q10, q4, q5 + vsub.f32 q11, q6, q7 + vadd.f32 q0, q0, q1 + vadd.f32 q2, q2, q3 + vadd.f32 q4, q4, q5 + vadd.f32 q6, q6, q7 + vmul.f32 q1, q8, q15 + vmul.f32 q3, q9, q15 + vmul.f32 q5, q10, q15 + vmul.f32 q7, q11, q15 + + vtrn.32 q0, q1 + vtrn.32 q2, q3 + vtrn.32 q4, q5 + vtrn.32 q6, q7 + + vsub.f32 q8, q0, q1 + vsub.f32 q9, q2, q3 + vsub.f32 q10, q4, q5 + vsub.f32 q11, q6, q7 + vadd.f32 q0, q0, q1 + vadd.f32 q2, q2, q3 + vadd.f32 q4, q4, q5 + vadd.f32 q6, q6, q7 + vmul.f32 q1, q8, q12 + vmul.f32 q3, q9, q12 + vmul.f32 q5, q10, q12 + vmul.f32 q7, q11, q12 + + vtrn.32 q0, q1 + vtrn.32 q2, q3 + vtrn.32 q4, q5 + vtrn.32 q6, q7 + vswp d1, d2 + vswp d5, d6 + vswp d9, d10 + vswp d13, d14 + + vshr.u64 d16, d1, #32 + vshr.u64 d17, d3, #32 + vshr.u64 d18, d5, #32 + vshr.u64 d19, d7, #32 + vadd.f32 d1, d1, d16 + vadd.f32 d3, d3, d17 + vadd.f32 d5, d5, d18 + vadd.f32 d7, d7, d19 + vshr.u64 d20, d9, #32 + vshr.u64 d21, d11, #32 + vshr.u64 d22, d13, #32 + vshr.u64 d23, d15, #32 + vadd.f32 d9, d9, d20 + vadd.f32 d11, d11, d21 + vadd.f32 d13, d13, d22 + vadd.f32 d15, d15, d23 + + vshr.u64 d16, d2, #32 + vshr.u64 d18, d6, #32 + vshr.u64 d20, d10, #32 + vshr.u64 d22, d14, #32 + vext.8 q8, q1, q8, #8 + vext.8 q9, q3, q9, #8 + vext.8 q10, q5, q10, #8 + vext.8 q11, q7, q11, #8 + vadd.f32 q1, q1, q8 + vadd.f32 q3, q3, q9 + vadd.f32 q5, q5, q10 + vadd.f32 q7, q7, q11 + + vshr.u64 d16, d4, #32 + vshr.u64 d18, d12, #32 + vext.8 q8, q2, q8, #8 + vext.8 q9, q6, q9, #8 + vadd.f32 q2, q2, q3 + vadd.f32 q6, q6, q7 + vadd.f32 q3, q3, q8 + vadd.f32 q7, q7, q9 + + vrev64.32 q8, q4 + vshr.u64 d19, d9, #32 + vext.8 d17, d17, d16, #4 + vswp d9, d10 + vswp d13, d14 + vtrn.32 q4, q5 + vtrn.32 q6, q7 + vmov d16, d9 + vmov d18, d11 + + vadd.f32 q4, q6 + vadd.f32 q5, q7 + vadd.f32 q6, q8 + vadd.f32 q7, q9 + + vmov.i32 q8, #0x4b000000 + vorr.i32 q8, #0x00400000 + vadd.f32 q0, q0, q8 + vadd.f32 q1, q1, q8 + vadd.f32 q2, q2, q8 + vadd.f32 q3, q3, q8 + vadd.f32 q4, q4, q8 + vadd.f32 q5, q5, q8 + vadd.f32 q6, q6, q8 + vadd.f32 q7, q7, q8 + vshl.i32 q0, q0, #10 + vshl.i32 q1, q1, #10 + vshl.i32 q2, q2, #10 + vshl.i32 q3, q3, #10 + vshl.i32 q4, q4, #10 + vshl.i32 q5, q5, #10 + vshl.i32 q6, q6, #10 + vshl.i32 q7, q7, #10 + vqshrn.s32 d0, q0, #10 + vqshrn.s32 d2, q1, #10 + vqshrn.s32 d4, q2, #10 + vqshrn.s32 d6, q3, #10 + vqshrn.s32 d8, q4, #10 + vqshrn.s32 d10, q5, #10 + vqshrn.s32 d12, q6, #10 + vqshrn.s32 d14, q7, #10 + + mov r3, #32 + vst1.16 {d0[1]}, [r0, :16], r3 
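/*
   Editor's note, not part of the imported source: the 0x4b000000|0x00400000
   constant built above is the float 12582912.0 (1.5 * 2^23); adding it forces
   the rounded integer result into the low mantissa bits, and the following
   vshl.i32/vqshrn.s32 #10 pair strips the exponent and saturates to 16 bits.
   This is the same magic-constant float-to-int trick the dct64_i486.c wrapper
   uses with its "scale" double.  A hedged C sketch (names illustrative):

       float x = 0.0f;                      /* a scaled subband sample */
       union { float f; int32_t i; } v;
       v.f = x + 12582912.0f;               /* 0x4B400000 == 1.5f * 2^23 */
       int16_t s = (int16_t)((int32_t)((uint32_t)v.i << 10) >> 10);

   Each vst1.16 in this block then stores a single 16-bit lane and steps the
   destination pointer by 32 bytes (r3), so successive DCT outputs land 16
   samples apart, in the interleaved order the synthesis window expects.
*/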
+ vst1.16 {d12[3]}, [r0, :16], r3 + vst1.16 {d6[2]}, [r0, :16], r3 + vst1.16 {d8[3]}, [r0, :16], r3 + vst1.16 {d2[2]}, [r0, :16], r3 + vst1.16 {d12[1]}, [r0, :16], r3 + vst1.16 {d4[2]}, [r0, :16], r3 + vst1.16 {d8[1]}, [r0, :16], r3 + vst1.16 {d0[2]}, [r0, :16], r3 + vst1.16 {d12[2]}, [r0, :16], r3 + vst1.16 {d6[0]}, [r0, :16], r3 + vst1.16 {d8[2]}, [r0, :16], r3 + vst1.16 {d2[0]}, [r0, :16], r3 + vst1.16 {d12[0]}, [r0, :16], r3 + vst1.16 {d4[0]}, [r0, :16], r3 + vst1.16 {d8[0]}, [r0, :16], r3 + vst1.16 {d0[0]}, [r0, :16] + + vst1.16 {d0[1]}, [r1, :16], r3 + vst1.16 {d10[0]}, [r1, :16], r3 + vst1.16 {d4[1]}, [r1, :16], r3 + vst1.16 {d14[0]}, [r1, :16], r3 + vst1.16 {d2[1]}, [r1, :16], r3 + vst1.16 {d10[2]}, [r1, :16], r3 + vst1.16 {d6[1]}, [r1, :16], r3 + vst1.16 {d14[2]}, [r1, :16], r3 + vst1.16 {d0[3]}, [r1, :16], r3 + vst1.16 {d10[1]}, [r1, :16], r3 + vst1.16 {d4[3]}, [r1, :16], r3 + vst1.16 {d14[1]}, [r1, :16], r3 + vst1.16 {d2[3]}, [r1, :16], r3 + vst1.16 {d10[3]}, [r1, :16], r3 + vst1.16 {d6[3]}, [r1, :16], r3 + vst1.16 {d14[3]}, [r1, :16] + + vpop {q4-q7} + bx lr + +NONEXEC_STACK Index: include/reactos/libs/libmpg123/dct64_neon64.S =================================================================== --- include/reactos/libs/libmpg123/dct64_neon64.S (revision 0) +++ include/reactos/libs/libmpg123/dct64_neon64.S (working copy) @@ -0,0 +1,299 @@ +/* + dct64_neon64: NEON optimized dct64 for AArch64 + + copyright 1995-2014 by the mpg123 project - free software under the terms of the LGPL 2.1 + see COPYING and AUTHORS files in distribution or http://mpg123.org + initially written by Taihei Monma +*/ + +#include "mangle.h" + +#ifndef __APPLE__ + .section .rodata +#else + .data +#endif + ALIGN16 +costab_neon_aarch64: + .word 1056974725 + .word 1057056395 + .word 1057223771 + .word 1057485416 + .word 1057855544 + .word 1058356026 + .word 1059019886 + .word 1059897405 + .word 1061067246 + .word 1062657950 + .word 1064892987 + .word 1066774581 + .word 1069414683 + .word 1073984175 + .word 1079645762 + .word 1092815430 + .word 1057005197 + .word 1057342072 + .word 1058087743 + .word 1059427869 + .word 1061799040 + .word 1065862217 + .word 1071413542 + .word 1084439708 + .word 1057128951 + .word 1058664893 + .word 1063675095 + .word 1076102863 + .word 1057655764 + .word 1067924853 + .word 1060439283 + .word 1060439283 + .text + ALIGN4 + .globl ASM_NAME(dct64_neon64) +#ifdef __ELF__ + .type ASM_NAME(dct64_neon64), %function +#endif +ASM_NAME(dct64_neon64): + add x3, x2, #64 + adrp x4, AARCH64_PCREL_HI(costab_neon_aarch64) + add x4, x4, AARCH64_PCREL_LO(costab_neon_aarch64) + ld1 {v0.4s, v1.4s, v2.4s, v3.4s}, [x2] + ld1 {v16.4s, v17.4s, v18.4s, v19.4s}, [x3] + ld1 {v20.4s, v21.4s, v22.4s, v23.4s}, [x4], #64 + + rev64 v19.4s, v19.4s + rev64 v18.4s, v18.4s + rev64 v17.4s, v17.4s + rev64 v16.4s, v16.4s + ext v4.16b, v19.16b, v19.16b, #8 + ext v5.16b, v18.16b, v18.16b, #8 + ext v6.16b, v17.16b, v17.16b, #8 + ext v7.16b, v16.16b, v16.16b, #8 + + fsub v16.4s, v3.4s, v7.4s + fsub v17.4s, v2.4s, v6.4s + fsub v18.4s, v1.4s, v5.4s + fsub v19.4s, v0.4s, v4.4s + fadd v0.4s, v0.4s, v4.4s /* bs[0,1,2,3] */ + fadd v1.4s, v1.4s, v5.4s /* bs[4,5,6,7] */ + fadd v2.4s, v2.4s, v6.4s /* bs[8,9,10,11] */ + fadd v3.4s, v3.4s, v7.4s /* bs[12,13,14,15] */ + fmul v16.4s, v16.4s, v23.4s /* bs[19,18,17,16] */ + fmul v17.4s, v17.4s, v22.4s /* bs[23,22,21,20] */ + fmul v18.4s, v18.4s, v21.4s /* bs[27,26,25,24] */ + fmul v19.4s, v19.4s, v20.4s /* bs[31,30,29,28] */ + + ld1 {v20.4s, v21.4s}, [x4], #32 + rev64 v22.4s, v3.4s + 
rev64 v23.4s, v2.4s + rev64 v24.4s, v16.4s + rev64 v25.4s, v17.4s + ext v4.16b, v22.16b, v22.16b, #8 /* bs[15,14,13,12] */ + ext v5.16b, v23.16b, v23.16b, #8 /* bs[11,10,9,8] */ + ext v6.16b, v24.16b, v24.16b, #8 /* bs[16,17,18,19] */ + ext v7.16b, v25.16b, v25.16b, #8 /* bs[20,21,22,23] */ + + fsub v26.4s, v1.4s, v5.4s + fsub v27.4s, v0.4s, v4.4s + fsub v28.4s, v18.4s, v7.4s + fsub v29.4s, v19.4s, v6.4s + fadd v4.4s, v0.4s, v4.4s /* bs[32,33,34,35] */ + fadd v5.4s, v1.4s, v5.4s /* bs[36,37,38,39] */ + fadd v6.4s, v6.4s, v19.4s /* bs[48,49,50,51] */ + fadd v7.4s, v7.4s, v18.4s /* bs[52,53,54,55] */ + fmul v26.4s, v26.4s, v21.4s /* bs[43,42,41,40] */ + fmul v27.4s, v27.4s, v20.4s /* bs[47,46,45,44] */ + fmul v28.4s, v28.4s, v21.4s /* bs[59,58,57,56] */ + fmul v29.4s, v29.4s, v20.4s /* bs[63,62,61,60] */ + + ld1 {v20.4s}, [x4], #16 + rev64 v16.4s, v5.4s + rev64 v17.4s, v26.4s + rev64 v18.4s, v7.4s + rev64 v19.4s, v28.4s + ext v0.16b, v16.16b, v16.16b, #8 /* bs[39,38,37,36] */ + ext v1.16b, v17.16b, v17.16b, #8 /* bs[40,41,42,43] */ + ext v2.16b, v18.16b, v18.16b, #8 /* bs[55,54,53,52] */ + ext v3.16b, v19.16b, v19.16b, #8 /* bs[56,57,58,59] */ + + fsub v16.4s, v4.4s, v0.4s + fsub v17.4s, v27.4s, v1.4s + fsub v18.4s, v6.4s, v2.4s + fsub v19.4s, v29.4s, v3.4s + fadd v0.4s, v4.4s, v0.4s /* bs[0,1,2,3] */ + fadd v1.4s, v1.4s, v27.4s /* bs[8,9,10,11] */ + fadd v2.4s, v6.4s, v2.4s /* bs[16,17,18,19] */ + fadd v3.4s, v3.4s, v29.4s /* bs[24,25,26,27] */ + fmul v16.4s, v16.4s, v20.4s /* bs[7,6,5,4] */ + fmul v17.4s, v17.4s, v20.4s /* bs[15,14,13,12] */ + fmul v18.4s, v18.4s, v20.4s /* bs[23,22,21,20] */ + fmul v19.4s, v19.4s, v20.4s /* bs[31,30,29,28] */ + + ld1 {v28.4s}, [x4] + zip1 v4.2d, v0.2d, v16.2d /* bs[0,1,7,6] */ + zip2 v5.2d, v0.2d, v16.2d /* bs[2,3,5,4] */ + zip1 v6.2d, v1.2d, v17.2d /* bs[8,9,15,14] */ + zip2 v7.2d, v1.2d, v17.2d /* bs[10,11,13,12] */ + zip1 v20.2d, v2.2d, v18.2d /* bs[16,17,23,22] */ + zip2 v21.2d, v2.2d, v18.2d /* bs[18,19,21,20] */ + zip1 v22.2d, v3.2d, v19.2d /* bs[24,25,31,30] */ + zip2 v23.2d, v3.2d, v19.2d /* bs[26,27,29,28] */ + rev64 v5.4s, v5.4s /* bs[3,2,4,5] */ + rev64 v7.4s, v7.4s /* bs[11,10,12,13] */ + rev64 v21.4s, v21.4s /* bs[19,18,20,21] */ + rev64 v23.4s, v23.4s /* bs[27,26,28,29] */ + AARCH64_DUP_2D(v29, v28, 0) + AARCH64_DUP_4S(v28, v28, 2) + + fsub v16.4s, v4.4s, v5.4s + fsub v17.4s, v6.4s, v7.4s + fsub v18.4s, v20.4s, v21.4s + fsub v19.4s, v22.4s, v23.4s + fadd v0.4s, v4.4s, v5.4s /* bs[32,33,36,37] */ + fadd v1.4s, v6.4s, v7.4s /* bs[40,41,44,45] */ + fadd v2.4s, v20.4s, v21.4s /* bs[48,49,52,53] */ + fadd v3.4s, v22.4s, v23.4s /* bs[56,57,60,61] */ + fmul v16.4s, v16.4s, v29.4s /* bs[35,34,39,38] */ + fmul v17.4s, v17.4s, v29.4s /* bs[43,42,47,46] */ + fmul v18.4s, v18.4s, v29.4s /* bs[51,50,55,54] */ + fmul v19.4s, v19.4s, v29.4s /* bs[59,58,63,62] */ + + uzp1 v4.4s, v0.4s, v16.4s /* bs[32,36,35,39] */ + uzp2 v5.4s, v0.4s, v16.4s /* bs[33,37,34,38] */ + uzp1 v6.4s, v1.4s, v17.4s /* bs[40,44,43,47] */ + uzp2 v7.4s, v1.4s, v17.4s /* bs[41,45,42,46] */ + uzp1 v20.4s, v2.4s, v18.4s /* bs[48,52,51,55] */ + uzp2 v21.4s, v2.4s, v18.4s /* bs[49,53,50,54] */ + uzp1 v22.4s, v3.4s, v19.4s /* bs[56,60,59,63] */ + uzp2 v23.4s, v3.4s, v19.4s /* bs[57,61,58,62] */ + + fsub v16.4s, v4.4s, v5.4s + fsub v17.4s, v6.4s, v7.4s + fsub v18.4s, v20.4s, v21.4s + fsub v19.4s, v22.4s, v23.4s + fadd v0.4s, v4.4s, v5.4s /* bs[0,4,2,6] */ + fadd v1.4s, v6.4s, v7.4s /* bs[8,12,10,14] */ + fadd v2.4s, v20.4s, v21.4s /* bs[16,20,18,22] */ + fadd v3.4s, v22.4s, v23.4s /* 
bs[24,28,26,30] */ + fmul v16.4s, v16.4s, v28.4s /* bs[1,5,3,7] */ + fmul v17.4s, v17.4s, v28.4s /* bs[9,13,11,15] */ + fmul v18.4s, v18.4s, v28.4s /* bs[17,21,19,23] */ + fmul v19.4s, v19.4s, v28.4s /* bs[25,29,27,31] */ + + zip2 v4.2d, v0.2d, v1.2d /* bs[2,6,10,14] */ + zip2 v5.2d, v16.2d, v17.2d /* bs[3,7,11,15] */ + zip2 v6.2d, v2.2d, v3.2d /* bs[18,22,26,30] */ + zip2 v7.2d, v18.2d, v19.2d /* bs[19,23,27,31] */ + fadd v4.4s, v4.4s, v5.4s /* bs[2,6,10,14] */ + fadd v6.4s, v6.4s, v7.4s /* bs[18,22,26,30] */ + ins v0.d[1], v4.d[0] /* bs[0,4,2,6] */ + ins v1.d[1], v4.d[1] /* bs[8,12,10,14] */ + ins v2.d[1], v6.d[0] /* bs[16,20,18,22] */ + ins v3.d[1], v6.d[1] /* bs[24,28,26,30] */ + + eor v31.16b, v31.16b, v31.16b + zip1 v4.4s, v0.4s, v16.4s /* bs[0,1,4,5] */ + zip2 v5.4s, v0.4s, v16.4s /* bs[2,3,6,7] */ + zip1 v6.4s, v1.4s, v17.4s /* bs[8,9,12,13] */ + zip2 v7.4s, v1.4s, v17.4s /* bs[10,11,14,15] */ + zip1 v20.4s, v2.4s, v18.4s /* bs[16,17,20,21] */ + zip2 v21.4s, v2.4s, v18.4s /* bs[18,19,22,23] */ + zip1 v22.4s, v3.4s, v19.4s /* bs[24,25,28,29] */ + zip2 v23.4s, v3.4s, v19.4s /* bs[26,27,30,31] */ + zip1 v0.2d, v4.2d, v5.2d /* bs[0,1,2,3] */ + zip2 v1.2d, v4.2d, v5.2d /* bs[4,5,6,7] */ + zip1 v2.2d, v6.2d, v7.2d /* bs[8,9,10,11] */ + zip2 v3.2d, v6.2d, v7.2d /* bs[12,13,14,15] */ + rev64 v16.4s, v4.4s + rev64 v17.4s, v6.4s + zip1 v24.2d, v7.2d, v17.2d + zip2 v16.2d, v5.2d, v16.2d + zip2 v17.2d, v7.2d, v17.2d + zip1 v4.2d, v20.2d, v21.2d /* bs[16,17,18,19] */ + zip2 v5.2d, v20.2d, v21.2d /* bs[20,21,22,23] */ + zip1 v6.2d, v22.2d, v23.2d /* bs[24,25,26,27] */ + zip2 v7.2d, v22.2d, v23.2d /* bs[28,29,30,31] */ + rev64 v18.4s, v20.4s + rev64 v19.4s, v22.4s + zip1 v25.2d, v23.2d, v19.2d + zip1 v26.2d, v21.2d, v18.2d + zip2 v18.2d, v21.2d, v18.2d + zip2 v19.2d, v23.2d, v19.2d + ins v16.s[3], v31.s[0] /* bs[6,7,5,-] */ + ins v17.s[3], v31.s[0] /* bs[14,15,13,-] */ + ins v18.s[3], v31.s[0] /* bs[22,23,21,-] */ + ins v19.s[3], v31.s[0] /* bs[30,31,29,-] */ + ins v24.s[3], v31.s[0] /* bs[10,11,9,-] */ + ins v25.s[3], v31.s[0] /* bs[26,27,25,-] */ + ins v26.s[3], v31.s[0] /* bs[18,19,17,-] */ + + fadd v1.4s, v1.4s, v16.4s + fadd v3.4s, v3.4s, v17.4s + fadd v5.4s, v5.4s, v18.4s + fadd v7.4s, v7.4s, v19.4s + + fadd v2.4s, v2.4s, v3.4s + fadd v3.4s, v3.4s, v24.4s + fadd v6.4s, v6.4s, v7.4s + fadd v7.4s, v7.4s, v25.4s + + fadd v4.4s, v4.4s, v6.4s + fadd v6.4s, v6.4s, v5.4s + fadd v5.4s, v5.4s, v7.4s + fadd v7.4s, v7.4s, v26.4s + + fcvtns v0.4s, v0.4s + fcvtns v1.4s, v1.4s + fcvtns v2.4s, v2.4s + fcvtns v3.4s, v3.4s + fcvtns v4.4s, v4.4s + fcvtns v5.4s, v5.4s + fcvtns v6.4s, v6.4s + fcvtns v7.4s, v7.4s + sqxtn v0.4h, v0.4s + sqxtn v1.4h, v1.4s + sqxtn v2.4h, v2.4s + sqxtn v3.4h, v3.4s + sqxtn v4.4h, v4.4s + sqxtn v5.4h, v5.4s + sqxtn v6.4h, v6.4s + sqxtn v7.4h, v7.4s + + mov x3, #32 + st1 {v0.h}[1], [x0], x3 + st1 {v7.h}[2], [x0], x3 + st1 {v3.h}[2], [x0], x3 + st1 {v5.h}[2], [x0], x3 + st1 {v1.h}[2], [x0], x3 + st1 {v6.h}[2], [x0], x3 + st1 {v2.h}[2], [x0], x3 + st1 {v4.h}[2], [x0], x3 + st1 {v0.h}[2], [x0], x3 + st1 {v7.h}[0], [x0], x3 + st1 {v3.h}[0], [x0], x3 + st1 {v5.h}[0], [x0], x3 + st1 {v1.h}[0], [x0], x3 + st1 {v6.h}[0], [x0], x3 + st1 {v2.h}[0], [x0], x3 + st1 {v4.h}[0], [x0], x3 + st1 {v0.h}[0], [x0] + st1 {v0.h}[1], [x1], x3 + st1 {v4.h}[1], [x1], x3 + st1 {v2.h}[1], [x1], x3 + st1 {v6.h}[1], [x1], x3 + st1 {v1.h}[1], [x1], x3 + st1 {v5.h}[1], [x1], x3 + st1 {v3.h}[1], [x1], x3 + st1 {v7.h}[1], [x1], x3 + st1 {v0.h}[3], [x1], x3 + st1 {v4.h}[3], [x1], x3 + st1 {v2.h}[3], [x1], x3 + st1 
{v6.h}[3], [x1], x3 + st1 {v1.h}[3], [x1], x3 + st1 {v5.h}[3], [x1], x3 + st1 {v3.h}[3], [x1], x3 + st1 {v7.h}[3], [x1] + + ret + +NONEXEC_STACK Index: include/reactos/libs/libmpg123/dct64_neon64_float.S =================================================================== --- include/reactos/libs/libmpg123/dct64_neon64_float.S (revision 0) +++ include/reactos/libs/libmpg123/dct64_neon64_float.S (working copy) @@ -0,0 +1,282 @@ +/* + dct64_neon64_float: NEON optimized dct64 for AArch64 (float output version) + + copyright 1995-2014 by the mpg123 project - free software under the terms of the LGPL 2.1 + see COPYING and AUTHORS files in distribution or http://mpg123.org + initially written by Taihei Monma +*/ + +#include "mangle.h" + +#ifndef __APPLE__ + .section .rodata +#else + .data +#endif + ALIGN16 +costab_neon_aarch64: + .word 1056974725 + .word 1057056395 + .word 1057223771 + .word 1057485416 + .word 1057855544 + .word 1058356026 + .word 1059019886 + .word 1059897405 + .word 1061067246 + .word 1062657950 + .word 1064892987 + .word 1066774581 + .word 1069414683 + .word 1073984175 + .word 1079645762 + .word 1092815430 + .word 1057005197 + .word 1057342072 + .word 1058087743 + .word 1059427869 + .word 1061799040 + .word 1065862217 + .word 1071413542 + .word 1084439708 + .word 1057128951 + .word 1058664893 + .word 1063675095 + .word 1076102863 + .word 1057655764 + .word 1067924853 + .word 1060439283 + .word 1060439283 + .text + ALIGN4 + .globl ASM_NAME(dct64_real_neon64) +#ifdef __ELF__ + .type ASM_NAME(dct64_real_neon64), %function +#endif +ASM_NAME(dct64_real_neon64): + add x3, x2, #64 + adrp x4, AARCH64_PCREL_HI(costab_neon_aarch64) + add x4, x4, AARCH64_PCREL_LO(costab_neon_aarch64) + ld1 {v0.4s,v1.4s,v2.4s,v3.4s}, [x2] + ld1 {v16.4s,v17.4s,v18.4s,v19.4s}, [x3] + ld1 {v20.4s,v21.4s,v22.4s,v23.4s}, [x4], #64 + + rev64 v19.4s, v19.4s + rev64 v18.4s, v18.4s + rev64 v17.4s, v17.4s + rev64 v16.4s, v16.4s + ext v4.16b, v19.16b, v19.16b, #8 + ext v5.16b, v18.16b, v18.16b, #8 + ext v6.16b, v17.16b, v17.16b, #8 + ext v7.16b, v16.16b, v16.16b, #8 + + fsub v16.4s, v3.4s, v7.4s + fsub v17.4s, v2.4s, v6.4s + fsub v18.4s, v1.4s, v5.4s + fsub v19.4s, v0.4s, v4.4s + fadd v0.4s, v0.4s, v4.4s /* bs[0,1,2,3] */ + fadd v1.4s, v1.4s, v5.4s /* bs[4,5,6,7] */ + fadd v2.4s, v2.4s, v6.4s /* bs[8,9,10,11] */ + fadd v3.4s, v3.4s, v7.4s /* bs[12,13,14,15] */ + fmul v16.4s, v16.4s, v23.4s /* bs[19,18,17,16] */ + fmul v17.4s, v17.4s, v22.4s /* bs[23,22,21,20] */ + fmul v18.4s, v18.4s, v21.4s /* bs[27,26,25,24] */ + fmul v19.4s, v19.4s, v20.4s /* bs[31,30,29,28] */ + + ld1 {v20.4s, v21.4s}, [x4], #32 + rev64 v22.4s, v3.4s + rev64 v23.4s, v2.4s + rev64 v24.4s, v16.4s + rev64 v25.4s, v17.4s + ext v4.16b, v22.16b, v22.16b, #8 /* bs[15,14,13,12] */ + ext v5.16b, v23.16b, v23.16b, #8 /* bs[11,10,9,8] */ + ext v6.16b, v24.16b, v24.16b, #8 /* bs[16,17,18,19] */ + ext v7.16b, v25.16b, v25.16b, #8 /* bs[20,21,22,23] */ + + fsub v26.4s, v1.4s, v5.4s + fsub v27.4s, v0.4s, v4.4s + fsub v28.4s, v18.4s, v7.4s + fsub v29.4s, v19.4s, v6.4s + fadd v4.4s, v0.4s, v4.4s /* bs[32,33,34,35] */ + fadd v5.4s, v1.4s, v5.4s /* bs[36,37,38,39] */ + fadd v6.4s, v6.4s, v19.4s /* bs[48,49,50,51] */ + fadd v7.4s, v7.4s, v18.4s /* bs[52,53,54,55] */ + fmul v26.4s, v26.4s, v21.4s /* bs[43,42,41,40] */ + fmul v27.4s, v27.4s, v20.4s /* bs[47,46,45,44] */ + fmul v28.4s, v28.4s, v21.4s /* bs[59,58,57,56] */ + fmul v29.4s, v29.4s, v20.4s /* bs[63,62,61,60] */ + + ld1 {v20.4s}, [x4], #16 + rev64 v16.4s, v5.4s + rev64 v17.4s, v26.4s + rev64 v18.4s, v7.4s + 
rev64 v19.4s, v28.4s + ext v0.16b, v16.16b, v16.16b, #8 /* bs[39,38,37,36] */ + ext v1.16b, v17.16b, v17.16b, #8 /* bs[40,41,42,43] */ + ext v2.16b, v18.16b, v18.16b, #8 /* bs[55,54,53,52] */ + ext v3.16b, v19.16b, v19.16b, #8 /* bs[56,57,58,59] */ + + fsub v16.4s, v4.4s, v0.4s + fsub v17.4s, v27.4s, v1.4s + fsub v18.4s, v6.4s, v2.4s + fsub v19.4s, v29.4s, v3.4s + fadd v0.4s, v4.4s, v0.4s /* bs[0,1,2,3] */ + fadd v1.4s, v1.4s, v27.4s /* bs[8,9,10,11] */ + fadd v2.4s, v6.4s, v2.4s /* bs[16,17,18,19] */ + fadd v3.4s, v3.4s, v29.4s /* bs[24,25,26,27] */ + fmul v16.4s, v16.4s, v20.4s /* bs[7,6,5,4] */ + fmul v17.4s, v17.4s, v20.4s /* bs[15,14,13,12] */ + fmul v18.4s, v18.4s, v20.4s /* bs[23,22,21,20] */ + fmul v19.4s, v19.4s, v20.4s /* bs[31,30,29,28] */ + + ld1 {v28.4s}, [x4] + zip1 v4.2d, v0.2d, v16.2d /* bs[0,1,7,6] */ + zip2 v5.2d, v0.2d, v16.2d /* bs[2,3,5,4] */ + zip1 v6.2d, v1.2d, v17.2d /* bs[8,9,15,14] */ + zip2 v7.2d, v1.2d, v17.2d /* bs[10,11,13,12] */ + zip1 v20.2d, v2.2d, v18.2d /* bs[16,17,23,22] */ + zip2 v21.2d, v2.2d, v18.2d /* bs[18,19,21,20] */ + zip1 v22.2d, v3.2d, v19.2d /* bs[24,25,31,30] */ + zip2 v23.2d, v3.2d, v19.2d /* bs[26,27,29,28] */ + rev64 v5.4s, v5.4s /* bs[3,2,4,5] */ + rev64 v7.4s, v7.4s /* bs[11,10,12,13] */ + rev64 v21.4s, v21.4s /* bs[19,18,20,21] */ + rev64 v23.4s, v23.4s /* bs[27,26,28,29] */ + AARCH64_DUP_2D(v29, v28, 0) + AARCH64_DUP_4S(v28, v28, 2) + + fsub v16.4s, v4.4s, v5.4s + fsub v17.4s, v6.4s, v7.4s + fsub v18.4s, v20.4s, v21.4s + fsub v19.4s, v22.4s, v23.4s + fadd v0.4s, v4.4s, v5.4s /* bs[32,33,36,37] */ + fadd v1.4s, v6.4s, v7.4s /* bs[40,41,44,45] */ + fadd v2.4s, v20.4s, v21.4s /* bs[48,49,52,53] */ + fadd v3.4s, v22.4s, v23.4s /* bs[56,57,60,61] */ + fmul v16.4s, v16.4s, v29.4s /* bs[35,34,39,38] */ + fmul v17.4s, v17.4s, v29.4s /* bs[43,42,47,46] */ + fmul v18.4s, v18.4s, v29.4s /* bs[51,50,55,54] */ + fmul v19.4s, v19.4s, v29.4s /* bs[59,58,63,62] */ + + uzp1 v4.4s, v0.4s, v16.4s /* bs[32,36,35,39] */ + uzp2 v5.4s, v0.4s, v16.4s /* bs[33,37,34,38] */ + uzp1 v6.4s, v1.4s, v17.4s /* bs[40,44,43,47] */ + uzp2 v7.4s, v1.4s, v17.4s /* bs[41,45,42,46] */ + uzp1 v20.4s, v2.4s, v18.4s /* bs[48,52,51,55] */ + uzp2 v21.4s, v2.4s, v18.4s /* bs[49,53,50,54] */ + uzp1 v22.4s, v3.4s, v19.4s /* bs[56,60,59,63] */ + uzp2 v23.4s, v3.4s, v19.4s /* bs[57,61,58,62] */ + + fsub v16.4s, v4.4s, v5.4s + fsub v17.4s, v6.4s, v7.4s + fsub v18.4s, v20.4s, v21.4s + fsub v19.4s, v22.4s, v23.4s + fadd v0.4s, v4.4s, v5.4s /* bs[0,4,2,6] */ + fadd v1.4s, v6.4s, v7.4s /* bs[8,12,10,14] */ + fadd v2.4s, v20.4s, v21.4s /* bs[16,20,18,22] */ + fadd v3.4s, v22.4s, v23.4s /* bs[24,28,26,30] */ + fmul v16.4s, v16.4s, v28.4s /* bs[1,5,3,7] */ + fmul v17.4s, v17.4s, v28.4s /* bs[9,13,11,15] */ + fmul v18.4s, v18.4s, v28.4s /* bs[17,21,19,23] */ + fmul v19.4s, v19.4s, v28.4s /* bs[25,29,27,31] */ + + zip2 v4.2d, v0.2d, v1.2d /* bs[2,6,10,14] */ + zip2 v5.2d, v16.2d, v17.2d /* bs[3,7,11,15] */ + zip2 v6.2d, v2.2d, v3.2d /* bs[18,22,26,30] */ + zip2 v7.2d, v18.2d, v19.2d /* bs[19,23,27,31] */ + fadd v4.4s, v4.4s, v5.4s /* bs[2,6,10,14] */ + fadd v6.4s, v6.4s, v7.4s /* bs[18,22,26,30] */ + ins v0.d[1], v4.d[0] /* bs[0,4,2,6] */ + ins v1.d[1], v4.d[1] /* bs[8,12,10,14] */ + ins v2.d[1], v6.d[0] /* bs[16,20,18,22] */ + ins v3.d[1], v6.d[1] /* bs[24,28,26,30] */ + + eor v31.16b, v31.16b, v31.16b + zip1 v4.4s, v0.4s, v16.4s /* bs[0,1,4,5] */ + zip2 v5.4s, v0.4s, v16.4s /* bs[2,3,6,7] */ + zip1 v6.4s, v1.4s, v17.4s /* bs[8,9,12,13] */ + zip2 v7.4s, v1.4s, v17.4s /* bs[10,11,14,15] */ + 
zip1 v20.4s, v2.4s, v18.4s /* bs[16,17,20,21] */ + zip2 v21.4s, v2.4s, v18.4s /* bs[18,19,22,23] */ + zip1 v22.4s, v3.4s, v19.4s /* bs[24,25,28,29] */ + zip2 v23.4s, v3.4s, v19.4s /* bs[26,27,30,31] */ + zip1 v0.2d, v4.2d, v5.2d /* bs[0,1,2,3] */ + zip2 v1.2d, v4.2d, v5.2d /* bs[4,5,6,7] */ + zip1 v2.2d, v6.2d, v7.2d /* bs[8,9,10,11] */ + zip2 v3.2d, v6.2d, v7.2d /* bs[12,13,14,15] */ + rev64 v16.4s, v4.4s + rev64 v17.4s, v6.4s + zip1 v24.2d, v7.2d, v17.2d + zip2 v16.2d, v5.2d, v16.2d + zip2 v17.2d, v7.2d, v17.2d + zip1 v4.2d, v20.2d, v21.2d /* bs[16,17,18,19] */ + zip2 v5.2d, v20.2d, v21.2d /* bs[20,21,22,23] */ + zip1 v6.2d, v22.2d, v23.2d /* bs[24,25,26,27] */ + zip2 v7.2d, v22.2d, v23.2d /* bs[28,29,30,31] */ + rev64 v18.4s, v20.4s + rev64 v19.4s, v22.4s + zip1 v25.2d, v23.2d, v19.2d + zip1 v26.2d, v21.2d, v18.2d + zip2 v18.2d, v21.2d, v18.2d + zip2 v19.2d, v23.2d, v19.2d + ins v16.s[3], v31.s[0] /* bs[6,7,5,-] */ + ins v17.s[3], v31.s[0] /* bs[14,15,13,-] */ + ins v18.s[3], v31.s[0] /* bs[22,23,21,-] */ + ins v19.s[3], v31.s[0] /* bs[30,31,29,-] */ + ins v24.s[3], v31.s[0] /* bs[10,11,9,-] */ + ins v25.s[3], v31.s[0] /* bs[26,27,25,-] */ + ins v26.s[3], v31.s[0] /* bs[18,19,17,-] */ + + fadd v1.4s, v1.4s, v16.4s + fadd v3.4s, v3.4s, v17.4s + fadd v5.4s, v5.4s, v18.4s + fadd v7.4s, v7.4s, v19.4s + + fadd v2.4s, v2.4s, v3.4s + fadd v3.4s, v3.4s, v24.4s + fadd v6.4s, v6.4s, v7.4s + fadd v7.4s, v7.4s, v25.4s + + fadd v4.4s, v4.4s, v6.4s + fadd v6.4s, v6.4s, v5.4s + fadd v5.4s, v5.4s, v7.4s + fadd v7.4s, v7.4s, v26.4s + + mov x3, #64 + st1 {v0.s}[1], [x0], x3 + st1 {v7.s}[2], [x0], x3 + st1 {v3.s}[2], [x0], x3 + st1 {v5.s}[2], [x0], x3 + st1 {v1.s}[2], [x0], x3 + st1 {v6.s}[2], [x0], x3 + st1 {v2.s}[2], [x0], x3 + st1 {v4.s}[2], [x0], x3 + st1 {v0.s}[2], [x0], x3 + st1 {v7.s}[0], [x0], x3 + st1 {v3.s}[0], [x0], x3 + st1 {v5.s}[0], [x0], x3 + st1 {v1.s}[0], [x0], x3 + st1 {v6.s}[0], [x0], x3 + st1 {v2.s}[0], [x0], x3 + st1 {v4.s}[0], [x0], x3 + st1 {v0.s}[0], [x0] + st1 {v0.s}[1], [x1], x3 + st1 {v4.s}[1], [x1], x3 + st1 {v2.s}[1], [x1], x3 + st1 {v6.s}[1], [x1], x3 + st1 {v1.s}[1], [x1], x3 + st1 {v5.s}[1], [x1], x3 + st1 {v3.s}[1], [x1], x3 + st1 {v7.s}[1], [x1], x3 + st1 {v0.s}[3], [x1], x3 + st1 {v4.s}[3], [x1], x3 + st1 {v2.s}[3], [x1], x3 + st1 {v6.s}[3], [x1], x3 + st1 {v1.s}[3], [x1], x3 + st1 {v5.s}[3], [x1], x3 + st1 {v3.s}[3], [x1], x3 + st1 {v7.s}[3], [x1] + + ret + +NONEXEC_STACK Index: include/reactos/libs/libmpg123/dct64_neon_float.S =================================================================== --- include/reactos/libs/libmpg123/dct64_neon_float.S (revision 0) +++ include/reactos/libs/libmpg123/dct64_neon_float.S (working copy) @@ -0,0 +1,281 @@ +/* + dct64_neon_float: ARM NEON optimized dct64 (float output version) + + copyright 1995-2010 by the mpg123 project - free software under the terms of the LGPL 2.1 + see COPYING and AUTHORS files in distribution or http://mpg123.org + initially written by Taihei Monma +*/ + +#include "mangle.h" + + .code 32 +#ifndef __APPLE__ + .fpu neon +#endif + + .text + ALIGN16 +costab_arm: + .word 1056974725 + .word 1057056395 + .word 1057223771 + .word 1057485416 + .word 1057855544 + .word 1058356026 + .word 1059019886 + .word 1059897405 + .word 1061067246 + .word 1062657950 + .word 1064892987 + .word 1066774581 + .word 1069414683 + .word 1073984175 + .word 1079645762 + .word 1092815430 + .word 1057005197 + .word 1057342072 + .word 1058087743 + .word 1059427869 + .word 1061799040 + .word 1065862217 + .word 1071413542 + .word 1084439708 + 
.word 1057128951 + .word 1058664893 + .word 1063675095 + .word 1076102863 + .word 1057655764 + .word 1067924853 + .word 1060439283 + .word 1060439283 + ALIGN4 + .globl ASM_NAME(dct64_real_neon) +#ifdef __ELF__ + .type ASM_NAME(dct64_real_neon), %function +#endif +ASM_NAME(dct64_real_neon): + vpush {q4-q7} + + adr r3, costab_arm + vld1.32 {q0, q1}, [r2]! + vld1.32 {q2, q3}, [r2]! + vld1.32 {q4, q5}, [r2]! + vld1.32 {q6, q7}, [r2] + vld1.32 {q12, q13}, [r3, :128]! + vld1.32 {q14, q15}, [r3, :128]! + + vrev64.32 q4, q4 + vrev64.32 q5, q5 + vrev64.32 q6, q6 + vrev64.32 q7, q7 + vswp d8, d9 + vswp d10, d11 + vswp d12, d13 + vswp d14, d15 + + vsub.f32 q8, q0, q7 + vsub.f32 q9, q1, q6 + vsub.f32 q10, q2, q5 + vsub.f32 q11, q3, q4 + vadd.f32 q0, q0, q7 + vadd.f32 q1, q1, q6 + vadd.f32 q2, q2, q5 + vadd.f32 q3, q3, q4 + vmul.f32 q4, q8, q12 + vmul.f32 q5, q9, q13 + vmul.f32 q6, q10, q14 + vmul.f32 q7, q11, q15 + + vld1.32 {q12, q13}, [r3, :128]! + vld1.32 {q14, q15}, [r3, :128] + + vrev64.32 q2, q2 + vrev64.32 q3, q3 + vrev64.32 q6, q6 + vrev64.32 q7, q7 + vswp d4, d5 + vswp d6, d7 + vswp d12, d13 + vswp d14, d15 + + vsub.f32 q8, q0, q3 + vsub.f32 q9, q1, q2 + vsub.f32 q10, q4, q7 + vsub.f32 q11, q5, q6 + vadd.f32 q0, q0, q3 + vadd.f32 q1, q1, q2 + vadd.f32 q4, q4, q7 + vadd.f32 q5, q5, q6 + vmul.f32 q2, q8, q12 + vmul.f32 q3, q9, q13 + vmul.f32 q6, q10, q12 + vmul.f32 q7, q11, q13 + + vrev64.32 q1, q1 + vrev64.32 q3, q3 + vrev64.32 q5, q5 + vrev64.32 q7, q7 + vswp d2, d3 + vswp d6, d7 + vswp d10, d11 + vswp d14, d15 + + vsub.f32 q8, q0, q1 + vsub.f32 q9, q2, q3 + vsub.f32 q10, q4, q5 + vsub.f32 q11, q6, q7 + vadd.f32 q0, q0, q1 + vadd.f32 q2, q2, q3 + vadd.f32 q4, q4, q5 + vadd.f32 q6, q6, q7 + vmul.f32 q1, q8, q14 + vmul.f32 q3, q9, q14 + vmul.f32 q5, q10, q14 + vmul.f32 q7, q11, q14 + + vdup.32 q12, d31[0] + vmov d31, d30 + + vswp d1, d2 + vswp d5, d6 + vswp d9, d10 + vswp d13, d14 + vrev64.32 q1, q1 + vrev64.32 q3, q3 + vrev64.32 q5, q5 + vrev64.32 q7, q7 + + vsub.f32 q8, q0, q1 + vsub.f32 q9, q2, q3 + vsub.f32 q10, q4, q5 + vsub.f32 q11, q6, q7 + vadd.f32 q0, q0, q1 + vadd.f32 q2, q2, q3 + vadd.f32 q4, q4, q5 + vadd.f32 q6, q6, q7 + vmul.f32 q1, q8, q15 + vmul.f32 q3, q9, q15 + vmul.f32 q5, q10, q15 + vmul.f32 q7, q11, q15 + + vtrn.32 q0, q1 + vtrn.32 q2, q3 + vtrn.32 q4, q5 + vtrn.32 q6, q7 + + vsub.f32 q8, q0, q1 + vsub.f32 q9, q2, q3 + vsub.f32 q10, q4, q5 + vsub.f32 q11, q6, q7 + vadd.f32 q0, q0, q1 + vadd.f32 q2, q2, q3 + vadd.f32 q4, q4, q5 + vadd.f32 q6, q6, q7 + vmul.f32 q1, q8, q12 + vmul.f32 q3, q9, q12 + vmul.f32 q5, q10, q12 + vmul.f32 q7, q11, q12 + + vtrn.32 q0, q1 + vtrn.32 q2, q3 + vtrn.32 q4, q5 + vtrn.32 q6, q7 + vswp d1, d2 + vswp d5, d6 + vswp d9, d10 + vswp d13, d14 + + vshr.u64 d16, d1, #32 + vshr.u64 d17, d3, #32 + vshr.u64 d18, d5, #32 + vshr.u64 d19, d7, #32 + vadd.f32 d1, d1, d16 + vadd.f32 d3, d3, d17 + vadd.f32 d5, d5, d18 + vadd.f32 d7, d7, d19 + vshr.u64 d20, d9, #32 + vshr.u64 d21, d11, #32 + vshr.u64 d22, d13, #32 + vshr.u64 d23, d15, #32 + vadd.f32 d9, d9, d20 + vadd.f32 d11, d11, d21 + vadd.f32 d13, d13, d22 + vadd.f32 d15, d15, d23 + + vshr.u64 d16, d2, #32 + vshr.u64 d18, d6, #32 + vshr.u64 d20, d10, #32 + vshr.u64 d22, d14, #32 + vext.8 q8, q1, q8, #8 + vext.8 q9, q3, q9, #8 + vext.8 q10, q5, q10, #8 + vext.8 q11, q7, q11, #8 + vadd.f32 q1, q1, q8 + vadd.f32 q3, q3, q9 + vadd.f32 q5, q5, q10 + vadd.f32 q7, q7, q11 + + vshr.u64 d16, d4, #32 + vshr.u64 d18, d12, #32 + vext.8 q8, q2, q8, #8 + vext.8 q9, q6, q9, #8 + vadd.f32 q2, q2, q3 + vadd.f32 q6, q6, q7 + 
vadd.f32 q3, q3, q8 + vadd.f32 q7, q7, q9 + + vrev64.32 q8, q4 + vshr.u64 d19, d9, #32 + vext.8 d17, d17, d16, #4 + vswp d9, d10 + vswp d13, d14 + vtrn.32 q4, q5 + vtrn.32 q6, q7 + vmov d16, d9 + vmov d18, d11 + + vadd.f32 q4, q6 + vadd.f32 q5, q7 + vadd.f32 q6, q8 + vadd.f32 q7, q9 + + mov r3, #64 + vst1.32 {d0[1]}, [r0, :32], r3 + vst1.32 {d13[1]}, [r0, :32], r3 + vst1.32 {d7[0]}, [r0, :32], r3 + vst1.32 {d9[1]}, [r0, :32], r3 + vst1.32 {d3[0]}, [r0, :32], r3 + vst1.32 {d12[1]}, [r0, :32], r3 + vst1.32 {d5[0]}, [r0, :32], r3 + vst1.32 {d8[1]}, [r0, :32], r3 + vst1.32 {d1[0]}, [r0, :32], r3 + vst1.32 {d13[0]}, [r0, :32], r3 + vst1.32 {d6[0]}, [r0, :32], r3 + vst1.32 {d9[0]}, [r0, :32], r3 + vst1.32 {d2[0]}, [r0, :32], r3 + vst1.32 {d12[0]}, [r0, :32], r3 + vst1.32 {d4[0]}, [r0, :32], r3 + vst1.32 {d8[0]}, [r0, :32], r3 + vst1.32 {d0[0]}, [r0, :32] + + vst1.32 {d0[1]}, [r1, :32], r3 + vst1.32 {d10[0]}, [r1, :32], r3 + vst1.32 {d4[1]}, [r1, :32], r3 + vst1.32 {d14[0]}, [r1, :32], r3 + vst1.32 {d2[1]}, [r1, :32], r3 + vst1.32 {d11[0]}, [r1, :32], r3 + vst1.32 {d6[1]}, [r1, :32], r3 + vst1.32 {d15[0]}, [r1, :32], r3 + vst1.32 {d1[1]}, [r1, :32], r3 + vst1.32 {d10[1]}, [r1, :32], r3 + vst1.32 {d5[1]}, [r1, :32], r3 + vst1.32 {d14[1]}, [r1, :32], r3 + vst1.32 {d3[1]}, [r1, :32], r3 + vst1.32 {d11[1]}, [r1, :32], r3 + vst1.32 {d7[1]}, [r1, :32], r3 + vst1.32 {d15[1]}, [r1, :32] + + vpop {q4-q7} + bx lr + +NONEXEC_STACK Index: include/reactos/libs/libmpg123/dct64_sse.S =================================================================== --- include/reactos/libs/libmpg123/dct64_sse.S (revision 0) +++ include/reactos/libs/libmpg123/dct64_sse.S (working copy) @@ -0,0 +1,454 @@ +/* + dct64_sse: MMX/SSE optimized dct64 + + copyright 1995-2009 by the mpg123 project - free software under the terms of the LGPL 2.1 + see COPYING and AUTHORS files in distribution or http://mpg123.org + initially written by Taihei Monma +*/ + +#include "mangle.h" + +#define ARG(n) (8+n*4)(%ebp) +#define TEMP(n) (4+n*16)(%esp) +#define TEMP_BYTE(n) (4+n)(%esp) + +/* + void dct64_sse(short *out0, short *out1, real *samples); +*/ + +#ifndef __APPLE__ + .section .rodata +#else + .data +#endif + ALIGN16 +pnpn: + .long 0 + .long -2147483648 + .long 0 + .long -2147483648 + ALIGN16 +mask: + .long -1 + .long -1 + .long -1 + .long 0 + + .text + ALIGN16 +.globl ASM_NAME(dct64_sse) +ASM_NAME(dct64_sse): + pushl %ebp + movl %esp, %ebp + + andl $-16, %esp /* align the stack at 16 bytes */ + subl $128, %esp /* reserve space for temporal store */ + pushl %ebx + + movl ARG(0), %ecx + movl ARG(1), %ebx + movl ARG(2), %eax + + MOVUAPS (%eax), %xmm7 + MOVUAPS 16(%eax), %xmm6 + MOVUAPS 112(%eax), %xmm0 + MOVUAPS 96(%eax), %xmm1 + shufps $0x1b, %xmm0, %xmm0 + shufps $0x1b, %xmm1, %xmm1 + movaps %xmm7, %xmm4 + movaps %xmm6, %xmm5 + addps %xmm0, %xmm4 + addps %xmm1, %xmm5 + subps %xmm0, %xmm7 + subps %xmm1, %xmm6 + movaps %xmm4, TEMP(0) + movaps %xmm5, TEMP(1) + + MOVUAPS 32(%eax), %xmm2 + MOVUAPS 48(%eax), %xmm3 + MOVUAPS 80(%eax), %xmm0 + MOVUAPS 64(%eax), %xmm1 + shufps $0x1b, %xmm0, %xmm0 + shufps $0x1b, %xmm1, %xmm1 + movaps %xmm2, %xmm5 + movaps %xmm3, %xmm4 + addps %xmm0, %xmm2 + addps %xmm1, %xmm3 + subps %xmm0, %xmm5 + subps %xmm1, %xmm4 + + mulps ASM_NAME(costab_mmxsse), %xmm7 + mulps ASM_NAME(costab_mmxsse)+16, %xmm6 + mulps ASM_NAME(costab_mmxsse)+32, %xmm5 + mulps ASM_NAME(costab_mmxsse)+48, %xmm4 + + shufps $0x1b, %xmm2, %xmm2 + shufps $0x1b, %xmm3, %xmm3 + shufps $0x1b, %xmm4, %xmm4 + shufps $0x1b, %xmm5, %xmm5 + movaps TEMP(0), 
%xmm0 + movaps TEMP(1), %xmm1 + subps %xmm3, %xmm0 + subps %xmm2, %xmm1 + addps TEMP(0), %xmm3 + addps TEMP(1), %xmm2 + movaps %xmm3, TEMP(0) + movaps %xmm2, TEMP(1) + movaps %xmm6, %xmm2 + movaps %xmm7, %xmm3 + subps %xmm5, %xmm6 + subps %xmm4, %xmm7 + addps %xmm3, %xmm4 + addps %xmm2, %xmm5 + mulps ASM_NAME(costab_mmxsse)+64, %xmm0 + mulps ASM_NAME(costab_mmxsse)+80, %xmm1 + mulps ASM_NAME(costab_mmxsse)+80, %xmm6 + mulps ASM_NAME(costab_mmxsse)+64, %xmm7 + + movaps TEMP(0), %xmm2 + movaps TEMP(1), %xmm3 + shufps $0x1b, %xmm3, %xmm3 + shufps $0x1b, %xmm5, %xmm5 + shufps $0x1b, %xmm1, %xmm1 + shufps $0x1b, %xmm6, %xmm6 + movaps %xmm0, TEMP(1) + subps %xmm3, %xmm2 + subps %xmm1, %xmm0 + addps TEMP(0), %xmm3 + addps TEMP(1), %xmm1 + movaps %xmm3, TEMP(0) + movaps %xmm1, TEMP(2) + movaps %xmm5, %xmm1 + movaps %xmm4, %xmm5 + movaps %xmm7, %xmm3 + subps %xmm1, %xmm5 + subps %xmm6, %xmm7 + addps %xmm1, %xmm4 + addps %xmm3, %xmm6 + mulps ASM_NAME(costab_mmxsse)+96, %xmm2 + mulps ASM_NAME(costab_mmxsse)+96, %xmm0 + mulps ASM_NAME(costab_mmxsse)+96, %xmm5 + mulps ASM_NAME(costab_mmxsse)+96, %xmm7 + movaps %xmm2, TEMP(1) + movaps %xmm0, TEMP(3) + + movaps %xmm4, %xmm2 + movaps %xmm5, %xmm3 + shufps $0x44, %xmm6, %xmm2 + shufps $0xbb, %xmm7, %xmm5 + shufps $0xbb, %xmm6, %xmm4 + shufps $0x44, %xmm7, %xmm3 + movaps %xmm2, %xmm6 + movaps %xmm3, %xmm7 + subps %xmm4, %xmm2 + subps %xmm5, %xmm3 + addps %xmm6, %xmm4 + addps %xmm7, %xmm5 + movaps ASM_NAME(costab_mmxsse)+112, %xmm0 + movlhps %xmm0, %xmm0 + mulps %xmm0, %xmm2 + mulps %xmm0, %xmm3 + movaps %xmm0, TEMP(4) + movaps %xmm4, %xmm6 + movaps %xmm5, %xmm7 + shufps $0x14, %xmm2, %xmm4 + shufps $0xbe, %xmm2, %xmm6 + shufps $0x14, %xmm3, %xmm5 + shufps $0xbe, %xmm3, %xmm7 + movaps %xmm5, TEMP(5) + movaps %xmm7, TEMP(7) + + movaps TEMP(0), %xmm0 + movaps TEMP(1), %xmm1 + movaps %xmm0, %xmm2 + movaps %xmm1, %xmm3 + shufps $0x44, TEMP(2), %xmm2 + shufps $0xbb, TEMP(3), %xmm1 + shufps $0xbb, TEMP(2), %xmm0 + shufps $0x44, TEMP(3), %xmm3 + movaps %xmm2, %xmm5 + movaps %xmm3, %xmm7 + subps %xmm0, %xmm2 + subps %xmm1, %xmm3 + addps %xmm5, %xmm0 + addps %xmm7, %xmm1 + mulps TEMP(4), %xmm2 + mulps TEMP(4), %xmm3 + movaps %xmm0, %xmm5 + movaps %xmm1, %xmm7 + shufps $0x14, %xmm2, %xmm0 + shufps $0xbe, %xmm2, %xmm5 + shufps $0x14, %xmm3, %xmm1 + shufps $0xbe, %xmm3, %xmm7 + + movaps %xmm0, TEMP(0) + movaps %xmm1, TEMP(1) + movaps %xmm5, TEMP(2) + movaps %xmm7, TEMP(3) + + movss ASM_NAME(costab_mmxsse)+120, %xmm5 + shufps $0x00, %xmm5, %xmm5 + xorps pnpn, %xmm5 + + movaps %xmm4, %xmm0 + movaps %xmm6, %xmm1 + unpcklps TEMP(5), %xmm4 + unpckhps TEMP(5), %xmm0 + unpcklps TEMP(7), %xmm6 + unpckhps TEMP(7), %xmm1 + movaps %xmm4, %xmm2 + movaps %xmm6, %xmm3 + unpcklps %xmm0, %xmm4 + unpckhps %xmm0, %xmm2 + unpcklps %xmm1, %xmm6 + unpckhps %xmm1, %xmm3 + movaps %xmm4, %xmm0 + movaps %xmm6, %xmm1 + subps %xmm2, %xmm0 + subps %xmm3, %xmm1 + addps %xmm2, %xmm4 + addps %xmm3, %xmm6 + mulps %xmm5, %xmm0 + mulps %xmm5, %xmm1 + movaps %xmm5, TEMP(5) + movaps %xmm4, %xmm5 + movaps %xmm6, %xmm7 + unpcklps %xmm0, %xmm4 + unpckhps %xmm0, %xmm5 + unpcklps %xmm1, %xmm6 + unpckhps %xmm1, %xmm7 + + movaps TEMP(0), %xmm0 + movaps TEMP(2), %xmm2 + movaps %xmm4, TEMP(4) + movaps %xmm6, TEMP(6) + + movaps %xmm0, %xmm4 + movaps %xmm2, %xmm6 + unpcklps TEMP(1), %xmm0 + unpckhps TEMP(1), %xmm4 + unpcklps TEMP(3), %xmm2 + unpckhps TEMP(3), %xmm6 + movaps %xmm0, %xmm1 + movaps %xmm2, %xmm3 + unpcklps %xmm4, %xmm0 + unpckhps %xmm4, %xmm1 + unpcklps %xmm6, %xmm2 + unpckhps %xmm6, %xmm3 + movaps 
%xmm0, %xmm4 + movaps %xmm2, %xmm6 + subps %xmm1, %xmm4 + subps %xmm3, %xmm6 + addps %xmm1, %xmm0 + addps %xmm3, %xmm2 + mulps TEMP(5), %xmm4 + mulps TEMP(5), %xmm6 + movaps %xmm0, %xmm1 + movaps %xmm2, %xmm3 + unpcklps %xmm4, %xmm0 + unpckhps %xmm4, %xmm1 + unpcklps %xmm6, %xmm2 + unpckhps %xmm6, %xmm3 + + movaps %xmm0, TEMP(0) + movaps %xmm1, TEMP(1) + movaps %xmm2, TEMP(2) + movaps %xmm3, TEMP(3) + movaps %xmm5, TEMP(5) + movaps %xmm7, TEMP(7) + + movss TEMP_BYTE(12), %xmm0 + movss TEMP_BYTE(28), %xmm1 + movss TEMP_BYTE(44), %xmm2 + movss TEMP_BYTE(60), %xmm3 + addss TEMP_BYTE(8), %xmm0 + addss TEMP_BYTE(24), %xmm1 + addss TEMP_BYTE(40), %xmm2 + addss TEMP_BYTE(56), %xmm3 + movss %xmm0, TEMP_BYTE(8) + movss %xmm1, TEMP_BYTE(24) + movss %xmm2, TEMP_BYTE(40) + movss %xmm3, TEMP_BYTE(56) + movss TEMP_BYTE(76), %xmm0 + movss TEMP_BYTE(92), %xmm1 + movss TEMP_BYTE(108), %xmm2 + movss TEMP_BYTE(124), %xmm3 + addss TEMP_BYTE(72), %xmm0 + addss TEMP_BYTE(88), %xmm1 + addss TEMP_BYTE(104), %xmm2 + addss TEMP_BYTE(120), %xmm3 + movss %xmm0, TEMP_BYTE(72) + movss %xmm1, TEMP_BYTE(88) + movss %xmm2, TEMP_BYTE(104) + movss %xmm3, TEMP_BYTE(120) + + movaps TEMP_BYTE(16), %xmm1 + movaps TEMP_BYTE(48), %xmm3 + movaps TEMP_BYTE(80), %xmm5 + movaps TEMP_BYTE(112), %xmm7 + movaps %xmm1, %xmm0 + movaps %xmm3, %xmm2 + movaps %xmm5, %xmm4 + movaps %xmm7, %xmm6 + shufps $0x1e, %xmm0, %xmm0 + shufps $0x1e, %xmm2, %xmm2 + shufps $0x1e, %xmm4, %xmm4 + shufps $0x1e, %xmm6, %xmm6 + andps mask, %xmm0 + andps mask, %xmm2 + andps mask, %xmm4 + andps mask, %xmm6 + addps %xmm0, %xmm1 + addps %xmm2, %xmm3 + addps %xmm4, %xmm5 + addps %xmm6, %xmm7 + + movaps TEMP_BYTE(32), %xmm2 + movaps TEMP_BYTE(96), %xmm6 + movaps %xmm2, %xmm0 + movaps %xmm6, %xmm4 + shufps $0x1e, %xmm0, %xmm0 + shufps $0x1e, %xmm4, %xmm4 + andps mask, %xmm0 + andps mask, %xmm4 + addps %xmm3, %xmm2 + addps %xmm0, %xmm3 + addps %xmm7, %xmm6 + addps %xmm4, %xmm7 + + movaps TEMP_BYTE(0), %xmm0 + movaps TEMP_BYTE(64), %xmm4 + + cvtps2pi %xmm0, %mm0 + cvtps2pi %xmm1, %mm1 + movhlps %xmm0, %xmm0 + movhlps %xmm1, %xmm1 + cvtps2pi %xmm0, %mm2 + cvtps2pi %xmm1, %mm3 + packssdw %mm2, %mm0 + packssdw %mm3, %mm1 + + cvtps2pi %xmm2, %mm2 + cvtps2pi %xmm3, %mm3 + movhlps %xmm2, %xmm2 + movhlps %xmm3, %xmm3 + cvtps2pi %xmm2, %mm4 + cvtps2pi %xmm3, %mm5 + packssdw %mm4, %mm2 + packssdw %mm5, %mm3 + + movd %mm0, %eax + movd %mm1, %edx + movw %ax, 512(%ecx) + movw %dx, 384(%ecx) + shrl $16, %eax + shrl $16, %edx + movw %ax, (%ecx) + movw %ax, (%ebx) + movw %dx, 128(%ebx) + + movd %mm2, %eax + movd %mm3, %edx + movw %ax, 448(%ecx) + movw %dx, 320(%ecx) + shrl $16, %eax + shrl $16, %edx + movw %ax, 64(%ebx) + movw %dx, 192(%ebx) + + psrlq $32, %mm0 + psrlq $32, %mm1 + movd %mm0, %eax + movd %mm1, %edx + movw %ax, 256(%ecx) + movw %dx, 128(%ecx) + shrl $16, %eax + shrl $16, %edx + movw %ax, 256(%ebx) + movw %dx, 384(%ebx) + + psrlq $32, %mm2 + psrlq $32, %mm3 + movd %mm2, %eax + movd %mm3, %edx + movw %ax, 192(%ecx) + movw %dx, 64(%ecx) + shrl $16, %eax + shrl $16, %edx + movw %ax, 320(%ebx) + movw %dx, 448(%ebx) + + movaps %xmm4, %xmm0 + shufps $0x1e, %xmm0, %xmm0 + movaps %xmm5, %xmm1 + andps mask, %xmm0 + + addps %xmm6, %xmm4 + addps %xmm7, %xmm5 + addps %xmm1, %xmm6 + addps %xmm0, %xmm7 + + cvtps2pi %xmm4, %mm0 + cvtps2pi %xmm5, %mm1 + movhlps %xmm4, %xmm4 + movhlps %xmm5, %xmm5 + cvtps2pi %xmm4, %mm2 + cvtps2pi %xmm5, %mm3 + packssdw %mm2, %mm0 + packssdw %mm3, %mm1 + + cvtps2pi %xmm6, %mm2 + cvtps2pi %xmm7, %mm3 + movhlps %xmm6, %xmm6 + movhlps %xmm7, %xmm7 + 
cvtps2pi %xmm6, %mm4 + cvtps2pi %xmm7, %mm5 + packssdw %mm4, %mm2 + packssdw %mm5, %mm3 + + movd %mm0, %eax + movd %mm2, %edx + movw %ax, 480(%ecx) + movw %dx, 416(%ecx) + shrl $16, %eax + shrl $16, %edx + movw %ax, 32(%ebx) + movw %dx, 96(%ebx) + + psrlq $32, %mm0 + psrlq $32, %mm2 + movd %mm0, %eax + movd %mm2, %edx + movw %ax, 224(%ecx) + movw %dx, 160(%ecx) + shrl $16, %eax + shrl $16, %edx + movw %ax, 288(%ebx) + movw %dx, 352(%ebx) + + movd %mm1, %eax + movd %mm3, %edx + movw %ax, 352(%ecx) + movw %dx, 288(%ecx) + shrl $16, %eax + shrl $16, %edx + movw %ax, 160(%ebx) + movw %dx, 224(%ebx) + + psrlq $32, %mm1 + psrlq $32, %mm3 + movd %mm1, %eax + movd %mm3, %edx + movw %ax, 96(%ecx) + movw %dx, 32(%ecx) + shrl $16, %eax + shrl $16, %edx + movw %ax, 416(%ebx) + movw %dx, 480(%ebx) + + popl %ebx + movl %ebp, %esp + popl %ebp + ret + +NONEXEC_STACK Index: include/reactos/libs/libmpg123/dct64_sse_float.S =================================================================== --- include/reactos/libs/libmpg123/dct64_sse_float.S (revision 0) +++ include/reactos/libs/libmpg123/dct64_sse_float.S (working copy) @@ -0,0 +1,401 @@ +/* + dct64_sse_float: SSE optimized dct64 (float output version) + + copyright 1995-2009 by the mpg123 project - free software under the terms of the LGPL 2.1 + see COPYING and AUTHORS files in distribution or http://mpg123.org + initially written by Taihei Monma +*/ + +#include "mangle.h" + +#define ARG(n) (8+n*4)(%ebp) +#define TEMP(n) (4+n*16)(%esp) +#define TEMP_BYTE(n) (4+n)(%esp) + +/* + void dct64_real_sse(real *out0, real *out1, real *samples); +*/ + +#ifndef __APPLE__ + .section .rodata +#else + .data +#endif + ALIGN16 +pnpn: + .long 0 + .long -2147483648 + .long 0 + .long -2147483648 + ALIGN16 +mask: + .long -1 + .long -1 + .long -1 + .long 0 + + .text + ALIGN16 +.globl ASM_NAME(dct64_real_sse) +ASM_NAME(dct64_real_sse): + pushl %ebp + movl %esp, %ebp + + andl $-16, %esp /* align the stack at 16 bytes */ + subl $128, %esp /* reserve space for temporal store */ + pushl %ebx + + movl ARG(0), %ecx + movl ARG(1), %ebx + movl ARG(2), %eax + + MOVUAPS (%eax), %xmm7 + MOVUAPS 16(%eax), %xmm6 + MOVUAPS 112(%eax), %xmm0 + MOVUAPS 96(%eax), %xmm1 + shufps $0x1b, %xmm0, %xmm0 + shufps $0x1b, %xmm1, %xmm1 + movaps %xmm7, %xmm4 + movaps %xmm6, %xmm5 + addps %xmm0, %xmm4 + addps %xmm1, %xmm5 + subps %xmm0, %xmm7 + subps %xmm1, %xmm6 + movaps %xmm4, TEMP(0) + movaps %xmm5, TEMP(1) + + MOVUAPS 32(%eax), %xmm2 + MOVUAPS 48(%eax), %xmm3 + MOVUAPS 80(%eax), %xmm0 + MOVUAPS 64(%eax), %xmm1 + shufps $0x1b, %xmm0, %xmm0 + shufps $0x1b, %xmm1, %xmm1 + movaps %xmm2, %xmm5 + movaps %xmm3, %xmm4 + addps %xmm0, %xmm2 + addps %xmm1, %xmm3 + subps %xmm0, %xmm5 + subps %xmm1, %xmm4 + + mulps ASM_NAME(costab_mmxsse), %xmm7 + mulps ASM_NAME(costab_mmxsse)+16, %xmm6 + mulps ASM_NAME(costab_mmxsse)+32, %xmm5 + mulps ASM_NAME(costab_mmxsse)+48, %xmm4 + + shufps $0x1b, %xmm2, %xmm2 + shufps $0x1b, %xmm3, %xmm3 + shufps $0x1b, %xmm4, %xmm4 + shufps $0x1b, %xmm5, %xmm5 + movaps TEMP(0), %xmm0 + movaps TEMP(1), %xmm1 + subps %xmm3, %xmm0 + subps %xmm2, %xmm1 + addps TEMP(0), %xmm3 + addps TEMP(1), %xmm2 + movaps %xmm3, TEMP(0) + movaps %xmm2, TEMP(1) + movaps %xmm6, %xmm2 + movaps %xmm7, %xmm3 + subps %xmm5, %xmm6 + subps %xmm4, %xmm7 + addps %xmm3, %xmm4 + addps %xmm2, %xmm5 + mulps ASM_NAME(costab_mmxsse)+64, %xmm0 + mulps ASM_NAME(costab_mmxsse)+80, %xmm1 + mulps ASM_NAME(costab_mmxsse)+80, %xmm6 + mulps ASM_NAME(costab_mmxsse)+64, %xmm7 + + movaps TEMP(0), %xmm2 + movaps TEMP(1), %xmm3 + shufps 
$0x1b, %xmm3, %xmm3 + shufps $0x1b, %xmm5, %xmm5 + shufps $0x1b, %xmm1, %xmm1 + shufps $0x1b, %xmm6, %xmm6 + movaps %xmm0, TEMP(1) + subps %xmm3, %xmm2 + subps %xmm1, %xmm0 + addps TEMP(0), %xmm3 + addps TEMP(1), %xmm1 + movaps %xmm3, TEMP(0) + movaps %xmm1, TEMP(2) + movaps %xmm5, %xmm1 + movaps %xmm4, %xmm5 + movaps %xmm7, %xmm3 + subps %xmm1, %xmm5 + subps %xmm6, %xmm7 + addps %xmm1, %xmm4 + addps %xmm3, %xmm6 + mulps ASM_NAME(costab_mmxsse)+96, %xmm2 + mulps ASM_NAME(costab_mmxsse)+96, %xmm0 + mulps ASM_NAME(costab_mmxsse)+96, %xmm5 + mulps ASM_NAME(costab_mmxsse)+96, %xmm7 + movaps %xmm2, TEMP(1) + movaps %xmm0, TEMP(3) + + movaps %xmm4, %xmm2 + movaps %xmm5, %xmm3 + shufps $0x44, %xmm6, %xmm2 + shufps $0xbb, %xmm7, %xmm5 + shufps $0xbb, %xmm6, %xmm4 + shufps $0x44, %xmm7, %xmm3 + movaps %xmm2, %xmm6 + movaps %xmm3, %xmm7 + subps %xmm4, %xmm2 + subps %xmm5, %xmm3 + addps %xmm6, %xmm4 + addps %xmm7, %xmm5 + movaps ASM_NAME(costab_mmxsse)+112, %xmm0 + movlhps %xmm0, %xmm0 + mulps %xmm0, %xmm2 + mulps %xmm0, %xmm3 + movaps %xmm0, TEMP(4) + movaps %xmm4, %xmm6 + movaps %xmm5, %xmm7 + shufps $0x14, %xmm2, %xmm4 + shufps $0xbe, %xmm2, %xmm6 + shufps $0x14, %xmm3, %xmm5 + shufps $0xbe, %xmm3, %xmm7 + movaps %xmm5, TEMP(5) + movaps %xmm7, TEMP(7) + + movaps TEMP(0), %xmm0 + movaps TEMP(1), %xmm1 + movaps %xmm0, %xmm2 + movaps %xmm1, %xmm3 + shufps $0x44, TEMP(2), %xmm2 + shufps $0xbb, TEMP(3), %xmm1 + shufps $0xbb, TEMP(2), %xmm0 + shufps $0x44, TEMP(3), %xmm3 + movaps %xmm2, %xmm5 + movaps %xmm3, %xmm7 + subps %xmm0, %xmm2 + subps %xmm1, %xmm3 + addps %xmm5, %xmm0 + addps %xmm7, %xmm1 + mulps TEMP(4), %xmm2 + mulps TEMP(4), %xmm3 + movaps %xmm0, %xmm5 + movaps %xmm1, %xmm7 + shufps $0x14, %xmm2, %xmm0 + shufps $0xbe, %xmm2, %xmm5 + shufps $0x14, %xmm3, %xmm1 + shufps $0xbe, %xmm3, %xmm7 + + movaps %xmm0, TEMP(0) + movaps %xmm1, TEMP(1) + movaps %xmm5, TEMP(2) + movaps %xmm7, TEMP(3) + + movss ASM_NAME(costab_mmxsse)+120, %xmm5 + shufps $0x00, %xmm5, %xmm5 + xorps pnpn, %xmm5 + + movaps %xmm4, %xmm0 + movaps %xmm6, %xmm1 + unpcklps TEMP(5), %xmm4 + unpckhps TEMP(5), %xmm0 + unpcklps TEMP(7), %xmm6 + unpckhps TEMP(7), %xmm1 + movaps %xmm4, %xmm2 + movaps %xmm6, %xmm3 + unpcklps %xmm0, %xmm4 + unpckhps %xmm0, %xmm2 + unpcklps %xmm1, %xmm6 + unpckhps %xmm1, %xmm3 + movaps %xmm4, %xmm0 + movaps %xmm6, %xmm1 + subps %xmm2, %xmm0 + subps %xmm3, %xmm1 + addps %xmm2, %xmm4 + addps %xmm3, %xmm6 + mulps %xmm5, %xmm0 + mulps %xmm5, %xmm1 + movaps %xmm5, TEMP(5) + movaps %xmm4, %xmm5 + movaps %xmm6, %xmm7 + unpcklps %xmm0, %xmm4 + unpckhps %xmm0, %xmm5 + unpcklps %xmm1, %xmm6 + unpckhps %xmm1, %xmm7 + + movaps TEMP(0), %xmm0 + movaps TEMP(2), %xmm2 + movaps %xmm4, TEMP(4) + movaps %xmm6, TEMP(6) + + movaps %xmm0, %xmm4 + movaps %xmm2, %xmm6 + unpcklps TEMP(1), %xmm0 + unpckhps TEMP(1), %xmm4 + unpcklps TEMP(3), %xmm2 + unpckhps TEMP(3), %xmm6 + movaps %xmm0, %xmm1 + movaps %xmm2, %xmm3 + unpcklps %xmm4, %xmm0 + unpckhps %xmm4, %xmm1 + unpcklps %xmm6, %xmm2 + unpckhps %xmm6, %xmm3 + movaps %xmm0, %xmm4 + movaps %xmm2, %xmm6 + subps %xmm1, %xmm4 + subps %xmm3, %xmm6 + addps %xmm1, %xmm0 + addps %xmm3, %xmm2 + mulps TEMP(5), %xmm4 + mulps TEMP(5), %xmm6 + movaps %xmm0, %xmm1 + movaps %xmm2, %xmm3 + unpcklps %xmm4, %xmm0 + unpckhps %xmm4, %xmm1 + unpcklps %xmm6, %xmm2 + unpckhps %xmm6, %xmm3 + + movaps %xmm0, TEMP(0) + movaps %xmm1, TEMP(1) + movaps %xmm2, TEMP(2) + movaps %xmm3, TEMP(3) + movaps %xmm5, TEMP(5) + movaps %xmm7, TEMP(7) + + movss TEMP_BYTE(12), %xmm0 + movss TEMP_BYTE(28), %xmm1 + movss 
TEMP_BYTE(44), %xmm2 + movss TEMP_BYTE(60), %xmm3 + addss TEMP_BYTE(8), %xmm0 + addss TEMP_BYTE(24), %xmm1 + addss TEMP_BYTE(40), %xmm2 + addss TEMP_BYTE(56), %xmm3 + movss %xmm0, TEMP_BYTE(8) + movss %xmm1, TEMP_BYTE(24) + movss %xmm2, TEMP_BYTE(40) + movss %xmm3, TEMP_BYTE(56) + movss TEMP_BYTE(76), %xmm0 + movss TEMP_BYTE(92), %xmm1 + movss TEMP_BYTE(108), %xmm2 + movss TEMP_BYTE(124), %xmm3 + addss TEMP_BYTE(72), %xmm0 + addss TEMP_BYTE(88), %xmm1 + addss TEMP_BYTE(104), %xmm2 + addss TEMP_BYTE(120), %xmm3 + movss %xmm0, TEMP_BYTE(72) + movss %xmm1, TEMP_BYTE(88) + movss %xmm2, TEMP_BYTE(104) + movss %xmm3, TEMP_BYTE(120) + + movaps TEMP_BYTE(16), %xmm1 + movaps TEMP_BYTE(48), %xmm3 + movaps TEMP_BYTE(80), %xmm5 + movaps TEMP_BYTE(112), %xmm7 + movaps %xmm1, %xmm0 + movaps %xmm3, %xmm2 + movaps %xmm5, %xmm4 + movaps %xmm7, %xmm6 + shufps $0x1e, %xmm0, %xmm0 + shufps $0x1e, %xmm2, %xmm2 + shufps $0x1e, %xmm4, %xmm4 + shufps $0x1e, %xmm6, %xmm6 + andps mask, %xmm0 + andps mask, %xmm2 + andps mask, %xmm4 + andps mask, %xmm6 + addps %xmm0, %xmm1 + addps %xmm2, %xmm3 + addps %xmm4, %xmm5 + addps %xmm6, %xmm7 + + movaps TEMP_BYTE(32), %xmm2 + movaps TEMP_BYTE(96), %xmm6 + movaps %xmm2, %xmm0 + movaps %xmm6, %xmm4 + shufps $0x1e, %xmm0, %xmm0 + shufps $0x1e, %xmm4, %xmm4 + andps mask, %xmm0 + andps mask, %xmm4 + addps %xmm3, %xmm2 + addps %xmm0, %xmm3 + addps %xmm7, %xmm6 + addps %xmm4, %xmm7 + + movaps TEMP_BYTE(0), %xmm0 + movaps TEMP_BYTE(64), %xmm4 + + movss %xmm0, 1024(%ecx) + movss %xmm2, 896(%ecx) + movss %xmm1, 768(%ecx) + movss %xmm3, 640(%ecx) + + shufps $0xe1, %xmm0, %xmm0 + shufps $0xe1, %xmm2, %xmm2 + shufps $0xe1, %xmm1, %xmm1 + shufps $0xe1, %xmm3, %xmm3 + movss %xmm0, (%ecx) + movss %xmm0, (%ebx) + movss %xmm2, 128(%ebx) + movss %xmm1, 256(%ebx) + movss %xmm3, 384(%ebx) + + movhlps %xmm0, %xmm0 + movhlps %xmm2, %xmm2 + movhlps %xmm1, %xmm1 + movhlps %xmm3, %xmm3 + movss %xmm0, 512(%ecx) + movss %xmm2, 384(%ecx) + movss %xmm1, 256(%ecx) + movss %xmm3, 128(%ecx) + + shufps $0xe1, %xmm0, %xmm0 + shufps $0xe1, %xmm2, %xmm2 + shufps $0xe1, %xmm1, %xmm1 + shufps $0xe1, %xmm3, %xmm3 + movss %xmm0, 512(%ebx) + movss %xmm2, 640(%ebx) + movss %xmm1, 768(%ebx) + movss %xmm3, 896(%ebx) + + movaps %xmm4, %xmm0 + shufps $0x1e, %xmm0, %xmm0 + movaps %xmm5, %xmm1 + andps mask, %xmm0 + + addps %xmm6, %xmm4 + addps %xmm7, %xmm5 + addps %xmm1, %xmm6 + addps %xmm0, %xmm7 + + movss %xmm4, 960(%ecx) + movss %xmm6, 832(%ecx) + movss %xmm5, 704(%ecx) + movss %xmm7, 576(%ecx) + movhlps %xmm4, %xmm0 + movhlps %xmm6, %xmm1 + movhlps %xmm5, %xmm2 + movhlps %xmm7, %xmm3 + movss %xmm0, 448(%ecx) + movss %xmm1, 320(%ecx) + movss %xmm2, 192(%ecx) + movss %xmm3, 64(%ecx) + + shufps $0xe1, %xmm4, %xmm4 + shufps $0xe1, %xmm6, %xmm6 + shufps $0xe1, %xmm5, %xmm5 + shufps $0xe1, %xmm7, %xmm7 + movss %xmm4, 64(%ebx) + movss %xmm6, 192(%ebx) + movss %xmm5, 320(%ebx) + movss %xmm7, 448(%ebx) + + shufps $0xe1, %xmm0, %xmm0 + shufps $0xe1, %xmm1, %xmm1 + shufps $0xe1, %xmm2, %xmm2 + shufps $0xe1, %xmm3, %xmm3 + movss %xmm0, 576(%ebx) + movss %xmm1, 704(%ebx) + movss %xmm2, 832(%ebx) + movss %xmm3, 960(%ebx) + + popl %ebx + movl %ebp, %esp + popl %ebp + ret + +NONEXEC_STACK Index: include/reactos/libs/libmpg123/dct64_x86_64.S =================================================================== --- include/reactos/libs/libmpg123/dct64_x86_64.S (revision 0) +++ include/reactos/libs/libmpg123/dct64_x86_64.S (working copy) @@ -0,0 +1,464 @@ +/* + dct64_x86_64: SSE optimized dct64 for x86-64 + + copyright 1995-2009 by the 
mpg123 project - free software under the terms of the LGPL 2.1 + see COPYING and AUTHORS files in distribution or http://mpg123.org + initially written by Taihei Monma +*/ + +#include "mangle.h" + +#ifdef IS_MSABI +/* short *out0 */ +#define ARG0 %r9 +/* short *out1 */ +#define ARG1 %rdx +/* real *samples */ +#define ARG2 %r8 +#else +/* short *out0 */ +#define ARG0 %rdi +/* short *out1 */ +#define ARG1 %rsi +/* real *samples */ +#define ARG2 %rdx +#endif + +/* + void dct64_x86_64(short *out0, short *out1, real *samples); +*/ + +#ifndef __APPLE__ + .section .rodata +#else + .data +#endif + ALIGN32 +ASM_NAME(costab_x86_64): + .long 1056974725 + .long 1057056395 + .long 1057223771 + .long 1057485416 + .long 1057855544 + .long 1058356026 + .long 1059019886 + .long 1059897405 + .long 1061067246 + .long 1062657950 + .long 1064892987 + .long 1066774581 + .long 1069414683 + .long 1073984175 + .long 1079645762 + .long 1092815430 + .long 1057005197 + .long 1057342072 + .long 1058087743 + .long 1059427869 + .long 1061799040 + .long 1065862217 + .long 1071413542 + .long 1084439708 + .long 1057128951 + .long 1058664893 + .long 1063675095 + .long 1076102863 + .long 1057655764 + .long 1067924853 + .long 1060439283 + .long 0 + .text + ALIGN16 +.globl ASM_NAME(dct64_x86_64) +ASM_NAME(dct64_x86_64): +#ifdef IS_MSABI /* should save xmm6-15 */ + movq %rcx, ARG0 + subq $168, %rsp /* stack alignment + 10 xmm registers */ + movaps %xmm6, (%rsp) + movaps %xmm7, 16(%rsp) + movaps %xmm8, 32(%rsp) + movaps %xmm9, 48(%rsp) + movaps %xmm10, 64(%rsp) + movaps %xmm11, 80(%rsp) + movaps %xmm12, 96(%rsp) + movaps %xmm13, 112(%rsp) + movaps %xmm14, 128(%rsp) + movaps %xmm15, 144(%rsp) +#endif + + leaq ASM_NAME(costab_x86_64)(%rip), %rcx + + MOVUAPS (ARG2), %xmm15 + MOVUAPS 16(ARG2), %xmm14 + MOVUAPS 112(ARG2), %xmm0 + MOVUAPS 96(ARG2), %xmm1 + shufps $0x1b, %xmm0, %xmm0 + shufps $0x1b, %xmm1, %xmm1 + movaps %xmm15, %xmm8 + movaps %xmm14, %xmm9 + addps %xmm0, %xmm8 + addps %xmm1, %xmm9 + subps %xmm0, %xmm15 + subps %xmm1, %xmm14 + + MOVUAPS 32(ARG2), %xmm13 + MOVUAPS 48(ARG2), %xmm12 + MOVUAPS 80(ARG2), %xmm0 + MOVUAPS 64(ARG2), %xmm1 + shufps $0x1b, %xmm0, %xmm0 + shufps $0x1b, %xmm1, %xmm1 + movaps %xmm13, %xmm10 + movaps %xmm12, %xmm11 + addps %xmm0, %xmm10 + addps %xmm1, %xmm11 + subps %xmm0, %xmm13 + subps %xmm1, %xmm12 + + movaps (%rcx), %xmm0 + movaps 16(%rcx), %xmm1 + movaps 32(%rcx), %xmm2 + movaps 48(%rcx), %xmm3 + mulps %xmm0, %xmm15 + mulps %xmm1, %xmm14 + mulps %xmm2, %xmm13 + mulps %xmm3, %xmm12 + + movaps 64(%rcx), %xmm0 + movaps 80(%rcx), %xmm1 + + pshufd $0x1b, %xmm11, %xmm2 + pshufd $0x1b, %xmm10, %xmm3 + shufps $0x1b, %xmm13, %xmm13 + shufps $0x1b, %xmm12, %xmm12 + movaps %xmm8, %xmm11 + movaps %xmm9, %xmm10 + movaps %xmm14, %xmm4 + movaps %xmm15, %xmm5 + subps %xmm2, %xmm11 + subps %xmm3, %xmm10 + subps %xmm13, %xmm14 + subps %xmm12, %xmm15 + addps %xmm2, %xmm8 + addps %xmm3, %xmm9 + addps %xmm5, %xmm12 + addps %xmm4, %xmm13 + mulps %xmm0, %xmm11 + mulps %xmm1, %xmm10 + mulps %xmm1, %xmm14 + mulps %xmm0, %xmm15 + + movaps 96(%rcx), %xmm0 + + pshufd $0x1b, %xmm9, %xmm1 + pshufd $0x1b, %xmm13, %xmm2 + shufps $0x1b, %xmm10, %xmm10 + shufps $0x1b, %xmm14, %xmm14 + movaps %xmm8, %xmm9 + movaps %xmm12, %xmm13 + movaps %xmm11, %xmm3 + movaps %xmm15, %xmm4 + subps %xmm1, %xmm9 + subps %xmm2, %xmm13 + subps %xmm10, %xmm11 + subps %xmm14, %xmm15 + addps %xmm1, %xmm8 + addps %xmm2, %xmm12 + addps %xmm3, %xmm10 + addps %xmm4, %xmm14 + mulps %xmm0, %xmm9 + mulps %xmm0, %xmm13 + mulps %xmm0, %xmm11 + mulps %xmm0, 
%xmm15 + + movaps 112(%rcx), %xmm0 + movaps %xmm0, %xmm1 + movlhps %xmm1, %xmm1 + + movaps %xmm8, %xmm2 + movaps %xmm9, %xmm3 + shufps $0x44, %xmm10, %xmm2 + shufps $0xbb, %xmm11, %xmm9 + shufps $0xbb, %xmm10, %xmm8 + shufps $0x44, %xmm11, %xmm3 + movaps %xmm2, %xmm4 + movaps %xmm3, %xmm5 + subps %xmm8, %xmm2 + subps %xmm9, %xmm3 + addps %xmm4, %xmm8 + addps %xmm5, %xmm9 + mulps %xmm1, %xmm2 + mulps %xmm1, %xmm3 + movaps %xmm8, %xmm10 + movaps %xmm9, %xmm11 + shufps $0x14, %xmm2, %xmm8 + shufps $0xbe, %xmm2, %xmm10 + shufps $0x14, %xmm3, %xmm9 + shufps $0xbe, %xmm3, %xmm11 + + movaps %xmm12, %xmm2 + movaps %xmm13, %xmm3 + shufps $0x44, %xmm14, %xmm2 + shufps $0xbb, %xmm15, %xmm13 + shufps $0xbb, %xmm14, %xmm12 + shufps $0x44, %xmm15, %xmm3 + movaps %xmm2, %xmm4 + movaps %xmm3, %xmm5 + subps %xmm12, %xmm2 + subps %xmm13, %xmm3 + addps %xmm4, %xmm12 + addps %xmm5, %xmm13 + mulps %xmm1, %xmm2 + mulps %xmm1, %xmm3 + movaps %xmm12, %xmm14 + movaps %xmm13, %xmm15 + shufps $0x14, %xmm2, %xmm12 + shufps $0xbe, %xmm2, %xmm14 + shufps $0x14, %xmm3, %xmm13 + shufps $0xbe, %xmm3, %xmm15 + + shufps $0xaa, %xmm0, %xmm0 + pcmpeqd %xmm1, %xmm1 + pslld $31, %xmm1 + psllq $32, %xmm1 + xorps %xmm1, %xmm0 + + movaps %xmm8, %xmm1 + movaps %xmm10, %xmm2 + unpcklps %xmm9, %xmm8 + unpckhps %xmm9, %xmm1 + unpcklps %xmm11, %xmm10 + unpckhps %xmm11, %xmm2 + movaps %xmm8, %xmm3 + movaps %xmm10, %xmm4 + unpcklps %xmm1, %xmm8 + unpckhps %xmm1, %xmm3 + unpcklps %xmm2, %xmm10 + unpckhps %xmm2, %xmm4 + movaps %xmm8, %xmm1 + movaps %xmm10, %xmm2 + subps %xmm3, %xmm1 + subps %xmm4, %xmm2 + addps %xmm3, %xmm8 + addps %xmm4, %xmm10 + mulps %xmm0, %xmm1 + mulps %xmm0, %xmm2 + movaps %xmm8, %xmm9 + movaps %xmm10, %xmm11 + unpcklps %xmm1, %xmm8 + unpckhps %xmm1, %xmm9 + unpcklps %xmm2, %xmm10 + unpckhps %xmm2, %xmm11 + + movaps %xmm12, %xmm1 + movaps %xmm14, %xmm2 + unpcklps %xmm13, %xmm12 + unpckhps %xmm13, %xmm1 + unpcklps %xmm15, %xmm14 + unpckhps %xmm15, %xmm2 + movaps %xmm12, %xmm3 + movaps %xmm14, %xmm4 + unpcklps %xmm1, %xmm12 + unpckhps %xmm1, %xmm3 + unpcklps %xmm2, %xmm14 + unpckhps %xmm2, %xmm4 + movaps %xmm12, %xmm1 + movaps %xmm14, %xmm2 + subps %xmm3, %xmm1 + subps %xmm4, %xmm2 + addps %xmm3, %xmm12 + addps %xmm4, %xmm14 + mulps %xmm0, %xmm1 + mulps %xmm0, %xmm2 + movaps %xmm12, %xmm13 + movaps %xmm14, %xmm15 + unpcklps %xmm1, %xmm12 + unpckhps %xmm1, %xmm13 + unpcklps %xmm2, %xmm14 + unpckhps %xmm2, %xmm15 + + + xorps %xmm0, %xmm0 + xorps %xmm1, %xmm1 + shufpd $0x2, %xmm8, %xmm0 + shufpd $0x2, %xmm9, %xmm1 + psrlq $32, %xmm0 + psrlq $32, %xmm1 + addps %xmm0, %xmm8 + addps %xmm1, %xmm9 + + xorps %xmm0, %xmm0 + xorps %xmm1, %xmm1 + shufpd $0x2, %xmm10, %xmm0 + shufpd $0x2, %xmm11, %xmm1 + psrlq $32, %xmm0 + psrlq $32, %xmm1 + addps %xmm0, %xmm10 + addps %xmm1, %xmm11 + + xorps %xmm0, %xmm0 + xorps %xmm1, %xmm1 + shufpd $0x2, %xmm12, %xmm0 + shufpd $0x2, %xmm13, %xmm1 + psrlq $32, %xmm0 + psrlq $32, %xmm1 + addps %xmm0, %xmm12 + addps %xmm1, %xmm13 + + xorps %xmm0, %xmm0 + xorps %xmm1, %xmm1 + shufpd $0x2, %xmm14, %xmm0 + shufpd $0x2, %xmm15, %xmm1 + psrlq $32, %xmm0 + psrlq $32, %xmm1 + addps %xmm0, %xmm14 + addps %xmm1, %xmm15 + + pshufd $0x78, %xmm9, %xmm0 + pshufd $0x78, %xmm11, %xmm1 + pshufd $0x78, %xmm13, %xmm2 + pshufd $0x78, %xmm15, %xmm3 + psrldq $4, %xmm0 + psrldq $4, %xmm1 + psrldq $4, %xmm2 + psrldq $4, %xmm3 + addps %xmm0, %xmm9 + addps %xmm1, %xmm11 + addps %xmm2, %xmm13 + addps %xmm3, %xmm15 + + pshufd $0x78, %xmm10, %xmm0 + pshufd $0x78, %xmm14, %xmm1 + psrldq $4, %xmm0 + psrldq $4, %xmm1 + addps 
%xmm11, %xmm10 + addps %xmm15, %xmm14 + addps %xmm0, %xmm11 + addps %xmm1, %xmm15 + + cvtps2dq %xmm8, %xmm8 + cvtps2dq %xmm9, %xmm9 + cvtps2dq %xmm10, %xmm10 + cvtps2dq %xmm11, %xmm11 + packssdw %xmm10, %xmm8 + packssdw %xmm11, %xmm9 + + movd %xmm8, %eax + movd %xmm9, %ecx + movw %ax, 512(ARG0) + movw %cx, 384(ARG0) + shrl $16, %eax + shrl $16, %ecx + movw %ax, (ARG0) + movw %ax, (ARG1) + movw %cx, 128(ARG1) + + movhlps %xmm8, %xmm0 + movhlps %xmm9, %xmm1 + movd %xmm0, %eax + movd %xmm1, %ecx + movw %ax, 448(ARG0) + movw %cx, 320(ARG0) + shrl $16, %eax + shrl $16, %ecx + movw %ax, 64(ARG1) + movw %cx, 192(ARG1) + + pshuflw $0xee, %xmm8, %xmm2 + pshuflw $0xee, %xmm9, %xmm3 + movd %xmm2, %eax + movd %xmm3, %ecx + movw %ax, 256(ARG0) + movw %cx, 128(ARG0) + shrl $16, %eax + shrl $16, %ecx + movw %ax, 256(ARG1) + movw %cx, 384(ARG1) + + pshuflw $0xee, %xmm0, %xmm0 + pshuflw $0xee, %xmm1, %xmm1 + movd %xmm0, %eax + movd %xmm1, %ecx + movw %ax, 192(ARG0) + movw %cx, 64(ARG0) + shrl $16, %eax + shrl $16, %ecx + movw %ax, 320(ARG1) + movw %cx, 448(ARG1) + + movaps %xmm12, %xmm0 + movaps %xmm13, %xmm1 + movaps %xmm14, %xmm2 + movaps %xmm15, %xmm3 + shufps $0x1e, %xmm0, %xmm0 + pslldq $4, %xmm0 + psrldq $4, %xmm0 + addps %xmm2, %xmm12 + addps %xmm3, %xmm13 + addps %xmm1, %xmm14 + addps %xmm0, %xmm15 + + cvtps2dq %xmm12, %xmm12 + cvtps2dq %xmm13, %xmm13 + cvtps2dq %xmm14, %xmm14 + cvtps2dq %xmm15, %xmm15 + packssdw %xmm13, %xmm12 + packssdw %xmm15, %xmm14 + + movd %xmm12, %eax + movd %xmm14, %ecx + movw %ax, 480(ARG0) + movw %cx, 416(ARG0) + shrl $16, %eax + shrl $16, %ecx + movw %ax, 32(ARG1) + movw %cx, 96(ARG1) + + pshuflw $0xee, %xmm12, %xmm0 + pshuflw $0xee, %xmm14, %xmm1 + movd %xmm0, %eax + movd %xmm1, %ecx + movw %ax, 224(ARG0) + movw %cx, 160(ARG0) + shrl $16, %eax + shrl $16, %ecx + movw %ax, 288(ARG1) + movw %cx, 352(ARG1) + + movhlps %xmm12, %xmm0 + movhlps %xmm14, %xmm1 + movd %xmm0, %eax + movd %xmm1, %ecx + movw %ax, 352(ARG0) + movw %cx, 288(ARG0) + shrl $16, %eax + shrl $16, %ecx + movw %ax, 160(ARG1) + movw %cx, 224(ARG1) + + pshuflw $0xee, %xmm0, %xmm0 + pshuflw $0xee, %xmm1, %xmm1 + movd %xmm0, %eax + movd %xmm1, %ecx + movw %ax, 96(ARG0) + movw %cx, 32(ARG0) + shrl $16, %eax + shrl $16, %ecx + movw %ax, 416(ARG1) + movw %cx, 480(ARG1) + +#ifdef IS_MSABI + movaps (%rsp), %xmm6 + movaps 16(%rsp), %xmm7 + movaps 32(%rsp), %xmm8 + movaps 48(%rsp), %xmm9 + movaps 64(%rsp), %xmm10 + movaps 80(%rsp), %xmm11 + movaps 96(%rsp), %xmm12 + movaps 112(%rsp), %xmm13 + movaps 128(%rsp), %xmm14 + movaps 144(%rsp), %xmm15 + addq $168, %rsp +#endif + ret + +NONEXEC_STACK Index: include/reactos/libs/libmpg123/dct64_x86_64_float.S =================================================================== --- include/reactos/libs/libmpg123/dct64_x86_64_float.S (revision 0) +++ include/reactos/libs/libmpg123/dct64_x86_64_float.S (working copy) @@ -0,0 +1,426 @@ +/* + dct64_x86_64_float: SSE optimized dct64 for x86-64 (float output version) + + copyright 1995-2009 by the mpg123 project - free software under the terms of the LGPL 2.1 + see COPYING and AUTHORS files in distribution or http://mpg123.org + initially written by Taihei Monma +*/ + +#include "mangle.h" + +#ifdef IS_MSABI +/* short *out0 */ +#define ARG0 %r9 +/* short *out1 */ +#define ARG1 %rdx +/* real *samples */ +#define ARG2 %r8 +#else +/* real *out0 */ +#define ARG0 %rdi +/* real *out1 */ +#define ARG1 %rsi +/* real *samples */ +#define ARG2 %rdx +#endif + +/* + void dct64_real_x86_64(real *out0, real *out1, real *samples); +*/ + +#ifndef 
__APPLE__ + .section .rodata +#else + .data +#endif + ALIGN32 +ASM_NAME(costab_x86_64): + .long 1056974725 + .long 1057056395 + .long 1057223771 + .long 1057485416 + .long 1057855544 + .long 1058356026 + .long 1059019886 + .long 1059897405 + .long 1061067246 + .long 1062657950 + .long 1064892987 + .long 1066774581 + .long 1069414683 + .long 1073984175 + .long 1079645762 + .long 1092815430 + .long 1057005197 + .long 1057342072 + .long 1058087743 + .long 1059427869 + .long 1061799040 + .long 1065862217 + .long 1071413542 + .long 1084439708 + .long 1057128951 + .long 1058664893 + .long 1063675095 + .long 1076102863 + .long 1057655764 + .long 1067924853 + .long 1060439283 + .long 0 + .text + ALIGN16 +.globl ASM_NAME(dct64_real_x86_64) +ASM_NAME(dct64_real_x86_64): +#ifdef IS_MSABI /* should save xmm6-15 */ + movq %rcx, ARG0 + subq $168, %rsp /* stack alignment + 10 xmm registers */ + movaps %xmm6, (%rsp) + movaps %xmm7, 16(%rsp) + movaps %xmm8, 32(%rsp) + movaps %xmm9, 48(%rsp) + movaps %xmm10, 64(%rsp) + movaps %xmm11, 80(%rsp) + movaps %xmm12, 96(%rsp) + movaps %xmm13, 112(%rsp) + movaps %xmm14, 128(%rsp) + movaps %xmm15, 144(%rsp) +#endif + + leaq ASM_NAME(costab_x86_64)(%rip), %rcx + + MOVUAPS (ARG2), %xmm15 + MOVUAPS 16(ARG2), %xmm14 + MOVUAPS 112(ARG2), %xmm0 + MOVUAPS 96(ARG2), %xmm1 + shufps $0x1b, %xmm0, %xmm0 + shufps $0x1b, %xmm1, %xmm1 + movaps %xmm15, %xmm8 + movaps %xmm14, %xmm9 + addps %xmm0, %xmm8 + addps %xmm1, %xmm9 + subps %xmm0, %xmm15 + subps %xmm1, %xmm14 + + MOVUAPS 32(ARG2), %xmm13 + MOVUAPS 48(ARG2), %xmm12 + MOVUAPS 80(ARG2), %xmm0 + MOVUAPS 64(ARG2), %xmm1 + shufps $0x1b, %xmm0, %xmm0 + shufps $0x1b, %xmm1, %xmm1 + movaps %xmm13, %xmm10 + movaps %xmm12, %xmm11 + addps %xmm0, %xmm10 + addps %xmm1, %xmm11 + subps %xmm0, %xmm13 + subps %xmm1, %xmm12 + + movaps (%rcx), %xmm0 + movaps 16(%rcx), %xmm1 + movaps 32(%rcx), %xmm2 + movaps 48(%rcx), %xmm3 + mulps %xmm0, %xmm15 + mulps %xmm1, %xmm14 + mulps %xmm2, %xmm13 + mulps %xmm3, %xmm12 + + movaps 64(%rcx), %xmm0 + movaps 80(%rcx), %xmm1 + + pshufd $0x1b, %xmm11, %xmm2 + pshufd $0x1b, %xmm10, %xmm3 + shufps $0x1b, %xmm13, %xmm13 + shufps $0x1b, %xmm12, %xmm12 + movaps %xmm8, %xmm11 + movaps %xmm9, %xmm10 + movaps %xmm14, %xmm4 + movaps %xmm15, %xmm5 + subps %xmm2, %xmm11 + subps %xmm3, %xmm10 + subps %xmm13, %xmm14 + subps %xmm12, %xmm15 + addps %xmm2, %xmm8 + addps %xmm3, %xmm9 + addps %xmm5, %xmm12 + addps %xmm4, %xmm13 + mulps %xmm0, %xmm11 + mulps %xmm1, %xmm10 + mulps %xmm1, %xmm14 + mulps %xmm0, %xmm15 + + movaps 96(%rcx), %xmm0 + + pshufd $0x1b, %xmm9, %xmm1 + pshufd $0x1b, %xmm13, %xmm2 + shufps $0x1b, %xmm10, %xmm10 + shufps $0x1b, %xmm14, %xmm14 + movaps %xmm8, %xmm9 + movaps %xmm12, %xmm13 + movaps %xmm11, %xmm3 + movaps %xmm15, %xmm4 + subps %xmm1, %xmm9 + subps %xmm2, %xmm13 + subps %xmm10, %xmm11 + subps %xmm14, %xmm15 + addps %xmm1, %xmm8 + addps %xmm2, %xmm12 + addps %xmm3, %xmm10 + addps %xmm4, %xmm14 + mulps %xmm0, %xmm9 + mulps %xmm0, %xmm13 + mulps %xmm0, %xmm11 + mulps %xmm0, %xmm15 + + movaps 112(%rcx), %xmm0 + movaps %xmm0, %xmm1 + movlhps %xmm1, %xmm1 + + movaps %xmm8, %xmm2 + movaps %xmm9, %xmm3 + shufps $0x44, %xmm10, %xmm2 + shufps $0xbb, %xmm11, %xmm9 + shufps $0xbb, %xmm10, %xmm8 + shufps $0x44, %xmm11, %xmm3 + movaps %xmm2, %xmm4 + movaps %xmm3, %xmm5 + subps %xmm8, %xmm2 + subps %xmm9, %xmm3 + addps %xmm4, %xmm8 + addps %xmm5, %xmm9 + mulps %xmm1, %xmm2 + mulps %xmm1, %xmm3 + movaps %xmm8, %xmm10 + movaps %xmm9, %xmm11 + shufps $0x14, %xmm2, %xmm8 + shufps $0xbe, %xmm2, %xmm10 + shufps $0x14, 
%xmm3, %xmm9 + shufps $0xbe, %xmm3, %xmm11 + + movaps %xmm12, %xmm2 + movaps %xmm13, %xmm3 + shufps $0x44, %xmm14, %xmm2 + shufps $0xbb, %xmm15, %xmm13 + shufps $0xbb, %xmm14, %xmm12 + shufps $0x44, %xmm15, %xmm3 + movaps %xmm2, %xmm4 + movaps %xmm3, %xmm5 + subps %xmm12, %xmm2 + subps %xmm13, %xmm3 + addps %xmm4, %xmm12 + addps %xmm5, %xmm13 + mulps %xmm1, %xmm2 + mulps %xmm1, %xmm3 + movaps %xmm12, %xmm14 + movaps %xmm13, %xmm15 + shufps $0x14, %xmm2, %xmm12 + shufps $0xbe, %xmm2, %xmm14 + shufps $0x14, %xmm3, %xmm13 + shufps $0xbe, %xmm3, %xmm15 + + shufps $0xaa, %xmm0, %xmm0 + pcmpeqd %xmm1, %xmm1 + pslld $31, %xmm1 + psllq $32, %xmm1 + xorps %xmm1, %xmm0 + + movaps %xmm8, %xmm1 + movaps %xmm10, %xmm2 + unpcklps %xmm9, %xmm8 + unpckhps %xmm9, %xmm1 + unpcklps %xmm11, %xmm10 + unpckhps %xmm11, %xmm2 + movaps %xmm8, %xmm3 + movaps %xmm10, %xmm4 + unpcklps %xmm1, %xmm8 + unpckhps %xmm1, %xmm3 + unpcklps %xmm2, %xmm10 + unpckhps %xmm2, %xmm4 + movaps %xmm8, %xmm1 + movaps %xmm10, %xmm2 + subps %xmm3, %xmm1 + subps %xmm4, %xmm2 + addps %xmm3, %xmm8 + addps %xmm4, %xmm10 + mulps %xmm0, %xmm1 + mulps %xmm0, %xmm2 + movaps %xmm8, %xmm9 + movaps %xmm10, %xmm11 + unpcklps %xmm1, %xmm8 + unpckhps %xmm1, %xmm9 + unpcklps %xmm2, %xmm10 + unpckhps %xmm2, %xmm11 + + movaps %xmm12, %xmm1 + movaps %xmm14, %xmm2 + unpcklps %xmm13, %xmm12 + unpckhps %xmm13, %xmm1 + unpcklps %xmm15, %xmm14 + unpckhps %xmm15, %xmm2 + movaps %xmm12, %xmm3 + movaps %xmm14, %xmm4 + unpcklps %xmm1, %xmm12 + unpckhps %xmm1, %xmm3 + unpcklps %xmm2, %xmm14 + unpckhps %xmm2, %xmm4 + movaps %xmm12, %xmm1 + movaps %xmm14, %xmm2 + subps %xmm3, %xmm1 + subps %xmm4, %xmm2 + addps %xmm3, %xmm12 + addps %xmm4, %xmm14 + mulps %xmm0, %xmm1 + mulps %xmm0, %xmm2 + movaps %xmm12, %xmm13 + movaps %xmm14, %xmm15 + unpcklps %xmm1, %xmm12 + unpckhps %xmm1, %xmm13 + unpcklps %xmm2, %xmm14 + unpckhps %xmm2, %xmm15 + + + xorps %xmm0, %xmm0 + xorps %xmm1, %xmm1 + shufpd $0x2, %xmm8, %xmm0 + shufpd $0x2, %xmm9, %xmm1 + psrlq $32, %xmm0 + psrlq $32, %xmm1 + addps %xmm0, %xmm8 + addps %xmm1, %xmm9 + + xorps %xmm0, %xmm0 + xorps %xmm1, %xmm1 + shufpd $0x2, %xmm10, %xmm0 + shufpd $0x2, %xmm11, %xmm1 + psrlq $32, %xmm0 + psrlq $32, %xmm1 + addps %xmm0, %xmm10 + addps %xmm1, %xmm11 + + xorps %xmm0, %xmm0 + xorps %xmm1, %xmm1 + shufpd $0x2, %xmm12, %xmm0 + shufpd $0x2, %xmm13, %xmm1 + psrlq $32, %xmm0 + psrlq $32, %xmm1 + addps %xmm0, %xmm12 + addps %xmm1, %xmm13 + + xorps %xmm0, %xmm0 + xorps %xmm1, %xmm1 + shufpd $0x2, %xmm14, %xmm0 + shufpd $0x2, %xmm15, %xmm1 + psrlq $32, %xmm0 + psrlq $32, %xmm1 + addps %xmm0, %xmm14 + addps %xmm1, %xmm15 + + pshufd $0x78, %xmm9, %xmm0 + pshufd $0x78, %xmm11, %xmm1 + pshufd $0x78, %xmm13, %xmm2 + pshufd $0x78, %xmm15, %xmm3 + psrldq $4, %xmm0 + psrldq $4, %xmm1 + psrldq $4, %xmm2 + psrldq $4, %xmm3 + addps %xmm0, %xmm9 + addps %xmm1, %xmm11 + addps %xmm2, %xmm13 + addps %xmm3, %xmm15 + + pshufd $0x78, %xmm10, %xmm0 + pshufd $0x78, %xmm14, %xmm1 + psrldq $4, %xmm0 + psrldq $4, %xmm1 + addps %xmm11, %xmm10 + addps %xmm15, %xmm14 + addps %xmm0, %xmm11 + addps %xmm1, %xmm15 + + + movss %xmm8, 1024(ARG0) + movss %xmm10, 896(ARG0) + movss %xmm9, 768(ARG0) + movss %xmm11, 640(ARG0) + movhlps %xmm8, %xmm0 + movhlps %xmm10, %xmm1 + movhlps %xmm9, %xmm2 + movhlps %xmm11, %xmm3 + movss %xmm0, 512(ARG0) + movss %xmm1, 384(ARG0) + movss %xmm2, 256(ARG0) + movss %xmm3, 128(ARG0) + + pshuflw $0xee, %xmm8, %xmm4 + pshuflw $0xee, %xmm10, %xmm5 + pshuflw $0xee, %xmm9, %xmm6 + pshuflw $0xee, %xmm11, %xmm7 + movss %xmm4, (ARG0) + movss 
%xmm4, (ARG1) + movss %xmm5, 128(ARG1) + movss %xmm6, 256(ARG1) + movss %xmm7, 384(ARG1) + + pshuflw $0xee, %xmm0, %xmm0 + pshuflw $0xee, %xmm1, %xmm1 + pshuflw $0xee, %xmm2, %xmm2 + pshuflw $0xee, %xmm3, %xmm3 + movss %xmm0, 512(ARG1) + movss %xmm1, 640(ARG1) + movss %xmm2, 768(ARG1) + movss %xmm3, 896(ARG1) + + pshufd $0x78, %xmm12, %xmm0 + movaps %xmm13, %xmm1 + psrldq $4, %xmm0 + + addps %xmm14, %xmm12 + addps %xmm15, %xmm13 + addps %xmm1, %xmm14 + addps %xmm0, %xmm15 + + movss %xmm12, 960(ARG0) + movss %xmm14, 832(ARG0) + movss %xmm13, 704(ARG0) + movss %xmm15, 576(ARG0) + movhlps %xmm12, %xmm0 + movhlps %xmm14, %xmm1 + movhlps %xmm13, %xmm2 + movhlps %xmm15, %xmm3 + movss %xmm0, 448(ARG0) + movss %xmm1, 320(ARG0) + movss %xmm2, 192(ARG0) + movss %xmm3, 64(ARG0) + + pshuflw $0xee, %xmm12, %xmm4 + pshuflw $0xee, %xmm14, %xmm5 + pshuflw $0xee, %xmm13, %xmm6 + pshuflw $0xee, %xmm15, %xmm7 + movss %xmm4, 64(ARG1) + movss %xmm5, 192(ARG1) + movss %xmm6, 320(ARG1) + movss %xmm7, 448(ARG1) + + pshuflw $0xee, %xmm0, %xmm0 + pshuflw $0xee, %xmm1, %xmm1 + pshuflw $0xee, %xmm2, %xmm2 + pshuflw $0xee, %xmm3, %xmm3 + movss %xmm0, 576(ARG1) + movss %xmm1, 704(ARG1) + movss %xmm2, 832(ARG1) + movss %xmm3, 960(ARG1) + +#ifdef IS_MSABI + movaps (%rsp), %xmm6 + movaps 16(%rsp), %xmm7 + movaps 32(%rsp), %xmm8 + movaps 48(%rsp), %xmm9 + movaps 64(%rsp), %xmm10 + movaps 80(%rsp), %xmm11 + movaps 96(%rsp), %xmm12 + movaps 112(%rsp), %xmm13 + movaps 128(%rsp), %xmm14 + movaps 144(%rsp), %xmm15 + addq $168, %rsp +#endif + ret + +NONEXEC_STACK Index: include/reactos/libs/libmpg123/debug.h =================================================================== --- include/reactos/libs/libmpg123/debug.h (revision 63976) +++ include/reactos/libs/libmpg123/debug.h (working copy) @@ -97,23 +97,23 @@ #endif /* error macros also here... 
*/ -#ifndef NO_ERROR -#define error(s) fprintf(stderr, "[" __FILE__ ":%i] error: " s "\n", __LINE__) -#define error1(s, a) fprintf(stderr, "[" __FILE__ ":%i] error: " s "\n", __LINE__, a) -#define error2(s, a, b) fprintf(stderr, "[" __FILE__ ":%i] error: " s "\n", __LINE__, a, b) -#define error3(s, a, b, c) fprintf(stderr, "[" __FILE__ ":%i] error: " s "\n", __LINE__, a, b, c) -#define error4(s, a, b, c, d) fprintf(stderr, "[" __FILE__ ":%i] error: " s "\n", __LINE__, a, b, c, d) -#define error5(s, a, b, c, d, e) fprintf(stderr, "[" __FILE__ ":%i] error: " s "\n", __LINE__, a, b, c, d, e) -#define error6(s, a, b, c, d, e, f) fprintf(stderr, "[" __FILE__ ":%i] error: " s "\n", __LINE__, a, b, c, d, e, f) -#define error7(s, a, b, c, d, e, f, g) fprintf(stderr, "[" __FILE__ ":%i] error: " s "\n", __LINE__, a, b, c, d, e, f, g) -#define error8(s, a, b, c, d, e, f, g, h) fprintf(stderr, "[" __FILE__ ":%i] error: " s "\n", __LINE__, a, b, c, d, e, f, g, h) -#define error9(s, a, b, c, d, e, f, g, h, i) fprintf(stderr, "[" __FILE__ ":%i] error: " s "\n", __LINE__, a, b, c, d, e, f, g, h, i) -#define error10(s, a, b, c, d, e, f, g, h, i, j) fprintf(stderr, "[" __FILE__ ":%i] error: " s "\n", __LINE__, a, b, c, d, e, f, g, h, i, j) -#define error11(s, a, b, c, d, e, f, g, h, i, j, k) fprintf(stderr, "[" __FILE__ ":%i] error: " s "\n", __LINE__, a, b, c, d, e, f, g, h, i, j, k) -#define error12(s, a, b, c, d, e, f, g, h, i, j, k, l) fprintf(stderr, "[" __FILE__ ":%i] error: " s "\n", __LINE__, a, b, c, d, e, f, g, h, i, j, k, l) -#define error13(s, a, b, c, d, e, f, g, h, i, j, k, l, m) fprintf(stderr, "[" __FILE__ ":%i] error: " s "\n", __LINE__, a, b, c, d, e, f, g, h, i, j, k, l, m) -#define error14(s, a, b, c, d, e, f, g, h, i, j, k, l, m, n) fprintf(stderr, "[" __FILE__ ":%i] error: " s "\n", __LINE__, a, b, c, d, e, f, g, h, i, j, k, l, m, n) -#define error15(s, a, b, c, d, e, f, g, h, i, j, k, l, m, n, o) fprintf(stderr, "[" __FILE__ ":%i] error: " s "\n", __LINE__, a, b, c, d, e, f, g, h, i, j, k, l, m, n, o) +#ifndef NO_ERRORMSG +#define error(s) fprintf(stderr, "\n[" __FILE__ ":%i] error: " s "\n", __LINE__) +#define error1(s, a) fprintf(stderr, "\n[" __FILE__ ":%i] error: " s "\n", __LINE__, a) +#define error2(s, a, b) fprintf(stderr, "\n[" __FILE__ ":%i] error: " s "\n", __LINE__, a, b) +#define error3(s, a, b, c) fprintf(stderr, "\n[" __FILE__ ":%i] error: " s "\n", __LINE__, a, b, c) +#define error4(s, a, b, c, d) fprintf(stderr, "\n[" __FILE__ ":%i] error: " s "\n", __LINE__, a, b, c, d) +#define error5(s, a, b, c, d, e) fprintf(stderr, "\n[" __FILE__ ":%i] error: " s "\n", __LINE__, a, b, c, d, e) +#define error6(s, a, b, c, d, e, f) fprintf(stderr, "\n[" __FILE__ ":%i] error: " s "\n", __LINE__, a, b, c, d, e, f) +#define error7(s, a, b, c, d, e, f, g) fprintf(stderr, "\n[" __FILE__ ":%i] error: " s "\n", __LINE__, a, b, c, d, e, f, g) +#define error8(s, a, b, c, d, e, f, g, h) fprintf(stderr, "\n[" __FILE__ ":%i] error: " s "\n", __LINE__, a, b, c, d, e, f, g, h) +#define error9(s, a, b, c, d, e, f, g, h, i) fprintf(stderr, "\n[" __FILE__ ":%i] error: " s "\n", __LINE__, a, b, c, d, e, f, g, h, i) +#define error10(s, a, b, c, d, e, f, g, h, i, j) fprintf(stderr, "\n[" __FILE__ ":%i] error: " s "\n", __LINE__, a, b, c, d, e, f, g, h, i, j) +#define error11(s, a, b, c, d, e, f, g, h, i, j, k) fprintf(stderr, "\n[" __FILE__ ":%i] error: " s "\n", __LINE__, a, b, c, d, e, f, g, h, i, j, k) +#define error12(s, a, b, c, d, e, f, g, h, i, j, k, l) fprintf(stderr, "\n[" __FILE__ ":%i] 
error: " s "\n", __LINE__, a, b, c, d, e, f, g, h, i, j, k, l) +#define error13(s, a, b, c, d, e, f, g, h, i, j, k, l, m) fprintf(stderr, "\n[" __FILE__ ":%i] error: " s "\n", __LINE__, a, b, c, d, e, f, g, h, i, j, k, l, m) +#define error14(s, a, b, c, d, e, f, g, h, i, j, k, l, m, n) fprintf(stderr, "\n[" __FILE__ ":%i] error: " s "\n", __LINE__, a, b, c, d, e, f, g, h, i, j, k, l, m, n) +#define error15(s, a, b, c, d, e, f, g, h, i, j, k, l, m, n, o) fprintf(stderr, "\n[" __FILE__ ":%i] error: " s "\n", __LINE__, a, b, c, d, e, f, g, h, i, j, k, l, m, n, o) #else #define error(s) #define error1(s, a) @@ -135,22 +135,22 @@ /* ereturn macros also here... */ #ifndef NO_ERETURN -#define ereturn(rv, s) do{ fprintf(stderr, "[" __FILE__ ":%i] ereturn: " s "\n", __LINE__); return rv; }while(0) -#define ereturn1(rv, s, a) do{ fprintf(stderr, "[" __FILE__ ":%i] ereturn: " s "\n", __LINE__, a); return rv; }while(0) -#define ereturn2(rv, s, a, b) do{ fprintf(stderr, "[" __FILE__ ":%i] ereturn: " s "\n", __LINE__, a, b); return rv; }while(0) -#define ereturn3(rv, s, a, b, c) do{ fprintf(stderr, "[" __FILE__ ":%i] ereturn: " s "\n", __LINE__, a, b, c); return rv; }while(0) -#define ereturn4(rv, s, a, b, c, d) do{ fprintf(stderr, "[" __FILE__ ":%i] ereturn: " s "\n", __LINE__, a, b, c, d); return rv; }while(0) -#define ereturn5(rv, s, a, b, c, d, e) do{ fprintf(stderr, "[" __FILE__ ":%i] ereturn: " s "\n", __LINE__, a, b, c, d, e); return rv; }while(0) -#define ereturn6(rv, s, a, b, c, d, e, f) do{ fprintf(stderr, "[" __FILE__ ":%i] ereturn: " s "\n", __LINE__, a, b, c, d, e, f); return rv; }while(0) -#define ereturn7(rv, s, a, b, c, d, e, f, g) do{ fprintf(stderr, "[" __FILE__ ":%i] ereturn: " s "\n", __LINE__, a, b, c, d, e, f, g); return rv; }while(0) -#define ereturn8(rv, s, a, b, c, d, e, f, g, h) do{ fprintf(stderr, "[" __FILE__ ":%i] ereturn: " s "\n", __LINE__, a, b, c, d, e, f, g, h); return rv; }while(0) -#define ereturn9(rv, s, a, b, c, d, e, f, g, h, i) do{ fprintf(stderr, "[" __FILE__ ":%i] ereturn: " s "\n", __LINE__, a, b, c, d, e, f, g, h, i); return rv; }while(0) -#define ereturn10(rv, s, a, b, c, d, e, f, g, h, i, j) do{ fprintf(stderr, "[" __FILE__ ":%i] ereturn: " s "\n", __LINE__, a, b, c, d, e, f, g, h, i, j); return rv; }while(0) -#define ereturn11(rv, s, a, b, c, d, e, f, g, h, i, j, k) do{ fprintf(stderr, "[" __FILE__ ":%i] ereturn: " s "\n", __LINE__, a, b, c, d, e, f, g, h, i, j, k); return rv; }while(0) -#define ereturn12(rv, s, a, b, c, d, e, f, g, h, i, j, k, l) do{ fprintf(stderr, "[" __FILE__ ":%i] ereturn: " s "\n", __LINE__, a, b, c, d, e, f, g, h, i, j, k, l); return rv; }while(0) -#define ereturn13(rv, s, a, b, c, d, e, f, g, h, i, j, k, l, m) do{ fprintf(stderr, "[" __FILE__ ":%i] ereturn: " s "\n", __LINE__, a, b, c, d, e, f, g, h, i, j, k, l, m); return rv; }while(0) -#define ereturn14(rv, s, a, b, c, d, e, f, g, h, i, j, k, l, m, n) do{ fprintf(stderr, "[" __FILE__ ":%i] ereturn: " s "\n", __LINE__, a, b, c, d, e, f, g, h, i, j, k, l, m, n); return rv; }while(0) -#define ereturn15(rv, s, a, b, c, d, e, f, g, h, i, j, k, l, m, n, o) do{ fprintf(stderr, "[" __FILE__ ":%i] ereturn: " s "\n", __LINE__, a, b, c, d, e, f, g, h, i, j, k, l, m, n, o); return rv; }while(0) +#define ereturn(rv, s) do{ fprintf(stderr, "\n[" __FILE__ ":%i] ereturn: " s "\n", __LINE__); return rv; }while(0) +#define ereturn1(rv, s, a) do{ fprintf(stderr, "\n[" __FILE__ ":%i] ereturn: " s "\n", __LINE__, a); return rv; }while(0) +#define ereturn2(rv, s, a, b) do{ fprintf(stderr, "\n[" 
__FILE__ ":%i] ereturn: " s "\n", __LINE__, a, b); return rv; }while(0) +#define ereturn3(rv, s, a, b, c) do{ fprintf(stderr, "\n[" __FILE__ ":%i] ereturn: " s "\n", __LINE__, a, b, c); return rv; }while(0) +#define ereturn4(rv, s, a, b, c, d) do{ fprintf(stderr, "\n[" __FILE__ ":%i] ereturn: " s "\n", __LINE__, a, b, c, d); return rv; }while(0) +#define ereturn5(rv, s, a, b, c, d, e) do{ fprintf(stderr, "\n[" __FILE__ ":%i] ereturn: " s "\n", __LINE__, a, b, c, d, e); return rv; }while(0) +#define ereturn6(rv, s, a, b, c, d, e, f) do{ fprintf(stderr, "\n[" __FILE__ ":%i] ereturn: " s "\n", __LINE__, a, b, c, d, e, f); return rv; }while(0) +#define ereturn7(rv, s, a, b, c, d, e, f, g) do{ fprintf(stderr, "\n[" __FILE__ ":%i] ereturn: " s "\n", __LINE__, a, b, c, d, e, f, g); return rv; }while(0) +#define ereturn8(rv, s, a, b, c, d, e, f, g, h) do{ fprintf(stderr, "\n[" __FILE__ ":%i] ereturn: " s "\n", __LINE__, a, b, c, d, e, f, g, h); return rv; }while(0) +#define ereturn9(rv, s, a, b, c, d, e, f, g, h, i) do{ fprintf(stderr, "\n[" __FILE__ ":%i] ereturn: " s "\n", __LINE__, a, b, c, d, e, f, g, h, i); return rv; }while(0) +#define ereturn10(rv, s, a, b, c, d, e, f, g, h, i, j) do{ fprintf(stderr, "\n[" __FILE__ ":%i] ereturn: " s "\n", __LINE__, a, b, c, d, e, f, g, h, i, j); return rv; }while(0) +#define ereturn11(rv, s, a, b, c, d, e, f, g, h, i, j, k) do{ fprintf(stderr, "\n[" __FILE__ ":%i] ereturn: " s "\n", __LINE__, a, b, c, d, e, f, g, h, i, j, k); return rv; }while(0) +#define ereturn12(rv, s, a, b, c, d, e, f, g, h, i, j, k, l) do{ fprintf(stderr, "\n[" __FILE__ ":%i] ereturn: " s "\n", __LINE__, a, b, c, d, e, f, g, h, i, j, k, l); return rv; }while(0) +#define ereturn13(rv, s, a, b, c, d, e, f, g, h, i, j, k, l, m) do{ fprintf(stderr, "\n[" __FILE__ ":%i] ereturn: " s "\n", __LINE__, a, b, c, d, e, f, g, h, i, j, k, l, m); return rv; }while(0) +#define ereturn14(rv, s, a, b, c, d, e, f, g, h, i, j, k, l, m, n) do{ fprintf(stderr, "\n[" __FILE__ ":%i] ereturn: " s "\n", __LINE__, a, b, c, d, e, f, g, h, i, j, k, l, m, n); return rv; }while(0) +#define ereturn15(rv, s, a, b, c, d, e, f, g, h, i, j, k, l, m, n, o) do{ fprintf(stderr, "\n[" __FILE__ ":%i] ereturn: " s "\n", __LINE__, a, b, c, d, e, f, g, h, i, j, k, l, m, n, o); return rv; }while(0) #else #define ereturn(rv, s) return rv #define ereturn1(rv, s, a) return rv Index: include/reactos/libs/libmpg123/decode.h =================================================================== --- include/reactos/libs/libmpg123/decode.h (revision 63976) +++ include/reactos/libs/libmpg123/decode.h (working copy) @@ -53,7 +53,13 @@ int synth_1to1_stereo_altivec(real*, real*, mpg123_handle*); int synth_1to1_x86_64 (real*, int, mpg123_handle*, int); int synth_1to1_stereo_x86_64(real*, real*, mpg123_handle*); +int synth_1to1_avx (real*, int, mpg123_handle*, int); +int synth_1to1_stereo_avx (real*, real*, mpg123_handle*); int synth_1to1_arm (real*, int, mpg123_handle*, int); +int synth_1to1_neon (real*, int, mpg123_handle*, int); +int synth_1to1_stereo_neon(real*, real*, mpg123_handle*); +int synth_1to1_neon64 (real*, int, mpg123_handle*, int); +int synth_1to1_stereo_neon64(real*, real*, mpg123_handle*); /* This is different, special usage in layer3.c only. Hence, the name... and now forget about it. Never use it outside that special portion of code inside layer3.c! */ @@ -60,7 +66,7 @@ int absynth_1to1_i486(real*, int, mpg123_handle*, int); /* These mono/stereo converters use one of the above for the grunt work. 
*/ int synth_1to1_mono (real*, mpg123_handle*); -int synth_1to1_mono2stereo(real*, mpg123_handle*); +int synth_1to1_m2s(real*, mpg123_handle*); /* Sample rate decimation comes in less flavours. */ #ifndef NO_DOWNSAMPLE @@ -68,18 +74,18 @@ int synth_2to1_dither (real*, int, mpg123_handle*, int); int synth_2to1_i386 (real*, int, mpg123_handle*, int); int synth_2to1_mono (real*, mpg123_handle*); -int synth_2to1_mono2stereo(real*, mpg123_handle*); +int synth_2to1_m2s(real*, mpg123_handle*); int synth_4to1 (real *,int, mpg123_handle*, int); int synth_4to1_dither (real *,int, mpg123_handle*, int); int synth_4to1_i386 (real*, int, mpg123_handle*, int); int synth_4to1_mono (real*, mpg123_handle*); -int synth_4to1_mono2stereo(real*, mpg123_handle*); +int synth_4to1_m2s(real*, mpg123_handle*); #endif #ifndef NO_NTOM /* NtoM is really just one implementation. */ int synth_ntom (real *,int, mpg123_handle*, int); int synth_ntom_mono (real *, mpg123_handle *); -int synth_ntom_mono2stereo (real *, mpg123_handle *); +int synth_ntom_m2s (real *, mpg123_handle *); #endif #endif @@ -92,25 +98,25 @@ int synth_1to1_8bit_wrap (real*, int, mpg123_handle*, int); int synth_1to1_8bit_mono (real*, mpg123_handle*); #endif -int synth_1to1_8bit_mono2stereo(real*, mpg123_handle*); +int synth_1to1_8bit_m2s(real*, mpg123_handle*); #ifndef NO_16BIT int synth_1to1_8bit_wrap_mono (real*, mpg123_handle*); -int synth_1to1_8bit_wrap_mono2stereo(real*, mpg123_handle*); +int synth_1to1_8bit_wrap_m2s(real*, mpg123_handle*); #endif #ifndef NO_DOWNSAMPLE int synth_2to1_8bit (real*, int, mpg123_handle*, int); int synth_2to1_8bit_i386 (real*, int, mpg123_handle*, int); int synth_2to1_8bit_mono (real*, mpg123_handle*); -int synth_2to1_8bit_mono2stereo(real*, mpg123_handle*); +int synth_2to1_8bit_m2s(real*, mpg123_handle*); int synth_4to1_8bit (real*, int, mpg123_handle*, int); int synth_4to1_8bit_i386 (real*, int, mpg123_handle*, int); int synth_4to1_8bit_mono (real*, mpg123_handle*); -int synth_4to1_8bit_mono2stereo(real*, mpg123_handle*); +int synth_4to1_8bit_m2s(real*, mpg123_handle*); #endif #ifndef NO_NTOM int synth_ntom_8bit (real*, int, mpg123_handle*, int); int synth_ntom_8bit_mono (real*, mpg123_handle*); -int synth_ntom_8bit_mono2stereo(real*, mpg123_handle*); +int synth_ntom_8bit_m2s(real*, mpg123_handle*); #endif #endif @@ -124,24 +130,30 @@ int synth_1to1_real_stereo_sse (real*, real*, mpg123_handle*); int synth_1to1_real_x86_64 (real*, int, mpg123_handle*, int); int synth_1to1_real_stereo_x86_64(real*, real*, mpg123_handle*); +int synth_1to1_real_avx (real*, int, mpg123_handle*, int); +int synth_1to1_real_stereo_avx (real*, real*, mpg123_handle*); int synth_1to1_real_altivec (real*, int, mpg123_handle*, int); int synth_1to1_real_stereo_altivec(real*, real*, mpg123_handle*); +int synth_1to1_real_neon (real*, int, mpg123_handle*, int); +int synth_1to1_real_stereo_neon(real*, real*, mpg123_handle*); +int synth_1to1_real_neon64 (real*, int, mpg123_handle*, int); +int synth_1to1_real_stereo_neon64(real*, real*, mpg123_handle*); int synth_1to1_real_mono (real*, mpg123_handle*); -int synth_1to1_real_mono2stereo(real*, mpg123_handle*); +int synth_1to1_real_m2s(real*, mpg123_handle*); #ifndef NO_DOWNSAMPLE int synth_2to1_real (real*, int, mpg123_handle*, int); int synth_2to1_real_i386 (real*, int, mpg123_handle*, int); int synth_2to1_real_mono (real*, mpg123_handle*); -int synth_2to1_real_mono2stereo(real*, mpg123_handle*); +int synth_2to1_real_m2s(real*, mpg123_handle*); int synth_4to1_real (real*, int, mpg123_handle*, int); int 
synth_4to1_real_i386 (real*, int, mpg123_handle*, int); int synth_4to1_real_mono (real*, mpg123_handle*); -int synth_4to1_real_mono2stereo(real*, mpg123_handle*); +int synth_4to1_real_m2s(real*, mpg123_handle*); #endif #ifndef NO_NTOM int synth_ntom_real (real*, int, mpg123_handle*, int); int synth_ntom_real_mono (real*, mpg123_handle*); -int synth_ntom_real_mono2stereo(real*, mpg123_handle*); +int synth_ntom_real_m2s(real*, mpg123_handle*); #endif #endif @@ -153,24 +165,30 @@ int synth_1to1_s32_stereo_sse (real*, real*, mpg123_handle*); int synth_1to1_s32_x86_64 (real*, int, mpg123_handle*, int); int synth_1to1_s32_stereo_x86_64(real*, real*, mpg123_handle*); +int synth_1to1_s32_avx (real*, int, mpg123_handle*, int); +int synth_1to1_s32_stereo_avx (real*, real*, mpg123_handle*); int synth_1to1_s32_altivec (real*, int, mpg123_handle*, int); int synth_1to1_s32_stereo_altivec(real*, real*, mpg123_handle*); +int synth_1to1_s32_neon (real*, int, mpg123_handle*, int); +int synth_1to1_s32_stereo_neon(real*, real*, mpg123_handle*); +int synth_1to1_s32_neon64 (real*, int, mpg123_handle*, int); +int synth_1to1_s32_stereo_neon64(real*, real*, mpg123_handle*); int synth_1to1_s32_mono (real*, mpg123_handle*); -int synth_1to1_s32_mono2stereo(real*, mpg123_handle*); +int synth_1to1_s32_m2s(real*, mpg123_handle*); #ifndef NO_DOWNSAMPLE int synth_2to1_s32 (real*, int, mpg123_handle*, int); int synth_2to1_s32_i386 (real*, int, mpg123_handle*, int); int synth_2to1_s32_mono (real*, mpg123_handle*); -int synth_2to1_s32_mono2stereo(real*, mpg123_handle*); +int synth_2to1_s32_m2s(real*, mpg123_handle*); int synth_4to1_s32 (real*, int, mpg123_handle*, int); int synth_4to1_s32_i386 (real*, int, mpg123_handle*, int); int synth_4to1_s32_mono (real*, mpg123_handle*); -int synth_4to1_s32_mono2stereo(real*, mpg123_handle*); +int synth_4to1_s32_m2s(real*, mpg123_handle*); #endif #ifndef NO_NTOM int synth_ntom_s32 (real*, int, mpg123_handle*, int); int synth_ntom_s32_mono (real*, mpg123_handle*); -int synth_ntom_s32_mono2stereo(real*, mpg123_handle*); +int synth_ntom_s32_m2s(real*, mpg123_handle*); #endif #endif @@ -189,6 +207,11 @@ void dct36 (real *,real *,real *,real *,real *); void dct36_3dnow (real *,real *,real *,real *,real *); void dct36_3dnowext(real *,real *,real *,real *,real *); +void dct36_x86_64 (real *,real *,real *,real *,real *); +void dct36_sse (real *,real *,real *,real *,real *); +void dct36_avx (real *,real *,real *,real *,real *); +void dct36_neon (real *,real *,real *,real *,real *); +void dct36_neon64 (real *,real *,real *,real *,real *); /* Tools for NtoM resampling synth, defined in ntom.c . */ int synth_ntom_set_step(mpg123_handle *fr); /* prepare ntom decoding */ Index: include/reactos/libs/libmpg123/dither.c =================================================================== --- include/reactos/libs/libmpg123/dither.c (revision 0) +++ include/reactos/libs/libmpg123/dither.c (working copy) @@ -0,0 +1,119 @@ +/* + dither: Generate shaped noise for dithering + + copyright 2009 by the mpg123 project - free software under the terms of the LGPL 2.1 + see COPYING and AUTHORS files in distribution or http://mpg123.org + initially written by Taihei Monma +*/ + +#include "config.h" +#include "compat.h" +#include "dither.h" + +static const uint32_t init_seed = 2463534242UL; + +#define LAP 100 + +/* + xorshift random number generator, with output scaling to [-0.5, 0.5] + This is the white noise... + See http://www.jstatsoft.org/v08/i14/paper on XOR shift random number generators. 
+*/
+static float rand_xorshift32(uint32_t *seed)
+{
+    union
+    {
+        uint32_t i;
+        float f;
+    } fi;
+
+    fi.i = *seed;
+    fi.i ^= (fi.i<<13);
+    fi.i ^= (fi.i>>17);
+    fi.i ^= (fi.i<<5);
+    *seed = fi.i;
+
+    /* scale the number to [-0.5, 0.5] */
+#ifdef IEEE_FLOAT
+    fi.i = (fi.i>>9)|0x3f800000;
+    fi.f -= 1.5f;
+#else
+    fi.f = (double)fi.i / 4294967295.0;
+    fi.f -= 0.5f;
+#endif
+    return fi.f;
+}
+
+static void white_noise(float *table, size_t count)
+{
+    size_t i;
+    uint32_t seed = init_seed;
+
+    for(i=0; i<count; ++i)
+    table[i] = rand_xorshift32(&seed);
+}
+
+static void tpdf_noise(float *table, size_t count)
+{
+    size_t i;
+    uint32_t seed = init_seed;
+
+    for(i=0; i<count; ++i)
+    table[i] = rand_xorshift32(&seed) + rand_xorshift32(&seed);
+}
+
+static void highpass_tpdf_noise(float *table, size_t count)
+{
+    size_t i;
+    uint32_t seed = init_seed;
+    size_t lap = count > 2*LAP ? LAP : count/2;
+
+    float input_noise;
+    float xv[9], yv[9];
+
+    for(i=0;i<9;i++)
+    {
+        xv[i] = yv[i] = 0.0f;
+    }
+
+    for(i=0;i=lap) table[i-lap] = yv[8] * 3.0f;
+    }
+}
+
+void mpg123_noise(float* table, size_t count, enum mpg123_noise_type noisetype)
+{
+    switch(noisetype)
+    {
+        case mpg123_white_noise: white_noise(table, count); break;
+        case mpg123_tpdf_noise: tpdf_noise(table, count); break;
+        case mpg123_highpass_tpdf_noise:
+        highpass_tpdf_noise(table, count);
+        break;
+    }
+}
+
+/* Generate white noise and shape it with a high pass filter. */
+void dither_table_init(float *dithertable)
+{
+    highpass_tpdf_noise(dithertable, DITHERSIZE);
+}
Index: include/reactos/libs/libmpg123/equalizer.c
===================================================================
--- include/reactos/libs/libmpg123/equalizer.c (revision 0)
+++ include/reactos/libs/libmpg123/equalizer.c (working copy)
@@ -0,0 +1,17 @@
+/*
+    equalizer.c: equalizer settings
+
+    copyright ?-2006 by the mpg123 project - free software under the terms of the LGPL 2.1
+    see COPYING and AUTHORS files in distribution or http://mpg123.org
+    initially written by Michael Hipp
+*/
+
+
+#include "mpg123lib_intern.h"
+
+void do_equalizer(real *bandPtr,int channel, real equalizer[2][32])
+{
+    int i;
+    for(i=0;i<32;i++)
+        bandPtr[i] = REAL_MUL(bandPtr[i], equalizer[channel][i]);
+}
Index: include/reactos/libs/libmpg123/equalizer_3dnow.S =================================================================== --- include/reactos/libs/libmpg123/equalizer_3dnow.S (revision 0) +++ include/reactos/libs/libmpg123/equalizer_3dnow.S (working copy) @@ -0,0 +1,70 @@ +/* + equalizer_3dnow: 3DNow!
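The generators above are reachable from outside through mpg123_noise() and used internally via dither_table_init(). A small usage sketch, assuming the mpg123_noise() prototype and the mpg123_noise_type enum are available from the public mpg123.h of this build:

    #include <stdlib.h>
    #include "mpg123.h"   /* assumed to declare mpg123_noise() and mpg123_noise_type */

    /* Fill a freshly allocated table with high-passed TPDF noise,
       the same flavour dither_table_init() uses for the dithered decoders. */
    static float *make_noise_table(size_t count)
    {
        float *table = malloc(count * sizeof(float));
        if(table == NULL) return NULL;
        /* mpg123_white_noise and mpg123_tpdf_noise are the other options. */
        mpg123_noise(table, count, mpg123_highpass_tpdf_noise);
        return table;
    }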
optimized do_equalizer() + + copyright ?-2006 by the mpg123 project - free software under the terms of the LGPL 2.1 + see COPYING and AUTHORS files in distribution or http://mpg123.org + initially written by KIMURA Takuhiro +*/ + +#include "mangle.h" + +.text + ALIGN4 +.globl ASM_NAME(do_equalizer_3dnow) +/* .type ASM_NAME(do_equalizer_3dnow),@function */ +/* void do_equalizer(real *bandPtr,int channel, real equalizer[2][32]); */ +ASM_NAME(do_equalizer_3dnow): + pushl %esi + pushl %ebx + /* bandPtr */ + movl 12(%esp),%ebx + /* channel */ + movl 16(%esp),%ecx + xorl %edx,%edx + /* equalizer */ + movl 20(%esp),%esi + sall $7,%ecx + ALIGN4 +.L9: + movq (%ebx,%edx),%mm0 + pfmul (%esi,%ecx),%mm0 + + movq 8(%ebx,%edx),%mm1 + pfmul 8(%esi,%ecx),%mm1 + movq %mm0,(%ebx,%edx) + + movq 16(%ebx,%edx),%mm0 + pfmul 16(%esi,%ecx),%mm0 + movq %mm1,8(%ebx,%edx) + + movq 24(%ebx,%edx),%mm1 + pfmul 24(%esi,%ecx),%mm1 + movq %mm0,16(%ebx,%edx) + + movq 32(%ebx,%edx),%mm0 + pfmul 32(%esi,%ecx),%mm0 + movq %mm1,24(%ebx,%edx) + + movq 40(%ebx,%edx),%mm1 + pfmul 40(%esi,%ecx),%mm1 + movq %mm0,32(%ebx,%edx) + + movq 48(%ebx,%edx),%mm0 + pfmul 48(%esi,%ecx),%mm0 + movq %mm1,40(%ebx,%edx) + + movq 56(%ebx,%edx),%mm1 + pfmul 56(%esi,%ecx),%mm1 + movq %mm0,48(%ebx,%edx) + movq %mm1,56(%ebx,%edx) + + addl $64,%edx + addl $32,%ecx + cmpl $124,%edx + jle .L9 + ALIGN4 + popl %ebx + popl %esi + ret + +NONEXEC_STACK Index: include/reactos/libs/libmpg123/feature.c =================================================================== --- include/reactos/libs/libmpg123/feature.c (revision 0) +++ include/reactos/libs/libmpg123/feature.c (working copy) @@ -0,0 +1,106 @@ +#include "mpg123lib_intern.h" + +int mpg123_feature(const enum mpg123_feature_set key) +{ + switch(key) + { + case MPG123_FEATURE_ABI_UTF8OPEN: +#ifdef WANT_WIN32_UNICODE + return 1; +#else + return 0; +#endif /* WANT_WIN32_UNICODE */ + + case MPG123_FEATURE_OUTPUT_8BIT: +#ifdef NO_8BIT + return 0; +#else + return 1; +#endif /* mpg123_output_8bit */ + + case MPG123_FEATURE_OUTPUT_16BIT: +#ifdef NO_16BIT + return 0; +#else + return 1; +#endif /* mpg123_output_16bit */ + + case MPG123_FEATURE_OUTPUT_32BIT: +#ifdef NO_32BIT + return 0; +#else + return 1; +#endif /* mpg123_output_32bit */ + + case MPG123_FEATURE_PARSE_ID3V2: +#ifdef NO_ID3V2 + return 0; +#else + return 1; +#endif /* NO_ID3V2 */ + + case MPG123_FEATURE_DECODE_LAYER1: +#ifdef NO_LAYER1 + return 0; +#else + return 1; +#endif /* NO_LAYER1 */ + + case MPG123_FEATURE_DECODE_LAYER2: +#ifdef NO_LAYER2 + return 0; +#else + return 1; +#endif /* NO_LAYER2 */ + + case MPG123_FEATURE_DECODE_LAYER3: +#ifdef NO_LAYER3 + return 0; +#else + return 1; +#endif /* NO_LAYER3 */ + + case MPG123_FEATURE_DECODE_ACCURATE: +#ifdef ACCURATE_ROUNDING + return 1; +#else + return 0; +#endif /* ACCURATE_ROUNDING */ + + case MPG123_FEATURE_DECODE_DOWNSAMPLE: +#ifdef NO_DOWNSAMPLE + return 0; +#else + return 1; +#endif /* NO_DOWNSAMPLE */ + + case MPG123_FEATURE_DECODE_NTOM: +#ifdef NO_NTOM + return 0; +#else + return 1; +#endif /* NO_NTOM */ + + case MPG123_FEATURE_PARSE_ICY: +#ifdef NO_ICY + return 0; +#else + return 1; +#endif /* NO_ICY */ + + case MPG123_FEATURE_INDEX: +#ifdef FRAME_INDEX + return 1; +#else + return 0; +#endif /* FRAME_INDEX */ + case MPG123_FEATURE_TIMEOUT_READ: +#ifdef TIMEOUT_READ + return 1; +#else + return 0; +#endif + + default: return 0; + } +} Index: include/reactos/libs/libmpg123/format.c =================================================================== --- include/reactos/libs/libmpg123/format.c 
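mpg123_feature() lets client code check at run time which optional pieces were compiled into this particular libmpg123 before relying on them. A minimal sketch; the fallback policy is illustrative:

    #include <stdio.h>
    #include "mpg123.h"

    /* Pick an output encoding that this particular build can actually deliver. */
    static int choose_encoding(void)
    {
        if(mpg123_feature(MPG123_FEATURE_OUTPUT_32BIT))
            return MPG123_ENC_SIGNED_32;
        if(mpg123_feature(MPG123_FEATURE_OUTPUT_16BIT))
            return MPG123_ENC_SIGNED_16;
        fprintf(stderr, "Neither 16 nor 32 bit output compiled in.\n");
        return 0;
    }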
(revision 0) +++ include/reactos/libs/libmpg123/format.c (working copy) @@ -0,0 +1,694 @@ +/* + format:routines to deal with audio (output) format + + copyright 2008-14 by the mpg123 project - free software under the terms of the LGPL 2.1 + see COPYING and AUTHORS files in distribution or http://mpg123.org + initially written by Thomas Orgis, starting with parts of the old audio.c, with only faintly manage to show now + + A Major change from mpg123 <= 1.18 is that all encodings are only really + disabled when done so via specific build configuration. Otherwise, the + missing support of decoders to produce a certain format is augmented by + postprocessing that converts the samples. This means happily creating + data with higher resolution from less accurate decoder output. + + The main point is to still offer float encoding when the decoding core uses + a fixed point representation that has only 16 bit output. Actually, that's + the only point: A fixed-point build needs to create float from 16 bit, also + 32 or 24 bit from the same source. That's all there is to it: Everything else + is covered by fallback synth functions. It may be a further step to check if + there are cases where conversion in postprocessing works well enough to omit + a certain specialized decoder ... but usually, they are justified by some + special way to get from float to integer to begin with. + + I won't cover the case of faking double output with float/s16 decoders here. + Double precision output is a thing for experimental builds anyway. Mostly + theoretical and without a point. +*/ + +#include "mpg123lib_intern.h" +#include "debug.h" + +/* static int chans[NUM_CHANNELS] = { 1 , 2 }; */ +static const long my_rates[MPG123_RATES] = /* only the standard rates */ +{ + 8000, 11025, 12000, + 16000, 22050, 24000, + 32000, 44100, 48000, +}; + +static const int my_encodings[MPG123_ENCODINGS] = +{ + MPG123_ENC_SIGNED_16, + MPG123_ENC_UNSIGNED_16, + MPG123_ENC_SIGNED_32, + MPG123_ENC_UNSIGNED_32, + MPG123_ENC_SIGNED_24, + MPG123_ENC_UNSIGNED_24, + /* Floating point range, see below. */ + MPG123_ENC_FLOAT_32, + MPG123_ENC_FLOAT_64, + /* 8 bit range, see below. */ + MPG123_ENC_SIGNED_8, + MPG123_ENC_UNSIGNED_8, + MPG123_ENC_ULAW_8, + MPG123_ENC_ALAW_8 +}; + +/* Make that match the above table. + And yes, I still don't like this kludgy stuff. */ +/* range[0] <= i < range[1] for forced floating point */ +static const int enc_float_range[2] = { 6, 8 }; +/* same for 8 bit encodings */ +static const int enc_8bit_range[2] = { 8, 12 }; + +/* + Only one type of float is supported. + Actually, double is a very special experimental case not occuring in normal + builds. Might actually get rid of it. + + Remember here: Also with REAL_IS_FIXED, I want to be able to produce float + output (f32) via post-processing. +*/ +# ifdef REAL_IS_DOUBLE +# define MPG123_FLOAT_ENC MPG123_ENC_FLOAT_64 +# else +# define MPG123_FLOAT_ENC MPG123_ENC_FLOAT_32 +# endif + +/* The list of actually possible encodings. */ +static const int good_encodings[] = +{ +#ifndef NO_16BIT + MPG123_ENC_SIGNED_16, + MPG123_ENC_UNSIGNED_16, +#endif +#ifndef NO_32BIT + MPG123_ENC_SIGNED_32, + MPG123_ENC_UNSIGNED_32, + MPG123_ENC_SIGNED_24, + MPG123_ENC_UNSIGNED_24, +#endif +#ifndef NO_REAL + MPG123_FLOAT_ENC, +#endif +#ifndef NO_8BIT + MPG123_ENC_SIGNED_8, + MPG123_ENC_UNSIGNED_8, + MPG123_ENC_ULAW_8, + MPG123_ENC_ALAW_8 +#endif +}; + +/* Check if encoding is a valid one in this build. + ...lazy programming: linear search. 
*/ +static int good_enc(const int enc) +{ + size_t i; + for(i=0; iforce_rate != 0 && mp->force_rate == r) return MPG123_RATES; +#endif + + return -1; +} + +static int enc2num(int encoding) +{ + int i; + for(i=0;ichannels-1; + int rn = rate2num(&fr->p, nf->rate); + if(rn >= 0) for(i=f0;ip.audio_caps[c][rn][i]) + { + nf->encoding = my_encodings[i]; + return 1; + } + } + return 0; +} + +static int freq_fit(mpg123_handle *fr, struct audioformat *nf, int f0, int f2) +{ + nf->rate = frame_freq(fr)>>fr->p.down_sample; + if(cap_fit(fr,nf,f0,f2)) return 1; + if(fr->p.flags & MPG123_AUTO_RESAMPLE) + { + nf->rate>>=1; + if(cap_fit(fr,nf,f0,f2)) return 1; + nf->rate>>=1; + if(cap_fit(fr,nf,f0,f2)) return 1; + } +#ifndef NO_NTOM + /* If nothing worked, try the other rates, only without constrains from user. + In case you didn't guess: We enable flexible resampling if we find a working rate. */ + if( fr->p.flags & MPG123_AUTO_RESAMPLE && + !fr->p.force_rate && fr->p.down_sample == 0) + { + int i; + int c = nf->channels-1; + int rn = rate2num(&fr->p, frame_freq(fr)); + int rrn; + if(rn < 0) return 0; + /* Try higher rates first. */ + for(i=f0;ip.audio_caps[c][rrn][i]) + { + nf->rate = my_rates[rrn]; + nf->encoding = my_encodings[i]; + return 1; + } + /* Then lower rates. */ + for(i=f0;i=0; --rrn) + if(fr->p.audio_caps[c][rrn][i]) + { + nf->rate = my_rates[rrn]; + nf->encoding = my_encodings[i]; + return 1; + } + } +#endif + + return 0; +} + +/* match constraints against supported audio formats, store possible setup in frame + return: -1: error; 0: no format change; 1: format change */ +int frame_output_format(mpg123_handle *fr) +{ + struct audioformat nf; + int f0=0; + int f2=MPG123_ENCODINGS; /* Omit the 32bit and float encodings. */ + mpg123_pars *p = &fr->p; + /* initialize new format, encoding comes later */ + nf.channels = fr->stereo; + + /* All this forcing should be removed in favour of the capabilities table... */ + if(p->flags & MPG123_FORCE_8BIT) + { + f0 = enc_8bit_range[0]; + f2 = enc_8bit_range[1]; + } + if(p->flags & MPG123_FORCE_FLOAT) + { + f0 = enc_float_range[0]; + f2 = enc_float_range[1]; + } + + /* force stereo is stronger */ + if(p->flags & MPG123_FORCE_MONO) nf.channels = 1; + if(p->flags & MPG123_FORCE_STEREO) nf.channels = 2; + +#ifndef NO_NTOM + if(p->force_rate) + { + nf.rate = p->force_rate; + if(cap_fit(fr,&nf,f0,2)) goto end; /* 16bit encodings */ + if(cap_fit(fr,&nf,f0<=2 ? 2 : f0,f2)) goto end; /* 8bit encodings */ + + /* try again with different stereoness */ + if(nf.channels == 2 && !(p->flags & MPG123_FORCE_STEREO)) nf.channels = 1; + else if(nf.channels == 1 && !(p->flags & MPG123_FORCE_MONO)) nf.channels = 2; + + if(cap_fit(fr,&nf,f0,2)) goto end; /* 16bit encodings */ + if(cap_fit(fr,&nf,f0<=2 ? 2 : f0,f2)) goto end; /* 8bit encodings */ + + if(NOQUIET) + error3( "Unable to set up output format! Constraints: %s%s%liHz.", + ( p->flags & MPG123_FORCE_STEREO ? "stereo, " : + (p->flags & MPG123_FORCE_MONO ? "mono, " : "") ), + (p->flags & MPG123_FORCE_8BIT ? "8bit, " : ""), + p->force_rate ); +/* if(NOQUIET && p->verbose <= 1) print_capabilities(fr); */ + + fr->err = MPG123_BAD_OUTFORMAT; + return -1; + } +#endif + + if(freq_fit(fr, &nf, f0, 2)) goto end; /* try rates with 16bit */ + if(freq_fit(fr, &nf, f0<=2 ? 2 : f0, f2)) goto end; /* ... 
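freq_fit() above fixes the search order for a usable output rate: the stream's native rate first, then, if MPG123_AUTO_RESAMPLE is set, that rate halved once and twice, and only then (in NtoM-enabled builds) any other supported rate before giving up with MPG123_BAD_OUTFORMAT. A condensed sketch of that candidate order, with the capability table reduced to a callback for illustration:

    /* Stand-in for the capability check; in the library this is cap_fit()
       probing p->audio_caps[channels-1][rate][encoding]. */
    typedef int (*rate_ok_fn)(long rate);

    /* First acceptable rate in the native/half/quarter order used by freq_fit(). */
    static long pick_rate(long frame_rate, int auto_resample, rate_ok_fn ok)
    {
        if(ok(frame_rate)) return frame_rate;
        if(auto_resample)
        {
            if(ok(frame_rate >> 1)) return frame_rate >> 1; /* 22050 for a 44100 stream */
            if(ok(frame_rate >> 2)) return frame_rate >> 2; /* 11025 */
        }
        return -1; /* the library would now try NtoM rates or fail with MPG123_BAD_OUTFORMAT */
    }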
8bit */ + + /* try again with different stereoness */ + if(nf.channels == 2 && !(p->flags & MPG123_FORCE_STEREO)) nf.channels = 1; + else if(nf.channels == 1 && !(p->flags & MPG123_FORCE_MONO)) nf.channels = 2; + + if(freq_fit(fr, &nf, f0, 2)) goto end; /* try rates with 16bit */ + if(freq_fit(fr, &nf, f0<=2 ? 2 : f0, f2)) goto end; /* ... 8bit */ + + /* Here is the _bad_ end. */ + if(NOQUIET) + { + error5( "Unable to set up output format! Constraints: %s%s%li, %li or %liHz.", + ( p->flags & MPG123_FORCE_STEREO ? "stereo, " : + (p->flags & MPG123_FORCE_MONO ? "mono, " : "") ), + (p->flags & MPG123_FORCE_8BIT ? "8bit, " : ""), + frame_freq(fr), frame_freq(fr)>>1, frame_freq(fr)>>2 ); + } +/* if(NOQUIET && p->verbose <= 1) print_capabilities(fr); */ + + fr->err = MPG123_BAD_OUTFORMAT; + return -1; + +end: /* Here is the _good_ end. */ + /* we had a successful match, now see if there's a change */ + if(nf.rate == fr->af.rate && nf.channels == fr->af.channels && nf.encoding == fr->af.encoding) + { + debug2("Old format with %i channels, and FORCE_MONO=%li", nf.channels, p->flags & MPG123_FORCE_MONO); + return 0; /* the same format as before */ + } + else /* a new format */ + { + debug1("New format with %i channels!", nf.channels); + fr->af.rate = nf.rate; + fr->af.channels = nf.channels; + fr->af.encoding = nf.encoding; + /* Cache the size of one sample in bytes, for ease of use. */ + fr->af.encsize = mpg123_encsize(fr->af.encoding); + if(fr->af.encsize < 1) + { + if(NOQUIET) error1("Some unknown encoding??? (%i)", fr->af.encoding); + + fr->err = MPG123_BAD_OUTFORMAT; + return -1; + } + /* Set up the decoder synth format. Might differ. */ +#ifdef NO_SYNTH32 + /* Without high-precision synths, 16 bit signed is the basis for + everything higher than 8 bit. */ + if(fr->af.encsize > 2) + fr->af.dec_enc = MPG123_ENC_SIGNED_16; + else + { +#endif + switch(fr->af.encoding) + { +#ifndef NO_32BIT + case MPG123_ENC_SIGNED_24: + case MPG123_ENC_UNSIGNED_24: + case MPG123_ENC_UNSIGNED_32: + fr->af.dec_enc = MPG123_ENC_SIGNED_32; + break; +#endif +#ifndef NO_16BIT + case MPG123_ENC_UNSIGNED_16: + fr->af.dec_enc = MPG123_ENC_SIGNED_16; + break; +#endif + default: + fr->af.dec_enc = fr->af.encoding; + } +#ifdef NO_SYNTH32 + } +#endif + fr->af.dec_encsize = mpg123_encsize(fr->af.dec_enc); + return 1; + } +} + +int attribute_align_arg mpg123_format_none(mpg123_handle *mh) +{ + int r; + if(mh == NULL) return MPG123_ERR; + + r = mpg123_fmt_none(&mh->p); + if(r != MPG123_OK){ mh->err = r; r = MPG123_ERR; } + + return r; +} + +int attribute_align_arg mpg123_fmt_none(mpg123_pars *mp) +{ + if(mp == NULL) return MPG123_BAD_PARS; + + if(PVERB(mp,3)) fprintf(stderr, "Note: Disabling all formats.\n"); + + memset(mp->audio_caps,0,sizeof(mp->audio_caps)); + return MPG123_OK; +} + +int attribute_align_arg mpg123_format_all(mpg123_handle *mh) +{ + int r; + if(mh == NULL) return MPG123_ERR; + + r = mpg123_fmt_all(&mh->p); + if(r != MPG123_OK){ mh->err = r; r = MPG123_ERR; } + + return r; +} + +int attribute_align_arg mpg123_fmt_all(mpg123_pars *mp) +{ + size_t rate, ch, enc; + if(mp == NULL) return MPG123_BAD_PARS; + + if(PVERB(mp,3)) fprintf(stderr, "Note: Enabling all formats.\n"); + + for(ch=0; ch < NUM_CHANNELS; ++ch) + for(rate=0; rate < MPG123_RATES+1; ++rate) + for(enc=0; enc < MPG123_ENCODINGS; ++enc) + mp->audio_caps[ch][rate][enc] = good_enc(my_encodings[enc]) ? 
1 : 0; + + return MPG123_OK; +} + +int attribute_align_arg mpg123_format(mpg123_handle *mh, long rate, int channels, int encodings) +{ + int r; + if(mh == NULL) return MPG123_ERR; + r = mpg123_fmt(&mh->p, rate, channels, encodings); + if(r != MPG123_OK){ mh->err = r; r = MPG123_ERR; } + + return r; +} + +int attribute_align_arg mpg123_fmt(mpg123_pars *mp, long rate, int channels, int encodings) +{ + int ie, ic, ratei; + int ch[2] = {0, 1}; + if(mp == NULL) return MPG123_BAD_PARS; + if(!(channels & (MPG123_MONO|MPG123_STEREO))) return MPG123_BAD_CHANNEL; + + if(PVERB(mp,3)) fprintf(stderr, "Note: Want to enable format %li/%i for encodings 0x%x.\n", rate, channels, encodings); + + if(!(channels & MPG123_STEREO)) ch[1] = 0; /* {0,0} */ + else if(!(channels & MPG123_MONO)) ch[0] = 1; /* {1,1} */ + ratei = rate2num(mp, rate); + if(ratei < 0) return MPG123_BAD_RATE; + + /* now match the encodings */ + for(ic = 0; ic < 2; ++ic) + { + for(ie = 0; ie < MPG123_ENCODINGS; ++ie) + if(good_enc(my_encodings[ie]) && ((my_encodings[ie] & encodings) == my_encodings[ie])) + mp->audio_caps[ch[ic]][ratei][ie] = 1; + + if(ch[0] == ch[1]) break; /* no need to do it again */ + } + + return MPG123_OK; +} + +int attribute_align_arg mpg123_format_support(mpg123_handle *mh, long rate, int encoding) +{ + if(mh == NULL) return 0; + else return mpg123_fmt_support(&mh->p, rate, encoding); +} + +int attribute_align_arg mpg123_fmt_support(mpg123_pars *mp, long rate, int encoding) +{ + int ch = 0; + int ratei, enci; + ratei = rate2num(mp, rate); + enci = enc2num(encoding); + if(mp == NULL || ratei < 0 || enci < 0) return 0; + if(mp->audio_caps[0][ratei][enci]) ch |= MPG123_MONO; + if(mp->audio_caps[1][ratei][enci]) ch |= MPG123_STEREO; + return ch; +} + +/* Call this one to ensure that any valid format will be something different than this. */ +void invalidate_format(struct audioformat *af) +{ + af->encoding = 0; + af->rate = 0; + af->channels = 0; +} + +/* Number of bytes the decoder produces. */ +off_t decoder_synth_bytes(mpg123_handle *fr, off_t s) +{ + return s * fr->af.dec_encsize * fr->af.channels; +} + +/* Samples/bytes for output buffer after post-processing. */ +/* take into account: channels, bytes per sample -- NOT resampling!*/ +off_t samples_to_bytes(mpg123_handle *fr , off_t s) +{ + return s * fr->af.encsize * fr->af.channels; +} + +off_t bytes_to_samples(mpg123_handle *fr , off_t b) +{ + return b / fr->af.encsize / fr->af.channels; +} + +/* Number of bytes needed for decoding _and_ post-processing. */ +off_t outblock_bytes(mpg123_handle *fr, off_t s) +{ + int encsize = (fr->af.encoding & MPG123_ENC_24) + ? 4 /* Intermediate 32 bit. */ + : (fr->af.encsize > fr->af.dec_encsize + ? fr->af.encsize + : fr->af.dec_encsize); + return s * encsize * fr->af.channels; +} + +#ifndef NO_32BIT +/* Remove every fourth byte, facilitating conversion from 32 bit to 24 bit integers. + This has to be aware of endianness, of course. */ +static void chop_fourth_byte(struct outbuffer *buf) +{ + unsigned char *wpos = buf->data; + unsigned char *rpos = buf->data; +#ifdef WORDS_BIGENDIAN + while((size_t) (rpos - buf->data + 4) <= buf->fill) + { + /* Really stupid: Copy, increment. Byte per byte. */ + *wpos = *rpos; + wpos++; rpos++; + *wpos = *rpos; + wpos++; rpos++; + *wpos = *rpos; + wpos++; rpos++; + rpos++; /* Skip the lowest byte (last). */ + } +#else + while((size_t) (rpos - buf->data + 4) <= buf->fill) + { + /* Really stupid: Copy, increment. Byte per byte. */ + rpos++; /* Skip the lowest byte (first). 
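mpg123_fmt()/mpg123_format() fill the audio_caps table that cap_fit() consults, so the usual client pattern is to clear everything with mpg123_format_none() and then allow exactly the formats wanted. A minimal sketch, with error handling shortened:

    #include "mpg123.h"

    /* Restrict a handle to 44100 Hz, stereo, signed 16 bit output. */
    static int force_cd_format(mpg123_handle *mh)
    {
        if(mpg123_format_none(mh) != MPG123_OK) return MPG123_ERR;
        if(mpg123_format(mh, 44100, MPG123_STEREO, MPG123_ENC_SIGNED_16) != MPG123_OK)
            return MPG123_ERR;
        /* Returns the channel mask (MPG123_MONO|MPG123_STEREO) still enabled
           for that rate and encoding; here it should report stereo only. */
        return mpg123_format_support(mh, 44100, MPG123_ENC_SIGNED_16);
    }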
*/
+        *wpos = *rpos;
+        wpos++; rpos++;
+        *wpos = *rpos;
+        wpos++; rpos++;
+        *wpos = *rpos;
+        wpos++; rpos++;
+    }
+#endif
+    buf->fill = wpos-buf->data;
+}
+
+static void conv_s32_to_u32(struct outbuffer *buf)
+{
+    size_t i;
+    int32_t *ssamples = (int32_t*) buf->data;
+    uint32_t *usamples = (uint32_t*) buf->data;
+    size_t count = buf->fill/sizeof(int32_t);
+
+    for(i=0; i<count; ++i)
+    {
+        if(ssamples[i] >= 0)
+        usamples[i] = (uint32_t)ssamples[i] + 2147483647+1;
+        /* The smallest value goes zero. */
+        else if(ssamples[i] == ((int32_t)-2147483647-1))
+        usamples[i] = 0;
+        /* Now -value is in the positive range of signed int ... so it's a possible value at all. */
+        else
+        usamples[i] = (uint32_t)2147483647+1 - (uint32_t)(-ssamples[i]);
+    }
+}
+
+#endif
+
+
+/* We always assume that whole numbers are written!
+    partials will be cut out. */
+
+static const char *bufsizeerr = "Fatal: Buffer too small for postprocessing!";
+
+
+#ifndef NO_16BIT
+
+static void conv_s16_to_u16(struct outbuffer *buf)
+{
+    size_t i;
+    int16_t *ssamples = (int16_t*) buf->data;
+    uint16_t *usamples = (uint16_t*)buf->data;
+    size_t count = buf->fill/sizeof(int16_t);
+
+    for(i=0; i<count; ++i)
+    usamples[i] = (uint16_t)((long)ssamples[i]+32768);
+}
+
+#ifndef NO_REAL
+static void conv_s16_to_f32(struct outbuffer *buf)
+{
+    ssize_t i;
+    int16_t *in = (int16_t*) buf->data;
+    float *out = (float*) buf->data;
+    size_t count = buf->fill/sizeof(int16_t);
+    /* Does that make any sense? In x86, there is an actual instruction to divide
+    float by integer ... but then, if we have that FPU, we don't really need
+    fixed point decoder hacks ...? */
+    float scale = 1./SHORT_SCALE;
+
+    if(buf->size < count*sizeof(float))
+    {
+        error1("%s", bufsizeerr);
+        return;
+    }
+
+    /* Work from the back since output is bigger. */
+    for(i=count-1; i>=0; --i)
+    out[i] = (float)in[i] * scale;
+
+    buf->fill = count*sizeof(float);
+}
+#endif
+
+#ifndef NO_32BIT
+static void conv_s16_to_s32(struct outbuffer *buf)
+{
+    ssize_t i;
+    int16_t *in = (int16_t*) buf->data;
+    int32_t *out = (int32_t*) buf->data;
+    size_t count = buf->fill/sizeof(int16_t);
+
+    if(buf->size < count*sizeof(int32_t))
+    {
+        error1("%s", bufsizeerr);
+        return;
+    }
+
+    /* Work from the back since output is bigger. */
+    for(i=count-1; i>=0; --i)
+    {
+        out[i] = in[i];
+        /* Could just shift bytes, but would have to mess with sign bit. */
+        out[i] *= S32_RESCALE;
+    }
+
+    buf->fill = count*sizeof(int32_t);
+}
+#endif
+#endif
+
+
+void postprocess_buffer(mpg123_handle *fr)
+{
+    /*
+    This caters for the final output formats that are never produced by
+    decoder synth directly (wide unsigned and 24 bit formats) or that are
+    missing because of limited decoder precision (16 bit synth but 32 or
+    24 bit output).
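The conversions above all rewrite the output buffer in place: bias signed samples into the unsigned range, widen 16 bit to 32 bit or float, and drop one byte per sample to get packed 24 bit. A self-contained sketch of the two simplest steps, the signed-to-unsigned bias and the little-endian 32-to-24 repacking, working on plain arrays instead of the library's outbuffer struct:

    #include <stdint.h>
    #include <stddef.h>

    /* s16 -> u16: shift the range [-32768,32767] up to [0,65535]. */
    static void bias_s16(int16_t *samples, size_t count)
    {
        uint16_t *u = (uint16_t*)samples;
        size_t i;
        for(i = 0; i < count; ++i)
            u[i] = (uint16_t)((int32_t)samples[i] + 32768);
    }

    /* s32 -> packed s24, little-endian: keep the three high bytes of each sample. */
    static size_t pack24(unsigned char *buf, size_t bytes)
    {
        unsigned char *w = buf, *r = buf;
        while((size_t)(r - buf) + 4 <= bytes)
        {
            ++r;              /* skip the least significant byte */
            *w++ = *r++;
            *w++ = *r++;
            *w++ = *r++;
        }
        return (size_t)(w - buf); /* new fill: 3/4 of the old byte count */
    }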
+ */ + switch(fr->af.dec_enc) + { +#ifndef NO_32BIT + case MPG123_ENC_SIGNED_32: + switch(fr->af.encoding) + { + case MPG123_ENC_UNSIGNED_32: + conv_s32_to_u32(&fr->buffer); + break; + case MPG123_ENC_UNSIGNED_24: + conv_s32_to_u32(&fr->buffer); + chop_fourth_byte(&fr->buffer); + break; + case MPG123_ENC_SIGNED_24: + chop_fourth_byte(&fr->buffer); + break; + } + break; +#endif +#ifndef NO_16BIT + case MPG123_ENC_SIGNED_16: + switch(fr->af.encoding) + { + case MPG123_ENC_UNSIGNED_16: + conv_s16_to_u16(&fr->buffer); + break; +#ifndef NO_REAL + case MPG123_ENC_FLOAT_32: + conv_s16_to_f32(&fr->buffer); + break; +#endif +#ifndef NO_32BIT + case MPG123_ENC_SIGNED_32: + conv_s16_to_s32(&fr->buffer); + break; + case MPG123_ENC_UNSIGNED_32: + conv_s16_to_s32(&fr->buffer); + conv_s32_to_u32(&fr->buffer); + break; + case MPG123_ENC_UNSIGNED_24: + conv_s16_to_s32(&fr->buffer); + conv_s32_to_u32(&fr->buffer); + chop_fourth_byte(&fr->buffer); + break; + case MPG123_ENC_SIGNED_24: + conv_s16_to_s32(&fr->buffer); + chop_fourth_byte(&fr->buffer); + break; +#endif + } + break; +#endif + } +} Index: include/reactos/libs/libmpg123/frame.c =================================================================== --- include/reactos/libs/libmpg123/frame.c (revision 0) +++ include/reactos/libs/libmpg123/frame.c (working copy) @@ -0,0 +1,1018 @@ +/* + frame: Heap of routines dealing with the core mpg123 data structure. + + copyright 2008-2014 by the mpg123 project - free software under the terms of the LGPL 2.1 + see COPYING and AUTHORS files in distribution or http://mpg123.org + initially written by Thomas Orgis +*/ + +#include "mpg123lib_intern.h" +#include "getcpuflags.h" +#include "debug.h" + +static void frame_fixed_reset(mpg123_handle *fr); + +/* that's doubled in decode_ntom.c */ +#define NTOM_MUL (32768) + +#define aligned_pointer(p, type, alignment) align_the_pointer(p, alignment) +static void *align_the_pointer(void *base, unsigned int alignment) +{ + /* + Work in unsigned integer realm, explicitly. + Tricking the compiler into integer operations like % by invoking base-NULL is dangerous: It results into ptrdiff_t, which gets negative on big addresses. Big screw up, that. + I try to do it "properly" here: Casting only to uintptr_t and no artihmethic with void*. + */ + uintptr_t baseval = (uintptr_t)(char*)base; + uintptr_t aoff = baseval % alignment; + + debug3("align_the_pointer: pointer %p is off by %u from %u", + base, (unsigned int)aoff, alignment); + + if(aoff) return (char*)base+alignment-aoff; + else return base; +} + +static void frame_default_pars(mpg123_pars *mp) +{ + mp->outscale = 1.0; + mp->flags = 0; +#ifdef GAPLESS + mp->flags |= MPG123_GAPLESS; +#endif + mp->flags |= MPG123_AUTO_RESAMPLE; +#ifndef NO_NTOM + mp->force_rate = 0; +#endif + mp->down_sample = 0; + mp->rva = 0; + mp->halfspeed = 0; + mp->doublespeed = 0; + mp->verbose = 0; +#ifndef NO_ICY + mp->icy_interval = 0; +#endif + mp->timeout = 0; + mp->resync_limit = 1024; +#ifdef FRAME_INDEX + mp->index_size = INDEX_SIZE; +#endif + mp->preframes = 4; /* That's good for layer 3 ISO compliance bitstream. */ + mpg123_fmt_all(mp); + /* Default of keeping some 4K buffers at hand, should cover the "usual" use case (using 16K pipe buffers as role model). 
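aligned_pointer()/align_the_pointer() is always paired with an over-allocation of alignment-1 spare bytes, as frame_outbuffer() below does with its malloc(size+15). The pattern on its own, for 16 byte alignment; the sizes are illustrative:

    #include <stdlib.h>
    #include <stdint.h>

    /* Same idea as align_the_pointer(): round up inside an over-allocated block. */
    static void *align16(void *base)
    {
        uintptr_t off = (uintptr_t)base % 16;
        return off ? (char*)base + (16 - off) : base;
    }

    int main(void)
    {
        size_t want = 4096;
        /* 15 spare bytes guarantee that a 16-byte-aligned region of the full
           requested size still fits inside the allocation. */
        unsigned char *raw = malloc(want + 15);
        float *aligned = raw ? (float*)align16(raw) : NULL;

        /* ... use aligned[0 .. want/sizeof(float) - 1] here ... */

        free(raw);
        return aligned ? 0 : 1;
    }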
*/ +#ifndef NO_FEEDER + mp->feedpool = 5; + mp->feedbuffer = 4096; +#endif +} + +void frame_init(mpg123_handle *fr) +{ + frame_init_par(fr, NULL); +} + +void frame_init_par(mpg123_handle *fr, mpg123_pars *mp) +{ + fr->own_buffer = TRUE; + fr->buffer.data = NULL; + fr->buffer.rdata = NULL; + fr->buffer.fill = 0; + fr->buffer.size = 0; + fr->rawbuffs = NULL; + fr->rawbuffss = 0; + fr->rawdecwin = NULL; + fr->rawdecwins = 0; +#ifndef NO_8BIT + fr->conv16to8_buf = NULL; +#endif +#ifdef OPT_DITHER + fr->dithernoise = NULL; +#endif + fr->layerscratch = NULL; + fr->xing_toc = NULL; + fr->cpu_opts.type = defdec(); + fr->cpu_opts.class = decclass(fr->cpu_opts.type); +#ifndef NO_NTOM + /* these two look unnecessary, check guarantee for synth_ntom_set_step (in control_generic, even)! */ + fr->ntom_val[0] = NTOM_MUL>>1; + fr->ntom_val[1] = NTOM_MUL>>1; + fr->ntom_step = NTOM_MUL; +#endif + /* unnecessary: fr->buffer.size = fr->buffer.fill = 0; */ + mpg123_reset_eq(fr); + init_icy(&fr->icy); + init_id3(fr); + /* frame_outbuffer is missing... */ + /* frame_buffers is missing... that one needs cpu opt setting! */ + /* after these... frame_reset is needed before starting full decode */ + invalidate_format(&fr->af); + fr->rdat.r_read = NULL; + fr->rdat.r_lseek = NULL; + fr->rdat.iohandle = NULL; + fr->rdat.r_read_handle = NULL; + fr->rdat.r_lseek_handle = NULL; + fr->rdat.cleanup_handle = NULL; + fr->wrapperdata = NULL; + fr->wrapperclean = NULL; + fr->decoder_change = 1; + fr->err = MPG123_OK; + if(mp == NULL) frame_default_pars(&fr->p); + else memcpy(&fr->p, mp, sizeof(struct mpg123_pars_struct)); + +#ifndef NO_FEEDER + bc_prepare(&fr->rdat.buffer, fr->p.feedpool, fr->p.feedbuffer); +#endif + + fr->down_sample = 0; /* Initialize to silence harmless errors when debugging. */ + frame_fixed_reset(fr); /* Reset only the fixed data, dynamic buffers are not there yet! */ + fr->synth = NULL; + fr->synth_mono = NULL; + fr->make_decode_tables = NULL; +#ifdef FRAME_INDEX + fi_init(&fr->index); + frame_index_setup(fr); /* Apply the size setting. */ +#endif +} + +#ifdef OPT_DITHER +/* Also, only allocate the memory for the table on demand. + In future, one could create special noise for different sampling frequencies(?). 
*/ +int frame_dither_init(mpg123_handle *fr) +{ + /* run-time dither noise table generation */ + if(fr->dithernoise == NULL) + { + fr->dithernoise = malloc(sizeof(float)*DITHERSIZE); + if(fr->dithernoise == NULL) return 0; + + dither_table_init(fr->dithernoise); + } + return 1; +} +#endif + +mpg123_pars attribute_align_arg *mpg123_new_pars(int *error) +{ + mpg123_pars *mp = malloc(sizeof(struct mpg123_pars_struct)); + if(mp != NULL){ frame_default_pars(mp); if(error != NULL) *error = MPG123_OK; } + else if(error != NULL) *error = MPG123_OUT_OF_MEM; + return mp; +} + +void attribute_align_arg mpg123_delete_pars(mpg123_pars* mp) +{ + if(mp != NULL) free(mp); +} + +int attribute_align_arg mpg123_reset_eq(mpg123_handle *mh) +{ + int i; + mh->have_eq_settings = 0; + for(i=0; i < 32; ++i) mh->equalizer[0][i] = mh->equalizer[1][i] = DOUBLE_TO_REAL(1.0); + + return MPG123_OK; +} + +int frame_outbuffer(mpg123_handle *fr) +{ + size_t size = fr->outblock; + if(!fr->own_buffer) + { + if(fr->buffer.size < size) + { + fr->err = MPG123_BAD_BUFFER; + if(NOQUIET) error2("have external buffer of size %"SIZE_P", need %"SIZE_P, (size_p)fr->buffer.size, (size_p)size); + + return MPG123_ERR; + } + } + + debug1("need frame buffer of %"SIZE_P, (size_p)size); + if(fr->buffer.rdata != NULL && fr->buffer.size != size) + { + free(fr->buffer.rdata); + fr->buffer.rdata = NULL; + } + fr->buffer.size = size; + fr->buffer.data = NULL; + /* be generous: use 16 byte alignment */ + if(fr->buffer.rdata == NULL) fr->buffer.rdata = (unsigned char*) malloc(fr->buffer.size+15); + if(fr->buffer.rdata == NULL) + { + fr->err = MPG123_OUT_OF_MEM; + return MPG123_ERR; + } + fr->buffer.data = aligned_pointer(fr->buffer.rdata, unsigned char*, 16); + fr->own_buffer = TRUE; + fr->buffer.fill = 0; + return MPG123_OK; +} + +int attribute_align_arg mpg123_replace_buffer(mpg123_handle *mh, unsigned char *data, size_t size) +{ + debug2("replace buffer with %p size %"SIZE_P, data, (size_p)size); + /* Will accept any size, the error comes later... */ + if(data == NULL) + { + mh->err = MPG123_BAD_BUFFER; + return MPG123_ERR; + } + if(mh->buffer.rdata != NULL) free(mh->buffer.rdata); + mh->own_buffer = FALSE; + mh->buffer.rdata = NULL; + mh->buffer.data = data; + mh->buffer.size = size; + mh->buffer.fill = 0; + return MPG123_OK; +} + +#ifdef FRAME_INDEX +int frame_index_setup(mpg123_handle *fr) +{ + int ret = MPG123_ERR; + if(fr->p.index_size >= 0) + { /* Simple fixed index. */ + fr->index.grow_size = 0; + debug1("resizing index to %li", fr->p.index_size); + ret = fi_resize(&fr->index, (size_t)fr->p.index_size); + debug2("index resized... %lu at %p", (unsigned long)fr->index.size, (void*)fr->index.data); + } + else + { /* A growing index. We give it a start, though. */ + fr->index.grow_size = (size_t)(- fr->p.index_size); + if(fr->index.size < fr->index.grow_size) + ret = fi_resize(&fr->index, fr->index.grow_size); + else + ret = MPG123_OK; /* We have minimal size already... and since growing is OK... 
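mpg123_replace_buffer() above switches the handle from its own malloc'd output buffer to one owned by the caller; frame_outbuffer() then only checks that the supplied size covers the handle's output block. A minimal usage sketch; the 32 KiB figure is illustrative, the binding requirement is the handle's outblock size:

    #include "mpg123.h"

    /* Caller-owned output buffer. It must be at least as large as the handle's
       output block; 32 KiB is used here only as an example. */
    static unsigned char outbuf[32768];

    static int use_external_buffer(mpg123_handle *mh)
    {
        return mpg123_replace_buffer(mh, outbuf, sizeof(outbuf));
    }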
*/ + } + debug2("set up frame index of size %lu (ret=%i)", (unsigned long)fr->index.size, ret); + + return ret; +} +#endif + +static void frame_decode_buffers_reset(mpg123_handle *fr) +{ + memset(fr->rawbuffs, 0, fr->rawbuffss); +} + +int frame_buffers(mpg123_handle *fr) +{ + int buffssize = 0; + debug1("frame %p buffer", (void*)fr); +/* + the used-to-be-static buffer of the synth functions, has some subtly different types/sizes + + 2to1, 4to1, ntom, generic, i386: real[2][2][0x110] + mmx, sse: short[2][2][0x110] + i586(_dither): 4352 bytes; int/long[2][2][0x110] + i486: int[2][2][17*FIR_BUFFER_SIZE] + altivec: static real __attribute__ ((aligned (16))) buffs[4][4][0x110] + + Huh, altivec looks like fun. Well, let it be large... then, the 16 byte alignment seems to be implicit on MacOSX malloc anyway. + Let's make a reasonable attempt to allocate enough memory... + Keep in mind: biggest ones are i486 and altivec (mutually exclusive!), then follows i586 and normal real. + mmx/sse use short but also real for resampling. + Thus, minimum is 2*2*0x110*sizeof(real). +*/ + if(fr->cpu_opts.type == altivec) buffssize = 4*4*0x110*sizeof(real); +#ifdef OPT_I486 + else if(fr->cpu_opts.type == ivier) buffssize = 2*2*17*FIR_BUFFER_SIZE*sizeof(int); +#endif + else if(fr->cpu_opts.type == ifuenf || fr->cpu_opts.type == ifuenf_dither || fr->cpu_opts.type == dreidnow) + buffssize = 2*2*0x110*4; /* don't rely on type real, we need 4352 bytes */ + + if(2*2*0x110*sizeof(real) > buffssize) + buffssize = 2*2*0x110*sizeof(real); + buffssize += 15; /* For 16-byte alignment (SSE likes that). */ + + if(fr->rawbuffs != NULL && fr->rawbuffss != buffssize) + { + free(fr->rawbuffs); + fr->rawbuffs = NULL; + } + + if(fr->rawbuffs == NULL) fr->rawbuffs = (unsigned char*) malloc(buffssize); + if(fr->rawbuffs == NULL) return -1; + fr->rawbuffss = buffssize; + fr->short_buffs[0][0] = aligned_pointer(fr->rawbuffs,short,16); + fr->short_buffs[0][1] = fr->short_buffs[0][0] + 0x110; + fr->short_buffs[1][0] = fr->short_buffs[0][1] + 0x110; + fr->short_buffs[1][1] = fr->short_buffs[1][0] + 0x110; + fr->real_buffs[0][0] = aligned_pointer(fr->rawbuffs,real,16); + fr->real_buffs[0][1] = fr->real_buffs[0][0] + 0x110; + fr->real_buffs[1][0] = fr->real_buffs[0][1] + 0x110; + fr->real_buffs[1][1] = fr->real_buffs[1][0] + 0x110; +#ifdef OPT_I486 + if(fr->cpu_opts.type == ivier) + { + fr->int_buffs[0][0] = (int*) fr->rawbuffs; + fr->int_buffs[0][1] = fr->int_buffs[0][0] + 17*FIR_BUFFER_SIZE; + fr->int_buffs[1][0] = fr->int_buffs[0][1] + 17*FIR_BUFFER_SIZE; + fr->int_buffs[1][1] = fr->int_buffs[1][0] + 17*FIR_BUFFER_SIZE; + } +#endif +#ifdef OPT_ALTIVEC + if(fr->cpu_opts.type == altivec) + { + int i,j; + fr->areal_buffs[0][0] = (real*) fr->rawbuffs; + for(i=0; i<4; ++i) for(j=0; j<4; ++j) + fr->areal_buffs[i][j] = fr->areal_buffs[0][0] + (i*4+j)*0x110; + } +#endif + /* now the different decwins... all of the same size, actually */ + /* The MMX ones want 32byte alignment, which I'll try to ensure manually */ + { + int decwin_size = (512+32)*sizeof(real); +#ifdef OPT_MMXORSSE +#ifdef OPT_MULTI + if(fr->cpu_opts.class == mmxsse) + { +#endif + /* decwin_mmx will share, decwins will be appended ... sizeof(float)==4 */ + if(decwin_size < (512+32)*4) decwin_size = (512+32)*4; + + /* the second window + alignment zone -- we align for 32 bytes for SSE as + requirement, 64 byte for matching cache line size (that matters!) 
*/ + decwin_size += (512+32)*4 + 63; + /* (512+32)*4/32 == 2176/32 == 68, so one decwin block retains alignment for 32 or 64 bytes */ +#ifdef OPT_MULTI + } +#endif +#endif +#if defined(OPT_ALTIVEC) || defined(OPT_ARM) + /* sizeof(real) >= 4 ... yes, it could be 8, for example. + We got it intialized to at least (512+32)*sizeof(real).*/ + decwin_size += 512*sizeof(real); +#endif + /* Hm, that's basically realloc() ... */ + if(fr->rawdecwin != NULL && fr->rawdecwins != decwin_size) + { + free(fr->rawdecwin); + fr->rawdecwin = NULL; + } + + if(fr->rawdecwin == NULL) + fr->rawdecwin = (unsigned char*) malloc(decwin_size); + + if(fr->rawdecwin == NULL) return -1; + + fr->rawdecwins = decwin_size; + fr->decwin = (real*) fr->rawdecwin; +#ifdef OPT_MMXORSSE +#ifdef OPT_MULTI + if(fr->cpu_opts.class == mmxsse) + { +#endif + /* align decwin, assign that to decwin_mmx, append decwins */ + /* I need to add to decwin what is missing to the next full 64 byte -- also I want to make gcc -pedantic happy... */ + fr->decwin = aligned_pointer(fr->rawdecwin,real,64); + debug1("aligned decwin: %p", (void*)fr->decwin); + fr->decwin_mmx = (float*)fr->decwin; + fr->decwins = fr->decwin_mmx+512+32; +#ifdef OPT_MULTI + } + else debug("no decwins/decwin_mmx for that class"); +#endif +#endif + } + + /* Layer scratch buffers are of compile-time fixed size, so allocate only once. */ + if(fr->layerscratch == NULL) + { + /* Allocate specific layer1/2/3 buffers, so that we know they'll work for SSE. */ + size_t scratchsize = 0; + real *scratcher; +#ifndef NO_LAYER1 + scratchsize += sizeof(real) * 2 * SBLIMIT; +#endif +#ifndef NO_LAYER2 + scratchsize += sizeof(real) * 2 * 4 * SBLIMIT; +#endif +#ifndef NO_LAYER3 + scratchsize += sizeof(real) * 2 * SBLIMIT * SSLIMIT; /* hybrid_in */ + scratchsize += sizeof(real) * 2 * SSLIMIT * SBLIMIT; /* hybrid_out */ +#endif + /* + Now figure out correct alignment: + We need 16 byte minimum, smallest unit of the blocks is 2*SBLIMIT*sizeof(real), which is 64*4=256. Let's do 64bytes as heuristic for cache line (as proven useful in buffs above). + */ + fr->layerscratch = malloc(scratchsize+63); + if(fr->layerscratch == NULL) return -1; + + /* Get aligned part of the memory, then divide it up. */ + scratcher = aligned_pointer(fr->layerscratch,real,64); + /* Those funky pointer casts silence compilers... + One might change the code at hand to really just use 1D arrays, but in practice, that would not make a (positive) difference. */ +#ifndef NO_LAYER1 + fr->layer1.fraction = (real(*)[SBLIMIT])scratcher; + scratcher += 2 * SBLIMIT; +#endif +#ifndef NO_LAYER2 + fr->layer2.fraction = (real(*)[4][SBLIMIT])scratcher; + scratcher += 2 * 4 * SBLIMIT; +#endif +#ifndef NO_LAYER3 + fr->layer3.hybrid_in = (real(*)[SBLIMIT][SSLIMIT])scratcher; + scratcher += 2 * SBLIMIT * SSLIMIT; + fr->layer3.hybrid_out = (real(*)[SSLIMIT][SBLIMIT])scratcher; + scratcher += 2 * SSLIMIT * SBLIMIT; +#endif + /* Note: These buffers don't need resetting here. */ + } + + /* Only reset the buffers we created just now. */ + frame_decode_buffers_reset(fr); + + debug1("frame %p buffer done", (void*)fr); + return 0; +} + +int frame_buffers_reset(mpg123_handle *fr) +{ + fr->buffer.fill = 0; /* hm, reset buffer fill... did we do a flush? */ + fr->bsnum = 0; + /* Wondering: could it be actually _wanted_ to retain buffer contents over different files? 
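The layer scratch setup above relies on casting a flat, aligned allocation to pointer-to-array types, so the layer code keeps its natural 2-D indexing without separate allocations. The cast in isolation, using the 32-subband dimension as an example:

    #define SBLIMIT 32  /* subband count, as used by the layer buffers above */

    /* View a flat scratch area of 2*SBLIMIT floats as fraction[2][SBLIMIT]. */
    static void carve_views(float *scratch)
    {
        float (*fraction)[SBLIMIT] = (float(*)[SBLIMIT])scratch;

        fraction[0][0] = 1.0f;           /* same storage as scratch[0] */
        fraction[1][SBLIMIT-1] = 2.0f;   /* same storage as scratch[2*SBLIMIT-1] */
    }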
(special gapless / cut stuff) */ + fr->bsbuf = fr->bsspace[1]; + fr->bsbufold = fr->bsbuf; + fr->bitreservoir = 0; + frame_decode_buffers_reset(fr); + memset(fr->bsspace, 0, 2*(MAXFRAMESIZE+512)); + memset(fr->ssave, 0, 34); + fr->hybrid_blc[0] = fr->hybrid_blc[1] = 0; + memset(fr->hybrid_block, 0, sizeof(real)*2*2*SBLIMIT*SSLIMIT); + return 0; +} + +static void frame_icy_reset(mpg123_handle* fr) +{ +#ifndef NO_ICY + if(fr->icy.data != NULL) free(fr->icy.data); + fr->icy.data = NULL; + fr->icy.interval = 0; + fr->icy.next = 0; +#endif +} + +static void frame_free_toc(mpg123_handle *fr) +{ + if(fr->xing_toc != NULL){ free(fr->xing_toc); fr->xing_toc = NULL; } +} + +/* Just copy the Xing TOC over... */ +int frame_fill_toc(mpg123_handle *fr, unsigned char* in) +{ + if(fr->xing_toc == NULL) fr->xing_toc = malloc(100); + if(fr->xing_toc != NULL) + { + memcpy(fr->xing_toc, in, 100); +#ifdef DEBUG + debug("Got a TOC! Showing the values..."); + { + int i; + for(i=0; i<100; ++i) + debug2("entry %i = %i", i, fr->xing_toc[i]); + } +#endif + return TRUE; + } + return FALSE; +} + +/* Prepare the handle for a new track. + Reset variables, buffers... */ +int frame_reset(mpg123_handle* fr) +{ + frame_buffers_reset(fr); + frame_fixed_reset(fr); + frame_free_toc(fr); +#ifdef FRAME_INDEX + fi_reset(&fr->index); +#endif + + return 0; +} + +/* Reset everythign except dynamic memory. */ +static void frame_fixed_reset(mpg123_handle *fr) +{ + frame_icy_reset(fr); + open_bad(fr); + fr->to_decode = FALSE; + fr->to_ignore = FALSE; + fr->metaflags = 0; + fr->outblock = 0; /* This will be set before decoding! */ + fr->num = -1; + fr->input_offset = -1; + fr->playnum = -1; + fr->state_flags = FRAME_ACCURATE; + fr->silent_resync = 0; + fr->audio_start = 0; + fr->clip = 0; + fr->oldhead = 0; + fr->firsthead = 0; + fr->vbr = MPG123_CBR; + fr->abr_rate = 0; + fr->track_frames = 0; + fr->track_samples = -1; + fr->framesize=0; + fr->mean_frames = 0; + fr->mean_framesize = 0; + fr->freesize = 0; + fr->lastscale = -1; + fr->rva.level[0] = -1; + fr->rva.level[1] = -1; + fr->rva.gain[0] = 0; + fr->rva.gain[1] = 0; + fr->rva.peak[0] = 0; + fr->rva.peak[1] = 0; + fr->fsizeold = 0; + fr->firstframe = 0; + fr->ignoreframe = fr->firstframe-fr->p.preframes; + fr->header_change = 0; + fr->lastframe = -1; + fr->fresh = 1; + fr->new_format = 0; +#ifdef GAPLESS + frame_gapless_init(fr,-1,0,0); + fr->lastoff = 0; + fr->firstoff = 0; +#endif +#ifdef OPT_I486 + fr->i486bo[0] = fr->i486bo[1] = FIR_SIZE-1; +#endif + fr->bo = 1; /* the usual bo */ +#ifdef OPT_DITHER + fr->ditherindex = 0; +#endif + reset_id3(fr); + reset_icy(&fr->icy); + /* ICY stuff should go into icy.c, eh? */ +#ifndef NO_ICY + fr->icy.interval = 0; + fr->icy.next = 0; +#endif + fr->halfphase = 0; /* here or indeed only on first-time init? 
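frame_fill_toc() only stores the 100 Xing TOC bytes; the fuzzy seek path below turns a wanted frame into a TOC entry (a playback percentage) and scales that entry by the file length to get a byte position. The arithmetic in isolation, assuming the whole-file interpretation used there and track_frames > 0:

    /* Estimate a byte offset for a target frame from a 100-entry Xing TOC.
       Each toc[] value is in 0..255 and stands for offset*256/filelen. */
    static long toc_guess(const unsigned char toc[100],
                          long want_frame, long track_frames, long filelen)
    {
        int entry = (int)((double)want_frame * 100.0 / track_frames);
        if(entry < 0) entry = 0;
        if(entry > 99) entry = 99;
        return (long)((double)toc[entry] / 256.0 * filelen);
    }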
*/ + fr->error_protection = 0; + fr->freeformat_framesize = -1; +} + +static void frame_free_buffers(mpg123_handle *fr) +{ + if(fr->rawbuffs != NULL) free(fr->rawbuffs); + fr->rawbuffs = NULL; + fr->rawbuffss = 0; + if(fr->rawdecwin != NULL) free(fr->rawdecwin); + fr->rawdecwin = NULL; + fr->rawdecwins = 0; +#ifndef NO_8BIT + if(fr->conv16to8_buf != NULL) free(fr->conv16to8_buf); + fr->conv16to8_buf = NULL; +#endif + if(fr->layerscratch != NULL) free(fr->layerscratch); +} + +void frame_exit(mpg123_handle *fr) +{ + if(fr->buffer.rdata != NULL) + { + debug1("freeing buffer at %p", (void*)fr->buffer.rdata); + free(fr->buffer.rdata); + } + fr->buffer.rdata = NULL; + frame_free_buffers(fr); + frame_free_toc(fr); +#ifdef FRAME_INDEX + fi_exit(&fr->index); +#endif +#ifdef OPT_DITHER + if(fr->dithernoise != NULL) + { + free(fr->dithernoise); + fr->dithernoise = NULL; + } +#endif + exit_id3(fr); + clear_icy(&fr->icy); + /* Clean up possible mess from LFS wrapper. */ + if(fr->wrapperclean != NULL) + { + fr->wrapperclean(fr->wrapperdata); + fr->wrapperdata = NULL; + } +#ifndef NO_FEEDER + bc_cleanup(&fr->rdat.buffer); +#endif +} + +int attribute_align_arg mpg123_framedata(mpg123_handle *mh, unsigned long *header, unsigned char **bodydata, size_t *bodybytes) +{ + if(mh == NULL) return MPG123_ERR; + if(!mh->to_decode) return MPG123_ERR; + + if(header != NULL) *header = mh->oldhead; + if(bodydata != NULL) *bodydata = mh->bsbuf; + if(bodybytes != NULL) *bodybytes = mh->framesize; + + return MPG123_OK; +} + +/* + Fuzzy frame offset searching (guessing). + When we don't have an accurate position, we may use an inaccurate one. + Possibilities: + - use approximate positions from Xing TOC (not yet parsed) + - guess wildly from mean framesize and offset of first frame / beginning of file. +*/ + +static off_t frame_fuzzy_find(mpg123_handle *fr, off_t want_frame, off_t* get_frame) +{ + /* Default is to go to the beginning. */ + off_t ret = fr->audio_start; + *get_frame = 0; + + /* But we try to find something better. */ + /* Xing VBR TOC works with relative positions, both in terms of audio frames and stream bytes. + Thus, it only works when whe know the length of things. + Oh... I assume the offsets are relative to the _total_ file length. */ + if(fr->xing_toc != NULL && fr->track_frames > 0 && fr->rdat.filelen > 0) + { + /* One could round... */ + int toc_entry = (int) ((double)want_frame*100./fr->track_frames); + /* It is an index in the 100-entry table. */ + if(toc_entry < 0) toc_entry = 0; + if(toc_entry > 99) toc_entry = 99; + + /* Now estimate back what frame we get. */ + *get_frame = (off_t) ((double)toc_entry/100. * fr->track_frames); + fr->state_flags &= ~FRAME_ACCURATE; + fr->silent_resync = 1; + /* Question: Is the TOC for whole file size (with/without ID3) or the "real" audio data only? + ID3v1 info could also matter. */ + ret = (off_t) ((double)fr->xing_toc[toc_entry]/256.* fr->rdat.filelen); + } + else if(fr->mean_framesize > 0) + { /* Just guess with mean framesize (may be exact with CBR files). */ + /* Query filelen here or not? */ + fr->state_flags &= ~FRAME_ACCURATE; /* Fuzzy! 
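The Xing TOC branch of frame_fuzzy_find() above reduces a wanted frame to one of 100 coarse positions: the entry index is the playback percentage, the entry value a byte offset scaled to 0..255 of the file length. A standalone sketch of the same arithmetic (hypothetical helper, plain long offsets for brevity):

    // toc[] has 100 entries; toc[i] is a byte offset scaled to 0..255 of filelen.
    static long toc_guess(const unsigned char toc[100], long want_frame,
                          long track_frames, long filelen, long *got_frame)
    {
        int entry = (int)((double)want_frame * 100.0 / track_frames);
        if(entry < 0)  entry = 0;
        if(entry > 99) entry = 99;
        // precision is limited to 1/100 of the track, hence the fuzzy flag
        *got_frame = (long)((double)entry / 100.0 * track_frames);
        return (long)((double)toc[entry] / 256.0 * filelen);
    }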
*/ + fr->silent_resync = 1; + *get_frame = want_frame; + ret = (off_t) (fr->audio_start+fr->mean_framesize*want_frame); + } + debug5("fuzzy: want %li of %li, get %li at %li B of %li B", + (long)want_frame, (long)fr->track_frames, (long)*get_frame, (long)ret, (long)(fr->rdat.filelen-fr->audio_start)); + return ret; +} + +/* + find the best frame in index just before the wanted one, seek to there + then step to just before wanted one with read_frame + do not care tabout the stuff that was in buffer but not played back + everything that left the decoder is counted as played + + Decide if you want low latency reaction and accurate timing info or stable long-time playback with buffer! +*/ + +off_t frame_index_find(mpg123_handle *fr, off_t want_frame, off_t* get_frame) +{ + /* default is file start if no index position */ + off_t gopos = 0; + *get_frame = 0; +#ifdef FRAME_INDEX + /* Possibly use VBRI index, too? I'd need an example for this... */ + if(fr->index.fill) + { + /* find in index */ + size_t fi; + /* at index fi there is frame step*fi... */ + fi = want_frame/fr->index.step; + if(fi >= fr->index.fill) /* If we are beyond the end of frame index...*/ + { + /* When fuzzy seek is allowed, we have some limited tolerance for the frames we want to read rather then jump over. */ + if(fr->p.flags & MPG123_FUZZY && want_frame - (fr->index.fill-1)*fr->index.step > 10) + { + gopos = frame_fuzzy_find(fr, want_frame, get_frame); + if(gopos > fr->audio_start) return gopos; /* Only in that case, we have a useful guess. */ + /* Else... just continue, fuzzyness didn't help. */ + } + /* Use the last available position, slowly advancing from that one. */ + fi = fr->index.fill - 1; + } + /* We have index position, that yields frame and byte offsets. */ + *get_frame = fi*fr->index.step; + gopos = fr->index.data[fi]; + fr->state_flags |= FRAME_ACCURATE; /* When using the frame index, we are accurate. */ + } + else + { +#endif + if(fr->p.flags & MPG123_FUZZY) + return frame_fuzzy_find(fr, want_frame, get_frame); + /* A bit hackish here... but we need to be fresh when looking for the first header again. */ + fr->firsthead = 0; + fr->oldhead = 0; +#ifdef FRAME_INDEX + } +#endif + debug2("index: 0x%lx for frame %li", (unsigned long)gopos, (long) *get_frame); + return gopos; +} + +off_t frame_ins2outs(mpg123_handle *fr, off_t ins) +{ + off_t outs = 0; + switch(fr->down_sample) + { + case 0: +# ifndef NO_DOWNSAMPLE + case 1: + case 2: +# endif + outs = ins>>fr->down_sample; + break; +# ifndef NO_NTOM + case 3: outs = ntom_ins2outs(fr, ins); break; +# endif + default: error1("Bad down_sample (%i) ... should not be possible!!", fr->down_sample); + } + return outs; +} + +off_t frame_outs(mpg123_handle *fr, off_t num) +{ + off_t outs = 0; + switch(fr->down_sample) + { + case 0: +# ifndef NO_DOWNSAMPLE + case 1: + case 2: +# endif + outs = (fr->spf>>fr->down_sample)*num; + break; +#ifndef NO_NTOM + case 3: outs = ntom_frmouts(fr, num); break; +#endif + default: error1("Bad down_sample (%i) ... should not be possible!!", fr->down_sample); + } + return outs; +} + +/* Compute the number of output samples we expect from this frame. + This is either simple spf() or a tad more elaborate for ntom. */ +off_t frame_expect_outsamples(mpg123_handle *fr) +{ + off_t outs = 0; + switch(fr->down_sample) + { + case 0: +# ifndef NO_DOWNSAMPLE + case 1: + case 2: +# endif + outs = fr->spf>>fr->down_sample; + break; +#ifndef NO_NTOM + case 3: outs = ntom_frame_outsamples(fr); break; +#endif + default: error1("Bad down_sample (%i) ... 
should not be possible!!", fr->down_sample); + } + return outs; +} + +off_t frame_offset(mpg123_handle *fr, off_t outs) +{ + off_t num = 0; + switch(fr->down_sample) + { + case 0: +# ifndef NO_DOWNSAMPLE + case 1: + case 2: +# endif + num = outs/(fr->spf>>fr->down_sample); + break; +#ifndef NO_NTOM + case 3: num = ntom_frameoff(fr, outs); break; +#endif + default: error("Bad down_sample ... should not be possible!!"); + } + return num; +} + +#ifdef GAPLESS +/* input in _input_ samples */ +void frame_gapless_init(mpg123_handle *fr, off_t framecount, off_t bskip, off_t eskip) +{ + debug3("frame_gapless_init: given %"OFF_P" frames, skip %"OFF_P" and %"OFF_P, (off_p)framecount, (off_p)bskip, (off_p)eskip); + fr->gapless_frames = framecount; + if(fr->gapless_frames > 0 && bskip >=0 && eskip >= 0) + { + fr->begin_s = bskip+GAPLESS_DELAY; + fr->end_s = framecount*fr->spf-eskip+GAPLESS_DELAY; + } + else fr->begin_s = fr->end_s = 0; + /* These will get proper values later, from above plus resampling info. */ + fr->begin_os = 0; + fr->end_os = 0; + fr->fullend_os = 0; + debug2("frame_gapless_init: from %"OFF_P" to %"OFF_P" samples", (off_p)fr->begin_s, (off_p)fr->end_s); +} + +void frame_gapless_realinit(mpg123_handle *fr) +{ + fr->begin_os = frame_ins2outs(fr, fr->begin_s); + fr->end_os = frame_ins2outs(fr, fr->end_s); + if(fr->gapless_frames > 0) + fr->fullend_os = frame_ins2outs(fr, fr->gapless_frames*fr->spf); + else fr->fullend_os = 0; + + debug4("frame_gapless_realinit: from %"OFF_P" to %"OFF_P" samples (%"OFF_P", %"OFF_P")", (off_p)fr->begin_os, (off_p)fr->end_os, (off_p)fr->fullend_os, (off_p)fr->gapless_frames); +} + +/* At least note when there is trouble... */ +void frame_gapless_update(mpg123_handle *fr, off_t total_samples) +{ + off_t gapless_samples = fr->gapless_frames*fr->spf; + debug2("gapless update with new sample count %"OFF_P" as opposed to known %"OFF_P, total_samples, gapless_samples); + if(NOQUIET && total_samples != gapless_samples) + fprintf(stderr, "\nWarning: Real sample count %"OFF_P" differs from given gapless sample count %"OFF_P". Frankenstein stream?\n" + , total_samples, gapless_samples); + + if(gapless_samples > total_samples) + { + if(NOQUIET) error2("End sample count smaller than gapless end! (%"OFF_P" < %"OFF_P"). Disabling gapless mode from now on.", (off_p)total_samples, (off_p)fr->end_s); + /* This invalidates the current position... but what should I do? */ + frame_gapless_init(fr, -1, 0, 0); + frame_gapless_realinit(fr); + fr->lastframe = -1; + fr->lastoff = 0; + } +} + +#endif + +/* Compute the needed frame to ignore from, for getting accurate/consistent output for intended firstframe. */ +static off_t ignoreframe(mpg123_handle *fr) +{ + off_t preshift = fr->p.preframes; + /* Layer 3 _really_ needs at least one frame before. */ + if(fr->lay==3 && preshift < 1) preshift = 1; + /* Layer 1 & 2 reall do not need more than 2. */ + if(fr->lay!=3 && preshift > 2) preshift = 2; + + return fr->firstframe - preshift; +} + +/* The frame seek... This is not simply the seek to fe*fr->spf samples in output because we think of _input_ frames here. + Seek to frame offset 1 may be just seek to 200 samples offset in output since the beginning of first frame is delay/padding. + Hm, is that right? OK for the padding stuff, but actually, should the decoder delay be better totally hidden or not? + With gapless, even the whole frame position could be advanced further than requested (since Homey don't play dat). 
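To make the delay/padding bookkeeping above concrete, a short worked example with made-up numbers; GAPLESS_DELAY is the fixed 529-sample decoder delay defined in frame.h:

    // 100 layer III frames, 1152 samples each, encoder delay 576 (bskip),
    // end padding 1200 (eskip):
    //   begin_s = 576 + 529              = 1105
    //   end_s   = 100*1152 - 1200 + 529  = 114529
    // The decoder thus keeps raw output samples [1105, 114529), i.e.
    // 113424 samples of real audio; begin_os/end_os are these positions
    // after frame_ins2outs(), identical to begin_s/end_s at 1:1 resampling.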
*/ +void frame_set_frameseek(mpg123_handle *fr, off_t fe) +{ + fr->firstframe = fe; +#ifdef GAPLESS + if(fr->p.flags & MPG123_GAPLESS && fr->gapless_frames > 0) + { + /* Take care of the beginning... */ + off_t beg_f = frame_offset(fr, fr->begin_os); + if(fe <= beg_f) + { + fr->firstframe = beg_f; + fr->firstoff = fr->begin_os - frame_outs(fr, beg_f); + } + else fr->firstoff = 0; + /* The end is set once for a track at least, on the frame_set_frameseek called in get_next_frame() */ + if(fr->end_os > 0) + { + fr->lastframe = frame_offset(fr,fr->end_os); + fr->lastoff = fr->end_os - frame_outs(fr, fr->lastframe); + } else {fr->lastframe = -1; fr->lastoff = 0; } + } else { fr->firstoff = fr->lastoff = 0; fr->lastframe = -1; } +#endif + fr->ignoreframe = ignoreframe(fr); +#ifdef GAPLESS + debug5("frame_set_frameseek: begin at %li frames and %li samples, end at %li and %li; ignore from %li", + (long) fr->firstframe, (long) fr->firstoff, + (long) fr->lastframe, (long) fr->lastoff, (long) fr->ignoreframe); +#else + debug3("frame_set_frameseek: begin at %li frames, end at %li; ignore from %li", + (long) fr->firstframe, (long) fr->lastframe, (long) fr->ignoreframe); +#endif +} + +void frame_skip(mpg123_handle *fr) +{ +#ifndef NO_LAYER3 + if(fr->lay == 3) set_pointer(fr, 512); +#endif +} + +/* Sample accurate seek prepare for decoder. */ +/* This gets unadjusted output samples and takes resampling into account */ +void frame_set_seek(mpg123_handle *fr, off_t sp) +{ + fr->firstframe = frame_offset(fr, sp); + debug1("frame_set_seek: from %"OFF_P, fr->num); +#ifndef NO_NTOM + if(fr->down_sample == 3) ntom_set_ntom(fr, fr->firstframe); +#endif + fr->ignoreframe = ignoreframe(fr); +#ifdef GAPLESS /* The sample offset is used for non-gapless mode, too! */ + fr->firstoff = sp - frame_outs(fr, fr->firstframe); + debug5("frame_set_seek: begin at %li frames and %li samples, end at %li and %li; ignore from %li", + (long) fr->firstframe, (long) fr->firstoff, + (long) fr->lastframe, (long) fr->lastoff, (long) fr->ignoreframe); +#else + debug3("frame_set_seek: begin at %li frames, end at %li; ignore from %li", + (long) fr->firstframe, (long) fr->lastframe, (long) fr->ignoreframe); +#endif +} + +int attribute_align_arg mpg123_volume_change(mpg123_handle *mh, double change) +{ + if(mh == NULL) return MPG123_ERR; + return mpg123_volume(mh, change + (double) mh->p.outscale); +} + +int attribute_align_arg mpg123_volume(mpg123_handle *mh, double vol) +{ + if(mh == NULL) return MPG123_ERR; + + if(vol >= 0) mh->p.outscale = vol; + else mh->p.outscale = 0.; + + do_rva(mh); + return MPG123_OK; +} + +static int get_rva(mpg123_handle *fr, double *peak, double *gain) +{ + double p = -1; + double g = 0; + int ret = 0; + if(fr->p.rva) + { + int rt = 0; + /* Should one assume a zero RVA as no RVA? */ + if(fr->p.rva == 2 && fr->rva.level[1] != -1) rt = 1; + if(fr->rva.level[rt] != -1) + { + p = fr->rva.peak[rt]; + g = fr->rva.gain[rt]; + ret = 1; /* Success. 
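get_rva() only selects which stored gain/peak pair applies; do_rva() below turns the decibel gain into a linear factor and clamps it against the peak. A minimal sketch of that conversion (hypothetical helper, not the library function):

    #include <math.h>

    // dB gain -> linear factor, limited so a known peak cannot clip.
    static double rva_scale(double outscale, double gain_db, double peak)
    {
        double scale = outscale * pow(10.0, gain_db / 20.0);
        if(peak > 0.0 && peak * scale > 1.0)
            scale = 1.0 / peak;          // same clipping guard as do_rva()
        return scale;
    }

    // e.g. outscale 1.0, gain -6 dB, peak 0.9  ->  scale ~= 0.501 (no clamp needed)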
*/ + } + } + if(peak != NULL) *peak = p; + if(gain != NULL) *gain = g; + return ret; +} + +/* adjust the volume, taking both fr->outscale and rva values into account */ +void do_rva(mpg123_handle *fr) +{ + double peak = 0; + double gain = 0; + double newscale; + double rvafact = 1; + if(get_rva(fr, &peak, &gain)) + { + if(NOQUIET && fr->p.verbose > 1) fprintf(stderr, "Note: doing RVA with gain %f\n", gain); + rvafact = pow(10,gain/20); + } + + newscale = fr->p.outscale*rvafact; + + /* if peak is unknown (== 0) this check won't hurt */ + if((peak*newscale) > 1.0) + { + newscale = 1.0/peak; + warning2("limiting scale value to %f to prevent clipping with indicated peak factor of %f", newscale, peak); + } + /* first rva setting is forced with fr->lastscale < 0 */ + if(newscale != fr->lastscale || fr->decoder_change) + { + debug3("changing scale value from %f to %f (peak estimated to %f)", fr->lastscale != -1 ? fr->lastscale : fr->p.outscale, newscale, (double) (newscale*peak)); + fr->lastscale = newscale; + /* It may be too early, actually. */ + if(fr->make_decode_tables != NULL) fr->make_decode_tables(fr); /* the actual work */ + } +} + + +int attribute_align_arg mpg123_getvolume(mpg123_handle *mh, double *base, double *really, double *rva_db) +{ + if(mh == NULL) return MPG123_ERR; + if(base) *base = mh->p.outscale; + if(really) *really = mh->lastscale; + get_rva(mh, NULL, rva_db); + return MPG123_OK; +} + +off_t attribute_align_arg mpg123_framepos(mpg123_handle *mh) +{ + if(mh == NULL) return MPG123_ERR; + + return mh->input_offset; +} Index: include/reactos/libs/libmpg123/frame.h =================================================================== --- include/reactos/libs/libmpg123/frame.h (revision 63976) +++ include/reactos/libs/libmpg123/frame.h (working copy) @@ -38,16 +38,19 @@ /* the output buffer, used to be pcm_sample, pcm_point and audiobufsize */ struct outbuffer { - unsigned char *data; + unsigned char *data; /* main data pointer, aligned */ unsigned char *p; /* read pointer */ size_t fill; /* fill from read pointer */ - size_t size; /* that's actually more like a safe size, after we have more than that, flush it */ + size_t size; + unsigned char *rdata; /* unaligned base pointer */ }; struct audioformat { - int encoding; + int encoding; /* Final encoding, after post-processing. */ int encsize; /* Size of one sample in bytes, plain int should be fine here... */ + int dec_enc; /* Encoding of decoder synth. */ + int dec_encsize; /* Size of one decoder sample. */ int channels; long rate; }; @@ -77,10 +80,19 @@ long resync_limit; long index_size; /* Long, because: negative values have a meaning. */ long preframes; +#ifndef NO_FEEDER + long feedpool; + long feedbuffer; +#endif }; +enum frame_state_flags +{ + FRAME_ACCURATE = 0x1 /**< 0001 Positions are considered accurate. */ + ,FRAME_FRANKENSTEIN = 0x2 /**< 0010 This stream is concatenated. */ + ,FRAME_FRESH_DECODER = 0x4 /**< 0100 Decoder is fleshly initialized. */ +}; - /* There is a lot to condense here... many ints can be merged as flags; though the main space is still consumed by buffers. 
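The new frame_state_flags bit field replaces the old single-purpose accurate char; the seek functions above already use the standard idiom, repeated here as a tiny reference sketch:

    fr->state_flags |=  FRAME_ACCURATE;    // index hit: position is exact
    fr->state_flags &= ~FRAME_ACCURATE;    // fuzzy seek: position is a guess
    if(fr->state_flags & FRAME_ACCURATE)
        ;  // only then trust fr->num for sample-exact output and gapless cuts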
*/ struct mpg123_handle_struct { @@ -149,7 +161,7 @@ #ifdef OPT_MULTI #ifndef NO_LAYER3 -#if (defined OPT_3DNOW || defined OPT_3DNOWEXT) +#if (defined OPT_3DNOW_VINTAGE || defined OPT_3DNOWEXT_VINTAGE || defined OPT_SSE || defined OPT_X86_64 || defined OPT_AVX || defined OPT_NEON || defined OPT_NEON64) void (*the_dct36)(real *,real *,real *,real *,real *); #endif #endif @@ -184,6 +196,7 @@ int down_sample; int header_change; int lay; + long spf; /* cached count of samples per frame */ int (*do_layer)(mpg123_handle *); int error_protection; int bitrate_index; @@ -199,9 +212,10 @@ int freesize; /* free format frame size */ enum mpg123_vbr vbr; /* 1 if variable bitrate was detected */ off_t num; /* frame offset ... */ + off_t input_offset; /* byte offset of this frame in input stream */ off_t playnum; /* playback offset... includes repetitions, reset at seeks */ off_t audio_start; /* The byte offset in the file where audio data begins. */ - char accurate; /* Flag to see if we trust the frame number. */ + int state_flags; char silent_resync; /* Do not complain for the next n resyncs. */ unsigned char* xing_toc; /* The seek TOC from Xing header. */ int freeformat; @@ -237,7 +251,9 @@ unsigned char *bsbuf; unsigned char *bsbufold; int bsnum; + /* That is the header matching the last read frame body. */ unsigned long oldhead; + /* That is the header that is supposedly the first of the stream. */ unsigned long firsthead; int abr_rate; #ifdef FRAME_INDEX @@ -255,6 +271,7 @@ off_t lastframe; /* last frame to decode (for gapless or num_frames limit) */ off_t ignoreframe; /* frames to decode but discard before firstframe */ #ifdef GAPLESS + off_t gapless_frames; /* frame count for the gapless part */ off_t firstoff; /* number of samples to ignore from firstframe */ off_t lastoff; /* number of samples to use from lastframe */ off_t begin_s; /* overall begin offset in samples */ @@ -261,6 +278,7 @@ off_t begin_os; off_t end_s; /* overall end offset in samples */ off_t end_os; + off_t fullend_os; /* gapless_frames translated to output samples */ #endif unsigned int crc; /* Well, I need a safe 16bit type, actually. But wider doesn't hurt. */ struct reader *rd; /* pointer to the reading functions */ @@ -356,13 +374,11 @@ 1152 576 */ -#define spf(fr) ((fr)->lay == 1 ? 384 : ((fr)->lay==2 ? 1152 : ((fr)->lsf || (fr)->mpeg25 ? 576 : 1152))) #ifdef GAPLESS /* well, I take that one for granted... at least layer3 */ #define GAPLESS_DELAY 529 -/* still fine-tuning the "real music" window... see read_frame */ -void frame_gapless_init(mpg123_handle *fr, off_t b, off_t e); +void frame_gapless_init(mpg123_handle *fr, off_t framecount, off_t bskip, off_t eskip); void frame_gapless_realinit(mpg123_handle *fr); void frame_gapless_update(mpg123_handle *mh, off_t total_samples); /*void frame_gapless_position(mpg123_handle* fr); @@ -394,8 +410,4 @@ off_t frame_tell_seek(mpg123_handle *fr); /* Take a copy of the Xing VBR TOC for fuzzy seeking. 
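The spf(fr) macro removed above is replaced by the cached fr->spf field; for reference, the per-layer values it encodes, written out as a plain helper (a restatement of the old macro, not new behaviour):

    // Samples per frame: layer I = 384, layer II = 1152,
    // layer III = 1152 (MPEG 1) or 576 (MPEG 2 / 2.5, i.e. lsf || mpeg25).
    static long samples_per_frame(int lay, int lsf, int mpeg25)
    {
        if(lay == 1) return 384;
        if(lay == 2) return 1152;
        return (lsf || mpeg25) ? 576 : 1152;
    }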
*/ int frame_fill_toc(mpg123_handle *fr, unsigned char* in); - - -/* adjust volume to current outscale and rva values if wanted */ -void do_rva(mpg123_handle *fr); #endif Index: include/reactos/libs/libmpg123/gapless.h =================================================================== --- include/reactos/libs/libmpg123/gapless.h (revision 0) +++ include/reactos/libs/libmpg123/gapless.h (working copy) @@ -0,0 +1,119 @@ +/* + sampleadjust: gapless sample offset math + + copyright 1995-2012 by the mpg123 project - free software under the terms of the LGPL 2.1 + see COPYING and AUTHORS files in distribution or http://mpg123.org + + This is no stand-alone header, precisely to be able to fool it into using fake handle types for testing the math. +*/ + +#include "debug.h" + +#ifdef GAPLESS +/* From internal sample number to external. */ +static off_t sample_adjust(mpg123_handle *mh, off_t x) +{ + off_t s; + if(mh->p.flags & MPG123_GAPLESS) + { + /* It's a bit tricky to do this computation for the padding samples. + They are not there on the outside. */ + if(x > mh->end_os) + { + if(x < mh->fullend_os) + s = mh->end_os - mh->begin_os; + else + s = x - (mh->fullend_os - mh->end_os + mh->begin_os); + } + else + s = x - mh->begin_os; + } + else + s = x; + + return s; +} + +/* from external samples to internal */ +static off_t sample_unadjust(mpg123_handle *mh, off_t x) +{ + off_t s; + if(mh->p.flags & MPG123_GAPLESS) + { + s = x + mh->begin_os; + /* There is a hole; we don't create sample positions in there. + Jump from the end of the gapless track directly to after the padding. */ + if(s >= mh->end_os) + s += mh->fullend_os - mh->end_os; + } + else s = x; + + return s; +} + +/* + Take the buffer after a frame decode (strictly: it is the data from frame fr->num!) and cut samples out. + fr->buffer.fill may then be smaller than before... +*/ +static void frame_buffercheck(mpg123_handle *fr) +{ + /* When we have no accurate position, gapless code does not make sense. */ + if(!(fr->state_flags & FRAME_ACCURATE)) return; + + /* Get a grip on dirty streams that start with a gapless header. + Simply accept all data from frames that are too much, + they are supposedly attached to the stream after the fact. */ + if(fr->gapless_frames > 0 && fr->num >= fr->gapless_frames) return; + + /* Important: We first cut samples from the end, then cut from beginning (including left-shift of the buffer). + This order works also for the case where firstframe == lastframe. */ + + /* The last interesting (planned) frame: Only use some leading samples. + Note a difference from the below: The last frame and offset are unchanges by seeks. + The lastoff keeps being valid. */ + if(fr->lastframe > -1 && fr->num >= fr->lastframe) + { + /* There can be more than one frame of padding at the end, so we ignore the whole frame if we are beyond lastframe. */ + off_t byteoff = (fr->num == fr->lastframe) ? samples_to_bytes(fr, fr->lastoff) : 0; + if((off_t)fr->buffer.fill > byteoff) + { + fr->buffer.fill = byteoff; + } + if(VERBOSE3) fprintf(stderr, "\nNote: Cut frame %"OFF_P" buffer on end of stream to %"OFF_P" samples, fill now %"SIZE_P" bytes.\n", (off_p)fr->num, (off_p)(fr->num == fr->lastframe ? fr->lastoff : 0), (size_p)fr->buffer.fill); + } + + /* The first interesting frame: Skip some leading samples. 
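Reusing the numbers from the gapless example earlier (begin_os = 1105, end_os = 114529, fullend_os = 115200, 1:1 resampling assumed), sample_adjust() above maps internal decoder positions to the positions a caller sees:

    // x = 50000   -> 50000 - 1105                      = 48895   (normal case)
    // x = 115000  -> end_os - begin_os                 = 113424  (inside the padding hole)
    // x = 120000  -> 120000 - (115200 - 114529 + 1105) = 118224  (data appended after the padding)
    // sample_unadjust() is the inverse: add begin_os back and jump over the hole.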
*/ + if(fr->firstoff && fr->num == fr->firstframe) + { + off_t byteoff = samples_to_bytes(fr, fr->firstoff); + if((off_t)fr->buffer.fill > byteoff) + { + fr->buffer.fill -= byteoff; + /* buffer.p != buffer.data only for own buffer */ + debug6("cutting %li samples/%li bytes on begin, own_buffer=%i at %p=%p, buf[1]=%i", + (long)fr->firstoff, (long)byteoff, fr->own_buffer, (void*)fr->buffer.p, (void*)fr->buffer.data, ((short*)fr->buffer.p)[2]); + if(fr->own_buffer) fr->buffer.p = fr->buffer.data + byteoff; + else memmove(fr->buffer.data, fr->buffer.data + byteoff, fr->buffer.fill); + debug3("done cutting, buffer at %p =? %p, buf[1]=%i", + (void*)fr->buffer.p, (void*)fr->buffer.data, ((short*)fr->buffer.p)[2]); + } + else fr->buffer.fill = 0; + + if(VERBOSE3) fprintf(stderr, "\nNote: Cut frame %"OFF_P" buffer on beginning of stream by %"OFF_P" samples, fill now %"SIZE_P" bytes.\n", (off_p)fr->num, (off_p)fr->firstoff, (size_p)fr->buffer.fill); + /* We can only reach this frame again by seeking. And on seeking, firstoff will be recomputed. + So it is safe to null it here (and it makes the if() decision abort earlier). */ + fr->firstoff = 0; + } +} + +#define SAMPLE_ADJUST(mh,x) sample_adjust(mh,x) +#define SAMPLE_UNADJUST(mh,x) sample_unadjust(mh,x) +#define FRAME_BUFFERCHECK(mh) frame_buffercheck(mh) + +#else /* no gapless code included */ + +#define SAMPLE_ADJUST(mh,x) (x) +#define SAMPLE_UNADJUST(mh,x) (x) +#define FRAME_BUFFERCHECK(mh) + +#endif Index: include/reactos/libs/libmpg123/getcpuflags.h =================================================================== --- include/reactos/libs/libmpg123/getcpuflags.h (revision 63976) +++ include/reactos/libs/libmpg123/getcpuflags.h (working copy) @@ -12,7 +12,8 @@ /* standard level flags part 1 (ECX)*/ #define FLAG_SSE3 0x00000001 - +#define FLAG_SSSE3 0x00000200 +#define FLAG_AVX 0x1C000000 /* standard level flags part 2 (EDX) */ #define FLAG2_MMX 0x00800000 #define FLAG2_SSE 0x02000000 @@ -22,17 +23,23 @@ #define XFLAG_MMX 0x00800000 #define XFLAG_3DNOW 0x80000000 #define XFLAG_3DNOWEXT 0x40000000 +/* eXtended Control Register 0 */ +#define XCR0FLAG_AVX 0x00000006 + struct cpuflags { +#if defined(OPT_ARM) || defined(OPT_NEON) || defined(OPT_NEON64) + unsigned int has_neon; +#else unsigned int id; unsigned int std; unsigned int std2; unsigned int ext; + unsigned int xcr0_lo; +#endif }; -extern struct cpuflags cpu_flags; - unsigned int getcpuflags(struct cpuflags* cf); /* checks the family */ @@ -45,5 +52,9 @@ #define cpu_sse(s) (FLAG2_SSE & s.std2) #define cpu_sse2(s) (FLAG2_SSE2 & s.std2) #define cpu_sse3(s) (FLAG_SSE3 & s.std) +#define cpu_avx(s) ((FLAG_AVX & s.std) == FLAG_AVX && (XCR0FLAG_AVX & s.xcr0_lo) == XCR0FLAG_AVX) +#define cpu_fast_sse(s) ((((s.id & 0xf00)>>8) == 6 && FLAG_SSSE3 & s.std) /* for Intel/VIA; family 6 CPUs with SSSE3 */ || \ + (((s.id & 0xf00)>>8) == 0xf && (((s.id & 0x0ff00000)>>20) > 0 && ((s.id & 0x0ff00000)>>20) != 5))) /* for AMD; family > 0xF CPUs except Bobcat */ +#define cpu_neon(s) (s.has_neon) #endif Index: include/reactos/libs/libmpg123/getcpuflags.S =================================================================== --- include/reactos/libs/libmpg123/getcpuflags.S (revision 0) +++ include/reactos/libs/libmpg123/getcpuflags.S (working copy) @@ -0,0 +1,102 @@ +/* + getcpucpuflags: get cpuflags for ia32 + + copyright ?-2006 by the mpg123 project - free software under the terms of the LGPL 2.1 + see COPYING and AUTHORS files in distribution or http:#mpg123.org + initially written by KIMURA Takuhiro (for 3DNow!) 
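The cpu_avx macro above needs two independent facts: CPUID leaf 1 must report XSAVE, OSXSAVE and AVX (FLAG_AVX = 0x1C000000 in ECX), and XCR0 must show that the OS saves SSE and AVX state (bits 1 and 2). A C sketch of the same test that the assembly below gathers the raw bits for; hypothetical helper, GCC/clang on x86 assumed:

    #include <cpuid.h>   // __get_cpuid

    static int os_supports_avx(void)
    {
        unsigned int a, b, c, d, xcr0_lo, xcr0_hi;
        if(!__get_cpuid(1, &a, &b, &c, &d)) return 0;
        if((c & 0x1C000000u) != 0x1C000000u) return 0;   // XSAVE+OSXSAVE+AVX
        __asm__ volatile(".byte 0x0f, 0x01, 0xd0"        // xgetbv, ECX = 0
                         : "=a"(xcr0_lo), "=d"(xcr0_hi) : "c"(0));
        return (xcr0_lo & 0x6u) == 0x6u;                 // XMM + YMM state enabled
    }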
+ extended for general use by Thomas Orgis + + extern int getcpuid(struct cpuflags*) + or just + extern int getcpuid(unsigned int*) + where there is memory for 4 ints + -> the first set of idflags (basic cpu family info) + and the idflags, stdflags, std2flags, extflags written to the parameter + -> 0x00000000 (CPUID instruction not supported) +*/ + +#include "mangle.h" + +.text + ALIGN4 + +.globl ASM_NAME(getcpuflags) +/* .type ASM_NAME(getcpuflags),@function */ +ASM_NAME(getcpuflags): + pushl %ebp + movl %esp,%ebp + pushl %edx + pushl %ecx + pushl %ebx + pushl %esi +/* get the int pointer for storing the flags */ + movl 8(%ebp), %esi +/* does that one make sense? */ + movl $0x80000000,%eax +/* now save the flags and do a check for cpuid availability */ + pushfl + pushfl + popl %eax + movl %eax,%ebx +/* set that bit... */ + xorl $0x00200000,%eax + pushl %eax + popfl +/* ...and read back the flags to see if it is understood */ + pushfl + popl %eax + popfl + cmpl %ebx,%eax + je .Lnocpuid +/* In principle, I would have to check the CPU's identify first to be sure how to interpret the extended flags. */ +/* now get the info, first extended */ + movl $0x0, 12(%esi) /* clear value */ + movl $0x0, 16(%esi) /* clear value */ +/* only if supported... */ + movl $0x80000000, %eax + cpuid +/* IDT CPUs should not change EAX, generally I hope that non-3DNow cpus do not set a bogus support level here. */ + cmpl $0x80000001, %eax + jb .Lnoextended /* Skip ext check without minimal support level. */ +/* is supported, get flags value */ + movl $0x80000001,%eax + cpuid + movl %edx,12(%esi) +.Lnoextended: +/* then the other ones, called last to get the id flags in %eax for ret */ + movl $0x00000001,%eax + cpuid + movl %eax, (%esi) + movl %ecx, 4(%esi) + movl %edx, 8(%esi) +/* check if xgetbv instruction is available */ + test $0x04000000, %ecx + jz .Lend + test $0x08000000, %ecx + jz .Lend + xor %ecx, %ecx + .byte 0x0f, 0x01, 0xd0 /* xgetbv instruction */ + movl %eax, 16(%esi) + movl (%esi), %eax + jmp .Lend + ALIGN4 +.Lnocpuid: +/* error: set everything to zero */ + movl $0, %eax + movl $0, (%esi) + movl $0, 4(%esi) + movl $0, 8(%esi) + movl $0, 12(%esi) + movl $0, 16(%esi) + ALIGN4 +.Lend: +/* return value are the id flags, still stored in %eax */ + popl %esi + popl %ebx + popl %ecx + popl %edx + movl %ebp,%esp + popl %ebp + ret + +NONEXEC_STACK Index: include/reactos/libs/libmpg123/getcpuflags_arm.c =================================================================== --- include/reactos/libs/libmpg123/getcpuflags_arm.c (revision 0) +++ include/reactos/libs/libmpg123/getcpuflags_arm.c (working copy) @@ -0,0 +1,41 @@ +/* + getcpuflags_arm: get cpuflags for ARM + + copyright 1995-2014 by the mpg123 project - free software under the terms of the LGPL 2.1 + see COPYING and AUTHORS files in distribution or http://mpg123.org + initially written by Taihei Momma +*/ + +#include +#include +#include "mpg123lib_intern.h" +#include "getcpuflags.h" + +extern void check_neon(void); + +static sigjmp_buf jmpbuf; + +static void mpg123_arm_catch_sigill(int sig) +{ + siglongjmp(jmpbuf, 1); +} + +unsigned int getcpuflags(struct cpuflags* cf) +{ + struct sigaction act, act_old; + act.sa_handler = mpg123_arm_catch_sigill; + act.sa_flags = SA_RESTART; + sigemptyset(&act.sa_mask); + sigaction(SIGILL, &act, &act_old); + + cf->has_neon = 0; + + if(!sigsetjmp(jmpbuf, 1)) { + check_neon(); + cf->has_neon = 1; + } + + sigaction(SIGILL, &act_old, NULL); + + return 0; +} Index: include/reactos/libs/libmpg123/getcpuflags_x86_64.S 
=================================================================== --- include/reactos/libs/libmpg123/getcpuflags_x86_64.S (revision 0) +++ include/reactos/libs/libmpg123/getcpuflags_x86_64.S (working copy) @@ -0,0 +1,57 @@ +/* + getcpuflags_x86_64: get cpuflags for x86-64 + + copyright 1995-2013 by the mpg123 project - free software under the terms of the LGPL 2.1 + see COPYING and AUTHORS files in distribution or http://mpg123.org + initially written by Taihei Monma +*/ + +#include "mangle.h" + + .text + ALIGN4 + .globl ASM_NAME(getcpuflags) +ASM_NAME(getcpuflags): + push %rbp + mov %rsp, %rbp + push %rbx + +#ifdef IS_MSABI + push %rdi + mov %rcx, %rdi +#endif + + movl $0, 12(%rdi) + movl $0, 16(%rdi) + + mov $0x80000000, %eax + cpuid + cmp $0x80000001, %eax + jb 1f + mov $0x80000001, %eax + cpuid + movl %edx, 12(%rdi) +1: + mov $0x00000001, %eax + cpuid + movl %eax, (%rdi) + movl %ecx, 4(%rdi) + movl %edx, 8(%rdi) + test $0x04000000, %ecx + jz 2f + test $0x08000000, %ecx + jz 2f + xor %ecx, %ecx + .byte 0x0f, 0x01, 0xd0 /* xgetbv instruction */ + movl %eax, 16(%rdi) + movl (%rdi), %eax +2: +#ifdef IS_MSABI + pop %rdi +#endif + pop %rbx + mov %rbp, %rsp + pop %rbp + ret + +NONEXEC_STACK Index: include/reactos/libs/libmpg123/huffman.h =================================================================== --- include/reactos/libs/libmpg123/huffman.h (revision 63976) +++ include/reactos/libs/libmpg123/huffman.h (working copy) @@ -1,5 +1,5 @@ /* - huffman.h: huffman tables ... recalcualted to work with optimzed decoder scheme (MH) + huffman.h: huffman tables ... recalcualted to work with optimized decoder scheme (MH) copyright ?-2006 by the mpg123 project - free software under the terms of the LGPL 2.1 see COPYING and AUTHORS files in distribution or http://mpg123.org @@ -16,32 +16,32 @@ struct newhuff { unsigned int linbits; - short *table; + const short *table; }; -static short tab0[] = +static const short tab0[] = { 0 }; -static short tab1[] = +static const short tab1[] = { -5, -3, -1, 17, 1, 16, 0 }; -static short tab2[] = +static const short tab2[] = { -15, -11, -9, -5, -3, -1, 34, 2, 18, -1, 33, 32, 17, -1, 1, 16, 0 }; -static short tab3[] = +static const short tab3[] = { -13, -11, -9, -5, -3, -1, 34, 2, 18, -1, 33, 32, 16, 17, -1, 1, 0 }; -static short tab5[] = +static const short tab5[] = { -29, -25, -23, -15, -7, -5, -3, -1, 51, 35, 50, 49, -3, -1, 19, 3, -1, 48, 34, -3, -1, 18, 33, -1, 2, 32, 17, -1, 1, 16, @@ -48,7 +48,7 @@ 0 }; -static short tab6[] = +static const short tab6[] = { -25, -19, -13, -9, -5, -3, -1, 51, 3, 35, -1, 50, 48, -1, 19, 49, -3, -1, 34, 2, 18, -3, -1, 33, 32, 1, -1, 17, -1, 16, @@ -55,7 +55,7 @@ 0 }; -static short tab7[] = +static const short tab7[] = { -69, -65, -57, -39, -29, -17, -11, -7, -3, -1, 85, 69, -1, 84, 83, -1, 53, 68, -3, -1, 37, 82, 21, -5, -1, 81, -1, 5, 52, -1, @@ -64,7 +64,7 @@ -5, -1, 33, -1, 2, 32, 17, -1, 1, 16, 0 }; -static short tab8[] = +static const short tab8[] = { -65, -63, -59, -45, -31, -19, -13, -7, -5, -3, -1, 85, 84, 69, 83, -3, -1, 53, 68, 37, -3, -1, 82, 5, 21, -5, -1, 81, -1, 52, @@ -73,7 +73,7 @@ 2, 32, -1, 18, 33, 17, -3, -1, 1, 16, 0 }; -static short tab9[] = +static const short tab9[] = { -63, -53, -41, -29, -19, -11, -5, -3, -1, 85, 69, 53, -1, 83, -1, 84, 5, -3, -1, 68, 37, -1, 82, 21, -3, -1, 81, 52, -1, 67, @@ -82,7 +82,7 @@ 18, -1, 33, 32, -3, -1, 17, 1, -1, 16, 0 }; -static short tab10[] = +static const short tab10[] = { -125,-121,-111, -83, -55, -35, -21, -13, -7, -3, -1, 119, 103, -1, 118, 87, -3, -1, 117, 102, 71, 
-3, -1, 116, 86, -1, 101, 55, -9, -3, @@ -95,7 +95,7 @@ 2, 32, 17, -1, 1, 16, 0 }; -static short tab11[] = +static const short tab11[] = { -121,-113, -89, -59, -43, -27, -17, -7, -3, -1, 119, 103, -1, 118, 117, -3, -1, 102, 71, -1, 116, -1, 87, 85, -5, -3, -1, 86, 101, 55, @@ -108,7 +108,7 @@ 32, 17, -3, -1, 1, 16, 0 }; -static short tab12[] = +static const short tab12[] = { -115, -99, -73, -45, -27, -17, -9, -5, -3, -1, 119, 103, 118, -1, 87, 117, -3, -1, 102, 71, -1, 116, 101, -3, -1, 86, 55, -3, -1, 115, @@ -121,7 +121,7 @@ 2, 32, 0, 17, -1, 1, 16 }; -static short tab13[] = +static const short tab13[] = { -509,-503,-475,-405,-333,-265,-205,-153,-115, -83, -53, -35, -21, -13, -9, -7, -5, -3, -1, 254, 252, 253, 237, 255, -1, 239, 223, -3, -1, 238, @@ -160,7 +160,7 @@ 0 }; -static short tab15[] = +static const short tab15[] = { -495,-445,-355,-263,-183,-115, -77, -43, -27, -13, -7, -3, -1, 255, 239, -1, 254, 223, -1, 238, -1, 253, 207, -7, -3, -1, 252, 222, -1, 237, @@ -199,7 +199,7 @@ 0 }; -static short tab16[] = +static const short tab16[] = { -509,-503,-461,-323,-103, -37, -27, -15, -7, -3, -1, 239, 254, -1, 223, 253, -3, -1, 207, 252, -1, 191, 251, -5, -1, 175, -1, 250, 159, -3, @@ -238,7 +238,7 @@ 0 }; -static short tab24[] = +static const short tab24[] = { -451,-117, -43, -25, -15, -7, -3, -1, 239, 254, -1, 223, 253, -3, -1, 207, 252, -1, 191, 251, -5, -1, 250, -1, 175, 159, -1, 249, 248, -9, @@ -277,7 +277,7 @@ 0 }; -static short tab_c0[] = +static const short tab_c0[] = { -29, -21, -13, -7, -3, -1, 11, 15, -1, 13, 14, -3, -1, 7, 5, 9, -3, -1, 6, 3, -1, 10, 12, -3, -1, 2, 1, -1, 4, 8, @@ -284,7 +284,7 @@ 0 }; -static short tab_c1[] = +static const short tab_c1[] = { -15, -7, -3, -1, 15, 14, -1, 13, 12, -3, -1, 11, 10, -1, 9, 8, -7, -3, -1, 7, 6, -1, 5, 4, -3, -1, 3, 2, -1, 1, @@ -293,7 +293,7 @@ -static struct newhuff ht[] = +static const struct newhuff ht[] = { { /* 0 */ 0 , tab0 } , { /* 2 */ 0 , tab1 } , @@ -330,7 +330,7 @@ { /* 16 */ 13, tab24 } }; -static struct newhuff htc[] = +static const struct newhuff htc[] = { { /* 1 , 1 , */ 0 , tab_c0 } , { /* 1 , 1 , */ 0 , tab_c1 } Index: include/reactos/libs/libmpg123/icy.c =================================================================== --- include/reactos/libs/libmpg123/icy.c (revision 0) +++ include/reactos/libs/libmpg123/icy.c (working copy) @@ -0,0 +1,32 @@ +/* + icy: Puny code to pretend for a serious ICY data structure. 
+ + copyright 2007 by the mpg123 project - free software under the terms of the LGPL 2.1 + see COPYING and AUTHORS files in distribution or http://mpg123.org + initially written by Thomas Orgis +*/ + +#include "icy.h" + +void init_icy(struct icy_meta *icy) +{ + icy->data = NULL; +} + +void clear_icy(struct icy_meta *icy) +{ + if(icy->data != NULL) free(icy->data); + init_icy(icy); +} + +void reset_icy(struct icy_meta *icy) +{ + clear_icy(icy); + init_icy(icy); +} +/*void set_icy(struct icy_meta *icy, char* new_data) +{ + if(icy->data) free(icy->data); + icy->data = new_data; + icy->changed = 1; +}*/ Index: include/reactos/libs/libmpg123/icy.h =================================================================== --- include/reactos/libs/libmpg123/icy.h (revision 63976) +++ include/reactos/libs/libmpg123/icy.h (working copy) @@ -26,8 +26,11 @@ #else +#undef init_icy #define init_icy(a) +#undef clear_icy #define clear_icy(a) +#undef reset_icy #define reset_icy(a) #endif /* NO_ICY */ Index: include/reactos/libs/libmpg123/icy2utf8.c =================================================================== --- include/reactos/libs/libmpg123/icy2utf8.c (revision 0) +++ include/reactos/libs/libmpg123/icy2utf8.c (working copy) @@ -0,0 +1,438 @@ +/* mpg123 note: This is BSD-licensed code that is no problem for mpg123 usage under LGPL. + It's Free, understood? ;-) */ + +/* Another note: This code is basically written by Thorsten Glaser, + Thomas Orgis did just some rearrangements and comments. */ + +/*- + * Copyright (c) 2008 + * Thorsten Glaser + * + * Provided that these terms and disclaimer and all copyright notices + * are retained or reproduced in an accompanying document, permission + * is granted to deal in this work without restriction, including un- + * limited rights to use, publicly perform, distribute, sell, modify, + * merge, give away, or sublicence. + * + * This work is provided "AS IS" and WITHOUT WARRANTY of any kind, to + * the utmost extent permitted by applicable law, neither express nor + * implied; without malicious intent or gross negligence. In no event + * may a licensor, author or contributor be held liable for indirect, + * direct, other damage, loss, or other issues arising in any way out + * of dealing in the work, even if advised of the possibility of such + * damage or existence of a defect, except proven that it results out + * of said person's immediate fault when using the work as intended. + *- + * Convert from ICY encoding (windows-1252 codepage) to UTF-8 + */ + +/* Includes string and stdlib headers... */ +#include "compat.h" + +/* ThOr: too lazy for this type check; also we use char/short all around anyway. + Of cource, it would be the proper way to use _these_ kind of types all around. 
*/ +#define uint8_t unsigned char +#define uint16_t unsigned short + +static const uint8_t cp1252_utf8[] = { + /* 0x00 @ 0 */ 0x00, + /* 0x01 @ 1 */ 0x01, + /* 0x02 @ 2 */ 0x02, + /* 0x03 @ 3 */ 0x03, + /* 0x04 @ 4 */ 0x04, + /* 0x05 @ 5 */ 0x05, + /* 0x06 @ 6 */ 0x06, + /* 0x07 @ 7 */ 0x07, + /* 0x08 @ 8 */ 0x08, + /* 0x09 @ 9 */ 0x09, + /* 0x0A @ 10 */ 0x0A, + /* 0x0B @ 11 */ 0x0B, + /* 0x0C @ 12 */ 0x0C, + /* 0x0D @ 13 */ 0x0D, + /* 0x0E @ 14 */ 0x0E, + /* 0x0F @ 15 */ 0x0F, + /* 0x10 @ 16 */ 0x10, + /* 0x11 @ 17 */ 0x11, + /* 0x12 @ 18 */ 0x12, + /* 0x13 @ 19 */ 0x13, + /* 0x14 @ 20 */ 0x14, + /* 0x15 @ 21 */ 0x15, + /* 0x16 @ 22 */ 0x16, + /* 0x17 @ 23 */ 0x17, + /* 0x18 @ 24 */ 0x18, + /* 0x19 @ 25 */ 0x19, + /* 0x1A @ 26 */ 0x1A, + /* 0x1B @ 27 */ 0x1B, + /* 0x1C @ 28 */ 0x1C, + /* 0x1D @ 29 */ 0x1D, + /* 0x1E @ 30 */ 0x1E, + /* 0x1F @ 31 */ 0x1F, + /* 0x20 @ 32 */ 0x20, + /* 0x21 @ 33 */ 0x21, + /* 0x22 @ 34 */ 0x22, + /* 0x23 @ 35 */ 0x23, + /* 0x24 @ 36 */ 0x24, + /* 0x25 @ 37 */ 0x25, + /* 0x26 @ 38 */ 0x26, + /* 0x27 @ 39 */ 0x27, + /* 0x28 @ 40 */ 0x28, + /* 0x29 @ 41 */ 0x29, + /* 0x2A @ 42 */ 0x2A, + /* 0x2B @ 43 */ 0x2B, + /* 0x2C @ 44 */ 0x2C, + /* 0x2D @ 45 */ 0x2D, + /* 0x2E @ 46 */ 0x2E, + /* 0x2F @ 47 */ 0x2F, + /* 0x30 @ 48 */ 0x30, + /* 0x31 @ 49 */ 0x31, + /* 0x32 @ 50 */ 0x32, + /* 0x33 @ 51 */ 0x33, + /* 0x34 @ 52 */ 0x34, + /* 0x35 @ 53 */ 0x35, + /* 0x36 @ 54 */ 0x36, + /* 0x37 @ 55 */ 0x37, + /* 0x38 @ 56 */ 0x38, + /* 0x39 @ 57 */ 0x39, + /* 0x3A @ 58 */ 0x3A, + /* 0x3B @ 59 */ 0x3B, + /* 0x3C @ 60 */ 0x3C, + /* 0x3D @ 61 */ 0x3D, + /* 0x3E @ 62 */ 0x3E, + /* 0x3F @ 63 */ 0x3F, + /* 0x40 @ 64 */ 0x40, + /* 0x41 @ 65 */ 0x41, + /* 0x42 @ 66 */ 0x42, + /* 0x43 @ 67 */ 0x43, + /* 0x44 @ 68 */ 0x44, + /* 0x45 @ 69 */ 0x45, + /* 0x46 @ 70 */ 0x46, + /* 0x47 @ 71 */ 0x47, + /* 0x48 @ 72 */ 0x48, + /* 0x49 @ 73 */ 0x49, + /* 0x4A @ 74 */ 0x4A, + /* 0x4B @ 75 */ 0x4B, + /* 0x4C @ 76 */ 0x4C, + /* 0x4D @ 77 */ 0x4D, + /* 0x4E @ 78 */ 0x4E, + /* 0x4F @ 79 */ 0x4F, + /* 0x50 @ 80 */ 0x50, + /* 0x51 @ 81 */ 0x51, + /* 0x52 @ 82 */ 0x52, + /* 0x53 @ 83 */ 0x53, + /* 0x54 @ 84 */ 0x54, + /* 0x55 @ 85 */ 0x55, + /* 0x56 @ 86 */ 0x56, + /* 0x57 @ 87 */ 0x57, + /* 0x58 @ 88 */ 0x58, + /* 0x59 @ 89 */ 0x59, + /* 0x5A @ 90 */ 0x5A, + /* 0x5B @ 91 */ 0x5B, + /* 0x5C @ 92 */ 0x5C, + /* 0x5D @ 93 */ 0x5D, + /* 0x5E @ 94 */ 0x5E, + /* 0x5F @ 95 */ 0x5F, + /* 0x60 @ 96 */ 0x60, + /* 0x61 @ 97 */ 0x61, + /* 0x62 @ 98 */ 0x62, + /* 0x63 @ 99 */ 0x63, + /* 0x64 @ 100 */ 0x64, + /* 0x65 @ 101 */ 0x65, + /* 0x66 @ 102 */ 0x66, + /* 0x67 @ 103 */ 0x67, + /* 0x68 @ 104 */ 0x68, + /* 0x69 @ 105 */ 0x69, + /* 0x6A @ 106 */ 0x6A, + /* 0x6B @ 107 */ 0x6B, + /* 0x6C @ 108 */ 0x6C, + /* 0x6D @ 109 */ 0x6D, + /* 0x6E @ 110 */ 0x6E, + /* 0x6F @ 111 */ 0x6F, + /* 0x70 @ 112 */ 0x70, + /* 0x71 @ 113 */ 0x71, + /* 0x72 @ 114 */ 0x72, + /* 0x73 @ 115 */ 0x73, + /* 0x74 @ 116 */ 0x74, + /* 0x75 @ 117 */ 0x75, + /* 0x76 @ 118 */ 0x76, + /* 0x77 @ 119 */ 0x77, + /* 0x78 @ 120 */ 0x78, + /* 0x79 @ 121 */ 0x79, + /* 0x7A @ 122 */ 0x7A, + /* 0x7B @ 123 */ 0x7B, + /* 0x7C @ 124 */ 0x7C, + /* 0x7D @ 125 */ 0x7D, + /* 0x7E @ 126 */ 0x7E, + /* 0x7F @ 127 */ 0x7F, + /* 0x80 @ 128 */ 0xE2, 0x82, 0xAC, + /* 0x81 @ 131 */ 0xEF, 0xBF, 0xBD, + /* 0x82 @ 134 */ 0xE2, 0x80, 0x9A, + /* 0x83 @ 137 */ 0xC6, 0x92, + /* 0x84 @ 139 */ 0xE2, 0x80, 0x9E, + /* 0x85 @ 142 */ 0xE2, 0x80, 0xA6, + /* 0x86 @ 145 */ 0xE2, 0x80, 0xA0, + /* 0x87 @ 148 */ 0xE2, 0x80, 0xA1, + /* 0x88 @ 151 */ 0xCB, 0x86, + /* 0x89 @ 153 */ 0xE2, 0x80, 
0xB0, + /* 0x8A @ 156 */ 0xC5, 0xA0, + /* 0x8B @ 158 */ 0xE2, 0x80, 0xB9, + /* 0x8C @ 161 */ 0xC5, 0x92, + /* 0x8D @ 163 */ 0xEF, 0xBF, 0xBD, + /* 0x8E @ 166 */ 0xC5, 0xBD, + /* 0x8F @ 168 */ 0xEF, 0xBF, 0xBD, + /* 0x90 @ 171 */ 0xEF, 0xBF, 0xBD, + /* 0x91 @ 174 */ 0xE2, 0x80, 0x98, + /* 0x92 @ 177 */ 0xE2, 0x80, 0x99, + /* 0x93 @ 180 */ 0xE2, 0x80, 0x9C, + /* 0x94 @ 183 */ 0xE2, 0x80, 0x9D, + /* 0x95 @ 186 */ 0xE2, 0x80, 0xA2, + /* 0x96 @ 189 */ 0xE2, 0x80, 0x93, + /* 0x97 @ 192 */ 0xE2, 0x80, 0x94, + /* 0x98 @ 195 */ 0xCB, 0x9C, + /* 0x99 @ 197 */ 0xE2, 0x84, 0xA2, + /* 0x9A @ 200 */ 0xC5, 0xA1, + /* 0x9B @ 202 */ 0xE2, 0x80, 0xBA, + /* 0x9C @ 205 */ 0xC5, 0x93, + /* 0x9D @ 207 */ 0xEF, 0xBF, 0xBD, + /* 0x9E @ 210 */ 0xC5, 0xBE, + /* 0x9F @ 212 */ 0xC5, 0xB8, + /* 0xA0 @ 214 */ 0xC2, 0xA0, + /* 0xA1 @ 216 */ 0xC2, 0xA1, + /* 0xA2 @ 218 */ 0xC2, 0xA2, + /* 0xA3 @ 220 */ 0xC2, 0xA3, + /* 0xA4 @ 222 */ 0xC2, 0xA4, + /* 0xA5 @ 224 */ 0xC2, 0xA5, + /* 0xA6 @ 226 */ 0xC2, 0xA6, + /* 0xA7 @ 228 */ 0xC2, 0xA7, + /* 0xA8 @ 230 */ 0xC2, 0xA8, + /* 0xA9 @ 232 */ 0xC2, 0xA9, + /* 0xAA @ 234 */ 0xC2, 0xAA, + /* 0xAB @ 236 */ 0xC2, 0xAB, + /* 0xAC @ 238 */ 0xC2, 0xAC, + /* 0xAD @ 240 */ 0xC2, 0xAD, + /* 0xAE @ 242 */ 0xC2, 0xAE, + /* 0xAF @ 244 */ 0xC2, 0xAF, + /* 0xB0 @ 246 */ 0xC2, 0xB0, + /* 0xB1 @ 248 */ 0xC2, 0xB1, + /* 0xB2 @ 250 */ 0xC2, 0xB2, + /* 0xB3 @ 252 */ 0xC2, 0xB3, + /* 0xB4 @ 254 */ 0xC2, 0xB4, + /* 0xB5 @ 256 */ 0xC2, 0xB5, + /* 0xB6 @ 258 */ 0xC2, 0xB6, + /* 0xB7 @ 260 */ 0xC2, 0xB7, + /* 0xB8 @ 262 */ 0xC2, 0xB8, + /* 0xB9 @ 264 */ 0xC2, 0xB9, + /* 0xBA @ 266 */ 0xC2, 0xBA, + /* 0xBB @ 268 */ 0xC2, 0xBB, + /* 0xBC @ 270 */ 0xC2, 0xBC, + /* 0xBD @ 272 */ 0xC2, 0xBD, + /* 0xBE @ 274 */ 0xC2, 0xBE, + /* 0xBF @ 276 */ 0xC2, 0xBF, + /* 0xC0 @ 278 */ 0xC3, 0x80, + /* 0xC1 @ 280 */ 0xC3, 0x81, + /* 0xC2 @ 282 */ 0xC3, 0x82, + /* 0xC3 @ 284 */ 0xC3, 0x83, + /* 0xC4 @ 286 */ 0xC3, 0x84, + /* 0xC5 @ 288 */ 0xC3, 0x85, + /* 0xC6 @ 290 */ 0xC3, 0x86, + /* 0xC7 @ 292 */ 0xC3, 0x87, + /* 0xC8 @ 294 */ 0xC3, 0x88, + /* 0xC9 @ 296 */ 0xC3, 0x89, + /* 0xCA @ 298 */ 0xC3, 0x8A, + /* 0xCB @ 300 */ 0xC3, 0x8B, + /* 0xCC @ 302 */ 0xC3, 0x8C, + /* 0xCD @ 304 */ 0xC3, 0x8D, + /* 0xCE @ 306 */ 0xC3, 0x8E, + /* 0xCF @ 308 */ 0xC3, 0x8F, + /* 0xD0 @ 310 */ 0xC3, 0x90, + /* 0xD1 @ 312 */ 0xC3, 0x91, + /* 0xD2 @ 314 */ 0xC3, 0x92, + /* 0xD3 @ 316 */ 0xC3, 0x93, + /* 0xD4 @ 318 */ 0xC3, 0x94, + /* 0xD5 @ 320 */ 0xC3, 0x95, + /* 0xD6 @ 322 */ 0xC3, 0x96, + /* 0xD7 @ 324 */ 0xC3, 0x97, + /* 0xD8 @ 326 */ 0xC3, 0x98, + /* 0xD9 @ 328 */ 0xC3, 0x99, + /* 0xDA @ 330 */ 0xC3, 0x9A, + /* 0xDB @ 332 */ 0xC3, 0x9B, + /* 0xDC @ 334 */ 0xC3, 0x9C, + /* 0xDD @ 336 */ 0xC3, 0x9D, + /* 0xDE @ 338 */ 0xC3, 0x9E, + /* 0xDF @ 340 */ 0xC3, 0x9F, + /* 0xE0 @ 342 */ 0xC3, 0xA0, + /* 0xE1 @ 344 */ 0xC3, 0xA1, + /* 0xE2 @ 346 */ 0xC3, 0xA2, + /* 0xE3 @ 348 */ 0xC3, 0xA3, + /* 0xE4 @ 350 */ 0xC3, 0xA4, + /* 0xE5 @ 352 */ 0xC3, 0xA5, + /* 0xE6 @ 354 */ 0xC3, 0xA6, + /* 0xE7 @ 356 */ 0xC3, 0xA7, + /* 0xE8 @ 358 */ 0xC3, 0xA8, + /* 0xE9 @ 360 */ 0xC3, 0xA9, + /* 0xEA @ 362 */ 0xC3, 0xAA, + /* 0xEB @ 364 */ 0xC3, 0xAB, + /* 0xEC @ 366 */ 0xC3, 0xAC, + /* 0xED @ 368 */ 0xC3, 0xAD, + /* 0xEE @ 370 */ 0xC3, 0xAE, + /* 0xEF @ 372 */ 0xC3, 0xAF, + /* 0xF0 @ 374 */ 0xC3, 0xB0, + /* 0xF1 @ 376 */ 0xC3, 0xB1, + /* 0xF2 @ 378 */ 0xC3, 0xB2, + /* 0xF3 @ 380 */ 0xC3, 0xB3, + /* 0xF4 @ 382 */ 0xC3, 0xB4, + /* 0xF5 @ 384 */ 0xC3, 0xB5, + /* 0xF6 @ 386 */ 0xC3, 0xB6, + /* 0xF7 @ 388 */ 0xC3, 0xB7, + /* 0xF8 @ 390 */ 0xC3, 0xB8, + /* 0xF9 @ 392 */ 
0xC3, 0xB9, + /* 0xFA @ 394 */ 0xC3, 0xBA, + /* 0xFB @ 396 */ 0xC3, 0xBB, + /* 0xFC @ 398 */ 0xC3, 0xBC, + /* 0xFD @ 400 */ 0xC3, 0xBD, + /* 0xFE @ 402 */ 0xC3, 0xBE, + /* 0xFF @ 404 */ 0xC3, 0xBF, +}; + +static const uint16_t tblofs[257] = { + /* 0x00 */ 0, 1, 2, 3, 4, 5, 6, 7, + /* 0x08 */ 8, 9, 10, 11, 12, 13, 14, 15, + /* 0x10 */ 16, 17, 18, 19, 20, 21, 22, 23, + /* 0x18 */ 24, 25, 26, 27, 28, 29, 30, 31, + /* 0x20 */ 32, 33, 34, 35, 36, 37, 38, 39, + /* 0x28 */ 40, 41, 42, 43, 44, 45, 46, 47, + /* 0x30 */ 48, 49, 50, 51, 52, 53, 54, 55, + /* 0x38 */ 56, 57, 58, 59, 60, 61, 62, 63, + /* 0x40 */ 64, 65, 66, 67, 68, 69, 70, 71, + /* 0x48 */ 72, 73, 74, 75, 76, 77, 78, 79, + /* 0x50 */ 80, 81, 82, 83, 84, 85, 86, 87, + /* 0x58 */ 88, 89, 90, 91, 92, 93, 94, 95, + /* 0x60 */ 96, 97, 98, 99, 100, 101, 102, 103, + /* 0x68 */ 104, 105, 106, 107, 108, 109, 110, 111, + /* 0x70 */ 112, 113, 114, 115, 116, 117, 118, 119, + /* 0x78 */ 120, 121, 122, 123, 124, 125, 126, 127, + /* 0x80 */ 128, 131, 134, 137, 139, 142, 145, 148, + /* 0x88 */ 151, 153, 156, 158, 161, 163, 166, 168, + /* 0x90 */ 171, 174, 177, 180, 183, 186, 189, 192, + /* 0x98 */ 195, 197, 200, 202, 205, 207, 210, 212, + /* 0xA0 */ 214, 216, 218, 220, 222, 224, 226, 228, + /* 0xA8 */ 230, 232, 234, 236, 238, 240, 242, 244, + /* 0xB0 */ 246, 248, 250, 252, 254, 256, 258, 260, + /* 0xB8 */ 262, 264, 266, 268, 270, 272, 274, 276, + /* 0xC0 */ 278, 280, 282, 284, 286, 288, 290, 292, + /* 0xC8 */ 294, 296, 298, 300, 302, 304, 306, 308, + /* 0xD0 */ 310, 312, 314, 316, 318, 320, 322, 324, + /* 0xD8 */ 326, 328, 330, 332, 334, 336, 338, 340, + /* 0xE0 */ 342, 344, 346, 348, 350, 352, 354, 356, + /* 0xE8 */ 358, 360, 362, 364, 366, 368, 370, 372, + /* 0xF0 */ 374, 376, 378, 380, 382, 384, 386, 388, + /* 0xF8 */ 390, 392, 394, 396, 398, 400, 402, 404, + /* sizeof (cp1252_utf8) */ 406 +}; + +/* Check if a string qualifies as UTF-8. */ +static int +is_utf8(const char* src) +{ + uint8_t ch; + size_t i; + const uint8_t* s = (const uint8_t*) src; + + /* We make a loop over every character, until we find a null one. + Remember: The string is supposed to end with a NUL, so ahead checks are safe. */ + while ((ch = *s++)) { + /* Ye olde 7bit ASCII chars 'rr fine for anything */ + if(ch < 0x80) continue; + + /* Now, we watch out for non-UTF conform sequences. */ + else if ((ch < 0xC2) || (ch > 0xFD)) + return 0; + /* check for some misformed sequences */ + if (((ch == 0xC2) && (s[0] < 0xA0)) || + ((ch == 0xEF) && (s[0] == 0xBF) && (s[1] > 0xBD))) + /* XXX add more for outside the BMP */ + return 0; + + /* Check the continuation bytes. */ + if (ch < 0xE0) i = 1; + else if (ch < 0xF0) i = 2; + else if (ch < 0xF8) i = 3; + else if (ch < 0xFC) i = 4; + else + i = 5; + + while (i--) + if ((*s++ & 0xC0) != 0x80) + return 0; + } + + /* If no check failed, the string indeed looks like valid UTF-8. */ + return 1; +} + +/* The main conversion routine. + ICY in CP-1252 (or UTF-8 alreay) to UTF-8 encoded string. + If force is applied, it will always encode to UTF-8, without checking. */ +char * +icy2utf8(const char *src, int force) +{ + const uint8_t *s = (const uint8_t *)src; + size_t srclen, dstlen, i, k; + uint8_t ch, *d; + char *dst; + + /* Some funny streams from Apple/iTunes give ICY info in UTF-8 already. + So, be prepared and don't try to re-encode such. Unless forced. 
*/ + if(!force && is_utf8(src)) return (strdup(src)); + + srclen = strlen(src) + 1; + /* allocate conservatively */ + if ((d = malloc(srclen * 3)) == NULL) + return (NULL); + + i = 0; + dstlen = 0; + while (i < srclen) { + ch = s[i++]; + k = tblofs[ch]; + while (k < tblofs[ch + 1]) + d[dstlen++] = cp1252_utf8[k++]; + } + + /* dstlen includes trailing NUL since srclen also does */ + if ((dst = realloc(d, dstlen)) == NULL) { + free(d); + return (NULL); + } + return (dst); +} + +/* This stuff is for testing only. */ +#ifdef TEST +static const char intext[] = "\225 Gr\374\337e kosten 0,55 \200\205"; + +#include + +int +main(void) +{ + char *t, *t2; + + if ((t = icy2utf8(intext, 0)) == NULL) { + fprintf(stderr, "out of memory\n"); + return (1); + } + + /* make sure it won't be converted twice */ + if ((t2 = icy2utf8(t), 0) == NULL) { + fprintf(stderr, "out of memory\n"); + return (1); + } + + printf("Result is:\t\343\200\214%s\343\200\215\n" + "\t\t\343\200\214%s\343\200\215\n", t, t2); + + free(t); + free(t2); + return (0); +} +#endif Index: include/reactos/libs/libmpg123/id3.c =================================================================== --- include/reactos/libs/libmpg123/id3.c (revision 0) +++ include/reactos/libs/libmpg123/id3.c (working copy) @@ -0,0 +1,1123 @@ +/* + id3: ID3v2.3 and ID3v2.4 parsing (a relevant subset) + + copyright 2006-2013 by the mpg123 project - free software under the terms of the LGPL 2.1 + see COPYING and AUTHORS files in distribution or http://mpg123.org + initially written by Thomas Orgis +*/ + +#include "mpg123lib_intern.h" +#include "id3.h" +#include "debug.h" + +#ifndef NO_ID3V2 /* Only the main parsing routine will always be there. */ + +/* We know the usual text frames plus some specifics. */ +#define KNOWN_FRAMES 5 +static const char frame_type[KNOWN_FRAMES][5] = { "COMM", "TXXX", "RVA2", "USLT", "APIC" }; +enum frame_types { unknown = -2, text = -1, comment, extra, rva2, uslt, picture }; + +/* UTF support definitions */ + +typedef void (*text_converter)(mpg123_string *sb, const unsigned char* source, size_t len, const int noquiet); + +static void convert_latin1 (mpg123_string *sb, const unsigned char* source, size_t len, const int noquiet); +static void convert_utf16bom(mpg123_string *sb, const unsigned char* source, size_t len, const int noquiet); +static void convert_utf8 (mpg123_string *sb, const unsigned char* source, size_t len, const int noquiet); + +static const text_converter text_converters[4] = +{ + convert_latin1, + /* We always check for (multiple) BOM in 16bit unicode. Without BOM, UTF16 BE is the default. + Errors in encoding are detected anyway. */ + convert_utf16bom, + convert_utf16bom, + convert_utf8 +}; + +static const unsigned int encoding_widths[4] = { 1, 2, 2, 1 }; + +/* the code starts here... */ + +static void null_id3_links(mpg123_handle *fr) +{ + fr->id3v2.title = NULL; + fr->id3v2.artist = NULL; + fr->id3v2.album = NULL; + fr->id3v2.year = NULL; + fr->id3v2.genre = NULL; + fr->id3v2.comment = NULL; +} + +void init_id3(mpg123_handle *fr) +{ + fr->id3v2.version = 0; /* nothing there */ + null_id3_links(fr); + fr->id3v2.comments = 0; + fr->id3v2.comment_list = NULL; + fr->id3v2.texts = 0; + fr->id3v2.text = NULL; + fr->id3v2.extras = 0; + fr->id3v2.extra = NULL; + fr->id3v2.pictures = 0; + fr->id3v2.picture = NULL; +} + +/* Managing of the text, comment and extra lists. */ + +/* Initialize one element. 
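Each CP-1252 byte expands to one, two or three UTF-8 bytes through the pair of tables above; tblofs[ch] and tblofs[ch+1] bracket the replacement sequence. A few concrete lookups, plus the typical call (the caller owns and frees the result):

    // 0x41 'A' -> 0x41              (ASCII range copies through unchanged)
    // 0x80 '€' -> 0xE2 0x82 0xAC    (cp1252_utf8[128..130])
    // 0x95 '•' -> 0xE2 0x80 0xA2    (cp1252_utf8[186..188])
    //
    //   char *u = icy2utf8(icy_string, 0);  // 0 = keep input that already is UTF-8
    //   ...
    //   free(u);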
*/ +static void init_mpg123_text(mpg123_text *txt) +{ + mpg123_init_string(&txt->text); + mpg123_init_string(&txt->description); + txt->id[0] = 0; + txt->id[1] = 0; + txt->id[2] = 0; + txt->id[3] = 0; + txt->lang[0] = 0; + txt->lang[1] = 0; + txt->lang[2] = 0; +} + +static void init_mpg123_picture(mpg123_picture *pic) +{ + mpg123_init_string(&pic->mime_type); + mpg123_init_string(&pic->description); + pic->type = 0; + pic->size = 0; + pic->data = NULL; +} + +/* Free memory of one element. */ +static void free_mpg123_text(mpg123_text *txt) +{ + mpg123_free_string(&txt->text); + mpg123_free_string(&txt->description); +} + +static void free_mpg123_picture(mpg123_picture * pic) +{ + mpg123_free_string(&pic->mime_type); + mpg123_free_string(&pic->description); + if (pic->data != NULL) + free(pic->data); +} + +/* Free memory of whole list. */ +#define free_comment(mh) free_id3_text(&((mh)->id3v2.comment_list), &((mh)->id3v2.comments)) +#define free_text(mh) free_id3_text(&((mh)->id3v2.text), &((mh)->id3v2.texts)) +#define free_extra(mh) free_id3_text(&((mh)->id3v2.extra), &((mh)->id3v2.extras)) +#define free_picture(mh) free_id3_picture(&((mh)->id3v2.picture), &((mh)->id3v2.pictures)) +static void free_id3_text(mpg123_text **list, size_t *size) +{ + size_t i; + for(i=0; i<*size; ++i) free_mpg123_text(&((*list)[i])); + + free(*list); + *list = NULL; + *size = 0; +} +static void free_id3_picture(mpg123_picture **list, size_t *size) +{ + size_t i; + for(i=0; i<*size; ++i) free_mpg123_picture(&((*list)[i])); + + free(*list); + *list = NULL; + *size = 0; +} + +/* Add items to the list. */ +#define add_comment(mh) add_id3_text(&((mh)->id3v2.comment_list), &((mh)->id3v2.comments)) +#define add_text(mh) add_id3_text(&((mh)->id3v2.text), &((mh)->id3v2.texts)) +#define add_extra(mh) add_id3_text(&((mh)->id3v2.extra), &((mh)->id3v2.extras)) +#define add_picture(mh) add_id3_picture(&((mh)->id3v2.picture), &((mh)->id3v2.pictures)) +static mpg123_text *add_id3_text(mpg123_text **list, size_t *size) +{ + mpg123_text *x = safe_realloc(*list, sizeof(mpg123_text)*(*size+1)); + if(x == NULL) return NULL; /* bad */ + + *list = x; + *size += 1; + init_mpg123_text(&((*list)[*size-1])); + + return &((*list)[*size-1]); /* Return pointer to the added text. */ +} +static mpg123_picture *add_id3_picture(mpg123_picture **list, size_t *size) +{ + mpg123_picture *x = safe_realloc(*list, sizeof(mpg123_picture)*(*size+1)); + if(x == NULL) return NULL; /* bad */ + + *list = x; + *size += 1; + init_mpg123_picture(&((*list)[*size-1])); + + return &((*list)[*size-1]); /* Return pointer to the added picture. */ +} + + +/* Remove the last item. 
*/ +#define pop_comment(mh) pop_id3_text(&((mh)->id3v2.comment_list), &((mh)->id3v2.comments)) +#define pop_text(mh) pop_id3_text(&((mh)->id3v2.text), &((mh)->id3v2.texts)) +#define pop_extra(mh) pop_id3_text(&((mh)->id3v2.extra), &((mh)->id3v2.extras)) +#define pop_picture(mh) pop_id3_picture(&((mh)->id3v2.picture), &((mh)->id3v2.pictures)) +static void pop_id3_text(mpg123_text **list, size_t *size) +{ + mpg123_text *x; + if(*size < 1) return; + + free_mpg123_text(&((*list)[*size-1])); + if(*size > 1) + { + x = safe_realloc(*list, sizeof(mpg123_text)*(*size-1)); + if(x != NULL){ *list = x; *size -= 1; } + } + else + { + free(*list); + *list = NULL; + *size = 0; + } +} +static void pop_id3_picture(mpg123_picture **list, size_t *size) +{ + mpg123_picture *x; + if(*size < 1) return; + + free_mpg123_picture(&((*list)[*size-1])); + if(*size > 1) + { + x = safe_realloc(*list, sizeof(mpg123_picture)*(*size-1)); + if(x != NULL){ *list = x; *size -= 1; } + } + else + { + free(*list); + *list = NULL; + *size = 0; + } +} + +/* OK, back to the higher level functions. */ + +void exit_id3(mpg123_handle *fr) +{ + free_picture(fr); + free_comment(fr); + free_extra(fr); + free_text(fr); +} + +void reset_id3(mpg123_handle *fr) +{ + exit_id3(fr); + init_id3(fr); +} + +/* Set the id3v2.artist id3v2.title ... links to elements of the array. */ +void id3_link(mpg123_handle *fr) +{ + size_t i; + mpg123_id3v2 *v2 = &fr->id3v2; + debug("linking ID3v2"); + null_id3_links(fr); + for(i=0; itexts; ++i) + { + mpg123_text *entry = &v2->text[i]; + if (!strncmp("TIT2", entry->id, 4)) v2->title = &entry->text; + else if(!strncmp("TALB", entry->id, 4)) v2->album = &entry->text; + else if(!strncmp("TPE1", entry->id, 4)) v2->artist = &entry->text; + else if(!strncmp("TYER", entry->id, 4)) v2->year = &entry->text; + else if(!strncmp("TCON", entry->id, 4)) v2->genre = &entry->text; + } + for(i=0; icomments; ++i) + { + mpg123_text *entry = &v2->comment_list[i]; + if(entry->description.fill == 0 || entry->description.p[0] == 0) + v2->comment = &entry->text; + } + /* When no generic comment found, use the last non-generic one. */ + if(v2->comment == NULL && v2->comments > 0) + v2->comment = &v2->comment_list[v2->comments-1].text; +} + +/* + Store ID3 text data in an mpg123_string; either verbatim copy or everything translated to UTF-8 encoding. + Preserve the zero string separator (I don't need strlen for the total size). + + ID3v2 standard says that there should be one text frame of specific type per tag, and subsequent tags overwrite old values. + So, I always replace the text that may be stored already (perhaps with a list of zero-separated strings, though). +*/ +static void store_id3_text(mpg123_string *sb, unsigned char *source, size_t source_size, const int noquiet, const int notranslate) +{ + if(!source_size) + { + debug("Empty id3 data!"); + return; + } + + /* We shall just copy the data. Client wants to decode itself. */ + if(notranslate) + { + /* Future: Add a path for ID3 errors. */ + if(!mpg123_resize_string(sb, source_size)) + { + if(noquiet) error("Cannot resize target string, out of memory?"); + return; + } + memcpy(sb->p, source, source_size); + sb->fill = source_size; + debug1("stored undecoded ID3 text of size %"SIZE_P, (size_p)source_size); + return; + } + + id3_to_utf8(sb, source[0], source+1, source_size-1, noquiet); + + if(sb->fill) debug1("UTF-8 string (the first one): %s", sb->p); + else if(noquiet) error("unable to convert string to UTF-8 (out of memory, junk input?)!"); +} + +/* On error, sb->size is 0. 
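The links that id3_link() sets end up in the public mpg123_id3v2 structure; a minimal caller-side sketch using the public API (assumes an opened mpg123_handle *mh whose metadata has already been parsed, error handling omitted):

    #include <stdio.h>
    #include <mpg123.h>

    void print_tags(mpg123_handle *mh)
    {
        mpg123_id3v1 *v1 = NULL;
        mpg123_id3v2 *v2 = NULL;
        if(mpg123_id3(mh, &v1, &v2) == MPG123_OK && v2 != NULL)
        {
            if(v2->title  && v2->title->fill)  printf("Title:  %s\n", v2->title->p);
            if(v2->artist && v2->artist->fill) printf("Artist: %s\n", v2->artist->p);
        }
    }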
*/ +void id3_to_utf8(mpg123_string *sb, unsigned char encoding, const unsigned char *source, size_t source_size, int noquiet) +{ + unsigned int bwidth; + debug1("encoding: %u", encoding); + /* A note: ID3v2.3 uses UCS-2 non-variable 16bit encoding, v2.4 uses UTF16. + UTF-16 uses a reserved/private range in UCS-2 to add the magic, so we just always treat it as UTF. */ + if(encoding > mpg123_id3_enc_max) + { + if(noquiet) error1("Unknown text encoding %u, I take no chances, sorry!", encoding); + + mpg123_free_string(sb); + return; + } + bwidth = encoding_widths[encoding]; + /* Hack! I've seen a stray zero byte before BOM. Is that supposed to happen? */ + if(encoding != mpg123_id3_utf16be) /* UTF16be _can_ beging with a null byte! */ + while(source_size > bwidth && source[0] == 0) + { + --source_size; + ++source; + debug("skipped leading zero"); + } + if(source_size % bwidth) + { + /* When we need two bytes for a character, it's strange to have an uneven bytestream length. */ + if(noquiet) warning2("Weird tag size %d for encoding %u - I will probably trim too early or something but I think the MP3 is broken.", (int)source_size, encoding); + source_size -= source_size % bwidth; + } + text_converters[encoding](sb, source, source_size, noquiet); +} + +static unsigned char *next_text(unsigned char* prev, unsigned char encoding, size_t limit) +{ + unsigned char *text = prev; + size_t width = encoding_widths[encoding]; + + /* So I go lengths to find zero or double zero... + Remember bug 2834636: Only check for aligned NULLs! */ + while(text-prev < (ssize_t)limit) + { + if(text[0] == 0) + { + if(width <= limit-(text-prev)) + { + size_t i = 1; + for(; i= limit) text = NULL; + + return text; +} + +static const char *enc_name(unsigned char enc) +{ + switch(enc) + { + case 0: return "Latin 1"; + case 1: return "UTF-16 BOM"; + case 2: return "UTF-16 BE"; + case 3: return "UTF-8"; + default: return "unknown!"; + } +} + +static void process_text(mpg123_handle *fr, unsigned char *realdata, size_t realsize, char *id) +{ + /* Text encoding $xx */ + /* The text (encoded) ... 
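Editor's note on next_text() above: the string terminator has the width of the encoding, one zero byte for Latin-1 and UTF-8, an aligned zero pair for the UTF-16 variants. A lone 0x00 inside a UTF-16 code unit (e.g. 'A' is 00 41 in UTF-16BE) must not end the string; that is the aligned-NULL rule from bug 2834636. A stripped-down restatement, with a hypothetical helper name:

    /* Return the offset just past the terminator, or len if none is found. */
    static size_t skip_terminated(const unsigned char *p, size_t len, size_t width)
    {
        size_t off, i;
        for(off = 0; off + width <= len; off += width)
        {
            for(i = 0; i < width && p[off+i] == 0; ++i)
                ;
            if(i == width) return off + width; /* aligned all-zero unit: end of string */
        }
        return len; /* no terminator */
    }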
*/ + mpg123_text *t = add_text(fr); + if(VERBOSE4) fprintf(stderr, "Note: Storing text from %s encoding\n", enc_name(realdata[0])); + if(t == NULL) + { + if(NOQUIET) error("Unable to attach new text!"); + return; + } + memcpy(t->id, id, 4); + store_id3_text(&t->text, realdata, realsize, NOQUIET, fr->p.flags & MPG123_PLAIN_ID3TEXT); + if(VERBOSE4) fprintf(stderr, "Note: ID3v2 %c%c%c%c text frame: %s\n", id[0], id[1], id[2], id[3], t->text.p); +} + +static void process_picture(mpg123_handle *fr, unsigned char *realdata, size_t realsize) +{ + unsigned char encoding = realdata[0]; + mpg123_picture *i = NULL; + unsigned char* workpoint; + if(realsize == 0) + { + debug("Empty id3 data!"); + return; + } + if(VERBOSE4) fprintf(stderr, "Note: Storing picture from APIC frame.\n"); + /* decompose realdata accordingly */ + i = add_picture(fr); + if(i == NULL) + { + if(NOQUIET) error("Unable to attach new picture!"); + return; + } + realdata++; realsize--; + /* get mime type (encoding is always latin-1) */ + workpoint = next_text(realdata, 0, realsize); + if (workpoint == NULL) { + pop_picture(fr); + if (NOQUIET) error("Unable to get mime type for picture; skipping picture."); + return; + } + id3_to_utf8(&i->mime_type, 0, realdata, workpoint - realdata, NOQUIET); + realsize -= workpoint - realdata; + realdata = workpoint; + /* get picture type */ + i->type = realdata[0]; + realdata++; realsize--; + /* get description (encoding is encoding) */ + workpoint = next_text(realdata, encoding, realsize); + if (workpoint == NULL) { + if (NOQUIET) error("Unable to get description for picture; skipping picture."); + pop_picture(fr); + return; + } + id3_to_utf8(&i->description, encoding, realdata, workpoint - realdata, NOQUIET); + realsize -= workpoint - realdata; + if (realsize == 0) { + if (NOQUIET) error("No picture data defined; skipping picture."); + pop_picture(fr); + return; + } + /* store_id3_picture(i, picture, realsize, NOQUIET)) */ + i->data = (unsigned char*)malloc(realsize); + if (i->data == NULL) { + if (NOQUIET) error("Unable to allocate memory for picture; skipping picture"); + pop_picture(fr); + return; + } + memcpy(i->data, workpoint, realsize); + i->size = realsize; + if(VERBOSE4) fprintf(stderr, "Note: ID3v2 APIC picture frame of type: %d\n", i->type); +} + +/* Store a new comment that perhaps is a RVA / RVA_ALBUM/AUDIOPHILE / RVA_MIX/RADIO one + Special gimmik: It also stores USLT to the texts. Stucture is the same as for comments. */ +static void process_comment(mpg123_handle *fr, enum frame_types tt, unsigned char *realdata, size_t realsize, int rva_level, char *id) +{ + /* Text encoding $xx */ + /* Language $xx xx xx */ + /* Short description (encoded!) $00 (00) */ + /* Then the comment text (encoded) ... */ + unsigned char encoding = realdata[0]; + unsigned char *lang = realdata+1; /* I'll only use the 3 bytes! */ + unsigned char *descr = realdata+4; + unsigned char *text = NULL; + mpg123_text *xcom = NULL; + mpg123_text localcom; /* UTF-8 variant for local processing. */ + + if(realsize < (size_t)(descr-realdata)) + { + if(NOQUIET) error1("Invalid frame size of %"SIZE_P" (too small for anything).", (size_p)realsize); + return; + } + xcom = (tt == uslt ? add_text(fr) : add_comment(fr)); + if(VERBOSE4) fprintf(stderr, "Note: Storing comment from %s encoding\n", enc_name(realdata[0])); + if(xcom == NULL) + { + if(NOQUIET) error("Unable to attach new comment!"); + return; + } + memcpy(xcom->lang, lang, 3); + memcpy(xcom->id, id, 4); + /* Now I can abuse a byte from lang for the encoding. 
*/ + descr[-1] = encoding; + /* Be careful with finding the end of description, I have to honor encoding here. */ + text = next_text(descr, encoding, realsize-(descr-realdata)); + if(text == NULL) + { + if(NOQUIET) error("No comment text / valid description?"); + pop_comment(fr); + return; + } + + init_mpg123_text(&localcom); + /* Store the text, without translation to UTF-8, but for comments always a local copy in UTF-8. + Reminder: No bailing out from here on without freeing the local comment data! */ + store_id3_text(&xcom->description, descr-1, text-descr+1, NOQUIET, fr->p.flags & MPG123_PLAIN_ID3TEXT); + if(tt == comment) + store_id3_text(&localcom.description, descr-1, text-descr+1, NOQUIET, 0); + + text[-1] = encoding; /* Byte abusal for encoding... */ + store_id3_text(&xcom->text, text-1, realsize+1-(text-realdata), NOQUIET, fr->p.flags & MPG123_PLAIN_ID3TEXT); + /* Remember: I will probably decode the above (again) for rva comment checking. So no messing around, please. */ + + if(VERBOSE4) /* Do _not_ print the verbatim text: The encoding might be funny! */ + { + fprintf(stderr, "Note: ID3 comm/uslt desc of length %"SIZE_P".\n", (size_p)xcom->description.fill); + fprintf(stderr, "Note: ID3 comm/uslt text of length %"SIZE_P".\n", (size_p)xcom->text.fill); + } + /* Look out for RVA info only when we really deal with a straight comment. */ + if(tt == comment && localcom.description.fill > 0) + { + int rva_mode = -1; /* mix / album */ + if( !strcasecmp(localcom.description.p, "rva") + || !strcasecmp(localcom.description.p, "rva_mix") + || !strcasecmp(localcom.description.p, "rva_track") + || !strcasecmp(localcom.description.p, "rva_radio") ) + rva_mode = 0; + else if( !strcasecmp(localcom.description.p, "rva_album") + || !strcasecmp(localcom.description.p, "rva_audiophile") + || !strcasecmp(localcom.description.p, "rva_user") ) + rva_mode = 1; + if((rva_mode > -1) && (fr->rva.level[rva_mode] <= rva_level)) + { + /* Only translate the contents in here where we really need them. */ + store_id3_text(&localcom.text, text-1, realsize+1-(text-realdata), NOQUIET, 0); + if(localcom.text.fill > 0) + { + fr->rva.gain[rva_mode] = (float) atof(localcom.text.p); + if(VERBOSE3) fprintf(stderr, "Note: RVA value %fdB\n", fr->rva.gain[rva_mode]); + fr->rva.peak[rva_mode] = 0; + fr->rva.level[rva_mode] = rva_level; + } + } + } + /* Make sure to free the local memory... */ + free_mpg123_text(&localcom); +} + +static void process_extra(mpg123_handle *fr, unsigned char* realdata, size_t realsize, int rva_level, char *id) +{ + /* Text encoding $xx */ + /* Description ... $00 (00) */ + /* Text ... */ + unsigned char encoding = realdata[0]; + unsigned char *descr = realdata+1; /* remember, the encoding is descr[-1] */ + unsigned char *text; + mpg123_text *xex; + mpg123_text localex; + + if((int)realsize < descr-realdata) + { + if(NOQUIET) error1("Invalid frame size of %lu (too small for anything).", (unsigned long)realsize); + return; + } + text = next_text(descr, encoding, realsize-(descr-realdata)); + if(VERBOSE4) fprintf(stderr, "Note: Storing extra from %s encoding\n", enc_name(realdata[0])); + if(text == NULL) + { + if(NOQUIET) error("No extra frame text / valid description?"); + return; + } + xex = add_extra(fr); + if(xex == NULL) + { + if(NOQUIET) error("Unable to attach new extra text!"); + return; + } + memcpy(xex->id, id, 4); + init_mpg123_text(&localex); /* For our local copy. */ + + /* The outside storage gets reencoded to UTF-8 only if not requested otherwise. 
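Editor's note: further down, process_extra() compares the locally UTF-8-decoded description against the usual ReplayGain TXXX names. As a hypothetical worked example, a TXXX frame with description "replaygain_track_gain" and text "-6.50 dB" ends up as

    fr->rva.gain[0]  = (float) atof("-6.50 dB"); /* -6.5; atof() stops at the blank */
    fr->rva.level[0] = rva_level;                /* here: extra+1 */

while the *_album_* names fill index 1 instead and the *_peak names go to fr->rva.peak[].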
+ Remember that we really need the -1 here to hand in the encoding byte!*/ + store_id3_text(&xex->description, descr-1, text-descr+1, NOQUIET, fr->p.flags & MPG123_PLAIN_ID3TEXT); + /* Our local copy is always stored in UTF-8! */ + store_id3_text(&localex.description, descr-1, text-descr+1, NOQUIET, 0); + /* At first, only store the outside copy of the payload. We may not need the local copy. */ + text[-1] = encoding; + store_id3_text(&xex->text, text-1, realsize-(text-realdata)+1, NOQUIET, fr->p.flags & MPG123_PLAIN_ID3TEXT); + + /* Now check if we would like to interpret this extra info for RVA. */ + if(localex.description.fill > 0) + { + int is_peak = 0; + int rva_mode = -1; /* mix / album */ + + if(!strncasecmp(localex.description.p, "replaygain_track_",17)) + { + if(VERBOSE3) fprintf(stderr, "Note: RVA ReplayGain track gain/peak\n"); + + rva_mode = 0; + if(!strcasecmp(localex.description.p, "replaygain_track_peak")) is_peak = 1; + else if(strcasecmp(localex.description.p, "replaygain_track_gain")) rva_mode = -1; + } + else + if(!strncasecmp(localex.description.p, "replaygain_album_",17)) + { + if(VERBOSE3) fprintf(stderr, "Note: RVA ReplayGain album gain/peak\n"); + + rva_mode = 1; + if(!strcasecmp(localex.description.p, "replaygain_album_peak")) is_peak = 1; + else if(strcasecmp(localex.description.p, "replaygain_album_gain")) rva_mode = -1; + } + if((rva_mode > -1) && (fr->rva.level[rva_mode] <= rva_level)) + { + /* Now we need the translated copy of the data. */ + store_id3_text(&localex.text, text-1, realsize-(text-realdata)+1, NOQUIET, 0); + if(localex.text.fill > 0) + { + if(is_peak) + { + fr->rva.peak[rva_mode] = (float) atof(localex.text.p); + if(VERBOSE3) fprintf(stderr, "Note: RVA peak %f\n", fr->rva.peak[rva_mode]); + } + else + { + fr->rva.gain[rva_mode] = (float) atof(localex.text.p); + if(VERBOSE3) fprintf(stderr, "Note: RVA gain %fdB\n", fr->rva.gain[rva_mode]); + } + fr->rva.level[rva_mode] = rva_level; + } + } + } + + free_mpg123_text(&localex); +} + +/* Make a ID3v2.3+ 4-byte ID from a ID3v2.2 3-byte ID + Note that not all frames survived to 2.4; the mapping goes to 2.3 . + A notable miss is the old RVA frame, which is very unspecific anyway. + This function returns -1 when a not known 3 char ID was encountered, 0 otherwise. */ +static int promote_framename(mpg123_handle *fr, char *id) /* fr because of VERBOSE macros */ +{ + size_t i; + char *old[] = + { + "COM", "TAL", "TBP", "TCM", "TCO", "TCR", "TDA", "TDY", "TEN", "TFT", + "TIM", "TKE", "TLA", "TLE", "TMT", "TOA", "TOF", "TOL", "TOR", "TOT", + "TP1", "TP2", "TP3", "TP4", "TPA", "TPB", "TRC", "TDA", "TRK", "TSI", + "TSS", "TT1", "TT2", "TT3", "TXT", "TXX", "TYE" + }; + char *new[] = + { + "COMM", "TALB", "TBPM", "TCOM", "TCON", "TCOP", "TDAT", "TDLY", "TENC", "TFLT", + "TIME", "TKEY", "TLAN", "TLEN", "TMED", "TOPE", "TOFN", "TOLY", "TORY", "TOAL", + "TPE1", "TPE2", "TPE3", "TPE4", "TPOS", "TPUB", "TSRC", "TRDA", "TRCK", "TSIZ", + "TSSE", "TIT1", "TIT2", "TIT3", "TEXT", "TXXX", "TYER" + }; + for(i=0; ird->read_frame_body(fr, buf, 6)) < 0) /* read more header information */ + return ret2; + + if(buf[0] == 0xff) return 0; /* Revision, will never be 0xff. */ + + /* second new byte are some nice flags, if these are invalid skip the whole thing */ + flags = buf[1]; + debug1("ID3v2: flags 0x%08x", flags); + /* use 4 bytes from buf to construct 28bit uint value and return 1; return 0 if bytes are not synchsafe */ + #define synchsafe_to_long(buf,res) \ + ( \ + (((buf)[0]|(buf)[1]|(buf)[2]|(buf)[3]) & 0x80) ? 
0 : \ + (res = (((unsigned long) (buf)[0]) << 21) \ + | (((unsigned long) (buf)[1]) << 14) \ + | (((unsigned long) (buf)[2]) << 7) \ + | ((unsigned long) (buf)[3]) \ + ,1) \ + ) + /* id3v2.3 does not store synchsafe frame sizes, but synchsafe tag size - doh! */ + #define bytes_to_long(buf,res) \ + ( \ + major == 3 ? \ + (res = (((unsigned long) (buf)[0]) << 24) \ + | (((unsigned long) (buf)[1]) << 16) \ + | (((unsigned long) (buf)[2]) << 8) \ + | ((unsigned long) (buf)[3]) \ + ,1) : synchsafe_to_long(buf,res) \ + ) + /* for id3v2.2 only */ + #define threebytes_to_long(buf,res) \ + ( \ + res = (((unsigned long) (buf)[0]) << 16) \ + | (((unsigned long) (buf)[1]) << 8) \ + | ((unsigned long) (buf)[2]) \ + ) + + /* length-10 or length-20 (footer present); 4 synchsafe integers == 28 bit number */ + /* we have already read 10 bytes, so left are length or length+10 bytes belonging to tag */ + if(!synchsafe_to_long(buf+2,length)) + { + if(NOQUIET) error4("Bad tag length (not synchsafe): 0x%02x%02x%02x%02x; You got a bad ID3 tag here.", buf[2],buf[3],buf[4],buf[5]); + return 0; + } + debug1("ID3v2: tag data length %lu", length); +#ifndef NO_ID3V2 + if(VERBOSE2) fprintf(stderr,"Note: ID3v2.%i rev %i tag of %lu bytes\n", major, buf[0], length); + /* skip if unknown version/scary flags, parse otherwise */ + if(fr->p.flags & MPG123_SKIP_ID3V2 || ((flags & UNKNOWN_FLAGS) || (major > 4) || (major < 2))) + { + if(NOQUIET) + { + if(fr->p.flags & MPG123_SKIP_ID3V2) + { + if(VERBOSE3) fprintf(stderr, "Note: Skipping ID3v2 tag per user request.\n"); + } + else /* Must be because of scary Tag properties. */ + warning2("ID3v2: Won't parse the ID3v2 tag with major version %u and flags 0x%xu - some extra code may be needed", major, flags); + } +#endif + if((ret2 = fr->rd->skip_bytes(fr,length)) < 0) /* will not store data in backbuff! */ + ret = ret2; +#ifndef NO_ID3V2 + } + else + { + unsigned char* tagdata = NULL; + fr->id3v2.version = major; + /* try to interpret that beast */ + if((tagdata = (unsigned char*) malloc(length+1)) != NULL) + { + debug("ID3v2: analysing frames..."); + if((ret2 = fr->rd->read_frame_body(fr,tagdata,length)) > 0) + { + unsigned long tagpos = 0; + debug1("ID3v2: have read at all %lu bytes for the tag now", (unsigned long)length+6); + /* going to apply strlen for strings inside frames, make sure that it doesn't overflow! */ + tagdata[length] = 0; + if(flags & EXTHEAD_FLAG) + { + debug("ID3v2: skipping extended header"); + if(!bytes_to_long(tagdata, tagpos)) + { + ret = 0; + if(NOQUIET) error4("Bad (non-synchsafe) tag offset: 0x%02x%02x%02x%02x", tagdata[0], tagdata[1], tagdata[2], tagdata[3]); + } + } + if(ret > 0) + { + char id[5]; + unsigned long framesize; + unsigned long fflags; /* need 16 bits, actually */ + id[4] = 0; + /* pos now advanced after ext head, now a frame has to follow */ + while(tagpos < length-10) /* I want to read at least a full header */ + { + int i = 0; + unsigned long pos = tagpos; + int head_part = fr->id3v2.version == 2 ? 3 : 4; /* bytes of frame title and of framesize value */ + /* level 1,2,3 - 0 is info from lame/info tag! 
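Editor's note on the size macros above: synchsafe_to_long() implements the ID3v2 rule that bit 7 of every size byte stays clear, so tag data can never emulate an MPEG sync pattern; each byte contributes only 7 payload bits. A self-contained sketch with a hypothetical helper name:

    #include <stdio.h>

    /* Decode a 28-bit synchsafe integer; -1 if any byte has its top bit set. */
    static long synchsafe28(const unsigned char b[4])
    {
        if((b[0] | b[1] | b[2] | b[3]) & 0x80) return -1; /* not synchsafe */
        return ((long)b[0] << 21) | ((long)b[1] << 14)
             | ((long)b[2] <<  7) |  (long)b[3];
    }

    int main(void)
    {
        const unsigned char size[4] = { 0x00, 0x00, 0x02, 0x01 };
        printf("%ld\n", synchsafe28(size)); /* prints 257: 2*128 + 1 */
        return 0;
    }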
*/ + /* rva tags with ascending significance, then general frames */ + enum frame_types tt = unknown; + /* we may have entered the padding zone or any other strangeness: check if we have valid frame id characters */ + for(i=0; i< head_part; ++i) + if( !( ((tagdata[tagpos+i] > 47) && (tagdata[tagpos+i] < 58)) + || ((tagdata[tagpos+i] > 64) && (tagdata[tagpos+i] < 91)) ) ) + { + debug5("ID3v2: real tag data apparently ended after %lu bytes with 0x%02x%02x%02x%02x", tagpos, tagdata[tagpos], tagdata[tagpos+1], tagdata[tagpos+2], tagdata[tagpos+3]); + /* This is no hard error... let's just hope that we got something meaningful already (ret==1 in that case). */ + goto tagparse_cleanup; /* Need to escape two loops here. */ + } + if(ret > 0) + { + /* 4 or 3 bytes id */ + strncpy(id, (char*) tagdata+pos, head_part); + id[head_part] = 0; /* terminate for 3 or 4 bytes */ + pos += head_part; + tagpos += head_part; + /* size as 32 bits or 28 bits */ + if(fr->id3v2.version == 2) threebytes_to_long(tagdata+pos, framesize); + else + if(!bytes_to_long(tagdata+pos, framesize)) + { + /* Just assume that up to now there was some good data. */ + if(NOQUIET) error1("ID3v2: non-syncsafe size of %s frame, skipping the remainder of tag", id); + break; + } + if(VERBOSE3) fprintf(stderr, "Note: ID3v2 %s frame of size %lu\n", id, framesize); + tagpos += head_part + framesize; /* the important advancement in whole tag */ + if(tagpos > length) + { + if(NOQUIET) error("Whoa! ID3v2 frame claims to be larger than the whole rest of the tag."); + break; + } + pos += head_part; + if(fr->id3v2.version > 2) + { + fflags = (((unsigned long) tagdata[pos]) << 8) | ((unsigned long) tagdata[pos+1]); + pos += 2; + tagpos += 2; + } + else fflags = 0; + /* for sanity, after full parsing tagpos should be == pos */ + /* debug4("ID3v2: found %s frame, size %lu (as bytes: 0x%08lx), flags 0x%016lx", id, framesize, framesize, fflags); */ + /* %0abc0000 %0h00kmnp */ + #define BAD_FFLAGS (unsigned long) 36784 + #define PRES_TAG_FFLAG 16384 + #define PRES_FILE_FFLAG 8192 + #define READ_ONLY_FFLAG 4096 + #define GROUP_FFLAG 64 + #define COMPR_FFLAG 8 + #define ENCR_FFLAG 4 + #define UNSYNC_FFLAG 2 + #define DATLEN_FFLAG 1 + if(head_part < 4 && promote_framename(fr, id) != 0) continue; + + /* shall not or want not handle these */ + if(fflags & (BAD_FFLAGS | COMPR_FFLAG | ENCR_FFLAG)) + { + if(NOQUIET) warning("ID3v2: skipping invalid/unsupported frame"); + continue; + } + + for(i = 0; i < KNOWN_FRAMES; ++i) + if(!strncmp(frame_type[i], id, 4)){ tt = i; break; } + + if(id[0] == 'T' && tt != extra) tt = text; + + if(tt != unknown) + { + int rva_mode = -1; /* mix / album */ + unsigned long realsize = framesize; + unsigned char* realdata = tagdata+pos; + if((flags & UNSYNC_FLAG) || (fflags & UNSYNC_FFLAG)) + { + unsigned long ipos = 0; + unsigned long opos = 0; + debug("Id3v2: going to de-unsync the frame data"); + /* de-unsync: FF00 -> FF; real FF00 is simply represented as FF0000 ... */ + /* damn, that means I have to delete bytes from withing the data block... thus need temporal storage */ + /* standard mandates that de-unsync should always be safe if flag is set */ + realdata = (unsigned char*) malloc(framesize); /* will need <= bytes */ + if(realdata == NULL) + { + if(NOQUIET) error("ID3v2: unable to allocate working buffer for de-unsync"); + continue; + } + /* now going byte per byte through the data... 
*/ + realdata[0] = tagdata[pos]; + opos = 1; + for(ipos = pos+1; ipos < pos+framesize; ++ipos) + { + if(!((tagdata[ipos] == 0) && (tagdata[ipos-1] == 0xff))) + { + realdata[opos++] = tagdata[ipos]; + } + } + realsize = opos; + debug2("ID3v2: de-unsync made %lu out of %lu bytes", realsize, framesize); + } + pos = 0; /* now at the beginning again... */ + switch(tt) + { + case comment: + case uslt: + process_comment(fr, tt, realdata, realsize, comment+1, id); + break; + case extra: /* perhaps foobar2000's work */ + process_extra(fr, realdata, realsize, extra+1, id); + break; + case rva2: /* "the" RVA tag */ + { + /* starts with null-terminated identification */ + if(VERBOSE3) fprintf(stderr, "Note: RVA2 identification \"%s\"\n", realdata); + /* default: some individual value, mix mode */ + rva_mode = 0; + if( !strncasecmp((char*)realdata, "album", 5) + || !strncasecmp((char*)realdata, "audiophile", 10) + || !strncasecmp((char*)realdata, "user", 4)) + rva_mode = 1; + if(fr->rva.level[rva_mode] <= rva2+1) + { + pos += strlen((char*) realdata) + 1; + if(realdata[pos] == 1) + { + ++pos; + /* only handle master channel */ + debug("ID3v2: it is for the master channel"); + /* two bytes adjustment, one byte for bits representing peak - n bytes, eh bits, for peak */ + /* 16 bit signed integer = dB * 512 ... the double cast is needed to preserve the sign of negative values! */ + fr->rva.gain[rva_mode] = (float) ( (((short)((signed char)realdata[pos])) << 8) | realdata[pos+1] ) / 512; + pos += 2; + if(VERBOSE3) fprintf(stderr, "Note: RVA value %fdB\n", fr->rva.gain[rva_mode]); + /* heh, the peak value is represented by a number of bits - but in what manner? Skipping that part */ + fr->rva.peak[rva_mode] = 0; + fr->rva.level[rva_mode] = rva2+1; + } + } + } + break; + /* non-rva metainfo, simply store... */ + case text: + process_text(fr, realdata, realsize, id); + break; + case picture: + if (fr->p.flags & MPG123_PICTURE) + process_picture(fr, realdata, realsize); + + break; + default: if(NOQUIET) error1("ID3v2: unknown frame type %i", tt); + } + if((flags & UNSYNC_FLAG) || (fflags & UNSYNC_FFLAG)) free(realdata); + } + #undef BAD_FFLAGS + #undef PRES_TAG_FFLAG + #undef PRES_FILE_FFLAG + #undef READ_ONLY_FFLAG + #undef GROUP_FFLAG + #undef COMPR_FFLAG + #undef ENCR_FFLAG + #undef UNSYNC_FFLAG + #undef DATLEN_FFLAG + } + else break; + #undef KNOWN_FRAMES + } + } + } + else + { + /* There are tags with zero length. Strictly not an error, then. */ + if(length > 0 && NOQUIET && ret2 != MPG123_NEED_MORE) error("ID3v2: Duh, not able to read ID3v2 tag data."); + ret = ret2; + } +tagparse_cleanup: + free(tagdata); + } + else + { + if(NOQUIET) error1("ID3v2: Arrg! Unable to allocate %lu bytes for interpreting ID3v2 data - trying to skip instead.", length); + if((ret2 = fr->rd->skip_bytes(fr,length)) < 0) ret = ret2; /* will not store data in backbuff! */ + else ret = 0; + } + } +#endif /* NO_ID3V2 */ + /* skip footer if present */ + if((ret > 0) && (flags & FOOTER_FLAG) && ((ret2 = fr->rd->skip_bytes(fr,length)) < 0)) ret = ret2; + + return ret; + #undef UNSYNC_FLAG + #undef EXTHEAD_FLAG + #undef EXP_FLAG + #undef FOOTER_FLAG + #undef UNKOWN_FLAGS +} + +#ifndef NO_ID3V2 /* Disabling all the rest... 
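Editor's note on the RVA2 branch above: the gain is a signed 16-bit big-endian field holding dB * 512, and the (short)((signed char)...) cast chain is what preserves the sign of the high byte. (The de-unsync loop further up is simpler than it looks: it merely drops the 0x00 stuffing byte that follows every 0xFF.) A stand-alone restatement with a hypothetical helper name and one worked value:

    /* RVA2 gain field: signed 16-bit big-endian fixed point, units of 1/512 dB. */
    static float rva2_gain_db(const unsigned char *p)
    {
        return (float)( (((short)((signed char)p[0])) << 8) | p[1] ) / 512.0f;
    }
    /* Example: bytes FD 00 -> ((-3) << 8) | 0x00 = -768 -> -768/512 = -1.5 dB. */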
*/ + +static void convert_latin1(mpg123_string *sb, const unsigned char* s, size_t l, const int noquiet) +{ + size_t length = l; + size_t i; + unsigned char *p; + /* determine real length, a latin1 character can at most take 2 in UTF8 */ + for(i=0; i= 0x80) ++length; + + debug1("UTF-8 length: %lu", (unsigned long)length); + /* one extra zero byte for paranoia */ + if(!mpg123_resize_string(sb, length+1)){ mpg123_free_string(sb); return ; } + + p = (unsigned char*) sb->p; /* Signedness doesn't matter but it shows I thought about the non-issue */ + for(i=0; i>6); + *(p+1) = 0x80 | (s[i] & 0x3f); + p+=2; + } + + sb->p[length] = 0; + sb->fill = length+1; +} + +/* + Check if we have a byte oder mark(s) there, return: + -1: little endian + 0: no BOM + 1: big endian + + This modifies source and len to indicate the data _after_ the BOM(s). + Note on nasty data: The last encountered BOM determines the endianness. + I have seen data with multiple BOMS, namely from "the" id3v2 program. + Not nice, but what should I do? +*/ +static int check_bom(const unsigned char** source, size_t *len) +{ + int this_bom = 0; + int further_bom = 0; + + if(*len < 2) return 0; + + if((*source)[0] == 0xff && (*source)[1] == 0xfe) + this_bom = -1; + + if((*source)[0] == 0xfe && (*source)[1] == 0xff) + this_bom = 1; + + /* Skip the detected BOM. */ + if(this_bom != 0) + { + *source += 2; + *len -= 2; + /* Check for following BOMs. The last one wins! */ + further_bom = check_bom(source, len); + if(further_bom == 0) return this_bom; /* End of the recursion. */ + else return further_bom; + } + else return 0; +} + +#define FULLPOINT(f,s) ( (((f)&0x3ff)<<10) + ((s)&0x3ff) + 0x10000 ) +/* Remember: There's a limit at 0x1ffff. */ +#define UTF8LEN(x) ( (x)<0x80 ? 1 : ((x)<0x800 ? 2 : ((x)<0x10000 ? 3 : 4))) +static void convert_utf16bom(mpg123_string *sb, const unsigned char* s, size_t l, const int noquiet) +{ + size_t i; + size_t n; /* number bytes that make up full pairs */ + unsigned char *p; + size_t length = 0; /* the resulting UTF-8 length */ + /* Determine real length... extreme case can be more than utf-16 length. */ + size_t high = 0; + size_t low = 1; + int bom_endian; + + debug1("convert_utf16 with length %lu", (unsigned long)l); + + bom_endian = check_bom(&s, &l); + debug1("UTF16 endianness check: %i", bom_endian); + + if(bom_endian == -1) /* little-endian */ + { + high = 1; /* The second byte is the high byte. */ + low = 0; /* The first byte is the low byte. */ + } + + n = (l/2)*2; /* number bytes that make up full pairs */ + + /* first: get length, check for errors -- stop at first one */ + for(i=0; i < n; i+=2) + { + unsigned long point = ((unsigned long) s[i+high]<<8) + s[i+low]; + if((point & 0xd800) == 0xd800) /* lead surrogate */ + { + unsigned short second = (i+3 < l) ? (s[i+2+high]<<8) + s[i+2+low] : 0; + if((second & 0xdc00) == 0xdc00) /* good... */ + { + point = FULLPOINT(point,second); + length += UTF8LEN(point); /* possibly 4 bytes */ + i+=2; /* We overstepped one word. */ + } + else /* if no valid pair, break here */ + { + if(noquiet) error2("Invalid UTF16 surrogate pair at %li (0x%04lx).", (unsigned long)i, point); + n = i; /* Forget the half pair, END! */ + break; + } + } + else length += UTF8LEN(point); /* 1,2 or 3 bytes */ + } + + if(!mpg123_resize_string(sb, length+1)){ mpg123_free_string(sb); return ; } + + /* Now really convert, skip checks as these have been done just before. 
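Editor's note on the length pass above: it relies on the FULLPOINT()/UTF8LEN() pair, where a lead/trail surrogate word pair is folded into one code point beyond the BMP that then needs four UTF-8 bytes. Worked through for one hypothetical input character:

    /* U+1F600, stored in UTF-16 as the surrogate pair D8 3D  DE 00:
         lead  0xD83D & 0x3FF = 0x03D
         trail 0xDE00 & 0x3FF = 0x200
         (0x03D << 10) + 0x200 + 0x10000 = 0x1F600
       UTF8LEN(0x1F600) = 4, and the conversion loop emits F0 9F 98 80. */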
*/ + p = (unsigned char*) sb->p; /* Signedness doesn't matter but it shows I thought about the non-issue */ + for(i=0; i < n; i+=2) + { + unsigned long codepoint = ((unsigned long) s[i+high]<<8) + s[i+low]; + if((codepoint & 0xd800) == 0xd800) /* lead surrogate */ + { + unsigned short second = (s[i+2+high]<<8) + s[i+2+low]; + codepoint = FULLPOINT(codepoint,second); + i+=2; /* We overstepped one word. */ + } + if(codepoint < 0x80) *p++ = (unsigned char) codepoint; + else if(codepoint < 0x800) + { + *p++ = (unsigned char) (0xc0 | (codepoint>>6)); + *p++ = (unsigned char) (0x80 | (codepoint & 0x3f)); + } + else if(codepoint < 0x10000) + { + *p++ = (unsigned char) (0xe0 | (codepoint>>12)); + *p++ = 0x80 | ((codepoint>>6) & 0x3f); + *p++ = 0x80 | (codepoint & 0x3f); + } + else if (codepoint < 0x200000) + { + *p++ = (unsigned char) (0xf0 | codepoint>>18); + *p++ = (unsigned char) (0x80 | ((codepoint>>12) & 0x3f)); + *p++ = (unsigned char) (0x80 | ((codepoint>>6) & 0x3f)); + *p++ = (unsigned char) (0x80 | (codepoint & 0x3f)); + } /* ignore bigger ones (that are not possible here anyway) */ + } + sb->p[sb->size-1] = 0; /* paranoia... */ + sb->fill = sb->size; +} +#undef UTF8LEN +#undef FULLPOINT + +static void convert_utf8(mpg123_string *sb, const unsigned char* source, size_t len, const int noquiet) +{ + if(mpg123_resize_string(sb, len+1)) + { + memcpy(sb->p, source, len); + sb->p[len] = 0; + sb->fill = len+1; + } + else mpg123_free_string(sb); +} + +#endif Index: include/reactos/libs/libmpg123/id3.h =================================================================== --- include/reactos/libs/libmpg123/id3.h (revision 63976) +++ include/reactos/libs/libmpg123/id3.h (working copy) @@ -13,9 +13,21 @@ #include "frame.h" #ifdef NO_ID3V2 +# ifdef init_id3 +# undef init_id3 +# endif # define init_id3(fr) +# ifdef exit_id3 +# undef exit_id3 +# endif # define exit_id3(fr) +# ifdef reset_id3 +# undef reset_id3 +# endif # define reset_id3(fr) +# ifdef id3_link +# undef id3_link +# endif # define id3_link(fr) #else void init_id3(mpg123_handle *fr); Index: include/reactos/libs/libmpg123/index.c =================================================================== --- include/reactos/libs/libmpg123/index.c (revision 0) +++ include/reactos/libs/libmpg123/index.c (working copy) @@ -0,0 +1,134 @@ +/* + index: frame index data structure and functions + + copyright 2007-8 by the mpg123 project - free software under the terms of the LGPL 2.1 + see COPYING and AUTHORS files in distribution or http://mpg123.org + initially written by Thomas Orgis +*/ + +#include "index.h" +#include "debug.h" + +/* The next expected frame offset, one step ahead. */ +static off_t fi_next(struct frame_index *fi) +{ + return (off_t)fi->fill*fi->step; +} + +/* Shrink down the used index to the half. + Be careful with size = 1 ... there's no shrinking possible there. */ +static void fi_shrink(struct frame_index *fi) +{ + if(fi->fill < 2) return; /* Won't shrink below 1. */ + else + { /* Double the step, half the fill. Should work as well for fill%2 = 1 */ + size_t c; + debug2("shrink index with fill %lu and step %lu", (unsigned long)fi->fill, (unsigned long)fi->step); + fi->step *= 2; + fi->fill /= 2; + /* Move the data down. 
*/ + for(c = 0; c < fi->fill; ++c) + fi->data[c] = fi->data[2*c]; + } + + fi->next = fi_next(fi); +} + +void fi_init(struct frame_index *fi) +{ + fi->data = NULL; + fi->step = 1; + fi->fill = 0; + fi->size = 0; + fi->grow_size = 0; + fi->next = fi_next(fi); +} + +void fi_exit(struct frame_index *fi) +{ + debug2("fi_exit: %p and %lu", (void*)fi->data, (unsigned long)fi->size); + if(fi->size && fi->data != NULL) free(fi->data); + + fi_init(fi); /* Be prepared for further fun, still. */ +} + +int fi_resize(struct frame_index *fi, size_t newsize) +{ + off_t *newdata = NULL; + if(newsize == fi->size) return 0; + + if(newsize > 0 && newsize < fi->size) + { /* When we reduce buffer size a bit, shrink stuff. */ + while(fi->fill > newsize){ fi_shrink(fi); } + } + + newdata = safe_realloc(fi->data, newsize*sizeof(off_t)); + if(newsize == 0 || newdata != NULL) + { + fi->data = newdata; + fi->size = newsize; + if(fi->fill > fi->size) fi->fill = fi->size; + + fi->next = fi_next(fi); + debug2("new index of size %lu at %p", (unsigned long)fi->size, (void*)fi->data); + return 0; + } + else + { + error("failed to resize index!"); + return -1; + } +} + +void fi_add(struct frame_index *fi, off_t pos) +{ + debug3("wanting to add to fill %lu, step %lu, size %lu", (unsigned long)fi->fill, (unsigned long)fi->step, (unsigned long)fi->size); + if(fi->fill == fi->size) + { /* Index is full, we need to shrink... or grow. */ + /* Store the current frame number to check later if we still want it. */ + off_t framenum = fi->fill*fi->step; + /* If we want not / cannot grow, we shrink. */ + if( !(fi->grow_size && fi_resize(fi, fi->size+fi->grow_size)==0) ) + fi_shrink(fi); + + /* Now check if we still want to add this frame (could be that not, because of changed step). */ + if(fi->next != framenum) return; + } + /* When we are here, we want that frame. */ + if(fi->fill < fi->size) /* safeguard for size=1, or just generally */ + { + debug1("adding to index at %p", (void*)(fi->data+fi->fill)); + fi->data[fi->fill] = pos; + ++fi->fill; + fi->next = fi_next(fi); + debug3("added pos %li to index with fill %lu and step %lu", (long) pos, (unsigned long)fi->fill, (unsigned long)fi->step); + } +} + +int fi_set(struct frame_index *fi, off_t *offsets, off_t step, size_t fill) +{ + if(fi_resize(fi, fill) == -1) return -1; + fi->step = step; + if(offsets != NULL) + { + memcpy(fi->data, offsets, fill*sizeof(off_t)); + fi->fill = fill; + } + else + { + /* allocation only, no entries in index yet */ + fi->fill = 0; + } + fi->next = fi_next(fi); + debug3("set new index of fill %lu, size %lu at %p", + (unsigned long)fi->fill, (unsigned long)fi->size, (void*)fi->data); + return 0; +} + +void fi_reset(struct frame_index *fi) +{ + debug1("reset with size %"SIZE_P, (size_p)fi->size); + fi->fill = 0; + fi->step = 1; + fi->next = fi_next(fi); +} Index: include/reactos/libs/libmpg123/intsym.h =================================================================== --- include/reactos/libs/libmpg123/intsym.h (revision 0) +++ include/reactos/libs/libmpg123/intsym.h (working copy) @@ -0,0 +1,284 @@ +#ifndef MPG123_INTMAP_H +#define MPG123_INTMAP_H +/* Mapping of internal mpg123 symbols to something that is less likely to conflict in case of static linking. 
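Editor's note, back in index.c above: the seek index never refuses a stream that outgrows it. When fi_add() finds the index full and growing is disabled (grow_size == 0) or fails, fi_shrink() keeps every second entry and doubles the step, so coverage stays complete at half the resolution. A short walk-through under those assumptions:

    /* size = 4, grow_size = 0:
         frames 0..3 stored      -> offsets of frames {0,1,2,3}, step 1, next 4
         frame 4 arrives (full)  -> fi_shrink(): keep frames {0,2}, step 2; next is
                                    still 4, so frame 4's offset is stored as well
         afterwards              -> fill 3, step 2, next 6: from here on only every
                                    second frame offset is recorded                 */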
*/ +#define COS9 INT123_COS9 +#define tfcos36 INT123_tfcos36 +#define pnts INT123_pnts +#define safe_realloc INT123_safe_realloc +#define compat_open INT123_compat_open +#define compat_close INT123_compat_close +#define win32_wide_utf8 INT123_win32_wide_utf8 +#define win32_utf8_wide INT123_win32_utf8_wide +#define ntom_set_ntom INT123_ntom_set_ntom +#define synth_1to1 INT123_synth_1to1 +#define synth_1to1_dither INT123_synth_1to1_dither +#define synth_1to1_i386 INT123_synth_1to1_i386 +#define synth_1to1_i586 INT123_synth_1to1_i586 +#define synth_1to1_i586_dither INT123_synth_1to1_i586_dither +#define synth_1to1_mmx INT123_synth_1to1_mmx +#define synth_1to1_3dnow INT123_synth_1to1_3dnow +#define synth_1to1_sse INT123_synth_1to1_sse +#define synth_1to1_stereo_sse INT123_synth_1to1_stereo_sse +#define synth_1to1_3dnowext INT123_synth_1to1_3dnowext +#define synth_1to1_altivec INT123_synth_1to1_altivec +#define synth_1to1_stereo_altivec INT123_synth_1to1_stereo_altivec +#define synth_1to1_x86_64 INT123_synth_1to1_x86_64 +#define synth_1to1_stereo_x86_64 INT123_synth_1to1_stereo_x86_64 +#define synth_1to1_avx INT123_synth_1to1_avx +#define synth_1to1_stereo_avx INT123_synth_1to1_stereo_avx +#define synth_1to1_arm INT123_synth_1to1_arm +#define synth_1to1_neon INT123_synth_1to1_neon +#define synth_1to1_stereo_neon INT123_synth_1to1_stereo_neon +#define absynth_1to1_i486 INT123_absynth_1to1_i486 +#define synth_1to1_mono INT123_synth_1to1_mono +#define synth_1to1_m2s INT123_synth_1to1_m2s +#define synth_2to1 INT123_synth_2to1 +#define synth_2to1_dither INT123_synth_2to1_dither +#define synth_2to1_i386 INT123_synth_2to1_i386 +#define synth_2to1_mono INT123_synth_2to1_mono +#define synth_2to1_m2s INT123_synth_2to1_m2s +#define synth_4to1 INT123_synth_4to1 +#define synth_4to1_dither INT123_synth_4to1_dither +#define synth_4to1_i386 INT123_synth_4to1_i386 +#define synth_4to1_mono INT123_synth_4to1_mono +#define synth_4to1_m2s INT123_synth_4to1_m2s +#define synth_ntom INT123_synth_ntom +#define synth_ntom_mono INT123_synth_ntom_mono +#define synth_ntom_m2s INT123_synth_ntom_m2s +#define synth_1to1_8bit INT123_synth_1to1_8bit +#define synth_1to1_8bit_i386 INT123_synth_1to1_8bit_i386 +#define synth_1to1_8bit_wrap INT123_synth_1to1_8bit_wrap +#define synth_1to1_8bit_mono INT123_synth_1to1_8bit_mono +#define synth_1to1_8bit_m2s INT123_synth_1to1_8bit_m2s +#define synth_1to1_8bit_wrap_mono INT123_synth_1to1_8bit_wrap_mono +#define synth_1to1_8bit_wrap_m2s INT123_synth_1to1_8bit_wrap_m2s +#define synth_2to1_8bit INT123_synth_2to1_8bit +#define synth_2to1_8bit_i386 INT123_synth_2to1_8bit_i386 +#define synth_2to1_8bit_mono INT123_synth_2to1_8bit_mono +#define synth_2to1_8bit_m2s INT123_synth_2to1_8bit_m2s +#define synth_4to1_8bit INT123_synth_4to1_8bit +#define synth_4to1_8bit_i386 INT123_synth_4to1_8bit_i386 +#define synth_4to1_8bit_mono INT123_synth_4to1_8bit_mono +#define synth_4to1_8bit_m2s INT123_synth_4to1_8bit_m2s +#define synth_ntom_8bit INT123_synth_ntom_8bit +#define synth_ntom_8bit_mono INT123_synth_ntom_8bit_mono +#define synth_ntom_8bit_m2s INT123_synth_ntom_8bit_m2s +#define synth_1to1_real INT123_synth_1to1_real +#define synth_1to1_real_i386 INT123_synth_1to1_real_i386 +#define synth_1to1_real_sse INT123_synth_1to1_real_sse +#define synth_1to1_real_stereo_sse INT123_synth_1to1_real_stereo_sse +#define synth_1to1_real_x86_64 INT123_synth_1to1_real_x86_64 +#define synth_1to1_real_stereo_x86_64 INT123_synth_1to1_real_stereo_x86_64 +#define synth_1to1_real_avx INT123_synth_1to1_real_avx +#define 
synth_1to1_real_stereo_avx INT123_synth_1to1_real_stereo_avx +#define synth_1to1_real_altivec INT123_synth_1to1_real_altivec +#define synth_1to1_real_stereo_altivec INT123_synth_1to1_real_stereo_altivec +#define synth_1to1_real_neon INT123_synth_1to1_real_neon +#define synth_1to1_real_stereo_neon INT123_synth_1to1_real_stereo_neon +#define synth_1to1_real_mono INT123_synth_1to1_real_mono +#define synth_1to1_real_m2s INT123_synth_1to1_real_m2s +#define synth_2to1_real INT123_synth_2to1_real +#define synth_2to1_real_i386 INT123_synth_2to1_real_i386 +#define synth_2to1_real_mono INT123_synth_2to1_real_mono +#define synth_2to1_real_m2s INT123_synth_2to1_real_m2s +#define synth_4to1_real INT123_synth_4to1_real +#define synth_4to1_real_i386 INT123_synth_4to1_real_i386 +#define synth_4to1_real_mono INT123_synth_4to1_real_mono +#define synth_4to1_real_m2s INT123_synth_4to1_real_m2s +#define synth_ntom_real INT123_synth_ntom_real +#define synth_ntom_real_mono INT123_synth_ntom_real_mono +#define synth_ntom_real_m2s INT123_synth_ntom_real_m2s +#define synth_1to1_s32 INT123_synth_1to1_s32 +#define synth_1to1_s32_i386 INT123_synth_1to1_s32_i386 +#define synth_1to1_s32_sse INT123_synth_1to1_s32_sse +#define synth_1to1_s32_stereo_sse INT123_synth_1to1_s32_stereo_sse +#define synth_1to1_s32_x86_64 INT123_synth_1to1_s32_x86_64 +#define synth_1to1_s32_stereo_x86_64 INT123_synth_1to1_s32_stereo_x86_64 +#define synth_1to1_s32_avx INT123_synth_1to1_s32_avx +#define synth_1to1_s32_stereo_avx INT123_synth_1to1_s32_stereo_avx +#define synth_1to1_s32_altivec INT123_synth_1to1_s32_altivec +#define synth_1to1_s32_stereo_altivec INT123_synth_1to1_s32_stereo_altivec +#define synth_1to1_s32_neon INT123_synth_1to1_s32_neon +#define synth_1to1_s32_stereo_neon INT123_synth_1to1_s32_stereo_neon +#define synth_1to1_s32_mono INT123_synth_1to1_s32_mono +#define synth_1to1_s32_m2s INT123_synth_1to1_s32_m2s +#define synth_2to1_s32 INT123_synth_2to1_s32 +#define synth_2to1_s32_i386 INT123_synth_2to1_s32_i386 +#define synth_2to1_s32_mono INT123_synth_2to1_s32_mono +#define synth_2to1_s32_m2s INT123_synth_2to1_s32_m2s +#define synth_4to1_s32 INT123_synth_4to1_s32 +#define synth_4to1_s32_i386 INT123_synth_4to1_s32_i386 +#define synth_4to1_s32_mono INT123_synth_4to1_s32_mono +#define synth_4to1_s32_m2s INT123_synth_4to1_s32_m2s +#define synth_ntom_s32 INT123_synth_ntom_s32 +#define synth_ntom_s32_mono INT123_synth_ntom_s32_mono +#define synth_ntom_s32_m2s INT123_synth_ntom_s32_m2s +#define dct64 INT123_dct64 +#define dct64_i386 INT123_dct64_i386 +#define dct64_altivec INT123_dct64_altivec +#define dct64_i486 INT123_dct64_i486 +#define dct36 INT123_dct36 +#define dct36_3dnow INT123_dct36_3dnow +#define dct36_3dnowext INT123_dct36_3dnowext +#define dct36_sse INT123_dct36_sse +#define dct36_x86_64 INT123_dct36_x86_64 +#define dct36_avx INT123_dct36_avx +#define dct36_neon INT123_dct36_neon +#define dct36_neon64 INT123_dct36_neon64 +#define synth_ntom_set_step INT123_synth_ntom_set_step +#define ntom_val INT123_ntom_val +#define ntom_frame_outsamples INT123_ntom_frame_outsamples +#define ntom_frmouts INT123_ntom_frmouts +#define ntom_ins2outs INT123_ntom_ins2outs +#define ntom_frameoff INT123_ntom_frameoff +#define init_layer3 INT123_init_layer3 +#define init_layer3_gainpow2 INT123_init_layer3_gainpow2 +#define init_layer3_stuff INT123_init_layer3_stuff +#define init_layer12 INT123_init_layer12 +#define init_layer12_table INT123_init_layer12_table +#define init_layer12_stuff INT123_init_layer12_stuff +#define prepare_decode_tables 
INT123_prepare_decode_tables +#define make_decode_tables INT123_make_decode_tables +#define make_decode_tables_mmx INT123_make_decode_tables_mmx +#define init_layer3_gainpow2_mmx INT123_init_layer3_gainpow2_mmx +#define init_layer12_table_mmx INT123_init_layer12_table_mmx +#define make_conv16to8_table INT123_make_conv16to8_table +#define do_layer3 INT123_do_layer3 +#define do_layer2 INT123_do_layer2 +#define do_layer1 INT123_do_layer1 +#define do_equalizer INT123_do_equalizer +#define dither_table_init INT123_dither_table_init +#define frame_dither_init INT123_frame_dither_init +#define invalidate_format INT123_invalidate_format +#define frame_init INT123_frame_init +#define frame_init_par INT123_frame_init_par +#define frame_outbuffer INT123_frame_outbuffer +#define frame_output_format INT123_frame_output_format +#define frame_buffers INT123_frame_buffers +#define frame_reset INT123_frame_reset +#define frame_buffers_reset INT123_frame_buffers_reset +#define frame_exit INT123_frame_exit +#define frame_index_find INT123_frame_index_find +#define frame_index_setup INT123_frame_index_setup +#define do_volume INT123_do_volume +#define do_rva INT123_do_rva +#define frame_gapless_init INT123_frame_gapless_init +#define frame_gapless_realinit INT123_frame_gapless_realinit +#define frame_gapless_update INT123_frame_gapless_update +#define frame_gapless_bytify INT123_frame_gapless_bytify +#define frame_gapless_ignore INT123_frame_gapless_ignore +#define frame_expect_outsamples INT123_frame_expect_outsamples +#define frame_skip INT123_frame_skip +#define frame_ins2outs INT123_frame_ins2outs +#define frame_outs INT123_frame_outs +#define frame_expect_outsampels INT123_frame_expect_outsampels +#define frame_offset INT123_frame_offset +#define frame_set_frameseek INT123_frame_set_frameseek +#define frame_set_seek INT123_frame_set_seek +#define frame_tell_seek INT123_frame_tell_seek +#define frame_fill_toc INT123_frame_fill_toc +#define getbits INT123_getbits +#define getcpuflags INT123_getcpuflags +#define icy2utf8 INT123_icy2utf8 +#define init_icy INT123_init_icy +#define clear_icy INT123_clear_icy +#define reset_icy INT123_reset_icy +#define init_id3 INT123_init_id3 +#define exit_id3 INT123_exit_id3 +#define reset_id3 INT123_reset_id3 +#define id3_link INT123_id3_link +#define parse_new_id3 INT123_parse_new_id3 +#define id3_to_utf8 INT123_id3_to_utf8 +#define fi_init INT123_fi_init +#define fi_exit INT123_fi_exit +#define fi_resize INT123_fi_resize +#define fi_add INT123_fi_add +#define fi_set INT123_fi_set +#define fi_reset INT123_fi_reset +#define double_to_long_rounded INT123_double_to_long_rounded +#define scale_rounded INT123_scale_rounded +#define decode_update INT123_decode_update +#define samples_to_bytes INT123_samples_to_bytes +#define bytes_to_samples INT123_bytes_to_samples +#define frame_cpu_opt INT123_frame_cpu_opt +#define set_synth_functions INT123_set_synth_functions +#define dectype INT123_dectype +#define defdec INT123_defdec +#define decclass INT123_decclass +#define check_decoders INT123_check_decoders +#define read_frame_init INT123_read_frame_init +#define frame_bitrate INT123_frame_bitrate +#define frame_freq INT123_frame_freq +#define read_frame_recover INT123_read_frame_recover +#define read_frame INT123_read_frame +#define set_pointer INT123_set_pointer +#define position_info INT123_position_info +#define compute_bpf INT123_compute_bpf +#define time_to_frame INT123_time_to_frame +#define get_songlen INT123_get_songlen +#define open_stream INT123_open_stream +#define 
open_stream_handle INT123_open_stream_handle +#define open_feed INT123_open_feed +#define feed_more INT123_feed_more +#define feed_forget INT123_feed_forget +#define feed_set_pos INT123_feed_set_pos +#define open_bad INT123_open_bad +#define dct64_3dnow INT123_dct64_3dnow +#define dct64_3dnowext INT123_dct64_3dnowext +#define dct64_mmx INT123_dct64_mmx +#define dct64_MMX INT123_dct64_MMX +#define dct64_sse INT123_dct64_sse +#define dct64_real_sse INT123_dct64_real_sse +#define dct64_x86_64 INT123_dct64_x86_64 +#define dct64_real_x86_64 INT123_dct64_real_x86_64 +#define dct64_avx INT123_dct64_avx +#define dct64_real_avx INT123_dct64_real_avx +#define dct64_neon INT123_dct64_neon +#define dct64_real_neon INT123_dct64_real_neon +#define dct64_neon64 INT123_dct64_neon64 +#define dct64_real_neon64 INT123_dct64_real_neon64 +#define do_equalizer_3dnow INT123_do_equalizer_3dnow +#define synth_1to1_3dnow_asm INT123_synth_1to1_3dnow_asm +#define synth_1to1_arm_asm INT123_synth_1to1_arm_asm +#define synth_1to1_arm_accurate_asm INT123_synth_1to1_arm_accurate_asm +#define synth_1to1_i586_asm INT123_synth_1to1_i586_asm +#define synth_1to1_i586_asm_dither INT123_synth_1to1_i586_asm_dither +#define synth_1to1_MMX INT123_synth_1to1_MMX +#define synth_1to1_sse_accurate_asm INT123_synth_1to1_sse_accurate_asm +#define synth_1to1_real_sse_asm INT123_synth_1to1_real_sse_asm +#define synth_1to1_s32_sse_asm INT123_synth_1to1_s32_sse_asm +#define synth_1to1_s_sse_accurate_asm INT123_synth_1to1_s_sse_accurate_asm +#define synth_1to1_real_s_sse_asm INT123_synth_1to1_real_s_sse_asm +#define synth_1to1_s32_s_sse_asm INT123_synth_1to1_s32_s_sse_asm +#define synth_1to1_s_x86_64_asm INT123_synth_1to1_s_x86_64_asm +#define synth_1to1_s_x86_64_accurate_asm INT123_synth_1to1_s_x86_64_accurate_asm +#define synth_1to1_real_s_x86_64_asm INT123_synth_1to1_real_s_x86_64_asm +#define synth_1to1_s32_s_x86_64_asm INT123_synth_1to1_s32_s_x86_64_asm +#define synth_1to1_x86_64_asm INT123_synth_1to1_x86_64_asm +#define synth_1to1_x86_64_accurate_asm INT123_synth_1to1_x86_64_accurate_asm +#define synth_1to1_real_x86_64_asm INT123_synth_1to1_real_x86_64_asm +#define synth_1to1_s32_x86_64_asm INT123_synth_1to1_s32_x86_64_asm +#define synth_1to1_s_avx_asm INT123_synth_1to1_s_avx_asm +#define synth_1to1_s_avx_accurate_asm INT123_synth_1to1_s_avx_accurate_asm +#define synth_1to1_real_s_avx_asm INT123_synth_1to1_real_s_avx_asm +#define synth_1to1_s32_s_avx_asm INT123_synth_1to1_s32_s_avx_asm +#define synth_1to1_neon_asm INT123_synth_1to1_neon_asm +#define synth_1to1_neon_accurate_asm INT123_synth_1to1_neon_accurate_asm +#define synth_1to1_real_neon_asm INT123_synth_1to1_real_neon_asm +#define synth_1to1_s32_neon_asm INT123_synth_1to1_s32_neon_asm +#define synth_1to1_s_neon_asm INT123_synth_1to1_s_neon_asm +#define synth_1to1_s_neon_accurate_asm INT123_synth_1to1_s_neon_accurate_asm +#define synth_1to1_real_s_neon_asm INT123_synth_1to1_real_s_neon_asm +#define synth_1to1_s32_s_neon_asm INT123_synth_1to1_s32_s_neon_asm +#define synth_1to1_neon64_asm INT123_synth_1to1_neon64_asm +#define synth_1to1_neon64_accurate_asm INT123_synth_1to1_neon64_accurate_asm +#define synth_1to1_real_neon64_asm INT123_synth_1to1_real_neon64_asm +#define synth_1to1_s32_neon64_asm INT123_synth_1to1_s32_neon64_asm +#define synth_1to1_s_neon64_asm INT123_synth_1to1_s_neon64_asm +#define synth_1to1_s_neon64_accurate_asm INT123_synth_1to1_s_neon64_accurate_asm +#define synth_1to1_real_s_neon64_asm INT123_synth_1to1_real_s_neon64_asm +#define synth_1to1_s32_s_neon64_asm 
INT123_synth_1to1_s32_s_neon64_asm +#define costab_mmxsse INT123_costab_mmxsse +#define make_decode_tables_mmx_asm INT123_make_decode_tables_mmx_asm +#define check_neon INT123_check_neon +#endif Index: include/reactos/libs/libmpg123/l12_integer_tables.h =================================================================== --- include/reactos/libs/libmpg123/l12_integer_tables.h (revision 63976) +++ include/reactos/libs/libmpg123/l12_integer_tables.h (working copy) @@ -11,7 +11,11 @@ static const real layer12_table[27][64] = { - { + { /* C90 does not like empty initializer. Fill with junk. */ + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20 + , 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38 + , 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56 + , 57, 58, 59, 60, 61, 62, 63, 64 }, { -1431655765,-1136305934,-901886617,-715827883,-568152967,-450943309,-357913941,-284076483, Index: include/reactos/libs/libmpg123/l2tables.h =================================================================== --- include/reactos/libs/libmpg123/l2tables.h (revision 63976) +++ include/reactos/libs/libmpg123/l2tables.h (working copy) @@ -13,7 +13,7 @@ #ifndef _MPG123_L2TABLES_H_ #define _MPG123_L2TABLES_H_ -const struct al_table alloc_0[] = { +static const struct al_table alloc_0[] = { {4,0},{5,3},{3,-3},{4,-7},{5,-15},{6,-31},{7,-63},{8,-127},{9,-255},{10,-511}, {11,-1023},{12,-2047},{13,-4095},{14,-8191},{15,-16383},{16,-32767}, {4,0},{5,3},{3,-3},{4,-7},{5,-15},{6,-31},{7,-63},{8,-127},{9,-255},{10,-511}, @@ -53,7 +53,7 @@ {2,0},{5,3},{7,5},{16,-32767}, {2,0},{5,3},{7,5},{16,-32767} }; -const struct al_table alloc_1[] = { +static const struct al_table alloc_1[] = { {4,0},{5,3},{3,-3},{4,-7},{5,-15},{6,-31},{7,-63},{8,-127},{9,-255},{10,-511}, {11,-1023},{12,-2047},{13,-4095},{14,-8191},{15,-16383},{16,-32767}, {4,0},{5,3},{3,-3},{4,-7},{5,-15},{6,-31},{7,-63},{8,-127},{9,-255},{10,-511}, @@ -96,7 +96,7 @@ {2,0},{5,3},{7,5},{16,-32767}, {2,0},{5,3},{7,5},{16,-32767} }; -const struct al_table alloc_2[] = { +static const struct al_table alloc_2[] = { {4,0},{5,3},{7,5},{10,9},{4,-7},{5,-15},{6,-31},{7,-63},{8,-127},{9,-255}, {10,-511},{11,-1023},{12,-2047},{13,-4095},{14,-8191},{15,-16383}, {4,0},{5,3},{7,5},{10,9},{4,-7},{5,-15},{6,-31},{7,-63},{8,-127},{9,-255}, @@ -108,7 +108,7 @@ {3,0},{5,3},{7,5},{10,9},{4,-7},{5,-15},{6,-31},{7,-63}, {3,0},{5,3},{7,5},{10,9},{4,-7},{5,-15},{6,-31},{7,-63} }; -const struct al_table alloc_3[] = { +static const struct al_table alloc_3[] = { {4,0},{5,3},{7,5},{10,9},{4,-7},{5,-15},{6,-31},{7,-63},{8,-127},{9,-255}, {10,-511},{11,-1023},{12,-2047},{13,-4095},{14,-8191},{15,-16383}, {4,0},{5,3},{7,5},{10,9},{4,-7},{5,-15},{6,-31},{7,-63},{8,-127},{9,-255}, @@ -124,7 +124,7 @@ {3,0},{5,3},{7,5},{10,9},{4,-7},{5,-15},{6,-31},{7,-63}, {3,0},{5,3},{7,5},{10,9},{4,-7},{5,-15},{6,-31},{7,-63} }; -const struct al_table alloc_4[] = { +static const struct al_table alloc_4[] = { {4,0},{5,3},{7,5},{3,-3},{10,9},{4,-7},{5,-15},{6,-31},{7,-63},{8,-127}, {9,-255},{10,-511},{11,-1023},{12,-2047},{13,-4095},{14,-8191}, {4,0},{5,3},{7,5},{3,-3},{10,9},{4,-7},{5,-15},{6,-31},{7,-63},{8,-127}, Index: include/reactos/libs/libmpg123/layer1.c =================================================================== --- include/reactos/libs/libmpg123/layer1.c (revision 0) +++ include/reactos/libs/libmpg123/layer1.c (working copy) @@ -0,0 +1,188 @@ +/* + layer1.c: the layer 1 decoder + + copyright 1995-2009 by the mpg123 project - free software 
under the terms of the LGPL 2.1 + see COPYING and AUTHORS files in distribution or http://mpg123.org + initially written by Michael Hipp + + may have a few bugs after last optimization ... +*/ + +#include "mpg123lib_intern.h" +#include "getbits.h" +#include "debug.h" + +/* + Allocation value is not allowed to be 15. Initially, libmad showed me the + error that mpg123 used to ignore. Then, I found a quote on that in + Shlien, S. (1994): Guide to MPEG-1 Audio Standard. + IEEE Transactions on Broadcasting 40, 4 + + "To avoid conflicts with the synchronization code, code '1111' is defined + to be illegal." +*/ +static int check_balloc(mpg123_handle *fr, unsigned int *balloc, unsigned int *end) +{ + unsigned int *ba; + for(ba=balloc; ba != end; ++ba) + if(*ba == 15) + { + if(NOQUIET) error("Illegal bit allocation value."); + return -1; + } + + return 0; +} + +static int I_step_one(unsigned int balloc[], unsigned int scale_index[2][SBLIMIT],mpg123_handle *fr) +{ + unsigned int *ba=balloc; + unsigned int *sca = (unsigned int *) scale_index; + + if(fr->stereo == 2) + { + int i; + int jsbound = fr->jsbound; + for(i=0;istereo == 2) + { + int jsbound = fr->jsbound; + register real *f0 = fraction[0]; + register real *f1 = fraction[1]; + ba = balloc; + for(sample=smpb,i=0;imuls[n+1][*sca++]); + else *f0++ = DOUBLE_TO_REAL(0.0); + + if((n=*ba++)) + *f1++ = REAL_MUL_SCALE_LAYER12(DOUBLE_TO_REAL_15( ((-1)<muls[n+1][*sca++]); + else *f1++ = DOUBLE_TO_REAL(0.0); + } + for(i=jsbound;imuls[n+1][*sca++]); + *f1++ = REAL_MUL_SCALE_LAYER12(samp, fr->muls[n+1][*sca++]); + } + else *f0++ = *f1++ = DOUBLE_TO_REAL(0.0); + } + for(i=fr->down_sample_sblimit;i<32;i++) + fraction[0][i] = fraction[1][i] = 0.0; + } + else + { + register real *f0 = fraction[0]; + ba = balloc; + for(sample=smpb,i=0;imuls[n+1][*sca++]); + else *f0++ = DOUBLE_TO_REAL(0.0); + } + for(i=fr->down_sample_sblimit;i<32;i++) + fraction[0][i] = DOUBLE_TO_REAL(0.0); + } +} + +int do_layer1(mpg123_handle *fr) +{ + int clip=0; + int i,stereo = fr->stereo; + unsigned int balloc[2*SBLIMIT]; + unsigned int scale_index[2][SBLIMIT]; + real (*fraction)[SBLIMIT] = fr->layer1.fraction; /* fraction[2][SBLIMIT] */ + int single = fr->single; + + fr->jsbound = (fr->mode == MPG_MD_JOINT_STEREO) ? (fr->mode_ext<<2)+4 : 32; + + if(stereo == 1 || single == SINGLE_MIX) /* I don't see mixing handled here */ + single = SINGLE_LEFT; + + if(I_step_one(balloc,scale_index,fr)) + { + if(NOQUIET) error("Aborting layer I decoding after step one.\n"); + return clip; + } + + for(i=0;isynth_mono)(fraction[single], fr); + else + clip += (fr->synth_stereo)(fraction[0], fraction[1], fr); + } + + return clip; +} + + Index: include/reactos/libs/libmpg123/layer2.c =================================================================== --- include/reactos/libs/libmpg123/layer2.c (revision 0) +++ include/reactos/libs/libmpg123/layer2.c (working copy) @@ -0,0 +1,371 @@ +/* + layer2.c: the layer 2 decoder, root of mpg123 + + copyright 1994-2009 by the mpg123 project - free software under the terms of the LGPL 2.1 + see COPYING and AUTHORS files in distribution or http://mpg123.org + initially written by Michael Hipp + + mpg123 started as mp2 decoder a long time ago... + part of this file is required for layer 1, too. +*/ + + +#include "mpg123lib_intern.h" +#ifndef NO_LAYER2 +#include "l2tables.h" +#endif +#include "getbits.h" + +#ifndef NO_LAYER12 /* Stuff needed for layer I and II. 
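Editor's note on one line of do_layer1() above (repeated later in do_layer2()): the joint-stereo bound. mode_ext selects how many low subbands still carry two independent channels; from jsbound upwards a single set of samples is transmitted and only the scale factors differ per channel:

    /* fr->jsbound = (fr->mode == MPG_MD_JOINT_STEREO) ? (fr->mode_ext<<2)+4 : 32;
         mode_ext 0 -> jsbound  4
         mode_ext 1 -> jsbound  8
         mode_ext 2 -> jsbound 12
         mode_ext 3 -> jsbound 16
       (for layer II the non-joint value is fr->II_sblimit instead of 32) */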
*/ + +static int grp_3tab[32 * 3] = { 0, }; /* used: 27 */ +static int grp_5tab[128 * 3] = { 0, }; /* used: 125 */ +static int grp_9tab[1024 * 3] = { 0, }; /* used: 729 */ + +#if defined(REAL_IS_FIXED) && defined(PRECALC_TABLES) +#include "l12_integer_tables.h" +#else +static const double mulmul[27] = +{ + 0.0 , -2.0/3.0 , 2.0/3.0 , + 2.0/7.0 , 2.0/15.0 , 2.0/31.0, 2.0/63.0 , 2.0/127.0 , 2.0/255.0 , + 2.0/511.0 , 2.0/1023.0 , 2.0/2047.0 , 2.0/4095.0 , 2.0/8191.0 , + 2.0/16383.0 , 2.0/32767.0 , 2.0/65535.0 , + -4.0/5.0 , -2.0/5.0 , 2.0/5.0, 4.0/5.0 , + -8.0/9.0 , -4.0/9.0 , -2.0/9.0 , 2.0/9.0 , 4.0/9.0 , 8.0/9.0 +}; +#endif + +void init_layer12(void) +{ + const int base[3][9] = + { + { 1 , 0, 2 , } , + { 17, 18, 0 , 19, 20 , } , + { 21, 1, 22, 23, 0, 24, 25, 2, 26 } + }; + int i,j,k,l,len; + const int tablen[3] = { 3 , 5 , 9 }; + int *itable; + int *tables[3] = { grp_3tab , grp_5tab , grp_9tab }; + + for(i=0;i<3;i++) + { + itable = tables[i]; + len = tablen[i]; + for(j=0;jmuls[k], k); + *table++ = 0.0; + } +} + +real* init_layer12_table(mpg123_handle *fr, real *table, int m) +{ +#if defined(REAL_IS_FIXED) && defined(PRECALC_TABLES) + int i; + for(i=0;i<63;i++) + *table++ = layer12_table[m][i]; +#else + int i,j; + for(j=3,i=0;i<63;i++,j--) + *table++ = DOUBLE_TO_REAL_SCALE_LAYER12(mulmul[m] * pow(2.0,(double) j / 3.0)); +#endif + + return table; +} + +#ifdef OPT_MMXORSSE +real* init_layer12_table_mmx(mpg123_handle *fr, real *table, int m) +{ + int i,j; + if(!fr->p.down_sample) + { + for(j=3,i=0;i<63;i++,j--) + *table++ = DOUBLE_TO_REAL(16384 * mulmul[m] * pow(2.0,(double) j / 3.0)); + } + else + { + for(j=3,i=0;i<63;i++,j--) + *table++ = DOUBLE_TO_REAL(mulmul[m] * pow(2.0,(double) j / 3.0)); + } + return table; +} +#endif + +#endif /* NO_LAYER12 */ + +/* The rest is the actual decoding of layer II data. 
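Editor's note on the grp_3tab/grp_5tab/grp_9tab tables built by init_layer12(): they implement layer II sample grouping, where the 3-, 5- and 9-level quantizers pack three consecutive samples into a single code word, and the tables precompute the three digits (already mapped through base[] onto indices into fr->muls). The underlying arithmetic is plain base-n digit extraction; a minimal sketch with a hypothetical helper:

    /* Split a grouped code word into its three quantized samples. */
    static void ungroup(unsigned int code, unsigned int steps, unsigned int s[3])
    {
        s[0] = code % steps;               /* first sample  */
        s[1] = (code / steps) % steps;     /* second sample */
        s[2] = code / (steps * steps);     /* third sample  */
    }
    /* Example: steps = 3, code = 14  ->  s = {2, 1, 1}. */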
*/ + +#ifndef NO_LAYER2 + +static void II_step_one(unsigned int *bit_alloc,int *scale,mpg123_handle *fr) +{ + int stereo = fr->stereo-1; + int sblimit = fr->II_sblimit; + int jsbound = fr->jsbound; + int sblimit2 = fr->II_sblimit<alloc; + int i; + unsigned int scfsi_buf[64]; + unsigned int *scfsi,*bita; + int sc,step; + + bita = bit_alloc; + if(stereo) + { + for(i=jsbound;i;i--,alloc1+=(1<bits; + *bita++ = (char) getbits(fr, step); + *bita++ = (char) getbits(fr, step); + } + for(i=sblimit-jsbound;i;i--,alloc1+=(1<bits; + bita[0] = (char) getbits(fr, step); + bita[1] = bita[0]; + bita+=2; + } + bita = bit_alloc; + scfsi=scfsi_buf; + + for(i=sblimit2;i;i--) + if(*bita++) *scfsi++ = (char) getbits_fast(fr, 2); + } + else /* mono */ + { + for(i=sblimit;i;i--,alloc1+=(1<bits; + *bita++ = (char) getbits(fr, step); + } + bita = bit_alloc; + scfsi=scfsi_buf; + for(i=sblimit;i;i--) + if(*bita++) *scfsi++ = (char) getbits_fast(fr, 2); + } + + bita = bit_alloc; + scfsi=scfsi_buf; + for(i=sblimit2;i;i--) + if(*bita++) + switch(*scfsi++) + { + case 0: + *scale++ = getbits_fast(fr, 6); + *scale++ = getbits_fast(fr, 6); + *scale++ = getbits_fast(fr, 6); + break; + case 1 : + *scale++ = sc = getbits_fast(fr, 6); + *scale++ = sc; + *scale++ = getbits_fast(fr, 6); + break; + case 2: + *scale++ = sc = getbits_fast(fr, 6); + *scale++ = sc; + *scale++ = sc; + break; + default: /* case 3 */ + *scale++ = getbits_fast(fr, 6); + *scale++ = sc = getbits_fast(fr, 6); + *scale++ = sc; + break; + } +} + + +static void II_step_two(unsigned int *bit_alloc,real fraction[2][4][SBLIMIT],int *scale,mpg123_handle *fr,int x1) +{ + int i,j,k,ba; + int stereo = fr->stereo; + int sblimit = fr->II_sblimit; + int jsbound = fr->jsbound; + const struct al_table *alloc2,*alloc1 = fr->alloc; + unsigned int *bita=bit_alloc; + int d1,step; + + for(i=0;ibits; + for(j=0;jbits; + if( (d1=alloc2->d) < 0) + { + real cm=fr->muls[k][scale[x1]]; + fraction[j][0][i] = REAL_MUL_SCALE_LAYER12(DOUBLE_TO_REAL_15((int)getbits(fr, k) + d1), cm); + fraction[j][1][i] = REAL_MUL_SCALE_LAYER12(DOUBLE_TO_REAL_15((int)getbits(fr, k) + d1), cm); + fraction[j][2][i] = REAL_MUL_SCALE_LAYER12(DOUBLE_TO_REAL_15((int)getbits(fr, k) + d1), cm); + } + else + { + const int *table[] = { 0,0,0,grp_3tab,0,grp_5tab,0,0,0,grp_9tab }; + unsigned int idx,*tab,m=scale[x1]; + idx = (unsigned int) getbits(fr, k); + tab = (unsigned int *) (table[d1] + idx + idx + idx); + fraction[j][0][i] = REAL_SCALE_LAYER12(fr->muls[*tab++][m]); + fraction[j][1][i] = REAL_SCALE_LAYER12(fr->muls[*tab++][m]); + fraction[j][2][i] = REAL_SCALE_LAYER12(fr->muls[*tab][m]); + } + scale+=3; + } + else + fraction[j][0][i] = fraction[j][1][i] = fraction[j][2][i] = DOUBLE_TO_REAL(0.0); + } + } + + for(i=jsbound;ibits; + bita++; /* channel 1 and channel 2 bitalloc are the same */ + if( (ba=*bita++) ) + { + k=(alloc2 = alloc1+ba)->bits; + if( (d1=alloc2->d) < 0) + { + real cm; + cm=fr->muls[k][scale[x1+3]]; + fraction[0][0][i] = DOUBLE_TO_REAL_15((int)getbits(fr, k) + d1); + fraction[0][1][i] = DOUBLE_TO_REAL_15((int)getbits(fr, k) + d1); + fraction[0][2][i] = DOUBLE_TO_REAL_15((int)getbits(fr, k) + d1); + fraction[1][0][i] = REAL_MUL_SCALE_LAYER12(fraction[0][0][i], cm); + fraction[1][1][i] = REAL_MUL_SCALE_LAYER12(fraction[0][1][i], cm); + fraction[1][2][i] = REAL_MUL_SCALE_LAYER12(fraction[0][2][i], cm); + cm=fr->muls[k][scale[x1]]; + fraction[0][0][i] = REAL_MUL_SCALE_LAYER12(fraction[0][0][i], cm); + fraction[0][1][i] = REAL_MUL_SCALE_LAYER12(fraction[0][1][i], cm); + fraction[0][2][i] = 
REAL_MUL_SCALE_LAYER12(fraction[0][2][i], cm); + } + else + { + const int *table[] = { 0,0,0,grp_3tab,0,grp_5tab,0,0,0,grp_9tab }; + unsigned int idx,*tab,m1,m2; + m1 = scale[x1]; m2 = scale[x1+3]; + idx = (unsigned int) getbits(fr, k); + tab = (unsigned int *) (table[d1] + idx + idx + idx); + fraction[0][0][i] = REAL_SCALE_LAYER12(fr->muls[*tab][m1]); fraction[1][0][i] = REAL_SCALE_LAYER12(fr->muls[*tab++][m2]); + fraction[0][1][i] = REAL_SCALE_LAYER12(fr->muls[*tab][m1]); fraction[1][1][i] = REAL_SCALE_LAYER12(fr->muls[*tab++][m2]); + fraction[0][2][i] = REAL_SCALE_LAYER12(fr->muls[*tab][m1]); fraction[1][2][i] = REAL_SCALE_LAYER12(fr->muls[*tab][m2]); + } + scale+=6; + } + else + { + fraction[0][0][i] = fraction[0][1][i] = fraction[0][2][i] = + fraction[1][0][i] = fraction[1][1][i] = fraction[1][2][i] = DOUBLE_TO_REAL(0.0); + } +/* + Historic comment... + should we use individual scalefac for channel 2 or + is the current way the right one , where we just copy channel 1 to + channel 2 ?? + The current 'strange' thing is, that we throw away the scalefac + values for the second channel ...!! + -> changed .. now we use the scalefac values of channel one !! +*/ + } + + if(sblimit > (fr->down_sample_sblimit) ) + sblimit = fr->down_sample_sblimit; + + for(i=sblimit;isampling_frequency >= 3) /* Or equivalent: (fr->lsf == 1) */ + table = 4; + else + table = translate[fr->sampling_frequency][2-fr->stereo][fr->bitrate_index]; + + sblim = sblims[table]; + fr->alloc = tables[table]; + fr->II_sblimit = sblim; +} + + +int do_layer2(mpg123_handle *fr) +{ + int clip=0; + int i,j; + int stereo = fr->stereo; + /* pick_table clears unused subbands */ + /* replacement for real fraction[2][4][SBLIMIT], needs alignment. */ + real (*fraction)[4][SBLIMIT] = fr->layer2.fraction; + unsigned int bit_alloc[64]; + int scale[192]; + int single = fr->single; + + II_select_table(fr); + fr->jsbound = (fr->mode == MPG_MD_JOINT_STEREO) ? (fr->mode_ext<<2)+4 : fr->II_sblimit; + + if(fr->jsbound > fr->II_sblimit) + { + fprintf(stderr, "Truncating stereo boundary to sideband limit.\n"); + fr->jsbound=fr->II_sblimit; + } + + /* TODO: What happens with mono mixing, actually? */ + if(stereo == 1 || single == SINGLE_MIX) /* also, mix not really handled */ + single = SINGLE_LEFT; + + II_step_one(bit_alloc, scale, fr); + + for(i=0;i>2); + for(j=0;j<3;j++) + { + if(single != SINGLE_STEREO) + clip += (fr->synth_mono)(fraction[single][j], fr); + else + clip += (fr->synth_stereo)(fraction[0][j], fraction[1][j], fr); + } + } + + return clip; +} + +#endif /* NO_LAYER2 */ Index: include/reactos/libs/libmpg123/layer3.c =================================================================== --- include/reactos/libs/libmpg123/layer3.c (revision 0) +++ include/reactos/libs/libmpg123/layer3.c (working copy) @@ -0,0 +1,2085 @@ +/* + layer3.c: the layer 3 decoder + + copyright 1995-2009 by the mpg123 project - free software under the terms of the LGPL 2.1 + see COPYING and AUTHORS files in distribution or http://mpg123.org + initially written by Michael Hipp + + Dear visitor: + If you feel you don't understand fully the works of this file, your feeling might be correct. + + Optimize-TODO: put short bands into the band-field without the stride of 3 reals + Length-optimze: unify long and short band code where it is possible + + The int-vs-pointer situation has to be cleaned up. 
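
do_layer2 further up derives the joint-stereo bound from the header's mode_ext field as (mode_ext<<2)+4, so intensity-coded subbands start after 4, 8, 12 or 16 subbands, and the bound is clamped to II_sblimit when the header claims more than the allocation table allows. A small sketch of that mapping; the function name is illustrative, not from the patch:

    /* Joint-stereo bound: subbands below it carry two independent bit allocations.
       mode_ext is the 2-bit header field (0..3); clamped to sblimit as do_layer2 does. */
    static int layer2_jsbound(int joint_stereo, int mode_ext, int sblimit)
    {
        int bound = joint_stereo ? (mode_ext << 2) + 4 : sblimit; /* 4, 8, 12 or 16 */
        return bound > sblimit ? sblimit : bound;
    }
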
+*/ + +#include "mpg123lib_intern.h" +#ifdef USE_NEW_HUFFTABLE +#include "newhuffman.h" +#else +#include "huffman.h" +#endif +#include "getbits.h" +#include "debug.h" + + + +/* define CUT_SFB21 if you want to cut-off the frequency above 16kHz */ +#if 0 +#define CUT_SFB21 +#endif + +#ifdef REAL_IS_FIXED +#define NEW_DCT9 +#include "l3_integer_tables.h" +#else +/* static one-time calculated tables... or so */ +static real ispow[8207]; +static real aa_ca[8],aa_cs[8]; +static ALIGNED(16) real win[4][36]; +static ALIGNED(16) real win1[4][36]; +real COS9[9]; /* dct36_3dnow wants to use that */ +static real COS6_1,COS6_2; +real tfcos36[9]; /* dct36_3dnow wants to use that */ +static real tfcos12[3]; +#define NEW_DCT9 +#ifdef NEW_DCT9 +static real cos9[3],cos18[3]; +static real tan1_1[16],tan2_1[16],tan1_2[16],tan2_2[16]; +static real pow1_1[2][16],pow2_1[2][16],pow1_2[2][16],pow2_2[2][16]; +#endif +#endif + +/* Decoder state data, living on the stack of do_layer3. */ + +struct gr_info_s +{ + int scfsi; + unsigned part2_3_length; + unsigned big_values; + unsigned scalefac_compress; + unsigned block_type; + unsigned mixed_block_flag; + unsigned table_select[3]; + /* Making those two signed int as workaround for open64/pathscale/sun compilers, and also for consistency, since they're worked on together with other signed variables. */ + int maxband[3]; + int maxbandl; + unsigned maxb; + unsigned region1start; + unsigned region2start; + unsigned preflag; + unsigned scalefac_scale; + unsigned count1table_select; + real *full_gain[3]; + real *pow2gain; +}; + +struct III_sideinfo +{ + unsigned main_data_begin; + unsigned private_bits; + /* Hm, funny... struct inside struct... */ + struct { struct gr_info_s gr[2]; } ch[2]; +}; + +struct bandInfoStruct +{ + unsigned short longIdx[23]; + unsigned char longDiff[22]; + unsigned short shortIdx[14]; + unsigned char shortDiff[13]; +}; + +/* Techy details about our friendly MPEG data. Fairly constant over the years;-) */ +static const struct bandInfoStruct bandInfo[9] = +{ + { /* MPEG 1.0 */ + {0,4,8,12,16,20,24,30,36,44,52,62,74, 90,110,134,162,196,238,288,342,418,576}, + {4,4,4,4,4,4,6,6,8, 8,10,12,16,20,24,28,34,42,50,54, 76,158}, + {0,4*3,8*3,12*3,16*3,22*3,30*3,40*3,52*3,66*3, 84*3,106*3,136*3,192*3}, + {4,4,4,4,6,8,10,12,14,18,22,30,56} + }, + { + {0,4,8,12,16,20,24,30,36,42,50,60,72, 88,106,128,156,190,230,276,330,384,576}, + {4,4,4,4,4,4,6,6,6, 8,10,12,16,18,22,28,34,40,46,54, 54,192}, + {0,4*3,8*3,12*3,16*3,22*3,28*3,38*3,50*3,64*3, 80*3,100*3,126*3,192*3}, + {4,4,4,4,6,6,10,12,14,16,20,26,66} + }, + { + {0,4,8,12,16,20,24,30,36,44,54,66,82,102,126,156,194,240,296,364,448,550,576}, + {4,4,4,4,4,4,6,6,8,10,12,16,20,24,30,38,46,56,68,84,102, 26}, + {0,4*3,8*3,12*3,16*3,22*3,30*3,42*3,58*3,78*3,104*3,138*3,180*3,192*3}, + {4,4,4,4,6,8,12,16,20,26,34,42,12} + }, + { /* MPEG 2.0 */ + {0,6,12,18,24,30,36,44,54,66,80,96,116,140,168,200,238,284,336,396,464,522,576}, + {6,6,6,6,6,6,8,10,12,14,16,20,24,28,32,38,46,52,60,68,58,54 } , + {0,4*3,8*3,12*3,18*3,24*3,32*3,42*3,56*3,74*3,100*3,132*3,174*3,192*3} , + {4,4,4,6,6,8,10,14,18,26,32,42,18 } + }, + { /* Twiddling 3 values here (not just 330->332!) fixed bug 1895025. 
*/ + {0,6,12,18,24,30,36,44,54,66,80,96,114,136,162,194,232,278,332,394,464,540,576}, + {6,6,6,6,6,6,8,10,12,14,16,18,22,26,32,38,46,54,62,70,76,36 }, + {0,4*3,8*3,12*3,18*3,26*3,36*3,48*3,62*3,80*3,104*3,136*3,180*3,192*3}, + {4,4,4,6,8,10,12,14,18,24,32,44,12 } + }, + { + {0,6,12,18,24,30,36,44,54,66,80,96,116,140,168,200,238,284,336,396,464,522,576}, + {6,6,6,6,6,6,8,10,12,14,16,20,24,28,32,38,46,52,60,68,58,54 }, + {0,4*3,8*3,12*3,18*3,26*3,36*3,48*3,62*3,80*3,104*3,134*3,174*3,192*3}, + {4,4,4,6,8,10,12,14,18,24,30,40,18 } + }, + { /* MPEG 2.5 */ + {0,6,12,18,24,30,36,44,54,66,80,96,116,140,168,200,238,284,336,396,464,522,576}, + {6,6,6,6,6,6,8,10,12,14,16,20,24,28,32,38,46,52,60,68,58,54}, + {0,12,24,36,54,78,108,144,186,240,312,402,522,576}, + {4,4,4,6,8,10,12,14,18,24,30,40,18} + }, + { + {0,6,12,18,24,30,36,44,54,66,80,96,116,140,168,200,238,284,336,396,464,522,576}, + {6,6,6,6,6,6,8,10,12,14,16,20,24,28,32,38,46,52,60,68,58,54}, + {0,12,24,36,54,78,108,144,186,240,312,402,522,576}, + {4,4,4,6,8,10,12,14,18,24,30,40,18} + }, + { + {0,12,24,36,48,60,72,88,108,132,160,192,232,280,336,400,476,566,568,570,572,574,576}, + {12,12,12,12,12,12,16,20,24,28,32,40,48,56,64,76,90,2,2,2,2,2}, + {0, 24, 48, 72,108,156,216,288,372,480,486,492,498,576}, + {8,8,8,12,16,20,24,28,36,2,2,2,26} + } +}; + +static int mapbuf0[9][152]; +static int mapbuf1[9][156]; +static int mapbuf2[9][44]; +static int *map[9][3]; +static int *mapend[9][3]; + +static unsigned int n_slen2[512]; /* MPEG 2.0 slen for 'normal' mode */ +static unsigned int i_slen2[256]; /* MPEG 2.0 slen for intensity stereo */ + +/* Some helpers used in init_layer3 */ + +#ifdef OPT_MMXORSSE +real init_layer3_gainpow2_mmx(mpg123_handle *fr, int i) +{ + if(!fr->p.down_sample) return DOUBLE_TO_REAL(16384.0 * pow((double)2.0,-0.25 * (double) (i+210) )); + else return DOUBLE_TO_REAL(pow((double)2.0,-0.25 * (double) (i+210))); +} +#endif + +real init_layer3_gainpow2(mpg123_handle *fr, int i) +{ +#if defined(REAL_IS_FIXED) && defined(PRECALC_TABLES) + return gainpow2[i+256]; +#else + return DOUBLE_TO_REAL_SCALE_LAYER3(pow((double)2.0,-0.25 * (double) (i+210)),i+256); +#endif +} + + +/* init tables for layer-3 ... specific with the downsampling... 
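
init_layer3_gainpow2 above fills the gain table with 2^(-0.25*(i+210)); the 210 offset is the constant from the Layer III global_gain formula, and the MMX/SSE variant additionally pre-scales by 16384 when no downsampling is active. A plain floating-point sketch of the same formula, with an illustrative name:

    #include <math.h>

    /* Layer III gain index to linear factor, mirroring the pow() call
       in init_layer3_gainpow2 above (plain float path, no fixed-point scaling). */
    static double gain_to_factor(int i)
    {
        return pow(2.0, -0.25 * (double)(i + 210));
    }
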
*/ +void init_layer3(void) +{ + int i,j,k,l; + +#if !defined(REAL_IS_FIXED) || !defined(PRECALC_TABLES) + for(i=0;i<8207;i++) + ispow[i] = DOUBLE_TO_REAL_POW43(pow((double)i,(double)4.0/3.0)); + + for(i=0;i<8;i++) + { + const double Ci[8] = {-0.6,-0.535,-0.33,-0.185,-0.095,-0.041,-0.0142,-0.0037}; + double sq = sqrt(1.0+Ci[i]*Ci[i]); + aa_cs[i] = DOUBLE_TO_REAL(1.0/sq); + aa_ca[i] = DOUBLE_TO_REAL(Ci[i]/sq); + } + + for(i=0;i<18;i++) + { + win[0][i] = win[1][i] = + DOUBLE_TO_REAL( 0.5*sin(M_PI/72.0 * (double)(2*(i+0) +1)) / cos(M_PI * (double)(2*(i+0) +19) / 72.0) ); + win[0][i+18] = win[3][i+18] = + DOUBLE_TO_REAL( 0.5*sin(M_PI/72.0 * (double)(2*(i+18)+1)) / cos(M_PI * (double)(2*(i+18)+19) / 72.0) ); + } + for(i=0;i<6;i++) + { + win[1][i+18] = DOUBLE_TO_REAL(0.5 / cos ( M_PI * (double) (2*(i+18)+19) / 72.0 )); + win[3][i+12] = DOUBLE_TO_REAL(0.5 / cos ( M_PI * (double) (2*(i+12)+19) / 72.0 )); + win[1][i+24] = DOUBLE_TO_REAL(0.5 * sin( M_PI / 24.0 * (double) (2*i+13) ) / cos ( M_PI * (double) (2*(i+24)+19) / 72.0 )); + win[1][i+30] = win[3][i] = DOUBLE_TO_REAL(0.0); + win[3][i+6 ] = DOUBLE_TO_REAL(0.5 * sin( M_PI / 24.0 * (double) (2*i+1 ) ) / cos ( M_PI * (double) (2*(i+6 )+19) / 72.0 )); + } + + for(i=0;i<9;i++) + COS9[i] = DOUBLE_TO_REAL(cos( M_PI / 18.0 * (double) i)); + + for(i=0;i<9;i++) + tfcos36[i] = DOUBLE_TO_REAL(0.5 / cos ( M_PI * (double) (i*2+1) / 36.0 )); + + for(i=0;i<3;i++) + tfcos12[i] = DOUBLE_TO_REAL(0.5 / cos ( M_PI * (double) (i*2+1) / 12.0 )); + + COS6_1 = DOUBLE_TO_REAL(cos( M_PI / 6.0 * (double) 1)); + COS6_2 = DOUBLE_TO_REAL(cos( M_PI / 6.0 * (double) 2)); + +#ifdef NEW_DCT9 + cos9[0] = DOUBLE_TO_REAL(cos(1.0*M_PI/9.0)); + cos9[1] = DOUBLE_TO_REAL(cos(5.0*M_PI/9.0)); + cos9[2] = DOUBLE_TO_REAL(cos(7.0*M_PI/9.0)); + cos18[0] = DOUBLE_TO_REAL(cos(1.0*M_PI/18.0)); + cos18[1] = DOUBLE_TO_REAL(cos(11.0*M_PI/18.0)); + cos18[2] = DOUBLE_TO_REAL(cos(13.0*M_PI/18.0)); +#endif + + for(i=0;i<12;i++) + { + win[2][i] = DOUBLE_TO_REAL(0.5 * sin( M_PI / 24.0 * (double) (2*i+1) ) / cos ( M_PI * (double) (2*i+7) / 24.0 )); + } + + for(i=0;i<16;i++) + { + double t = tan( (double) i * M_PI / 12.0 ); + tan1_1[i] = DOUBLE_TO_REAL_15(t / (1.0+t)); + tan2_1[i] = DOUBLE_TO_REAL_15(1.0 / (1.0 + t)); + tan1_2[i] = DOUBLE_TO_REAL_15(M_SQRT2 * t / (1.0+t)); + tan2_2[i] = DOUBLE_TO_REAL_15(M_SQRT2 / (1.0 + t)); + + for(j=0;j<2;j++) + { + double base = pow(2.0,-0.25*(j+1.0)); + double p1=1.0,p2=1.0; + if(i > 0) + { + if( i & 1 ) p1 = pow(base,(i+1.0)*0.5); + else p2 = pow(base,i*0.5); + } + pow1_1[j][i] = DOUBLE_TO_REAL_15(p1); + pow2_1[j][i] = DOUBLE_TO_REAL_15(p2); + pow1_2[j][i] = DOUBLE_TO_REAL_15(M_SQRT2 * p1); + pow2_2[j][i] = DOUBLE_TO_REAL_15(M_SQRT2 * p2); + } + } +#endif + + for(j=0;j<4;j++) + { + const int len[4] = { 36,36,12,36 }; + for(i=0;ilongDiff; + for(i=0,cb = 0; cb < 8 ; cb++,i+=*bdf++) + { + *mp++ = (*bdf) >> 1; + *mp++ = i; + *mp++ = 3; + *mp++ = cb; + } + bdf = bi->shortDiff+3; + for(cb=3;cb<13;cb++) + { + int l = (*bdf++) >> 1; + for(lwin=0;lwin<3;lwin++) + { + *mp++ = l; + *mp++ = i + lwin; + *mp++ = lwin; + *mp++ = cb; + } + i += 6*l; + } + mapend[j][0] = mp; + + mp = map[j][1] = mapbuf1[j]; + bdf = bi->shortDiff+0; + for(i=0,cb=0;cb<13;cb++) + { + int l = (*bdf++) >> 1; + for(lwin=0;lwin<3;lwin++) + { + *mp++ = l; + *mp++ = i + lwin; + *mp++ = lwin; + *mp++ = cb; + } + i += 6*l; + } + mapend[j][1] = mp; + + mp = map[j][2] = mapbuf2[j]; + bdf = bi->longDiff; + for(cb = 0; cb < 22 ; cb++) + { + *mp++ = (*bdf++) >> 1; + *mp++ = cb; + } + mapend[j][2] = mp; + } + + /* 
Now for some serious loopings! */ + for(i=0;i<5;i++) + for(j=0;j<6;j++) + for(k=0;k<6;k++) + { + int n = k + j * 6 + i * 36; + i_slen2[n] = i|(j<<3)|(k<<6)|(3<<12); + } + for(i=0;i<4;i++) + for(j=0;j<4;j++) + for(k=0;k<4;k++) + { + int n = k + j * 4 + i * 16; + i_slen2[n+180] = i|(j<<3)|(k<<6)|(4<<12); + } + for(i=0;i<4;i++) + for(j=0;j<3;j++) + { + int n = j + i * 3; + i_slen2[n+244] = i|(j<<3) | (5<<12); + n_slen2[n+500] = i|(j<<3) | (2<<12) | (1<<15); + } + for(i=0;i<5;i++) + for(j=0;j<5;j++) + for(k=0;k<4;k++) + for(l=0;l<4;l++) + { + int n = l + k * 4 + j * 16 + i * 80; + n_slen2[n] = i|(j<<3)|(k<<6)|(l<<9)|(0<<12); + } + for(i=0;i<5;i++) + for(j=0;j<5;j++) + for(k=0;k<4;k++) + { + int n = k + j * 4 + i * 20; + n_slen2[n+400] = i|(j<<3)|(k<<6)|(1<<12); + } +} + + +void init_layer3_stuff(mpg123_handle *fr, real (*gainpow2)(mpg123_handle *fr, int i)) +{ + int i,j; + + for(i=-256;i<118+4;i++) fr->gainpow2[i+256] = gainpow2(fr,i); + + for(j=0;j<9;j++) + { + for(i=0;i<23;i++) + { + fr->longLimit[j][i] = (bandInfo[j].longIdx[i] - 1 + 8) / 18 + 1; + if(fr->longLimit[j][i] > (fr->down_sample_sblimit) ) + fr->longLimit[j][i] = fr->down_sample_sblimit; + } + for(i=0;i<14;i++) + { + fr->shortLimit[j][i] = (bandInfo[j].shortIdx[i] - 1) / 18 + 1; + if(fr->shortLimit[j][i] > (fr->down_sample_sblimit) ) + fr->shortLimit[j][i] = fr->down_sample_sblimit; + } + } +} + +/* + Observe! + Now come the actualy decoding routines. +*/ + +/* read additional side information (for MPEG 1 and MPEG 2) */ +static int III_get_side_info(mpg123_handle *fr, struct III_sideinfo *si,int stereo, int ms_stereo,long sfreq,int single) +{ + int ch, gr; + int powdiff = (single == SINGLE_MIX) ? 4 : 0; + + const int tabs[2][5] = { { 2,9,5,3,4 } , { 1,8,1,2,9 } }; + const int *tab = tabs[fr->lsf]; + + si->main_data_begin = getbits(fr, tab[1]); + + if(si->main_data_begin > fr->bitreservoir) + { + if(!fr->to_ignore && VERBOSE2) fprintf(stderr, "Note: missing %d bytes in bit reservoir for frame %li\n", (int)(si->main_data_begin - fr->bitreservoir), (long)fr->num); + + /* overwrite main_data_begin for the really available bit reservoir */ + backbits(fr, tab[1]); + if(fr->lsf == 0) + { + fr->wordpointer[0] = (unsigned char) (fr->bitreservoir >> 1); + fr->wordpointer[1] = (unsigned char) ((fr->bitreservoir & 1) << 7); + } + else fr->wordpointer[0] = (unsigned char) fr->bitreservoir; + + /* zero "side-info" data for a silence-frame + without touching audio data used as bit reservoir for following frame */ + memset(fr->wordpointer+2, 0, fr->ssize-2); + + /* reread the new bit reservoir offset */ + si->main_data_begin = getbits(fr, tab[1]); + } + + /* Keep track of the available data bytes for the bit reservoir. + Think: Substract the 2 crc bytes in parser already? */ + fr->bitreservoir = fr->bitreservoir + fr->framesize - fr->ssize - (fr->error_protection ? 2 : 0); + /* Limit the reservoir to the max for MPEG 1.0 or 2.x . */ + if(fr->bitreservoir > (unsigned int) (fr->lsf == 0 ? 511 : 255)) + fr->bitreservoir = (fr->lsf == 0 ? 511 : 255); + + /* Now back into less commented territory. It's code. It works. 
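
The i_slen2/n_slen2 initialisation above packs the four MPEG-2 scale-factor field widths, a group-table selector and the preflag into one unsigned int, three bits per field. III_get_scale_factors_2 later peels them off with "slen & 0x7; slen >>= 3"; a small sketch of that unpacking, assuming the same bit layout as the code above (names are illustrative):

    /* Unpack a value built as i|(j<<3)|(k<<6)|(l<<9)|(tab<<12), preflag in bit 15. */
    static void unpack_slen2(unsigned int slen, int width[4], int *tab, int *preflag)
    {
        int i;
        for(i = 0; i < 4; ++i)
        {
            width[i] = slen & 0x7;
            slen >>= 3;
        }
        *tab     = slen & 0x7;        /* row selector for the stab[][] group-size table */
        *preflag = (slen >> 3) & 0x1; /* bit 15 of the packed word */
    }
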
*/ + + if (stereo == 1) + si->private_bits = getbits_fast(fr, tab[2]); + else + si->private_bits = getbits_fast(fr, tab[3]); + + if(!fr->lsf) for(ch=0; chch[ch].gr[0].scfsi = -1; + si->ch[ch].gr[1].scfsi = getbits_fast(fr, 4); + } + + for (gr=0; grch[ch].gr[gr]); + + gr_info->part2_3_length = getbits(fr, 12); + gr_info->big_values = getbits(fr, 9); + if(gr_info->big_values > 288) + { + if(NOQUIET) error("big_values too large!"); + gr_info->big_values = 288; + } + gr_info->pow2gain = fr->gainpow2+256 - getbits_fast(fr, 8) + powdiff; + if(ms_stereo) gr_info->pow2gain += 2; + + gr_info->scalefac_compress = getbits(fr, tab[4]); + + if(get1bit(fr)) + { /* window switch flag */ + int i; + gr_info->block_type = getbits_fast(fr, 2); + gr_info->mixed_block_flag = get1bit(fr); + gr_info->table_select[0] = getbits_fast(fr, 5); + gr_info->table_select[1] = getbits_fast(fr, 5); + /* + table_select[2] not needed, because there is no region2, + but to satisfy some verification tools we set it either. + */ + gr_info->table_select[2] = 0; + for(i=0;i<3;i++) + gr_info->full_gain[i] = gr_info->pow2gain + (getbits_fast(fr, 3)<<3); + + if(gr_info->block_type == 0) + { + if(NOQUIET) error("Blocktype == 0 and window-switching == 1 not allowed."); + return 1; + } + + /* region_count/start parameters are implicit in this case. */ + if( (!fr->lsf || (gr_info->block_type == 2)) && !fr->mpeg25) + { + gr_info->region1start = 36>>1; + gr_info->region2start = 576>>1; + } + else + { + if(fr->mpeg25) + { + int r0c,r1c; + if((gr_info->block_type == 2) && (!gr_info->mixed_block_flag) ) r0c = 5; + else r0c = 7; + + /* r0c+1+r1c+1 == 22, always. */ + r1c = 20 - r0c; + gr_info->region1start = bandInfo[sfreq].longIdx[r0c+1] >> 1 ; + gr_info->region2start = bandInfo[sfreq].longIdx[r0c+1+r1c+1] >> 1; + } + else + { + gr_info->region1start = 54>>1; + gr_info->region2start = 576>>1; + } + } + } + else + { + int i,r0c,r1c; + for (i=0; i<3; i++) + gr_info->table_select[i] = getbits_fast(fr, 5); + + r0c = getbits_fast(fr, 4); /* 0 .. 15 */ + r1c = getbits_fast(fr, 3); /* 0 .. 
7 */ + gr_info->region1start = bandInfo[sfreq].longIdx[r0c+1] >> 1 ; + + /* max(r0c+r1c+2) = 15+7+2 = 24 */ + if(r0c+1+r1c+1 > 22) gr_info->region2start = 576>>1; + else gr_info->region2start = bandInfo[sfreq].longIdx[r0c+1+r1c+1] >> 1; + + gr_info->block_type = 0; + gr_info->mixed_block_flag = 0; + } + if(!fr->lsf) gr_info->preflag = get1bit(fr); + + gr_info->scalefac_scale = get1bit(fr); + gr_info->count1table_select = get1bit(fr); + } + return 0; +} + + +/* read scalefactors */ +static int III_get_scale_factors_1(mpg123_handle *fr, int *scf,struct gr_info_s *gr_info,int ch,int gr) +{ + const unsigned char slen[2][16] = + { + {0, 0, 0, 0, 3, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4}, + {0, 1, 2, 3, 0, 1, 2, 3, 1, 2, 3, 1, 2, 3, 2, 3} + }; + int numbits; + int num0 = slen[0][gr_info->scalefac_compress]; + int num1 = slen[1][gr_info->scalefac_compress]; + + if(gr_info->block_type == 2) + { + int i=18; + numbits = (num0 + num1) * 18; + + if(gr_info->mixed_block_flag) + { + for (i=8;i;i--) + *scf++ = getbits_fast(fr, num0); + + i = 9; + numbits -= num0; /* num0 * 17 + num1 * 18 */ + } + + for(;i;i--) *scf++ = getbits_fast(fr, num0); + + for(i = 18; i; i--) *scf++ = getbits_fast(fr, num1); + + *scf++ = 0; *scf++ = 0; *scf++ = 0; /* short[13][0..2] = 0 */ + } + else + { + int i; + int scfsi = gr_info->scfsi; + + if(scfsi < 0) + { /* scfsi < 0 => granule == 0 */ + for(i=11;i;i--) *scf++ = getbits_fast(fr, num0); + + for(i=10;i;i--) *scf++ = getbits_fast(fr, num1); + + numbits = (num0 + num1) * 10 + num0; + *scf++ = 0; + } + else + { + numbits = 0; + if(!(scfsi & 0x8)) + { + for (i=0;i<6;i++) *scf++ = getbits_fast(fr, num0); + + numbits += num0 * 6; + } + else scf += 6; + + if(!(scfsi & 0x4)) + { + for (i=0;i<5;i++) *scf++ = getbits_fast(fr, num0); + + numbits += num0 * 5; + } + else scf += 5; + + if(!(scfsi & 0x2)) + { + for(i=0;i<5;i++) *scf++ = getbits_fast(fr, num1); + + numbits += num1 * 5; + } + else scf += 5; + + if(!(scfsi & 0x1)) + { + for (i=0;i<5;i++) *scf++ = getbits_fast(fr, num1); + + numbits += num1 * 5; + } + else scf += 5; + + *scf++ = 0; /* no l[21] in original sources */ + } + } + return numbits; +} + + +static int III_get_scale_factors_2(mpg123_handle *fr, int *scf,struct gr_info_s *gr_info,int i_stereo) +{ + const unsigned char *pnt; + int i,j,n=0,numbits=0; + unsigned int slen; + + const unsigned char stab[3][6][4] = + { + { + { 6, 5, 5,5 } , { 6, 5, 7,3 } , { 11,10,0,0}, + { 7, 7, 7,0 } , { 6, 6, 6,3 } , { 8, 8,5,0} + }, + { + { 9, 9, 9,9 } , { 9, 9,12,6 } , { 18,18,0,0}, + {12,12,12,0 } , {12, 9, 9,6 } , { 15,12,9,0} + }, + { + { 6, 9, 9,9 } , { 6, 9,12,6 } , { 15,18,0,0}, + { 6,15,12,0 } , { 6,12, 9,6 } , { 6,18,9,0} + } + }; + + if(i_stereo) /* i_stereo AND second channel -> do_layer3() checks this */ + slen = i_slen2[gr_info->scalefac_compress>>1]; + else + slen = n_slen2[gr_info->scalefac_compress]; + + gr_info->preflag = (slen>>15) & 0x1; + + n = 0; + if( gr_info->block_type == 2 ) + { + n++; + if(gr_info->mixed_block_flag) n++; + } + + pnt = stab[n][(slen>>12)&0x7]; + + for(i=0;i<4;i++) + { + int num = slen & 0x7; + slen >>= 3; + if(num) + { + for(j=0;j<(int)(pnt[i]);j++) *scf++ = getbits_fast(fr, num); + + numbits += pnt[i] * num; + } + else + for(j=0;j<(int)(pnt[i]);j++) *scf++ = 0; + } + + n = (n << 1) + 1; + for(i=0;iscalefac_scale; + real *xrpnt = (real *) xr; + int l[3],l3; + int part2remain = gr_info->part2_3_length - part2bits; + int *me; +#ifdef REAL_IS_FIXED + int gainpow2_scale_idx = 378; +#endif + + /* mhipp tree has this split up a bit... 
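
III_get_scale_factors_1 above returns the number of bits it consumed so the dequantizer can subtract them from part2_3_length; for granule 0 of a long block that is 11 scale factors of slen0 bits plus 10 of slen1 bits. A standalone cross-check of that count, using the same slen table; the function name is illustrative:

    /* Scale-factor bits for granule 0 of a long block (no scfsi reuse possible),
       matching numbits = (num0 + num1) * 10 + num0 in III_get_scale_factors_1. */
    static int scf_bits_granule0(int scalefac_compress)
    {
        static const unsigned char slen[2][16] =
        {
            {0, 0, 0, 0, 3, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4},
            {0, 1, 2, 3, 0, 1, 2, 3, 1, 2, 3, 1, 2, 3, 2, 3}
        };
        return 11 * slen[0][scalefac_compress] + 10 * slen[1][scalefac_compress];
    }
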
*/ + int num=getbitoffset(fr); + long mask; + /* We must split this, because for num==0 the shift is undefined if you do it in one step. */ + mask = ((unsigned long) getbits(fr, num))<big_values; + int region1 = gr_info->region1start; + int region2 = gr_info->region2start; + l3 = ((576>>1)-bv)>>1; + + /* we may lose the 'odd' bit here !! check this later again */ + if(bv <= region1) + { + l[0] = bv; + l[1] = 0; + l[2] = 0; + } + else + { + l[0] = region1; + if(bv <= region2) + { + l[1] = bv - l[0]; + l[2] = 0; + } + else + { + l[1] = region2 - l[0]; + l[2] = bv - region2; + } + } + } + + if(gr_info->block_type == 2) + { + /* decoding with short or mixed mode BandIndex table */ + int i,max[4]; + int step=0,lwin=3,cb=0; + register real v = 0.0; + register int *m,mc; + + if(gr_info->mixed_block_flag) + { + max[3] = -1; + max[0] = max[1] = max[2] = 2; + m = map[sfreq][0]; + me = mapend[sfreq][0]; + } + else + { + max[0] = max[1] = max[2] = max[3] = -1; + /* max[3] not really needed in this case */ + m = map[sfreq][1]; + me = mapend[sfreq][1]; + } + + mc = 0; + for(i=0;i<2;i++) + { + int lp = l[i]; + const struct newhuff *h = ht+gr_info->table_select[i]; + for(;lp;lp--,mc--) + { + register long x,y; + if( (!mc) ) + { + mc = *m++; + xrpnt = ((real *) xr) + (*m++); + lwin = *m++; + cb = *m++; + if(lwin == 3) + { +#ifdef REAL_IS_FIXED + gainpow2_scale_idx = (int)(gr_info->pow2gain + (*scf << shift) - fr->gainpow2); +#endif + v = gr_info->pow2gain[(*scf++) << shift]; + step = 1; + } + else + { +#ifdef REAL_IS_FIXED + gainpow2_scale_idx = (int)(gr_info->full_gain[lwin] + (*scf << shift) - fr->gainpow2); +#endif + v = gr_info->full_gain[lwin][(*scf++) << shift]; + step = 3; + } + } + { + const short *val = h->table; + REFRESH_MASK; +#ifdef USE_NEW_HUFFTABLE + while((y=val[(unsigned long)mask>>(BITSHIFT+4)])<0) + { + val -= y; + num -= 4; + mask <<= 4; + } + num -= (y >> 8); + mask <<= (y >> 8); + x = (y >> 4) & 0xf; + y &= 0xf; +#else + while((y=*val++)<0) + { + if (mask < 0) val -= y; + + num--; + mask <<= 1; + } + x = y >> 4; + y &= 0xf; +#endif + } + if(x == 15 && h->linbits) + { + max[lwin] = cb; + REFRESH_MASK; + x += ((unsigned long) mask) >> (BITSHIFT+8-h->linbits); + num -= h->linbits+1; + mask <<= h->linbits; + if(mask < 0) *xrpnt = REAL_MUL_SCALE_LAYER3(-ispow[x], v, gainpow2_scale_idx); + else *xrpnt = REAL_MUL_SCALE_LAYER3( ispow[x], v, gainpow2_scale_idx); + + mask <<= 1; + } + else if(x) + { + max[lwin] = cb; + if(mask < 0) *xrpnt = REAL_MUL_SCALE_LAYER3(-ispow[x], v, gainpow2_scale_idx); + else *xrpnt = REAL_MUL_SCALE_LAYER3( ispow[x], v, gainpow2_scale_idx); + + num--; + mask <<= 1; + } + else *xrpnt = DOUBLE_TO_REAL(0.0); + + xrpnt += step; + if(y == 15 && h->linbits) + { + max[lwin] = cb; + REFRESH_MASK; + y += ((unsigned long) mask) >> (BITSHIFT+8-h->linbits); + num -= h->linbits+1; + mask <<= h->linbits; + if(mask < 0) *xrpnt = REAL_MUL_SCALE_LAYER3(-ispow[y], v, gainpow2_scale_idx); + else *xrpnt = REAL_MUL_SCALE_LAYER3( ispow[y], v, gainpow2_scale_idx); + + mask <<= 1; + } + else if(y) + { + max[lwin] = cb; + if(mask < 0) *xrpnt = REAL_MUL_SCALE_LAYER3(-ispow[y], v, gainpow2_scale_idx); + else *xrpnt = REAL_MUL_SCALE_LAYER3( ispow[y], v, gainpow2_scale_idx); + + num--; + mask <<= 1; + } + else *xrpnt = DOUBLE_TO_REAL(0.0); + + xrpnt += step; + } + } + + for(;l3 && (part2remain+num > 0);l3--) + { + const struct newhuff* h; + const short* val; + register short a; + /* + This is only a humble hack to prevent a special segfault. + More insight into the real workings is still needed. 
+ Especially why there are (valid?) files that make xrpnt exceed the array with 4 bytes without segfaulting, more seems to be really bad, though. + */ + #ifdef DEBUG + if(!(xrpnt < &xr[SBLIMIT][0])) + { + if(VERBOSE) debug2("attempted soft xrpnt overflow (%p !< %p) ?", (void*) xrpnt, (void*) &xr[SBLIMIT][0]); + } + #endif + if(!(xrpnt < &xr[SBLIMIT][0]+5)) + { + if(NOQUIET) error2("attempted xrpnt overflow (%p !< %p)", (void*) xrpnt, (void*) &xr[SBLIMIT][0]); + return 2; + } + h = htc+gr_info->count1table_select; + val = h->table; + + REFRESH_MASK; + while((a=*val++)<0) + { + if(mask < 0) val -= a; + + num--; + mask <<= 1; + } + if(part2remain+num <= 0) + { + num -= part2remain+num; + break; + } + + for(i=0;i<4;i++) + { + if(!(i & 1)) + { + if(!mc) + { + mc = *m++; + xrpnt = ((real *) xr) + (*m++); + lwin = *m++; + cb = *m++; + if(lwin == 3) + { +#ifdef REAL_IS_FIXED + gainpow2_scale_idx = (int)(gr_info->pow2gain + (*scf << shift) - fr->gainpow2); +#endif + v = gr_info->pow2gain[(*scf++) << shift]; + step = 1; + } + else + { +#ifdef REAL_IS_FIXED + gainpow2_scale_idx = (int)(gr_info->full_gain[lwin] + (*scf << shift) - fr->gainpow2); +#endif + v = gr_info->full_gain[lwin][(*scf++) << shift]; + step = 3; + } + } + mc--; + } + if( (a & (0x8>>i)) ) + { + max[lwin] = cb; + if(part2remain+num <= 0) + break; + + if(mask < 0) *xrpnt = -REAL_SCALE_LAYER3(v, gainpow2_scale_idx); + else *xrpnt = REAL_SCALE_LAYER3(v, gainpow2_scale_idx); + + num--; + mask <<= 1; + } + else *xrpnt = DOUBLE_TO_REAL(0.0); + + xrpnt += step; + } + } + + if(lwin < 3) + { /* short band? */ + while(1) + { + for(;mc > 0;mc--) + { + *xrpnt = DOUBLE_TO_REAL(0.0); xrpnt += 3; /* short band -> step=3 */ + *xrpnt = DOUBLE_TO_REAL(0.0); xrpnt += 3; + } + if(m >= me) + break; + + mc = *m++; + xrpnt = ((real *) xr) + *m++; + if(*m++ == 0) + break; /* optimize: field will be set to zero at the end of the function */ + + m++; /* cb */ + } + } + + gr_info->maxband[0] = max[0]+1; + gr_info->maxband[1] = max[1]+1; + gr_info->maxband[2] = max[2]+1; + gr_info->maxbandl = max[3]+1; + + { + int rmax = max[0] > max[1] ? max[0] : max[1]; + rmax = (rmax > max[2] ? rmax : max[2]) + 1; + gr_info->maxb = rmax ? 
fr->shortLimit[sfreq][rmax] : fr->longLimit[sfreq][max[3]+1]; + } + + } + else + { + /* decoding with 'long' BandIndex table (block_type != 2) */ + const unsigned char *pretab = pretab_choice[gr_info->preflag]; + int i,max = -1; + int cb = 0; + int *m = map[sfreq][2]; + register real v = 0.0; + int mc = 0; + + /* long hash table values */ + for(i=0;i<3;i++) + { + int lp = l[i]; + const struct newhuff *h = ht+gr_info->table_select[i]; + + for(;lp;lp--,mc--) + { + long x,y; + if(!mc) + { + mc = *m++; + cb = *m++; +#ifdef CUT_SFB21 + if(cb == 21) + v = 0.0; + else +#endif + { +#ifdef REAL_IS_FIXED + gainpow2_scale_idx = (int)(gr_info->pow2gain + (*scf << shift) - fr->gainpow2); +#endif + v = gr_info->pow2gain[(*(scf++) + (*pretab++)) << shift]; + } + } + { + const short *val = h->table; + REFRESH_MASK; +#ifdef USE_NEW_HUFFTABLE + while((y=val[(unsigned long)mask>>(BITSHIFT+4)])<0) + { + val -= y; + num -= 4; + mask <<= 4; + } + num -= (y >> 8); + mask <<= (y >> 8); + x = (y >> 4) & 0xf; + y &= 0xf; +#else + while((y=*val++)<0) + { + if (mask < 0) val -= y; + + num--; + mask <<= 1; + } + x = y >> 4; + y &= 0xf; +#endif + } + + if(x == 15 && h->linbits) + { + max = cb; + REFRESH_MASK; + x += ((unsigned long) mask) >> (BITSHIFT+8-h->linbits); + num -= h->linbits+1; + mask <<= h->linbits; + if(mask < 0) *xrpnt++ = REAL_MUL_SCALE_LAYER3(-ispow[x], v, gainpow2_scale_idx); + else *xrpnt++ = REAL_MUL_SCALE_LAYER3( ispow[x], v, gainpow2_scale_idx); + + mask <<= 1; + } + else if(x) + { + max = cb; + if(mask < 0) *xrpnt++ = REAL_MUL_SCALE_LAYER3(-ispow[x], v, gainpow2_scale_idx); + else *xrpnt++ = REAL_MUL_SCALE_LAYER3( ispow[x], v, gainpow2_scale_idx); + num--; + + mask <<= 1; + } + else *xrpnt++ = DOUBLE_TO_REAL(0.0); + + if(y == 15 && h->linbits) + { + max = cb; + REFRESH_MASK; + y += ((unsigned long) mask) >> (BITSHIFT+8-h->linbits); + num -= h->linbits+1; + mask <<= h->linbits; + if(mask < 0) *xrpnt++ = REAL_MUL_SCALE_LAYER3(-ispow[y], v, gainpow2_scale_idx); + else *xrpnt++ = REAL_MUL_SCALE_LAYER3( ispow[y], v, gainpow2_scale_idx); + + mask <<= 1; + } + else if(y) + { + max = cb; + if(mask < 0) *xrpnt++ = REAL_MUL_SCALE_LAYER3(-ispow[y], v, gainpow2_scale_idx); + else *xrpnt++ = REAL_MUL_SCALE_LAYER3( ispow[y], v, gainpow2_scale_idx); + + num--; + mask <<= 1; + } + else *xrpnt++ = DOUBLE_TO_REAL(0.0); + } + } + + /* short (count1table) values */ + for(;l3 && (part2remain+num > 0);l3--) + { + const struct newhuff *h = htc+gr_info->count1table_select; + const short *val = h->table; + register short a; + + REFRESH_MASK; + while((a=*val++)<0) + { + if (mask < 0) val -= a; + + num--; + mask <<= 1; + } + if(part2remain+num <= 0) + { + num -= part2remain+num; + break; + } + + for(i=0;i<4;i++) + { + if(!(i & 1)) + { + if(!mc) + { + mc = *m++; + cb = *m++; +#ifdef CUT_SFB21 + if(cb == 21) + v = 0.0; + else +#endif + { +#ifdef REAL_IS_FIXED + gainpow2_scale_idx = (int)(gr_info->pow2gain + (*scf << shift) - fr->gainpow2); +#endif + v = gr_info->pow2gain[((*scf++) + (*pretab++)) << shift]; + } + } + mc--; + } + if( (a & (0x8>>i)) ) + { + max = cb; + if(part2remain+num <= 0) + break; + + if(mask < 0) *xrpnt++ = -REAL_SCALE_LAYER3(v, gainpow2_scale_idx); + else *xrpnt++ = REAL_SCALE_LAYER3(v, gainpow2_scale_idx); + + num--; + mask <<= 1; + } + else *xrpnt++ = DOUBLE_TO_REAL(0.0); + } + } + + gr_info->maxbandl = max+1; + gr_info->maxb = fr->longLimit[sfreq][gr_info->maxbandl]; + } + + part2remain += num; + backbits(fr, num); + num = 0; + + while(xrpnt < &xr[SBLIMIT][0]) + *xrpnt++ = DOUBLE_TO_REAL(0.0); + + 
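
The Huffman loops above keep the bitstream in a left-aligned cache: mask holds freshly read bits at its high end, num counts how many of them are valid, testing mask < 0 inspects the next bit and mask <<= 1 consumes it, with REFRESH_MASK topping the cache up. A hedged sketch of that idea with explicit names, not code from the patch:

    /* One bit from a left-aligned cache: the sign bit is the next bit to read.
       The caller refreshes the cache (as REFRESH_MASK does) before num runs out. */
    static int cache_get_bit(long *mask, int *num)
    {
        int bit = (*mask < 0);
        *mask <<= 1;
        (*num)--;
        return bit;
    }
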
while( part2remain > 16 ) + { + skipbits(fr, 16); /* Dismiss stuffing Bits */ + part2remain -= 16; + } + if(part2remain > 0) skipbits(fr, part2remain); + else if(part2remain < 0) + { + debug1("Can't rewind stream by %d bits!",-part2remain); + return 1; /* -> error */ + } + return 0; +} + + +/* calculate real channel values for Joint-I-Stereo-mode */ +static void III_i_stereo(real xr_buf[2][SBLIMIT][SSLIMIT],int *scalefac, struct gr_info_s *gr_info,int sfreq,int ms_stereo,int lsf) +{ + real (*xr)[SBLIMIT*SSLIMIT] = (real (*)[SBLIMIT*SSLIMIT] ) xr_buf; + const struct bandInfoStruct *bi = &bandInfo[sfreq]; + + const real *tab1,*tab2; + +#if 1 + int tab; +/* TODO: optimize as static */ + const real *tabs[3][2][2] = + { + { { tan1_1,tan2_1 } , { tan1_2,tan2_2 } }, + { { pow1_1[0],pow2_1[0] } , { pow1_2[0],pow2_2[0] } }, + { { pow1_1[1],pow2_1[1] } , { pow1_2[1],pow2_2[1] } } + }; + + tab = lsf + (gr_info->scalefac_compress & lsf); + tab1 = tabs[tab][ms_stereo][0]; + tab2 = tabs[tab][ms_stereo][1]; +#else + if(lsf) + { + int p = gr_info->scalefac_compress & 0x1; + if(ms_stereo) + { + tab1 = pow1_2[p]; + tab2 = pow2_2[p]; + } + else + { + tab1 = pow1_1[p]; + tab2 = pow2_1[p]; + } + } + else + { + if(ms_stereo) + { + tab1 = tan1_2; + tab2 = tan2_2; + } + else + { + tab1 = tan1_1; + tab2 = tan2_1; + } + } +#endif + + if(gr_info->block_type == 2) + { + int lwin,do_l = 0; + if( gr_info->mixed_block_flag ) do_l = 1; + + for(lwin=0;lwin<3;lwin++) + { /* process each window */ + /* get first band with zero values */ + int is_p,sb,idx,sfb = gr_info->maxband[lwin]; /* sfb is minimal 3 for mixed mode */ + if(sfb > 3) do_l = 0; + + for(;sfb<12;sfb++) + { + is_p = scalefac[sfb*3+lwin-gr_info->mixed_block_flag]; /* scale: 0-15 */ + if(is_p != 7) + { + real t1,t2; + sb = bi->shortDiff[sfb]; + idx = bi->shortIdx[sfb] + lwin; + t1 = tab1[is_p]; t2 = tab2[is_p]; + for (; sb > 0; sb--,idx+=3) + { + real v = xr[0][idx]; + xr[0][idx] = REAL_MUL_15(v, t1); + xr[1][idx] = REAL_MUL_15(v, t2); + } + } + } + +#if 1 +/* in the original: copy 10 to 11 , here: copy 11 to 12 +maybe still wrong??? (copy 12 to 13?) */ + is_p = scalefac[11*3+lwin-gr_info->mixed_block_flag]; /* scale: 0-15 */ + sb = bi->shortDiff[12]; + idx = bi->shortIdx[12] + lwin; +#else + is_p = scalefac[10*3+lwin-gr_info->mixed_block_flag]; /* scale: 0-15 */ + sb = bi->shortDiff[11]; + idx = bi->shortIdx[11] + lwin; +#endif + if(is_p != 7) + { + real t1,t2; + t1 = tab1[is_p]; t2 = tab2[is_p]; + for( ; sb > 0; sb--,idx+=3 ) + { + real v = xr[0][idx]; + xr[0][idx] = REAL_MUL_15(v, t1); + xr[1][idx] = REAL_MUL_15(v, t2); + } + } + } /* end for(lwin; .. ; . 
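
III_i_stereo above turns the mono-coded spectrum into a left/right pair by scaling with ratios derived from the intensity position: in MPEG-1 the left channel gets tan(is_pos*pi/12)/(1+tan(...)) and the right 1/(1+tan(...)), which is exactly what the tan1_1/tan2_1 tables precompute, and is_pos == 7 marks bands that are left untouched. A floating-point sketch of those two factors, with an illustrative name:

    #include <math.h>

    /* MPEG-1 intensity stereo factors for is_pos 0..6 (7 means: keep the band as is). */
    static void is_factors_mpeg1(int is_pos, double *k_left, double *k_right)
    {
        double t = tan((double)is_pos * M_PI / 12.0);
        *k_left  = t   / (1.0 + t);
        *k_right = 1.0 / (1.0 + t);
    }
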
) */ + + /* also check l-part, if ALL bands in the three windows are 'empty' and mode = mixed_mode */ + if(do_l) + { + int sfb = gr_info->maxbandl; + int idx; + if(sfb > 21) return; /* similarity fix related to CVE-2006-1655 */ + + idx = bi->longIdx[sfb]; + for( ; sfb<8; sfb++ ) + { + int sb = bi->longDiff[sfb]; + int is_p = scalefac[sfb]; /* scale: 0-15 */ + if(is_p != 7) + { + real t1,t2; + t1 = tab1[is_p]; t2 = tab2[is_p]; + for( ; sb > 0; sb--,idx++) + { + real v = xr[0][idx]; + xr[0][idx] = REAL_MUL_15(v, t1); + xr[1][idx] = REAL_MUL_15(v, t2); + } + } + else idx += sb; + } + } + } + else + { /* ((gr_info->block_type != 2)) */ + int sfb = gr_info->maxbandl; + int is_p,idx; + if(sfb > 21) return; /* tightened fix for CVE-2006-1655 */ + + idx = bi->longIdx[sfb]; + for ( ; sfb<21; sfb++) + { + int sb = bi->longDiff[sfb]; + is_p = scalefac[sfb]; /* scale: 0-15 */ + if(is_p != 7) + { + real t1,t2; + t1 = tab1[is_p]; t2 = tab2[is_p]; + for( ; sb > 0; sb--,idx++) + { + real v = xr[0][idx]; + xr[0][idx] = REAL_MUL_15(v, t1); + xr[1][idx] = REAL_MUL_15(v, t2); + } + } + else idx += sb; + } + + is_p = scalefac[20]; + if(is_p != 7) + { /* copy l-band 20 to l-band 21 */ + int sb; + real t1 = tab1[is_p],t2 = tab2[is_p]; + + for( sb = bi->longDiff[21]; sb > 0; sb--,idx++ ) + { + real v = xr[0][idx]; + xr[0][idx] = REAL_MUL_15(v, t1); + xr[1][idx] = REAL_MUL_15(v, t2); + } + } + } +} + + +static void III_antialias(real xr[SBLIMIT][SSLIMIT],struct gr_info_s *gr_info) +{ + int sblim; + + if(gr_info->block_type == 2) + { + if(!gr_info->mixed_block_flag) return; + + sblim = 1; + } + else sblim = gr_info->maxb-1; + + /* 31 alias-reduction operations between each pair of sub-bands */ + /* with 8 butterflies between each pair */ + + { + int sb; + real *xr1=(real *) xr[1]; + + for(sb=sblim; sb; sb--,xr1+=10) + { + int ss; + real *cs=aa_cs,*ca=aa_ca; + real *xr2 = xr1; + + for(ss=7;ss>=0;ss--) + { /* upper and lower butterfly inputs */ + register real bu = *--xr2,bd = *xr1; + *xr2 = REAL_MUL(bu, *cs) - REAL_MUL(bd, *ca); + *xr1++ = REAL_MUL(bd, *cs++) + REAL_MUL(bu, *ca++); + } + } + } +} + +/* + This is an optimized DCT from Jeff Tsay's maplay 1.2+ package. + Saved one multiplication by doing the 'twiddle factor' stuff + together with the window mul. (MH) + + This uses Byeong Gi Lee's Fast Cosine Transform algorithm, but the + 9 point IDCT needs to be reduced further. Unfortunately, I don't + know how to do that, because 9 is not an even number. - Jeff. + + Original Message: + + 9 Point Inverse Discrete Cosine Transform + + This piece of code is Copyright 1997 Mikko Tommila and is freely usable + by anybody. The algorithm itself is of course in the public domain. + + Again derived heuristically from the 9-point WFTA. + + The algorithm is optimized (?) for speed, not for small rounding errors or + good readability. + + 36 additions, 11 multiplications + + Again this is very likely sub-optimal. + + The code is optimized to use a minimum number of temporary variables, + so it should compile quite well even on 8-register Intel x86 processors. + This makes the code quite obfuscated and very difficult to understand. + + References: + [1] S. Winograd: "On Computing the Discrete Fourier Transform", + Mathematics of Computation, Volume 32, Number 141, January 1978, + Pages 175-199 +*/ + +/* Calculation of the inverse MDCT + used to be static without 3dnow - does that really matter? 
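
III_antialias above runs eight butterflies across every pair of adjacent sub-bands (up to 31 boundaries), mixing one line from each side of the boundary with the cs/ca coefficients that init_layer3 derives from the Ci constants. One butterfly in isolation, as a sketch with illustrative names:

    /* One alias-reduction butterfly, as in the inner loop of III_antialias:
       u is a line from the top of band n, d the mirrored line from the bottom of band n+1. */
    static void alias_butterfly(double *u_line, double *d_line, double cs, double ca)
    {
        double u = *u_line, d = *d_line;
        *u_line = u * cs - d * ca;
        *d_line = d * cs + u * ca;
    }
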
*/ +void dct36(real *inbuf,real *o1,real *o2,real *wintab,real *tsbuf) +{ +#ifdef NEW_DCT9 + real tmp[18]; +#endif + + { + register real *in = inbuf; + + in[17]+=in[16]; in[16]+=in[15]; in[15]+=in[14]; + in[14]+=in[13]; in[13]+=in[12]; in[12]+=in[11]; + in[11]+=in[10]; in[10]+=in[9]; in[9] +=in[8]; + in[8] +=in[7]; in[7] +=in[6]; in[6] +=in[5]; + in[5] +=in[4]; in[4] +=in[3]; in[3] +=in[2]; + in[2] +=in[1]; in[1] +=in[0]; + + in[17]+=in[15]; in[15]+=in[13]; in[13]+=in[11]; in[11]+=in[9]; + in[9] +=in[7]; in[7] +=in[5]; in[5] +=in[3]; in[3] +=in[1]; + + +#ifdef NEW_DCT9 +#if 1 + { + real t3; + { + real t0, t1, t2; + + t0 = REAL_MUL(COS6_2, (in[8] + in[16] - in[4])); + t1 = REAL_MUL(COS6_2, in[12]); + + t3 = in[0]; + t2 = t3 - t1 - t1; + tmp[1] = tmp[7] = t2 - t0; + tmp[4] = t2 + t0 + t0; + t3 += t1; + + t2 = REAL_MUL(COS6_1, (in[10] + in[14] - in[2])); + tmp[1] -= t2; + tmp[7] += t2; + } + { + real t0, t1, t2; + + t0 = REAL_MUL(cos9[0], (in[4] + in[8] )); + t1 = REAL_MUL(cos9[1], (in[8] - in[16])); + t2 = REAL_MUL(cos9[2], (in[4] + in[16])); + + tmp[2] = tmp[6] = t3 - t0 - t2; + tmp[0] = tmp[8] = t3 + t0 + t1; + tmp[3] = tmp[5] = t3 - t1 + t2; + } + } + { + real t1, t2, t3; + + t1 = REAL_MUL(cos18[0], (in[2] + in[10])); + t2 = REAL_MUL(cos18[1], (in[10] - in[14])); + t3 = REAL_MUL(COS6_1, in[6]); + + { + real t0 = t1 + t2 + t3; + tmp[0] += t0; + tmp[8] -= t0; + } + + t2 -= t3; + t1 -= t3; + + t3 = REAL_MUL(cos18[2], (in[2] + in[14])); + + t1 += t3; + tmp[3] += t1; + tmp[5] -= t1; + + t2 -= t3; + tmp[2] += t2; + tmp[6] -= t2; + } + +#else + { + real t0, t1, t2, t3, t4, t5, t6, t7; + + t1 = REAL_MUL(COS6_2, in[12]); + t2 = REAL_MUL(COS6_2, (in[8] + in[16] - in[4])); + + t3 = in[0] + t1; + t4 = in[0] - t1 - t1; + t5 = t4 - t2; + tmp[4] = t4 + t2 + t2; + + t0 = REAL_MUL(cos9[0], (in[4] + in[8])); + t1 = REAL_MUL(cos9[1], (in[8] - in[16])); + + t2 = REAL_MUL(cos9[2], (in[4] + in[16])); + + t6 = t3 - t0 - t2; + t0 += t3 + t1; + t3 += t2 - t1; + + t2 = REAL_MUL(cos18[0], (in[2] + in[10])); + t4 = REAL_MUL(cos18[1], (in[10] - in[14])); + t7 = REAL_MUL(COS6_1, in[6]); + + t1 = t2 + t4 + t7; + tmp[0] = t0 + t1; + tmp[8] = t0 - t1; + t1 = REAL_MUL(cos18[2], (in[2] + in[14])); + t2 += t1 - t7; + + tmp[3] = t3 + t2; + t0 = REAL_MUL(COS6_1, (in[10] + in[14] - in[2])); + tmp[5] = t3 - t2; + + t4 -= t1 + t7; + + tmp[1] = t5 - t0; + tmp[7] = t5 + t0; + tmp[2] = t6 + t4; + tmp[6] = t6 - t4; + } +#endif + + { + real t0, t1, t2, t3, t4, t5, t6, t7; + + t1 = REAL_MUL(COS6_2, in[13]); + t2 = REAL_MUL(COS6_2, (in[9] + in[17] - in[5])); + + t3 = in[1] + t1; + t4 = in[1] - t1 - t1; + t5 = t4 - t2; + + t0 = REAL_MUL(cos9[0], (in[5] + in[9])); + t1 = REAL_MUL(cos9[1], (in[9] - in[17])); + + tmp[13] = REAL_MUL((t4 + t2 + t2), tfcos36[17-13]); + t2 = REAL_MUL(cos9[2], (in[5] + in[17])); + + t6 = t3 - t0 - t2; + t0 += t3 + t1; + t3 += t2 - t1; + + t2 = REAL_MUL(cos18[0], (in[3] + in[11])); + t4 = REAL_MUL(cos18[1], (in[11] - in[15])); + t7 = REAL_MUL(COS6_1, in[7]); + + t1 = t2 + t4 + t7; + tmp[17] = REAL_MUL((t0 + t1), tfcos36[17-17]); + tmp[9] = REAL_MUL((t0 - t1), tfcos36[17-9]); + t1 = REAL_MUL(cos18[2], (in[3] + in[15])); + t2 += t1 - t7; + + tmp[14] = REAL_MUL((t3 + t2), tfcos36[17-14]); + t0 = REAL_MUL(COS6_1, (in[11] + in[15] - in[3])); + tmp[12] = REAL_MUL((t3 - t2), tfcos36[17-12]); + + t4 -= t1 + t7; + + tmp[16] = REAL_MUL((t5 - t0), tfcos36[17-16]); + tmp[10] = REAL_MUL((t5 + t0), tfcos36[17-10]); + tmp[15] = REAL_MUL((t6 + t4), tfcos36[17-15]); + tmp[11] = REAL_MUL((t6 - t4), tfcos36[17-11]); + } + +#define 
MACRO(v) { \ + real tmpval; \ + tmpval = tmp[(v)] + tmp[17-(v)]; \ + out2[9+(v)] = REAL_MUL(tmpval, w[27+(v)]); \ + out2[8-(v)] = REAL_MUL(tmpval, w[26-(v)]); \ + tmpval = tmp[(v)] - tmp[17-(v)]; \ + ts[SBLIMIT*(8-(v))] = out1[8-(v)] + REAL_MUL(tmpval, w[8-(v)]); \ + ts[SBLIMIT*(9+(v))] = out1[9+(v)] + REAL_MUL(tmpval, w[9+(v)]); } + + { + register real *out2 = o2; + register real *w = wintab; + register real *out1 = o1; + register real *ts = tsbuf; + + MACRO(0); + MACRO(1); + MACRO(2); + MACRO(3); + MACRO(4); + MACRO(5); + MACRO(6); + MACRO(7); + MACRO(8); + } + +#else + + { + +#define MACRO0(v) { \ + real tmp; \ + out2[9+(v)] = REAL_MUL((tmp = sum0 + sum1), w[27+(v)]); \ + out2[8-(v)] = REAL_MUL(tmp, w[26-(v)]); } \ + sum0 -= sum1; \ + ts[SBLIMIT*(8-(v))] = out1[8-(v)] + REAL_MUL(sum0, w[8-(v)]); \ + ts[SBLIMIT*(9+(v))] = out1[9+(v)] + REAL_MUL(sum0, w[9+(v)]); +#define MACRO1(v) { \ + real sum0,sum1; \ + sum0 = tmp1a + tmp2a; \ + sum1 = REAL_MUL((tmp1b + tmp2b), tfcos36[(v)]); \ + MACRO0(v); } +#define MACRO2(v) { \ + real sum0,sum1; \ + sum0 = tmp2a - tmp1a; \ + sum1 = REAL_MUL((tmp2b - tmp1b), tfcos36[(v)]); \ + MACRO0(v); } + + register const real *c = COS9; + register real *out2 = o2; + register real *w = wintab; + register real *out1 = o1; + register real *ts = tsbuf; + + real ta33,ta66,tb33,tb66; + + ta33 = REAL_MUL(in[2*3+0], c[3]); + ta66 = REAL_MUL(in[2*6+0], c[6]); + tb33 = REAL_MUL(in[2*3+1], c[3]); + tb66 = REAL_MUL(in[2*6+1], c[6]); + + { + real tmp1a,tmp2a,tmp1b,tmp2b; + tmp1a = REAL_MUL(in[2*1+0], c[1]) + ta33 + REAL_MUL(in[2*5+0], c[5]) + REAL_MUL(in[2*7+0], c[7]); + tmp1b = REAL_MUL(in[2*1+1], c[1]) + tb33 + REAL_MUL(in[2*5+1], c[5]) + REAL_MUL(in[2*7+1], c[7]); + tmp2a = REAL_MUL(in[2*2+0], c[2]) + REAL_MUL(in[2*4+0], c[4]) + ta66 + REAL_MUL(in[2*8+0], c[8]); + tmp2b = REAL_MUL(in[2*2+1], c[2]) + REAL_MUL(in[2*4+1], c[4]) + tb66 + REAL_MUL(in[2*8+1], c[8]); + + MACRO1(0); + MACRO2(8); + } + + { + real tmp1a,tmp2a,tmp1b,tmp2b; + tmp1a = REAL_MUL(( in[2*1+0] - in[2*5+0] - in[2*7+0] ), c[3]); + tmp1b = REAL_MUL(( in[2*1+1] - in[2*5+1] - in[2*7+1] ), c[3]); + tmp2a = REAL_MUL(( in[2*2+0] - in[2*4+0] - in[2*8+0] ), c[6]) - in[2*6+0] + in[2*0+0]; + tmp2b = REAL_MUL(( in[2*2+1] - in[2*4+1] - in[2*8+1] ), c[6]) - in[2*6+1] + in[2*0+1]; + + MACRO1(1); + MACRO2(7); + } + + { + real tmp1a,tmp2a,tmp1b,tmp2b; + tmp1a = REAL_MUL(in[2*1+0], c[5]) - ta33 - REAL_MUL(in[2*5+0], c[7]) + REAL_MUL(in[2*7+0], c[1]); + tmp1b = REAL_MUL(in[2*1+1], c[5]) - tb33 - REAL_MUL(in[2*5+1], c[7]) + REAL_MUL(in[2*7+1], c[1]); + tmp2a = - REAL_MUL(in[2*2+0], c[8]) - REAL_MUL(in[2*4+0], c[2]) + ta66 + REAL_MUL(in[2*8+0], c[4]); + tmp2b = - REAL_MUL(in[2*2+1], c[8]) - REAL_MUL(in[2*4+1], c[2]) + tb66 + REAL_MUL(in[2*8+1], c[4]); + + MACRO1(2); + MACRO2(6); + } + + { + real tmp1a,tmp2a,tmp1b,tmp2b; + tmp1a = REAL_MUL(in[2*1+0], c[7]) - ta33 + REAL_MUL(in[2*5+0], c[1]) - REAL_MUL(in[2*7+0], c[5]); + tmp1b = REAL_MUL(in[2*1+1], c[7]) - tb33 + REAL_MUL(in[2*5+1], c[1]) - REAL_MUL(in[2*7+1], c[5]); + tmp2a = - REAL_MUL(in[2*2+0], c[4]) + REAL_MUL(in[2*4+0], c[8]) + ta66 - REAL_MUL(in[2*8+0], c[2]); + tmp2b = - REAL_MUL(in[2*2+1], c[4]) + REAL_MUL(in[2*4+1], c[8]) + tb66 - REAL_MUL(in[2*8+1], c[2]); + + MACRO1(3); + MACRO2(5); + } + + { + real sum0,sum1; + sum0 = in[2*0+0] - in[2*2+0] + in[2*4+0] - in[2*6+0] + in[2*8+0]; + sum1 = REAL_MUL((in[2*0+1] - in[2*2+1] + in[2*4+1] - in[2*6+1] + in[2*8+1] ), tfcos36[4]); + MACRO0(4); + } + } +#endif + + } +} + + +/* new DCT12 */ +static void dct12(real *in,real 
*rawout1,real *rawout2,register real *wi,register real *ts) +{ +#define DCT12_PART1 \ + in5 = in[5*3]; \ + in5 += (in4 = in[4*3]); \ + in4 += (in3 = in[3*3]); \ + in3 += (in2 = in[2*3]); \ + in2 += (in1 = in[1*3]); \ + in1 += (in0 = in[0*3]); \ + \ + in5 += in3; in3 += in1; \ + \ + in2 = REAL_MUL(in2, COS6_1); \ + in3 = REAL_MUL(in3, COS6_1); + +#define DCT12_PART2 \ + in0 += REAL_MUL(in4, COS6_2); \ + \ + in4 = in0 + in2; \ + in0 -= in2; \ + \ + in1 += REAL_MUL(in5, COS6_2); \ + \ + in5 = REAL_MUL((in1 + in3), tfcos12[0]); \ + in1 = REAL_MUL((in1 - in3), tfcos12[2]); \ + \ + in3 = in4 + in5; \ + in4 -= in5; \ + \ + in2 = in0 + in1; \ + in0 -= in1; + + { + real in0,in1,in2,in3,in4,in5; + register real *out1 = rawout1; + ts[SBLIMIT*0] = out1[0]; ts[SBLIMIT*1] = out1[1]; ts[SBLIMIT*2] = out1[2]; + ts[SBLIMIT*3] = out1[3]; ts[SBLIMIT*4] = out1[4]; ts[SBLIMIT*5] = out1[5]; + + DCT12_PART1 + + { + real tmp0,tmp1 = (in0 - in4); + { + real tmp2 = REAL_MUL((in1 - in5), tfcos12[1]); + tmp0 = tmp1 + tmp2; + tmp1 -= tmp2; + } + ts[(17-1)*SBLIMIT] = out1[17-1] + REAL_MUL(tmp0, wi[11-1]); + ts[(12+1)*SBLIMIT] = out1[12+1] + REAL_MUL(tmp0, wi[6+1]); + ts[(6 +1)*SBLIMIT] = out1[6 +1] + REAL_MUL(tmp1, wi[1]); + ts[(11-1)*SBLIMIT] = out1[11-1] + REAL_MUL(tmp1, wi[5-1]); + } + + DCT12_PART2 + + ts[(17-0)*SBLIMIT] = out1[17-0] + REAL_MUL(in2, wi[11-0]); + ts[(12+0)*SBLIMIT] = out1[12+0] + REAL_MUL(in2, wi[6+0]); + ts[(12+2)*SBLIMIT] = out1[12+2] + REAL_MUL(in3, wi[6+2]); + ts[(17-2)*SBLIMIT] = out1[17-2] + REAL_MUL(in3, wi[11-2]); + + ts[(6 +0)*SBLIMIT] = out1[6+0] + REAL_MUL(in0, wi[0]); + ts[(11-0)*SBLIMIT] = out1[11-0] + REAL_MUL(in0, wi[5-0]); + ts[(6 +2)*SBLIMIT] = out1[6+2] + REAL_MUL(in4, wi[2]); + ts[(11-2)*SBLIMIT] = out1[11-2] + REAL_MUL(in4, wi[5-2]); + } + + in++; + + { + real in0,in1,in2,in3,in4,in5; + register real *out2 = rawout2; + + DCT12_PART1 + + { + real tmp0,tmp1 = (in0 - in4); + { + real tmp2 = REAL_MUL((in1 - in5), tfcos12[1]); + tmp0 = tmp1 + tmp2; + tmp1 -= tmp2; + } + out2[5-1] = REAL_MUL(tmp0, wi[11-1]); + out2[0+1] = REAL_MUL(tmp0, wi[6+1]); + ts[(12+1)*SBLIMIT] += REAL_MUL(tmp1, wi[1]); + ts[(17-1)*SBLIMIT] += REAL_MUL(tmp1, wi[5-1]); + } + + DCT12_PART2 + + out2[5-0] = REAL_MUL(in2, wi[11-0]); + out2[0+0] = REAL_MUL(in2, wi[6+0]); + out2[0+2] = REAL_MUL(in3, wi[6+2]); + out2[5-2] = REAL_MUL(in3, wi[11-2]); + + ts[(12+0)*SBLIMIT] += REAL_MUL(in0, wi[0]); + ts[(17-0)*SBLIMIT] += REAL_MUL(in0, wi[5-0]); + ts[(12+2)*SBLIMIT] += REAL_MUL(in4, wi[2]); + ts[(17-2)*SBLIMIT] += REAL_MUL(in4, wi[5-2]); + } + + in++; + + { + real in0,in1,in2,in3,in4,in5; + register real *out2 = rawout2; + out2[12]=out2[13]=out2[14]=out2[15]=out2[16]=out2[17]=0.0; + + DCT12_PART1 + + { + real tmp0,tmp1 = (in0 - in4); + { + real tmp2 = REAL_MUL((in1 - in5), tfcos12[1]); + tmp0 = tmp1 + tmp2; + tmp1 -= tmp2; + } + out2[11-1] = REAL_MUL(tmp0, wi[11-1]); + out2[6 +1] = REAL_MUL(tmp0, wi[6+1]); + out2[0+1] += REAL_MUL(tmp1, wi[1]); + out2[5-1] += REAL_MUL(tmp1, wi[5-1]); + } + + DCT12_PART2 + + out2[11-0] = REAL_MUL(in2, wi[11-0]); + out2[6 +0] = REAL_MUL(in2, wi[6+0]); + out2[6 +2] = REAL_MUL(in3, wi[6+2]); + out2[11-2] = REAL_MUL(in3, wi[11-2]); + + out2[0+0] += REAL_MUL(in0, wi[0]); + out2[5-0] += REAL_MUL(in0, wi[5-0]); + out2[0+2] += REAL_MUL(in4, wi[2]); + out2[5-2] += REAL_MUL(in4, wi[5-2]); + } +} + + +static void III_hybrid(real fsIn[SBLIMIT][SSLIMIT], real tsOut[SSLIMIT][SBLIMIT], int ch,struct gr_info_s *gr_info, mpg123_handle *fr) +{ + real (*block)[2][SBLIMIT*SSLIMIT] = fr->hybrid_block; + int *blc 
= fr->hybrid_blc; + + real *tspnt = (real *) tsOut; + real *rawout1,*rawout2; + int bt = 0; + size_t sb = 0; + + { + int b = blc[ch]; + rawout1=block[b][ch]; + b=-b+1; + rawout2=block[b][ch]; + blc[ch] = b; + } + + if(gr_info->mixed_block_flag) + { + sb = 2; + opt_dct36(fr)(fsIn[0],rawout1,rawout2,win[0],tspnt); + opt_dct36(fr)(fsIn[1],rawout1+18,rawout2+18,win1[0],tspnt+1); + rawout1 += 36; rawout2 += 36; tspnt += 2; + } + + bt = gr_info->block_type; + if(bt == 2) + { + for(; sbmaxb; sb+=2,tspnt+=2,rawout1+=36,rawout2+=36) + { + dct12(fsIn[sb] ,rawout1 ,rawout2 ,win[2] ,tspnt); + dct12(fsIn[sb+1],rawout1+18,rawout2+18,win1[2],tspnt+1); + } + } + else + { + for(; sbmaxb; sb+=2,tspnt+=2,rawout1+=36,rawout2+=36) + { + opt_dct36(fr)(fsIn[sb],rawout1,rawout2,win[bt],tspnt); + opt_dct36(fr)(fsIn[sb+1],rawout1+18,rawout2+18,win1[bt],tspnt+1); + } + } + + for(;sbstereo; + int single = fr->single; + int ms_stereo,i_stereo; + int sfreq = fr->sampling_frequency; + int stereo1,granules; + + if(stereo == 1) + { /* stream is mono */ + stereo1 = 1; + single = SINGLE_LEFT; + } + else if(single != SINGLE_STEREO) /* stream is stereo, but force to mono */ + stereo1 = 1; + else + stereo1 = 2; + + if(fr->mode == MPG_MD_JOINT_STEREO) + { + ms_stereo = (fr->mode_ext & 0x2)>>1; + i_stereo = fr->mode_ext & 0x1; + } + else ms_stereo = i_stereo = 0; + + granules = fr->lsf ? 1 : 2; + + /* quick hack to keep the music playing */ + /* after having seen this nasty test file... */ + if(III_get_side_info(fr, &sideinfo,stereo,ms_stereo,sfreq,single)) + { + if(NOQUIET) error("bad frame - unable to get valid sideinfo"); + return clip; + } + + set_pointer(fr,sideinfo.main_data_begin); + + for(gr=0;grlayer3.hybrid_in; + /* hybridOut[2][SSLIMIT][SBLIMIT] */ + real (*hybridOut)[SSLIMIT][SBLIMIT] = fr->layer3.hybrid_out; + + { + struct gr_info_s *gr_info = &(sideinfo.ch[0].gr[gr]); + long part2bits; + if(fr->lsf) + part2bits = III_get_scale_factors_2(fr, scalefacs[0],gr_info,0); + else + part2bits = III_get_scale_factors_1(fr, scalefacs[0],gr_info,0,gr); + + if(III_dequantize_sample(fr, hybridIn[0], scalefacs[0],gr_info,sfreq,part2bits)) + { + if(VERBOSE2) error("dequantization failed!"); + return clip; + } + } + + if(stereo == 2) + { + struct gr_info_s *gr_info = &(sideinfo.ch[1].gr[gr]); + long part2bits; + if(fr->lsf) + part2bits = III_get_scale_factors_2(fr, scalefacs[1],gr_info,i_stereo); + else + part2bits = III_get_scale_factors_1(fr, scalefacs[1],gr_info,1,gr); + + if(III_dequantize_sample(fr, hybridIn[1],scalefacs[1],gr_info,sfreq,part2bits)) + { + if(VERBOSE2) error("dequantization failed!"); + return clip; + } + + if(ms_stereo) + { + int i; + unsigned int maxb = sideinfo.ch[0].gr[gr].maxb; + if(sideinfo.ch[1].gr[gr].maxb > maxb) maxb = sideinfo.ch[1].gr[gr].maxb; + + for(i=0;ilsf); + + if(ms_stereo || i_stereo || (single == SINGLE_MIX) ) + { + if(gr_info->maxb > sideinfo.ch[0].gr[gr].maxb) + sideinfo.ch[0].gr[gr].maxb = gr_info->maxb; + else + gr_info->maxb = sideinfo.ch[0].gr[gr].maxb; + } + + switch(single) + { + case SINGLE_MIX: + { + register int i; + register real *in0 = (real *) hybridIn[0],*in1 = (real *) hybridIn[1]; + for(i=0;imaxb;i++,in0++) + *in0 = (*in0 + *in1++); /* *0.5 done by pow-scale */ + } + break; + case SINGLE_RIGHT: + { + register int i; + register real *in0 = (real *) hybridIn[0],*in1 = (real *) hybridIn[1]; + for(i=0;imaxb;i++) + *in0++ = *in1++; + } + break; + } + } + + for(ch=0;chaf.encoding != MPG123_ENC_SIGNED_16 || fr->down_sample != 0) + { +#endif + 
for(ss=0;sssynth_mono)(hybridOut[0][ss], fr); + else + clip += (fr->synth_stereo)(hybridOut[0][ss], hybridOut[1][ss], fr); + + } +#ifdef OPT_I486 + } else + { + /* Only stereo, 16 bits benefit from the 486 optimization. */ + ss=0; + while(ss < SSLIMIT) + { + int n; + n=(fr->buffer.size - fr->buffer.fill) / (2*2*32); + if(n > (SSLIMIT-ss)) n=SSLIMIT-ss; + + /* Clip counting makes no sense with this function. */ + absynth_1to1_i486(hybridOut[0][ss], 0, fr, n); + absynth_1to1_i486(hybridOut[1][ss], 1, fr, n); + ss+=n; + fr->buffer.fill+=(2*2*32)*n; + } + } +#endif + } + + return clip; +} Index: include/reactos/libs/libmpg123/lfs_alias.c =================================================================== --- include/reactos/libs/libmpg123/lfs_alias.c (revision 0) +++ include/reactos/libs/libmpg123/lfs_alias.c (working copy) @@ -0,0 +1,222 @@ +/* + lfs_alias: Aliases to the small/native API functions with the size of long int as suffix. + + copyright 2010-2013 by the mpg123 project - free software under the terms of the LGPL 2.1 + see COPYING and AUTHORS files in distribution or http://mpg123.org + initially written by Thomas Orgis + + Use case: Client code on Linux/x86-64 that defines _FILE_OFFSET_BITS to 64, + which is the only choice on that platform anyway. It should be no-op, but + prompts the platform-agnostic header of mpg123 to define API calls with the + corresponding suffix. This file provides the names for this case. It's cruft, + but glibc does it, too -- so people rely on it. + Oh, and it also caters for the lunatics that define _FILE_OFFSET_BITS=32 on + 32 bit platforms. In addition, it's needed for platforms that always have + off_t /= long, and clients still insisting on defining _FILE_OFFSET_BITS. + + Depending on use case, the aliases map to 32 (small) or 64 bit (large) offset + functions, to the ones from libmpg123 or the ones from lfs_wrap. + + So, two basic cases: + 1. mpg123_bla_32 alias for mpg123_bla (native) + 2. mpg123_bla alias for mpg123_bla_32 (wrapper) + Same for 64 bits. Confusing, I know. It sucks. + + Note that the mpg123 header is _not_ used here to avoid definition with whacky off_t. + The aliases are always about arguments of native alias_t type. This can be off_t, but + on Linux/x86, this is long int. The off_t declarations in mpg123.h confuse things, + so reproduce definitions for the wrapper functions in that case. The definitions are + pulled by an inline Perl script in any case ... no need to copy anything manually! + As a benefit, one can skip undefining possible largefile namings. +*/ + +#include "config.h" + +/* Hack for Solaris: Some system headers included from compat.h might force _FILE_OFFSET_BITS. Need to follow that here. + Also, want it around to have types defined. */ +#include "compat.h" + +#ifndef LFS_ALIAS_BITS +#error "I need the count of alias bits here." +#endif + +#define MACROCAT_REALLY(a, b) a ## b +#define MACROCAT(a, b) MACROCAT_REALLY(a, b) + +/* This is wicked switchery: Decide which way the aliases are facing. */ + +#if _FILE_OFFSET_BITS+0 == LFS_ALIAS_BITS + +/* The native functions have suffix, the aliases not. */ +#define NATIVE_SUFFIX MACROCAT(_, _FILE_OFFSET_BITS) +#define NATIVE_NAME(func) MACROCAT(func, NATIVE_SUFFIX) +#define ALIAS_NAME(func) func + +#else + +/* The alias functions have suffix, the native ones not. 
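
The MACROCAT/NATIVE_NAME/ALIAS_NAME machinery above decides which spelling of each API entry point carries the _32/_64 suffix. The double indirection matters because operands of the ## operator are not macro-expanded, so the extra MACROCAT_REALLY level is what lets _FILE_OFFSET_BITS or LFS_ALIAS_BITS turn into their numeric value before pasting. A tiny illustration with made-up names:

    #define MACROCAT_REALLY(a, b) a ## b
    #define MACROCAT(a, b) MACROCAT_REALLY(a, b)

    /* Example values only, not the real configuration macros: */
    #define BITS 64
    #define SUFFIX MACROCAT(_, BITS)
    #define NAME(func) MACROCAT(func, SUFFIX)

    /* NAME(mpg123_seek) expands to the token mpg123_seek_64; with a single-level
       a ## b macro the suffix would be pasted unexpanded as mpg123_seekSUFFIX. */
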
*/ +#define ALIAS_SUFFIX MACROCAT(_, LFS_ALIAS_BITS) +#define ALIAS_NAME(func) MACROCAT(func, ALIAS_SUFFIX) +#define NATIVE_NAME(func) func + +#endif + +/* Copy of necessary definitions, actually just forward declarations. */ +struct mpg123_handle_struct; +typedef struct mpg123_handle_struct mpg123_handle; + + +/* Get attribute_align_arg, to stay safe. */ +#include "abi_align.h" + +/* + Extract the list of functions we need wrappers for, pregenerating the wrappers for simple cases (inline script for nedit): +perl -ne ' +if(/^\s*EXPORT\s+(\S+)\s+(mpg123_\S+)\((.*)\);\s*$/) +{ + my $type = $1; + my $name = $2; + my $args = $3; + next unless ($type =~ /off_t/ or $args =~ /off_t/ or ($name =~ /open/ and $name ne mpg123_open_feed)); + $type =~ s/off_t/lfs_alias_t/g; + my @nargs = (); + $args =~ s/off_t/lfs_alias_t/g; + foreach my $a (split(/,/, $args)) + { + $a =~ s/^.*\s\**([a-z_]+)$/$1/; + push(@nargs, $a); + } + my $nargs = join(", ", @nargs); + $nargs = "Human: figure me out." if($nargs =~ /\(/); + print < +#include +#include +#include "compat.h" +#include "debug.h" + +/* + Now, start off easy... translate simple API calls. + I need to deal with these here: +perl -ne ' +if(/^\s*EXPORT\s+(\S+)\s+(mpg123_\S+)\((.*)\);\s*$/) +{ + $type = $1; + $name = $2; + $args = $3; + next unless ($type =~ /off_t/ or $args =~ /off_t/); + print "$name\n" unless grep {$_ eq $name} + ("mpg123_open", "mpg123_open_fd", "mpg123_open_handle", "mpg123_replace_reader", "mpg123_replace_reader_handle"); +}' < mpg123.h.in + +mpg123_decode_frame +mpg123_framebyframe_decode +mpg123_framepos +mpg123_tell +mpg123_tellframe +mpg123_tell_stream +mpg123_seek +mpg123_feedseek +mpg123_seek_frame +mpg123_timeframe +mpg123_index +mpg123_set_index +mpg123_position +mpg123_length +mpg123_set_filesize +mpg123_decode_raw ... that's experimental. + +Let's work on them in that order. +*/ + +/* I see that I will need custom data storage. Main use is for the replaced I/O later, but the seek table for small file offsets needs extra storage, too. */ + +/* The wrapper handle for descriptor and handle I/O. */ + +/* The handle is used for nothing (0), or one of these two modes of operation: */ +#define IO_FD 1 /* Wrapping over callbacks operation on integer file descriptor. */ +#define IO_HANDLE 2 /* Wrapping over custom handle callbacks. */ + +struct wrap_data +{ + /* Storage for small offset index table. */ + long *indextable; + /* I/O handle stuff */ + int iotype; /* IO_FD or IO_HANDLE */ + /* Data for IO_FD. */ + int fd; + int my_fd; /* A descriptor that the wrapper code opened itself. */ + /* The actual callbacks from the outside. */ + ssize_t (*r_read) (int, void *, size_t); + long (*r_lseek)(int, long, int); + /* Data for IO_HANDLE. */ + void* handle; + ssize_t (*r_h_read)(void *, void *, size_t); + long (*r_h_lseek)(void*, long, int); + void (*h_cleanup)(void*); +}; + + +/* Cleanup I/O part of the handle handle... but not deleting the wrapper handle itself. + That is stored in the frame and only deleted on mpg123_delete(). */ +static void wrap_io_cleanup(void *handle) +{ + struct wrap_data *ioh = handle; + if(ioh->iotype == IO_HANDLE) + { + if(ioh->h_cleanup != NULL && ioh->handle != NULL) + ioh->h_cleanup(ioh->handle); + + ioh->handle = NULL; + } + if(ioh->my_fd >= 0) + { + close(ioh->my_fd); + ioh->my_fd = -1; + } +} + +/* Really finish off the handle... freeing all memory. 
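
struct wrap_data above keeps the client's original small-offset I/O callbacks, plus a descriptor the wrapper may have opened itself, so that libmpg123 can be handed large-offset replacements; actual I/O is then routed to whichever callback set is active. A toy sketch of that routing with illustrative names only, not code from the patch:

    #include <stddef.h>
    #include <sys/types.h>

    struct toy_wrap
    {
        int iotype;                                  /* 1 = descriptor I/O, 2 = handle I/O */
        int fd;
        ssize_t (*r_read)(int, void *, size_t);      /* client's descriptor reader */
        void *handle;
        ssize_t (*r_h_read)(void *, void *, size_t); /* client's handle reader */
    };

    static ssize_t toy_read(struct toy_wrap *w, void *buf, size_t count)
    {
        if(w->iotype == 1) return w->r_read(w->fd, buf, count);
        if(w->iotype == 2) return w->r_h_read(w->handle, buf, count);
        return -1; /* I/O replacement not set up */
    }
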
*/ +static void wrap_destroy(void *handle) +{ + struct wrap_data *wh = handle; + wrap_io_cleanup(handle); + if(wh->indextable != NULL) + free(wh->indextable); + + free(wh); +} + +/* More helper code... extract the special wrapper handle, possible allocate and initialize it. */ +static struct wrap_data* wrap_get(mpg123_handle *mh) +{ + struct wrap_data* whd; + if(mh == NULL) return NULL; + + /* Access the private storage inside the mpg123 handle. + The real callback functions and handles are stored there. */ + if(mh->wrapperdata == NULL) + { + /* Create a new one. */ + mh->wrapperdata = malloc(sizeof(struct wrap_data)); + if(mh->wrapperdata == NULL) + { + mh->err = MPG123_OUT_OF_MEM; + return NULL; + } + /* When we have wrapper data present, the callback for its proper cleanup is needed. */ + mh->wrapperclean = wrap_destroy; + + whd = mh->wrapperdata; + whd->indextable = NULL; + whd->iotype = 0; + whd->fd = -1; + whd->my_fd = -1; + whd->r_read = NULL; + whd->r_lseek = NULL; + whd->handle = NULL; + whd->r_h_read = NULL; + whd->r_h_lseek = NULL; + whd->h_cleanup = NULL; + } + else whd = mh->wrapperdata; + + return whd; +} + +/* After settling the data... start with some simple wrappers. */ + +#undef mpg123_decode_frame +/* int mpg123_decode_frame(mpg123_handle *mh, off_t *num, unsigned char **audio, size_t *bytes) */ +int attribute_align_arg mpg123_decode_frame(mpg123_handle *mh, long *num, unsigned char **audio, size_t *bytes) +{ + off_t largenum; + int err; + + err = MPG123_LARGENAME(mpg123_decode_frame)(mh, &largenum, audio, bytes); + if(err == MPG123_OK && num != NULL) + { + *num = largenum; + if(*num != largenum) + { + mh->err = MPG123_LFS_OVERFLOW; + err = MPG123_ERR; + } + } + return err; +} + +#undef mpg123_framebyframe_decode +/* int mpg123_framebyframe_decode(mpg123_handle *mh, off_t *num, unsigned char **audio, size_t *bytes); */ +int attribute_align_arg mpg123_framebyframe_decode(mpg123_handle *mh, long *num, unsigned char **audio, size_t *bytes) +{ + off_t largenum; + int err; + + err = MPG123_LARGENAME(mpg123_framebyframe_decode)(mh, &largenum, audio, bytes); + if(err == MPG123_OK && num != NULL) + { + *num = largenum; + if(*num != largenum) + { + mh->err = MPG123_LFS_OVERFLOW; + err = MPG123_ERR; + } + } + return err; +} + +#undef mpg123_framepos +/* off_t mpg123_framepos(mpg123_handle *mh); */ +long attribute_align_arg mpg123_framepos(mpg123_handle *mh) +{ + long val; + off_t largeval; + + largeval = MPG123_LARGENAME(mpg123_framepos)(mh); + val = largeval; + if(val != largeval) + { + mh->err = MPG123_LFS_OVERFLOW; + return MPG123_ERR; + } + return val; +} + +#undef mpg123_tell +/* off_t mpg123_tell(mpg123_handle *mh); */ +long attribute_align_arg mpg123_tell(mpg123_handle *mh) +{ + long val; + off_t largeval; + + largeval = MPG123_LARGENAME(mpg123_tell)(mh); + val = largeval; + if(val != largeval) + { + mh->err = MPG123_LFS_OVERFLOW; + return MPG123_ERR; + } + return val; +} + +#undef mpg123_tellframe +/* off_t mpg123_tellframe(mpg123_handle *mh); */ +long attribute_align_arg mpg123_tellframe(mpg123_handle *mh) +{ + long val; + off_t largeval; + + largeval = MPG123_LARGENAME(mpg123_tellframe)(mh); + val = largeval; + if(val != largeval) + { + mh->err = MPG123_LFS_OVERFLOW; + return MPG123_ERR; + } + return val; +} + +#undef mpg123_tell_stream +/* off_t mpg123_tell_stream(mpg123_handle *mh); */ +long attribute_align_arg mpg123_tell_stream(mpg123_handle *mh) +{ + long val; + off_t largeval; + + largeval = MPG123_LARGENAME(mpg123_tell_stream)(mh); + val = largeval; + if(val != 
largeval) + { + mh->err = MPG123_LFS_OVERFLOW; + return MPG123_ERR; + } + return val; +} + +#undef mpg123_seek +/* off_t mpg123_seek(mpg123_handle *mh, off_t sampleoff, int whence); */ +long attribute_align_arg mpg123_seek(mpg123_handle *mh, long sampleoff, int whence) +{ + long val; + off_t largeval; + + largeval = MPG123_LARGENAME(mpg123_seek)(mh, sampleoff, whence); + val = largeval; + if(val != largeval) + { + mh->err = MPG123_LFS_OVERFLOW; + return MPG123_ERR; + } + return val; +} + +#undef mpg123_feedseek +/* off_t mpg123_feedseek(mpg123_handle *mh, off_t sampleoff, int whence, off_t *input_offset); */ +long attribute_align_arg mpg123_feedseek(mpg123_handle *mh, long sampleoff, int whence, long *input_offset) +{ + long val; + off_t largeioff; + off_t largeval; + + largeval = MPG123_LARGENAME(mpg123_feedseek)(mh, sampleoff, whence, &largeioff); + /* Error/message codes are small... */ + if(largeval < 0) return (long)largeval; + + val = largeval; + *input_offset = largeioff; + if(val != largeval || *input_offset != largeioff) + { + mh->err = MPG123_LFS_OVERFLOW; + return MPG123_ERR; + } + return val; +} + +#undef mpg123_seek_frame +/* off_t mpg123_seek_frame(mpg123_handle *mh, off_t frameoff, int whence); */ +long attribute_align_arg mpg123_seek_frame(mpg123_handle *mh, long frameoff, int whence) +{ + long val; + off_t largeval; + + largeval = MPG123_LARGENAME(mpg123_seek_frame)(mh, frameoff, whence); + val = largeval; + if(val != largeval) + { + mh->err = MPG123_LFS_OVERFLOW; + return MPG123_ERR; + } + return val; +} + +#undef mpg123_timeframe +/* off_t mpg123_timeframe(mpg123_handle *mh, double sec); */ +long attribute_align_arg mpg123_timeframe(mpg123_handle *mh, double sec) +{ + long val; + off_t largeval; + + largeval = MPG123_LARGENAME(mpg123_timeframe)(mh, sec); + val = largeval; + if(val != largeval) + { + mh->err = MPG123_LFS_OVERFLOW; + return MPG123_ERR; + } + return val; +} + +/* Now something less simple: Index retrieval and manipulation. + The index is an _array_ of off_t, which means that I need to construct a copy with translated long values. */ +#undef mpg123_index +/* int mpg123_index(mpg123_handle *mh, off_t **offsets, off_t *step, size_t *fill) */ +int attribute_align_arg mpg123_index(mpg123_handle *mh, long **offsets, long *step, size_t *fill) +{ + int err; + size_t i; + long smallstep; + size_t thefill; + off_t largestep; + off_t *largeoffsets; + struct wrap_data *whd; + + whd = wrap_get(mh); + if(whd == NULL) return MPG123_ERR; + + err = MPG123_LARGENAME(mpg123_index)(mh, &largeoffsets, &largestep, &thefill); + if(err != MPG123_OK) return err; + + /* For a _very_ large file, even the step could overflow. */ + smallstep = largestep; + if(smallstep != largestep) + { + mh->err = MPG123_LFS_OVERFLOW; + return MPG123_ERR; + } + if(step != NULL) *step = smallstep; + + /* When there are no values stored, there is no table content to take care of. + Table pointer does not matter. Mission completed. */ + if(thefill == 0) return MPG123_OK; + + if(fill != NULL) *fill = thefill; + + /* Construct a copy of the index to hand over to the small-minded client. */ + *offsets = safe_realloc(whd->indextable, (*fill)*sizeof(long)); + if(*offsets == NULL) + { + mh->err = MPG123_OUT_OF_MEM; + return MPG123_ERR; + } + whd->indextable = *offsets; + /* Elaborate conversion of each index value, with overflow check. 
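+   The check is the plain assign-and-compare round trip: copy the off_t value into the long slot and
+   compare back. If a value does not survive (say, an offset past 2 GiB with a 32 bit long), the
+   client gets MPG123_LFS_OVERFLOW instead of a silently truncated index entry.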
*/ + for(i=0; i<*fill; ++i) + { + whd->indextable[i] = largeoffsets[i]; + if(whd->indextable[i] != largeoffsets[i]) + { + mh->err = MPG123_LFS_OVERFLOW; + return MPG123_ERR; + } + } + /* If we came that far... there should be a valid copy of the table now. */ + return MPG123_OK; +} + +/* The writing does basically the same than the above, just the opposite. + Oh, and the overflow checks are not needed -- off_t is bigger than long. */ +#undef mpg123_set_index +/* int mpg123_set_index(mpg123_handle *mh, off_t *offsets, off_t step, size_t fill); */ +int attribute_align_arg mpg123_set_index(mpg123_handle *mh, long *offsets, long step, size_t fill) +{ + int err; + size_t i; + struct wrap_data *whd; + off_t *indextmp; + + whd = wrap_get(mh); + if(whd == NULL) return MPG123_ERR; + + /* Expensive temporary storage... for staying outside at the API layer. */ + indextmp = malloc(fill*sizeof(off_t)); + if(indextmp == NULL) + { + mh->err = MPG123_OUT_OF_MEM; + return MPG123_ERR; + } + + if(fill > 0 && offsets == NULL) + { + mh->err = MPG123_BAD_INDEX_PAR; + err = MPG123_ERR; + } + else + { + /* Fill the large-file copy of the provided index, then feed it to mpg123. */ + for(i=0; ierr = MPG123_LFS_OVERFLOW; + return MPG123_ERR; + } + + if(current_frame != NULL) *current_frame = small_curframe; + + if(frames_left != NULL) *frames_left = small_frameleft; + + + return MPG123_OK; +} + +#undef mpg123_length +/* off_t mpg123_length(mpg123_handle *mh); */ +long attribute_align_arg mpg123_length(mpg123_handle *mh) +{ + long val; + off_t largeval; + + largeval = MPG123_LARGENAME(mpg123_length)(mh); + val = largeval; + if(val != largeval) + { + mh->err = MPG123_LFS_OVERFLOW; + return MPG123_ERR; + } + return val; +} + +/* The simplest wrapper of all... */ +#undef mpg123_set_filesize +/* int mpg123_set_filesize(mpg123_handle *mh, off_t size); */ +int attribute_align_arg mpg123_set_filesize(mpg123_handle *mh, long size) +{ + return MPG123_LARGENAME(mpg123_set_filesize)(mh, size); +} + + +/* ========================================= + THE BOUNDARY OF SANITY + Behold, stranger! + ========================================= */ + + +/* + The messy part: Replacement of I/O core (actally, this is only due to lseek()). + Both descriptor and handle replaced I/O are mapped to replaced handle I/O, the handle wrapping over the actual callbacks and the actual handle/descriptor. + You got multiple levels of handles and callbacks to think about. Have fun reading and comprehending. +*/ + +/* Could go into compat.h ... Windows needs that flag. */ +#ifndef O_BINARY +#define O_BINARY 0 +#endif + +/* Read callback needs nothing special. */ +ssize_t wrap_read(void* handle, void *buf, size_t count) +{ + struct wrap_data *ioh = handle; + switch(ioh->iotype) + { + case IO_FD: return ioh->r_read(ioh->fd, buf, count); + case IO_HANDLE: return ioh->r_h_read(ioh->handle, buf, count); + } + error("Serious breakage - bad IO type in LFS wrapper!"); + return -1; +} + +/* Seek callback needs protection from too big offsets. */ +off_t wrap_lseek(void *handle, off_t offset, int whence) +{ + struct wrap_data *ioh = handle; + long smalloff = offset; + if(smalloff == offset) + { + switch(ioh->iotype) + { + case IO_FD: return ioh->r_lseek(ioh->fd, smalloff, whence); + case IO_HANDLE: return ioh->r_h_lseek(ioh->handle, smalloff, whence); + } + error("Serious breakage - bad IO type in LFS wrapper!"); + return -1; + } + else + { + errno = EOVERFLOW; + return -1; + } +} + + +/* + Now, let's replace the API dealing with replacement I/O. 
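+ The scheme from here on: the client's long-offset callbacks get parked in the wrap_data handle,
+ and the native large-file core is only ever handed wrap_read()/wrap_lseek() above as a
+ handle-based reader, with wrap_io_cleanup() as the cleanup hook.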
+ Start with undefining the renames... +*/ + +#undef mpg123_replace_reader +#undef mpg123_replace_reader_handle +#undef mpg123_open +#undef mpg123_open_fd +#undef mpg123_open_handle + + +/* Normal reader replacement needs fallback implementations. */ +static ssize_t fallback_read(int fd, void *buf, size_t count) +{ + return read(fd, buf, count); +} + +static long fallback_lseek(int fd, long offset, int whence) +{ + /* Since the offset is long int already, the returned value really should fit into a long... but whatever. */ + long newpos_long; + off_t newpos; + newpos = lseek(fd, offset, whence); + newpos_long = newpos; + if(newpos_long == newpos) + return newpos_long; + else + { + errno = EOVERFLOW; + return -1; + } +} + +/* Reader replacement prepares the hidden handle storage for next mpg123_open_fd() or plain mpg123_open(). */ +int attribute_align_arg mpg123_replace_reader(mpg123_handle *mh, ssize_t (*r_read) (int, void *, size_t), long (*r_lseek)(int, long, int) ) +{ + struct wrap_data* ioh; + + if(mh == NULL) return MPG123_ERR; + + mpg123_close(mh); + ioh = wrap_get(mh); + if(ioh == NULL) return MPG123_ERR; + + /* If both callbacks are NULL, switch totally to internal I/O, else just use fallback for at most half of them. */ + if(r_read == NULL && r_lseek == NULL) + { + /* Only the type is actually important to disable the code. */ + ioh->iotype = 0; + ioh->fd = -1; + ioh->r_read = NULL; + ioh->r_lseek = NULL; + } + else + { + ioh->iotype = IO_FD; + ioh->fd = -1; /* On next mpg123_open_fd(), this gets a value. */ + ioh->r_read = r_read != NULL ? r_read : fallback_read; + ioh->r_lseek = r_lseek != NULL ? r_lseek : fallback_lseek; + } + + /* The real reader replacement will happen while opening. */ + return MPG123_OK; +} + +int attribute_align_arg mpg123_replace_reader_handle(mpg123_handle *mh, ssize_t (*r_read) (void*, void *, size_t), long (*r_lseek)(void*, long, int), void (*cleanup)(void*)) +{ + struct wrap_data* ioh; + + if(mh == NULL) return MPG123_ERR; + + mpg123_close(mh); + ioh = wrap_get(mh); + if(ioh == NULL) return MPG123_ERR; + + ioh->iotype = IO_HANDLE; + ioh->handle = NULL; + ioh->r_h_read = r_read; + ioh->r_h_lseek = r_lseek; + ioh->h_cleanup = cleanup; + + /* The real reader replacement will happen while opening. */ + return MPG123_OK; +} + +/* + The open routines always need to watch out for a prepared wrapper handle to use replaced normal I/O. + Two cases to consider: + 1. Plain normal open using internal I/O. + 2. Client called mpg123_replace_reader() before. + The second case needs hackery to activate the client I/O callbacks. For that, we create a custom I/O handle and use the guts of mpg123_open_fd() on it. +*/ +int attribute_align_arg mpg123_open(mpg123_handle *mh, const char *path) +{ + struct wrap_data* ioh; + + if(mh == NULL) return MPG123_ERR; + + ioh = mh->wrapperdata; + /* Mimic the use of mpg123_replace_reader() functions by lower levels... + IO_HANDLE is not valid here, though. Only IO_FD. */ + if(ioh != NULL && ioh->iotype == IO_FD) + { + int err; + err = MPG123_LARGENAME(mpg123_replace_reader_handle)(mh, wrap_read, wrap_lseek, wrap_io_cleanup); + if(err != MPG123_OK) return MPG123_ERR; + + /* The above call implied mpg123_close() already */ + /* + I really need to open the file here... to be able to use the replacer handle I/O ... + my_fd is used to indicate closing of the descriptor on cleanup. 
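+ (Descriptors handed in via mpg123_open_fd() are never stored in my_fd, so wrap_io_cleanup()
+ leaves them open; only descriptors opened right here get closed again.)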
+ */ + ioh->my_fd = compat_open(path, O_RDONLY|O_BINARY); + if(ioh->my_fd < 0) + { + if(!(mh->p.flags & MPG123_QUIET)) error2("Cannot open file %s: %s", path, strerror(errno)); + + mh->err = MPG123_BAD_FILE; + return MPG123_ERR; + } + /* Store a copy of the descriptor where it is actually used. */ + ioh->fd = ioh->my_fd; + /* Initiate I/O operating on my handle now. */ + err = open_stream_handle(mh, ioh); + if(err != MPG123_OK) + { + wrap_io_cleanup(ioh); + return MPG123_ERR; + } + /* All fine... */ + return MPG123_OK; + } + else return MPG123_LARGENAME(mpg123_open)(mh, path); +} + +/* + This is in fact very similar to the above: + The open routines always need to watch out for a prepared wrapper handle to use replaced normal I/O. + Two cases to consider: + 1. Plain normal open_fd using internal I/O. + 2. Client called mpg123_replace_reader() before. + The second case needs hackery to activate the client I/O callbacks. For that, we create a custom I/O handle and use the guts of mpg123_open_fd() on it. +*/ + +int attribute_align_arg mpg123_open_fd(mpg123_handle *mh, int fd) +{ + struct wrap_data* ioh; + + if(mh == NULL) return MPG123_ERR; + + mpg123_close(mh); + ioh = mh->wrapperdata; + if(ioh != NULL && ioh->iotype == IO_FD) + { + int err; + err = MPG123_LARGENAME(mpg123_replace_reader_handle)(mh, wrap_read, wrap_lseek, wrap_io_cleanup); + if(err != MPG123_OK) return MPG123_ERR; + + /* The above call implied mpg123_close() already */ + + /* Store the real file descriptor inside the handle. */ + ioh->fd = fd; + /* Initiate I/O operating on my handle now. */ + err = open_stream_handle(mh, ioh); + if(err != MPG123_OK) + { + wrap_io_cleanup(ioh); + return MPG123_ERR; + } + /* All fine... */ + return MPG123_OK; + } + else return MPG123_LARGENAME(mpg123_open_fd)(mh, fd); +} + +int attribute_align_arg mpg123_open_handle(mpg123_handle *mh, void *handle) +{ + struct wrap_data* ioh; + + if(mh == NULL) return MPG123_ERR; + + mpg123_close(mh); + ioh = mh->wrapperdata; + if(ioh != NULL && ioh->iotype == IO_HANDLE && ioh->r_h_read != NULL) + { + /* Wrap the custom handle into my handle. */ + int err; + err = MPG123_LARGENAME(mpg123_replace_reader_handle)(mh, wrap_read, wrap_lseek, wrap_io_cleanup); + if(err != MPG123_OK) return MPG123_ERR; + + ioh->handle = handle; + /* No extra error handling, keep behaviour of the original open_handle. */ + return open_stream_handle(mh, ioh); + } + else + { + /* This is an error ... you need to prepare the I/O before using it. */ + mh->err = MPG123_BAD_CUSTOM_IO; + return MPG123_ERR; + } +} + Index: include/reactos/libs/libmpg123/libmpg123.c =================================================================== --- include/reactos/libs/libmpg123/libmpg123.c (revision 0) +++ include/reactos/libs/libmpg123/libmpg123.c (working copy) @@ -0,0 +1,1663 @@ +/* + libmpg123: MPEG Audio Decoder library + + copyright 1995-2014 by the mpg123 project - free software under the terms of the LGPL 2.1 + see COPYING and AUTHORS files in distribution or http://mpg123.org + +*/ + +#include "mpg123lib_intern.h" +#include "icy2utf8.h" +#include "debug.h" + +#include "gapless.h" + +#define SEEKFRAME(mh) ((mh)->ignoreframe < 0 ? 
0 : (mh)->ignoreframe) + +static int initialized = 0; + +int attribute_align_arg mpg123_init(void) +{ + if((sizeof(short) != 2) || (sizeof(long) < 4)) return MPG123_BAD_TYPES; + + if(initialized) return MPG123_OK; /* no need to initialize twice */ + +#ifndef NO_LAYER12 + init_layer12(); /* inits also shared tables with layer1 */ +#endif +#ifndef NO_LAYER3 + init_layer3(); +#endif + prepare_decode_tables(); + check_decoders(); + initialized = 1; + return MPG123_OK; +} + +void attribute_align_arg mpg123_exit(void) +{ + /* nothing yet, but something later perhaps */ +} + +/* create a new handle with specified decoder, decoder can be "", "auto" or NULL for auto-detection */ +mpg123_handle attribute_align_arg *mpg123_new(const char* decoder, int *error) +{ + return mpg123_parnew(NULL, decoder, error); +} + +/* ...the full routine with optional initial parameters to override defaults. */ +mpg123_handle attribute_align_arg *mpg123_parnew(mpg123_pars *mp, const char* decoder, int *error) +{ + mpg123_handle *fr = NULL; + int err = MPG123_OK; + + if(initialized) fr = (mpg123_handle*) malloc(sizeof(mpg123_handle)); + else err = MPG123_NOT_INITIALIZED; + if(fr != NULL) + { + frame_init_par(fr, mp); + debug("cpu opt setting"); + if(frame_cpu_opt(fr, decoder) != 1) + { + err = MPG123_BAD_DECODER; + frame_exit(fr); + free(fr); + fr = NULL; + } + } + if(fr != NULL) + { + fr->decoder_change = 1; + } + else if(err == MPG123_OK) err = MPG123_OUT_OF_MEM; + + if(error != NULL) *error = err; + return fr; +} + +int attribute_align_arg mpg123_decoder(mpg123_handle *mh, const char* decoder) +{ + enum optdec dt = dectype(decoder); + + if(mh == NULL) return MPG123_ERR; + + if(dt == nodec) + { + mh->err = MPG123_BAD_DECODER; + return MPG123_ERR; + } + if(dt == mh->cpu_opts.type) return MPG123_OK; + + /* Now really change. */ + /* frame_exit(mh); + frame_init(mh); */ + debug("cpu opt setting"); + if(frame_cpu_opt(mh, decoder) != 1) + { + mh->err = MPG123_BAD_DECODER; + frame_exit(mh); + return MPG123_ERR; + } + /* New buffers for decoder are created in frame_buffers() */ + if((frame_outbuffer(mh) != 0)) + { + mh->err = MPG123_NO_BUFFERS; + frame_exit(mh); + return MPG123_ERR; + } + /* Do _not_ call decode_update here! That is only allowed after a first MPEG frame has been met. */ + mh->decoder_change = 1; + return MPG123_OK; +} + +int attribute_align_arg mpg123_param(mpg123_handle *mh, enum mpg123_parms key, long val, double fval) +{ + int r; + + if(mh == NULL) return MPG123_ERR; + r = mpg123_par(&mh->p, key, val, fval); + if(r != MPG123_OK){ mh->err = r; r = MPG123_ERR; } + else + { /* Special treatment for some settings. */ +#ifdef FRAME_INDEX + if(key == MPG123_INDEX_SIZE) + { /* Apply frame index size and grow property on the fly. */ + r = frame_index_setup(mh); + if(r != MPG123_OK) mh->err = MPG123_INDEX_FAIL; + } +#endif +#ifndef NO_FEEDER + /* Feeder pool size is applied right away, reader will react to that. 
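+   Example call (illustrative value only): mpg123_param(mh, MPG123_FEEDPOOL, 8, 0.) resizes the
+   feeder pool immediately through bc_poolsize() instead of waiting for the next open.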
*/ + if(key == MPG123_FEEDPOOL || key == MPG123_FEEDBUFFER) + bc_poolsize(&mh->rdat.buffer, mh->p.feedpool, mh->p.feedbuffer); +#endif + } + return r; +} + +int attribute_align_arg mpg123_par(mpg123_pars *mp, enum mpg123_parms key, long val, double fval) +{ + int ret = MPG123_OK; + + if(mp == NULL) return MPG123_BAD_PARS; + switch(key) + { + case MPG123_VERBOSE: + mp->verbose = val; + break; + case MPG123_FLAGS: +#ifndef GAPLESS + if(val & MPG123_GAPLESS) ret = MPG123_NO_GAPLESS; +#endif + if(ret == MPG123_OK) mp->flags = val; + debug1("set flags to 0x%lx", (unsigned long) mp->flags); + break; + case MPG123_ADD_FLAGS: +#ifndef GAPLESS + /* Enabling of gapless mode doesn't work when it's not there, but disabling (below) is no problem. */ + if(val & MPG123_GAPLESS) ret = MPG123_NO_GAPLESS; + else +#endif + mp->flags |= val; + debug1("set flags to 0x%lx", (unsigned long) mp->flags); + break; + case MPG123_REMOVE_FLAGS: + mp->flags &= ~val; + debug1("set flags to 0x%lx", (unsigned long) mp->flags); + break; + case MPG123_FORCE_RATE: /* should this trigger something? */ +#ifdef NO_NTOM + if(val > 0) + ret = MPG123_BAD_RATE; +#else + if(val > 96000) ret = MPG123_BAD_RATE; + else mp->force_rate = val < 0 ? 0 : val; /* >0 means enable, 0 disable */ +#endif + break; + case MPG123_DOWN_SAMPLE: +#ifdef NO_DOWNSAMPLE + if(val != 0) ret = MPG123_BAD_RATE; +#else + if(val < 0 || val > 2) ret = MPG123_BAD_RATE; + else mp->down_sample = (int)val; +#endif + break; + case MPG123_RVA: + if(val < 0 || val > MPG123_RVA_MAX) ret = MPG123_BAD_RVA; + else mp->rva = (int)val; + break; + case MPG123_DOWNSPEED: + mp->halfspeed = val < 0 ? 0 : val; + break; + case MPG123_UPSPEED: + mp->doublespeed = val < 0 ? 0 : val; + break; + case MPG123_ICY_INTERVAL: +#ifndef NO_ICY + mp->icy_interval = val > 0 ? val : 0; +#else + if(val > 0) ret = MPG123_BAD_PARAM; +#endif + break; + case MPG123_OUTSCALE: + /* Choose the value that is non-zero, if any. + Downscaling integers to 1.0 . */ + mp->outscale = val == 0 ? fval : (double)val/SHORT_SCALE; + break; + case MPG123_TIMEOUT: +#ifdef TIMEOUT_READ + mp->timeout = val >= 0 ? 
val : 0; +#else + if(val > 0) ret = MPG123_NO_TIMEOUT; +#endif + break; + case MPG123_RESYNC_LIMIT: + mp->resync_limit = val; + break; + case MPG123_INDEX_SIZE: +#ifdef FRAME_INDEX + mp->index_size = val; +#else + ret = MPG123_NO_INDEX; +#endif + break; + case MPG123_PREFRAMES: + if(val >= 0) mp->preframes = val; + else ret = MPG123_BAD_VALUE; + break; + case MPG123_FEEDPOOL: +#ifndef NO_FEEDER + if(val >= 0) mp->feedpool = val; + else ret = MPG123_BAD_VALUE; +#else + ret = MPG123_MISSING_FEATURE; +#endif + break; + case MPG123_FEEDBUFFER: +#ifndef NO_FEEDER + if(val > 0) mp->feedbuffer = val; + else ret = MPG123_BAD_VALUE; +#else + ret = MPG123_MISSING_FEATURE; +#endif + break; + default: + ret = MPG123_BAD_PARAM; + } + return ret; +} + +int attribute_align_arg mpg123_getparam(mpg123_handle *mh, enum mpg123_parms key, long *val, double *fval) +{ + int r; + + if(mh == NULL) return MPG123_ERR; + r = mpg123_getpar(&mh->p, key, val, fval); + if(r != MPG123_OK){ mh->err = r; r = MPG123_ERR; } + return r; +} + +int attribute_align_arg mpg123_getpar(mpg123_pars *mp, enum mpg123_parms key, long *val, double *fval) +{ + int ret = 0; + + if(mp == NULL) return MPG123_BAD_PARS; + switch(key) + { + case MPG123_VERBOSE: + if(val) *val = mp->verbose; + break; + case MPG123_FLAGS: + case MPG123_ADD_FLAGS: + if(val) *val = mp->flags; + break; + case MPG123_FORCE_RATE: + if(val) +#ifdef NO_NTOM + *val = 0; +#else + *val = mp->force_rate; +#endif + break; + case MPG123_DOWN_SAMPLE: + if(val) *val = mp->down_sample; + break; + case MPG123_RVA: + if(val) *val = mp->rva; + break; + case MPG123_DOWNSPEED: + if(val) *val = mp->halfspeed; + break; + case MPG123_UPSPEED: + if(val) *val = mp->doublespeed; + break; + case MPG123_ICY_INTERVAL: +#ifndef NO_ICY + if(val) *val = (long)mp->icy_interval; +#else + if(val) *val = 0; +#endif + break; + case MPG123_OUTSCALE: + if(fval) *fval = mp->outscale; + if(val) *val = (long)(mp->outscale*SHORT_SCALE); + break; + case MPG123_RESYNC_LIMIT: + if(val) *val = mp->resync_limit; + break; + case MPG123_INDEX_SIZE: + if(val) +#ifdef FRAME_INDEX + *val = mp->index_size; +#else + *val = 0; /* graceful fallback: no index is index of zero size */ +#endif + break; + case MPG123_PREFRAMES: + *val = mp->preframes; + break; + case MPG123_FEEDPOOL: +#ifndef NO_FEEDER + *val = mp->feedpool; +#else + ret = MPG123_MISSING_FEATURE; +#endif + break; + case MPG123_FEEDBUFFER: +#ifndef NO_FEEDER + *val = mp->feedbuffer; +#else + ret = MPG123_MISSING_FEATURE; +#endif + break; + default: + ret = MPG123_BAD_PARAM; + } + return ret; +} + +int attribute_align_arg mpg123_getstate(mpg123_handle *mh, enum mpg123_state key, long *val, double *fval) +{ + int ret = MPG123_OK; + long theval = 0; + double thefval = 0.; + + if(mh == NULL) return MPG123_ERR; + + switch(key) + { + case MPG123_ACCURATE: + theval = mh->state_flags & FRAME_ACCURATE; + break; + case MPG123_FRANKENSTEIN: + theval = mh->state_flags & FRAME_FRANKENSTEIN; + break; + case MPG123_BUFFERFILL: +#ifndef NO_FEEDER + { + size_t sval = bc_fill(&mh->rdat.buffer); + theval = (long)sval; + if((size_t)theval != sval) + { + mh->err = MPG123_INT_OVERFLOW; + ret = MPG123_ERR; + } + } +#else + mh->err = MPG123_MISSING_FEATURE; + ret = MPG123_ERR; +#endif + break; + case MPG123_FRESH_DECODER: + theval = mh->state_flags & FRAME_FRESH_DECODER; + mh->state_flags &= ~FRAME_FRESH_DECODER; + break; + default: + mh->err = MPG123_BAD_KEY; + ret = MPG123_ERR; + } + + if(val != NULL) *val = theval; + if(fval != NULL) *fval = thefval; + + return ret; +} + +int 
attribute_align_arg mpg123_eq(mpg123_handle *mh, enum mpg123_channels channel, int band, double val) +{ + if(mh == NULL) return MPG123_ERR; + if(band < 0 || band > 31){ mh->err = MPG123_BAD_BAND; return MPG123_ERR; } + switch(channel) + { + case MPG123_LEFT|MPG123_RIGHT: + mh->equalizer[0][band] = mh->equalizer[1][band] = DOUBLE_TO_REAL(val); + break; + case MPG123_LEFT: mh->equalizer[0][band] = DOUBLE_TO_REAL(val); break; + case MPG123_RIGHT: mh->equalizer[1][band] = DOUBLE_TO_REAL(val); break; + default: + mh->err=MPG123_BAD_CHANNEL; + return MPG123_ERR; + } + mh->have_eq_settings = TRUE; + return MPG123_OK; +} + +double attribute_align_arg mpg123_geteq(mpg123_handle *mh, enum mpg123_channels channel, int band) +{ + double ret = 0.; + + if(mh == NULL) return MPG123_ERR; + + /* Handle this gracefully. When there is no band, it has no volume. */ + if(band > -1 && band < 32) + switch(channel) + { + case MPG123_LEFT|MPG123_RIGHT: + ret = 0.5*(REAL_TO_DOUBLE(mh->equalizer[0][band])+REAL_TO_DOUBLE(mh->equalizer[1][band])); + break; + case MPG123_LEFT: ret = REAL_TO_DOUBLE(mh->equalizer[0][band]); break; + case MPG123_RIGHT: ret = REAL_TO_DOUBLE(mh->equalizer[1][band]); break; + /* Default case is already handled: ret = 0 */ + } + + return ret; +} + + +/* plain file access, no http! */ +int attribute_align_arg mpg123_open(mpg123_handle *mh, const char *path) +{ + if(mh == NULL) return MPG123_ERR; + + mpg123_close(mh); + return open_stream(mh, path, -1); +} + +int attribute_align_arg mpg123_open_fd(mpg123_handle *mh, int fd) +{ + if(mh == NULL) return MPG123_ERR; + + mpg123_close(mh); + return open_stream(mh, NULL, fd); +} + +int attribute_align_arg mpg123_open_handle(mpg123_handle *mh, void *iohandle) +{ + if(mh == NULL) return MPG123_ERR; + + mpg123_close(mh); + if(mh->rdat.r_read_handle == NULL) + { + mh->err = MPG123_BAD_CUSTOM_IO; + return MPG123_ERR; + } + return open_stream_handle(mh, iohandle); +} + +int attribute_align_arg mpg123_open_feed(mpg123_handle *mh) +{ + if(mh == NULL) return MPG123_ERR; + + mpg123_close(mh); + return open_feed(mh); +} + +int attribute_align_arg mpg123_replace_reader( mpg123_handle *mh, + ssize_t (*r_read) (int, void *, size_t), + off_t (*r_lseek)(int, off_t, int) ) +{ + if(mh == NULL) return MPG123_ERR; + + mpg123_close(mh); + mh->rdat.r_read = r_read; + mh->rdat.r_lseek = r_lseek; + return MPG123_OK; +} + +int attribute_align_arg mpg123_replace_reader_handle( mpg123_handle *mh, + ssize_t (*r_read) (void*, void *, size_t), + off_t (*r_lseek)(void*, off_t, int), + void (*cleanup)(void*) ) +{ + if(mh == NULL) return MPG123_ERR; + + mpg123_close(mh); + mh->rdat.r_read_handle = r_read; + mh->rdat.r_lseek_handle = r_lseek; + mh->rdat.cleanup_handle = cleanup; + return MPG123_OK; +} + +/* Update decoding engine for + a) a new choice of decoder + b) a changed native format of the MPEG stream + ... calls are only valid after parsing some MPEG frame! */ +int decode_update(mpg123_handle *mh) +{ + long native_rate; + int b; + + if(mh->num < 0) + { + if(!(mh->p.flags & MPG123_QUIET)) error("decode_update() has been called before reading the first MPEG frame! Internal programming error."); + + mh->err = MPG123_BAD_DECODER_SETUP; + return MPG123_ERR; + } + + mh->state_flags |= FRAME_FRESH_DECODER; + native_rate = frame_freq(mh); + + b = frame_output_format(mh); /* Select the new output format based on given constraints. */ + if(b < 0) return MPG123_ERR; + + if(b == 1) mh->new_format = 1; /* Store for later... 
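+   (the decode entry points later report this pending change to the caller as MPG123_NEW_FORMAT,
+   presumably before any samples in the new format are handed out)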
*/ + + debug3("updating decoder structure with native rate %li and af.rate %li (new format: %i)", native_rate, mh->af.rate, mh->new_format); + if(mh->af.rate == native_rate) mh->down_sample = 0; + else if(mh->af.rate == native_rate>>1) mh->down_sample = 1; + else if(mh->af.rate == native_rate>>2) mh->down_sample = 2; + else mh->down_sample = 3; /* flexible (fixed) rate */ + switch(mh->down_sample) + { + case 0: + case 1: + case 2: + mh->down_sample_sblimit = SBLIMIT>>(mh->down_sample); + /* With downsampling I get less samples per frame */ + mh->outblock = outblock_bytes(mh, (mh->spf>>mh->down_sample)); + break; +#ifndef NO_NTOM + case 3: + { + if(synth_ntom_set_step(mh) != 0) return -1; + if(frame_freq(mh) > mh->af.rate) + { + mh->down_sample_sblimit = SBLIMIT * mh->af.rate; + mh->down_sample_sblimit /= frame_freq(mh); + } + else mh->down_sample_sblimit = SBLIMIT; + mh->outblock = outblock_bytes(mh, + ( ( NTOM_MUL-1+mh->spf + * (((size_t)NTOM_MUL*mh->af.rate)/frame_freq(mh)) + )/NTOM_MUL )); + } + break; +#endif + } + + if(!(mh->p.flags & MPG123_FORCE_MONO)) + { + if(mh->af.channels == 1) mh->single = SINGLE_MIX; + else mh->single = SINGLE_STEREO; + } + else mh->single = (mh->p.flags & MPG123_FORCE_MONO)-1; + if(set_synth_functions(mh) != 0) return -1;; + + /* The needed size of output buffer may have changed. */ + if(frame_outbuffer(mh) != MPG123_OK) return -1; + + do_rva(mh); + debug3("done updating decoder structure with native rate %li and af.rate %li and down_sample %i", frame_freq(mh), mh->af.rate, mh->down_sample); + + return 0; +} + +size_t attribute_align_arg mpg123_safe_buffer(void) +{ + /* real is the largest possible output (it's 32bit float, 32bit int or 64bit double). */ + return sizeof(real)*2*1152*NTOM_MAX; +} + +size_t attribute_align_arg mpg123_outblock(mpg123_handle *mh) +{ + /* Try to be helpful and never return zero output block size. */ + if(mh != NULL && mh->outblock > 0) return mh->outblock; + else return mpg123_safe_buffer(); +} + +/* Read in the next frame we actually want for decoding. + This includes skipping/ignoring frames, in additon to skipping junk in the parser. */ +static int get_next_frame(mpg123_handle *mh) +{ + int change = mh->decoder_change; + /* Ensure we got proper decoder for ignoring frames. + Header can be changed from seeking around. But be careful: Only after at + least one frame got read, decoder update makes sense. */ + if(mh->header_change > 1 && mh->num >= 0) + { + change = 1; + mh->header_change = 0; + debug("starting with big header change"); + if(decode_update(mh) < 0) + return MPG123_ERR; + } + + do + { + int b; + /* Decode & discard some frame(s) before beginning. */ + if(mh->to_ignore && mh->num < mh->firstframe && mh->num >= mh->ignoreframe) + { + debug1("ignoring frame %li", (long)mh->num); + /* Decoder structure must be current! decode_update has been called before... */ + (mh->do_layer)(mh); mh->buffer.fill = 0; +#ifndef NO_NTOM + /* The ignored decoding may have failed. Make sure ntom stays consistent. */ + if(mh->down_sample == 3) ntom_set_ntom(mh, mh->num+1); +#endif + mh->to_ignore = mh->to_decode = FALSE; + } + /* Read new frame data; possibly breaking out here for MPG123_NEED_MORE. */ + debug("read frame"); + mh->to_decode = FALSE; + b = read_frame(mh); /* That sets to_decode only if a full frame was read. 
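+   Its return codes are sorted out right below: MPG123_NEED_MORE bubbles up to the feeder, zero or
+   a clean end of file turns into MPG123_DONE, anything else is a hard error.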
*/ + debug4("read of frame %li returned %i (to_decode=%i) at sample %li", (long)mh->num, b, mh->to_decode, (long)mpg123_tell(mh)); + if(b == MPG123_NEED_MORE) return MPG123_NEED_MORE; /* need another call with data */ + else if(b <= 0) + { + /* More sophisticated error control? */ + if(b==0 || (mh->rdat.filelen >= 0 && mh->rdat.filepos == mh->rdat.filelen)) + { /* We simply reached the end. */ + mh->track_frames = mh->num + 1; + debug("What about updating/checking gapless sample count here?"); + return MPG123_DONE; + } + else return MPG123_ERR; /* Some real error. */ + } + /* Now, there should be new data to decode ... and also possibly new stream properties */ + if(mh->header_change > 1) + { + debug("big header change"); + change = 1; + mh->header_change = 0; + /* Need to update decoder structure right away since frame might need to + be decoded on next loop iteration for properly ignoring its output. */ + if(decode_update(mh) < 0) + return MPG123_ERR; + } + /* Now some accounting: Look at the numbers and decide if we want this frame. */ + ++mh->playnum; + /* Plain skipping without decoding, only when frame is not ignored on next cycle. */ + if(mh->num < mh->firstframe || (mh->p.doublespeed && (mh->playnum % mh->p.doublespeed))) + { + if(!(mh->to_ignore && mh->num < mh->firstframe && mh->num >= mh->ignoreframe)) + { + frame_skip(mh); + /* Should one fix NtoM here or not? + It is not work the trouble for doublespeed, but what with leading frames? */ + } + } + /* Or, we are finally done and have a new frame. */ + else break; + } while(1); + + /* If we reach this point, we got a new frame ready to be decoded. + All other situations resulted in returns from the loop. */ + if(change) + { + mh->decoder_change = 0; + if(mh->fresh) + { +#ifdef GAPLESS + int b=0; + /* Prepare offsets for gapless decoding. */ + debug1("preparing gapless stuff with native rate %li", frame_freq(mh)); + frame_gapless_realinit(mh); + frame_set_frameseek(mh, mh->num); +#endif + mh->fresh = 0; +#ifdef GAPLESS + /* Could this possibly happen? With a real big gapless offset... */ + if(mh->num < mh->firstframe) b = get_next_frame(mh); + if(b < 0) return b; /* Could be error, need for more, new format... */ +#endif + } + } + return MPG123_OK; +} + +/* Assumption: A buffer full of zero samples can be constructed by repetition of this byte. + Oh, and it handles some format conversion. + Only to be used by decode_the_frame() ... */ +static int zero_byte(mpg123_handle *fr) +{ +#ifndef NO_8BIT + return fr->af.encoding & MPG123_ENC_8 ? fr->conv16to8[0] : 0; +#else + return 0; /* All normal signed formats have the zero here (even in byte form -- that may be an assumption for your funny machine...). */ +#endif +} + +/* + Not part of the api. This just decodes the frame and fills missing bits with zeroes. + There can be frames that are broken and thus make do_layer() fail. +*/ +static void decode_the_frame(mpg123_handle *fr) +{ + size_t needed_bytes = decoder_synth_bytes(fr, frame_expect_outsamples(fr)); + fr->clip += (fr->do_layer)(fr); + /*fprintf(stderr, "frame %"OFF_P": got %"SIZE_P" / %"SIZE_P"\n", fr->num,(size_p)fr->buffer.fill, (size_p)needed_bytes);*/ + /* There could be less data than promised. + Also, then debugging, we look out for coding errors that could result in _more_ data than expected. 
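+   The padding byte comes from zero_byte() above, so the 8bit output formats get their proper
+   silence value (conv16to8[0]) rather than a literal 0.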
*/ +#ifdef DEBUG + if(fr->buffer.fill != needed_bytes) + { +#endif + if(fr->buffer.fill < needed_bytes) + { + if(VERBOSE2) + fprintf(stderr, "Note: broken frame %li, filling up with %"SIZE_P" zeroes, from %"SIZE_P"\n", (long)fr->num, (size_p)(needed_bytes-fr->buffer.fill), (size_p)fr->buffer.fill); + + /* + One could do a loop with individual samples instead... but zero is zero + Actually, that is wrong: zero is mostly a series of null bytes, + but we have funny 8bit formats that have a different opinion on zero... + Unsigned 16 or 32 bit formats are handled later. + */ + memset( fr->buffer.data + fr->buffer.fill, zero_byte(fr), needed_bytes - fr->buffer.fill ); + + fr->buffer.fill = needed_bytes; +#ifndef NO_NTOM + /* ntom_val will be wrong when the decoding wasn't carried out completely */ + ntom_set_ntom(fr, fr->num+1); +#endif + } +#ifdef DEBUG + else + { + if(NOQUIET) + error2("I got _more_ bytes than expected (%"SIZE_P" / %"SIZE_P"), that should not be possible!", (size_p)fr->buffer.fill, (size_p)needed_bytes); + } + } +#endif + postprocess_buffer(fr); +} + +/* + Decode the current frame into the frame structure's buffer, accessible at the location stored in