Index: dll/3rdparty/libjpeg/change.log
===================================================================
--- dll/3rdparty/libjpeg/change.log	(revision 62563)
+++ dll/3rdparty/libjpeg/change.log	(working copy)
@@ -1,6 +1,33 @@
 CHANGE LOG for Independent JPEG Group's JPEG software
 
 
+Version 9a  19-Jan-2014
+-----------------------
+
+Add support for wide gamut color spaces (JFIF version 2).
+Improve clarity and accuracy in color conversion modules.
+Note: Requires rebuild of test images.
+
+Extend the bit depth support to all values from 8 to 12
+(BITS_IN_JSAMPLE configuration option in jmorecfg.h).
+jpegtran now supports N bits sample data precision with all N from 8 to 12
+in a single instance.  Thank to Roland Fassauer for inspiration.
+
+Try to resolve issues with new boolean type definition.
+Thank also to v4hn for suggestion.
+
+Enable option to use default Huffman tables for lossless compression
+(for hardware solution), and in this case improve lossless RGB compression
+with reversible color transform.  Thank to Benny Alexandar for hint.
+
+Extend the entropy decoding structure, so that extraneous bytes between
+compressed scan data and following marker can be reported correctly.
+Thank to Nigel Tao for hint.
+
+Add jpegtran -wipe option and extension for -crop.
+Thank to Andrew Senior, David Clunie, and Josef Schmid for suggestion.
+
+
 Version 9  13-Jan-2013
 ----------------------
 
Index: dll/3rdparty/libjpeg/cjpeg.c
===================================================================
--- dll/3rdparty/libjpeg/cjpeg.c	(revision 62563)
+++ dll/3rdparty/libjpeg/cjpeg.c	(working copy)
@@ -174,6 +174,7 @@
 #endif
 #if JPEG_LIB_VERSION_MAJOR >= 9
   fprintf(stderr, "  -rgb1          Create RGB JPEG file with reversible color transform\n");
+  fprintf(stderr, "  -bgycc         Create big gamut YCC JPEG file\n");
 #endif
 #ifdef DCT_ISLOW_SUPPORTED
   fprintf(stderr, "  -dct int       Use integer DCT method%s\n",
@@ -323,6 +324,17 @@
 #endif
       jpeg_set_colorspace(cinfo, JCS_RGB);
 
+    } else if (keymatch(arg, "bgycc", 5)) {
+      /* Force a big gamut YCC JPEG file to be generated. */
+#if JPEG_LIB_VERSION_MAJOR >= 9 && \
+      (JPEG_LIB_VERSION_MAJOR > 9 || JPEG_LIB_VERSION_MINOR >= 1)
+      jpeg_set_colorspace(cinfo, JCS_BG_YCC);
+#else
+      fprintf(stderr, "%s: sorry, BG_YCC colorspace not supported\n",
+	      progname);
+      exit(EXIT_FAILURE);
+#endif
+
     } else if (keymatch(arg, "maxmemory", 3)) {
       /* Maximum memory in Kb (or Mb with 'm'). */
       long lval;
Index: dll/3rdparty/libjpeg/jcapistd.c
===================================================================
--- dll/3rdparty/libjpeg/jcapistd.c	(revision 62563)
+++ dll/3rdparty/libjpeg/jcapistd.c	(working copy)
@@ -2,6 +2,7 @@
  * jcapistd.c
  *
  * Copyright (C) 1994-1996, Thomas G. Lane.
+ * Modified 2013 by Guido Vollbeding.
  * This file is part of the Independent JPEG Group's software.
  * For conditions of distribution and use, see the accompanying README file.
  *
@@ -145,7 +146,7 @@
     (*cinfo->master->pass_startup) (cinfo);
 
   /* Verify that at least one iMCU row has been passed. */
-  lines_per_iMCU_row = cinfo->max_v_samp_factor * DCTSIZE;
+  lines_per_iMCU_row = cinfo->max_v_samp_factor * cinfo->min_DCT_v_scaled_size;
   if (num_lines < lines_per_iMCU_row)
     ERREXIT(cinfo, JERR_BUFFER_SIZE);
 
Index: dll/3rdparty/libjpeg/jcarith.c
===================================================================
--- dll/3rdparty/libjpeg/jcarith.c	(revision 62563)
+++ dll/3rdparty/libjpeg/jcarith.c	(working copy)
@@ -1,7 +1,7 @@
 /*
  * jcarith.c
  *
- * Developed 1997-2012 by Guido Vollbeding.
+ * Developed 1997-2013 by Guido Vollbeding.
  * This file is part of the Independent JPEG Group's software.
  * For conditions of distribution and use, see the accompanying README file.
  *
@@ -362,7 +362,6 @@
 encode_mcu_DC_first (j_compress_ptr cinfo, JBLOCKROW *MCU_data)
 {
   arith_entropy_ptr entropy = (arith_entropy_ptr) cinfo->entropy;
-  JBLOCKROW block;
   unsigned char *st;
   int blkn, ci, tbl;
   int v, v2, m;
@@ -381,7 +380,6 @@
 
   /* Encode the MCU data blocks */
   for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) {
-    block = MCU_data[blkn];
     ci = cinfo->MCU_membership[blkn];
     tbl = cinfo->cur_comp_info[ci]->dc_tbl_no;
 
@@ -388,7 +386,7 @@
     /* Compute the DC value after the required point transform by Al.
      * This is simply an arithmetic right shift.
      */
-    m = IRIGHT_SHIFT((int) ((*block)[0]), cinfo->Al);
+    m = IRIGHT_SHIFT((int) (MCU_data[blkn][0][0]), cinfo->Al);
 
     /* Sections F.1.4.1 & F.1.4.4.1: Encoding of DC coefficients */
 
@@ -453,11 +451,11 @@
 encode_mcu_AC_first (j_compress_ptr cinfo, JBLOCKROW *MCU_data)
 {
   arith_entropy_ptr entropy = (arith_entropy_ptr) cinfo->entropy;
+  const int * natural_order;
   JBLOCKROW block;
   unsigned char *st;
   int tbl, k, ke;
   int v, v2, m;
-  const int * natural_order;
 
   /* Emit restart marker if needed */
   if (cinfo->restart_interval) {
@@ -552,6 +550,8 @@
 
 /*
  * MCU encoding for DC successive approximation refinement scan.
+ * Note: we assume such scans can be multi-component,
+ * although the spec is not very clear on the point.
  */
 
 METHODDEF(boolean)
@@ -593,11 +593,11 @@
 encode_mcu_AC_refine (j_compress_ptr cinfo, JBLOCKROW *MCU_data)
 {
   arith_entropy_ptr entropy = (arith_entropy_ptr) cinfo->entropy;
+  const int * natural_order;
   JBLOCKROW block;
   unsigned char *st;
   int tbl, k, ke, kex;
   int v;
-  const int * natural_order;
 
   /* Emit restart marker if needed */
   if (cinfo->restart_interval) {
@@ -692,12 +692,13 @@
 encode_mcu (j_compress_ptr cinfo, JBLOCKROW *MCU_data)
 {
   arith_entropy_ptr entropy = (arith_entropy_ptr) cinfo->entropy;
-  jpeg_component_info * compptr;
+  const int * natural_order;
   JBLOCKROW block;
   unsigned char *st;
-  int blkn, ci, tbl, k, ke;
+  int tbl, k, ke;
   int v, v2, m;
-  const int * natural_order;
+  int blkn, ci;
+  jpeg_component_info * compptr;
 
   /* Emit restart marker if needed */
   if (cinfo->restart_interval) {
Index: dll/3rdparty/libjpeg/jccolor.c
===================================================================
--- dll/3rdparty/libjpeg/jccolor.c	(revision 62563)
+++ dll/3rdparty/libjpeg/jccolor.c	(working copy)
@@ -2,7 +2,7 @@
  * jccolor.c
  *
  * Copyright (C) 1991-1996, Thomas G. Lane.
- * Modified 2011-2012 by Guido Vollbeding.
+ * Modified 2011-2013 by Guido Vollbeding.
  * This file is part of the Independent JPEG Group's software.
  * For conditions of distribution and use, see the accompanying README file.
  *
@@ -29,13 +29,25 @@
 /**************** RGB -> YCbCr conversion: most common case **************/
 
 /*
- * YCbCr is defined per CCIR 601-1, except that Cb and Cr are
- * normalized to the range 0..MAXJSAMPLE rather than -0.5 .. 0.5.
- * The conversion equations to be implemented are therefore
- *	Y  =  0.29900 * R + 0.58700 * G + 0.11400 * B
- *	Cb = -0.16874 * R - 0.33126 * G + 0.50000 * B  + CENTERJSAMPLE
- *	Cr =  0.50000 * R - 0.41869 * G - 0.08131 * B  + CENTERJSAMPLE
- * (These numbers are derived from TIFF 6.0 section 21, dated 3-June-92.)
+ * YCbCr is defined per Recommendation ITU-R BT.601-7 (03/2011),
+ * previously known as Recommendation CCIR 601-1, except that Cb and Cr
+ * are normalized to the range 0..MAXJSAMPLE rather than -0.5 .. 0.5.
+ * sRGB (standard RGB color space) is defined per IEC 61966-2-1:1999.
+ * sYCC (standard luma-chroma-chroma color space with extended gamut)
+ * is defined per IEC 61966-2-1:1999 Amendment A1:2003 Annex F.
+ * bg-sRGB and bg-sYCC (big gamut standard color spaces)
+ * are defined per IEC 61966-2-1:1999 Amendment A1:2003 Annex G.
+ * Note that the derived conversion coefficients given in some of these
+ * documents are imprecise.  The general conversion equations are
+ *	Y  = Kr * R + (1 - Kr - Kb) * G + Kb * B
+ *	Cb = 0.5 * (B - Y) / (1 - Kb)
+ *	Cr = 0.5 * (R - Y) / (1 - Kr)
+ * With Kr = 0.299 and Kb = 0.114 (derived according to SMPTE RP 177-1993
+ * from the 1953 FCC NTSC primaries and CIE Illuminant C),
+ * the conversion equations to be implemented are therefore
+ *	Y  =  0.299 * R + 0.587 * G + 0.114 * B
+ *	Cb = -0.168735892 * R - 0.331264108 * G + 0.5 * B + CENTERJSAMPLE
+ *	Cr =  0.5 * R - 0.418687589 * G - 0.081312411 * B + CENTERJSAMPLE
  * Note: older versions of the IJG code used a zero offset of MAXJSAMPLE/2,
  * rather than CENTERJSAMPLE, for Cb and Cr.  This gave equal positive and
  * negative swings for Cb/Cr, but meant that grayscale values (Cb=Cr=0)
@@ -49,9 +61,9 @@
  * For even more speed, we avoid doing any multiplications in the inner loop
  * by precalculating the constants times R,G,B for all possible values.
  * For 8-bit JSAMPLEs this is very reasonable (only 256 entries per table);
- * for 12-bit samples it is still acceptable.  It's not very reasonable for
- * 16-bit samples, but if you want lossless storage you shouldn't be changing
- * colorspace anyway.
+ * for 9-bit to 12-bit samples it is still acceptable.  It's not very
+ * reasonable for 16-bit samples, but if you want lossless storage you
+ * shouldn't be changing colorspace anyway.
  * The CENTERJSAMPLE offsets and the rounding fudge-factor of 0.5 are included
  * in the tables to save adding them separately in the inner loop.
  */
@@ -96,21 +108,21 @@
 				(TABLE_SIZE * SIZEOF(INT32)));
 
   for (i = 0; i <= MAXJSAMPLE; i++) {
-    rgb_ycc_tab[i+R_Y_OFF] = FIX(0.29900) * i;
-    rgb_ycc_tab[i+G_Y_OFF] = FIX(0.58700) * i;
-    rgb_ycc_tab[i+B_Y_OFF] = FIX(0.11400) * i     + ONE_HALF;
-    rgb_ycc_tab[i+R_CB_OFF] = (-FIX(0.16874)) * i;
-    rgb_ycc_tab[i+G_CB_OFF] = (-FIX(0.33126)) * i;
+    rgb_ycc_tab[i+R_Y_OFF] = FIX(0.299) * i;
+    rgb_ycc_tab[i+G_Y_OFF] = FIX(0.587) * i;
+    rgb_ycc_tab[i+B_Y_OFF] = FIX(0.114) * i   + ONE_HALF;
+    rgb_ycc_tab[i+R_CB_OFF] = (-FIX(0.168735892)) * i;
+    rgb_ycc_tab[i+G_CB_OFF] = (-FIX(0.331264108)) * i;
     /* We use a rounding fudge-factor of 0.5-epsilon for Cb and Cr.
      * This ensures that the maximum output will round to MAXJSAMPLE
      * not MAXJSAMPLE+1, and thus that we don't have to range-limit.
      */
-    rgb_ycc_tab[i+B_CB_OFF] = FIX(0.50000) * i    + CBCR_OFFSET + ONE_HALF-1;
+    rgb_ycc_tab[i+B_CB_OFF] = FIX(0.5) * i    + CBCR_OFFSET + ONE_HALF-1;
 /*  B=>Cb and R=>Cr tables are the same
-    rgb_ycc_tab[i+R_CR_OFF] = FIX(0.50000) * i    + CBCR_OFFSET + ONE_HALF-1;
+    rgb_ycc_tab[i+R_CR_OFF] = FIX(0.5) * i    + CBCR_OFFSET + ONE_HALF-1;
 */
-    rgb_ycc_tab[i+G_CR_OFF] = (-FIX(0.41869)) * i;
-    rgb_ycc_tab[i+B_CR_OFF] = (-FIX(0.08131)) * i;
+    rgb_ycc_tab[i+G_CR_OFF] = (-FIX(0.418687589)) * i;
+    rgb_ycc_tab[i+B_CR_OFF] = (-FIX(0.081312411)) * i;
   }
 }
 
@@ -274,6 +286,9 @@
  * Convert some rows of samples to the JPEG colorspace.
  * [R,G,B] to [R-G,G,B-G] conversion with modulo calculation
  * (forward reversible color transform).
+ * This can be seen as an adaption of the general RGB->YCbCr
+ * conversion equation with Kr = Kb = 0, while replacing the
+ * normalization by modulo calculation.
  */
 
 METHODDEF(void)
@@ -312,7 +327,7 @@
 /*
  * Convert some rows of samples to the JPEG colorspace.
  * This version handles grayscale output with no conversion.
- * The source can be either plain grayscale or YCbCr (since Y == gray).
+ * The source can be either plain grayscale or YCC (since Y == gray).
  */
 
 METHODDEF(void)
@@ -439,11 +454,13 @@
     break;
 
   case JCS_RGB:
+  case JCS_BG_RGB:
     if (cinfo->input_components != RGB_PIXELSIZE)
       ERREXIT(cinfo, JERR_BAD_IN_COLORSPACE);
     break;
 
   case JCS_YCbCr:
+  case JCS_BG_YCC:
     if (cinfo->input_components != 3)
       ERREXIT(cinfo, JERR_BAD_IN_COLORSPACE);
     break;
@@ -460,8 +477,10 @@
     break;
   }
 
-  /* Support color transform only for RGB colorspace */
-  if (cinfo->color_transform && cinfo->jpeg_color_space != JCS_RGB)
+  /* Support color transform only for RGB colorspaces */
+  if (cinfo->color_transform &&
+      cinfo->jpeg_color_space != JCS_RGB &&
+      cinfo->jpeg_color_space != JCS_BG_RGB)
     ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
 
   /* Check num_components, set conversion method based on requested space */
@@ -469,20 +488,26 @@
   case JCS_GRAYSCALE:
     if (cinfo->num_components != 1)
       ERREXIT(cinfo, JERR_BAD_J_COLORSPACE);
-    if (cinfo->in_color_space == JCS_GRAYSCALE ||
-	cinfo->in_color_space == JCS_YCbCr)
+    switch (cinfo->in_color_space) {
+    case JCS_GRAYSCALE:
+    case JCS_YCbCr:
+    case JCS_BG_YCC:
       cconvert->pub.color_convert = grayscale_convert;
-    else if (cinfo->in_color_space == JCS_RGB) {
+      break;
+    case JCS_RGB:
       cconvert->pub.start_pass = rgb_ycc_start;
       cconvert->pub.color_convert = rgb_gray_convert;
-    } else
+      break;
+    default:
       ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
+    }
     break;
 
   case JCS_RGB:
+  case JCS_BG_RGB:
     if (cinfo->num_components != 3)
       ERREXIT(cinfo, JERR_BAD_J_COLORSPACE);
-    if (cinfo->in_color_space == JCS_RGB) {
+    if (cinfo->in_color_space == cinfo->jpeg_color_space) {
       switch (cinfo->color_transform) {
       case JCT_NONE:
 	cconvert->pub.color_convert = rgb_convert;
@@ -492,7 +517,6 @@
 	break;
       default:
 	ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
-	break;
       }
     } else
       ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
@@ -501,15 +525,50 @@
   case JCS_YCbCr:
     if (cinfo->num_components != 3)
       ERREXIT(cinfo, JERR_BAD_J_COLORSPACE);
-    if (cinfo->in_color_space == JCS_RGB) {
+    switch (cinfo->in_color_space) {
+    case JCS_RGB:
       cconvert->pub.start_pass = rgb_ycc_start;
       cconvert->pub.color_convert = rgb_ycc_convert;
-    } else if (cinfo->in_color_space == JCS_YCbCr)
+      break;
+    case JCS_YCbCr:
       cconvert->pub.color_convert = null_convert;
-    else
+      break;
+    default:
       ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
+    }
     break;
 
+  case JCS_BG_YCC:
+    if (cinfo->num_components != 3)
+      ERREXIT(cinfo, JERR_BAD_J_COLORSPACE);
+    switch (cinfo->in_color_space) {
+    case JCS_RGB:
+      /* For conversion from normal RGB input to BG_YCC representation,
+       * the Cb/Cr values are first computed as usual, and then
+       * quantized further after DCT processing by a factor of
+       * 2 in reference to the nominal quantization factor.
+       */
+      /* need quantization scale by factor of 2 after DCT */
+      cinfo->comp_info[1].component_needed = TRUE;
+      cinfo->comp_info[2].component_needed = TRUE;
+      /* compute normal YCC first */
+      cconvert->pub.start_pass = rgb_ycc_start;
+      cconvert->pub.color_convert = rgb_ycc_convert;
+      break;
+    case JCS_YCbCr:
+      /* need quantization scale by factor of 2 after DCT */
+      cinfo->comp_info[1].component_needed = TRUE;
+      cinfo->comp_info[2].component_needed = TRUE;
+      /*FALLTHROUGH*/
+    case JCS_BG_YCC:
+      /* Pass through for BG_YCC input */
+      cconvert->pub.color_convert = null_convert;
+      break;
+    default:
+      ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
+    }
+    break;
+
   case JCS_CMYK:
     if (cinfo->num_components != 4)
       ERREXIT(cinfo, JERR_BAD_J_COLORSPACE);
@@ -522,13 +581,17 @@
   case JCS_YCCK:
     if (cinfo->num_components != 4)
       ERREXIT(cinfo, JERR_BAD_J_COLORSPACE);
-    if (cinfo->in_color_space == JCS_CMYK) {
+    switch (cinfo->in_color_space) {
+    case JCS_CMYK:
       cconvert->pub.start_pass = rgb_ycc_start;
       cconvert->pub.color_convert = cmyk_ycck_convert;
-    } else if (cinfo->in_color_space == JCS_YCCK)
+      break;
+    case JCS_YCCK:
       cconvert->pub.color_convert = null_convert;
-    else
+      break;
+    default:
       ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
+    }
     break;
 
   default:			/* allow null conversion of JCS_UNKNOWN */
Index: dll/3rdparty/libjpeg/jcdctmgr.c
===================================================================
--- dll/3rdparty/libjpeg/jcdctmgr.c	(revision 62563)
+++ dll/3rdparty/libjpeg/jcdctmgr.c	(working copy)
@@ -2,6 +2,7 @@
  * jcdctmgr.c
  *
  * Copyright (C) 1994-1996, Thomas G. Lane.
+ * Modified 2003-2013 by Guido Vollbeding.
  * This file is part of the Independent JPEG Group's software.
  * For conditions of distribution and use, see the accompanying README file.
  *
@@ -25,16 +26,9 @@
   /* Pointer to the DCT routine actually in use */
   forward_DCT_method_ptr do_dct[MAX_COMPONENTS];
 
-  /* The actual post-DCT divisors --- not identical to the quant table
-   * entries, because of scaling (especially for an unnormalized DCT).
-   * Each table is given in normal array order.
-   */
-  DCTELEM * divisors[NUM_QUANT_TBLS];
-
 #ifdef DCT_FLOAT_SUPPORTED
   /* Same as above for the floating-point case. */
   float_DCT_method_ptr do_float_dct[MAX_COMPONENTS];
-  FAST_FLOAT * float_divisors[NUM_QUANT_TBLS];
 #endif
 } my_fdct_controller;
 
@@ -41,6 +35,21 @@
 typedef my_fdct_controller * my_fdct_ptr;
 
 
+/* The allocated post-DCT divisor tables -- big enough for any
+ * supported variant and not identical to the quant table entries,
+ * because of scaling (especially for an unnormalized DCT) --
+ * are pointed to by dct_table in the per-component comp_info
+ * structures.  Each table is given in normal array order.
+ */
+
+typedef union {
+  DCTELEM int_array[DCTSIZE2];
+#ifdef DCT_FLOAT_SUPPORTED
+  FAST_FLOAT float_array[DCTSIZE2];
+#endif
+} divisor_table;
+
+
 /* The current scaled-DCT routines require ISLOW-style divisor tables,
  * so be sure to compile that code if either ISLOW or SCALING is requested.
  */
@@ -71,7 +80,7 @@
   /* This routine is heavily used, so it's worth coding it tightly. */
   my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct;
   forward_DCT_method_ptr do_dct = fdct->do_dct[compptr->component_index];
-  DCTELEM * divisors = fdct->divisors[compptr->quant_tbl_no];
+  DCTELEM * divisors = (DCTELEM *) compptr->dct_table;
   DCTELEM workspace[DCTSIZE2];	/* work area for FDCT subroutine */
   JDIMENSION bi;
 
@@ -134,7 +143,7 @@
   /* This routine is heavily used, so it's worth coding it tightly. */
   my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct;
   float_DCT_method_ptr do_dct = fdct->do_float_dct[compptr->component_index];
-  FAST_FLOAT * divisors = fdct->float_divisors[compptr->quant_tbl_no];
+  FAST_FLOAT * divisors = (FAST_FLOAT *) compptr->dct_table;
   FAST_FLOAT workspace[DCTSIZE2]; /* work area for FDCT subroutine */
   JDIMENSION bi;
 
@@ -352,8 +361,7 @@
 	cinfo->quant_tbl_ptrs[qtblno] == NULL)
       ERREXIT1(cinfo, JERR_NO_QUANT_TABLE, qtblno);
     qtbl = cinfo->quant_tbl_ptrs[qtblno];
-    /* Compute divisors for this quant table */
-    /* We may do this more than once for same table, but it's not a big deal */
+    /* Create divisor table from quant table */
     switch (method) {
 #ifdef PROVIDE_ISLOW_TABLES
     case JDCT_ISLOW:
@@ -360,14 +368,10 @@
       /* For LL&M IDCT method, divisors are equal to raw quantization
        * coefficients multiplied by 8 (to counteract scaling).
        */
-      if (fdct->divisors[qtblno] == NULL) {
-	fdct->divisors[qtblno] = (DCTELEM *)
-	  (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-				      DCTSIZE2 * SIZEOF(DCTELEM));
-      }
-      dtbl = fdct->divisors[qtblno];
+      dtbl = (DCTELEM *) compptr->dct_table;
       for (i = 0; i < DCTSIZE2; i++) {
-	dtbl[i] = ((DCTELEM) qtbl->quantval[i]) << 3;
+	dtbl[i] =
+	  ((DCTELEM) qtbl->quantval[i]) << (compptr->component_needed ? 4 : 3);
       }
       fdct->pub.forward_DCT[ci] = forward_DCT;
       break;
@@ -395,17 +399,12 @@
 	};
 	SHIFT_TEMPS
 
-	if (fdct->divisors[qtblno] == NULL) {
-	  fdct->divisors[qtblno] = (DCTELEM *)
-	    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-					DCTSIZE2 * SIZEOF(DCTELEM));
-	}
-	dtbl = fdct->divisors[qtblno];
+	dtbl = (DCTELEM *) compptr->dct_table;
 	for (i = 0; i < DCTSIZE2; i++) {
 	  dtbl[i] = (DCTELEM)
 	    DESCALE(MULTIPLY16V16((INT32) qtbl->quantval[i],
 				  (INT32) aanscales[i]),
-		    CONST_BITS-3);
+		    compptr->component_needed ? CONST_BITS-4 : CONST_BITS-3);
 	}
       }
       fdct->pub.forward_DCT[ci] = forward_DCT;
@@ -422,7 +421,7 @@
 	 * What's actually stored is 1/divisor so that the inner loop can
 	 * use a multiplication rather than a division.
 	 */
-	FAST_FLOAT * fdtbl;
+	FAST_FLOAT * fdtbl = (FAST_FLOAT *) compptr->dct_table;
 	int row, col;
 	static const double aanscalefactor[DCTSIZE] = {
 	  1.0, 1.387039845, 1.306562965, 1.175875602,
@@ -429,18 +428,13 @@
 	  1.0, 0.785694958, 0.541196100, 0.275899379
 	};
 
-	if (fdct->float_divisors[qtblno] == NULL) {
-	  fdct->float_divisors[qtblno] = (FAST_FLOAT *)
-	    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
-					DCTSIZE2 * SIZEOF(FAST_FLOAT));
-	}
-	fdtbl = fdct->float_divisors[qtblno];
 	i = 0;
 	for (row = 0; row < DCTSIZE; row++) {
 	  for (col = 0; col < DCTSIZE; col++) {
 	    fdtbl[i] = (FAST_FLOAT)
-	      (1.0 / (((double) qtbl->quantval[i] *
-		       aanscalefactor[row] * aanscalefactor[col] * 8.0)));
+	      (1.0 / ((double) qtbl->quantval[i] *
+		      aanscalefactor[row] * aanscalefactor[col] *
+		      (compptr->component_needed ? 16.0 : 8.0)));
 	    i++;
 	  }
 	}
@@ -464,19 +458,20 @@
 jinit_forward_dct (j_compress_ptr cinfo)
 {
   my_fdct_ptr fdct;
-  int i;
+  int ci;
+  jpeg_component_info *compptr;
 
   fdct = (my_fdct_ptr)
     (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
 				SIZEOF(my_fdct_controller));
-  cinfo->fdct = (struct jpeg_forward_dct *) fdct;
+  cinfo->fdct = &fdct->pub;
   fdct->pub.start_pass = start_pass_fdctmgr;
 
-  /* Mark divisor tables unallocated */
-  for (i = 0; i < NUM_QUANT_TBLS; i++) {
-    fdct->divisors[i] = NULL;
-#ifdef DCT_FLOAT_SUPPORTED
-    fdct->float_divisors[i] = NULL;
-#endif
+  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+       ci++, compptr++) {
+    /* Allocate a divisor table for each component */
+    compptr->dct_table =
+      (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				  SIZEOF(divisor_table));
   }
 }
Index: dll/3rdparty/libjpeg/jchuff.c
===================================================================
--- dll/3rdparty/libjpeg/jchuff.c	(revision 62563)
+++ dll/3rdparty/libjpeg/jchuff.c	(working copy)
@@ -2,7 +2,7 @@
  * jchuff.c
  *
  * Copyright (C) 1991-1997, Thomas G. Lane.
- * Modified 2006-2009 by Guido Vollbeding.
+ * Modified 2006-2013 by Guido Vollbeding.
  * This file is part of the Independent JPEG Group's software.
  * For conditions of distribution and use, see the accompanying README file.
  *
@@ -308,24 +308,27 @@
 /* Emit some bits; return TRUE if successful, FALSE if must suspend */
 {
   /* This routine is heavily used, so it's worth coding tightly. */
-  register INT32 put_buffer = (INT32) code;
-  register int put_bits = state->cur.put_bits;
+  register INT32 put_buffer;
+  register int put_bits;
 
   /* if size is 0, caller used an invalid Huffman table entry */
   if (size == 0)
     ERREXIT(state->cinfo, JERR_HUFF_MISSING_CODE);
 
-  put_buffer &= (((INT32) 1)<<size) - 1; /* mask off any extra bits in code */
-  
-  put_bits += size;		/* new number of bits in buffer */
-  
+  /* mask off any extra bits in code */
+  put_buffer = ((INT32) code) & ((((INT32) 1) << size) - 1);
+
+  /* new number of bits in buffer */
+  put_bits = size + state->cur.put_bits;
+
   put_buffer <<= 24 - put_bits; /* align incoming bits */
 
-  put_buffer |= state->cur.put_buffer; /* and merge with old buffer contents */
-  
+  /* and merge with old buffer contents */
+  put_buffer |= state->cur.put_buffer;
+
   while (put_bits >= 8) {
     int c = (int) ((put_buffer >> 16) & 0xFF);
-    
+
     emit_byte_s(state, c, return FALSE);
     if (c == 0xFF) {		/* need to stuff a zero byte? */
       emit_byte_s(state, 0, return FALSE);
@@ -347,8 +350,8 @@
 /* Emit some bits, unless we are in gather mode */
 {
   /* This routine is heavily used, so it's worth coding tightly. */
-  register INT32 put_buffer = (INT32) code;
-  register int put_bits = entropy->saved.put_bits;
+  register INT32 put_buffer;
+  register int put_bits;
 
   /* if size is 0, caller used an invalid Huffman table entry */
   if (size == 0)
@@ -357,10 +360,12 @@
   if (entropy->gather_statistics)
     return;			/* do nothing if we're only getting stats */
 
-  put_buffer &= (((INT32) 1)<<size) - 1; /* mask off any extra bits in code */
-  
-  put_bits += size;		/* new number of bits in buffer */
+  /* mask off any extra bits in code */
+  put_buffer = ((INT32) code) & ((((INT32) 1) << size) - 1);
 
+  /* new number of bits in buffer */
+  put_bits = size + entropy->saved.put_bits;
+
   put_buffer <<= 24 - put_bits; /* align incoming bits */
 
   /* and merge with old buffer contents */
@@ -543,10 +548,7 @@
   huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy;
   register int temp, temp2;
   register int nbits;
-  int blkn, ci;
-  int Al = cinfo->Al;
-  JBLOCKROW block;
-  jpeg_component_info * compptr;
+  int blkn, ci, tbl;
   ISHIFT_TEMPS
 
   entropy->next_output_byte = cinfo->dest->next_output_byte;
@@ -559,21 +561,20 @@
 
   /* Encode the MCU data blocks */
   for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) {
-    block = MCU_data[blkn];
     ci = cinfo->MCU_membership[blkn];
-    compptr = cinfo->cur_comp_info[ci];
+    tbl = cinfo->cur_comp_info[ci]->dc_tbl_no;
 
     /* Compute the DC value after the required point transform by Al.
      * This is simply an arithmetic right shift.
      */
-    temp2 = IRIGHT_SHIFT((int) ((*block)[0]), Al);
+    temp = IRIGHT_SHIFT((int) (MCU_data[blkn][0][0]), cinfo->Al);
 
     /* DC differences are figured on the point-transformed values. */
-    temp = temp2 - entropy->saved.last_dc_val[ci];
-    entropy->saved.last_dc_val[ci] = temp2;
+    temp2 = temp - entropy->saved.last_dc_val[ci];
+    entropy->saved.last_dc_val[ci] = temp;
 
     /* Encode the DC coefficient difference per section G.1.2.1 */
-    temp2 = temp;
+    temp = temp2;
     if (temp < 0) {
       temp = -temp;		/* temp is abs value of input */
       /* For a negative input, want temp2 = bitwise complement of abs(input) */
@@ -580,7 +581,7 @@
       /* This code assumes we are on a two's complement machine */
       temp2--;
     }
-    
+
     /* Find the number of bits needed for the magnitude of the coefficient */
     nbits = 0;
     while (temp) {
@@ -592,10 +593,10 @@
      */
     if (nbits > MAX_COEF_BITS+1)
       ERREXIT(cinfo, JERR_BAD_DCT_COEF);
-    
+
     /* Count/emit the Huffman-coded symbol for the number of bits */
-    emit_dc_symbol(entropy, compptr->dc_tbl_no, nbits);
-    
+    emit_dc_symbol(entropy, tbl, nbits);
+
     /* Emit that number of bits of the value, if positive, */
     /* or the complement of its magnitude, if negative. */
     if (nbits)			/* emit_bits rejects calls with size 0 */
@@ -628,12 +629,12 @@
 encode_mcu_AC_first (j_compress_ptr cinfo, JBLOCKROW *MCU_data)
 {
   huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy;
+  const int * natural_order;
+  JBLOCKROW block;
   register int temp, temp2;
   register int nbits;
   register int r, k;
   int Se, Al;
-  const int * natural_order;
-  JBLOCKROW block;
 
   entropy->next_output_byte = cinfo->dest->next_output_byte;
   entropy->free_in_buffer = cinfo->dest->free_in_buffer;
@@ -731,8 +732,8 @@
 
 /*
  * MCU encoding for DC successive approximation refinement scan.
- * Note: we assume such scans can be multi-component, although the spec
- * is not very clear on the point.
+ * Note: we assume such scans can be multi-component,
+ * although the spec is not very clear on the point.
  */
 
 METHODDEF(boolean)
@@ -739,10 +740,7 @@
 encode_mcu_DC_refine (j_compress_ptr cinfo, JBLOCKROW *MCU_data)
 {
   huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy;
-  register int temp;
-  int blkn;
-  int Al = cinfo->Al;
-  JBLOCKROW block;
+  int Al, blkn;
 
   entropy->next_output_byte = cinfo->dest->next_output_byte;
   entropy->free_in_buffer = cinfo->dest->free_in_buffer;
@@ -752,13 +750,12 @@
     if (entropy->restarts_to_go == 0)
       emit_restart_e(entropy, entropy->next_restart_num);
 
+  Al = cinfo->Al;
+
   /* Encode the MCU data blocks */
   for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) {
-    block = MCU_data[blkn];
-
     /* We simply emit the Al'th bit of the DC coefficient value. */
-    temp = (*block)[0];
-    emit_bits_e(entropy, (unsigned int) (temp >> Al), 1);
+    emit_bits_e(entropy, (unsigned int) (MCU_data[blkn][0][0] >> Al), 1);
   }
 
   cinfo->dest->next_output_byte = entropy->next_output_byte;
@@ -786,14 +783,14 @@
 encode_mcu_AC_refine (j_compress_ptr cinfo, JBLOCKROW *MCU_data)
 {
   huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy;
+  const int * natural_order;
+  JBLOCKROW block;
   register int temp;
   register int r, k;
+  int Se, Al;
   int EOB;
   char *BR_buffer;
   unsigned int BR;
-  int Se, Al;
-  const int * natural_order;
-  JBLOCKROW block;
   int absvalues[DCTSIZE2];
 
   entropy->next_output_byte = cinfo->dest->next_output_byte;
@@ -918,7 +915,7 @@
 {
   register int temp, temp2;
   register int nbits;
-  register int k, r, i;
+  register int r, k;
   int Se = state->cinfo->lim_Se;
   const int * natural_order = state->cinfo->natural_order;
 
@@ -960,7 +957,7 @@
   r = 0;			/* r = run length of zeros */
 
   for (k = 1; k <= Se; k++) {
-    if ((temp = block[natural_order[k]]) == 0) {
+    if ((temp2 = block[natural_order[k]]) == 0) {
       r++;
     } else {
       /* if run length > 15, must emit special run-length-16 codes (0xF0) */
@@ -970,7 +967,7 @@
 	r -= 16;
       }
 
-      temp2 = temp;
+      temp = temp2;
       if (temp < 0) {
 	temp = -temp;		/* temp is abs value of input */
 	/* This code assumes we are on a two's complement machine */
@@ -986,8 +983,8 @@
 	ERREXIT(state->cinfo, JERR_BAD_DCT_COEF);
 
       /* Emit Huffman symbol for run length / number of bits */
-      i = (r << 4) + nbits;
-      if (! emit_bits_s(state, actbl->ehufco[i], actbl->ehufsi[i]))
+      temp = (r << 4) + nbits;
+      if (! emit_bits_s(state, actbl->ehufco[temp], actbl->ehufsi[temp]))
 	return FALSE;
 
       /* Emit that number of bits of the value, if positive, */
@@ -1124,16 +1121,16 @@
 {
   register int temp;
   register int nbits;
-  register int k, r;
+  register int r, k;
   int Se = cinfo->lim_Se;
   const int * natural_order = cinfo->natural_order;
-  
+
   /* Encode the DC coefficient difference per section F.1.2.1 */
-  
+
   temp = block[0] - last_dc_val;
   if (temp < 0)
     temp = -temp;
-  
+
   /* Find the number of bits needed for the magnitude of the coefficient */
   nbits = 0;
   while (temp) {
@@ -1148,11 +1145,11 @@
 
   /* Count the Huffman symbol for the number of bits */
   dc_counts[nbits]++;
-  
+
   /* Encode the AC coefficients per section F.1.2.2 */
-  
+
   r = 0;			/* r = run length of zeros */
-  
+
   for (k = 1; k <= Se; k++) {
     if ((temp = block[natural_order[k]]) == 0) {
       r++;
@@ -1162,11 +1159,11 @@
 	ac_counts[0xF0]++;
 	r -= 16;
       }
-      
+
       /* Find the number of bits needed for the magnitude of the coefficient */
       if (temp < 0)
 	temp = -temp;
-      
+
       /* Find the number of bits needed for the magnitude of the coefficient */
       nbits = 1;		/* there must be at least one 1 bit */
       while ((temp >>= 1))
@@ -1174,10 +1171,10 @@
       /* Check for out-of-range coefficient values */
       if (nbits > MAX_COEF_BITS)
 	ERREXIT(cinfo, JERR_BAD_DCT_COEF);
-      
+
       /* Count Huffman symbol for run length / number of bits */
       ac_counts[(r << 4) + nbits]++;
-      
+
       r = 0;
     }
   }
@@ -1562,7 +1559,7 @@
   entropy = (huff_entropy_ptr)
     (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
 				SIZEOF(huff_entropy_encoder));
-  cinfo->entropy = (struct jpeg_entropy_encoder *) entropy;
+  cinfo->entropy = &entropy->pub;
   entropy->pub.start_pass = start_pass_huff;
 
   /* Mark tables unallocated */
Index: dll/3rdparty/libjpeg/jcinit.c
===================================================================
--- dll/3rdparty/libjpeg/jcinit.c	(revision 62563)
+++ dll/3rdparty/libjpeg/jcinit.c	(working copy)
@@ -2,6 +2,7 @@
  * jcinit.c
  *
  * Copyright (C) 1991-1997, Thomas G. Lane.
+ * Modified 2003-2013 by Guido Vollbeding.
  * This file is part of the Independent JPEG Group's software.
  * For conditions of distribution and use, see the accompanying README file.
  *
@@ -29,6 +30,24 @@
 GLOBAL(void)
 jinit_compress_master (j_compress_ptr cinfo)
 {
+  long samplesperrow;
+  JDIMENSION jd_samplesperrow;
+
+  /* For now, precision must match compiled-in value... */
+  if (cinfo->data_precision != BITS_IN_JSAMPLE)
+    ERREXIT1(cinfo, JERR_BAD_PRECISION, cinfo->data_precision);
+
+  /* Sanity check on image dimensions */
+  if (cinfo->image_height <= 0 || cinfo->image_width <= 0 ||
+      cinfo->input_components <= 0)
+    ERREXIT(cinfo, JERR_EMPTY_IMAGE);
+
+  /* Width of an input scanline must be representable as JDIMENSION. */
+  samplesperrow = (long) cinfo->image_width * (long) cinfo->input_components;
+  jd_samplesperrow = (JDIMENSION) samplesperrow;
+  if ((long) jd_samplesperrow != samplesperrow)
+    ERREXIT(cinfo, JERR_WIDTH_OVERFLOW);
+
   /* Initialize master control (includes parameter checking/processing) */
   jinit_c_master_control(cinfo, FALSE /* full compression */);
 
Index: dll/3rdparty/libjpeg/jcmarker.c
===================================================================
--- dll/3rdparty/libjpeg/jcmarker.c	(revision 62563)
+++ dll/3rdparty/libjpeg/jcmarker.c	(working copy)
@@ -2,7 +2,7 @@
  * jcmarker.c
  *
  * Copyright (C) 1991-1998, Thomas G. Lane.
- * Modified 2003-2012 by Guido Vollbeding.
+ * Modified 2003-2013 by Guido Vollbeding.
  * This file is part of the Independent JPEG Group's software.
  * For conditions of distribution and use, see the accompanying README file.
  *
@@ -508,8 +508,8 @@
  * Write datastream header.
  * This consists of an SOI and optional APPn markers.
  * We recommend use of the JFIF marker, but not the Adobe marker,
- * when using YCbCr or grayscale data.  The JFIF marker should NOT
- * be used for any other JPEG colorspace.  The Adobe marker is helpful
+ * when using YCbCr or grayscale data.  The JFIF marker is also used
+ * for other standard JPEG colorspaces.  The Adobe marker is helpful
  * to distinguish RGB, CMYK, and YCCK colorspaces.
  * Note that an application can write additional header markers after
  * jpeg_start_compress returns.
Index: dll/3rdparty/libjpeg/jcmaster.c
===================================================================
--- dll/3rdparty/libjpeg/jcmaster.c	(revision 62563)
+++ dll/3rdparty/libjpeg/jcmaster.c	(working copy)
@@ -2,7 +2,7 @@
  * jcmaster.c
  *
  * Copyright (C) 1991-1997, Thomas G. Lane.
- * Modified 2003-2011 by Guido Vollbeding.
+ * Modified 2003-2013 by Guido Vollbeding.
  * This file is part of the Independent JPEG Group's software.
  * For conditions of distribution and use, see the accompanying README file.
  *
@@ -222,8 +222,6 @@
 {
   int ci, ssize;
   jpeg_component_info *compptr;
-  long samplesperrow;
-  JDIMENSION jd_samplesperrow;
 
   if (transcode_only)
     jpeg_calc_trans_dimensions(cinfo);
@@ -251,7 +249,7 @@
 
   /* Sanity check on image dimensions */
   if (cinfo->jpeg_height <= 0 || cinfo->jpeg_width <= 0 ||
-      cinfo->num_components <= 0 || cinfo->input_components <= 0)
+      cinfo->num_components <= 0)
     ERREXIT(cinfo, JERR_EMPTY_IMAGE);
 
   /* Make sure image isn't bigger than I can handle */
@@ -259,14 +257,8 @@
       (long) cinfo->jpeg_width > (long) JPEG_MAX_DIMENSION)
     ERREXIT1(cinfo, JERR_IMAGE_TOO_BIG, (unsigned int) JPEG_MAX_DIMENSION);
 
-  /* Width of an input scanline must be representable as JDIMENSION. */
-  samplesperrow = (long) cinfo->image_width * (long) cinfo->input_components;
-  jd_samplesperrow = (JDIMENSION) samplesperrow;
-  if ((long) jd_samplesperrow != samplesperrow)
-    ERREXIT(cinfo, JERR_WIDTH_OVERFLOW);
-
-  /* For now, precision must match compiled-in value... */
-  if (cinfo->data_precision != BITS_IN_JSAMPLE)
+  /* Only 8 to 12 bits data precision are supported for DCT based JPEG */
+  if (cinfo->data_precision < 8 || cinfo->data_precision > 12)
     ERREXIT1(cinfo, JERR_BAD_PRECISION, cinfo->data_precision);
 
   /* Check that number of components won't exceed internal array sizes */
@@ -339,8 +331,10 @@
       jdiv_round_up((long) cinfo->jpeg_height *
 		    (long) (compptr->v_samp_factor * compptr->DCT_v_scaled_size),
 		    (long) (cinfo->max_v_samp_factor * cinfo->block_size));
-    /* Mark component needed (this flag isn't actually used for compression) */
-    compptr->component_needed = TRUE;
+    /* Don't need quantization scale after DCT,
+     * until color conversion says otherwise.
+     */
+    compptr->component_needed = FALSE;
   }
 
   /* Compute number of fully interleaved MCU rows (number of times that
@@ -811,7 +805,7 @@
   master = (my_master_ptr)
       (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
 				  SIZEOF(my_comp_master));
-  cinfo->master = (struct jpeg_comp_master *) master;
+  cinfo->master = &master->pub;
   master->pub.prepare_for_pass = prepare_for_pass;
   master->pub.pass_startup = pass_startup;
   master->pub.finish_pass = finish_pass_master;
@@ -833,10 +827,14 @@
     cinfo->num_scans = 1;
   }
 
-  if ((cinfo->progressive_mode || cinfo->block_size < DCTSIZE) &&
-      !cinfo->arith_code)			/*  TEMPORARY HACK ??? */
-    /* assume default tables no good for progressive or downscale mode */
-    cinfo->optimize_coding = TRUE;
+  if (cinfo->optimize_coding)
+    cinfo->arith_code = FALSE; /* disable arithmetic coding */
+  else if (! cinfo->arith_code &&
+	   (cinfo->progressive_mode ||
+	    (cinfo->block_size > 1 && cinfo->block_size < DCTSIZE)))
+    /* TEMPORARY HACK ??? */
+    /* assume default tables no good for progressive or reduced AC mode */
+    cinfo->optimize_coding = TRUE; /* force Huffman optimization */
 
   /* Initialize my private state */
   if (transcode_only) {
Index: dll/3rdparty/libjpeg/jcparam.c
===================================================================
--- dll/3rdparty/libjpeg/jcparam.c	(revision 62563)
+++ dll/3rdparty/libjpeg/jcparam.c	(working copy)
@@ -2,7 +2,7 @@
  * jcparam.c
  *
  * Copyright (C) 1991-1998, Thomas G. Lane.
- * Modified 2003-2012 by Guido Vollbeding.
+ * Modified 2003-2013 by Guido Vollbeding.
  * This file is part of the Independent JPEG Group's software.
  * For conditions of distribution and use, see the accompanying README file.
  *
@@ -323,18 +323,17 @@
   /* Expect normal source image, not raw downsampled data */
   cinfo->raw_data_in = FALSE;
 
-  /* Use Huffman coding, not arithmetic coding, by default */
-  cinfo->arith_code = FALSE;
+  /* The standard Huffman tables are only valid for 8-bit data precision.
+   * If the precision is higher, use arithmetic coding.
+   * (Alternatively, using Huffman coding would be possible with forcing
+   * optimization on so that usable tables will be computed, or by
+   * supplying default tables that are valid for the desired precision.)
+   * Otherwise, use Huffman coding by default.
+   */
+  cinfo->arith_code = cinfo->data_precision > 8 ? TRUE : FALSE;
 
   /* By default, don't do extra passes to optimize entropy coding */
   cinfo->optimize_coding = FALSE;
-  /* The standard Huffman tables are only valid for 8-bit data precision.
-   * If the precision is higher, force optimization on so that usable
-   * tables will be computed.  This test can be removed if default tables
-   * are supplied that are valid for the desired precision.
-   */
-  if (cinfo->data_precision > 8)
-    cinfo->optimize_coding = TRUE;
 
   /* By default, use the simpler non-cosited sampling alignment */
   cinfo->CCIR601_sampling = FALSE;
@@ -360,6 +359,9 @@
    * JFIF_minor_version to 2.  We could probably get away with just defaulting
    * to 1.02, but there may still be some decoders in use that will complain
    * about that; saying 1.01 should minimize compatibility problems.
+   *
+   * For wide gamut colorspaces (BG_RGB and BG_YCC), the major version will be
+   * overridden by jpeg_set_colorspace and set to 2.
    */
   cinfo->JFIF_major_version = 1; /* Default JFIF version = 1.01 */
   cinfo->JFIF_minor_version = 1;
@@ -384,6 +386,9 @@
 jpeg_default_colorspace (j_compress_ptr cinfo)
 {
   switch (cinfo->in_color_space) {
+  case JCS_UNKNOWN:
+    jpeg_set_colorspace(cinfo, JCS_UNKNOWN);
+    break;
   case JCS_GRAYSCALE:
     jpeg_set_colorspace(cinfo, JCS_GRAYSCALE);
     break;
@@ -399,9 +404,13 @@
   case JCS_YCCK:
     jpeg_set_colorspace(cinfo, JCS_YCCK);
     break;
-  case JCS_UNKNOWN:
-    jpeg_set_colorspace(cinfo, JCS_UNKNOWN);
+  case JCS_BG_RGB:
+    /* No translation for now -- conversion to BG_YCC not yet supportet */
+    jpeg_set_colorspace(cinfo, JCS_BG_RGB);
     break;
+  case JCS_BG_YCC:
+    jpeg_set_colorspace(cinfo, JCS_BG_YCC);
+    break;
   default:
     ERREXIT(cinfo, JERR_BAD_IN_COLORSPACE);
   }
@@ -441,20 +450,31 @@
   cinfo->write_Adobe_marker = FALSE; /* write no Adobe marker by default */
 
   switch (colorspace) {
+  case JCS_UNKNOWN:
+    cinfo->num_components = cinfo->input_components;
+    if (cinfo->num_components < 1 || cinfo->num_components > MAX_COMPONENTS)
+      ERREXIT2(cinfo, JERR_COMPONENT_COUNT, cinfo->num_components,
+	       MAX_COMPONENTS);
+    for (ci = 0; ci < cinfo->num_components; ci++) {
+      SET_COMP(ci, ci, 1,1, 0, 0,0);
+    }
+    break;
   case JCS_GRAYSCALE:
     cinfo->write_JFIF_header = TRUE; /* Write a JFIF marker */
     cinfo->num_components = 1;
     /* JFIF specifies component ID 1 */
-    SET_COMP(0, 1, 1,1, 0, 0,0);
+    SET_COMP(0, 0x01, 1,1, 0, 0,0);
     break;
   case JCS_RGB:
     cinfo->write_Adobe_marker = TRUE; /* write Adobe marker to flag RGB */
     cinfo->num_components = 3;
-    SET_COMP(0, 0x52 /* 'R' */, 1,1, 0, 0,0);
-    SET_COMP(1, 0x47 /* 'G' */, 1,1, 0,
+    SET_COMP(0, 0x52 /* 'R' */, 1,1, 0,
 		cinfo->color_transform == JCT_SUBTRACT_GREEN ? 1 : 0,
 		cinfo->color_transform == JCT_SUBTRACT_GREEN ? 1 : 0);
-    SET_COMP(2, 0x42 /* 'B' */, 1,1, 0, 0,0);
+    SET_COMP(1, 0x47 /* 'G' */, 1,1, 0, 0,0);
+    SET_COMP(2, 0x42 /* 'B' */, 1,1, 0,
+		cinfo->color_transform == JCT_SUBTRACT_GREEN ? 1 : 0,
+		cinfo->color_transform == JCT_SUBTRACT_GREEN ? 1 : 0);
     break;
   case JCS_YCbCr:
     cinfo->write_JFIF_header = TRUE; /* Write a JFIF marker */
@@ -461,9 +481,9 @@
     cinfo->num_components = 3;
     /* JFIF specifies component IDs 1,2,3 */
     /* We default to 2x2 subsamples of chrominance */
-    SET_COMP(0, 1, 2,2, 0, 0,0);
-    SET_COMP(1, 2, 1,1, 1, 1,1);
-    SET_COMP(2, 3, 1,1, 1, 1,1);
+    SET_COMP(0, 0x01, 2,2, 0, 0,0);
+    SET_COMP(1, 0x02, 1,1, 1, 1,1);
+    SET_COMP(2, 0x03, 1,1, 1, 1,1);
     break;
   case JCS_CMYK:
     cinfo->write_Adobe_marker = TRUE; /* write Adobe marker to flag CMYK */
@@ -476,20 +496,34 @@
   case JCS_YCCK:
     cinfo->write_Adobe_marker = TRUE; /* write Adobe marker to flag YCCK */
     cinfo->num_components = 4;
-    SET_COMP(0, 1, 2,2, 0, 0,0);
-    SET_COMP(1, 2, 1,1, 1, 1,1);
-    SET_COMP(2, 3, 1,1, 1, 1,1);
-    SET_COMP(3, 4, 2,2, 0, 0,0);
+    SET_COMP(0, 0x01, 2,2, 0, 0,0);
+    SET_COMP(1, 0x02, 1,1, 1, 1,1);
+    SET_COMP(2, 0x03, 1,1, 1, 1,1);
+    SET_COMP(3, 0x04, 2,2, 0, 0,0);
     break;
-  case JCS_UNKNOWN:
-    cinfo->num_components = cinfo->input_components;
-    if (cinfo->num_components < 1 || cinfo->num_components > MAX_COMPONENTS)
-      ERREXIT2(cinfo, JERR_COMPONENT_COUNT, cinfo->num_components,
-	       MAX_COMPONENTS);
-    for (ci = 0; ci < cinfo->num_components; ci++) {
-      SET_COMP(ci, ci, 1,1, 0, 0,0);
-    }
+  case JCS_BG_RGB:
+    cinfo->write_JFIF_header = TRUE; /* Write a JFIF marker */
+    cinfo->JFIF_major_version = 2;   /* Set JFIF major version = 2 */
+    cinfo->num_components = 3;
+    /* Add offset 0x20 to the normal R/G/B component IDs */
+    SET_COMP(0, 0x72 /* 'r' */, 1,1, 0,
+		cinfo->color_transform == JCT_SUBTRACT_GREEN ? 1 : 0,
+		cinfo->color_transform == JCT_SUBTRACT_GREEN ? 1 : 0);
+    SET_COMP(1, 0x67 /* 'g' */, 1,1, 0, 0,0);
+    SET_COMP(2, 0x62 /* 'b' */, 1,1, 0,
+		cinfo->color_transform == JCT_SUBTRACT_GREEN ? 1 : 0,
+		cinfo->color_transform == JCT_SUBTRACT_GREEN ? 1 : 0);
     break;
+  case JCS_BG_YCC:
+    cinfo->write_JFIF_header = TRUE; /* Write a JFIF marker */
+    cinfo->JFIF_major_version = 2;   /* Set JFIF major version = 2 */
+    cinfo->num_components = 3;
+    /* Add offset 0x20 to the normal Cb/Cr component IDs */
+    /* We default to 2x2 subsamples of chrominance */
+    SET_COMP(0, 0x01, 2,2, 0, 0,0);
+    SET_COMP(1, 0x22, 1,1, 1, 1,1);
+    SET_COMP(2, 0x23, 1,1, 1, 1,1);
+    break;
   default:
     ERREXIT(cinfo, JERR_BAD_J_COLORSPACE);
   }
@@ -572,8 +606,10 @@
     ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
 
   /* Figure space needed for script.  Calculation must match code below! */
-  if (ncomps == 3 && cinfo->jpeg_color_space == JCS_YCbCr) {
-    /* Custom script for YCbCr color images. */
+  if (ncomps == 3 &&
+      (cinfo->jpeg_color_space == JCS_YCbCr ||
+       cinfo->jpeg_color_space == JCS_BG_YCC)) {
+    /* Custom script for YCC color images. */
     nscans = 10;
   } else {
     /* All-purpose script for other color spaces. */
@@ -588,7 +624,7 @@
    * multiple compressions without changing the settings.  To avoid a memory
    * leak if jpeg_simple_progression is called repeatedly for the same JPEG
    * object, we try to re-use previously allocated space, and we allocate
-   * enough space to handle YCbCr even if initially asked for grayscale.
+   * enough space to handle YCC even if initially asked for grayscale.
    */
   if (cinfo->script_space == NULL || cinfo->script_space_size < nscans) {
     cinfo->script_space_size = MAX(nscans, 10);
@@ -600,8 +636,10 @@
   cinfo->scan_info = scanptr;
   cinfo->num_scans = nscans;
 
-  if (ncomps == 3 && cinfo->jpeg_color_space == JCS_YCbCr) {
-    /* Custom script for YCbCr color images. */
+  if (ncomps == 3 &&
+      (cinfo->jpeg_color_space == JCS_YCbCr ||
+       cinfo->jpeg_color_space == JCS_BG_YCC)) {
+    /* Custom script for YCC color images. */
     /* Initial DC scan */
     scanptr = fill_dc_scans(scanptr, ncomps, 0, 1);
     /* Initial AC scan: get some luma data out in a hurry */
Index: dll/3rdparty/libjpeg/jctrans.c
===================================================================
--- dll/3rdparty/libjpeg/jctrans.c	(revision 62563)
+++ dll/3rdparty/libjpeg/jctrans.c	(working copy)
@@ -2,7 +2,7 @@
  * jctrans.c
  *
  * Copyright (C) 1995-1998, Thomas G. Lane.
- * Modified 2000-2012 by Guido Vollbeding.
+ * Modified 2000-2013 by Guido Vollbeding.
  * This file is part of the Independent JPEG Group's software.
  * For conditions of distribution and use, see the accompanying README file.
  *
@@ -143,10 +143,10 @@
    * if the application chooses to copy JFIF 1.02 extension markers from
    * the source file, we need to copy the version to make sure we don't
    * emit a file that has 1.02 extensions but a claimed version of 1.01.
-   * We will *not*, however, copy version info from mislabeled "2.01" files.
    */
   if (srcinfo->saw_JFIF_marker) {
-    if (srcinfo->JFIF_major_version == 1) {
+    if (srcinfo->JFIF_major_version == 1 ||
+	srcinfo->JFIF_major_version == 2) {
       dstinfo->JFIF_major_version = srcinfo->JFIF_major_version;
       dstinfo->JFIF_minor_version = srcinfo->JFIF_minor_version;
     }
Index: dll/3rdparty/libjpeg/jdapimin.c
===================================================================
--- dll/3rdparty/libjpeg/jdapimin.c	(revision 62563)
+++ dll/3rdparty/libjpeg/jdapimin.c	(working copy)
@@ -2,7 +2,7 @@
  * jdapimin.c
  *
  * Copyright (C) 1994-1998, Thomas G. Lane.
- * Modified 2009 by Guido Vollbeding.
+ * Modified 2009-2013 by Guido Vollbeding.
  * This file is part of the Independent JPEG Group's software.
  * For conditions of distribution and use, see the accompanying README file.
  *
@@ -114,8 +114,9 @@
 LOCAL(void)
 default_decompress_parms (j_decompress_ptr cinfo)
 {
+  int cid0, cid1, cid2;
+
   /* Guess the input colorspace, and set output colorspace accordingly. */
-  /* (Wish JPEG committee had provided a real way to specify this...) */
   /* Note application may override our guesses. */
   switch (cinfo->num_components) {
   case 1:
@@ -124,9 +125,22 @@
     break;
     
   case 3:
-    if (cinfo->saw_JFIF_marker) {
-      cinfo->jpeg_color_space = JCS_YCbCr; /* JFIF implies YCbCr */
-    } else if (cinfo->saw_Adobe_marker) {
+    cid0 = cinfo->comp_info[0].component_id;
+    cid1 = cinfo->comp_info[1].component_id;
+    cid2 = cinfo->comp_info[2].component_id;
+
+    /* First try to guess from the component IDs */
+    if      (cid0 == 0x01 && cid1 == 0x02 && cid2 == 0x03)
+      cinfo->jpeg_color_space = JCS_YCbCr;
+    else if (cid0 == 0x01 && cid1 == 0x22 && cid2 == 0x23)
+      cinfo->jpeg_color_space = JCS_BG_YCC;
+    else if (cid0 == 0x52 && cid1 == 0x47 && cid2 == 0x42)
+      cinfo->jpeg_color_space = JCS_RGB;	/* ASCII 'R', 'G', 'B' */
+    else if (cid0 == 0x72 && cid1 == 0x67 && cid2 == 0x62)
+      cinfo->jpeg_color_space = JCS_BG_RGB;	/* ASCII 'r', 'g', 'b' */
+    else if (cinfo->saw_JFIF_marker)
+      cinfo->jpeg_color_space = JCS_YCbCr;	/* assume it's YCbCr */
+    else if (cinfo->saw_Adobe_marker) {
       switch (cinfo->Adobe_transform) {
       case 0:
 	cinfo->jpeg_color_space = JCS_RGB;
@@ -136,23 +150,12 @@
 	break;
       default:
 	WARNMS1(cinfo, JWRN_ADOBE_XFORM, cinfo->Adobe_transform);
-	cinfo->jpeg_color_space = JCS_YCbCr; /* assume it's YCbCr */
+	cinfo->jpeg_color_space = JCS_YCbCr;	/* assume it's YCbCr */
 	break;
       }
     } else {
-      /* Saw no special markers, try to guess from the component IDs */
-      int cid0 = cinfo->comp_info[0].component_id;
-      int cid1 = cinfo->comp_info[1].component_id;
-      int cid2 = cinfo->comp_info[2].component_id;
-
-      if (cid0 == 1 && cid1 == 2 && cid2 == 3)
-	cinfo->jpeg_color_space = JCS_YCbCr; /* assume JFIF w/out marker */
-      else if (cid0 == 82 && cid1 == 71 && cid2 == 66)
-	cinfo->jpeg_color_space = JCS_RGB; /* ASCII 'R', 'G', 'B' */
-      else {
-	TRACEMS3(cinfo, 1, JTRC_UNKNOWN_IDS, cid0, cid1, cid2);
-	cinfo->jpeg_color_space = JCS_YCbCr; /* assume it's YCbCr */
-      }
+      TRACEMS3(cinfo, 1, JTRC_UNKNOWN_IDS, cid0, cid1, cid2);
+      cinfo->jpeg_color_space = JCS_YCbCr;	/* assume it's YCbCr */
     }
     /* Always guess RGB is proper output colorspace. */
     cinfo->out_color_space = JCS_RGB;
@@ -169,7 +172,7 @@
 	break;
       default:
 	WARNMS1(cinfo, JWRN_ADOBE_XFORM, cinfo->Adobe_transform);
-	cinfo->jpeg_color_space = JCS_YCCK; /* assume it's YCCK */
+	cinfo->jpeg_color_space = JCS_YCCK;	/* assume it's YCCK */
 	break;
       }
     } else {
Index: dll/3rdparty/libjpeg/jdapistd.c
===================================================================
--- dll/3rdparty/libjpeg/jdapistd.c	(revision 62563)
+++ dll/3rdparty/libjpeg/jdapistd.c	(working copy)
@@ -2,6 +2,7 @@
  * jdapistd.c
  *
  * Copyright (C) 1994-1996, Thomas G. Lane.
+ * Modified 2002-2013 by Guido Vollbeding.
  * This file is part of the Independent JPEG Group's software.
  * For conditions of distribution and use, see the accompanying README file.
  *
Index: dll/3rdparty/libjpeg/jdarith.c
===================================================================
--- dll/3rdparty/libjpeg/jdarith.c	(revision 62563)
+++ dll/3rdparty/libjpeg/jdarith.c	(working copy)
@@ -1,7 +1,7 @@
 /*
  * jdarith.c
  *
- * Developed 1997-2012 by Guido Vollbeding.
+ * Developed 1997-2013 by Guido Vollbeding.
  * This file is part of the Independent JPEG Group's software.
  * For conditions of distribution and use, see the accompanying README file.
  *
@@ -395,6 +395,8 @@
 
 /*
  * MCU decoding for DC successive approximation refinement scan.
+ * Note: we assume such scans can be multi-component,
+ * although the spec is not very clear on the point.
  */
 
 METHODDEF(boolean)
@@ -744,6 +746,17 @@
 
 
 /*
+ * Finish up at the end of an arithmetic-compressed scan.
+ */
+
+METHODDEF(void)
+finish_pass (j_decompress_ptr cinfo)
+{
+  /* no work necessary here */
+}
+
+
+/*
  * Module initialization routine for arithmetic entropy decoding.
  */
 
@@ -758,6 +771,7 @@
 				SIZEOF(arith_entropy_decoder));
   cinfo->entropy = &entropy->pub;
   entropy->pub.start_pass = start_pass;
+  entropy->pub.finish_pass = finish_pass;
 
   /* Mark tables unallocated */
   for (i = 0; i < NUM_ARITH_TBLS; i++) {
Index: dll/3rdparty/libjpeg/jdcolor.c
===================================================================
--- dll/3rdparty/libjpeg/jdcolor.c	(revision 62563)
+++ dll/3rdparty/libjpeg/jdcolor.c	(working copy)
@@ -2,7 +2,7 @@
  * jdcolor.c
  *
  * Copyright (C) 1991-1997, Thomas G. Lane.
- * Modified 2011-2012 by Guido Vollbeding.
+ * Modified 2011-2013 by Guido Vollbeding.
  * This file is part of the Independent JPEG Group's software.
  * For conditions of distribution and use, see the accompanying README file.
  *
@@ -19,12 +19,15 @@
 typedef struct {
   struct jpeg_color_deconverter pub; /* public fields */
 
-  /* Private state for YCC->RGB conversion */
+  /* Private state for YCbCr->RGB and BG_YCC->RGB conversion */
   int * Cr_r_tab;		/* => table for Cr to R conversion */
   int * Cb_b_tab;		/* => table for Cb to B conversion */
   INT32 * Cr_g_tab;		/* => table for Cr to G conversion */
   INT32 * Cb_g_tab;		/* => table for Cb to G conversion */
 
+  JSAMPLE * range_limit; /* pointer to normal sample range limit table, */
+		     /* or extended sample range limit table for BG_YCC */
+
   /* Private state for RGB->Y conversion */
   INT32 * rgb_y_tab;		/* => table for RGB to Y conversion */
 } my_color_deconverter;
@@ -32,23 +35,45 @@
 typedef my_color_deconverter * my_cconvert_ptr;
 
 
-/**************** YCbCr -> RGB conversion: most common case **************/
-/****************   RGB -> Y   conversion: less common case **************/
+/***************  YCbCr -> RGB conversion: most common case **************/
+/*************** BG_YCC -> RGB conversion: less common case **************/
+/***************    RGB -> Y   conversion: less common case **************/
 
 /*
- * YCbCr is defined per CCIR 601-1, except that Cb and Cr are
- * normalized to the range 0..MAXJSAMPLE rather than -0.5 .. 0.5.
- * The conversion equations to be implemented are therefore
+ * YCbCr is defined per Recommendation ITU-R BT.601-7 (03/2011),
+ * previously known as Recommendation CCIR 601-1, except that Cb and Cr
+ * are normalized to the range 0..MAXJSAMPLE rather than -0.5 .. 0.5.
+ * sRGB (standard RGB color space) is defined per IEC 61966-2-1:1999.
+ * sYCC (standard luma-chroma-chroma color space with extended gamut)
+ * is defined per IEC 61966-2-1:1999 Amendment A1:2003 Annex F.
+ * bg-sRGB and bg-sYCC (big gamut standard color spaces)
+ * are defined per IEC 61966-2-1:1999 Amendment A1:2003 Annex G.
+ * Note that the derived conversion coefficients given in some of these
+ * documents are imprecise.  The general conversion equations are
  *
- *	R = Y                + 1.40200 * Cr
- *	G = Y - 0.34414 * Cb - 0.71414 * Cr
- *	B = Y + 1.77200 * Cb
+ *	R = Y + K * (1 - Kr) * Cr
+ *	G = Y - K * (Kb * (1 - Kb) * Cb + Kr * (1 - Kr) * Cr) / (1 - Kr - Kb)
+ *	B = Y + K * (1 - Kb) * Cb
  *
- *	Y = 0.29900 * R + 0.58700 * G + 0.11400 * B
+ *	Y = Kr * R + (1 - Kr - Kb) * G + Kb * B
  *
+ * With Kr = 0.299 and Kb = 0.114 (derived according to SMPTE RP 177-1993
+ * from the 1953 FCC NTSC primaries and CIE Illuminant C), K = 2 for sYCC,
+ * the conversion equations to be implemented are therefore
+ *
+ *	R = Y + 1.402 * Cr
+ *	G = Y - 0.344136286 * Cb - 0.714136286 * Cr
+ *	B = Y + 1.772 * Cb
+ *
+ *	Y = 0.299 * R + 0.587 * G + 0.114 * B
+ *
  * where Cb and Cr represent the incoming values less CENTERJSAMPLE.
- * (These numbers are derived from TIFF 6.0 section 21, dated 3-June-92.)
+ * For bg-sYCC, with K = 4, the equations are
  *
+ *	R = Y + 2.804 * Cr
+ *	G = Y - 0.688272572 * Cb - 1.428272572 * Cr
+ *	B = Y + 3.544 * Cb
+ *
  * To avoid floating-point arithmetic, we represent the fractional constants
  * as integers scaled up by 2^16 (about 4 digits precision); we have to divide
  * the products by 2^16, with appropriate rounding, to get the correct answer.
@@ -58,9 +83,9 @@
  * For even more speed, we avoid doing any multiplications in the inner loop
  * by precalculating the constants times Cb and Cr for all possible values.
  * For 8-bit JSAMPLEs this is very reasonable (only 256 entries per table);
- * for 12-bit samples it is still acceptable.  It's not very reasonable for
- * 16-bit samples, but if you want lossless storage you shouldn't be changing
- * colorspace anyway.
+ * for 9-bit to 12-bit samples it is still acceptable.  It's not very
+ * reasonable for 16-bit samples, but if you want lossless storage you
+ * shouldn't be changing colorspace anyway.
  * The Cr=>R and Cb=>B values can be rounded to integers in advance; the
  * values for the G calculation are left scaled up, since we must add them
  * together before rounding.
@@ -84,11 +109,12 @@
 
 
 /*
- * Initialize tables for YCC->RGB colorspace conversion.
+ * Initialize tables for YCbCr->RGB and BG_YCC->RGB colorspace conversion.
  */
 
 LOCAL(void)
 build_ycc_rgb_table (j_decompress_ptr cinfo)
+/* Normal case, sYCC */
 {
   my_cconvert_ptr cconvert = (my_cconvert_ptr) cinfo->cconvert;
   int i;
@@ -108,24 +134,84 @@
     (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
 				(MAXJSAMPLE+1) * SIZEOF(INT32));
 
+  cconvert->range_limit = cinfo->sample_range_limit;
+
   for (i = 0, x = -CENTERJSAMPLE; i <= MAXJSAMPLE; i++, x++) {
     /* i is the actual input pixel value, in the range 0..MAXJSAMPLE */
     /* The Cb or Cr value we are thinking of is x = i - CENTERJSAMPLE */
-    /* Cr=>R value is nearest int to 1.40200 * x */
+    /* Cr=>R value is nearest int to 1.402 * x */
     cconvert->Cr_r_tab[i] = (int)
-		    RIGHT_SHIFT(FIX(1.40200) * x + ONE_HALF, SCALEBITS);
-    /* Cb=>B value is nearest int to 1.77200 * x */
+		    RIGHT_SHIFT(FIX(1.402) * x + ONE_HALF, SCALEBITS);
+    /* Cb=>B value is nearest int to 1.772 * x */
     cconvert->Cb_b_tab[i] = (int)
-		    RIGHT_SHIFT(FIX(1.77200) * x + ONE_HALF, SCALEBITS);
-    /* Cr=>G value is scaled-up -0.71414 * x */
-    cconvert->Cr_g_tab[i] = (- FIX(0.71414)) * x;
-    /* Cb=>G value is scaled-up -0.34414 * x */
+		    RIGHT_SHIFT(FIX(1.772) * x + ONE_HALF, SCALEBITS);
+    /* Cr=>G value is scaled-up -0.714136286 * x */
+    cconvert->Cr_g_tab[i] = (- FIX(0.714136286)) * x;
+    /* Cb=>G value is scaled-up -0.344136286 * x */
     /* We also add in ONE_HALF so that need not do it in inner loop */
-    cconvert->Cb_g_tab[i] = (- FIX(0.34414)) * x + ONE_HALF;
+    cconvert->Cb_g_tab[i] = (- FIX(0.344136286)) * x + ONE_HALF;
   }
 }
 
 
+LOCAL(void)
+build_bg_ycc_rgb_table (j_decompress_ptr cinfo)
+/* Wide gamut case, bg-sYCC */
+{
+  my_cconvert_ptr cconvert = (my_cconvert_ptr) cinfo->cconvert;
+  int i;
+  INT32 x;
+  SHIFT_TEMPS
+
+  cconvert->Cr_r_tab = (int *)
+    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				(MAXJSAMPLE+1) * SIZEOF(int));
+  cconvert->Cb_b_tab = (int *)
+    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				(MAXJSAMPLE+1) * SIZEOF(int));
+  cconvert->Cr_g_tab = (INT32 *)
+    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				(MAXJSAMPLE+1) * SIZEOF(INT32));
+  cconvert->Cb_g_tab = (INT32 *)
+    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				(MAXJSAMPLE+1) * SIZEOF(INT32));
+
+  cconvert->range_limit = (JSAMPLE *)
+    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				5 * (MAXJSAMPLE+1) * SIZEOF(JSAMPLE));
+
+  for (i = 0, x = -CENTERJSAMPLE; i <= MAXJSAMPLE; i++, x++) {
+    /* i is the actual input pixel value, in the range 0..MAXJSAMPLE */
+    /* The Cb or Cr value we are thinking of is x = i - CENTERJSAMPLE */
+    /* Cr=>R value is nearest int to 2.804 * x */
+    cconvert->Cr_r_tab[i] = (int)
+		    RIGHT_SHIFT(FIX(2.804) * x + ONE_HALF, SCALEBITS);
+    /* Cb=>B value is nearest int to 3.544 * x */
+    cconvert->Cb_b_tab[i] = (int)
+		    RIGHT_SHIFT(FIX(3.544) * x + ONE_HALF, SCALEBITS);
+    /* Cr=>G value is scaled-up -1.428272572 * x */
+    cconvert->Cr_g_tab[i] = (- FIX(1.428272572)) * x;
+    /* Cb=>G value is scaled-up -0.688272572 * x */
+    /* We also add in ONE_HALF so that need not do it in inner loop */
+    cconvert->Cb_g_tab[i] = (- FIX(0.688272572)) * x + ONE_HALF;
+  }
+
+  /* Cb and Cr portions can extend to double range in wide gamut case,
+   * so we prepare an appropriate extended range limit table.
+   */
+
+  /* First segment of range limit table: limit[x] = 0 for x < 0 */
+  MEMZERO(cconvert->range_limit, 2 * (MAXJSAMPLE+1) * SIZEOF(JSAMPLE));
+  cconvert->range_limit += 2 * (MAXJSAMPLE+1);
+  /* Main part of range limit table: limit[x] = x */
+  for (i = 0; i <= MAXJSAMPLE; i++)
+    cconvert->range_limit[i] = (JSAMPLE) i;
+  /* End of range limit table: limit[x] = MAXJSAMPLE for x > MAXJSAMPLE */
+  for (; i < 3 * (MAXJSAMPLE+1); i++)
+    cconvert->range_limit[i] = MAXJSAMPLE;
+}
+
+
 /*
  * Convert some rows of samples to the output colorspace.
  *
@@ -149,7 +235,7 @@
   register JDIMENSION col;
   JDIMENSION num_cols = cinfo->output_width;
   /* copy these pointers into registers if possible */
-  register JSAMPLE * range_limit = cinfo->sample_range_limit;
+  register JSAMPLE * range_limit = cconvert->range_limit;
   register int * Crrtab = cconvert->Cr_r_tab;
   register int * Cbbtab = cconvert->Cb_b_tab;
   register INT32 * Crgtab = cconvert->Cr_g_tab;
@@ -166,12 +252,14 @@
       y  = GETJSAMPLE(inptr0[col]);
       cb = GETJSAMPLE(inptr1[col]);
       cr = GETJSAMPLE(inptr2[col]);
-      /* Range-limiting is essential due to noise introduced by DCT losses. */
-      outptr[RGB_RED] =   range_limit[y + Crrtab[cr]];
+      /* Range-limiting is essential due to noise introduced by DCT losses,
+       * for extended gamut (sYCC) and wide gamut (bg-sYCC) encodings.
+       */
+      outptr[RGB_RED]   = range_limit[y + Crrtab[cr]];
       outptr[RGB_GREEN] = range_limit[y +
 			      ((int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr],
 						 SCALEBITS))];
-      outptr[RGB_BLUE] =  range_limit[y + Cbbtab[cb]];
+      outptr[RGB_BLUE]  = range_limit[y + Cbbtab[cb]];
       outptr += RGB_PIXELSIZE;
     }
   }
@@ -178,7 +266,7 @@
 }
 
 
-/**************** Cases other than YCbCr -> RGB **************/
+/**************** Cases other than YCC -> RGB ****************/
 
 
 /*
@@ -198,9 +286,9 @@
 				(TABLE_SIZE * SIZEOF(INT32)));
 
   for (i = 0; i <= MAXJSAMPLE; i++) {
-    rgb_y_tab[i+R_Y_OFF] = FIX(0.29900) * i;
-    rgb_y_tab[i+G_Y_OFF] = FIX(0.58700) * i;
-    rgb_y_tab[i+B_Y_OFF] = FIX(0.11400) * i + ONE_HALF;
+    rgb_y_tab[i+R_Y_OFF] = FIX(0.299) * i;
+    rgb_y_tab[i+G_Y_OFF] = FIX(0.587) * i;
+    rgb_y_tab[i+B_Y_OFF] = FIX(0.114) * i + ONE_HALF;
   }
 }
 
@@ -244,6 +332,9 @@
 /*
  * [R-G,G,B-G] to [R,G,B] conversion with modulo calculation
  * (inverse color transform).
+ * This can be seen as an adaption of the general YCbCr->RGB
+ * conversion equation with Kr = Kb = 0, while replacing the
+ * normalization by modulo calculation.
  */
 
 METHODDEF(void)
@@ -387,7 +478,7 @@
 
 /*
  * Color conversion for grayscale: just copy the data.
- * This also works for YCbCr -> grayscale conversion, in which
+ * This also works for YCC -> grayscale conversion, in which
  * we just copy the Y (luminance) component and ignore chrominance.
  */
 
@@ -466,7 +557,9 @@
       y  = GETJSAMPLE(inptr0[col]);
       cb = GETJSAMPLE(inptr1[col]);
       cr = GETJSAMPLE(inptr2[col]);
-      /* Range-limiting is essential due to noise introduced by DCT losses. */
+      /* Range-limiting is essential due to noise introduced by DCT losses,
+       * and for extended gamut encodings (sYCC).
+       */
       outptr[0] = range_limit[MAXJSAMPLE - (y + Crrtab[cr])];	/* red */
       outptr[1] = range_limit[MAXJSAMPLE - (y +			/* green */
 			      ((int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr],
@@ -516,6 +609,8 @@
 
   case JCS_RGB:
   case JCS_YCbCr:
+  case JCS_BG_RGB:
+  case JCS_BG_YCC:
     if (cinfo->num_components != 3)
       ERREXIT(cinfo, JERR_BAD_J_COLORSPACE);
     break;
@@ -532,8 +627,10 @@
     break;
   }
 
-  /* Support color transform only for RGB colorspace */
-  if (cinfo->color_transform && cinfo->jpeg_color_space != JCS_RGB)
+  /* Support color transform only for RGB colorspaces */
+  if (cinfo->color_transform &&
+      cinfo->jpeg_color_space != JCS_RGB &&
+      cinfo->jpeg_color_space != JCS_BG_RGB)
     ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
 
   /* Set out_color_components and conversion method based on requested space.
@@ -544,13 +641,16 @@
   switch (cinfo->out_color_space) {
   case JCS_GRAYSCALE:
     cinfo->out_color_components = 1;
-    if (cinfo->jpeg_color_space == JCS_GRAYSCALE ||
-	cinfo->jpeg_color_space == JCS_YCbCr) {
+    switch (cinfo->jpeg_color_space) {
+    case JCS_GRAYSCALE:
+    case JCS_YCbCr:
+    case JCS_BG_YCC:
       cconvert->pub.color_convert = grayscale_convert;
       /* For color->grayscale conversion, only the Y (0) component is needed */
       for (ci = 1; ci < cinfo->num_components; ci++)
 	cinfo->comp_info[ci].component_needed = FALSE;
-    } else if (cinfo->jpeg_color_space == JCS_RGB) {
+      break;
+    case JCS_RGB:
       switch (cinfo->color_transform) {
       case JCT_NONE:
 	cconvert->pub.color_convert = rgb_gray_convert;
@@ -560,21 +660,29 @@
 	break;
       default:
 	ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
-	break;
       }
       build_rgb_y_table(cinfo);
-    } else
+      break;
+    default:
       ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
+    }
     break;
 
   case JCS_RGB:
     cinfo->out_color_components = RGB_PIXELSIZE;
-    if (cinfo->jpeg_color_space == JCS_YCbCr) {
+    switch (cinfo->jpeg_color_space) {
+    case JCS_GRAYSCALE:
+      cconvert->pub.color_convert = gray_rgb_convert;
+      break;
+    case JCS_YCbCr:
       cconvert->pub.color_convert = ycc_rgb_convert;
       build_ycc_rgb_table(cinfo);
-    } else if (cinfo->jpeg_color_space == JCS_GRAYSCALE) {
-      cconvert->pub.color_convert = gray_rgb_convert;
-    } else if (cinfo->jpeg_color_space == JCS_RGB) {
+      break;
+    case JCS_BG_YCC:
+      cconvert->pub.color_convert = ycc_rgb_convert;
+      build_bg_ycc_rgb_table(cinfo);
+      break;
+    case JCS_RGB:
       switch (cinfo->color_transform) {
       case JCT_NONE:
 	cconvert->pub.color_convert = rgb_convert;
@@ -584,7 +692,25 @@
 	break;
       default:
 	ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
+      }
+      break;
+    default:
+      ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
+    }
+    break;
+
+  case JCS_BG_RGB:
+    cinfo->out_color_components = RGB_PIXELSIZE;
+    if (cinfo->jpeg_color_space == JCS_BG_RGB) {
+      switch (cinfo->color_transform) {
+      case JCT_NONE:
+	cconvert->pub.color_convert = rgb_convert;
 	break;
+      case JCT_SUBTRACT_GREEN:
+	cconvert->pub.color_convert = rgb1_rgb_convert;
+	break;
+      default:
+	ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
       }
     } else
       ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
@@ -592,13 +718,17 @@
 
   case JCS_CMYK:
     cinfo->out_color_components = 4;
-    if (cinfo->jpeg_color_space == JCS_YCCK) {
+    switch (cinfo->jpeg_color_space) {
+    case JCS_YCCK:
       cconvert->pub.color_convert = ycck_cmyk_convert;
       build_ycc_rgb_table(cinfo);
-    } else if (cinfo->jpeg_color_space == JCS_CMYK) {
+      break;
+    case JCS_CMYK:
       cconvert->pub.color_convert = null_convert;
-    } else
+      break;
+    default:
       ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
+    }
     break;
 
   default:
Index: dll/3rdparty/libjpeg/jddctmgr.c
===================================================================
--- dll/3rdparty/libjpeg/jddctmgr.c	(revision 62563)
+++ dll/3rdparty/libjpeg/jddctmgr.c	(working copy)
@@ -2,7 +2,7 @@
  * jddctmgr.c
  *
  * Copyright (C) 1994-1996, Thomas G. Lane.
- * Modified 2002-2010 by Guido Vollbeding.
+ * Modified 2002-2013 by Guido Vollbeding.
  * This file is part of the Independent JPEG Group's software.
  * For conditions of distribution and use, see the accompanying README file.
  *
@@ -368,7 +368,7 @@
   idct = (my_idct_ptr)
     (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
 				SIZEOF(my_idct_controller));
-  cinfo->idct = (struct jpeg_inverse_dct *) idct;
+  cinfo->idct = &idct->pub;
   idct->pub.start_pass = start_pass;
 
   for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
Index: dll/3rdparty/libjpeg/jdhuff.c
===================================================================
--- dll/3rdparty/libjpeg/jdhuff.c	(revision 62563)
+++ dll/3rdparty/libjpeg/jdhuff.c	(working copy)
@@ -2,7 +2,7 @@
  * jdhuff.c
  *
  * Copyright (C) 1991-1997, Thomas G. Lane.
- * Modified 2006-2012 by Guido Vollbeding.
+ * Modified 2006-2013 by Guido Vollbeding.
  * This file is part of the Independent JPEG Group's software.
  * For conditions of distribution and use, see the accompanying README file.
  *
@@ -628,6 +628,22 @@
 
 
 /*
+ * Finish up at the end of a Huffman-compressed scan.
+ */
+
+METHODDEF(void)
+finish_pass_huff (j_decompress_ptr cinfo)
+{
+  huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy;
+
+  /* Throw away any unused bits remaining in bit buffer; */
+  /* include any full bytes in next_marker's count of discarded bytes */
+  cinfo->marker->discarded_bytes += entropy->bitstate.bits_left / 8;
+  entropy->bitstate.bits_left = 0;
+}
+
+
+/*
  * Check for a restart marker & resynchronize decoder.
  * Returns FALSE if must suspend.
  */
@@ -638,10 +654,7 @@
   huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy;
   int ci;
 
-  /* Throw away any unused bits remaining in bit buffer; */
-  /* include any full bytes in next_marker's count of discarded bytes */
-  cinfo->marker->discarded_bytes += entropy->bitstate.bits_left / 8;
-  entropy->bitstate.bits_left = 0;
+  finish_pass_huff(cinfo);
 
   /* Advance past the RSTn marker */
   if (! (*cinfo->marker->read_restart_marker) (cinfo))
@@ -846,8 +859,8 @@
 
 /*
  * MCU decoding for DC successive approximation refinement scan.
- * Note: we assume such scans can be multi-component, although the spec
- * is not very clear on the point.
+ * Note: we assume such scans can be multi-component,
+ * although the spec is not very clear on the point.
  */
 
 METHODDEF(boolean)
@@ -854,9 +867,7 @@
 decode_mcu_DC_refine (j_decompress_ptr cinfo, JBLOCKROW *MCU_data)
 {   
   huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy;
-  int p1 = 1 << cinfo->Al;	/* 1 in the bit position being coded */
-  int blkn;
-  JBLOCKROW block;
+  int p1, blkn;
   BITREAD_STATE_VARS;
 
   /* Process restart marker if needed; may have to suspend */
@@ -873,15 +884,15 @@
   /* Load up working state */
   BITREAD_LOAD_STATE(cinfo,entropy->bitstate);
 
+  p1 = 1 << cinfo->Al;		/* 1 in the bit position being coded */
+
   /* Outer loop handles each block in the MCU */
 
   for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) {
-    block = MCU_data[blkn];
-
     /* Encoded data is simply the next bit of the two's-complement DC value */
     CHECK_BIT_BUFFER(br_state, 1, return FALSE);
     if (GET_BITS(1))
-      (*block)[0] |= p1;
+      MCU_data[blkn][0][0] |= p1;
     /* Note: since we use |=, repeating the assignment later is safe */
   }
 
@@ -1517,6 +1528,7 @@
 				SIZEOF(huff_entropy_decoder));
   cinfo->entropy = &entropy->pub;
   entropy->pub.start_pass = start_pass_huff_decoder;
+  entropy->pub.finish_pass = finish_pass_huff;
 
   if (cinfo->progressive_mode) {
     /* Create progression status table */
Index: dll/3rdparty/libjpeg/jdinput.c
===================================================================
--- dll/3rdparty/libjpeg/jdinput.c	(revision 62563)
+++ dll/3rdparty/libjpeg/jdinput.c	(working copy)
@@ -2,7 +2,7 @@
  * jdinput.c
  *
  * Copyright (C) 1991-1997, Thomas G. Lane.
- * Modified 2002-2009 by Guido Vollbeding.
+ * Modified 2002-2013 by Guido Vollbeding.
  * This file is part of the Independent JPEG Group's software.
  * For conditions of distribution and use, see the accompanying README file.
  *
@@ -196,7 +196,7 @@
   /* Hardwire it to "no scaling" */
   cinfo->output_width = cinfo->image_width;
   cinfo->output_height = cinfo->image_height;
-  /* jdinput.c has already initialized DCT_scaled_size,
+  /* initial_setup has already initialized DCT_scaled_size,
    * and has computed unscaled downsampled_width and downsampled_height.
    */
 
@@ -216,8 +216,8 @@
       (long) cinfo->image_width > (long) JPEG_MAX_DIMENSION)
     ERREXIT1(cinfo, JERR_IMAGE_TOO_BIG, (unsigned int) JPEG_MAX_DIMENSION);
 
-  /* For now, precision must match compiled-in value... */
-  if (cinfo->data_precision != BITS_IN_JSAMPLE)
+  /* Only 8 to 12 bits data precision are supported for DCT based JPEG */
+  if (cinfo->data_precision < 8 || cinfo->data_precision > 12)
     ERREXIT1(cinfo, JERR_BAD_PRECISION, cinfo->data_precision);
 
   /* Check that number of components won't exceed internal array sizes */
@@ -537,6 +537,7 @@
 METHODDEF(void)
 finish_input_pass (j_decompress_ptr cinfo)
 {
+  (*cinfo->entropy->finish_pass) (cinfo);
   cinfo->inputctl->consume_input = consume_markers;
 }
 
@@ -646,7 +647,7 @@
   inputctl = (my_inputctl_ptr)
     (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_PERMANENT,
 				SIZEOF(my_input_controller));
-  cinfo->inputctl = (struct jpeg_input_controller *) inputctl;
+  cinfo->inputctl = &inputctl->pub;
   /* Initialize method pointers */
   inputctl->pub.consume_input = consume_markers;
   inputctl->pub.reset_input_controller = reset_input_controller;
Index: dll/3rdparty/libjpeg/jdmarker.c
===================================================================
--- dll/3rdparty/libjpeg/jdmarker.c	(revision 62563)
+++ dll/3rdparty/libjpeg/jdmarker.c	(working copy)
@@ -2,7 +2,7 @@
  * jdmarker.c
  *
  * Copyright (C) 1991-1998, Thomas G. Lane.
- * Modified 2009-2012 by Guido Vollbeding.
+ * Modified 2009-2013 by Guido Vollbeding.
  * This file is part of the Independent JPEG Group's software.
  * For conditions of distribution and use, see the accompanying README file.
  *
@@ -269,8 +269,8 @@
   /* We don't support files in which the image height is initially specified */
   /* as 0 and is later redefined by DNL.  As long as we have to check that,  */
   /* might as well have a general sanity check. */
-  if (cinfo->image_height <= 0 || cinfo->image_width <= 0
-      || cinfo->num_components <= 0)
+  if (cinfo->image_height <= 0 || cinfo->image_width <= 0 ||
+      cinfo->num_components <= 0)
     ERREXIT(cinfo, JERR_EMPTY_IMAGE);
 
   if (length != (cinfo->num_components * 3))
@@ -350,6 +350,9 @@
 
     /* Detect the case where component id's are not unique, and, if so, */
     /* create a fake component id using the same logic as in get_sof.   */
+    /* Note:  This also ensures that all of the SOF components are      */
+    /* referenced in the single scan case, which prevents access to     */
+    /* uninitialized memory in later decoding stages. */
     for (ci = 0; ci < i; ci++) {
       if (c == cinfo->cur_comp_info[ci]->component_id) {
 	c = cinfo->cur_comp_info[0]->component_id;
@@ -493,6 +496,8 @@
     if (count > 256 || ((INT32) count) > length)
       ERREXIT(cinfo, JERR_BAD_HUFF_TABLE);
 
+    MEMZERO(huffval, SIZEOF(huffval)); /* pre-zero array for later copy */
+
     for (i = 0; i < count; i++)
       INPUT_BYTE(cinfo, huffval[i], return FALSE);
 
@@ -735,12 +740,13 @@
     cinfo->X_density = (GETJOCTET(data[8]) << 8) + GETJOCTET(data[9]);
     cinfo->Y_density = (GETJOCTET(data[10]) << 8) + GETJOCTET(data[11]);
     /* Check version.
-     * Major version must be 1, anything else signals an incompatible change.
+     * Major version must be 1 or 2, anything else signals an incompatible
+     * change.
      * (We used to treat this as an error, but now it's a nonfatal warning,
      * because some bozo at Hijaak couldn't read the spec.)
      * Minor version should be 0..2, but process anyway if newer.
      */
-    if (cinfo->JFIF_major_version != 1)
+    if (cinfo->JFIF_major_version != 1 && cinfo->JFIF_major_version != 2)
       WARNMS2(cinfo, JWRN_JFIF_MAJOR,
 	      cinfo->JFIF_major_version, cinfo->JFIF_minor_version);
     /* Generate trace messages */
Index: dll/3rdparty/libjpeg/jdmaster.c
===================================================================
--- dll/3rdparty/libjpeg/jdmaster.c	(revision 62563)
+++ dll/3rdparty/libjpeg/jdmaster.c	(working copy)
@@ -2,7 +2,7 @@
  * jdmaster.c
  *
  * Copyright (C) 1991-1997, Thomas G. Lane.
- * Modified 2002-2011 by Guido Vollbeding.
+ * Modified 2002-2013 by Guido Vollbeding.
  * This file is part of the Independent JPEG Group's software.
  * For conditions of distribution and use, see the accompanying README file.
  *
@@ -51,7 +51,8 @@
   /* jdmerge.c only supports YCC=>RGB color conversion */
   if (cinfo->jpeg_color_space != JCS_YCbCr || cinfo->num_components != 3 ||
       cinfo->out_color_space != JCS_RGB ||
-      cinfo->out_color_components != RGB_PIXELSIZE)
+      cinfo->out_color_components != RGB_PIXELSIZE ||
+      cinfo->color_transform)
     return FALSE;
   /* and it only handles 2h1v or 2h2v sampling ratios */
   if (cinfo->comp_info[0].h_samp_factor != 2 ||
@@ -158,9 +159,11 @@
     cinfo->out_color_components = 1;
     break;
   case JCS_RGB:
+  case JCS_BG_RGB:
     cinfo->out_color_components = RGB_PIXELSIZE;
     break;
   case JCS_YCbCr:
+  case JCS_BG_YCC:
     cinfo->out_color_components = 3;
     break;
   case JCS_CMYK:
@@ -273,10 +276,19 @@
   long samplesperrow;
   JDIMENSION jd_samplesperrow;
 
+  /* For now, precision must match compiled-in value... */
+  if (cinfo->data_precision != BITS_IN_JSAMPLE)
+    ERREXIT1(cinfo, JERR_BAD_PRECISION, cinfo->data_precision);
+
   /* Initialize dimensions and other stuff */
   jpeg_calc_output_dimensions(cinfo);
   prepare_range_limit_table(cinfo);
 
+  /* Sanity check on image dimensions */
+  if (cinfo->output_height <= 0 || cinfo->output_width <= 0 ||
+      cinfo->out_color_components <= 0)
+    ERREXIT(cinfo, JERR_EMPTY_IMAGE);
+
   /* Width of an output scanline must be representable as JDIMENSION. */
   samplesperrow = (long) cinfo->output_width * (long) cinfo->out_color_components;
   jd_samplesperrow = (JDIMENSION) samplesperrow;
@@ -521,7 +533,7 @@
   master = (my_master_ptr)
       (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
 				  SIZEOF(my_decomp_master));
-  cinfo->master = (struct jpeg_decomp_master *) master;
+  cinfo->master = &master->pub;
   master->pub.prepare_for_output_pass = prepare_for_output_pass;
   master->pub.finish_output_pass = finish_output_pass;
 
Index: dll/3rdparty/libjpeg/jdmerge.c
===================================================================
--- dll/3rdparty/libjpeg/jdmerge.c	(revision 62563)
+++ dll/3rdparty/libjpeg/jdmerge.c	(working copy)
@@ -2,6 +2,7 @@
  * jdmerge.c
  *
  * Copyright (C) 1994-1996, Thomas G. Lane.
+ * Modified 2013 by Guido Vollbeding.
  * This file is part of the Independent JPEG Group's software.
  * For conditions of distribution and use, see the accompanying README file.
  *
@@ -103,17 +104,17 @@
   for (i = 0, x = -CENTERJSAMPLE; i <= MAXJSAMPLE; i++, x++) {
     /* i is the actual input pixel value, in the range 0..MAXJSAMPLE */
     /* The Cb or Cr value we are thinking of is x = i - CENTERJSAMPLE */
-    /* Cr=>R value is nearest int to 1.40200 * x */
+    /* Cr=>R value is nearest int to 1.402 * x */
     upsample->Cr_r_tab[i] = (int)
-		    RIGHT_SHIFT(FIX(1.40200) * x + ONE_HALF, SCALEBITS);
-    /* Cb=>B value is nearest int to 1.77200 * x */
+		    RIGHT_SHIFT(FIX(1.402) * x + ONE_HALF, SCALEBITS);
+    /* Cb=>B value is nearest int to 1.772 * x */
     upsample->Cb_b_tab[i] = (int)
-		    RIGHT_SHIFT(FIX(1.77200) * x + ONE_HALF, SCALEBITS);
-    /* Cr=>G value is scaled-up -0.71414 * x */
-    upsample->Cr_g_tab[i] = (- FIX(0.71414)) * x;
-    /* Cb=>G value is scaled-up -0.34414 * x */
+		    RIGHT_SHIFT(FIX(1.772) * x + ONE_HALF, SCALEBITS);
+    /* Cr=>G value is scaled-up -0.714136286 * x */
+    upsample->Cr_g_tab[i] = (- FIX(0.714136286)) * x;
+    /* Cb=>G value is scaled-up -0.344136286 * x */
     /* We also add in ONE_HALF so that need not do it in inner loop */
-    upsample->Cb_g_tab[i] = (- FIX(0.34414)) * x + ONE_HALF;
+    upsample->Cb_g_tab[i] = (- FIX(0.344136286)) * x + ONE_HALF;
   }
 }
 
Index: dll/3rdparty/libjpeg/jfdctint.c
===================================================================
--- dll/3rdparty/libjpeg/jfdctint.c	(revision 62563)
+++ dll/3rdparty/libjpeg/jfdctint.c	(working copy)
@@ -2,7 +2,7 @@
  * jfdctint.c
  *
  * Copyright (C) 1991-1996, Thomas G. Lane.
- * Modification developed 2003-2009 by Guido Vollbeding.
+ * Modification developed 2003-2013 by Guido Vollbeding.
  * This file is part of the Independent JPEG Group's software.
  * For conditions of distribution and use, see the accompanying README file.
  *
@@ -165,9 +165,11 @@
   int ctr;
   SHIFT_TEMPS
 
-  /* Pass 1: process rows. */
-  /* Note results are scaled up by sqrt(8) compared to a true DCT; */
-  /* furthermore, we scale the results by 2**PASS1_BITS. */
+  /* Pass 1: process rows.
+   * Note results are scaled up by sqrt(8) compared to a true DCT;
+   * furthermore, we scale the results by 2**PASS1_BITS.
+   * cK represents sqrt(2) * cos(K*pi/16).
+   */
 
   dataptr = data;
   for (ctr = 0; ctr < DCTSIZE; ctr++) {
@@ -174,7 +176,7 @@
     elemptr = sample_data[ctr] + start_col;
 
     /* Even part per LL&M figure 1 --- note that published figure is faulty;
-     * rotator "sqrt(2)*c1" should be "sqrt(2)*c6".
+     * rotator "c1" should be "c6".
      */
 
     tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[7]);
@@ -196,48 +198,50 @@
     dataptr[0] = (DCTELEM) ((tmp10 + tmp11 - 8 * CENTERJSAMPLE) << PASS1_BITS);
     dataptr[4] = (DCTELEM) ((tmp10 - tmp11) << PASS1_BITS);
 
-    z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100);
+    z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100);       /* c6 */
     /* Add fudge factor here for final descale. */
     z1 += ONE << (CONST_BITS-PASS1_BITS-1);
-    dataptr[2] = (DCTELEM) RIGHT_SHIFT(z1 + MULTIPLY(tmp12, FIX_0_765366865),
-				       CONST_BITS-PASS1_BITS);
-    dataptr[6] = (DCTELEM) RIGHT_SHIFT(z1 - MULTIPLY(tmp13, FIX_1_847759065),
-				       CONST_BITS-PASS1_BITS);
 
+    dataptr[2] = (DCTELEM)
+      RIGHT_SHIFT(z1 + MULTIPLY(tmp12, FIX_0_765366865), /* c2-c6 */
+		  CONST_BITS-PASS1_BITS);
+    dataptr[6] = (DCTELEM)
+      RIGHT_SHIFT(z1 - MULTIPLY(tmp13, FIX_1_847759065), /* c2+c6 */
+		  CONST_BITS-PASS1_BITS);
+
     /* Odd part per figure 8 --- note paper omits factor of sqrt(2).
-     * cK represents sqrt(2) * cos(K*pi/16).
      * i0..i3 in the paper are tmp0..tmp3 here.
      */
 
-    tmp10 = tmp0 + tmp3;
-    tmp11 = tmp1 + tmp2;
     tmp12 = tmp0 + tmp2;
     tmp13 = tmp1 + tmp3;
-    z1 = MULTIPLY(tmp12 + tmp13, FIX_1_175875602); /*  c3 */
+
+    z1 = MULTIPLY(tmp12 + tmp13, FIX_1_175875602);       /*  c3 */
     /* Add fudge factor here for final descale. */
     z1 += ONE << (CONST_BITS-PASS1_BITS-1);
 
-    tmp0  = MULTIPLY(tmp0,    FIX_1_501321110);    /*  c1+c3-c5-c7 */
-    tmp1  = MULTIPLY(tmp1,    FIX_3_072711026);    /*  c1+c3+c5-c7 */
-    tmp2  = MULTIPLY(tmp2,    FIX_2_053119869);    /*  c1+c3-c5+c7 */
-    tmp3  = MULTIPLY(tmp3,    FIX_0_298631336);    /* -c1+c3+c5-c7 */
-    tmp10 = MULTIPLY(tmp10, - FIX_0_899976223);    /*  c7-c3 */
-    tmp11 = MULTIPLY(tmp11, - FIX_2_562915447);    /* -c1-c3 */
-    tmp12 = MULTIPLY(tmp12, - FIX_0_390180644);    /*  c5-c3 */
-    tmp13 = MULTIPLY(tmp13, - FIX_1_961570560);    /* -c3-c5 */
-
+    tmp12 = MULTIPLY(tmp12, - FIX_0_390180644);          /* -c3+c5 */
+    tmp13 = MULTIPLY(tmp13, - FIX_1_961570560);          /* -c3-c5 */
     tmp12 += z1;
     tmp13 += z1;
 
-    dataptr[1] = (DCTELEM)
-      RIGHT_SHIFT(tmp0 + tmp10 + tmp12, CONST_BITS-PASS1_BITS);
-    dataptr[3] = (DCTELEM)
-      RIGHT_SHIFT(tmp1 + tmp11 + tmp13, CONST_BITS-PASS1_BITS);
-    dataptr[5] = (DCTELEM)
-      RIGHT_SHIFT(tmp2 + tmp11 + tmp12, CONST_BITS-PASS1_BITS);
-    dataptr[7] = (DCTELEM)
-      RIGHT_SHIFT(tmp3 + tmp10 + tmp13, CONST_BITS-PASS1_BITS);
+    z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223);       /* -c3+c7 */
+    tmp0 = MULTIPLY(tmp0, FIX_1_501321110);              /*  c1+c3-c5-c7 */
+    tmp3 = MULTIPLY(tmp3, FIX_0_298631336);              /* -c1+c3+c5-c7 */
+    tmp0 += z1 + tmp12;
+    tmp3 += z1 + tmp13;
 
+    z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447);       /* -c1-c3 */
+    tmp1 = MULTIPLY(tmp1, FIX_3_072711026);              /*  c1+c3+c5-c7 */
+    tmp2 = MULTIPLY(tmp2, FIX_2_053119869);              /*  c1+c3-c5+c7 */
+    tmp1 += z1 + tmp13;
+    tmp2 += z1 + tmp12;
+
+    dataptr[1] = (DCTELEM) RIGHT_SHIFT(tmp0, CONST_BITS-PASS1_BITS);
+    dataptr[3] = (DCTELEM) RIGHT_SHIFT(tmp1, CONST_BITS-PASS1_BITS);
+    dataptr[5] = (DCTELEM) RIGHT_SHIFT(tmp2, CONST_BITS-PASS1_BITS);
+    dataptr[7] = (DCTELEM) RIGHT_SHIFT(tmp3, CONST_BITS-PASS1_BITS);
+
     dataptr += DCTSIZE;		/* advance pointer to next row */
   }
 
@@ -244,12 +248,13 @@
   /* Pass 2: process columns.
    * We remove the PASS1_BITS scaling, but leave the results scaled up
    * by an overall factor of 8.
+   * cK represents sqrt(2) * cos(K*pi/16).
    */
 
   dataptr = data;
   for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
     /* Even part per LL&M figure 1 --- note that published figure is faulty;
-     * rotator "sqrt(2)*c1" should be "sqrt(2)*c6".
+     * rotator "c1" should be "c6".
      */
 
     tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*7];
@@ -271,48 +276,50 @@
     dataptr[DCTSIZE*0] = (DCTELEM) RIGHT_SHIFT(tmp10 + tmp11, PASS1_BITS);
     dataptr[DCTSIZE*4] = (DCTELEM) RIGHT_SHIFT(tmp10 - tmp11, PASS1_BITS);
 
-    z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100);
+    z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100);       /* c6 */
     /* Add fudge factor here for final descale. */
     z1 += ONE << (CONST_BITS+PASS1_BITS-1);
+
     dataptr[DCTSIZE*2] = (DCTELEM)
-      RIGHT_SHIFT(z1 + MULTIPLY(tmp12, FIX_0_765366865), CONST_BITS+PASS1_BITS);
+      RIGHT_SHIFT(z1 + MULTIPLY(tmp12, FIX_0_765366865), /* c2-c6 */
+		  CONST_BITS+PASS1_BITS);
     dataptr[DCTSIZE*6] = (DCTELEM)
-      RIGHT_SHIFT(z1 - MULTIPLY(tmp13, FIX_1_847759065), CONST_BITS+PASS1_BITS);
+      RIGHT_SHIFT(z1 - MULTIPLY(tmp13, FIX_1_847759065), /* c2+c6 */
+		  CONST_BITS+PASS1_BITS);
 
     /* Odd part per figure 8 --- note paper omits factor of sqrt(2).
-     * cK represents sqrt(2) * cos(K*pi/16).
      * i0..i3 in the paper are tmp0..tmp3 here.
      */
 
-    tmp10 = tmp0 + tmp3;
-    tmp11 = tmp1 + tmp2;
     tmp12 = tmp0 + tmp2;
     tmp13 = tmp1 + tmp3;
-    z1 = MULTIPLY(tmp12 + tmp13, FIX_1_175875602); /*  c3 */
+
+    z1 = MULTIPLY(tmp12 + tmp13, FIX_1_175875602);       /*  c3 */
     /* Add fudge factor here for final descale. */
     z1 += ONE << (CONST_BITS+PASS1_BITS-1);
 
-    tmp0  = MULTIPLY(tmp0,    FIX_1_501321110);    /*  c1+c3-c5-c7 */
-    tmp1  = MULTIPLY(tmp1,    FIX_3_072711026);    /*  c1+c3+c5-c7 */
-    tmp2  = MULTIPLY(tmp2,    FIX_2_053119869);    /*  c1+c3-c5+c7 */
-    tmp3  = MULTIPLY(tmp3,    FIX_0_298631336);    /* -c1+c3+c5-c7 */
-    tmp10 = MULTIPLY(tmp10, - FIX_0_899976223);    /*  c7-c3 */
-    tmp11 = MULTIPLY(tmp11, - FIX_2_562915447);    /* -c1-c3 */
-    tmp12 = MULTIPLY(tmp12, - FIX_0_390180644);    /*  c5-c3 */
-    tmp13 = MULTIPLY(tmp13, - FIX_1_961570560);    /* -c3-c5 */
-
+    tmp12 = MULTIPLY(tmp12, - FIX_0_390180644);          /* -c3+c5 */
+    tmp13 = MULTIPLY(tmp13, - FIX_1_961570560);          /* -c3-c5 */
     tmp12 += z1;
     tmp13 += z1;
 
-    dataptr[DCTSIZE*1] = (DCTELEM)
-      RIGHT_SHIFT(tmp0 + tmp10 + tmp12, CONST_BITS+PASS1_BITS);
-    dataptr[DCTSIZE*3] = (DCTELEM)
-      RIGHT_SHIFT(tmp1 + tmp11 + tmp13, CONST_BITS+PASS1_BITS);
-    dataptr[DCTSIZE*5] = (DCTELEM)
-      RIGHT_SHIFT(tmp2 + tmp11 + tmp12, CONST_BITS+PASS1_BITS);
-    dataptr[DCTSIZE*7] = (DCTELEM)
-      RIGHT_SHIFT(tmp3 + tmp10 + tmp13, CONST_BITS+PASS1_BITS);
+    z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223);       /* -c3+c7 */
+    tmp0 = MULTIPLY(tmp0, FIX_1_501321110);              /*  c1+c3-c5-c7 */
+    tmp3 = MULTIPLY(tmp3, FIX_0_298631336);              /* -c1+c3+c5-c7 */
+    tmp0 += z1 + tmp12;
+    tmp3 += z1 + tmp13;
 
+    z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447);       /* -c1-c3 */
+    tmp1 = MULTIPLY(tmp1, FIX_3_072711026);              /*  c1+c3+c5-c7 */
+    tmp2 = MULTIPLY(tmp2, FIX_2_053119869);              /*  c1+c3-c5+c7 */
+    tmp1 += z1 + tmp13;
+    tmp2 += z1 + tmp12;
+
+    dataptr[DCTSIZE*1] = (DCTELEM) RIGHT_SHIFT(tmp0, CONST_BITS+PASS1_BITS);
+    dataptr[DCTSIZE*3] = (DCTELEM) RIGHT_SHIFT(tmp1, CONST_BITS+PASS1_BITS);
+    dataptr[DCTSIZE*5] = (DCTELEM) RIGHT_SHIFT(tmp2, CONST_BITS+PASS1_BITS);
+    dataptr[DCTSIZE*7] = (DCTELEM) RIGHT_SHIFT(tmp3, CONST_BITS+PASS1_BITS);
+
     dataptr++;			/* advance pointer to next column */
   }
 }
@@ -338,10 +345,11 @@
   /* Pre-zero output coefficient block. */
   MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
 
-  /* Pass 1: process rows. */
-  /* Note results are scaled up by sqrt(8) compared to a true DCT; */
-  /* furthermore, we scale the results by 2**PASS1_BITS. */
-  /* cK represents sqrt(2) * cos(K*pi/14). */
+  /* Pass 1: process rows.
+   * Note results are scaled up by sqrt(8) compared to a true DCT;
+   * furthermore, we scale the results by 2**PASS1_BITS.
+   * cK represents sqrt(2) * cos(K*pi/14).
+   */
 
   dataptr = data;
   for (ctr = 0; ctr < 7; ctr++) {
@@ -472,10 +480,11 @@
   /* Pre-zero output coefficient block. */
   MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
 
-  /* Pass 1: process rows. */
-  /* Note results are scaled up by sqrt(8) compared to a true DCT; */
-  /* furthermore, we scale the results by 2**PASS1_BITS. */
-  /* cK represents sqrt(2) * cos(K*pi/12). */
+  /* Pass 1: process rows.
+   * Note results are scaled up by sqrt(8) compared to a true DCT;
+   * furthermore, we scale the results by 2**PASS1_BITS.
+   * cK represents sqrt(2) * cos(K*pi/12).
+   */
 
   dataptr = data;
   for (ctr = 0; ctr < 6; ctr++) {
@@ -585,12 +594,13 @@
   /* Pre-zero output coefficient block. */
   MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
 
-  /* Pass 1: process rows. */
-  /* Note results are scaled up by sqrt(8) compared to a true DCT; */
-  /* furthermore, we scale the results by 2**PASS1_BITS. */
-  /* We scale the results further by 2 as part of output adaption */
-  /* scaling for different DCT size. */
-  /* cK represents sqrt(2) * cos(K*pi/10). */
+  /* Pass 1: process rows.
+   * Note results are scaled up by sqrt(8) compared to a true DCT;
+   * furthermore, we scale the results by 2**PASS1_BITS.
+   * We scale the results further by 2 as part of output adaption
+   * scaling for different DCT size.
+   * cK represents sqrt(2) * cos(K*pi/10).
+   */
 
   dataptr = data;
   for (ctr = 0; ctr < 5; ctr++) {
@@ -695,11 +705,12 @@
   /* Pre-zero output coefficient block. */
   MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
 
-  /* Pass 1: process rows. */
-  /* Note results are scaled up by sqrt(8) compared to a true DCT; */
-  /* furthermore, we scale the results by 2**PASS1_BITS. */
-  /* We must also scale the output by (8/4)**2 = 2**2, which we add here. */
-  /* cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point FDCT]. */
+  /* Pass 1: process rows.
+   * Note results are scaled up by sqrt(8) compared to a true DCT;
+   * furthermore, we scale the results by 2**PASS1_BITS.
+   * We must also scale the output by (8/4)**2 = 2**2, which we add here.
+   * cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point FDCT].
+   */
 
   dataptr = data;
   for (ctr = 0; ctr < 4; ctr++) {
@@ -737,6 +748,7 @@
   /* Pass 2: process columns.
    * We remove the PASS1_BITS scaling, but leave the results scaled up
    * by an overall factor of 8.
+   * cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point FDCT].
    */
 
   dataptr = data;
@@ -787,12 +799,13 @@
   /* Pre-zero output coefficient block. */
   MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
 
-  /* Pass 1: process rows. */
-  /* Note results are scaled up by sqrt(8) compared to a true DCT; */
-  /* furthermore, we scale the results by 2**PASS1_BITS. */
-  /* We scale the results further by 2**2 as part of output adaption */
-  /* scaling for different DCT size. */
-  /* cK represents sqrt(2) * cos(K*pi/6). */
+  /* Pass 1: process rows.
+   * Note results are scaled up by sqrt(8) compared to a true DCT;
+   * furthermore, we scale the results by 2**PASS1_BITS.
+   * We scale the results further by 2**2 as part of output adaption
+   * scaling for different DCT size.
+   * cK represents sqrt(2) * cos(K*pi/6).
+   */
 
   dataptr = data;
   for (ctr = 0; ctr < 3; ctr++) {
@@ -869,8 +882,9 @@
   /* Pre-zero output coefficient block. */
   MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
 
-  /* Pass 1: process rows. */
-  /* Note results are scaled up by sqrt(8) compared to a true DCT. */
+  /* Pass 1: process rows.
+   * Note results are scaled up by sqrt(8) compared to a true DCT.
+   */
 
   /* Row 0 */
   elemptr = sample_data[0] + start_col;
@@ -935,11 +949,12 @@
   int ctr;
   SHIFT_TEMPS
 
-  /* Pass 1: process rows. */
-  /* Note results are scaled up by sqrt(8) compared to a true DCT; */
-  /* we scale the results further by 2 as part of output adaption */
-  /* scaling for different DCT size. */
-  /* cK represents sqrt(2) * cos(K*pi/18). */
+  /* Pass 1: process rows.
+   * Note results are scaled up by sqrt(8) compared to a true DCT;
+   * we scale the results further by 2 as part of output adaption
+   * scaling for different DCT size.
+   * cK represents sqrt(2) * cos(K*pi/18).
+   */
 
   dataptr = data;
   ctr = 0;
@@ -1084,11 +1099,12 @@
   int ctr;
   SHIFT_TEMPS
 
-  /* Pass 1: process rows. */
-  /* Note results are scaled up by sqrt(8) compared to a true DCT; */
-  /* we scale the results further by 2 as part of output adaption */
-  /* scaling for different DCT size. */
-  /* cK represents sqrt(2) * cos(K*pi/20). */
+  /* Pass 1: process rows.
+   * Note results are scaled up by sqrt(8) compared to a true DCT;
+   * we scale the results further by 2 as part of output adaption
+   * scaling for different DCT size.
+   * cK represents sqrt(2) * cos(K*pi/20).
+   */
 
   dataptr = data;
   ctr = 0;
@@ -1248,11 +1264,12 @@
   int ctr;
   SHIFT_TEMPS
 
-  /* Pass 1: process rows. */
-  /* Note results are scaled up by sqrt(8) compared to a true DCT; */
-  /* we scale the results further by 2 as part of output adaption */
-  /* scaling for different DCT size. */
-  /* cK represents sqrt(2) * cos(K*pi/22). */
+  /* Pass 1: process rows.
+   * Note results are scaled up by sqrt(8) compared to a true DCT;
+   * we scale the results further by 2 as part of output adaption
+   * scaling for different DCT size.
+   * cK represents sqrt(2) * cos(K*pi/22).
+   */
 
   dataptr = data;
   ctr = 0;
@@ -1430,9 +1447,10 @@
   int ctr;
   SHIFT_TEMPS
 
-  /* Pass 1: process rows. */
-  /* Note results are scaled up by sqrt(8) compared to a true DCT. */
-  /* cK represents sqrt(2) * cos(K*pi/24). */
+  /* Pass 1: process rows.
+   * Note results are scaled up by sqrt(8) compared to a true DCT.
+   * cK represents sqrt(2) * cos(K*pi/24).
+   */
 
   dataptr = data;
   ctr = 0;
@@ -1596,9 +1614,10 @@
   int ctr;
   SHIFT_TEMPS
 
-  /* Pass 1: process rows. */
-  /* Note results are scaled up by sqrt(8) compared to a true DCT. */
-  /* cK represents sqrt(2) * cos(K*pi/26). */
+  /* Pass 1: process rows.
+   * Note results are scaled up by sqrt(8) compared to a true DCT.
+   * cK represents sqrt(2) * cos(K*pi/26).
+   */
 
   dataptr = data;
   ctr = 0;
@@ -1794,9 +1813,10 @@
   int ctr;
   SHIFT_TEMPS
 
-  /* Pass 1: process rows. */
-  /* Note results are scaled up by sqrt(8) compared to a true DCT. */
-  /* cK represents sqrt(2) * cos(K*pi/28). */
+  /* Pass 1: process rows.
+   * Note results are scaled up by sqrt(8) compared to a true DCT.
+   * cK represents sqrt(2) * cos(K*pi/28).
+   */
 
   dataptr = data;
   ctr = 0;
@@ -1995,9 +2015,10 @@
   int ctr;
   SHIFT_TEMPS
 
-  /* Pass 1: process rows. */
-  /* Note results are scaled up by sqrt(8) compared to a true DCT. */
-  /* cK represents sqrt(2) * cos(K*pi/30). */
+  /* Pass 1: process rows.
+   * Note results are scaled up by sqrt(8) compared to a true DCT.
+   * cK represents sqrt(2) * cos(K*pi/30).
+   */
 
   dataptr = data;
   ctr = 0;
@@ -2173,10 +2194,11 @@
   int ctr;
   SHIFT_TEMPS
 
-  /* Pass 1: process rows. */
-  /* Note results are scaled up by sqrt(8) compared to a true DCT; */
-  /* furthermore, we scale the results by 2**PASS1_BITS. */
-  /* cK represents sqrt(2) * cos(K*pi/32). */
+  /* Pass 1: process rows.
+   * Note results are scaled up by sqrt(8) compared to a true DCT;
+   * furthermore, we scale the results by 2**PASS1_BITS.
+   * cK represents sqrt(2) * cos(K*pi/32).
+   */
 
   dataptr = data;
   ctr = 0;
@@ -2275,6 +2297,7 @@
    * We remove the PASS1_BITS scaling, but leave the results scaled up
    * by an overall factor of 8.
    * We must also scale the output by (8/16)**2 = 1/2**2.
+   * cK represents sqrt(2) * cos(K*pi/32).
    */
 
   dataptr = data;
@@ -2380,10 +2403,11 @@
   int ctr;
   SHIFT_TEMPS
 
-  /* Pass 1: process rows. */
-  /* Note results are scaled up by sqrt(8) compared to a true DCT; */
-  /* furthermore, we scale the results by 2**PASS1_BITS. */
-  /* 16-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/32). */
+  /* Pass 1: process rows.
+   * Note results are scaled up by sqrt(8) compared to a true DCT;
+   * furthermore, we scale the results by 2**PASS1_BITS.
+   * 16-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/32).
+   */
 
   dataptr = data;
   ctr = 0;
@@ -2475,12 +2499,13 @@
    * We remove the PASS1_BITS scaling, but leave the results scaled up
    * by an overall factor of 8.
    * We must also scale the output by 8/16 = 1/2.
+   * 8-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/16).
    */
 
   dataptr = data;
   for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
     /* Even part per LL&M figure 1 --- note that published figure is faulty;
-     * rotator "sqrt(2)*c1" should be "sqrt(2)*c6".
+     * rotator "c1" should be "c6".
      */
 
     tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*7];
@@ -2501,44 +2526,44 @@
     dataptr[DCTSIZE*0] = (DCTELEM) DESCALE(tmp10 + tmp11, PASS1_BITS+1);
     dataptr[DCTSIZE*4] = (DCTELEM) DESCALE(tmp10 - tmp11, PASS1_BITS+1);
 
-    z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100);
-    dataptr[DCTSIZE*2] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp12, FIX_0_765366865),
-					   CONST_BITS+PASS1_BITS+1);
-    dataptr[DCTSIZE*6] = (DCTELEM) DESCALE(z1 - MULTIPLY(tmp13, FIX_1_847759065),
-					   CONST_BITS+PASS1_BITS+1);
+    z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100);   /* c6 */
+    dataptr[DCTSIZE*2] = (DCTELEM)
+      DESCALE(z1 + MULTIPLY(tmp12, FIX_0_765366865), /* c2-c6 */
+	      CONST_BITS+PASS1_BITS+1);
+    dataptr[DCTSIZE*6] = (DCTELEM)
+      DESCALE(z1 - MULTIPLY(tmp13, FIX_1_847759065), /* c2+c6 */
+	      CONST_BITS+PASS1_BITS+1);
 
     /* Odd part per figure 8 --- note paper omits factor of sqrt(2).
-     * 8-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/16).
      * i0..i3 in the paper are tmp0..tmp3 here.
      */
 
-    tmp10 = tmp0 + tmp3;
-    tmp11 = tmp1 + tmp2;
     tmp12 = tmp0 + tmp2;
     tmp13 = tmp1 + tmp3;
-    z1 = MULTIPLY(tmp12 + tmp13, FIX_1_175875602); /*  c3 */
 
-    tmp0  = MULTIPLY(tmp0,    FIX_1_501321110);    /*  c1+c3-c5-c7 */
-    tmp1  = MULTIPLY(tmp1,    FIX_3_072711026);    /*  c1+c3+c5-c7 */
-    tmp2  = MULTIPLY(tmp2,    FIX_2_053119869);    /*  c1+c3-c5+c7 */
-    tmp3  = MULTIPLY(tmp3,    FIX_0_298631336);    /* -c1+c3+c5-c7 */
-    tmp10 = MULTIPLY(tmp10, - FIX_0_899976223);    /*  c7-c3 */
-    tmp11 = MULTIPLY(tmp11, - FIX_2_562915447);    /* -c1-c3 */
-    tmp12 = MULTIPLY(tmp12, - FIX_0_390180644);    /*  c5-c3 */
-    tmp13 = MULTIPLY(tmp13, - FIX_1_961570560);    /* -c3-c5 */
-
+    z1 = MULTIPLY(tmp12 + tmp13, FIX_1_175875602);   /*  c3 */
+    tmp12 = MULTIPLY(tmp12, - FIX_0_390180644);      /* -c3+c5 */
+    tmp13 = MULTIPLY(tmp13, - FIX_1_961570560);      /* -c3-c5 */
     tmp12 += z1;
     tmp13 += z1;
 
-    dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp0 + tmp10 + tmp12,
-					   CONST_BITS+PASS1_BITS+1);
-    dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp1 + tmp11 + tmp13,
-					   CONST_BITS+PASS1_BITS+1);
-    dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp2 + tmp11 + tmp12,
-					   CONST_BITS+PASS1_BITS+1);
-    dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp3 + tmp10 + tmp13,
-					   CONST_BITS+PASS1_BITS+1);
+    z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223);   /* -c3+c7 */
+    tmp0 = MULTIPLY(tmp0, FIX_1_501321110);          /*  c1+c3-c5-c7 */
+    tmp3 = MULTIPLY(tmp3, FIX_0_298631336);          /* -c1+c3+c5-c7 */
+    tmp0 += z1 + tmp12;
+    tmp3 += z1 + tmp13;
 
+    z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447);   /* -c1-c3 */
+    tmp1 = MULTIPLY(tmp1, FIX_3_072711026);          /*  c1+c3+c5-c7 */
+    tmp2 = MULTIPLY(tmp2, FIX_2_053119869);          /*  c1+c3-c5+c7 */
+    tmp1 += z1 + tmp13;
+    tmp2 += z1 + tmp12;
+
+    dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp0, CONST_BITS+PASS1_BITS+1);
+    dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp1, CONST_BITS+PASS1_BITS+1);
+    dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp2, CONST_BITS+PASS1_BITS+1);
+    dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp3, CONST_BITS+PASS1_BITS+1);
+
     dataptr++;			/* advance pointer to next column */
   }
 }
@@ -2564,10 +2589,11 @@
   /* Zero bottom row of output coefficient block. */
   MEMZERO(&data[DCTSIZE*7], SIZEOF(DCTELEM) * DCTSIZE);
 
-  /* Pass 1: process rows. */
-  /* Note results are scaled up by sqrt(8) compared to a true DCT; */
-  /* furthermore, we scale the results by 2**PASS1_BITS. */
-  /* 14-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/28). */
+  /* Pass 1: process rows.
+   * Note results are scaled up by sqrt(8) compared to a true DCT;
+   * furthermore, we scale the results by 2**PASS1_BITS.
+   * 14-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/28).
+   */
 
   dataptr = data;
   for (ctr = 0; ctr < 7; ctr++) {
@@ -2727,10 +2753,11 @@
   /* Zero 2 bottom rows of output coefficient block. */
   MEMZERO(&data[DCTSIZE*6], SIZEOF(DCTELEM) * DCTSIZE * 2);
 
-  /* Pass 1: process rows. */
-  /* Note results are scaled up by sqrt(8) compared to a true DCT; */
-  /* furthermore, we scale the results by 2**PASS1_BITS. */
-  /* 12-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/24). */
+  /* Pass 1: process rows.
+   * Note results are scaled up by sqrt(8) compared to a true DCT;
+   * furthermore, we scale the results by 2**PASS1_BITS.
+   * 12-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/24).
+   */
 
   dataptr = data;
   for (ctr = 0; ctr < 6; ctr++) {
@@ -2866,10 +2893,11 @@
   /* Zero 3 bottom rows of output coefficient block. */
   MEMZERO(&data[DCTSIZE*5], SIZEOF(DCTELEM) * DCTSIZE * 3);
 
-  /* Pass 1: process rows. */
-  /* Note results are scaled up by sqrt(8) compared to a true DCT; */
-  /* furthermore, we scale the results by 2**PASS1_BITS. */
-  /* 10-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/20). */
+  /* Pass 1: process rows.
+   * Note results are scaled up by sqrt(8) compared to a true DCT;
+   * furthermore, we scale the results by 2**PASS1_BITS.
+   * 10-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/20).
+   */
 
   dataptr = data;
   for (ctr = 0; ctr < 5; ctr++) {
@@ -2999,10 +3027,12 @@
   /* Zero 4 bottom rows of output coefficient block. */
   MEMZERO(&data[DCTSIZE*4], SIZEOF(DCTELEM) * DCTSIZE * 4);
 
-  /* Pass 1: process rows. */
-  /* Note results are scaled up by sqrt(8) compared to a true DCT; */
-  /* furthermore, we scale the results by 2**PASS1_BITS. */
-  /* We must also scale the output by 8/4 = 2, which we add here. */
+  /* Pass 1: process rows.
+   * Note results are scaled up by sqrt(8) compared to a true DCT;
+   * furthermore, we scale the results by 2**PASS1_BITS.
+   * We must also scale the output by 8/4 = 2, which we add here.
+   * 8-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/16).
+   */
 
   dataptr = data;
   for (ctr = 0; ctr < 4; ctr++) {
@@ -3009,7 +3039,7 @@
     elemptr = sample_data[ctr] + start_col;
 
     /* Even part per LL&M figure 1 --- note that published figure is faulty;
-     * rotator "sqrt(2)*c1" should be "sqrt(2)*c6".
+     * rotator "c1" should be "c6".
      */
 
     tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[7]);
@@ -3032,48 +3062,50 @@
       ((tmp10 + tmp11 - 8 * CENTERJSAMPLE) << (PASS1_BITS+1));
     dataptr[4] = (DCTELEM) ((tmp10 - tmp11) << (PASS1_BITS+1));
 
-    z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100);
+    z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100);       /* c6 */
     /* Add fudge factor here for final descale. */
     z1 += ONE << (CONST_BITS-PASS1_BITS-2);
-    dataptr[2] = (DCTELEM) RIGHT_SHIFT(z1 + MULTIPLY(tmp12, FIX_0_765366865),
-				       CONST_BITS-PASS1_BITS-1);
-    dataptr[6] = (DCTELEM) RIGHT_SHIFT(z1 - MULTIPLY(tmp13, FIX_1_847759065),
-				       CONST_BITS-PASS1_BITS-1);
 
+    dataptr[2] = (DCTELEM)
+      RIGHT_SHIFT(z1 + MULTIPLY(tmp12, FIX_0_765366865), /* c2-c6 */
+		  CONST_BITS-PASS1_BITS-1);
+    dataptr[6] = (DCTELEM)
+      RIGHT_SHIFT(z1 - MULTIPLY(tmp13, FIX_1_847759065), /* c2+c6 */
+		  CONST_BITS-PASS1_BITS-1);
+
     /* Odd part per figure 8 --- note paper omits factor of sqrt(2).
-     * 8-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/16).
      * i0..i3 in the paper are tmp0..tmp3 here.
      */
 
-    tmp10 = tmp0 + tmp3;
-    tmp11 = tmp1 + tmp2;
     tmp12 = tmp0 + tmp2;
     tmp13 = tmp1 + tmp3;
-    z1 = MULTIPLY(tmp12 + tmp13, FIX_1_175875602); /*  c3 */
+
+    z1 = MULTIPLY(tmp12 + tmp13, FIX_1_175875602);       /*  c3 */
     /* Add fudge factor here for final descale. */
     z1 += ONE << (CONST_BITS-PASS1_BITS-2);
 
-    tmp0  = MULTIPLY(tmp0,    FIX_1_501321110);    /*  c1+c3-c5-c7 */
-    tmp1  = MULTIPLY(tmp1,    FIX_3_072711026);    /*  c1+c3+c5-c7 */
-    tmp2  = MULTIPLY(tmp2,    FIX_2_053119869);    /*  c1+c3-c5+c7 */
-    tmp3  = MULTIPLY(tmp3,    FIX_0_298631336);    /* -c1+c3+c5-c7 */
-    tmp10 = MULTIPLY(tmp10, - FIX_0_899976223);    /*  c7-c3 */
-    tmp11 = MULTIPLY(tmp11, - FIX_2_562915447);    /* -c1-c3 */
-    tmp12 = MULTIPLY(tmp12, - FIX_0_390180644);    /*  c5-c3 */
-    tmp13 = MULTIPLY(tmp13, - FIX_1_961570560);    /* -c3-c5 */
-
+    tmp12 = MULTIPLY(tmp12, - FIX_0_390180644);          /* -c3+c5 */
+    tmp13 = MULTIPLY(tmp13, - FIX_1_961570560);          /* -c3-c5 */
     tmp12 += z1;
     tmp13 += z1;
 
-    dataptr[1] = (DCTELEM)
-      RIGHT_SHIFT(tmp0 + tmp10 + tmp12, CONST_BITS-PASS1_BITS-1);
-    dataptr[3] = (DCTELEM)
-      RIGHT_SHIFT(tmp1 + tmp11 + tmp13, CONST_BITS-PASS1_BITS-1);
-    dataptr[5] = (DCTELEM)
-      RIGHT_SHIFT(tmp2 + tmp11 + tmp12, CONST_BITS-PASS1_BITS-1);
-    dataptr[7] = (DCTELEM)
-      RIGHT_SHIFT(tmp3 + tmp10 + tmp13, CONST_BITS-PASS1_BITS-1);
+    z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223);       /* -c3+c7 */
+    tmp0 = MULTIPLY(tmp0, FIX_1_501321110);              /*  c1+c3-c5-c7 */
+    tmp3 = MULTIPLY(tmp3, FIX_0_298631336);              /* -c1+c3+c5-c7 */
+    tmp0 += z1 + tmp12;
+    tmp3 += z1 + tmp13;
 
+    z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447);       /* -c1-c3 */
+    tmp1 = MULTIPLY(tmp1, FIX_3_072711026);              /*  c1+c3+c5-c7 */
+    tmp2 = MULTIPLY(tmp2, FIX_2_053119869);              /*  c1+c3-c5+c7 */
+    tmp1 += z1 + tmp13;
+    tmp2 += z1 + tmp12;
+
+    dataptr[1] = (DCTELEM) RIGHT_SHIFT(tmp0, CONST_BITS-PASS1_BITS-1);
+    dataptr[3] = (DCTELEM) RIGHT_SHIFT(tmp1, CONST_BITS-PASS1_BITS-1);
+    dataptr[5] = (DCTELEM) RIGHT_SHIFT(tmp2, CONST_BITS-PASS1_BITS-1);
+    dataptr[7] = (DCTELEM) RIGHT_SHIFT(tmp3, CONST_BITS-PASS1_BITS-1);
+
     dataptr += DCTSIZE;		/* advance pointer to next row */
   }
 
@@ -3080,7 +3112,8 @@
   /* Pass 2: process columns.
    * We remove the PASS1_BITS scaling, but leave the results scaled up
    * by an overall factor of 8.
-   * 4-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/16).
+   * 4-point FDCT kernel,
+   * cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point FDCT].
    */
 
   dataptr = data;
@@ -3099,7 +3132,7 @@
 
     /* Odd part */
 
-    tmp0 = MULTIPLY(tmp10 + tmp11, FIX_0_541196100);   /* c6 */
+    tmp0 = MULTIPLY(tmp10 + tmp11, FIX_0_541196100);       /* c6 */
     /* Add fudge factor here for final descale. */
     tmp0 += ONE << (CONST_BITS+PASS1_BITS-1);
 
@@ -3134,12 +3167,13 @@
   /* Pre-zero output coefficient block. */
   MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
 
-  /* Pass 1: process rows. */
-  /* Note results are scaled up by sqrt(8) compared to a true DCT; */
-  /* furthermore, we scale the results by 2**PASS1_BITS. */
-  /* We scale the results further by 2 as part of output adaption */
-  /* scaling for different DCT size. */
-  /* 6-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/12). */
+  /* Pass 1: process rows.
+   * Note results are scaled up by sqrt(8) compared to a true DCT;
+   * furthermore, we scale the results by 2**PASS1_BITS.
+   * We scale the results further by 2 as part of output adaption
+   * scaling for different DCT size.
+   * 6-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/12).
+   */
 
   dataptr = data;
   for (ctr = 0; ctr < 3; ctr++) {
@@ -3234,12 +3268,13 @@
   /* Pre-zero output coefficient block. */
   MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
 
-  /* Pass 1: process rows. */
-  /* Note results are scaled up by sqrt(8) compared to a true DCT; */
-  /* furthermore, we scale the results by 2**PASS1_BITS. */
-  /* We must also scale the output by (8/4)*(8/2) = 2**3, which we add here. */
-  /* 4-point FDCT kernel, */
-  /* cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point FDCT]. */
+  /* Pass 1: process rows.
+   * Note results are scaled up by sqrt(8) compared to a true DCT;
+   * furthermore, we scale the results by 2**PASS1_BITS.
+   * We must also scale the output by (8/4)*(8/2) = 2**3, which we add here.
+   * 4-point FDCT kernel,
+   * cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point FDCT].
+   */
 
   dataptr = data;
   for (ctr = 0; ctr < 2; ctr++) {
@@ -3323,10 +3358,12 @@
    */
 
   /* Even part */
+
   /* Apply unsigned->signed conversion */
   data[0] = (DCTELEM) ((tmp0 + tmp1 - 2 * CENTERJSAMPLE) << 5);
 
   /* Odd part */
+
   data[1] = (DCTELEM) ((tmp0 - tmp1) << 5);
 }
 
@@ -3350,9 +3387,11 @@
   int ctr;
   SHIFT_TEMPS
 
-  /* Pass 1: process rows. */
-  /* Note results are scaled up by sqrt(8) compared to a true DCT; */
-  /* furthermore, we scale the results by 2**PASS1_BITS. */
+  /* Pass 1: process rows.
+   * Note results are scaled up by sqrt(8) compared to a true DCT;
+   * furthermore, we scale the results by 2**PASS1_BITS.
+   * 8-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/16).
+   */
 
   dataptr = data;
   ctr = 0;
@@ -3360,7 +3399,7 @@
     elemptr = sample_data[ctr] + start_col;
 
     /* Even part per LL&M figure 1 --- note that published figure is faulty;
-     * rotator "sqrt(2)*c1" should be "sqrt(2)*c6".
+     * rotator "c1" should be "c6".
      */
 
     tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[7]);
@@ -3382,40 +3421,44 @@
     dataptr[0] = (DCTELEM) ((tmp10 + tmp11 - 8 * CENTERJSAMPLE) << PASS1_BITS);
     dataptr[4] = (DCTELEM) ((tmp10 - tmp11) << PASS1_BITS);
 
-    z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100);
-    dataptr[2] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp12, FIX_0_765366865),
-				   CONST_BITS-PASS1_BITS);
-    dataptr[6] = (DCTELEM) DESCALE(z1 - MULTIPLY(tmp13, FIX_1_847759065),
-				   CONST_BITS-PASS1_BITS);
+    z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100);   /* c6 */
+    dataptr[2] = (DCTELEM)
+      DESCALE(z1 + MULTIPLY(tmp12, FIX_0_765366865), /* c2-c6 */
+	      CONST_BITS-PASS1_BITS);
+    dataptr[6] = (DCTELEM)
+      DESCALE(z1 - MULTIPLY(tmp13, FIX_1_847759065), /* c2+c6 */
+	      CONST_BITS-PASS1_BITS);
 
     /* Odd part per figure 8 --- note paper omits factor of sqrt(2).
-     * 8-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/16).
      * i0..i3 in the paper are tmp0..tmp3 here.
      */
 
-    tmp10 = tmp0 + tmp3;
-    tmp11 = tmp1 + tmp2;
     tmp12 = tmp0 + tmp2;
     tmp13 = tmp1 + tmp3;
-    z1 = MULTIPLY(tmp12 + tmp13, FIX_1_175875602); /*  c3 */
 
-    tmp0  = MULTIPLY(tmp0,    FIX_1_501321110);    /*  c1+c3-c5-c7 */
-    tmp1  = MULTIPLY(tmp1,    FIX_3_072711026);    /*  c1+c3+c5-c7 */
-    tmp2  = MULTIPLY(tmp2,    FIX_2_053119869);    /*  c1+c3-c5+c7 */
-    tmp3  = MULTIPLY(tmp3,    FIX_0_298631336);    /* -c1+c3+c5-c7 */
-    tmp10 = MULTIPLY(tmp10, - FIX_0_899976223);    /*  c7-c3 */
-    tmp11 = MULTIPLY(tmp11, - FIX_2_562915447);    /* -c1-c3 */
-    tmp12 = MULTIPLY(tmp12, - FIX_0_390180644);    /*  c5-c3 */
-    tmp13 = MULTIPLY(tmp13, - FIX_1_961570560);    /* -c3-c5 */
-
+    z1 = MULTIPLY(tmp12 + tmp13, FIX_1_175875602);   /*  c3 */
+    tmp12 = MULTIPLY(tmp12, - FIX_0_390180644);      /* -c3+c5 */
+    tmp13 = MULTIPLY(tmp13, - FIX_1_961570560);      /* -c3-c5 */
     tmp12 += z1;
     tmp13 += z1;
 
-    dataptr[1] = (DCTELEM) DESCALE(tmp0 + tmp10 + tmp12, CONST_BITS-PASS1_BITS);
-    dataptr[3] = (DCTELEM) DESCALE(tmp1 + tmp11 + tmp13, CONST_BITS-PASS1_BITS);
-    dataptr[5] = (DCTELEM) DESCALE(tmp2 + tmp11 + tmp12, CONST_BITS-PASS1_BITS);
-    dataptr[7] = (DCTELEM) DESCALE(tmp3 + tmp10 + tmp13, CONST_BITS-PASS1_BITS);
+    z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223);   /* -c3+c7 */
+    tmp0 = MULTIPLY(tmp0, FIX_1_501321110);          /*  c1+c3-c5-c7 */
+    tmp3 = MULTIPLY(tmp3, FIX_0_298631336);          /* -c1+c3+c5-c7 */
+    tmp0 += z1 + tmp12;
+    tmp3 += z1 + tmp13;
 
+    z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447);   /* -c1-c3 */
+    tmp1 = MULTIPLY(tmp1, FIX_3_072711026);          /*  c1+c3+c5-c7 */
+    tmp2 = MULTIPLY(tmp2, FIX_2_053119869);          /*  c1+c3-c5+c7 */
+    tmp1 += z1 + tmp13;
+    tmp2 += z1 + tmp12;
+
+    dataptr[1] = (DCTELEM) DESCALE(tmp0, CONST_BITS-PASS1_BITS);
+    dataptr[3] = (DCTELEM) DESCALE(tmp1, CONST_BITS-PASS1_BITS);
+    dataptr[5] = (DCTELEM) DESCALE(tmp2, CONST_BITS-PASS1_BITS);
+    dataptr[7] = (DCTELEM) DESCALE(tmp3, CONST_BITS-PASS1_BITS);
+
     ctr++;
 
     if (ctr != DCTSIZE) {
@@ -3541,10 +3584,11 @@
   /* Pre-zero output coefficient block. */
   MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
 
-  /* Pass 1: process rows. */
-  /* Note results are scaled up by sqrt(8) compared to a true DCT; */
-  /* furthermore, we scale the results by 2**PASS1_BITS. */
-  /* 7-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/14). */
+  /* Pass 1: process rows.
+   * Note results are scaled up by sqrt(8) compared to a true DCT;
+   * furthermore, we scale the results by 2**PASS1_BITS.
+   * 7-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/14).
+   */
 
   dataptr = data;
   ctr = 0;
@@ -3721,10 +3765,11 @@
   /* Pre-zero output coefficient block. */
   MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
 
-  /* Pass 1: process rows. */
-  /* Note results are scaled up by sqrt(8) compared to a true DCT; */
-  /* furthermore, we scale the results by 2**PASS1_BITS. */
-  /* 6-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/12). */
+  /* Pass 1: process rows.
+   * Note results are scaled up by sqrt(8) compared to a true DCT;
+   * furthermore, we scale the results by 2**PASS1_BITS.
+   * 6-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/12).
+   */
 
   dataptr = data;
   ctr = 0;
@@ -3870,10 +3915,11 @@
   /* Pre-zero output coefficient block. */
   MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
 
-  /* Pass 1: process rows. */
-  /* Note results are scaled up by sqrt(8) compared to a true DCT; */
-  /* furthermore, we scale the results by 2**PASS1_BITS. */
-  /* 5-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/10). */
+  /* Pass 1: process rows.
+   * Note results are scaled up by sqrt(8) compared to a true DCT;
+   * furthermore, we scale the results by 2**PASS1_BITS.
+   * 5-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/10).
+   */
 
   dataptr = data;
   ctr = 0;
@@ -4015,11 +4061,13 @@
   /* Pre-zero output coefficient block. */
   MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
 
-  /* Pass 1: process rows. */
-  /* Note results are scaled up by sqrt(8) compared to a true DCT; */
-  /* furthermore, we scale the results by 2**PASS1_BITS. */
-  /* We must also scale the output by 8/4 = 2, which we add here. */
-  /* 4-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/16). */
+  /* Pass 1: process rows.
+   * Note results are scaled up by sqrt(8) compared to a true DCT;
+   * furthermore, we scale the results by 2**PASS1_BITS.
+   * We must also scale the output by 8/4 = 2, which we add here.
+   * 4-point FDCT kernel,
+   * cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point FDCT].
+   */
 
   dataptr = data;
   for (ctr = 0; ctr < DCTSIZE; ctr++) {
@@ -4057,12 +4105,13 @@
   /* Pass 2: process columns.
    * We remove the PASS1_BITS scaling, but leave the results scaled up
    * by an overall factor of 8.
+   * 8-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/16).
    */
 
   dataptr = data;
   for (ctr = 0; ctr < 4; ctr++) {
     /* Even part per LL&M figure 1 --- note that published figure is faulty;
-     * rotator "sqrt(2)*c1" should be "sqrt(2)*c6".
+     * rotator "c1" should be "c6".
      */
 
     tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*7];
@@ -4084,48 +4133,50 @@
     dataptr[DCTSIZE*0] = (DCTELEM) RIGHT_SHIFT(tmp10 + tmp11, PASS1_BITS);
     dataptr[DCTSIZE*4] = (DCTELEM) RIGHT_SHIFT(tmp10 - tmp11, PASS1_BITS);
 
-    z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100);
+    z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100);       /* c6 */
     /* Add fudge factor here for final descale. */
     z1 += ONE << (CONST_BITS+PASS1_BITS-1);
+
     dataptr[DCTSIZE*2] = (DCTELEM)
-      RIGHT_SHIFT(z1 + MULTIPLY(tmp12, FIX_0_765366865), CONST_BITS+PASS1_BITS);
+      RIGHT_SHIFT(z1 + MULTIPLY(tmp12, FIX_0_765366865), /* c2-c6 */
+		  CONST_BITS+PASS1_BITS);
     dataptr[DCTSIZE*6] = (DCTELEM)
-      RIGHT_SHIFT(z1 - MULTIPLY(tmp13, FIX_1_847759065), CONST_BITS+PASS1_BITS);
+      RIGHT_SHIFT(z1 - MULTIPLY(tmp13, FIX_1_847759065), /* c2+c6 */
+		  CONST_BITS+PASS1_BITS);
 
     /* Odd part per figure 8 --- note paper omits factor of sqrt(2).
-     * 8-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/16).
      * i0..i3 in the paper are tmp0..tmp3 here.
      */
 
-    tmp10 = tmp0 + tmp3;
-    tmp11 = tmp1 + tmp2;
     tmp12 = tmp0 + tmp2;
     tmp13 = tmp1 + tmp3;
-    z1 = MULTIPLY(tmp12 + tmp13, FIX_1_175875602); /*  c3 */
+
+    z1 = MULTIPLY(tmp12 + tmp13, FIX_1_175875602);       /*  c3 */
     /* Add fudge factor here for final descale. */
     z1 += ONE << (CONST_BITS+PASS1_BITS-1);
 
-    tmp0  = MULTIPLY(tmp0,    FIX_1_501321110);    /*  c1+c3-c5-c7 */
-    tmp1  = MULTIPLY(tmp1,    FIX_3_072711026);    /*  c1+c3+c5-c7 */
-    tmp2  = MULTIPLY(tmp2,    FIX_2_053119869);    /*  c1+c3-c5+c7 */
-    tmp3  = MULTIPLY(tmp3,    FIX_0_298631336);    /* -c1+c3+c5-c7 */
-    tmp10 = MULTIPLY(tmp10, - FIX_0_899976223);    /*  c7-c3 */
-    tmp11 = MULTIPLY(tmp11, - FIX_2_562915447);    /* -c1-c3 */
-    tmp12 = MULTIPLY(tmp12, - FIX_0_390180644);    /*  c5-c3 */
-    tmp13 = MULTIPLY(tmp13, - FIX_1_961570560);    /* -c3-c5 */
-
+    tmp12 = MULTIPLY(tmp12, - FIX_0_390180644);          /* -c3+c5 */
+    tmp13 = MULTIPLY(tmp13, - FIX_1_961570560);          /* -c3-c5 */
     tmp12 += z1;
     tmp13 += z1;
 
-    dataptr[DCTSIZE*1] = (DCTELEM)
-      RIGHT_SHIFT(tmp0 + tmp10 + tmp12, CONST_BITS+PASS1_BITS);
-    dataptr[DCTSIZE*3] = (DCTELEM)
-      RIGHT_SHIFT(tmp1 + tmp11 + tmp13, CONST_BITS+PASS1_BITS);
-    dataptr[DCTSIZE*5] = (DCTELEM)
-      RIGHT_SHIFT(tmp2 + tmp11 + tmp12, CONST_BITS+PASS1_BITS);
-    dataptr[DCTSIZE*7] = (DCTELEM)
-      RIGHT_SHIFT(tmp3 + tmp10 + tmp13, CONST_BITS+PASS1_BITS);
+    z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223);       /* -c3+c7 */
+    tmp0 = MULTIPLY(tmp0, FIX_1_501321110);              /*  c1+c3-c5-c7 */
+    tmp3 = MULTIPLY(tmp3, FIX_0_298631336);              /* -c1+c3+c5-c7 */
+    tmp0 += z1 + tmp12;
+    tmp3 += z1 + tmp13;
 
+    z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447);       /* -c1-c3 */
+    tmp1 = MULTIPLY(tmp1, FIX_3_072711026);              /*  c1+c3+c5-c7 */
+    tmp2 = MULTIPLY(tmp2, FIX_2_053119869);              /*  c1+c3-c5+c7 */
+    tmp1 += z1 + tmp13;
+    tmp2 += z1 + tmp12;
+
+    dataptr[DCTSIZE*1] = (DCTELEM) RIGHT_SHIFT(tmp0, CONST_BITS+PASS1_BITS);
+    dataptr[DCTSIZE*3] = (DCTELEM) RIGHT_SHIFT(tmp1, CONST_BITS+PASS1_BITS);
+    dataptr[DCTSIZE*5] = (DCTELEM) RIGHT_SHIFT(tmp2, CONST_BITS+PASS1_BITS);
+    dataptr[DCTSIZE*7] = (DCTELEM) RIGHT_SHIFT(tmp3, CONST_BITS+PASS1_BITS);
+
     dataptr++;			/* advance pointer to next column */
   }
 }
@@ -4150,12 +4201,13 @@
   /* Pre-zero output coefficient block. */
   MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
 
-  /* Pass 1: process rows. */
-  /* Note results are scaled up by sqrt(8) compared to a true DCT; */
-  /* furthermore, we scale the results by 2**PASS1_BITS. */
-  /* We scale the results further by 2 as part of output adaption */
-  /* scaling for different DCT size. */
-  /* 3-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/6). */
+  /* Pass 1: process rows.
+   * Note results are scaled up by sqrt(8) compared to a true DCT;
+   * furthermore, we scale the results by 2**PASS1_BITS.
+   * We scale the results further by 2 as part of output adaption
+   * scaling for different DCT size.
+   * 3-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/6).
+   */
 
   dataptr = data;
   for (ctr = 0; ctr < 6; ctr++) {
@@ -4255,9 +4307,10 @@
   /* Pre-zero output coefficient block. */
   MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
 
-  /* Pass 1: process rows. */
-  /* Note results are scaled up by sqrt(8) compared to a true DCT. */
-  /* We must also scale the output by (8/2)*(8/4) = 2**3, which we add here. */
+  /* Pass 1: process rows.
+   * Note results are scaled up by sqrt(8) compared to a true DCT.
+   * We must also scale the output by (8/2)*(8/4) = 2**3, which we add here.
+   */
 
   dataptr = data;
   for (ctr = 0; ctr < 4; ctr++) {
@@ -4329,18 +4382,23 @@
   /* Pre-zero output coefficient block. */
   MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
 
-  tmp0 = GETJSAMPLE(sample_data[0][start_col]);
-  tmp1 = GETJSAMPLE(sample_data[1][start_col]);
+  /* Pass 1: empty. */
 
-  /* We leave the results scaled up by an overall factor of 8.
+  /* Pass 2: process columns.
+   * We leave the results scaled up by an overall factor of 8.
    * We must also scale the output by (8/1)*(8/2) = 2**5.
    */
 
   /* Even part */
+
+  tmp0 = GETJSAMPLE(sample_data[0][start_col]);
+  tmp1 = GETJSAMPLE(sample_data[1][start_col]);
+
   /* Apply unsigned->signed conversion */
   data[DCTSIZE*0] = (DCTELEM) ((tmp0 + tmp1 - 2 * CENTERJSAMPLE) << 5);
 
   /* Odd part */
+
   data[DCTSIZE*1] = (DCTELEM) ((tmp0 - tmp1) << 5);
 }
 
Index: dll/3rdparty/libjpeg/jidctint.c
===================================================================
--- dll/3rdparty/libjpeg/jidctint.c	(revision 62563)
+++ dll/3rdparty/libjpeg/jidctint.c	(working copy)
@@ -2,7 +2,7 @@
  * jidctint.c
  *
  * Copyright (C) 1991-1998, Thomas G. Lane.
- * Modification developed 2002-2009 by Guido Vollbeding.
+ * Modification developed 2002-2013 by Guido Vollbeding.
  * This file is part of the Independent JPEG Group's software.
  * For conditions of distribution and use, see the accompanying README file.
  *
@@ -165,6 +165,8 @@
 
 /*
  * Perform dequantization and inverse DCT on one block of coefficients.
+ *
+ * cK represents sqrt(2) * cos(K*pi/16).
  */
 
 GLOBAL(void)
@@ -184,9 +186,10 @@
   int workspace[DCTSIZE2];	/* buffers data between passes */
   SHIFT_TEMPS
 
-  /* Pass 1: process columns from input, store into work array. */
-  /* Note results are scaled up by sqrt(8) compared to a true IDCT; */
-  /* furthermore, we scale the results by 2**PASS1_BITS. */
+  /* Pass 1: process columns from input, store into work array.
+   * Note results are scaled up by sqrt(8) compared to a true IDCT;
+   * furthermore, we scale the results by 2**PASS1_BITS.
+   */
 
   inptr = coef_block;
   quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
@@ -223,15 +226,16 @@
       continue;
     }
 
-    /* Even part: reverse the even part of the forward DCT. */
-    /* The rotator is sqrt(2)*c(-6). */
-    
+    /* Even part: reverse the even part of the forward DCT.
+     * The rotator is c(-6).
+     */
+
     z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
     z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
 
-    z1 = MULTIPLY(z2 + z3, FIX_0_541196100);
-    tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865);
-    tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065);
+    z1 = MULTIPLY(z2 + z3, FIX_0_541196100);       /* c6 */
+    tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865);     /* c2-c6 */
+    tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065);     /* c2+c6 */
 
     z2 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
     z3 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
@@ -256,25 +260,25 @@
     tmp1 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
     tmp2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
     tmp3 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
-    
+
     z2 = tmp0 + tmp2;
     z3 = tmp1 + tmp3;
 
-    z1 = MULTIPLY(z2 + z3, FIX_1_175875602); /* sqrt(2) * c3 */
-    z2 = MULTIPLY(z2, - FIX_1_961570560); /* sqrt(2) * (-c3-c5) */
-    z3 = MULTIPLY(z3, - FIX_0_390180644); /* sqrt(2) * (c5-c3) */
+    z1 = MULTIPLY(z2 + z3, FIX_1_175875602);       /*  c3 */
+    z2 = MULTIPLY(z2, - FIX_1_961570560);          /* -c3-c5 */
+    z3 = MULTIPLY(z3, - FIX_0_390180644);          /* -c3+c5 */
     z2 += z1;
     z3 += z1;
 
-    z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* sqrt(2) * (c7-c3) */
-    tmp0 = MULTIPLY(tmp0, FIX_0_298631336); /* sqrt(2) * (-c1+c3+c5-c7) */
-    tmp3 = MULTIPLY(tmp3, FIX_1_501321110); /* sqrt(2) * ( c1+c3-c5-c7) */
+    z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* -c3+c7 */
+    tmp0 = MULTIPLY(tmp0, FIX_0_298631336);        /* -c1+c3+c5-c7 */
+    tmp3 = MULTIPLY(tmp3, FIX_1_501321110);        /*  c1+c3-c5-c7 */
     tmp0 += z1 + z2;
     tmp3 += z1 + z3;
 
-    z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* sqrt(2) * (-c1-c3) */
-    tmp1 = MULTIPLY(tmp1, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */
-    tmp2 = MULTIPLY(tmp2, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */
+    z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* -c1-c3 */
+    tmp1 = MULTIPLY(tmp1, FIX_2_053119869);        /*  c1+c3-c5+c7 */
+    tmp2 = MULTIPLY(tmp2, FIX_3_072711026);        /*  c1+c3+c5-c7 */
     tmp1 += z1 + z3;
     tmp2 += z1 + z2;
 
@@ -288,15 +292,16 @@
     wsptr[DCTSIZE*5] = (int) RIGHT_SHIFT(tmp12 - tmp1, CONST_BITS-PASS1_BITS);
     wsptr[DCTSIZE*3] = (int) RIGHT_SHIFT(tmp13 + tmp0, CONST_BITS-PASS1_BITS);
     wsptr[DCTSIZE*4] = (int) RIGHT_SHIFT(tmp13 - tmp0, CONST_BITS-PASS1_BITS);
-    
+
     inptr++;			/* advance pointers to next column */
     quantptr++;
     wsptr++;
   }
 
-  /* Pass 2: process rows from work array, store into output array. */
-  /* Note that we must descale the results by a factor of 8 == 2**3, */
-  /* and also undo the PASS1_BITS scaling. */
+  /* Pass 2: process rows from work array, store into output array.
+   * Note that we must descale the results by a factor of 8 == 2**3,
+   * and also undo the PASS1_BITS scaling.
+   */
 
   wsptr = workspace;
   for (ctr = 0; ctr < DCTSIZE; ctr++) {
@@ -330,15 +335,16 @@
     }
 #endif
 
-    /* Even part: reverse the even part of the forward DCT. */
-    /* The rotator is sqrt(2)*c(-6). */
-    
+    /* Even part: reverse the even part of the forward DCT.
+     * The rotator is c(-6).
+     */
+
     z2 = (INT32) wsptr[2];
     z3 = (INT32) wsptr[6];
 
-    z1 = MULTIPLY(z2 + z3, FIX_0_541196100);
-    tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865);
-    tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065);
+    z1 = MULTIPLY(z2 + z3, FIX_0_541196100);       /* c6 */
+    tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865);     /* c2-c6 */
+    tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065);     /* c2+c6 */
 
     /* Add fudge factor here for final descale. */
     z2 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2));
@@ -346,7 +352,7 @@
 
     tmp0 = (z2 + z3) << CONST_BITS;
     tmp1 = (z2 - z3) << CONST_BITS;
-    
+
     tmp10 = tmp0 + tmp2;
     tmp13 = tmp0 - tmp2;
     tmp11 = tmp1 + tmp3;
@@ -364,21 +370,21 @@
     z2 = tmp0 + tmp2;
     z3 = tmp1 + tmp3;
 
-    z1 = MULTIPLY(z2 + z3, FIX_1_175875602); /* sqrt(2) * c3 */
-    z2 = MULTIPLY(z2, - FIX_1_961570560); /* sqrt(2) * (-c3-c5) */
-    z3 = MULTIPLY(z3, - FIX_0_390180644); /* sqrt(2) * (c5-c3) */
+    z1 = MULTIPLY(z2 + z3, FIX_1_175875602);       /*  c3 */
+    z2 = MULTIPLY(z2, - FIX_1_961570560);          /* -c3-c5 */
+    z3 = MULTIPLY(z3, - FIX_0_390180644);          /* -c3+c5 */
     z2 += z1;
     z3 += z1;
 
-    z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* sqrt(2) * (c7-c3) */
-    tmp0 = MULTIPLY(tmp0, FIX_0_298631336); /* sqrt(2) * (-c1+c3+c5-c7) */
-    tmp3 = MULTIPLY(tmp3, FIX_1_501321110); /* sqrt(2) * ( c1+c3-c5-c7) */
+    z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* -c3+c7 */
+    tmp0 = MULTIPLY(tmp0, FIX_0_298631336);        /* -c1+c3+c5-c7 */
+    tmp3 = MULTIPLY(tmp3, FIX_1_501321110);        /*  c1+c3-c5-c7 */
     tmp0 += z1 + z2;
     tmp3 += z1 + z3;
 
-    z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* sqrt(2) * (-c1-c3) */
-    tmp1 = MULTIPLY(tmp1, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */
-    tmp2 = MULTIPLY(tmp2, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */
+    z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* -c1-c3 */
+    tmp1 = MULTIPLY(tmp1, FIX_2_053119869);        /*  c1+c3-c5+c7 */
+    tmp2 = MULTIPLY(tmp2, FIX_3_072711026);        /*  c1+c3+c5-c7 */
     tmp1 += z1 + z3;
     tmp2 += z1 + z2;
 
@@ -2835,9 +2841,11 @@
   int workspace[8*8];	/* buffers data between passes */
   SHIFT_TEMPS
 
-  /* Pass 1: process columns from input, store into work array. */
-  /* Note results are scaled up by sqrt(8) compared to a true IDCT; */
-  /* furthermore, we scale the results by 2**PASS1_BITS. */
+  /* Pass 1: process columns from input, store into work array.
+   * Note results are scaled up by sqrt(8) compared to a true IDCT;
+   * furthermore, we scale the results by 2**PASS1_BITS.
+   * 8-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/16).
+   */
 
   inptr = coef_block;
   quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
@@ -2851,7 +2859,7 @@
      * With typical images and quantization tables, half or more of the
      * column DCT calculations can be simplified this way.
      */
-    
+
     if (inptr[DCTSIZE*1] == 0 && inptr[DCTSIZE*2] == 0 &&
 	inptr[DCTSIZE*3] == 0 && inptr[DCTSIZE*4] == 0 &&
 	inptr[DCTSIZE*5] == 0 && inptr[DCTSIZE*6] == 0 &&
@@ -2858,7 +2866,7 @@
 	inptr[DCTSIZE*7] == 0) {
       /* AC terms all zero */
       int dcval = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]) << PASS1_BITS;
-      
+
       wsptr[DCTSIZE*0] = dcval;
       wsptr[DCTSIZE*1] = dcval;
       wsptr[DCTSIZE*2] = dcval;
@@ -2867,23 +2875,24 @@
       wsptr[DCTSIZE*5] = dcval;
       wsptr[DCTSIZE*6] = dcval;
       wsptr[DCTSIZE*7] = dcval;
-      
+
       inptr++;			/* advance pointers to next column */
       quantptr++;
       wsptr++;
       continue;
     }
-    
-    /* Even part: reverse the even part of the forward DCT. */
-    /* The rotator is sqrt(2)*c(-6). */
-    
+
+    /* Even part: reverse the even part of the forward DCT.
+     * The rotator is c(-6).
+     */
+
     z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
     z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
-    
-    z1 = MULTIPLY(z2 + z3, FIX_0_541196100);
-    tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865);
-    tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065);
-    
+
+    z1 = MULTIPLY(z2 + z3, FIX_0_541196100);       /* c6 */
+    tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865);     /* c2-c6 */
+    tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065);     /* c2+c6 */
+
     z2 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
     z3 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
     z2 <<= CONST_BITS;
@@ -2893,44 +2902,44 @@
 
     tmp0 = z2 + z3;
     tmp1 = z2 - z3;
-    
+
     tmp10 = tmp0 + tmp2;
     tmp13 = tmp0 - tmp2;
     tmp11 = tmp1 + tmp3;
     tmp12 = tmp1 - tmp3;
-    
+
     /* Odd part per figure 8; the matrix is unitary and hence its
      * transpose is its inverse.  i0..i3 are y7,y5,y3,y1 respectively.
      */
-    
+
     tmp0 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
     tmp1 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
     tmp2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
     tmp3 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
-    
+
     z2 = tmp0 + tmp2;
     z3 = tmp1 + tmp3;
 
-    z1 = MULTIPLY(z2 + z3, FIX_1_175875602); /* sqrt(2) * c3 */
-    z2 = MULTIPLY(z2, - FIX_1_961570560); /* sqrt(2) * (-c3-c5) */
-    z3 = MULTIPLY(z3, - FIX_0_390180644); /* sqrt(2) * (c5-c3) */
+    z1 = MULTIPLY(z2 + z3, FIX_1_175875602);       /*  c3 */
+    z2 = MULTIPLY(z2, - FIX_1_961570560);          /* -c3-c5 */
+    z3 = MULTIPLY(z3, - FIX_0_390180644);          /* -c3+c5 */
     z2 += z1;
     z3 += z1;
 
-    z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* sqrt(2) * (c7-c3) */
-    tmp0 = MULTIPLY(tmp0, FIX_0_298631336); /* sqrt(2) * (-c1+c3+c5-c7) */
-    tmp3 = MULTIPLY(tmp3, FIX_1_501321110); /* sqrt(2) * ( c1+c3-c5-c7) */
+    z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* -c3+c7 */
+    tmp0 = MULTIPLY(tmp0, FIX_0_298631336);        /* -c1+c3+c5-c7 */
+    tmp3 = MULTIPLY(tmp3, FIX_1_501321110);        /*  c1+c3-c5-c7 */
     tmp0 += z1 + z2;
     tmp3 += z1 + z3;
 
-    z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* sqrt(2) * (-c1-c3) */
-    tmp1 = MULTIPLY(tmp1, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */
-    tmp2 = MULTIPLY(tmp2, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */
+    z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* -c1-c3 */
+    tmp1 = MULTIPLY(tmp1, FIX_2_053119869);        /*  c1+c3-c5+c7 */
+    tmp2 = MULTIPLY(tmp2, FIX_3_072711026);        /*  c1+c3+c5-c7 */
     tmp1 += z1 + z3;
     tmp2 += z1 + z2;
-    
+
     /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
-    
+
     wsptr[DCTSIZE*0] = (int) RIGHT_SHIFT(tmp10 + tmp3, CONST_BITS-PASS1_BITS);
     wsptr[DCTSIZE*7] = (int) RIGHT_SHIFT(tmp10 - tmp3, CONST_BITS-PASS1_BITS);
     wsptr[DCTSIZE*1] = (int) RIGHT_SHIFT(tmp11 + tmp2, CONST_BITS-PASS1_BITS);
@@ -2939,7 +2948,7 @@
     wsptr[DCTSIZE*5] = (int) RIGHT_SHIFT(tmp12 - tmp1, CONST_BITS-PASS1_BITS);
     wsptr[DCTSIZE*3] = (int) RIGHT_SHIFT(tmp13 + tmp0, CONST_BITS-PASS1_BITS);
     wsptr[DCTSIZE*4] = (int) RIGHT_SHIFT(tmp13 - tmp0, CONST_BITS-PASS1_BITS);
-    
+
     inptr++;			/* advance pointers to next column */
     quantptr++;
     wsptr++;
@@ -2948,6 +2957,7 @@
   /* Pass 2: process 8 rows from work array, store into output array.
    * 16-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/32).
    */
+
   wsptr = workspace;
   for (ctr = 0; ctr < 8; ctr++) {
     outptr = output_buf[ctr] + output_col;
@@ -3109,6 +3119,7 @@
   /* Pass 1: process columns from input, store into work array.
    * 7-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/14).
    */
+
   inptr = coef_block;
   quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
   wsptr = workspace;
@@ -3164,6 +3175,7 @@
   /* Pass 2: process 7 rows from work array, store into output array.
    * 14-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/28).
    */
+
   wsptr = workspace;
   for (ctr = 0; ctr < 7; ctr++) {
     outptr = output_buf[ctr] + output_col;
@@ -3304,6 +3316,7 @@
   /* Pass 1: process columns from input, store into work array.
    * 6-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/12).
    */
+
   inptr = coef_block;
   quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
   wsptr = workspace;
@@ -3346,6 +3359,7 @@
   /* Pass 2: process 6 rows from work array, store into output array.
    * 12-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/24).
    */
+
   wsptr = workspace;
   for (ctr = 0; ctr < 6; ctr++) {
     outptr = output_buf[ctr] + output_col;
@@ -3480,6 +3494,7 @@
   /* Pass 1: process columns from input, store into work array.
    * 5-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/10).
    */
+
   inptr = coef_block;
   quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
   wsptr = workspace;
@@ -3520,6 +3535,7 @@
   /* Pass 2: process 5 rows from work array, store into output array.
    * 10-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/20).
    */
+
   wsptr = workspace;
   for (ctr = 0; ctr < 5; ctr++) {
     outptr = output_buf[ctr] + output_col;
@@ -3639,8 +3655,10 @@
   SHIFT_TEMPS
 
   /* Pass 1: process columns from input, store into work array.
-   * 4-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/16).
+   * 4-point IDCT kernel,
+   * cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point IDCT].
    */
+
   inptr = coef_block;
   quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
   wsptr = workspace;
@@ -3675,31 +3693,34 @@
     wsptr[8*2] = (int) (tmp12 - tmp2);
   }
 
-  /* Pass 2: process rows from work array, store into output array. */
-  /* Note that we must descale the results by a factor of 8 == 2**3, */
-  /* and also undo the PASS1_BITS scaling. */
+  /* Pass 2: process rows from work array, store into output array.
+   * Note that we must descale the results by a factor of 8 == 2**3,
+   * and also undo the PASS1_BITS scaling.
+   * 8-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/16).
+   */
 
   wsptr = workspace;
   for (ctr = 0; ctr < 4; ctr++) {
     outptr = output_buf[ctr] + output_col;
 
-    /* Even part: reverse the even part of the forward DCT. */
-    /* The rotator is sqrt(2)*c(-6). */
+    /* Even part: reverse the even part of the forward DCT.
+     * The rotator is c(-6).
+     */
 
     z2 = (INT32) wsptr[2];
     z3 = (INT32) wsptr[6];
-    
-    z1 = MULTIPLY(z2 + z3, FIX_0_541196100);
-    tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865);
-    tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065);
-    
+
+    z1 = MULTIPLY(z2 + z3, FIX_0_541196100);       /* c6 */
+    tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865);     /* c2-c6 */
+    tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065);     /* c2+c6 */
+
     /* Add fudge factor here for final descale. */
     z2 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2));
     z3 = (INT32) wsptr[4];
-    
+
     tmp0 = (z2 + z3) << CONST_BITS;
     tmp1 = (z2 - z3) << CONST_BITS;
-    
+
     tmp10 = tmp0 + tmp2;
     tmp13 = tmp0 - tmp2;
     tmp11 = tmp1 + tmp3;
@@ -3717,21 +3738,21 @@
     z2 = tmp0 + tmp2;
     z3 = tmp1 + tmp3;
 
-    z1 = MULTIPLY(z2 + z3, FIX_1_175875602); /* sqrt(2) * c3 */
-    z2 = MULTIPLY(z2, - FIX_1_961570560); /* sqrt(2) * (-c3-c5) */
-    z3 = MULTIPLY(z3, - FIX_0_390180644); /* sqrt(2) * (c5-c3) */
+    z1 = MULTIPLY(z2 + z3, FIX_1_175875602);       /*  c3 */
+    z2 = MULTIPLY(z2, - FIX_1_961570560);          /* -c3-c5 */
+    z3 = MULTIPLY(z3, - FIX_0_390180644);          /* -c3+c5 */
     z2 += z1;
     z3 += z1;
 
-    z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* sqrt(2) * (c7-c3) */
-    tmp0 = MULTIPLY(tmp0, FIX_0_298631336); /* sqrt(2) * (-c1+c3+c5-c7) */
-    tmp3 = MULTIPLY(tmp3, FIX_1_501321110); /* sqrt(2) * ( c1+c3-c5-c7) */
+    z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* -c3+c7 */
+    tmp0 = MULTIPLY(tmp0, FIX_0_298631336);        /* -c1+c3+c5-c7 */
+    tmp3 = MULTIPLY(tmp3, FIX_1_501321110);        /*  c1+c3-c5-c7 */
     tmp0 += z1 + z2;
     tmp3 += z1 + z3;
 
-    z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* sqrt(2) * (-c1-c3) */
-    tmp1 = MULTIPLY(tmp1, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */
-    tmp2 = MULTIPLY(tmp2, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */
+    z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* -c1-c3 */
+    tmp1 = MULTIPLY(tmp1, FIX_2_053119869);        /*  c1+c3-c5+c7 */
+    tmp2 = MULTIPLY(tmp2, FIX_3_072711026);        /*  c1+c3+c5-c7 */
     tmp1 += z1 + z3;
     tmp2 += z1 + z2;
 
@@ -3793,6 +3814,7 @@
   /* Pass 1: process columns from input, store into work array.
    * 3-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/6).
    */
+
   inptr = coef_block;
   quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
   wsptr = workspace;
@@ -3823,6 +3845,7 @@
   /* Pass 2: process 3 rows from work array, store into output array.
    * 6-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/12).
    */
+
   wsptr = workspace;
   for (ctr = 0; ctr < 3; ctr++) {
     outptr = output_buf[ctr] + output_col;
@@ -3924,6 +3947,7 @@
    * 4-point IDCT kernel,
    * cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point IDCT].
    */
+
   wsptr = workspace;
   for (ctr = 0; ctr < 2; ctr++) {
     outptr = output_buf[ctr] + output_col;
@@ -3979,7 +4003,7 @@
 	       JCOEFPTR coef_block,
 	       JSAMPARRAY output_buf, JDIMENSION output_col)
 {
-  INT32 tmp0, tmp10;
+  INT32 tmp0, tmp1;
   ISLOW_MULT_TYPE * quantptr;
   JSAMPROW outptr;
   JSAMPLE *range_limit = IDCT_range_limit(cinfo);
@@ -3994,18 +4018,18 @@
 
   /* Even part */
 
-  tmp10 = DEQUANTIZE(coef_block[0], quantptr[0]);
+  tmp0 = DEQUANTIZE(coef_block[0], quantptr[0]);
   /* Add fudge factor here for final descale. */
-  tmp10 += ONE << 2;
+  tmp0 += ONE << 2;
 
   /* Odd part */
 
-  tmp0 = DEQUANTIZE(coef_block[1], quantptr[1]);
+  tmp1 = DEQUANTIZE(coef_block[1], quantptr[1]);
 
   /* Final output stage */
 
-  outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0, 3) & RANGE_MASK];
-  outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0, 3) & RANGE_MASK];
+  outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp0 + tmp1, 3) & RANGE_MASK];
+  outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp0 - tmp1, 3) & RANGE_MASK];
 }
 
 
@@ -4036,6 +4060,7 @@
   /* Pass 1: process columns from input, store into work array.
    * 16-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/32).
    */
+
   inptr = coef_block;
   quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
   wsptr = workspace;
@@ -4134,69 +4159,72 @@
     wsptr[8*7]  = (int) RIGHT_SHIFT(tmp27 + tmp13, CONST_BITS-PASS1_BITS);
     wsptr[8*8]  = (int) RIGHT_SHIFT(tmp27 - tmp13, CONST_BITS-PASS1_BITS);
   }
-  
-  /* Pass 2: process rows from work array, store into output array. */
-  /* Note that we must descale the results by a factor of 8 == 2**3, */
-  /* and also undo the PASS1_BITS scaling. */
 
+  /* Pass 2: process rows from work array, store into output array.
+   * Note that we must descale the results by a factor of 8 == 2**3,
+   * and also undo the PASS1_BITS scaling.
+   * 8-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/16).
+   */
+
   wsptr = workspace;
   for (ctr = 0; ctr < 16; ctr++) {
     outptr = output_buf[ctr] + output_col;
-    
-    /* Even part: reverse the even part of the forward DCT. */
-    /* The rotator is sqrt(2)*c(-6). */
-    
+
+    /* Even part: reverse the even part of the forward DCT.
+     * The rotator is c(-6).
+     */
+
     z2 = (INT32) wsptr[2];
     z3 = (INT32) wsptr[6];
-    
-    z1 = MULTIPLY(z2 + z3, FIX_0_541196100);
-    tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865);
-    tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065);
-    
+
+    z1 = MULTIPLY(z2 + z3, FIX_0_541196100);       /* c6 */
+    tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865);     /* c2-c6 */
+    tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065);     /* c2+c6 */
+
     /* Add fudge factor here for final descale. */
     z2 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2));
     z3 = (INT32) wsptr[4];
-    
+
     tmp0 = (z2 + z3) << CONST_BITS;
     tmp1 = (z2 - z3) << CONST_BITS;
-    
+
     tmp10 = tmp0 + tmp2;
     tmp13 = tmp0 - tmp2;
     tmp11 = tmp1 + tmp3;
     tmp12 = tmp1 - tmp3;
-    
+
     /* Odd part per figure 8; the matrix is unitary and hence its
      * transpose is its inverse.  i0..i3 are y7,y5,y3,y1 respectively.
      */
-    
+
     tmp0 = (INT32) wsptr[7];
     tmp1 = (INT32) wsptr[5];
     tmp2 = (INT32) wsptr[3];
     tmp3 = (INT32) wsptr[1];
-    
+
     z2 = tmp0 + tmp2;
     z3 = tmp1 + tmp3;
 
-    z1 = MULTIPLY(z2 + z3, FIX_1_175875602); /* sqrt(2) * c3 */
-    z2 = MULTIPLY(z2, - FIX_1_961570560); /* sqrt(2) * (-c3-c5) */
-    z3 = MULTIPLY(z3, - FIX_0_390180644); /* sqrt(2) * (c5-c3) */
+    z1 = MULTIPLY(z2 + z3, FIX_1_175875602);       /*  c3 */
+    z2 = MULTIPLY(z2, - FIX_1_961570560);          /* -c3-c5 */
+    z3 = MULTIPLY(z3, - FIX_0_390180644);          /* -c3+c5 */
     z2 += z1;
     z3 += z1;
 
-    z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* sqrt(2) * (c7-c3) */
-    tmp0 = MULTIPLY(tmp0, FIX_0_298631336); /* sqrt(2) * (-c1+c3+c5-c7) */
-    tmp3 = MULTIPLY(tmp3, FIX_1_501321110); /* sqrt(2) * ( c1+c3-c5-c7) */
+    z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* -c3+c7 */
+    tmp0 = MULTIPLY(tmp0, FIX_0_298631336);        /* -c1+c3+c5-c7 */
+    tmp3 = MULTIPLY(tmp3, FIX_1_501321110);        /*  c1+c3-c5-c7 */
     tmp0 += z1 + z2;
     tmp3 += z1 + z3;
 
-    z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* sqrt(2) * (-c1-c3) */
-    tmp1 = MULTIPLY(tmp1, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */
-    tmp2 = MULTIPLY(tmp2, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */
+    z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* -c1-c3 */
+    tmp1 = MULTIPLY(tmp1, FIX_2_053119869);        /*  c1+c3-c5+c7 */
+    tmp2 = MULTIPLY(tmp2, FIX_3_072711026);        /*  c1+c3+c5-c7 */
     tmp1 += z1 + z3;
     tmp2 += z1 + z2;
-    
+
     /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
-    
+
     outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp3,
 					      CONST_BITS+PASS1_BITS+3)
 			    & RANGE_MASK];
@@ -4221,7 +4249,7 @@
     outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp13 - tmp0,
 					      CONST_BITS+PASS1_BITS+3)
 			    & RANGE_MASK];
-    
+
     wsptr += DCTSIZE;		/* advance pointer to next row */
   }
 }
@@ -4254,6 +4282,7 @@
   /* Pass 1: process columns from input, store into work array.
    * 14-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/28).
    */
+
   inptr = coef_block;
   quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
   wsptr = workspace;
@@ -4341,6 +4370,7 @@
   /* Pass 2: process 14 rows from work array, store into output array.
    * 7-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/14).
    */
+
   wsptr = workspace;
   for (ctr = 0; ctr < 14; ctr++) {
     outptr = output_buf[ctr] + output_col;
@@ -4437,6 +4467,7 @@
   /* Pass 1: process columns from input, store into work array.
    * 12-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/24).
    */
+
   inptr = coef_block;
   quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
   wsptr = workspace;
@@ -4520,6 +4551,7 @@
   /* Pass 2: process 12 rows from work array, store into output array.
    * 6-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/12).
    */
+
   wsptr = workspace;
   for (ctr = 0; ctr < 12; ctr++) {
     outptr = output_buf[ctr] + output_col;
@@ -4601,6 +4633,7 @@
   /* Pass 1: process columns from input, store into work array.
    * 10-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/20).
    */
+
   inptr = coef_block;
   quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
   wsptr = workspace;
@@ -4676,6 +4709,7 @@
   /* Pass 2: process 10 rows from work array, store into output array.
    * 5-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/10).
    */
+
   wsptr = workspace;
   for (ctr = 0; ctr < 10; ctr++) {
     outptr = output_buf[ctr] + output_col;
@@ -4750,9 +4784,11 @@
   int workspace[4*8];	/* buffers data between passes */
   SHIFT_TEMPS
 
-  /* Pass 1: process columns from input, store into work array. */
-  /* Note results are scaled up by sqrt(8) compared to a true IDCT; */
-  /* furthermore, we scale the results by 2**PASS1_BITS. */
+  /* Pass 1: process columns from input, store into work array.
+   * Note results are scaled up by sqrt(8) compared to a true IDCT;
+   * furthermore, we scale the results by 2**PASS1_BITS.
+   * 8-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/16).
+   */
 
   inptr = coef_block;
   quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
@@ -4789,16 +4825,17 @@
       continue;
     }
 
-    /* Even part: reverse the even part of the forward DCT. */
-    /* The rotator is sqrt(2)*c(-6). */
+    /* Even part: reverse the even part of the forward DCT.
+     * The rotator is c(-6).
+     */
 
     z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
     z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
-    
-    z1 = MULTIPLY(z2 + z3, FIX_0_541196100);
-    tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865);
-    tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065);
-    
+
+    z1 = MULTIPLY(z2 + z3, FIX_0_541196100);       /* c6 */
+    tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865);     /* c2-c6 */
+    tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065);     /* c2+c6 */
+
     z2 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
     z3 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
     z2 <<= CONST_BITS;
@@ -4808,7 +4845,7 @@
 
     tmp0 = z2 + z3;
     tmp1 = z2 - z3;
-    
+
     tmp10 = tmp0 + tmp2;
     tmp13 = tmp0 - tmp2;
     tmp11 = tmp1 + tmp3;
@@ -4826,21 +4863,21 @@
     z2 = tmp0 + tmp2;
     z3 = tmp1 + tmp3;
 
-    z1 = MULTIPLY(z2 + z3, FIX_1_175875602); /* sqrt(2) * c3 */
-    z2 = MULTIPLY(z2, - FIX_1_961570560); /* sqrt(2) * (-c3-c5) */
-    z3 = MULTIPLY(z3, - FIX_0_390180644); /* sqrt(2) * (c5-c3) */
+    z1 = MULTIPLY(z2 + z3, FIX_1_175875602);       /*  c3 */
+    z2 = MULTIPLY(z2, - FIX_1_961570560);          /* -c3-c5 */
+    z3 = MULTIPLY(z3, - FIX_0_390180644);          /* -c3+c5 */
     z2 += z1;
     z3 += z1;
 
-    z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* sqrt(2) * (c7-c3) */
-    tmp0 = MULTIPLY(tmp0, FIX_0_298631336); /* sqrt(2) * (-c1+c3+c5-c7) */
-    tmp3 = MULTIPLY(tmp3, FIX_1_501321110); /* sqrt(2) * ( c1+c3-c5-c7) */
+    z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* -c3+c7 */
+    tmp0 = MULTIPLY(tmp0, FIX_0_298631336);        /* -c1+c3+c5-c7 */
+    tmp3 = MULTIPLY(tmp3, FIX_1_501321110);        /*  c1+c3-c5-c7 */
     tmp0 += z1 + z2;
     tmp3 += z1 + z3;
 
-    z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* sqrt(2) * (-c1-c3) */
-    tmp1 = MULTIPLY(tmp1, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */
-    tmp2 = MULTIPLY(tmp2, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */
+    z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* -c1-c3 */
+    tmp1 = MULTIPLY(tmp1, FIX_2_053119869);        /*  c1+c3-c5+c7 */
+    tmp2 = MULTIPLY(tmp2, FIX_3_072711026);        /*  c1+c3+c5-c7 */
     tmp1 += z1 + z3;
     tmp2 += z1 + z2;
 
@@ -4861,8 +4898,10 @@
   }
 
   /* Pass 2: process 8 rows from work array, store into output array.
-   * 4-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/16).
+   * 4-point IDCT kernel,
+   * cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point IDCT].
    */
+
   wsptr = workspace;
   for (ctr = 0; ctr < 8; ctr++) {
     outptr = output_buf[ctr] + output_col;
@@ -4900,7 +4939,7 @@
     outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp2,
 					      CONST_BITS+PASS1_BITS+3)
 			    & RANGE_MASK];
-    
+
     wsptr += 4;		/* advance pointer to next row */
   }
 }
@@ -4932,6 +4971,7 @@
   /* Pass 1: process columns from input, store into work array.
    * 6-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/12).
    */
+
   inptr = coef_block;
   quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
   wsptr = workspace;
@@ -4974,6 +5014,7 @@
   /* Pass 2: process 6 rows from work array, store into output array.
    * 3-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/6).
    */
+
   wsptr = workspace;
   for (ctr = 0; ctr < 6; ctr++) {
     outptr = output_buf[ctr] + output_col;
@@ -5037,6 +5078,7 @@
    * 4-point IDCT kernel,
    * cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point IDCT].
    */
+
   inptr = coef_block;
   quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
   wsptr = workspace;
@@ -5106,7 +5148,7 @@
 	       JCOEFPTR coef_block,
 	       JSAMPARRAY output_buf, JDIMENSION output_col)
 {
-  INT32 tmp0, tmp10;
+  INT32 tmp0, tmp1;
   ISLOW_MULT_TYPE * quantptr;
   JSAMPLE *range_limit = IDCT_range_limit(cinfo);
   SHIFT_TEMPS
@@ -5117,19 +5159,19 @@
 
   /* Even part */
     
-  tmp10 = DEQUANTIZE(coef_block[DCTSIZE*0], quantptr[DCTSIZE*0]);
+  tmp0 = DEQUANTIZE(coef_block[DCTSIZE*0], quantptr[DCTSIZE*0]);
   /* Add fudge factor here for final descale. */
-  tmp10 += ONE << 2;
+  tmp0 += ONE << 2;
 
   /* Odd part */
 
-  tmp0 = DEQUANTIZE(coef_block[DCTSIZE*1], quantptr[DCTSIZE*1]);
+  tmp1 = DEQUANTIZE(coef_block[DCTSIZE*1], quantptr[DCTSIZE*1]);
 
   /* Final output stage */
 
-  output_buf[0][output_col] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0, 3)
+  output_buf[0][output_col] = range_limit[(int) RIGHT_SHIFT(tmp0 + tmp1, 3)
 					  & RANGE_MASK];
-  output_buf[1][output_col] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0, 3)
+  output_buf[1][output_col] = range_limit[(int) RIGHT_SHIFT(tmp0 - tmp1, 3)
 					  & RANGE_MASK];
 }
 
Index: dll/3rdparty/libjpeg/jpegtran.c
===================================================================
--- dll/3rdparty/libjpeg/jpegtran.c	(revision 62563)
+++ dll/3rdparty/libjpeg/jpegtran.c	(working copy)
@@ -1,7 +1,7 @@
 /*
  * jpegtran.c
  *
- * Copyright (C) 1995-2012, Thomas G. Lane, Guido Vollbeding.
+ * Copyright (C) 1995-2013, Thomas G. Lane, Guido Vollbeding.
  * This file is part of the Independent JPEG Group's software.
  * For conditions of distribution and use, see the accompanying README file.
  *
@@ -66,8 +66,8 @@
   fprintf(stderr, "Switches for modifying the image:\n");
 #if TRANSFORMS_SUPPORTED
   fprintf(stderr, "  -crop WxH+X+Y  Crop to a rectangular subarea\n");
+  fprintf(stderr, "  -flip [horizontal|vertical]  Mirror image (left-right or top-bottom)\n");
   fprintf(stderr, "  -grayscale     Reduce to grayscale (omit color data)\n");
-  fprintf(stderr, "  -flip [horizontal|vertical]  Mirror image (left-right or top-bottom)\n");
   fprintf(stderr, "  -perfect       Fail if there is non-transformable edge blocks\n");
   fprintf(stderr, "  -rotate [90|180|270]         Rotate image (degrees clockwise)\n");
 #endif
@@ -76,6 +76,7 @@
   fprintf(stderr, "  -transpose     Transpose image\n");
   fprintf(stderr, "  -transverse    Transverse transpose image\n");
   fprintf(stderr, "  -trim          Drop non-transformable edge blocks\n");
+  fprintf(stderr, "  -wipe WxH+X+Y  Wipe (gray out) a rectangular subarea\n");
 #endif
   fprintf(stderr, "Switches for advanced users:\n");
 #ifdef C_ARITH_CODING_SUPPORTED
@@ -187,7 +188,8 @@
 #if TRANSFORMS_SUPPORTED
       if (++argn >= argc)	/* advance to next argument */
 	usage();
-      if (! jtransform_parse_crop_spec(&transformoption, argv[argn])) {
+      if (transformoption.crop /* reject multiple crop/wipe requests */ ||
+	  ! jtransform_parse_crop_spec(&transformoption, argv[argn])) {
 	fprintf(stderr, "%s: bogus -crop argument '%s'\n",
 		progname, argv[argn]);
 	exit(EXIT_FAILURE);
@@ -336,6 +338,21 @@
       /* Trim off any partial edge MCUs that the transform can't handle. */
       transformoption.trim = TRUE;
 
+    } else if (keymatch(arg, "wipe", 1)) {
+#if TRANSFORMS_SUPPORTED
+      if (++argn >= argc)	/* advance to next argument */
+	usage();
+      if (transformoption.crop /* reject multiple crop/wipe requests */ ||
+	  ! jtransform_parse_crop_spec(&transformoption, argv[argn])) {
+	fprintf(stderr, "%s: bogus -wipe argument '%s'\n",
+		progname, argv[argn]);
+	exit(EXIT_FAILURE);
+      }
+      select_transform(JXFORM_WIPE);
+#else
+      select_transform(JXFORM_NONE);	/* force an error */
+#endif
+
     } else {
       usage();			/* bogus switch */
     }
Index: dll/3rdparty/libjpeg/transupp.c
===================================================================
--- dll/3rdparty/libjpeg/transupp.c	(revision 62563)
+++ dll/3rdparty/libjpeg/transupp.c	(working copy)
@@ -1,7 +1,7 @@
 /*
  * transupp.c
  *
- * Copyright (C) 1997-2012, Thomas G. Lane, Guido Vollbeding.
+ * Copyright (C) 1997-2013, Thomas G. Lane, Guido Vollbeding.
  * This file is part of the Independent JPEG Group's software.
  * For conditions of distribution and use, see the accompanying README file.
  *
@@ -114,6 +114,116 @@
 
 
 LOCAL(void)
+do_crop_ext (j_decompress_ptr srcinfo, j_compress_ptr dstinfo,
+	     JDIMENSION x_crop_offset, JDIMENSION y_crop_offset,
+	     jvirt_barray_ptr *src_coef_arrays,
+	     jvirt_barray_ptr *dst_coef_arrays)
+/* Crop.  This is only used when no rotate/flip is requested with the crop.
+ * Extension: If the destination size is larger than the source, we fill in
+ * the extra area with zero (neutral gray).  Note we also have to zero partial
+ * iMCUs at the right and bottom edge of the source image area in this case.
+ */
+{
+  JDIMENSION MCU_cols, MCU_rows, comp_width, comp_height;
+  JDIMENSION dst_blk_y, x_crop_blocks, y_crop_blocks;
+  int ci, offset_y;
+  JBLOCKARRAY src_buffer, dst_buffer;
+  jpeg_component_info *compptr;
+
+  MCU_cols = srcinfo->output_width /
+    (dstinfo->max_h_samp_factor * dstinfo->min_DCT_h_scaled_size);
+  MCU_rows = srcinfo->output_height /
+    (dstinfo->max_v_samp_factor * dstinfo->min_DCT_v_scaled_size);
+
+  for (ci = 0; ci < dstinfo->num_components; ci++) {
+    compptr = dstinfo->comp_info + ci;
+    comp_width = MCU_cols * compptr->h_samp_factor;
+    comp_height = MCU_rows * compptr->v_samp_factor;
+    x_crop_blocks = x_crop_offset * compptr->h_samp_factor;
+    y_crop_blocks = y_crop_offset * compptr->v_samp_factor;
+    for (dst_blk_y = 0; dst_blk_y < compptr->height_in_blocks;
+	 dst_blk_y += compptr->v_samp_factor) {
+      dst_buffer = (*srcinfo->mem->access_virt_barray)
+	((j_common_ptr) srcinfo, dst_coef_arrays[ci], dst_blk_y,
+	 (JDIMENSION) compptr->v_samp_factor, TRUE);
+      if (dstinfo->jpeg_height > srcinfo->output_height) {
+	if (dst_blk_y < y_crop_blocks ||
+	    dst_blk_y >= comp_height + y_crop_blocks) {
+	  for (offset_y = 0; offset_y < compptr->v_samp_factor; offset_y++) {
+	    FMEMZERO(dst_buffer[offset_y],
+		     compptr->width_in_blocks * SIZEOF(JBLOCK));
+	  }
+	  continue;
+	}
+	src_buffer = (*srcinfo->mem->access_virt_barray)
+	  ((j_common_ptr) srcinfo, src_coef_arrays[ci],
+	   dst_blk_y - y_crop_blocks,
+	   (JDIMENSION) compptr->v_samp_factor, FALSE);
+      } else {
+	src_buffer = (*srcinfo->mem->access_virt_barray)
+	  ((j_common_ptr) srcinfo, src_coef_arrays[ci],
+	   dst_blk_y + y_crop_blocks,
+	   (JDIMENSION) compptr->v_samp_factor, FALSE);
+      }
+      for (offset_y = 0; offset_y < compptr->v_samp_factor; offset_y++) {
+	if (dstinfo->jpeg_width > srcinfo->output_width) {
+	  if (x_crop_blocks > 0) {
+	    FMEMZERO(dst_buffer[offset_y],
+		     x_crop_blocks * SIZEOF(JBLOCK));
+	  }
+	  jcopy_block_row(src_buffer[offset_y],
+			  dst_buffer[offset_y] + x_crop_blocks,
+			  comp_width);
+	  if (compptr->width_in_blocks > comp_width + x_crop_blocks) {
+	    FMEMZERO(dst_buffer[offset_y] +
+		       comp_width + x_crop_blocks,
+		     (compptr->width_in_blocks -
+		       comp_width - x_crop_blocks) * SIZEOF(JBLOCK));
+	  }
+	} else {
+	  jcopy_block_row(src_buffer[offset_y] + x_crop_blocks,
+			  dst_buffer[offset_y],
+			  compptr->width_in_blocks);
+	}
+      }
+    }
+  }
+}
+
+
+LOCAL(void)
+do_wipe (j_decompress_ptr srcinfo, j_compress_ptr dstinfo,
+	 JDIMENSION x_crop_offset, JDIMENSION y_crop_offset,
+	 jvirt_barray_ptr *src_coef_arrays,
+	 JDIMENSION drop_width, JDIMENSION drop_height)
+/* Wipe - drop content of specified area, fill with zero (neutral gray) */
+{
+  JDIMENSION comp_width, comp_height;
+  JDIMENSION blk_y, x_wipe_blocks, y_wipe_blocks;
+  int ci, offset_y;
+  JBLOCKARRAY buffer;
+  jpeg_component_info *compptr;
+
+  for (ci = 0; ci < dstinfo->num_components; ci++) {
+    compptr = dstinfo->comp_info + ci;
+    comp_width = drop_width * compptr->h_samp_factor;
+    comp_height = drop_height * compptr->v_samp_factor;
+    x_wipe_blocks = x_crop_offset * compptr->h_samp_factor;
+    y_wipe_blocks = y_crop_offset * compptr->v_samp_factor;
+    for (blk_y = 0; blk_y < comp_height; blk_y += compptr->v_samp_factor) {
+      buffer = (*srcinfo->mem->access_virt_barray)
+	((j_common_ptr) srcinfo, src_coef_arrays[ci], blk_y + y_wipe_blocks,
+	 (JDIMENSION) compptr->v_samp_factor, TRUE);
+      for (offset_y = 0; offset_y < compptr->v_samp_factor; offset_y++) {
+	FMEMZERO(buffer[offset_y] + x_wipe_blocks,
+		 comp_width * SIZEOF(JBLOCK));
+      }
+    }
+  }
+}
+
+
+LOCAL(void)
 do_flip_h_no_crop (j_decompress_ptr srcinfo, j_compress_ptr dstinfo,
 		   JDIMENSION x_crop_offset,
 		   jvirt_barray_ptr *src_coef_arrays)
@@ -888,7 +998,8 @@
 
   /* Determine number of components in output image */
   if (info->force_grayscale &&
-      srcinfo->jpeg_color_space == JCS_YCbCr &&
+      (srcinfo->jpeg_color_space == JCS_YCbCr ||
+       srcinfo->jpeg_color_space == JCS_BG_YCC) &&
       srcinfo->num_components == 3)
     /* We'll only process the first component */
     info->num_components = 1;
@@ -965,39 +1076,81 @@
       info->crop_xoffset = 0;	/* default to +0 */
     if (info->crop_yoffset_set == JCROP_UNSET)
       info->crop_yoffset = 0;	/* default to +0 */
-    if (info->crop_xoffset >= info->output_width ||
-	info->crop_yoffset >= info->output_height)
-      ERREXIT(srcinfo, JERR_BAD_CROP_SPEC);
-    if (info->crop_width_set == JCROP_UNSET)
+    if (info->crop_width_set == JCROP_UNSET) {
+      if (info->crop_xoffset >= info->output_width)
+	ERREXIT(srcinfo, JERR_BAD_CROP_SPEC);
       info->crop_width = info->output_width - info->crop_xoffset;
-    if (info->crop_height_set == JCROP_UNSET)
+    } else {
+      /* Check for crop extension */
+      if (info->crop_width > info->output_width) {
+	/* Crop extension does not work when transforming! */
+	if (info->transform != JXFORM_NONE ||
+	    info->crop_xoffset >= info->crop_width ||
+	    info->crop_xoffset > info->crop_width - info->output_width)
+	  ERREXIT(srcinfo, JERR_BAD_CROP_SPEC);
+      } else {
+	if (info->crop_xoffset >= info->output_width ||
+	    info->crop_width <= 0 ||
+	    info->crop_xoffset > info->output_width - info->crop_width)
+	  ERREXIT(srcinfo, JERR_BAD_CROP_SPEC);
+      }
+    }
+    if (info->crop_height_set == JCROP_UNSET) {
+      if (info->crop_yoffset >= info->output_height)
+	ERREXIT(srcinfo, JERR_BAD_CROP_SPEC);
       info->crop_height = info->output_height - info->crop_yoffset;
-    /* Ensure parameters are valid */
-    if (info->crop_width <= 0 || info->crop_width > info->output_width ||
-	info->crop_height <= 0 || info->crop_height > info->output_height ||
-	info->crop_xoffset > info->output_width - info->crop_width ||
-	info->crop_yoffset > info->output_height - info->crop_height)
-      ERREXIT(srcinfo, JERR_BAD_CROP_SPEC);
+    } else {
+      /* Check for crop extension */
+      if (info->crop_height > info->output_height) {
+	/* Crop extension does not work when transforming! */
+	if (info->transform != JXFORM_NONE ||
+	    info->crop_yoffset >= info->crop_height ||
+	    info->crop_yoffset > info->crop_height - info->output_height)
+	  ERREXIT(srcinfo, JERR_BAD_CROP_SPEC);
+      } else {
+	if (info->crop_yoffset >= info->output_height ||
+	    info->crop_height <= 0 ||
+	    info->crop_yoffset > info->output_height - info->crop_height)
+	  ERREXIT(srcinfo, JERR_BAD_CROP_SPEC);
+      }
+    }
     /* Convert negative crop offsets into regular offsets */
-    if (info->crop_xoffset_set == JCROP_NEG)
+    if (info->crop_xoffset_set != JCROP_NEG)
+      xoffset = info->crop_xoffset;
+    else if (info->crop_width > info->output_width) /* crop extension */
+      xoffset = info->crop_width - info->output_width - info->crop_xoffset;
+    else
       xoffset = info->output_width - info->crop_width - info->crop_xoffset;
+    if (info->crop_yoffset_set != JCROP_NEG)
+      yoffset = info->crop_yoffset;
+    else if (info->crop_height > info->output_height) /* crop extension */
+      yoffset = info->crop_height - info->output_height - info->crop_yoffset;
     else
-      xoffset = info->crop_xoffset;
-    if (info->crop_yoffset_set == JCROP_NEG)
       yoffset = info->output_height - info->crop_height - info->crop_yoffset;
-    else
-      yoffset = info->crop_yoffset;
     /* Now adjust so that upper left corner falls at an iMCU boundary */
-    if (info->crop_width_set == JCROP_FORCE)
-      info->output_width = info->crop_width;
-    else
-      info->output_width =
-        info->crop_width + (xoffset % info->iMCU_sample_width);
-    if (info->crop_height_set == JCROP_FORCE)
-      info->output_height = info->crop_height;
-    else
-      info->output_height =
-        info->crop_height + (yoffset % info->iMCU_sample_height);
+    if (info->transform == JXFORM_WIPE) {
+      /* Ensure the effective wipe region will cover the requested */
+      info->drop_width = (JDIMENSION) jdiv_round_up
+	((long) (info->crop_width + (xoffset % info->iMCU_sample_width)),
+	 (long) info->iMCU_sample_width);
+      info->drop_height = (JDIMENSION) jdiv_round_up
+	((long) (info->crop_height + (yoffset % info->iMCU_sample_height)),
+	 (long) info->iMCU_sample_height);
+    } else {
+      /* Ensure the effective crop region will cover the requested */
+      if (info->crop_width_set == JCROP_FORCE ||
+	  info->crop_width > info->output_width)
+	info->output_width = info->crop_width;
+      else
+	info->output_width =
+	  info->crop_width + (xoffset % info->iMCU_sample_width);
+      if (info->crop_height_set == JCROP_FORCE ||
+	  info->crop_height > info->output_height)
+	info->output_height = info->crop_height;
+      else
+	info->output_height =
+	  info->crop_height + (yoffset % info->iMCU_sample_height);
+    }
     /* Save x/y offsets measured in iMCUs */
     info->x_crop_offset = xoffset / info->iMCU_sample_width;
     info->y_crop_offset = yoffset / info->iMCU_sample_height;
@@ -1013,7 +1166,9 @@
   transpose_it = FALSE;
   switch (info->transform) {
   case JXFORM_NONE:
-    if (info->x_crop_offset != 0 || info->y_crop_offset != 0)
+    if (info->x_crop_offset != 0 || info->y_crop_offset != 0 ||
+	info->output_width > srcinfo->output_width ||
+	info->output_height > srcinfo->output_height)
       need_workspace = TRUE;
     /* No workspace needed if neither cropping nor transforming */
     break;
@@ -1067,6 +1222,8 @@
     need_workspace = TRUE;
     transpose_it = TRUE;
     break;
+  case JXFORM_WIPE:
+    break;
   }
 
   /* Allocate workspace if needed.
@@ -1327,12 +1484,13 @@
 {
   /* If force-to-grayscale is requested, adjust destination parameters */
   if (info->force_grayscale) {
-    /* First, ensure we have YCbCr or grayscale data, and that the source's
+    /* First, ensure we have YCC or grayscale data, and that the source's
      * Y channel is full resolution.  (No reasonable person would make Y
      * be less than full resolution, so actually coping with that case
      * isn't worth extra code space.  But we check it to avoid crashing.)
      */
-    if (((dstinfo->jpeg_color_space == JCS_YCbCr &&
+    if ((((dstinfo->jpeg_color_space == JCS_YCbCr ||
+	   dstinfo->jpeg_color_space == JCS_BG_YCC) &&
 	  dstinfo->num_components == 3) ||
 	 (dstinfo->jpeg_color_space == JCS_GRAYSCALE &&
 	  dstinfo->num_components == 1)) &&
@@ -1427,7 +1585,11 @@
    */
   switch (info->transform) {
   case JXFORM_NONE:
-    if (info->x_crop_offset != 0 || info->y_crop_offset != 0)
+    if (info->output_width > srcinfo->output_width ||
+	info->output_height > srcinfo->output_height)
+      do_crop_ext(srcinfo, dstinfo, info->x_crop_offset, info->y_crop_offset,
+		  src_coef_arrays, dst_coef_arrays);
+    else if (info->x_crop_offset != 0 || info->y_crop_offset != 0)
       do_crop(srcinfo, dstinfo, info->x_crop_offset, info->y_crop_offset,
 	      src_coef_arrays, dst_coef_arrays);
     break;
@@ -1463,6 +1625,10 @@
     do_rot_270(srcinfo, dstinfo, info->x_crop_offset, info->y_crop_offset,
 	       src_coef_arrays, dst_coef_arrays);
     break;
+  case JXFORM_WIPE:
+    do_wipe(srcinfo, dstinfo, info->x_crop_offset, info->y_crop_offset,
+	    src_coef_arrays, info->drop_width, info->drop_height);
+    break;
   }
 }
 
Index: include/reactos/libs/libjpeg/jmorecfg.h
===================================================================
--- include/reactos/libs/libjpeg/jmorecfg.h	(revision 62563)
+++ include/reactos/libs/libjpeg/jmorecfg.h	(working copy)
@@ -2,7 +2,7 @@
  * jmorecfg.h
  *
  * Copyright (C) 1991-1997, Thomas G. Lane.
- * Modified 1997-2012 by Guido Vollbeding.
+ * Modified 1997-2013 by Guido Vollbeding.
  * This file is part of the Independent JPEG Group's software.
  * For conditions of distribution and use, see the accompanying README file.
  *
@@ -15,13 +15,22 @@
 /*
  * Define BITS_IN_JSAMPLE as either
  *   8   for 8-bit sample values (the usual setting)
+ *   9   for 9-bit sample values
+ *   10  for 10-bit sample values
+ *   11  for 11-bit sample values
  *   12  for 12-bit sample values
- * Only 8 and 12 are legal data precisions for lossy JPEG according to the
- * JPEG standard, and the IJG code does not support anything else!
- * We do not support run-time selection of data precision, sorry.
+ * Only 8, 9, 10, 11, and 12 bits sample data precision are supported for
+ * full-feature DCT processing.  Further depths up to 16-bit may be added
+ * later for the lossless modes of operation.
+ * Run-time selection and conversion of data precision will be added later
+ * and are currently not supported, sorry.
+ * Exception:  The transcoding part (jpegtran) supports all settings in a
+ * single instance, since it operates on the level of DCT coefficients and
+ * not sample values.  The DCT coefficients are of the same type (16 bits)
+ * in all cases (see below).
  */
 
-#define BITS_IN_JSAMPLE  8	/* use 8 or 12 */
+#define BITS_IN_JSAMPLE  8	/* use 8, 9, 10, 11, or 12 */
 
 
 /*
@@ -77,6 +86,48 @@
 #endif /* BITS_IN_JSAMPLE == 8 */
 
 
+#if BITS_IN_JSAMPLE == 9
+/* JSAMPLE should be the smallest type that will hold the values 0..511.
+ * On nearly all machines "short" will do nicely.
+ */
+
+typedef short JSAMPLE;
+#define GETJSAMPLE(value)  ((int) (value))
+
+#define MAXJSAMPLE	511
+#define CENTERJSAMPLE	256
+
+#endif /* BITS_IN_JSAMPLE == 9 */
+
+
+#if BITS_IN_JSAMPLE == 10
+/* JSAMPLE should be the smallest type that will hold the values 0..1023.
+ * On nearly all machines "short" will do nicely.
+ */
+
+typedef short JSAMPLE;
+#define GETJSAMPLE(value)  ((int) (value))
+
+#define MAXJSAMPLE	1023
+#define CENTERJSAMPLE	512
+
+#endif /* BITS_IN_JSAMPLE == 10 */
+
+
+#if BITS_IN_JSAMPLE == 11
+/* JSAMPLE should be the smallest type that will hold the values 0..2047.
+ * On nearly all machines "short" will do nicely.
+ */
+
+typedef short JSAMPLE;
+#define GETJSAMPLE(value)  ((int) (value))
+
+#define MAXJSAMPLE	2047
+#define CENTERJSAMPLE	1024
+
+#endif /* BITS_IN_JSAMPLE == 11 */
+
+
 #if BITS_IN_JSAMPLE == 12
 /* JSAMPLE should be the smallest type that will hold the values 0..4095.
  * On nearly all machines "short" will do nicely.
@@ -252,7 +303,10 @@
  * Defining HAVE_BOOLEAN before including jpeglib.h should make it work.
  */
 
-#ifdef HAVE_BOOLEAN
+#ifndef HAVE_BOOLEAN
+#if defined FALSE || defined TRUE || defined QGLOBAL_H
+/* Qt3 defines FALSE and TRUE as "const" variables in qglobal.h */
+typedef int boolean;
 #ifndef FALSE			/* in case these macros already exist */
 #define FALSE	0		/* values of boolean */
 #endif
@@ -262,6 +316,7 @@
 #else
 typedef enum { FALSE = 0, TRUE = 1 } boolean;
 #endif
+#endif
 
 
 /*
@@ -299,11 +354,12 @@
 #define C_PROGRESSIVE_SUPPORTED	    /* Progressive JPEG? (Requires MULTISCAN)*/
 #define DCT_SCALING_SUPPORTED	    /* Input rescaling via DCT? (Requires DCT_ISLOW)*/
 #define ENTROPY_OPT_SUPPORTED	    /* Optimization of entropy coding parms? */
-/* Note: if you selected 12-bit data precision, it is dangerous to turn off
- * ENTROPY_OPT_SUPPORTED.  The standard Huffman tables are only good for 8-bit
- * precision, so jchuff.c normally uses entropy optimization to compute
- * usable tables for higher precision.  If you don't want to do optimization,
- * you'll have to supply different default Huffman tables.
+/* Note: if you selected more than 8-bit data precision, it is dangerous to
+ * turn off ENTROPY_OPT_SUPPORTED.  The standard Huffman tables are only
+ * good for 8-bit precision, so arithmetic coding is recommended for higher
+ * precision.  The Huffman encoder normally uses entropy optimization to
+ * compute usable tables for higher precision.  Otherwise, you'll have to
+ * supply different default Huffman tables.
  * The exact same statements apply for progressive JPEG: the default tables
  * don't work for progressive mode.  (This may get fixed, however.)
  */
@@ -314,7 +370,7 @@
 #define D_ARITH_CODING_SUPPORTED    /* Arithmetic coding back end? */
 #define D_MULTISCAN_FILES_SUPPORTED /* Multiple-scan JPEG files? */
 #define D_PROGRESSIVE_SUPPORTED	    /* Progressive JPEG? (Requires MULTISCAN)*/
-#define IDCT_SCALING_SUPPORTED	    /* Output rescaling via IDCT? */
+#define IDCT_SCALING_SUPPORTED	    /* Output rescaling via IDCT? (Requires DCT_ISLOW)*/
 #define SAVE_MARKERS_SUPPORTED	    /* jpeg_save_markers() needed? */
 #define BLOCK_SMOOTHING_SUPPORTED   /* Block smoothing? (Progressive only) */
 #undef  UPSAMPLE_SCALING_SUPPORTED  /* Output rescaling at upsample stage? */
Index: include/reactos/libs/libjpeg/jpegint.h
===================================================================
--- include/reactos/libs/libjpeg/jpegint.h	(revision 62563)
+++ include/reactos/libs/libjpeg/jpegint.h	(working copy)
@@ -2,7 +2,7 @@
  * jpegint.h
  *
  * Copyright (C) 1991-1997, Thomas G. Lane.
- * Modified 1997-2011 by Guido Vollbeding.
+ * Modified 1997-2013 by Guido Vollbeding.
  * This file is part of the Independent JPEG Group's software.
  * For conditions of distribution and use, see the accompanying README file.
  *
@@ -211,8 +211,8 @@
 /* Entropy decoding */
 struct jpeg_entropy_decoder {
   JMETHOD(void, start_pass, (j_decompress_ptr cinfo));
-  JMETHOD(boolean, decode_mcu, (j_decompress_ptr cinfo,
-				JBLOCKROW *MCU_data));
+  JMETHOD(boolean, decode_mcu, (j_decompress_ptr cinfo, JBLOCKROW *MCU_data));
+  JMETHOD(void, finish_pass, (j_decompress_ptr cinfo));
 };
 
 /* Inverse DCT (also performs dequantization) */
Index: include/reactos/libs/libjpeg/jpeglib.h
===================================================================
--- include/reactos/libs/libjpeg/jpeglib.h	(revision 62563)
+++ include/reactos/libs/libjpeg/jpeglib.h	(working copy)
@@ -2,7 +2,7 @@
  * jpeglib.h
  *
  * Copyright (C) 1991-1998, Thomas G. Lane.
- * Modified 2002-2012 by Guido Vollbeding.
+ * Modified 2002-2013 by Guido Vollbeding.
  * This file is part of the Independent JPEG Group's software.
  * For conditions of distribution and use, see the accompanying README file.
  *
@@ -39,12 +39,12 @@
 
 #define JPEG_LIB_VERSION        90	/* Compatibility version 9.0 */
 #define JPEG_LIB_VERSION_MAJOR  9
-#define JPEG_LIB_VERSION_MINOR  0
+#define JPEG_LIB_VERSION_MINOR  1
 
 
 /* Various constants determining the sizes of things.
- * All of these are specified by the JPEG standard, so don't change them
- * if you want to be compatible.
+ * All of these are specified by the JPEG standard,
+ * so don't change them if you want to be compatible.
  */
 
 #define DCTSIZE		    8	/* The basic DCT block is 8x8 coefficients */
@@ -157,16 +157,21 @@
   /* The downsampled dimensions are the component's actual, unpadded number
    * of samples at the main buffer (preprocessing/compression interface);
    * DCT scaling is included, so
-   * downsampled_width = ceil(image_width * Hi/Hmax * DCT_h_scaled_size/DCTSIZE)
+   * downsampled_width =
+   *   ceil(image_width * Hi/Hmax * DCT_h_scaled_size/block_size)
    * and similarly for height.
    */
   JDIMENSION downsampled_width;	 /* actual width in samples */
   JDIMENSION downsampled_height; /* actual height in samples */
-  /* This flag is used only for decompression.  In cases where some of the
-   * components will be ignored (eg grayscale output from YCbCr image),
-   * we can skip most computations for the unused components.
+  /* For decompression, in cases where some of the components will be
+   * ignored (eg grayscale output from YCbCr image), we can skip most
+   * computations for the unused components.
+   * For compression, some of the components will need further quantization
+   * scale by factor of 2 after DCT (eg BG_YCC output from normal RGB input).
+   * The field is first set TRUE for decompression, FALSE for compression
+   * in initial_setup, and then adapted in color conversion setup.
    */
-  boolean component_needed;	/* do we need the value of this component? */
+  boolean component_needed;
 
   /* These values are computed before starting a scan of the component. */
   /* The decompressor output side may not use these variables. */
@@ -215,10 +220,12 @@
 typedef enum {
 	JCS_UNKNOWN,		/* error/unspecified */
 	JCS_GRAYSCALE,		/* monochrome */
-	JCS_RGB,		/* red/green/blue */
-	JCS_YCbCr,		/* Y/Cb/Cr (also known as YUV) */
+	JCS_RGB,		/* red/green/blue, standard RGB (sRGB) */
+	JCS_YCbCr,		/* Y/Cb/Cr (also known as YUV), standard YCC */
 	JCS_CMYK,		/* C/M/Y/K */
-	JCS_YCCK		/* Y/Cb/Cr/K */
+	JCS_YCCK,		/* Y/Cb/Cr/K */
+	JCS_BG_RGB,		/* big gamut red/green/blue, bg-sRGB */
+	JCS_BG_YCC		/* big gamut Y/Cb/Cr, bg-sYCC */
 } J_COLOR_SPACE;
 
 /* Supported color transforms. */
Index: include/reactos/libs/libjpeg/jversion.h
===================================================================
--- include/reactos/libs/libjpeg/jversion.h	(revision 62563)
+++ include/reactos/libs/libjpeg/jversion.h	(working copy)
@@ -1,7 +1,7 @@
 /*
  * jversion.h
  *
- * Copyright (C) 1991-2013, Thomas G. Lane, Guido Vollbeding.
+ * Copyright (C) 1991-2014, Thomas G. Lane, Guido Vollbeding.
  * This file is part of the Independent JPEG Group's software.
  * For conditions of distribution and use, see the accompanying README file.
  *
@@ -9,6 +9,6 @@
  */
 
 
-#define JVERSION	"9  13-Jan-2013"
+#define JVERSION	"9a  19-Jan-2014"
 
-#define JCOPYRIGHT	"Copyright (C) 2013, Thomas G. Lane, Guido Vollbeding"
+#define JCOPYRIGHT	"Copyright (C) 2014, Thomas G. Lane, Guido Vollbeding"
Index: include/reactos/libs/libjpeg/transupp.h
===================================================================
--- include/reactos/libs/libjpeg/transupp.h	(revision 62563)
+++ include/reactos/libs/libjpeg/transupp.h	(working copy)
@@ -1,7 +1,7 @@
 /*
  * transupp.h
  *
- * Copyright (C) 1997-2011, Thomas G. Lane, Guido Vollbeding.
+ * Copyright (C) 1997-2013, Thomas G. Lane, Guido Vollbeding.
  * This file is part of the Independent JPEG Group's software.
  * For conditions of distribution and use, see the accompanying README file.
  *
@@ -51,14 +51,17 @@
  *
  * We also offer a lossless-crop option, which discards data outside a given
  * image region but losslessly preserves what is inside.  Like the rotate and
- * flip transforms, lossless crop is restricted by the JPEG format: the upper
- * left corner of the selected region must fall on an iMCU boundary.  If this
- * does not hold for the given crop parameters, we silently move the upper left
- * corner up and/or left to make it so, simultaneously increasing the region
- * dimensions to keep the lower right crop corner unchanged.  (Thus, the
+ * flip transforms, lossless crop is restricted by the current JPEG format: the
+ * upper left corner of the selected region must fall on an iMCU boundary.  If
+ * this does not hold for the given crop parameters, we silently move the upper
+ * left corner up and/or left to make it so, simultaneously increasing the
+ * region dimensions to keep the lower right crop corner unchanged.  (Thus, the
  * output image covers at least the requested region, but may cover more.)
  * The adjustment of the region dimensions may be optionally disabled.
  *
+ * A complementary lossless-wipe option is provided to discard (gray out) data
+ * inside a given image region while losslessly preserving what is outside.
+ *
  * We also provide a lossless-resize option, which is kind of a lossless-crop
  * operation in the DCT coefficient block domain - it discards higher-order
  * coefficients and losslessly preserves lower-order coefficients of a
@@ -102,7 +105,8 @@
 	JXFORM_TRANSVERSE,	/* transpose across UR-to-LL axis */
 	JXFORM_ROT_90,		/* 90-degree clockwise rotation */
 	JXFORM_ROT_180,		/* 180-degree rotation */
-	JXFORM_ROT_270		/* 270-degree clockwise (or 90 ccw) */
+	JXFORM_ROT_270,		/* 270-degree clockwise (or 90 ccw) */
+	JXFORM_WIPE		/* wipe */
 } JXFORM_CODE;
 
 /*
@@ -130,7 +134,7 @@
   boolean perfect;		/* if TRUE, fail if partial MCUs are requested */
   boolean trim;			/* if TRUE, trim partial MCUs as needed */
   boolean force_grayscale;	/* if TRUE, convert color image to grayscale */
-  boolean crop;			/* if TRUE, crop source image */
+  boolean crop;			/* if TRUE, crop or wipe source image */
 
   /* Crop parameters: application need not set these unless crop is TRUE.
    * These can be filled in by jtransform_parse_crop_spec().
@@ -151,6 +155,8 @@
   JDIMENSION output_height;
   JDIMENSION x_crop_offset;	/* destination crop offsets measured in iMCUs */
   JDIMENSION y_crop_offset;
+  JDIMENSION drop_width;	/* drop/wipe dimensions measured in iMCUs */
+  JDIMENSION drop_height;
   int iMCU_sample_width;	/* destination iMCU size */
   int iMCU_sample_height;
 } jpeg_transform_info;