From c37dca1068e019aa975c0fae0fa8a0f8525e2226 Mon Sep 17 00:00:00 2001 From: Pavel Krajcevski Date: Tue, 21 Jan 2014 16:23:18 -0500 Subject: [PATCH] Split calculation of compression parameters from packing them. --- BPTCEncoder/src/CompressionMode.h | 40 ++++- BPTCEncoder/src/Compressor.cpp | 249 +++++++++++++++--------------- BPTCEncoder/src/RGBAEndpoints.cpp | 4 +- BPTCEncoder/src/RGBAEndpoints.h | 8 +- 4 files changed, 166 insertions(+), 135 deletions(-) diff --git a/BPTCEncoder/src/CompressionMode.h b/BPTCEncoder/src/CompressionMode.h index e38a88e..8b6f9fe 100755 --- a/BPTCEncoder/src/CompressionMode.h +++ b/BPTCEncoder/src/CompressionMode.h @@ -113,6 +113,30 @@ class CompressionMode { { } ~CompressionMode() { } + // These are all of the parameters required to define the data in a compressed + // BPTC block. The mode determines how these parameters will be translated + // into actual bits. + struct Params { + const uint16 m_ShapeIdx; + RGBAVector m_P1[kMaxNumSubsets], m_P2[kMaxNumSubsets]; + uint8 m_Indices[kMaxNumSubsets][kMaxNumDataPoints]; + uint8 m_AlphaIndices[kMaxNumDataPoints]; + uint8 m_PbitCombo[kMaxNumSubsets]; + int8 m_RotationMode, m_IndexMode; + explicit Params(uint32 shape) + : m_RotationMode(-1), m_IndexMode(-1), m_ShapeIdx(shape) { + memset(m_Indices, 0xFF, sizeof(m_Indices)); + memset(m_AlphaIndices, 0xFF, sizeof(m_AlphaIndices)); + memset(m_PbitCombo, 0xFF, sizeof(m_PbitCombo)); + } + }; + + // This outputs the parameters to the given bitstream based on the current + // compression mode. The first argument is not const because the mode and + // the value of the first index determines whether or not the indices need to + // be swapped. The final output bits will always be a valid BPTC block. + void Pack(Params &params, FasTC::BitStream &stream) const; + // This function compresses a group of clusters into the passed bitstream. The // size of the clusters array is determined by the BC7 compression mode. 
double Compress(FasTC::BitStream &stream, @@ -176,7 +200,7 @@ class CompressionMode { } int GetNumberOfSubsets() const { return m_Attributes->numSubsets; } - int GetNumberOfBitsPerIndex(int indexMode = -1) const { + int GetNumberOfBitsPerIndex(int8 indexMode = -1) const { if(indexMode < 0) indexMode = m_IndexMode; if(indexMode == 0) return m_Attributes->numBitsPerIndex; @@ -184,7 +208,7 @@ class CompressionMode { return m_Attributes->numBitsPerAlpha; } - int GetNumberOfBitsPerAlpha(int indexMode = -1) const { + int GetNumberOfBitsPerAlpha(int8 indexMode = -1) const { if(indexMode < 0) indexMode = m_IndexMode; if(indexMode == 0) return m_Attributes->numBitsPerAlpha; @@ -261,8 +285,8 @@ class CompressionMode { double OptimizeEndpointsForCluster( const RGBACluster &cluster, RGBAVector &p1, RGBAVector &p2, - int *bestIndices, - int &bestPbitCombo + uint8 *bestIndices, + uint8 &bestPbitCombo ) const; // This function performs the heuristic to choose the "best" neighboring @@ -290,26 +314,26 @@ class CompressionMode { // then we choose the best p-bit combo and return it as well. double CompressSingleColor(const RGBAVector &p, RGBAVector &p1, RGBAVector &p2, - int &bestPbitCombo) const; + uint8 &bestPbitCombo) const; // Compress the cluster using a generalized cluster fit. This figures out the // proper endpoints assuming that we have no alpha. double CompressCluster(const RGBACluster &cluster, RGBAVector &p1, RGBAVector &p2, - int *bestIndices, int &bestPbitCombo) const; + uint8 *bestIndices, uint8 &bestPbitCombo) const; // Compress the non-opaque cluster using a generalized cluster fit, and place // the endpoints within p1 and p2. The color indices and alpha indices are // computed as well. 
double CompressCluster(const RGBACluster &cluster, RGBAVector &p1, RGBAVector &p2, - int *bestIndices, int *alphaIndices) const; + uint8 *bestIndices, uint8 *alphaIndices) const; // This function takes two endpoints in the continuous domain (as floats) and // clamps them to the nearest grid points based on the compression mode (and // possible pbit values) void ClampEndpointsToGrid(RGBAVector &p1, RGBAVector &p2, - int &bestPBitCombo) const; + uint8 &bestPBitCombo) const; }; extern const uint32 kInterpolationValues[4][16][2]; diff --git a/BPTCEncoder/src/Compressor.cpp b/BPTCEncoder/src/Compressor.cpp index f85f69c..1c77c4d 100755 --- a/BPTCEncoder/src/Compressor.cpp +++ b/BPTCEncoder/src/Compressor.cpp @@ -356,7 +356,7 @@ CompressionMode::kModeAttributes[kNumModes] = { }; void CompressionMode::ClampEndpointsToGrid( - RGBAVector &p1, RGBAVector &p2, int &bestPBitCombo + RGBAVector &p1, RGBAVector &p2, uint8 &bestPBitCombo ) const { const int nPbitCombos = GetNumPbitCombos(); const bool hasPbits = nPbitCombos > 1; @@ -397,11 +397,10 @@ void CompressionMode::ClampEndpointsToGrid( double CompressionMode::CompressSingleColor( const RGBAVector &p, RGBAVector &p1, RGBAVector &p2, - int &bestPbitCombo + uint8 &bestPbitCombo ) const { const uint32 pixel = p.ToPixel(); float bestError = FLT_MAX; - bestPbitCombo = -1; for(int pbi = 0; pbi < GetNumPbitCombos(); pbi++) { const int *pbitCombo = GetPBitCombo(pbi); @@ -453,7 +452,7 @@ double CompressionMode::CompressSingleColor( possValsL[i] |= (possValsL[i] >> nBits); } - const uint32 bpi = GetNumberOfBitsPerIndex() - 1; + const uint8 bpi = GetNumberOfBitsPerIndex() - 1; const uint32 interpVal0 = kInterpolationValues[bpi][1][0]; const uint32 interpVal1 = kInterpolationValues[bpi][1][1]; @@ -685,8 +684,8 @@ bool CompressionMode::AcceptNewEndpointError( double CompressionMode::OptimizeEndpointsForCluster( const RGBACluster &cluster, RGBAVector &p1, RGBAVector &p2, - int *bestIndices, - int &bestPbitCombo + uint8 *bestIndices, + 
uint8 &bestPbitCombo ) const { const uint32 nBuckets = (1 << GetNumberOfBitsPerIndex()); @@ -731,7 +730,7 @@ double CompressionMode::OptimizeEndpointsForCluster( float temp = static_cast<float>(energy) / static_cast<float>(maxEnergy-1); - int indices[kMaxNumDataPoints]; + uint8 indices[kMaxNumDataPoints]; RGBAVector np1, np2; int nPbitCombo = 0; @@ -779,8 +778,8 @@ double CompressionMode::OptimizeEndpointsForCluster( double CompressionMode::CompressCluster( const RGBACluster &cluster, RGBAVector &p1, RGBAVector &p2, - int *bestIndices, - int *alphaIndices + uint8 *bestIndices, + uint8 *alphaIndices ) const { assert(GetModeNumber() == 4 || GetModeNumber() == 5); assert(GetNumberOfSubsets() == 1); @@ -796,7 +795,7 @@ double CompressionMode::CompressCluster( "detected much earlier."); const RGBAVector &p = cluster.GetPoint(0); - int dummyPbit = 0; + uint8 dummyPbit = 0; double bestErr = CompressSingleColor(p, p1, p2, dummyPbit); // We're assuming all indices will be index 1... @@ -843,7 +842,7 @@ double CompressionMode::CompressCluster( rgbCluster.AddPoint(v); } - int dummyPbit = 0; + uint8 dummyPbit = 0; RGBAVector rgbp1, rgbp2; double rgbError = CompressCluster( rgbCluster, rgbp1, rgbp2, bestIndices, dummyPbit ); @@ -1070,8 +1069,8 @@ double CompressionMode::CompressCluster( double CompressionMode::CompressCluster( const RGBACluster &cluster, RGBAVector &p1, RGBAVector &p2, - int *bestIndices, - int &bestPbitCombo + uint8 *bestIndices, + uint8 &bestPbitCombo ) const { // If all the points are the same in the cluster, then we need to figure out // what the best approximation to this point is.... 
@@ -1233,7 +1232,7 @@ double CompressionMode::CompressCluster( ClampEndpointsToGrid(p1, p2, bestPbitCombo); #ifdef _DEBUG - int pBitCombo = bestPbitCombo; + uint8 pBitCombo = bestPbitCombo; RGBAVector tp1 = p1, tp2 = p2; ClampEndpointsToGrid(tp1, tp2, pBitCombo); @@ -1249,99 +1248,29 @@ double CompressionMode::CompressCluster( ); } -double CompressionMode::Compress( - BitStream &stream, const int shapeIdx, const RGBACluster *clusters -) { - +void CompressionMode::Pack(Params &params, BitStream &stream) const { + const int kModeNumber = GetModeNumber(); const int nPartitionBits = GetNumberOfPartitionBits(); const int nSubsets = GetNumberOfSubsets(); + // Mode # stream.WriteBits(1 << kModeNumber, kModeNumber + 1); // Partition # - assert((((1 << nPartitionBits) - 1) & shapeIdx) == shapeIdx); - stream.WriteBits(shapeIdx, nPartitionBits); + assert((((1 << nPartitionBits) - 1) & params.m_ShapeIdx) == params.m_ShapeIdx); + stream.WriteBits(params.m_ShapeIdx, nPartitionBits); - RGBAVector p1[kMaxNumSubsets], p2[kMaxNumSubsets]; - - int bestIndices[kMaxNumSubsets][kMaxNumDataPoints]; - memset(bestIndices, 0xFF, sizeof(bestIndices)); - - int bestAlphaIndices[kMaxNumDataPoints]; - memset(bestAlphaIndices, 0xFF, sizeof(bestAlphaIndices)); - - int bestPbitCombo[kMaxNumSubsets] = { -1, -1, -1 }; - int bestRotationMode = -1, bestIndexMode = -1; - - double totalErr = 0.0; - for(int cidx = 0; cidx < nSubsets; cidx++) { - int indices[kMaxNumDataPoints] = {0}; - - if(m_Attributes->hasRotation) { - - assert(nSubsets == 1); - - int alphaIndices[kMaxNumDataPoints]; - - double bestError = DBL_MAX; - for(int rotMode = 0; rotMode < 4; rotMode++) { - - SetRotationMode(rotMode); - const int nIdxModes = kModeNumber == 4? 
2 : 1; - - for(int idxMode = 0; idxMode < nIdxModes; idxMode++) { - - SetIndexMode(idxMode); - - RGBAVector v1, v2; - double error = CompressCluster( - clusters[cidx], v1, v2, indices, alphaIndices - ); - - if(error < bestError) { - bestError = error; - - memcpy(bestIndices[cidx], indices, sizeof(indices)); - memcpy(bestAlphaIndices, alphaIndices, sizeof(alphaIndices)); - - bestRotationMode = rotMode; - bestIndexMode = idxMode; - - p1[cidx] = v1; - p2[cidx] = v2; - } - } - } - - totalErr += bestError; - } else { // ! m_Attributes->hasRotation - // Compress this cluster - totalErr += CompressCluster( - clusters[cidx], p1[cidx], p2[cidx], indices, bestPbitCombo[cidx] - ); - - // Map the indices to their proper position. - int idx = 0; - for(int i = 0; i < 16; i++) { - int subs = GetSubsetForIndex(i, shapeIdx, GetNumberOfSubsets()); - if(subs == cidx) { - bestIndices[cidx][i] = indices[idx++]; - } - } - } - } - - stream.WriteBits(bestRotationMode, m_Attributes->hasRotation? 2 : 0); - stream.WriteBits(bestIndexMode, m_Attributes->hasIdxMode? 1 : 0); + stream.WriteBits(params.m_RotationMode, m_Attributes->hasRotation? 2 : 0); + stream.WriteBits(params.m_IndexMode, m_Attributes->hasIdxMode? 
1 : 0); #ifdef _DEBUG for(int i = 0; i < kMaxNumDataPoints; i++) { int nSet = 0; for(int j = 0; j < nSubsets; j++) { - if(bestIndices[j][i] >= 0) + if(params.m_Indices[j][i] < 255) nSet++; } @@ -1358,14 +1287,14 @@ double CompressionMode::Compress( switch(GetPBitType()) { default: case ePBitType_None: - pixel1[i] = p1[i].ToPixel(qmask); - pixel2[i] = p2[i].ToPixel(qmask); + pixel1[i] = params.m_P1[i].ToPixel(qmask); + pixel2[i] = params.m_P2[i].ToPixel(qmask); break; case ePBitType_Shared: case ePBitType_NotShared: - pixel1[i] = p1[i].ToPixel(qmask, GetPBitCombo(bestPbitCombo[i])[0]); - pixel2[i] = p2[i].ToPixel(qmask, GetPBitCombo(bestPbitCombo[i])[1]); + pixel1[i] = params.m_P1[i].ToPixel(qmask, GetPBitCombo(params.m_PbitCombo[i])[0]); + pixel2[i] = params.m_P2[i].ToPixel(qmask, GetPBitCombo(params.m_PbitCombo[i])[1]); break; } } @@ -1374,28 +1303,28 @@ double CompressionMode::Compress( // we need to swap EVERYTHING. for(int sidx = 0; sidx < nSubsets; sidx++) { - int anchorIdx = GetAnchorIndexForSubset(sidx, shapeIdx, nSubsets); - assert(bestIndices[sidx][anchorIdx] != -1); + int anchorIdx = GetAnchorIndexForSubset(sidx, params.m_ShapeIdx, nSubsets); + assert(params.m_Indices[sidx][anchorIdx] != 255); - const int nAlphaIndexBits = GetNumberOfBitsPerAlpha(bestIndexMode); - const int nIndexBits = GetNumberOfBitsPerIndex(bestIndexMode); - if(bestIndices[sidx][anchorIdx] >> (nIndexBits - 1)) { - uint32 t = pixel1[sidx]; pixel1[sidx] = pixel2[sidx]; pixel2[sidx] = t; + const int nAlphaIndexBits = GetNumberOfBitsPerAlpha(params.m_IndexMode); + const int nIndexBits = GetNumberOfBitsPerIndex(params.m_IndexMode); + if(params.m_Indices[sidx][anchorIdx] >> (nIndexBits - 1)) { + std::swap(pixel1[sidx], pixel2[sidx]); int nIndexVals = 1 << nIndexBits; for(int i = 0; i < 16; i++) { - bestIndices[sidx][i] = (nIndexVals - 1) - bestIndices[sidx][i]; + params.m_Indices[sidx][i] = (nIndexVals - 1) - params.m_Indices[sidx][i]; } int nAlphaIndexVals = 1 << nAlphaIndexBits; 
if(m_Attributes->hasRotation) { for(int i = 0; i < 16; i++) { - bestAlphaIndices[i] = (nAlphaIndexVals - 1) - bestAlphaIndices[i]; + params.m_AlphaIndices[i] = (nAlphaIndexVals - 1) - params.m_AlphaIndices[i]; } } } - const bool rotated = (bestAlphaIndices[anchorIdx] >> (nAlphaIndexBits - 1)) > 0; + const bool rotated = (params.m_AlphaIndices[anchorIdx] >> (nAlphaIndexBits - 1)) > 0; if(m_Attributes->hasRotation && rotated) { uint8 * bp1 = reinterpret_cast<uint8 *>(&pixel1[sidx]); uint8 * bp2 = reinterpret_cast<uint8 *>(&pixel2[sidx]); @@ -1403,13 +1332,13 @@ double CompressionMode::Compress( int nAlphaIndexVals = 1 << nAlphaIndexBits; for(int i = 0; i < 16; i++) { - bestAlphaIndices[i] = (nAlphaIndexVals - 1) - bestAlphaIndices[i]; + params.m_AlphaIndices[i] = (nAlphaIndexVals - 1) - params.m_AlphaIndices[i]; } } - assert(!(bestIndices[sidx][anchorIdx] >> (nIndexBits - 1))); + assert(!(params.m_Indices[sidx][anchorIdx] >> (nIndexBits - 1))); assert(!m_Attributes->hasRotation || - !(bestAlphaIndices[anchorIdx] >> (nAlphaIndexBits - 1))); + !(params.m_AlphaIndices[anchorIdx] >> (nAlphaIndexBits - 1))); } // Get the quantized values... @@ -1459,7 +1388,7 @@ double CompressionMode::Compress( // Write out the best pbits.. if(GetPBitType() != ePBitType_None) { for(int s = 0; s < nSubsets; s++) { - const int *pbits = GetPBitCombo(bestPbitCombo[s]); + const int *pbits = GetPBitCombo(params.m_PbitCombo[s]); stream.WriteBits(pbits[0], 1); if(GetPBitType() != ePBitType_Shared) stream.WriteBits(pbits[1], 1); @@ -1468,14 +1397,14 @@ double CompressionMode::Compress( // If our index mode has changed, then we need to write the alpha indices // first. 
- if(m_Attributes->hasIdxMode && bestIndexMode == 1) { + if(m_Attributes->hasIdxMode && params.m_IndexMode == 1) { assert(m_Attributes->hasRotation); for(int i = 0; i < 16; i++) { - const int idx = bestAlphaIndices[i]; - assert(GetAnchorIndexForSubset(0, shapeIdx, nSubsets) == 0); - assert(GetNumberOfBitsPerAlpha(bestIndexMode) == 2); + const int idx = params.m_AlphaIndices[i]; + assert(GetAnchorIndexForSubset(0, params.m_ShapeIdx, nSubsets) == 0); + assert(GetNumberOfBitsPerAlpha(params.m_IndexMode) == 2); assert(idx >= 0 && idx < (1 << 2)); assert(i != 0 || !(idx >> 1) || @@ -1484,10 +1413,10 @@ double CompressionMode::Compress( } for(int i = 0; i < 16; i++) { - const int idx = bestIndices[0][i]; - assert(GetSubsetForIndex(i, shapeIdx, nSubsets) == 0); - assert(GetAnchorIndexForSubset(0, shapeIdx, nSubsets) == 0); - assert(GetNumberOfBitsPerIndex(bestIndexMode) == 3); + const int idx = params.m_Indices[0][i]; + assert(GetSubsetForIndex(i, params.m_ShapeIdx, nSubsets) == 0); + assert(GetAnchorIndexForSubset(0, params.m_ShapeIdx, nSubsets) == 0); + assert(GetNumberOfBitsPerIndex(params.m_IndexMode) == 3); assert(idx >= 0 && idx < (1 << 3)); assert(i != 0 || !(idx >> 2) || @@ -1496,10 +1425,10 @@ double CompressionMode::Compress( } } else { for(int i = 0; i < 16; i++) { - const int subs = GetSubsetForIndex(i, shapeIdx, nSubsets); - const int idx = bestIndices[subs][i]; - const int anchorIdx = GetAnchorIndexForSubset(subs, shapeIdx, nSubsets); - const int nBitsForIdx = GetNumberOfBitsPerIndex(bestIndexMode); + const int subs = GetSubsetForIndex(i, params.m_ShapeIdx, nSubsets); + const int idx = params.m_Indices[subs][i]; + const int anchorIdx = GetAnchorIndexForSubset(subs, params.m_ShapeIdx, nSubsets); + const int nBitsForIdx = GetNumberOfBitsPerIndex(params.m_IndexMode); assert(idx >= 0 && idx < (1 << nBitsForIdx)); assert(i != anchorIdx || !(idx >> (nBitsForIdx - 1)) || @@ -1509,9 +1438,9 @@ double CompressionMode::Compress( if(m_Attributes->hasRotation) { for(int 
i = 0; i < 16; i++) { - const int idx = bestAlphaIndices[i]; + const int idx = params.m_AlphaIndices[i]; const int anchorIdx = 0; - const int nBitsForIdx = GetNumberOfBitsPerAlpha(bestIndexMode); + const int nBitsForIdx = GetNumberOfBitsPerAlpha(params.m_IndexMode); assert(idx >= 0 && idx < (1 << nBitsForIdx)); assert(i != anchorIdx || !(idx >> (nBitsForIdx - 1)) || @@ -1521,6 +1450,80 @@ double CompressionMode::Compress( } } assert(stream.GetBitsWritten() == 128); +} + +double CompressionMode::Compress( + BitStream &stream, const int shapeIdx, const RGBACluster *clusters +) { + + const int kModeNumber = GetModeNumber(); + const int nPartitionBits = GetNumberOfPartitionBits(); + const int nSubsets = GetNumberOfSubsets(); + + Params params(shapeIdx); + + double totalErr = 0.0; + for(int cidx = 0; cidx < nSubsets; cidx++) { + uint8 indices[kMaxNumDataPoints] = {0}; + + if(m_Attributes->hasRotation) { + + assert(nSubsets == 1); + + uint8 alphaIndices[kMaxNumDataPoints]; + + double bestError = DBL_MAX; + for(int rotMode = 0; rotMode < 4; rotMode++) { + + SetRotationMode(rotMode); + const int nIdxModes = kModeNumber == 4? 2 : 1; + + for(int idxMode = 0; idxMode < nIdxModes; idxMode++) { + + SetIndexMode(idxMode); + + RGBAVector v1, v2; + double error = CompressCluster( + clusters[cidx], v1, v2, indices, alphaIndices + ); + + if(error < bestError) { + bestError = error; + + memcpy(params.m_Indices[cidx], indices, sizeof(indices)); + memcpy(params.m_AlphaIndices, alphaIndices, sizeof(alphaIndices)); + + params.m_RotationMode = rotMode; + params.m_IndexMode = idxMode; + + params.m_P1[cidx] = v1; + params.m_P2[cidx] = v2; + } + } + } + + totalErr += bestError; + } else { // ! m_Attributes->hasRotation + // Compress this cluster + totalErr += CompressCluster( + clusters[cidx], + params.m_P1[cidx], params.m_P2[cidx], + indices, params.m_PbitCombo[cidx] + ); + + // Map the indices to their proper position. 
+ int idx = 0; + for(int i = 0; i < 16; i++) { + int subs = GetSubsetForIndex(i, shapeIdx, GetNumberOfSubsets()); + if(subs == cidx) { + params.m_Indices[cidx][i] = indices[idx++]; + } + } + } + } + + Pack(params, stream); + assert(stream.GetBitsWritten() == 128); return totalErr; } diff --git a/BPTCEncoder/src/RGBAEndpoints.cpp b/BPTCEncoder/src/RGBAEndpoints.cpp index 48ca587..2ab07cf 100755 --- a/BPTCEncoder/src/RGBAEndpoints.cpp +++ b/BPTCEncoder/src/RGBAEndpoints.cpp @@ -419,7 +419,7 @@ uint32 RGBACluster::GetPowerMethodIterations() { double RGBACluster::QuantizedError( const RGBAVector &p1, const RGBAVector &p2, uint8 nBuckets, uint32 bitMask, const RGBAVector &errorMetricVec, - const int pbits[2], int *indices + const int pbits[2], uint8 *indices ) const { // nBuckets should be a power of two. @@ -457,7 +457,7 @@ double RGBACluster::QuantizedError( const uint8 *pb = (const uint8 *)(&pixel); float minError = FLT_MAX; - int bestBucket = -1; + uint8 bestBucket = 0; for(int j = 0; j < nBuckets; j++) { uint32 interp0 = (*interpVals)[j][0]; diff --git a/BPTCEncoder/src/RGBAEndpoints.h b/BPTCEncoder/src/RGBAEndpoints.h index 418c0d6..b01e0ab 100755 --- a/BPTCEncoder/src/RGBAEndpoints.h +++ b/BPTCEncoder/src/RGBAEndpoints.h @@ -387,8 +387,12 @@ public: Min = m_Min, Max = m_Max; } - // Returns the error if we were to quantize the colors right now with the given number of buckets and bit mask. - double QuantizedError(const RGBAVector &p1, const RGBAVector &p2, uint8 nBuckets, uint32 bitMask, const RGBAVector &errorMetricVec, const int pbits[2] = NULL, int *indices = NULL) const; + // Returns the error if we were to quantize the colors right now with the + // given number of buckets and bit mask. + double QuantizedError( + const RGBAVector &p1, const RGBAVector &p2, + uint8 nBuckets, uint32 bitMask, const RGBAVector &errorMetricVec, + const int pbits[2] = NULL, uint8 *indices = NULL) const; // Returns the principal axis for this point cluster. 
double GetPrincipalEigenvalue();