// // Little cms // Copyright (C) 1998-2007 Marti Maria // // Permission is hereby granted, free of charge, to any person obtaining // a copy of this software and associated documentation files (the "Software"), // to deal in the Software without restriction, including without limitation // the rights to use, copy, modify, merge, publish, distribute, sublicense, // and/or sell copies of the Software, and to permit persons to whom the Software // is furnished to do so, subject to the following conditions: // // The above copyright notice and this permission notice shall be included in // all copies or substantial portions of the Software. // // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO // THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "lcms.h" // Pipeline of LUT. Enclosed by {} are new revision 4.0 of ICC spec. // // [Mat] -> [L1] -> { [Mat3] -> [Ofs3] -> [L3] ->} [CLUT] { -> [L4] -> [Mat4] -> [Ofs4] } -> [L2] // // Some of these stages would be missing. This implements the totality of // combinations of old and new LUT types as follows: // // Lut8 & Lut16 // ============ // [Mat] -> [L1] -> [CLUT] -> [L2] // // Mat2, Ofs2, L3, L3, Mat3, Ofs3 are missing // // LutAToB // ======== // // [L1] -> [CLUT] -> [L4] -> [Mat4] -> [Ofs4] -> [L2] // // Mat, Mat3, Ofs3, L3 are missing // L1 = A curves // L4 = M curves // L2 = B curves // // LutBToA // ======= // // [L1] -> [Mat3] -> [Ofs3] -> [L3] -> [CLUT] -> [L2] // // Mat, L4, Mat4, Ofs4 are missing // L1 = B Curves // L3 = M Curves // L2 = A curves // // // V2&3 emulation // =============== // // For output, Mat is multiplied by // // // | 0xff00 / 0xffff 0 0 | // | 0 0xff00 / 0xffff 0 | // | 0 0 0xff00 / 0xffff | // // // For input, an additional matrix is needed at the very last end of the chain // // // | 0xffff / 0xff00 0 0 | // | 0 0xffff / 0xff00 0 | // | 0 0 0xffff / 0xff00 | // // // Which reduces to (val * 257) >> 8 // A couple of macros to convert between revisions #define FROM_V2_TO_V4(x) (((((x)<<8)+(x))+0x80)>>8) // BY 65535 DIV 65280 ROUND #define FROM_V4_TO_V2(x) ((((x)<<8)+0x80)/257) // BY 65280 DIV 65535 ROUND // Lut Creation & Destruction LPLUT LCMSEXPORT cmsAllocLUT(void) { LPLUT NewLUT; NewLUT = (LPLUT) _cmsMalloc(sizeof(LUT)); if (NewLUT) ZeroMemory(NewLUT, sizeof(LUT)); return NewLUT; } void LCMSEXPORT cmsFreeLUT(LPLUT Lut) { unsigned int i; if (!Lut) return; if (Lut -> T) free(Lut -> T); for (i=0; i < Lut -> OutputChan; i++) { if (Lut -> L2[i]) free(Lut -> L2[i]); } for (i=0; i < Lut -> InputChan; i++) { if (Lut -> L1[i]) free(Lut -> L1[i]); } if (Lut ->wFlags & LUT_HASTL3) { for (i=0; i < Lut -> InputChan; i++) { if (Lut -> L3[i]) free(Lut -> L3[i]); } } if (Lut ->wFlags & LUT_HASTL4) { for (i=0; i < Lut -> OutputChan; i++) { if (Lut -> L4[i]) free(Lut -> L4[i]); } } if (Lut ->CLut16params.p8) free(Lut ->CLut16params.p8); free(Lut); } static LPVOID DupBlockTab(LPVOID Org, size_t size) { LPVOID mem = _cmsMalloc(size); if (mem != NULL) CopyMemory(mem, Org, size); return mem; } LPLUT LCMSEXPORT cmsDupLUT(LPLUT Orig) { LPLUT NewLUT = cmsAllocLUT(); unsigned int i; CopyMemory(NewLUT, Orig, sizeof(LUT)); for (i=0; i < Orig ->InputChan; i++) NewLUT -> L1[i] = (LPWORD) DupBlockTab((LPVOID) Orig ->L1[i], sizeof(WORD) * Orig ->In16params.nSamples); for (i=0; i < Orig ->OutputChan; i++) NewLUT -> L2[i] = (LPWORD) DupBlockTab((LPVOID) Orig ->L2[i], sizeof(WORD) * Orig ->Out16params.nSamples); NewLUT -> T = (LPWORD) DupBlockTab((LPVOID) Orig ->T, Orig -> Tsize); return NewLUT; } static unsigned int UIpow(unsigned int a, unsigned int b) { unsigned int rv = 1; for (; b > 0; b--) rv *= a; return rv; } LPLUT LCMSEXPORT cmsAlloc3DGrid(LPLUT NewLUT, int clutPoints, int inputChan, int outputChan) { DWORD nTabSize; NewLUT -> wFlags |= LUT_HAS3DGRID; NewLUT -> cLutPoints = clutPoints; NewLUT -> InputChan = inputChan; NewLUT -> OutputChan = outputChan; nTabSize = (NewLUT -> OutputChan * UIpow(NewLUT->cLutPoints, NewLUT->InputChan) * sizeof(WORD)); NewLUT -> T = (LPWORD) _cmsMalloc(nTabSize); if (NewLUT -> T == NULL) return NULL; ZeroMemory(NewLUT -> T, nTabSize); NewLUT ->Tsize = nTabSize; cmsCalcCLUT16Params(NewLUT -> cLutPoints, NewLUT -> InputChan, NewLUT -> OutputChan, &NewLUT -> CLut16params); return NewLUT; } LPLUT LCMSEXPORT cmsAllocLinearTable(LPLUT NewLUT, LPGAMMATABLE Tables[], int nTable) { unsigned int i; LPWORD PtrW; switch (nTable) { case 1: NewLUT -> wFlags |= LUT_HASTL1; cmsCalcL16Params(Tables[0] -> nEntries, &NewLUT -> In16params); NewLUT -> InputEntries = Tables[0] -> nEntries; for (i=0; i < NewLUT -> InputChan; i++) { PtrW = (LPWORD) _cmsMalloc(sizeof(WORD) * NewLUT -> InputEntries); if (PtrW == NULL) return NULL; NewLUT -> L1[i] = PtrW; CopyMemory(PtrW, Tables[i]->GammaTable, sizeof(WORD) * NewLUT -> InputEntries); CopyMemory(&NewLUT -> LCurvesSeed[0][i], &Tables[i] -> Seed, sizeof(LCMSGAMMAPARAMS)); } break; case 2: NewLUT -> wFlags |= LUT_HASTL2; cmsCalcL16Params(Tables[0] -> nEntries, &NewLUT -> Out16params); NewLUT -> OutputEntries = Tables[0] -> nEntries; for (i=0; i < NewLUT -> OutputChan; i++) { PtrW = (LPWORD) _cmsMalloc(sizeof(WORD) * NewLUT -> OutputEntries); if (PtrW == NULL) return NULL; NewLUT -> L2[i] = PtrW; CopyMemory(PtrW, Tables[i]->GammaTable, sizeof(WORD) * NewLUT -> OutputEntries); CopyMemory(&NewLUT -> LCurvesSeed[1][i], &Tables[i] -> Seed, sizeof(LCMSGAMMAPARAMS)); } break; // 3 & 4 according ICC 4.0 spec case 3: NewLUT -> wFlags |= LUT_HASTL3; cmsCalcL16Params(Tables[0] -> nEntries, &NewLUT -> L3params); NewLUT -> L3Entries = Tables[0] -> nEntries; for (i=0; i < NewLUT -> InputChan; i++) { PtrW = (LPWORD) _cmsMalloc(sizeof(WORD) * NewLUT -> L3Entries); if (PtrW == NULL) return NULL; NewLUT -> L3[i] = PtrW; CopyMemory(PtrW, Tables[i]->GammaTable, sizeof(WORD) * NewLUT -> L3Entries); CopyMemory(&NewLUT -> LCurvesSeed[2][i], &Tables[i] -> Seed, sizeof(LCMSGAMMAPARAMS)); } break; case 4: NewLUT -> wFlags |= LUT_HASTL4; cmsCalcL16Params(Tables[0] -> nEntries, &NewLUT -> L4params); NewLUT -> L4Entries = Tables[0] -> nEntries; for (i=0; i < NewLUT -> OutputChan; i++) { PtrW = (LPWORD) _cmsMalloc(sizeof(WORD) * NewLUT -> L4Entries); if (PtrW == NULL) return NULL; NewLUT -> L4[i] = PtrW; CopyMemory(PtrW, Tables[i]->GammaTable, sizeof(WORD) * NewLUT -> L4Entries); CopyMemory(&NewLUT -> LCurvesSeed[3][i], &Tables[i] -> Seed, sizeof(LCMSGAMMAPARAMS)); } break; default:; } return NewLUT; } // Set the LUT matrix LPLUT LCMSEXPORT cmsSetMatrixLUT(LPLUT Lut, LPMAT3 M) { MAT3toFix(&Lut ->Matrix, M); if (!MAT3isIdentity(&Lut->Matrix, 0.0001)) Lut ->wFlags |= LUT_HASMATRIX; return Lut; } // Set matrix & offset, v4 compatible LPLUT LCMSEXPORT cmsSetMatrixLUT4(LPLUT Lut, LPMAT3 M, LPVEC3 off, DWORD dwFlags) { WMAT3 WMat; WVEC3 Woff; VEC3 Zero = {{0, 0, 0}}; MAT3toFix(&WMat, M); if (off == NULL) off = &Zero; VEC3toFix(&Woff, off); // Nop if identity if (MAT3isIdentity(&WMat, 0.0001) && (Woff.n[VX] == 0 && Woff.n[VY] == 0 && Woff.n[VZ] == 0)) return Lut; switch (dwFlags) { case LUT_HASMATRIX: Lut ->Matrix = WMat; Lut ->wFlags |= LUT_HASMATRIX; break; case LUT_HASMATRIX3: Lut ->Mat3 = WMat; Lut ->Ofs3 = Woff; Lut ->wFlags |= LUT_HASMATRIX3; break; case LUT_HASMATRIX4: Lut ->Mat4 = WMat; Lut ->Ofs4 = Woff; Lut ->wFlags |= LUT_HASMATRIX4; break; default:; } return Lut; } // The full evaluator void LCMSEXPORT cmsEvalLUT(LPLUT Lut, WORD In[], WORD Out[]) { register unsigned int i; WORD StageABC[MAXCHANNELS], StageLMN[MAXCHANNELS]; // Try to speedup things on plain devicelinks if (Lut ->wFlags == LUT_HAS3DGRID) { Lut ->CLut16params.Interp3D(In, Out, Lut -> T, &Lut -> CLut16params); return; } // Nope, evaluate whole LUT for (i=0; i < Lut -> InputChan; i++) StageABC[i] = In[i]; if (Lut ->wFlags & LUT_V4_OUTPUT_EMULATE_V2) { // Clamp Lab to avoid overflow if (StageABC[0] > 0xFF00) StageABC[0] = 0xFF00; StageABC[0] = (WORD) FROM_V2_TO_V4(StageABC[0]); StageABC[1] = (WORD) FROM_V2_TO_V4(StageABC[1]); StageABC[2] = (WORD) FROM_V2_TO_V4(StageABC[2]); } if (Lut ->wFlags & LUT_V2_OUTPUT_EMULATE_V4) { StageABC[0] = (WORD) FROM_V4_TO_V2(StageABC[0]); StageABC[1] = (WORD) FROM_V4_TO_V2(StageABC[1]); StageABC[2] = (WORD) FROM_V4_TO_V2(StageABC[2]); } // Matrix handling. if (Lut -> wFlags & LUT_HASMATRIX) { WVEC3 InVect, OutVect; // In LUT8 here comes the special gray axis fixup if (Lut ->FixGrayAxes) { StageABC[1] = _cmsClampWord(StageABC[1] - 128); StageABC[2] = _cmsClampWord(StageABC[2] - 128); } // Matrix InVect.n[VX] = ToFixedDomain(StageABC[0]); InVect.n[VY] = ToFixedDomain(StageABC[1]); InVect.n[VZ] = ToFixedDomain(StageABC[2]); MAT3evalW(&OutVect, &Lut -> Matrix, &InVect); // PCS in 1Fixed15 format, adjusting StageABC[0] = _cmsClampWord(FromFixedDomain(OutVect.n[VX])); StageABC[1] = _cmsClampWord(FromFixedDomain(OutVect.n[VY])); StageABC[2] = _cmsClampWord(FromFixedDomain(OutVect.n[VZ])); } // First linearization if (Lut -> wFlags & LUT_HASTL1) { for (i=0; i < Lut -> InputChan; i++) StageABC[i] = cmsLinearInterpLUT16(StageABC[i], Lut -> L1[i], &Lut -> In16params); } // Mat3, Ofs3, L3 processing if (Lut ->wFlags & LUT_HASMATRIX3) { WVEC3 InVect, OutVect; InVect.n[VX] = ToFixedDomain(StageABC[0]); InVect.n[VY] = ToFixedDomain(StageABC[1]); InVect.n[VZ] = ToFixedDomain(StageABC[2]); MAT3evalW(&OutVect, &Lut -> Mat3, &InVect); OutVect.n[VX] += Lut ->Ofs3.n[VX]; OutVect.n[VY] += Lut ->Ofs3.n[VY]; OutVect.n[VZ] += Lut ->Ofs3.n[VZ]; StageABC[0] = _cmsClampWord(FromFixedDomain(OutVect.n[VX])); StageABC[1] = _cmsClampWord(FromFixedDomain(OutVect.n[VY])); StageABC[2] = _cmsClampWord(FromFixedDomain(OutVect.n[VZ])); } if (Lut ->wFlags & LUT_HASTL3) { for (i=0; i < Lut -> InputChan; i++) StageABC[i] = cmsLinearInterpLUT16(StageABC[i], Lut -> L3[i], &Lut -> L3params); } if (Lut -> wFlags & LUT_HAS3DGRID) { Lut ->CLut16params.Interp3D(StageABC, StageLMN, Lut -> T, &Lut -> CLut16params); } else { for (i=0; i < Lut -> InputChan; i++) StageLMN[i] = StageABC[i]; } // Mat4, Ofs4, L4 processing if (Lut ->wFlags & LUT_HASTL4) { for (i=0; i < Lut -> OutputChan; i++) StageLMN[i] = cmsLinearInterpLUT16(StageLMN[i], Lut -> L4[i], &Lut -> L4params); } if (Lut ->wFlags & LUT_HASMATRIX4) { WVEC3 InVect, OutVect; InVect.n[VX] = ToFixedDomain(StageLMN[0]); InVect.n[VY] = ToFixedDomain(StageLMN[1]); InVect.n[VZ] = ToFixedDomain(StageLMN[2]); MAT3evalW(&OutVect, &Lut -> Mat4, &InVect); OutVect.n[VX] += Lut ->Ofs4.n[VX]; OutVect.n[VY] += Lut ->Ofs4.n[VY]; OutVect.n[VZ] += Lut ->Ofs4.n[VZ]; StageLMN[0] = _cmsClampWord(FromFixedDomain(OutVect.n[VX])); StageLMN[1] = _cmsClampWord(FromFixedDomain(OutVect.n[VY])); StageLMN[2] = _cmsClampWord(FromFixedDomain(OutVect.n[VZ])); } // Last linearitzation if (Lut -> wFlags & LUT_HASTL2) { for (i=0; i < Lut -> OutputChan; i++) Out[i] = cmsLinearInterpLUT16(StageLMN[i], Lut -> L2[i], &Lut -> Out16params); } else { for (i=0; i < Lut -> OutputChan; i++) Out[i] = StageLMN[i]; } if (Lut ->wFlags & LUT_V4_INPUT_EMULATE_V2) { Out[0] = (WORD) FROM_V4_TO_V2(Out[0]); Out[1] = (WORD) FROM_V4_TO_V2(Out[1]); Out[2] = (WORD) FROM_V4_TO_V2(Out[2]); } if (Lut ->wFlags & LUT_V2_INPUT_EMULATE_V4) { Out[0] = (WORD) FROM_V2_TO_V4(Out[0]); Out[1] = (WORD) FROM_V2_TO_V4(Out[1]); Out[2] = (WORD) FROM_V2_TO_V4(Out[2]); } } // Precomputes tables for 8-bit on input devicelink. // LPLUT _cmsBlessLUT8(LPLUT Lut) { int i, j; WORD StageABC[3]; Fixed32 v1, v2, v3; LPL8PARAMS p8; LPL16PARAMS p = &Lut ->CLut16params; p8 = (LPL8PARAMS) _cmsMalloc(sizeof(L8PARAMS)); if (p8 == NULL) return NULL; // values comes * 257, so we can safely take first byte (x << 8 + x) // if there are prelinearization, is already smelted in tables for (i=0; i < 256; i++) { StageABC[0] = StageABC[1] = StageABC[2] = RGB_8_TO_16(i); if (Lut ->wFlags & LUT_HASTL1) { for (j=0; j < 3; j++) StageABC[j] = cmsLinearInterpLUT16(StageABC[j], Lut -> L1[j], &Lut -> In16params); Lut ->wFlags &= ~LUT_HASTL1; } v1 = ToFixedDomain(StageABC[0] * p -> Domain); v2 = ToFixedDomain(StageABC[1] * p -> Domain); v3 = ToFixedDomain(StageABC[2] * p -> Domain); p8 ->X0[i] = p->opta3 * FIXED_TO_INT(v1); p8 ->Y0[i] = p->opta2 * FIXED_TO_INT(v2); p8 ->Z0[i] = p->opta1 * FIXED_TO_INT(v3); p8 ->rx[i] = (WORD) FIXED_REST_TO_INT(v1); p8 ->ry[i] = (WORD) FIXED_REST_TO_INT(v2); p8 ->rz[i] = (WORD) FIXED_REST_TO_INT(v3); } Lut -> CLut16params.p8 = p8; Lut -> CLut16params.Interp3D = cmsTetrahedralInterp8; return Lut; } // ----------------------------------------------------------- Reverse interpolation // Here's how it goes. The derivative Df(x) of the function f is the linear // transformation that best approximates f near the point x. It can be represented // by a matrix A whose entries are the partial derivatives of the components of f // with respect to all the coordinates. This is know as the Jacobian // // The best linear approximation to f is given by the matrix equation: // // y-y0 = A (x-x0) // // So, if x0 is a good "guess" for the zero of f, then solving for the zero of this // linear approximation will give a "better guess" for the zero of f. Thus let y=0, // and since y0=f(x0) one can solve the above equation for x. This leads to the // Newton's method formula: // // xn+1 = xn - A-1 f(xn) // // where xn+1 denotes the (n+1)-st guess, obtained from the n-th guess xn in the // fashion described above. Iterating this will give better and better approximations // if you have a "good enough" initial guess. #define JACOBIAN_EPSILON 0.001 #define INVERSION_MAX_ITERATIONS 30 // Increment with reflexion on boundary static void IncDelta(double *Val) { if (*Val < (1.0 - JACOBIAN_EPSILON)) *Val += JACOBIAN_EPSILON; else *Val -= JACOBIAN_EPSILON; } static void ToEncoded(WORD Encoded[3], LPVEC3 Float) { Encoded[0] = (WORD) floor(Float->n[0] * 65535.0 + 0.5); Encoded[1] = (WORD) floor(Float->n[1] * 65535.0 + 0.5); Encoded[2] = (WORD) floor(Float->n[2] * 65535.0 + 0.5); } static void FromEncoded(LPVEC3 Float, WORD Encoded[3]) { Float->n[0] = Encoded[0] / 65535.0; Float->n[1] = Encoded[1] / 65535.0; Float->n[2] = Encoded[2] / 65535.0; } // Evaluates the CLUT part of a LUT (4 -> 3 only) static void EvalLUTdoubleKLab(LPLUT Lut, const VEC3* In, WORD FixedK, LPcmsCIELab Out) { WORD wIn[4], wOut[3]; wIn[0] = (WORD) floor(In ->n[0] * 65535.0 + 0.5); wIn[1] = (WORD) floor(In ->n[1] * 65535.0 + 0.5); wIn[2] = (WORD) floor(In ->n[2] * 65535.0 + 0.5); wIn[3] = FixedK; cmsEvalLUT(Lut, wIn, wOut); cmsLabEncoded2Float(Out, wOut); } // Builds a Jacobian CMY->Lab static void ComputeJacobianLab(LPLUT Lut, LPMAT3 Jacobian, const VEC3* Colorant, WORD K) { VEC3 ColorantD; cmsCIELab Lab, LabD; int j; EvalLUTdoubleKLab(Lut, Colorant, K, &Lab); for (j = 0; j < 3; j++) { ColorantD.n[0] = Colorant ->n[0]; ColorantD.n[1] = Colorant ->n[1]; ColorantD.n[2] = Colorant ->n[2]; IncDelta(&ColorantD.n[j]); EvalLUTdoubleKLab(Lut, &ColorantD, K, &LabD); Jacobian->v[0].n[j] = ((LabD.L - Lab.L) / JACOBIAN_EPSILON); Jacobian->v[1].n[j] = ((LabD.a - Lab.a) / JACOBIAN_EPSILON); Jacobian->v[2].n[j] = ((LabD.b - Lab.b) / JACOBIAN_EPSILON); } } // Evaluate a LUT in reverse direction. It only searches on 3->3 LUT, but It // can be used on CMYK -> Lab LUT to obtain black preservation. // Target holds LabK in this case // x1 <- x - [J(x)]^-1 * f(x) LCMSAPI double LCMSEXPORT cmsEvalLUTreverse(LPLUT Lut, WORD Target[], WORD Result[], LPWORD Hint) { int i; double error, LastError = 1E20; cmsCIELab fx, Goal; VEC3 tmp, tmp2, x; MAT3 Jacobian; WORD FixedK; WORD LastResult[4]; // This is our Lab goal cmsLabEncoded2Float(&Goal, Target); // Special case for CMYK->Lab if (Lut ->InputChan == 4) FixedK = Target[3]; else FixedK = 0; // Take the hint as starting point if specified if (Hint == NULL) { // Begin at any point, we choose 1/3 of neutral CMY gray x.n[0] = x.n[1] = x.n[2] = 0.3; } else { FromEncoded(&x, Hint); } // Iterate for (i = 0; i < INVERSION_MAX_ITERATIONS; i++) { // Get beginning fx EvalLUTdoubleKLab(Lut, &x, FixedK, &fx); // Compute error error = cmsDeltaE(&fx, &Goal); // If not convergent, return last safe value if (error >= LastError) break; // Keep latest values LastError = error; ToEncoded(LastResult, &x); LastResult[3] = FixedK; // Obtain slope ComputeJacobianLab(Lut, &Jacobian, &x, FixedK); // Solve system tmp2.n[0] = fx.L - Goal.L; tmp2.n[1] = fx.a - Goal.a; tmp2.n[2] = fx.b - Goal.b; if (!MAT3solve(&tmp, &Jacobian, &tmp2)) break; // Move our guess x.n[0] -= tmp.n[0]; x.n[1] -= tmp.n[1]; x.n[2] -= tmp.n[2]; // Some clipping.... VEC3saturate(&x); } Result[0] = LastResult[0]; Result[1] = LastResult[1]; Result[2] = LastResult[2]; Result[3] = LastResult[3]; return LastError; }