Engauge Digitizer  2
 All Classes Functions Variables Typedefs Enumerations Friends Pages
GridClassifier.cpp
1 /******************************************************************************************************
2  * (C) 2014 markummitchell@github.com. This file is part of Engauge Digitizer, which is released *
3  * under GNU General Public License version 2 (GPLv2) or (at your option) any later version. See file *
4  * LICENSE or go to gnu.org/licenses for details. Distribution requires prior written permission. *
5  ******************************************************************************************************/
6 
7 #include "ColorFilter.h"
8 #include "Correlation.h"
9 #include "DocumentModelCoords.h"
10 #include "EngaugeAssert.h"
11 #include "gnuplot.h"
12 #include "GridClassifier.h"
13 #include <iostream>
14 #include "Logger.h"
15 #include <QDebug>
16 #include <QFile>
17 #include <QImage>
18 #include "QtToString.h"
19 #include "Transformation.h"
20 
21 int GridClassifier::NUM_PIXELS_PER_HISTOGRAM_BINS = 1;
22 double GridClassifier::PEAK_HALF_WIDTH = 4;
23 int GridClassifier::MIN_STEP_PIXELS = 4 * GridClassifier::PEAK_HALF_WIDTH; // Step includes down ramp, flat part, up ramp
24 const QString GNUPLOT_DELIMITER ("\t");
25 
26 // We set up the picket fence with binStart arbitrarily set close to zero. Peak is
27 // not exactly at zero since we want to include the left side of the first peak.
28 int GridClassifier::BIN_START_UNSHIFTED = GridClassifier::PEAK_HALF_WIDTH;
29 
30 using namespace std;
31 
33 {
34 }
35 
36 int GridClassifier::binFromCoordinate (double coord,
37  double coordMin,
38  double coordMax) const
39 {
40  ENGAUGE_ASSERT (coordMin < coordMax);
41  ENGAUGE_ASSERT (coordMin <= coord);
42  ENGAUGE_ASSERT (coord <= coordMax);
43 
44  int bin = 0.5 + (m_numHistogramBins - 1.0) * (coord - coordMin) / (coordMax - coordMin);
45 
46  return bin;
47 }
48 
49 void GridClassifier::classify (bool isGnuplot,
50  const QPixmap &originalPixmap,
51  const Transformation &transformation,
52  int &countX,
53  double &startX,
54  double &stepX,
55  int &countY,
56  double &startY,
57  double &stepY)
58 {
59  LOG4CPP_INFO_S ((*mainCat)) << "GridClassifier::classify";
60 
61  QImage image = originalPixmap.toImage ();
62 
63  m_numHistogramBins = image.width() / NUM_PIXELS_PER_HISTOGRAM_BINS;
64  ENGAUGE_ASSERT (m_numHistogramBins > 1);
65 
66  double xMin, xMax, yMin, yMax;
67  double binStartX, binStepX, binStartY, binStepY;
68 
69  m_binsX = new double [m_numHistogramBins];
70  m_binsY = new double [m_numHistogramBins];
71 
72  computeGraphCoordinateLimits (image,
73  transformation,
74  xMin,
75  xMax,
76  yMin,
77  yMax);
78  initializeHistogramBins ();
79  populateHistogramBins (image,
80  transformation,
81  xMin,
82  xMax,
83  yMin,
84  yMax);
85  searchStartStepSpace (isGnuplot,
86  m_binsX,
87  "x",
88  xMin,
89  xMax,
90  startX,
91  stepX,
92  binStartX,
93  binStepX);
94  searchStartStepSpace (isGnuplot,
95  m_binsY,
96  "y",
97  yMin,
98  yMax,
99  startY,
100  stepY,
101  binStartY,
102  binStepY);
103  searchCountSpace (m_binsX,
104  binStartX,
105  binStepX,
106  countX);
107  searchCountSpace (m_binsY,
108  binStartY,
109  binStepY,
110  countY);
111 
112  delete [] m_binsX;
113  delete [] m_binsY;
114 }
115 
116 void GridClassifier::computeGraphCoordinateLimits (const QImage &image,
117  const Transformation &transformation,
118  double &xMin,
119  double &xMax,
120  double &yMin,
121  double &yMax)
122 {
123  LOG4CPP_INFO_S ((*mainCat)) << "GridClassifier::computeGraphCoordinateLimits";
124 
125  // Project every pixel onto the x axis, and onto the y axis. One set of histogram bins will be
126  // set up along each of the axes. The range of bins will encompass every pixel in the image, and no more.
127 
128  QPointF posGraphTL, posGraphTR, posGraphBL, posGraphBR;
129  transformation.transformScreenToRawGraph (QPointF (0, 0) , posGraphTL);
130  transformation.transformScreenToRawGraph (QPointF (image.width(), 0) , posGraphTR);
131  transformation.transformScreenToRawGraph (QPointF (0, image.height()) , posGraphBL);
132  transformation.transformScreenToRawGraph (QPointF (image.width(), image.height()), posGraphBR);
133 
134  // Compute x and y ranges for setting up the histogram bins
135  if (transformation.modelCoords().coordsType() == COORDS_TYPE_CARTESIAN) {
136 
137  // For affine cartesian coordinates, we only need to look at the screen corners
138  xMin = qMin (qMin (qMin (posGraphTL.x(), posGraphTR.x()), posGraphBL.x()), posGraphBR.x());
139  xMax = qMax (qMax (qMax (posGraphTL.x(), posGraphTR.x()), posGraphBL.x()), posGraphBR.x());
140  yMin = qMin (qMin (qMin (posGraphTL.y(), posGraphTR.y()), posGraphBL.y()), posGraphBR.y());
141  yMax = qMax (qMax (qMax (posGraphTL.y(), posGraphTR.y()), posGraphBL.y()), posGraphBR.y());
142 
143  } else {
144 
145  // For affine polar coordinates, easiest approach is to assume the full circle
146  xMin = 0.0;
147  xMax = transformation.modelCoords().thetaPeriod();
148  yMin = transformation.modelCoords().originRadius();
149  yMax = qMax (qMax (qMax (posGraphTL.y(), posGraphTR.y()), posGraphBL.y()), posGraphBR.y());
150 
151  }
152 
153  ENGAUGE_ASSERT (xMin < xMax);
154  ENGAUGE_ASSERT (yMin < yMax);
155 }
156 
157 double GridClassifier::coordinateFromBin (int bin,
158  double coordMin,
159  double coordMax) const
160 {
161  ENGAUGE_ASSERT (1 < m_numHistogramBins);
162  ENGAUGE_ASSERT (coordMin < coordMax);
163 
164  return coordMin + (coordMax - coordMin) * (double) bin / ((double) m_numHistogramBins - 1.0);
165 }
166 
167 void GridClassifier::copyVectorToVector (const double from [],
168  double to []) const
169 {
170  for (int bin = 0; bin < m_numHistogramBins; bin++) {
171  to [bin] = from [bin];
172  }
173 }
174 
175 void GridClassifier::dumpGnuplotCoordinate (const QString &coordinateLabel,
176  double corr,
177  const double *bins,
178  double coordinateMin,
179  double coordinateMax,
180  int binStart,
181  int binStep) const
182 {
183  QString filename = QString ("gridclassifier_%1_corr%2_startMax%3_stepMax%4.gnuplot")
184  .arg (coordinateLabel)
185  .arg (corr, 8, 'f', 3, '0')
186  .arg (binStart)
187  .arg (binStep);
188 
189  cout << GNUPLOT_FILE_MESSAGE.toLatin1().data() << filename.toLatin1().data() << "\n";
190 
191  QFile fileDump (filename);
192  fileDump.open (QIODevice::WriteOnly | QIODevice::Text);
193  QTextStream strDump (&fileDump);
194 
195  int bin;
196 
197  // For consistent scaling, get the max bin count
198  int binCountMax = 0;
199  for (bin = 0; bin < m_numHistogramBins; bin++) {
200  if (bins [bin] > binCountMax) {
201  binCountMax = qMax ((double) binCountMax,
202  bins [bin]);
203  }
204  }
205 
206  // Get picket fence
207  double *picketFence = new double [m_numHistogramBins];
208  loadPicketFence (picketFence,
209  binStart,
210  binStep,
211  0,
212  false);
213 
214  // Header
215  strDump << "bin"
216  << GNUPLOT_DELIMITER << "coordinate"
217  << GNUPLOT_DELIMITER << "binCount"
218  << GNUPLOT_DELIMITER << "startStep"
219  << GNUPLOT_DELIMITER << "picketFence" << "\n";
220 
221  // Data, with one row per coordinate value
222  for (bin = 0; bin < m_numHistogramBins; bin++) {
223 
224  double coordinate = coordinateFromBin (bin,
225  coordinateMin,
226  coordinateMax);
227  double startStepValue (((bin - binStart) % binStep == 0) ? 1 : 0);
228  strDump << bin
229  << GNUPLOT_DELIMITER << coordinate
230  << GNUPLOT_DELIMITER << bins [bin]
231  << GNUPLOT_DELIMITER << binCountMax * startStepValue
232  << GNUPLOT_DELIMITER << binCountMax * picketFence [bin] << "\n";
233  }
234 
235  delete [] picketFence;
236 }
237 
238 void GridClassifier::dumpGnuplotCorrelations (const QString &coordinateLabel,
239  double valueMin,
240  double valueMax,
241  const double signalA [],
242  const double signalB [],
243  const double correlations [])
244 {
245  QString filename = QString ("gridclassifier_%1_correlations.gnuplot")
246  .arg (coordinateLabel);
247 
248  cout << GNUPLOT_FILE_MESSAGE.toLatin1().data() << filename.toLatin1().data() << "\n";
249 
250  QFile fileDump (filename);
251  fileDump.open (QIODevice::WriteOnly | QIODevice::Text);
252  QTextStream strDump (&fileDump);
253 
254  int bin;
255 
256  // Compute max values so curves can be normalized to the same heights
257  double signalAMax = 1, signalBMax = 1, correlationsMax = 1;
258  for (bin = 0; bin < m_numHistogramBins; bin++) {
259  if (bin == 0 || signalA [bin] > signalAMax) {
260  signalAMax = signalA [bin];
261  }
262  if (bin == 0 || signalB [bin] > signalBMax) {
263  signalBMax = signalB [bin];
264  }
265  if (bin == 0 || correlations [bin] > correlationsMax) {
266  correlationsMax = correlations [bin];
267  }
268  }
269 
270  // Prevent divide by zero error
271  if (signalAMax == 0) {
272  signalAMax = 1.0;
273  }
274  if (signalBMax == 0) {
275  signalBMax = 1.0;
276  }
277 
278  // Output normalized curves
279  for (int bin = 0; bin < m_numHistogramBins; bin++) {
280 
281  strDump << coordinateFromBin (bin,
282  valueMin,
283  valueMax)
284  << GNUPLOT_DELIMITER << signalA [bin] / signalAMax
285  << GNUPLOT_DELIMITER << signalB [bin] / signalBMax
286  << GNUPLOT_DELIMITER << correlations [bin] / correlationsMax << "\n";
287  }
288 }
289 
290 void GridClassifier::initializeHistogramBins ()
291 {
292  LOG4CPP_INFO_S ((*mainCat)) << "GridClassifier::initializeHistogramBins";
293 
294  for (int bin = 0; bin < m_numHistogramBins; bin++) {
295  m_binsX [bin] = 0;
296  m_binsY [bin] = 0;
297  }
298 }
299 
300 void GridClassifier::loadPicketFence (double picketFence [],
301  int binStart,
302  int binStep,
303  int count,
304  bool isCount) const
305 {
306  const double PEAK_HEIGHT = 1.0; // Arbitrary height, since normalization will counteract any change to this value
307 
308  // Count argument is optional. Note that binStart already excludes PEAK_HALF_WIDTH bins at start,
309  // and we also exclude PEAK_HALF_WIDTH bins at the end
310  ENGAUGE_ASSERT (binStart >= PEAK_HALF_WIDTH);
311  ENGAUGE_ASSERT (binStep != 0);
312  if (!isCount) {
313  count = 1 + (m_numHistogramBins - binStart - PEAK_HALF_WIDTH) / binStep;
314  }
315 
316  // Bins that we need to worry about
317  int binStartMinusHalfWidth = binStart - PEAK_HALF_WIDTH;
318  int binStopPlusHalfWidth = (binStart + (count - 1) * binStep) + PEAK_HALF_WIDTH;
319 
320  // To normalize, we compute the area under the picket fence. Constraint is
321  // areaUnnormalized + NUM_HISTOGRAM_BINS * normalizationOffset = 0
322  double areaUnnormalized = count * PEAK_HEIGHT * PEAK_HALF_WIDTH;
323  double normalizationOffset = -1.0 * areaUnnormalized / m_numHistogramBins;
324 
325  for (int bin = 0; bin < m_numHistogramBins; bin++) {
326 
327  // Outside of the peaks, bin is small negative so correlation with unit function is zero. In other
328  // words, the function is normalized
329  picketFence [bin] = normalizationOffset;
330 
331  if ((binStartMinusHalfWidth <= bin) &&
332  (bin <= binStopPlusHalfWidth)) {
333 
334  // Closest peak
335  int ordinalClosestPeak = (int) ((bin - binStart + binStep / 2) / binStep);
336  int binClosestPeak = binStart + ordinalClosestPeak * binStep;
337 
338  // Distance from closest peak is used to define an isosceles triangle
339  int distanceToClosestPeak = qAbs (bin - binClosestPeak);
340 
341  if (distanceToClosestPeak < PEAK_HALF_WIDTH) {
342 
343  // Map 0 to PEAK_HALF_WIDTH to 1 to 0
344  picketFence [bin] = 1.0 - (double) distanceToClosestPeak / PEAK_HALF_WIDTH + normalizationOffset;
345 
346  }
347  }
348  }
349 }
350 
351 void GridClassifier::populateHistogramBins (const QImage &image,
352  const Transformation &transformation,
353  double xMin,
354  double xMax,
355  double yMin,
356  double yMax)
357 {
358  LOG4CPP_INFO_S ((*mainCat)) << "GridClassifier::populateHistogramBins";
359 
360  ColorFilter filter;
361  QRgb rgbBackground = filter.marginColor (&image);
362 
363  for (int x = 0; x < image.width(); x++) {
364  for (int y = 0; y < image.height(); y++) {
365 
366  QColor pixel = image.pixel (x, y);
367 
368  // Skip pixels with background color
369  if (!filter.colorCompare (rgbBackground,
370  pixel.rgb ())) {
371 
372  // Add this pixel to histograms
373  QPointF posGraph;
374  transformation.transformScreenToRawGraph (QPointF (x, y), posGraph);
375 
376  if (transformation.modelCoords().coordsType() == COORDS_TYPE_POLAR) {
377 
378  // If out of the 0 to period range, the theta value must shifted by the period to get into that range
379  while (posGraph.x() < xMin) {
380  posGraph.setX (posGraph.x() + transformation.modelCoords().thetaPeriod());
381  }
382  while (posGraph.x() > xMax) {
383  posGraph.setX (posGraph.x() - transformation.modelCoords().thetaPeriod());
384  }
385  }
386 
387  int binX = binFromCoordinate (posGraph.x(), xMin, xMax);
388  int binY = binFromCoordinate (posGraph.y(), yMin, yMax);
389 
390  ENGAUGE_ASSERT (0 <= binX);
391  ENGAUGE_ASSERT (0 <= binY);
392  ENGAUGE_ASSERT (binX < m_numHistogramBins);
393  ENGAUGE_ASSERT (binY < m_numHistogramBins);
394 
395  // Roundoff error in log scaling may let bin go just outside legal range
396  binX = qMin (binX, m_numHistogramBins - 1);
397  binY = qMin (binY, m_numHistogramBins - 1);
398 
399  ++m_binsX [binX];
400  ++m_binsY [binY];
401  }
402  }
403  }
404 }
405 
406 void GridClassifier::searchCountSpace (double bins [],
407  double binStart,
408  double binStep,
409  int &countMax)
410 {
411  LOG4CPP_INFO_S ((*mainCat)) << "GridClassifier::searchCountSpace"
412  << " start=" << binStart
413  << " step=" << binStep;
414 
415  // Loop though the space of possible counts
416  Correlation correlation (m_numHistogramBins);
417  double *picketFence = new double [m_numHistogramBins];
418  double corr, corrMax;
419  bool isFirst = true;
420  int countStop = 1 + (m_numHistogramBins - binStart) / binStep;
421  for (int count = 2; count <= countStop; count++) {
422 
423  loadPicketFence (picketFence,
424  binStart,
425  binStep,
426  count,
427  true);
428 
429  correlation.correlateWithoutShift (m_numHistogramBins,
430  bins,
431  picketFence,
432  corr);
433  if (isFirst || (corr > corrMax)) {
434  countMax = count;
435  corrMax = corr;
436  }
437 
438  isFirst = false;
439  }
440 
441  delete [] picketFence;
442 }
443 
444 void GridClassifier::searchStartStepSpace (bool isGnuplot,
445  double bins [],
446  const QString &coordinateLabel,
447  double valueMin,
448  double valueMax,
449  double &start,
450  double &step,
451  double &binStartMax,
452  double &binStepMax)
453 {
454  LOG4CPP_INFO_S ((*mainCat)) << "GridClassifier::searchStartStepSpace";
455 
456  // Correlations are tracked for logging
457  double *signalA = new double [m_numHistogramBins];
458  double *signalB = new double [m_numHistogramBins];
459  double *correlations = new double [m_numHistogramBins];
460  double *correlationsMax = new double [m_numHistogramBins];
461 
462  // Loop though the space of possible gridlines using the independent variables (start,step).
463  Correlation correlation (m_numHistogramBins);
464  double *picketFence = new double [m_numHistogramBins];
465  int binStart;
466  double corr = 0, corrMax = 0;
467  bool isFirst = true;
468 
469  // We do not explicitly search(=loop) through binStart here, since Correlation::correlateWithShift will take
470  // care of that for us
471 
472  // Step search starts out small, and stops at value that gives count substantially greater than 2. Freakishly small
473  // images need to have MIN_STEP_PIXELS overridden so the loop iterates at least once
474  binStartMax = BIN_START_UNSHIFTED + 1; // In case search below ever fails
475  binStepMax = qMin (MIN_STEP_PIXELS, m_numHistogramBins / 8); // In case search below ever fails
476  for (int binStep = qMin (MIN_STEP_PIXELS, m_numHistogramBins / 8); binStep < m_numHistogramBins / 4; binStep++) {
477 
478  loadPicketFence (picketFence,
479  BIN_START_UNSHIFTED,
480  binStep,
481  PEAK_HALF_WIDTH,
482  false);
483 
484  correlation.correlateWithShift (m_numHistogramBins,
485  bins,
486  picketFence,
487  binStart,
488  corr,
489  correlations);
490  if (isFirst || (corr > corrMax)) {
491 
492  int binStartMaxNext = binStart + BIN_START_UNSHIFTED + 1; // Compensate for the shift performed inside loadPicketFence
493 
494  // Make sure binStartMax never goes out of bounds
495  if (binStartMaxNext < m_numHistogramBins) {
496 
497  binStartMax = binStartMaxNext;
498  binStepMax = binStep;
499  corrMax = corr;
500  copyVectorToVector (bins, signalA);
501  copyVectorToVector (picketFence, signalB);
502  copyVectorToVector (correlations, correlationsMax);
503 
504  // Output a gnuplot file. We should see the correlation values consistently increasing
505  if (isGnuplot) {
506 
507  dumpGnuplotCoordinate(coordinateLabel,
508  corr,
509  bins,
510  valueMin,
511  valueMax,
512  binStart,
513  binStep);
514  }
515  }
516  }
517 
518  isFirst = false;
519  }
520 
521  // Convert from bins back to graph coordinates
522  start = coordinateFromBin (binStartMax,
523  valueMin,
524  valueMax);
525  if (binStartMax + binStepMax < m_numHistogramBins) {
526 
527  // Normal case where a reasonable step value is being calculated
528  double next = coordinateFromBin (binStartMax + binStepMax,
529  valueMin,
530  valueMax);
531  step = next - start;
532  } else {
533 
534  // Pathological case where step jumps to outside the legal range. We bring the step back into range
535  double next = coordinateFromBin (m_numHistogramBins - 1,
536  valueMin,
537  valueMax);
538  step = next - start;
539  }
540 
541  if (isGnuplot) {
542  dumpGnuplotCorrelations (coordinateLabel,
543  valueMin,
544  valueMax,
545  signalA,
546  signalB,
547  correlationsMax);
548  }
549 
550  delete [] signalA;
551  delete [] signalB;
552  delete [] correlations;
553  delete [] correlationsMax;
554  delete [] picketFence;
555 }
void transformScreenToRawGraph(const QPointF &coordScreen, QPointF &coordGraph) const
Transform from cartesian pixel screen coordinates to cartesian/polar graph coordinates.
double originRadius() const
Get method for origin radius in polar mode.
Fast cross correlation between two functions.
Definition: Correlation.h:14
Class for filtering image to remove unimportant information.
Definition: ColorFilter.h:20
DocumentModelCoords modelCoords() const
Get method for DocumentModelCoords.
double thetaPeriod() const
Return the period of the theta value for polar coordinates, consistent with CoordThetaUnits.
Affine transformation between screen and graph coordinates, based on digitized axis points...
QRgb marginColor(const QImage *image) const
Identify the margin color of the image, which is defined as the most common color in the four margins...
Definition: ColorFilter.cpp:73
GridClassifier()
Single constructor.
CoordsType coordsType() const
Get method for coordinates type.
bool colorCompare(QRgb rgb1, QRgb rgb2) const
See if the two color values are close enough to be considered to be the same.
Definition: ColorFilter.cpp:25
Classify the grid pattern in an original image.
void classify(bool isGnuplot, const QPixmap &originalPixmap, const Transformation &transformation, int &countX, double &startX, double &stepX, int &countY, double &startY, double &stepY)
Classify the specified image, and return the most probably x and y grid settings. ...