Introduce some extra testing and benchmarking command-line options to

the GTK front end, plus a 'make test' target in the GTK makefile which
uses them to automatically generate 100 puzzles for each game at each
preset configuration, test-run them back through the solver without
their aux_info to ensure that can cope, and produce an HTML box plot
of game generation times for each preset.

As part of this work I've removed the TESTSOLVE mechanism from r9549,
since the new --test-solve option does the same thing better (in that
when something goes wrong it prints the random seed that caused the
problem).

[originally from svn r9825]
[r9549 == 5a095b8a08fa9f087b93c86aea0fa027138b028d]
This commit is contained in:
Simon Tatham
2013-04-11 12:51:06 +00:00
parent ed82ef5c0e
commit 120f6de605
4 changed files with 260 additions and 53 deletions

16
Recipe
View File

@ -205,3 +205,19 @@ PuzzleApplet.class: PuzzleApplet.java org
echo '<applet archive="'$@'" code="PuzzleApplet" width="700" height="500"></applet>' >$*.html
mv PuzzleEngine.class $<
!end
# A benchmarking and testing target for the GTK puzzles.
!begin gtk
test: benchmark.html benchmark.txt
benchmark.html: benchmark.txt benchmark.pl
./benchmark.pl benchmark.txt > $@
benchmark.txt: $(GAMES)
for i in $(GAMES); do \
for params in $$(env -i ./$(BINPREFIX)$$i --list-presets | cut -f1 -d' '); do \
env -i ./$(BINPREFIX)$$i --test-solve --time-generation --generate 100 $$params \
|| exit 1; \
done; \
done > $@
!end

153
benchmark.pl Executable file
View File

@ -0,0 +1,153 @@
#!/usr/bin/perl
use strict;
use warnings;
my @presets = ();
my %presets = ();
my $maxval = 0;
while (<>) {
chomp;
if (/^(.*)(#.*): ([\d\.]+)$/) {
push @presets, $1 unless defined $presets{$1};
push @{$presets{$1}}, $3;
$maxval = $3 if $maxval < $3;
}
}
print <<EOF;
<!DOCTYPE html>
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=ASCII" />
<title>Puzzle generation-time benchmarks</title>
<script type="text/javascript">
//<![CDATA[
function choose_scale_ticks(scale) {
var nscale = 1, j = 0, factors = [2,2.5,2];
while (scale / nscale > 20) {
nscale *= factors[j];
j = (j+1) % factors.length;
}
return nscale;
}
function initPlots() {
var canvases = document.getElementsByTagName('canvas');
for (var i = 0; i < canvases.length; i++) {
var canvas = canvases[i];
var scale = eval(canvas.getAttribute("data-scale"));
var add = 20.5, mult = (canvas.width - 2*add) / scale;
var data = eval(canvas.getAttribute("data-points"));
var ctx = canvas.getContext('2d');
ctx.lineWidth = '1px';
ctx.lineCap = 'round';
ctx.lineJoin = 'round';
ctx.strokeStyle = ctx.fillStyle = '#000000';
if (data === "scale") {
// Draw scale.
ctx.font = "16px sans-serif";
ctx.textAlign = "center";
ctx.textBaseline = "alphabetic";
var nscale = choose_scale_ticks(scale);
for (var x = 0; x <= scale; x += nscale) {
ctx.beginPath();
ctx.moveTo(add+mult*x, canvas.height);
ctx.lineTo(add+mult*x, canvas.height - 3);
ctx.stroke();
ctx.fillText(x + "s", add+mult*x, canvas.height - 6);
}
} else {
// Draw a box plot.
function quantile(x) {
var n = (data.length * x) | 0;
return (data[n-1] + data[n]) / 2;
}
var q1 = quantile(0.25), q2 = quantile(0.5), q3 = quantile(0.75);
var iqr = q3 - q1;
var top = 0.5, bot = canvas.height - 1.5, mid = (top+bot)/2;
var wlo = null, whi = null; // whisker ends
ctx.strokeStyle = '#bbbbbb';
var nscale = choose_scale_ticks(scale);
for (var x = 0; x <= scale; x += nscale) {
ctx.beginPath();
ctx.moveTo(add+mult*x, 0);
ctx.lineTo(add+mult*x, canvas.height);
ctx.stroke();
}
ctx.strokeStyle = '#000000';
for (var j in data) {
var x = data[j];
if (x >= q1 - 1.5 * iqr && x <= q3 + 1.5 * iqr) {
if (wlo === null || wlo > x)
wlo = x;
if (whi === null || whi < x)
whi = x;
} else {
ctx.beginPath();
ctx.arc(add+mult*x, mid, 2, 0, 2*Math.PI);
ctx.stroke();
if (x >= q1 - 3 * iqr && x <= q3 + 3 * iqr)
ctx.fill();
}
}
ctx.beginPath();
// Box
ctx.moveTo(add+mult*q1, top);
ctx.lineTo(add+mult*q3, top);
ctx.lineTo(add+mult*q3, bot);
ctx.lineTo(add+mult*q1, bot);
ctx.closePath();
// Line at median
ctx.moveTo(add+mult*q2, top);
ctx.lineTo(add+mult*q2, bot);
// Lower whisker
ctx.moveTo(add+mult*q1, mid);
ctx.lineTo(add+mult*wlo, mid);
ctx.moveTo(add+mult*wlo, top);
ctx.lineTo(add+mult*wlo, bot);
// Upper whisker
ctx.moveTo(add+mult*q3, mid);
ctx.lineTo(add+mult*whi, mid);
ctx.moveTo(add+mult*whi, top);
ctx.lineTo(add+mult*whi, bot);
ctx.stroke();
}
}
}
//]]>
</script>
</head>
<body onLoad="initPlots();">
<h1 align=center>Puzzle generation-time benchmarks</h1>
<table>
<tr><th>Preset</th><td><canvas width=700 height=30 data-points='"scale"' data-scale="$maxval"></td></tr>
EOF
for my $preset (@presets) {
print "<tr><td>", &escape($preset), "</td><td><canvas width=700 height=15 data-points=\"[";
print join ",", sort { $a <=> $b } @{$presets{$preset}};
print "]\" data-scale=\"$maxval\"></td></tr>\n";
}
print <<EOF;
</body>
</html>
EOF
sub escape {
my ($text) = @_;
$text =~ s/&/&amp;/g;
$text =~ s/</&lt;/g;
$text =~ s/>/&gt;/g;
return $text;
}

95
gtk.c
View File

@ -12,6 +12,7 @@
#include <math.h>
#include <sys/time.h>
#include <sys/resource.h>
#include <gtk/gtk.h>
#include <gdk/gdkkeysyms.h>
@ -2482,6 +2483,7 @@ int main(int argc, char **argv)
char *pname = argv[0];
char *error;
int ngenerate = 0, print = FALSE, px = 1, py = 1;
int time_generation = FALSE, test_solve = FALSE, list_presets = FALSE;
int soln = FALSE, colour = FALSE;
float scale = 1.0F;
float redo_proportion = 0.0F;
@ -2534,6 +2536,12 @@ int main(int argc, char **argv)
}
} else
ngenerate = 1;
} else if (doing_opts && !strcmp(p, "--time-generation")) {
time_generation = TRUE;
} else if (doing_opts && !strcmp(p, "--test-solve")) {
test_solve = TRUE;
} else if (doing_opts && !strcmp(p, "--list-presets")) {
list_presets = TRUE;
} else if (doing_opts && !strcmp(p, "--save")) {
if (--ac > 0) {
savefile = *++av;
@ -2722,7 +2730,8 @@ int main(int argc, char **argv)
* generated descriptive game IDs.)
*/
while (ngenerate == 0 || i < n) {
char *pstr, *err;
char *pstr, *err, *seed;
struct rusage before, after;
if (ngenerate == 0) {
pstr = fgetline(stdin);
@ -2748,9 +2757,63 @@ int main(int argc, char **argv)
return 1;
}
}
sfree(pstr);
midend_new_game(me);
if (time_generation)
getrusage(RUSAGE_SELF, &before);
midend_new_game(me);
seed = midend_get_random_seed(me);
if (time_generation) {
double elapsed;
getrusage(RUSAGE_SELF, &after);
elapsed = (after.ru_utime.tv_sec -
before.ru_utime.tv_sec);
elapsed += (after.ru_utime.tv_usec -
before.ru_utime.tv_usec) / 1000000.0;
printf("%s %s: %.6f\n", thegame.name, seed, elapsed);
}
if (test_solve && thegame.can_solve) {
/*
* Now destroy the aux_info in the midend, by means of
* re-entering the same game id, and then try to solve
* it.
*/
char *game_id, *err;
game_id = midend_get_game_id(me);
err = midend_game_id(me, game_id);
if (err) {
fprintf(stderr, "%s %s: game id re-entry error: %s\n",
thegame.name, seed, err);
return 1;
}
midend_new_game(me);
sfree(game_id);
err = midend_solve(me);
/*
* If the solve operation returned the error "Solution
* not known for this puzzle", that's OK, because that
* just means it's a puzzle for which we don't have an
* algorithmic solver and hence can't solve it without
* the aux_info, e.g. Netslide. Any other error is a
* problem, though.
*/
if (err && strcmp(err, "Solution not known for this puzzle")) {
fprintf(stderr, "%s %s: solve error: %s\n",
thegame.name, seed, err);
return 1;
}
}
sfree(pstr);
sfree(seed);
if (doc) {
err = midend_print_puzzle(me, doc, soln);
@ -2794,7 +2857,7 @@ int main(int argc, char **argv)
}
sfree(realname);
}
if (!doc && !savefile) {
if (!doc && !savefile && !time_generation) {
id = midend_get_game_id(me);
puts(id);
sfree(id);
@ -2813,6 +2876,30 @@ int main(int argc, char **argv)
midend_free(me);
return 0;
} else if (list_presets) {
/*
* Another specialist mode which causes the puzzle to list the
* game_params strings for all its preset configurations.
*/
int i, npresets;
midend *me;
me = midend_new(NULL, &thegame, NULL, NULL);
npresets = midend_num_presets(me);
for (i = 0; i < npresets; i++) {
game_params *params;
char *name, *paramstr;
midend_fetch_preset(me, i, &name, &params);
paramstr = thegame.encode_params(params, TRUE);
printf("%s %s\n", paramstr, name);
sfree(paramstr);
}
midend_free(me);
return 0;
} else {
frontend *fe;

View File

@ -445,55 +445,6 @@ void midend_new_game(midend *me)
sfree(movestr);
}
/*
* Soak test, enabled by setting <gamename>_TESTSOLVE in the
* environment. This causes an immediate attempt to re-solve the
* game without benefit of aux_info. The effect is that (at least
* on Unix) you can run 'FOO_TESTSOLVE=1 foo --generate 10000
* <params>#12345' and it will generate a lot of game ids and
* instantly pass each one back to the solver.
*
* (It's worth putting in an explicit seed in any such test, so
* you can repeat it to diagnose a problem if one comes up!)
*/
{
char buf[80];
int j, k;
static int doing_test_solve = -1;
if (doing_test_solve < 0) {
sprintf(buf, "%s_TESTSOLVE", me->ourgame->name);
for (j = k = 0; buf[j]; j++)
if (!isspace((unsigned char)buf[j]))
buf[k++] = toupper((unsigned char)buf[j]);
buf[k] = '\0';
if (getenv(buf)) {
/*
* Since this is used for correctness testing, it's
* helpful to have a visual acknowledgment that the
* user hasn't mistyped the environment variable name.
*/
fprintf(stderr, "Running solver soak tests\n");
doing_test_solve = TRUE;
} else {
doing_test_solve = FALSE;
}
}
if (doing_test_solve) {
game_state *s;
char *msg, *movestr;
msg = NULL;
movestr = me->ourgame->solve(me->states[0].state,
me->states[0].state,
NULL, &msg);
assert(movestr && !msg);
s = me->ourgame->execute_move(me->states[0].state, movestr);
assert(s);
me->ourgame->free_game(s);
sfree(movestr);
}
}
me->states[me->nstates].movestr = NULL;
me->states[me->nstates].movetype = NEWGAME;
me->nstates++;