diff --git a/Recipe b/Recipe index a7db32b..822afb9 100644 --- a/Recipe +++ b/Recipe @@ -69,6 +69,10 @@ obfusc : [U] obfusc STANDALONE latincheck : [U] latin[STANDALONE_LATIN_TEST] LATIN_DEPS STANDALONE latincheck : [C] latin[STANDALONE_LATIN_TEST] LATIN_DEPS STANDALONE +# Test program built from matching.c. +matching : [U] matching[STANDALONE_MATCHING_TEST] tree234 STANDALONE +matching : [C] matching[STANDALONE_MATCHING_TEST] tree234 STANDALONE + puzzles : [G] windows[COMBINED] WINDOWS_COMMON COMMON ALL noicon.res # Mac OS X unified application containing all the puzzles. diff --git a/matching.c b/matching.c new file mode 100644 index 0000000..89f67ee --- /dev/null +++ b/matching.c @@ -0,0 +1,753 @@ +/* + * Implementation of matching.h. + */ + +#include +#include +#include + +#include "puzzles.h" +#include "matching.h" + +struct scratch { + /* + * Current contents of the in-progress matching. LtoR is an array + * of nl integers, each of which holds a value in {0,1,...,nr-1}, + * or -1 for no current assignment. RtoL is exactly the reverse. + * + * Invariant: LtoR[i] is non-empty and equal to j if and only if + * RtoL[j] is non-empty and equal to i. + */ + int *LtoR, *RtoL; + + /* + * Arrays of nl and nr integer respectively, giving the layer + * assigned to each integer in the breadth-first search step of + * the algorithm. + */ + int *Llayer, *Rlayer; + + /* + * Arrays of nl and nr integers respectively, used to hold the + * to-do queues in the breadth-first search. + */ + int *Lqueue, *Rqueue; + + /* + * An augmenting path of vertices, alternating between L vertices + * (in the even-numbered positions, starting at 0) and R (in the + * odd positions). Must be long enough to hold any such path that + * never repeats a vertex, i.e. must be at least 2*min(nl,nr) in + * size. + */ + int *augpath; + + /* + * Track the progress of the depth-first search at each + * even-numbered layer. Has one element for each even-numbered + * position in augpath. + */ + int *dfsstate; + + /* + * Store a random permutation of the L vertex indices, if we're + * randomising the dfs phase. + */ + int *Lorder; +}; + +size_t matching_scratch_size(int nl, int nr) +{ + size_t n; + int nmin = (nl < nr ? nl : nr); + + n = (sizeof(struct scratch) + sizeof(int)-1)/sizeof(int); + n += nl; /* LtoR */ + n += nr; /* RtoL */ + n += nl; /* Llayer */ + n += nr; /* Rlayer */ + n += nl; /* Lqueue */ + n += nr; /* Rqueue */ + n += 2*nmin; /* augpath */ + n += nmin; /* dfsstate */ + n += nl; /* Lorder */ + return n * sizeof(int); +} + +int matching_with_scratch(void *scratchv, + int nl, int nr, int **adjlists, int *adjsizes, + random_state *rs, int *outl, int *outr) +{ + struct scratch *s = (struct scratch *)scratchv; + int L, R, i, j; + + /* + * Set up the various array pointers in the scratch space. + */ + { + int *p = scratchv; + int nmin = (nl < nr ? nl : nr); + + p += (sizeof(struct scratch) + sizeof(int)-1)/sizeof(int); + s->LtoR = p; p += nl; + s->RtoL = p; p += nr; + s->Llayer = p; p += nl; + s->Rlayer = p; p += nr; + s->Lqueue = p; p += nl; + s->Rqueue = p; p += nr; + s->augpath = p; p += 2*nmin; + s->dfsstate = p; p += nmin; + s->Lorder = p; p += nl; + } + + /* + * Set up the initial matching, which is empty. + */ + for (L = 0; L < nl; L++) + s->LtoR[L] = -1; + for (R = 0; R < nr; R++) + s->RtoL[R] = -1; + + while (1) { + /* + * Breadth-first search starting from the unassigned left + * vertices, traversing edges from left to right only if they + * are _not_ part of the matching, and from right to left only + * if they _are_. We assign a 'layer number' to all vertices + * visited by this search, with the starting vertices being + * layer 0 and every successor of a layer-n node being layer + * n+1. + */ + int Lqs, Rqs, layer, target_layer; + + for (L = 0; L < nl; L++) + s->Llayer[L] = -1; + for (R = 0; R < nr; R++) + s->Rlayer[R] = -1; + + Lqs = 0; + for (L = 0; L < nl; L++) { + if (s->LtoR[L] == -1) { + s->Llayer[L] = 0; + s->Lqueue[Lqs++] = L; + } + } + + layer = 0; + while (1) { + int found_free_R_vertex = FALSE; + + Rqs = 0; + for (i = 0; i < Lqs; i++) { + L = s->Lqueue[i]; + assert(s->Llayer[L] == layer); + + for (j = 0; j < adjsizes[L]; j++) { + R = adjlists[L][j]; + if (R != s->LtoR[L] && s->Rlayer[R] == -1) { + s->Rlayer[R] = layer+1; + s->Rqueue[Rqs++] = R; + if (s->RtoL[R] == -1) + found_free_R_vertex = TRUE; + } + } + } + layer++; + + if (found_free_R_vertex) + break; + + if (Rqs == 0) + goto done; + + Lqs = 0; + for (j = 0; j < Rqs; j++) { + R = s->Rqueue[j]; + assert(s->Rlayer[R] == layer); + if ((L = s->RtoL[R]) != -1 && s->Llayer[L] == -1) { + s->Llayer[L] = layer+1; + s->Lqueue[Lqs++] = L; + } + } + layer++; + + if (Lqs == 0) + goto done; + } + + target_layer = layer; + + /* + * Vertices in the target layer are only interesting if + * they're actually unassigned. Blanking out the others here + * will save us a special case in the dfs loop below. + */ + for (R = 0; R < nr; R++) + if (s->Rlayer[R] == target_layer && s->RtoL[R] != -1) + s->Rlayer[R] = -1; + + /* + * Choose an ordering in which to try the L vertices at the + * start of the next pass. + */ + for (L = 0; L < nl; L++) + s->Lorder[L] = L; + if (rs) + shuffle(s->Lorder, nl, sizeof(*s->Lorder), rs); + + /* + * Now depth-first search through that layered set of vertices + * to find as many (vertex-)disjoint augmenting paths as we + * can, and for each one we find, augment the matching. + */ + s->dfsstate[0] = 0; + i = 0; + while (1) { + /* + * Find the next vertex to go on the end of augpath. + */ + if (i == 0) { + /* In this special case, we're just looking for L + * vertices that are not yet assigned. */ + if (s->dfsstate[i] == nl) + break; /* entire DFS has finished */ + + L = s->Lorder[s->dfsstate[i]++]; + + if (s->Llayer[L] != 2*i) + continue; /* skip this vertex */ + } else { + /* In the more usual case, we're going through the + * adjacency list for the previous L vertex. */ + L = s->augpath[2*i-2]; + j = s->dfsstate[i]++; + if (j == adjsizes[L]) { + /* Run out of neighbours of the previous vertex. */ + i--; + continue; + } + if (rs && adjsizes[L] - j > 1) { + int which = j + random_upto(rs, adjsizes[L] - j); + int tmp = adjlists[L][which]; + adjlists[L][which] = adjlists[L][j]; + adjlists[L][j] = tmp; + } + R = adjlists[L][j]; + + if (s->Rlayer[R] != 2*i-1) + continue; /* skip this vertex */ + + s->augpath[2*i-1] = R; + s->Rlayer[R] = -1; /* mark vertex as visited */ + + if (2*i-1 == target_layer) { + /* + * We've found an augmenting path, in the form of + * an even-sized list of vertices alternating + * L,R,...,L,R, with the initial L and final R + * vertex free and otherwise each R currently + * connected to the next L. Adjust so that each L + * connects to the next R, increasing the edge + * count in the matching by 1. + */ + for (j = 0; j < 2*i; j += 2) { + s->LtoR[s->augpath[j]] = s->augpath[j+1]; + s->RtoL[s->augpath[j+1]] = s->augpath[j]; + } + + /* + * Having dealt with that path, and already marked + * all its vertices as visited, rewind right to + * the start and resume our DFS from a new + * starting L-vertex. + */ + i = 0; + continue; + } + + L = s->RtoL[R]; + if (s->Llayer[L] != 2*i) + continue; /* skip this vertex */ + } + + s->augpath[2*i] = L; + s->Llayer[L] = -1; /* mark vertex as visited */ + i++; + s->dfsstate[i] = 0; + } + } + + done: + /* + * Fill in the output arrays. + */ + if (outl) { + for (i = 0; i < nl; i++) + outl[i] = s->LtoR[i]; + } + if (outr) { + for (j = 0; j < nr; j++) + outr[j] = s->RtoL[j]; + } + + /* + * Return the number of matching edges. + */ + for (i = j = 0; i < nl; i++) + if (s->LtoR[i] != -1) + j++; + return j; +} + +int matching(int nl, int nr, int **adjlists, int *adjsizes, + random_state *rs, int *outl, int *outr) +{ + void *scratch; + int size; + int ret; + + size = matching_scratch_size(nl, nr); + scratch = malloc(size); + if (!scratch) + return -1; + + ret = matching_with_scratch(scratch, nl, nr, adjlists, adjsizes, + rs, outl, outr); + + free(scratch); + + return ret; +} + +#ifdef STANDALONE_MATCHING_TEST + +/* + * Diagnostic routine used in testing this algorithm. It is passed a + * pointer to a piece of scratch space that's just been used by + * matching_with_scratch, and extracts from it a labelling of the + * input graph that acts as a 'witness' to the maximality of the + * returned matching. + * + * The output parameter 'witness' should be an array of (nl+nr) + * integers, indexed such that witness[L] corresponds to an L-vertex (for + * L=0,1,...,nl-1) and witness[nl+R] corresponds to an R-vertex (for + * R=0,1,...,nr-1). On return, this array will assign each vertex a + * label which is either 0 or 1, and the following properties will + * hold: + * + * + all vertices not paired up by the matching are type L0 or R1 + * + every L0->R1 edge is used by the matching + * + no L1->R0 edge is used by the matching. + * + * The mere existence of such a labelling is enough to prove the + * maximality of the matching, because if there is any larger matching + * then its symmetric difference with this one must include at least + * one 'augmenting path', which starts at a free L-vertex and ends at + * a free R-vertex, traversing only unused L->R edges and only used + * R->L edges. But that would mean it starts at an L0, ends at an R1, + * and never follows an edge that can get from an 0 to a 1. + */ +static void matching_witness(void *scratchv, int nl, int nr, int *witness) +{ + struct scratch *s = (struct scratch *)scratchv; + int i, j; + + for (i = 0; i < nl; i++) + witness[i] = s->Llayer[i] == -1; + for (j = 0; j < nr; j++) + witness[nl + j] = s->Rlayer[j] == -1; +} + +/* + * Standalone tool to run the matching algorithm. + */ + +#include +#include +#include + +#include "tree234.h" + +int nl, nr, count; +int **adjlists, *adjsizes; +int *adjdata, *outl, *outr, *witness; +void *scratch; +random_state *rs; + +void allocate(int nl_, int nr_, int maxedges) +{ + nl = nl_; + nr = nr_; + adjdata = snewn(maxedges, int); + adjlists = snewn(nl, int *); + adjsizes = snewn(nl, int); + outl = snewn(nl, int); + outr = snewn(nr, int); + witness = snewn(nl+nr, int); + scratch = smalloc(matching_scratch_size(nl, nr)); +} + +void deallocate(void) +{ + sfree(adjlists); + sfree(adjsizes); + sfree(adjdata); + sfree(outl); + sfree(outr); + sfree(witness); + sfree(scratch); +} + +void find_and_check_matching(void) +{ + int i, j, k; + + count = matching_with_scratch(scratch, nl, nr, adjlists, adjsizes, + rs, outl, outr); + matching_witness(scratch, nl, nr, witness); + + for (i = j = 0; i < nl; i++) { + if (outl[i] != -1) { + assert(0 <= outl[i] && outl[i] < nr); + assert(outr[outl[i]] == i); + j++; + + for (k = 0; k < adjsizes[i]; k++) + if (adjlists[i][k] == outl[i]) + break; + assert(k < adjsizes[i]); + } + } + assert(j == count); + + for (i = j = 0; i < nr; i++) { + if (outr[i] != -1) { + assert(0 <= outr[i] && outr[i] < nl); + assert(outl[outr[i]] == i); + j++; + } + } + assert(j == count); + + for (i = 0; i < nl; i++) { + if (outl[i] == -1) + assert(witness[i] == 0); + } + for (i = 0; i < nr; i++) { + if (outr[i] == -1) + assert(witness[nl+i] == 1); + } + for (i = 0; i < nl; i++) { + for (j = 0; j < adjsizes[i]; j++) { + k = adjlists[i][j]; + + if (outl[i] == k) + assert(!(witness[i] == 1 && witness[nl+k] == 0)); + else + assert(!(witness[i] == 0 && witness[nl+k] == 1)); + } + } +} + +struct nodename { + const char *name; + int index; +}; + +int compare_nodes(void *av, void *bv) +{ + const struct nodename *a = (const struct nodename *)av; + const struct nodename *b = (const struct nodename *)bv; + return strcmp(a->name, b->name); +} + +int node_index(tree234 *n2i, tree234 *i2n, const char *name) +{ + struct nodename *nn, *nn_prev; + char *namedup = dupstr(name); + + nn = snew(struct nodename); + nn->name = namedup; + nn->index = count234(n2i); + + nn_prev = add234(n2i, nn); + if (nn_prev != nn) { + sfree(nn); + sfree(namedup); + } else { + addpos234(i2n, nn, nn->index); + } + + return nn_prev->index; +} + +struct edge { + int L, R; +}; + +int compare_edges(void *av, void *bv) +{ + const struct edge *a = (const struct edge *)av; + const struct edge *b = (const struct edge *)bv; + if (a->L < b->L) return -1; + if (a->L > b->L) return +1; + if (a->R < b->R) return -1; + if (a->R > b->R) return +1; + return 0; +} + +void matching_from_user_input(FILE *fp, const char *filename) +{ + tree234 *Ln2i, *Li2n, *Rn2i, *Ri2n, *edges; + char *line = NULL; + struct edge *e; + int i, lineno = 0; + int *adjptr; + + Ln2i = newtree234(compare_nodes); + Rn2i = newtree234(compare_nodes); + Li2n = newtree234(NULL); + Ri2n = newtree234(NULL); + edges = newtree234(compare_edges); + + while (sfree(line), lineno++, (line = fgetline(fp)) != NULL) { + char *p, *Lname, *Rname; + + p = line; + while (*p && isspace((unsigned char)*p)) p++; + if (!*p) + continue; + + Lname = p; + while (*p && !isspace((unsigned char)*p)) p++; + if (*p) + *p++ = '\0'; + while (*p && isspace((unsigned char)*p)) p++; + + if (!*p) { + fprintf(stderr, "%s:%d: expected 2 words, found 1\n", + filename, lineno); + exit(1); + } + + Rname = p; + while (*p && !isspace((unsigned char)*p)) p++; + if (*p) + *p++ = '\0'; + while (*p && isspace((unsigned char)*p)) p++; + + if (*p) { + fprintf(stderr, "%s:%d: expected 2 words, found more\n", + filename, lineno); + exit(1); + } + + e = snew(struct edge); + e->L = node_index(Ln2i, Li2n, Lname); + e->R = node_index(Rn2i, Ri2n, Rname); + if (add234(edges, e) != e) { + fprintf(stderr, "%s:%d: duplicate edge\n", + filename, lineno); + exit(1); + } + } + + allocate(count234(Ln2i), count234(Rn2i), count234(edges)); + + adjptr = adjdata; + for (i = 0; i < nl; i++) + adjlists[i] = NULL; + for (i = 0; (e = index234(edges, i)) != NULL; i++) { + if (!adjlists[e->L]) + adjlists[e->L] = adjptr; + *adjptr++ = e->R; + adjsizes[e->L] = adjptr - adjlists[e->L]; + } + + find_and_check_matching(); + + for (i = 0; i < nl; i++) { + if (outl[i] != -1) { + struct nodename *Lnn = index234(Li2n, i); + struct nodename *Rnn = index234(Ri2n, outl[i]); + printf("%s %s\n", Lnn->name, Rnn->name); + } + } +} + +void test_subsets(void) +{ + int b = 8; + int n = 1 << b; + int i, j, nruns, expected_size; + int *adjptr; + int *edgecounts; + struct stats { + int min, max; + double n, sx, sxx; + } *stats; + static const char seed[] = "fixed random seed for repeatability"; + + /* + * Generate a graph in which every subset of [b] = {1,...,b} + * (represented as a b-bit integer 0 <= i < n) has an edge going + * to every subset obtained by removing exactly one element. + * + * This graph is the disjoint union of the corresponding graph for + * each layer (collection of same-sized subset) of the power set + * of [b]. Each of those graphs has a matching of size equal to + * the smaller of its vertex sets. So we expect the overall size + * of the output matching to be less than n by the size of the + * largest layer, that is, to be n - binomial(n, floor(n/2)). + * + * We run the generation repeatedly, randomising it every time, + * and we expect to see every possible edge appear sooner or + * later. + */ + + rs = random_new(seed, strlen(seed)); + + allocate(n, n, n*b); + adjptr = adjdata; + expected_size = 0; + for (i = 0; i < n; i++) { + adjlists[i] = adjptr; + for (j = 0; j < b; j++) { + if (i & (1 << j)) + *adjptr++ = i & ~(1 << j); + } + adjsizes[i] = adjptr - adjlists[i]; + if (adjsizes[i] != b/2) + expected_size++; + } + + edgecounts = snewn(n*b, int); + for (i = 0; i < n*b; i++) + edgecounts[i] = 0; + + stats = snewn(b, struct stats); + + nruns = 0; + while (nruns < 10000) { + nruns++; + find_and_check_matching(); + assert(count == expected_size); + + for (i = 0; i < n; i++) + for (j = 0; j < b; j++) + if ((i ^ outl[i]) == (1 << j)) + edgecounts[b*i+j]++; + + if (nruns % 1000 == 0) { + for (i = 0; i < b; i++) { + struct stats *st = &stats[i]; + st->min = st->max = -1; + st->n = st->sx = st->sxx = 0; + } + + for (i = 0; i < n; i++) { + int pop = 0; + for (j = 0; j < b; j++) + if (i & (1 << j)) + pop++; + pop--; + + for (j = 0; j < b; j++) { + if (i & (1 << j)) { + struct stats *st = &stats[pop]; + int x = edgecounts[b*i+j]; + if (st->max == -1 || st->max < x) + st->max = x; + if (st->min == -1 || st->min > x) + st->min = x; + st->n++; + st->sx += x; + st->sxx += (double)x*x; + } else { + assert(edgecounts[b*i+j] == 0); + } + } + } + } + } + + printf("after %d runs:\n", nruns); + for (j = 0; j < b; j++) { + struct stats *st = &stats[j]; + printf("edges between layers %d,%d:" + " min=%d max=%d mean=%f variance=%f\n", + j, j+1, st->min, st->max, st->sx/st->n, + (st->sxx - st->sx*st->sx/st->n) / st->n); + } +} + +int main(int argc, char **argv) +{ + static const char stdin_identifier[] = ""; + const char *infile = NULL; + int doing_opts = TRUE; + enum { USER_INPUT, AUTOTEST } mode = USER_INPUT; + + while (--argc > 0) { + const char *arg = *++argv; + + if (doing_opts && arg[0] == '-' && arg[1]) { + if (!strcmp(arg, "--")) { + doing_opts = FALSE; + } else if (!strcmp(arg, "--random")) { + char buf[64]; + int len = sprintf(buf, "%lu", (unsigned long)time(NULL)); + rs = random_new(buf, len); + } else if (!strcmp(arg, "--autotest")) { + mode = AUTOTEST; + } else { + fprintf(stderr, "matching: unrecognised option '%s'\n", arg); + return 1; + } + } else { + if (!infile) { + infile = (!strcmp(arg, "-") ? stdin_identifier : arg); + } else { + fprintf(stderr, "matching: too many arguments\n"); + return 1; + } + } + } + + if (mode == USER_INPUT) { + FILE *fp; + + if (!infile) + infile = stdin_identifier; + + if (infile != stdin_identifier) { + fp = fopen(infile, "r"); + if (!fp) { + fprintf(stderr, "matching: could not open input file '%s'\n", + infile); + return 1; + } + } else { + fp = stdin; + } + + matching_from_user_input(fp, infile); + + if (infile != stdin_identifier) + fclose(fp); + } + + if (mode == AUTOTEST) { + if (infile) { + fprintf(stderr, "matching: expected no filename argument " + "with --autotest\n"); + return 1; + } + + test_subsets(); + } + + return 0; +} + +#endif /* STANDALONE_MATCHING_TEST */ diff --git a/matching.h b/matching.h new file mode 100644 index 0000000..a4d3098 --- /dev/null +++ b/matching.h @@ -0,0 +1,80 @@ +/* + * Hopcroft-Karp algorithm for finding a maximal matching in a + * bipartite graph. + */ + +#ifndef MATCHING_MATCHING_H +#define MATCHING_MATCHING_H + +/* + * The actual algorithm. + * + * Inputs: + * + * - 'scratch' is previously allocated scratch space of a size + * previously determined by calling 'matching_scratch_size'. + * + * - 'nl' is the number of vertices on the left side of the graph. + * Left vertices are numbered from 0 to nl-1. + * + * - 'nr' is the number of vertices on the left side of the graph. + * Right vertices are numbered from 0 to nr-1. + * + * - 'adjlists' and 'adjsizes' represents the graph in adjacency-list + * form. For each left vertex L, adjlists[L] points to an array of + * adjsizes[L] integers giving the list of right vertices adjacent + * to L. + * + * - 'rs', if not NULL, is a random_state used to perturb the + * progress of the algorithm so as to choose randomly from the + * possible matchings if there's more than one. (The exact + * probability distribution can't be guaranteed, but at the very + * least, any matching that exists should be a _possible_ output.) + * + * If 'rs' is not NULL, then each list in adjlists[] will be permuted + * during the course of the algorithm as a side effect. (That's why + * it's not an array of _const_ int pointers.) + * + * Output: + * + * - 'outl' may be NULL. If non-NULL, it is an array of 'nl' + * integers, and outl[L] will be assigned the index of the right + * vertex that the output matching paired with the left vertex L, + * or -1 if L is unpaired. + * + * - 'outr' may be NULL. If non-NULL, it is an array of 'nr' + * integers, and outr[R] will be assigned the index of the left + * vertex that the output matching paired with the right vertex R, + * or -1 if R is unpaired. + * + * - the returned value from the function is the total number of + * edges in the matching. + */ +int matching_with_scratch(void *scratch, + int nl, int nr, int **adjlists, int *adjsizes, + random_state *rs, int *outl, int *outr); + +/* + * The above function expects its 'scratch' parameter to have already + * been set up. This function tells you how much space is needed for a + * given size of graph, so that you can allocate a single instance of + * scratch space and run the algorithm multiple times without the + * overhead of an alloc and free every time. + */ +size_t matching_scratch_size(int nl, int nr); + +/* + * Simplified version of the above function. All parameters are the + * same, except that 'scratch' is constructed internally and freed on + * exit. This is the simplest way to call the algorithm as a one-off; + * however, if you need to call it multiple times on the same size of + * graph, it is probably better to call the above version directly so + * that you only construct 'scratch' once. + * + * Additional return value is now -1, meaning that scratch space + * could not be allocated. + */ +int matching(int nl, int nr, int **adjlists, int *adjsizes, + random_state *rs, int *outl, int *outr); + +#endif /* MATCHING_MATCHING_H */