/* * Copyright (c) 2021 James Cook * * Permission to use, copy, modify, and/or distribute this software for any * purpose with or without fee is hereby granted. * * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ /* * Catalytic algorithm for graph reachability. * * If you're reading the code, you might want to read * reachability_with_stack.cc first. It cheats by doing recursion that uses * O(log n) stack frames, but as a result it's a bit less convoluted, and it * has some helpful comments that aren't in this version. */ #include #include #include #include #include static void require(int condition, char *message) { if (!condition) { errx(1, "%s", message); } } static long read_integer() { char buf[1000]; if (fgets(buf, sizeof(buf), stdin) == NULL) errx(1, "fgets"); return strtol(buf, NULL, 10); } static void read_row(int *row, int64_t n) { char buf[10000]; char *stringp, *token; int64_t i; if (fgets(buf, sizeof(buf), stdin) == NULL) errx(1, "fgets"); stringp = buf; for (i = 0; i < n; ++i) { do { if (stringp == NULL) errx(1, "read_row: too few numbers"); token = strsep(&stringp, " "); } while (*token == 0); row[i] = strtol(token, NULL, 10); } } static int get_stack_frame(uint64_t stack, int index) { return (stack >> (2 * index)) & 3; } static void set_stack_frame(uint64_t *stack, int index, int value) { *stack &= ~(3 << (2 * index)); *stack |= value << (2 * index); } /* * Rotates the first length elements of array by r. */ void rotate_array(uint64_t *array, int64_t length, uint64_t r) { int64_t cycle_base; int64_t lowest_nonbase_index; int64_t i, next_index; int64_t first_value; r = r % length; if (r == 0) return; /* * Rotation by r will form one or more cycles. We rotate one cycle at a * time. */ lowest_nonbase_index = length; for (cycle_base = 0; cycle_base < lowest_nonbase_index; ++cycle_base) { first_value = array[cycle_base]; i = cycle_base; for (;;) { next_index = (i + r) % length; if (next_index == cycle_base) { array[i] = first_value; break; } array[i] = array[next_index]; i = next_index; if (i < lowest_nonbase_index) lowest_nonbase_index = i; } } } /* * Given a directed graph, returns true iff there is a path from node 0 to node * 1. * * Nodes are numbered 0 through edge.size()-1. edge[u][v] should be true iff * there is an edge from u to v. edge[u].size() must equal edge.size() for all * nodes u. * * The elements of *catalyst will be temporarily modified, but restored to their * original values by the time this function returns. */ static int reachable(int **edges, int64_t num_nodes, uint64_t *catalyst, size_t catalyst_length) { int64_t max_log_path_length; /* The smallest power of 2 bigger than num_nodes. */ int64_t rotation_size; /* * catalyst is divided into three regions: * * - Starting at index 0: * * A block of num_nodes^2 registers. The "add" recursive algorithm * increments register u*(num_nodes)+w if there is a short enough * path from u to w. * * - catalyst_block_1: * * A second block of num_nodes^2 registers, for the same purpose. We * need two regions like this because the "rotate" recursive * algorithm needs to be able to set and unset two regions * independently. * * - catalyst_rotation_block: * * A block of num_nodes^3 registers, logically grouped into * num_nodes^2 arrays of num_nodes each. Used as part of a "rotation * trick" to create an or gate with fan-in num_nodes. */ uint64_t *catalyst_block_1; uint64_t *catalyst_rotation_block; /* * Within the "add" subroutine, which block we should add our output * to. */ uint64_t *current_output_block; /* * Variables that keep track of where we are in the program. * * The algorithm has two top-level passes: First with * computing_backward = false, then with computing_backward = true to * restore *catalyst to its initial state. * * call_stack_add and call_stack_rotate are runtime stacks, containing * up to 32 stack frames of 2-bits each. These keep track of where we * are in the "add" and "rotate" subroutines at each recursive level. * * recursion_depth records how many stack frames are in use. * * Finally, new_call is set when recursion_depth is increased and * cleared when it's decreased. We need this extra bit because 2 bits * per stack frame isn't quite enough to keep track of where we are in * each subroutine. */ int computing_backward; uint64_t call_stack_add; uint64_t call_stack_rotate; int recursion_depth; int new_call; int current_stack_frame; /* * The forward pass increments catalyst[output_register_index] iff * there's a path from node 0 to node 1. output_register_mid_value * records the value after the forward pass, so it can be compared to * the final (equal to initial) value. */ int64_t output_register_index; uint64_t output_register_mid_value; /* * Variables used within the subroutines. */ /* * 1 or -1, depending on whether we need to add or subtract ("add" * subroutine) or rotate forward or backward ("rotate" subroutine). */ int increment; int64_t u, v, w; /* Compute algorithm parameters. */ rotation_size = 1; while (rotation_size < num_nodes + 1) { rotation_size *= 2; } max_log_path_length = 0; while ((1 << max_log_path_length) < num_nodes - 1) { ++max_log_path_length; } require(max_log_path_length * 2 <= 64, "Too many nodes for a 64-bit stack."); catalyst_block_1 = catalyst + num_nodes * num_nodes; catalyst_rotation_block = catalyst_block_1 + num_nodes * num_nodes; require(catalyst_length >= catalyst_rotation_block - catalyst + rotation_size * num_nodes * num_nodes, "Catalyst is too small."); output_register_index = num_nodes; /* Initialize the algorithm state. */ computing_backward = 0; recursion_depth = 0; new_call = 1; /* * The main body of the algorithm. We repeatedly figure out where we * are in the program and execute the next step. */ for (;;) { if (recursion_depth < 0) { /* We just completed a pass. */ if (computing_backward) { return catalyst[output_register_index] != output_register_mid_value; } else { output_register_mid_value = catalyst[output_register_index]; computing_backward = 1; recursion_depth = 0; new_call = 1; } } if (recursion_depth % 2 == 0) { /* We are in the "add" subroutine. */ if (recursion_depth == 0) { increment = computing_backward ? -1 : 1; current_output_block = catalyst; } else { increment = get_stack_frame(call_stack_rotate, recursion_depth / 2 - 1) & 2 ? -1 : 1; current_output_block = get_stack_frame(call_stack_rotate, recursion_depth / 2 - 1) & 1 ? catalyst_block_1 : catalyst; } if (recursion_depth / 2 >= max_log_path_length) { /* Base case. */ for (u = 0; u < num_nodes; ++u) for (v = 0; v < num_nodes; ++v) if (edges[u][v]) current_output_block[num_nodes * u + v] += increment; --recursion_depth; new_call = 0; continue; } if (new_call) { /* Nothing to do before we call the "rotate" subroutine. Clear the current stack frame and go deeper. */ set_stack_frame(&call_stack_add, recursion_depth / 2, 0); ++recursion_depth; continue; } current_stack_frame = get_stack_frame(call_stack_add, recursion_depth / 2); set_stack_frame(&call_stack_add, recursion_depth / 2, (current_stack_frame + 1) % 4); switch (current_stack_frame) { case 0: for (u = 0; u < num_nodes; ++u) for ( v = 0; v < num_nodes; ++v) current_output_block[num_nodes * u + v] -= catalyst_rotation_block[ rotation_size * num_nodes * u + rotation_size * v]; ++recursion_depth; new_call = 1; break; case 1: for (u = 0; u < num_nodes; ++u) for (v = 0; v < num_nodes; ++v) catalyst_rotation_block[ rotation_size * num_nodes * u + rotation_size * v] -= increment; ++recursion_depth; new_call = 1; break; case 2: for (u = 0; u < num_nodes; ++u) for ( v = 0; v < num_nodes; ++v) current_output_block[num_nodes * u + v] += increment + catalyst_rotation_block[ rotation_size * num_nodes * u + rotation_size * v]; ++recursion_depth; new_call = 1; break; case 3: for (u = 0; u < num_nodes; ++u) for (v = 0; v < num_nodes; ++v) catalyst_rotation_block[ rotation_size * num_nodes * u + rotation_size * v] += increment; --recursion_depth; new_call = 0; break; } } else { /* We are in the "rotate" subroutine. */ if (new_call) { current_stack_frame = 0; } else { current_stack_frame = get_stack_frame(call_stack_rotate, recursion_depth / 2) + 1; if (current_stack_frame > 3) { --recursion_depth; new_call = 0; continue; } } set_stack_frame(&call_stack_rotate, recursion_depth / 2, current_stack_frame); increment = get_stack_frame(call_stack_add, recursion_depth / 2) & 1 ? 1 : -1; /* We switch directions after each recursive call. */ increment *= current_stack_frame & 1 ? 1 : -1; for (u = 0; u < num_nodes; ++u) { for (v = 0; v < num_nodes; ++v) { for (w = 0; w < num_nodes; ++w) { rotate_array(catalyst_rotation_block + rotation_size * num_nodes * u + rotation_size * v, rotation_size, increment * catalyst[num_nodes * u + w] * catalyst_block_1[num_nodes * w + v]); } } } ++recursion_depth; new_call = 1; } } } int main(int argc, char **argv) { int64_t num_nodes; int64_t u, v; int **edges; uint64_t *catalyst; uint64_t *catalyst_backup; int64_t catalyst_length; /* * Read the graph. */ num_nodes = read_integer(); if ((edges = calloc(num_nodes, sizeof(*edges))) == NULL) err(1, "allocating edges"); for (u = 0; u < num_nodes; ++u) { if ((edges[u] = calloc(num_nodes, sizeof(**edges))) == NULL) err(1, "allocating edges[u]"); } for (u = 0; u < num_nodes; ++u) { read_row(edges[u], num_nodes); edges[u][u] = 1; } /* * Allocate the catalytic space and fill it randomly. */ catalyst_length = 2 * (num_nodes * num_nodes * (1 + num_nodes)); if ((catalyst = calloc(catalyst_length, sizeof(*catalyst))) == NULL) err(1, "allocating catalyst"); for (u = 0; u < catalyst_length; ++u) catalyst[u] = arc4random() | (((uint64_t)arc4random()) << 32); /* * Save a copy of the catalytic space, so we can verify it doesn't change. */ if ((catalyst_backup = calloc(catalyst_length, sizeof(*catalyst_backup))) == NULL) err(1, "allocating catalyst_backup"); memcpy(catalyst_backup, catalyst, catalyst_length * sizeof(*catalyst)); if (reachable(edges, num_nodes, catalyst, catalyst_length)) { printf("Reachable.\n"); } else { printf("Not reachable.\n"); } require(!memcmp(catalyst, catalyst_backup, catalyst_length * sizeof(*catalyst)), "Catalyst changed."); return 0; }