├── .gitignore
├── CMakeLists.txt
├── LICENSE
├── README.md
├── compile_flags.txt
├── src
    ├── alphabetic_huffman_code.hpp
    ├── alphabetic_huffman_code.test.cpp
    ├── bit.hpp
    ├── bit_cast.hpp
    ├── bm.hpp
    ├── bm.test.cpp
    ├── cartesian_tree.hpp
    ├── cartesian_tree.test.cpp
    ├── char_poly.hpp
    ├── cnt_min.hpp
    ├── dirichlet_series.hpp
    ├── dirichlet_series.test.cpp
    ├── fft.hpp
    ├── fft.test.cpp
    ├── fraction.hpp
    ├── geometry
    │   ├── point.hpp
    │   └── point3d.hpp
    ├── graph
    │   └── make_st_dag.hpp
    ├── hash_map.hpp
    ├── jacobi.hpp
    ├── lattice_cnt.hpp
    ├── lattice_cnt.test.cpp
    ├── lct.hpp
    ├── level_ancestor.hpp
    ├── manacher.hpp
    ├── mcmf.hpp
    ├── modnum.hpp
    ├── modnum.test.cpp
    ├── nim_prod.hpp
    ├── optimize.hpp
    ├── order_statistic.hpp
    ├── perm_tree.hpp
    ├── perm_tree.test.cpp
    ├── quaternion_hurwitz.hpp
    ├── reverse_comparator.hpp
    ├── rmq.hpp
    ├── rmq.test.cpp
    ├── seg_tree.hpp
    ├── seg_tree.test.cpp
    ├── smawk.hpp
    ├── smawk.test.cpp
    ├── static_tree.hpp
    ├── suffix_array.hpp
    ├── tensor.hpp
    ├── tensor.test.cpp
    ├── top_tree.hpp
    └── yc.hpp
└── third_party
    └── sais-lite-2.4.1
        ├── COPYING
        ├── Makefile
        ├── README
        ├── is_orig.c
        ├── sais.c
        ├── sais.h
        ├── sais.hxx
        ├── suftest.c
        └── test.c


/.gitignore:
--------------------------------------------------------------------------------
 1 | # Prerequisites
 2 | *.d
 3 | 
 4 | # Compiled Object files
 5 | *.slo
 6 | *.lo
 7 | *.o
 8 | *.obj
 9 | 
10 | # Precompiled Headers
11 | *.gch
12 | *.pch
13 | 
14 | # Compiled Dynamic libraries
15 | *.so
16 | *.dylib
17 | *.dll
18 | 
19 | # Fortran module files
20 | *.mod
21 | *.smod
22 | 
23 | # Compiled Static libraries
24 | *.lai
25 | *.la
26 | *.a
27 | *.lib
28 | 
29 | # Executables
30 | *.exe
31 | *.out
32 | *.app
33 | 
34 | /build/
35 | 


--------------------------------------------------------------------------------
/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | cmake_minimum_required(VERSION 3.27)
 2 | project(cp-book LANGUAGES CXX)
 3 | 
 4 | # Emit compile_commands.json for editor tooling.
 5 | set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
 6 | 
 7 | # The sanitizers stay on by default, exactly as before; configure with
 8 | # -DCP_BOOK_ENABLE_SANITIZERS=OFF to build without them.
 9 | option(CP_BOOK_ENABLE_SANITIZERS "Build with AddressSanitizer and UBSan" ON)
10 | 
11 | include(FetchContent)
12 | 
13 | FetchContent_Declare(
14 |   Catch2
15 |   GIT_REPOSITORY https://github.com/catchorg/Catch2.git
16 |   GIT_TAG        v3.4.0 # or a later release
17 | )
18 | 
19 | FetchContent_MakeAvailable(Catch2)
20 | 
21 | # CONFIGURE_DEPENDS makes the build re-check the glob, so a newly added
22 | # *.test.cpp is picked up without re-running cmake by hand.
23 | file(GLOB TEST_SRC_FILES CONFIGURE_DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/src/*.test.cpp")
24 | 
25 | # Header-only library: carries the include path, the language level and the
26 | # debug/warning flags that every consumer is built with.
27 | add_library(cp-book INTERFACE)
28 | target_include_directories(cp-book INTERFACE "${CMAKE_CURRENT_SOURCE_DIR}/src")
29 | target_compile_features(cp-book INTERFACE cxx_std_20)
30 | target_compile_definitions(cp-book INTERFACE
31 |   _GLIBCXX_DEBUG
32 |   _GLIBCXX_DEBUG_PEDANTIC
33 |   _FORTIFY_SOURCE=2
34 | )
35 | # TODO: Clean these up somehow
36 | target_compile_options(cp-book INTERFACE
37 |   -O2 -g
38 |   -Wall -Wextra -pedantic -Wshadow -Wformat=2 -Wfloat-equal -Wconversion
39 |   -Wcast-qual -Wcast-align -Wno-unused-result -Wno-sign-conversion
40 |   -fstack-protector
41 |   # These warnings are implemented by GCC only.
42 |   "$<$<CXX_COMPILER_ID:GNU>:-Wlogical-op;-Wshift-overflow=2;-Wduplicated-cond>"
43 | )
44 | target_link_options(cp-book INTERFACE -O2 -fstack-protector)
45 | if(CP_BOOK_ENABLE_SANITIZERS)
46 |   # Sanitizer flags must be given to both the compiler and the linker.
47 |   target_compile_options(cp-book INTERFACE -fsanitize=address -fsanitize=undefined -fno-sanitize-recover=all)
48 |   target_link_options(cp-book INTERFACE -fsanitize=address -fsanitize=undefined -fno-sanitize-recover=all)
49 | endif()
50 | 
51 | add_executable(tests ${TEST_SRC_FILES})
52 | # cp-book already supplies the src/ include path and the cxx_std_20 requirement.
53 | target_link_libraries(tests PRIVATE cp-book Catch2::Catch2WithMain)
54 | 
55 | list(APPEND CMAKE_MODULE_PATH "${catch2_SOURCE_DIR}/extras")
56 | include(CTest)
57 | include(Catch)
58 | catch_discover_tests(tests)
59 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
  1 | CC0 1.0 Universal
  2 | 
  3 | Statement of Purpose
  4 | 
  5 | The laws of most jurisdictions throughout the world automatically confer
  6 | exclusive Copyright and Related Rights (defined below) upon the creator and
  7 | subsequent owner(s) (each and all, an "owner") of an original work of
  8 | authorship and/or a database (each, a "Work").
  9 | 
 10 | Certain owners wish to permanently relinquish those rights to a Work for the
 11 | purpose of contributing to a commons of creative, cultural and scientific
 12 | works ("Commons") that the public can reliably and without fear of later
 13 | claims of infringement build upon, modify, incorporate in other works, reuse
 14 | and redistribute as freely as possible in any form whatsoever and for any
 15 | purposes, including without limitation commercial purposes. These owners may
 16 | contribute to the Commons to promote the ideal of a free culture and the
 17 | further production of creative, cultural and scientific works, or to gain
 18 | reputation or greater distribution for their Work in part through the use and
 19 | efforts of others.
 20 | 
 21 | For these and/or other purposes and motivations, and without any expectation
 22 | of additional consideration or compensation, the person associating CC0 with a
 23 | Work (the "Affirmer"), to the extent that he or she is an owner of Copyright
 24 | and Related Rights in the Work, voluntarily elects to apply CC0 to the Work
 25 | and publicly distribute the Work under its terms, with knowledge of his or her
 26 | Copyright and Related Rights in the Work and the meaning and intended legal
 27 | effect of CC0 on those rights.
 28 | 
 29 | 1. Copyright and Related Rights. A Work made available under CC0 may be
 30 | protected by copyright and related or neighboring rights ("Copyright and
 31 | Related Rights"). Copyright and Related Rights include, but are not limited
 32 | to, the following:
 33 | 
 34 |   i. the right to reproduce, adapt, distribute, perform, display, communicate,
 35 |   and translate a Work;
 36 | 
 37 |   ii. moral rights retained by the original author(s) and/or performer(s);
 38 | 
 39 |   iii. publicity and privacy rights pertaining to a person's image or likeness
 40 |   depicted in a Work;
 41 | 
 42 |   iv. rights protecting against unfair competition in regards to a Work,
 43 |   subject to the limitations in paragraph 4(a), below;
 44 | 
 45 |   v. rights protecting the extraction, dissemination, use and reuse of data in
 46 |   a Work;
 47 | 
 48 |   vi. database rights (such as those arising under Directive 96/9/EC of the
 49 |   European Parliament and of the Council of 11 March 1996 on the legal
 50 |   protection of databases, and under any national implementation thereof,
 51 |   including any amended or successor version of such directive); and
 52 | 
 53 |   vii. other similar, equivalent or corresponding rights throughout the world
 54 |   based on applicable law or treaty, and any national implementations thereof.
 55 | 
 56 | 2. Waiver. To the greatest extent permitted by, but not in contravention of,
 57 | applicable law, Affirmer hereby overtly, fully, permanently, irrevocably and
 58 | unconditionally waives, abandons, and surrenders all of Affirmer's Copyright
 59 | and Related Rights and associated claims and causes of action, whether now
 60 | known or unknown (including existing as well as future claims and causes of
 61 | action), in the Work (i) in all territories worldwide, (ii) for the maximum
 62 | duration provided by applicable law or treaty (including future time
 63 | extensions), (iii) in any current or future medium and for any number of
 64 | copies, and (iv) for any purpose whatsoever, including without limitation
 65 | commercial, advertising or promotional purposes (the "Waiver"). Affirmer makes
 66 | the Waiver for the benefit of each member of the public at large and to the
 67 | detriment of Affirmer's heirs and successors, fully intending that such Waiver
 68 | shall not be subject to revocation, rescission, cancellation, termination, or
 69 | any other legal or equitable action to disrupt the quiet enjoyment of the Work
 70 | by the public as contemplated by Affirmer's express Statement of Purpose.
 71 | 
 72 | 3. Public License Fallback. Should any part of the Waiver for any reason be
 73 | judged legally invalid or ineffective under applicable law, then the Waiver
 74 | shall be preserved to the maximum extent permitted taking into account
 75 | Affirmer's express Statement of Purpose. In addition, to the extent the Waiver
 76 | is so judged Affirmer hereby grants to each affected person a royalty-free,
 77 | non transferable, non sublicensable, non exclusive, irrevocable and
 78 | unconditional license to exercise Affirmer's Copyright and Related Rights in
 79 | the Work (i) in all territories worldwide, (ii) for the maximum duration
 80 | provided by applicable law or treaty (including future time extensions), (iii)
 81 | in any current or future medium and for any number of copies, and (iv) for any
 82 | purpose whatsoever, including without limitation commercial, advertising or
 83 | promotional purposes (the "License"). The License shall be deemed effective as
 84 | of the date CC0 was applied by Affirmer to the Work. Should any part of the
 85 | License for any reason be judged legally invalid or ineffective under
 86 | applicable law, such partial invalidity or ineffectiveness shall not
 87 | invalidate the remainder of the License, and in such case Affirmer hereby
 88 | affirms that he or she will not (i) exercise any of his or her remaining
 89 | Copyright and Related Rights in the Work or (ii) assert any associated claims
 90 | and causes of action with respect to the Work, in either case contrary to
 91 | Affirmer's express Statement of Purpose.
 92 | 
 93 | 4. Limitations and Disclaimers.
 94 | 
 95 |   a. No trademark or patent rights held by Affirmer are waived, abandoned,
 96 |   surrendered, licensed or otherwise affected by this document.
 97 | 
 98 |   b. Affirmer offers the Work as-is and makes no representations or warranties
 99 |   of any kind concerning the Work, express, implied, statutory or otherwise,
100 |   including without limitation warranties of title, merchantability, fitness
101 |   for a particular purpose, non infringement, or the absence of latent or
102 |   other defects, accuracy, or the present or absence of errors, whether or not
103 |   discoverable, all to the greatest extent permissible under applicable law.
104 | 
105 |   c. Affirmer disclaims responsibility for clearing rights of other persons
106 |   that may apply to the Work or any use thereof, including without limitation
107 |   any person's Copyright and Related Rights in the Work. Further, Affirmer
108 |   disclaims responsibility for obtaining any necessary consents, permissions
109 |   or other rights required for any use of the Work.
110 | 
111 |   d. Affirmer understands and acknowledges that Creative Commons is not a
112 |   party to this document and has no duty or obligation with respect to this
113 |   CC0 or use of the Work.
114 | 
115 | For more information, please see
116 | <http://creativecommons.org/publicdomain/zero/1.0/>
117 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # ecnerwala's CP Book
 2 | 
 3 | This is my library of reference code for competitive programming. The goal is to
 4 | write generic, fast, and clean algorithm implementations for use in contests
 5 | like CodeForces or ICPC.
 6 | 
 7 | ## Building
 8 | 
 9 | Build using
10 | 
11 | ```sh
12 | cmake -B build
13 | cmake --build build
14 | ```
15 | 
16 | Test with
17 | 
18 | ```sh
19 | ctest --test-dir build
20 | ```
21 | 
22 | or directly with
23 | 
24 | ```sh
25 | ./build/tests
26 | ```
27 | 
28 | ## License and Attribution
29 | 
30 | All code in this book is written by me and CC0 licensed unless otherwise noted
31 | in the file. Inspiration is largely drawn from KACTL
32 | (https://github.com/kth-competitive-programming/kactl/) and other references.
33 | 


--------------------------------------------------------------------------------
/compile_flags.txt:
--------------------------------------------------------------------------------
 1 | -x
 2 | c++
 3 | -Wall
 4 | -Wextra
 5 | -pedantic
 6 | -std=c++20
 7 | -O2
 8 | -Wshadow
 9 | -Wformat=2
10 | -Wfloat-equal
11 | -Wconversion
12 | -Wshift-overflow
13 | -Wcast-qual
14 | -Wcast-align
15 | -D_GLIBCXX_DEBUG
16 | -D_GLIBCXX_DEBUG_PEDANTIC
17 | -fsanitize=address
18 | -fsanitize=undefined
19 | -fno-sanitize-recover=all
20 | -fstack-protector
21 | -D_FORTIFY_SOURCE=2
22 | -Wno-sign-conversion
23 | 
24 | -Isrc
25 | -Itest
26 | 


--------------------------------------------------------------------------------
/src/alphabetic_huffman_code.hpp:
--------------------------------------------------------------------------------
  1 | #pragma once
  2 | 
  3 | #include <vector>
  4 | #include <array>
  5 | #include <cassert>
  6 | 
  7 | // Finds an optimal alphabetic (binary) Huffman code, i.e. one that preserves the ordering of the original weights
  8 | // Implements the Garsia-Wachs algorithm: https://en.wikipedia.org/wiki/Garsia%E2%80%93Wachs_algorithm
  9 | // Returns the code as one depth per input weight, in input order (empty for empty input); merged weights are accumulated in T_sum
 10 | template <typename T, typename T_sum = T> std::vector<int> alphabetic_huffman_code(std::vector<T> weights) {
 11 | 	int N = int(weights.size());
 12 | 	if (N == 0) return {};
 13 | 	std::vector<std::array<int, 2>> ch; ch.reserve(N-1); // ch[k] = the two children of merged node N+k; leaves are 0..N-1
 14 | 
 15 | 	{
 16 | 		struct splay_node {
 17 | 			mutable splay_node* p = nullptr;
 18 | 			std::array<splay_node*, 2> c{nullptr, nullptr};
 19 | 			int d() const { return this == p->c[1]; } // which child of p this node is (0 or 1); requires p != nullptr
 20 | 
 21 | 			T_sum value;
 22 | 			T_sum max_value; // max of value over this node's subtree, as of the last update()
 23 | 			int idx; // node id used in ch / res: input position for leaves, N+k for the k-th merge
 24 | 
 25 | 			void update() {
 26 | 				max_value = value;
 27 | 				for (auto ch : c) {
 28 | 					if (ch && max_value < ch->max_value) max_value = ch->max_value;
 29 | 				}
 30 | 			}
 31 | 
 32 | 			void rot() {
 33 | 				assert(p);
 34 | 
 35 | 				int x = d();
 36 | 				splay_node* pa = p;
 37 | 				splay_node* ch = c[!x];
 38 | 
 39 | 				if (ch) ch->p = pa;
 40 | 				pa->c[x] = ch;
 41 | 
 42 | 				if (pa->p) pa->p->c[pa->d()] = this;
 43 | 				this->p = pa->p;
 44 | 
 45 | 				this->c[!x] = pa;
 46 | 				pa->p = this;
 47 | 
 48 | 				pa->update(); // only the old parent is refreshed; this node's max_value is left stale
 49 | 			}
 50 | 
 51 | 			void splay_no_update(splay_node* top) { // rotates this node up until it is a direct child of top
 52 | 				while (p != top) {
 53 | 					if (p->p != top) {
 54 | 						if (p->d() == d()) p->rot();
 55 | 						else rot();
 56 | 					}
 57 | 					rot();
 58 | 				}
 59 | 			}
 60 | 		};
 61 | 		std::vector<splay_node> nodes(N+1); // nodes[0..N-1] hold the weights; nodes[N] is an extra node that starts as the root
 62 | 		for (int i = 0; i < N; i++) {
 63 | 			nodes[i].p = &nodes[i+1];
 64 | 			nodes[i+1].c[0] = &nodes[i];
 65 | 			nodes[i].value = T_sum(weights[i]);
 66 | 			nodes[i].idx = i;
 67 | 		}
 68 | 		nodes[0].update();
 69 | 		splay_node* cur = &nodes[1];
 70 | 
 71 | 		// We'll store our current state as the left spine of some splay tree.
 72 | 		// All vertices from cur to the root are precisely the vertices that may satisfy w[n-2] <= w[n]
 73 | 		// (all others provably satisfy w[x-2] > w[x] at all times),
 74 | 		// so cur is exactly the leftmost vertex that might satisfy w[n-2] <= w[n].
 75 | 		//
 76 | 		// We then check this condition, and if it does have w[n-2] <= w[n],
 77 | 		// we merge w[n-2] and w[n-1] and reinsert somewhere according to Garsia-Wachs,
 78 | 		// i.e. right after the last element of w[0:n-1] greater than or equal to it.
 79 | 		// Then, the newly inserted node is added to the candidate chain
 80 | 		// (exercise: prove that all other positions still satisfy w[x-2] > w[x]).
 81 | 
 82 | 		while (cur) {
 83 | 			// Note: cur is not necessarily updated
 84 | 
 85 | 			// First, grab the 2nd child of the left side of cur
 86 | 			splay_node* a = cur->c[0];
 87 | 			assert(a);
 88 | 			while (a->c[1]) a = a->c[1];
 89 | 			if (a->c[0]) {
 90 | 				a = a->c[0];
 91 | 				while (a->c[1]) a = a->c[1];
 92 | 			} else {
 93 | 				a = a->p;
 94 | 			}
 95 | 			if (a == cur) {
 96 | 				// size one, so we're done
 97 | 				cur->update();
 98 | 				cur = cur->p;
 99 | 				continue;
100 | 			}
101 | 			a->splay_no_update(cur);
102 | 			assert(a == cur->c[0]);
103 | 			assert(a->c[1] && !a->c[1]->c[0] && !a->c[1]->c[1]);
104 | 			if (cur->p && cur->value < a->value) { // a parentless cur (the root) never takes this branch, so it always merges
105 | 				// no merging, so we're done
106 | 				a->update();
107 | 				cur->update();
108 | 				cur = cur->p;
109 | 				continue;
110 | 			}
111 | 
112 | 			// Otherwise, merge a and a->c[1]
113 | 			{
114 | 				int n_idx = N + int(ch.size());
115 | 				ch.push_back({a->idx, a->c[1]->idx});
116 | 				a->idx = n_idx;
117 | 			}
118 | 			a->value += a->c[1]->value;
119 | 			a->c[1]->p = nullptr;
120 | 			a->c[1] = nullptr;
121 | 
122 | 			// Now, insert a right after the first guy b which is b.v >= a.v
123 | 			if (!a->c[0] || a->c[0]->max_value < a->value) {
124 | 				a->c[1] = a->c[0];
125 | 				a->c[0] = nullptr;
126 | 				a->update();
127 | 				// Don't recurse on a, since it has no left child
128 | 				continue;
129 | 			}
130 | 
131 | 			splay_node* b = a->c[0];
132 | 			while (true) { // descend using max_value, preferring the right child whenever it still holds a value >= a->value
133 | 				assert(b);
134 | 				assert(!(b->max_value < a->value));
135 | 				if (!b->c[1] || b->c[1]->max_value < a->value) {
136 | 					if (b->value < a->value) {
137 | 						assert(b->c[0]);
138 | 						b = b->c[0];
139 | 					} else {
140 | 						break;
141 | 					}
142 | 				} else {
143 | 					b = b->c[1];
144 | 				}
145 | 			}
146 | 			b->splay_no_update(a);
147 | 			assert(b == a->c[0]);
148 | 			if (b->c[1]) b->c[1]->p = a; // everything that was to the right of b moves to a's right side
149 | 			a->c[1] = b->c[1];
150 | 			b->c[1] = nullptr;
151 | 			b->update();
152 | 			cur = a;
153 | 			continue;
154 | 		}
155 | 	}
156 | 
157 | 	// Reconstruct depths
158 | 	assert(int(ch.size()) == N-1);
159 | 	std::vector<int> res(2*N-1, -1);
160 | 	res[2*N-2] = 0; // node 2N-2 (the last merge, or the only leaf when N == 1) is the root
161 | 	for (int i = 2*N-2; i >= N; i--) { // merged nodes are visited newest-first, so a parent's depth is set before its children's
162 | 		assert(res[i] != -1);
163 | 		res[ch[i-N][0]] = res[i] + 1;
164 | 		res[ch[i-N][1]] = res[i] + 1;
165 | 	}
166 | 	res.resize(N);
167 | 	return res;
168 | }
169 | 
170 | // Converts the leaf depths of a binary code tree (left to right) into the N - 1 depths of the LCAs of adjacent leaves, suitable for building a Cartesian tree
171 | inline std::vector<int> binary_code_depths_to_lca_depths(std::vector<int> depths) {
172 | 	const int num_leaves = int(depths.size());
173 | 	if (num_leaves == 0) return {};
174 | 	std::vector<int> lca_depths; lca_depths.reserve(num_leaves - 1);
175 | 	std::vector<int> open; open.reserve(num_leaves);
176 | 	for (int idx = 0; idx < num_leaves; idx++) {
177 | 		int d = depths[idx];
178 | 		// A finished subtree meeting an equally deep one on the stack collapses into their parent, one level up
179 | 		for (; !open.empty() && open.back() == d; d--) open.pop_back();
180 | 		assert(open.empty() || open.back() < d);
181 | 		if (d != 0) lca_depths.push_back(d - 1);
182 | 		open.push_back(d);
183 | 	}
184 | 	// A valid code collapses all the way down to the single root at depth 0
185 | 	assert(int(open.size()) == 1 && open.back() == 0);
186 | 	return lca_depths;
187 | }
188 | 


--------------------------------------------------------------------------------
/src/alphabetic_huffman_code.test.cpp:
--------------------------------------------------------------------------------
 1 | #include "alphabetic_huffman_code.hpp"
 2 | 
 3 | #include <catch2/catch_test_macros.hpp>
 4 | #include <catch2/catch_get_random_seed.hpp>
 5 | #include <cassert>
 6 | #include <random>
 7 | 
 8 | template <typename T> T alphabetic_huffman_code_naive(std::vector<T> weights) {
 9 | 	// O(N^3) interval DP reference: cost(hi, lo) = minimum total weight * depth of a tree over weights[lo..hi]
10 | 	const int N = int(weights.size());
11 | 	if (N == 0) return 0;
12 | 	assert(N > 0);
13 | 	std::vector<T> dp(N * N);
14 | 	auto cost = [&dp, N](int hi, int lo) -> T& { return dp[hi * N + lo]; };
15 | 	for (int hi = 0; hi < N; hi++) {
16 | 		cost(hi, hi) = 0;
17 | 		T range_sum = weights[hi];
18 | 		for (int lo = hi - 1; lo >= 0; lo--) {
19 | 			// Try every split: the right part is [split..hi], the left part is [lo..split-1]
20 | 			T best = cost(hi, hi) + cost(hi - 1, lo);
21 | 			for (int split = hi - 1; split > lo; split--) best = std::min(best, cost(hi, split) + cost(split - 1, lo));
22 | 			range_sum += weights[lo];
23 | 			cost(hi, lo) = best + range_sum;
24 | 		}
25 | 	}
26 | 	return cost(N - 1, 0);
27 | }
28 | 
29 | TEST_CASE("Alphabetic Huffman Code", "[alphabetic_huffman_code]") {
30 | 	std::mt19937 mt(Catch::getSeed());
31 | 	for (int z = 0; z <= 1000; z++) {
32 | 		int N = std::uniform_int_distribution(1, 60)(mt);
33 | 		int MX = 1 << std::uniform_int_distribution(0, 15)(mt);
34 | 		std::vector<int> weights(N);
35 | 		for (auto& w : weights) w = std::uniform_int_distribution(0, MX-1)(mt);
36 | 		auto naive_tot = alphabetic_huffman_code_naive(weights);
37 | 
38 | 		auto code_depths = alphabetic_huffman_code(weights);
39 | 		REQUIRE(int(code_depths.size()) == N);
40 | 		auto code_lcp = binary_code_depths_to_lca_depths(code_depths); // also runs the converter's internal validity asserts
41 | 		REQUIRE(int(code_lcp.size()) == N - 1); // one LCA per pair of adjacent leaves
42 | 		int tot = 0;
43 | 		for (int i = 0; i < N; i++) tot += weights[i] * code_depths[i];
44 | 		REQUIRE(naive_tot == tot); // the fast algorithm must match the cost of the O(N^3) reference DP
45 | 	}
46 | }
47 | 


--------------------------------------------------------------------------------
/src/bit.hpp:
--------------------------------------------------------------------------------
  1 | #pragma once
  2 | 
  3 | #include <vector>
  4 | #include <cassert>
  5 | 
  6 | /** Binary-indexed tree
  7 |  *
  8 |  *  A binary indexed tree with N nodes of type T provides the following two
  9 |  *  functions for 0 <= i <= N, each returning an iterable range over nodes:
 10 |  *
 11 |  *      prefix(int i) -> prefix_range (const_prefix_range on a const tree)
 12 |  *      suffix(int i) -> suffix_range (const_suffix_range on a const tree)
 13 |  *
 14 |  *  such that size(suffix(i) intersect prefix(j)) = (1 if i < j else 0).
 15 |  *  Furthermore, the resulting ranges always have O(log N) elements.
 16 |  *
 17 |  *  This can be used to implement either point-update/(prefix|suffix)-query or (prefix|suffix)-update/point-query
 18 |  *  over a virtual array of size N of a commutative monoid. This can be generalized to implement point-update/range-query
 19 |  *  or range-update/point-query over a virtual array of size N of a commutative group.
 20 |  *
 21 |  *  With 0-indexed data, prefixes are more natural:
 22 |  *   * For range update/query, use prefix() for the ranges and suffix() for the points.
 23 |  *   * For prefix update/query, no change.
 24 |  *   * For suffix update/query, use prefix(point + 1); 1-index the data.
 25 |  */
 26 | template <typename T> class binary_indexed_tree {
 27 | private:
 28 | 	std::vector<T> dat;
 29 | public:
 30 | 	binary_indexed_tree() {}
 31 | 	explicit binary_indexed_tree(size_t N) : dat(N) {}
 32 | 	binary_indexed_tree(size_t N, const T& t) : dat(N, t) {}
 33 | 
 34 | 	size_t size() const { return dat.size(); }
 35 | 	const std::vector<T>& data() const { return dat; }
 36 | 	std::vector<T>& data() { return dat; }
 37 | 
 38 | private:
 39 | 	template <typename I, typename S = I> struct iterator_range { // minimal begin()/end() pair so results work in range-for
 40 | 	private:
 41 | 		I begin_;
 42 | 		S end_;
 43 | 	public:
 44 | 		iterator_range() : begin_(), end_() {}
 45 | 		iterator_range(const I& begin__, const S& end__) : begin_(begin__), end_(end__) {}
 46 | 		iterator_range(I&& begin__, S&& end__) : begin_(begin__), end_(end__) {}
 47 | 		I begin() const { return begin_; }
 48 | 		S end() const { return end_; }
 49 | 	};
 50 | 
 51 | public:
 52 | 	class const_suffix_iterator {
 53 | 	private:
 54 | 		const T* dat;
 55 | 		int a;
 56 | 		const_suffix_iterator(const T* dat_, int a_) : dat(dat_), a(a_) {}
 57 | 		friend class binary_indexed_tree;
 58 | 	public:
 59 | 		friend bool operator != (const const_suffix_iterator& i, const const_suffix_iterator& j) {
 60 | 			assert(j.dat == nullptr); // the right-hand side must be the end sentinel, which carries the tree size in a
 61 | 			return i.a < j.a;
 62 | 		}
 63 | 		const_suffix_iterator& operator ++ () {
 64 | 			a |= a+1; // set the lowest zero bit
 65 | 			return *this;
 66 | 		}
 67 | 		const T& operator * () const {
 68 | 			return dat[a];
 69 | 		}
 70 | 	};
 71 | 	using const_suffix_range = iterator_range<const_suffix_iterator>;
 72 | 	const_suffix_range suffix(int a) const {
 73 | 		assert(0 <= a && a <= int(dat.size()));
 74 | 		return const_suffix_range{const_suffix_iterator{dat.data(), a}, const_suffix_iterator{nullptr, int(dat.size())}};
 75 | 	}
 76 | 
 77 | 	class suffix_iterator {
 78 | 	private:
 79 | 		T* dat;
 80 | 		int a;
 81 | 		suffix_iterator(T* dat_, int a_) : dat(dat_), a(a_) {}
 82 | 		friend class binary_indexed_tree;
 83 | 	public:
 84 | 		friend bool operator != (const suffix_iterator& i, const suffix_iterator& j) {
 85 | 			assert(j.dat == nullptr);
 86 | 			return i.a < j.a;
 87 | 		}
 88 | 		suffix_iterator& operator ++ () {
 89 | 			a |= a+1;
 90 | 			return *this;
 91 | 		}
 92 | 		T& operator * () const {
 93 | 			return dat[a];
 94 | 		}
 95 | 	};
 96 | 	using suffix_range = iterator_range<suffix_iterator>;
 97 | 	suffix_range suffix(int a) {
 98 | 		assert(0 <= a && a <= int(dat.size()));
 99 | 		return suffix_range{suffix_iterator{dat.data(), a}, suffix_iterator{nullptr, int(dat.size())}};
100 | 	}
101 | 
102 | 	class const_prefix_iterator {
103 | 	private:
104 | 		const T* dat;
105 | 		int a;
106 | 		const_prefix_iterator(const T* dat_, int a_) : dat(dat_), a(a_) {}
107 | 		friend class binary_indexed_tree;
108 | 	public:
109 | 		friend bool operator != (const const_prefix_iterator& i, const const_prefix_iterator& j) {
110 | 			assert(j.dat == nullptr);
111 | 			return i.a > 0; // iteration ends once every set bit has been cleared
112 | 		}
113 | 		const_prefix_iterator& operator ++ () {
114 | 			a &= a-1; // clear the lowest set bit
115 | 			return *this;
116 | 		}
117 | 		const T& operator * () const {
118 | 			return dat[a-1];
119 | 		}
120 | 	};
121 | 	using const_prefix_range = iterator_range<const_prefix_iterator>;
122 | 	const_prefix_range prefix(int a) const {
123 | 		assert(a <= int(dat.size())); // a > size would read past the end; a <= 0 just yields an empty range
124 | 		return const_prefix_range{const_prefix_iterator{dat.data(), a}, const_prefix_iterator{nullptr, 0}};
125 | 	}
126 | 
127 | 	class prefix_iterator {
128 | 	private:
129 | 		T* dat;
130 | 		int a;
131 | 		prefix_iterator(T* dat_, int a_) : dat(dat_), a(a_) {}
132 | 		friend class binary_indexed_tree;
133 | 	public:
134 | 		friend bool operator != (const prefix_iterator& i, const prefix_iterator& j) {
135 | 			assert(j.dat == nullptr);
136 | 			return i.a > 0;
137 | 		}
138 | 		prefix_iterator& operator ++ () {
139 | 			a &= a-1;
140 | 			return *this;
141 | 		}
142 | 		T& operator * () const {
143 | 			return dat[a-1];
144 | 		}
145 | 	};
146 | 	using prefix_range = iterator_range<prefix_iterator>;
147 | 	prefix_range prefix(int a) {
148 | 		assert(a <= int(dat.size())); // a > size would read past the end; a <= 0 just yields an empty range
149 | 		return prefix_range{prefix_iterator{dat.data(), a}, prefix_iterator{nullptr, 0}};
150 | 	}
151 | };
152 | 


--------------------------------------------------------------------------------
/src/bit_cast.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <type_traits>
 4 | #include <cstring>
 5 | 
 6 | // Copied from https://en.cppreference.com/w/cpp/numeric/bit_cast
 7 | 
 8 | template <class To, class From>
 9 | std::enable_if_t<
10 | 	std::is_trivially_copyable_v<To> &&
11 | 	std::is_trivially_copyable_v<From> &&
12 | 	sizeof(From) == sizeof(To),
13 | 	To>
14 | // not constexpr: that would require compiler support beyond memcpy
15 | bit_cast(const From& src) noexcept
16 | {
17 | 	static_assert(std::is_trivially_constructible_v<To>,
18 | 		"This implementation additionally requires destination type to be trivially constructible");
19 | 
20 | 	To result; // filled byte-for-byte from src below
21 | 	std::memcpy(&result, &src, sizeof(result));
22 | 	return result;
23 | }
24 | 


--------------------------------------------------------------------------------
/src/bm.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | #include<bits/stdc++.h>
 3 | 
 4 | // Berlekamp-Massey: returns the coefficients tr of the recurrence s[i] = sum_j tr[j] * s[i-1-j]
 5 | // found for s; num must support +, -, *, / and comparison with 0. Empty input gives an empty result.
 6 | template <typename num>
 7 | std::vector<num> BerlekampMassey(const std::vector<num>& s) {
 8 | 	int n = int(s.size()), L = 0, m = 0;
 9 | 	if (n == 0) return {}; // C[0] / B[0] below would be out of bounds on empty vectors
10 | 	std::vector<num> C(n), B(n), T;
11 | 	C[0] = B[0] = 1;
12 | 	num b = 1;
13 | 	for(int i = 0; i < n; i++) { ++m;
14 | 		num d = s[i]; // discrepancy between s[i] and what the current recurrence C predicts
15 | 		for (int j = 1; j <= L; j++) d += C[j] * s[i - j];
16 | 		if (d == 0) continue;
17 | 		T = C; num coef = d / b;
18 | 		for (int j = m; j < n; j++) C[j] -= coef * B[j - m];
19 | 		if (2 * L > i) continue;
20 | 		L = i + 1 - L; B = T; b = d; m = 0;
21 | 	}
22 | 
23 | 	// Drop the leading 1 and negate, turning the annihilating polynomial into recurrence coefficients
24 | 	C.resize(L + 1); C.erase(C.begin());
25 | 	for (auto& x : C) x = -x;
26 | 	return C;
27 | }
27 | 
28 | // Returns term k (0-indexed) of the linear recurrence S[i] = sum_j tr[j] * S[i-1-j], given its first
29 | // tr.size() terms in S. Works on polynomials modulo the recurrence: O(n^2 log k) operations on num.
30 | template <typename num>
31 | num linearRec(const std::vector<num>& S, const std::vector<num>& tr, int64_t k) {
32 | 	int n = int(tr.size());
33 | 	assert(S.size() >= tr.size());
34 | 	assert(k >= 0);
35 | 
36 | 	// Operands are taken by reference: combine(pol, pol) only reads them and builds a fresh result
37 | 	auto combine = [&](const std::vector<num>& a, const std::vector<num>& b, bool e = false) {
38 | 		// multiply a * b * x^e, then reduce every power x^i with i >= n using the recurrence
39 | 		std::vector<num> res(int(a.size()) + int(b.size()));
40 | 		for (int i = 0; i < int(a.size()); i++) {
41 | 			for (int j = 0; j < int(b.size()); j++) res[i + j + e] += a[i] * b[j];
42 | 		}
43 | 		for (int i = int(res.size())-1; i >= n; --i) {
44 | 			for (int j = 0; j < n; j++) res[i - 1 - j] += res[i] * tr[j];
45 | 		}
46 | 		res.resize(n);
47 | 		return res;
48 | 	};
49 | 
50 | 	std::vector<num> pol(n);
51 | 	if (n > 0) pol[0] = num(1);
52 | 
53 | 	// Square-and-multiply over the bits of k, most significant first (no iterations when k == 0)
54 | 	for (int i = 64 - 1 - (k == 0 ? 64 : __builtin_clzll(k)); i >= 0; i--) {
55 | 		pol = combine(pol, pol, (k >> i) & 1);
56 | 	}
57 | 
58 | 	num res = 0;
59 | 	for (int i = 0; i < n; i++) res += pol[i] * S[i];
60 | 	return res;
61 | }
62 | 


--------------------------------------------------------------------------------
/src/bm.test.cpp:
--------------------------------------------------------------------------------
 1 | #include <catch2/catch_test_macros.hpp>
 2 | 
 3 | #include "bm.hpp"
 4 | #include "modnum.hpp"
 5 | 
 6 | using namespace std;
 7 | 
 8 | TEST_CASE("Berlekamp Massey", "[bm]") {
 9 | 	using mint = modnum<int(1e9)+7>;
10 | 	// Fibonacci prefix: the recovered recurrence should be F(i) = F(i-1) + F(i-2)
11 | 	const vector<mint> fib({0, 1, 1, 2, 3, 5, 8, 13});
12 | 	const vector<mint> rec = BerlekampMassey(fib);
13 | 	REQUIRE(rec == vector<mint>({mint(1), mint(1)}));
14 | 	REQUIRE(linearRec(fib, rec, 1000) == mint(517691607));
15 | }
16 | 


--------------------------------------------------------------------------------
/src/cartesian_tree.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <vector>
 4 | #include <array>
 5 | 
 6 | #include "reverse_comparator.hpp"
 7 | 
 8 | class CartesianTree { // binary tree over an array: 2*N+1 nodes for N elements, built by the static build_* functions
 9 | public:
10 | 	struct Node {
11 | 		int l, m, r; // inclusive ranges: the node covers v[l..r] and m is the position it splits at; empty leaves have m = r = l-1
12 | 		std::array<int, 2> c; // node ids of the {left, right} children, {-1, -1} for leaves
13 | 	};
14 | 	std::vector<Node> nodes;
15 | 	int root = -1; // id of the root node, -1 for a default-constructed (empty) tree
16 | 
17 | 	CartesianTree() {}
18 | 
19 | 	Node& operator [] (int idx) { return nodes[idx]; }
20 | 	const Node& operator [] (int idx) const { return nodes[idx]; }
21 | 
22 | 	int size() const { return int(nodes.size()); }
23 | 
24 | private:
25 | 	CartesianTree(std::vector<Node>&& nodes_, int root_) : nodes(std::move(nodes_)), root(root_) {}
26 | 
27 | public:
28 | 
29 | 	// min-cartesian-tree, with earlier cells tiebroken earlier (comp(x, y) must mean "x is strictly smaller than y")
30 | 	template <typename T, typename Comp = std::less<T>>
31 | 	static CartesianTree build_min_tree(const std::vector<T>& v, Comp comp = Comp()) {
32 | 		std::vector<Node> nodes(v.size()*2+1); // id 2*i is the empty leaf just before position i, id 2*i+1 is the node for v[i]
33 | 		std::vector<int> stk; stk.reserve(v.size()); // ids of nodes whose right subtree is not finished yet
34 | 		int root = -1;
35 | 		for (int i = 0; i <= int(v.size()); i++) { // the extra iteration i == v.size() flushes the stack
36 | 			int cur = 2*i;
37 | 			nodes[cur].l = i;
38 | 			nodes[cur].r = i-1;
39 | 			nodes[cur].m = i-1;
40 | 			nodes[cur].c = {-1, -1};
41 | 			while (!stk.empty() && (i == int(v.size()) || comp(v[i], v[nodes[stk.back()].m]))) {
42 | 				int nxt = stk.back(); stk.pop_back(); // nxt is complete: everything gathered so far becomes its right subtree
43 | 				nodes[nxt].c[1] = cur;
44 | 				nodes[nxt].r = nodes[cur].r;
45 | 				cur = nxt;
46 | 			}
47 | 			if (i == int(v.size())) {
48 | 				root = cur;
49 | 				break;
50 | 			}
51 | 			nodes[2*i+1].l = nodes[cur].l; // the node for v[i] takes the popped chain as its left child; r and c[1] are set when it is popped
52 | 			nodes[2*i+1].m = i;
53 | 			nodes[2*i+1].c[0] = cur;
54 | 			stk.push_back(2*i+1);
55 | 		}
56 | 		return {std::move(nodes), root};
57 | 	}
58 | 
59 | 	// max-cartesian-tree, with earlier cells tiebroken earlier (delegates to build_min_tree with reverse_comparator(comp))
60 | 	template <typename T, typename Comp = std::less<T>>
61 | 	static CartesianTree build_max_tree(const std::vector<T>& v, Comp comp = Comp()) {
62 | 		return build_min_tree(v, reverse_comparator(comp));
63 | 	}
64 | };
65 | 


--------------------------------------------------------------------------------
/src/cartesian_tree.test.cpp:
--------------------------------------------------------------------------------
 1 | #include "cartesian_tree.hpp"
 2 | 
 3 | #include <catch2/catch_test_macros.hpp>
 4 | #include <catch2/catch_get_random_seed.hpp>
 5 | #include <bits/stdc++.h>
 6 | 
 7 | TEST_CASE("Cartesian Tree", "[cartesian_tree]") {
 8 | 	std::mt19937 mt(Catch::getSeed());
 9 | 	for (int sz : {0, 1, 2, 3, 5, 8, 13}) {
10 | 		std::vector<int> v(sz);
11 | 		iota(v.begin(), v.end(), 0);
12 | 		shuffle(v.begin(), v.end(), mt);
13 | 		{
14 | 			CartesianTree t = CartesianTree::build_min_tree(v);
15 | 			for (int i = 1; i < int(t.size()); i += 2) {
16 | 				REQUIRE(t[i].m == i/2);
17 | 				REQUIRE(t[i].l <= t[i].m);
18 | 				REQUIRE(t[i].m <= t[i].r);
19 | 
20 | 				REQUIRE(t[t[i].c[0]].l == t[i].l);
21 | 				REQUIRE(t[t[i].c[0]].r == t[i].m-1);
22 | 
23 | 				REQUIRE(t[t[i].c[1]].l == t[i].m+1);
24 | 				REQUIRE(t[t[i].c[1]].r == t[i].r);
25 | 
26 | 				REQUIRE((t[t[i].c[0]].l > t[t[i].c[0]].r || v[t[i].m] < v[t[t[i].c[0]].m]));
27 | 				REQUIRE((t[t[i].c[1]].l > t[t[i].c[1]].r || v[t[i].m] < v[t[t[i].c[1]].m]));
28 | 			}
29 | 		}
30 | 		{
31 | 			CartesianTree t = CartesianTree::build_max_tree(v);
32 | 			for (int i = 1; i < int(t.size()); i += 2) {
33 | 				REQUIRE(t[i].m == i/2);
34 | 				REQUIRE(t[i].l <= t[i].m);
35 | 				REQUIRE(t[i].m <= t[i].r);
36 | 
37 | 				REQUIRE(t[t[i].c[0]].l == t[i].l);
38 | 				REQUIRE(t[t[i].c[0]].r == t[i].m-1);
39 | 
40 | 				REQUIRE(t[t[i].c[1]].l == t[i].m+1);
41 | 				REQUIRE(t[t[i].c[1]].r == t[i].r);
42 | 
43 | 				REQUIRE((t[t[i].c[0]].l > t[t[i].c[0]].r || v[t[i].m] > v[t[t[i].c[0]].m]));
44 | 				REQUIRE((t[t[i].c[1]].l > t[t[i].c[1]].r || v[t[i].m] > v[t[t[i].c[1]].m]));
45 | 			}
46 | 		}
47 | 	}
48 | }
49 | 


--------------------------------------------------------------------------------
/src/char_poly.hpp:
--------------------------------------------------------------------------------
  1 | #pragma once
  2 | 
  3 | #include <vector>
  4 | #include <bitset>
  5 | #include <cassert>
  6 | 
  7 | // Compute the characteristic polynomial of a square matrix A over some field.
  8 | // Not numerically stable at all.
  9 | // Takes argument by value, use std::move if you can.
 10 | template <typename num> std::vector<num> charPoly(std::vector<std::vector<num>> A) {
 11 | 	int N = int(A.size());
 12 | 	std::vector<num> res; res.reserve(N+1);
 13 | 	res.push_back(num(1));
 14 | 	for (int i = 0, deg = 0; i < N; i++) {
 15 | 		auto& Ai = A[i];
 16 | 
 17 | 		int c = i+1;
 18 | 		while (c < N && Ai[c] == num(0)) c++;
 19 | 		if (c == N) {
 20 | 			res.resize(i+2, num(0));
 21 | 			for (int x = deg; x >= 0; x--) {
 22 | 				num v = res[x];
 23 | 				for (int y = x+1, z = i; z >= deg; z--, y++) {
 24 | 					res[y] -= v * Ai[z];
 25 | 				}
 26 | 			}
 27 | 			deg = i+1;
 28 | 			continue;
 29 | 		}
 30 | 
 31 | 		num vc = Ai[c];
 32 | 		num ivc = inv(vc);
 33 | 
 34 | 		Ai[c] = Ai[i+1];
 35 | 		Ai[i+1] = 0;
 36 | 
 37 | 		std::swap(A[i+1], A[c]);
 38 | 		auto& Ai1 = A[i+1];
 39 | 		for (int k = deg; k < N; k++) {
 40 | 			Ai1[k] *= vc;
 41 | 		}
 42 | 
 43 | 		for (int k = i+1; k < N; k++) {
 44 | 			auto& Ak = A[k];
 45 | 			{
 46 | 				auto& x = Ak[i+1];
 47 | 				auto& y = Ak[c];
 48 | 				num tmp = y;
 49 | 				y = x;
 50 | 				x = tmp * ivc;
 51 | 			}
 52 | 			{
 53 | 				num v = Ak[i+1];
 54 | 				for (int j = deg; j < N; j++) {
 55 | 					Ak[j] -= v * Ai[j];
 56 | 				}
 57 | 			}
 58 | 			if (k > i+1) {
 59 | 				num v = Ai[k];
 60 | 				for (int j = deg; j < N; j++) {
 61 | 					Ai1[j] += v * Ak[j];
 62 | 				}
 63 | 			}
 64 | 		}
 65 | 
 66 | 		for (int k = deg; k <= i; k++) {
 67 | 			Ai1[k+1] += Ai[k];
 68 | 		}
 69 | 	}
 70 | 	reverse(res.begin(), res.end());
 71 | 	return res;
 72 | }
 73 | 
 74 | // Compute the characteristic polynomial of a square matrix A over F2.
 75 | // Takes argument by value, use std::move if you can.
 76 | // Note that MAXS must be at least N+1
 77 | template <std::size_t MAXS> std::bitset<MAXS> charPoly(std::vector<std::bitset<MAXS>> A) {
 78 | 	using bs = std::bitset<MAXS>;
 79 | 	int N = int(A.size());
 80 | 	assert(MAXS >= N+1);
 81 | 	bs ans; ans[0] = 1;
 82 | 	int deg = 0;
 83 | 	for (int i = 0; i < N; i++) {
 84 | 		{
 85 | 			int j = int(A[i]._Find_next(i));
 86 | 			if (j >= N) {
 87 | 				bs nans;
 88 | 				for (; deg <= i; ans <<= 1, deg++) {
 89 | 					if (A[i][deg]) nans ^= ans;
 90 | 				}
 91 | 				ans ^= nans;
 92 | 				continue;
 93 | 			}
 94 | 			if (j != i+1) {
 95 | 				swap(A[j], A[i+1]);
 96 | 				for (auto& a : A) {
 97 | 					bool tmp = a[j];
 98 | 					a[j] = a[i+1];
 99 | 					a[i+1] = tmp;
100 | 				}
101 | 			}
102 | 		}
103 | 		assert(A[i][i+1]);
104 | 		bs msk = A[i]; msk.flip(i+1);
105 | 		for (int k = 0; k < N; k++) {
106 | 			if (msk[k]) A[i+1] ^= A[k];
107 | 		}
108 | 		for (auto& a : A) {
109 | 			if (a[i+1]) a ^= msk;
110 | 		}
111 | 	}
112 | 	return ans;
113 | }
114 | 


--------------------------------------------------------------------------------
/src/cnt_min.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include "reverse_comparator.hpp"
 4 | 
 5 | template <typename T, typename C = int, typename Comp = std::less<T>> struct cnt_min {
 6 | 	T v;
 7 | 	C cnt;
 8 | 
 9 | 	cnt_min() : v(), cnt(0) {}
10 | 	explicit cnt_min(T v_) : v(v_), cnt(1) {}
11 | 	cnt_min(T v_, C cnt_) : v(v_), cnt(cnt_) {}
12 | 
13 | 	friend cnt_min operator + (const cnt_min& a, const cnt_min& b) {
14 | 		if (!b.cnt) return a;
15 | 		else if (!a.cnt) return b;
16 | 		else if (Comp().operator()(a.v, b.v)) return a;
17 | 		else if (Comp().operator()(b.v, a.v)) return b;
18 | 		else return cnt_min(a.v, a.cnt + b.cnt);
19 | 	}
20 | 
21 | 	cnt_min& operator += (const cnt_min& o) {
22 | 		return *this = (*this + o);
23 | 	}
24 | };
25 | 
26 | template <typename T, typename C = int, typename Comp = std::less<T>> using cnt_max = cnt_min<T, C, reverse_comparator_t<Comp>>;
27 | 


--------------------------------------------------------------------------------
/src/dirichlet_series.test.cpp:
--------------------------------------------------------------------------------
  1 | #include "dirichlet_series.hpp"
  2 | 
  3 | #include <catch2/catch_template_test_macros.hpp>
  4 | #include <catch2/catch_get_random_seed.hpp>
  5 | #include <bits/stdc++.h>
  6 | 
  7 | #include "modnum.hpp"
  8 | 
  9 | namespace dirichlet_series {
 10 | 
 11 | namespace test {
 12 | 
 13 | div_vector_layout layout;
 14 | template <typename T> using dv_values = dirichlet_series_values<layout, T>;
 15 | template <typename T> using dv_prefix = dirichlet_series_prefix<layout, T>;
 16 | template <typename T> using dv_bit = dirichlet_series_binary_indexed_tree<layout, T>;
 17 | 
 18 | template <typename T>
 19 | dv_values<T> multiply_slow(const dv_values<T>& a, const dv_values<T>& b) {
 20 | 	dv_values<T> r;
 21 | 	for (int i = 1; i < layout.len; i++) {
 22 | 		for (int j = 1; j < layout.len; j++) {
 23 | 			int k = layout.get_value_bucket(layout.get_bucket_bound(i) * layout.get_bucket_bound(j));
 24 | 			if (k < layout.len) r.st[k] += a.st[i] * b.st[j];
 25 | 		}
 26 | 	}
 27 | 	return r;
 28 | }
 29 | 
 30 | TEMPLATE_TEST_CASE("Dirichlet series multiplication and inverse", "[dirichlet]", modnum<int(1e9)+7>, int64_t) {
 31 | 	using num = TestType;
 32 | 	for (int N = 1; N <= 30; N++) {
 33 | 		INFO("N = " << N);
 34 | 		std::mt19937 mt(Catch::getSeed());
 35 | 		layout = div_vector_layout(N);
 36 | 		dv_prefix<num> a([&](int64_t x) { return num(x); });
 37 | 		dv_prefix<num> b([&](int64_t x) { return num(x) * num(x+1) / num(2); });
 38 | 		dv_prefix<num> slow_res(multiply_slow(dv_values<num>(a), dv_values<num>(b)));
 39 | 		dv_prefix<num> fast_res = a * b;
 40 | 		for (int i = 1; i < layout.len; i++) {
 41 | 			INFO("i = " << i);
 42 | 			REQUIRE(slow_res.st[i] == fast_res.st[i]);
 43 | 		}
 44 | 		dv_prefix<num> a_2 = fast_res / b;
 45 | 		for (int i = 1; i < layout.len; i++) {
 46 | 			INFO("i = " << i);
 47 | 			REQUIRE(a.st[i] == a_2.st[i]);
 48 | 		}
 49 | 		dv_prefix<num> b_2 = fast_res / a;
 50 | 		for (int i = 1; i < layout.len; i++) {
 51 | 			INFO("i = " << i);
 52 | 			REQUIRE(b.st[i] == b_2.st[i]);
 53 | 		}
 54 | 		if constexpr (!std::is_same_v<num, int64_t>) {
 55 | 			dv_prefix<num> rt_a = sqrt(a);
 56 | 			dv_prefix<num> a_3 = rt_a * rt_a;
 57 | 			for (int i = 1; i < layout.len; i++) {
 58 | 				INFO("i = " << i);
 59 | 				REQUIRE(a.st[i] == a_3.st[i]);
 60 | 			}
 61 | 			dv_prefix<num> rt_b = sqrt(b);
 62 | 			dv_prefix<num> b_3 = rt_b * rt_b;
 63 | 			for (int i = 1; i < layout.len; i++) {
 64 | 				INFO("i = " << i);
 65 | 				REQUIRE(b.st[i] == b_3.st[i]);
 66 | 			}
 67 | 		}
 68 | 	}
 69 | }
 70 | 
 71 | TEMPLATE_TEST_CASE("Dirichlet series BIT sparse multiplication", "[dirichlet]", modnum<int(1e9)+7>) {
 72 | 	using num = TestType;
 73 | 	for (int N : {1, 2, 3, 4, 5, 24, 25, 26, 99, 100, 101, 1000}) {
 74 | 		INFO("N = " << N);
 75 | 		layout = div_vector_layout(N);
 76 | 		for (int m = 2; m <= N+1; m++) {
 77 | 			INFO("m = " << m);
 78 | 			num w = -5;
 79 | 			dv_prefix<num> a([&](int64_t x) -> num { return num(x * (x+1) / 2); });
 80 | 			dv_prefix<num> b([&](int64_t x) -> num { return 1 + (x >= m ? w : 0); });
 81 | 			{
 82 | 				dv_prefix<num> c_slow = a * b;
 83 | 				dv_bit<num> bit(a);
 84 | 				bit.sparse_mul_at_most_one(m, w);
 85 | 				dv_prefix<num> c(bit);
 86 | 				for (int i = 1; i < layout.len; i++) {
 87 | 					INFO("i = " << i);
 88 | 					REQUIRE(c_slow.st[i] == c.st[i]);
 89 | 				}
 90 | 			}
 91 | 			{
 92 | 				dv_prefix<num> d_slow = a / b;
 93 | 				dv_bit<num> bit(a);
 94 | 				bit.sparse_div_at_most_one(m, w);
 95 | 				dv_prefix<num> d(bit);
 96 | 				for (int i = 1; i < layout.len; i++) {
 97 | 					INFO("i = " << i);
 98 | 					REQUIRE(d_slow.st[i] == d.st[i]);
 99 | 				}
100 | 			}
101 | 		}
102 | 	}
103 | }
104 | 
105 | TEMPLATE_TEST_CASE("Dirichlet series euler transform", "[dirichlet]", modnum<int(1e9)+7>) {
106 | 	using num = TestType;
107 | 	for (int N : {1, 2, 3, 4, 5, 24, 25, 26, 99, 100, 101, 1000}) {
108 | 		INFO("N = " << N);
109 | 		layout = div_vector_layout(N);
110 | 		dv_prefix<num> a([&](int64_t x) { return num(x) * num(x+1) / num(2); });
111 | 		dv_prefix<num> primes = inverse_euler_transform_fraction(a);
112 | 		dv_prefix<num> primes2 = inverse_euler_transform_binary_indexed_tree(a);
113 | 		dv_values<num> primes_slow_v;
114 | 		for (int v = 2; v <= N; v++) {
115 | 			bool is_prime = true;
116 | 			for (int p = 2; p * p <= v; p++) {
117 | 				if (v % p == 0) {
118 | 					is_prime = false;
119 | 					break;
120 | 				}
121 | 			}
122 | 			primes_slow_v[v] += is_prime ? num(v) : 0;
123 | 		}
124 | 
125 | 		dv_prefix<num> primes_slow(primes_slow_v);
126 | 		for (int i = 1; i < layout.len; i++) {
127 | 			INFO("i = " << i);
128 | 			INFO("bound = " << layout.get_bucket_bound(i));
129 | 			REQUIRE(primes_slow.st[i] == primes.st[i]);
130 | 			REQUIRE(primes_slow.st[i] == primes2.st[i]);
131 | 		}
132 | 
133 | 		dv_prefix<num> b = euler_transform_fraction(primes_slow);
134 | 		dv_prefix<num> b2 = euler_transform_binary_indexed_tree(primes_slow);
135 | 		for (int i = 1; i < layout.len; i++) {
136 | 			INFO("i = " << i);
137 | 			INFO("bound = " << layout.get_bucket_bound(i));
138 | 			REQUIRE(a.st[i] == b.st[i]);
139 | 			REQUIRE(a.st[i] == b2.st[i]);
140 | 		}
141 | 	}
142 | }
143 | 
144 | }
145 | 
146 | }
147 | 


--------------------------------------------------------------------------------
/src/fft.test.cpp:
--------------------------------------------------------------------------------
 1 | #include <bits/stdc++.h>
 2 | #include <catch2/catch_test_macros.hpp>
 3 | 
 4 | #include "fft.hpp"
 5 | 
 6 | namespace ecnerwala {
 7 | namespace fft {
 8 | 
 9 | using namespace std;
10 | 
// Schoolbook polynomial product, used as the reference for the FFT tests.
// Returns an empty vector when either operand is empty.
template <typename T> std::vector<T> multiply_slow(const std::vector<T>& a, const std::vector<T>& b) {
	if (a.empty() || b.empty()) return {};
	std::vector<T> prod(a.size() + b.size() - 1);
	for (std::size_t i = 0; i < a.size(); ++i) {
		for (std::size_t j = 0; j < b.size(); ++j) {
			prod[i + j] += a[i] * b[j];
		}
	}
	return prod;
}
21 | 
22 | TEST_CASE("FFT Multiply Mod", "[fft]") {
23 | 	using num = modnum<int(1e9)+7>;
24 | 	mt19937 mt(48);
25 | 	vector<num> a(100);
26 | 	vector<num> b(168);
27 | 	for (num& x : a) { x = num(mt()); }
28 | 	for (num& x : b) { x = num(mt()); }
29 | 	REQUIRE(multiply<fft_mod_multiplier<num>>(a,b) == multiply_slow(a, b));
30 | }
31 | 
32 | TEST_CASE("FFT Inverse", "[fft]") {
33 | 	using num = modnum<998244353>;
34 | 	mt19937 mt(48);
35 | 	vector<num> a(298);
36 | 	for (num& x : a) { x = num(mt()); }
37 | 	auto i = inverse<multiply_inverser<fft_multiplier<num>, num>>(a);
38 | 	auto r = multiply<fft_multiplier<num>>(a, i);
39 | 	REQUIRE(r == multiply_slow(a, i));
40 | 
41 | 	r.resize(a.size());
42 | 	vector<num> tgt(a.size());
43 | 	tgt[0] = 1;
44 | 	REQUIRE(r == tgt);
45 | }
46 | 
47 | }} // namespace ecnerwala::fft
48 | 


--------------------------------------------------------------------------------
/src/fraction.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <iostream>
 4 | #include <numeric>
 5 | 
 6 | template <typename T, typename MulT=T> struct fraction_t {
 7 | 	T numer = 0, denom = 1;
 8 | 
 9 | 	fraction_t() : numer(0), denom(1) {}
10 | 	fraction_t(T v) : numer(v), denom(1) {}
11 | 	fraction_t(T n, T d) : numer(n), denom(d) {
12 | 		if (denom < 0 || (denom == 0 && numer < 0)) {
13 | 			numer = -numer;
14 | 			denom = -denom;
15 | 		}
16 | 	}
17 | 	template <typename U, typename V> explicit fraction_t(const fraction_t<U, V> o) : numer(T(o.numer)), denom(T(o.denom)) {}
18 | 
19 | 	friend std::ostream& operator << (std::ostream& o, const fraction_t& f) {
20 | 		return o << f.numer << '/' << f.denom;
21 | 	}
22 | 	friend std::istream& operator >> (std::istream& i, const fraction_t& f) {
23 | 		return i >> f.numer >> f.denom;
24 | 	}
25 | 
26 | 	friend MulT cross(const fraction_t& a, const fraction_t& b) {
27 | 		return MulT(a.numer) * MulT(b.denom) - MulT(b.numer) * MulT(a.denom);
28 | 	}
29 | 
30 | 	friend bool operator == (const fraction_t& a, const fraction_t& b) {
31 | 		return cross(a, b) == 0;
32 | 	}
33 | 	friend std::strong_ordering operator <=> (const fraction_t& a, const fraction_t& b) {
34 | 		return cross(a, b) <=> 0;
35 | 	}
36 | 
37 | 	fraction_t operator + () const { return fraction_t(+numer, denom); }
38 | 	fraction_t operator - () const { return fraction_t(-numer, denom); }
39 | 
40 | 	fraction_t& operator *= (const fraction_t& o) {
41 | 		numer *= o.numer;
42 | 		denom *= o.denom;
43 | 		return *this;
44 | 	}
45 | 	fraction_t& operator /= (const fraction_t& o) {
46 | 		numer *= o.denom;
47 | 		denom *= o.numer;
48 | 		return *this;
49 | 	}
50 | 	friend fraction_t operator * (const fraction_t& a, const fraction_t& b) {
51 | 		return fraction_t(a.numer * b.numer, a.denom * b.denom);
52 | 	}
53 | 	friend fraction_t operator / (const fraction_t& a, const fraction_t& b) {
54 | 		return fraction_t(a.numer * b.denom, a.denom * b.numer);
55 | 	}
56 | 
57 | 	friend fraction_t operator + (const fraction_t& a, const fraction_t& b) {
58 | 		return {a.numer * b.denom + b.numer * a.denom, a.denom * b.denom};
59 | 	}
60 | 	friend fraction_t operator - (const fraction_t& a, const fraction_t& b) {
61 | 		return {a.numer * b.denom - b.numer * a.denom, a.denom * b.denom};
62 | 	}
63 | 	fraction_t& operator += (const fraction_t& o) { return *this = *this + o; }
64 | 	fraction_t& operator -= (const fraction_t& o) { return *this = *this - o; }
65 | 
66 | 	fraction_t& reduce() {
67 | 		using std::gcd;
68 | 		T g = gcd(numer, denom);
69 | 		numer /= g;
70 | 		denom /= g;
71 | 		return *this;
72 | 	}
73 | };
74 | 


--------------------------------------------------------------------------------
/src/geometry/point.hpp:
--------------------------------------------------------------------------------
  1 | #pragma once
  2 | 
  3 | #include <complex>
  4 | #include <tuple>
  5 | #include <iostream>
  6 | #include <numeric>
  7 | 
  8 | template <typename T, typename AreaT=T> struct Point {
  9 | public:
 10 | 	T x, y;
 11 | 	Point() : x(0), y(0) {}
 12 | 	Point(T x_, T y_) : x(x_), y(y_) {}
 13 | 	template <typename U, typename V> explicit Point(const Point<U, V>& p) : x(p.x), y(p.y) {}
 14 | 	Point(const std::pair<T, T>& p) : x(p.first), y(p.second) {}
 15 | 	Point(const std::complex<T>& p) : x(real(p)), y(imag(p)) {}
 16 | 	explicit operator std::pair<T, T> () const { return std::pair<T, T>(x, y); }
 17 | 	explicit operator std::complex<T> () const { return std::complex<T>(x, y); }
 18 | 	void as_pair() const { return std::pair<T, T>(*this); }
 19 | 	void as_complex() const { return std::complex<T>(*this); }
 20 | 
 21 | 	friend std::ostream& operator << (std::ostream& o, const Point& p) { return o << '(' << p.x << ',' << p.y << ')'; }
 22 | 	friend std::istream& operator >> (std::istream& i, Point& p) { return i >> p.x >> p.y; }
 23 | 	friend bool operator == (const Point& a, const Point& b) { return a.x == b.x && a.y == b.y; }
 24 | 	friend bool operator != (const Point& a, const Point& b) { return !(a==b); }
 25 | 
 26 | 	Point operator + () const { return Point(+x, +y); }
 27 | 	Point operator - () const { return Point(-x, -y); }
 28 | 
 29 | 	Point& operator += (const Point& p) { x += p.x, y += p.y; return *this; }
 30 | 	Point& operator -= (const Point& p) { x -= p.x, y -= p.y; return *this; }
 31 | 	Point& operator *= (const T& t) { x *= t, y *= t; return *this; }
 32 | 	Point& operator /= (const T& t) { x /= t, y /= t; return *this; }
 33 | 
 34 | 	friend Point operator + (const Point& a, const Point& b) { return Point(a.x+b.x, a.y+b.y); }
 35 | 	friend Point operator - (const Point& a, const Point& b) { return Point(a.x-b.x, a.y-b.y); }
 36 | 	friend Point operator * (const Point& a, const T& t) { return Point(a.x*t, a.y*t); }
 37 | 	friend Point operator * (const T& t ,const Point& a) { return Point(t*a.x, t*a.y); }
 38 | 	friend Point operator / (const Point& a, const T& t) { return Point(a.x/t, a.y/t); }
 39 | 
 40 | 	AreaT dist2() const { return AreaT(x) * AreaT(x) + AreaT(y) * AreaT(y); }
 41 | 	auto dist() const { return std::sqrt(dist2()); }
 42 | 	Point unit() const { return *this / this->dist(); }
 43 | 	auto angle() const { return std::atan2(y, x); }
 44 | 
 45 | 	T int_norm() const { return std::gcd(x,y); }
 46 | 	Point int_unit() const { if (!x && !y) return *this; return *this / this->int_norm(); }
 47 | 
 48 | 	// Convenient free-functions, mostly for generic interop
 49 | 	friend auto norm(const Point& a) { return a.dist2(); }
 50 | 	friend auto abs(const Point& a) { return a.dist(); }
 51 | 	friend auto unit(const Point& a) { return a.unit(); }
 52 | 	friend auto arg(const Point& a) { return a.angle(); }
 53 | 	friend auto int_norm(const Point& a) { return a.int_norm(); }
 54 | 	friend auto int_unit(const Point& a) { return a.int_unit(); }
 55 | 
 56 | 	Point perp_cw() const { return Point(y, -x); }
 57 | 	Point perp_ccw() const { return Point(-y, x); }
 58 | 
 59 | 	friend AreaT dot(const Point& a, const Point& b) { return AreaT(a.x) * AreaT(b.x) + AreaT(a.y) * AreaT(b.y); }
 60 | 	friend AreaT cross(const Point& a, const Point& b) { return AreaT(a.x) * AreaT(b.y) - AreaT(a.y) * AreaT(b.x); }
 61 | 	friend AreaT cross3(const Point& a, const Point& b, const Point& c) { return cross(b-a, c-a); }
 62 | 
 63 | 	// Complex numbers and rotation
 64 | 	friend Point conj(const Point& a) { return Point(a.x, -a.y); }
 65 | 
 66 | 	// Returns conj(a) * b
 67 | 	friend Point dot_cross(const Point& a, const Point& b) { return Point(dot(a, b), cross(a, b)); }
 68 | 	friend Point cmul(const Point& a, const Point& b) { return dot_cross(conj(a), b); }
 69 | 	friend Point cdiv(const Point& a, const Point& b) { return dot_cross(b, a) / b.dist2(); }
 70 | 
 71 | 	// Must be a unit vector; otherwise multiplies the result by abs(u)
 72 | 	Point rotate(const Point& u) const { return dot_cross(conj(u), *this); }
 73 | 	Point unrotate(const Point& u) const { return dot_cross(u, *this); }
 74 | 
 75 | 	friend bool lex_less(const Point& a, const Point& b) {
 76 | 		return std::tie(a.x, a.y) < std::tie(b.x, b.y);
 77 | 	}
 78 | 
 79 | 	friend bool same_dir(const Point& a, const Point& b) { return cross(a,b) == 0 && dot(a,b) > 0; }
 80 | 
 81 | 	// check if 180 <= s..t < 360
 82 | 	friend bool is_reflex(const Point& a, const Point& b) { auto c = cross(a,b); return c ? (c < 0) : (dot(a, b) < 0); }
 83 | 
 84 | 	// operator < (s,t) for angles in [base,base+2pi)
 85 | 	friend bool angle_less(const Point& base, const Point& s, const Point& t) {
 86 | 		int r = is_reflex(base, s) - is_reflex(base, t);
 87 | 		return r ? (r < 0) : (0 < cross(s, t));
 88 | 	}
 89 | 
 90 | 	friend auto angle_cmp(const Point& base) {
 91 | 		return [base](const Point& s, const Point& t) { return angle_less(base, s, t); };
 92 | 	}
 93 | 	friend auto angle_cmp_center(const Point& center, const Point& dir) {
 94 | 		return [center, dir](const Point& s, const Point& t) -> bool { return angle_less(dir, s-center, t-center); };
 95 | 	}
 96 | 
 97 | 	// is p in [s,t] taken ccw? 1/0/-1 for in/border/out
 98 | 	friend int angle_between(const Point& s, const Point& t, const Point& p) {
 99 | 		if (same_dir(p, s) || same_dir(p, t)) return 0;
100 | 		return angle_less(s, p, t) ? 1 : -1;
101 | 	}
102 | };
103 | 


--------------------------------------------------------------------------------
/src/geometry/point3d.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include<bits/stdc++.h>
 4 | 
 5 | const double PI = acos(-1.);
 6 | const double TAU = 2 * PI;
 7 | 
 8 | template <typename T, typename AreaT=T, typename VolT=T> struct Point3D {
 9 | 	using P = Point3D;
10 | 
11 | 	T x, y, z;
12 | 	Point3D() : x(0), y(0), z(0) {}
13 | 	Point3D(T x_, T y_, T z_) : x(x_), y(y_), z(z_) {}
14 | 
15 | 	template <typename U, typename V, typename W> explicit Point3D(const Point3D<U, V, W>& p) : x(T(p.x)), y(T(p.y)), z(T(p.z)) {}
16 | 
17 | 	friend std::istream& operator >> (std::istream& i, P& p) { return i >> p.x >> p.y >> p.z; }
18 | 	friend std::ostream& operator << (std::ostream& o, const P& p) { return o << "(" << p.x << "," << p.y << "," << p.z << ")"; }
19 | 
20 | 	friend bool operator == (const P& a, const P& b) { return a.x == b.x && a.y == b.y && a.z == b.z; }
21 | 	friend bool operator != (const P& a, const P& b) { return a.x != b.x || a.y != b.y || a.z != b.z; }
22 | 
23 | 	P& operator += (const P& o) { x += o.x, y += o.y, z += o.z; return *this; }
24 | 	P& operator -= (const P& o) { x -= o.x, y -= o.y, z -= o.z; return *this; }
25 | 	friend P operator + (const P& a, const P& b) { return P(a) += b; }
26 | 	friend P operator - (const P& a, const P& b) { return P(a) -= b; }
27 | 
28 | 	P& operator *= (const T& t) { x *= t, y *= t, z *= t; return *this; }
29 | 	P& operator /= (const T& t) { x /= t, y /= t, z /= t; return *this; }
30 | 	friend P operator * (const P& p, const T& t) { return P(p) *= t; }
31 | 	friend P operator * (const T& t, const P& p) { return P(p) *= t; }
32 | 	friend P operator / (const P& a, const T& t) { return P(a) /= t; }
33 | 
34 | 	friend P operator + (const P& a) { return P(+a.x, +a.y, +a.z); }
35 | 	friend P operator - (const P& a) { return P(-a.x, -a.y, -a.z); }
36 | 
37 | 	friend AreaT dot(const P& a, const P& b) { return AreaT(a.x) * AreaT(b.x) + AreaT(a.y) * AreaT(b.y) + AreaT(a.z) * AreaT(b.z); }
38 | 	friend AreaT norm(const P& a) { return dot(a,a); }
39 | 	// We're playing a little loose with this type, expliitly cast it if you need
40 | 	friend Point3D<AreaT, VolT> cross(const P& a, const P& b) { return Point3D<AreaT, VolT>(AreaT(a.y) * AreaT(b.z) - AreaT(a.z) * AreaT(b.y), AreaT(a.z) * AreaT(b.x) - AreaT(a.x) * AreaT(b.z), AreaT(a.x) * AreaT(b.y) - AreaT(a.y) * AreaT(b.x)); }
41 | 
42 | 	friend T int_norm(const P& p) {
43 | 		return std::gcd(std::gcd(abs(p.x), abs(p.y)), abs(p.z));
44 | 	}
45 | 	friend P int_unit(const P& p) {
46 | 		T g = int_norm(p);
47 | 		return g ? p / g : p;
48 | 	}
49 | 
50 | 	friend T abs(const P& a) { return std::sqrt(std::max(T(0), norm(a))); }
51 | 	friend P unit(const P& a) { return a / abs(a); }
52 | 
53 | 	friend VolT vol(const P& a, const P& b, const P& c, const P& d) { return dot(cross(b-a, c-a), Point3D<AreaT, VolT>(d-a)); }
54 | 
55 | 	friend bool lexLess(const P& a, const P& b) { return tie(a.x, a.y, a.z) < tie(b.x, b.y, b.z); }
56 | 
57 | 	friend bool parallelSame(const P& a, const P& b) {
58 | 		assert(a != P());
59 | 		assert(b != P());
60 | 		return lexLess(a, P()) == lexLess(b, P());
61 | 	}
62 | };
63 | 


--------------------------------------------------------------------------------
/src/graph/make_st_dag.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <vector>
 4 | #include <cassert>
 5 | 
 6 | #include "yc.hpp"
 7 | 
 8 | // Direct a graph into a DAG so that given source and sink are the unique sources/sinks.
 9 | // If there are any biconnected components not on the path from the source to
10 | // the sink, they will not be output, modify the code if necessary.
11 | // Returns a topological sort. Back out the edge directions yourself.
12 | inline std::vector<int> make_st_dag(const std::vector<std::vector<int>>& adj, int source = -1, int sink = -1) {
13 | 	int N = int(adj.size());
14 | 
15 | 	// What's even going on lol
16 | 	if (N == 0) return {};
17 | 
18 | 	// Make some arbitrary choices as defaults
19 | 	if (source == -1 && sink == -1) source = 0;
20 | 	if (source == -1) source = adj[sink].empty() ? sink : adj[sink][0];
21 | 	if (sink == -1) sink = adj[source].empty() ? source : adj[source][0];
22 | 
23 | 	std::vector<int> depth(N, -1);
24 | 	std::vector<int> lowval(N);
25 | 	std::vector<bool> has_sink(N);
26 | 	std::vector<std::vector<int>> ch(N);
27 | 	std::y_combinator([&](auto self, int cur, int prv) -> void {
28 | 		depth[cur] = prv != -1 ? depth[prv] + 1 : 0;
29 | 		lowval[cur] = depth[cur];
30 | 		ch[cur].reserve(adj[cur].size());
31 | 		has_sink[cur] = (cur == sink);
32 | 		for (int nxt : adj[cur]) {
33 | 			if (nxt == prv) continue;
34 | 			if (depth[nxt] == -1) {
35 | 				ch[cur].push_back(nxt);
36 | 				self(nxt, cur);
37 | 				lowval[cur] = std::min(lowval[cur], lowval[nxt]);
38 | 				if (has_sink[nxt]) has_sink[cur] = true;
39 | 			} else if (depth[nxt] < depth[cur]) {
40 | 				lowval[cur] = std::min(lowval[cur], depth[nxt]);
41 | 			} else {
42 | 				// down edge
43 | 			}
44 | 		}
45 | 	})(source, -1);
46 | 
47 | 	// true is after, false is before
48 | 	std::vector<bool> edge_dir(N, false);
49 | 	std::vector<int> lst_nxt(N, -1);
50 | 	auto lst = std::y_combinator([&](auto self, int cur) -> std::array<int, 2> {
51 | 		std::array<int, 2> res{cur, cur};
52 | 		for (int nxt : ch[cur]) {
53 | 			// If we're on the path to the sink, mark it as downwards.
54 | 
55 | 			// Comment out this line to direct extra bcc's as extra sinks
56 | 			if (!has_sink[nxt] && lowval[nxt] >= depth[cur]) continue;
57 | 
58 | 			bool d = (has_sink[nxt] || lowval[nxt] >= depth[cur]) ? true : !edge_dir[lowval[nxt]];
59 | 			edge_dir[depth[cur]] = d;
60 | 
61 | 			auto ch_res = self(nxt);
62 | 
63 | 			// Join res and ch
64 | 			if (!d) std::swap(res, ch_res);
65 | 			lst_nxt[std::exchange(res[1], ch_res[1])] = ch_res[0];
66 | 		}
67 | 		return res;
68 | 	})(source);
69 | 
70 | 	std::vector<int> res; res.reserve(N);
71 | 	int cur = lst[0];
72 | 	while (cur != -1) {
73 | 		res.push_back(cur);
74 | 		cur = lst_nxt[cur];
75 | 	}
76 | 	return res;
77 | }
78 | 


--------------------------------------------------------------------------------
/src/hash_map.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include<bits/stdc++.h>
 4 | // #include<bits/extc++.h>
 5 | #include <ext/pb_ds/assoc_container.hpp>
 6 | 
 7 | struct splitmix64_hash {
 8 | 	static uint64_t splitmix64(uint64_t x) {
 9 | 		// http://xorshift.di.unimi.it/splitmix64.c
10 | 		x += 0x9e3779b97f4a7c15;
11 | 		x = (x ^ (x >> 30)) * 0xbf58476d1ce4e5b9;
12 | 		x = (x ^ (x >> 27)) * 0x94d049bb133111eb;
13 | 		return x ^ (x >> 31);
14 | 	}
15 | 
16 | 	size_t operator()(uint64_t x) const {
17 | 		static const uint64_t FIXED_RANDOM = std::chrono::steady_clock::now().time_since_epoch().count();
18 | 		return splitmix64(x + FIXED_RANDOM);
19 | 	}
20 | };
21 | 
22 | template <typename K, typename V, typename Hash = splitmix64_hash>
23 | using hash_map = __gnu_pbds::gp_hash_table<K, V, Hash>;
24 | 
25 | template <typename K, typename Hash = splitmix64_hash>
26 | using hash_set = hash_map<K, __gnu_pbds::null_type, Hash>;
27 | 


--------------------------------------------------------------------------------
/src/jacobi.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <cassert>
 4 | #include <utility>
 5 | 
 6 | // Computes (n on m) == 1 using the binary-gcd method
 7 | // m must be positive and odd, and n must be relatively prime
 8 | template <typename T> bool is_qr_jacobi(T n, T m) {
 9 | 	bool r = true;
10 | 	assert(m & 1);
11 | 	assert(m > 0);
12 | 	if (n < 0) {
13 | 		if (m & 2) r = !r;
14 | 		n = -n;
15 | 	}
16 | 	while (m > 1) {
17 | 		assert(n > 0);
18 | 		int t = __builtin_ctzll(n);
19 | 		n >>= t;
20 | 		if ((t & 1) && (((m & 7) == 3) || ((m & 7) == 5))) {
21 | 			r = !r;
22 | 		}
23 | 		// n and m both odd
24 | 		if (n < m) {
25 | 			if ((n & 2) && (m & 2)) {
26 | 				r = !r;
27 | 			}
28 | 			using std::swap;
29 | 			swap(n, m);
30 | 		}
31 | 		n -= m;
32 | 	}
33 | 	return r;
34 | }
35 | 


--------------------------------------------------------------------------------
/src/lattice_cnt.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <utility>
 4 | #include <cassert>
 5 | 
 6 | // number of integer solutions to Ax + By <= C and x,y >= 0
 7 | inline long long lattice_cnt(long long A, long long B, long long C) {
 8 | 	using ll = long long;
 9 | 
10 | 	assert(A >= 0 && B >= 0);
11 | 	if (C < 0) return 0;
12 | 
13 | 	assert(A > 0 && B > 0);
14 | 	if (A > B) std::swap(A, B);
15 | 	assert(A <= B);
16 | 
17 | 	ll ans = 0;
18 | 	while (C >= 0) {
19 | 		assert(0 < A && A <= B);
20 | 
21 | 		ll k = B/A;
22 | 		ll l = B%A;
23 | 		assert(B == k * A + l);
24 | 
25 | 		ll f = C/B;
26 | 		ll e = C%B / A;
27 | 		ll g = C%B % A;
28 | 		assert(C == f * B + e * A + g);
29 | 		assert(C == (f * k + e) * A + f * l + g);
30 | 
31 | 		// either x + ky <= f*k+e
32 | 		// i.e. 0 <= x <= (f-y) * k + e
33 | 		// or x >= fk + e + 1 - ky
34 | 		// and Ax + (Ak+l) y <= C = (fk + e + 1) A + fl - A + g
35 | 		// Let z = x - (fk + e + 1 - ky)
36 | 		// Az + A(fk + e + 1 - ky) + Aky + ly <= C = A (fk + e + 1) + fl - A + g
37 | 		// Az + ly <= fl - A + g
38 | 
39 | 		ans += (f+1) * (e+1) + (f+1) * f / 2 * k;
40 | 
41 | 		C = f*l - A + g;
42 | 		B = A;
43 | 		A = l;
44 | 	}
45 | 	return ans;
46 | }
47 | 
48 | // count the number of 0 <= (a * x % m) < c for 0 <= x < n
49 | inline long long mod_count(long long a, long long m, long long c, long long n) {
50 | 	assert(m > 0);
51 | 	if (n == 0) return 0;
52 | 
53 | 	a %= m; if (a < 0) a += m;
54 | 
55 | 	long long extraC = c / m; c %= m;
56 | 	if (c < 0) extraC--, c += m;
57 | 	assert(0 <= c && c < m);
58 | 
59 | 	long long ans = extraC * n;
60 | 
61 | 	long long extraN = n / m; n %= m;
62 | 	if (n < 0) extraN--, n += m;
63 | 	assert(0 <= n && n < m);
64 | 
65 | 	if (extraN) {
66 | 		ans += extraN * (lattice_cnt(m, a+m, (a+m) * (m-1)) - lattice_cnt(m, a+m, (a+m) * (m-1) - c));
67 | 	}
68 | 
69 | 	if (n) {
70 | 		// we want solutions to 0 <= a(N-1-x) - my < c with 0 <= x <= N-1
71 | 		// a * (N-1) >= ax + my > a * (N-1) - c
72 | 		ans += lattice_cnt(m, a+m, (a+m) * (n-1)) - lattice_cnt(m, a+m, (a+m) * (n-1) - c);
73 | 	}
74 | 
75 | 	return ans;
76 | }
77 | 
78 | inline long long mod_count_range(long long a, long long m, long long clo, long long chi, long long nlo, long long nhi) {
79 | 	return mod_count(a, m, chi, nhi) - mod_count(a, m, chi, nlo) - mod_count(a, m, clo, nhi) + mod_count(a, m, clo, nlo);
80 | }
81 | 


--------------------------------------------------------------------------------
/src/lattice_cnt.test.cpp:
--------------------------------------------------------------------------------
 1 | #include <catch2/catch_test_macros.hpp>
 2 | 
 3 | #include "lattice_cnt.hpp"
 4 | 
 5 | using namespace std;
 6 | 
 7 | long long lattice_cnt_slow(long long A, long long B, long long C) {
 8 | 	using ll = long long;
 9 | 	ll ans = 0;
10 | 	for (ll x = 0; A * x <= C; x++) {
11 | 		for (ll y = 0; A * x + B * y <= C; y++) {
12 | 			ans++;
13 | 		}
14 | 	}
15 | 	return ans;
16 | }
17 | 
// Brute-force reference: counts the pairs (x, r) with nlo <= x < nhi and
// clo <= r < chi such that a*x - r is divisible by m.
// Requires nlo <= nhi and clo <= chi (asserted).
long long mod_count_range_slow(long long a, long long m, long long clo, long long chi, long long nlo, long long nhi) {
	assert(nlo <= nhi);
	assert(clo <= chi);
	long long hits = 0;
	for (long long x = nlo; x < nhi; ++x) {
		const long long prod = a * x;
		for (long long r = clo; r < chi; ++r) {
			if ((prod - r) % m == 0) {
				++hits;
			}
		}
	}
	return hits;
}
29 | 
// Exhaustively compares lattice_cnt against the brute-force lattice_cnt_slow
// for 0 <= a <= 50, 0 <= b <= 10 and -1 <= c <= 100.
TEST_CASE("Lattice Count", "[lattice_cnt]") {
	for (int a = 0; a <= 50; a++) {
		for (int b = 0; b <= 10; b++) {
			for (int c = -1; c <= 100; c++) {
				// A zero coefficient is only legal for c < 0: lattice_cnt asserts
				// A, B > 0 once c >= 0, and the brute force would never terminate.
				if ((a == 0 || b == 0) && c >= 0) continue;
				INFO("a = " << a);
				INFO("b = " << b);
				INFO("c = " << c);
				REQUIRE(lattice_cnt(a, b, c) == lattice_cnt_slow(a, b, c));
			}
		}
	}
}
43 | 
// Checks mod_count for non-negative arguments: 1 <= m <= 25, 0 <= a <= m+10,
// 0 <= c <= m and 1 <= n <= m+10.
TEST_CASE("Mod Count (positive)", "[lattice_cnt]") {
	for (int m = 1; m <= 25; m++) {
		for (int a = 0; a <= m+10; a++) {
			for (int c = 0; c <= m; c++) {
				INFO("a = " << a);
				INFO("m = " << m);
				INFO("c = " << c);
				// Running expected value: number of x in [0, n) with a*x % m < c,
				// extended by one x per iteration of the n loop.
				int trueAns = 0;
				for (int n = 1; n <= m+10; n++) {
					INFO("n = " << n);

					// Account for the newly included x = n-1 before comparing.
					trueAns += (a * (n-1) % m) < c;
					REQUIRE(mod_count(a, m, c, n) == trueAns);
				}
			}
		}
	}
}
62 | 
// Compares mod_count_range against mod_count_range_slow on ranges whose
// endpoints may be negative or exceed m, for a handful of moduli and
// multipliers (including a negative multiplier).
TEST_CASE("Mod Count (negatives)", "[lattice_cnt]") {
	for (int m : {1, 2, 3, 5, 8, 13, 21}) {
		for (int a : {-10, 0, 1, 2, 3, 5, m, m+5}) {
			// Candidate endpoints; all lie strictly between -57 / -55 and 47 / 49,
			// so the fixed opposite endpoints below always give lo <= hi as the
			// brute force asserts.
			auto cnds = {-37, -2*m-1, -m, -m+1, -m/2, -1, 0, 1, m/2, m+1, 2*m-1, 34};
			INFO("a = " << a);
			INFO("m = " << m);
			// Vary the lower endpoints, keep the upper endpoints fixed at 47 / 49.
			for (int clo : cnds) {
				for (int nlo : cnds) {
					INFO("clo = " << clo);
					INFO("nlo = " << nlo);
					REQUIRE(mod_count_range(a, m, clo, 47, nlo, 49) == mod_count_range_slow(a, m, clo, 47, nlo, 49));
				}
			}

			// Vary the upper endpoints, keep the lower endpoints fixed at -55 / -57.
			for (int chi : cnds) {
				for (int nhi : cnds) {
					INFO("chi = " << chi);
					INFO("nhi = " << nhi);
					REQUIRE(mod_count_range(a, m, -55, chi, -57, nhi) == mod_count_range_slow(a, m, -55, chi, -57, nhi));
				}
			}
		}
	}
}
87 | 


--------------------------------------------------------------------------------
/src/lct.hpp:
--------------------------------------------------------------------------------
  1 | #pragma once
  2 | 
  3 | #include <cassert>
  4 | #include <utility>
  5 | 
namespace lct {

// Splay-tree node with a lazy "flip" (reversal) flag and a subtree-size
// aggregate, offering the expose / make_root primitives.
// The struct has no constructor: p, c, s and flip must be initialised by the
// user before any method is called.
struct node {
	// Parent pointer. A node may have a non-null p without being listed as a
	// child of p; such a node counts as the root of its own splay tree (see r()).
	node* p;
	// Left (c[0]) and right (c[1]) children; nullptr when absent.
	node* c[2];

	// Number of nodes in this node's splay subtree, as last computed by update().
	int s;

	// Pending lazy reversal: when set, the children still have to be swapped
	// (done by propogate()).
	bool flip;

	// isroot: true when there is no parent, or when the parent does not list
	// this node as either of its children.
	inline bool r() { return p == nullptr || !(this == p->c[0] || this == p->c[1]); }
	// direction: true (1) when this node is its parent's right child.
	// Must not be called on a root (asserted).
	inline bool d() { assert(!r()); return this == p->c[1]; }

	// Recomputes s from the children's s values.
	inline void update() { s = 1 + (c[0] ? c[0]->s : 0) + (c[1] ? c[1]->s : 0); }
	// Applies a pending flip: swaps the children, toggles the flag on each
	// existing child and clears the flag on this node.
	void propogate() {
		if(flip) {
			std::swap(c[0], c[1]);
			if(c[0]) c[0]->flip = !c[0]->flip;
			if(c[1]) c[1]->flip = !c[1]->flip;
			flip = false;
		}
	}

	// Rotates this node above its parent.
	// precondition: parent and current are propagated (no pending flip)
	void rot() {
		assert(!r());

		int x = d();
		node* pa = p;
		// Inner child that changes sides during the rotation.
		node* ch = c[!x];

		assert(!pa->flip);
		assert(!flip);

		assert((!ch) || ch->p == this);

		// Take over the parent's slot in the grandparent, if the parent has one;
		// the parent pointer is inherited in either case.
		if(!pa->r()) pa->p->c[pa->d()] = this;
		this->p = pa->p;

		pa->c[x] = ch;
		if(ch) ch->p = pa;

		this->c[!x] = pa;
		pa->p = this;

		// The old parent is now below this node, so refresh it first.
		pa->update();
		update();
	}

	// Rotates this node up until it is the root of its splay tree.
	// postcondition: always propagated
	void splay() {
		if(r()) {
			update();
			propogate();
			return;
		}

		while(!r()) {
			if(!p->r()) {
				// Double rotation; push pending flips down from the grandparent
				// first so that d() reflects the real orientation.
				node* gp = p->p;
				node* pa = p;
				gp->propogate();
				pa->propogate();
				propogate();
				if(d() == p->d()) {
					// Same direction twice: rotate the parent first.
					pa->rot();
					assert(p == pa);
				} else {
					// Opposite directions: rotate this node twice.
					rot();
					assert(p == gp);
				}
				rot();
			} else {
				// The parent is the splay root: a single rotation finishes.
				p->propogate();
				propogate();
				rot();
				assert(r());
			}
		}
		update();
	}

	// attach on right side: replaces the right child by n (which may be nullptr).
	// The previous right child is only unlinked from c[1]; its own p pointer is
	// left untouched. n must already have p == this and be a root in the r() sense.
	// precondition: propagated
	void make_child(node* n) {
		assert(!flip);
		assert(r());

		if(c[1]) {
			node* v = c[1];
			c[1] = nullptr;
			// Once unlinked, v is no longer a registered child of this node.
			assert(v->r());

			update();
		}

		assert(!flip);
		assert(!c[1]);

		if(n) {

			assert(n->r());
			assert(n->p == this);

			c[1] = n;
			assert(c[1]->p == this);

			update();
		}
	}

	// Splays this node, drops its right child, then repeatedly hops to p,
	// splays it, hangs this node on its right side and rotates above it,
	// until no parent pointer remains. Afterwards p == nullptr and c[1] == nullptr.
	// postcondition: propagated
	void expose() {
		splay();
		assert(!flip);
		make_child(nullptr);
		while(p) {
			assert(r());
			p->splay();
			p->make_child(this);
			assert(!p->flip);
			assert(!flip);
			rot();
			update();
			assert(r());
		}
		assert(!p);
		assert(!c[1]);
	}

	// Exposes this node and then toggles its flip flag.
	// does not propagate: the flip stays pending on this node.
	void make_root() {
		expose();
		assert(p == nullptr);
		assert(r());
		flip = !flip;
	}

};

} // namespace lct
149 | 


--------------------------------------------------------------------------------
/src/level_ancestor.hpp:
--------------------------------------------------------------------------------
  1 | #pragma once
  2 | 
  3 | #include <vector>
  4 | #include <cassert>
  5 | 
  6 | #include "yc.hpp"
  7 | 
  8 | namespace ecnerwala {
  9 | 
 10 | using std::swap;
 11 | 
 12 | struct level_ancestor {
 13 | 	int N;
 14 | 	std::vector<int> preorder;
 15 | 	std::vector<int> idx;
 16 | 	std::vector<std::pair<int, int>> heavyPar; // heavy parent, distance
 17 | 	level_ancestor() : N(0) {}
 18 | 
 19 | 	level_ancestor(const std::vector<int>& par) : N(int(par.size())), preorder(N), idx(N), heavyPar(N) {
 20 | 		std::vector<std::vector<int>> ch(N);
 21 | 		for (int i = 0; i < N; i++) {
 22 | 			if (par[i] != -1) ch[par[i]].push_back(i);
 23 | 		}
 24 | 		std::vector<int> sz(N);
 25 | 		int nxt_idx = 0;
 26 | 		for (int i = 0; i < N; i++) {
 27 | 			if (par[i] == -1) {
 28 | 				std::y_combinator([&](auto self, int cur) -> void {
 29 | 					sz[cur] = 1;
 30 | 					for (int nxt : ch[cur]) {
 31 | 						self(nxt);
 32 | 						sz[cur] += sz[nxt];
 33 | 					}
 34 | 					if (!ch[cur].empty()) {
 35 | 						auto mit = max_element(ch[cur].begin(), ch[cur].end(), [&](int a, int b) { return sz[a] < sz[b]; });
 36 | 						swap(*ch[cur].begin(), *mit);
 37 | 					}
 38 | 				})(i);
 39 | 				std::y_combinator([&](auto self, int cur, int isRoot = true) -> void {
 40 | 					preorder[idx[cur] = nxt_idx++] = cur;
 41 | 					if (isRoot) {
 42 | 						heavyPar[idx[cur]] = {par[cur] == -1 ? -1 : idx[par[cur]], 1};
 43 | 					} else {
 44 | 						assert(idx[par[cur]] == idx[cur]-1);
 45 | 						heavyPar[idx[cur]] = heavyPar[idx[cur]-1];
 46 | 						heavyPar[idx[cur]].second++;
 47 | 					}
 48 | 					bool chRoot = false;
 49 | 					for (int nxt : ch[cur]) {
 50 | 						self(nxt, chRoot);
 51 | 						chRoot = true;
 52 | 					}
 53 | 				})(i);
 54 | 			}
 55 | 		}
 56 | 	}
 57 | 
 58 | 	int get_ancestor(int a, int k) const {
 59 | 		assert(k >= 0);
 60 | 		a = idx[a];
 61 | 		while (a != -1 && k) {
 62 | 			if (k >= heavyPar[a].second) {
 63 | 				k -= heavyPar[a].second;
 64 | 				assert(heavyPar[a].first <= a - heavyPar[a].second);
 65 | 				a = heavyPar[a].first;
 66 | 			} else {
 67 | 				a -= k;
 68 | 				k = 0;
 69 | 			}
 70 | 		}
 71 | 		if (a == -1) return -1;
 72 | 		else return preorder[a];
 73 | 	}
 74 | 
 75 | 	int lca(int a, int b) const {
 76 | 		a = idx[a], b = idx[b];
 77 | 		while (true) {
 78 | 			if (a > b) swap(a, b);
 79 | 			assert(a <= b);
 80 | 			if (a > b - heavyPar[b].second) {
 81 | 				return preorder[a];
 82 | 			}
 83 | 			b = heavyPar[b].first;
 84 | 			if (b == -1) return -1;
 85 | 		}
 86 | 	}
 87 | 
 88 | 	int dist(int a, int b) const {
 89 | 		a = idx[a], b = idx[b];
 90 | 		int res = 0;
 91 | 		while (true) {
 92 | 			if (a > b) swap(a, b);
 93 | 			assert(a <= b);
 94 | 			if (a > b - heavyPar[b].second) {
 95 | 				res += b - a;
 96 | 				break;
 97 | 			}
 98 | 			res += heavyPar[b].second;
 99 | 			b = heavyPar[b].first;
100 | 			if (b == -1) return -1;
101 | 		}
102 | 		return res;
103 | 	}
104 | };
105 | 
106 | } // namespace ecnerwala
107 | 


--------------------------------------------------------------------------------
/src/manacher.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <vector>
 4 | #include <cassert>
 5 | 
 6 | /**
 7 |  * manacher(S): return the maximum palindromic substring of S centered at each point
 8 |  *
 9 |  * Input: string (or vector) of length N (no restrictions on character-set)
10 |  * Output: vector res of length 2*N+1
11 |  *   For any 0 <= i <= 2*N:
12 |  *   * i % 2 == res[i] % 2
13 |  *   * the half-open substring S[(i-res[i])/2, (i+res[i])/2) is a palindrome of length res[i]
14 |  *   * For odd palindromes, take odd i, and vice versa
15 |  */
16 | template <typename V> std::vector<int> manacher(const V& S) {
17 | 	int N = int(S.size());
18 | 	std::vector<int> res(2*N+1, 0);
19 | 	for (int i = 1, j = -1, r = 0; i < 2*N; i++, j--) {
20 | 		if (i > r) {
21 | 			r = i+1, res[i] = 1;
22 | 		} else {
23 | 			res[i] = res[j];
24 | 		}
25 | 		if (i+res[i] >= r) {
26 | 			int b = r>>1, a = i-b;
27 | 			while (a > 0 && b < N && S[a-1] == S[b]) {
28 | 				a--, b++;
29 | 			}
30 | 			res[i] = b-a, j = i, r = b<<1;
31 | 		}
32 | 	}
33 | 	return res;
34 | }
35 | 
/**
 * manacher_odd(S): radius of the longest odd-length palindrome centered at
 * every character of S.
 *
 * Input: string (or vector) of length N; elements only need operator==
 * Output: vector res of length N
 *   For any 0 <= i < N:
 *   * the closed range S[i-res[i] .. i+res[i]] is a palindrome of length 2*res[i]+1
 */
template <typename V> std::vector<int> manacher_odd(const V& S) {
	const int N = int(S.size());
	std::vector<int> res(N);
	// `reach` is the inclusive right end of the rightmost palindrome found so
	// far; `mirror` is the reflection of i across that palindrome's center.
	int mirror = -1;
	int reach = 0;
	for (int i = 1; i < N; ++i, --mirror) {
		if (i > reach) {
			reach = i;
			res[i] = 0;
		} else {
			res[i] = res[mirror];
		}
		// A mirrored radius that stays strictly inside the known palindrome is final.
		if (i + res[i] < reach) {
			continue;
		}
		// Otherwise extend character by character from the known boundary.
		int hi = reach;
		int lo = 2*i - reach;
		while (lo-1 >= 0 && hi+1 < N && S[lo-1] == S[hi+1]) {
			--lo;
			++hi;
		}
		res[i] = hi - i;
		mirror = i;
		reach = hi;
	}
	return res;
}
63 | 


--------------------------------------------------------------------------------
/src/mcmf.hpp:
--------------------------------------------------------------------------------
  1 | #pragma once
  2 | #include<bits/stdc++.h>
  3 | // #include<bits/extc++.h>
  4 | #include <ext/pb_ds/priority_queue.hpp>
  5 | 
  6 | // NOTE: This doesn't support negative-cost edges; you can adjust edge weights
  7 | // (e.g. by precomputing a potential function) to make them positive.
  8 | 
  9 | template <typename flow_t = int, typename cost_t = int64_t>
 10 | struct MCMF_SSPA {
 11 | 	int N;
 12 | 	std::vector<std::vector<int>> adj;
 13 | 	struct edge_t {
 14 | 		int dest;
 15 | 		flow_t cap;
 16 | 		cost_t cost;
 17 | 	};
 18 | 	std::vector<edge_t> edges;
 19 | 
 20 | 	std::vector<char> seen;
 21 | 	std::vector<cost_t> pi;
 22 | 	std::vector<int> prv;
 23 | 
 24 | 	explicit MCMF_SSPA(int N_) : N(N_), adj(N), pi(N, 0), prv(N) {}
 25 | 
 26 | 	void add_edge(int from, int to, flow_t cap, cost_t cost) {
 27 | 		assert(cap >= 0);
 28 | 		assert(cost + pi[from] - pi[to] >= 0); // TODO: Remove this restriction
 29 | 		int e = int(edges.size());
 30 | 		edges.emplace_back(edge_t{to, cap, cost});
 31 | 		edges.emplace_back(edge_t{from, 0, -cost});
 32 | 		adj[from].push_back(e);
 33 | 		adj[to].push_back(e+1);
 34 | 	}
 35 | 
 36 | 	static constexpr cost_t INF_COST = std::numeric_limits<cost_t>::max() / 4;
 37 | 	static constexpr flow_t INF_FLOW = std::numeric_limits<flow_t>::max() / 4;
 38 | 	std::vector<cost_t> dist;
 39 | 	__gnu_pbds::priority_queue<std::pair<cost_t, int>> q;
 40 | 	std::vector<typename decltype(q)::point_iterator> its;
 41 | 	cost_t dijkstra(int s, int t) {
 42 | 		dist.assign(N, INF_COST);
 43 | 		dist[s] = 0;
 44 | 
 45 | 		its.assign(N, q.end());
 46 | 		its[s] = q.push({-(dist[s] - pi[s]), s});
 47 | 
 48 | 		while (!q.empty()) {
 49 | 			int i = q.top().second; q.pop();
 50 | 			cost_t d = dist[i];
 51 | 			for (int e : adj[i]) {
 52 | 				if (edges[e].cap) {
 53 | 					int j = edges[e].dest;
 54 | 					cost_t nd = d + edges[e].cost;
 55 | 					if (nd < dist[j]) {
 56 | 						dist[j] = nd;
 57 | 						prv[j] = e;
 58 | 						if (its[j] == q.end()) {
 59 | 							its[j] = q.push({-(dist[j] - pi[j]), j});
 60 | 						} else {
 61 | 							q.modify(its[j], {-(dist[j] - pi[j]), j});
 62 | 						}
 63 | 					}
 64 | 				}
 65 | 			}
 66 | 		}
 67 | 
 68 | 		swap(pi, dist);
 69 | 		return pi[t];
 70 | 	}
 71 | 
 72 | 	flow_t path(int s, int t) {
 73 | 		flow_t cur_flow = std::numeric_limits<flow_t>::max();
 74 | 		for (int cur = t; cur != s; ) {
 75 | 			int e = prv[cur];
 76 | 			int nxt = edges[e^1].dest;
 77 | 			cur_flow = std::min(cur_flow, edges[e].cap);
 78 | 			cur = nxt;
 79 | 		}
 80 | 		for (int cur = t; cur != s; ) {
 81 | 			int e = prv[cur];
 82 | 			int nxt = edges[e^1].dest;
 83 | 			edges[e].cap -= cur_flow;
 84 | 			edges[e^1].cap += cur_flow;
 85 | 			cur = nxt;
 86 | 		}
 87 | 		return cur_flow;
 88 | 	}
 89 | 
 90 | 	std::vector<std::pair<flow_t, cost_t>> all_flows(int s, int t, cost_t max_cost = INF_COST - 1) {
 91 | 		assert(s != t);
 92 | 		std::vector<std::pair<flow_t, cost_t>> res;
 93 | 		while (dijkstra(s, t) <= max_cost) {
 94 | 			assert(res.empty() || pi[t] >= res.back().second);
 95 | 			flow_t f = path(s, t);
 96 | 			res.push_back({f, pi[t]});
 97 | 		}
 98 | 		return res;
 99 | 	}
100 | 
101 | 	std::pair<flow_t, cost_t> max_flow(int s, int t, cost_t max_cost = INF_COST - 1) {
102 | 		assert(s != t);
103 | 		flow_t tot_flow = 0; cost_t tot_cost = 0;
104 | 		while (dijkstra(s, t) <= max_cost) {
105 | 			flow_t cur_flow = path(s, t);
106 | 			tot_flow += cur_flow;
107 | 			tot_cost += cur_flow * pi[t];
108 | 		}
109 | 		return {tot_flow, tot_cost};
110 | 	}
111 | };
112 | 
/**
 * Min-cost max-flow: dijkstra() computes shortest distances from s (stored in
 * `pi`), then dinic() saturates the subgraph of edges lying on shortest paths
 * (cost == pi[dest] - pi[source]) with blocking flows.
 *
 * Edges are stored in pairs: edge e and its reverse e^1.
 * add_edge asserts that the edge has non-negative reduced cost with respect
 * to the current `pi` (initially all zero, i.e. non-negative cost).
 */
template <typename flow_t = int, typename cost_t = int64_t>
struct MCMF_Dinic {
	int N;
	std::vector<std::vector<int>> adj;
	struct edge_t {
		int dest;
		flow_t cap;
		cost_t cost;
	};
	std::vector<edge_t> edges;

	std::vector<char> seen;
	std::vector<cost_t> pi;

	explicit MCMF_Dinic(int N_) : N(N_), adj(N), pi(N, 0) {}

	// Adds a directed edge from -> to plus its zero-capacity reverse edge.
	void add_edge(int from, int to, flow_t cap, cost_t cost) {
		assert(cap >= 0);
		assert(cost + pi[from] - pi[to] >= 0); // TODO: Remove this restriction
		const int fwd = int(edges.size());
		edges.push_back(edge_t{to, cap, cost});
		edges.push_back(edge_t{from, 0, -cost});
		adj[from].push_back(fwd);
		adj[to].push_back(fwd + 1);
	}

	static constexpr cost_t INF_COST = std::numeric_limits<cost_t>::max() / 4;
	static constexpr flow_t INF_FLOW = std::numeric_limits<flow_t>::max() / 4;
	std::vector<cost_t> dist;
	__gnu_pbds::priority_queue<std::pair<cost_t, int>> q;
	std::vector<typename decltype(q)::point_iterator> its;

	// Shortest paths from s over edges with remaining capacity. Moves the
	// distances into `pi` and returns the distance of t (INF_COST when t was
	// not reached).
	cost_t dijkstra(int s, int t) {
		dist.assign(N, INF_COST);
		its.assign(N, q.end());

		dist[s] = 0;
		its[s] = q.push({-(dist[s] - pi[s]), s});

		while (!q.empty()) {
			const int u = q.top().second;
			q.pop();
			const cost_t du = dist[u];
			for (int e : adj[u]) {
				const edge_t& ed = edges[e];
				if (!ed.cap) continue;
				const int w = ed.dest;
				const cost_t cand = du + ed.cost;
				if (!(cand < dist[w])) continue;
				dist[w] = cand;
				// Keys are negated: the queue hands out its largest element first.
				const std::pair<cost_t, int> key(-(cand - pi[w]), w);
				if (its[w] == q.end()) {
					its[w] = q.push(key);
				} else {
					q.modify(its[w], key);
				}
			}
		}

		std::swap(pi, dist);
		return pi[t];
	}

	// `buf` doubles as the BFS queue and, during the DFS, as the per-vertex
	// "next edge to try" pointer.
	std::vector<int> buf;
	std::vector<int> level;

	// Pushes up to f units from cur towards t along level-increasing,
	// shortest-path edges; returns the amount actually pushed.
	flow_t dinic_dfs(int cur, int t, flow_t f) {
		if (cur == t) return f;
		flow_t pushed = 0;
		assert(f > 0);
		int& ptr = buf[cur];
		const int deg = int(adj[cur].size());
		while (ptr < deg) {
			const int e = adj[cur][ptr];
			const int nxt = edges[e].dest;
			const bool usable = level[nxt] == level[cur] + 1
				&& edges[e].cap > 0
				&& edges[e].cost == pi[nxt] - pi[cur];
			if (usable) {
				const flow_t got = dinic_dfs(nxt, t, std::min(f, edges[e].cap));
				edges[e].cap -= got;
				edges[e^1].cap += got;
				f -= got;
				pushed += got;
				// Stay on this edge: it may still have capacity for a later call.
				if (f == 0) break;
			}
			++ptr;
		}
		return pushed;
	}

	// Repeats BFS levelling + DFS augmentation on the shortest-path subgraph
	// until t is unreachable in it; returns the total flow pushed.
	flow_t dinic(int s, int t) {
		flow_t tot_flow = 0;
		for (;;) {
			level.assign(N, -1);
			buf.clear();
			buf.reserve(N);
			buf.push_back(s);
			level[s] = 0;
			for (int head = 0; head < int(buf.size()); head++) {
				const int cur = buf[head];
				for (int e : adj[cur]) {
					const int nxt = edges[e].dest;
					if (level[nxt] != -1) continue;
					if (edges[e].cap > 0 && edges[e].cost == pi[nxt] - pi[cur]) {
						level[nxt] = level[cur] + 1;
						buf.push_back(nxt);
					}
				}
			}
			if (level[t] == -1) break;
			buf.assign(N, 0);
			tot_flow += dinic_dfs(s, t, INF_FLOW);
		}
		return tot_flow;
	}

	// Returns one {flow, per-unit cost} entry per distinct s-t distance, in
	// increasing order of cost, while the distance does not exceed max_cost.
	std::vector<std::pair<flow_t, cost_t>> all_flows(int s, int t, cost_t max_cost = INF_COST - 1) {
		assert(s != t);
		std::vector<std::pair<flow_t, cost_t>> res;
		while (dijkstra(s, t) <= max_cost) {
			assert(res.empty() || pi[t] > res.back().second);
			const flow_t f = dinic(s, t);
			res.emplace_back(f, pi[t]);
		}
		return res;
	}

	// Returns {total flow, total cost}, augmenting while the s-t distance does
	// not exceed max_cost.
	std::pair<flow_t, cost_t> max_flow(int s, int t, cost_t max_cost = INF_COST - 1) {
		assert(s != t);
		flow_t tot_flow = 0;
		cost_t tot_cost = 0;
		while (dijkstra(s, t) <= max_cost) {
			const flow_t pushed = dinic(s, t);
			tot_flow += pushed;
			tot_cost += pushed * pi[t];
		}
		return {tot_flow, tot_cost};
	}
};
241 | 
/**
 * Maximum flow with Dinic's algorithm (BFS levelling + DFS augmentation).
 *
 * flow_t is the type of a single edge capacity, tot_flow_t the type used to
 * accumulate flow. Edges are stored in pairs: edge e and its reverse e^1.
 */
template <typename flow_t = int, typename tot_flow_t = flow_t>
struct Dinic {
	int N;
	std::vector<std::vector<int>> adj;
	struct edge_t {
		int dest;
		flow_t cap;
	};
	std::vector<edge_t> edges;

	std::vector<char> seen;

	explicit Dinic(int N_) : N(N_), adj(N) {}

	// Adds a directed edge from -> to with capacity cap.
	void add_edge(int from, int to, flow_t cap) {
		add_bi_edge(from, to, cap, 0);
	}

	// Adds an edge with capacity cap from -> to and rev_cap to -> from.
	void add_bi_edge(int from, int to, flow_t cap, flow_t rev_cap) {
		assert(cap >= 0);
		assert(rev_cap >= 0);
		const int fwd = int(edges.size());
		edges.push_back(edge_t{to, cap});
		edges.push_back(edge_t{from, rev_cap});
		adj[from].push_back(fwd);
		adj[to].push_back(fwd + 1);
	}

	static constexpr tot_flow_t INF_FLOW = std::numeric_limits<tot_flow_t>::max() / 4;
	// `buf` doubles as the BFS queue and, during the DFS, as the per-vertex
	// "next edge to try" pointer.
	std::vector<int> buf;
	std::vector<int> level;

	// Pushes up to f units from cur towards t along level-increasing edges;
	// returns the amount actually pushed.
	tot_flow_t dinic_dfs(int cur, int t, tot_flow_t f) {
		if (cur == t) return f;
		tot_flow_t sent = 0;
		assert(f > 0);
		int& ptr = buf[cur];
		const int deg = int(adj[cur].size());
		while (ptr < deg) {
			const int e = adj[cur][ptr];
			const int nxt = edges[e].dest;
			if (level[nxt] == level[cur] + 1 && edges[e].cap > 0) {
				const flow_t got = flow_t(dinic_dfs(nxt, t, std::min<tot_flow_t>(f, edges[e].cap)));
				edges[e].cap -= got;
				edges[e^1].cap += got;
				f -= got;
				sent += got;
				// Stay on this edge: it may still have capacity for a later call.
				if (f == 0) break;
			}
			++ptr;
		}
		return sent;
	}

	// Repeats BFS levelling + DFS augmentation until t is unreachable in the
	// residual graph; returns the total flow pushed by this call.
	tot_flow_t dinic(int s, int t) {
		tot_flow_t tot_flow = 0;
		for (;;) {
			level.assign(N, -1);
			buf.clear();
			buf.reserve(N);
			buf.push_back(s);
			level[s] = 0;
			for (int head = 0; head < int(buf.size()); head++) {
				const int cur = buf[head];
				for (int e : adj[cur]) {
					const int nxt = edges[e].dest;
					if (level[nxt] != -1 || !(edges[e].cap > 0)) continue;
					level[nxt] = level[cur] + 1;
					buf.push_back(nxt);
				}
			}
			if (level[t] == -1) break;
			buf.assign(N, 0);
			tot_flow += dinic_dfs(s, t, INF_FLOW);
		}
		return tot_flow;
	}
	tot_flow_t max_flow(int s, int t) { return dinic(s, t); }
};
317 | 


--------------------------------------------------------------------------------
/src/modnum.hpp:
--------------------------------------------------------------------------------
  1 | #pragma once
  2 | 
  3 | #include <cassert>
  4 | #include <iostream>
  5 | #include <cstdint>
  6 | 
  7 | template <typename T> T mod_inv_in_range(T a, T m) {
  8 | 	// assert(0 <= a && a < m);
  9 | 	T x = a, y = m;
 10 | 	// coeff of a in x and y
 11 | 	T vx = 1, vy = 0;
 12 | 	while (x) {
 13 | 		T k = y / x;
 14 | 		y %= x;
 15 | 		vy -= k * vx;
 16 | 		std::swap(x, y);
 17 | 		std::swap(vx, vy);
 18 | 	}
 19 | 	assert(y == 1);
 20 | 	return vy < 0 ? m + vy : vy;
 21 | }
 22 | 
 23 | template <typename T> struct extended_gcd_result {
 24 | 	T gcd;
 25 | 	T coeff_a, coeff_b;
 26 | };
 27 | template <typename T> extended_gcd_result<T> extended_gcd(T a, T b) {
 28 | 	T x = a, y = b;
 29 | 	// coeff of a and b in x and y
 30 | 	T ax = 1, ay = 0;
 31 | 	T bx = 0, by = 1;
 32 | 	while (x) {
 33 | 		T k = y / x;
 34 | 		y %= x;
 35 | 		ay -= k * ax;
 36 | 		by -= k * bx;
 37 | 		std::swap(x, y);
 38 | 		std::swap(ax, ay);
 39 | 		std::swap(bx, by);
 40 | 	}
 41 | 	return {y, ay, by};
 42 | }
 43 | 
 44 | template <typename T> T mod_inv(T a, T m) {
 45 | 	a %= m;
 46 | 	a = a < 0 ? a + m : a;
 47 | 	return mod_inv_in_range(a, m);
 48 | }
 49 | 
 50 | template <int MOD_> struct modnum {
 51 | 	static constexpr int MOD = MOD_;
 52 | 	static_assert(MOD_ > 0, "MOD must be positive");
 53 | 
 54 | private:
 55 | 	int v;
 56 | 
 57 | public:
 58 | 
 59 | 	modnum() : v(0) {}
 60 | 	modnum(int64_t v_) : v(int(v_ % MOD)) { if (v < 0) v += MOD; }
 61 | 	explicit operator int() const { return v; }
 62 | 	friend std::ostream& operator << (std::ostream& out, const modnum& n) { return out << int(n); }
 63 | 	friend std::istream& operator >> (std::istream& in, modnum& n) { int64_t v_; in >> v_; n = modnum(v_); return in; }
 64 | 
 65 | 	friend bool operator == (const modnum& a, const modnum& b) { return a.v == b.v; }
 66 | 	friend bool operator != (const modnum& a, const modnum& b) { return a.v != b.v; }
 67 | 
 68 | 	modnum inv() const {
 69 | 		modnum res;
 70 | 		res.v = mod_inv_in_range(v, MOD);
 71 | 		return res;
 72 | 	}
 73 | 	friend modnum inv(const modnum& m) { return m.inv(); }
 74 | 	modnum neg() const {
 75 | 		modnum res;
 76 | 		res.v = v ? MOD-v : 0;
 77 | 		return res;
 78 | 	}
 79 | 	friend modnum neg(const modnum& m) { return m.neg(); }
 80 | 
 81 | 	modnum operator- () const {
 82 | 		return neg();
 83 | 	}
 84 | 	modnum operator+ () const {
 85 | 		return modnum(*this);
 86 | 	}
 87 | 
 88 | 	modnum& operator ++ () {
 89 | 		v ++;
 90 | 		if (v == MOD) v = 0;
 91 | 		return *this;
 92 | 	}
 93 | 	modnum& operator -- () {
 94 | 		if (v == 0) v = MOD;
 95 | 		v --;
 96 | 		return *this;
 97 | 	}
 98 | 	modnum& operator += (const modnum& o) {
 99 | 		v -= MOD-o.v;
100 | 		v = (v < 0) ? v + MOD : v;
101 | 		return *this;
102 | 	}
103 | 	modnum& operator -= (const modnum& o) {
104 | 		v -= o.v;
105 | 		v = (v < 0) ? v + MOD : v;
106 | 		return *this;
107 | 	}
108 | 	modnum& operator *= (const modnum& o) {
109 | 		v = int(int64_t(v) * int64_t(o.v) % MOD);
110 | 		return *this;
111 | 	}
112 | 	modnum& operator /= (const modnum& o) {
113 | 		return *this *= o.inv();
114 | 	}
115 | 
116 | 	friend modnum operator ++ (modnum& a, int) { modnum r = a; ++a; return r; }
117 | 	friend modnum operator -- (modnum& a, int) { modnum r = a; --a; return r; }
118 | 	friend modnum operator + (const modnum& a, const modnum& b) { return modnum(a) += b; }
119 | 	friend modnum operator - (const modnum& a, const modnum& b) { return modnum(a) -= b; }
120 | 	friend modnum operator * (const modnum& a, const modnum& b) { return modnum(a) *= b; }
121 | 	friend modnum operator / (const modnum& a, const modnum& b) { return modnum(a) /= b; }
122 | };
123 | 
// Binary exponentiation: returns a raised to the b-th power using only
// T's operator*= and construction from 1. Requires b >= 0 (asserted).
template <typename T> T pow(T a, long long b) {
	assert(b >= 0);
	T r = 1;
	for (; b; b >>= 1) {
		if (b & 1) {
			r *= a;
		}
		a *= a;
	}
	return r;
}
128 | 
// Pair of two number-like values that are manipulated in lockstep: every
// operation is applied component-wise to u and v.
template <typename U, typename V> struct pairnum {
	U u;
	V v;

	pairnum() : u(0), v(0) {}
	// Both components are built from the same integer.
	pairnum(long long val) : u(val), v(val) {}
	pairnum(const U& u_, const V& v_) : u(u_), v(v_) {}

	friend std::ostream& operator << (std::ostream& out, const pairnum& n) {
		out << '(' << n.u << ',' << ' ' << n.v << ')';
		return out;
	}
	// Reads a single integer and uses it for both components.
	friend std::istream& operator >> (std::istream& in, pairnum& n) {
		long long val;
		in >> val;
		n = pairnum(val);
		return in;
	}

	friend bool operator == (const pairnum& a, const pairnum& b) { return a.u == b.u && a.v == b.v; }
	friend bool operator != (const pairnum& a, const pairnum& b) { return a.u != b.u || a.v != b.v; }

	pairnum inv() const {
		return pairnum(u.inv(), v.inv());
	}
	pairnum neg() const {
		return pairnum(u.neg(), v.neg());
	}
	pairnum operator- () const {
		return pairnum(-u, -v);
	}
	pairnum operator+ () const {
		return pairnum(+u, +v);
	}

	pairnum& operator ++ () {
		++u;
		++v;
		return *this;
	}
	pairnum& operator -- () {
		--u;
		--v;
		return *this;
	}

	pairnum& operator += (const pairnum& o) {
		u += o.u;
		v += o.v;
		return *this;
	}
	pairnum& operator -= (const pairnum& o) {
		u -= o.u;
		v -= o.v;
		return *this;
	}
	pairnum& operator *= (const pairnum& o) {
		u *= o.u;
		v *= o.v;
		return *this;
	}
	pairnum& operator /= (const pairnum& o) {
		u /= o.u;
		v /= o.v;
		return *this;
	}

	friend pairnum operator ++ (pairnum& a, int) { pairnum old = a; ++a; return old; }
	friend pairnum operator -- (pairnum& a, int) { pairnum old = a; --a; return old; }
	friend pairnum operator + (const pairnum& a, const pairnum& b) { pairnum r = a; r += b; return r; }
	friend pairnum operator - (const pairnum& a, const pairnum& b) { pairnum r = a; r -= b; return r; }
	friend pairnum operator * (const pairnum& a, const pairnum& b) { pairnum r = a; r *= b; return r; }
	friend pairnum operator / (const pairnum& a, const pairnum& b) { pairnum r = a; r /= b; return r; }
};
193 | 
194 | template <typename tag> struct dynamic_modnum {
195 | private:
196 | #if __cpp_inline_variables >= 201606
197 | 	// C++17 and up
198 | 	inline static int MOD_ = 0;
199 | 	inline static uint64_t BARRETT_M = 0;
200 | #else
201 | 	// NB: these must be initialized out of the class by hand:
202 | 	//   static int dynamic_modnum<tag>::MOD = 0;
203 | 	//   static int dynamic_modnum<tag>::BARRETT_M = 0;
204 | 	static int MOD_;
205 | 	static uint64_t BARRETT_M;
206 | #endif
207 | 
208 | public:
209 | 	// Make only the const-reference public, to force the use of set_mod
210 | 	static constexpr int const& MOD = MOD_;
211 | 
212 | 	// Barret reduction taken from KACTL:
213 | 	/**
214 | 	 * Author: Simon Lindholm
215 | 	 * Date: 2020-05-30
216 | 	 * License: CC0
217 | 	 * Source: https://en.wikipedia.org/wiki/Barrett_reduction
218 | 	 * Description: Compute $a \% b$ about 5 times faster than usual, where $b$ is constant but not known at compile time.
219 | 	 * Returns a value congruent to $a \pmod b$ in the range $[0, 2b)$.
220 | 	 * Status: proven correct, stress-tested
221 | 	 * Measured as having 4 times lower latency, and 8 times higher throughput, see stress-test.
222 | 	 * Details:
223 | 	 * More precisely, it can be proven that the result equals 0 only if $a = 0$,
224 | 	 * and otherwise lies in $[1, (1 + a/2^64) * b)$.
225 | 	 */
226 | 	static void set_mod(int mod) {
227 | 		assert(mod > 0);
228 | 		MOD_ = mod;
229 | 		BARRETT_M = (uint64_t(-1) / MOD);
230 | 	}
231 | 	static uint32_t barrett_reduce_partial(uint64_t a) {
232 | 		return uint32_t(a - uint64_t((__uint128_t(BARRETT_M) * a) >> 64) * MOD);
233 | 	}
234 | 	static int barrett_reduce(uint64_t a) {
235 | 		int32_t res = int32_t(barrett_reduce_partial(a) - MOD);
236 | 		return (res < 0) ? res + MOD : res;
237 | 	}
238 | 
239 | 	struct mod_reader {
240 | 		friend std::istream& operator >> (std::istream& i, mod_reader) {
241 | 			int mod; i >> mod;
242 | 			dynamic_modnum::set_mod(mod);
243 | 			return i;
244 | 		}
245 | 	};
246 | 	static mod_reader MOD_READER() {
247 | 		return mod_reader();
248 | 	}
249 | 
250 | private:
251 | 	int v;
252 | 
253 | public:
254 | 
255 | 	dynamic_modnum() : v(0) {}
256 | 	dynamic_modnum(int64_t v_) : v(int(v_ % MOD)) { if (v < 0) v += MOD; }
257 | 	explicit operator int() const { return v; }
258 | 	friend std::ostream& operator << (std::ostream& out, const dynamic_modnum& n) { return out << int(n); }
259 | 	friend std::istream& operator >> (std::istream& in, dynamic_modnum& n) { int64_t v_; in >> v_; n = dynamic_modnum(v_); return in; }
260 | 
261 | 	friend bool operator == (const dynamic_modnum& a, const dynamic_modnum& b) { return a.v == b.v; }
262 | 	friend bool operator != (const dynamic_modnum& a, const dynamic_modnum& b) { return a.v != b.v; }
263 | 
264 | 	dynamic_modnum inv() const {
265 | 		dynamic_modnum res;
266 | 		res.v = mod_inv_in_range(v, MOD);
267 | 		return res;
268 | 	}
269 | 	friend dynamic_modnum inv(const dynamic_modnum& m) { return m.inv(); }
270 | 	dynamic_modnum neg() const {
271 | 		dynamic_modnum res;
272 | 		res.v = v ? MOD-v : 0;
273 | 		return res;
274 | 	}
275 | 	friend dynamic_modnum neg(const dynamic_modnum& m) { return m.neg(); }
276 | 
277 | 	dynamic_modnum operator- () const {
278 | 		return neg();
279 | 	}
280 | 	dynamic_modnum operator+ () const {
281 | 		return dynamic_modnum(*this);
282 | 	}
283 | 
284 | 	dynamic_modnum& operator ++ () {
285 | 		v ++;
286 | 		if (v == MOD) v = 0;
287 | 		return *this;
288 | 	}
289 | 	dynamic_modnum& operator -- () {
290 | 		if (v == 0) v = MOD;
291 | 		v --;
292 | 		return *this;
293 | 	}
294 | 	dynamic_modnum& operator += (const dynamic_modnum& o) {
295 | 		v -= MOD-o.v;
296 | 		v = (v < 0) ? v + MOD : v;
297 | 		return *this;
298 | 	}
299 | 	dynamic_modnum& operator -= (const dynamic_modnum& o) {
300 | 		// Subtract the representatives; a negative result gets MOD added back once.
301 | 		const int diff = v - o.v;
302 | 		v = diff + (diff < 0 ? MOD : 0); return *this;
303 | 	}
304 | 	dynamic_modnum& operator *= (const dynamic_modnum& o) {
305 | 		const int64_t wide_product = int64_t(v) * int64_t(o.v); // widened first so the product is formed in 64 bits
306 | 		v = barrett_reduce(wide_product); return *this;
307 | 	}
308 | 	dynamic_modnum& operator /= (const dynamic_modnum& o) {
309 | 		const dynamic_modnum o_inverse = o.inv(); return *this *= o_inverse; // division is multiplication by o.inv()
310 | 	}
311 | 
312 | 	friend dynamic_modnum operator ++ (dynamic_modnum& a, int) { const dynamic_modnum before = a; ++a; return before; }
313 | 	friend dynamic_modnum operator -- (dynamic_modnum& a, int) { const dynamic_modnum before = a; --a; return before; }
314 | 	friend dynamic_modnum operator + (const dynamic_modnum& a, const dynamic_modnum& b) { dynamic_modnum sum = a; sum += b; return sum; }
315 | 	friend dynamic_modnum operator - (const dynamic_modnum& a, const dynamic_modnum& b) { dynamic_modnum diff = a; diff -= b; return diff; }
316 | 	friend dynamic_modnum operator * (const dynamic_modnum& a, const dynamic_modnum& b) { dynamic_modnum prod = a; prod *= b; return prod; }
317 | 	friend dynamic_modnum operator / (const dynamic_modnum& a, const dynamic_modnum& b) { dynamic_modnum quot = a; quot /= b; return quot; }
318 | };
319 | 
320 | template <typename T> struct mod_constraint { // the congruence x == v (mod `mod`)
321 | 	T v, mod;
322 | 	// operator &: merge two congruences into one; asserts that they agree modulo gcd(a.mod, b.mod).
323 | 	friend mod_constraint operator & (mod_constraint a, mod_constraint b) {
324 | 		if (a.mod < b.mod) std::swap(a, b); // from here on a carries the larger modulus
325 | 		if (b.mod == 1) return a; // a modulus of 1 adds no restriction, so a is already the answer
326 | 		// NOTE(review): extended_gcd is defined outside this view; coeff_a is presumably the Bezout coefficient paired with a.mod -- confirm.
327 | 		extended_gcd_result<T> egcd = extended_gcd<T>(a.mod, b.mod);
328 | 		assert(a.v % egcd.gcd == b.v % egcd.gcd);
329 | 		// The result has the form a.v + extra * a.mod; `extra` is worked out modulo b.mod / gcd below.
330 | 		T extra = b.v - a.v % b.mod;
331 | 		extra /= egcd.gcd;
332 | 		// NOTE(review): this product is formed in T without a prior reduction -- check the range when T is narrow.
333 | 		extra *= egcd.coeff_a;
334 | 		extra %= b.mod / egcd.gcd;
335 | 		extra += (extra < 0) ? b.mod / egcd.gcd : 0; // % may leave a negative remainder; shift it up by b.mod / gcd
336 | 
337 | 		return mod_constraint{
338 | 			a.v + extra * a.mod,
339 | 			a.mod * (b.mod / egcd.gcd) // new modulus: a.mod * b.mod / gcd
340 | 		};
341 | 	}
342 | };
343 | 


--------------------------------------------------------------------------------
/src/modnum.test.cpp:
--------------------------------------------------------------------------------
 1 | #include "modnum.hpp"
 2 | #include <catch2/catch_test_macros.hpp>
 3 | #include <numeric> // Include for std::lcm and std::gcd
 4 | 
 5 | TEST_CASE("Mod Constraint Regression Test", "[mod_constraint]") {
 6 |     // Brute force: combine every pair of constraints whose moduli lie in [1, 10]
 7 |     // and check the merged constraint against its defining properties.
 8 |     for (int a_mod = 1; a_mod <= 10; ++a_mod)
 9 |     for (int a_val = 0; a_val < a_mod; ++a_val)
10 |     for (int b_mod = 1; b_mod <= 10; ++b_mod)
11 |     for (int b_val = 0; b_val < b_mod; ++b_val) {
12 |         // Incompatible pairs (residues that differ modulo the gcd) are skipped.
13 |         const int common = std::gcd(a_mod, b_mod);
14 |         if (a_val % common != b_val % common) continue;
15 | 
16 |         const mod_constraint<int> a{a_val, a_mod};
17 |         const mod_constraint<int> b{b_val, b_mod};
18 |         const mod_constraint<int> r = a & b;
19 | 
20 |         // The merged modulus is the LCM of a.mod and b.mod.
21 |         const int lcm_ab = std::lcm(a.mod, b.mod);
22 |         REQUIRE(r.mod == lcm_ab);
23 | 
24 |         // The merged value reduces to a.v modulo a.mod and to b.v modulo b.mod.
25 |         REQUIRE(r.v % a.mod == a.v);
26 |         REQUIRE(r.v % b.mod == b.v);
27 | 
28 |         // The merged value lies in [0, r.mod).
29 |         REQUIRE(r.v >= 0);
30 |         REQUIRE(r.v < r.mod);
31 |     }
32 | }
33 | 


--------------------------------------------------------------------------------
/src/nim_prod.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <utility>
 4 | #include <cstdint>
 5 | 
 6 | // 64x64 lookup table of "bit products", plus a call operator that XORs table entries together.
 7 | // Usage:   constexpr nim_prod_t nimProd;   (C++20:   constinit nim_prod_t nimProd;)
 8 | //          then nimProd(x, y).
 9 | // NOTE(review): going by the name, this is nimber multiplication of 64-bit values -- confirm before changing the recurrence.
10 | struct nim_prod_t {
11 | 	uint64_t bit_prod[64][64]{}; // bit_prod[i][j]: the entry combined in for bit i of x and bit j of y
12 | 	constexpr nim_prod_t() {
13 | 		for (int i = 0; i < 64; i++) {
14 | 			for (int j = 0; j < 64; j++) {
15 | 				if ((i & j) == 0) {
16 | 					bit_prod[i][j] = uint64_t(1) << (i|j); // i and j share no bit: a single bit at position i|j
17 | 				} else {
18 | 					int a = (i&j) & -(i&j); // lowest bit that i and j have in common
19 | 					bit_prod[i][j] = bit_prod[i ^ a][j] ^ bit_prod[(i ^ a) | (a-1)][(j ^ a) | (i & (a-1))]; // both rows have bit a cleared, so they are smaller than i and already filled
20 | 				}
21 | 			}
22 | 		}
23 | 	}
24 | 	constexpr uint64_t operator () (uint64_t x, uint64_t y) const { // XOR of bit_prod[i][j] over every set bit i of x and set bit j of y
25 | 		uint64_t res = 0;
26 | 		for (int i = 0; i < 64 && (x >> i); i++) // stops as soon as no set bit of x remains at or above i
27 | 			if ((x >> i) & 1)
28 | 				for (int j = 0; j < 64 && (y >> j); j++) // same early exit for y
29 | 					if ((y >> j) & 1)
30 | 						res ^= bit_prod[i][j];
31 | 		return res;
32 | 	}
33 | };
34 | 


--------------------------------------------------------------------------------
/src/optimize.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | #pragma GCC optimize("unroll-loops")
 3 | #pragma GCC optimize("Ofast")
 4 | #pragma GCC target("sse,sse2,sse3,ssse3,popcnt,abm,mmx") // Safe for yandex
 5 | 
 6 | #pragma GCC target("sse,sse2,sse3,ssse3,sse4,popcnt,abm,bmi,bmi2,mmx,avx,avx2,fma") // Requires AVX2
 7 | 
 8 | // See https://codeforces.com/blog/entry/96344
 9 | 
10 | inline void disable_denormal_floats() {
11 | 	// Turn on bit 15 (flush-to-zero) of the SSE control/status register;
12 | 	// see https://stackoverflow.com/a/8217313
13 | 	const unsigned flush_to_zero_bit = 1u << 15;
14 | 	const unsigned current_csr = __builtin_ia32_stmxcsr();
15 | 	__builtin_ia32_ldmxcsr(current_csr | flush_to_zero_bit);
16 | 	// Every other bit is written back exactly as it was read.
17 | }
18 | 


--------------------------------------------------------------------------------
/src/order_statistic.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <ext/pb_ds/assoc_container.hpp>
 4 | 
 5 | // Ordered map backed by the GNU pb_ds red-black tree with the order-statistics node update policy.
 6 | template <typename Key, typename Mapped, typename Cmp = std::less<Key>>
 7 | using order_statistic_map = __gnu_pbds::tree<
 8 | 	Key, Mapped, Cmp, __gnu_pbds::rb_tree_tag, __gnu_pbds::tree_order_statistics_node_update
 9 | >;
10 | 
11 | // Set flavour: the same tree with pb_ds's null_type as the mapped type.
12 | template <typename Key, typename Cmp = std::less<Key>>
13 | using order_statistic_set = order_statistic_map<Key, __gnu_pbds::null_type, Cmp>;
14 | 
15 | // Supports
16 | //  auto iterator = order_statistic_set().find_by_order(idx); // (0-indexed)
17 | //  int num_strictly_smaller = order_statistic_set().order_of_key(key);
18 | 


--------------------------------------------------------------------------------
/src/perm_tree.hpp:
--------------------------------------------------------------------------------
  1 | #pragma once
  2 | 
  3 | #include <vector>
  4 | #include <array>
  5 | #include <cassert>
  6 | 
  7 | class PermTree { // binary permutation tree over a permutation of 0..N-1
  8 | public:
  9 | 	enum class NodeType {
 10 | 		LEAF,    // a single position
 11 | 		INCR,    // contiguous value block whose right child holds the maximum
 12 | 		DECR,    // contiguous value block whose right child holds the minimum
 13 | 		FULL,    // contiguous value block whose right child holds neither extreme
 14 | 		PARTIAL, // helper node grouping adjacent stack entries; created without a contiguity check
 15 | 	};
 16 | 
 17 | 	struct Node {
 18 | 		std::array<int, 2> c; // child node indices {left, right}; {-1, -1} for leaves
 19 | 		NodeType type;
 20 | 		int l, r, lo, hi; // inclusive position range [l, r] and inclusive value range [lo, hi]
 21 | 	};
 22 | 
 23 | 	std::vector<Node> nodes; // the leaf of position i is nodes[2*i]; internal nodes use odd indices
 24 | 	int root = -1; // index of the root node, or -1 while the tree is empty
 25 | 
 26 | 	PermTree() {}
 27 | 	Node& operator [] (int idx) { return nodes[idx]; }
 28 | 	const Node& operator [] (int idx) const { return nodes[idx]; }
 29 | 
 30 | 	int size() const { return int(nodes.size()); }
 31 | 	// Builds the tree for A, which must be a permutation of 0..N-1. An empty A yields an empty tree (root == -1).
 32 | 	PermTree(const std::vector<int>& A) : nodes(A.empty() ? 0 : int(A.size())*2-1) {
 33 | 		int N = int(A.size());
 34 | 		if (N == 0) return; // nothing to build; without this guard the node count above would be -1
 35 | 		std::vector<int> nxt_earlier(N), prv_earlier(N);
 36 | 		for (int i = 0; i < N; i++) {
 37 | 			nxt_earlier[i] = i+1;
 38 | 			prv_earlier[i] = i-1;
 39 | 		}
 40 | 		// Unlink values in reverse position order: each value keeps its nearest smaller/larger value among earlier positions.
 41 | 		for (int i = N-1; i >= 0; i--) {
 42 | 			int a = A[i];
 43 | 			int p = prv_earlier[a];
 44 | 			int n = nxt_earlier[a];
 45 | 			if (p != -1) nxt_earlier[p] = n;
 46 | 			if (n != N) prv_earlier[n] = p;
 47 | 		}
 48 | 		struct cnd_t {
 49 | 			int left;   // first position covered by this stack entry
 50 | 			int lo;     // smallest value covered
 51 | 			int lo_gap; // prv_earlier + 1 of the entry's first value, fixed at push time
 52 | 			int hi;     // largest value covered
 53 | 			int hi_gap; // nxt_earlier - 1 of the entry's first value, fixed at push time
 54 | 			int node;   // index in `nodes` that currently represents this entry
 55 | 		};
 56 | 
 57 | 		std::vector<cnd_t> stk;
 58 | 		stk.reserve(N);
 59 | 
 60 | 		for (int i = 0; i < N; i++) {
 61 | 			int a = A[i];
 62 | 			// While a falls outside the top entry's gap interval, fold that entry
 63 | 			// into the one below it and record the pair as a PARTIAL node.
 64 | 			while (!stk.empty() && (a < stk.back().lo_gap || a > stk.back().hi_gap)) {
 65 | 				assert(stk.size() >= 2);
 66 | 				stk.end()[-2].lo = std::min(stk.end()[-2].lo, stk.back().lo);
 67 | 				stk.end()[-2].hi = std::max(stk.end()[-2].hi, stk.back().hi);
 68 | 
 69 | 				int n = 2 * stk.back().left - 1; // odd slot just before the right entry's first leaf
 70 | 				nodes[n].c = {stk.end()[-2].node, stk.end()[-1].node};
 71 | 				nodes[n].type = NodeType::PARTIAL;
 72 | 				nodes[n].l = stk.end()[-2].left;
 73 | 				nodes[n].r = i-1;
 74 | 				nodes[n].lo = stk.end()[-2].lo;
 75 | 				nodes[n].hi = stk.end()[-2].hi;
 76 | 
 77 | 				stk.pop_back();
 78 | 				stk.back().node = n;
 79 | 			}
 80 | 
 81 | 			// Push the leaf for position i together with the gap interval of its value.
 82 | 			stk.push_back({i, a, prv_earlier[a]+1, a, nxt_earlier[a]-1, 2*i});
 83 | 			nodes[2*i].type = NodeType::LEAF;
 84 | 			nodes[2*i].c = {-1, -1};
 85 | 			nodes[2*i].l = nodes[2*i].r = i;
 86 | 			nodes[2*i].lo = nodes[2*i].hi = a;
 87 | 
 88 | 			// Merge the top two entries while their union [left, i] holds a contiguous block of values.
 89 | 			while (stk.size() >= 2 && std::max(stk.back().hi, stk.end()[-2].hi) - std::min(stk.back().lo, stk.end()[-2].lo) == i - stk.end()[-2].left) {
 90 | 				stk.end()[-2].lo = std::min(stk.end()[-2].lo, stk.back().lo);
 91 | 				stk.end()[-2].hi = std::max(stk.end()[-2].hi, stk.back().hi);
 92 | 
 93 | 				int n = 2 * stk.back().left - 1;
 94 | 				nodes[n].c = {stk.end()[-2].node, stk.end()[-1].node};
 95 | 				if (stk.end()[-2].lo == stk.end()[-1].lo) {
 96 | 					nodes[n].type = NodeType::DECR;
 97 | 				} else if (stk.end()[-2].hi == stk.end()[-1].hi) {
 98 | 					nodes[n].type = NodeType::INCR;
 99 | 				} else {
100 | 					nodes[n].type = NodeType::FULL;
101 | 				}
102 | 				nodes[n].l = stk.end()[-2].left;
103 | 				nodes[n].r = i;
104 | 				nodes[n].lo = stk.end()[-2].lo;
105 | 				nodes[n].hi = stk.end()[-2].hi;
106 | 
107 | 				stk.pop_back();
108 | 				stk.back().node = n;
109 | 			}
110 | 		}
111 | 
112 | 		// The construction expects exactly one entry to remain; it becomes the root.
113 | 		assert(stk.size() == 1);
114 | 		root = stk.back().node;
115 | 	}
116 | };
117 | 


--------------------------------------------------------------------------------
/src/perm_tree.test.cpp:
--------------------------------------------------------------------------------
 1 | #include "perm_tree.hpp"
 2 | 
 3 | #include <catch2/catch_test_macros.hpp>
 4 | #include <bits/stdc++.h>
 5 | 
 6 | void check_tree(std::vector<int> A) {
 7 | 	int N = int(A.size());
 8 | 	std::vector<std::pair<std::array<int, 2>, std::array<int, 2>>> actual_ranges;
 9 | 	for (int i = 0; i < N; i++) {
10 | 		int lo = A[i], hi = A[i];
11 | 		for (int j = i; j < N; j++) {
12 | 			lo = std::min(lo, A[j]);
13 | 			hi = std::max(hi, A[j]);
14 | 			assert(hi - lo >= j - i);
15 | 			if (hi - lo == j - i) {
16 | 				actual_ranges.push_back({{i, j}, {lo, hi}});
17 | 			}
18 | 		}
19 | 	}
20 | 
21 | 	PermTree tree(A);
22 | 	std::vector<std::pair<std::array<int, 2>, std::array<int, 2>>> computed_ranges;
23 | 	for (int n = 0; n < tree.size(); n++) {
24 | 		const auto& node = tree[n];
25 | 		if (node.type != PermTree::NodeType::PARTIAL) {
26 | 			computed_ranges.push_back({{node.l, node.r}, {node.lo, node.hi}});
27 | 		}
28 | 		if (node.type == PermTree::NodeType::LEAF) {
29 | 			REQUIRE(node.c[0] == -1);
30 | 			REQUIRE(node.c[1] == -1);
31 | 			REQUIRE(node.l == n/2);
32 | 			REQUIRE(node.r == n/2);
33 | 			REQUIRE(node.lo == A[n/2]);
34 | 			REQUIRE(node.hi == A[n/2]);
35 | 			continue;
36 | 		}
37 | 		REQUIRE(node.c[0] != -1);
38 | 		REQUIRE(node.c[1] != -1);
39 | 		REQUIRE(node.l == tree[node.c[0]].l);
40 | 		REQUIRE(node.r == tree[node.c[1]].r);
41 | 		REQUIRE(tree[node.c[0]].r + 1 == tree[node.c[1]].l);
42 | 		REQUIRE(node.lo == std::min(tree[node.c[0]].lo, tree[node.c[1]].lo));
43 | 		REQUIRE(node.hi == std::max(tree[node.c[0]].hi, tree[node.c[1]].hi));
44 | 		if (node.type == PermTree::NodeType::FULL) {
45 | 			// There should be at least 3 pieces
46 | 			REQUIRE((
47 | 				tree[node.c[0]].type == PermTree::NodeType::PARTIAL
48 | 				|| tree[node.c[1]].type == PermTree::NodeType::PARTIAL
49 | 			));
50 | 		}
51 | 		if (node.type == PermTree::NodeType::INCR) {
52 | 			REQUIRE(tree[node.c[0]].hi + 1 == tree[node.c[1]].lo);
53 | 
54 | 			REQUIRE(tree[node.c[1]].type != PermTree::NodeType::INCR);
55 | 			for (int cur = node.c[0]; tree[cur].type == PermTree::NodeType::INCR; cur = tree[cur].c[0]) {
56 | 				int ch = tree[cur].c[1];
57 | 				computed_ranges.push_back({{tree[ch].l, node.r}, {tree[ch].lo, node.hi}});
58 | 			}
59 | 		}
60 | 		if (node.type == PermTree::NodeType::DECR) {
61 | 			REQUIRE(tree[node.c[0]].lo - 1 == tree[node.c[1]].hi);
62 | 
63 | 			REQUIRE(tree[node.c[1]].type != PermTree::NodeType::DECR);
64 | 			for (int cur = node.c[0]; tree[cur].type == PermTree::NodeType::DECR; cur = tree[cur].c[0]) {
65 | 				int ch = tree[cur].c[1];
66 | 				computed_ranges.push_back({{tree[ch].l, node.r}, {node.lo, tree[ch].hi}});
67 | 			}
68 | 		}
69 | 	}
70 | 	std::sort(computed_ranges.begin(), computed_ranges.end());
71 | 	REQUIRE(actual_ranges == computed_ranges);
72 | }
73 | 
74 | TEST_CASE("Permutation Tree", "[perm_tree]") {
75 | 	// Exhaustively validate every permutation of 0..N-1 for each size N in 1..7.
76 | 	for (int N = 1; N <= 7; N++) {
77 | 		std::vector<int> perm(N);
78 | 		std::iota(perm.begin(), perm.end(), 0); // sorted start, so next_permutation enumerates all N! orders
79 | 		bool more = true;
80 | 		while (more) { check_tree(perm); more = std::next_permutation(perm.begin(), perm.end()); }
81 | 	}
82 | }
83 | 


--------------------------------------------------------------------------------
/src/quaternion_hurwitz.hpp:
--------------------------------------------------------------------------------
  1 | #pragma once
  2 | #include <array>
  3 | #include <cassert>
  4 | #include <cstdlib>
  5 | #include <iostream>
  6 | #include <tuple>
  7 | #include <utility>
  8 | template<typename num = int>
  9 | struct hurwitz_quaternion { // quaternion s + x*i + y*j + z*k whose coordinates are kept doubled in the integer type `num`
 10 | 	// we store the doubled quaternion: each member is twice the coordinate it represents
 11 | 	num s,x,y,z;
 12 | 	hurwitz_quaternion() : s(0), x(0), y(0), z(0) {} // zero
 13 | 	hurwitz_quaternion(num v) : s(2*v), x(0), y(0), z(0) {} // the real number v
 14 | 	hurwitz_quaternion(num s_, num x_, num y_, num z_) : s(2*s_), x(2*x_), y(2*y_), z(2*z_) {} // from undoubled coordinates
 15 | 	struct doubled_coords_tag {}; // selects the constructor below, whose arguments are already doubled
 16 | 	hurwitz_quaternion(doubled_coords_tag, num s_, num x_, num y_, num z_) : s(s_), x(x_), y(y_), z(z_) {
 17 | 		assert((s & 1) == (x & 1) && (s & 1) == (y & 1) && (s & 1) == (z & 1)); // all four doubled coordinates must share one parity
 18 | 	}
 19 | 	friend std::ostream& operator << (std::ostream& o, const hurwitz_quaternion& q) { // prints the undoubled coordinates, e.g. 0.5+0.5i-0.5j+0.5k
 20 | 		o << double(q.s)/2;
 21 | 		{
 22 | 			std::ios_base::fmtflags f(o.flags()); // remember the flags so showpos applies only to the imaginary parts
 23 | 			o << std::showpos << double(q.x)/2 << "i" << double(q.y)/2 << "j" << double(q.z)/2 << "k";
 24 | 			o.flags(f);
 25 | 		}
 26 | 		return o;
 27 | 	}
 28 | 	// True for every value except zero.
 29 | 	explicit operator bool() const {
 30 | 		return s || x || y || z;
 31 | 	}
 32 | 
 33 | 	friend bool operator == (const hurwitz_quaternion& a, const hurwitz_quaternion& b) {
 34 | 		return std::tie(a.s,a.x,a.y,a.z) == std::tie(b.s,b.x,b.y,b.z);
 35 | 	}
 36 | 	friend bool operator != (const hurwitz_quaternion& a, const hurwitz_quaternion& b) { return !(a == b); }
 37 | 	// Accessors: the *_doubled variants return the stored values; the plain ones halve them and assert that s is even.
 38 | 	num real_doubled() const {
 39 | 		return s;
 40 | 	}
 41 | 	num real() const {
 42 | 		assert(!(s & 1));
 43 | 		return s >> 1;
 44 | 	}
 45 | 	std::array<num, 3> imag_doubled() const {
 46 | 		return {x, y, z};
 47 | 	}
 48 | 	std::array<num, 3> imag() const {
 49 | 		assert(!(s & 1)); // only s is checked here; the tag constructor asserts that all four parities match
 50 | 		return {x>>1, y>>1, z>>1};
 51 | 	}
 52 | 	std::array<num, 4> coords_doubled() const {
 53 | 		return {s, x, y, z};
 54 | 	}
 55 | 	std::array<num, 4> coords() const {
 56 | 		assert(!(s & 1));
 57 | 		return {s>>1, x>>1, y>>1, z>>1};
 58 | 	}
 59 | 	// Squared length: the sum of squares of the doubled coordinates, divided by 4.
 60 | 	friend num norm(const hurwitz_quaternion& q) {
 61 | 		return (q.s * q.s + q.x * q.x + q.y * q.y + q.z * q.z) >> 2;
 62 | 	}
 63 | 	friend hurwitz_quaternion conj(const hurwitz_quaternion& q) { // conjugate: negates the three imaginary coordinates
 64 | 		return hurwitz_quaternion(doubled_coords_tag{}, q.s, -q.x, -q.y, -q.z);
 65 | 	}
 66 | 
 67 | 	friend hurwitz_quaternion operator + (const hurwitz_quaternion& q) {
 68 | 		return hurwitz_quaternion(doubled_coords_tag{}, +q.s, +q.x, +q.y, +q.z);
 69 | 	}
 70 | 	friend hurwitz_quaternion operator - (const hurwitz_quaternion& q) {
 71 | 		return hurwitz_quaternion(doubled_coords_tag{}, -q.s, -q.x, -q.y, -q.z);
 72 | 	}
 73 | 	// Addition and subtraction work coordinate-wise on the doubled values.
 74 | 	hurwitz_quaternion& operator += (const hurwitz_quaternion& o) {
 75 | 		s += o.s;
 76 | 		x += o.x;
 77 | 		y += o.y;
 78 | 		z += o.z;
 79 | 		return *this;
 80 | 	}
 81 | 	friend hurwitz_quaternion operator + (const hurwitz_quaternion& a, const hurwitz_quaternion& b) {
 82 | 		return hurwitz_quaternion(doubled_coords_tag{}, a.s + b.s, a.x + b.x, a.y + b.y, a.z + b.z);
 83 | 	}
 84 | 	hurwitz_quaternion& operator -= (const hurwitz_quaternion& o) {
 85 | 		s -= o.s;
 86 | 		x -= o.x;
 87 | 		y -= o.y;
 88 | 		z -= o.z;
 89 | 		return *this;
 90 | 	}
 91 | 	friend hurwitz_quaternion operator - (const hurwitz_quaternion& a, const hurwitz_quaternion& b) {
 92 | 		return hurwitz_quaternion(doubled_coords_tag{}, a.s - b.s, a.x - b.x, a.y - b.y, a.z - b.z);
 93 | 	}
 94 | 	// Scaling by a plain `num` multiplies every stored (doubled) coordinate by it.
 95 | 	friend hurwitz_quaternion operator * (const num& a, const hurwitz_quaternion& q) {
 96 | 		return hurwitz_quaternion(doubled_coords_tag{}, a*q.s, a*q.x, a*q.y, a*q.z);
 97 | 	}
 98 | 	friend hurwitz_quaternion operator * (const hurwitz_quaternion& q, const num& a) {
 99 | 		return hurwitz_quaternion(doubled_coords_tag{}, q.s*a, q.x*a, q.y*a, q.z*a);
100 | 	}
101 | 	hurwitz_quaternion& operator *= (const num& a) {
102 | 		s *= a;
103 | 		x *= a;
104 | 		y *= a;
105 | 		z *= a;
106 | 		return *this;
107 | 	}
108 | 	// Quaternion product. Each sum of products of doubled values is 4x the true coordinate, so >> 1 leaves it doubled.
109 | 	friend hurwitz_quaternion operator * (const hurwitz_quaternion& a, const hurwitz_quaternion& b) {
110 | 		return hurwitz_quaternion(
111 | 			doubled_coords_tag{},
112 | 			(a.s * b.s - a.x * b.x - a.y * b.y - a.z * b.z) >> 1,
113 | 			(a.s * b.x + a.x * b.s + a.y * b.z - a.z * b.y) >> 1,
114 | 			(a.s * b.y + a.y * b.s + a.z * b.x - a.x * b.z) >> 1,
115 | 			(a.s * b.z + a.z * b.s + a.x * b.y - a.y * b.x) >> 1
116 | 		);
117 | 	}
118 | 	hurwitz_quaternion& operator *= (const hurwitz_quaternion& o) {
119 | 		return *this = *this * o;
120 | 	}
121 | 	// Result of right_div: quotient and remainder.
122 | 	struct div_t {
123 | 		hurwitz_quaternion quot, rem;
124 | 	};
125 | 	// a = b * quot + rem
126 | 	friend div_t right_div(const hurwitz_quaternion& a, const hurwitz_quaternion& b) {
127 | 		hurwitz_quaternion numer = conj(b) * a; // conj(b) * a divided by norm(b) is the exact (possibly fractional) quotient
128 | 		num denom = norm(b);
129 | 		// Floor division: u/v truncates toward zero, which is already the floor when the operands' signs agree.
130 | 		auto floor_div = [](num u, num v) -> num {
131 | 			if ((u^v) >= 0) {
132 | 				return u/v;
133 | 			} else {
134 | 				auto res = std::div(u, v);
135 | 				return res.quot - bool(res.rem); // signs differ: step one further down unless the division was exact
136 | 			}
137 | 		};
138 | 		num s = floor_div(numer.s, denom);
139 | 		num x = floor_div(numer.x, denom);
140 | 		num y = floor_div(numer.y, denom);
141 | 		num z = floor_div(numer.z, denom);
142 | 		// Two candidate quotients in doubled coordinates: each floored value moved up to the nearest odd, resp. even, number.
143 | 		hurwitz_quaternion q_odd(doubled_coords_tag{}, s | 1, x | 1, y | 1, z | 1);
144 | 		hurwitz_quaternion r_odd = a - b * q_odd;
145 | 		hurwitz_quaternion q_even(doubled_coords_tag{}, (s+1)&~num(1), (x+1)&~num(1), (y+1)&~num(1), (z+1)&~num(1));
146 | 		hurwitz_quaternion r_even = a - b * q_even;
147 | 		div_t res = norm(r_odd) < norm(r_even) ? div_t{q_odd, r_odd} : div_t{q_even, r_even}; // keep the candidate with the smaller remainder
148 | 		assert(norm(res.rem) < norm(b));
149 | 		return res;
150 | 	}
151 | 	// Euclidean loop on right_div remainders; returns the other operand once one of them reaches zero.
152 | 	// a = ga', b = gb'
153 | 	friend hurwitz_quaternion right_gcd(hurwitz_quaternion a, hurwitz_quaternion b) {
154 | 		while (a) {
155 | 			b = right_div(b, a).rem;
156 | 			std::swap(a, b);
157 | 		}
158 | 		return b;
159 | 	}
160 | };
161 | 


--------------------------------------------------------------------------------
/src/reverse_comparator.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <functional>
 4 | #include <utility>
 5 | 
 6 | template <typename F> struct reverse_comparator_t {
 7 | 	F f; // wrapped comparator; every overload below calls it with the two arguments exchanged
 8 | 	template <typename Lhs, typename Rhs> constexpr bool operator() (Lhs&& lhs, Rhs&& rhs) & {
 9 | 		return f(std::forward<Rhs>(rhs), std::forward<Lhs>(lhs));
10 | 	}
11 | 	template <typename Lhs, typename Rhs> constexpr bool operator() (Lhs&& lhs, Rhs&& rhs) const& {
12 | 		return f(std::forward<Rhs>(rhs), std::forward<Lhs>(lhs));
13 | 	}
14 | 	template <typename Lhs, typename Rhs> constexpr bool operator() (Lhs&& lhs, Rhs&& rhs) && {
15 | 		return std::move(f)(std::forward<Rhs>(rhs), std::forward<Lhs>(lhs)); // rvalue wrapper: invoke f as an rvalue too
16 | 	}
17 | 	template <typename Lhs, typename Rhs> constexpr bool operator() (Lhs&& lhs, Rhs&& rhs) const&& {
18 | 		return std::move(f)(std::forward<Rhs>(rhs), std::forward<Lhs>(lhs));
19 | 	}
20 | };
21 | 
22 | template <typename F> constexpr reverse_comparator_t<std::decay_t<F>> reverse_comparator(F&& f) {
23 | 	return reverse_comparator_t<std::decay_t<F>>{ std::forward<F>(f) }; // stores a decayed copy of f in the wrapper
24 | }
25 | 


--------------------------------------------------------------------------------
/src/rmq.hpp:
--------------------------------------------------------------------------------
  1 | #pragma once
  2 | 
  3 | #include <functional>
  4 | #include <vector>
  5 | #include <cassert>
  6 | #include <cstdint>
  7 | 
  8 | template <typename T, class Compare = std::less<T>> class RangeMinQuery : private Compare {
  9 | 	// Elements are split into buckets of 32 so that one uint32_t can describe a whole bucket.
 10 | 	static const int BUCKET_SIZE_LOG = 5;
 11 | 	static const int BUCKET_SIZE = 32;
 12 | 	static_assert(BUCKET_SIZE == (1 << BUCKET_SIZE_LOG), "BUCKET_SIZE should be a power of 2");
 13 | 	static const int CACHE_LINE_ALIGNMENT = 64;
 14 | 	int n = 0;
 15 | 	// data: copy of the input. pref_data / suff_data: running minimum from the start / up to the end of each bucket.
 16 | 	// sparse_table: level-major table over complete buckets; level k, slot i covers buckets [i, i + 2^k).
 17 | 	std::vector<T> data, pref_data, suff_data, sparse_table;
 18 | 	std::vector<uint32_t> range_mask; // range_mask[i]: in-bucket offsets of the monotone stack ending at index i
 19 | 
 20 | private:
 21 | 	// Only complete buckets are counted; a trailing partial bucket never enters the sparse table.
 22 | 	int num_buckets() const { return n >> BUCKET_SIZE_LOG; }
 23 | 	// Bit length of num_buckets(), or 0 when there is no complete bucket.
 24 | 	int num_levels() const {
 25 | 		const int buckets = num_buckets();
 26 | 		return buckets == 0 ? 0 : 32 - __builtin_clz(buckets);
 27 | 	}
 28 | 	int sparse_table_size() const { return num_levels() * num_buckets(); }
 29 | 
 30 | private:
 31 | 	// "Minimum" with respect to Compare: b is returned unless a compares strictly before it.
 32 | 	const T& min(const T& a, const T& b) const {
 33 | 		if (Compare::operator()(a, b)) return a;
 34 | 		return b;
 35 | 	}
 36 | 	void setmin(T& a, const T& b) const { if (Compare::operator()(b, a)) a = b; }
 37 | 
 38 | 	template <typename Vec> static int get_size(const Vec& v) { using std::size; return int(size(v)); }
 39 | 
 40 | public:
 41 | 	RangeMinQuery() {}
 42 | 	template <typename Vec> explicit RangeMinQuery(const Vec& data_, const Compare& comp_ = Compare())
 43 | 		: Compare(comp_)
 44 | 		, n(get_size(data_))
 45 | 		, data(n)
 46 | 		, pref_data(n)
 47 | 		, suff_data(n)
 48 | 		, sparse_table(sparse_table_size())
 49 | 		, range_mask(n)
 50 | 	{
 51 | 		const int in_bucket = BUCKET_SIZE - 1; // mask extracting an index's offset inside its bucket
 52 | 		for (int i = 0; i < n; i++) data[i] = data_[i];
 53 | 		for (int i = 0; i < n; i++) {
 54 | 			// A bucket's first index starts from an empty stack; later ones extend their predecessor's.
 55 | 			uint32_t stack_bits = (i & in_bucket) ? range_mask[i-1] : 0;
 56 | 			// Pop offsets, highest first, while their element does not compare before data[i].
 57 | 			while (stack_bits && !Compare::operator()(data[(i | in_bucket) - __builtin_clz(stack_bits)], data[i])) {
 58 | 				stack_bits -= uint32_t(1) << (BUCKET_SIZE - 1 - __builtin_clz(stack_bits));
 59 | 			}
 60 | 			range_mask[i] = stack_bits | (uint32_t(1) << (i & in_bucket));
 61 | 		}
 62 | 
 63 | 		for (int i = 0; i < n; i++) {
 64 | 			pref_data[i] = data[i];
 65 | 			if ((i & in_bucket) != 0) setmin(pref_data[i], pref_data[i-1]);
 66 | 		}
 67 | 		for (int i = n-1; i >= 0; i--) {
 68 | 			suff_data[i] = data[i];
 69 | 			const bool next_in_same_bucket = i+1 < n && ((i+1) & in_bucket) != 0;
 70 | 			if (next_in_same_bucket) setmin(suff_data[i], suff_data[i+1]);
 71 | 		}
 72 | 
 73 | 		// Level 0 of the sparse table: the minimum of every complete bucket.
 74 | 		const int buckets = num_buckets();
 75 | 		for (int b = 0; b < buckets; b++) {
 76 | 			T& slot = sparse_table[b];
 77 | 			slot = data[b * BUCKET_SIZE];
 78 | 			for (int off = 1; off < BUCKET_SIZE; off++) setmin(slot, data[b * BUCKET_SIZE + off]);
 79 | 		}
 80 | 		// Level l+1 joins two level-l entries that start 2^l buckets apart.
 81 | 		for (int l = 0; l+1 < num_levels(); l++) {
 82 | 			const int span = 1 << l, cur = l * buckets, nxt = (l+1) * buckets;
 83 | 			for (int i = 0; i + 2 * span <= buckets; i++) {
 84 | 				sparse_table[nxt + i] = min(sparse_table[cur + i], sparse_table[cur + i + span]);
 85 | 			}
 86 | 		}
 87 | 	}
 88 | 
 89 | 	T query(int l, int r) const {
 90 | 		assert(l <= r);
 91 | 		const int first_bucket = l >> BUCKET_SIZE_LOG;
 92 | 		const int last_bucket = r >> BUCKET_SIZE_LOG;
 93 | 		if (first_bucket == last_bucket) {
 94 | 			// Same bucket: discard stack offsets below l; the lowest surviving offset is the answer.
 95 | 			const uint32_t candidates = range_mask[r] & ~((uint32_t(1) << (l & (BUCKET_SIZE-1))) - 1);
 96 | 			return data[(l & ~(BUCKET_SIZE-1)) + __builtin_ctz(candidates)];
 97 | 		}
 98 | 		// Different buckets: tail of l's bucket, head of r's bucket, plus any complete buckets strictly between.
 99 | 		T ans = min(suff_data[l], pref_data[r]);
100 | 		const int mid_begin = first_bucket + 1;
101 | 		if (mid_begin < last_bucket) {
102 | 			const int level = (32 - __builtin_clz(last_bucket - mid_begin)) - 1;
103 | 			setmin(ans, sparse_table[level * num_buckets() + mid_begin]);
104 | 			setmin(ans, sparse_table[level * num_buckets() + last_bucket - (1 << level)]);
105 | 		}
106 | 		return ans;
107 | 	}
108 | };
109 | 
110 | template <typename T> using RangeMaxQuery = RangeMinQuery<T, std::greater<T>>; // same structure with std::greater as the comparator, so query() yields the largest element
111 | 


--------------------------------------------------------------------------------
/src/rmq.test.cpp:
--------------------------------------------------------------------------------
 1 | #include <catch2/catch_test_macros.hpp>
 2 | #include <catch2/catch_get_random_seed.hpp>
 3 | 
 4 | #include "rmq.hpp"
 5 | 
 6 | #include <vector>
 7 | #include <random>
 8 | 
 9 | TEST_CASE("RangeMinQuery", "[rmq]") {
10 | 	// Every element is {random key, index}; the index makes all entries distinct.
11 | 	using entry = std::pair<int, int>;
12 | 	std::mt19937 mt(Catch::getSeed());
13 | 	for (int N : {1, 2, 3, 5, 10, 20, 33, 48, 100, 163, 512}) {
14 | 		std::vector<entry> data(N);
15 | 		for (int i = 0; i < N; i++) data[i] = {mt(), i};
16 | 
17 | 		RangeMinQuery<entry> minQ(data);
18 | 		RangeMaxQuery<entry> maxQ(data);
19 | 
20 | 		// Compare every window [l, r] against running extremes computed naively.
21 | 		for (int l = 0; l < N; l++) {
22 | 			entry cur_min = data[l], cur_max = data[l];
23 | 			for (int r = l; r < N; r++) {
24 | 				cur_min = std::min(cur_min, data[r]);
25 | 				REQUIRE(minQ.query(l, r) == cur_min);
26 | 				cur_max = std::max(cur_max, data[r]);
27 | 				REQUIRE(maxQ.query(l, r) == cur_max);
28 | 			}
29 | 		}
30 | 	}
31 | }
32 | 


--------------------------------------------------------------------------------
/src/seg_tree.hpp:
--------------------------------------------------------------------------------
  1 | #include <cassert>
  2 | #include <array>
  3 | #include <ostream>
  4 | 
  5 | namespace seg_tree {
  6 | 
  7 | // Floor of log_2(a), i.e. the index of the highest 1-bit of a; yields -1 for a == 0.
  8 | inline int floor_log_2(int a) {
  9 | 	return a ? int(8 * sizeof(a)) - 1 - __builtin_clz(a) : -1; // int(...) keeps both arms of ?: signed (sizeof yields size_t)
 10 | }
 11 | 
 12 | inline int ceil_log_2(int a) {
 13 | 	return a == 0 ? -1 : floor_log_2(2*a-1); // for a >= 1: the smallest k with 2^k >= a; -1 is kept for a == 0
 14 | }
 15 | 
 16 | inline int next_pow_2(int a) { // NOTE(review): for a == 0 the exponent is -1 and the shift is invalid -- callers need to guard that case
 17 | 	const int exponent = ceil_log_2(a); return 1 << exponent;
 18 | }
 19 | 
 20 | struct point {
 21 | 	int a; // node index; the traversal helpers below halve it until it is no longer positive
 22 | 	point() : a(0) {}
 23 | 	explicit point(int a_) : a(a_) { assert(a >= -1); }
 24 | 
 25 | 	explicit operator bool () { return a != 0; }
 26 | 
 27 | 	// Implicit on purpose, so a point can be used directly as an array index.
 28 | 	/* implicit */ operator int() const { return a; }
 29 | 
 30 | 	// Child number z of this node: index (a << 1) | z.
 31 | 	point c(bool z) const {
 32 | 		const int child = (a<<1)|z; return point(child);
 33 | 	}
 34 | 	point operator [] (bool z) const {
 35 | 		return c(z);
 36 | 	}
 37 | 
 38 | 	// Parent of this node: index a >> 1.
 39 | 	point p() const {
 40 | 		const int parent = a>>1; return point(parent);
 41 | 	}
 42 | 
 43 | 	friend std::ostream& operator << (std::ostream& o, const point& p) { return o << p.a; }
 44 | 
 45 | 	// Calls f on this node and then on each ancestor, walking up while the index is positive.
 46 | 	template <typename F> void for_each(F f) const {
 47 | 		int v = a;
 48 | 		while (v > 0) { f(point(v)); v >>= 1; }
 49 | 	}
 50 | 
 51 | 	// Calls f on a >> L for L = floor_log_2(a) down to 0, i.e. from the top ancestor down to this node.
 52 | 	template <typename F> void for_each_down(F f) const {
 53 | 		int L = floor_log_2(a);
 54 | 		while (L >= 0) { f(point(a >> L)); L--; }
 55 | 	}
 56 | 
 57 | 	// Same visiting order as for_each: this node first, then upwards.
 58 | 	template <typename F> void for_each_up(F f) const {
 59 | 		int v = a;
 60 | 		while (v > 0) { f(point(v)); v >>= 1; }
 61 | 	}
 62 | 
 63 | 	// Like for_each_down, but stops before reaching this node (L stays strictly greater than 0).
 64 | 	template <typename F> void for_parents_down(F f) const {
 65 | 		int L = floor_log_2(a);
 66 | 		while (L > 0) { f(point(a >> L)); L--; }
 67 | 	}
 68 | 
 69 | 	// Like for_each_up, but starts at the parent a >> 1.
 70 | 	template <typename F> void for_parents_up(F f) const {
 71 | 		int v = a >> 1;
 72 | 		while (v > 0) { f(point(v)); v >>= 1; }
 73 | 	}
 74 | 
 75 | 	// Prefix forms step the raw index in place; postfix forms return the value from before the step.
 76 | 	point& operator ++ () { a += 1; return *this; }
 77 | 	point operator ++ (int) { point before(a); a += 1; return before; }
 78 | 	point& operator -- () { a -= 1; return *this; }
 79 | 	point operator -- (int) { point before(a); a -= 1; return before; }
 80 | };
 81 | 
 82 | struct range { // pair of node indices a <= b, printed as the half-open interval "[a..b)"
 83 | 	int a, b;
 84 | 	range() : a(1), b(1) {} // a == b, so the for_each loops below make no calls
 85 | 	range(int a_, int b_) : a(a_), b(b_) {
 86 | 		assert(1 <= a && a <= b && b <= 2 * a);
 87 | 	}
 88 | 	explicit range(std::array<int, 2> r) : range(r[0], r[1]) {}
 89 | 
 90 | 	explicit operator std::array<int, 2>() const {
 91 | 		return {a,b};
 92 | 	}
 93 | 	// r[false] is a, r[true] is b.
 94 | 	const int& operator[] (bool z) const {
 95 | 		return z ? b : a;
 96 | 	}
 97 | 
 98 | 	friend std::ostream& operator << (std::ostream& o, const range& r) { return o << "[" << r.a << ".." << r.b << ")"; }
 99 | 
100 | 	// Iterate over the range from outside-in.
101 | 	//   Calls f(point a)
102 | 	template <typename F> void for_each(F f) const {
103 | 		for (int x = a, y = b; x < y; x >>= 1, y >>= 1) { // one tree level per round, then both ends move to their parents
104 | 			if (x & 1) f(point(x++)); // odd left end: emit it and step right
105 | 			if (y & 1) f(point(--y)); // odd right end: step left and emit that node
106 | 		}
107 | 	}
108 | 
109 | 	// Iterate over the range from outside-in.
110 | 	//   Calls f(point a, bool is_right)
111 | 	template <typename F> void for_each_with_side(F f) const {
112 | 		for (int x = a, y = b; x < y; x >>= 1, y >>= 1) {
113 | 			if (x & 1) f(point(x++), false); // false: node taken from the left end
114 | 			if (y & 1) f(point(--y), true); // true: node taken from the right end
115 | 		}
116 | 	}
117 | 
118 | 	// Iterate over the range from left to right.
119 | 	//    Calls f(point)
120 | 	template <typename F> void for_each_l_to_r(F f) const {
121 | 		int anc_depth = floor_log_2((a-1) ^ b); // index of the highest bit in which a-1 and b differ
122 | 		int anc_msk = (1 << anc_depth) - 1; // all bits below that position
123 | 		for (int v = (-a) & anc_msk; v; v &= v-1) { // set bits of -a below anc_depth, lowest first
124 | 			int i = __builtin_ctz(v);
125 | 			f(point(((a-1) >> i) + 1));
126 | 		}
127 | 		for (int v = b & anc_msk; v; ) { // set bits of b below anc_depth, highest first
128 | 			int i = floor_log_2(v);
129 | 			f(point((b >> i) - 1));
130 | 			v ^= (1 << i);
131 | 		}
132 | 	}
133 | 
134 | 	// Iterate over the range from right to left.
135 | 	//    Calls f(point)
136 | 	template <typename F> void for_each_r_to_l(F f) const {
137 | 		int anc_depth = floor_log_2((a-1) ^ b); // same setup as in for_each_l_to_r
138 | 		int anc_msk = (1 << anc_depth) - 1;
139 | 		for (int v = b & anc_msk; v; v &= v-1) { // set bits of b below anc_depth, lowest first
140 | 			int i = __builtin_ctz(v);
141 | 			f(point((b >> i) - 1));
142 | 		}
143 | 		for (int v = (-a) & anc_msk; v; ) { // set bits of -a below anc_depth, highest first
144 | 			int i = floor_log_2(v);
145 | 			f(point(((a-1) >> i) + 1));
146 | 			v ^= (1 << i);
147 | 		}
148 | 	}
149 | 	// NOTE(review): presumably visits the proper ancestors of the range's boundary nodes, top-down -- confirm against callers.
150 | 	template <typename F> void for_parents_down(F f) const {
151 | 		int x = a, y = b;
152 | 		if ((x ^ y) > x) { x <<= 1, std::swap(x, y); } // when (x ^ y) > x, x is doubled and the two values are exchanged
153 | 		int dx = __builtin_ctz(x); // number of trailing zero bits of x
154 | 		int dy = __builtin_ctz(y); // number of trailing zero bits of y
155 | 		int anc_depth = floor_log_2((x-1) ^ y);
156 | 		for (int i = floor_log_2(x); i > dx; i--) { // x >> i for i from floor_log_2(x) down to dx+1
157 | 			f(point(x >> i));
158 | 		}
159 | 		for (int i = anc_depth; i > dy; i--) { // y >> i for i from anc_depth down to dy+1
160 | 			f(point(y >> i));
161 | 		}
162 | 	}
163 | 	// Counterpart of for_parents_down with the same setup; the shifts run upward instead.
164 | 	template <typename F> void for_parents_up(F f) const {
165 | 		int x = a, y = b;
166 | 		if ((x ^ y) > x) { x <<= 1, std::swap(x, y); }
167 | 		int dx = __builtin_ctz(x);
168 | 		int dy = __builtin_ctz(y);
169 | 		int anc_depth = floor_log_2((x-1) ^ y);
170 | 		for (int i = dx+1; i <= anc_depth; i++) { // x >> i for i from dx+1 up to anc_depth
171 | 			f(point(x >> i));
172 | 		}
173 | 		for (int v = y >> (dy+1); v; v >>= 1) { // y >> (dy+1), then repeated halving until the value reaches 0
174 | 			f(point(v));
175 | 		}
176 | 	}
177 | };
178 | 
179 | struct in_order_layout {
180 | 	// Local aliases, so users can write in_order_layout::point / in_order_layout::range.
181 | 	using point = seg_tree::point;
182 | 	using range = seg_tree::range;
183 | 
184 | 	int N, S; // N: number of leaves; S: next_pow_2(N), or 0 when N == 0
185 | 	in_order_layout() : N(0), S(0) {}
186 | 	in_order_layout(int N_) : N(N_), S(N == 0 ? 0 : next_pow_2(N)) {}
187 | 	// Node for leaf number a (0 <= a < N).
188 | 	point get_point(int a) const {
189 | 		assert(0 <= a && a < N);
190 | 		const int raw = a + S;
191 | 		return point(raw < 2 * N ? raw : raw - N);
192 | 	}
193 | 	// Node range for the leaf numbers a..b (0 <= a <= b <= N).
194 | 	range get_range(int a, int b) const {
195 | 		assert(0 <= a && a <= b && b <= N);
196 | 		if (N == 0) return range();
197 | 		const int lo = a + S, hi = b + S;
198 | 		return range((lo < 2 * N ? lo : 2*(lo-N)), (hi < 2 * N ? hi : 2*(hi-N)));
199 | 	}
200 | 	range get_range(std::array<int, 2> p) const { return get_range(p[0], p[1]); }
201 | 
202 | 	// Inverse of get_point: the leaf number of a leaf node (N <= index < 2 * N).
203 | 	int get_leaf_index(point pt) const {
204 | 		const int a = int(pt);
205 | 		assert(N <= a && a < 2 * N);
206 | 		return a < S ? a + N - S : a - S;
207 | 	}
208 | 
209 | 	// Leaf numbers {first, second} bounding node pt; get_node_size reports second - first.
210 | 	std::array<int, 2> get_node_bounds(point pt) const {
211 | 		const int a = int(pt);
212 | 		assert(1 <= a && a < 2 * N);
213 | 		const int l = __builtin_clz(a) - __builtin_clz(2*N-1); // shift that gives a the bit length of 2*N-1
214 | 		const int x = a << l, y = (a+1) << l;
215 | 		assert(S <= x && x < y && y <= 2*S);
216 | 		const auto to_leaf = [this](int edge) { return (edge < 2 * N ? edge : (edge>>1) + N) - S; };
217 | 		return {to_leaf(x), to_leaf(y)};
218 | 	}
219 | 	// Same index mapping as get_node_bounds, applied to the single index 2*a+1.
220 | 	int get_node_split(point pt) const {
221 | 		const int a = int(pt);
222 | 		assert(1 <= a && a < N);
223 | 		const int l = __builtin_clz(2*a+1) - __builtin_clz(2*N-1);
224 | 		const int x = (2*a+1) << l;
225 | 		assert(S <= x && x < 2*S);
226 | 		return (x < 2 * N ? x : (x>>1) + N) - S;
227 | 	}
228 | 	// Difference between the two bounds reported by get_node_bounds.
229 | 	int get_node_size(point pt) const {
230 | 		const std::array<int, 2> span = get_node_bounds(pt);
231 | 		return span[1] - span[0];
232 | 	}
233 | };
234 | 
235 | struct circular_layout {
236 | 	// Alias them in for convenience
237 | 	using point = seg_tree::point;
238 | 	using range = seg_tree::range;
239 | 
240 | 	int N;
241 | 	circular_layout() : N(0) {}
242 | 	circular_layout(int N_) : N(N_) {}
243 | 
244 | 	point get_point(int a) const {
245 | 		assert(0 <= a && a < N);
246 | 		return point(N + a);
247 | 	}
248 | 
249 | 	range get_range(int a, int b) const {
250 | 		assert(0 <= a && a <= b && b <= N);
251 | 		if (N == 0) return range();
252 | 		return range(N + a, N + b);
253 | 	}
254 | 
255 | 	range get_range(std::array<int, 2> p) const {
256 | 		return get_range(p[0], p[1]);
257 | 	}
258 | 
259 | 	int get_leaf_index(point pt) const {
260 | 		int a = int(pt);
261 | 		assert(N <= a && a < 2 * N);
262 | 		return a - N;
263 | 	}
264 | 
265 | 	// Returns {x,y} so that 0 <= x < N and 1 <= y <= N
266 | 	// If the point is non-wrapping, then 0 <= x < y <= N
267 | 	std::array<int, 2> get_node_bounds(point pt) const {
268 | 		int a = int(pt);
269 | 		assert(1 <= a && a < 2 * N);
270 | 		int l = __builtin_clz(a) - __builtin_clz(2*N-1);
271 | 		int S = next_pow_2(N);
272 | 		int x = a << l, y = (a+1) << l;
273 | 		assert(S <= x && x < y && y <= 2*S);
274 | 		return {(x >= 2 * N ? x >> 1 : x) - N, (y > 2 * N ? y >> 1 : y) - N};
275 | 	}
276 | 
277 | 	// Returns the split point of the node, such that 1 <= s <= N.
278 | 	int get_node_split(point pt) const {
279 | 		int a = int(pt);
280 | 		assert(1 <= a && a < N);
281 | 		return get_node_bounds(pt.c(0))[1];
282 | 	}
283 | 
284 | 	int get_node_size(point pt) const {
285 | 		auto bounds = get_node_bounds(pt);
286 | 		int r = bounds[1] - bounds[0];
287 | 		return r > 0 ? r : r + N;
288 | 	}
289 | };
290 | 
291 | } // namespace seg_tree
292 | 


--------------------------------------------------------------------------------
/src/seg_tree.test.cpp:
--------------------------------------------------------------------------------
 1 | #include <catch2/catch_template_test_macros.hpp>
 2 | 
 3 | #include "seg_tree.hpp"
 4 | 
 5 | #include <type_traits>
 6 | 
 7 | TEMPLATE_TEST_CASE("Segment Tree Layouts", "[seg_tree][template]", seg_tree::in_order_layout, seg_tree::circular_layout) {
 8 | 	for (int N : {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 100, 101, 127, 128, 129}) {
 9 | 		auto seg = TestType(N);
10 | 		for (int i = 0; i < N; i++) {
11 | 			auto pt = seg.get_point(i);
12 | 			REQUIRE(seg.get_leaf_index(pt) == i);
13 | 			REQUIRE(seg.get_node_bounds(pt) == std::array<int, 2>({i,i+1}));
14 | 			REQUIRE(seg.get_node_size(pt) == 1);
15 | 		}
16 | 		for (seg_tree::point a(N-1); a >= 1; a--) {
17 | 			auto pt = seg_tree::point(a);
18 | 			REQUIRE(seg.get_node_size(pt) == seg.get_node_size(pt.c(0)) + seg.get_node_size(pt.c(1)));
19 | 			REQUIRE(seg.get_node_bounds(pt)[0] == seg.get_node_bounds(pt.c(0))[0]);
20 | 			REQUIRE(seg.get_node_bounds(pt)[1] == seg.get_node_bounds(pt.c(1))[1]);
21 | 			if constexpr (std::is_same_v<TestType, seg_tree::in_order_layout>) {
22 | 				REQUIRE(seg.get_node_bounds(pt.c(0))[1] == seg.get_node_bounds(pt.c(1))[0]);
23 | 			} else {
24 | 				REQUIRE(seg.get_node_bounds(pt.c(0))[1] % N == seg.get_node_bounds(pt.c(1))[0]);
25 | 			}
26 | 		}
27 | 
28 | 		for (int l = 0; l <= N; l++) {
29 | 			for (int r = l; r <= N; r++) {
30 | 				auto rng = seg.get_range(l, r);
31 | 
32 | 				{
33 | 					int x = l, y = r;
34 | 					rng.for_each([&](auto a) {
35 | 						auto bounds = seg.get_node_bounds(a);
36 | 						if (x == bounds[0]) {
37 | 							x = bounds[1];
38 | 						} else if (y == bounds[1]) {
39 | 							y = bounds[0];
40 | 						} else assert(false);
41 | 					});
42 | 					REQUIRE(x == y);
43 | 				}
44 | 				{
45 | 					int x = l, y = r;
46 | 					rng.for_each_with_side([&](auto a, bool d) {
47 | 						auto bounds = seg.get_node_bounds(a);
48 | 						if (d == 0) {
49 | 							REQUIRE(x == bounds[0]);
50 | 							x = bounds[1];
51 | 						} else if (d == 1) {
52 | 							REQUIRE(y == bounds[1]);
53 | 							y = bounds[0];
54 | 						} else assert(false);
55 | 					});
56 | 					REQUIRE(x == y);
57 | 				}
58 | 				{
59 | 					int x = l;
60 | 					rng.for_each_l_to_r([&](auto a) {
61 | 						auto bounds = seg.get_node_bounds(a);
62 | 						REQUIRE(x == bounds[0]);
63 | 						x = bounds[1];
64 | 					});
65 | 					REQUIRE(x == r);
66 | 				}
67 | 				{
68 | 					int y = r;
69 | 					rng.for_each_r_to_l([&](auto a) {
70 | 						auto bounds = seg.get_node_bounds(a);
71 | 						REQUIRE(y == bounds[1]);
72 | 						y = bounds[0];
73 | 					});
74 | 					REQUIRE(y == l);
75 | 				}
76 | 			}
77 | 		}
78 | 	}
79 | }
80 | 


--------------------------------------------------------------------------------
/src/smawk.hpp:
--------------------------------------------------------------------------------
  1 | #pragma once
  2 | 
  3 | #include <vector>
  4 | #include <cassert>
  5 | #include <optional>
  6 | #if __cpp_concepts >= 202002
  7 | #include <concepts>
  8 | #endif
  9 | 
 10 | namespace smawk {
 11 | 
 12 | template <typename T> struct value_t {
 13 | 	T v;
 14 | 	int col;
 15 | };
 16 | 
// Requirements on the callables that describe an implicit matrix:
//   Get(int row, int col) -> T
//       returns the entry at (row, col); the result must be convertible to T.
//   Select(int row, const value_t<T>& opt_0, const value_t<T>& opt_1) returns 0 or 1 for which is better
//       i.e. a truthy result means opt_1 is preferred over opt_0 for that row
//       (the accompanying tests assume opt_0.col < opt_1.col).
// T itself must be default-initializable and movable; copyability is not required.
#if __cpp_concepts >= 202002
template <typename T, typename Get, typename Select> concept totally_monotone_matrix_oracle =
	std::default_initializable<T> && std::movable<T>
	&& std::invocable<Get, int, int> && std::convertible_to<std::invoke_result_t<Get, int, int>, T>
	&& std::predicate<Select, int, const value_t<T>&, const value_t<T>&>;
#endif
 25 | 
 26 | 
 27 | template <typename Get, typename Select, typename T = std::invoke_result_t<Get, int, int>>
 28 | #if __cpp_concepts >= 202002
 29 | requires totally_monotone_matrix_oracle<T, Get, Select>
 30 | #endif
 31 | class LARSCH {
 32 | public:
 33 | 	int N;
 34 | 	Get get;
 35 | 	Select select;
 36 | 	int L;
 37 | 	int num_rows;
 38 | 
 39 | 	std::vector<std::vector<value_t<T>>> stk;
 40 | 	std::vector<std::pair<value_t<T>, int>> bests;
 41 | 	LARSCH() {}
 42 | 	LARSCH(int N_, Get&& get_, Select&& select_) : N(N_), get(std::forward<Get>(get_)), select(std::forward<Select>(select_)) {
 43 | 		L = N ? 31 - __builtin_clz(N) : 0;
 44 | 		stk.resize(L);
 45 | 		bests.resize(L);
 46 | 		// N >> L == 1, unless N == 0
 47 | 		for (int i = 0; i < L; i++) {
 48 | 			stk[i].reserve(N >> (i+1));
 49 | 		}
 50 | 		num_rows = 0;
 51 | 	}
 52 | 
 53 | 	value_t<T> push_and_query_next() {
 54 | 		assert(num_rows < N);
 55 | 		int inp_row = num_rows++;
 56 | 
 57 | 		int l = 0;
 58 | 		value_t<T> nbest;
 59 | 		while (true) {
 60 | 			int r = inp_row >> l;
 61 | 			int col = l == 0 ? inp_row : stk[l-1][r].col;
 62 | 			if (r & 1) {
 63 | 				int row = ((r+1) << l) - 1;
 64 | 				value_t<T> prv_col_top;
 65 | 				if (l == 0) prv_col_top = {get(row, col), col};
 66 | 				else prv_col_top = {std::move(stk[l-1][r].v), stk[l-1][r].col};
 67 | 
 68 | 				// just check this guy at this row, and then push it into the next layer, but don't query yet
 69 | 				if (select(row, bests[l].first, prv_col_top)) {
 70 | 					// prv_col_top is better here
 71 | 					bests[l].first = std::move(prv_col_top);
 72 | 					bests[l].second = (r+1)/2;
 73 | 					// optimization: since we're the global best, we know we'll kill the entire rest of the stack, so just do it here
 74 | 					assert(int(stk[l].size()) >= (r+1)/2);
 75 | 					stk[l].resize((r+1)/2);
 76 | 				}
 77 | 			}
 78 | 			if (l < L) {
 79 | 				std::optional<value_t<T>> to_push;
 80 | 				while (int(stk[l].size()) > (r+1)/2) {
 81 | 					int row = (int(stk[l].size()) << (l+1)) - 1;
 82 | 					value_t<T> nv{get(row, col), col};
 83 | 					if (select(row, stk[l].back(), nv)) {
 84 | 						stk[l].pop_back();
 85 | 						to_push = std::move(nv);
 86 | 					} else {
 87 | 						break;
 88 | 					}
 89 | 				}
 90 | 				if (to_push) {
 91 | 					stk[l].emplace_back(std::move(*to_push));
 92 | 				} else {
 93 | 					int row = (int(stk[l].size()+1) << (l+1)) - 1;
 94 | 					if (row < N) stk[l].emplace_back(get(row, col), col);
 95 | 				}
 96 | 			}
 97 | 			if (r & 1) {
 98 | 				// go return
 99 | 				nbest = std::move(bests[l].first);
100 | 				l--;
101 | 				break;
102 | 			} else if (l == L) {
103 | 				// special case: just go down 1 level already
104 | 				int row = ((r+1) << l) - 1;
105 | 				if (l == 0) nbest = {get(row, col), col};
106 | 				else nbest = {std::move(stk[l-1][r].v), stk[l-1][r].col};
107 | 				l--;
108 | 				break;
109 | 			} else if (((r+2) << l) - 1 >= N) {
110 | 				// go return
111 | 				nbest.col = col;
112 | 				break;
113 | 			} else {
114 | 				l++;
115 | 				continue;
116 | 			}
117 | 			assert(false);
118 | 		}
119 | 		for (; l >= 0; l--) {
120 | 			int r = inp_row >> l;
121 | 			assert(!(r & 1));
122 | 			int row = ((r+1) << l) - 1;
123 | 			bests[l].first = std::move(nbest);
124 | 			bool did_set = false;
125 | 			while (true) {
126 | 				int idx = bests[l].second;
127 | 				int col = (l == 0 ? idx : stk[l-1][idx].col);
128 | 				value_t<T> cnd;
129 | 				if (l > 0 && idx == r) cnd = {std::move(stk[l-1][r].v), col};
130 | 				else cnd = {get(row, col), col};
131 | 				if (!did_set || select(row, nbest, cnd)) {
132 | 					did_set = true;
133 | 					nbest = std::move(cnd);
134 | 				}
135 | 				if (col == bests[l].first.col) break;
136 | 				bests[l].second++;
137 | 			}
138 | 		}
139 | 		assert(l == -1);
140 | 		return nbest;
141 | 	}
142 | };
143 | 
144 | template <typename Get, typename Select, typename T = std::invoke_result_t<Get&&, int, int>>
145 | #if __cpp_concepts >= 202002
146 | requires totally_monotone_matrix_oracle<T, Get&&, Select&&>
147 | #endif
148 | std::vector<value_t<T>> smawk(int N, int M, Get&& get, Select&& select) {
149 | 	// TODO: If M >> N, then we should do an extra layer of column filter on the outside. The cutoff should be M > 2N or so.
150 | 	std::vector<value_t<T>> res(N);
151 | 	for (int i = 0; i < N; i++) res[i].col = -1;
152 | 	std::vector<int> stks(N);
153 | 	int L = N ? 31 - __builtin_clz(N) : 0;
154 | 	std::vector<int> stk_ends(L+1);
155 | 	stk_ends[0] = 0;
156 | 	for (int l = 0; l < L; l++) {
157 | 		int sz = 0;
158 | 		auto check_col = [&](int col, int min_sz) -> void {
159 | 			while (sz > min_sz) {
160 | 				int row = (sz << (l+1)) - 1;
161 | 				value_t<T> cnd(get(row, col), col);
162 | 				if (select(row, res[row], cnd)) {
163 | 					// we prefer cnd, save this
164 | 					res[row] = std::move(cnd);
165 | 					sz--;
166 | 				} else {
167 | 					break;
168 | 				}
169 | 			}
170 | 
171 | 			if (sz < (N >> (l+1))) {
172 | 				int row = ((sz+1) << (l+1)) - 1;
173 | 				if (res[row].col == col) {
174 | 					stks[stk_ends[l] + sz] = col;
175 | 					sz++;
176 | 				} else {
177 | 					value_t<T> cnd(get(row, col), col);
178 | 					// This is a legal optimization, but I'm not sure it buys anything real, so just stub it out with true ||
179 | 					if (true || res[row].col == -1 || res[row].col < col || !select(row, cnd, res[row])) {
180 | 						res[row] = std::move(cnd);
181 | 						stks[stk_ends[l] + sz] = col;
182 | 						sz++;
183 | 					}
184 | 				}
185 | 			}
186 | 		};
187 | 		if (l == 0) {
188 | 			for (int col = 0; col < M; col++) {
189 | 				check_col(col, 0);
190 | 			}
191 | 		} else {
192 | 			for (int z = stk_ends[l-1]; z < stk_ends[l]; z++) {
193 | 				check_col(stks[z], (z - stk_ends[l-1]) / 2);
194 | 			}
195 | 		}
196 | 		assert(sz <= (N >> (l+1)));
197 | 		stk_ends[l+1] = stk_ends[l] + sz;
198 | 	}
199 | 	for (int l = L; l >= 0; l--) {
200 | 		int z = l == 0 ? 0 : stk_ends[l-1];
201 | 		for (int r = 0; r < (N >> l); r += 2) {
202 | 			int row = ((r+1) << l) - 1;
203 | 			// TODO: You could not reset this? Not sure if it buys anything real.
204 | 			res[row].col = -1;
205 | 			for (; z < (l == 0 ? M : stk_ends[l]); z++) {
206 | 				int col = l == 0 ? z : stks[z];
207 | 				value_t<T> cnd = {get(row, col), col};
208 | 				if (res[row].col == -1 || select(row, res[row], cnd)) {
209 | 					res[row] = std::move(cnd);
210 | 				}
211 | 				if ((r+1) < (N >> l) && col == res[((r+2) << l) - 1].col) break;
212 | 			}
213 | 			assert(res[row].col != -1);
214 | 		}
215 | 	}
216 | 	return res;
217 | }
218 | 
219 | // namespace smawk
220 | }
221 | 


--------------------------------------------------------------------------------
/src/smawk.test.cpp:
--------------------------------------------------------------------------------
  1 | #include "smawk.hpp"
  2 | 
  3 | #include <catch2/catch_test_macros.hpp>
  4 | #include <catch2/catch_get_random_seed.hpp>
  5 | 
  6 | #include <random>
  7 | 
  8 | struct move_only_t {
  9 | 	int v;
 10 | 	move_only_t() : v(-1) {}
 11 | 	explicit move_only_t(int v_) : v(v_) {
 12 | 		assert(v_ != -1);
 13 | 	}
 14 | 	move_only_t(move_only_t&& o) {
 15 | 		v = o.v;
 16 | 		o.v = -1;
 17 | 	}
 18 | 	move_only_t& operator = (move_only_t&& o) {
 19 | 		v = o.v;
 20 | 		o.v = -1;
 21 | 		return *this;
 22 | 	}
 23 | 	move_only_t(const move_only_t& o) = delete;
 24 | 	move_only_t& operator = (const move_only_t& o) = delete;
 25 | };
 26 | 
 27 | std::vector<std::vector<int>> generate_totally_monotone(int N, int M, auto&& rng) {
 28 | 	std::vector<int> cur_order(M);
 29 | 	std::iota(cur_order.begin(), cur_order.end(), 0);
 30 | 	std::vector<int> cur_vals(M);
 31 | 	std::iota(cur_vals.begin(), cur_vals.end(), 0);
 32 | 	std::vector<std::vector<int>> perms; perms.reserve(M * (M-1) / 2 + 1);
 33 | 	perms.push_back(cur_vals);
 34 | 	{
 35 | 		std::vector<int> cnds; cnds.reserve(M);
 36 | 		for (int z = 0; z < M * (M-1) / 2; z++) {
 37 | 			cnds.clear();
 38 | 			for (int i = 0; i+1 < M; i++) {
 39 | 				if (cur_order[i] < cur_order[i+1]) {
 40 | 					cnds.push_back(i);
 41 | 				}
 42 | 			}
 43 | 			assert(!cnds.empty());
 44 | 			int i = cnds[std::uniform_int_distribution<int>(0, int(cnds.size()) - 1)(rng)];
 45 | 			std::swap(cur_order[i], cur_order[i+1]);
 46 | 			cur_vals[cur_order[i]] = i;
 47 | 			cur_vals[cur_order[i+1]] = i+1;
 48 | 			perms.push_back(cur_vals);
 49 | 		}
 50 | 	}
 51 | 	std::vector<std::vector<int>> output; output.reserve(N);
 52 | 	std::vector<int> stars_bars(M * (M-1) / 2 + N);
 53 | 	std::fill(stars_bars.begin(), stars_bars.begin() + N, 1);
 54 | 	std::shuffle(stars_bars.begin(), stars_bars.end(), rng);
 55 | 	{
 56 | 		int perm_idx = 0;
 57 | 		for (auto op : stars_bars) {
 58 | 			if (op == 0) {
 59 | 				perm_idx++;
 60 | 			} else {
 61 | 				output.push_back(perms[perm_idx]);
 62 | 			}
 63 | 		}
 64 | 	}
 65 | 	assert(int(output.size()) == N);
 66 | 	return output;
 67 | }
 68 | 
 69 | void check_smawk(int N, int M, std::vector<std::vector<int>> mat) {
 70 | 	auto result = smawk::smawk(N, M, [&](int row, int col) -> move_only_t {
 71 | 		REQUIRE(0 <= row); REQUIRE(row < N);
 72 | 		REQUIRE(0 <= col); REQUIRE(col < M);
 73 | 		return move_only_t{mat[row][col]};
 74 | 	}, [&](int r, const smawk::value_t<move_only_t>& cnd1, const smawk::value_t<move_only_t>& cnd2) -> bool {
 75 | 		REQUIRE(0 <= r); REQUIRE(r < N);
 76 | 		REQUIRE(0 <= cnd1.col); REQUIRE(cnd1.col < M);
 77 | 		REQUIRE(0 <= cnd2.col); REQUIRE(cnd2.col < M);
 78 | 		REQUIRE(cnd1.col < cnd2.col);
 79 | 		REQUIRE(cnd1.v.v == mat[r][cnd1.col]);
 80 | 		REQUIRE(cnd2.v.v == mat[r][cnd2.col]);
 81 | 		// cnd2 is better when it's strictly smaller
 82 | 		return cnd2.v.v < cnd1.v.v;
 83 | 	});
 84 | 	REQUIRE(int(result.size()) == N);
 85 | 	for (int i = 0; i < N; i++) {
 86 | 		int j = int(std::min_element(mat[i].begin(), mat[i].end()) - mat[i].begin());
 87 | 		REQUIRE(result[i].col == j);
 88 | 		REQUIRE(result[i].v.v == mat[i][j]);
 89 | 	}
 90 | }
 91 | 
 92 | TEST_CASE("SMAWK", "[smawk]") {
 93 | 	std::mt19937 mt(Catch::getSeed());
 94 | 	for (int N : {0, 1, 2, 3, 5, 8, 13}) {
 95 | 		for (int M : {0, 1, 2, 3, 5, 8, 13}) {
 96 | 			if (N > 0 && M == 0) continue;
 97 | 			auto inp = generate_totally_monotone(N, M, mt);
 98 | 			CAPTURE(N, M, inp);
 99 | 			check_smawk(N, M, inp);
100 | 		}
101 | 	}
102 | }
103 | 
104 | void check_larsch(int N, std::vector<std::vector<int>> mat) {
105 | 	const int M = N;
106 | 	smawk::LARSCH l(N, [&](int row, int col) -> move_only_t {
107 | 		REQUIRE(0 <= row); REQUIRE(row < N);
108 | 		REQUIRE(0 <= col); REQUIRE(col < M);
109 | 		REQUIRE(col <= row);
110 | 		return move_only_t{mat[row][col]};
111 | 	}, [&](int r, const smawk::value_t<move_only_t>& cnd1, const smawk::value_t<move_only_t>& cnd2) -> bool {
112 | 		REQUIRE(0 <= r); REQUIRE(r < N);
113 | 		REQUIRE(0 <= cnd1.col); REQUIRE(cnd1.col < M);
114 | 		REQUIRE(0 <= cnd2.col); REQUIRE(cnd2.col < M);
115 | 		REQUIRE(cnd1.col < cnd2.col);
116 | 		REQUIRE(cnd1.col <= r);
117 | 		REQUIRE(cnd2.col <= r);
118 | 		REQUIRE(cnd1.v.v == mat[r][cnd1.col]);
119 | 		REQUIRE(cnd2.v.v == mat[r][cnd2.col]);
120 | 		// cnd2 is better when it's strictly smaller
121 | 		return cnd2.v.v < cnd1.v.v;
122 | 	});
123 | 	for (int i = 0; i < N; i++) {
124 | 		auto res = l.push_and_query_next();
125 | 		int j = int(std::min_element(mat[i].begin(), mat[i].begin() + i + 1) - mat[i].begin());
126 | 		REQUIRE(res.col == j);
127 | 		REQUIRE(res.v.v == mat[i][j]);
128 | 	}
129 | }
130 | 
131 | TEST_CASE("LARSCH", "[smawk]") {
132 | 	std::mt19937 mt(Catch::getSeed());
133 | 	for (int N : {0, 1, 2, 3, 5, 8, 13}) {
134 | 		auto inp = generate_totally_monotone(N, N, mt);
135 | 		for (int i = 0; i < N; i++) {
136 | 			for (int j = i+1; j < N; j++) {
137 | 				inp[i][j] = -1;
138 | 			}
139 | 		}
140 | 		CAPTURE(N, inp);
141 | 		check_larsch(N, inp);
142 | 	}
143 | }
144 | 


--------------------------------------------------------------------------------
/src/static_tree.hpp:
--------------------------------------------------------------------------------
  1 | #pragma once
  2 | 
  3 | #include "yc.hpp"
  4 | #include "rmq.hpp"
  5 | 
  6 | struct static_forest_t {
  7 | 	int N;
  8 | 
  9 | private:
 10 | 	// original label to preorder
 11 | 	std::vector<int> idx;
 12 | 
 13 | 	// all keys/values are by preorder relabelling
 14 | 	std::vector<int> preorder;
 15 | 	std::vector<int> depth;
 16 | 	std::vector<int> par;
 17 | 	std::vector<int> sz;
 18 | 	std::vector<int> heavy_par;
 19 | 	std::vector<int> heavy_dist;
 20 | 
 21 | 	std::vector<int> depth_val_to_idx;
 22 | 	RangeMinQuery<int> depth_val_rmq;
 23 | 
 24 | public:
 25 | 
 26 | 	static_forest_t() : N(0) {}
 27 | 	static_forest_t(const std::vector<std::vector<int>>& adj, const std::vector<int>& roots = {}) :
 28 | 		N(int(adj.size())),
 29 | 		idx(N, -1),
 30 | 		preorder(N, -1),
 31 | 		depth(N, -1),
 32 | 		par(N, -1),
 33 | 		sz(N, -1),
 34 | 		heavy_par(N, -1),
 35 | 		heavy_dist(N, -1),
 36 | 		depth_val_to_idx(N, -1)
 37 | 	{
 38 | 		{
 39 | 			int nxt_idx = 0;
 40 | 			std::vector<int> depth_freq(N, 0);
 41 | 			std::vector<int> depth_val(N, -1);
 42 | 			std::vector<int> heavy_child(N, -1);
 43 | 			auto build_one_tree = [&](int rt) -> void {
 44 | 				std::y_combinator([&](auto self, int cur, int prv) -> int {
 45 | 					int cur_sz = 1;
 46 | 					int cur_heavy = -1;
 47 | 					int cur_heavy_weight = 0;
 48 | 					for (int nxt : adj[cur]) {
 49 | 						if (nxt == prv) continue;
 50 | 						int n_sz = self(nxt, cur);
 51 | 						if (n_sz > cur_heavy_weight) {
 52 | 							cur_heavy = nxt;
 53 | 							cur_heavy_weight = n_sz;
 54 | 						}
 55 | 						cur_sz += n_sz;
 56 | 					}
 57 | 					heavy_child[cur] = cur_heavy;
 58 | 					return cur_sz;
 59 | 				})(rt, -1);
 60 | 				assert(idx[rt] == -1);
 61 | 				std::y_combinator([&](auto&& self, int cur, int prv, int par_idx, int d, bool is_heavy_root) -> void {
 62 | 					int cur_idx = idx[cur] = nxt_idx++;
 63 | 					preorder[cur_idx] = cur;
 64 | 					par[cur_idx] = par_idx;
 65 | 					depth[cur_idx] = d;
 66 | 					depth_val[cur_idx] = ++depth_freq[d];
 67 | 					assert(is_heavy_root == (par_idx == -1 || cur_idx != par_idx + 1));
 68 | 					if (is_heavy_root) {
 69 | 						heavy_par[cur_idx] = par_idx;
 70 | 						heavy_dist[cur_idx] = 1;
 71 | 					} else {
 72 | 						assert(par_idx == cur_idx - 1);
 73 | 						heavy_par[cur_idx] = heavy_par[cur_idx - 1];
 74 | 						heavy_dist[cur_idx] = heavy_dist[cur_idx - 1] + 1;
 75 | 					}
 76 | 					if (heavy_child[cur] != -1) {
 77 | 						int nxt = heavy_child[cur];
 78 | 						self(nxt, cur, cur_idx, d+1, false);
 79 | 					}
 80 | 					for (int nxt : adj[cur]) {
 81 | 						if (nxt == prv) continue;
 82 | 						if (nxt == heavy_child[cur]) continue;
 83 | 						self(nxt, cur, cur_idx, d+1, true);
 84 | 					}
 85 | 					sz[cur_idx] = nxt_idx - cur_idx;
 86 | 				})(rt, -1, -1, 0, true);
 87 | 			};
 88 | 			if (!roots.empty()) {
 89 | 				for (int r : roots) build_one_tree(r);
 90 | 			} else {
 91 | 				for (int rt = 0; rt < N; rt++) {
 92 | 					if (idx[rt] == -1) {
 93 | 						build_one_tree(rt);
 94 | 					}
 95 | 				}
 96 | 			}
 97 | 			for (int i = 0; i < N; i++) {
 98 | 				assert(idx[i] != -1);
 99 | 			}
100 | 			for (int i = 1; i < N; i++) {
101 | 				depth_freq[i] += depth_freq[i-1];
102 | 			}
103 | 			for (int i = 0; i < N; i++) {
104 | 				depth_val[i] = depth_freq[depth[i]] - depth_val[i];
105 | 				assert(depth_val_to_idx[depth_val[i]] == -1);
106 | 				depth_val_to_idx[depth_val[i]] = i;
107 | 			}
108 | 			depth_val_rmq = RangeMinQuery<int>(depth_val);
109 | 		}
110 | 	}
111 | 
112 | 	int dist(int a, int b) const {
113 | 		if (a == b) return 0;
114 | 		a = idx[a], b = idx[b];
115 | 		if (a > b) std::swap(a, b);
116 | 		int o = depth_val_to_idx[depth_val_rmq.query(a+1, b)];
117 | 		return depth[a] + depth[b] - 2 * (depth[o] - 1);
118 | 	}
119 | 
120 | 	int lca(int a, int b) const {
121 | 		if (a == b) return a;
122 | 		a = idx[a], b = idx[b];
123 | 		if (a > b) std::swap(a, b);
124 | 		int o = depth_val_to_idx[depth_val_rmq.query(a+1, b)];
125 | 		return preorder[par[o]];
126 | 	}
127 | 
128 | 	// query which subtree of a contains b; b must be inside a
129 | 	// returns -1 if a == b
130 | 	int get_subtree(int a, int b) const {
131 | 		if (a == b) return -1;
132 | 		a = idx[a], b = idx[b];
133 | 		assert(a < b && b < a + sz[a]);
134 | 		return preorder[depth_val_to_idx[depth_val_rmq.query(a+1, b)]];
135 | 	}
136 | 
137 | 	// next from a to b, a and b must be in the same tree
138 | 	int get_next(int a, int b) const {
139 | 		if (a == b) return -1;
140 | 		a = idx[a], b = idx[b];
141 | 		if (a < b && b < a + sz[a]) {
142 | 			return preorder[depth_val_to_idx[depth_val_rmq.query(a+1, b)]];
143 | 		} else {
144 | 			return preorder[par[a]];
145 | 		}
146 | 	}
147 | 
148 | 	int get_ancestor(int a, int k) const {
149 | 		assert(k >= 0);
150 | 		a = idx[a];
151 | 		if (k > depth[a]) return -1;
152 | 		while (a != -1 && k > 0) {
153 | 			if (k >= heavy_dist[a]) {
154 | 				k -= heavy_dist[a];
155 | 				assert(heavy_par[a] <= a - heavy_dist[a]);
156 | 				a = heavy_par[a];
157 | 			} else {
158 | 				a -= k;
159 | 				k = 0;
160 | 			}
161 | 		}
162 | 		return preorder[a];
163 | 	}
164 | 
165 | 	int get_depth(int a) const { return depth[idx[a]]; }
166 | 	int get_sz(int a) const { return sz[idx[a]]; }
167 | 	std::array<int, 2> get_range(int a) const { return {idx[a], idx[a] + sz[idx[a]]}; }
168 | 	bool is_ancestor(int a, int b) { return idx[a] <= idx[b] && idx[b] < idx[a] + sz[idx[a]]; }
169 | };
170 | 


--------------------------------------------------------------------------------
/src/suffix_array.hpp:
--------------------------------------------------------------------------------
  1 | #pragma once
  2 | 
  3 | /*
  4 |  * This is mostly inspired by https://golang.org/src/index/suffixarray/sais.go.
  5 |  */
  6 | 
  7 | #include <vector>
  8 | #include <string>
  9 | #include <cassert>
 10 | #include <cstring>
 11 | #include <type_traits>
 12 | 
 13 | #include "rmq.hpp"
 14 | 
 15 | template<class T> int sz(T&& arg) { using std::size; return int(size(std::forward<T>(arg))); }
 16 | 
 17 | class SuffixArray {
 18 | public:
 19 | 	using index_t = int;
 20 | 	int N;
 21 | 	std::vector<index_t> sa;
 22 | 	std::vector<index_t> rank;
 23 | 	// lcp[i] = get_lcp(sa[i], sa[i+1])
 24 | 	std::vector<index_t> lcp;
 25 | 	RangeMinQuery<std::pair<index_t, index_t>> rmq;
 26 | 
 27 | 	SuffixArray() {}
 28 | 
 29 | 	template <typename String> static SuffixArray construct_raw(const String& S, index_t sigma) {
 30 | 		int N = sz(S);
 31 | 		SuffixArray sa(N);
 32 | 
 33 | 		sa.build_sa(S, sigma);
 34 | 		sa.build_rank();
 35 | 		sa.build_lcp(S);
 36 | 		sa.build_rmq();
 37 | 
 38 | 		return sa;
 39 | 	}
 40 | 
 41 | 	// Pass a function which returns a value in [0, sigma)
 42 | 	template <typename String, typename F> static SuffixArray map_and_construct(const String& S, const F& f, int sigma) {
 43 | 		std::vector<decltype((f(S[0])))> mapped(sz(S));
 44 | 		for (int i = 0; i < sz(S); i++) {
 45 | 			mapped[i] = f(S[i]);
 46 | 			assert(0 <= int(mapped[i]) && int(mapped[i]) < sigma);
 47 | 		}
 48 | 		return construct_raw(mapped, sigma);
 49 | 	}
 50 | 
 51 | 	// Sorts the elements of S and then runs suffix array. This takes O(N log N) time with no dependence on sigma.
 52 | 	template <typename String> static SuffixArray sort_and_construct(const String& S) {
 53 | 		using std::begin;
 54 | 		using std::end;
 55 | 		using value_type = typename std::iterator_traits<decltype(begin(S))>::value_type;
 56 | 		using compressed_value_type = typename std::conditional<
 57 | 			sizeof(value_type) < sizeof(index_t),
 58 | 			value_type,
 59 | 			index_t
 60 | 		>::type;
 61 | 
 62 | 		std::vector<compressed_value_type> compressed_s(sz(S));
 63 | 		int sigma = 0;
 64 | 
 65 | 		{
 66 | 			std::vector<value_type> vals(begin(S), end(S));
 67 | 			std::sort(vals.begin(), vals.end());
 68 | 			vals.resize(unique(vals.begin(), vals.end()) - vals.begin());
 69 | 			for (int i = 0; i < sz(S); i++) {
 70 | 				compressed_s[i] = compressed_value_type(index_t(std::lower_bound(vals.begin(), vals.end(), S[i]) - vals.begin()));
 71 | 			}
 72 | 			sigma = int(vals.size());
 73 | 		}
 74 | 
 75 | 		return construct_raw(compressed_s, sigma);
 76 | 	}
 77 | 
 78 | 	// Shifts the elements so that sigma = max(S) - min(S) + 1
 79 | 	template <typename String> static SuffixArray shift_and_construct(const String& S) {
 80 | 		using std::begin;
 81 | 		using std::end;
 82 | 		using value_type = typename std::iterator_traits<decltype(begin(S))>::value_type;
 83 | 
 84 | 		std::vector<value_type> compressed_s(sz(S));
 85 | 		int sigma = 0;
 86 | 
 87 | 		if (sz(S) > 0) {
 88 | 			value_type lo = *begin(S), hi = *begin(S);
 89 | 			for (const auto& x : S) {
 90 | 				if (x < lo) lo = x;
 91 | 				if (x > hi) hi = x;
 92 | 			}
 93 | 
 94 | 			for (int i = 0; i < sz(S); i++) {
 95 | 				compressed_s[i] = value_type(S[i] - lo);
 96 | 			}
 97 | 			sigma = int(hi - lo + 1);
 98 | 		}
 99 | 
100 | 		return construct_raw(compressed_s, sigma);
101 | 	}
102 | 
103 | 	// Renumber/filter to only the used elements with bucket sorting. Still takes O(max(S) - min(S) + 1) memory/time,
104 | 	// but should be less memory than `shift_and_construct` when sigma ~ N and max(S) - min(S) + 1 > N.
105 | 	template <typename String> static SuffixArray bucket_and_construct(const String& S) {
106 | 		using std::begin;
107 | 		using std::end;
108 | 		using value_type = typename std::iterator_traits<decltype(begin(S))>::value_type;
109 | 		using compressed_value_type = typename std::conditional<
110 | 			sizeof(value_type) < sizeof(index_t),
111 | 			value_type,
112 | 			index_t
113 | 		>::type;
114 | 
115 | 		std::vector<compressed_value_type> compressed_s(sz(S));
116 | 		int sigma = 0;
117 | 
118 | 		if (sz(S) > 0) {
119 | 			value_type lo = *begin(S), hi = *begin(S);
120 | 			for (const auto& x : S) {
121 | 				if (x < lo) lo = x;
122 | 				if (x > hi) hi = x;
123 | 			}
124 | 
125 | 			std::vector<compressed_value_type> buckets(hi - lo + 1, 0);
126 | 			for (const auto& x : S) {
127 | 				buckets[x - lo] = 1;
128 | 			}
129 | 			for (int v = 0; v < int(buckets.size()); v++) {
130 | 				if (buckets[v]) buckets[v] = compressed_value_type(sigma++);
131 | 			}
132 | 
133 | 			for (int i = 0; i < sz(S); i++) {
134 | 				compressed_s[i] = buckets[S[i] - lo];
135 | 			}
136 | 		}
137 | 
138 | 		return construct_raw(compressed_s, sigma);
139 | 	}
140 | 
141 | 	index_t get_lcp(index_t a, index_t b) const {
142 | 		if (a == b) return N-a;
143 | 		a = rank[a], b = rank[b];
144 | 		if (a > b) std::swap(a, b);
145 | 		return rmq.query(a, b-1).first;
146 | 	}
147 | 
148 | 	// Get the split in the suffix tree, using half-open intervals
149 | 	// Returns len, idx
150 | 	std::pair<index_t, index_t> get_split(index_t l, index_t r) const {
151 | 		assert(r - l > 1);
152 | 		return rmq.query(l, r-2);
153 | 	}
154 | 
155 | private:
156 | 	explicit SuffixArray(int N_) : N(N_) {}
157 | 
158 | 	template <typename String> void build_sa(const String& S, index_t sigma) {
159 | 		sa = std::vector<index_t>(N+1);
160 | 		assert(sigma >= 0);
161 | 		for (auto s : S) assert(0 <= index_t(s) && index_t(s) < sigma);
162 | 		std::vector<index_t> tmp(sigma + std::max(N, sigma));
163 | 		SuffixArray::sais<String>(N, S, sa.data(), sigma, tmp.data());
164 | 	}
165 | 	// Induced-sorting suffix array construction: writes N+1 entries to sa[0..N], with sa[0] == N (the empty suffix). tmp is scratch space.
166 | 	template <typename String> static void sais(int N, const String& S, index_t* sa, int sigma, index_t* tmp) {
167 | 		if (N == 0) {
168 | 			sa[0] = 0;
169 | 			return;
170 | 		} else if (N == 1) {
171 | 			sa[0] = 1;
172 | 			sa[1] = 0;
173 | 			return;
174 | 		}
175 | 
176 | 		// Phase 1: Initialize the frequency array, which will let us lookup buckets.
177 | 		index_t* freq = tmp; tmp += sigma; // freq takes the first sigma slots; tmp now points at the sigma bucket cursors
178 | 		memset(freq, 0, sizeof(*freq) * sigma);
179 | 		for (int i = 0; i < N; i++) {
180 | 			++freq[index_t(S[i])]; // symbols index freq directly, so they are assumed to lie in [0, sigma)
181 | 		}
182 | 		auto build_bucket_start = [&]() {
183 | 			int cur = 1;
184 | 			for (int v = 0; v < sigma; v++) {
185 | 				tmp[v] = cur;
186 | 				cur += freq[v];
187 | 			}
188 | 		};
189 | 		auto build_bucket_end = [&]() {
190 | 			int cur = 1;
191 | 			for (int v = 0; v < sigma; v++) {
192 | 				cur += freq[v];
193 | 				tmp[v] = cur;
194 | 			}
195 | 		};
196 | 
197 | 		int num_pieces = 0;
198 | 
199 | 		int first_endpoint = 0;
200 | 		// Phase 2: find the right-endpoints of the pieces
201 | 		{
202 | 			build_bucket_end();
203 | 
204 | 			// Initialize the final endpoint out-of-band this way so that we don't try to look up tmp[-1].
205 | 			// This doesn't count towards num_pieces.
206 | 			sa[0] = N;
207 | 
208 | 			index_t c0 = S[N-1], c1 = -1; bool isS = false;
209 | 			for (int i = N-2; i >= 0; i--) {
210 | 				c1 = c0;
211 | 				c0 = S[i];
212 | 				if (c0 < c1) {
213 | 					isS = true;
214 | 				} else if (c0 > c1 && isS) { // S[i] > S[i+1] while i+1 is S-type: i+1 is a piece boundary
215 | 					isS = false;
216 | 					// insert i+1
217 | 					sa[first_endpoint = --tmp[c1]] = i+1;
218 | 					++num_pieces;
219 | 				}
220 | 			}
221 | 		}
222 | 
223 | 		// If num_pieces <= 1, we don't need to actually run the recursion, it's just sorted automatically
224 | 		// Otherwise, we're going to rebucket
225 | 		if (num_pieces > 1) {
226 | 			// Remove the first endpoint, we don't need to run the IS on this
227 | 			sa[first_endpoint] = 0;
228 | 
229 | 			// Run IS for L-type
230 | 			{
231 | 				build_bucket_start();
232 | 				for (int z = 0; z <= N; z++) {
233 | 					int v = sa[z];
234 | 					if (!v) continue;
235 | 
236 | 					// Leave for the S-round
237 | 					if (v < 0) continue;
238 | 
239 | 					// clear out our garbage
240 | 					sa[z] = 0;
241 | 
242 | 					--v;
243 | 					index_t c0 = S[v-1], c1 = S[v];
244 | 					sa[tmp[c1]++] = (c0 < c1) ? ~v : v;
245 | 				}
246 | 			}
247 | 
248 | 			index_t* const sa_end = sa + N + 1;
249 | 
250 | 			index_t* pieces = sa_end;
251 | 			// Run IS for S-type and compactify
252 | 			{
253 | 				build_bucket_end();
254 | 				for (int z = N; z >= 0; z--) {
255 | 					int v = sa[z];
256 | 					if (!v) continue;
257 | 
258 | 					// clear our garbage
259 | 					sa[z] = 0;
260 | 
261 | 					if (v > 0) {
262 | 						*--pieces = v;
263 | 						continue;
264 | 					}
265 | 
266 | 					v = ~v;
267 | 
268 | 					--v;
269 | 					index_t c0 = S[v-1], c1 = S[v];
270 | 					sa[--tmp[c1]] = (c0 > c1) ? v : ~v;
271 | 				}
272 | 			}
273 | 
274 | 			// Compute the lengths of the pieces in preparation for equality
275 | 			// comparison, and store them in sa[v/2]. We set the length of the
276 | 			// final piece to 0; it compares unequal to everything because of
277 | 			// the sentinel.
278 | 			{
279 | 				int prv_start = N;
280 | 				index_t c0 = S[N-1], c1 = -1; bool isS = false;
281 | 				for (int i = N-2; i >= 0; i--) {
282 | 					c1 = c0;
283 | 					c0 = S[i];
284 | 					if (c0 < c1) {
285 | 						isS = true;
286 | 					} else if (c0 > c1 && isS) {
287 | 						isS = false;
288 | 
289 | 						// insert i+1
290 | 						int v = i+1;
291 | 						sa[v>>1] = prv_start == N ? 0 : prv_start - v;
292 | 						prv_start = v;
293 | 					}
294 | 				}
295 | 			}
296 | 
297 | 			// Compute the alphabet, storing the result into sa[v/2].
298 | 			int next_sigma = 0;
299 | 			{
300 | 				int prv_len = -1, prv_v = 0;
301 | 				for (int i = 0; i < num_pieces; i++) {
302 | 					int v = pieces[i];
303 | 					int len = sa[v>>1];
304 | 
305 | 					bool eq = prv_len == len;
306 | 					for (int a = 0; eq && a < len; ++a) {
307 | 						eq = S[v+a] == S[prv_v+a];
308 | 					}
309 | 					if (!eq) {
310 | 						next_sigma++;
311 | 						prv_len = len;
312 | 						prv_v = v;
313 | 					}
314 | 
315 | 					sa[v>>1] = next_sigma; // purposely leave this 1 large to check != 0
316 | 				}
317 | 			}
318 | 
319 | 			if (next_sigma == num_pieces) {
320 | 				sa[0] = N;
321 | 				memcpy(sa+1, pieces, sizeof(*sa) * num_pieces);
322 | 			} else {
323 | 				index_t* next_S = sa_end;
324 | 
325 | 				// Finally, pack the input to the SA
326 | 				{
327 | 					for (int i = (N-1)>>1; i >= 0; i--) {
328 | 						int v = sa[i];
329 | 						if (v) *--next_S = v-1;
330 | 						sa[i] = 0;
331 | 					}
332 | 				}
333 | 
334 | 				memset(sa, 0, sizeof(*sa) * (num_pieces+1));
335 | 				sais<const index_t*>(num_pieces, next_S, sa, next_sigma, tmp); // tmp already points past this level's freq table; bucket cursors are rebuilt below
336 | 
337 | 				{ // Compute the piece start points again and use those to map up the suffix array
338 | 					next_S = sa_end;
339 | 					index_t c0 = S[N-1], c1 = -1; bool isS = false;
340 | 					for (int i = N-2; i >= 0; i--) {
341 | 						c1 = c0;
342 | 						c0 = S[i];
343 | 						if (c0 < c1) {
344 | 							isS = true;
345 | 						} else if (c0 > c1 && isS) {
346 | 							isS = false;
347 | 
348 | 							int v = i+1;
349 | 							*--next_S = v;
350 | 						}
351 | 					}
352 | 					sa[0] = N;
353 | 					for (int i = 1; i <= num_pieces; i++) {
354 | 						sa[i] = next_S[sa[i]];
355 | 					}
356 | 				}
357 | 			}
358 | 
359 | 			// zero everything else
360 | 			memset(sa+num_pieces+1, 0, sizeof(*sa) * (N - num_pieces));
361 | 
362 | 			{
363 | 				// Scatter the finished pieces
364 | 				build_bucket_end();
365 | 				for (int i = num_pieces; i > 0; i--) {
366 | 					int v = sa[i];
367 | 					sa[i] = 0;
368 | 
369 | 					index_t c1 = S[v];
370 | 					sa[--tmp[c1]] = v;
371 | 				}
372 | 			}
373 | 		}
374 | 
375 | 		// Home stretch! Just finish out with the L-type and then S-type
376 | 		{
377 | 			build_bucket_start();
378 | 			for (int z = 0; z <= N; z++) {
379 | 				int v = sa[z];
380 | 				if (v <= 0) continue;
381 | 				--v;
382 | 				index_t c1 = S[v];
383 | 				index_t c0 = v ? S[v-1] : c1; // if v = 0, we don't want to invert
384 | 				sa[tmp[c1]++] = (c0 < c1) ? ~v : v;
385 | 			}
386 | 		}
387 | 
388 | 		// This just aggressively overwrites our original scattered pieces with the correct values
389 | 		{
390 | 			build_bucket_end();
391 | 			for (int z = N; z >= 0; z--) {
392 | 				int v = sa[z];
393 | 				if (v >= 0) continue;
394 | 				sa[z] = v = ~v;
395 | 				--v;
396 | 				index_t c1 = S[v];
397 | 				index_t c0 = v ? S[v-1] : c1+1;
398 | 				sa[--tmp[c1]] = (c0 > c1) ? v : ~v;
399 | 			}
400 | 		}
401 | 	}
402 | 
403 | 	void build_rank() { // rank[sa[r]] = r for every slot r in 0..N (the inverse of sa when sa is a permutation)
404 | 		rank = std::vector<index_t>(N+1);
405 | 		for (int slot = 0; slot <= N; ++slot) { rank[sa[slot]] = slot; }
406 | 	}
407 | 
408 | 	template <typename String> void build_lcp(const String& S) { // lcp[r-1] = common-prefix length of the suffixes at sa[r-1] and sa[r]
409 | 		assert(sz(S) == N);
410 | 		lcp = std::vector<index_t>(N);
411 | 		for (int pos = 0, match = 0; pos < N - 1; pos++) { // match is carried over from one start position to the next
412 | 			const int prev = sa[rank[pos]-1], limit = N - std::max(pos, prev); // prev: the suffix right before pos in sa order
413 | 			while (match < limit && S[pos+match] == S[prev+match]) match++;
414 | 			lcp[rank[pos]-1] = match;
415 | 			if (match > 0) --match; // give back one character before moving on to pos + 1
416 | 		}
417 | 	}
418 | 
419 | 	void build_rmq() { // builds rmq over (lcp value, slot + 1) pairs
420 | 		using entry_t = std::pair<index_t, index_t>;
421 | 		std::vector<entry_t> keyed(N);
422 | 		// Pair each lcp[k] with k+1 so that a range minimum also reports where it came from.
423 | 		for (int k = 0; k < N; k++) keyed[k] = entry_t(lcp[k], k+1);
424 | 		rmq = RangeMinQuery<entry_t>(std::move(keyed));
425 | 	}
426 | };
427 | 
428 | class PrefixArray : private SuffixArray { // a SuffixArray built over the reversed input string
429 | 	PrefixArray(const SuffixArray& sa_) : SuffixArray(sa_) {}
430 | 	PrefixArray(SuffixArray&& sa_) : SuffixArray(std::move(sa_)) {}
431 | public:
432 | 	PrefixArray() {}
433 | 	template <typename String> static PrefixArray construct_raw(const String& S, int sigma) {
434 | 		return PrefixArray(SuffixArray::construct_raw(String(S.rbegin(), S.rend()), sigma)); // sigma belongs to the SuffixArray builder, not to the PrefixArray constructor
435 | 	}
436 | 
437 | 	// TODO: Fill in other constructors
438 | 	// get_lcs(a, b) forwards to get_lcp on the mirrored positions N - a and N - b of the reversed string.
439 | 	int get_lcs(int a, int b) const {
440 | 		return SuffixArray::get_lcp(SuffixArray::N - a, SuffixArray::N - b);
441 | 	}
442 | };
443 | 


--------------------------------------------------------------------------------
/src/tensor.hpp:
--------------------------------------------------------------------------------
  1 | #pragma once
  2 | #include <array>
  3 | #include <cassert>
  4 | 
  5 | template <typename T, int NDIMS> struct tensor_view { // non-owning strided view over NDIMS-dimensional data
  6 | 	static_assert(NDIMS >= 0, "NDIMS must be nonnegative");
  7 | 
  8 | protected:
  9 | 	std::array<int, NDIMS> shape; // extent of each dimension
 10 | 	std::array<int, NDIMS> strides; // element offset contributed by one step along each dimension
 11 | 	T* data; // element addressed by the all-zero index; never freed by the view
 12 | 
 13 | 	tensor_view(std::array<int, NDIMS> shape_, std::array<int, NDIMS> strides_, T* data_) : shape(shape_), strides(strides_), data(data_) {}
 14 | 
 15 | public:
 16 | 	tensor_view() : shape{}, strides{}, data(nullptr) {} // `{}` zero-fills and, unlike `{0}`, is portable for a zero-length std::array (NDIMS == 0)
 17 | 
 18 | protected:
 19 | 	int flatten_index(std::array<int, NDIMS> idx) const { // offset of idx from data: sum of idx[i] * strides[i]
 20 | 		int res = 0;
 21 | 		for (int i = 0; i < NDIMS; i++) { res += idx[i] * strides[i]; }
 22 | 		return res;
 23 | 	}
 24 | 	int flatten_index_checked(std::array<int, NDIMS> idx) const { // same offset, asserting 0 <= idx[i] < shape[i] first
 25 | 		int res = 0;
 26 | 		for (int i = 0; i < NDIMS; i++) {
 27 | 			assert(0 <= idx[i] && idx[i] < shape[i]);
 28 | 			res += idx[i] * strides[i];
 29 | 		}
 30 | 		return res;
 31 | 	}
 32 | 
 33 | public:
 34 | 	T& operator[] (std::array<int, NDIMS> idx) const { // element access; bounds are asserted only under _GLIBCXX_DEBUG
 35 | #ifdef _GLIBCXX_DEBUG
 36 | 		return data[flatten_index_checked(idx)];
 37 | #else
 38 | 		return data[flatten_index(idx)];
 39 | #endif
 40 | 	}
 41 | 	T& at(std::array<int, NDIMS> idx) const { // element access with asserted bounds (asserts vanish under NDEBUG)
 42 | 		return data[flatten_index_checked(idx)];
 43 | 	}
 44 | 
 45 | 	template <int D = NDIMS>
 46 | 	typename std::enable_if<(0 < D), tensor_view<T, NDIMS-1>>::type operator[] (int idx) const { // sub-view with the leading index fixed to idx (unchecked)
 47 | 		std::array<int, NDIMS-1> nshape; std::copy(shape.begin()+1, shape.end(), nshape.begin());
 48 | 		std::array<int, NDIMS-1> nstrides; std::copy(strides.begin()+1, strides.end(), nstrides.begin());
 49 | 		T* ndata = data + (strides[0] * idx);
 50 | 		return tensor_view<T, NDIMS-1>(nshape, nstrides, ndata);
 51 | 	}
 52 | 	template <int D = NDIMS>
 53 | 	typename std::enable_if<(0 < D), tensor_view<T, NDIMS-1>>::type at(int idx) const { // as operator[](int), after asserting idx < shape[0]
 54 | 		assert(0 <= idx && idx < shape[0]);
 55 | 		return operator[](idx);
 56 | 	}
 57 | 
 58 | 	template <int D = NDIMS>
 59 | 	typename std::enable_if<(0 == D), T&>::type operator * () const { // rank-0 views only: the single element
 60 | 		return *data;
 61 | 	}
 62 | 
 63 | 	template <typename U, int D> friend struct tensor_view;
 64 | 	template <typename U, int D> friend struct tensor;
 65 | };
 66 | 
 67 | template <typename T, int NDIMS> struct tensor { // owning, heap-allocated NDIMS-dimensional array
 68 | 	static_assert(NDIMS >= 0, "NDIMS must be nonnegative");
 69 | 
 70 | protected:
 71 | 	std::array<int, NDIMS> shape; // extent of each dimension
 72 | 	std::array<int, NDIMS> strides; // set by the shape constructor so that the last dimension is contiguous
 73 | 	int len; // number of elements in data
 74 | 	T* data; // new[]-allocated storage, or nullptr for a default-constructed tensor
 75 | 
 76 | public:
 77 | 	tensor() : shape{}, strides{}, len(0), data(nullptr) {} // `{}` (not `{0}`) so a zero-length std::array (NDIMS == 0) is handled portably
 78 | 
 79 | 	explicit tensor(std::array<int, NDIMS> shape_, const T& t = T()) { // allocates the product of shape_ elements, each set to t
 80 | 		shape = shape_;
 81 | 		len = 1;
 82 | 		for (int i = NDIMS-1; i >= 0; i--) {
 83 | 			strides[i] = len; // stride of dimension i = product of the extents after it
 84 | 			len *= shape[i];
 85 | 		}
 86 | 		data = new T[len];
 87 | 		std::fill(data, data + len, t);
 88 | 	}
 89 | 
 90 | 	tensor(const tensor& o) : shape(o.shape), strides(o.strides), len(o.len), data(new T[len]) { // deep copy; len is declared before data
 91 | 		for (int i = 0; i < len; i++) {
 92 | 			data[i] = o.data[i];
 93 | 		}
 94 | 	}
 95 | 
 96 | 	tensor& operator=(tensor&& o) noexcept { // swaps contents; o's destructor later frees the storage we held
 97 | 		using std::swap;
 98 | 		swap(shape, o.shape);
 99 | 		swap(strides, o.strides);
100 | 		swap(len, o.len);
101 | 		swap(data, o.data);
102 | 		return *this;
103 | 	}
104 | 	tensor(tensor&& o) noexcept : tensor() { // noexcept (like move assignment) so containers can move tensors when they reallocate
105 | 		*this = std::move(o);
106 | 	}
107 | 	tensor& operator=(const tensor& o) { // copy, then move-assign the copy
108 | 		return *this = tensor(o);
109 | 	}
110 | 	~tensor() { delete[] data; }
111 | 
112 | 	using view_t = tensor_view<T, NDIMS>;
113 | 	view_t view() { // mutable view over the whole tensor
114 | 		return tensor_view<T, NDIMS>(shape, strides, data);
115 | 	}
116 | 	operator view_t() {
117 | 		return view();
118 | 	}
119 | 
120 | 	using const_view_t = tensor_view<const T, NDIMS>;
121 | 	const_view_t view() const { // read-only view over the whole tensor
122 | 		return tensor_view<const T, NDIMS>(shape, strides, data);
123 | 	}
124 | 	operator const_view_t() const {
125 | 		return view();
126 | 	}
127 | 
128 | 	T& operator[] (std::array<int, NDIMS> idx) { return view()[idx]; } // element access, all forwarded to the view
129 | 	T& at(std::array<int, NDIMS> idx) { return view().at(idx); }
130 | 	const T& operator[] (std::array<int, NDIMS> idx) const { return view()[idx]; }
131 | 	const T& at(std::array<int, NDIMS> idx) const { return view().at(idx); }
132 | 
133 | 	template <int D = NDIMS>
134 | 	typename std::enable_if<(0 < D), tensor_view<T, NDIMS-1>>::type operator[] (int idx) { // sub-view with the leading index fixed
135 | 		return view()[idx];
136 | 	}
137 | 	template <int D = NDIMS>
138 | 	typename std::enable_if<(0 < D), tensor_view<T, NDIMS-1>>::type at(int idx) {
139 | 		return view().at(idx);
140 | 	}
141 | 
142 | 	template <int D = NDIMS>
143 | 	typename std::enable_if<(0 < D), tensor_view<const T, NDIMS-1>>::type operator[] (int idx) const {
144 | 		return view()[idx];
145 | 	}
146 | 	template <int D = NDIMS>
147 | 	typename std::enable_if<(0 < D), tensor_view<const T, NDIMS-1>>::type at(int idx) const {
148 | 		return view().at(idx);
149 | 	}
150 | 
151 | 	template <int D = NDIMS>
152 | 	typename std::enable_if<(0 == D), T&>::type operator * () { // rank-0 tensors only: the single element
153 | 		return *view();
154 | 	}
155 | 	template <int D = NDIMS>
156 | 	typename std::enable_if<(0 == D), const T&>::type operator * () const {
157 | 		return *view();
158 | 	}
159 | };
160 | 


--------------------------------------------------------------------------------
/src/tensor.test.cpp:
--------------------------------------------------------------------------------
 1 | #include <catch2/catch_test_macros.hpp>
 2 | 
 3 | #include "tensor.hpp"
 4 | 
 5 | #include <string>
 6 | 
 7 | TEST_CASE("Tensor", "[tensor]") {
 8 | 	using ten = tensor<std::string, 2>;
 9 | 	ten a({2, 3});
10 | 	a[{0,0}] = "0";
11 | 	a[{0,1}] = "1";
12 | 	a[{0,2}] = "2";
13 | 	a[{1,0}] = "3";
14 | 	a[{1,1}] = "4";
15 | 	a[{1,2}] = "5";
16 | 
17 | 	const ten const_a = a;
18 | 	ten b = a;
19 | 	REQUIRE(b[{0,0}] == "0");
20 | 	REQUIRE(b[{0,1}] == "1");
21 | 	REQUIRE(b[{0,2}] == "2");
22 | 	REQUIRE(b[{1,0}] == "3");
23 | 	REQUIRE(b[{1,1}] == "4");
24 | 	REQUIRE(b[{1,2}] == "5");
25 | 
26 | 	// Bounds checked
27 | 	REQUIRE(b.at({0,0}) == "0");
28 | 	REQUIRE(b.at({0,1}) == "1");
29 | 	REQUIRE(b.at({0,2}) == "2");
30 | 	REQUIRE(b.at({1,0}) == "3");
31 | 	REQUIRE(b.at({1,1}) == "4");
32 | 	REQUIRE(b.at({1,2}) == "5");
33 | 
34 | 	REQUIRE(*b[0][0] == "0");
35 | 	REQUIRE(*b[0][1] == "1");
36 | 	REQUIRE(*b[0][2] == "2");
37 | 	REQUIRE(*b[1][0] == "3");
38 | 	REQUIRE(*b[1][1] == "4");
39 | 	REQUIRE(*b[1][2] == "5");
40 | 
41 | 	REQUIRE(*const_a[0][0] == "0");
42 | 	REQUIRE(*const_a[0][1] == "1");
43 | 	REQUIRE(*const_a[0][2] == "2");
44 | 	REQUIRE(*const_a[1][0] == "3");
45 | 	REQUIRE(*const_a[1][1] == "4");
46 | 	REQUIRE(*const_a[1][2] == "5");
47 | }
48 | 


--------------------------------------------------------------------------------
/src/top_tree.hpp:
--------------------------------------------------------------------------------
  1 | #pragma once
  2 | 
  3 | #include <utility>
  4 | #include <cassert>
  5 | #include <array>
  6 | 
  7 | /**
  8 |  * Top tree!
  9 |  *
 10 |  * Usage:
 11 |  *   Make a `struct T : public top_tree_node_base<T>` (CRTP), which implements
 12 |  *     void update()
 13 |  *     void downdate()
 14 |  *     void do_flip_path()
 15 |  *     void do_other_operation() ...
 16 |  *   When update() is called, you can assume downdate() has already been called.
 17 |  *
 18 |  *   In general, do_op() should eagerly apply the operation but not touch the
 19 |  *   children. In downdate(), you can push down to the children with ch->do_op().
 20 |  *   WARNING: if different operations do not trivially commute, you *must*
 21 |  *   implement a way to swap/alter them to compose in a consistent order, and you
 22 |  *   must use that order when implementing downdate(). This can be nontrivial!
 23 |  *
 24 |  *   Creating vertices:
 25 |  *     n->is_path = n->is_vert = true;
 26 |  *     n->update();
 27 |  *
 28 |  *   Creating edges: no setup/update() needed, just call
 29 |  *     link(e, va, vb);
 30 |  *
 31 |  *   Updates:
 32 |  *     auto cur = get_path(va, vb); // or get_subtree(va, vb)
 33 |  *     cur->do_stuff();
 34 |  *     cur->downdate();
 35 |  *     cur->update_all();
 36 |  *
 37 |  * Node types:
 38 |  *   path edges: compress(c[0], self, c[1])
 39 |  *     assert(is_path && !is_vert);
 40 |  *     assert(c[0] && c[1]);
 41 |  *     assert(c[0]->is_path && c[1]->is_path);
 42 |  *     assert(!c[2]);
 43 |  *   (path) vertices: self + rake(c[0], c[1])
 44 |  *     assert(is_path && is_vert);
 45 |  *     assert(!c[2]);
 46 |  *     if (c[0]) assert(!c[0]->is_path);
 47 |  *     if (c[1]) assert(!c[1]->is_path);
 48 |  *   non-path edges: rake(c[0], self + c[2], c[1])
 49 |  *     assert(!is_path && !is_vert);
 50 |  *     assert(c[2])
 51 |  *     assert(c[2]->is_path);
 52 |  *     if (c[0]) assert(!c[0]->is_path);
 53 |  *     if (c[1]) assert(!c[1]->is_path);
 54 |  */
 55 | 
 56 | template <typename top_tree_node> struct top_tree_node_base {
 57 | private:
 58 | 	top_tree_node* derived_this() {
 59 | 		return static_cast<top_tree_node*>(this);
 60 | 	}
 61 | 	const top_tree_node* derived_this() const {
 62 | 		return static_cast<const top_tree_node*>(this);
 63 | 	}
 64 | public:
 65 | 	mutable top_tree_node* p = nullptr;
 66 | 	std::array<top_tree_node*, 3> c{nullptr, nullptr, nullptr};
 67 | 
 68 | 	// Index (0, 1 or 2) of the slot in p->c that points at this node; requires a parent.
 69 | 	int d() const {
 70 | 		assert(p);
 71 | 		if (this == p->c[0]) return 0;
 72 | 		if (this == p->c[1]) return 1;
 73 | 		// Only the third slot is left. Returning explicitly (instead of ending on a bare
 74 | 		// assert(false)) keeps NDEBUG builds from flowing off the end of a non-void function.
 75 | 		assert(this == p->c[2]);
 76 | 		return 2;
 77 | 	}
 78 | 	top_tree_node*& p_c() const { return p->c[d()]; } // p->c which points to you
 79 | 
 80 | 	// 3 types of verts: path edges, path verts, non-path edges
 81 | 	bool is_path;
 82 | 	bool is_vert;
 83 | 
 84 | 	bool r() const { return !p || p->is_path != is_path; }
 85 | 
 86 | private:
 87 | 	// Convenience wrappers for the derived functions.
 88 | 	void do_flip_path() {
 89 | 		derived_this()->do_flip_path();
 90 | 	}
 91 | 	void downdate() {
 92 | 		derived_this()->downdate();
 93 | 	}
 94 | 	void update() {
 95 | 		derived_this()->update();
 96 | 	}
 97 | 
 98 | public:
 99 | 	void downdate_all() {
100 | 		if (p) p->downdate_all();
101 | 		downdate();
102 | 	}
103 | 
104 | 	// Returns the root
105 | 	top_tree_node* update_all() {
106 | 		top_tree_node* cur = derived_this();
107 | 		cur->update();
108 | 		while (cur->p) {
109 | 			cur = cur->p;
110 | 			cur->update();
111 | 		}
112 | 		return cur;
113 | 	}
114 | 
115 | private:
116 | 
117 | 	void rot() {
118 | 		assert(!is_vert);
119 | 		assert(!r());
120 | 		top_tree_node* pa = p;
121 | 		int x = d(); assert(x == 0 || x == 1);
122 | 		top_tree_node* ch = c[!x];
123 | 
124 | 		if (pa->p) pa->p_c() = derived_this();
125 | 		this->p = pa->p;
126 | 
127 | 		pa->c[x] = ch;
128 | 		if (ch) ch->p = pa;
129 | 
130 | 		this->c[!x] = pa;
131 | 		pa->p = derived_this();
132 | 
133 | 		pa->update();
134 | 	}
135 | 
136 | 	void rot_2(int c_d) {
137 | 		assert(!is_vert);
138 | 		assert(!r());
139 | 		assert(c[c_d]);
140 | 		assert(!c[c_d]->is_vert);
141 | 
142 | 		if (d() == c_d) {
143 | 			rot();
144 | 			return;
145 | 		}
146 | 
147 | 		top_tree_node* pa = p;
148 | 		int x = d(); assert(x == 0 || x == 1);
149 | 		assert(c_d == !x);
150 | 		top_tree_node* ch = c[c_d]->c[!x];
151 | 
152 | 		if (pa->p) pa->p_c() = derived_this();
153 | 		this->p = pa->p;
154 | 
155 | 		pa->c[x] = ch;
156 | 		if (ch) ch->p = pa;
157 | 
158 | 		this->c[c_d]->c[!x] = pa;
159 | 		pa->p = this->c[c_d];
160 | 
161 | 		pa->update();
162 | 	}
163 | 
164 | 	void splay_dir(int x) {
165 | 		while (!r() && d() == x) {
166 | 			if (!p->r() && p->d() == x) {
167 | 				p->rot();
168 | 			}
169 | 			rot();
170 | 		}
171 | 	}
172 | 
173 | 	void splay_2(int c_d) {
174 | 		assert(!is_vert && is_path);
175 | 		assert(c[c_d] && !c[c_d]->is_vert);
176 | 		while (!r()) {
177 | 			if (!p->r()) {
178 | 				if (p->d() == d()) {
179 | 					p->rot();
180 | 				} else {
181 | 					rot_2(c_d);
182 | 				}
183 | 			}
184 | 			rot_2(c_d);
185 | 		}
186 | 	}
187 | 
188 | 	void splay_2() {
189 | 		assert(!is_vert && is_path);
190 | 		assert(!r());
191 | 		p->splay_2(d());
192 | 	}
193 | 
194 | 	void splay_vert() {
195 | 		assert(is_vert);
196 | 		if (r()) {
197 | 			return;
198 | 		}
199 | 		p->splay_dir(d());
200 | 		if (p->r()) {
201 | 			return;
202 | 		}
203 | 
204 | 		assert(p->d() != d());
205 | 		// we have a preference to be the left child
206 | 		if (d() == 1) {
207 | 			p->rot();
208 | 		}
209 | 		assert(d() == 0);
210 | 
211 | 		p->splay_2();
212 | 		assert(d() == 0);
213 | 		assert(p->d() == 1);
214 | 		assert(p->p->r());
215 | 	}
216 | 
217 | 	void splay() {
218 | 		assert(!is_vert);
219 | 		while (!r()) {
220 | 			if (!p->r()) {
221 | 				if (p->d() == d()) {
222 | 					p->rot();
223 | 				} else {
224 | 					rot();
225 | 				}
226 | 			}
227 | 			rot();
228 | 		}
229 | 	}
230 | 
231 | 	top_tree_node* cut_right() {
232 | 		assert(is_vert && is_path);
233 | 		splay_vert();
234 | 
235 | 		if (r() || d() == 1) {
236 | 			assert(r() || (d() == 1 && p->r()));
237 | 			assert(c[0] == nullptr);
238 | 			return nullptr;
239 | 		}
240 | 
241 | 		top_tree_node* pa = p;
242 | 		assert(pa->r() || (pa->d() == 1 && pa->p->r()));
243 | 		assert(!pa->is_vert);
244 | 		assert(pa->is_path);
245 | 		assert(pa->c[0] == this);
246 | 		assert(pa->c[2] == nullptr);
247 | 
248 | 		if (pa->p) pa->p_c() = derived_this();
249 | 		this->p = pa->p;
250 | 
251 | 		pa->is_path = false;
252 | 		pa->c[2] = pa->c[1]; // don't need to change the parent
253 | 
254 | 		pa->c[0] = c[0]; if (c[0]) c[0]->p = pa;
255 | 		pa->c[1] = c[1]; if (c[1]) c[1]->p = pa;
256 | 
257 | 		c[0] = nullptr;
258 | 		c[1] = pa; pa->p = derived_this();
259 | 		assert(c[2] == nullptr);
260 | 
261 | 		assert(c[0] == nullptr);
262 | 
263 | 		pa->update();
264 | 		return pa;
265 | 	}
266 | 
267 | 	top_tree_node* splice_non_path() {
268 | 		assert(!is_path);
269 | 		assert(!is_vert);
270 | 
271 | 		splay();
272 | 		assert(p && p->is_vert && p->is_path);
273 | 		p->cut_right();
274 | 
275 | 		if (!p->is_path) rot();
276 | 		assert(p && p->is_vert && p->is_path);
277 | 		assert(p->r() || (p->d() == 1 && p->p->r()));
278 | 		assert(p->c[d()] == this && p->c[!d()] == nullptr);
279 | 
280 | 		top_tree_node* pa = p;
281 | 
282 | 		if (pa->p) pa->p_c() = derived_this();
283 | 		this->p = pa->p;
284 | 
285 | 		pa->c[0] = c[0]; if (c[0]) c[0]->p = pa;
286 | 		pa->c[1] = c[1]; if (c[1]) c[1]->p = pa;
287 | 
288 | 		assert(c[2] && c[2]->is_path);
289 | 		c[1] = c[2]; // don't need to change parent
290 | 		c[0] = pa; pa->p = derived_this();
291 | 		c[2] = nullptr;
292 | 
293 | 		is_path = true;
294 | 
295 | 		pa->update();
296 | 		return pa;
297 | 	}
298 | 
299 | 	// Return the topmost vertex which was spliced into
300 | 	top_tree_node* splice_all() {
301 | 		top_tree_node* res = nullptr;
302 | 		for (top_tree_node* cur = derived_this(); cur; cur = cur->p) {
303 | 			if (!cur->is_path) {
304 | 				res = cur->splice_non_path();
305 | 			}
306 | 			assert(cur->is_path);
307 | 		}
308 | 		return res;
309 | 	}
310 | 
311 | public:
312 | 	// Return the topmost vertex which was spliced into
313 | 	top_tree_node* expose() {
314 | 		assert(is_vert);
315 | 		downdate_all();
316 | 
317 | 		top_tree_node* res = splice_all();
318 | 
319 | 		cut_right();
320 | 
321 | 		update_all();
322 | 
323 | 		return res;
324 | 	}
325 | 
326 | 	// Return the topmost vertex which was spliced into
327 | 	top_tree_node* expose_edge() {
328 | 		assert(!is_vert);
329 | 		downdate_all();
330 | 
331 | 		top_tree_node* v = is_path ? c[1] : c[2];
332 | 		v->downdate();
333 | 
334 | 		while (!v->is_vert) {
335 | 			v = v->c[0];
336 | 			v->downdate();
337 | 		}
338 | 
339 | 		top_tree_node* res = v->splice_all();
340 | 		v->cut_right();
341 | 		v->update_all();
342 | 
343 | 		assert(!p);
344 | 		assert(v == c[1]);
345 | 
346 | 		return res;
347 | 	}
348 | 
349 | 	// Return the new root
350 | 	top_tree_node* meld_path_end() {
351 | 		assert(!p);
352 | 		top_tree_node* rt = derived_this();
353 | 		while (true) {
354 | 			rt->downdate();
355 | 			if (rt->is_vert) break;
356 | 			rt = rt->c[1];
357 | 		}
358 | 		assert(rt->is_vert);
359 | 		rt->splay_vert();
360 | 		if (rt->c[0] && rt->c[1]) {
361 | 			top_tree_node* ch = rt->c[1];
362 | 			while (true) {
363 | 				ch->downdate();
364 | 				if (!ch->c[0]) break;
365 | 				ch = ch->c[0];
366 | 			}
367 | 			ch->splay();
368 | 			assert(ch->c[0] == nullptr);
369 | 
370 | 			ch->c[0] = rt->c[0];
371 | 			ch->c[0]->p = ch;
372 | 
373 | 			rt->c[0] = nullptr;
374 | 
375 | 			ch->update();
376 | 		} else if (rt->c[0]) {
377 | 			rt->c[1] = rt->c[0];
378 | 			rt->c[0] = nullptr;
379 | 		}
380 | 		assert(rt->c[0] == nullptr);
381 | 		return rt->update_all();
382 | 	}
383 | 
384 | 	void make_root() {
385 | 		expose();
386 | 
387 | 		top_tree_node* rt = derived_this();
388 | 		while (rt->p) {
389 | 			assert(rt->d() == 1);
390 | 			rt = rt->p;
391 | 		}
392 | 		rt->do_flip_path();
393 | 		rt->meld_path_end();
394 | 
395 | 		expose();
396 | 
397 | 		assert(!p);
398 | 	}
399 | 
400 | 	// Link v2 as a child of v1 with edge e
401 | 	friend void link(top_tree_node* e, top_tree_node* v1, top_tree_node* v2) {
402 | 		assert(e && v1 && v2);
403 | 		assert(!e->c[0] && !e->c[1] && !e->c[2]);
404 | 		v1->expose(); while (v1->p) v1 = v1->p;
405 | 		v2->make_root();
406 | 
407 | 		assert(!v1->p);
408 | 		assert(!v2->p);
409 | 
410 | 		e->is_path = true, e->is_vert = false;
411 | 		e->c[0] = v1;
412 | 		v1->p = e;
413 | 		e->c[1] = v2;
414 | 		v2->p = e;
415 | 		e->update();
416 | 	}
417 | 
418 | 	// Link v2's root as a child of v1 with edge e
419 | 	// Returns false if they're already in the same subtree
420 | 	friend bool link_root(top_tree_node* e, top_tree_node* v1, top_tree_node* v2) {
421 | 		assert(e && v1 && v2);
422 | 		assert(!e->c[0] && !e->c[1] && !e->c[2]);
423 | 		v1->expose();
424 | 		v2->expose();
425 | 
426 | 		while (v1->p) v1 = v1->p;
427 | 		while (v2->p) v2 = v2->p;
428 | 		if (v1 == v2) return false;
429 | 
430 | 		assert(!v1->p);
431 | 		assert(!v2->p);
432 | 
433 | 		e->is_path = true, e->is_vert = false;
434 | 		e->c[0] = v1;
435 | 		v1->p = e;
436 | 		e->c[1] = v2;
437 | 		v2->p = e;
438 | 		e->update();
439 | 
440 | 		return true;
441 | 	}
442 | 
443 | 	// Link v2 as a child of v1 with edge e, v2 must be the root
444 | 	friend void link_direct(top_tree_node* e, top_tree_node* v1, top_tree_node* v2) {
445 | 		assert(e && v1 && v2);
446 | 		assert(!e->c[0] && !e->c[1] && !e->c[2]);
447 | 		v1->expose();
448 | 		v2->expose();
449 | 
450 | 		while (v1->p) v1 = v1->p;
451 | 		assert(!v2->p);
452 | 
453 | 		assert(v1 != v2);
454 | 
455 | 		assert(!v1->p);
456 | 		assert(!v2->p);
457 | 
458 | 		e->is_path = true, e->is_vert = false;
459 | 		e->c[0] = v1;
460 | 		v1->p = e;
461 | 		e->c[1] = v2;
462 | 		v2->p = e;
463 | 		e->update();
464 | 	}
465 | 
466 | 	// Cuts the edge e
467 | 	// Returns the top-tree-root of the two halves; they are not necessarily the split vertices.
468 | 	friend std::pair<top_tree_node*, top_tree_node*> cut(top_tree_node* e) {
469 | 		assert(!e->is_vert);
470 | 		e->expose_edge();
471 | 
472 | 		assert(!e->p);
473 | 		assert(e->is_path);
474 | 
475 | 		top_tree_node* l = e->c[0];
476 | 		top_tree_node* r = e->c[1];
477 | 		assert(l && r);
478 | 
479 | 		e->c[0] = e->c[1] = nullptr;
480 | 		l->p = r->p = nullptr;
481 | 
482 | 		assert(e->c[2] == nullptr);
483 | 
484 | 		l = l->meld_path_end();
485 | 
486 | 		return {l, r};
487 | 	}
488 | 
489 | 	friend top_tree_node* get_path(top_tree_node* a, top_tree_node* b) {
490 | 		assert(a->is_vert && b->is_vert);
491 | 		a->make_root();
492 | 		b->expose();
493 | 		if (a == b) {
494 | 			assert(!b->p);
495 | 			return b;
496 | 		}
497 | 		assert(!b->p->p);
498 | 		return b->p;
499 | 	}
500 | 
501 | 	friend top_tree_node* get_subtree(top_tree_node* rt, top_tree_node* n) {
502 | 		rt->make_root();
503 | 		n->expose();
504 | 		return n;
505 | 	}
506 | 
507 | 	friend top_tree_node* get_path_to_root(top_tree_node* b) {
508 | 		assert(b->is_vert);
509 | 		b->expose();
510 | 		if (!b->p) return b;
511 | 		assert(!b->p->p);
512 | 		return b->p;
513 | 	}
514 | 
515 | 	friend top_tree_node* get_subtree_from_root(top_tree_node* n) {
516 | 		n->expose();
517 | 		return n;
518 | 	}
519 | 
520 | };
521 | 
522 | struct sample_top_tree_node : public top_tree_node_base<sample_top_tree_node> {
523 | 	bool lazy_flip_path = false; // true while a path flip still has to be pushed down by downdate()
524 | 	// Eagerly flip this node (swap c[0] and c[1]) and toggle the pending flag; children are left untouched.
525 | 	void do_flip_path() {
526 | 		assert(is_path);
527 | 		std::swap(c[0], c[1]);
528 | 		lazy_flip_path = !lazy_flip_path;
529 | 	}
530 | 
531 | 	// Push a pending flip one level down; vertex nodes only clear the flag.
532 | 	void downdate() {
533 | 		if (!lazy_flip_path) return;
534 | 		assert(is_path);
535 | 		lazy_flip_path = false;
536 | 		if (is_vert) return;
537 | 		c[0]->do_flip_path();
538 | 		c[1]->do_flip_path();
539 | 	}
540 | 
541 | 	// Placeholder for recomputing this node's aggregate; one (currently empty) branch per node type.
542 | 	// NOTE: downdate() is guaranteed to have run on this node already, but
543 | 	// not necessarily on its children, so take care when reading anything
544 | 	// that belongs to grandchildren.
545 | 	void update() {
546 | 		if (is_vert) { // path vertex
547 | 		} else if (is_path) { // path edge
548 | 		} else { // non-path edge
549 | 		}
550 | 	}
551 | };
552 | 


--------------------------------------------------------------------------------
/src/yc.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <functional>
 4 | #include <utility>
 5 | 
 6 | namespace std {
 7 | // Wraps a callable so that every call hands it a reference to the wrapper as its first argument.
 8 | template<class Callable>
 9 | class y_combinator_result {
10 | 	Callable callable_;
11 | public:
12 | 	template<class Init>
13 | 	explicit y_combinator_result(Init &&init): callable_(std::forward<Init>(init)) {}
14 | 	// Prepends std::ref(*this) to the arguments, letting the callable recurse through that handle.
15 | 	template<class ...Params>
16 | 	decltype(auto) operator()(Params &&...params) {
17 | 		return callable_(std::ref(*this), std::forward<Params>(params)...);
18 | 	}
19 | };
20 | // Factory: stores a decayed copy of fun inside a y_combinator_result and returns it by value.
21 | template<class Callable>
22 | decltype(auto) y_combinator(Callable &&fun) {
23 | 	return y_combinator_result<std::decay_t<Callable>>(std::forward<Callable>(fun));
24 | }
25 | 
26 | } // namespace std
27 | 


--------------------------------------------------------------------------------
/third_party/sais-lite-2.4.1/COPYING:
--------------------------------------------------------------------------------
 1 | The sais-lite copyright is as follows:
 2 | 
 3 | Copyright (c) 2008-2010 Yuta Mori All Rights Reserved.
 4 | 
 5 | Permission is hereby granted, free of charge, to any person
 6 | obtaining a copy of this software and associated documentation
 7 | files (the "Software"), to deal in the Software without
 8 | restriction, including without limitation the rights to use,
 9 | copy, modify, merge, publish, distribute, sublicense, and/or sell
10 | copies of the Software, and to permit persons to whom the
11 | Software is furnished to do so, subject to the following
12 | conditions:
13 | 
14 | The above copyright notice and this permission notice shall be
15 | included in all copies or substantial portions of the Software.
16 | 
17 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
18 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
19 | OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
20 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
21 | HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
22 | WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
23 | FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
24 | OTHER DEALINGS IN THE SOFTWARE.
25 | 


--------------------------------------------------------------------------------
/third_party/sais-lite-2.4.1/Makefile:
--------------------------------------------------------------------------------
 1 | # Makefile for suftest and test
 2 | 
 3 | # options
 4 | CC						= gcc
 5 | #CXX						= g++
 6 | #OUTPUT_OPTION	= -o $@
 7 | CFLAGS				= -O3 -fomit-frame-pointer
 8 | #CXXFLAGS			= -O3 -fomit-frame-pointer
 9 | CPPFLAGS			= -Wall -DNDEBUG
10 | LDFLAGS				= 
11 | LDLIBS				= 
12 | #TARGET_ARCH		=
13 | 
14 | # targets
15 | .PHONY: all
16 | all: suftest
17 | suftest: sais.o suftest.o
18 | test:
19 | 	$(CC) -O -g -Wall test.c sais.c -o test
20 | 	./test
21 | 	$(RM) test test.exe
22 | 
23 | distclean: clean
24 | clean:
25 | 	$(RM) suftest suftest.exe test test.exe sais.o suftest.o
26 | 
27 | # dependencies
28 | sais.o suftest.o: sais.h Makefile
29 | 


--------------------------------------------------------------------------------
/third_party/sais-lite-2.4.1/README:
--------------------------------------------------------------------------------
 1 | 
 2 | sais-lite-2.4.0
 3 | ----------------------
 4 | 
 5 | This archive contains the source code of the implementation of
 6 | the IS based linear suffix array construction algorithm
 7 | described in the paper:
 8 | 
 9 | Ge Nong, Sen Zhang and Wai Hong Chan
10 | Two Efficient Algorithms for Linear Suffix Array Construction
11 | 2008?
12 | http://www.cs.sysu.edu.cn/nong/index.files/Two%20Efficient%20Algorithms%20for%20Linear%20Suffix%20Array%20Construction.pdf
13 | 
14 | 
15 | Yuta Mori <yuta.256@gmail.com>
16 | 


--------------------------------------------------------------------------------
/third_party/sais-lite-2.4.1/is_orig.c:
--------------------------------------------------------------------------------
// mask[b] selects bit b of a byte, counting from the most significant bit.
unsigned char mask[]={0x80, 0x40, 0x20, 0x10, 0x08, 0x04, 0x02, 0x01};
// tget(i): value (0 or 1) of bit i in the bit array `t` of the enclosing scope.
#define tget(i) ( (t[(i)/8]&mask[(i)%8]) ? 1 : 0 )
// tset(i, b): set bit i of `t` when b is non-zero, clear it otherwise.
// Expands to a bare assignment, so use it as a statement of its own.
#define tset(i, b) t[(i)/8]=(b) ? (mask[(i)%8]|t[(i)/8]) : ((~mask[(i)%8])&t[(i)/8])
// chr(i): symbol i of `s`; `s` is read as an int array when cs==sizeof(int)
// and as an unsigned char array otherwise (`s` and `cs` come from the caller).
#define chr(i) (cs==sizeof(int)?((int*)s)[i]:((unsigned char *)s)[i])
// isLMS(i): true when i>0, bit i of `t` is set and bit i-1 is clear.
// The argument is not parenthesised, so pass simple expressions only.
#define isLMS(i) (i>0 && tget(i) && !tget(i-1))
  6 | 
  7 | // find the start or end of each bucket
  8 | void getBuckets(unsigned char *s, int *bkt, int n, int K, int cs, bool end) {
  9 |   int i, sum=0;
 10 |   for(i=0; i<=K; i++) bkt[i]=0; // clear all buckets
 11 |   for(i=0; i<n; i++) bkt[chr(i)]++; // compute the size of each bucket
 12 |   for(i=0; i<=K; i++) { sum+=bkt[i]; bkt[i]=end ? sum : sum-bkt[i]; }
 13 | }
 14 | 
 15 | // compute SAl
 16 | void induceSAl(unsigned char *t, int *SA, unsigned char *s, int *bkt,
 17 |                 int n, int K, int cs, bool end) {
 18 |   int i, j;
 19 |   getBuckets(s, bkt, n, K, cs, end); // find starts of buckets
 20 |   for(i=0; i<n; i++) {
 21 |       j=SA[i]-1;
 22 |       if(j>=0 && !tget(j)) SA[bkt[chr(j)]++]=j;
 23 |   }
 24 | }
 25 | 
 26 | // compute SAs
 27 | void induceSAs(unsigned char *t, int *SA, unsigned char *s, int *bkt,
 28 |                 int n, int K, int cs, bool end) {
 29 |   int i, j;
 30 |   getBuckets(s, bkt, n, K, cs, end); // find ends of buckets
 31 |   for(i=n-1; i>=0; i--) {
 32 |       j=SA[i]-1;
 33 |       if(j>=0 && tget(j)) SA[--bkt[chr(j)]]=j;
 34 |   }
 35 | }
 36 | 
// find the suffix array SA of s[0..n-1] in {1..K}^n
// require s[n-1]=0 (the sentinel!), n>=2
// use a working space (excluding s and SA) of at most 2.25n+O(1) for a constant alphabet
//
// s  : input symbols, accessed through chr() with element size cs
// SA : output array of n ints; its tail is also used to hold the reduced string
// n  : number of symbols, sentinel included
// K  : largest symbol value (the bucket array has K+1 entries)
// cs : sizeof(int) when s really points to ints, anything else means bytes
//
// NOTE(review): the malloc() results below are used without a NULL check.
void SA_IS(unsigned char *s, int *SA, int n, int K, int cs) {
  int i, j;
  unsigned char *t=(unsigned char *)malloc(n/8+1); // LS-type array in bits

  // Classify the type of each character
  // bit i is set when s[i] is smaller than s[i+1], or equal to it while
  // bit i+1 is set; the scan runs right to left so bit i+1 is already known
  tset(n-2, 0); tset(n-1, 1); // the sentinel must be in s1, important!!!
  for(i=n-3; i>=0; i--)
    tset(i, (chr(i)<chr(i+1) || (chr(i)==chr(i+1) && tget(i+1)==1))?1:0);

  // stage 1: reduce the problem by at least 1/2
  // sort all the S-substrings
  int *bkt = (int *)malloc(sizeof(int)*(K+1)); // bucket array
  getBuckets(s, bkt, n, K, cs, true); // find ends of buckets
  for(i=0; i<n; i++) SA[i]=-1;
  // drop every LMS position at the current end of its symbol's bucket
  for(i=1; i<n; i++)
    if(isLMS(i)) SA[--bkt[chr(i)]]=i;

  induceSAl(t, SA, s, bkt, n, K, cs, false);
  induceSAs(t, SA, s, bkt, n, K, cs, true);
  free(bkt);

  // compact all the sorted substrings into the first n1 items of SA
  // 2*n1 must be not larger than n (provable)
  int n1=0;
  for(i=0; i<n; i++)
    if(isLMS(SA[i])) SA[n1++]=SA[i];

  // find the lexicographic names of all substrings
  for(i=n1; i<n; i++) SA[i]=-1; // init the name array buffer
  int name=0, prev=-1;
  for(i=0; i<n1; i++) {
    int pos=SA[i]; bool diff=false;
    // compare the substring at pos with the previously named one at prev,
    // symbol by symbol and type bit by type bit, up to the next LMS position
    for(int d=0; d<n; d++)
      if(prev==-1 || chr(pos+d)!=chr(prev+d) || tget(pos+d)!=tget(prev+d))
      { diff=true; break; }
      else if(d>0 && (isLMS(pos+d) || isLMS(prev+d))) break;
    if(diff) { name++; prev=pos; }
    // the name is stored at slot n1 + pos/2 (both branches yield floor(pos/2))
    pos=(pos%2==0)?pos/2:(pos-1)/2;
    SA[n1+pos]=name-1;
  }
  // pack the names (entries >= 0) towards the end of SA, keeping their order
  for(i=n-1, j=n-1; i>=n1; i--)
      if(SA[i]>=0) SA[j--]=SA[i];

  // stage 2: solve the reduced problem
  // recurse if names are not yet unique
  // s1 (the reduced string) is the last n1 ints of SA, SA1 the first n1 ints
  int *SA1=SA, *s1=SA+n-n1;
  if(name<n1)
    SA_IS((unsigned char*)s1, SA1, n1, name-1, sizeof(int));
  else // generate the suffix array of s1 directly
    for(i=0; i<n1; i++) SA1[s1[i]] = i;

  // stage 3: induce the result for the original problem
  bkt = (int *)malloc(sizeof(int)*(K+1)); // bucket array
  // put all left-most S characters into their buckets
  getBuckets(s, bkt, n, K, cs, true); // find ends of buckets
  for(i=1, j=0; i<n; i++)
    if(isLMS(i)) s1[j++]=i; // get p1
  for(i=0; i<n1; i++) SA1[i]=s1[SA1[i]]; // get index in s
  for(i=n1; i<n; i++) SA[i]=-1; // init SA[n1..n-1]
  // walk the sorted LMS suffixes backwards, moving each to the end of its bucket
  for(i=n1-1; i>=0; i--) {
      j=SA[i]; SA[i]=-1;
      SA[--bkt[chr(j)]]=j;
  }
  induceSAl(t, SA, s, bkt, n, K, cs, false);
  induceSAs(t, SA, s, bkt, n, K, cs, true);
  free(bkt); free(t);
}
107 | 


--------------------------------------------------------------------------------
/third_party/sais-lite-2.4.1/sais.c:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * sais.c for sais-lite
  3 |  * Copyright (c) 2008-2010 Yuta Mori All Rights Reserved.
  4 |  *
  5 |  * Permission is hereby granted, free of charge, to any person
  6 |  * obtaining a copy of this software and associated documentation
  7 |  * files (the "Software"), to deal in the Software without
  8 |  * restriction, including without limitation the rights to use,
  9 |  * copy, modify, merge, publish, distribute, sublicense, and/or sell
 10 |  * copies of the Software, and to permit persons to whom the
 11 |  * Software is furnished to do so, subject to the following
 12 |  * conditions:
 13 |  *
 14 |  * The above copyright notice and this permission notice shall be
 15 |  * included in all copies or substantial portions of the Software.
 16 |  *
 17 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 18 |  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
 19 |  * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 20 |  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
 21 |  * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
 22 |  * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 23 |  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 24 |  * OTHER DEALINGS IN THE SOFTWARE.
 25 |  */
 26 | 
 27 | #include <assert.h>
 28 | #include <stdlib.h>
 29 | #include "sais.h"
 30 | 
/* Alphabet size passed as k by the byte-oriented entry points
   sais() and sais_bwt(); may be overridden at compile time. */
#ifndef UCHAR_SIZE
# define UCHAR_SIZE 256
#endif
/* Alphabet-size threshold used by sais_main() when it decides whether the
   bucket arrays are malloc'ed or placed in the spare space behind SA. */
#ifndef MINBUCKETSIZE
# define MINBUCKETSIZE 256
#endif

/* Types used for text positions/counters and for boolean parameters. */
#define sais_index_type int
#define sais_bool_type  int
/* sais_main() only considers LMSsort2() when n does not exceed this value;
   LMSsort2() stores some positions with n added to them. */
#define SAIS_LMSSORT2_LIMIT 0x3fffffff

/* Allocation wrappers: _num elements of _type; the size arguments of
   SAIS_MYFREE are accepted but ignored. */
#define SAIS_MYMALLOC(_num, _type) ((_type *)malloc((_num) * sizeof(_type)))
#define SAIS_MYFREE(_ptr, _num, _type) free((_ptr))
/* Symbol _a of the text T in the enclosing scope: T is read as an array of
   sais_index_type when cs == sizeof(sais_index_type), as bytes otherwise. */
#define chr(_a) (cs == sizeof(sais_index_type) ? ((sais_index_type *)T)[(_a)] : ((unsigned char *)T)[(_a)])
 45 | 
 46 | /* find the start or end of each bucket */
 47 | static
 48 | void
 49 | getCounts(const void *T, sais_index_type *C, sais_index_type n, sais_index_type k, int cs) {
 50 |   sais_index_type i;
 51 |   for(i = 0; i < k; ++i) { C[i] = 0; }
 52 |   for(i = 0; i < n; ++i) { ++C[chr(i)]; }
 53 | }
 54 | static
 55 | void
 56 | getBuckets(const sais_index_type *C, sais_index_type *B, sais_index_type k, sais_bool_type end) {
 57 |   sais_index_type i, sum = 0;
 58 |   if(end) { for(i = 0; i < k; ++i) { sum += C[i]; B[i] = sum; } }
 59 |   else { for(i = 0; i < k; ++i) { sum += C[i]; B[i] = sum - C[i]; } }
 60 | }
 61 | 
/* sort all type LMS suffixes
 *
 * Called by sais_main() after it has written the LMS positions to the ends of
 * their buckets in SA (all other entries are 0). Runs one left-to-right pass
 * ("SAl") and one right-to-left pass ("SAs") over SA.
 *
 * T, n, k, cs : the text, its length, the alphabet size and the element size
 *               understood by chr()
 * C : symbol counts; recomputed here whenever C and B are the same array
 * B : bucket boundaries, overwritten by both passes
 *
 * Entries written as ~value (negative) are flags for the next stage: the
 * first pass flips them back when it reaches them, the ones left by the
 * second pass are picked up by LMSpostproc1().
 */
static
void
LMSsort1(const void *T, sais_index_type *SA,
         sais_index_type *C, sais_index_type *B,
         sais_index_type n, sais_index_type k, int cs) {
  sais_index_type *b, i, j;
  sais_index_type c0, c1;

  /* compute SAl */
  if(C == B) { getCounts(T, C, n, k, cs); }
  getBuckets(C, B, k, 0); /* find starts of buckets */
  /* seed the pass with the last text position; b is the write cursor of the
     bucket of the current symbol c1 */
  j = n - 1;
  b = SA + B[c1 = chr(j)];
  --j;
  *b++ = (chr(j) < c1) ? ~j : j;
  for(i = 0; i < n; ++i) {
    if(0 < (j = SA[i])) {
      assert(chr(j) >= chr(j + 1));
      /* switching symbol: save the cursor of the old bucket, load the new one */
      if((c0 = chr(j)) != c1) { B[c1] = b - SA; b = SA + B[c1 = c0]; }
      assert(i < (b - SA));
      --j;
      /* stored complemented when the symbol before it is smaller */
      *b++ = (chr(j) < c1) ? ~j : j;
      SA[i] = 0;
    } else if(j < 0) {
      /* complemented entry: restore it for the second pass */
      SA[i] = ~j;
    }
  }
  /* compute SAs */
  if(C == B) { getCounts(T, C, n, k, cs); }
  getBuckets(C, B, k, 1); /* find ends of buckets */
  for(i = n - 1, b = SA + B[c1 = 0]; 0 <= i; --i) {
    if(0 < (j = SA[i])) {
      assert(chr(j) <= chr(j + 1));
      if((c0 = chr(j)) != c1) { B[c1] = b - SA; b = SA + B[c1 = c0]; }
      assert((b - SA) <= i);
      --j;
      /* when the symbol before it is larger, store ~(j + 1) instead of j */
      *--b = (chr(j) > c1) ? ~(j + 1) : j;
      SA[i] = 0;
    }
  }
}
/* Post-processing after LMSsort1(): gathers the m sorted LMS entries at the
 * front of SA and assigns a lexicographic name to each LMS substring.
 *
 * T, n, cs : the text, its length and the element size understood by chr()
 * SA       : on entry the sorted entries are the negative (complemented)
 *            ones; on return SA[0..m-1] holds them in sorted order and
 *            SA[m + (p >> 1)] holds the name of the substring starting at p
 * m        : number of LMS substrings
 *
 * Returns the number of distinct names handed out.
 */
static
sais_index_type
LMSpostproc1(const void *T, sais_index_type *SA,
             sais_index_type n, sais_index_type m, int cs) {
  sais_index_type i, j, p, q, plen, qlen, name;
  sais_index_type c0, c1;
  sais_bool_type diff;

  /* compact all the sorted substrings into the first m items of SA
      2*m must be not larger than n (provable) */
  assert(0 < n);
  /* a leading run of negative entries is already in place: just un-flag it */
  for(i = 0; (p = SA[i]) < 0; ++i) { SA[i] = ~p; assert((i + 1) < n); }
  if(i < m) {
    /* move the remaining negative entries down to slot j, zeroing their source */
    for(j = i, ++i;; ++i) {
      assert(i < n);
      if((p = SA[i]) < 0) {
        SA[j++] = ~p; SA[i] = 0;
        if(j == m) { break; }
      }
    }
  }

  /* store the length of all substrings */
  /* T is scanned from right to left in alternating non-increasing and
     non-decreasing runs; each time a boundary is found at i + 1 the distance
     to the previous boundary j is stored at SA[m + ((i + 1) >> 1)] */
  i = n - 1; j = n - 1; c0 = chr(n - 1);
  do { c1 = c0; } while((0 <= --i) && ((c0 = chr(i)) >= c1));
  for(; 0 <= i;) {
    do { c1 = c0; } while((0 <= --i) && ((c0 = chr(i)) <= c1));
    if(0 <= i) {
      SA[m + ((i + 1) >> 1)] = j - i; j = i + 1;
      do { c1 = c0; } while((0 <= --i) && ((c0 = chr(i)) >= c1));
    }
  }

  /* find the lexicographic names of all substrings */
  /* q/qlen describe the substring that received the latest new name;
     q starts at n so that the first comparison is skipped */
  for(i = 0, name = 0, q = n, qlen = 0; i < m; ++i) {
    p = SA[i], plen = SA[m + (p >> 1)], diff = 1;
    if((plen == qlen) && ((q + plen) < n)) {
      for(j = 0; (j < plen) && (chr(p + j) == chr(q + j)); ++j) { }
      if(j == plen) { diff = 0; }
    }
    if(diff != 0) { ++name, q = p, qlen = plen; }
    /* the length slot is reused to hold the name */
    SA[m + (p >> 1)] = name;
  }

  return name;
}
/* Variant of LMSsort1() that sais_main() selects when a 2*k-entry array D is
 * available. It performs the same two bucket passes, and additionally stores
 * some positions with n added to them; LMSpostproc2() later counts those
 * marked entries to obtain the names.
 * NOTE(review): the n offset appears to mark the first entry of each group of
 * equal LMS substrings - confirm against the sais-lite documentation.
 *
 * T, n, k, cs : the text, its length, the alphabet size and the element size
 *               understood by chr()
 * C : symbol counts (must not alias B)
 * B : bucket boundaries, overwritten by both passes
 * D : 2*k counters indexed by (symbol << 1) | flag, zeroed by the caller
 */
static
void
LMSsort2(const void *T, sais_index_type *SA,
         sais_index_type *C, sais_index_type *B, sais_index_type *D,
         sais_index_type n, sais_index_type k, int cs) {
  sais_index_type *b, i, j, t, d;
  sais_index_type c0, c1;
  assert(C != B);

  /* compute SAl */
  getBuckets(C, B, k, 0); /* find starts of buckets */
  /* seed the pass with the last text position, stored with the n offset */
  j = n - 1;
  b = SA + B[c1 = chr(j)];
  --j;
  t = (chr(j) < c1);
  j += n;
  *b++ = (t & 1) ? ~j : j;
  for(i = 0, d = 0; i < n; ++i) {
    if(0 < (j = SA[i])) {
      /* an entry carrying the n offset advances the counter d */
      if(n <= j) { d += 1; j -= n; }
      assert(chr(j) >= chr(j + 1));
      /* switching symbol: save the cursor of the old bucket, load the new one */
      if((c0 = chr(j)) != c1) { B[c1] = b - SA; b = SA + B[c1 = c0]; }
      assert(i < (b - SA));
      --j;
      t = c0; t = (t << 1) | (chr(j) < c1);
      /* first write for slot t since d last changed: add the n offset */
      if(D[t] != d) { j += n; D[t] = d; }
      *b++ = (t & 1) ? ~j : j;
      SA[i] = 0;
    } else if(j < 0) {
      SA[i] = ~j;
    }
  }
  /* scanning backwards, shift the n offset within each run of positive
     entries: add it to an unmarked entry and remove it from the nearest
     marked entry to its left */
  for(i = n - 1; 0 <= i; --i) {
    if(0 < SA[i]) {
      if(SA[i] < n) {
        SA[i] += n;
        for(j = i - 1; SA[j] < n; --j) { }
        SA[j] -= n;
        i = j;
      }
    }
  }

  /* compute SAs */
  getBuckets(C, B, k, 1); /* find ends of buckets */
  for(i = n - 1, d += 1, b = SA + B[c1 = 0]; 0 <= i; --i) {
    if(0 < (j = SA[i])) {
      if(n <= j) { d += 1; j -= n; }
      assert(chr(j) <= chr(j + 1));
      if((c0 = chr(j)) != c1) { B[c1] = b - SA; b = SA + B[c1 = c0]; }
      assert((b - SA) <= i);
      --j;
      t = c0; t = (t << 1) | (chr(j) > c1);
      if(D[t] != d) { j += n; D[t] = d; }
      *--b = (t & 1) ? ~(j + 1) : j;
      SA[i] = 0;
    }
  }
}
/* Post-processing after LMSsort2(): gathers the m sorted LMS entries at the
 * front of SA and derives the names from the entries that carry the n offset.
 *
 * SA : on entry the sorted entries are the negative (complemented) ones
 * n  : text length; a gathered value >= n is a marked entry
 * m  : number of LMS substrings
 *
 * Returns the number of marked entries, used as the number of names. When
 * that is smaller than m, SA[m + (j >> 1)] receives the name of the substring
 * starting at j; otherwise only the n offsets are removed from SA[0..m-1].
 */
static
sais_index_type
LMSpostproc2(sais_index_type *SA, sais_index_type n, sais_index_type m) {
  sais_index_type i, j, d, name;

  /* compact all the sorted LMS substrings into the first m items of SA */
  assert(0 < n);
  /* a leading run of negative entries is already in place: just un-flag it */
  for(i = 0, name = 0; (j = SA[i]) < 0; ++i) {
    j = ~j;
    if(n <= j) { name += 1; }
    SA[i] = j;
    assert((i + 1) < n);
  }
  if(i < m) {
    /* move the remaining negative entries down to slot d, zeroing their source */
    for(d = i, ++i;; ++i) {
      assert(i < n);
      if((j = SA[i]) < 0) {
        j = ~j;
        if(n <= j) { name += 1; }
        SA[d++] = j; SA[i] = 0;
        if(d == m) { break; }
      }
    }
  }
  if(name < m) {
    /* store the lexicographic names */
    /* walk backwards from the largest name; every marked entry lowers d
       before it is used */
    for(i = m - 1, d = name + 1; 0 <= i; --i) {
      if(n <= (j = SA[i])) { j -= n; --d; }
      SA[m + (j >> 1)] = d;
    }
  } else {
    /* unset flags */
    for(i = 0; i < m; ++i) {
      if(n <= (j = SA[i])) { j -= n; SA[i] = j; }
    }
  }

  return name;
}
248 | 
/* compute SA and BWT */
/* induceSA(): final induction used by sais_main() when isbwt == 0.
 *
 * On entry SA holds the sorted LMS suffixes at the ends of their buckets and
 * 0 everywhere else (set up by sais_main() stage 3). A left-to-right pass
 * ("SAl") and a right-to-left pass ("SAs") fill in the remaining entries.
 * During the passes an entry stored as ~value (negative) is one the current
 * pass must not expand; such entries are flipped back when they are visited.
 *
 * T, n, k, cs : the text, its length, the alphabet size and the element size
 *               understood by chr()
 * C : symbol counts; recomputed here whenever C and B are the same array
 * B : bucket boundaries, overwritten by both passes
 */
static
void
induceSA(const void *T, sais_index_type *SA,
         sais_index_type *C, sais_index_type *B,
         sais_index_type n, sais_index_type k, int cs) {
  sais_index_type *b, i, j;
  sais_index_type c0, c1;
  /* compute SAl */
  if(C == B) { getCounts(T, C, n, k, cs); }
  getBuckets(C, B, k, 0); /* find starts of buckets */
  /* seed the pass with the last text position; b is the write cursor of the
     bucket of the current symbol c1 */
  j = n - 1;
  b = SA + B[c1 = chr(j)];
  *b++ = ((0 < j) && (chr(j - 1) < c1)) ? ~j : j;
  for(i = 0; i < n; ++i) {
    /* every visited entry is complemented; only positive ones are expanded */
    j = SA[i], SA[i] = ~j;
    if(0 < j) {
      --j;
      assert(chr(j) >= chr(j + 1));
      /* switching symbol: save the cursor of the old bucket, load the new one */
      if((c0 = chr(j)) != c1) { B[c1] = b - SA; b = SA + B[c1 = c0]; }
      assert(i < (b - SA));
      *b++ = ((0 < j) && (chr(j - 1) < c1)) ? ~j : j;
    }
  }
  /* compute SAs */
  if(C == B) { getCounts(T, C, n, k, cs); }
  getBuckets(C, B, k, 1); /* find ends of buckets */
  for(i = n - 1, b = SA + B[c1 = 0]; 0 <= i; --i) {
    if(0 < (j = SA[i])) {
      --j;
      assert(chr(j) <= chr(j + 1));
      if((c0 = chr(j)) != c1) { B[c1] = b - SA; b = SA + B[c1 = c0]; }
      assert((b - SA) <= i);
      *--b = ((j == 0) || (chr(j - 1) > c1)) ? ~j : j;
    } else {
      /* non-positive entry: flip it back to its final value */
      SA[i] = ~j;
    }
  }
}
/* computeBWT(): final induction used by sais_main() when isbwt != 0.
 *
 * Same two bucket passes as induceSA(), but while an entry is expanded its
 * slot is overwritten with the symbol preceding the suffix, so SA ends up
 * holding symbols instead of positions.
 *
 * T, n, k, cs : the text, its length, the alphabet size and the element size
 *               understood by chr()
 * C : symbol counts; recomputed here whenever C and B are the same array
 * B : bucket boundaries, overwritten by both passes
 *
 * Returns the index i at which the second pass met an entry equal to 0, or -1
 * if it met none. The public *_bwt() wrappers skip that slot when they copy
 * the result out.
 */
static
sais_index_type
computeBWT(const void *T, sais_index_type *SA,
           sais_index_type *C, sais_index_type *B,
           sais_index_type n, sais_index_type k, int cs) {
  sais_index_type *b, i, j, pidx = -1;
  sais_index_type c0, c1;
  /* compute SAl */
  if(C == B) { getCounts(T, C, n, k, cs); }
  getBuckets(C, B, k, 0); /* find starts of buckets */
  /* seed the pass with the last text position; b is the write cursor of the
     bucket of the current symbol c1 */
  j = n - 1;
  b = SA + B[c1 = chr(j)];
  *b++ = ((0 < j) && (chr(j - 1) < c1)) ? ~j : j;
  for(i = 0; i < n; ++i) {
    if(0 < (j = SA[i])) {
      --j;
      assert(chr(j) >= chr(j + 1));
      /* the slot now keeps the preceding symbol, complemented */
      SA[i] = ~((sais_index_type)(c0 = chr(j)));
      if(c0 != c1) { B[c1] = b - SA; b = SA + B[c1 = c0]; }
      assert(i < (b - SA));
      *b++ = ((0 < j) && (chr(j - 1) < c1)) ? ~j : j;
    } else if(j != 0) {
      SA[i] = ~j;
    }
  }
  /* compute SAs */
  if(C == B) { getCounts(T, C, n, k, cs); }
  getBuckets(C, B, k, 1); /* find ends of buckets */
  for(i = n - 1, b = SA + B[c1 = 0]; 0 <= i; --i) {
    if(0 < (j = SA[i])) {
      --j;
      assert(chr(j) <= chr(j + 1));
      /* the slot now keeps the preceding symbol */
      SA[i] = (c0 = chr(j));
      if(c0 != c1) { B[c1] = b - SA; b = SA + B[c1 = c0]; }
      assert((b - SA) <= i);
      /* either the complemented symbol before j, or j itself for expansion */
      *--b = ((0 < j) && (chr(j - 1) > c1)) ? ~((sais_index_type)chr(j - 1)) : j;
    } else if(j != 0) {
      SA[i] = ~j;
    } else {
      /* entry 0: remember where it sits */
      pidx = i;
    }
  }
  return pidx;
}
332 | 
333 | /* find the suffix array SA of T[0..n-1] in {0..255}^n */
334 | static
335 | sais_index_type
336 | sais_main(const void *T, sais_index_type *SA,
337 |           sais_index_type fs, sais_index_type n, sais_index_type k, int cs,
338 |           sais_bool_type isbwt) {
339 |   sais_index_type *C, *B, *D, *RA, *b;
340 |   sais_index_type i, j, m, p, q, t, name, pidx = 0, newfs;
341 |   sais_index_type c0, c1;
342 |   unsigned int flags;
343 | 
344 |   assert((T != NULL) && (SA != NULL));
345 |   assert((0 <= fs) && (0 < n) && (1 <= k));
346 | 
347 |   if(k <= MINBUCKETSIZE) {
348 |     if((C = SAIS_MYMALLOC(k, sais_index_type)) == NULL) { return -2; }
349 |     if(k <= fs) {
350 |       B = SA + (n + fs - k);
351 |       flags = 1;
352 |     } else {
353 |       if((B = SAIS_MYMALLOC(k, sais_index_type)) == NULL) { SAIS_MYFREE(C, k, sais_index_type); return -2; }
354 |       flags = 3;
355 |     }
356 |   } else if(k <= fs) {
357 |     C = SA + (n + fs - k);
358 |     if(k <= (fs - k)) {
359 |       B = C - k;
360 |       flags = 0;
361 |     } else if(k <= (MINBUCKETSIZE * 4)) {
362 |       if((B = SAIS_MYMALLOC(k, sais_index_type)) == NULL) { return -2; }
363 |       flags = 2;
364 |     } else {
365 |       B = C;
366 |       flags = 8;
367 |     }
368 |   } else {
369 |     if((C = B = SAIS_MYMALLOC(k, sais_index_type)) == NULL) { return -2; }
370 |     flags = 4 | 8;
371 |   }
372 |   if((n <= SAIS_LMSSORT2_LIMIT) && (2 <= (n / k))) {
373 |     if(flags & 1) { flags |= ((k * 2) <= (fs - k)) ? 32 : 16; }
374 |     else if((flags == 0) && ((k * 2) <= (fs - k * 2))) { flags |= 32; }
375 |   }
376 | 
377 |   /* stage 1: reduce the problem by at least 1/2
378 |      sort all the LMS-substrings */
379 |   getCounts(T, C, n, k, cs); getBuckets(C, B, k, 1); /* find ends of buckets */
380 |   for(i = 0; i < n; ++i) { SA[i] = 0; }
381 |   b = &t; i = n - 1; j = n; m = 0; c0 = chr(n - 1);
382 |   do { c1 = c0; } while((0 <= --i) && ((c0 = chr(i)) >= c1));
383 |   for(; 0 <= i;) {
384 |     do { c1 = c0; } while((0 <= --i) && ((c0 = chr(i)) <= c1));
385 |     if(0 <= i) {
386 |       *b = j; b = SA + --B[c1]; j = i; ++m;
387 |       do { c1 = c0; } while((0 <= --i) && ((c0 = chr(i)) >= c1));
388 |     }
389 |   }
390 | 
391 |   if(1 < m) {
392 |     if(flags & (16 | 32)) {
393 |       if(flags & 16) {
394 |         if((D = SAIS_MYMALLOC(k * 2, sais_index_type)) == NULL) {
395 |           if(flags & (1 | 4)) { SAIS_MYFREE(C, k, sais_index_type); }
396 |           if(flags & 2) { SAIS_MYFREE(B, k, sais_index_type); }
397 |           return -2;
398 |         }
399 |       } else {
400 |         D = B - k * 2;
401 |       }
402 |       assert((j + 1) < n);
403 |       ++B[chr(j + 1)];
404 |       for(i = 0, j = 0; i < k; ++i) {
405 |         j += C[i];
406 |         if(B[i] != j) { assert(SA[B[i]] != 0); SA[B[i]] += n; }
407 |         D[i] = D[i + k] = 0;
408 |       }
409 |       LMSsort2(T, SA, C, B, D, n, k, cs);
410 |       name = LMSpostproc2(SA, n, m);
411 |       if(flags & 16) { SAIS_MYFREE(D, k * 2, sais_index_type); }
412 |     } else {
413 |       LMSsort1(T, SA, C, B, n, k, cs);
414 |       name = LMSpostproc1(T, SA, n, m, cs);
415 |     }
416 |   } else if(m == 1) {
417 |     *b = j + 1;
418 |     name = 1;
419 |   } else {
420 |     name = 0;
421 |   }
422 | 
423 |   /* stage 2: solve the reduced problem
424 |      recurse if names are not yet unique */
425 |   if(name < m) {
426 |     if(flags & 4) { SAIS_MYFREE(C, k, sais_index_type); }
427 |     if(flags & 2) { SAIS_MYFREE(B, k, sais_index_type); }
428 |     newfs = (n + fs) - (m * 2);
429 |     if((flags & (1 | 4 | 8)) == 0) {
430 |       if((k + name) <= newfs) { newfs -= k; }
431 |       else { flags |= 8; }
432 |     }
433 |     assert((n >> 1) <= (newfs + m));
434 |     RA = SA + m + newfs;
435 |     for(i = m + (n >> 1) - 1, j = m - 1; m <= i; --i) {
436 |       if(SA[i] != 0) {
437 |         RA[j--] = SA[i] - 1;
438 |       }
439 |     }
440 |     if(sais_main(RA, SA, newfs, m, name, sizeof(sais_index_type), 0) != 0) {
441 |       if(flags & 1) { SAIS_MYFREE(C, k, sais_index_type); }
442 |       return -2;
443 |     }
444 | 
445 |     i = n - 1; j = m - 1; c0 = chr(n - 1);
446 |     do { c1 = c0; } while((0 <= --i) && ((c0 = chr(i)) >= c1));
447 |     for(; 0 <= i;) {
448 |       do { c1 = c0; } while((0 <= --i) && ((c0 = chr(i)) <= c1));
449 |       if(0 <= i) {
450 |         RA[j--] = i + 1;
451 |         do { c1 = c0; } while((0 <= --i) && ((c0 = chr(i)) >= c1));
452 |       }
453 |     }
454 |     for(i = 0; i < m; ++i) { SA[i] = RA[SA[i]]; }
455 |     if(flags & 4) {
456 |       if((C = B = SAIS_MYMALLOC(k, int)) == NULL) { return -2; }
457 |     }
458 |     if(flags & 2) {
459 |       if((B = SAIS_MYMALLOC(k, int)) == NULL) {
460 |         if(flags & 1) { SAIS_MYFREE(C, k, sais_index_type); }
461 |         return -2;
462 |       }
463 |     }
464 |   }
465 | 
466 |   /* stage 3: induce the result for the original problem */
467 |   if(flags & 8) { getCounts(T, C, n, k, cs); }
468 |   /* put all left-most S characters into their buckets */
469 |   if(1 < m) {
470 |     getBuckets(C, B, k, 1); /* find ends of buckets */
471 |     i = m - 1, j = n, p = SA[m - 1], c1 = chr(p);
472 |     do {
473 |       q = B[c0 = c1];
474 |       while(q < j) { SA[--j] = 0; }
475 |       do {
476 |         SA[--j] = p;
477 |         if(--i < 0) { break; }
478 |         p = SA[i];
479 |       } while((c1 = chr(p)) == c0);
480 |     } while(0 <= i);
481 |     while(0 < j) { SA[--j] = 0; }
482 |   }
483 |   if(isbwt == 0) { induceSA(T, SA, C, B, n, k, cs); }
484 |   else { pidx = computeBWT(T, SA, C, B, n, k, cs); }
485 |   if(flags & (1 | 4)) { SAIS_MYFREE(C, k, sais_index_type); }
486 |   if(flags & 2) { SAIS_MYFREE(B, k, sais_index_type); }
487 | 
488 |   return pidx;
489 | }
490 | 
491 | /*---------------------------------------------------------------------------*/
492 | 
493 | int
494 | sais(const unsigned char *T, int *SA, int n) {
495 |   if((T == NULL) || (SA == NULL) || (n < 0)) { return -1; }
496 |   if(n <= 1) { if(n == 1) { SA[0] = 0; } return 0; }
497 |   return sais_main(T, SA, 0, n, UCHAR_SIZE, sizeof(unsigned char), 0);
498 | }
499 | 
/* Builds the suffix array of the integer string T[0..n-1] over an alphabet of
   size k into SA[0..n-1].
   Returns -1 for a NULL pointer, a negative n or a non-positive k, 0 for
   n <= 1 (SA[0] is set to 0 when n == 1), otherwise whatever sais_main()
   returns. */
int
sais_int(const int *T, int *SA, int n, int k) {
  if(T == NULL) { return -1; }
  if(SA == NULL) { return -1; }
  if(n < 0) { return -1; }
  if(k <= 0) { return -1; }

  /* trivial inputs need no sorting */
  switch(n) {
    case 0:
      return 0;
    case 1:
      SA[0] = 0;
      return 0;
    default:
      break;
  }
  return sais_main(T, SA, 0, n, k, sizeof(int), 0);
}
506 | 
507 | int
508 | sais_bwt(const unsigned char *T, unsigned char *U, int *A, int n) {
509 |   int i, pidx;
510 |   if((T == NULL) || (U == NULL) || (A == NULL) || (n < 0)) { return -1; }
511 |   if(n <= 1) { if(n == 1) { U[0] = T[0]; } return n; }
512 |   pidx = sais_main(T, A, 0, n, UCHAR_SIZE, sizeof(unsigned char), 1);
513 |   if(pidx < 0) { return pidx; }
514 |   U[0] = T[n - 1];
515 |   for(i = 0; i < pidx; ++i) { U[i + 1] = (unsigned char)A[i]; }
516 |   for(i += 1; i < n; ++i) { U[i] = (unsigned char)A[i]; }
517 |   pidx += 1;
518 |   return pidx;
519 | }
520 | 
/* Burrows-Wheeler transform of the integer string T[0..n-1] over an alphabet
   of size k into U[0..n-1], using A[0..n-1] as workspace.
   Returns -1 for a NULL pointer, a negative n or a non-positive k, n itself
   for n <= 1 (U[0] is set to T[0] when n == 1), a negative value passed
   through from sais_main() on failure, and otherwise the index reported by
   sais_main() plus one. */
int
sais_int_bwt(const int *T, int *U, int *A, int n, int k) {
  int dst, primary;

  if(T == NULL) { return -1; }
  if(U == NULL) { return -1; }
  if(A == NULL) { return -1; }
  if(n < 0) { return -1; }
  if(k <= 0) { return -1; }
  if(n == 0) { return 0; }
  if(n == 1) { U[0] = T[0]; return 1; }

  primary = sais_main(T, A, 0, n, k, sizeof(int), 1);
  if(primary < 0) { return primary; }

  /* the last text symbol comes first; A[0..primary-1] shifts up by one slot,
     A[primary] is skipped and the rest keeps its place */
  U[0] = T[n - 1];
  for(dst = 1; dst <= primary; ++dst) { U[dst] = A[dst - 1]; }
  for(dst = primary + 1; dst < n; ++dst) { U[dst] = A[dst]; }
  return primary + 1;
}
534 | 


--------------------------------------------------------------------------------
/third_party/sais-lite-2.4.1/sais.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * sais.h for sais-lite
 3 |  * Copyright (c) 2008-2010 Yuta Mori All Rights Reserved.
 4 |  *
 5 |  * Permission is hereby granted, free of charge, to any person
 6 |  * obtaining a copy of this software and associated documentation
 7 |  * files (the "Software"), to deal in the Software without
 8 |  * restriction, including without limitation the rights to use,
 9 |  * copy, modify, merge, publish, distribute, sublicense, and/or sell
10 |  * copies of the Software, and to permit persons to whom the
11 |  * Software is furnished to do so, subject to the following
12 |  * conditions:
13 |  *
14 |  * The above copyright notice and this permission notice shall be
15 |  * included in all copies or substantial portions of the Software.
16 |  *
17 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
18 |  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
19 |  * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
20 |  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
21 |  * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
22 |  * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
23 |  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
24 |  * OTHER DEALINGS IN THE SOFTWARE.
25 |  */
26 | 
/* Public interface of sais-lite: suffix array construction and
   Burrows-Wheeler transform for byte strings and integer strings. */
#ifndef _SAIS_H
#define _SAIS_H 1

#ifdef __cplusplus
extern "C" {
#endif /* __cplusplus */

/* find the suffix array SA of T[0..n-1]
   use a working space (excluding T and SA) of at most 2n+O(lg n)
   returns 0 on success, -1 if T or SA is NULL or n is negative,
   -2 if a memory allocation fails */
int
sais(const unsigned char *T, int *SA, int n);

/* find the suffix array SA of T[0..n-1] in {0..k-1}^n
   use a working space (excluding T and SA) of at most MAX(4k,2n)
   returns 0 on success, -1 if T or SA is NULL, n is negative or k is not
   positive, -2 if a memory allocation fails */
int
sais_int(const int *T, int *SA, int n, int k);

/* burrows-wheeler transform
   U receives the n transformed symbols, A is an n-element work array
   returns a negative value on error (-1 for invalid arguments, -2 if a
   memory allocation fails); otherwise n itself when n <= 1, and the
   primary index (the internal index plus one) for longer inputs */
int
sais_bwt(const unsigned char *T, unsigned char *U, int *A, int n);
int
sais_int_bwt(const int *T, int *U, int *A, int n, int k);


#ifdef __cplusplus
} /* extern "C" */
#endif /* __cplusplus */

#endif /* _SAIS_H */
56 | 


--------------------------------------------------------------------------------
/third_party/sais-lite-2.4.1/suftest.c:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * suftest.c for sais-lite
  3 |  * Copyright (c) 2008-2010 Yuta Mori All Rights Reserved.
  4 |  *
  5 |  * Permission is hereby granted, free of charge, to any person
  6 |  * obtaining a copy of this software and associated documentation
  7 |  * files (the "Software"), to deal in the Software without
  8 |  * restriction, including without limitation the rights to use,
  9 |  * copy, modify, merge, publish, distribute, sublicense, and/or sell
 10 |  * copies of the Software, and to permit persons to whom the
 11 |  * Software is furnished to do so, subject to the following
 12 |  * conditions:
 13 |  *
 14 |  * The above copyright notice and this permission notice shall be
 15 |  * included in all copies or substantial portions of the Software.
 16 |  *
 17 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 18 |  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
 19 |  * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 20 |  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
 21 |  * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
 22 |  * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 23 |  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 24 |  * OTHER DEALINGS IN THE SOFTWARE.
 25 |  */
 26 | 
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include "sais.h"
 32 | 
 33 | 
 34 | /* Checks the suffix array SA of the string T. */
 35 | static
 36 | int
 37 | sufcheck(const unsigned char *T, const int *SA, int n, int verbose) {
 38 |   int C[256];
 39 |   int i, p, q, t;
 40 |   int c;
 41 | 
 42 |   if(verbose) { fprintf(stderr, "sufcheck: "); }
 43 |   if(n == 0) {
 44 |     if(verbose) { fprintf(stderr, "Done.\n"); }
 45 |     return 0;
 46 |   }
 47 | 
 48 |   /* Check arguments. */
 49 |   if((T == NULL) || (SA == NULL) || (n < 0)) {
 50 |     if(verbose) { fprintf(stderr, "Invalid arguments.\n"); }
 51 |     return -1;
 52 |   }
 53 | 
 54 |   /* check range: [0..n-1] */
 55 |   for(i = 0; i < n; ++i) {
 56 |     if((SA[i] < 0) || (n <= SA[i])) {
 57 |       if(verbose) {
 58 |         fprintf(stderr, "Out of the range [0,%d].\n"
 59 |                         "  SA[%d]=%d\n",
 60 |                         n - 1, i, SA[i]);
 61 |       }
 62 |       return -2;
 63 |     }
 64 |   }
 65 | 
 66 |   /* check first characters. */
 67 |   for(i = 1; i < n; ++i) {
 68 |     if(T[SA[i - 1]] > T[SA[i]]) {
 69 |       if(verbose) {
 70 |         fprintf(stderr, "Suffixes in wrong order.\n"
 71 |                         "  T[SA[%d]=%d]=%d > T[SA[%d]=%d]=%d\n",
 72 |                         i - 1, SA[i - 1], T[SA[i - 1]], i, SA[i], T[SA[i]]);
 73 |       }
 74 |       return -3;
 75 |     }
 76 |   }
 77 | 
 78 |   /* check suffixes. */
 79 |   for(i = 0; i < 256; ++i) { C[i] = 0; }
 80 |   for(i = 0; i < n; ++i) { ++C[T[i]]; }
 81 |   for(i = 0, p = 0; i < 256; ++i) {
 82 |     t = C[i];
 83 |     C[i] = p;
 84 |     p += t;
 85 |   }
 86 | 
 87 |   q = C[T[n - 1]];
 88 |   C[T[n - 1]] += 1;
 89 |   for(i = 0; i < n; ++i) {
 90 |     p = SA[i];
 91 |     if(0 < p) {
 92 |       c = T[--p];
 93 |       t = C[c];
 94 |     } else {
 95 |       c = T[p = n - 1];
 96 |       t = q;
 97 |     }
 98 |     if((t < 0) || (p != SA[t])) {
 99 |       if(verbose) {
100 |         fprintf(stderr, "Suffix in wrong position.\n"
101 |                         "  SA[%d]=%d or\n"
102 |                         "  SA[%d]=%d\n",
103 |                         t, (0 <= t) ? SA[t] : -1, i, SA[i]);
104 |       }
105 |       return -4;
106 |     }
107 |     if(t != q) {
108 |       ++C[c];
109 |       if((n <= C[c]) || (T[SA[C[c]]] != c)) { C[c] = -1; }
110 |     }
111 |   }
112 | 
113 |   if(1 <= verbose) { fprintf(stderr, "Done.\n"); }
114 |   return 0;
115 | }
116 | 
/* Writes the one-line usage message for `progname` to stderr and terminates
   the process with exit code `status`; never returns. */
static void
print_help(const char *progname, int status) {
  FILE *out = stderr;

  fprintf(out, "usage: %s FILE\n\n", progname);
  exit(status);
}
123 | 
/* suftest FILE: reads FILE into memory, builds its suffix array with sais(),
   reports the elapsed CPU time on stderr and verifies the result with
   sufcheck(). Exits with EXIT_FAILURE on any error. */
int
main(int argc, const char *argv[]) {
  FILE *fp;
  const char *fname;
  unsigned char *T;
  int *SA;
  long n;
  size_t len, alloc_len;
  clock_t start, finish;

  /* Check arguments. */
  if((argc == 1) ||
     (strcmp(argv[1], "-h") == 0) ||
     (strcmp(argv[1], "--help") == 0)) { print_help(argv[0], EXIT_SUCCESS); }
  if(argc != 2) { print_help(argv[0], EXIT_FAILURE); }

  /* Open a file for reading. */
  if((fp = fopen(fname = argv[1], "rb")) == NULL) {
    fprintf(stderr, "%s: Cannot open file `%s': ", argv[0], fname);
    perror(NULL);
    exit(EXIT_FAILURE);
  }

  /* Get the file size. */
  if(fseek(fp, 0, SEEK_END) == 0) {
    n = ftell(fp);
    rewind(fp);
    if(n < 0) {
      fprintf(stderr, "%s: Cannot ftell `%s': ", argv[0], fname);
      perror(NULL);
      exit(EXIT_FAILURE);
    }
  } else {
    fprintf(stderr, "%s: Cannot fseek `%s': ", argv[0], fname);
    perror(NULL);
    exit(EXIT_FAILURE);
  }

  /* sais() takes the length as an int and SA needs n * sizeof(int) bytes:
     refuse inputs for which either of those would overflow instead of
     silently truncating the length. */
  if((n > (long)INT_MAX) ||
     ((size_t)n > ((size_t)-1) / sizeof(int))) {
    fprintf(stderr, "%s: File `%s' is too large.\n", argv[0], fname);
    exit(EXIT_FAILURE);
  }
  len = (size_t)n;

  /* Allocate 5n bytes of memory. */
  /* malloc(0) may legitimately return NULL, so request at least one element;
     otherwise an empty file would be reported as an allocation failure. */
  alloc_len = (len != 0) ? len : 1;
  T = (unsigned char *)malloc(alloc_len * sizeof(unsigned char));
  SA = (int *)malloc(alloc_len * sizeof(int));
  if((T == NULL) || (SA == NULL)) {
    fprintf(stderr, "%s: Cannot allocate memory.\n", argv[0]);
    exit(EXIT_FAILURE);
  }

  /* Read n bytes of data. */
  if(fread(T, sizeof(unsigned char), len, fp) != len) {
    fprintf(stderr, "%s: %s `%s': ",
      argv[0],
      (ferror(fp) || !feof(fp)) ? "Cannot read from" : "Unexpected EOF in",
      argv[1]);
    perror(NULL);
    exit(EXIT_FAILURE);
  }
  fclose(fp);

  /* Construct the suffix array. */
  fprintf(stderr, "%s: %ld bytes ... ", fname, n);
  start = clock();
  if(sais(T, SA, (int)n) != 0) {
    fprintf(stderr, "%s: Cannot allocate memory.\n", argv[0]);
    exit(EXIT_FAILURE);
  }
  finish = clock();
  fprintf(stderr, "%.4f sec\n", (double)(finish - start) / (double)CLOCKS_PER_SEC);

  /* Check the suffix array. */
  if(sufcheck(T, SA, (int)n, 1) != 0) { exit(EXIT_FAILURE); }

  /* Deallocate memory. */
  free(SA);
  free(T);

  return 0;
}
199 | 


--------------------------------------------------------------------------------
/third_party/sais-lite-2.4.1/test.c:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * test.c for sais-lite
  3 |  * Copyright (c) 2008-2010 Yuta Mori All Rights Reserved.
  4 |  *
  5 |  * Permission is hereby granted, free of charge, to any person
  6 |  * obtaining a copy of this software and associated documentation
  7 |  * files (the "Software"), to deal in the Software without
  8 |  * restriction, including without limitation the rights to use,
  9 |  * copy, modify, merge, publish, distribute, sublicense, and/or sell
 10 |  * copies of the Software, and to permit persons to whom the
 11 |  * Software is furnished to do so, subject to the following
 12 |  * conditions:
 13 |  *
 14 |  * The above copyright notice and this permission notice shall be
 15 |  * included in all copies or substantial portions of the Software.
 16 |  *
 17 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 18 |  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
 19 |  * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 20 |  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
 21 |  * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
 22 |  * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 23 |  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 24 |  * OTHER DEALINGS IN THE SOFTWARE.
 25 |  */
 26 | 
 27 | #include <stdio.h>
 28 | #include <stdlib.h>
 29 | #include <string.h>
 30 | #include "sais.h"
 31 | 
 32 | 
 33 | static
 34 | int
 35 | cmp_suf(const unsigned char *T, int n, int p1, int p2) {
 36 |   int r, s = (p1 < p2) ? 1 : ((p1 > p2) ? -1 : 0);
 37 |   for(r = 0; (p1 < n) && (p2 < n) && ((r = T[p1] - T[p2]) == 0); ++p1, ++p2) { }
 38 |   return (r != 0) ? r : s;
 39 | }
 40 | 
 41 | int
 42 | main(int argc, const char *argv[]) {
 43 |   unsigned char *T1;
 44 |   int *T3;
 45 |   unsigned char *T1BWT;
 46 |   int *T3BWT;
 47 |   int *SA1;
 48 |   int *SA3;
 49 |   int *A;
 50 |   int i, j, n, p1, p3;
 51 |   unsigned int bits;
 52 | 
 53 |   fprintf(stderr, "start test...\n");
 54 |   for(n = 1; n <= 24; ++n) {
 55 |     T1 = malloc(n * sizeof(unsigned char));
 56 |     T1BWT = malloc(n * sizeof(unsigned char));
 57 |     T3 = malloc(n * sizeof(int));
 58 |     T3BWT = malloc(n * sizeof(int));
 59 |     SA1 = malloc(n * sizeof(int));
 60 |     SA3 = malloc(n * sizeof(int));
 61 |     A = malloc(n * sizeof(int));
 62 |     for(bits = 0; bits < (1U << n); ++bits) {
 63 |       if((bits & 4095) == 0) {
 64 |         fprintf(stderr, "  n=%2d : %3d%%\r", n, (int)((double)bits / (double)((1U << n) - 1) * 100.0));
 65 |       }
 66 |       for(i = 0; i < n; ++i) {
 67 |         T1[i] = (bits >> i) & 1;
 68 |         T3[i] = T1[i] * 511;
 69 |       }
 70 | 
 71 |       /* construct sa and bwt */
 72 |       if(sais(T1, SA1, n) != 0) {
 73 |         fprintf(stderr, "  n=%2d, bits=%u : failure - sais\n", n, bits);
 74 |         exit(EXIT_FAILURE);
 75 |       }
 76 |       if((p1 = sais_bwt(T1, T1BWT, A, n)) < 0) {
 77 |         fprintf(stderr, "  n=%2d, bits=%u : failure - sais_bwt\n", n, bits);
 78 |         exit(EXIT_FAILURE);
 79 |       }
 80 |       if(sais_int(T3, SA3, n, 512) != 0) {
 81 |         fprintf(stderr, "  n=%2d, bits=%u : failure - sais_int\n", n, bits);
 82 |         exit(EXIT_FAILURE);
 83 |       }
 84 |       if((p3 = sais_int_bwt(T3, T3BWT, A, n, 512)) < 0) {
 85 |         fprintf(stderr, "  n=%2d, bits=%u : failure - sais_int_bwt\n", n, bits);
 86 |         exit(EXIT_FAILURE);
 87 |       }
 88 | 
 89 |       /* check SA1 */
 90 |       for(i = 1; i < n; ++i) {
 91 |         if(0 <= cmp_suf(T1, n, SA1[i - 1], SA1[i])) {
 92 |           fprintf(stderr, "  n=%2d, bits=%u : failure - SA1\n", n, bits);
 93 |           for(i = 0; i < n; ++i) {
 94 |             fprintf(stderr, "    SA[%d]=%d: ", i, SA1[i]);
 95 |             for(j = SA1[i]; j < n; ++j) { fprintf(stderr, "%d", T1[j]); }
 96 |             fprintf(stderr, "\n");
 97 |           }
 98 |           exit(EXIT_FAILURE);
 99 |         }
100 |       }
101 | 
102 |       /* check SA3 */
103 |       for(i = 0; i < n; ++i) {
104 |         if(SA1[i] != SA3[i]) {
105 |           fprintf(stderr, "  n=%2d, bits=%u : failure - SA3\n", n, bits);
106 |           for(i = 0; i < n; ++i) {
107 |             fprintf(stderr, "    SA1[%d]=%d, SA3[%d]=%d: ", i, SA1[i], i, SA3[i]);
108 |             for(j = SA3[i]; j < n; ++j) { fprintf(stderr, "%d", T3[j] / 511); }
109 |             fprintf(stderr, "\n");
110 |           }
111 |           exit(EXIT_FAILURE);
112 |         }
113 |       }
114 | 
115 |       /* check T1BWT */
116 |       for(i = 0, j = 0; i <= n; ++i) {
117 |         if(i != 0) {
118 |           if(SA1[i - 1] == 0) { if(p1 != i) { break; } }
119 |           else if(n <= j) { break; }
120 |           else { if(T1BWT[j++] != T1[SA1[i - 1] - 1]) { break; } }
121 |         } else {
122 |           if(T1BWT[j++] != T1[n - 1]) { break; }
123 |         }
124 |       }
125 |       if((i != (n + 1)) || (j != n)) {
126 |         fprintf(stderr, "  n=%2d, bits=%u : failure - T1BWT\n", n, bits);
127 |         fprintf(stderr, "    T1BWT=");
128 |         for(i = 0; i < n; ++i) { fprintf(stderr, "%d", T1BWT[i]); }
129 |         fprintf(stderr, ", p1=%d\n", p1);
130 |         for(i = 0; i < n; ++i) {
131 |           fprintf(stderr, "    SA[%d]=%d: ", i, SA1[i]);
132 |           for(j = SA1[i]; j < n; ++j) { fprintf(stderr, "%d", T1[j]); }
133 |           fprintf(stderr, " ");
134 |           for(j = 0; j < SA1[i]; ++j) { fprintf(stderr, "%d", T1[j]); }
135 |           fprintf(stderr, "\n");
136 |         }
137 |         exit(EXIT_FAILURE);
138 |       }
139 | 
140 |       /* check T3BWT */
141 |       for(i = 0; i < n; ++i) { if(T1BWT[i] != T3BWT[i] / 511) { break; } }
142 |       if((i != n) || (p1 != p3)) {
143 |         fprintf(stderr, "  n=%2d, bits=%u : failure - T3BWT\n", n, bits);
144 |         fprintf(stderr, "    T3BWT=");
145 |         for(i = 0; i < n; ++i) { fprintf(stderr, "%d", T3BWT[i]); }
146 |         fprintf(stderr, ", p3=%d\n", p3);
147 |         for(i = 0; i < n; ++i) {
148 |           fprintf(stderr, "    SA[%d]=%d: ", i, SA3[i]);
149 |           for(j = SA3[i]; j < n; ++j) { fprintf(stderr, "%d", T3[j] / 511); }
150 |           fprintf(stderr, " ");
151 |           for(j = 0; j < SA3[i]; ++j) { fprintf(stderr, "%d", T3[j] / 511); }
152 |           fprintf(stderr, "\n");
153 |         }
154 |         exit(EXIT_FAILURE);
155 |       }
156 | 
157 |     }
158 |     fprintf(stderr, "  n=%2d : success\n", n);
159 |     free(T1);
160 |     free(T1BWT);
161 |     free(T3);
162 |     free(T3BWT);
163 |     free(SA1);
164 |     free(SA3);
165 |     free(A);
166 |   }
167 |   fprintf(stderr, "finish test\n");
168 | 
169 |   return 0;
170 | }
171 | 


--------------------------------------------------------------------------------