Each task's input is a (metric, day), i.e. it runs on the summed reports
21 | for a single metric received in a single day.
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 |
41 |
42 |
43 |
44 |
45 |
46 |
47 |
48 |
49 |
--------------------------------------------------------------------------------
/client/cpp/libc_rand_impl.h:
--------------------------------------------------------------------------------
1 | // Copyright 2015 Google Inc. All rights reserved.
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | // A RAPPOR random implementation using libc's rand().
16 | //
17 | // IMPORTANT: This is for demo/simulation purposes only. Use a better random
18 | // function in production applications.
19 |
20 | #ifndef LIBC_RAND_IMPL_H_
21 | #define LIBC_RAND_IMPL_H_
22 |
23 | #include "rappor_deps.h"
24 |
25 | namespace rappor {
26 |
27 | class LibcRand : public IrrRandInterface {
28 | public:
29 | virtual ~LibcRand() {}
30 |
31 | virtual bool GetMask(float prob, int num_bits, Bits* mask_out) const;
32 | };
33 |
34 | } // namespace rappor
35 |
36 | #endif // LIBC_RAND_IMPL_H_
37 |
--------------------------------------------------------------------------------
/client/cpp/openssl_hash_impl.h:
--------------------------------------------------------------------------------
1 | // Copyright 2015 Google Inc. All rights reserved.
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | // OpenSSL implementation of RAPPOR dependencies.
16 |
17 | #ifndef OPENSSL_IMPL_H_
18 | #define OPENSSL_IMPL_H_
19 |
20 | #include "rappor_deps.h"
21 |
22 | namespace rappor {
23 |
24 | bool HmacSha256(const std::string& key, const std::string& value,
25 | std::vector* output);
26 | // Pass output vector of desired length.
27 | bool HmacDrbg(const std::string& key, const std::string& value,
28 | std::vector* output);
29 | bool Md5(const std::string& value, std::vector* output);
30 |
31 | } // namespace rappor
32 |
33 | #endif // OPENSSL_IMPL_H_
34 |
--------------------------------------------------------------------------------
/client/README.md:
--------------------------------------------------------------------------------
1 | RAPPOR Clients
2 | ==============
3 |
4 | This directory contains RAPPOR client implementations in various languages.
5 |
6 | The privacy of RAPPOR is based on the client "lying" about the true values --
7 | that is, not sending them over the network.
8 |
9 | The clients are typically small in terms of code size because the RAPPOR
10 | client algorithm is simple. See the README.md in each subdirectory for details
11 | on how to use the library.
12 |
13 | Common Test Protocol
14 | --------------------
15 |
16 | When implementing a new RAPPOR client, you can get end-to-end testing for free!
17 |
18 | The `regtest.sh` script in the root of this repository does the following:
19 |
20 | 1. Creates test input data and feeds it into your client as a CSV file
21 | 2. Preprocesses your client output (also CSV)
22 | 3. Runs the RAPPOR analysis, learning aggregate statistics from encoded values
23 | 4. Compares the analysis to the true client values, with metrics and plots.
24 |
25 | To have your client tested, you need a small executable wrapper, which reads
26 | and writes a CSV file in a specified format.
27 |
28 | Then add it to the `_run-one-instance` function in `regtest.sh`.
29 |
30 |
36 |
37 |
38 |
39 |
40 |
41 |
42 |
43 |
44 |
--------------------------------------------------------------------------------
/tests/fastrand.py:
--------------------------------------------------------------------------------
1 | # Copyright 2014 Google Inc. All rights reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | """fastrand.py - Python wrapper for _fastrand."""
16 |
17 | # NOTE: We could retire this module in favor of the C++ client? One reason to
18 | # keep it is if it supports a wider range of params (e.g. more than 32 or 64
19 | # bits.)
20 |
21 | import random
22 |
23 | import _fastrand
24 |
25 |
class FastIrrRand(object):
  """Fast insecure version of rappor.SecureIrrRand.

  Provides two zero-argument callables, p_gen and q_gen.  Each one calls
  _fastrand.randbits with params.prob_p or params.prob_q respectively and the
  Bloom filter width taken from params.num_bloombits.
  """

  def __init__(self, params):
    fast_randbits = _fastrand.randbits  # accelerated function
    width = params.num_bloombits

    # IRR probabilities.  They are looked up on params at call time, not
    # snapshotted here; the width is captured once.
    def gen_p():
      return fast_randbits(params.prob_p, width)

    def gen_q():
      return fast_randbits(params.prob_q, width)

    self.p_gen = gen_p
    self.q_gen = gen_q
36 |
--------------------------------------------------------------------------------
/ui/assoc-overview.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | RAPPOR Association Analysis Overview
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
15 |
16 |
17 |
37 |
38 |
39 |
45 |
46 |
47 |
48 |
--------------------------------------------------------------------------------
/analysis/R/run_tests.R:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env Rscript
2 | #
3 | # Copyright 2014 Google Inc. All rights reserved.
4 | #
5 | # Licensed under the Apache License, Version 2.0 (the "License");
6 | # you may not use this file except in compliance with the License.
7 | # You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 |
17 | #
18 | # Run unit tests for RAPPOR R code.
19 |
20 | library(RUnit)
21 |
# Runs the RUnit tests found under analysis/R and reports overall success.
#
# Test files are those whose names end in "_test.R"; test functions are those
# whose names start with "Test".  The directory is given relative to the
# working directory, so this must be run from the repository root.
#
# Returns:
#   TRUE if at least one test function was found and none failed or errored;
#   FALSE otherwise (a short message is printed to stdout first).
run_tests <- function() {
  dirs <- "analysis/R"  # Run from root
  # The dot is escaped so that only a literal ".R" extension matches; an
  # unescaped "." would accept any character in that position.
  test_suite <- defineTestSuite("rappor", dirs, testFileRegexp = "_test\\.R$",
                                testFuncRegexp = "^Test")
  stopifnot(isValidTestSuite(test_suite))

  test_result <- runTestSuite(test_suite)

  printTextProtocol(test_result)  # print to stdout

  result <- test_result[[1]]  # Result for our only suite

  # Sanity check: fail if there were no tests found.
  if (result$nTestFunc == 0) {
    cat("No tests found.\n")
    return(FALSE)
  }
  if (result$nFail != 0 || result$nErr != 0) {
    cat("Some tests failed.\n")
    return(FALSE)
  }
  TRUE
}
45 |
46 | if (!run_tests()) {
47 | quit(status = 1)
48 | }
49 |
--------------------------------------------------------------------------------
/analysis/cpp/run.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | #
3 | # Usage:
4 | # ./run.sh
5 |
6 | set -o nounset
7 | set -o pipefail
8 | set -o errexit
9 |
10 | # Call gcc with the flags we like.
11 | # NOTE: -O3 does a lot for fast_em. (More than 5x speedup over unoptimized)
12 |
13 | cpp-compiler() {
14 | g++ -Wall -Wextra -O3 "$@"
15 | #clang++ -Wall -Wextra -O3 "$@"
16 | }
17 |
18 | build-find-cliques() {
19 | mkdir -p _tmp
20 | # C++ 11 for unordered_{map,set}
21 | cpp-compiler -std=c++0x -o _tmp/find_cliques find_cliques.cc
22 | }
23 |
24 | find-cliques() {
25 | _tmp/find_cliques "$@"
26 | }
27 |
28 | test-bad-edge() {
29 | # Edge should go from lesser partition number to greater
30 | find-cliques <
15 |
16 |
17 | For now, we have collected some useful links.
18 |
19 | Linux
20 | -----
21 |
22 | * [Myths about /dev/urandom](http://www.2uo.de/myths-about-urandom/) -- Nice
23 | article explaining implementation aspects of `/dev/urandom` and `/dev/random`
24 | on Linux. (Summary: just use `/dev/urandom`, with caveats explained)
25 |
26 | * [LWN on getrandom](http://lwn.net/Articles/606141/)
27 | ([patch](http://lwn.net/Articles/605828/)) -- A very recent addition to the
28 | Linux kernel. As of this writing (11/2014), it's safe to say that very few
29 | applications use it. The relevant change, involving an issue mentioned in
30 | the first link, involves the situation at system boot, when there is little
31 | entropy available.
32 |
33 |
34 |
36 |
37 |
39 |
--------------------------------------------------------------------------------
/bin/hash_candidates_test.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python -S
2 | #
3 | # Copyright 2014 Google Inc. All rights reserved.
4 | #
5 | # Licensed under the Apache License, Version 2.0 (the "License");
6 | # you may not use this file except in compliance with the License.
7 | # You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 |
17 | """
18 | hash_candidates_test.py: Tests for hash_candidates.py
19 | """
20 |
21 | import cStringIO
22 | import unittest
23 |
24 | import rappor
25 | import hash_candidates # module under test
26 |
27 |
28 | STDIN = """\
29 | apple
30 | banana
31 | carrot
32 | """
33 |
34 | EXPECTED_CSV_OUT = """\
35 | apple,5,1,26,26,38,34,63,62\r
36 | banana,12,14,28,24,37,34,62,49\r
37 | carrot,4,12,25,21,48,38,61,54\r
38 | """
39 |
40 |
class HashCandidatesTest(unittest.TestCase):
  """Compares the output of hash_candidates.HashCandidates to a golden CSV."""

  def setUp(self):
    # Fixed parameters: 16 Bloom filter bits, 4 cohorts, 2 hash functions.
    cfg = rappor.Params()
    cfg.num_bloombits = 16
    cfg.num_cohorts = 4
    cfg.num_hashes = 2
    self.params = cfg

  def testHash(self):
    candidates_in = cStringIO.StringIO(STDIN)
    csv_sink = cStringIO.StringIO()

    hash_candidates.HashCandidates(self.params, candidates_in, csv_sink)

    actual = csv_sink.getvalue()
    self.assertMultiLineEqual(EXPECTED_CSV_OUT, actual)
56 |
57 |
58 | if __name__ == '__main__':
59 | unittest.main()
60 |
--------------------------------------------------------------------------------
/bin/README.md:
--------------------------------------------------------------------------------
1 | Command Line Tools
2 | ==================
3 |
4 | This directory contains command line tools for RAPPOR analysis.
5 |
6 | Analysis Tools
7 | --------------
8 |
9 | ### decode-dist
10 |
11 | Decode a distribution -- requires a "counts" file (summed bits from reports),
12 | map file, and a params file. See `test.sh decode-dist` in this dir for an
13 | example.
14 |
15 | ### decode-assoc
16 |
17 | Decode a joint distribution between 2 variables ("association analysis"). See
18 | `test.sh decode-assoc-R` or `test.sh decode-assoc-cpp` in this dir for an
19 | example.
20 |
21 | Currently it only supports associating strings vs. booleans.
22 |
23 | ### Setup
24 |
25 | Both of these tools are written in R, and require several R libraries to be
26 | installed (see `../setup.sh r-packages`).
27 |
28 | `decode-assoc` also shells out to a native binary written in C++ if
29 | `--em-executable` is passed. This requires a C++ compiler (see
30 | `analysis/cpp/run.sh`). You can run `test.sh decode-assoc-cpp` to test it.
31 |
32 |
33 | Helper Tools
34 | ------------
35 |
36 | These are simple Python implementations of tools needed for analysis. At
37 | Google, Chrome uses alternative C++/Go implementations of these tools.
38 |
39 | ### sum-bits
40 |
41 | Given a CSV file with RAPPOR reports (IRRs), produce a "counts" CSV file on
42 | stdout. This is the `m x (k+1)` matrix that is used in the R analysis (where m
43 | = #cohorts and k = report width in bits).
44 |
45 | ### hash-candidates
46 |
47 | Given a list of candidates on stdin, produce a CSV file of hashes (the "map
48 | file"). Each row has `m x h` cells (where m = #cohorts and h = #hashes)
49 |
50 | See the `regtest.sh` script for examples of how these tools are invoked.
51 |
52 |
--------------------------------------------------------------------------------
/ui/overview.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | RAPPOR Results Overview
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
15 |
16 |
17 |
50 |
51 |
52 |
57 |
58 |
59 |
60 |
--------------------------------------------------------------------------------
/apps/README.md:
--------------------------------------------------------------------------------
1 | RAPPOR Shiny Apps
2 | =================
3 |
4 | This directory contains web apps written using the [Shiny][shiny] web framework
5 | from [RStudio][rstudio].
6 |
7 | To run them, first install Shiny:
8 |
9 | $ R
10 | ...
11 | > install.packages('shiny')
12 | ...
13 |
14 | (You can view Shiny's platform requirements in
15 | [CRAN](http://cran.r-project.org/web/packages/shiny/index.html).)
16 |
17 | Then change to the app directory, and execute the `run_app.sh` script:
18 |
19 | $ cd rappor/apps/rappor-analysis
20 | $ ./run_app.sh
21 | ...
22 | Listening on http://0.0.0.0:6789
23 |
24 | Visit http://localhost:6789/ in your browser.
25 |
26 | This code has been tested on Ubuntu Linux, but should work on other platforms
27 | that Shiny supports.
28 |
29 | Both of these apps use the underlying analysis code in `analysis/R`, just like
30 | the command line demo `demo.sh` does.
31 |
32 | rappor-analysis
33 | ---------------
34 |
35 | This app "decodes" a RAPPOR data set. In other words, you can upload the
36 | `params`, `counts`, and `map` files, and view the inferred distribution, as
37 | well as debug info.
38 |
39 | These files are discussed in the RAPPOR [Data Flow][data-flow] doc.
40 |
41 | rappor-sim
42 | ----------
43 |
44 | This app lets you simulate RAPPOR runs with different populations and
45 | parameters. This can help you choose collection parameters for a given
46 | situation / variable.
47 |
48 | Help
49 | ----
50 |
51 | If you need help with these apps, please send a message to
52 | [rappor-discuss][group].
53 |
54 |
55 | [shiny]: http://shiny.rstudio.com/
56 | [rstudio]: http://rstudio.com/
57 | [data-flow]: http://google.github.io/rappor/doc/data-flow.html
58 | [group]: https://groups.google.com/forum/#!forum/rappor-discuss
59 |
--------------------------------------------------------------------------------
/gh-pages/examples/report.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | RAPPOR Demo
5 |
6 |
7 |
8 |
RAPPOR Demo
9 |
10 |
11 |
12 |
13 |
14 |
Simulation Input
15 |
16 |
17 |
Number of clients
18 |
100,000
19 |
20 |
21 |
Total values reported / obfuscated
22 |
700,000
23 |
24 |
25 |
Unique values reported / obfuscated
26 |
50
27 |
28 |
29 |
30 |
31 |
32 |
33 |
RAPPOR Parameters
34 |
35 |
36 |
k
37 |
Size of Bloom filter in bits
38 |
16
39 |
40 |
41 |
h
42 |
Hash functions in Bloom filter
43 |
2
44 |
45 |
46 |
m
47 |
Number of Cohorts
48 |
64
49 |
50 |
51 |
p
52 |
Probability p
53 |
0.5
54 |
55 |
56 |
q
57 |
Probability q
58 |
0.75
59 |
60 |
61 |
f
62 |
Probability f
63 |
0.5
64 |
65 |
66 |
67 |
68 |
69 |
70 |
71 |
72 |
73 |
74 |
75 |
76 |
--------------------------------------------------------------------------------
/tests/fastrand_test.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python -S
2 | #
3 | # Copyright 2014 Google Inc. All rights reserved.
4 | #
5 | # Licensed under the Apache License, Version 2.0 (the "License");
6 | # you may not use this file except in compliance with the License.
7 | # You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 |
17 | """
18 | fastrand_test.py: Tests for _fastrand extension module.
19 | """
20 | import unittest
21 |
22 | import _fastrand # module under test
23 |
24 |
25 | BIT_WIDTHS = [8, 16, 32, 64]
26 |
27 |
class FastRandTest(unittest.TestCase):
  """Sanity checks for _fastrand.randbits over every width in BIT_WIDTHS."""

  def testRandbits64(self):
    for width in BIT_WIDTHS:
      for prob in [0.1, 0.5, 0.9]:
        for _ in xrange(5):
          value = _fastrand.randbits(prob, width)
          # Rough sanity check: the result must fit in `width` bits.
          # (For a visual check, print bin(value) and its count of '1's.)
          self.assertLess(value, 2 ** width)

  def testRandbits64_EdgeCases(self):
    # Probability 0.0 is expected to yield no set bits.
    for width in BIT_WIDTHS:
      self.assertEqual(0, _fastrand.randbits(0.0, width))

    # Probability 1.0 is expected to yield all `width` bits set.
    for width in BIT_WIDTHS:
      self.assertEqual(2 ** width - 1, _fastrand.randbits(1.0, width))

  def testRandbitsError(self):
    # TODO: Should probably raise exceptions
    # An out-of-range probability and an unsupported width currently both
    # come back as None.
    invalid_args = [(-1, 64), (0.0, 65)]
    for prob, width in invalid_args:
      self.assertEqual(None, _fastrand.randbits(prob, width))
62 |
63 |
64 | if __name__ == '__main__':
65 | unittest.main()
66 |
--------------------------------------------------------------------------------
/pipeline/README.md:
--------------------------------------------------------------------------------
1 | pipeline
2 | ========
3 |
4 | This directory contains tools and scripts for running a cron job that does
5 | RAPPOR analysis and generates an HTML dashboard.
6 |
7 | It works like this:
8 |
9 | 1. `task_spec.py` generates a text file where each line corresponds to a process
10 | to be run (a "task"). The process is `bin/decode-dist` or
11 | `bin/decode-assoc`. The line contains the task parameters.
12 |
13 | 2. `xargs -P` is used to run processes in parallel. Our analysis is generally
14 | single-threaded (i.e. because R is single-threaded), so this helps utilize
15 | the machine fully. Each task places its output in a different subdirectory.
16 |
17 | 3. `cook.sh` calls `combine_results.py` to combine analysis results into a time
18 | series. It also calls `combine_status.py` to keep track of task data for
19 | "meta-analysis". `metric_status.R` generates more summary CSV files.
20 |
21 | 4. `ui.sh` calls `csv_to_html.py` to generate HTML fragments from the CSV
22 | files.
23 |
24 | 5. The JavaScript in `ui/ui.js` is loaded from static HTML, and makes AJAX calls
25 | to retrieve the HTML fragments. The page is made interactive with
26 | `ui/table-lib.js`.
27 |
28 | `dist.sh` and `assoc.sh` contain functions which coordinate this process.
29 |
30 | `alarm-lib.sh` is used to kill processes that have been running for too long.
31 |
32 | Testing
33 | -------
34 |
35 | `pipeline/regtest.sh` contains end-to-end demos of this process. Right now it
36 | depends on testdata from elsewhere in the tree:
37 |
38 |
39 | rappor$ ./demo.sh run # prepare dist testdata
40 | rappor$ cd bin
41 |
42 | bin$ ./test.sh write-assoc-testdata # prepare assoc testdata
43 | bin$ cd ../pipeline
44 |
45 | pipeline$ ./regtest.sh dist
46 | pipeline$ ./regtest.sh assoc
47 |
48 | pipeline$ python -m SimpleHTTPServer # start a static web server
49 |
50 | http://localhost:8000/_tmp/
51 |
52 |
53 |
--------------------------------------------------------------------------------
/bin/sum_bits_test.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python -S
2 | #
3 | # Copyright 2014 Google Inc. All rights reserved.
4 | #
5 | # Licensed under the Apache License, Version 2.0 (the "License");
6 | # you may not use this file except in compliance with the License.
7 | # You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 |
17 | """
18 | sum_bits_test.py: Tests for sum_bits.py
19 | """
20 |
21 | import cStringIO
22 | import unittest
23 |
24 | import rappor
25 | import sum_bits # module under test
26 |
27 |
28 | CSV_IN = """\
29 | user_id,cohort,bloom,prr,rappor
30 | 5,1,dummy,dummy,0000111100001111
31 | 5,1,dummy,dummy,0000000000111100
32 | """
33 |
34 | # NOTE: bit order is reversed.
35 | EXPECTED_CSV_OUT = """\
36 | 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0\r
37 | 2,1,1,2,2,1,1,0,0,1,1,1,1,0,0,0,0\r
38 | """
39 |
40 | TOO_MANY_COLUMNS = """\
41 | user_id,cohort,rappor
42 | 5,1,0000111100001111,extra
43 | """
44 |
45 |
class SumBitsTest(unittest.TestCase):
  """Tests sum_bits.SumBits against a golden counts CSV and a malformed input."""

  def setUp(self):
    # Fixed parameters: 16 Bloom filter bits, 2 cohorts.
    cfg = rappor.Params()
    cfg.num_bloombits = 16
    cfg.num_cohorts = 2
    self.params = cfg

  def testSum(self):
    reports_in = cStringIO.StringIO(CSV_IN)
    counts_out = cStringIO.StringIO()

    sum_bits.SumBits(self.params, reports_in, counts_out)

    actual = counts_out.getvalue()
    self.assertMultiLineEqual(EXPECTED_CSV_OUT, actual)

  def testErrors(self):
    # The data row in TOO_MANY_COLUMNS has one more field than its header.
    bad_in = cStringIO.StringIO(TOO_MANY_COLUMNS)
    sink = cStringIO.StringIO()

    with self.assertRaises(RuntimeError):
      sum_bits.SumBits(self.params, bad_in, sink)
67 |
68 |
69 | if __name__ == '__main__':
70 | unittest.main()
71 |
--------------------------------------------------------------------------------
/bin/hash_candidates.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python
2 | #
3 | # Copyright 2014 Google Inc. All rights reserved.
4 | #
5 | # Licensed under the Apache License, Version 2.0 (the "License");
6 | # you may not use this file except in compliance with the License.
7 | # You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 |
17 | """
18 | Given a list of candidates on stdin, produce a file of hashes ("map file").
19 | """
20 |
21 | import csv
22 | import sys
23 |
24 | import rappor
25 |
26 |
def HashCandidates(params, stdin, stdout):
  """Write one CSV row of Bloom filter bit positions per candidate line.

  Args:
    params: object providing num_bloombits, num_cohorts and num_hashes.
    stdin: iterable of lines; each line is stripped and used as a candidate.
    stdout: file-like object that receives the CSV rows.

  Each row starts with the candidate, followed by the values returned by
  rappor.get_bloom_bits for every cohort in order.
  """
  bits_per_cohort = params.num_bloombits
  writer = csv.writer(stdout)

  for raw_line in stdin:
    candidate = raw_line.strip()
    cells = [candidate]
    for cohort_index in xrange(params.num_cohorts):
      # Bits are indexed from 1, and each cohort gets a fixed offset.
      # NOTE: This detail could be omitted from the map file format, and done
      # in R.
      cohort_offset = cohort_index * bits_per_cohort
      hashed_bits = rappor.get_bloom_bits(
          candidate, cohort_index, params.num_hashes, bits_per_cohort)
      for bit in hashed_bits:
        cells.append(cohort_offset + (bit + 1))
    writer.writerow(cells)
43 |
44 |
def main(argv):
  """Load RAPPOR params from the file named by argv[1], then hash stdin.

  Args:
    argv: command line arguments; argv[1] is the path of the params CSV file.

  Raises:
    RuntimeError: if argv[1] is missing (the message is the usage string), or
      if rappor.Params.from_csv raises rappor.Error for the params file.
  """
  try:
    filename = argv[1]
  except IndexError:
    # Name the required argument so the usage message is actionable.
    raise RuntimeError('Usage: hash_candidates.py <params file>')
  with open(filename) as f:
    try:
      params = rappor.Params.from_csv(f)
    except rappor.Error as e:
      raise RuntimeError(e)

  HashCandidates(params, sys.stdin, sys.stdout)
57 |
58 |
59 | if __name__ == '__main__':
60 | try:
61 | main(sys.argv)
62 | except RuntimeError, e:
63 | print >>sys.stderr, e.args[0]
64 | sys.exit(1)
65 |
--------------------------------------------------------------------------------
/gh-pages/doc/randomness.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
9 |
10 |
11 |
12 |
13 |
Generating Random Bits for RAPPOR
14 |
15 |
To ensure privacy, an application using RAPPOR must generate random bits in an
16 | unpredictable manner. In other words, an adversary that can predict the
17 | sequence of random bits used can determine the true values being reported.
18 |
19 |
Generating random numbers is highly platform-specific -- even
20 | language-specific. So, libraries implementing RAPPOR should be parameterized
21 | by an interface to generate random bits. (This can be thought of as
22 | "dependency injection".)
23 |
24 |
26 |
27 |
For now, we have collected some useful links.
28 |
29 |
Linux
30 |
31 |
32 |
Myths about /dev/urandom -- Nice
33 | article explaining implementation aspects of /dev/urandom and /dev/random
34 | on Linux. (Summary: just use /dev/urandom, with caveats explained)
35 |
LWN on getrandom
36 | (patch) -- A very recent addition to the
37 | Linux kernel. As of this writing (11/2014), it's safe to say that very few
38 | applications use it. The relevant change, involving an issue mentioned in
39 | the first link, involves the situation at system boot, when there is little
40 | entropy available.
41 |
42 |
43 |
45 |
46 |
48 |
49 |
50 |
--------------------------------------------------------------------------------
/client/cpp/run.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | #
3 | # Usage:
4 | # ./run.sh
5 |
6 | set -o nounset
7 | set -o pipefail
8 | set -o errexit
9 |
10 | setup() {
11 | # need libprotobuf-dev for headers to compile against.
12 | sudo apt-get install protobuf-compiler libprotobuf-dev
13 |
14 | # OpenSSL dev headers
15 | sudo apt-get install libssl-dev
16 | }
17 |
18 | init() {
19 | mkdir --verbose -p _tmp
20 | }
21 |
22 | rappor-sim() {
23 | make _tmp/rappor_sim
24 | _tmp/rappor_sim "$@"
25 | }
26 |
27 | protobuf-encoder-demo() {
28 | make _tmp/protobuf_encoder_demo
29 | _tmp/protobuf_encoder_demo "$@"
30 | }
31 |
32 | rappor-sim-demo() {
33 | rappor-sim 16 2 128 0.25 0.75 0.5 < // assert
21 |
22 | #include "encoder.h"
23 | #include "openssl_hash_impl.h"
24 | #include "unix_kernel_rand_impl.h"
25 |
26 | int main(int argc, char** argv) {
27 | // Suppress unused variable warnings
28 | (void) argc;
29 | (void) argv;
30 |
31 | FILE* fp = fopen("/dev/urandom", "r");
32 | rappor::UnixKernelRand irr_rand(fp);
33 |
34 | rappor::Deps deps(rappor::Md5, "client-secret", rappor::HmacSha256,
35 | irr_rand);
36 | rappor::Params params(32, // num_bits (k)
37 | 2, // num_hashes (h)
38 | 128, // num_cohorts (m)
39 | 0.25, // probability f for PRR
40 | 0.75, // probability p for IRR
41 | 0.5); // probability q for IRR
42 |
43 | const char* encoder_id = "metric-name";
44 | rappor::Encoder encoder(encoder_id, params, deps);
45 |
46 | // Now use it to encode values. The 'out' value can be sent over the
47 | // network.
48 | rappor::Bits out;
49 | assert(encoder.EncodeString("foo", &out)); // returns false on error
50 | printf("'foo' encoded with RAPPOR: %0x, cohort %d\n", out, encoder.cohort());
51 |
52 | // Raw bits
53 | assert(encoder.EncodeBits(0x123, &out)); // returns false on error
54 | printf("0x123 encoded with RAPPOR: %0x, cohort %d\n", out, encoder.cohort());
55 | }
56 |
57 |
--------------------------------------------------------------------------------
/doc/data-flow.dot:
--------------------------------------------------------------------------------
1 | // Based on http://graphviz.org/content/cluster
2 |
3 | // Node types:
4 | // Rectangle: process
5 | // Oval: data
6 | // Diamond: debug/simulation data
7 |
8 | digraph G {
9 | //rankdir="LR"; // left to right layout
10 |
11 | // http://www.graphviz.org/content/color-names
12 | colorscheme=pastel13;
13 |
14 | subgraph cluster_0 {
15 | graph [ fontsize=24 ];
16 | label = "Reporting";
17 | style=filled;
18 | color=2;
19 |
20 | node [style=filled, color=white, fontsize=12];
21 |
22 | gen_sim_input -> dist_csv -> rappor_sim;
23 |
24 | rappor_sim -> out;
25 | rappor_sim -> params;
26 | rappor_sim -> hist;
27 | rappor_sim -> true_inputs;
28 |
29 | // Process
30 | rappor_sim [label="rappor_sim"];
31 |
32 | // Data
33 | dist_csv [shape=box, label="dist.csv"];
34 | out [shape=box, label="dist_out.csv"];
35 | params [shape=box, label="dist_params.csv"];
36 |
37 | // simulation data
38 | hist [shape=box, style=dotted, color=black, label="dist_hist.csv"];
39 | true_inputs [shape=box, style=dotted, color=black, label="dist_true_inputs.txt"];
40 | }
41 |
42 | subgraph cluster_1 {
43 | graph [ fontsize=24 ];
44 | label = "Analysis";
45 | style = filled;
46 | color=3;
47 |
48 | node [style=filled, color=white, fontsize=12];
49 |
50 | sum_bits -> counts;
51 |
52 | // sum_bits needs the params to construct the matrix. Technically it could
53 | // infer it, but this is simple.
54 | params -> sum_bits;
55 |
56 | // only in the simulation
57 | true_inputs -> demo_sh -> candidates [style=dotted];
58 |
59 | candidates -> hash_candidates -> map;
60 | params -> hash_candidates;
61 |
62 | params -> analyze;
63 | map -> analyze;
64 | counts -> analyze;
65 | hist -> analyze [style=dotted]; // only for comparison
66 |
67 | analyze -> plot_png;
68 |
69 | // Processes
70 | analyze [label="analyze.R"];
71 | demo_sh [label="demo.sh", style=dotted, color=black];
72 |
73 | // Data
74 | counts [shape=box, label="dist_count.csv"];
75 | candidates [shape=box, label="dist_candidates.txt"];
76 | map [shape=box, label="dist_map.csv"];
77 |
78 | plot_png [shape=box, label="dist.png"];
79 |
80 | }
81 |
82 | out -> sum_bits;
83 | }
84 |
--------------------------------------------------------------------------------
/demo.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | #
3 | # Demo of RAPPOR. Automating Python and R scripts. See README.
4 | #
5 | # Usage:
6 | # ./demo.sh [function name]
7 | #
8 | # End to end demo of rappor. Notable functions include:
9 | # quick-python: Runs a demo using the python client
10 | # quick-cpp: Runs a demo using the c++ client
11 | # If no function is specified the above two will be run consecutively.
12 | #
13 | # This takes a minute or so. It runs a subset of tests from regtest.sh and
14 | # writes an HTML summary.
15 |
16 | set -o nounset
17 | set -o pipefail
18 | set -o errexit
19 |
20 | . util.sh
21 |
22 | readonly THIS_DIR=$(dirname $0)
23 | readonly REPO_ROOT=$THIS_DIR
24 | readonly CLIENT_DIR=$REPO_ROOT/client/python
25 |
26 | # All the Python tools need this
27 | export PYTHONPATH=$CLIENT_DIR
28 |
29 | #
30 | # Semi-automated demos
31 | #
32 |
# Run rappor-sim through the Python profiler.
#
# Usage:
#   rappor-sim-profile DIST [extra rappor_sim.py args...]
#
# Reads _tmp/DIST.csv as the simulation input.  The profile is printed to
# stdout and also saved to _tmp/profile.txt.
rappor-sim-profile() {
  local dist=$1
  shift

  # For now, just dump it to a text file.  Sort by cumulative time.
  # The input path is quoted so that a DIST containing spaces or glob
  # characters is passed through as a single argument.
  time python -m cProfile -s cumulative \
    tests/rappor_sim.py \
    -i "_tmp/$dist.csv" \
    "$@" \
    | tee _tmp/profile.txt
}
45 |
46 | quick-python() {
47 |   ./regtest.sh run-seq "^demo3" python  # demo subset of regtest.sh, Python client
48 | }
49 |
50 | quick-cpp() {
51 |   # The C++ client is built up front rather than in parallel with the tests.
52 |   ./build.sh cpp-client
53 |
54 |   ./regtest.sh run-seq "^demo3" cpp
55 | }
56 |
57 | quick() {
58 |   quick-python  # Python client demo first,
59 |   quick-cpp     # then the C++ client demo
60 | }
61 |
62 | # TODO: Port these old bad cases to regtest_spec.py.
63 |
64 | # Running the demo of the exponential distribution with 10000 reports (x7,
65 | # which is 70000 values).
66 | #
67 | # - There are 50 real values, but we add 1000 more candidates, to get 1050 candidates.
68 | # - And then we remove the two most common strings, v1 and v2.
69 | # - With the current analysis, we are getting sum(proportion) = 1.1 to 1.7
70 |
71 | # TODO: Make this sharper by including only one real value?
72 |
73 | bad-case() {
74 |   local extra_candidates=${1:-1000}  # candidates added on top of the real values
75 |   run-dist exp 10000 $extra_candidates 'v1|v2'
76 | }
77 |
78 | # Force it to be less than 1
79 | pcls-test() {
80 |   USE_PCLS=1 bad-case  # same bad case, run with USE_PCLS=1 set for the call
81 | }
82 |
83 | # Only add 10 more candidates. Then we properly get the 0.48 proportion.
84 | ok-case() {
85 |   run-dist exp 10000 10 "v1|v2"
86 | }
87 |
88 | if [ "$#" -eq 0 ]; then
89 |   quick  # no arguments: run both quick demos
90 | else
91 |   "$@"   # otherwise the arguments are a function name plus its arguments
92 | fi
93 |
--------------------------------------------------------------------------------
/ui/metric.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | Metric Results
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
17 |
18 |
19 |
73 |
74 |
75 |
81 |
82 |
83 |
84 |
--------------------------------------------------------------------------------
/bin/sum_bits.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python
2 | #
3 | # Copyright 2014 Google Inc. All rights reserved.
4 | #
5 | # Licensed under the Apache License, Version 2.0 (the "License");
6 | # you may not use this file except in compliance with the License.
7 | # You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 |
17 | """
18 | Read the RAPPOR'd values on stdin, and sum the bits to produce a Counting Bloom
19 | filter by cohort. This can then be analyzed by R.
20 | """
21 |
22 | import csv
23 | import sys
24 |
25 | import rappor
26 |
27 |
28 | def SumBits(params, stdin, stdout):
29 | csv_in = csv.reader(stdin)
30 | csv_out = csv.writer(stdout)
31 |
32 | num_cohorts = params.num_cohorts
33 | num_bloombits = params.num_bloombits
34 |
35 | sums = [[0] * num_bloombits for _ in xrange(num_cohorts)]
36 | num_reports = [0] * num_cohorts
37 |
38 | for i, row in enumerate(csv_in):
39 | try:
40 | (user_id, cohort, unused_bloom, unused_prr, irr) = row
41 | except ValueError:
42 | raise RuntimeError('Error parsing row %r' % row)
43 |
44 | if i == 0:
45 | continue # skip header
46 |
47 | cohort = int(cohort)
48 | num_reports[cohort] += 1
49 |
50 | if not len(irr) == params.num_bloombits:
51 | raise RuntimeError(
52 | "Expected %d bits, got %r" % (params.num_bloombits, len(irr)))
53 | for i, c in enumerate(irr):
54 | bit_num = num_bloombits - i - 1 # e.g. char 0 = bit 15, char 15 = bit 0
55 | if c == '1':
56 | sums[cohort][bit_num] += 1
57 | else:
58 | if c != '0':
59 | raise RuntimeError('Invalid IRR -- digits should be 0 or 1')
60 |
61 | for cohort in xrange(num_cohorts):
62 | # First column is the total number of reports in the cohort.
63 | row = [num_reports[cohort]] + sums[cohort]
64 | csv_out.writerow(row)
65 |
66 |
67 | def main(argv):
68 | try:
69 | filename = argv[1]
70 | except IndexError:
71 | raise RuntimeError('Usage: sum_bits.py ')
72 | with open(filename) as f:
73 | try:
74 | params = rappor.Params.from_csv(f)
75 | except rappor.Error as e:
76 | raise RuntimeError(e)
77 |
78 | SumBits(params, sys.stdin, sys.stdout)
79 |
80 |
81 | if __name__ == '__main__':
82 | try:
83 | main(sys.argv)
84 | except RuntimeError, e:
85 | print >>sys.stderr, e.args[0]
86 | sys.exit(1)
87 |
--------------------------------------------------------------------------------
/client/cpp/rappor_deps.h:
--------------------------------------------------------------------------------
1 | // Copyright 2015 Google Inc. All rights reserved.
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | // This header declares the dependencies that an application must provide to
16 | // the RAPPOR encoder.
17 |
18 | #ifndef RAPPOR_DEPS_H_
19 | #define RAPPOR_DEPS_H_
20 |
21 | #include <stdint.h>  // for uint32_t
22 | #include <string>
23 | #include <vector>
24 |
25 | namespace rappor {
26 |
27 | // rappor::Bits type is used for Bloom Filter, PRR, and IRR
28 | typedef uint32_t Bits;
29 |
30 | // rappor::Encoder needs a hash function for the Bloom filter, and an HMAC
31 | // function for the PRR.  Both write their result into *output; the bool
32 | // return presumably signals success -- confirm against the implementations.
33 | typedef bool HashFunc(const std::string& value, std::vector<uint8_t>* output);
34 | typedef bool HmacFunc(const std::string& key, const std::string& value,
35 |                       std::vector<uint8_t>* output);
36 |
37 | // Interface that the encoder uses to generate randomness for the IRR.
38 | // Applications should implement this based on their platform and requirements.
39 | class IrrRandInterface {
40 |  public:
41 |   virtual ~IrrRandInterface() {}
42 |   // Fills *mask_out with a bitmask whose bits are each 1 with probability
43 |   // 'prob'.  A false return value reports an error.
44 |   virtual bool GetMask(float prob, int num_bits, Bits* mask_out) const = 0;
45 | };
46 |
47 | // Dependencies
48 | // - hash_func: hash function for the Bloom Filter client step
49 | // - client_secret: key for deterministic randomness in the PRR
50 | // - hmac_func: function for deterministic randomness in the PRR
51 | // - irr_rand: randomness for the IRR
52 |
53 | class Deps {
54 |  public:
55 |   // Stores the two function pointers and the irr_rand reference as given,
56 |   // and keeps its own copy of client_secret.  irr_rand is held by reference,
57 |   // so the caller must keep it alive for as long as this object is used.
58 |   Deps(HashFunc* const hash_func, const std::string& client_secret,
59 |        HmacFunc* const hmac_func, const IrrRandInterface& irr_rand)
60 |       : hash_func_(hash_func), client_secret_(client_secret),
61 |         hmac_func_(hmac_func), irr_rand_(irr_rand) {}
62 |
63 |  private:
64 |   friend class Encoder;  // the members below are private to everyone else
65 |
66 |   HashFunc* hash_func_;               // for bloom filter
67 |   const std::string client_secret_;   // for PRR; copy of constructor param
68 |   HmacFunc* hmac_func_;               // PRR
69 |   const IrrRandInterface& irr_rand_;  // IRR
70 | };
71 |
72 | } // namespace rappor
73 |
74 | #endif // RAPPOR_DEPS_H_
75 |
76 |
--------------------------------------------------------------------------------
/setup.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | #
3 | # Setup RAPPOR analysis on Ubuntu Trusty (Google Cloud or otherwise).
4 | #
5 | # For the apps/api server, you need 'install-minimal'. For the regtest, and
6 | # Shiny apps, we need a few more R packages (ggplot2, data.table, etc.). They
7 | # cause versioning problems, so we keep them separate.
8 | #
9 | # Usage:
10 | # ./setup.sh [function name]
11 | # If run without specifying any function it will run: install-most
12 | # which should cover all the packages needed to run the demo.
13 |
14 | # Strict mode: fail on unset variables, on a failure anywhere in a pipeline,
15 | # and on any command that exits non-zero.
16 | set -o nounset -o pipefail -o errexit
17 |
18 | native-packages() {
19 | sudo apt-get update
20 | # - build-essential for gcc compilers, invoked while installing R packages.
21 | # - gfortran Fortran compiler needed for glmnet.
22 | # - libblas-dev needed for limSolve.
23 | # - python-dev is for building the fastrand extension
24 | #
25 | # NOTE: we get R 3.0.2 on Trusty.
26 | sudo apt-get install build-essential gfortran libblas-dev r-base python-dev graphviz
27 | }
28 |
29 | r-packages() {
30 |   # What each package is for:
31 |   #   glmnet, limSolve: solvers for decode.R
32 |   #   RJSONIO, optparse: for decode_dist.R
33 |   #   RUnit: for unit tests
34 |   #   abind: for decode_test only
35 |   local r_expr='install.packages(c("glmnet", "optparse", "limSolve", "RUnit", "abind", "RJSONIO"), repos="http://cran.rstudio.com/")'
36 |
37 |   sudo R -e "$r_expr"  # as root, so you can write to /usr/local/lib/R
38 | }
39 |
40 | # R 3.0.2 on Trusty is out of date with CRAN, so we need this workaround.
41 | install-plyr-with-friends() {
42 |   local archive=http://cran.r-project.org/src/contrib/Archive
43 |   mkdir -p _tmp
44 |   # Fetch both pinned tarballs first, then install Rcpp ahead of plyr.
45 |   wget --directory _tmp "$archive/Rcpp/Rcpp_0.11.4.tar.gz"
46 |   wget --directory _tmp "$archive/plyr/plyr_1.8.1.tar.gz"
47 |   sudo R CMD INSTALL _tmp/Rcpp_0.11.4.tar.gz
48 |   sudo R CMD INSTALL _tmp/plyr_1.8.1.tar.gz
49 |   sudo R -e \
50 |     'install.packages(c("reshape2", "ggplot2", "data.table"), repos="http://cran.rstudio.com/")'
51 | }
52 |
53 | # Keep Shiny separate, since it seems to install a lot of dependencies.
54 | shiny() {
55 |   local r_expr='install.packages(c("shiny"), repos="http://cran.rstudio.com/")'
56 |   sudo R -e "$r_expr"
57 | }
58 |
59 | #
60 | # Batch
61 | #
62 |
63 | install-minimal() {
64 |   native-packages  # apt packages first: they provide R and the compilers
65 |   r-packages
66 | }
67 |
68 | # NOTE: hasn't yet been tested on a clean machine.
69 | install-most() {
70 |   install-minimal
71 |   install-plyr-with-friends  # Rcpp, plyr, reshape2, ggplot2 and data.table
72 | }
73 |
74 | #
75 | # Shiny Apps / API Server
76 | #
77 |
78 | # After running one of the run_app.sh scripts, see if the app returns a page.
79 | shiny-smoke-test() {
80 |   curl "http://localhost:6789/"
81 | }
82 |
83 | # Then set up a "firewall rule" in console.developers.google.com to open up
84 | # "tcp:6789". Test it from the outside.
85 |
86 | if [ "$#" -eq 0 ]; then
87 |   install-most  # default when no function name is given
88 | else
89 |   "$@"          # otherwise the arguments are a function name plus its arguments
90 | fi
91 |
--------------------------------------------------------------------------------
/tests/gen_true_values.R:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env Rscript
2 | #
3 | # Copyright 2015 Google Inc. All rights reserved.
4 | #
5 | # Licensed under the Apache License, Version 2.0 (the "License");
6 | # you may not use this file except in compliance with the License.
7 | # You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 |
17 | source('tests/gen_counts.R')
18 |
19 | # Usage:
20 | #
21 | # $ ./gen_true_values.R exp 100 10000 1 64 foo.csv
22 | #
23 | # Inputs:
24 | # distribution name
25 | # size of the distribution's support
26 | # number of clients
27 | # reports per client
28 | # number of cohorts
29 | # name of the output file
30 | # Output: csv file with reports sampled according to the specified distribution.
31 |
32 | GenerateTrueValues <- function(distr, distr_range, num_clients,
33 |                                reports_per_client, num_cohorts) {
34 |   # Returns a data frame with one row per report: client ("c<k>"), cohort
35 |   # (0-based, client number modulo num_cohorts) and value ("v<k>").
36 |   # pdf sums to 1.0, e.g. [0.2 0.2 0.2 0.2 0.2] for uniform distribution of 5.
37 |   pdf <- ComputePdf(distr, distr_range)
38 |   num_reports <- num_clients * reports_per_client
39 |
40 |   # Computes the number of clients reporting each value, where the numbers are
41 |   # sampled according to pdf. (sums to num_reports)
42 |   partition <- RandomPartition(num_reports, pdf)
43 |
44 |   # Expand partition.  seq_len() rather than 1:n, so that a size of zero
45 |   # gives an empty vector instead of c(1, 0).
46 |   value_ints <- rep(seq_len(distr_range), partition)
47 |   stopifnot(length(value_ints) == num_reports)
48 |
49 |   # Shuffle values randomly (may take a few sec for > 10^8 inputs).  Shuffle
50 |   # by index: sample(x) on a single number x would draw from 1:x instead.
51 |   value_ints <- value_ints[sample.int(length(value_ints))]
52 |
53 |   # Reported values are strings, so prefix integers "v". Even slower than
54 |   # shuffling.
55 |   values <- sprintf("v%d", value_ints)
56 |
57 |   # e.g. [1 1 2 2 3 3] if num_clients is 3 and reports_per_client is 2
58 |   client_ints <- rep(seq_len(num_clients), each = reports_per_client)
59 |
60 |   # Cohorts are assigned to clients. Cohorts are 0-based.
61 |   cohorts <- client_ints %% num_cohorts  # %% is integer modulus
62 |   clients <- sprintf("c%d", client_ints)
63 |   data.frame(client = clients, cohort = cohorts, value = values)
64 | }
65 |
66 | main <- function(argv) {
67 |   # argv: distr, distr_range, num_clients, reports_per_client, num_cohorts, file
68 |   distr <- argv[[1]]
69 |   distr_range <- as.integer(argv[[2]])
70 |   num_clients <- as.integer(argv[[3]])
71 |   reports_per_client <- as.integer(argv[[4]])
72 |   num_cohorts <- as.integer(argv[[5]])
73 |   out_file <- argv[[6]]
74 |
75 |   write.csv(GenerateTrueValues(distr, distr_range, num_clients,
76 |                                reports_per_client, num_cohorts),
77 |             file = out_file, row.names = FALSE, quote = FALSE)
78 | }
79 |
80 | if (length(sys.frames()) == 0L) {  # no enclosing call frames: run as a script
81 |   main(commandArgs(trailingOnly = TRUE))
82 | }
83 |
--------------------------------------------------------------------------------
/pipeline/alarm-lib.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | #
3 | # Alarm tool.
4 | #
5 | # Usage:
6 | # ./alarm-lib.sh <function name> [args...]
7 |
8 | # You can source this file and use the alarm-status function.
9 |
10 | # Strict mode: fail on unset variables, on a failure anywhere in a pipeline,
11 | # and on any command that exits non-zero.
12 | set -o nounset -o pipefail -o errexit
13 |
14 | # Run a command with a timeout, and print its status to a directory.
15 | #
16 | # Usage:
17 | # alarm-status job_dir/STATUS 10 \
18 | # flaky_command ...
19 |
20 | alarm-status() {
21 |   set +o errexit  # a failing child must not abort this shell
22 |   local status_file=$1
23 |   shift  # everything except the status file goes to perl
24 |
25 |   # NOTE: It would be nice to setpgrp() before exec?  And then can the signal
26 |   # be delivered to the entire group, like kill -SIGALRM -PID?
27 |
28 |   # NOTE: If we did this in Python, the error message would also be clearer.
29 |   perl -e 'alarm shift; exec @ARGV or die "ERROR: after exec @ARGV"' "$@"
30 |   local exit_code=$?
31 |
32 |   set -o errexit
33 |
34 |   local result=''
35 |   case $exit_code in
36 |     0)
37 |       # Would be nice to show elapsed time?
38 |       result='OK'
39 |       ;;
40 |     9)
41 |       # decode_assoc.R will exit 9 if there are no reports AFTER
42 |       # --remove-bad-rows.  A task can also be marked SKIPPED before running
43 |       # the child process (see backfill.sh).
44 |       result='SKIPPED by child process'
45 |       ;;
46 |     # exit code 142 means SIGALRM.  128 + 14 = 142.  See 'kill -l'.
47 |     142)
48 |       local seconds=$1  # first remaining argument is the timeout
49 |       result="TIMEOUT after $seconds seconds"
50 |       ;;
51 |     *)
52 |       result="FAIL with status $exit_code"
53 |       ;;
54 |   esac
55 |   echo "$result"
56 |   echo "$result" > "$status_file"  # quoted: the path may contain spaces
57 | }
58 |
59 | _work() {
60 |   local n=10  # 10 iterations x 0.2 s = 2 seconds
61 |   for i in $(seq 1 $n); do
62 |     echo $i - "$@"
63 |     sleep 0.2
64 |   done
65 | }
66 |
67 | _succeed() {
68 |   _work "$@"
69 |   exit 0  # ends the whole process with success
70 | }
71 |
72 | _fail() {
73 |   _work "$@"
74 |   exit 1  # ends the whole process with a generic failure status
75 | }
76 |
77 | _skip() {
78 |   exit 9  # the status that alarm-status reports as SKIPPED
79 | }
80 |
81 | # http://perldoc.perl.org/functions/alarm.html
82 | #
83 | # Delivers alarm. But how to get the process to have a distinct exit code?
84 |
85 | demo() {
86 |   mkdir -p _tmp
87 |
88 |   # A: 1 second limit on about 2 seconds of work -> timeout
89 |   alarm-status _tmp/A 1 $0 _succeed foo
90 |   echo
91 |
92 |   # B: 3 second limit, child exits 0 -> ok
93 |   alarm-status _tmp/B 3 $0 _succeed bar
94 |   echo
95 |
96 |   # C: child exits 1 -> fail
97 |   alarm-status _tmp/C 3 $0 _fail baz
98 |   echo
99 |
100 |   # D: child exits 9 -> skip
101 |   alarm-status _tmp/D 3 $0 _skip baz
102 |   echo
103 |
104 |   head _tmp/{A,B,C,D}  # show the four recorded status files
105 | }
106 |
107 | test-simple() {
108 |   alarm-status _tmp/status.txt 1 sleep 2  # 1 second limit on a 2 second sleep
109 | }
110 |
111 | test-bad-command() {
112 |   alarm-status _tmp/status.txt 1 nonexistent_sleep 2  # command that cannot exec
113 | }
114 |
115 | # BUG
116 | test-perl() {
117 |   set +o errexit  # let the failing perl command through so $? can be printed
118 |   perl -e 'alarm shift; exec @ARGV or die "ERROR after exec @ARGV"' 1 _sleep 2
119 |   echo $?
120 | }
121 |
122 | if test "$(basename "$0")" = 'alarm-lib.sh'; then  # run directly, not sourced
123 |   "$@"
124 | fi
125 |
--------------------------------------------------------------------------------
/analysis/R/alternative.R:
--------------------------------------------------------------------------------
1 | # Copyright 2014 Google Inc. All rights reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | library(limSolve)
16 | library(Matrix)
17 |
18 | # The next two functions create a matrix (G) and a vector (H) encoding
19 | # linear inequality constraints that a solution vector (x) must satisfy:
20 | # G * x >= H
21 |
22 | # Currently represent three sets of constraints on the solution vector:
23 | # - all solution coefficients are nonnegative
24 | # - the sum total of all solution coefficients is no more than 1
25 | # - in each of the coordinates of the target vector (estimated Bloom filter)
26 | # we don't overshoot by more than three standard deviations.
27 | MakeG <- function(n, X) {
28 |   nonneg <- Diagonal(n)  # rows for x_i >= 0
29 |   total <- rep(-1, n)    # row for -sum(x) >= -1, i.e. sum(x) <= 1
30 |   rbind2(rbind2(nonneg, total), -X)  # final rows: -X, paired with MakeH bounds
31 | }
32 |
33 | MakeH <- function(n, Y, stds) {
34 |   # Upper bound per bin: at most 3 stds above the estimate, clamped to
35 |   # [0.01, 1]; the 0.01 floor avoids degenerate cases.
36 |   Clamp <- function(x) min(1, max(0.01, x))
37 |   upper <- apply(Y + 3 * stds, 1:2, Clamp)
38 |
39 |   c(rep(0, n),            # non-negativity condition
40 |     -1,                   # coefficients sum up to no more than 1
41 |     -as.vector(t(upper))  # t is important!
42 |   )
43 | }
44 |
45 | MakeLseiModel <- function(X, Y, stds) {
46 |   m <- nrow(X)
47 |   n <- ncol(X)
48 |
49 |   # no slack variables for now
50 |   # slack <- Matrix(FALSE, nrow = m, ncol = m, sparse = TRUE)
51 |   # colnames(slack) <- 1:m
52 |   # diag(slack) <- TRUE
53 |   #
54 |   # G <- MakeG(n + m)
55 |   # H <- MakeH(n + m)
56 |   #
57 |   # G[n+m+1,n:(n+m)] <- -0.1
58 |   # A = cbind2(X, slack)
59 |
60 |   # Weights 1 / std, capped at twice the median finite weight, scaled to mean 1.
61 |   weights <- as.vector(t(1 / stds))
62 |   cap <- 2 * median(weights[!is.infinite(weights)])
63 |   if (is.na(cap)) cap <- 2  # all weights are infinite
64 |   weights[weights > cap] <- cap
65 |   weights <- weights / mean(weights)
66 |
67 |   list(A = Diagonal(x = weights) %*% (X + 0),  # X + 0: sparse Boolean -> numeric
68 |        B = as.vector(t(Y)) * weights,          # Y as a vector, row-first order
69 |        G = MakeG(n, X),
70 |        H = MakeH(n, Y, stds),
71 |        # Since there are no equality constraints, lsei defaults to solve.QP
72 |        # anyway, but outputs a warning unless type == 2.
73 |        type = 2)
74 | }
75 |
76 | # CustomLM(X, Y)
77 | ConstrainedLinModel <- function(X, Y) {
78 |   # Y supplies $estimates and $stds.  Returns the X component of lsei's
79 |   # result, named after the columns of X.
80 |   model <- MakeLseiModel(X, Y$estimates, Y$stds)
81 |   solution <- do.call(lsei, model)
82 |   setNames(solution$X, colnames(X))
83 | }
--------------------------------------------------------------------------------
/tests/regtest.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | RAPPOR regtest.sh (_IMPL_)
5 |
17 |
18 |
19 |
20 |
21 |
22 |