├── .gitattributes
├── .github
    └── workflows
    │   └── generate-bloomfilter.yml
├── .gitignore
├── LICENSE
├── Makefile
├── README.md
├── add-latest.js
├── background.html
├── background.js
├── bloom-filter
    ├── bloom-create.c
    ├── bloom-js-export.c
    ├── bloom.c
    ├── bloom.h
    ├── murmur.c
    └── murmur.h
├── bloom-wrap.js
├── canonicalize.py
├── doc
    ├── range-chart.svg
    ├── scores-chart.svg
    └── screenshot.png
├── manifest.json
├── options.html
├── options.js
└── test
    ├── bloom-test.c
    ├── murmur-test.c
    └── test-template.html


/.gitattributes:
--------------------------------------------------------------------------------
1 | # The workflows are basically just shell scripts
2 | .github/workflows/*.yml linguist-language=Shell
3 | 


--------------------------------------------------------------------------------
/.github/workflows/generate-bloomfilter.yml:
--------------------------------------------------------------------------------
  1 | # Generate Bloom filters with submitted Hacker News stories periodically and
  2 | # on-command for use in the accompanying browser extension.
  3 | #
  4 | # Created by Jacob Strieb
  5 | # January 2021
  6 | 
  7 | 
  8 | name: Generate Bloom Filters
  9 | 
 10 | # Run once every 24 hours because the BigQuery database is updated once every
 11 | # 24 hours. Also run when manually triggered.
 12 | on:
 13 |   schedule:
 14 |     # This time is based loosely around when the BigQuery data updates
 15 |     - cron: "5 16 * * *"
 16 |   workflow_dispatch:
 17 | 
 18 | 
 19 | jobs:
 20 |   create-bloomfilter:
 21 |     name: Create Bloom Filters
 22 |     runs-on: ubuntu-latest
 23 |     steps:
 24 |       - name: Install dependencies
 25 |         run: |
 26 |           # Refresh the package index so the install does not fail on a stale
 27 |           # runner image; -y keeps apt-get from ever prompting
 28 |           sudo apt-get update
 29 |           sudo apt-get install -y jo
 30 | 
 31 |       - uses: actions/checkout@v2
 32 | 
 33 |       - name: Set up BigQuery
 34 |         env:
 35 |           # This BQ_JSON key is obtained by downloading the JSON key for a
 36 |           # service account on BigQuery. It is then copied directly into a
 37 |           # GitHub Actions secret (under Settings). It is handed to the script
 38 |           # through the environment instead of being pasted into the script
 39 |           # text, so quotes inside the key cannot break or alter the command
 40 |           BQ_JSON: ${{ secrets.BQ_JSON }}
 41 |         run: |
 42 |           printf '%s\n' "$BQ_JSON" > bq.json
 43 |           gcloud auth \
 44 |             activate-service-account \
 45 |             --key-file bq.json
 46 |           # bq prints a welcome message the first time it runs with new
 47 |           # credentials, so we do a dummy call here to keep that message out
 48 |           # of the CSV we are attempting to process
 49 |           bq query \
 50 |             --project_id=hacker-news-analysis-300719 \
 51 |             --use_legacy_sql=false \
 52 |             'SELECT COUNT(*) FROM `bigquery-public-data.hacker_news.full`
 53 |             WHERE type = "story"'
 54 | 
 55 |       - name: Compile Bloom filter creation program
 56 |         run: |
 57 |           make create
 58 | 
 59 |       - name: Pull BigQuery data
 60 |         run: |
 61 |           # Note that max_rows must be an unsigned, 32-bit int
 62 |           bq query \
 63 |             --project_id=hacker-news-analysis-300719 \
 64 |             --format csv \
 65 |             --max_rows 99999999 \
 66 |             --use_legacy_sql=false \
 67 |             'SELECT
 68 |               url,
 69 |               score,
 70 |               descendants,
 71 |               time
 72 |             FROM
 73 |               `bigquery-public-data.hacker_news.full`
 74 |             WHERE
 75 |               type = "story"
 76 |               AND dead IS NOT TRUE
 77 |               AND deleted IS NOT TRUE
 78 |               AND url != ""' \
 79 |             > data.csv
 80 |           head -n 50 data.csv
 81 | 
 82 |           # Put the data in a sqlite3 database
 83 |           sqlite3 \
 84 |             data.db \
 85 |             ".mode csv" \
 86 |             ".import data.csv hn"
 87 | 
 88 |       - name: Generate Bloom Filters
 89 |         run: |
 90 |           # Get the time of the last submission
 91 |           LATEST="$(sqlite3 \
 92 |             -csv \
 93 |             data.db \
 94 |             "SELECT CAST(time AS INT) AS inttime
 95 |             FROM hn ORDER BY inttime DESC LIMIT 1")"
 96 |           # TODO: Make this accurate -- not sure why but it's inaccurate now
 97 |           printf \
 98 |             "Last story submitted %d hours ago\n" \
 99 |             "$(( ($(date +%s) - LATEST) / 3600 ))"
100 | 
101 |           # Generate one Bloom filter for each date range/threshold combination
102 |           # Used as sqlite3 date modifiers:
103 |           # https://sqlite.org/lang_datefunc.html
104 |           DATE_RANGES=(
105 |             # "-1 month"
106 |             "-7 days"
107 |             "-24 hours"
108 |           )
109 | 
110 |           THRESHOLDS=(
111 |             0
112 |             10
113 |             75
114 |             250
115 |             500
116 |           )
117 | 
118 |           # Apply the threshold to points (score) or comments (descendants)
119 |           THRESHOLD_KEY="score"
120 | 
121 |           mkdir -p generated
122 | 
123 |           for THRESHOLD in "${THRESHOLDS[@]}"; do
124 |             FILENAME="hn-$THRESHOLD.bloom"
125 | 
126 |             # Make a Bloom filter for each threshold with no time restriction
127 |             echo "Creating bloom filter for stories from all time and" \
128 |               "$THRESHOLD+ $THRESHOLD_KEY. Writing to $FILENAME..."
129 | 
130 |             sqlite3 \
131 |               -header \
132 |               -csv \
133 |               data.db \
134 |               "SELECT * FROM hn
135 |               WHERE CAST($THRESHOLD_KEY AS INT) >= $THRESHOLD" \
136 |               | python3 canonicalize.py \
137 |               | bin/bloom-create "generated/$FILENAME"
138 | 
139 |             # For the current threshold, make a bloom filter for each date range
140 |             for DATE_RANGE in "${DATE_RANGES[@]}"; do
141 |               # Strip dashes and spaces, e.g. "-7 days" becomes "7days"
142 |               DATE_RANGE_STR="${DATE_RANGE//[- ]/}"
143 |               FILENAME="hn-$DATE_RANGE_STR-$THRESHOLD.bloom"
144 | 
145 |               echo "Creating bloom filter for $DATE_RANGE and" \
146 |                 "$THRESHOLD+ $THRESHOLD_KEY. Writing to $FILENAME..."
147 | 
148 |               sqlite3 \
149 |                 -header \
150 |                 -csv \
151 |                 data.db \
152 |                 "SELECT * FROM hn
153 |                 WHERE CAST($THRESHOLD_KEY AS INT) >= $THRESHOLD
154 |                 AND CAST(time AS INT) > strftime('%s', (
155 |                   SELECT CAST(time AS INT) AS inttime FROM hn
156 |                   ORDER BY inttime DESC LIMIT 1
157 |                 ), 'unixepoch', '$DATE_RANGE')" \
158 |                 | python3 canonicalize.py \
159 |                 | bin/bloom-create "generated/$FILENAME"
160 |             done
161 |           done
162 | 
163 |           # Output a JSON file with information about the thresholds and dates
164 |           #
165 |           # TODO: Fix to use $LATEST instead of now
166 |           DATES=()
167 |           for DATE_RANGE in "${DATE_RANGES[@]}"; do
168 |             DATE_RANGE_STR="${DATE_RANGE//[- ]/}"
169 |             DATE="$(date -d "now $DATE_RANGE" +%s)"
170 |             DATES+=("$DATE=$DATE_RANGE_STR")
171 |           done
172 | 
173 |           # 24 hours used in next_generated since this runs every 24 hours and
174 |           # takes 10 to 20 minutes to complete. last_submitted reuses $LATEST,
175 |           # which was read from data.db at the top of this step.
176 |           jo \
177 |             -p \
178 |             thresholds="$(jo -a "${THRESHOLDS[@]}")" \
179 |             dates="$(jo "${DATES[@]}")" \
180 |             version="0.6" \
181 |             compressed="true" \
182 |             date_generated="$(date +%s)" \
183 |             next_generated="$(date -d "now +24 hours +20 minutes" +%s)" \
184 |             last_submitted="$LATEST" \
185 |             > generated/info.json
186 | 
187 |       - name: Create Release
188 |         env:
189 |           GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
190 |         run: |
191 |           cd generated
192 | 
193 |           # Keep the latest extension in the next release. --fail makes curl
194 |           # exit non-zero on an HTTP error instead of saving the error page as
195 |           # the .xpi and uploading it.
196 |           curl \
197 |             --fail \
198 |             --location \
199 |             --output "hackernews-button.xpi" \
200 |             "https://github.com/jstrieb/hackernews-button/releases/latest/download/hackernews-button.xpi"
201 | 
202 |           # TODO: Fix fatal name collision failure if run more than once a day?
203 |           gh release create \
204 |             "$(date +%Y%m%d)" \
205 |             --title "$(date '+%b %d, %Y') Bloom filters" \
206 |             --notes "Updated Bloom filters for Hacker News submitted stories for $(date '+%B %d, %^Y')" \
207 |             *
208 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | # Vim swap files
 2 | *.swp
 3 | *.swo
 4 | 
 5 | # Generated binaries and other files
 6 | bin
 7 | hackernews-button.zip
 8 | ycombinator-logo.jpg
 9 | icons
10 | bloom.js
11 | bloom.wasm
12 | *.xpi
13 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
  1 | GNU GENERAL PUBLIC LICENSE
  2 | Version 3, 29 June 2007
  3 | 
  4 | Copyright (C) 2007 Free Software Foundation, Inc.
  5 | Everyone is permitted to copy and distribute verbatim copies
  6 | of this license document, but changing it is not allowed.
  7 | 
  8 | Preamble
  9 | 
 10 | The GNU General Public License is a free, copyleft license for
 11 | software and other kinds of works.
 12 | 
 13 | The licenses for most software and other practical works are designed
 14 | to take away your freedom to share and change the works. By contrast,
 15 | the GNU General Public License is intended to guarantee your freedom to
 16 | share and change all versions of a program--to make sure it remains free
 17 | software for all its users. We, the Free Software Foundation, use the
 18 | GNU General Public License for most of our software; it applies also to
 19 | any other work released this way by its authors. You can apply it to
 20 | your programs, too.
 21 | 
 22 | When we speak of free software, we are referring to freedom, not
 23 | price.
Our General Public Licenses are designed to make sure that you 24 | have the freedom to distribute copies of free software (and charge for 25 | them if you wish), that you receive source code or can get it if you 26 | want it, that you can change the software or use pieces of it in new 27 | free programs, and that you know you can do these things. 28 | 29 | To protect your rights, we need to prevent others from denying you 30 | these rights or asking you to surrender the rights. Therefore, you have 31 | certain responsibilities if you distribute copies of the software, or if 32 | you modify it: responsibilities to respect the freedom of others. 33 | 34 | For example, if you distribute copies of such a program, whether 35 | gratis or for a fee, you must pass on to the recipients the same 36 | freedoms that you received. You must make sure that they, too, receive 37 | or can get the source code. And you must show them these terms so they 38 | know their rights. 39 | 40 | Developers that use the GNU GPL protect your rights with two steps: 41 | (1) assert copyright on the software, and (2) offer you this License 42 | giving you legal permission to copy, distribute and/or modify it. 43 | 44 | For the developers' and authors' protection, the GPL clearly explains 45 | that there is no warranty for this free software. For both users' and 46 | authors' sake, the GPL requires that modified versions be marked as 47 | changed, so that their problems will not be attributed erroneously to 48 | authors of previous versions. 49 | 50 | Some devices are designed to deny users access to install or run 51 | modified versions of the software inside them, although the manufacturer 52 | can do so. This is fundamentally incompatible with the aim of 53 | protecting users' freedom to change the software. The systematic 54 | pattern of such abuse occurs in the area of products for individuals to 55 | use, which is precisely where it is most unacceptable. 
Therefore, we 56 | have designed this version of the GPL to prohibit the practice for those 57 | products. If such problems arise substantially in other domains, we 58 | stand ready to extend this provision to those domains in future versions 59 | of the GPL, as needed to protect the freedom of users. 60 | 61 | Finally, every program is threatened constantly by software patents. 62 | States should not allow patents to restrict development and use of 63 | software on general-purpose computers, but in those that do, we wish to 64 | avoid the special danger that patents applied to a free program could 65 | make it effectively proprietary. To prevent this, the GPL assures that 66 | patents cannot be used to render the program non-free. 67 | 68 | The precise terms and conditions for copying, distribution and 69 | modification follow. 70 | 71 | TERMS AND CONDITIONS 72 | 73 | 0. Definitions. 74 | 75 | "This License" refers to version 3 of the GNU General Public License. 76 | 77 | "Copyright" also means copyright-like laws that apply to other kinds of 78 | works, such as semiconductor masks. 79 | 80 | "The Program" refers to any copyrightable work licensed under this 81 | License. Each licensee is addressed as "you". "Licensees" and 82 | "recipients" may be individuals or organizations. 83 | 84 | To "modify" a work means to copy from or adapt all or part of the work 85 | in a fashion requiring copyright permission, other than the making of an 86 | exact copy. The resulting work is called a "modified version" of the 87 | earlier work or a work "based on" the earlier work. 88 | 89 | A "covered work" means either the unmodified Program or a work based 90 | on the Program. 91 | 92 | To "propagate" a work means to do anything with it that, without 93 | permission, would make you directly or secondarily liable for 94 | infringement under applicable copyright law, except executing it on a 95 | computer or modifying a private copy. 
Propagation includes copying, 96 | distribution (with or without modification), making available to the 97 | public, and in some countries other activities as well. 98 | 99 | To "convey" a work means any kind of propagation that enables other 100 | parties to make or receive copies. Mere interaction with a user through 101 | a computer network, with no transfer of a copy, is not conveying. 102 | 103 | An interactive user interface displays "Appropriate Legal Notices" 104 | to the extent that it includes a convenient and prominently visible 105 | feature that (1) displays an appropriate copyright notice, and (2) 106 | tells the user that there is no warranty for the work (except to the 107 | extent that warranties are provided), that licensees may convey the 108 | work under this License, and how to view a copy of this License. If 109 | the interface presents a list of user commands or options, such as a 110 | menu, a prominent item in the list meets this criterion. 111 | 112 | 1. Source Code. 113 | 114 | The "source code" for a work means the preferred form of the work 115 | for making modifications to it. "Object code" means any non-source 116 | form of a work. 117 | 118 | A "Standard Interface" means an interface that either is an official 119 | standard defined by a recognized standards body, or, in the case of 120 | interfaces specified for a particular programming language, one that 121 | is widely used among developers working in that language. 122 | 123 | The "System Libraries" of an executable work include anything, other 124 | than the work as a whole, that (a) is included in the normal form of 125 | packaging a Major Component, but which is not part of that Major 126 | Component, and (b) serves only to enable use of the work with that 127 | Major Component, or to implement a Standard Interface for which an 128 | implementation is available to the public in source code form. 
A 129 | "Major Component", in this context, means a major essential component 130 | (kernel, window system, and so on) of the specific operating system 131 | (if any) on which the executable work runs, or a compiler used to 132 | produce the work, or an object code interpreter used to run it. 133 | 134 | The "Corresponding Source" for a work in object code form means all 135 | the source code needed to generate, install, and (for an executable 136 | work) run the object code and to modify the work, including scripts to 137 | control those activities. However, it does not include the work's 138 | System Libraries, or general-purpose tools or generally available free 139 | programs which are used unmodified in performing those activities but 140 | which are not part of the work. For example, Corresponding Source 141 | includes interface definition files associated with source files for 142 | the work, and the source code for shared libraries and dynamically 143 | linked subprograms that the work is specifically designed to require, 144 | such as by intimate data communication or control flow between those 145 | subprograms and other parts of the work. 146 | 147 | The Corresponding Source need not include anything that users 148 | can regenerate automatically from other parts of the Corresponding 149 | Source. 150 | 151 | The Corresponding Source for a work in source code form is that 152 | same work. 153 | 154 | 2. Basic Permissions. 155 | 156 | All rights granted under this License are granted for the term of 157 | copyright on the Program, and are irrevocable provided the stated 158 | conditions are met. This License explicitly affirms your unlimited 159 | permission to run the unmodified Program. The output from running a 160 | covered work is covered by this License only if the output, given its 161 | content, constitutes a covered work. This License acknowledges your 162 | rights of fair use or other equivalent, as provided by copyright law. 
163 | 164 | You may make, run and propagate covered works that you do not 165 | convey, without conditions so long as your license otherwise remains 166 | in force. You may convey covered works to others for the sole purpose 167 | of having them make modifications exclusively for you, or provide you 168 | with facilities for running those works, provided that you comply with 169 | the terms of this License in conveying all material for which you do 170 | not control copyright. Those thus making or running the covered works 171 | for you must do so exclusively on your behalf, under your direction 172 | and control, on terms that prohibit them from making any copies of 173 | your copyrighted material outside their relationship with you. 174 | 175 | Conveying under any other circumstances is permitted solely under 176 | the conditions stated below. Sublicensing is not allowed; section 10 177 | makes it unnecessary. 178 | 179 | 3. Protecting Users' Legal Rights From Anti-Circumvention Law. 180 | 181 | No covered work shall be deemed part of an effective technological 182 | measure under any applicable law fulfilling obligations under article 183 | 11 of the WIPO copyright treaty adopted on 20 December 1996, or 184 | similar laws prohibiting or restricting circumvention of such 185 | measures. 186 | 187 | When you convey a covered work, you waive any legal power to forbid 188 | circumvention of technological measures to the extent such circumvention 189 | is effected by exercising rights under this License with respect to 190 | the covered work, and you disclaim any intention to limit operation or 191 | modification of the work as a means of enforcing, against the work's 192 | users, your or third parties' legal rights to forbid circumvention of 193 | technological measures. 194 | 195 | 4. Conveying Verbatim Copies. 
196 | 197 | You may convey verbatim copies of the Program's source code as you 198 | receive it, in any medium, provided that you conspicuously and 199 | appropriately publish on each copy an appropriate copyright notice; 200 | keep intact all notices stating that this License and any 201 | non-permissive terms added in accord with section 7 apply to the code; 202 | keep intact all notices of the absence of any warranty; and give all 203 | recipients a copy of this License along with the Program. 204 | 205 | You may charge any price or no price for each copy that you convey, 206 | and you may offer support or warranty protection for a fee. 207 | 208 | 5. Conveying Modified Source Versions. 209 | 210 | You may convey a work based on the Program, or the modifications to 211 | produce it from the Program, in the form of source code under the 212 | terms of section 4, provided that you also meet all of these conditions: 213 | 214 | a) The work must carry prominent notices stating that you modified 215 | it, and giving a relevant date. 216 | 217 | b) The work must carry prominent notices stating that it is 218 | released under this License and any conditions added under section 219 | 7. This requirement modifies the requirement in section 4 to 220 | "keep intact all notices". 221 | 222 | c) You must license the entire work, as a whole, under this 223 | License to anyone who comes into possession of a copy. This 224 | License will therefore apply, along with any applicable section 7 225 | additional terms, to the whole of the work, and all its parts, 226 | regardless of how they are packaged. This License gives no 227 | permission to license the work in any other way, but it does not 228 | invalidate such permission if you have separately received it. 
229 | 230 | d) If the work has interactive user interfaces, each must display 231 | Appropriate Legal Notices; however, if the Program has interactive 232 | interfaces that do not display Appropriate Legal Notices, your 233 | work need not make them do so. 234 | 235 | A compilation of a covered work with other separate and independent 236 | works, which are not by their nature extensions of the covered work, 237 | and which are not combined with it such as to form a larger program, 238 | in or on a volume of a storage or distribution medium, is called an 239 | "aggregate" if the compilation and its resulting copyright are not 240 | used to limit the access or legal rights of the compilation's users 241 | beyond what the individual works permit. Inclusion of a covered work 242 | in an aggregate does not cause this License to apply to the other 243 | parts of the aggregate. 244 | 245 | 6. Conveying Non-Source Forms. 246 | 247 | You may convey a covered work in object code form under the terms 248 | of sections 4 and 5, provided that you also convey the 249 | machine-readable Corresponding Source under the terms of this License, 250 | in one of these ways: 251 | 252 | a) Convey the object code in, or embodied in, a physical product 253 | (including a physical distribution medium), accompanied by the 254 | Corresponding Source fixed on a durable physical medium 255 | customarily used for software interchange. 
256 | 257 | b) Convey the object code in, or embodied in, a physical product 258 | (including a physical distribution medium), accompanied by a 259 | written offer, valid for at least three years and valid for as 260 | long as you offer spare parts or customer support for that product 261 | model, to give anyone who possesses the object code either (1) a 262 | copy of the Corresponding Source for all the software in the 263 | product that is covered by this License, on a durable physical 264 | medium customarily used for software interchange, for a price no 265 | more than your reasonable cost of physically performing this 266 | conveying of source, or (2) access to copy the 267 | Corresponding Source from a network server at no charge. 268 | 269 | c) Convey individual copies of the object code with a copy of the 270 | written offer to provide the Corresponding Source. This 271 | alternative is allowed only occasionally and noncommercially, and 272 | only if you received the object code with such an offer, in accord 273 | with subsection 6b. 274 | 275 | d) Convey the object code by offering access from a designated 276 | place (gratis or for a charge), and offer equivalent access to the 277 | Corresponding Source in the same way through the same place at no 278 | further charge. You need not require recipients to copy the 279 | Corresponding Source along with the object code. If the place to 280 | copy the object code is a network server, the Corresponding Source 281 | may be on a different server (operated by you or a third party) 282 | that supports equivalent copying facilities, provided you maintain 283 | clear directions next to the object code saying where to find the 284 | Corresponding Source. Regardless of what server hosts the 285 | Corresponding Source, you remain obligated to ensure that it is 286 | available for as long as needed to satisfy these requirements. 
287 | 288 | e) Convey the object code using peer-to-peer transmission, provided 289 | you inform other peers where the object code and Corresponding 290 | Source of the work are being offered to the general public at no 291 | charge under subsection 6d. 292 | 293 | A separable portion of the object code, whose source code is excluded 294 | from the Corresponding Source as a System Library, need not be 295 | included in conveying the object code work. 296 | 297 | A "User Product" is either (1) a "consumer product", which means any 298 | tangible personal property which is normally used for personal, family, 299 | or household purposes, or (2) anything designed or sold for incorporation 300 | into a dwelling. In determining whether a product is a consumer product, 301 | doubtful cases shall be resolved in favor of coverage. For a particular 302 | product received by a particular user, "normally used" refers to a 303 | typical or common use of that class of product, regardless of the status 304 | of the particular user or of the way in which the particular user 305 | actually uses, or expects or is expected to use, the product. A product 306 | is a consumer product regardless of whether the product has substantial 307 | commercial, industrial or non-consumer uses, unless such uses represent 308 | the only significant mode of use of the product. 309 | 310 | "Installation Information" for a User Product means any methods, 311 | procedures, authorization keys, or other information required to install 312 | and execute modified versions of a covered work in that User Product from 313 | a modified version of its Corresponding Source. The information must 314 | suffice to ensure that the continued functioning of the modified object 315 | code is in no case prevented or interfered with solely because 316 | modification has been made. 
317 | 318 | If you convey an object code work under this section in, or with, or 319 | specifically for use in, a User Product, and the conveying occurs as 320 | part of a transaction in which the right of possession and use of the 321 | User Product is transferred to the recipient in perpetuity or for a 322 | fixed term (regardless of how the transaction is characterized), the 323 | Corresponding Source conveyed under this section must be accompanied 324 | by the Installation Information. But this requirement does not apply 325 | if neither you nor any third party retains the ability to install 326 | modified object code on the User Product (for example, the work has 327 | been installed in ROM). 328 | 329 | The requirement to provide Installation Information does not include a 330 | requirement to continue to provide support service, warranty, or updates 331 | for a work that has been modified or installed by the recipient, or for 332 | the User Product in which it has been modified or installed. Access to a 333 | network may be denied when the modification itself materially and 334 | adversely affects the operation of the network or violates the rules and 335 | protocols for communication across the network. 336 | 337 | Corresponding Source conveyed, and Installation Information provided, 338 | in accord with this section must be in a format that is publicly 339 | documented (and with an implementation available to the public in 340 | source code form), and must require no special password or key for 341 | unpacking, reading or copying. 342 | 343 | 7. Additional Terms. 344 | 345 | "Additional permissions" are terms that supplement the terms of this 346 | License by making exceptions from one or more of its conditions. 347 | Additional permissions that are applicable to the entire Program shall 348 | be treated as though they were included in this License, to the extent 349 | that they are valid under applicable law. 
If additional permissions 350 | apply only to part of the Program, that part may be used separately 351 | under those permissions, but the entire Program remains governed by 352 | this License without regard to the additional permissions. 353 | 354 | When you convey a copy of a covered work, you may at your option 355 | remove any additional permissions from that copy, or from any part of 356 | it. (Additional permissions may be written to require their own 357 | removal in certain cases when you modify the work.) You may place 358 | additional permissions on material, added by you to a covered work, 359 | for which you have or can give appropriate copyright permission. 360 | 361 | Notwithstanding any other provision of this License, for material you 362 | add to a covered work, you may (if authorized by the copyright holders of 363 | that material) supplement the terms of this License with terms: 364 | 365 | a) Disclaiming warranty or limiting liability differently from the 366 | terms of sections 15 and 16 of this License; or 367 | 368 | b) Requiring preservation of specified reasonable legal notices or 369 | author attributions in that material or in the Appropriate Legal 370 | Notices displayed by works containing it; or 371 | 372 | c) Prohibiting misrepresentation of the origin of that material, or 373 | requiring that modified versions of such material be marked in 374 | reasonable ways as different from the original version; or 375 | 376 | d) Limiting the use for publicity purposes of names of licensors or 377 | authors of the material; or 378 | 379 | e) Declining to grant rights under trademark law for use of some 380 | trade names, trademarks, or service marks; or 381 | 382 | f) Requiring indemnification of licensors and authors of that 383 | material by anyone who conveys the material (or modified versions of 384 | it) with contractual assumptions of liability to the recipient, for 385 | any liability that these contractual assumptions directly impose on 
386 | those licensors and authors. 387 | 388 | All other non-permissive additional terms are considered "further 389 | restrictions" within the meaning of section 10. If the Program as you 390 | received it, or any part of it, contains a notice stating that it is 391 | governed by this License along with a term that is a further 392 | restriction, you may remove that term. If a license document contains 393 | a further restriction but permits relicensing or conveying under this 394 | License, you may add to a covered work material governed by the terms 395 | of that license document, provided that the further restriction does 396 | not survive such relicensing or conveying. 397 | 398 | If you add terms to a covered work in accord with this section, you 399 | must place, in the relevant source files, a statement of the 400 | additional terms that apply to those files, or a notice indicating 401 | where to find the applicable terms. 402 | 403 | Additional terms, permissive or non-permissive, may be stated in the 404 | form of a separately written license, or stated as exceptions; 405 | the above requirements apply either way. 406 | 407 | 8. Termination. 408 | 409 | You may not propagate or modify a covered work except as expressly 410 | provided under this License. Any attempt otherwise to propagate or 411 | modify it is void, and will automatically terminate your rights under 412 | this License (including any patent licenses granted under the third 413 | paragraph of section 11). 414 | 415 | However, if you cease all violation of this License, then your 416 | license from a particular copyright holder is reinstated (a) 417 | provisionally, unless and until the copyright holder explicitly and 418 | finally terminates your license, and (b) permanently, if the copyright 419 | holder fails to notify you of the violation by some reasonable means 420 | prior to 60 days after the cessation. 
421 | 422 | Moreover, your license from a particular copyright holder is 423 | reinstated permanently if the copyright holder notifies you of the 424 | violation by some reasonable means, this is the first time you have 425 | received notice of violation of this License (for any work) from that 426 | copyright holder, and you cure the violation prior to 30 days after 427 | your receipt of the notice. 428 | 429 | Termination of your rights under this section does not terminate the 430 | licenses of parties who have received copies or rights from you under 431 | this License. If your rights have been terminated and not permanently 432 | reinstated, you do not qualify to receive new licenses for the same 433 | material under section 10. 434 | 435 | 9. Acceptance Not Required for Having Copies. 436 | 437 | You are not required to accept this License in order to receive or 438 | run a copy of the Program. Ancillary propagation of a covered work 439 | occurring solely as a consequence of using peer-to-peer transmission 440 | to receive a copy likewise does not require acceptance. However, 441 | nothing other than this License grants you permission to propagate or 442 | modify any covered work. These actions infringe copyright if you do 443 | not accept this License. Therefore, by modifying or propagating a 444 | covered work, you indicate your acceptance of this License to do so. 445 | 446 | 10. Automatic Licensing of Downstream Recipients. 447 | 448 | Each time you convey a covered work, the recipient automatically 449 | receives a license from the original licensors, to run, modify and 450 | propagate that work, subject to this License. You are not responsible 451 | for enforcing compliance by third parties with this License. 452 | 453 | An "entity transaction" is a transaction transferring control of an 454 | organization, or substantially all assets of one, or subdividing an 455 | organization, or merging organizations. 
If propagation of a covered 456 | work results from an entity transaction, each party to that 457 | transaction who receives a copy of the work also receives whatever 458 | licenses to the work the party's predecessor in interest had or could 459 | give under the previous paragraph, plus a right to possession of the 460 | Corresponding Source of the work from the predecessor in interest, if 461 | the predecessor has it or can get it with reasonable efforts. 462 | 463 | You may not impose any further restrictions on the exercise of the 464 | rights granted or affirmed under this License. For example, you may 465 | not impose a license fee, royalty, or other charge for exercise of 466 | rights granted under this License, and you may not initiate litigation 467 | (including a cross-claim or counterclaim in a lawsuit) alleging that 468 | any patent claim is infringed by making, using, selling, offering for 469 | sale, or importing the Program or any portion of it. 470 | 471 | 11. Patents. 472 | 473 | A "contributor" is a copyright holder who authorizes use under this 474 | License of the Program or a work on which the Program is based. The 475 | work thus licensed is called the contributor's "contributor version". 476 | 477 | A contributor's "essential patent claims" are all patent claims 478 | owned or controlled by the contributor, whether already acquired or 479 | hereafter acquired, that would be infringed by some manner, permitted 480 | by this License, of making, using, or selling its contributor version, 481 | but do not include claims that would be infringed only as a 482 | consequence of further modification of the contributor version. For 483 | purposes of this definition, "control" includes the right to grant 484 | patent sublicenses in a manner consistent with the requirements of 485 | this License. 
486 | 487 | Each contributor grants you a non-exclusive, worldwide, royalty-free 488 | patent license under the contributor's essential patent claims, to 489 | make, use, sell, offer for sale, import and otherwise run, modify and 490 | propagate the contents of its contributor version. 491 | 492 | In the following three paragraphs, a "patent license" is any express 493 | agreement or commitment, however denominated, not to enforce a patent 494 | (such as an express permission to practice a patent or covenant not to 495 | sue for patent infringement). To "grant" such a patent license to a 496 | party means to make such an agreement or commitment not to enforce a 497 | patent against the party. 498 | 499 | If you convey a covered work, knowingly relying on a patent license, 500 | and the Corresponding Source of the work is not available for anyone 501 | to copy, free of charge and under the terms of this License, through a 502 | publicly available network server or other readily accessible means, 503 | then you must either (1) cause the Corresponding Source to be so 504 | available, or (2) arrange to deprive yourself of the benefit of the 505 | patent license for this particular work, or (3) arrange, in a manner 506 | consistent with the requirements of this License, to extend the patent 507 | license to downstream recipients. "Knowingly relying" means you have 508 | actual knowledge that, but for the patent license, your conveying the 509 | covered work in a country, or your recipient's use of the covered work 510 | in a country, would infringe one or more identifiable patents in that 511 | country that you have reason to believe are valid. 
512 | 513 | If, pursuant to or in connection with a single transaction or 514 | arrangement, you convey, or propagate by procuring conveyance of, a 515 | covered work, and grant a patent license to some of the parties 516 | receiving the covered work authorizing them to use, propagate, modify 517 | or convey a specific copy of the covered work, then the patent license 518 | you grant is automatically extended to all recipients of the covered 519 | work and works based on it. 520 | 521 | A patent license is "discriminatory" if it does not include within 522 | the scope of its coverage, prohibits the exercise of, or is 523 | conditioned on the non-exercise of one or more of the rights that are 524 | specifically granted under this License. You may not convey a covered 525 | work if you are a party to an arrangement with a third party that is 526 | in the business of distributing software, under which you make payment 527 | to the third party based on the extent of your activity of conveying 528 | the work, and under which the third party grants, to any of the 529 | parties who would receive the covered work from you, a discriminatory 530 | patent license (a) in connection with copies of the covered work 531 | conveyed by you (or copies made from those copies), or (b) primarily 532 | for and in connection with specific products or compilations that 533 | contain the covered work, unless you entered into that arrangement, 534 | or that patent license was granted, prior to 28 March 2007. 535 | 536 | Nothing in this License shall be construed as excluding or limiting 537 | any implied license or other defenses to infringement that may 538 | otherwise be available to you under applicable patent law. 539 | 540 | 12. No Surrender of Others' Freedom. 541 | 542 | If conditions are imposed on you (whether by court order, agreement or 543 | otherwise) that contradict the conditions of this License, they do not 544 | excuse you from the conditions of this License. 
If you cannot convey a 545 | covered work so as to satisfy simultaneously your obligations under this 546 | License and any other pertinent obligations, then as a consequence you may 547 | not convey it at all. For example, if you agree to terms that obligate you 548 | to collect a royalty for further conveying from those to whom you convey 549 | the Program, the only way you could satisfy both those terms and this 550 | License would be to refrain entirely from conveying the Program. 551 | 552 | 13. Use with the GNU Affero General Public License. 553 | 554 | Notwithstanding any other provision of this License, you have 555 | permission to link or combine any covered work with a work licensed 556 | under version 3 of the GNU Affero General Public License into a single 557 | combined work, and to convey the resulting work. The terms of this 558 | License will continue to apply to the part which is the covered work, 559 | but the special requirements of the GNU Affero General Public License, 560 | section 13, concerning interaction through a network will apply to the 561 | combination as such. 562 | 563 | 14. Revised Versions of this License. 564 | 565 | The Free Software Foundation may publish revised and/or new versions of 566 | the GNU General Public License from time to time. Such new versions will 567 | be similar in spirit to the present version, but may differ in detail to 568 | address new problems or concerns. 569 | 570 | Each version is given a distinguishing version number. If the 571 | Program specifies that a certain numbered version of the GNU General 572 | Public License "or any later version" applies to it, you have the 573 | option of following the terms and conditions either of that numbered 574 | version or of any later version published by the Free Software 575 | Foundation. If the Program does not specify a version number of the 576 | GNU General Public License, you may choose any version ever published 577 | by the Free Software Foundation. 
578 | 579 | If the Program specifies that a proxy can decide which future 580 | versions of the GNU General Public License can be used, that proxy's 581 | public statement of acceptance of a version permanently authorizes you 582 | to choose that version for the Program. 583 | 584 | Later license versions may give you additional or different 585 | permissions. However, no additional obligations are imposed on any 586 | author or copyright holder as a result of your choosing to follow a 587 | later version. 588 | 589 | 15. Disclaimer of Warranty. 590 | 591 | THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY 592 | APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT 593 | HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY 594 | OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, 595 | THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 596 | PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM 597 | IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF 598 | ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 599 | 600 | 16. Limitation of Liability. 601 | 602 | IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING 603 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS 604 | THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY 605 | GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE 606 | USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF 607 | DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD 608 | PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), 609 | EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF 610 | SUCH DAMAGES. 611 | 612 | 17. Interpretation of Sections 15 and 16. 
613 | 614 | If the disclaimer of warranty and limitation of liability provided 615 | above cannot be given local legal effect according to their terms, 616 | reviewing courts shall apply local law that most closely approximates 617 | an absolute waiver of all civil liability in connection with the 618 | Program, unless a warranty or assumption of liability accompanies a 619 | copy of the Program in return for a fee. 620 | 621 | END OF TERMS AND CONDITIONS 622 | 623 | How to Apply These Terms to Your New Programs 624 | 625 | If you develop a new program, and you want it to be of the greatest 626 | possible use to the public, the best way to achieve this is to make it 627 | free software which everyone can redistribute and change under these terms. 628 | 629 | To do so, attach the following notices to the program. It is safest 630 | to attach them to the start of each source file to most effectively 631 | state the exclusion of warranty; and each file should have at least 632 | the "copyright" line and a pointer to where the full notice is found. 633 | 634 | <one line to give the program's name and a brief idea of what it does.> 635 | Copyright (C) <year> <name of author> 636 | 637 | This program is free software: you can redistribute it and/or modify 638 | it under the terms of the GNU General Public License as published by 639 | the Free Software Foundation, either version 3 of the License, or 640 | (at your option) any later version. 641 | 642 | This program is distributed in the hope that it will be useful, 643 | but WITHOUT ANY WARRANTY; without even the implied warranty of 644 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 645 | GNU General Public License for more details. 646 | 647 | You should have received a copy of the GNU General Public License 648 | along with this program. If not, see <https://www.gnu.org/licenses/>. 649 | 650 | Also add information on how to contact you by electronic and paper mail.
651 | 652 | If the program does terminal interaction, make it output a short 653 | notice like this when it starts in an interactive mode: 654 | 655 | <program> Copyright (C) <year> <name of author> 656 | This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'. 657 | This is free software, and you are welcome to redistribute it 658 | under certain conditions; type `show c' for details. 659 | 660 | The hypothetical commands `show w' and `show c' should show the appropriate 661 | parts of the General Public License. Of course, your program's commands 662 | might be different; for a GUI interface, you would use an "about box". 663 | 664 | You should also get your employer (if you work as a programmer) or school, 665 | if any, to sign a "copyright disclaimer" for the program, if necessary. 666 | For more information on this, and how to apply and follow the GNU GPL, see 667 | <https://www.gnu.org/licenses/>. 668 | 669 | The GNU General Public License does not permit incorporating your program 670 | into proprietary programs. If your program is a subroutine library, you 671 | may consider it more useful to permit linking proprietary applications with 672 | the library. If this is what you want to do, use the GNU Lesser General 673 | Public License instead of this License. But first, please read 674 | <https://www.gnu.org/licenses/why-not-lgpl.html>. 675 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | ################################################################################ 3 | ## 4 | ## This file is used for building the final extension release package, 5 | ## compiling bloom filter code to run in a terminal and in WebAssembly, running 6 | ## tests on the Bloom filter and Murmur3 implementations, and for cleaning up 7 | ## garbage files that may accumulate over time.
8 | ## 9 | ## Created by Jacob Strieb 10 | ## January 2021 11 | ## 12 | ################################################################################ 13 | ################################################################################ 14 | 15 | 16 | 17 | ################################################################################ 18 | # Variables 19 | ################################################################################ 20 | 21 | SHELL = /bin/sh 22 | 23 | # NOTE: GNU extensions used for getline and getopt_long in bloom-create.c 24 | CC = gcc 25 | CFLAGS = -std=gnu99 \ 26 | -pedantic \ 27 | -Wall \ 28 | -Wextra \ 29 | -Werror \ 30 | -O3 31 | VPATH = bloom-filter test 32 | INC = bloom-filter 33 | 34 | # NOTE: In compilation commands, libraries for linker must come after code 35 | # using the libraries. See: 36 | # https://stackoverflow.com/a/409402/1376127 37 | # 38 | # For including zlib when compiling with emscripten, use "USE_ZLIB" as-per: 39 | # https://emscripten.org/docs/compiling/Building-Projects.html#emscripten-ports 40 | LDLIBS = -lz 41 | 42 | 43 | 44 | ################################################################################ 45 | # Bundle extension for release (download and resize icons if necessary) 46 | ################################################################################ 47 | 48 | EXTENSION_FILES = manifest.json \ 49 | background.js \ 50 | background.html \ 51 | bloom.js \ 52 | bloom.wasm \ 53 | bloom-wrap.js \ 54 | add-latest.js \ 55 | options.html \ 56 | options.js \ 57 | icons 58 | 59 | hackernews-button.zip: $(EXTENSION_FILES) 60 | zip \ 61 | --recurse-paths \ 62 | "$@" \ 63 | $^ 64 | 65 | # NOTE: Requires ImageMagick 66 | icons: ycombinator-logo.jpg 67 | mkdir -p icons 68 | convert -resize 16x16 $< icons/icon-16.png 69 | convert -resize 32x32 $< icons/icon-32.png 70 | convert -resize 48x48 $< icons/icon-48.png 71 | convert -resize 64x64 $< icons/icon-64.png 72 | convert -resize 96x96 $< icons/icon-96.png 73 | 74 
| ycombinator-logo.jpg: 75 | curl \ 76 | --output "$@" \ 77 | "https://feeds.backtracks.fm/feeds/series/cb81757a-3054-11e7-89cf-0e1b887eb36a/images/main.jpg" 78 | 79 | 80 | 81 | ################################################################################ 82 | # Generate bloom filters from the command line 83 | ################################################################################ 84 | 85 | .PHONY: create 86 | create: bin/bloom-create 87 | 88 | bin/bloom-create: bin murmur.c bloom.c bloom-create.c 89 | $(CC) \ 90 | $(CFLAGS) \ 91 | -I $(INC) \ 92 | $(filter %.c, $^) \ 93 | $(LDLIBS) \ 94 | -o $@ 95 | 96 | 97 | 98 | ################################################################################ 99 | # Compile wrapper library to wasm and export for use in extension scripts 100 | ################################################################################ 101 | 102 | bloom.js: murmur.c bloom.c bloom-js-export.c 103 | emcc $(filter %.c, $^) \ 104 | -I $(INC) \ 105 | -s WASM=1 \ 106 | -s EXTRA_EXPORTED_RUNTIME_METHODS='["ccall", "writeArrayToMemory"]' \ 107 | -s ENVIRONMENT=web \ 108 | -s ALLOW_MEMORY_GROWTH=1 \ 109 | -s ASSERTIONS=1 \ 110 | -s USE_ZLIB=1 \ 111 | -o $@ 112 | 113 | 114 | 115 | ################################################################################ 116 | # Test Bloom filter and Murmur3 implementations 117 | ################################################################################ 118 | 119 | .PHONY: test 120 | test: bin/murmur-test bin/bloom-test bin/murmur-test.html bin/bloom-test.html 121 | bin/murmur-test 122 | bin/bloom-test 123 | 124 | bin: 125 | mkdir -p bin 126 | 127 | bin/murmur-test: bin murmur.c murmur-test.c 128 | $(CC) \ 129 | $(CFLAGS) \ 130 | -g \ 131 | -I $(INC) \ 132 | $(filter %.c, $^) \ 133 | -o $@ 134 | 135 | bin/murmur-test.html: bin murmur.c murmur-test.c test-template.html 136 | emcc $(filter %.c, $^) \ 137 | -I $(INC) \ 138 | -s WASM=1 \ 139 | -s ASSERTIONS=1 \ 140 | -s ALLOW_MEMORY_GROWTH=1 \ 141 | 
-s EXTRA_EXPORTED_RUNTIME_METHODS='["ccall", "cwrap"]' \ 142 | --shell-file $(filter %.html, $^) \ 143 | -o $@ 144 | @echo "Start a local web server in this directory and go to /murmur-test.html" 145 | 146 | bin/bloom-test: bin murmur.c bloom.c bloom-test.c 147 | $(CC) \ 148 | $(CFLAGS) \ 149 | -g \ 150 | -I $(INC) \ 151 | $(filter %.c, $^) \ 152 | $(LDLIBS) \ 153 | -o $@ 154 | 155 | bin/bloom-test.html: bin murmur.c bloom.c bloom-test.c test-template.html 156 | emcc $(filter %.c, $^) \ 157 | -I $(INC) \ 158 | -s WASM=1 \ 159 | -s ASSERTIONS=1 \ 160 | -s ALLOW_MEMORY_GROWTH=1 \ 161 | -s EXTRA_EXPORTED_RUNTIME_METHODS='["ccall", "cwrap"]' \ 162 | --shell-file $(filter %.html, $^) \ 163 | -s USE_ZLIB=1 \ 164 | -o $@ 165 | @echo "Start a local web server in this directory and go to /bloom-test.html" 166 | 167 | 168 | 169 | ################################################################################ 170 | # Additional targets 171 | ################################################################################ 172 | 173 | .PHONY: clean 174 | clean: 175 | rm -rf bin hackernews-button.zip 176 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Hacker News Discussion Button 2 | 3 | Firefox extension that links to the [Hacker News](https://news.ycombinator.com) 4 | discussion for the current page and preserves privacy with Bloom filters. 5 | 6 |
7 | 8 | Screenshot 9 | 10 |
11 | 12 | 13 | 14 | # Quick start 15 | 16 | Install the browser extension from one of the following sources: 17 | - [Download from the Mozilla Add-on website](https://addons.mozilla.org/en-US/firefox/addon/hacker-news-discussion-button/) 18 | - [Download from GitHub](https://github.com/jstrieb/hackernews-button/releases/latest/download/hackernews-button.xpi) 19 | 20 | --- 21 | 22 | The extension will light up bright orange when the current page has previously 23 | been posted to Hacker News. 24 | - Clicking the extension will open the Hacker News discussion. 25 | - Clicking the extension with the scroll wheel will open the discussion in a 26 | new tab. 27 | - Clicking while holding Ctrl or Shift will open the 28 | discussion in a new tab or window, respectively. 29 | 30 | There are also keyboard shortcuts. 31 | - Alt + Y opens the Hacker News discussion in the current 32 | page 33 | - Ctrl + Shift + Y opens the discussion in a 34 | new tab. 35 | 36 | Star this project if you like it! 37 | 38 | Read the [Hacker News Discussion](https://news.ycombinator.com/item?id=26301600) 39 | for this project. 40 | 41 | 42 | 43 | # How It Works 44 | 45 | When you visit a website, this browser extension determines whether the website 46 | has been submitted to Hacker News. A naive (but effective) way to do this is to 47 | query the very helpful [Algolia Search API for Hacker 48 | News](https://hn.algolia.com/api) with every page visited. In fact, that's what 49 | the original version of this extension did when I wrote it over the summer of 50 | 2020! Unfortunately, there are two problems with this naive approach: you 51 | reveal every website you visit to Algolia, and you waste bandwidth and energy 52 | sending and receiving extraneous API requests. 53 | 54 | To solve this problem, this extension uses a data structure called a [Bloom 55 | filter](https://en.wikipedia.org/wiki/Bloom_filter) to protect your privacy. 
56 | Bloom filters can be thought of as a super condensed representation of the 57 | fingerprints of a long list of URLs. In this way, you can download the Bloom 58 | filter once (with periodic updates), and check if it contains the current 59 | website's URL fingerprint without making any requests over the Internet. 60 | 61 |
62 | 63 | Click to read Bloom filter parameter details 64 | 65 | Bloom filters are probabilistic data structures, which means that when you 66 | query whether a string is in the set represented by the Bloom filter, the 67 | response from the data structure is either "no," or "probably yes." Bloom 68 | filters have two parameters that can be tuned to minimize the likelihood of 69 | false positive results: the size of the filter (the number of bits), and the 70 | number of hashes used to obtain a fingerprint of each item. 71 | 72 | Based on calculations performed using this [Bloom filter 73 | calculator](https://hur.st/bloomfilter/?n=4M&p=&m=16MiB&k=23), the Bloom 74 | filters used by this Firefox extension occupy 16MB of space and use 23 hash 75 | functions. Since (at the time of this release) there are approximately 4 76 | million submitted Hacker News stories, this gives a 1 in 10 million chance of a 77 | false positive match on the Bloom filter. This probability gradually increases 78 | to 1 in 26,000 as the number of submissions approaches 6 million, and becomes 1 79 | in 850 by the time there have been 8 million Hacker News story submissions. At 80 | that point, it will likely be worthwhile to consider increasing the size of the 81 | Bloom filter. 82 | 83 | 16MB was chosen as the Bloom filter size, and the number of hashes was adjusted 84 | around it. This size is convenient because it is not too large for an initial 85 | download of multiple Bloom filters. Additionally, 16MB Bloom filters 86 | representing smaller time windows (e.g. submissions from the last 24 hours) are 87 | very sparse, and thus compress extremely well. For example, the Bloom filter 88 | representing submissions from the last 24 hours compresses from 16MB to about 89 | 50KB. Though the false positive rate could be further reduced and 90 | future-proofed, doubling the Bloom filter size to 32MB is a significant 91 | increase, even with compression. 92 | 93 | --- 94 | 95 |
96 | 97 | If the current page has been on Hacker News, the extension lights up and 98 | becomes clickable. Clicking it retrieves a link to the best discussion for the 99 | page and navigates the browser there. 100 | 101 | By default, the extension uses several Bloom filters to show a lower-bound on 102 | the score for each page. This can be easily disabled from the "Options" page 103 | for the extension, accessible by going to `about:addons`. It might be desirable 104 | to disable this if using multiple filters is too resource-intensive. 105 | 106 |
107 | 108 | Click to read more about score thresholds 109 | 110 | It seemed reasonable to use at most five distinct Bloom filters. Because they 111 | become increasingly sparse as the number of stories in the Bloom filter 112 | decreases, they compress well, so adding additional Bloom filters doesn't have 113 | a massive impact on the total amount of data downloaded. 114 | 115 | On the other hand, uncompressed, they total `5 * 16MB = 80MB` in memory – more 116 | than this seemed unreasonable. 117 | 118 | The five thresholds for the Bloom filters were chosen mostly by eye, but 119 | validated and tuned using analysis of the dataset. 120 | 121 | | Range | Count | 122 | | --- | --- | 123 | | 0-10 | 3381917 | 124 | | 10-75 | 300300 | 125 | | 75-250 | 121291 | 126 | | 250-500 | 25739 | 127 | | 500+ | 7948 | 128 | 129 | Bloom filter score range visualization 130 | 131 | As of February 28, 2021, the ranges have an approximately logarithmically 132 | decreasing number of entries. This is desirable because this mirrors the true 133 | distribution of the data, which is also approximately logarithmic. It also 134 | allows for acceptably sensible, informative score ranges. 135 | 136 | Aggregate Hacker News story scores 137 | 138 | The data used for this analysis can be viewed 139 | [here](https://docs.google.com/spreadsheets/d/1s41DRN3MrifjcqeYql88WAQH6nySIUYWs4NLUzDg7wM/edit?usp=sharing). 140 | It was generated with the following BigQuery SQL query, and the thresholds were 141 | tuned in the spreadsheet. 142 | 143 | ``` sql 144 | SELECT 145 | score, 146 | COUNT(score) AS count 147 | FROM 148 | `bigquery-public-data.hacker_news.full` 149 | WHERE 150 | score IS NOT NULL 151 | AND score != 0 152 | GROUP BY 153 | score 154 | ORDER BY 155 | score 156 | ``` 157 | 158 |
159 | 160 | ## Disclaimer 161 | 162 | You still send data to Algolia when you click the extension to visit the 163 | discussion. The improvement offered by using Bloom filters is to not send *all* 164 | of the sites you visit to the API, but *some* data still need to be sent to 165 | retrieve the link to the discussion. Moreover, by default an updated Bloom 166 | filter is downloaded once every 24 hours from GitHub. It is possible that 167 | GitHub maintains logs of who downloads these releases. 168 | 169 | 170 | 171 | # How to Read This Code 172 | 173 | Browser extensions have a lot of power to harm users, so it is important to 174 | understand what you are running. To that end, I provide a description of how to 175 | read this code. Please audit the code before running it. 176 | 177 | This repository has three parts: 178 | 1. Code to pull Hacker News data and generate Bloom filters from it 179 | 2. Code for the browser extension 180 | 3. A Bloom filter library used by the Bloom filter generator and the browser 181 | extension – just one implementation used by both parts of the project 182 | 183 | Each of the three individual parts of the code is described in greater depth 184 | below. Click "Details" to read more. 185 | 186 | The 187 | [`Makefile`](https://github.com/jstrieb/hackernews-button/blob/master/Makefile) 188 | is used for almost all parts of the code, and is a good place to start reading 189 | to understand how everything fits together. 190 | 191 |
192 | 193 | Details 194 | 195 | ## Bloom Filter Library 196 | 197 | Files to read: 198 | 199 | - [`bloom-filter/bloom.c`](https://github.com/jstrieb/hackernews-button/blob/master/bloom-filter/bloom.c) 200 | - [`test/bloom-test.c`](https://github.com/jstrieb/hackernews-button/blob/master/test/bloom-test.c) 201 | 202 | The code for Bloom filters is implemented in C. This code is used in a 203 | command-line C program to generate Bloom filters, which is compiled using 204 | `gcc`. It is also used by the browser extension in a wrapper library, which is 205 | compiled to WebAssembly using Emscripten (`emcc` in the `Makefile`). 206 | 207 | The [`test`](https://github.com/jstrieb/hackernews-button/tree/master/test) 208 | folder includes tests for various parts of the Bloom filter library to ensure 209 | it is working as expected. 210 | 211 | ## Generating Bloom Filters 212 | 213 | Files to read: 214 | 215 | - [`.github/workflows/generate-bloomfilter.yml`](https://github.com/jstrieb/hackernews-button/blob/master/.github/workflows/generate-bloomfilter.yml) 216 | - [`canonicalize.py`](https://github.com/jstrieb/hackernews-button/blob/master/canonicalize.py) 217 | - [`bloom-filter/bloom-create.c`](https://github.com/jstrieb/hackernews-button/blob/master/bloom-filter/bloom-create.c) 218 | 219 | Bloom filters are regularly regenerated on a schedule, mediated by a GitHub 220 | Actions workflow. At a high level, this process pulls down relevant data from 221 | the [Hacker News BigQuery 222 | dataset](https://console.cloud.google.com/marketplace/details/y-combinator/hacker-news), 223 | does some preprocessing, normalizes ("canonicalizes") URLs, and feeds them to 224 | the command-line Bloom filter generator. Generated Bloom filters are uploaded 225 | as [GitHub Releases](https://github.com/jstrieb/hackernews-button/releases) so 226 | users running the extension can download the latest ones. 
227 | 228 | Since Bloom filters can only match exact strings, it is helpful to 229 | "canonicalize" URLs so that there are fewer false negative results. In other 230 | words, because multiple URLs often point to the same page, 231 | [`canonicalize.py`](https://github.com/jstrieb/hackernews-button/blob/master/canonicalize.py) 232 | is useful for ensuring that slightly different URLs submitted to Hacker News 233 | for the current page still match in the Bloom filter. Unfortunately, this 234 | process is inherently imperfect. Issues with suggested improvements to 235 | the URL canonicalization process are appreciated! 236 | 237 | For actually reading strings, adding them to Bloom filters, and writing 238 | (compressed) Bloom filters, we compile and use 239 | [`bloom-create.c`](https://github.com/jstrieb/hackernews-button/blob/master/bloom-filter/bloom-create.c). 240 | This takes some command-line arguments, and then reads from standard input, 241 | parses the line-delimited strings, and outputs a Bloom filter. 242 | 243 | ## Browser Extension 244 | 245 | Files to read: 246 | 247 | - [`manifest.json`](https://github.com/jstrieb/hackernews-button/blob/master/manifest.json) 248 | - [`background.js`](https://github.com/jstrieb/hackernews-button/blob/master/background.js) 249 | - [`bloom-wrap.js`](https://github.com/jstrieb/hackernews-button/blob/master/bloom-wrap.js) 250 | - [`add-latest.js`](https://github.com/jstrieb/hackernews-button/blob/master/add-latest.js) 251 | 252 | The 253 | [manifest](https://github.com/jstrieb/hackernews-button/blob/master/manifest.json) 254 | connects all parts of the extension together. It attaches keyboard commands to 255 | events and runs a page with background scripts, which do most of the heavy 256 | lifting. It also runs a small content script on `news.ycombinator.com` pages. 257 | 258 | There are two important background scripts.
259 | [`background.js`](https://github.com/jstrieb/hackernews-button/blob/master/background.js) 260 | is responsible for displaying the browser extension and handling user 261 | interaction. 262 | [`bloom-wrap.js`](https://github.com/jstrieb/hackernews-button/blob/master/bloom-wrap.js) 263 | makes the Bloom filter library (implemented in C) easily accessible from 264 | JavaScript via low-level wrappers and high-level helper functions. It also 265 | includes code that, when the browser starts and WebAssembly is ready, attempts 266 | to either load a Bloom filter from local storage, or download the latest one 267 | from GitHub. 268 | 269 | The content script that runs on `news.ycombinator.com` pages extracts "story" 270 | URLs from the pages and adds them to the Bloom filter. This is useful because 271 | the Bloom filters only update every 24 hours at most (as limited by the 272 | frequency of BigQuery dataset updates), so adding stories to the Bloom filter 273 | this way makes it possible to use the extension to view the discussion for 274 | recently-submitted posts. This would otherwise not be possible until the Bloom 275 | filter is updated many hours later. 276 | 277 | Note that the `background.html` page also loads a script `bloom.js` that is not 278 | in the repo. As per the 279 | [`Makefile`](https://github.com/jstrieb/hackernews-button/blob/d365b2a1619cd139186d3a162b9dd6de0bc13b0a/Makefile#L98-L111), 280 | this script is compiled from the Bloom filter C library using Emscripten. 281 | 282 |
283 | 284 | 285 | 286 | # Project Status 287 | 288 | This project is actively developed and maintained. If there have not been 289 | commits long after the initial release, everything is probably running 290 | smoothly! 291 | 292 | The project is designed so that even if something were to happen to me, as long 293 | as my GitHub account is open, the Actions workflow should continue to release 294 | updated Bloom filters. 295 | 296 | I will do my best to address issues in a timely fashion, but I'm busy and this 297 | is a side-project. Unsolicited pull requests are likely to be ignored. This is 298 | because releasing a browser extension means I have a (*moral*, not *legal* 299 | – see the 300 | [LICENSE](https://github.com/jstrieb/hackernews-button/blob/master/LICENSE)) 301 | responsibility for the security of everyone who installs it. As a result, 302 | vetting random pull requests is typically not worth the effort unless they 303 | address an issue that has been discussed beforehand. I'm happy to have others' 304 | support, just ask first – open an issue to do so. 305 | 306 | 307 | 308 | # How to Modify This Code 309 | 310 | 1. Fork your own copy of the repository 311 | 2. [Create a new project](https://console.cloud.google.com/projectcreate) in 312 | BigQuery 313 | 3. Create a service account with the `BigQuery User` permission 314 | 4. Generate a JSON key 315 | 5. Enable Actions for the repository 316 | 6. Copy the JSON key into an Actions secret called `BQ_JSON` (under Settings > 317 | Secrets > Actions) 318 | 7. Make your fork public if you want to be able to access it unauthenticated 319 | 8. Change the repo to your liking, maintaining attribution and the LICENSE file! 322 | 323 | 324 | 325 | # Known Issues 326 | 327 | - There is currently no version of this extension for Google Chrome.
To read 328 | more and discuss, check out the relevant issue 329 | ([#1](https://github.com/jstrieb/hackernews-button/issues/1)). 330 | - The [URL 331 | canonicalization](https://github.com/jstrieb/hackernews-button/blob/master/canonicalize.py) 332 | is highly imperfect. There will inevitably be false negatives in Bloom filter 333 | results. Suggestions for improving canonicalization in general, or for 334 | specific sites, are welcome! 335 | - If the button is clicked, Algolia search tries to return the "best" 336 | submission for a given URL. Often this is not the latest submission, but the 337 | one with the most points. 338 | 339 | This also means that if the button is clicked for very recently submitted 340 | stories (when browsing [new](https://news.ycombinator.com/newest), for 341 | example), Algolia may not have indexed the story yet, causing the redirect to 342 | fail. 343 | - On my computer, the plus signs in the badge text get cut off for three-digit 344 | scores ([#2](https://github.com/jstrieb/hackernews-button/issues/2)). 345 | 346 | 347 | 348 | # Support the Project 349 | 350 | There are a few things you can do to support the project: 351 | 352 | - Star the repository (and follow me on GitHub for more) 353 | - Share and upvote on sites like Twitter, Reddit, and Hacker News 354 | - Report any bugs, glitches, or errors that you find 355 | 356 | These things motivate me to keep sharing what I build, and they provide 357 | validation that my work is appreciated! They also help me improve the project. 358 | Thanks in advance! 359 | 360 | If you are insistent on spending money to show your support, I encourage you to 361 | instead make a generous donation to one of the following organizations. By 362 | advocating for Internet freedoms, organizations like these help me to feel 363 | comfortable releasing work publicly on the Web.
364 | 365 | - [Electronic Frontier Foundation](https://supporters.eff.org/donate/) 366 | - [Signal Foundation](https://signal.org/donate/) 367 | - [Mozilla](https://donate.mozilla.org/en-US/) 368 | - [The Internet Archive](https://archive.org/donate/index.php) 369 | 370 | 371 | 372 | # Acknowledgments 373 | 374 | *This project is not affiliated with Hacker News, Y Combinator, or any Y 375 | Combinator-backed company.* 376 | 377 | This project would not exist in its current form without: 378 | 379 | - Daniel Gackle ([dang](https://news.ycombinator.com/user?id=dang)) 380 | - Logan Snow ([@lsnow99](https://github.com/lsnow99)) 381 | - [Amy Liu](https://www.linkedin.com/in/amyjl/) 382 | - [Hacker News](https://news.ycombinator.com) 383 | - Thomas Hurst's [Bloom filter calculator](https://hur.st/bloomfilter/) 384 | - [zlib](https://zlib.net) 385 | - [MurmurHash](https://github.com/aappleby/smhasher) and Austin Appleby 386 | - [GitHub Actions](https://github.com/features/actions) 387 | - [BigQuery](https://console.cloud.google.com/marketplace/details/y-combinator/hacker-news) 388 | - [Algolia Hacker News Search](https://hn.algolia.com/) 389 | - Anyone who has asked or answered a helpful question on StackOverflow 390 | - [Mozilla Developer Network](https://developer.mozilla.org/en-US/) 391 | documentation – my _sine qua non_ for writing anything for the Web, including 392 | browser extensions 393 | -------------------------------------------------------------------------------- /add-latest.js: -------------------------------------------------------------------------------- 1 | /* add-latest.js 2 | * 3 | * Content script run on news.ycombinator.com pages. Sends story links to be 4 | * added to the Bloom filter so that recently-viewed articles can be navigated 5 | * back to via the extension button. 
6 | * 7 | * Created by Jacob Strieb 8 | * January 2021 9 | */ 10 | 11 | 12 | /*** 13 | * Any story URLs from visited HN pages are sent to the Bloom filters so that 14 | * the latest pages will work, even if they were uploaded after the last time 15 | * the Bloom filter was downloaded. 16 | */ 17 | function sendLatest() { 18 | let stories = Array.from(document.querySelectorAll(".titleline > a")); 19 | let scoreParents = Array.from(document.querySelectorAll(".subtext")); 20 | if (stories.length != scoreParents.length) { 21 | console.error("Different number of story links and scores!"); 22 | console.log(stories, scoreParents); 23 | throw "Different number of story links and scores!"; 24 | } 25 | 26 | let message = { 27 | type: "add_stories", 28 | stories: [], 29 | }; 30 | 31 | for (let i = 0; i < stories.length; i++) { 32 | let storyNode = stories[i]; 33 | let scoreNode = scoreParents[i].querySelector(".score"); 34 | if (!scoreNode) { 35 | continue; 36 | } 37 | let score = Number(scoreNode.innerText.replace(/\s+points?/, "")); 38 | 39 | message.stories.push({ 40 | "url": storyNode.href, 41 | "score": score, 42 | }); 43 | } 44 | 45 | browser.runtime.sendMessage(message); 46 | } 47 | 48 | 49 | sendLatest(); 50 | -------------------------------------------------------------------------------- /background.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 9 | 10 | 11 | 12 | 13 | 14 | -------------------------------------------------------------------------------- /background.js: -------------------------------------------------------------------------------- 1 | /* background.js 2 | * 3 | * Handle user interaction and other relevant web extension events. Expects 4 | * that bloom-wrap.js and bloom.js will be included in the background.html page 5 | * before this runs. 
6 | * 7 | * Created by Jacob Strieb 8 | * July 2020 & January 2021 9 | */ 10 | 11 | 12 | /******************************************************************************* 13 | * Global variables 14 | ******************************************************************************/ 15 | 16 | var tabs = {}; 17 | 18 | 19 | 20 | /******************************************************************************* 21 | * Helper functions 22 | ******************************************************************************/ 23 | 24 | /*** 25 | * Activate the badge for a particular tab 26 | */ 27 | function activateBadge(story, tabId) { 28 | browser.browserAction.enable(tabId); 29 | browser.browserAction.setBadgeText({ 30 | text: "" + story.points, 31 | tabId: tabId 32 | }); 33 | } 34 | 35 | 36 | /*** 37 | * Deactivate the badge for a particular tab 38 | */ 39 | function deactivateBadge(tabId) { 40 | browser.browserAction.disable(tabId); 41 | browser.browserAction.setBadgeText({ 42 | text: "", 43 | tabId: tabId 44 | }); 45 | } 46 | 47 | 48 | /*** 49 | * Load settings 50 | */ 51 | async function loadSettings() { 52 | // Load settings 53 | window.settings = (await browser.storage.local.get("settings")).settings; 54 | 55 | // Set default settings values if necessary 56 | if (!window.settings) { 57 | window.settings = { 58 | debug_mode: false, 59 | multiple_filters: true, 60 | }; 61 | } 62 | } 63 | 64 | 65 | 66 | /******************************************************************************* 67 | * Event handlers and Message Listeners 68 | ******************************************************************************/ 69 | 70 | /*** 71 | * Called when a tab gets "updated." Most importantly, this includes when a 72 | * user clicks a link. 
73 | */ 74 | function handleTabUpdated(tabId, changeInfo, tab) { 75 | // Only submit URLs of pages that have completed loading 76 | if (!("status" in changeInfo && changeInfo.status === "complete")) { 77 | return; 78 | } 79 | 80 | // Ignore built-in Firefox "about:" pages and local files 81 | var tab_url = new URL(tab.url); 82 | if (tab_url.protocol === "about:" || tab_url.protocol === "file:") { 83 | deactivateBadge(tabId); 84 | return; 85 | } 86 | 87 | let scoreLabels = window.filters 88 | .filter(f => inBloom(f, tab_url.toString())) 89 | .map(f => f.threshold); 90 | 91 | if (scoreLabels.length == 0) { 92 | deactivateBadge(tab.id); 93 | return; 94 | } 95 | 96 | let score = scoreLabels.reduce((a, b) => Math.max(a, b)); 97 | 98 | // TODO: Add bloom filter results to the tablist 99 | activateBadge({points: (score ? `${score}+` : "")}, tabId); 100 | return; 101 | } 102 | 103 | 104 | /*** 105 | * Open the Hacker News Discussion when the action is clicked. Do it in a new 106 | * Window if the Shift key is pressed when the click happens. If any other 107 | * modifier keys or the middle mouse button are clicked, open in a new tab. 108 | * Otherwise, open in the current tab. 
109 | */ 110 | function handleActionClicked(tab, onClickData) { 111 | // Algolia doesn't work well with URLs like: 112 | // https://www.youtube.com/watch?v=-pdSjBPH3zM 113 | // I suspect the "=-" leads to treating "-" as an exclusion operator somehow 114 | // https://www.algolia.com/doc/api-reference/api-parameters/advancedSyntax 115 | let tab_url = encodeURIComponent(canonicalizeUrl(tab.url).replace("=-", "=")); 116 | 117 | // Only get the discussion URL if the button is clicked by the user 118 | fetch(`https://hn.algolia.com/api/v1/search?tags=story&query=${tab_url}`) 119 | .then(data => data.json()) 120 | .then(json => { 121 | // Filter only those search results that match on the domain of the 122 | // current page - approximate, but mostly works 123 | let stories = Array.from(json.hits).filter(hit => { 124 | if (!hit || !hit.url) return false; 125 | try { 126 | var hit_url = new URL(hit.url); 127 | } catch (err) { 128 | console.error("Opening Hacker News discussion failed on " + hit.url); 129 | return false; 130 | } 131 | let url = new URL(tab.url); 132 | 133 | // Return true if the hosts match and neither path is /, or if the hosts 134 | // match and both paths are /. 135 | // 136 | // Fixes problems where Algolia doesn't return a result for the exact 137 | // page if a top-level URL is used to search. For example, without 138 | // this, using the extension on https://github.com/ returns a result 139 | // for a GitHub blog post, not the post using the GitHub homepage as 140 | // the story URL 141 | // 142 | // TODO: Match exact path? 
143 | return ((url.host === hit_url.host || url.host === "web.archive.org") 144 | && ((url.pathname === "/" && hit_url.pathname === "/") 145 | || (url.pathname !== "/" && hit_url.pathname !== "/"))); 146 | }); 147 | 148 | // If a story matched, go to the discussion for the one Algolia picked as 149 | // the "top" result 150 | if (stories.length > 0) { 151 | let hn_id = stories[0].objectID; 152 | let hn_url = `https://news.ycombinator.com/item?id=${hn_id}`; 153 | if (onClickData.button == 0 && onClickData.modifiers.length == 0) { 154 | browser.tabs.update(tab.id, {url: hn_url}); 155 | } else if (onClickData.modifiers.includes("Shift")) { 156 | browser.windows.create({url: hn_url}); 157 | } else { 158 | browser.tabs.create({url: hn_url}); 159 | } 160 | return; 161 | } 162 | 163 | deactivateBadge(tab.id); 164 | }) 165 | .catch(console.error); 166 | } 167 | 168 | 169 | /*** 170 | * Add Hacker News story URLs from browsed pages to the Bloom filter. Re-adding 171 | * URLs that are already there doesn't cost much, nor does it cause harm, so we 172 | * don't even bother detecting it. 173 | * 174 | * This function is called when a content script runs on news.ycombinator.com 175 | * and posts a message with the URLs to add. 176 | */ 177 | function addLatest(message) { 178 | if (message.type != "add_stories") { 179 | return; 180 | } 181 | 182 | for (let i = 0; i < window.filters.length; i++) { 183 | f = window.filters[i]; 184 | message.stories 185 | .filter(u => u.score >= f.threshold) 186 | .forEach(u => addBloom(f, u.url)); 187 | } 188 | 189 | // Save the updated Bloom filter 190 | // TODO: Maybe re-enable this someday? Still it takes a couple seconds of 191 | // fully blocking the process to store ~80MB of data. 192 | /* 193 | storeBloom(window.filters) 194 | .catch(e => console.error(e)); 195 | */ 196 | } 197 | 198 | 199 | /*** 200 | * Reload the settings from storage. 201 | * 202 | * Triggered by a message sent from options.html. 
203 | */ 204 | async function reloadSettings(message) { 205 | if (message.type != "reload_settings") { 206 | return; 207 | } 208 | 209 | await loadSettings(); 210 | } 211 | 212 | 213 | /*** 214 | * Delete the stored Bloom filter and delete the in-memory Bloom filter. Then 215 | * reload them all. 216 | */ 217 | async function resetBloom(message) { 218 | if (message.type != "reset_bloom") { 219 | return; 220 | } 221 | 222 | await deleteStoredBloom(); 223 | window.filters.forEach(f => freeBloom(f)); 224 | await loadBloom(); 225 | } 226 | 227 | 228 | 229 | /******************************************************************************* 230 | * Main function called on browser startup or extension load 231 | ******************************************************************************/ 232 | 233 | /*** 234 | * Main procedure function, called on extension initialization 235 | */ 236 | (async () => { 237 | // TODO: Remove, or uncomment if the tablist is used again 238 | // browser.tabs.onRemoved.addListener(tabId => delete tabs[tabId]); 239 | 240 | await loadSettings(); 241 | 242 | // Set up event listeners 243 | browser.tabs.onUpdated.addListener(handleTabUpdated); 244 | browser.browserAction.onClicked.addListener(handleActionClicked); 245 | browser.commands.onCommand.addListener(command => { 246 | if (command === "open_in_new_tab") { 247 | browser.tabs.query({active: true, currentWindow: true}) 248 | .then(tabs => handleActionClicked(tabs[0], {button: 1, modifiers: []})); 249 | } 250 | }); 251 | 252 | // Set up message listeners 253 | browser.runtime.onMessage.addListener(addLatest); 254 | browser.runtime.onMessage.addListener(reloadSettings); 255 | browser.runtime.onMessage.addListener(resetBloom); 256 | 257 | // Every 10 minutes, check if the Bloom filter is outdated, and update if so 258 | setInterval(updateBloom, 10 * 60 * 1000); 259 | 260 | // Style the browser action button 261 | browser.browserAction.disable(); 262 | browser.browserAction.setBadgeText({text: 
""}); 263 | browser.browserAction.setBadgeBackgroundColor({color: "#f0652f"}); 264 | // Will not run in Chrome 265 | if (browser.browserAction.setBadgeTextColor) { 266 | browser.browserAction.setBadgeTextColor({color: "white"}); 267 | } 268 | })(); 269 | -------------------------------------------------------------------------------- /bloom-filter/bloom-create.c: -------------------------------------------------------------------------------- 1 | /* bloom-create.c 2 | * 3 | * Command-line program to create a Bloom filter. 4 | * 5 | * Created by Jacob Strieb 6 | * January 2021 7 | */ 8 | 9 | 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | 17 | #include "bloom.h" 18 | 19 | 20 | 21 | /******************************************************************************* 22 | * Types, structs, and constants 23 | ******************************************************************************/ 24 | 25 | struct args { 26 | char *infile; 27 | char *outfile; 28 | int bloom_bits; 29 | int use_compression; 30 | }; 31 | 32 | 33 | 34 | /******************************************************************************* 35 | * Helper functions 36 | ******************************************************************************/ 37 | 38 | /*** 39 | * Print a usage string describing this program's command-line arguments. 40 | */ 41 | void print_usage(char *prog_name) { 42 | printf("Usage: %s [OPTION]... 
OUTFILE\n" 43 | "Create a Bloom filter from a newline-separated list of input strings.\n" 44 | "OUTFILE is where the binary data of the Bloom filter will be stored.\n\n" 45 | "Options:\n" 46 | " -i, --input=IN\t\tInput file to read strings from, default is stdin\n" 47 | " -b, --bloom-bits=EXP\tUse 2^EXP bits for Bloom filter, default is 27\n" 48 | " -c, --no-compress\tTurn off gzip output compression, on by default\n" 49 | " -h, --help\t\tDisplay this help message\n" 50 | "\nCreated by Jacob Strieb in January 2021.\n", prog_name); 51 | } 52 | 53 |

/***
 * Parse command line arguments, setting their values in the parsed_args
 * struct.
 *
 * Exits the program (after printing usage) on --help, on an unknown option,
 * on an out-of-range --bloom-bits value, or when no OUTFILE is given.
 */
void parse_args(int argc, char *argv[], struct args *parsed_args) {
  // Set default values
  parsed_args->infile = NULL;
  parsed_args->outfile = NULL;
  // 2^27 bits = 2^24 bytes = 16MB (approx)
  // Calculated for 3-10M entries using: https://hur.st/bloomfilter
  parsed_args->bloom_bits = 27;
  parsed_args->use_compression = 1;

  int c, long_index;
  struct option opts[] = {
    { "input", required_argument, NULL, 'i' },
    { "bloom-bits", required_argument, NULL, 'b' },
    { "no-compress", no_argument, NULL, 'c' },
    { "help", no_argument, NULL, 'h' },
    { 0, 0, 0, 0 }
  };
  while ((c = getopt_long(argc, argv, "i:b:ch", opts, &long_index)) != -1) {
    switch(c) {
      case 'i':
        // According to GDB this just points into argv, so we don't have to
        // worry about this string being overwritten when we return up the
        // stack to the caller and try to use the pointer
        parsed_args->infile = optarg;
        break;

      case 'b':
        parsed_args->bloom_bits = atoi(optarg);
        // The filter is stored in whole bytes, so the size in bytes is
        // computed as 1 << (bloom_bits - 3). Values below 3 would make that
        // a negative shift, so they are rejected here.
        if (parsed_args->bloom_bits > 31 || parsed_args->bloom_bits < 3) {
          fprintf(stderr, "%s\n\n", "Must have 3 <= bloom-bits < 32.");
          print_usage(argv[0]);
          exit(EXIT_FAILURE);
        }
        break;

      case 'c':
        parsed_args->use_compression = 0;
        break;

      case 'h':
        print_usage(argv[0]);
        exit(EXIT_SUCCESS);
        break;

      default:
        // Add a blank line because an error will probably be printed
        puts("");
        print_usage(argv[0]);
        exit(EXIT_FAILURE);
        break;
    }
  }

  // Make sure there is an outfile
  if (optind >= argc) {
    fprintf(stderr, "%s\n\n", "Output file not specified!");
    print_usage(argv[0]);
    exit(EXIT_FAILURE);
  }

  parsed_args->outfile = argv[optind];

  return;
}

/*******************************************************************************
 * Main function
 ******************************************************************************/

/***
 * Read newline-separated strings from the input, add each one to a new Bloom
 * filter, and write the filter to the output file (gzip-compressed unless
 * compression was turned off).
 *
 * Returns EXIT_SUCCESS, or EXIT_FAILURE if a file cannot be opened or
 * written or the filter cannot be allocated.
 */
int main(int argc, char *argv[]) {
  // Parse command-line arguments
  struct args args;
  parse_args(argc, argv, &args);

  // Open files specified by user inputs
  FILE *infile;
  if (args.infile == NULL) {
    infile = stdin;
  } else if ((infile = fopen(args.infile, "r")) == NULL) {
    perror("Unable to open input file");
    return EXIT_FAILURE;
  }
  if (args.outfile == NULL) {
    // parse_args exits when OUTFILE is missing, so this is only a safeguard;
    // errno is not meaningful here, hence no perror
    fprintf(stderr, "%s\n\n", "Output file not specified!");
    print_usage(argv[0]);
    fclose(infile);
    return EXIT_FAILURE;
  }

  // Allocate a new bloom filter
  byte *bloom;
  if ((bloom = new_bloom(args.bloom_bits)) == NULL) {
    perror("Unable to create Bloom filter");
    fclose(infile);
    return EXIT_FAILURE;
  }

  // Add strings to the bloom filter from the input, line-by-line
  size_t n = 0;
  char *buffer = NULL;
  ssize_t bytes_read;
  while ((bytes_read = getline(&buffer, &n, infile)) != -1) {
    assert(bytes_read >= 1);
    // getline keeps the delimiter, and hashing the newline will cause
    // problems with JavaScript strings later on, so drop it. The final line
    // of the input may have no trailing newline; in that case every byte
    // belongs to the string and nothing is dropped.
    // NOTE: Important to see that bytes_read is VERY different from n, which
    // is the allocated size -- originally, missing this led to a gnarly bug
    size_t length = (size_t)bytes_read;
    if (buffer[length - 1] == '\n') {
      length--;
    }
    add_bloom(bloom, args.bloom_bits, (uint8_t *)buffer, length);
  }

  int status = EXIT_SUCCESS;
  if (args.use_compression) {
    // Write the Bloom filter out to a gzip compressed file
    write_compressed_bloom(args.outfile, bloom, args.bloom_bits);
  } else {
    // Write the Bloom filter out to a non-compressed file
    FILE *outfile;
    if ((outfile = fopen(args.outfile, "wb")) == NULL) {
      perror("Unable to open output file");
      print_usage(argv[0]);
      status = EXIT_FAILURE;
    } else {
      // 2^bloom_bits bits = 2^(bloom_bits - 3) bytes
      size_t num_bytes = (size_t)1 << (args.bloom_bits - 3);
      if (fwrite((void *)bloom, sizeof(uint8_t), num_bytes, outfile)
          != num_bytes) {
        perror("Unable to write output file");
        status = EXIT_FAILURE;
      }
      // Buffered data is flushed on close, so a failure here also means the
      // output is incomplete
      if (fclose(outfile) != 0 && status == EXIT_SUCCESS) {
        perror("Unable to write output file");
        status = EXIT_FAILURE;
      }
    }
  }

  // Clean up
  free(buffer);
  free_bloom(bloom);

  fclose(infile);

  return status;
}
189 | -------------------------------------------------------------------------------- /bloom-filter/bloom-js-export.c: -------------------------------------------------------------------------------- 1 | /* bloom-js-export.c 2 | * 3 | * Wrap bloom.h library functions for emcc JavaScript/wasm export. Main 4 | * function that does nothing. Function comments can be found in bloom.c and 5 | * bloom.h. 6 | * 7 | * This file is useful to have around if it becomes desirable to change the 8 | * JavaScript interface without changing the underlying library interface.
9 | * 10 | * Created by Jacob Strieb 11 | * January 2021 12 | */ 13 | 14 | #include 15 | #include 16 | 17 | #ifdef __EMSCRIPTEN__ 18 | #include 19 | #else /* __EMSCRIPTEN__ */ 20 | #define EMSCRIPTEN_KEEPALIVE 21 | #endif /* __EMSCRIPTEN__ */ 22 | 23 | #include "bloom.h" 24 | 25 | 26 | 27 | /******************************************************************************* 28 | * Types and Structures 29 | ******************************************************************************/ 30 | 31 | struct decompressed_s { 32 | byte *bloom; 33 | size_t size; 34 | }; 35 | 36 | 37 | 38 | /******************************************************************************* 39 | * Wrappers around library functions 40 | ******************************************************************************/ 41 | 42 | EMSCRIPTEN_KEEPALIVE 43 | byte *js_new_bloom(uint8_t num_bits) { 44 | return new_bloom(num_bits); 45 | } 46 | 47 | 48 | EMSCRIPTEN_KEEPALIVE 49 | void js_free_bloom(byte *bloom) { 50 | free_bloom(bloom); 51 | } 52 | 53 | 54 | /*** 55 | * Return a pointer to a heap-allocated structure containing a pointer to the 56 | * heap-allocated decompressed Bloom filter and its size. Use the helpful 57 | * wrappers below to return the address and size individually from the structure. 58 | * 59 | * Implemented this way to facilitate returning a size and address with only 60 | * one call to the underlying decompression function – decompressing twice is 61 | * wasteful. 62 | * 63 | * NOTE: Both the structure and returned Bloom filter must be individually and 64 | * manually freed. 
65 | */ 66 | EMSCRIPTEN_KEEPALIVE 67 | struct decompressed_s *js_decompress_bloom(byte *compressed, size_t size) { 68 | struct decompressed_s *decompressed = malloc(sizeof(struct decompressed_s)); 69 | byte *bloom; 70 | decompressed->size = decompress_bloom(compressed, size, &bloom); 71 | decompressed->bloom = bloom; 72 | return decompressed; 73 | } 74 | 75 | EMSCRIPTEN_KEEPALIVE 76 | size_t js_get_decompressed_size(struct decompressed_s *decompressed) { 77 | return decompressed->size; 78 | } 79 | 80 | EMSCRIPTEN_KEEPALIVE 81 | byte *js_get_decompressed_bloom(struct decompressed_s *decompressed) { 82 | return decompressed->bloom; 83 | } 84 | 85 | 86 | EMSCRIPTEN_KEEPALIVE 87 | void js_add_bloom(byte *bloom, uint8_t num_bits, byte *data, uint32_t length) { 88 | add_bloom(bloom, num_bits, data, length); 89 | } 90 | 91 | 92 | EMSCRIPTEN_KEEPALIVE 93 | int js_in_bloom(byte *bloom, uint8_t num_bits, byte *data, uint32_t length) { 94 | return in_bloom(bloom, num_bits, data, length); 95 | } 96 | 97 | 98 | EMSCRIPTEN_KEEPALIVE 99 | void js_combine_bloom(byte *bloom, byte *new, uint8_t num_bits) { 100 | combine_bloom(bloom, new, num_bits); 101 | } 102 | 103 | 104 | 105 | /******************************************************************************* 106 | * (Empty) main function 107 | ******************************************************************************/ 108 | 109 | int main() { 110 | return 0; 111 | } 112 | -------------------------------------------------------------------------------- /bloom-filter/bloom.c: -------------------------------------------------------------------------------- 1 | /* bloom.c 2 | * 3 | * Implementation of Bloom filters with adding elements and checking 4 | * membership. 
5 | * 6 | * Created by Jacob Strieb 7 | * January 2021 8 | */ 9 | 10 | 11 | #include 12 | #include // memcpy 13 | #include 14 | 15 | #include "bloom.h" 16 | #include "murmur.h" 17 | 18 | 19 | 20 | /******************************************************************************* 21 | * Library functions 22 | ******************************************************************************/ 23 | 24 | /*** 25 | * Allocate an empty, zeroed bloom filter of 2^num_bits bits. 26 | */ 27 | byte *new_bloom(uint8_t num_bits) { 28 | if (num_bits > 31) { 29 | return NULL; 30 | } 31 | 32 | // Subtracting 3 effectively divides by 8 to account for allocating bytes 33 | num_bits -= 3; 34 | // Allocate 2^(num_bits - 3) bytes for the Bloom filter 35 | return (byte *)calloc(1 << num_bits, sizeof(byte)); 36 | } 37 | 38 | 39 | /*** 40 | * Freeing is straightforward since we don't (yet) use fancy structs to 41 | * represent data. 42 | */ 43 | void free_bloom(byte *bloom) { 44 | free(bloom); 45 | } 46 | 47 | 48 | /*** 49 | * Write out a gzipped file using zlib. Exit the program with a failure code if 50 | * opening or writing the gzip fails. 51 | */ 52 | void write_compressed_bloom(char *filename, byte *bloom, uint8_t num_bits) { 53 | gzFile outfile; 54 | // Use compression level 9 (maximum) 55 | if ((outfile = gzopen(filename, "wb9")) == NULL) { 56 | exit(EXIT_FAILURE); 57 | } 58 | 59 | uint32_t num_bytes = 1 << (num_bits - 3); 60 | if (gzwrite(outfile, (voidpc)bloom, num_bytes) == 0) { 61 | gzclose_w(outfile); 62 | exit(EXIT_FAILURE); 63 | } 64 | 65 | gzclose_w(outfile); 66 | 67 | return; 68 | } 69 | 70 | 71 | /*** 72 | * Decompress a gzipped bloom filter in memory. Takes in a compressed Bloom 73 | * filter, the size of the compressed filter (in bytes), as well as a pointer 74 | * that will be set to a pointer to the allocated, decompressed Bloom filter. 75 | * The size in bytes of the decompressed Bloom filter will be returned. 
76 | * 77 | * TODO: See if there is a better way to manage memory with buffers during the 78 | * inflation – there might be a faster/more efficient way to do things if data 79 | * isn't copied into a buffer on the stack, but rather directly into the heap 80 | * buffer *bloom. When figuring this out, just be careful because naively 81 | * reallocating *bloom would mess up stream.avail_out if it points directly 82 | * into *bloom. 83 | */ 84 | size_t decompress_bloom(byte *compressed, size_t size, byte **bloom) { 85 | z_stream stream; 86 | stream.zalloc = Z_NULL; 87 | stream.zfree = Z_NULL; 88 | stream.opaque = Z_NULL; 89 | 90 | stream.next_in = (Bytef *)compressed; 91 | stream.avail_in = (uInt)size; 92 | 93 | // The magic 15 + 32 comes from zlib.h and is used to automatically detect 94 | // whether the stream is a zlib or gzip 95 | if (inflateInit2(&stream, 15 + 32) != Z_OK) { 96 | *bloom = NULL; 97 | return 0; 98 | } 99 | 100 | size_t buf_size = 16384u; 101 | byte buf[buf_size]; 102 | size_t bloom_size = 1; 103 | *bloom = (byte *)malloc(bloom_size * sizeof(byte)); 104 | 105 | size_t bytes_copied = 0u; 106 | int ret; 107 | do { 108 | do { 109 | // Decompress (inflate) as much as possible into the buffer 110 | stream.avail_out = (uInt)buf_size; 111 | stream.next_out = buf; 112 | 113 | ret = inflate(&stream, Z_NO_FLUSH); 114 | 115 | // Check for errors after decompressing, return 0 if so (don't recover) 116 | if (ret == Z_NEED_DICT || ret == Z_DATA_ERROR || ret == Z_MEM_ERROR) { 117 | return 0u; 118 | } 119 | 120 | // Reallocate a larger Bloom filter if necessary 121 | size_t to_copy = buf_size - stream.avail_out; 122 | while (bytes_copied + to_copy > bloom_size) { 123 | // We can always increase the Bloom filter by a factor of two because 124 | // it starts as a power of two, and we assume that the final value is 125 | // always a power of two 126 | bloom_size *= 2; 127 | 128 | if ((*bloom = (byte *)realloc((void *)*bloom, bloom_size)) == NULL) { 129 | return 0u; 130 
| } 131 | } 132 | 133 | // Copy decompressed bytes from the buffer to the next spot in the filter 134 | (void)memcpy((void *)(*bloom + bytes_copied), (void *)buf, to_copy); 135 | bytes_copied += to_copy; 136 | 137 | // Run while inflate still fills the buffer and the stream has not ended 138 | } while (stream.avail_out == 0); 139 | } while (ret != Z_STREAM_END); 140 | 141 | (void)inflateEnd(&stream); 142 | 143 | return bloom_size; 144 | } 145 | 146 | 147 | /*** 148 | * Add a bit at an index derived from murmur3 hashes seeded by the current 149 | * iteration -- justification for number of iterations can be found in bloom.h. 150 | */ 151 | void add_bloom(byte *bloom, uint8_t num_bits, byte *data, uint32_t length) { 152 | for (int i = 0; i < NUM_HASHES; i++) { 153 | // Calculate the hash value and only take the minimum number of 154 | // higher-order bits required to index fully into the filter. Recall that 155 | // num_bits represents a power of 2 156 | uint32_t hash = murmur3(data, length, i); 157 | hash >>= 32 - num_bits; 158 | 159 | // Divide by 8 to index into the correct byte, set the correct bit to 1 160 | bloom[hash >> 3] |= 1 << (7 - (hash & 0x7)); 161 | } 162 | } 163 | 164 | 165 | /*** 166 | * Check each bit at indices derived from murmur3 hashes seeded by the current 167 | * iteration -- justification for number of iterations can be found in bloom.h. 168 | */ 169 | int in_bloom(byte *bloom, uint8_t num_bits, byte *data, uint32_t length) { 170 | for (int i = 0; i < NUM_HASHES; i++) { 171 | // Calculate the hash value and only take the minimum number of 172 | // higher-order bits required to index fully into the filter. 
Recall that 173 | // num_bits represents a power of 2 174 | uint32_t hash = murmur3(data, length, i); 175 | hash >>= 32 - num_bits; 176 | 177 | // Divide by 8 to index into the correct byte, get the correct bit 178 | int set = bloom[hash >> 3] & (1 << (7 - (hash & 0x7))); 179 | 180 | // Return early if any of the expected bits are not set, meaning the 181 | // element is not in the filter 182 | if (!set) { 183 | return 0; 184 | } 185 | } 186 | 187 | return 1; 188 | } 189 | 190 | 191 | /*** 192 | * Combine two Bloom filters by ORing each byte in the "new" parameter with 193 | * each byte in bloom, and storing the result in bloom. 194 | */ 195 | void combine_bloom(byte *bloom, byte *new, uint8_t num_bits) { 196 | // Number of bytes is 2^num_bits / 8 197 | size_t num_bytes = 1 << (num_bits - 3); 198 | 199 | for (size_t i = 0; i < num_bytes; i++) { 200 | bloom[i] |= new[i]; 201 | } 202 | } 203 | -------------------------------------------------------------------------------- /bloom-filter/bloom.h: -------------------------------------------------------------------------------- 1 | /* bloom.h 2 | * 3 | * Interface for a simple, bare-bones Bloom filter library built on top of the 4 | * Murmur3 hash function. 5 | * 6 | * Created by Jacob Strieb 7 | * January 2021 8 | */ 9 | 10 | 11 | #ifndef BLOOM_H 12 | #define BLOOM_H 13 | 14 | 15 | #include 16 | 17 | 18 | 19 | /******************************************************************************* 20 | * Constants and types 21 | ******************************************************************************/ 22 | 23 | // NUM_HASHES calculated using https://hur.st/bloomfilter 24 | // In particular: at the time this file was created, there are approximately 4 25 | // million stories on HN (not necessarily with unique URLs), and the bloom 26 | // filter is sized to approximately 16MB with this in-mind. 
This calculator 27 | // suggests using this number of hashes for a bloom filter of this size, with 28 | // fairly low probability of collisions for between 3 million and up to 10 29 | // million elements. 30 | #ifndef NUM_HASHES 31 | #define NUM_HASHES 23 32 | #endif /* NUM_HASHES */ 33 | 34 | typedef uint8_t byte; 35 | 36 | 37 | 38 | /******************************************************************************* 39 | * Interface functions 40 | ******************************************************************************/ 41 | 42 | /*** 43 | * Allocate a new Bloom filter. 44 | * 45 | * NOTE: input num_bits represents a power of 2. Any x not satisfying 0 < x < 46 | * 32 will return NULL. 47 | */ 48 | byte *new_bloom(uint8_t num_bits); 49 | 50 | 51 | /*** 52 | * Free an allocated Bloom filter. 53 | */ 54 | void free_bloom(byte *bloom); 55 | 56 | 57 | /*** 58 | * Write a Bloom filter out to a gzip compressed file. 59 | */ 60 | void write_compressed_bloom(char *filename, byte *bloom, uint8_t num_bits); 61 | 62 | 63 | /*** 64 | * Decompress a Bloom filter in memory. Takes the compressed filter, the 65 | * size of the compressed filter in bytes, and a pointer to the place the 66 | * pointer to the decompressed Bloom filter will be stored. Return the size of 67 | * the decompressed Bloom filter in bytes. Store a pointer to the decompressed 68 | * bloom filter in the bloom argument 69 | * 70 | * Note that the newly allocated Bloom filter stored in *bloom must be manually 71 | * freed. 72 | */ 73 | size_t decompress_bloom(byte *compressed, size_t size, byte **bloom); 74 | 75 | 76 | /*** 77 | * Add data to the Bloom filter. 78 | */ 79 | void add_bloom(byte *bloom, uint8_t num_bits, byte *data, uint32_t length); 80 | 81 | 82 | /*** 83 | * Returns an int representing whether data is (probably) in the Bloom filter. 84 | */ 85 | int in_bloom(byte *bloom, uint8_t num_bits, byte *data, uint32_t length); 86 | 87 | 88 | /*** 89 | * Combine two bloom filters. 
/***
 * Rotate the 32-bit value x left by r bit positions.
 *
 * The rotation count is reduced modulo 32, and the complementary right shift
 * is masked as well, so r == 0 (or any multiple of 32) returns x unchanged
 * instead of evaluating the undefined expression x >> 32.
 */
uint32_t rotl32(uint32_t x, int8_t r) {
  uint32_t n = (uint32_t)r & 31u;
  return (x << n) | (x >> ((32u - n) & 31u));
}
/***
 * Calculate the 32-bit MurmurHash3 (x86_32 variant) of the length bytes
 * starting at data, using seed as the initial hash state.
 *
 * Each 4-byte input block is assembled byte-by-byte in little-endian order
 * instead of being read through a uint32_t pointer. On little-endian hosts
 * this produces exactly the same hashes as the pointer cast, but it avoids
 * unaligned and strict-aliasing-violating loads, and gives identical results
 * regardless of the host byte order.
 */
uint32_t murmur3(uint8_t *data, uint32_t length, uint32_t seed) {
  const uint32_t c1 = 0xcc9e2d51, c2 = 0x1b873593;
  // First byte after the last complete 4-byte block
  const uint8_t *tail = data + (length & ~(uint32_t)3);
  uint32_t h1 = seed;

  // Body: mix every complete 4-byte block into the hash state
  for (const uint8_t *p = data; p != tail; p += 4) {
    uint32_t k1 = (uint32_t)p[0]
      | ((uint32_t)p[1] << 8)
      | ((uint32_t)p[2] << 16)
      | ((uint32_t)p[3] << 24);

    k1 *= c1;
    k1 = (k1 << 15) | (k1 >> 17);  // rotate left by 15
    k1 *= c2;

    h1 ^= k1;
    h1 = (h1 << 13) | (h1 >> 19);  // rotate left by 13
    h1 = h1 * 5 + 0xe6546b64;
  }

  // Tail: mix in the remaining 0-3 bytes. The original version uses a switch
  // with fallthrough; cumulative conditions express the same thing without
  // provoking compiler warnings
  uint32_t remaining = length & 3;
  if (remaining != 0) {
    uint32_t k1 = 0;
    if (remaining >= 3) {
      k1 ^= (uint32_t)tail[2] << 16;
    }
    if (remaining >= 2) {
      k1 ^= (uint32_t)tail[1] << 8;
    }
    k1 ^= tail[0];
    k1 *= c1;
    k1 = (k1 << 15) | (k1 >> 17);  // rotate left by 15
    k1 *= c2;
    h1 ^= k1;
  }

  // Finalization: force all bits of the hash state to avalanche
  h1 ^= length;
  h1 ^= h1 >> 16;
  h1 *= 0x85ebca6b;
  h1 ^= h1 >> 13;
  h1 *= 0xc2b2ae35;
  h1 ^= h1 >> 16;

  return h1;
}
4 | * 5 | * Created by Jacob Strieb 6 | * January 2021 7 | */ 8 | 9 | 10 | #ifndef MURMUR_H 11 | #define MURMUR_H 12 | 13 | 14 | #include 15 | 16 | 17 | 18 | /******************************************************************************* 19 | * Interface functions 20 | ******************************************************************************/ 21 | 22 | /*** 23 | * Calculate a murmur3 hash of data, a byte array. Vary the seed as necessary 24 | * to obtain different, deterministic hashes for the same data. 25 | */ 26 | uint32_t murmur3(uint8_t *data, uint32_t length, uint32_t seed); 27 | 28 | #endif /* MURMUR_H */ 29 | -------------------------------------------------------------------------------- /bloom-wrap.js: -------------------------------------------------------------------------------- 1 | /* bloom-wrap.js 2 | * 3 | * Wrapper around Bloom filter WebAssembly functions. Includes a helper 4 | * function to take a normal URL and "canonicalize" it so that it matches the 5 | * format of the URLs inserted into the Bloom filter. 6 | * 7 | * Created by Jacob Strieb 8 | * January 2021 9 | */ 10 | 11 | 12 | /******************************************************************************* 13 | * Helper functions 14 | ******************************************************************************/ 15 | 16 | /*** 17 | * Transform the current URL object to make it as "canonical" as possible. This 18 | * includes removing unnecessary URL parameters, removing "www." from the 19 | * beginning of URLs, stripping unnecessary parts of the path, and performing a 20 | * few domain-specific adjustments. 21 | * 22 | * NOTE: The order in which the transformations take place is subtly important. 23 | * Do not change the order around without good reason. 24 | * 25 | * NOTE: Any canonicalization changes made here *MUST* be reflected in the 26 | * `URL.canonicalize` function within the `canonicalize.py` file! 
/***
 * Transform a raw URL string to make it as "canonical" as possible, and return
 * the result as a scheme-less string (beginning with "//"). This includes
 * dropping the fragment, removing unnecessary URL parameters, removing "www."
 * from the beginning of hosts, stripping unnecessary parts of the path, and
 * performing a few domain-specific adjustments.
 *
 * Throws a TypeError (from the URL constructor) if rawUrl cannot be parsed.
 *
 * NOTE: The order in which the transformations take place is subtly important.
 * Do not change the order around without good reason.
 *
 * NOTE: Any canonicalization changes made here *MUST* be reflected in the
 * `URL.canonicalize` function within the `canonicalize.py` file!
 */
function canonicalizeUrl(rawUrl) {
  const url = new URL(rawUrl);

  // Delete every URL parameter for which the predicate returns false. The
  // keys are copied first because deleting while iterating skips entries.
  const keepParams = predicate => {
    for (const key of Array.from(url.searchParams.keys())) {
      if (!predicate(key)) {
        url.searchParams.delete(key);
      }
    }
  };

  // Drop the fragment
  url.hash = "";

  // Remove unwanted URL parameters
  const unwanted = ["ref", "sms_ss", "gclid", "fbclid", "at_xt", "_r"];
  unwanted.forEach(p => url.searchParams.delete(p));
  keepParams(key => !key.startsWith("utm_"));

  // Use original URL for archive.org links
  if (url.host === "web.archive.org" && url.pathname.startsWith("/web")) {
    const new_url = url.pathname.replace(/\/web\/[^\/]*\//, "");
    return canonicalizeUrl(new_url);
  }

  // HTML files almost exclusively use URL parameters for tracking while the
  // underlying page remains the same. canonicalize.py drops them when the
  // filter is built, so they must be dropped here too or lookups would miss
  if (url.pathname.endsWith(".html")) {
    url.search = "";
  }

  // Truncate index.html, index.php, and trailing slashes
  for (const suffix of ["index.html", "index.php", "/"]) {
    if (url.pathname.endsWith(suffix)) {
      url.pathname = url.pathname.slice(0, -suffix.length);
    }
  }

  // Remove www.
  if (url.host.startsWith("www.")) {
    url.host = url.host.slice("www.".length);
  }

  // Note: youtu.be URLs will auto-redirect to youtube.com, so we don't have to
  // check for the host being youtu.be
  if (url.host == "youtube.com") {
    if (url.searchParams.has("v")) {
      keepParams(key => key == "v");
    } else if (url.searchParams.has("list")) {
      keepParams(key => key == "list");
    }
  }

  // Drop all URL parameters on Amazon
  if (url.host == "amazon.com") {
    keepParams(() => false);
  }

  // Change mobile Wikipedia links to regular ones
  if (url.host == "en.m.wikipedia.org") {
    url.host = "en.wikipedia.org";
  }

  // Drop the scheme and remove a trailing slash if it is still there. Only the
  // leading scheme is matched: a greedy /^.*:\/\// would also swallow the host
  // and path of any URL whose path embeds another "://"
  return url.toString()
    .replace(/^[a-z][a-z0-9+.-]*:\/\//i, "//")
    .replace(/\/$/, "");
}
/***
 * Delete the locally-stored Bloom filter. Useful for debugging from the
 * console.
 */
async function deleteStoredBloom() {
  if (window.settings.debug_mode) {
    console.debug("Deleting stored Bloom filter...");
  }
  await browser.storage.local.remove("filters");
}


/***
 * Save the Bloom filters to local storage.
 *
 * While the save is in progress every filter has currently_storing set to true
 * and addr set to null, so that the stored copy never carries a stale
 * WebAssembly address. Both are restored afterwards -- even if storing fails,
 * in which case the error is re-thrown to the caller. Does nothing if filters
 * is falsy or a store is already in progress.
 */
async function storeBloom(filters) {
  // Skip if storing is already in progress
  if (!filters || filters.some(f => f.currently_storing)) {
    return;
  }

  if (window.settings.debug_mode) {
    console.debug("Storing Bloom filters...");
  }

  // Remember each filter object together with its address so the address can
  // be restored on exactly the object it was taken from
  const saved = filters.map(f => ({filter: f, addr: f.addr}));

  try {
    for (const {filter: f, addr} of saved) {
      // Set the semaphore so it is not stored by another call to this function
      // while bloom.addr is set to null
      f.currently_storing = true;

      // Set the address to null so that it is clear it has not been allocated
      // in WebAssembly when the Bloom filter is restored from storage
      f.addr = null;

      // Update the filter attribute from WebAssembly memory
      if (addr) {
        f.filter = new Uint8Array(Module.HEAPU8.buffer, addr,
          Math.pow(2, f.num_bits - 3));
      }
    }

    // Store the Bloom filter
    await browser.storage.local.set({"filters": filters});
  } finally {
    // Restore addresses and unset the semaphore no matter how we got here;
    // otherwise a failed write would leave every filter permanently unusable
    for (const {filter: f, addr} of saved) {
      f.addr = addr;
      f.currently_storing = false;
    }
  }

  if (window.settings.debug_mode) {
    console.debug("Completed storing Bloom filters.");
  }
}
/***
 * Fetch auto-generated Bloom filter metadata (info.json) from the latest
 * release. Returns the parsed JSON object. Throws if the server responds with
 * an HTTP error status.
 */
async function fetchInfo() {
  if (window.settings.debug_mode) {
    console.debug("Fetching info.json...");
  }

  // Get info.json to find out which Bloom filters to download
  const infoUrl = ("https://github.com/jstrieb/hackernews-button/releases/latest"
    + "/download/info.json");
  const response = await fetch(infoUrl, {
    cache: "no-cache",
  });
  if (!response.ok) {
    throw new Error(`Failed to fetch info.json (HTTP ${response.status})`);
  }

  return response.json();
}


/***
 * Fetch the latest Bloom filter(s). Returns a Bloom filter object.
 *
 * dateString selects a partial (date-ranged) filter; a falsy value selects the
 * full filter for the given score threshold. info is the object returned by
 * fetchInfo. If decompress is true, the filter is also loaded into WebAssembly
 * memory (setting bloom.addr). Throws if the download fails with an HTTP error
 * status, so that an error page is never mistaken for filter data.
 */
async function fetchBloom(dateString, threshold, info, decompress = true) {
  const filename = (dateString
    ? `hn-${dateString}-${threshold}.bloom`
    : `hn-${threshold}.bloom`);
  if (window.settings.debug_mode) {
    console.debug("Fetching new Bloom filter...");
  }
  const url = ("https://github.com/jstrieb/hackernews-button/releases/latest/"
    + `download/${filename}`);
  const response = await fetch(url, {
    cache: "no-cache",
  });
  if (!response.ok) {
    throw new Error(`Failed to fetch ${filename} (HTTP ${response.status})`);
  }
  const b = new Uint8Array(await response.arrayBuffer());

  const bloom = {
    // Filter as an ArrayBuffer
    filter: b,
    // Boolean representing compression status
    compressed: info.compressed,
    // Number of bits in the filter IDs -- number of bytes is 2^(num_bits - 3)
    num_bits: null,
    // WebAssembly heap-allocated Bloom filter address
    addr: null,
    // Date of most recent filter download as a Unix timestamp
    last_downloaded: Math.floor(Date.now() / 1000),
    // Date of most recent filter generation as a Unix timestamp
    last_generated: info.date_generated,
    // Date of anticipated filter regeneration as a Unix timestamp
    next_generated: info.next_generated,
    // Filter filename
    filename: filename,
    // Semaphore for whether it is currently being stored
    currently_storing: false,
    // Score threshold
    threshold: threshold,
  };
  if (window.settings.debug_mode) {
    console.debug("Fetched: ", bloom);
  }

  if (decompress) {
    // Set bloom.addr
    if (bloom.compressed) {
      decompressBloom(bloom);
      if (window.settings.debug_mode) {
        console.debug("Decompressed: ", bloom);
      }
    } else {
      newBloom(bloom);
    }
  }

  return bloom;
}


/***
 * Update the Bloom filter(s) to the latest versions. Destructively modifies
 * the global object window.filters. If force is true, the filters are updated
 * even when the metadata says nothing new has been generated.
 */
async function updateBloom(force = false) {
  // If a Bloom filter has never been loaded, try to load it again
  // NOTE: Since updateBloom is called regularly, this could get expensive if
  // there is no Internet connection or something
  if (!window.filters
      || !window.filters.every(f => f.filter)
      || !window.filters.every(f => f.addr)) {
    await loadBloom();
    return;
  }

  // If the anticipated next generated time hasn't happened yet, don't check
  // for anything
  const now = Math.floor(Date.now() / 1000);
  if (!force && window.filters.every(f => f.next_generated > now)) {
    return;
  }

  // Get info.json to find out which Bloom filters to download
  const info = await fetchInfo();

  // Available partial filter dates, sorted by lowest number i.e., oldest
  // timestamp first
  const oldestFirst = Object.keys(info.dates).map(Number)
    .sort((x, y) => x - y);

  let updated = false;
  for (let i = 0; i < window.filters.length; i++) {
    const f = window.filters[i];

    // If the downloaded info.json is the same or older than the last generated
    // one, make sure the next_generated time is set properly. Theoretically,
    // this should be an unnecessary check if the next_generated prediction is
    // correct. Move on to the next filter rather than returning, so every
    // filter gets its next_generated refreshed and stale ones still update
    if (!force && info.date_generated <= f.last_generated) {
      f.next_generated = info.next_generated;
      continue;
    }

    // If older than the oldest by a large margin, download fresh
    if (oldestFirst[0] - f.last_downloaded > 7 * 24 * 60 * 60) {
      freeBloom(f);
      window.filters[i] = await fetchBloom(null, f.threshold, info);
    }

    // Otherwise, pick the Bloom filter with the date closest to the
    // last_generated date to combine with
    else {
      // Re-sort a copy based on which is closest to the last_generated date
      const l = f.last_generated;
      const closestFirst = oldestFirst.slice()
        .sort((x, y) => Math.abs(x - l) - Math.abs(y - l));

      // Download latest partial Bloom filter
      const dateString = info.dates[closestFirst[0]];
      const latestBloom = await fetchBloom(dateString, f.threshold, info);

      try {
        // Combine the filters and update the datetimes
        combineBloom(f, latestBloom);
        f.last_downloaded = latestBloom.last_downloaded;
        f.last_generated = latestBloom.last_generated;
        f.next_generated = latestBloom.next_generated;
      } finally {
        // Free the allocated partial Bloom filter, even if combining failed
        freeBloom(latestBloom);
      }
    }

    updated = true;
  }

  // Store the updated Bloom filter, but only if something actually changed
  if (updated) {
    await storeBloom(window.filters)
      .catch(e => console.error(e));
  }
}



/*******************************************************************************
 * Wrapper functions
 ******************************************************************************/

/***
 * Allocate a Bloom filter in WebAssembly memory and copy bloom.filter into it.
 * Sets bloom.addr to the address of the allocation.
 */
function newBloom(bloom) {
  // Need to heap-allocate the bloom filter because passing it directly will
  // cause a stack overflow
  const addr = Module.ccall(
    "js_new_bloom",
    "number",
    ["number"],
    [bloom.num_bits]
  );
  // A zero address would make the copy below overwrite the start of the
  // WebAssembly heap
  // NOTE(review): assumes js_new_bloom returns 0 (NULL) on failure -- confirm
  if (!addr) {
    throw "Failed to allocate Bloom filter!";
  }
  bloom.addr = addr;
  Module.writeArrayToMemory(bloom.filter, bloom.addr);
}
/***
 * Free the WebAssembly memory backing a Bloom filter object and clear its
 * address. Does nothing if the filter is missing or not allocated.
 */
function freeBloom(bloom) {
  if (!bloom || !bloom.addr) {
    return;
  }

  Module.ccall(
    "js_free_bloom",
    null,
    ["number"],
    [bloom.addr]
  );
  bloom.addr = null;
}


/***
 * Decompress the compressed Bloom filter, and extract the address and size
 * from the struct generated by the library functions. On success, sets
 * bloom.addr and bloom.num_bits, and marks the filter as not compressed.
 * Throws if decompression fails. The temporary heap buffers are released in
 * either case.
 */
function decompressBloom(bloom) {
  // Put the compressed Bloom filter on the heap
  const compressed = bloom.filter;
  const compressed_addr = _malloc(compressed.length);
  let decompressed = 0;

  try {
    Module.writeArrayToMemory(compressed, compressed_addr);

    decompressed = Module.ccall(
      "js_decompress_bloom",
      "number",
      ["number", "number"],
      [compressed_addr, compressed.length]
    );
    const size_bytes = Module.ccall(
      "js_get_decompressed_size",
      "number",
      ["number"],
      [decompressed]
    );
    if (size_bytes == 0) {
      throw "Failed to decompress downloaded Bloom filter!";
    }
    bloom.addr = Module.ccall(
      "js_get_decompressed_bloom",
      "number",
      ["number"],
      [decompressed]
    );
    bloom.num_bits = Math.round(Math.log2(size_bytes)) + 3;
    bloom.compressed = false;
  } finally {
    _free(compressed_addr);

    // Free the structure, but not the heap-allocated Bloom filter itself
    // NOTE(review): on the failure path the structure may still reference a
    // partially-filled buffer -- confirm whether the library releases it
    if (decompressed) {
      _free(decompressed);
    }
  }
}


/***
 * Add a URL (canonicalized first) to an allocated Bloom filter. Does nothing
 * if the filter is missing or not allocated.
 */
function addBloom(bloom, url) {
  if (!bloom || !bloom.addr) {
    return;
  }

  const canonical = canonicalizeUrl(url);
  Module.ccall(
    "js_add_bloom",
    null,
    ["number", "number", "string", "number"],
    [bloom.addr, bloom.num_bits, canonical, canonical.length]
  );
}


/***
 * Return whether a URL (canonicalized first) is probably in the Bloom filter.
 * Returns false if the filter is missing, not allocated, or being stored.
 */
function inBloom(bloom, url) {
  if (!bloom || bloom.currently_storing || !bloom.addr) {
    // This typically happens on news.ycombinator.com sites where new stories
    // have been added to the Bloom filter, and the membership check happens
    // while the filter is being saved to local storage
    return false;
  }

  const canonical = canonicalizeUrl(url);
  return Module.ccall(
    "js_in_bloom",
    "boolean",
    ["number", "number", "string", "number"],
    [bloom.addr, bloom.num_bits, canonical, canonical.length]
  );
}


/***
 * Combine Bloom filters by destructively modifying the memory of the first one
 */
function combineBloom(bloom, new_bloom) {
  if (bloom.num_bits != new_bloom.num_bits) {
    throw "Trying to combine Bloom filters of different sizes!";
  }
  // An unallocated filter would be passed to WebAssembly as address 0
  if (!bloom.addr || !new_bloom.addr) {
    throw "Trying to combine Bloom filters that are not allocated!";
  }
  Module.ccall(
    "js_combine_bloom",
    null,
    ["number", "number", "number"],
    [bloom.addr, new_bloom.addr, bloom.num_bits]
  );
}



/*******************************************************************************
 * Main function
 ******************************************************************************/

/***
 * Load the Bloom filter as soon as there is a WebAssembly runtime to load it
 * with. This is typically right when the browser/extension starts up.
 *
 * Filters are taken from local storage if present, otherwise downloaded. They
 * are then allocated in WebAssembly memory and written back to storage.
 * Throws if no usable filter could be obtained either way.
 */
async function loadBloom() {
  // If any Bloom filter(s) are already allocated, free them
  window.filters?.forEach(bloom => freeBloom(bloom));

  // Try to get the Bloom filters out of storage, otherwise download latest.
  window.filters = (await browser.storage.local.get("filters")).filters;
  if (!window.filters || !window.filters.every(f => f.filter)) {
    if (window.settings.debug_mode) {
      console.debug("Fetching Bloom filter info...");
    }
    const info = await fetchInfo();

    window.filters = [];

    // Use a fixed single filter or multiple, depending on user settings
    const thresholds = window.settings.multiple_filters ? info.thresholds : [0];
    for (const threshold of thresholds) {
      // Fetch the Bloom filter without decompressing (in this case, that
      // happens outside the conditional in case a compressed Bloom filter was
      // stored).
      window.filters.push(await fetchBloom(null, threshold, info, false));
    }

    // Save the downloaded Bloom filters
    await storeBloom(window.filters)
      .catch(e => console.error(e));
  } else {
    // The currently_storing attribute is set to true when the filters are
    // actually being stored. This restores them to an accurate state after the
    // filters come out of storage.
    window.filters.forEach(f => f.currently_storing = false);
  }

  // Fail (semi) gracefully if both attempts above to load a Bloom filter fail
  if (!window.filters || !window.filters.every(f => f.filter)) {
    throw "Couldn't load Bloom filter from local storage or the web!";
  }

  // Set bloom.addr for every filter
  for (const f of window.filters) {
    if (f.compressed) {
      decompressBloom(f);
      if (window.settings.debug_mode) {
        console.debug("Decompressed: ", f);
      }
    } else {
      newBloom(f);
    }

    // Release the WebAssembly allocation when the page goes away. freeBloom
    // takes the filter object itself, not its address.
    // TODO: Is this enough? Or is this leaking memory?
    window.addEventListener("beforeunload", e => freeBloom(f));
  }

  await storeBloom(window.filters)
    .catch(e => console.error(e));
}

// NOTE: This works because this file is run before the autogenerated bloom.js
var Module = {
  onRuntimeInitialized: loadBloom,
};
import csv
import json
import re
import sys
import urllib.parse as urlparse


###############################################################################
# Helper functions
###############################################################################

def remove_keys(d, keys):
    """
    Delete every key in keys from the dictionary d, ignoring keys that are not
    present. Modifies d in place and also returns it.
    """
    for k in keys:
        d.pop(k, None)
    return d


###############################################################################
# Classes
###############################################################################

class URL(object):
    """
    A URL split into components, with the scheme and fragment dropped.

    The query string is held parsed in `self.query` (a dict, as produced by
    `urllib.parse.parse_qs`); `queryStr` converts to and from the encoded form.
    Converting the object to `str` gives the scheme-less URL.
    """

    # URL parameters that never seem to be important
    undesirableQueryParams = [
        "ref",
        "sms_ss",
        "gclid",
        "fbclid",
        "at_xt",
        "_r",
    ]
    # Matches the "/web/<timestamp>/" prefix of archive.org paths
    archiveRegex = re.compile(r"/web/[^/]*/")

    def __init__(self, url):
        parsed = urlparse.urlsplit(url)
        # Drop the scheme and fragment
        self.scheme, self.fragment = "", ""
        # Assigning queryStr goes through the property setter, filling query
        _, self.netloc, self.path, self.queryStr, _ = tuple(parsed)

    def __iter__(self):
        # Yield the components in the order urlunsplit expects
        yield self.scheme
        yield self.netloc
        yield self.path
        yield self.queryStr
        yield self.fragment

    def __str__(self):
        return urlparse.urlunsplit(tuple(self))

    @property
    def queryStr(self):
        return urlparse.urlencode(self.query, doseq=True)

    @queryStr.setter
    def queryStr(self, value):
        self.query = urlparse.parse_qs(value, keep_blank_values=True)

    @classmethod
    def canonicalize(cls, url):
        """
        Transform the current URL object to make it as "canonical" as possible.
        This includes removing unnecessary URL parameters, removing "www." from
        the beginning of URLs, stripping unnecessary parts of the path, and
        performing a few domain-specific adjustments.

        Return a canonicalized URL object.

        NOTE: The order in which the transformations take place is subtly
        important. Do not change the order around without good reason.

        NOTE: Any canonicalization changes made here *MUST* be reflected in the
        `canonicalizeUrl` function within the `bloom-wrap.js` file!
        """
        self = cls(url)

        # Host names are case-insensitive and the browser's URL parser (used
        # by bloom-wrap.js) lowercases them, so do the same here. Any
        # "user:password@" prefix is case-sensitive and is left untouched
        userinfo, at, hostport = self.netloc.rpartition("@")
        self.netloc = userinfo + at + hostport.lower()

        # Use the original URL for archive.org links. Only the leading archive
        # prefix is removed (count=1): the archived URL may itself contain a
        # "/web/<something>/" path segment that has to survive
        if self.netloc == "web.archive.org" and self.path.startswith("/web"):
            new_url = URL.archiveRegex.sub("", self.path, count=1)
            return cls.canonicalize(new_url)

        # HTML files almost exclusively use URL parameters for tracking while
        # the underlying page remains the same
        if self.path.endswith(".html"):
            self.query = dict()

        # Remove URL parameters that never seem to be important
        self.query = remove_keys(self.query, URL.undesirableQueryParams)
        for key in list(self.query.keys()):
            if key.startswith("utm_"):
                del self.query[key]

        # Truncate index.html, index.php, and trailing slashes
        for suffix in ("index.html", "index.php"):
            if self.path.endswith(suffix):
                self.path = self.path[:-len(suffix)]
        self.path = self.path.rstrip("/")

        # Remove www. since it is very rare that sites need it these days
        if self.netloc.startswith("www."):
            self.netloc = self.netloc[len("www."):]

        # Turn youtu.be links into youtube.com ones and remove unnecessary URL
        # parameters
        if self.netloc == "youtu.be":
            self.netloc = "youtube.com"
            self.query["v"] = self.path.strip("/")
            self.path = "/watch"
        if self.netloc == "youtube.com" and "v" in self.query:
            self.query = {"v": self.query["v"]}
        if self.netloc == "youtube.com" and "list" in self.query:
            self.query = {"list": self.query["list"]}

        # Pretty much all Amazon URL parameters seem to be useless tracking
        if self.netloc == "amazon.com":
            self.query = dict()

        # Mobile Wikipedia links are annoying
        if self.netloc == "en.m.wikipedia.org":
            self.netloc = "en.wikipedia.org"

        return self


###############################################################################
# Main function
###############################################################################

def main():
    """
    Read CSV rows (with a header containing a "url" column) from standard
    input and print the canonicalized form of each URL, one per line.
    """
    csvReader = csv.DictReader(sys.stdin)
    for entry in csvReader:
        url = URL.canonicalize(entry["url"])
        print(url)


if __name__ == "__main__":
    main()
| { 2 | "manifest_version": 2, 3 | 4 | "name": "Hacker News Discussion Button", 5 | "description": "Links to the Hacker News discussion for the current page. Preserves privacy.", 6 | "version": "0.7.0", 7 | "author": "Jacob Strieb", 8 | "homepage_url": "https://github.com/jstrieb/hackernews-button", 9 | "icons": { 10 | "16": "icons/icon-16.png", 11 | "32": "icons/icon-32.png", 12 | "48": "icons/icon-48.png", 13 | "64": "icons/icon-64.png", 14 | "96": "icons/icon-96.png" 15 | }, 16 | 17 | "browser_specific_settings": { 18 | "gecko": { 19 | "id": "{36225028-11da-4478-b711-ee0940433686}" 20 | } 21 | }, 22 | 23 | "permissions": [ 24 | "", 25 | "tabs", 26 | "storage", 27 | "unlimitedStorage" 28 | ], 29 | 30 | "browser_action": { 31 | "default_icon": { 32 | "16": "icons/icon-16.png", 33 | "32": "icons/icon-32.png", 34 | "48": "icons/icon-48.png", 35 | "64": "icons/icon-64.png", 36 | "96": "icons/icon-96.png" 37 | } 38 | }, 39 | 40 | "commands": { 41 | "_execute_browser_action": { 42 | "suggested_key": { 43 | "default": "Alt+Y" 44 | } 45 | }, 46 | "open_in_new_tab": { 47 | "suggested_key": { 48 | "default": "Ctrl+Shift+Y" 49 | }, 50 | "description": "Open Hacker News discussion in a new tab" 51 | } 52 | }, 53 | 54 | "content_scripts": [{ 55 | "matches": [ 56 | "*://news.ycombinator.com/*" 57 | ], 58 | "js": [ 59 | "add-latest.js" 60 | ] 61 | }], 62 | 63 | "background": { 64 | "page": "background.html" 65 | }, 66 | 67 | "options_ui": { 68 | "page": "options.html", 69 | "browser_style": true 70 | } 71 | } 72 | -------------------------------------------------------------------------------- /options.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 |
9 | 10 |
11 |
12 | 13 | 14 | 15 | 16 |
/* options.js
 *
 * Handlers for settings changes using the options menu created in options.html
 *
 * Created by Jacob Strieb
 * February 2021
 */


/*******************************************************************************
 * Settings helpers
 ******************************************************************************/

/***
 * Read the settings object from local storage, filling in the default value
 * for every setting that has not been stored (for example because it was
 * saved before that setting existed).
 */
async function readStoredSettings() {
  const defaults = {
    debug_mode: false,
    multiple_filters: true,
  };
  const stored = (await browser.storage.local.get("settings")).settings;
  return Object.assign({}, defaults, stored);
}


/***
 * Return window.settings, loading it from storage first if an event handler
 * fires before the initial load has completed. Does not touch the UI, so a
 * checkbox the user has just clicked keeps its state.
 */
async function ensureSettings() {
  if (!window.settings) {
    window.settings = await readStoredSettings();
  }
  return window.settings;
}


/*******************************************************************************
 * Options Event Handlers
 ******************************************************************************/

/***
 * Pass a message to background.js instructing it to delete the stored Bloom
 * filter, and to clear the one currently in memory. Afterwards it reloads them
 * from scratch.
 */
async function handleResetBloom(event) {
  browser.runtime.sendMessage({type: "reset_bloom"});
}


/***
 * Store a value in the settings representing whether or not to do lots and
 * lots of debug logging. Then tell the main background script to reload the
 * settings via a message.
 */
async function handleDebug(event) {
  const checked = document.querySelector("#debug-mode").checked;
  const settings = await ensureSettings();
  settings.debug_mode = checked;
  await browser.storage.local.set({"settings": settings});

  browser.runtime.sendMessage({type: "reload_settings"});
}


/***
 * Store a value in the settings representing whether or not to use a single
 * filter, or multiple filters. Then tell the main background script to reload
 * the settings and to reset the Bloom filters accordingly.
 */
async function handleMultipleFilters(event) {
  const checked = document.querySelector("#multiple-filters").checked;
  const settings = await ensureSettings();
  settings.multiple_filters = checked;
  await browser.storage.local.set({"settings": settings});

  browser.runtime.sendMessage({type: "reload_settings"});
  browser.runtime.sendMessage({type: "reset_bloom"});
}



/*******************************************************************************
 * Main Function (called on options page load)
 ******************************************************************************/

/***
 * Load the settings into window.settings (using defaults for anything not
 * stored) and make the options page widgets reflect them.
 */
async function loadSettings() {
  // Load settings, with default values for anything missing
  window.settings = await readStoredSettings();

  // Set stateful UI widgets to current settings values
  document.querySelector("#debug-mode").checked = window.settings.debug_mode;
  document.querySelector("#multiple-filters").checked = window.settings.multiple_filters;
}

(() => {
  // Report a failed load instead of leaving an unhandled promise rejection
  loadSettings().catch(e => console.error(e));

  const handlers = {
    "#reset": handleResetBloom,
    "#debug-mode": handleDebug,
    "#multiple-filters": handleMultipleFilters,
  };
  for (const [selector, handler] of Object.entries(handlers)) {
    document.querySelector(selector).addEventListener("click", handler);
  }
})();
6 | * 7 | * Created by Jacob Strieb 8 | * January 2021 9 | */ 10 | 11 | 12 | #include 13 | #include 14 | #include 15 | 16 | #include "bloom.h" 17 | 18 | 19 | /******************************************************************************* 20 | * Constants (strings for testing) 21 | ******************************************************************************/ 22 | 23 | // Statically allocate some strings to add to the filter. The songs from 24 | // which these lyrics were taken are some of my favorites. I've done my best 25 | // to include variety, and I highly recommend you giving a listen to any you 26 | // are unfamiliar with. 27 | char *input1[] = { "This is the very first test!" }; 28 | // Layla (Acoustic Version) - Eric Clapton 29 | char *input2[] = { 30 | "See if you can spot this one?", 31 | "What will you do when you get lonely", 32 | "No one waiting by your side?", 33 | "You've been running, hiding much too long", 34 | "You know it's just your foolish pride" 35 | }; 36 | // Blinding Lights - The Weeknd 37 | char *input3[] = { 38 | "I look around and", 39 | "Sin City's cold and empty", 40 | "No one's around to judge me ", 41 | "I can't see clearly when you're gone" 42 | }; 43 | // Gorgeous - Kanye West 44 | char *input4[] = { 45 | "Penitentiary chances, the devil dances", 46 | "And eventually answers to the call of autumn", 47 | "All them fallin' for the love of ballin'", 48 | "Got caught with thirty rocks, the cop look like Alec Baldwin", 49 | "Inter-century anthems based off inner-city tantrums", 50 | "Based off the way we was branded", 51 | "Face it, Jerome get more time than Brandon", 52 | "And at the airport, they check all through my bag", 53 | "And tell me that it's random", 54 | "But we stay winning", 55 | "This week has been a bad massage, I need a happy ending", 56 | "And a new beginning and a new fitted", 57 | "And some job opportunities that's lucrative", 58 | "This the real world, homie, school finished", 59 | "They done stole your dreams, 
you don't know who did it", 60 | "I treat the cash the way the government treats AIDS", 61 | "I won't be satisfied 'til all my n****s get it, get it?" 62 | }; 63 | // Doses and Mimosas - Cherub 64 | char *input5[] = { 65 | "Ten in the morning", 66 | "And I'm skipping breakfast", 67 | "And drinking a beverage", 68 | "To ignore it all", 69 | "Guess ignorance is bliss and", 70 | "I've come to embrace it", 71 | "It's all overrated", 72 | "Except drugs and alcohol" 73 | }; 74 | // Vivir mi Vida - Marc Anthony 75 | char *input6[] = { 76 | "Voy a vivir el momento", 77 | "Para entender el destino", 78 | "Voy a escuchar en silencio", 79 | "Para encontrar el camino" 80 | }; 81 | // Oh Devil - Electric Guest 82 | char *input7[] = { 83 | "Oh, devil, I know you're afraid", 84 | "Sometimes it's hard to learn from all your mistakes", 85 | "Oh, devil, I'm glad that you came", 86 | "Guess I should learn how to live because it won't go away" 87 | }; 88 | 89 | 90 | 91 | /******************************************************************************* 92 | * Helper functions 93 | ******************************************************************************/ 94 | 95 | /*** 96 | * Add several strings to the Bloom filter, ensuring that they are stil in 97 | * there as the test proceeds. 
98 | */ 99 | int test_in(byte *bloom, int bloom_size, char *strings[], int len) { 100 | int sizes[len]; 101 | for (int i = 0; i < len; i++) { 102 | sizes[i] = strlen(strings[i]); 103 | } 104 | 105 | for (int i = 0; i < len; i++) { 106 | // Make sure the previous strings are still in the filter 107 | for (int j = 0; j < i; j++) { 108 | if (!in_bloom(bloom, bloom_size, (byte *)strings[j], sizes[j])) { 109 | printf("False negative:\n%s\n", strings[j]); 110 | return 0; 111 | } 112 | } 113 | 114 | // Add the string 115 | add_bloom(bloom, bloom_size, (byte *)strings[i], sizes[i]); 116 | } 117 | 118 | return 1; 119 | } 120 | 121 | 122 | /*** 123 | * Ensure strings that shouldn't be in the Bloom filter aren't, assuming no 124 | * false positives, which are theoretically possible, but have a diminishingly 125 | * low chance of happening for appropriately sized Bloom filters. 126 | */ 127 | int test_out(byte *bloom, int bloom_size, char *strings[], int len) { 128 | int sizes[len]; 129 | for (int i = 0; i < len; i++) { 130 | sizes[i] = strlen(strings[i]); 131 | } 132 | 133 | for (int i = 0; i < len; i++) { 134 | if (in_bloom(bloom, bloom_size, (byte *)strings[i], sizes[i])) { 135 | printf("False positive:\n%s\n", strings[i]); 136 | return 0; 137 | } 138 | } 139 | 140 | return 1; 141 | } 142 | 143 | 144 | /*** 145 | * Test a Bloom filter with stuff already added (in test_new_bloom) 146 | */ 147 | int test_old_bloom(byte *bloom, uint8_t size) { 148 | int success = 1; 149 | 150 | success = success && test_in(bloom, size, input7, 4); 151 | success = success && test_in(bloom, size, input6, 4); 152 | success = success && test_in(bloom, size, input5, 8); 153 | success = success && test_in(bloom, size, input4, 17); 154 | success = success && test_in(bloom, size, input3, 4); 155 | success = success && test_in(bloom, size, input2, 5); 156 | success = success && test_in(bloom, size, input1, 1); 157 | 158 | return success; 159 | } 160 | 161 | 162 | /*** 163 | * Test the same entries for 
a bunch of different sized Bloom filters. 164 | */ 165 | int test_new_bloom(byte *bloom, uint8_t size) { 166 | int success = 1; 167 | 168 | // TODO: Should this still be here? 169 | success = success && test_in(bloom, size, NULL, 0); 170 | 171 | // Ensure inputs that haven't been added yet aren't in the filter, add inputs 172 | // one-by-one, and check both that they are in there, and that they haven't 173 | // changed the membership status of other inputs that are(n't) supposed to be 174 | // in there 175 | success = success && test_out(bloom, size, input1, 1); 176 | success = success && test_out(bloom, size, input2, 5); 177 | success = success && test_out(bloom, size, input3, 4); 178 | success = success && test_out(bloom, size, input4, 17); 179 | success = success && test_out(bloom, size, input5, 8); 180 | success = success && test_out(bloom, size, input6, 4); 181 | success = success && test_out(bloom, size, input7, 4); 182 | success = success && test_in(bloom, size, input1, 1); 183 | 184 | success = success && test_out(bloom, size, input2, 5); 185 | success = success && test_out(bloom, size, input3, 4); 186 | success = success && test_out(bloom, size, input4, 17); 187 | success = success && test_out(bloom, size, input5, 8); 188 | success = success && test_out(bloom, size, input6, 4); 189 | success = success && test_out(bloom, size, input7, 4); 190 | success = success && test_in(bloom, size, input2, 5); 191 | success = success && test_in(bloom, size, input1, 1); 192 | 193 | success = success && test_out(bloom, size, input3, 4); 194 | success = success && test_out(bloom, size, input4, 17); 195 | success = success && test_out(bloom, size, input5, 8); 196 | success = success && test_out(bloom, size, input6, 4); 197 | success = success && test_out(bloom, size, input7, 4); 198 | success = success && test_in(bloom, size, input3, 4); 199 | success = success && test_in(bloom, size, input2, 5); 200 | success = success && test_in(bloom, size, input1, 1); 201 | 202 | 
success = success && test_out(bloom, size, input4, 17); 203 | success = success && test_out(bloom, size, input5, 8); 204 | success = success && test_out(bloom, size, input6, 4); 205 | success = success && test_out(bloom, size, input7, 4); 206 | success = success && test_in(bloom, size, input4, 17); 207 | success = success && test_in(bloom, size, input3, 4); 208 | success = success && test_in(bloom, size, input2, 5); 209 | success = success && test_in(bloom, size, input1, 1); 210 | 211 | success = success && test_out(bloom, size, input5, 8); 212 | success = success && test_out(bloom, size, input6, 4); 213 | success = success && test_out(bloom, size, input7, 4); 214 | success = success && test_in(bloom, size, input5, 8); 215 | success = success && test_in(bloom, size, input4, 17); 216 | success = success && test_in(bloom, size, input3, 4); 217 | success = success && test_in(bloom, size, input2, 5); 218 | success = success && test_in(bloom, size, input1, 1); 219 | 220 | success = success && test_out(bloom, size, input6, 4); 221 | success = success && test_out(bloom, size, input7, 4); 222 | success = success && test_in(bloom, size, input6, 4); 223 | success = success && test_in(bloom, size, input5, 8); 224 | success = success && test_in(bloom, size, input4, 17); 225 | success = success && test_in(bloom, size, input3, 4); 226 | success = success && test_in(bloom, size, input2, 5); 227 | success = success && test_in(bloom, size, input1, 1); 228 | 229 | success = success && test_out(bloom, size, input7, 4); 230 | success = success && test_in(bloom, size, input7, 4); 231 | success = success && test_in(bloom, size, input6, 4); 232 | success = success && test_in(bloom, size, input5, 8); 233 | success = success && test_in(bloom, size, input4, 17); 234 | success = success && test_in(bloom, size, input3, 4); 235 | success = success && test_in(bloom, size, input2, 5); 236 | success = success && test_in(bloom, size, input1, 1); 237 | 238 | if (!success) { 239 | printf("Bloom 
filter test failed for size %d!\n", (int)size); 240 | } 241 | 242 | return success; 243 | } 244 | 245 | 246 | /*** 247 | * Test writing compressed files and decompressing them in-memory 248 | */ 249 | int test_compression(byte **bloom, uint8_t size, size_t *new_size) { 250 | int success = 1; 251 | 252 | // Write the compressed Bloom filter out so we can load it back in and test 253 | char *tempfilename = "/tmp/delete.bloom"; 254 | write_compressed_bloom(tempfilename, *bloom, size); 255 | free_bloom(*bloom); 256 | 257 | // Open the gzipped temporary file and read the entire thing into a buffer 258 | FILE *tempfile; 259 | if ((tempfile = fopen(tempfilename, "rb")) == NULL) { 260 | puts("Failed to open compressed file!"); 261 | return 0; 262 | } 263 | // Seek to the end to get file length 264 | if (fseek(tempfile, 0l, SEEK_END)) { 265 | puts("Failed seek to the end of the compressed file!"); 266 | return 0; 267 | } 268 | long tempfile_length; 269 | if ((tempfile_length = ftell(tempfile)) == -1) { 270 | puts("Failed to get compressed file position!"); 271 | return 0; 272 | } 273 | rewind(tempfile); 274 | // Read the temporary file 275 | byte *compressed = (byte *)malloc(tempfile_length * sizeof(char)); 276 | if (fread(compressed, 1, tempfile_length, tempfile) == 0) { 277 | puts("Could not read from compressed file!"); 278 | return 0; 279 | } 280 | if (fclose(tempfile) != 0) { 281 | puts("Could not close compressed file!"); 282 | return 0; 283 | } 284 | 285 | byte *decompressed = NULL; 286 | *new_size = decompress_bloom(compressed, tempfile_length, &decompressed); 287 | if (new_size == 0) { 288 | puts("Could not successfully decompress the Bloom filter!"); 289 | return 0; 290 | } 291 | *bloom = decompressed; 292 | 293 | free(compressed); 294 | 295 | return success; 296 | } 297 | 298 | 299 | /*** 300 | * Test combining Bloom filters 301 | * 302 | * TODO: Improve – test_in might not actually be confirming that it works... 
303 | */ 304 | int test_combine() { 305 | int success = 1; 306 | 307 | uint8_t size = 15; 308 | byte *bloom1 = new_bloom(size); 309 | byte *bloom2 = new_bloom(size); 310 | 311 | success = success && test_in(bloom1, size, input2, 5); 312 | success = success && test_in(bloom2, size, input4, 17); 313 | success = success && test_out(bloom1, size, input4, 17); 314 | 315 | combine_bloom(bloom1, bloom2, size); 316 | success = success && test_in(bloom1, size, input4, 17); 317 | 318 | free(bloom1); 319 | free(bloom2); 320 | 321 | return success; 322 | } 323 | 324 | 325 | 326 | /******************************************************************************* 327 | * Main function 328 | ******************************************************************************/ 329 | 330 | int main(int argc, char *argv[]) { 331 | int success = 1; 332 | 333 | puts("Testing Bloom filter library...\n"); 334 | 335 | // Test the same input on Bloom filters for each number of bits in the range 336 | // 9 to 31 337 | for (uint8_t i = 9; i < 32; i++) { 338 | printf("Testing a Bloom filter of size %d...\n", (int)i); 339 | // Make a new Bloom filter of the current size 340 | byte *bloom = new_bloom(i); 341 | 342 | // Test the Bloom filter 343 | success = success && test_new_bloom(bloom, i); 344 | 345 | // Write a compressed version, then read it back and decompress it 346 | size_t new_size; 347 | success = success && test_compression(&bloom, i, &new_size); 348 | if (new_size != (size_t)(1 << (i - 3))) { 349 | printf("New Bloom filter has size %d when size %d was expected!\n", 350 | (int)new_size, (int)(1 << (i - 3))); 351 | success = 0; 352 | break; 353 | } 354 | 355 | // Ensure that the right values are still in the decompressed version 356 | success = success && test_old_bloom(bloom, i); 357 | 358 | // Clean up 359 | free_bloom(bloom); 360 | 361 | if (!success) { 362 | break; 363 | } 364 | } 365 | 366 | // Test combining Bloom filters 367 | success = success && test_combine(); 368 | 369 | // TODO: 
Add tests that create new bloom filters and generate many strings 370 | // over and over, confirming that over time the average converges to the 371 | // expected theoretical number of collisions 372 | 373 | puts(success ? "Success!" : "Failure!"); 374 | puts(""); 375 | 376 | return 0; 377 | 378 | // TODO: Remove 379 | (void)argc; 380 | (void)argv; 381 | } 382 | -------------------------------------------------------------------------------- /test/murmur-test.c: -------------------------------------------------------------------------------- 1 | /* test/murmur-test.c 2 | * 3 | * Run a bunch of tests on the murmur3 implementation. Will print to standard 4 | * output if run in a terminal, will print to the browser console if compiled 5 | * using emscripten and loaded into the browser. 6 | * 7 | * Many tests generously provided by: 8 | * https://stackoverflow.com/a/31929528/1376127 9 | * 10 | * Created by Jacob Strieb 11 | * January 2021 12 | */ 13 | 14 | 15 | #include 16 | #include 17 | #include 18 | 19 | #include "murmur.h" 20 | 21 | 22 | 23 | /******************************************************************************* 24 | * Helper functions 25 | ******************************************************************************/ 26 | 27 | int run_test(uint8_t *input, int length, uint32_t seed, uint32_t expected) { 28 | uint32_t result = murmur3(input, length, seed); 29 | printf("Expected: 0x%08x | Got: 0x%08x\n", expected, result); 30 | return result == expected; 31 | } 32 | 33 | 34 | 35 | /******************************************************************************* 36 | * Main function 37 | ******************************************************************************/ 38 | 39 | /** 40 | * Really hope the tests are correct, because I snatched them verbatim 41 | * without checking them anywhere else, and they match my impelmentation! 42 | * 43 | * See the link at the top of the file for the goals for several of the 44 | * specific tests. 
45 | */ 46 | int main(int argc, char *argv[]) { 47 | int success = 1; 48 | 49 | puts("Testing murmur3...\n"); 50 | 51 | success = success && run_test(NULL, 0, 0, 0); 52 | success = success && run_test(NULL, 0, 1, 0x514e28b7); 53 | success = success && run_test(NULL, 0, 0xffffffff, 0x81f16f39); 54 | 55 | uint8_t input[] = { 0xff, 0xff, 0xff, 0xff }; 56 | success = success && run_test((uint8_t *)&input, 4, 0, 0x76293b50); 57 | 58 | uint8_t input2[] = { 0x21, 0x43, 0x65, 0x87 }; 59 | success = success && run_test((uint8_t *)&input2, 4, 0, 0xf55b516b); 60 | success = success && run_test((uint8_t *)&input2, 4, 0x5082edee, 0x2362f9de); 61 | success = success && run_test((uint8_t *)&input2, 3, 0, 0x7e4a8634); 62 | success = success && run_test((uint8_t *)&input2, 2, 0, 0xa0f7b07a); 63 | success = success && run_test((uint8_t *)&input2, 1, 0, 0x72661cf4); 64 | 65 | uint8_t input3[] = { 0x00, 0x00, 0x00, 0x00 }; 66 | success = success && run_test((uint8_t *)&input3, 4, 0, 0x2362f9de); 67 | success = success && run_test((uint8_t *)&input3, 3, 0, 0x85f0b427); 68 | success = success && run_test((uint8_t *)&input3, 2, 0, 0x30f4c306); 69 | success = success && run_test((uint8_t *)&input3, 1, 0, 0x514e28b7); 70 | 71 | char input4[] = ""; 72 | success = success && run_test((uint8_t *)&input4, 0, 0, 0); 73 | success = success && run_test((uint8_t *)&input4, 0, 1, 0x514e28b7); 74 | success = success && run_test((uint8_t *)&input4, 0, 0xffffffff, 0x81f16f39); 75 | 76 | char input5[] = "\0\0\0\0"; 77 | success = success && run_test((uint8_t *)&input5, 4, 0, 0x2362f9de); 78 | 79 | char input6[] = "aaaa"; 80 | success = success && run_test((uint8_t *)&input6, 4, 0x9747b28c, 0x5a97808a); 81 | success = success && run_test((uint8_t *)&input6, 3, 0x9747b28c, 0x283e0130); 82 | success = success && run_test((uint8_t *)&input6, 2, 0x9747b28c, 0x5d211726); 83 | success = success && run_test((uint8_t *)&input6, 1, 0x9747b28c, 0x7fa09ea6); 84 | 85 | char input7[] = "abcd"; 86 | success = success 
&& run_test((uint8_t *)&input7, 4, 0x9747b28c, 0xf0478627); 87 | success = success && run_test((uint8_t *)&input7, 3, 0x9747b28c, 0xc84a62dd); 88 | success = success && run_test((uint8_t *)&input7, 2, 0x9747b28c, 0x74875592); 89 | success = success && run_test((uint8_t *)&input7, 1, 0x9747b28c, 0x7fa09ea6); 90 | 91 | char input8[] = "Hello, world!"; 92 | success = success && run_test((uint8_t *)&input8, 13, 0x9747b28c, 0x24884cba); 93 | 94 | char input9[] = "ππππππππ"; 95 | success = success && run_test((uint8_t *)&input9, 16, 0x9747b28c, 0xd58063c1); 96 | 97 | uint8_t input10[256]; 98 | memset((void *)&input10, 'a', 256); 99 | success = success && run_test((uint8_t *)&input10, 256, 0x9747b28c, 100 | 0x37405bdc); 101 | 102 | uint8_t input11[] = "abc"; 103 | success = success && run_test((uint8_t *)&input11, 3, 0, 0xb3dd93fa); 104 | 105 | uint8_t input12[] = "abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq"; 106 | success = success && run_test((uint8_t *)&input12, 56, 0, 0xee925b90); 107 | 108 | uint8_t input13[] = "The quick brown fox jumps over the lazy dog"; 109 | success = success && run_test((uint8_t *)&input13, 43, 0x9747b28c, 0x2fa826cd); 110 | 111 | puts(""); 112 | puts(success ? "Succeeded!" : "Failed!"); 113 | puts(""); 114 | 115 | return !success; 116 | 117 | // TODO: Remove 118 | (void)argc; 119 | (void)argv; 120 | } 121 | -------------------------------------------------------------------------------- /test/test-template.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Emscripten-Generated Code 5 | 9 | 10 | 11 |

Check out the developer console to see the test results

12 | {{{ SCRIPT }}} 13 | 14 | 15 | --------------------------------------------------------------------------------