├── .github └── workflows │ └── main.yml ├── .gitignore ├── .gitmodules ├── .python-version ├── LICENSE ├── README.md ├── docs ├── CNAME ├── api-v2 │ ├── changelog.md │ ├── examples.md │ ├── index.md │ ├── other.md │ ├── root-nodes.md │ └── types.md ├── api-v3 │ ├── changelog.md │ └── index.md ├── assets │ ├── extra.css │ └── openstates.svg ├── code-of-conduct.md ├── contributing │ ├── documentation.md │ ├── images │ │ ├── committee_data_example.png │ │ ├── selector_ex1.png │ │ ├── selector_ex2.png │ │ └── selector_ex3.png │ ├── index.md │ ├── local-database.md │ ├── openstates-org.md │ ├── people.md │ ├── scrapers.md │ ├── state-specific.md │ ├── testing-scrapers.md │ ├── text-extraction.md │ └── writing-a-committee-scraper.md ├── data │ ├── categorization.md │ ├── index.md │ ├── query-scraper-output-data.md │ └── session-naming.md ├── index.md └── openstates.org │ └── scheduled-tasks.md ├── mkdocs.yml ├── poetry.lock └── pyproject.toml /.github/workflows/main.yml: -------------------------------------------------------------------------------- 1 | name: Build Docs 2 | 3 | on: 4 | push: 5 | branches: [main] 6 | pull_request: 7 | branches: [main] 8 | 9 | 10 | jobs: 11 | build: 12 | steps: 13 | # Python & dependency installation 14 | - uses: actions/checkout@v4 15 | with: 16 | submodules: true 17 | - name: setup Python 18 | uses: actions/setup-python@v2 19 | with: 20 | python-version: '3.9' 21 | - name: install Poetry 22 | uses: snok/install-poetry@v1.4.1 23 | - name: set poetry config path 24 | run: poetry config virtualenvs.path ~/.virtualenvs 25 | - name: install dependencies 26 | run: poetry install 27 | - name: build with mkdocs 28 | run: poetry run mkdocs gh-deploy --force 29 | runs-on: ubuntu-20.04 30 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | _build/ 2 | .vscode 3 | *.DS_Store 4 | site/ 5 | .idea/ 6 | 
-------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "docs/enhancement-proposals"] 2 | path = docs/enhancement-proposals 3 | url = git@github.com:openstates/enhancement-proposals.git 4 | -------------------------------------------------------------------------------- /.python-version: -------------------------------------------------------------------------------- 1 | 3.9 2 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Attribution 4.0 International 2 | 3 | ======================================================================= 4 | 5 | Creative Commons Corporation ("Creative Commons") is not a law firm and 6 | does not provide legal services or legal advice. Distribution of 7 | Creative Commons public licenses does not create a lawyer-client or 8 | other relationship. Creative Commons makes its licenses and related 9 | information available on an "as-is" basis. Creative Commons gives no 10 | warranties regarding its licenses, any material licensed under their 11 | terms and conditions, or any related information. Creative Commons 12 | disclaims all liability for damages resulting from their use to the 13 | fullest extent possible. 14 | 15 | Using Creative Commons Public Licenses 16 | 17 | Creative Commons public licenses provide a standard set of terms and 18 | conditions that creators and other rights holders may use to share 19 | original works of authorship and other material subject to copyright 20 | and certain other rights specified in the public license below. The 21 | following considerations are for informational purposes only, are not 22 | exhaustive, and do not form part of our licenses. 
23 | 24 | Considerations for licensors: Our public licenses are 25 | intended for use by those authorized to give the public 26 | permission to use material in ways otherwise restricted by 27 | copyright and certain other rights. Our licenses are 28 | irrevocable. Licensors should read and understand the terms 29 | and conditions of the license they choose before applying it. 30 | Licensors should also secure all rights necessary before 31 | applying our licenses so that the public can reuse the 32 | material as expected. Licensors should clearly mark any 33 | material not subject to the license. This includes other CC- 34 | licensed material, or material used under an exception or 35 | limitation to copyright. More considerations for licensors: 36 | wiki.creativecommons.org/Considerations_for_licensors 37 | 38 | Considerations for the public: By using one of our public 39 | licenses, a licensor grants the public permission to use the 40 | licensed material under specified terms and conditions. If 41 | the licensor's permission is not necessary for any reason--for 42 | example, because of any applicable exception or limitation to 43 | copyright--then that use is not regulated by the license. Our 44 | licenses grant only permissions under copyright and certain 45 | other rights that a licensor has authority to grant. Use of 46 | the licensed material may still be restricted for other 47 | reasons, including because others have copyright or other 48 | rights in the material. A licensor may make special requests, 49 | such as asking that all changes be marked or described. 50 | Although not required by our licenses, you are encouraged to 51 | respect those requests where reasonable. 
More_considerations 52 | for the public: 53 | wiki.creativecommons.org/Considerations_for_licensees 54 | 55 | ======================================================================= 56 | 57 | Creative Commons Attribution 4.0 International Public License 58 | 59 | By exercising the Licensed Rights (defined below), You accept and agree 60 | to be bound by the terms and conditions of this Creative Commons 61 | Attribution 4.0 International Public License ("Public License"). To the 62 | extent this Public License may be interpreted as a contract, You are 63 | granted the Licensed Rights in consideration of Your acceptance of 64 | these terms and conditions, and the Licensor grants You such rights in 65 | consideration of benefits the Licensor receives from making the 66 | Licensed Material available under these terms and conditions. 67 | 68 | 69 | Section 1 -- Definitions. 70 | 71 | a. Adapted Material means material subject to Copyright and Similar 72 | Rights that is derived from or based upon the Licensed Material 73 | and in which the Licensed Material is translated, altered, 74 | arranged, transformed, or otherwise modified in a manner requiring 75 | permission under the Copyright and Similar Rights held by the 76 | Licensor. For purposes of this Public License, where the Licensed 77 | Material is a musical work, performance, or sound recording, 78 | Adapted Material is always produced where the Licensed Material is 79 | synched in timed relation with a moving image. 80 | 81 | b. Adapter's License means the license You apply to Your Copyright 82 | and Similar Rights in Your contributions to Adapted Material in 83 | accordance with the terms and conditions of this Public License. 84 | 85 | c. 
Copyright and Similar Rights means copyright and/or similar rights 86 | closely related to copyright including, without limitation, 87 | performance, broadcast, sound recording, and Sui Generis Database 88 | Rights, without regard to how the rights are labeled or 89 | categorized. For purposes of this Public License, the rights 90 | specified in Section 2(b)(1)-(2) are not Copyright and Similar 91 | Rights. 92 | 93 | d. Effective Technological Measures means those measures that, in the 94 | absence of proper authority, may not be circumvented under laws 95 | fulfilling obligations under Article 11 of the WIPO Copyright 96 | Treaty adopted on December 20, 1996, and/or similar international 97 | agreements. 98 | 99 | e. Exceptions and Limitations means fair use, fair dealing, and/or 100 | any other exception or limitation to Copyright and Similar Rights 101 | that applies to Your use of the Licensed Material. 102 | 103 | f. Licensed Material means the artistic or literary work, database, 104 | or other material to which the Licensor applied this Public 105 | License. 106 | 107 | g. Licensed Rights means the rights granted to You subject to the 108 | terms and conditions of this Public License, which are limited to 109 | all Copyright and Similar Rights that apply to Your use of the 110 | Licensed Material and that the Licensor has authority to license. 111 | 112 | h. Licensor means the individual(s) or entity(ies) granting rights 113 | under this Public License. 114 | 115 | i. Share means to provide material to the public by any means or 116 | process that requires permission under the Licensed Rights, such 117 | as reproduction, public display, public performance, distribution, 118 | dissemination, communication, or importation, and to make material 119 | available to the public including in ways that members of the 120 | public may access the material from a place and at a time 121 | individually chosen by them. 122 | 123 | j. 
Sui Generis Database Rights means rights other than copyright 124 | resulting from Directive 96/9/EC of the European Parliament and of 125 | the Council of 11 March 1996 on the legal protection of databases, 126 | as amended and/or succeeded, as well as other essentially 127 | equivalent rights anywhere in the world. 128 | 129 | k. You means the individual or entity exercising the Licensed Rights 130 | under this Public License. Your has a corresponding meaning. 131 | 132 | 133 | Section 2 -- Scope. 134 | 135 | a. License grant. 136 | 137 | 1. Subject to the terms and conditions of this Public License, 138 | the Licensor hereby grants You a worldwide, royalty-free, 139 | non-sublicensable, non-exclusive, irrevocable license to 140 | exercise the Licensed Rights in the Licensed Material to: 141 | 142 | a. reproduce and Share the Licensed Material, in whole or 143 | in part; and 144 | 145 | b. produce, reproduce, and Share Adapted Material. 146 | 147 | 2. Exceptions and Limitations. For the avoidance of doubt, where 148 | Exceptions and Limitations apply to Your use, this Public 149 | License does not apply, and You do not need to comply with 150 | its terms and conditions. 151 | 152 | 3. Term. The term of this Public License is specified in Section 153 | 6(a). 154 | 155 | 4. Media and formats; technical modifications allowed. The 156 | Licensor authorizes You to exercise the Licensed Rights in 157 | all media and formats whether now known or hereafter created, 158 | and to make technical modifications necessary to do so. The 159 | Licensor waives and/or agrees not to assert any right or 160 | authority to forbid You from making technical modifications 161 | necessary to exercise the Licensed Rights, including 162 | technical modifications necessary to circumvent Effective 163 | Technological Measures. For purposes of this Public License, 164 | simply making modifications authorized by this Section 2(a) 165 | (4) never produces Adapted Material. 166 | 167 | 5. 
Downstream recipients. 168 | 169 | a. Offer from the Licensor -- Licensed Material. Every 170 | recipient of the Licensed Material automatically 171 | receives an offer from the Licensor to exercise the 172 | Licensed Rights under the terms and conditions of this 173 | Public License. 174 | 175 | b. No downstream restrictions. You may not offer or impose 176 | any additional or different terms or conditions on, or 177 | apply any Effective Technological Measures to, the 178 | Licensed Material if doing so restricts exercise of the 179 | Licensed Rights by any recipient of the Licensed 180 | Material. 181 | 182 | 6. No endorsement. Nothing in this Public License constitutes or 183 | may be construed as permission to assert or imply that You 184 | are, or that Your use of the Licensed Material is, connected 185 | with, or sponsored, endorsed, or granted official status by, 186 | the Licensor or others designated to receive attribution as 187 | provided in Section 3(a)(1)(A)(i). 188 | 189 | b. Other rights. 190 | 191 | 1. Moral rights, such as the right of integrity, are not 192 | licensed under this Public License, nor are publicity, 193 | privacy, and/or other similar personality rights; however, to 194 | the extent possible, the Licensor waives and/or agrees not to 195 | assert any such rights held by the Licensor to the limited 196 | extent necessary to allow You to exercise the Licensed 197 | Rights, but not otherwise. 198 | 199 | 2. Patent and trademark rights are not licensed under this 200 | Public License. 201 | 202 | 3. To the extent possible, the Licensor waives any right to 203 | collect royalties from You for the exercise of the Licensed 204 | Rights, whether directly or through a collecting society 205 | under any voluntary or waivable statutory or compulsory 206 | licensing scheme. In all other cases the Licensor expressly 207 | reserves any right to collect such royalties. 208 | 209 | 210 | Section 3 -- License Conditions. 
211 | 212 | Your exercise of the Licensed Rights is expressly made subject to the 213 | following conditions. 214 | 215 | a. Attribution. 216 | 217 | 1. If You Share the Licensed Material (including in modified 218 | form), You must: 219 | 220 | a. retain the following if it is supplied by the Licensor 221 | with the Licensed Material: 222 | 223 | i. identification of the creator(s) of the Licensed 224 | Material and any others designated to receive 225 | attribution, in any reasonable manner requested by 226 | the Licensor (including by pseudonym if 227 | designated); 228 | 229 | ii. a copyright notice; 230 | 231 | iii. a notice that refers to this Public License; 232 | 233 | iv. a notice that refers to the disclaimer of 234 | warranties; 235 | 236 | v. a URI or hyperlink to the Licensed Material to the 237 | extent reasonably practicable; 238 | 239 | b. indicate if You modified the Licensed Material and 240 | retain an indication of any previous modifications; and 241 | 242 | c. indicate the Licensed Material is licensed under this 243 | Public License, and include the text of, or the URI or 244 | hyperlink to, this Public License. 245 | 246 | 2. You may satisfy the conditions in Section 3(a)(1) in any 247 | reasonable manner based on the medium, means, and context in 248 | which You Share the Licensed Material. For example, it may be 249 | reasonable to satisfy the conditions by providing a URI or 250 | hyperlink to a resource that includes the required 251 | information. 252 | 253 | 3. If requested by the Licensor, You must remove any of the 254 | information required by Section 3(a)(1)(A) to the extent 255 | reasonably practicable. 256 | 257 | 4. If You Share Adapted Material You produce, the Adapter's 258 | License You apply must not prevent recipients of the Adapted 259 | Material from complying with this Public License. 260 | 261 | 262 | Section 4 -- Sui Generis Database Rights. 
263 | 264 | Where the Licensed Rights include Sui Generis Database Rights that 265 | apply to Your use of the Licensed Material: 266 | 267 | a. for the avoidance of doubt, Section 2(a)(1) grants You the right 268 | to extract, reuse, reproduce, and Share all or a substantial 269 | portion of the contents of the database; 270 | 271 | b. if You include all or a substantial portion of the database 272 | contents in a database in which You have Sui Generis Database 273 | Rights, then the database in which You have Sui Generis Database 274 | Rights (but not its individual contents) is Adapted Material; and 275 | 276 | c. You must comply with the conditions in Section 3(a) if You Share 277 | all or a substantial portion of the contents of the database. 278 | 279 | For the avoidance of doubt, this Section 4 supplements and does not 280 | replace Your obligations under this Public License where the Licensed 281 | Rights include other Copyright and Similar Rights. 282 | 283 | 284 | Section 5 -- Disclaimer of Warranties and Limitation of Liability. 285 | 286 | a. UNLESS OTHERWISE SEPARATELY UNDERTAKEN BY THE LICENSOR, TO THE 287 | EXTENT POSSIBLE, THE LICENSOR OFFERS THE LICENSED MATERIAL AS-IS 288 | AND AS-AVAILABLE, AND MAKES NO REPRESENTATIONS OR WARRANTIES OF 289 | ANY KIND CONCERNING THE LICENSED MATERIAL, WHETHER EXPRESS, 290 | IMPLIED, STATUTORY, OR OTHER. THIS INCLUDES, WITHOUT LIMITATION, 291 | WARRANTIES OF TITLE, MERCHANTABILITY, FITNESS FOR A PARTICULAR 292 | PURPOSE, NON-INFRINGEMENT, ABSENCE OF LATENT OR OTHER DEFECTS, 293 | ACCURACY, OR THE PRESENCE OR ABSENCE OF ERRORS, WHETHER OR NOT 294 | KNOWN OR DISCOVERABLE. WHERE DISCLAIMERS OF WARRANTIES ARE NOT 295 | ALLOWED IN FULL OR IN PART, THIS DISCLAIMER MAY NOT APPLY TO YOU. 296 | 297 | b. 
TO THE EXTENT POSSIBLE, IN NO EVENT WILL THE LICENSOR BE LIABLE 298 | TO YOU ON ANY LEGAL THEORY (INCLUDING, WITHOUT LIMITATION, 299 | NEGLIGENCE) OR OTHERWISE FOR ANY DIRECT, SPECIAL, INDIRECT, 300 | INCIDENTAL, CONSEQUENTIAL, PUNITIVE, EXEMPLARY, OR OTHER LOSSES, 301 | COSTS, EXPENSES, OR DAMAGES ARISING OUT OF THIS PUBLIC LICENSE OR 302 | USE OF THE LICENSED MATERIAL, EVEN IF THE LICENSOR HAS BEEN 303 | ADVISED OF THE POSSIBILITY OF SUCH LOSSES, COSTS, EXPENSES, OR 304 | DAMAGES. WHERE A LIMITATION OF LIABILITY IS NOT ALLOWED IN FULL OR 305 | IN PART, THIS LIMITATION MAY NOT APPLY TO YOU. 306 | 307 | c. The disclaimer of warranties and limitation of liability provided 308 | above shall be interpreted in a manner that, to the extent 309 | possible, most closely approximates an absolute disclaimer and 310 | waiver of all liability. 311 | 312 | 313 | Section 6 -- Term and Termination. 314 | 315 | a. This Public License applies for the term of the Copyright and 316 | Similar Rights licensed here. However, if You fail to comply with 317 | this Public License, then Your rights under this Public License 318 | terminate automatically. 319 | 320 | b. Where Your right to use the Licensed Material has terminated under 321 | Section 6(a), it reinstates: 322 | 323 | 1. automatically as of the date the violation is cured, provided 324 | it is cured within 30 days of Your discovery of the 325 | violation; or 326 | 327 | 2. upon express reinstatement by the Licensor. 328 | 329 | For the avoidance of doubt, this Section 6(b) does not affect any 330 | right the Licensor may have to seek remedies for Your violations 331 | of this Public License. 332 | 333 | c. For the avoidance of doubt, the Licensor may also offer the 334 | Licensed Material under separate terms or conditions or stop 335 | distributing the Licensed Material at any time; however, doing so 336 | will not terminate this Public License. 337 | 338 | d. 
Sections 1, 5, 6, 7, and 8 survive termination of this Public 339 | License. 340 | 341 | 342 | Section 7 -- Other Terms and Conditions. 343 | 344 | a. The Licensor shall not be bound by any additional or different 345 | terms or conditions communicated by You unless expressly agreed. 346 | 347 | b. Any arrangements, understandings, or agreements regarding the 348 | Licensed Material not stated herein are separate from and 349 | independent of the terms and conditions of this Public License. 350 | 351 | 352 | Section 8 -- Interpretation. 353 | 354 | a. For the avoidance of doubt, this Public License does not, and 355 | shall not be interpreted to, reduce, limit, restrict, or impose 356 | conditions on any use of the Licensed Material that could lawfully 357 | be made without permission under this Public License. 358 | 359 | b. To the extent possible, if any provision of this Public License is 360 | deemed unenforceable, it shall be automatically reformed to the 361 | minimum extent necessary to make it enforceable. If the provision 362 | cannot be reformed, it shall be severed from this Public License 363 | without affecting the enforceability of the remaining terms and 364 | conditions. 365 | 366 | c. No term or condition of this Public License will be waived and no 367 | failure to comply consented to unless expressly agreed to by the 368 | Licensor. 369 | 370 | d. Nothing in this Public License constitutes or may be interpreted 371 | as a limitation upon, or waiver of, any privileges and immunities 372 | that apply to the Licensor or You, including from the legal 373 | processes of any jurisdiction or authority. 374 | 375 | 376 | ======================================================================= 377 | 378 | Creative Commons is not a party to its public 379 | licenses. 
Notwithstanding, Creative Commons may elect to apply one of 380 | its public licenses to material it publishes and in those instances 381 | will be considered the “Licensor.” The text of the Creative Commons 382 | public licenses is dedicated to the public domain under the CC0 Public 383 | Domain Dedication. Except for the limited purpose of indicating that 384 | material is shared under a Creative Commons public license or as 385 | otherwise permitted by the Creative Commons policies published at 386 | creativecommons.org/policies, Creative Commons does not authorize the 387 | use of the trademark "Creative Commons" or any other trademark or logo 388 | of Creative Commons without its prior written consent including, 389 | without limitation, in connection with any unauthorized modifications 390 | to any of its public licenses or any other arrangements, 391 | understandings, or agreements concerning use of licensed material. For 392 | the avoidance of doubt, this paragraph does not form part of the 393 | public licenses. 394 | 395 | Creative Commons may be contacted at creativecommons.org. 
396 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Open States Documentation 2 | 3 | This repository contains the files powering https://docs.openstates.org/ 4 | 5 | ## Links 6 | 7 | * [Documentation Issues](https://github.com/openstates/issues/labels/documentation) 8 | * [Contributor's Guide](https://docs.openstates.org/en/latest/contributing/getting-started.html) 9 | * [Contributing to Documentation](https://docs.openstates.org/en/latest/contributing/documentation.html) 10 | * [Code of Conduct](https://docs.openstates.org/en/latest/contributing/code-of-conduct.html) 11 | -------------------------------------------------------------------------------- /docs/CNAME: -------------------------------------------------------------------------------- 1 | docs.openstates.org 2 | -------------------------------------------------------------------------------- /docs/api-v2/changelog.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | Changelog for Open States GraphQL API: 4 | 5 | ## v2.6 (March 2021) 6 | 7 | - added preliminary support for federal jurisdiction 8 | - added Jurisdiction.classification node 9 | - Jurisdictions are now filterable by classification (municipal, 10 | state, country) 11 | 12 | ## v2.5 (July 2020) 13 | 14 | - added Jurisdiction.lastScrapedAt, openstates/issues\#32 15 | 16 | ## v2.4 (April 2020) 17 | 18 | - removed unused fields from graph (organization.links, 19 | organization.other_names) 20 | 21 | ## v2.3 (August 2019) 22 | 23 | - add experimental full text search via searchQuery parameter to bills 24 | node 25 | 26 | ## v2.2 (June 2019) 27 | 28 | - add openstatesUrl to bills query 29 | - speed improvements 30 | 31 | ## v2.1 (Feb 2019) 32 | 33 | - fix lat-lon behavior to limit to active memberships 34 | - improve handling of retired legislators 35 | - fix type 
of maximum_memberships 36 | - bill version ordering is now consistent 37 | 38 | ## v2.0 (January 2019) 39 | 40 | - bugfix for maximum_memberships type 41 | - bugfix for versions field 42 | - improve tests 43 | 44 | ## Beta Release (November 2018) 45 | 46 | - **API Keys are now required** 47 | - consider classification when using current_memberships 48 | - fix geo filtering 49 | - add openstatesUrl to Bill node for ease of linkage to OpenStates.org 50 | - add Person.oldMemberships as analog to currentMemberships 51 | - add actionSince filter to bills node 52 | - fix 500 errors/optimization when using GraphQL fragments 53 | - addition of basic protection for excessive queries 54 | - add totalCount to assist in pagination 55 | - add Organization.currentMemberships 56 | 57 | ## Preview Release 1 (May 2018) 58 | 59 | - fix for people pagination 60 | - add updatedSince for people 61 | - add sponsor argument for bills node 62 | - allow votes to take pagination parameters 63 | - allow traversing to votes from person 64 | 65 | ## Preview Release 0 (Dec 2017) 66 | 67 | Initial draft release of the API, no backwards-compatibility guarantee 68 | made. 
69 | -------------------------------------------------------------------------------- /docs/api-v2/examples.md: -------------------------------------------------------------------------------- 1 | # Examples 2 | 3 | ## Get basic information for all legislatures 4 | 5 | [See in 6 | GraphiQL](https://openstates.org/graphql#query=%7B%0A%20%20jurisdictions%20%7B%0A%20%20%20%20edges%20%7B%0A%20%20%20%20%20%20node%20%7B%0A%20%20%20%20%20%20%20%20name%0A%20%20%20%20%20%20%20%20legislativeSessions%20%7B%0A%20%20%20%20%20%20%20%20%20%20edges%20%7B%0A%20%20%20%20%20%20%20%20%20%20%20%20node%20%7B%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20name%0A%20%20%20%20%20%20%20%20%20%20%20%20%7D%0A%20%20%20%20%20%20%20%20%20%20%7D%0A%20%20%20%20%20%20%20%20%7D%0A%20%20%20%20%20%20%20%20legislature%3A%20organizations(classification%3A%20%22legislature%22%2C%20first%3A%201)%20%7B%0A%20%20%20%20%20%20%20%20%20%20edges%20%7B%0A%20%20%20%20%20%20%20%20%20%20%20%20node%20%7B%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20name%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20classification%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20children(first%3A%205)%20%7B%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20edges%20%7B%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20node%20%7B%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20name%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20classification%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%7D%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%7D%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%7D%0A%20%20%20%20%20%20%20%20%20%20%20%20%7D%0A%20%20%20%20%20%20%20%20%20%20%7D%0A%20%20%20%20%20%20%20%20%7D%0A%20%20%20%20%20%20%7D%0A%20%20%20%20%7D%0A%20%20%7D%0A%7D%0A) 7 | 8 | { 9 | jurisdictions { 10 | edges { 11 | node { 12 | name 13 | legislativeSessions { 14 | edges { 15 | node { 16 | name 17 | } 18 | } 19 | } 20 | legislature: organizations(classification: "legislature", first: 1) { 21 | edges { 22 | node { 
23 | name 24 | classification 25 | children(first: 5) { 26 | edges { 27 | node { 28 | name 29 | classification 30 | } 31 | } 32 | } 33 | } 34 | } 35 | } 36 | } 37 | } 38 | } 39 | } 40 | 41 | ## Get overview of a legislature\'s structure 42 | 43 | [See in 44 | GraphiQL](https://openstates.org/graphql#query=%7B%0A%20%20jurisdiction(name%3A%20%22North%20Dakota%22)%20%7B%0A%20%20%20%20name%0A%20%20%20%20url%0A%20%20%20%20legislativeSessions%20%7B%0A%20%20%20%20%20%20edges%20%7B%0A%20%20%20%20%20%20%20%20node%20%7B%0A%20%20%20%20%20%20%20%20%20%20name%0A%20%20%20%20%20%20%20%20%20%20identifier%0A%20%20%20%20%20%20%20%20%7D%0A%20%20%20%20%20%20%7D%0A%20%20%20%20%7D%0A%20%20%20%20organizations(classification%3A%20%22legislature%22%2C%20first%3A%201)%20%7B%0A%20%20%20%20%20%20edges%20%7B%0A%20%20%20%20%20%20%20%20node%20%7B%0A%20%20%20%20%20%20%20%20%20%20id%0A%20%20%20%20%20%20%20%20%20%20name%0A%20%20%20%20%20%20%20%20%20%20children(first%3A%2020)%20%7B%0A%20%20%20%20%20%20%20%20%20%20%20%20edges%20%7B%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20node%20%7B%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20name%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%7D%0A%20%20%20%20%20%20%20%20%20%20%20%20%7D%0A%20%20%20%20%20%20%20%20%20%20%7D%0A%20%20%20%20%20%20%20%20%7D%0A%20%20%20%20%20%20%7D%0A%20%20%20%20%7D%0A%20%20%7D%0A%7D%0A) 45 | 46 | { 47 | jurisdiction(name: "North Dakota") { 48 | name 49 | url 50 | legislativeSessions { 51 | edges { 52 | node { 53 | name 54 | identifier 55 | } 56 | } 57 | } 58 | organizations(classification: "legislature", first: 1) { 59 | edges { 60 | node { 61 | id 62 | name 63 | children(first: 20) { 64 | edges { 65 | node { 66 | name 67 | } 68 | } 69 | } 70 | } 71 | } 72 | } 73 | } 74 | } 75 | 76 | ## Search for bills that match a given condition 77 | 78 | [See in 79 | 
GraphiQL](https://openstates.org/graphql#query=%20%20%20%20%7B%0A%20%20%20%20%20%20search_1%3A%20bills(first%3A%205%2C%20jurisdiction%3A%20%22New%20York%22%2C%20session%3A%20%222017-2018%22%2C%20chamber%3A%20%22lower%22%2C%20classification%3A%20%22resolution%22%2C%20updatedSince%3A%20%222017-01-15%22)%20%7B%0A%20%20%20%20%20%20%20%20edges%20%7B%0A%20%20%20%20%20%20%20%20%20%20node%20%7B%0A%20%20%20%20%20%20%20%20%20%20%20%20id%0A%20%20%20%20%20%20%20%20%20%20%20%20identifier%0A%20%20%20%20%20%20%20%20%20%20%20%20title%0A%20%20%20%20%20%20%20%20%20%20%20%20classification%0A%20%20%20%20%20%20%20%20%20%20%20%20updatedAt%0A%20%20%20%20%20%20%20%20%20%20%20%20createdAt%0A%20%20%20%20%20%20%20%20%20%20%20%20legislativeSession%20%7B%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20identifier%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20jurisdiction%20%7B%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20name%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%7D%0A%20%20%20%20%20%20%20%20%20%20%20%20%7D%0A%20%20%20%20%20%20%20%20%20%20%20%20actions%20%7B%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20date%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20description%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20classification%0A%20%20%20%20%20%20%20%20%20%20%20%20%7D%0A%20%20%20%20%20%20%20%20%20%20%20%20documents%20%7B%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20date%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20note%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20links%20%7B%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20url%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%7D%0A%20%20%20%20%20%20%20%20%20%20%20%20%7D%0A%20%20%20%20%20%20%20%20%20%20%20%20versions%20%7B%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20date%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20note%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20links%20%7B%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20url%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%7D%0A%20%20%20%20%20%20%20%20%20%20%20%20%7D%0A%20%20%20%20%20%20%20%20%20%20%20%20%0
A%20%20%20%20%20%20%20%20%20%20%20%20sources%20%7B%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20url%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20note%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%0A%20%20%20%20%20%20%20%20%20%20%20%20%7D%0A%20%20%20%20%20%20%20%20%20%20%7D%0A%20%20%20%20%20%20%20%20%7D%0A%20%20%20%20%20%20%7D%0A%20%20%20%20%7D%0A) 80 | 81 | { 82 | search_1: bills(first: 5, jurisdiction: "New York", session: "2017-2018", chamber: "lower", classification: "resolution", updatedSince: "2017-01-15") { 83 | edges { 84 | node { 85 | id 86 | identifier 87 | title 88 | classification 89 | updatedAt 90 | createdAt 91 | legislativeSession { 92 | identifier 93 | jurisdiction { 94 | name 95 | } 96 | } 97 | actions { 98 | date 99 | description 100 | classification 101 | } 102 | documents { 103 | date 104 | note 105 | links { 106 | url 107 | } 108 | } 109 | versions { 110 | date 111 | note 112 | links { 113 | url 114 | } 115 | } 116 | 117 | sources { 118 | url 119 | note 120 | 121 | } 122 | } 123 | } 124 | } 125 | } 126 | 127 | ## Get all information on a particular bill 128 | 129 | [See in 130 | 
GraphiQL](https://openstates.org/graphql#query=%20%20%20%20%7B%0A%20%20%20%20%20%20b1%3A%20bill(jurisdiction%3A%20%22Hawaii%22%2C%20session%3A%20%222017%20Regular%20Session%22%2C%20identifier%3A%20%22HB%20475%22)%20%7B%0A%20%20%20%20%20%20%20%20id%0A%20%20%20%20%20%20%20%20identifier%0A%20%20%20%20%20%20%20%20title%0A%20%20%20%20%20%20%20%20classification%0A%20%20%20%20%20%20%20%20updatedAt%0A%20%20%20%20%20%20%20%20createdAt%0A%20%20%20%20%20%20%20%20legislativeSession%20%7B%0A%20%20%20%20%20%20%20%20%20%20identifier%0A%20%20%20%20%20%20%20%20%20%20jurisdiction%20%7B%0A%20%20%20%20%20%20%20%20%20%20%20%20name%0A%20%20%20%20%20%20%20%20%20%20%7D%0A%20%20%20%20%20%20%20%20%7D%0A%20%20%20%20%20%20%20%20actions%20%7B%0A%20%20%20%20%20%20%20%20%20%20date%0A%20%20%20%20%20%20%20%20%20%20description%0A%20%20%20%20%20%20%20%20%20%20classification%0A%20%20%20%20%20%20%20%20%7D%0A%20%20%20%20%20%20%20%20documents%20%7B%0A%20%20%20%20%20%20%20%20%20%20date%0A%20%20%20%20%20%20%20%20%20%20note%0A%20%20%20%20%20%20%20%20%20%20links%20%7B%0A%20%20%20%20%20%20%20%20%20%20%20%20url%0A%20%20%20%20%20%20%20%20%20%20%7D%0A%20%20%20%20%20%20%20%20%7D%0A%20%20%20%20%20%20%20%20versions%20%7B%0A%20%20%20%20%20%20%20%20%20%20date%0A%20%20%20%20%20%20%20%20%20%20note%0A%20%20%20%20%20%20%20%20%20%20links%20%7B%0A%20%20%20%20%20%20%20%20%20%20%20%20url%0A%20%20%20%20%20%20%20%20%20%20%7D%0A%20%20%20%20%20%20%20%20%7D%0A%20%20%20%20%20%20%20%20sources%20%7B%0A%20%20%20%20%20%20%20%20%20%20url%0A%20%20%20%20%20%20%20%20%20%20note%0A%20%20%20%20%20%20%20%20%7D%0A%20%20%20%20%20%20%7D%0A%20%20%20%20%20%20b2%3A%20bill(id%3A%20%22ocd-bill%2F9c24aaa2-6acc-43ad-883b-ae9f677062e9%22)%20%7B%0A%20%20%20%20%20%20%20%20id%0A%20%20%20%20%20%20%20%20identifier%0A%20%20%20%20%20%20%20%20title%0A%20%20%20%20%20%20%20%20classification%0A%20%20%20%20%20%20%20%20updatedAt%0A%20%20%20%20%20%20%20%20createdAt%0A%20%20%20%20%20%20%20%20legislativeSession%20%7B%0A%20%20%20%20%20%20%20%20%20%20identifier%0A%20%20%
20%20%20%20%20%20%20%20jurisdiction%20%7B%0A%20%20%20%20%20%20%20%20%20%20%20%20name%0A%20%20%20%20%20%20%20%20%20%20%7D%0A%20%20%20%20%20%20%20%20%7D%0A%20%20%20%20%20%20%20%20actions%20%7B%0A%20%20%20%20%20%20%20%20%20%20date%0A%20%20%20%20%20%20%20%20%20%20description%0A%20%20%20%20%20%20%20%20%20%20classification%0A%20%20%20%20%20%20%20%20%7D%0A%20%20%20%20%20%20%20%20documents%20%7B%0A%20%20%20%20%20%20%20%20%20%20date%0A%20%20%20%20%20%20%20%20%20%20note%0A%20%20%20%20%20%20%20%20%20%20links%20%7B%0A%20%20%20%20%20%20%20%20%20%20%20%20url%0A%20%20%20%20%20%20%20%20%20%20%7D%0A%20%20%20%20%20%20%20%20%7D%0A%20%20%20%20%20%20%20%20versions%20%7B%0A%20%20%20%20%20%20%20%20%20%20date%0A%20%20%20%20%20%20%20%20%20%20note%0A%20%20%20%20%20%20%20%20%20%20links%20%7B%0A%20%20%20%20%20%20%20%20%20%20%20%20url%0A%20%20%20%20%20%20%20%20%20%20%7D%0A%20%20%20%20%20%20%20%20%7D%0A%20%20%20%20%20%20%20%20sources%20%7B%0A%20%20%20%20%20%20%20%20%20%20url%0A%20%20%20%20%20%20%20%20%20%20note%0A%20%20%20%20%20%20%20%20%7D%0A%20%20%20%20%20%20%7D%0A%20%20%20%20%7D%0A) 131 | 132 | { 133 | b1: bill(jurisdiction: "Hawaii", session: "2017 Regular Session", identifier: "HB 475") { 134 | id 135 | identifier 136 | title 137 | classification 138 | updatedAt 139 | createdAt 140 | legislativeSession { 141 | identifier 142 | jurisdiction { 143 | name 144 | } 145 | } 146 | actions { 147 | date 148 | description 149 | classification 150 | } 151 | documents { 152 | date 153 | note 154 | links { 155 | url 156 | } 157 | } 158 | versions { 159 | date 160 | note 161 | links { 162 | url 163 | } 164 | } 165 | sources { 166 | url 167 | note 168 | } 169 | } 170 | b2: bill(id: "ocd-bill/9c24aaa2-6acc-43ad-883b-ae9f677062e9") { 171 | id 172 | identifier 173 | title 174 | classification 175 | updatedAt 176 | createdAt 177 | legislativeSession { 178 | identifier 179 | jurisdiction { 180 | name 181 | } 182 | } 183 | actions { 184 | date 185 | description 186 | classification 187 | } 188 | documents { 
189 | date 190 | note 191 | links { 192 | url 193 | } 194 | } 195 | versions { 196 | date 197 | note 198 | links { 199 | url 200 | } 201 | } 202 | sources { 203 | url 204 | note 205 | } 206 | } 207 | } 208 | 209 | ## Get information about a specific legislator 210 | 211 | [See in 212 | GraphiQL](https://openstates.org/graphql#query=%20%20%20%20%7B%0A%20%20%20%20%20%20person(id%3A%22ocd-person%2Fdd05bd23-fe49-4e65-bfff-62db997e56e0%22)%7B%0A%20%20%20%20%20%20%20%20name%0A%20%20%20%20%20%20%20%20contactDetails%20%7B%0A%20%20%20%20%20%20%20%20%20%20note%0A%20%20%20%20%20%20%20%20%20%20type%0A%20%20%20%20%20%20%20%20%20%20value%0A%20%20%20%20%20%20%20%20%7D%0A%20%20%20%20%20%20%20%20otherNames%20%7B%0A%20%20%20%20%20%20%20%20%20%20name%0A%20%20%20%20%20%20%20%20%7D%0A%20%20%20%20%20%20%20%20sources%20%7B%0A%20%20%20%20%20%20%20%20%20%20url%0A%20%20%20%20%20%20%20%20%7D%0A%20%20%20%20%20%20%20%20currentMemberships%20%7B%0A%20%20%20%20%20%20%20%20%20%20organization%20%7B%0A%20%20%20%20%20%20%20%20%20%20%20%20name%0A%20%20%20%20%20%20%20%20%20%20%7D%0A%20%20%20%20%20%20%20%20%7D%0A%20%20%20%20%20%20%7D%0A%20%20%20%20%7D%0A) 213 | 214 | { 215 | person(id:"ocd-person/dd05bd23-fe49-4e65-bfff-62db997e56e0"){ 216 | name 217 | contactDetails { 218 | note 219 | type 220 | value 221 | } 222 | otherNames { 223 | name 224 | } 225 | sources { 226 | url 227 | } 228 | currentMemberships { 229 | organization { 230 | name 231 | } 232 | } 233 | } 234 | } 235 | 236 | ## Get legislators for a given state/chamber 237 | 238 | [See in 239 | 
GraphiQL](https://openstates.org/graphql#query=%20%20%20%20%7B%0A%20%20%20%20%20%20people(memberOf%3A%22ocd-organization%2Fddf820b5-5246-46b3-a807-99b5914ad39f%22%2C%20first%3A%20100)%20%7B%0A%20%20%20%20%20%20%20%20edges%20%7B%0A%20%20%20%20%20%20%20%20%20%20node%20%7B%0A%20%20%20%20%20%20%20%20%20%20%20%20name%0A%20%20%20%20%20%20%20%20%20%20%20%20party%3A%20currentMemberships(classification%3A%22party%22)%20%7B%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20organization%20%7B%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20name%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%7D%0A%20%20%20%20%20%20%20%20%20%20%20%20%7D%0A%20%20%20%20%20%20%20%20%20%20%20%20links%20%7B%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20url%0A%20%20%20%20%20%20%20%20%20%20%20%20%7D%0A%20%20%20%20%20%20%20%20%20%20%20%20sources%20%7B%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20url%0A%20%20%20%20%20%20%20%20%20%20%20%20%7D%0A%20%20%20%20%20%20%20%20%20%20%20%20chamber%3A%20currentMemberships(classification%3A%5B%22upper%22%2C%20%22lower%22%5D)%20%7B%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20post%20%7B%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20label%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%7D%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20organization%20%7B%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20name%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20classification%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20parent%20%7B%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20name%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%7D%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%7D%0A%20%20%20%20%20%20%20%20%20%20%20%20%7D%0A%20%20%20%20%20%20%20%20%20%20%7D%0A%20%20%20%20%20%20%20%20%7D%0A%20%20%20%20%20%20%7D%0A%20%20%20%20%7D%0A) 240 | 241 | `ocd-organization/ddf820b5-5246-46b3-a807-99b5914ad39f` is the id of the 242 | Alabama Senate chamber. 
243 | 244 | { 245 | people(memberOf:"ocd-organization/ddf820b5-5246-46b3-a807-99b5914ad39f", first: 100) { 246 | edges { 247 | node { 248 | name 249 | party: currentMemberships(classification:"party") { 250 | organization { 251 | name 252 | 253 | } 254 | } 255 | links { 256 | url 257 | } 258 | sources { 259 | url 260 | } 261 | chamber: currentMemberships(classification:["upper", "lower"]) { 262 | post { 263 | label 264 | } 265 | organization { 266 | name 267 | classification 268 | parent { 269 | name 270 | } 271 | } 272 | } 273 | } 274 | } 275 | } 276 | } 277 | 278 | ## Search for legislators that represent a given area 279 | 280 | [See in 281 | GraphQL](https://openstates.org/graphql#query=%7B%0A%20%20people(latitude%3A%2040.7460022%2C%20longitude%3A%20-73.9584642%2C%20first%3A%20100)%20%7B%0A%20%20%20%20edges%20%7B%0A%20%20%20%20%20%20node%20%7B%0A%20%20%20%20%20%20%20%20name%0A%20%20%20%20%20%20%20%20chamber%3A%20currentMemberships(classification%3A%5B%22upper%22%2C%20%22lower%22%5D)%20%7B%0A%20%20%20%20%20%20%20%20%20%20post%20%7B%0A%20%20%20%20%20%20%20%20%20%20%20%20label%0A%20%20%20%20%20%20%20%20%20%20%7D%0A%20%20%20%20%20%20%20%20%20%20organization%20%7B%0A%20%20%20%20%20%20%20%20%20%20%20%20name%0A%20%20%20%20%20%20%20%20%20%20%20%20classification%0A%20%20%20%20%20%20%20%20%20%20%20%20parent%20%7B%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20name%0A%20%20%20%20%20%20%20%20%20%20%20%20%7D%0A%20%20%20%20%20%20%20%20%20%20%7D%0A%20%20%20%20%20%20%20%20%7D%0A%20%20%20%20%20%20%7D%0A%20%20%20%20%7D%0A%20%20%7D%0A%7D%0A) 282 | 283 | { 284 | people(latitude: 40.7460022, longitude: -73.9584642, first: 100) { 285 | edges { 286 | node { 287 | name 288 | chamber: currentMemberships(classification:["upper", "lower"]) { 289 | post { 290 | label 291 | } 292 | organization { 293 | name 294 | classification 295 | parent { 296 | name 297 | } 298 | } 299 | } 300 | } 301 | } 302 | } 303 | } 304 | 
-------------------------------------------------------------------------------- /docs/api-v2/index.md: -------------------------------------------------------------------------------- 1 | # DEPRECATED - GraphQL API 2 | 3 | As of [December 1, 2023, the v2/GraphQL API has been sunset](https://blog.openstates.org/2023-june-changes/). Please 4 | migrate to the [v3 API](https://docs.openstates.org/api-v3/) as soon as possible. Future service for the GraphQL API 5 | is not guaranteed. 6 | 7 | The rest of this documentation is left up for reference. 8 | 9 | - API keys are required. You can [register for an API key](https://openstates.org/api/register/) and once activated, 10 | you'll pass your API key via the `X-API-KEY` header. 11 | - You can also check out our [introductory blog post](https://blog.openstates.org/introducing-the-upcoming-open-states-graphql-api-838f9d023868) for more details. 12 | 13 | ## Basics 14 | 15 | This is a [GraphQL](https://graphql.org/) API, and some of the concepts 16 | may seem unfamiliar at first. 17 | 18 | There is, in essence, only one endpoint: 19 | <https://openstates.org/graphql>. 20 | 21 | This endpoint, when accessed in a browser, will provide an interface 22 | that allows you to experiment with queries in the browser; it features 23 | autocomplete and a way to browse the full graph (click the 'Docs' link 24 | in the upper right corner). 25 | 26 | A GraphQL query mirrors the structure of the data that you'd like to 27 | obtain. For example, to obtain a list of legislators you'd pass 28 | something like: 29 | 30 | { 31 | people { 32 | edges { 33 | node { 34 | name 35 | } 36 | } 37 | } 38 | } 39 | 40 | !!! 
note 41 | 42 | If you are using the API programmatically it is recommended you send the 43 | data as part of the POST body, e.g.: 44 | 45 | `curl -X POST https://openstates.org/graphql -d "query={people{edges{node{name}}}}"` 46 | 47 | Of course, if you try this you'll see it doesn't work since there are 48 | some basic limits on how much data you can request at once. We paginate 49 | with the `first`, `last`, `before` and `after` parameters to a root 50 | node. So let's try that again: 51 | 52 | { 53 | people(first: 3) { 54 | edges { 55 | node { 56 | name 57 | } 58 | } 59 | } 60 | } 61 | 62 | And you'd get back JSON like: 63 | 64 | { 65 | "data": { 66 | "people": { 67 | "edges": [ 68 | { 69 | "node": { 70 | "name": "Lydia Brasch" 71 | } 72 | }, 73 | { 74 | "node": { 75 | "name": "Matt Williams" 76 | } 77 | }, 78 | { 79 | "node": { 80 | "name": "Merv Riepe" 81 | } 82 | } 83 | ] 84 | } 85 | } 86 | } 87 | 88 | Ah, much better. Nodes also can take other parameters to filter the 89 | returned content. Let's try the "name" filter which restricts our 90 | search to people named Lydia: 91 | 92 | { 93 | people(first: 3, name: "Lydia") { 94 | edges { 95 | node { 96 | name 97 | } 98 | } 99 | } 100 | } 101 | 102 | Results in: 103 | 104 | { 105 | "data": { 106 | "people": { 107 | "edges": [ 108 | { 109 | "node": { 110 | "name": "Lydia Brasch" 111 | } 112 | }, 113 | { 114 | "node": { 115 | "name": "Lydia Graves Chassaniol" 116 | } 117 | }, 118 | { 119 | "node": { 120 | "name": "Lydia C. 
Blume" 121 | } 122 | } 123 | ] 124 | } 125 | } 126 | } 127 | 128 | It is also possible to request data from multiple root nodes at once, 129 | for example: 130 | 131 | { 132 | people(first: 1) { 133 | edges { 134 | node { name } 135 | } 136 | } 137 | bills(first: 1) { 138 | edges { 139 | node { title } 140 | } 141 | } 142 | } 143 | 144 | Would give back something like: 145 | 146 | { 147 | "data": { 148 | "people": { 149 | "edges": [ 150 | { 151 | "node": { 152 | "name": "Lydia Brasch" 153 | } 154 | } 155 | ] 156 | }, 157 | "bills": { 158 | "edges": [ 159 | { 160 | "node": { 161 | "title": "Criminal Law - Animal Abuse Emergency Compensation Fund - Establishment" 162 | } 163 | } 164 | ] 165 | } 166 | } 167 | } 168 | 169 | You may notice something here, that you get back just the data you need. 170 | This is extremely powerful, and lets you do the equivalent of many 171 | traditional API calls in a single query. 172 | 173 | ## Full-fledged Example 174 | 175 | Let's take a look at a more useful example: 176 | 177 | { 178 | bill(jurisdiction: "New York", session: "2017-2018", identifier: "S 5772") { 179 | title 180 | actions { 181 | description 182 | date 183 | } 184 | votes { 185 | edges { 186 | node { 187 | counts { 188 | value 189 | option 190 | } 191 | votes { 192 | voterName 193 | voter { 194 | id 195 | contactDetails { 196 | value 197 | note 198 | type 199 | } 200 | } 201 | option 202 | } 203 | } 204 | } 205 | } 206 | sources { 207 | url 208 | } 209 | createdAt 210 | updatedAt 211 | } 212 | } 213 | 214 | There's a lot going on there, let's break it down: 215 | 216 | bill(jurisdiction: "New York", session: "2017-2018", identifier: "S 5772") { 217 | 218 | We're hitting the `bill` root node, which takes 3 parameters. This 219 | should get us to a single bill from New York. 220 | 221 | title 222 | 223 | This is going to give us the title, just like we saw before. 
224 | 225 | actions { 226 | description 227 | date 228 | } 229 | 230 | Here we're going into a child node, in this case all of the actions 231 | taken on the bill. For each action we're requesting the date & 232 | description. 233 | 234 | votes { 235 | edges { 236 | node { 237 | 238 | Here too we're going into a child node, but note that this time we use 239 | that "edges" and "node" pattern that we see on root level nodes. 240 | Certain child nodes in the API have the ability to be paginated or 241 | further limited, and votes happen to be one of them. In this case 242 | however we're not making use of that so we'll just ignore this. 243 | 244 | (A full discussion of this pattern is out of scope but check out the 245 | [Relay pagination 246 | specification](https://facebook.github.io/relay/graphql/connections.htm) for 247 | more detail.) 248 | 249 | counts { 250 | value 251 | option 252 | } 253 | votes { 254 | voterName 255 | voter { 256 | id 257 | contactDetails { 258 | value 259 | note 260 | type 261 | } 262 | } 263 | option 264 | } 265 | } 266 | 267 | Here we grab a few more fields, including child nodes of each vote on 268 | our Bill. 269 | 270 | First, we get a list of counts (essentially pairs of outcomes + numbers 271 | e.g. (yes, 31), (no, 5)). 272 | 273 | We also get individual legislator votes by name, and we traverse into 274 | another object to get the Open States ID and contact details for the 275 | voter. (Don't sweat the exact data model here, there will be more on 276 | the structure once we get to the actual graph documentation.) 277 | 278 | sources { 279 | url 280 | } 281 | createdAt 282 | updatedAt 283 | 284 | And back up at the top level, we grab a few more pieces of information 285 | about the Bill. 286 | 287 | And now you've seen a glimpse of the power of this API. We were able to 288 | get back the exact fields we wanted on a bill, contact information on the 289 | legislators that have voted on the bill, and more. 
290 | 291 | Our result looks like this: 292 | 293 | { 294 | "data": { 295 | "bill": { 296 | "title": "Relates to bureaus of administrative adjudication", 297 | "actions": [ 298 | { 299 | "description": "REFERRED TO LOCAL GOVERNMENT", 300 | "date": "2017-04-28" 301 | }, 302 | { 303 | "description": "COMMITTEE DISCHARGED AND COMMITTED TO RULES", 304 | "date": "2017-06-19" 305 | }, 306 | { 307 | "description": "ORDERED TO THIRD READING CAL.1896", 308 | "date": "2017-06-19" 309 | }, 310 | { 311 | "description": "RECOMMITTED TO RULES", 312 | "date": "2017-06-21" 313 | } 314 | ], 315 | "votes": { 316 | "edges": [ 317 | { 318 | "node": { 319 | "counts": [ 320 | { 321 | "value": 25, 322 | "option": "yes" 323 | }, 324 | { 325 | "value": 0, 326 | "option": "no" 327 | }, 328 | { 329 | "value": 0, 330 | "option": "other" 331 | } 332 | ], 333 | "votes": [ 334 | { 335 | "voterName": "John J. Bonacic", 336 | "voter": { 337 | "id": "ocd-person/da013cd5-dc67-4e65-a310-73aa32ad1f7c" 338 | "contactDetails": [ 339 | { 340 | "value": "bonacic@nysenate.gov", 341 | "note": "Capitol Office", 342 | "type": "email" 343 | }, 344 | { 345 | "value": "Room 503\nAlbany, NY 12247", 346 | "note": "District Office", 347 | "type": "address" 348 | }, 349 | { 350 | "value": "518-455-3181", 351 | "note": "District Office", 352 | "type": "voice" 353 | }, 354 | ...etc... 355 | ] 356 | }, 357 | "option": "yes" 358 | }, 359 | { 360 | "voterName": "Neil D. Breslin", 361 | "voter": { 362 | "id": "ocd-person/4b710aee-1b99-42e0-90e2-d41338e8c5df" 363 | "contactDetails": [ ...etc... ], 364 | }, 365 | "option": "yes" 366 | }, 367 | { 368 | "voterName": "David Carlucci", 369 | "voter": { 370 | "id": "ocd-person/1b0feab9-02a7-4bcc-b089-3ab23286da68" 371 | "contactDetails": [ ...etc... ], 372 | }, 373 | "option": "yes" 374 | }, 375 | ] 376 | } 377 | }, 378 | ...etc... 
379 | ] 380 | }, 381 | "sources": [ 382 | { 383 | "url": "http://legislation.nysenate.gov/api/3/bills/2017-2018/S5772?summary=&detail=" 384 | }, 385 | { 386 | "url": "http://www.nysenate.gov/legislation/bills/2017/S5772" 387 | }, 388 | { 389 | "url": "http://assembly.state.ny.us/leg/?default_fld=&bn=S5772&Summary=Y&Actions=Y&Text=Y" 390 | } 391 | ], 392 | "createdAt": "2017-07-15 05:08:15.848526+00:00", 393 | "updatedAt": "2017-07-15 05:08:15.848541+00:00" 394 | } 395 | } 396 | } 397 | -------------------------------------------------------------------------------- /docs/api-v2/other.md: -------------------------------------------------------------------------------- 1 | # Other Notes 2 | 3 | There are a few other things to be aware of while using the API: 4 | 5 | ## Explore the Graph 6 | 7 | GraphQL is still quite new, so we figured it might be good to provide 8 | some helpful tips on how to think about the data and how you'll use the 9 | API. 10 | 11 | First, it is probably well worth your time to play around in GraphiQL to 12 | explore the API and data. It was heavily used when developing the API 13 | and writing tests, and is a very powerful tool, particularly when you 14 | make use of the self-documenting nature of the graph. 15 | 16 | When you're thinking about how to query, don't necessarily try to 17 | replicate your old API calls exactly. For example, perhaps you were 18 | grabbing all bills that met given criteria and then grabbing all 19 | sponsors' contact details. This can now be done in one call by traversing 20 | from the `bills` root node into the `BillSponsorshipNode` and then up to the 21 | `PersonNode` and finally to the `ContactDetailNode`. This may sound 22 | complex at first, but once you get the hang of it, it really does unlock 23 | a ton of power and will make your apps more powerful and efficient. 24 | 25 | ## Pagination 26 | 27 | In several places (such as the `bills` root node and `BillNode`'s `votes`) we mention that nodes are paginated. 
28 | 29 | What this means in practice is that instead of getting back the 30 | underlying node type, say `BillNode`, directly, you'll get back 31 | `BillConnectionNode` or similar. (In practice there are connection node 32 | types for each paginated type, but all work the same way in our case.) 33 | 34 | ### Arguments 35 | 36 | Each paginated endpoint accepts any of four parameters: 37 | 38 | - `first` - given an integer N, only return the first N nodes 39 | - `last` - given an integer N, only return the last N nodes 40 | - `after` - combined with `first`, will return first N nodes after a 41 | given "cursor" 42 | - `before` - combined with `last`, will return last N nodes before a 43 | given "cursor" 44 | 45 | So typically you'd paginate using `first`, obtaining a cursor, and then 46 | calling the API again with a combination of `first` and `after`. 47 | 48 | The same process could be carried out with `last` and `before` to 49 | paginate in reverse. 50 | 51 | ### Responses 52 | 53 | Let's take a look at everything that pagination makes available: 54 | 55 | { 56 | bills(first:20) { 57 | edges { 58 | node { 59 | title 60 | } 61 | cursor 62 | } 63 | pageInfo { 64 | hasNextPage 65 | hasPreviousPage 66 | endCursor 67 | startCursor 68 | } 69 | totalCount 70 | } 71 | } 72 | 73 | You'll see that the connection node has three nodes: `edges`, 74 | `pageInfo`, and `totalCount` 75 | 76 | - 77 | 78 | `edges` - a list of objects that each have a `node` and `cursor` attribute: 79 | 80 | : - `node` - the underlying node type, in our case `BillNode` 81 | - `cursor` - an opaque cursor for this particular item, it can 82 | be used with the `before` and `after` parameters each 83 | paginated node accepts as arguments. 
84 | 85 | - 86 | 87 | `pageInfo` - a list of helpful pieces of information about this page: 88 | 89 | : - `hasNextPage` - boolean that is true if there is another 90 | page after this 91 | - `hasPreviousPage` - boolean that is true if there is a page 92 | before this 93 | - `endCursor` - last cursor in the set of edges, can be used 94 | with `after` to paginate forward 95 | - `startCursor` - first cursor in the set of edges, can be 96 | used with `before` to paginate backwards 97 | 98 | - `totalCount` - total number of objects available from this 99 | connection 100 | 101 | ### In Practice 102 | 103 | Let's say you want to get all of the people matching a given criteria: 104 | 105 | You'd start with a query for all people matching your criteria, 106 | ensuring to set the page size to no greater than the maximum: 107 | 108 | { 109 | people(memberOf: "Some Organization", first: 100) { 110 | edges { 111 | node { 112 | name 113 | } 114 | } 115 | pageInfo { 116 | hasNextPage 117 | endCursor 118 | } 119 | } 120 | } 121 | 122 | Let's say we got back a list of 100 edges and our `pageInfo` object 123 | looked like: 124 | 125 | { 126 | "hasNextPage": true, 127 | "endCursor": "ZXJyYXlxb20uZWN0aW9uOjA=" 128 | } 129 | 130 | So you'd make another call: 131 | 132 | { 133 | people(memberOf: "Some Organization", first: 100, after:"ZXJyYXlxb20uZWN0aW9uOjA=" ) { 134 | edges { 135 | node { 136 | name 137 | } 138 | } 139 | pageInfo { 140 | hasNextPage 141 | endCursor 142 | } 143 | } 144 | } 145 | 146 | And let's say in this case you got back only 75 edges, and our 147 | `pageInfo` object looks like: 148 | 149 | { 150 | "hasNextPage": false, 151 | "endCursor": "AXjYylxX2bu1wxa9uunnb=" 152 | } 153 | 154 | We'd stop iteration at this point, of course, if hasNextPage had been 155 | true, we'd continue on until it wasn't. 
156 | 157 | ## Renaming fields 158 | 159 | A really useful trick that is often overlooked is that you can rename 160 | fields when retrieving them, for example: 161 | 162 | { 163 | republicans: people(memberOf: "Republican", first: 5) { 164 | edges { 165 | node { 166 | full_name: name 167 | } 168 | } 169 | } 170 | } 171 | 172 | Would give back: 173 | 174 | { 175 | "data": { 176 | "republicans": { 177 | "edges": [ 178 | { 179 | "node": { 180 | "full_name": "Michelle Udall" 181 | } 182 | }, 183 | { 184 | "node": { 185 | "full_name": "Kimberly Yee" 186 | } 187 | }, 188 | { 189 | "node": { 190 | "full_name": "Regina E. Cobb" 191 | } 192 | }, 193 | { 194 | "node": { 195 | "full_name": "Michelle B. Ugenti-Rita" 196 | } 197 | }, 198 | { 199 | "node": { 200 | "full_name": "David Livingston" 201 | } 202 | } 203 | ] 204 | } 205 | } 206 | } 207 | 208 | Note that we're both renaming a top-level node here as well as a piece 209 | of data within the query. 210 | 211 | You can also use this to query the same root node twice (doing so 212 | without renaming isn't allowed since it results in a name conflict). 213 | 214 | For example: 215 | 216 | { 217 | republicans: people(memberOf: "Republican", first: 5) { 218 | edges { 219 | node { 220 | full_name: name 221 | } 222 | } 223 | } 224 | democrats: people(memberOf: "Democratic", first: 5) { 225 | edges { 226 | node { 227 | full_name: name 228 | } 229 | } 230 | } 231 | } 232 | 233 | ## Fuzzy Date Format {#date-format} 234 | 235 | Unless otherwise noted (most notably `createdAt` and `updatedAt`), all 236 | date objects are "fuzzy". Instead of being expressed as an exact date, 237 | it is possible a given date takes any of the following formats: 238 | 239 | - YYYY 240 | - YYYY-MM 241 | - YYYY-MM-DD 242 | - YYYY-MM-DD HH:MM:SS (if times are allowed) 243 | 244 | Action/Vote times are all assumed to be in the state capitol's time 245 | zone. 246 | 247 | Times related to our updates such as `updatedAt` and `createdAt` are in UTC. 
248 | 249 | ## Name Matching 250 | 251 | In several places such as bill sponsorships and votes you'll notice 252 | that we have a raw string representing a person or organization as well 253 | as a place for a link to the appropriate `OrganizationNode` or `PersonNode`. 254 | 255 | Because of the way we collect the data from states, we always collect 256 | the raw data and later make an attempt to (via a mix of automated 257 | matching and manual fixes) connect the reference with data we've 258 | already collected. 259 | 260 | In many cases these linkages will not be provided, but with some 261 | upcoming new tools to help us improve this matching we'll be able to 262 | dramatically improve the number of matched entities in the near future. 263 | -------------------------------------------------------------------------------- /docs/api-v2/root-nodes.md: -------------------------------------------------------------------------------- 1 | # Root Nodes 2 | 3 | As seen in the introduction, when constructing a query you will start 4 | your query at one (or more) root nodes. The following root nodes are 5 | available: 6 | 7 | ## jurisdictions 8 | 9 | Get a list of all jurisdictions. 10 | 11 | This will return a list of `JurisdictionNode` objects, one for each state (plus Puerto Rico and DC). 12 | 13 | **Pagination**: This endpoint accepts the usual 14 | `pagination` parameters, but pagination is 15 | not required. 16 | 17 | ## people 18 | 19 | Get a list of all people matching certain criteria. 20 | 21 | This will return a list of `PersonNode` objects, one for each person matching your query. 22 | 23 | **Pagination**: This endpoint accepts the usual `pagination` parameters, and you must 24 | limit your results to no more than 100 using either the "first" or "last" parameter. 25 | 26 | ### Parameters 27 | 28 | `name` 29 | 30 | : Limit response to people whose name contains the provided string. 31 | 32 | Includes partial matches & case-insensitive matches. 
33 | 34 | `memberOf` 35 | 36 | : Limit response to people that have a currently active membership 37 | record for an organization. The value passed to memberOf can be an 38 | ocd-organization ID or a name (e.g. 'Republican' or 'Nebraska Legislature'). 39 | 40 | `everMemberOf` 41 | 42 | : Limit response to people that have any recorded membership 43 | for an organization. Operates as a superset of memberOf. 44 | 45 | Specifying `memberOf` and `everMemberOf` in the same query is 46 | invalid. 47 | 48 | `district` 49 | 50 | : When specifying either memberOf or everMemberOf, limits to people 51 | whose membership represented the district with a given label. (e.g. 52 | memberOf: "Nebraska Legislature", district: "7") 53 | 54 | Specifying `district` without `memberOf` or `everMemberOf` is 55 | invalid. 56 | 57 | `latitude` and `longitude` 58 | 59 | : Limit to people that are currently representing the district(s) 60 | containing the point specified by the provided coordinates. 61 | 62 | Must be specified together. 63 | 64 | ## bills 65 | 66 | Get a list of all bills matching certain criteria. 67 | 68 | This will return a list of `BillNode` 69 | objects, one for each bill matching your query. 70 | 71 | **Pagination**: This endpoint accepts the usual 72 | `pagination` parameters, and you must limit your results to no more than 100 using either the "first" or "last" parameter. 73 | 74 | ### Parameters 75 | 76 | `jurisdiction` 77 | 78 | : Limit to bills associated with given jurisdiction, parameter can 79 | either be a human-readable jurisdiction name or an ocd-jurisdiction 80 | ID. 81 | 82 | `chamber` 83 | 84 | : Limit to bills originating in a given chamber. (e.g. upper, lower, 85 | legislature) 86 | 87 | `session` 88 | 89 | : Limit to bills originating in a given legislative session. This 90 | parameter should be the desired session's `identifier`. (See 91 | `LegislativeSessionNode`). 
92 | 93 | `classification` 94 | 95 | : Limit to bills with a given classification (e.g. "bill" or 96 | "resolution") 97 | 98 | `subject` 99 | 100 | : Limit to bills with a given subject (e.g. "Agriculture") 101 | 102 | `searchQuery` 103 | 104 | : Limit to bills that contain a given term. (Experimental until 2020!) 105 | 106 | `updatedSince` 107 | 108 | : Limit to bills that have had data updated since a given time (UTC). 109 | 110 | Time should be in the format YYYY-MM-DD[THH:MM:SS]. 111 | 112 | `actionsSince` 113 | 114 | : Limit to bills that have had actions since a given time (UTC). 115 | 116 | Time should be in the format YYYY-MM-DD. 117 | 118 | ## jurisdiction 119 | 120 | Look up a single jurisdiction by name or ID. 121 | 122 | This will return a single `JurisdictionNode` object with the provided name or ID parameter. 123 | 124 | ### Parameters 125 | 126 | `name` 127 | 128 | : The human-readable name of the jurisdiction, such as 'New Hampshire'. 129 | 130 | `id` 131 | 132 | : The ocd-jurisdiction ID of the desired jurisdiction, such as 133 | 'ocd-jurisdiction/country:us/state:nh'. 134 | 135 | You are required to provide one of the two available parameters. 136 | 137 | ## person 138 | 139 | Look up a single person by ocd-person ID. 140 | 141 | This will return a single `PersonNode` by ID. 142 | 143 | ### Parameters 144 | 145 | `id` 146 | 147 | : ocd-person ID for the desired individual. 148 | 149 | ## organization 150 | 151 | Look up a single organization by ocd-organization ID. 152 | 153 | This will return a single `OrganizationNode` by ID. 154 | 155 | ### Parameters 156 | 157 | `id` 158 | 159 | : ocd-organization ID for the desired organization. 160 | 161 | ## bill 162 | 163 | Look up a single bill by ID, URL, or (jurisdiction, session, identifier) 164 | combo. 165 | 166 | This will return a single `BillNode` object with the specified bill. 
167 | 168 | ### Parameters 169 | 170 | `id` 171 | 172 | : The ocd-bill ID of the desired bill, such as 173 | 'ocd-bill/9c24aaa2-6acc-43ad-883b-ae9f677062e9'. 174 | 175 | `openstatesUrl` 176 | 177 | : The URL of the desired bill, such as 178 | ''. 179 | 180 | `jurisdiction`, `session`, `identifier` 181 | 182 | : Must be specified together to fully identify a bill. 183 | 184 | As is true elsewhere, jurisdiction may be specified by name (New 185 | Hampshire) or ocd-jurisdiction ID 186 | (ocd-jurisdiction/country:us/state:nh). 187 | 188 | Session is specified by legislative session identifier (e.g. 2018 or 189 | 49). 190 | 191 | Identifier is the exact identifier of the desired bill, such as "HB 327". 192 | 193 | You are required to provide either `id` or the other parameters to 194 | fully specify a bill. Use `bills` if you are looking for something 195 | broader. 196 | -------------------------------------------------------------------------------- /docs/api-v2/types.md: -------------------------------------------------------------------------------- 1 | # Data Types 2 | 3 | Starting at the base nodes, data in the API is represented as 4 | interconnected nodes of various types. This page provides an overview of 5 | the nodes. 6 | 7 | Another good way to get acquainted with the layout is to use the 8 | [GraphiQL browser](https://openstates.org/graphql) (click Docs in the 9 | upper right corner). 10 | 11 | ## Jurisdictions & Sessions 12 | 13 | ### JurisdictionNode 14 | 15 | A Jurisdiction is the [Open Civic Data](https://opencivicdata.org) term 16 | for the top level divisions of the US. Open States is comprised of 52 17 | jurisdictions, one for each state, and two more for D.C. and Puerto 18 | Rico. 19 | 20 | Each JurisdictionNode has the following attributes & nodes available: 21 | 22 | - `id` - ocd-jurisdiction identifier, these are permanent identifiers 23 | assigned to each Jurisdiction 24 | 25 | - `name` - human-readable name for the jurisdiction (e.g. 
Kansas) 26 | 27 | - `url` - URL of official website for jurisdiction 28 | 29 | - `featureFlags` - reserved for future use 30 | 31 | - `legislativeSessions` - Paginated list (see `pagination`) of 32 | [LegislativeSessionNode](#legislativesessionnode) belonging to this 33 | jurisdiction's legislature. 34 | 35 | - 36 | 37 | `organizations` - Paginated list of [OrganizationNode](#organizationnode) belonging to this jurisdiction. 38 | 39 | : - it is also possible to filter the list of children using the 40 | `classification` parameter 41 | 42 | - `lastScrapedAt` - Time when last scrape finished. 43 | 44 | See also: [Open Civic Data Jurisdiction 45 | reference](http://docs.opencivicdata.org/en/latest/data/jurisdiction.html) 46 | 47 | ### LegislativeSessionNode 48 | 49 | A legislative session is a convening of the legislature, either a 50 | primary or special session. 51 | 52 | Each LegislativeSessionNode has the following attributes and nodes 53 | available: 54 | 55 | - `jurisdiction` - [JurisdictionNode](#jurisdictionnode) which this 56 | session belongs to. 57 | - `identifier` - short identifier by which this session is referred to 58 | (e.g. 2017s1 or 121) 59 | - `name` - formal name of session (e.g. "2017 Special Session #1" 60 | or "121st Session") 61 | - `classification` - "primary" or "special" 62 | - `startDate` - start date of session if known 63 | - `endDate` - end date of session if known 64 | 65 | ### DivisionNode 66 | 67 | Divisions represent particular geopolitical boundaries. Divisions exist 68 | for states as well as their component districts and are tied closely to 69 | political geographies. 
70 | 71 | - `id` - [Open Civic Data Division 72 | ID](http://docs.opencivicdata.org/en/latest/ocdids.html#division-ids) 73 | - `name` - human-readable name for the division 74 | - `redirect` - link to another DivisionNode, only present if division 75 | has been replaced 76 | - `country` - country code (will be "us") for all Open States 77 | divisions 78 | - `createdAt` - date at which this object was created in our system 79 | - `updatedAt` - date at which this object was last updated in our 80 | system 81 | - `extras` - JSON string with optional additional information about 82 | the object 83 | 84 | ## People & Organizations 85 | 86 | ### PersonNode 87 | 88 | People, typically legislators and their associated metadata. 89 | 90 | Note that most fields are optional beyond name as often we don't have a 91 | reliable given/family name or birthDate for instance. 92 | 93 | - `id` - [Open Civic Data Person 94 | ID](http://docs.opencivicdata.org/en/latest/ocdids.html) 95 | 96 | - `name` - primary name for the person 97 | 98 | - `sortName` - alternate name to sort by (if known) 99 | 100 | - `familyName` - hereditary name, essentially a "last name" (if 101 | known) 102 | 103 | - `givenName` - essentially a "first name" (if known) 104 | 105 | - `image` - full URL to official image of legislator 106 | 107 | - `birthDate` - see `date-format` 108 | 109 | - `deathDate` - see `date-format` 110 | 111 | - `identifiers` - list of other known identifiers, 112 | [IdentifierNode](#identifiernode) 113 | 114 | - `otherNames` - list of other known names, [NameNode](#namenode) 115 | 116 | - `links` - official URLs relating to this person, 117 | [LinkNode](#linknode) 118 | 119 | - `contactDetails` - ways to contact this person (via email, phone, 120 | etc.), [contactdetailnode](#contactdetailnode) 121 | 122 | - 123 | 124 | `currentMemberships` - currently active memberships [MembershipNode](#membershipnode) 125 | 126 | : - can be filtered with the `classification` parameter to only 127 | 
get memberships to certain types of 128 | [OrganizationNode](#organizationnode) 129 | 130 | - 131 | 132 | `oldMemberships` - inactive memberships [MembershipNode](#membershipnode) 133 | 134 | : - can be filtered with the `classification` parameter to only 135 | get memberships to certain types of 136 | [OrganizationNode](#organizationnode) 137 | 138 | - `sources` - URLs which were used in compiling Open States' information on this subject, [LinkNode](#linknode) 139 | 140 | - `createdAt` - date at which this object was created in our system 141 | 142 | - `updatedAt` - date at which this object was last updated in our 143 | system 144 | 145 | - `extras` - JSON string with optional additional information about 146 | the object 147 | 148 | See also: 149 | 150 | - [Popolo's person](http://popoloproject.com/specs/person.html) 151 | - [Open Civic Data OCDEP 5](http://docs.opencivicdata.org/en/latest/proposals/0005.html) 152 | 153 | ### OrganizationNode 154 | 155 | Organizations that comprise the state legislatures and their associated 156 | metadata. 157 | 158 | A typical bicameral legislature is comprised of a top-level organization 159 | (classification=legislature), two chambers (classification=upper & 160 | lower), and any number of committees (classification=committee).
161 | 162 | Each Organization is comprised of the following attributes and nodes: 163 | 164 | - `id` - [Open Civic Data Organization ID](http://docs.opencivicdata.org/en/latest/ocdids.html) 165 | 166 | - `name` - primary name for the organization 167 | 168 | - `image` - full URL to official image for organization 169 | 170 | - `classification` - the type of organization as described above 171 | 172 | - `foundingDate` - see `date-format` 173 | 174 | - `dissolutionDate` - see `date-format` 175 | 176 | - `parent` - parent OrganizationNode if one exists 177 | 178 | - 179 | 180 | `children` - paginated list of child OrganizationNode objects 181 | 182 | : - it is also possible to filter the list of children using the 183 | `classification` parameter 184 | 185 | - `currentMemberships` - list of all current members of this 186 | Organization 187 | 188 | - `identifiers` - list of other known identifiers for this 189 | organization, [IdentifierNode](#identifiernode) 190 | 191 | - `otherNames` - list of other known names for this organization, 192 | [NameNode](#namenode) 193 | 194 | - `links` - official URLs relating to this organization, 195 | [LinkNode](#linknode) 196 | 197 | - `sources` - URLs which were used in compiling Open States' 198 | information on this subject, [LinkNode](#linknode) 199 | 200 | - `createdAt` - date at which this object was created in our system 201 | 202 | - `updatedAt` - date at which this object was last updated in our 203 | system 204 | 205 | - `extras` - JSON string with optional additional information about 206 | the object 207 | 208 | See also: 209 | 210 | - [Popolo's 211 | organization](http://popoloproject.com/specs/organization.html) 212 | - [Open Civic Data OCDEP 213 | 5](http://docs.opencivicdata.org/en/latest/proposals/0005.html) 214 | 215 | ### MembershipNode 216 | 217 | A MembershipNode represents a connection between a 218 | [personnode](#personnode) and an [organizationnode](#organizationnode).
A 219 | membership may optionally also reference a particular 220 | [postnode](#postnode), such as a particular seat within a given chamber. 221 | 222 | Each membership has the following attributes and nodes: 223 | 224 | - `id` - [Open Civic Data Membership 225 | ID](http://docs.opencivicdata.org/en/latest/ocdids.html) 226 | - `personName` - the raw name of the person that the membership 227 | describes (see `name-matching`) 228 | - `person` - [personnode](#personnode) 229 | - `organization` - [organizationnode](#organizationnode) 230 | - `post` - [postnode](#postnode) 231 | - `label` - label assigned to this membership 232 | - `role` - role fulfilled by this membership 233 | - `startDate` - start date of membership if known 234 | - `endDate` - end date of membership if known 235 | - `createdAt` - date at which this object was created in our system 236 | - `updatedAt` - date at which this object was last updated in our 237 | system 238 | - `extras` - JSON string with optional additional information about 239 | the object 240 | 241 | See also: 242 | 243 | - [Popolo's 244 | membership](http://popoloproject.com/specs/membership.html) 245 | - [Open Civic Data OCDEP 246 | 5](http://docs.opencivicdata.org/en/latest/proposals/0005.html) 247 | 248 | ### PostNode 249 | 250 | A PostNode represents a given position within an organization. The most 251 | common example would be a seat such as Maryland's 4th House Seat. 252 | 253 | It is worth noting that some seats can have multiple active memberships 254 | at once, as noted in `maximumMemberships`. 255 | 256 | Each post has the following attributes and nodes: 257 | 258 | - `id` - [Open Civic Data Post 259 | ID](http://docs.opencivicdata.org/en/latest/ocdids.html) 260 | - `label` - label assigned to this post (e.g. 3) 261 | - `role` - role fulfilled by this membership (e.g.
'member') 262 | - `division` - related [divisionnode](#divisionnode) if this role has 263 | a relevant division 264 | - `startDate` - start date of membership if known 265 | - `endDate` - end date of membership if known 266 | - `maximumMemberships` - typically 1, but set higher in the case of 267 | multi-member districts 268 | - `createdAt` - date at which this object was created in our system 269 | - `updatedAt` - date at which this object was last updated in our 270 | system 271 | - `extras` - JSON string with optional additional information about 272 | the object 273 | 274 | See also: 275 | 276 | - [Popolo's post](http://popoloproject.com/specs/post.html) 277 | - [Open Civic Data OCDEP 278 | 5](http://docs.opencivicdata.org/en/latest/proposals/0005.html) 279 | 280 | ## Bills & Votes 281 | 282 | ### BillNode 283 | 284 | A BillNode represents any legislative instrument such as a bill or 285 | resolution. 286 | 287 | Each node has the following attributes and nodes available: 288 | 289 | - `id` - Internal ocd-bill identifier for this bill. 290 | - `legislativeSession` - link to 291 | [LegislativeSessionNode](#legislativesessionnode) this bill is from 292 | - `identifier` - primary identifier for this bill (e.g. 
HB 264) 293 | - `title` - primary title for this bill 294 | - `fromOrganization` - organization (typically upper or lower chamber) 295 | primarily associated with this bill 296 | - `classification` - list of one or more bill types such as "bill" 297 | or "resolution" 298 | - `subject` - list of zero or more subjects assigned by the state 299 | - `abstracts` - list of abstracts provided by the state, 300 | [BillAbstractNode](#billabstractnode) 301 | - `otherTitles` - list of other titles provided by the state, 302 | [BillTitleNode](#billtitlenode) 303 | - `otherIdentifiers` - list of other identifiers provided by the 304 | state, [BillIdentifierNode](#billidentifiernode) 305 | - `actions` - list of actions (such as introduction, amendment, 306 | passage, etc.) that have been taken on the bill, 307 | [BillActionNode](#billactionnode) 308 | - `sponsorships` - list of bill sponsors, 309 | [BillSponsorshipNode](#billsponsorshipnode) 310 | - `relatedBills` - list of related bills as provided by the state, 311 | [RelatedBillNode](#relatedbillnode) 312 | - `versions` - list of bill versions as provided by the state, 313 | [BillDocumentNode](#billdocumentnode) 314 | - `documents` - list of related documents (e.g. legal analysis, fiscal 315 | notes, etc.) 
as provided by the state, 316 | [BillDocumentNode](#billdocumentnode) 317 | - `votes` - paginated list of [VoteEventNode](#voteeventnode) related 318 | to the bill 319 | - `sources` - URLs which were used in compiling Open States' 320 | information on this subject, [linknode](#linknode) 321 | - `openstatesUrl` - URL to bill page on OpenStates.org 322 | - `createdAt` - date at which this object was created in our system 323 | - `updatedAt` - date at which this object was last updated in our 324 | system 325 | - `extras` - JSON string with optional additional information about 326 | the object 327 | 328 | ### BillAbstractNode 329 | 330 | Represents an official abstract for a bill, each BillAbstractNode has 331 | the following attributes: 332 | 333 | - `abstract` - the abstract itself 334 | - `note` - optional note about origin/purpose of abstract 335 | - `date` - optional date associated with abstract 336 | 337 | ### BillTitleNode 338 | 339 | Represents an alternate title for a bill, each BillTitleNode has the 340 | following attributes: 341 | 342 | - `title` - the alternate title 343 | - `note` - optional note about origin/purpose of this title 344 | 345 | ### BillIdentifierNode 346 | 347 | Represents an alternate identifier for a bill, each BillIdentifierNode 348 | has the following attributes: 349 | 350 | - `identifier` - the alternate identifier 351 | - `scheme` - a name for the identifier scheme 352 | - `note` - optional note about origin/purpose of this identifier 353 | 354 | ### BillActionNode 355 | 356 | Represents an action taken on a bill, each BillActionNode has the 357 | following attributes and nodes: 358 | 359 | - `organization` - [OrganizationNode](#organizationnode) where this 360 | action originated, will typically be either upper or lower chamber, 361 | or perhaps legislature as a whole. 362 | - `description` - text describing the action as provided by the 363 | jurisdiction. 
364 | - `date` - date action took place (see `date-format`) 365 | - `classification` - list of zero or more normalized action types (see 366 | `action-categorization`) 367 | - `order` - integer by which actions can be sorted, not intended for 368 | display purposes 369 | - `extras` - JSON string providing extra information about this action 370 | - `vote` - if there is a known associated vote, pointer to the 371 | relevant [VoteEventNode](#voteeventnode) 372 | - `relatedEntities` - a list of 373 | [RelatedEntityNode](#relatedentitynode) with known entities 374 | referenced in this action 375 | 376 | ### RelatedEntityNode 377 | 378 | Represents an entity that is related to a 379 | [BillActionNode](#billactionnode). 380 | 381 | - `name` - raw (source-provided) name of entity 382 | - `entityType` - either organization or person 383 | - `organization` - if `entityType` is 'organization', the resolved 384 | [OrganizationNode](#organizationnode) 385 | - `person` - if `entityType` is 'person', the resolved 386 | [PersonNode](#personnode) 387 | 388 | See `name-matching` for details on how `name` relates to `organization` and `person`. 389 | 390 | ### BillSponsorshipNode 391 | 392 | Represents a sponsor of a bill. 393 | 394 | - `name` - raw (source-provided) name of sponsoring person or 395 | organization 396 | - `entityType` - either organization or person 397 | - `organization` - if `entityType` is 'organization', the resolved 398 | [OrganizationNode](#organizationnode) 399 | - `person` - if `entityType` is 'person', the resolved 400 | [PersonNode](#personnode) 401 | - `primary` - boolean, true if sponsorship is considered by the 402 | jurisdiction to be "primary" (note: in many states multiple 403 | primary sponsors may exist) 404 | - `classification` - jurisdiction-provided type of sponsorship, such 405 | as "author" or "cosponsor".
These meanings typically vary across 406 | states, which is why we provide `primary` as a sort of indicator of 407 | the degree of sponsorship indicated. 408 | 409 | See `name-matching` for details on how `name` relates to `organization` and `person`. 410 | 411 | ### RelatedBillNode 412 | 413 | Represents relationships between bills. 414 | 415 | - `identifier` - identifier of related bill (e.g. SB 401) 416 | - `legislativeSession` - identifier of related session (in same jurisdiction) 417 | - `relationType` - type of relationship such as "companion", "prior-session", "replaced-by", or "replaces" 418 | - `relatedBill` - if the related bill is found to exist in our data, link to the [BillNode](#billnode) 419 | 420 | ### BillDocumentNode 421 | 422 | Representation of `documents` and `versions` on bills. A given document 423 | can have multiple links representing different manifestations (e.g. 424 | HTML, PDF, DOC) of the same content. 425 | 426 | - `note` - note describing the purpose of the document or version 427 | (e.g. Final Printing) 428 | - `date` - optional date associated with the document 429 | - `links` - list of one or more `MimetypeLinkNode` with actual URLs to 430 | bills. 431 | 432 | ### MimetypeLinkNode 433 | 434 | Represents a single manifestation of a particular document. 435 | 436 | - `mediaType` - media type (aka MIME type) such as application/pdf or 437 | text/html 438 | - `url` - URL to official copy of the bill 439 | - `text` - text describing this particular manifestation (e.g. PDF) 440 | 441 | ### VoteEventNode 442 | 443 | Represents a vote taken on a bill. 444 | 445 | - `id` - Internal ocd-vote identifier for this vote. 446 | - `identifier` - Identifier used by jurisdiction to uniquely identify 447 | the vote. 448 | - `motionText` - Text of the motion being voted upon, such as "motion 449 | to pass the bill as amended."
450 | - `motionClassification` - List with zero or more classifications for 451 | this motion, such as "passage" or "veto-override" 452 | - `startDate` - Date on which the vote took place. (see 453 | `date-format`) 454 | - `result` - Outcome of the vote, 'pass' or 'fail'. 455 | - `organization` - Related [OrganizationNode](#organizationnode) where 456 | vote took place. 457 | - `billAction` - Optional linked [BillActionNode](#billactionnode). 458 | - `votes` - List of [PersonVoteNode](#personvotenode) for each 459 | individual's recorded vote. (May not be present depending on 460 | jurisdiction.) 461 | - `counts` - List of [VoteCountNode](#votecountnode) with sums of each 462 | outcome (e.g. yea/nay/abstain). 463 | - `sources` - URLs which were used in compiling Open States' 464 | information on this subject, [LinkNode](#linknode) 465 | - `createdAt` - date at which this object was created in our system 466 | - `updatedAt` - date at which this object was last updated in our 467 | system 468 | - `extras` - JSON string with optional additional information about 469 | the object 470 | 471 | See also: [Open Civic Data vote 472 | format](http://docs.opencivicdata.org/en/latest/data/vote.html). 473 | 474 | ### PersonVoteNode 475 | 476 | Represents an individual person's vote (e.g. yea or nay) on a given 477 | bill. 478 | 479 | - `option` - Option chosen by this individual. (yea, nay, abstain, 480 | other, etc.) 481 | - `voterName` - Raw name of voter as provided by jurisdiction. 482 | - `voter` - Resolved [PersonNode](#personnode) representing voter. 483 | (See `name-matching`) 484 | - `note` - Note attached to this vote, sometimes used for explaining 485 | an "other" vote. 486 | 487 | ### VoteCountNode 488 | 489 | Represents the sum of votes for a given `option`. 490 | 491 | - `option` - Option in question. (yea, nay, abstain, other, etc.) 492 | - `value` - Number of individuals voting this way.
493 | 494 | ## Other Nodes 495 | 496 | ### IdentifierNode 497 | 498 | Represents an alternate identifier, each with the following attributes: 499 | 500 | - `identifier` - the alternate identifier 501 | - `scheme` - a name for the identifier scheme 502 | 503 | ### NameNode 504 | 505 | Represents an alternate name, each with the following attributes: 506 | 507 | - `name` - the alternate name 508 | - `note` - note about usage/origin of this alternate name 509 | - `startDate` - date at which this name began being valid (blank if 510 | unknown) 511 | - `endDate` - date at which this name stopped being valid (blank if 512 | unknown or still active) 513 | 514 | ### LinkNode 515 | 516 | Represents a single link associated with a person or used as a source. 517 | 518 | - `url` - URL 519 | - `text` - text describing the use of this particular URL 520 | 521 | ### ContactDetailNode 522 | 523 | Used to represent a contact method for a given person. 524 | 525 | - `type` - type of contact detail (e.g. voice, email, address, etc.) 526 | - `value` - actual phone number, email address, etc. 527 | - `note` - used to group contact data by location (e.g.
Home Address, 528 | Office Address) 529 | - `label` - human-readable label for this contact detail 530 | -------------------------------------------------------------------------------- /docs/api-v3/changelog.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | ## 2021.11.12 4 | 5 | - add LegislativeSession.downloads 6 | 7 | ## 2021.10.27 8 | 9 | - add Bill.related_bills include 10 | 11 | ## 2021.10.12 12 | 13 | - added classification field on Person.offices 14 | - Person.offices now always have all fields present, even if empty 15 | 16 | ## 2021.09.24 17 | 18 | - added experimental event endpoints 19 | 20 | ## 2021.08.30 21 | 22 | - added identifiers parameter to bill search 23 | 24 | ## 2021.08.03 25 | 26 | - added latest_bill_update & latest_people_update to API v3 27 | 28 | ## 2021.08.02 29 | 30 | - added experimental committee endpoints 31 | 32 | ## 2021.04.19 33 | 34 | - experimental support for Jurisdiction.latest_runs include parameter 35 | 36 | ## 2021.03.24 37 | 38 | - Jurisdiction classification can now be \'country\' 39 | - experimental US federal support 40 | 41 | ## 2021.02.23 42 | 43 | - people district= parameter added 44 | 45 | ## 2020.12.21 46 | 47 | - people name= parameter is now fuzzy-searched, matching API v1 and v2 48 | 49 | ## 2020.10.30 50 | 51 | - move email to a top-level field on Person responses 52 | - add consistent post ordering 53 | 54 | ## 2020.10.21 55 | 56 | - fix datetime handling for updated_since and created_since filters 57 | - add list of divisions when including Jurisdiction.organizations 58 | 59 | ## 2020.10.13 60 | 61 | - add updated_asc sort option 62 | - add rate limiting 63 | - bugfix for New York jurisdiction lookup (openstates/issues\#136) 64 | 65 | ## 2020.09.28 66 | 67 | - set permissive CORS settings 68 | - bills endpoint updates: 69 | - added created_since filter, thanks to Donald Wasserman!
70 | - added sponsor and sponsor_classification filters 71 | - added sort parameter 72 | - added useful error message when searching /bills by session 73 | without jurisdiction 74 | - restored missing Bill.from_organization field 75 | - introduced new fields: Person.openstates_url, Bill.openstates_url 76 | 77 | ## 2020.09.14 78 | 79 | - removed some unused fields from responses 80 | - removed deprecated government classification from Jurisdiction 81 | 82 | ## 2020.09.10 83 | 84 | - added Jurisdiction.legislative_sessions 85 | - corrected initial pagination limits for release 86 | 87 | ## 2020.09.09 88 | 89 | - Initial beta release. 90 | -------------------------------------------------------------------------------- /docs/api-v3/index.md: -------------------------------------------------------------------------------- 1 | # API v3 Overview 2 | 3 | Open States provides a JSON API that can be used to programmatically 4 | access state legislative information. 5 | 6 | ## API Basics 7 | 8 | The root URL for the API is <https://v3.openstates.org/>. 9 | 10 | API keys are required. You can [register for an API 11 | key](https://open.pluralpolicy.com/accounts/profile/) and once activated, 12 | you\'ll pass your API key via the `X-API-KEY` header or `?apikey` query 13 | parameter. 14 | 15 | Auto-generated interactive documentation is available at either: 16 | 17 | > - 18 | > - (whichever you prefer) 19 | 20 | Issues should be filed at [our issue 21 | tracker](https://github.com/openstates/issues/issues). 22 | 23 | You can also check out our [introductory blog 24 | post](https://blog.openstates.org/open-states-api-v3-beta/) for more 25 | details. 26 | 27 | ## Methods 28 | 29 | Method |Description |Interactive Docs 30 | --------------------------------------|------------------------------------------------------|---------------------------------------------------------------------------------------------------------------- 31 | /jurisdictions |Get list of available jurisdictions.
|[try it!](https://v3.openstates.org/docs#/jurisdictions/jurisdiction_list_jurisdictions_get) 32 | /jurisdictions/{jurisdiction_id} |Get detailed metadata for a particular jurisdiction. |[try it!](https://v3.openstates.org/docs#/jurisdictions/jurisdiction_detail_jurisdictions__jurisdiction_id__get) 33 | /people |List or search people (legislators, governors, etc.) |[try it!](https://v3.openstates.org/docs#/people/people_search_people_get) 34 | /people.geo |Get legislators for a given location. |[try it!](https://v3.openstates.org/docs#/people/people_geo_people_geo_get) 35 | /bills |Search bills by various criteria. |[try it!](https://v3.openstates.org/docs#/bills/bills_search_bills_get) 36 | /bills/ocd-bill/{uuid} |Get bill by internal ID. |[try it!](https://v3.openstates.org/docs#/bills/bill_detail_by_id_bills_ocd_bill__openstates_bill_id__get) 37 | /bills/{jurisdiction}/{session}/{id} |Get bill by jurisdiction, session, and ID. |[try it!](https://v3.openstates.org/docs#/bills/bill_detail_bills__jurisdiction___session___bill_id__get) 38 | /committees |Get list of committees by jurisdiction. |[try it!](https://v3.openstates.org/docs#/committees/committee_list_committees_get) 39 | /committees/{committee_id} |Get details on committee by internal ID. |[try it!](https://v3.openstates.org/docs#/committees/committee_detail_committees__committee_id__get) 40 | /events |Get list of events by jurisdiction. |[try it!](https://v3.openstates.org/docs#/events/event_list_events_get) 41 | /events/{event_id} |Get details on event by internal ID. |[try it!](https://v3.openstates.org/docs#/events/event_detail_events__event_id__get) 42 | 43 | 44 | ## Concepts 45 | 46 | **Jurisdiction** 47 | 48 | : The fundamental unit by which data is partitioned is the 49 | \'jurisdiction.\' If you are just interested in states you can 50 | consider the words synonymous for the most part. 
Jurisdictions 51 | include states, DC & Puerto Rico, and municipal governments for 52 | which we have limited support. 53 | 54 | **Person** 55 | 56 | : A legislator, governor, mayor, etc. 57 | 58 | Each person possibly has a number of roles, at most one of which is 59 | considered \'current.\' 60 | 61 | **Bill** 62 | 63 | : A proposed piece of legislation, encompasses bills, resolutions, 64 | constitutional amendments, etc. 65 | 66 | A given bill may have any number of votes, sponsorships, actions, 67 | etc. 68 | -------------------------------------------------------------------------------- /docs/assets/extra.css: -------------------------------------------------------------------------------- 1 | /* Indentation. */ 2 | div.doc-contents:not(.first) { 3 | padding-left: 25px; 4 | border-left: 4px solid rgba(230, 230, 230); 5 | margin-bottom: 80px; 6 | } 7 | -------------------------------------------------------------------------------- /docs/assets/openstates.svg: -------------------------------------------------------------------------------- 1 | 2 | Open States Icon 3 | 4 | 5 | 6 | -------------------------------------------------------------------------------- /docs/code-of-conduct.md: -------------------------------------------------------------------------------- 1 | # Code of Conduct 2 | 3 | ## Our Pledge 4 | 5 | We as members, contributors, and leaders pledge to make participation in 6 | our community a harassment-free experience for everyone, regardless of 7 | age, body size, visible or invisible disability, ethnicity, sex 8 | characteristics, gender identity and expression, level of experience, 9 | education, socio-economic status, nationality, personal appearance, 10 | race, religion, or sexual identity and orientation. 11 | 12 | We pledge to act and interact in ways that contribute to an open, 13 | welcoming, diverse, inclusive, and healthy community. 
14 | 15 | ## Our Standards 16 | 17 | Examples of behavior that contributes to a positive environment for our 18 | community include: 19 | 20 | - Demonstrating empathy and kindness toward other people 21 | - Being respectful of differing opinions, viewpoints, and experiences 22 | - Giving and gracefully accepting constructive feedback 23 | - Accepting responsibility and apologizing to those affected by our 24 | mistakes, and learning from the experience 25 | - Focusing on what is best not just for us as individuals, but for the 26 | overall community 27 | 28 | Examples of unacceptable behavior include: 29 | 30 | - The use of sexualized language or imagery, and sexual attention or 31 | advances of any kind 32 | - Trolling, insulting or derogatory comments, and personal or 33 | political attacks 34 | - Public or private harassment 35 | - Publishing others' private information, such as a physical or email 36 | address, without their explicit permission 37 | - Other conduct which could reasonably be considered inappropriate in 38 | a professional setting 39 | 40 | ## Enforcement Responsibilities 41 | 42 | Community leaders are responsible for clarifying and enforcing our 43 | standards of acceptable behavior and will take appropriate and fair 44 | corrective action in response to any behavior that they deem 45 | inappropriate, threatening, offensive, or harmful. 46 | 47 | Community leaders have the right and responsibility to remove, edit, or 48 | reject comments, commits, code, wiki edits, issues, and other 49 | contributions that are not aligned to this Code of Conduct, and will 50 | communicate reasons for moderation decisions when appropriate. 51 | 52 | ## Scope 53 | 54 | This Code of Conduct applies within all community spaces, and also 55 | applies when an individual is officially representing the community in 56 | public spaces. 
Examples of representing our community include using an 57 | official e-mail address, posting via an official social media account, 58 | or acting as an appointed representative at an online or offline event. 59 | 60 | ## Enforcement 61 | 62 | Instances of abusive, harassing, or otherwise unacceptable behavior may 63 | be reported to the community leaders responsible for enforcement at 64 | . All complaints will be reviewed and 65 | investigated promptly and fairly. 66 | 67 | All community leaders are obligated to respect the privacy and security 68 | of the reporter of any incident. 69 | 70 | ## Enforcement Guidelines 71 | 72 | Community leaders will follow these Community Impact Guidelines in 73 | determining the consequences for any action they deem in violation of 74 | this Code of Conduct: 75 | 76 | ### 1. Correction 77 | 78 | **Community Impact**: Use of inappropriate language or other behavior 79 | deemed unprofessional or unwelcome in the community. 80 | 81 | **Consequence**: A private, written warning from community leaders, 82 | providing clarity around the nature of the violation and an explanation 83 | of why the behavior was inappropriate. A public apology may be 84 | requested. 85 | 86 | ### 2. Warning 87 | 88 | **Community Impact**: A violation through a single incident or series of 89 | actions. 90 | 91 | **Consequence**: A warning with consequences for continued behavior. No 92 | interaction with the people involved, including unsolicited interaction 93 | with those enforcing the Code of Conduct, for a specified period of 94 | time. This includes avoiding interactions in community spaces as well as 95 | external channels like social media. Violating these terms may lead to a 96 | temporary or permanent ban. 97 | 98 | ### 3. Temporary Ban 99 | 100 | **Community Impact**: A serious violation of community standards, 101 | including sustained inappropriate behavior. 
102 | 103 | **Consequence**: A temporary ban from any sort of interaction or public 104 | communication with the community for a specified period of time. No 105 | public or private interaction with the people involved, including 106 | unsolicited interaction with those enforcing the Code of Conduct, is 107 | allowed during this period. Violating these terms may lead to a 108 | permanent ban. 109 | 110 | ### 4. Permanent Ban 111 | 112 | **Community Impact**: Demonstrating a pattern of violation of community 113 | standards, including sustained inappropriate behavior, harassment of an 114 | individual, or aggression toward or disparagement of classes of 115 | individuals. 116 | 117 | **Consequence**: A permanent ban from any sort of public interaction 118 | within the project community. 119 | 120 | ## Attribution 121 | 122 | This Code of Conduct is adapted from the [Contributor 123 | Covenant](https://www.contributor-covenant.org), version 2.0, available 124 | at 125 | . 126 | 127 | Community Impact Guidelines were inspired by [Mozilla's code of conduct 128 | enforcement ladder](https://github.com/mozilla/diversity). 129 | 130 | For answers to common questions about this code of conduct, see the FAQ 131 | at . Translations are 132 | available at . 133 | -------------------------------------------------------------------------------- /docs/contributing/documentation.md: -------------------------------------------------------------------------------- 1 | # Documentation 2 | 3 | If you notice any issues on these docs, or just want to help improve them 4 | the source is in the 5 | [openstates/documentation](https://github.com/openstates/documentation) 6 | repository. 7 | 8 | ## Checking out 9 | 10 | Fork and clone the documentation repository: 11 | 12 | - Visit and click the 13 | 'Fork' button. 14 | 15 | - Clone your fork using your tool of choice or the command line: 16 | 17 | $ git clone git@github.com:yourname/documentation.git 18 | Cloning into 'documentation'.. 
19 | 20 | ## Building Docs Locally 21 | 22 | Step 1) Install poetry if you haven't already 23 | () 24 | 25 | Step 2) Run `poetry install` to build virtualenv. 26 | 27 | Step 3) Run `poetry run mkdocs serve` to preview changes in browser. 28 | -------------------------------------------------------------------------------- /docs/contributing/images/committee_data_example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openstates/documentation/8577d416d42a1d4384d4a5cd1e17a9d4955df1d0/docs/contributing/images/committee_data_example.png -------------------------------------------------------------------------------- /docs/contributing/images/selector_ex1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openstates/documentation/8577d416d42a1d4384d4a5cd1e17a9d4955df1d0/docs/contributing/images/selector_ex1.png -------------------------------------------------------------------------------- /docs/contributing/images/selector_ex2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openstates/documentation/8577d416d42a1d4384d4a5cd1e17a9d4955df1d0/docs/contributing/images/selector_ex2.png -------------------------------------------------------------------------------- /docs/contributing/images/selector_ex3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openstates/documentation/8577d416d42a1d4384d4a5cd1e17a9d4955df1d0/docs/contributing/images/selector_ex3.png -------------------------------------------------------------------------------- /docs/contributing/index.md: -------------------------------------------------------------------------------- 1 | # Getting Started 2 | 3 | We're glad to have you joining us, taking a few minutes to read the 4 | following pages will help you be a better member of 
our community: 5 | 6 | - Our [Code of Conduct](../code-of-conduct.md) is important to us, and helps us maintain a healthy community. 7 | - We also have a [guide to help you learn where to get help](../index.md#communication) that you should look over. 8 | 9 | If you are new to open source, or unfamiliar with contributing to open source projects, it might be beneficial to read [this guide](https://opensource.guide/how-to-contribute/). We welcome and value all contributions to Open States, including (but not limited to!) code contributions. 10 | 11 | This guide assumes a basic familiarity with using the [command line](https://www.freecodecamp.org/news/command-line-for-beginners/), [git](https://www.freecodecamp.org/news/git-and-github-for-beginners/), and Python. If you are unfamiliar with something that is mentioned in this guide, we encourage you to read the linked resources. 12 | 13 | No matter how experienced you are, it is a good idea to read through this section before diving into Open States' code. 14 | 15 | No worries if you aren't an expert though, we'll walk you through the 16 | steps. And as for Python, if you've written other languages like 17 | JavaScript or Ruby you'll probably be just fine. Here's a [great guide](https://realpython.com/intro-to-pyenv/) on 18 | getting started with installing and managing Python versions. 19 | 20 | Don't be afraid to [ask for help](../index.md#communication) either! 21 | 22 | 23 | ## Project Overview 24 | 25 | Open States is a fairly large & somewhat complex project composed of many moving parts with a long history. 26 | 27 | As you look to contribute, it may be beneficial to understand a little bit about the various components. 28 | 29 | These repositories make up the core of the project; if you're looking to contribute, there's a 95% chance one of these is what you want. 30 | 31 | - [openstates-scrapers](https://github.com/openstates/openstates-scrapers) - Open States' scrapers.
All code related to getting information from a website and storing it in the Open States database lives here. [What is a scraper?](https://realpython.com/python-web-scraping-practical-introduction/) 32 | - [people](https://github.com/openstates/people) - Open States people & committee data, maintained as editable YAML files. 33 | - [openstates-core](https://github.com/openstates/openstates-core) - Open States data model & scraper backend. 34 | - [openstates.org](https://github.com/openstates/openstates.org) - Powers [OpenStates.org](https://openstates.org/) website & GraphQL API. 35 | - [api-v3](https://github.com/openstates/api-v3) - Powers [API v3](https://v3.openstates.org). 36 | - [documentation](https://github.com/openstates/documentation) - [you're reading it now](https://docs.openstates.org/). 37 | 38 | 39 | ## Installing Prerequisites 40 | 41 | ### poetry 42 | 43 | If you're working on the `people` repo, `api-v3`, or want to work on scrapers without [Docker](https://docker-curriculum.com/), you'll need `poetry` to build your [Python virtual environment](https://realpython.com/python-virtual-environments-a-primer/). 44 | 45 | !!! note 46 | 47 | If you haven't used `poetry` before, it is similar to `pipenv`, `pip`, and `conda` in that it manages a Python virtualenv on your behalf. 48 | 49 | **Installing Poetry** 50 | 51 | The [official poetry docs](https://python-poetry.org/docs/master/#installation) recommend installing with: 52 | 53 | curl -sSL https://install.python-poetry.org | python3 - 54 | 55 | Then within each repo you check out, be sure to run: 56 | 57 | poetry install 58 | 59 | Which will fetch the correct version of dependencies. 60 | 61 | ### docker & docker-compose 62 | 63 | When working on scrapers or openstates.org, you have the option to use Docker. 64 | 65 | The first thing you will need to do is get a working docker environment 66 | on your local machine. We'll do this using Docker. 
No worries if you 67 | aren't familiar with Docker, you'll barely have to touch it beyond 68 | what this guide explains. 69 | 70 | Install Docker and docker-compose (if not already installed on your local system): 71 | 72 | **(a)** Installing Docker: 73 | 74 | - On macOS: [Docker for Mac](https://docs.docker.com/docker-for-mac/) 75 | - On Windows: [Docker for Windows](https://docs.docker.com/docker-for-windows/) 76 | - On Linux: Use your package manager of choice or [follow Docker's instructions](https://docs.docker.com/engine/installation/linux/). 77 | 78 | (*Docker Compose is probably already installed by step (a); if not, proceed to step (b).*) 79 | 80 | **(b)** Installing docker-compose: 81 | 82 | - For easy installation on [macOS, Windows, and 64-bit Linux.](https://docs.docker.com/compose/install/#prerequisites) 83 | 84 | Ensure that Docker and docker-compose are installed locally: 85 | 86 | $ docker --version 87 | Docker version 19.03.4, build 9013bf5 88 | $ docker-compose --version 89 | docker-compose version 1.24.1, build 4667896b 90 | 91 | Of course, your versions will differ, but ensure they are relatively 92 | recent to avoid strange issues. 93 | 94 | ### pre-commit 95 | 96 | To help keep the code as manageable as possible we **strongly recommend** 97 | you use pre-commit to make sure all commits adhere to our preferred 98 | style. 99 | 100 | - See [pre-commit's installation instructions](https://pre-commit.com/#installation) 101 | 102 | - Within each repo you check out, run `pre-commit install` after checking out. It should look something like: 103 | 104 | $ pre-commit install 105 | pre-commit installed at .git/hooks/pre-commit 106 | 107 | !!! note 108 | 109 | If you're running `flake8` and `black` yourself via your editor or 110 | similar this isn't strictly necessary, but we find it helps ensure 111 | commits don't fail linting.
**We require all PRs to pass linting!** 112 | [What is linting?](https://www.freecodecamp.org/news/what-is-linting-and-how-can-it-save-you-time/) 113 | 114 | ## Recent Major Work 115 | 116 | To give a sense of recent priorities, here are major milestones from the 117 | past few years: 118 | 119 | - [Federal Data & Committee Data](https://blog.openstates.org/open-states-2021-q2/) - 2021 120 | - [API v3](https://blog.openstates.org/open-states-api-v3/) - Q3 2020 121 | - [Legislation Tracking](https://blog.openstates.org/tracking-legislation-on-open-states/) - Q1 2020 122 | - **Restoration of Historical Legislator Data** - Q4 2019 123 | - [Full Text Search](https://blog.openstates.org/adding-full-text-search-to-open-states-14b665c1fe30/) - Q4 2019 124 | - **2019 Legislative Session Updates** - Q1 2019 125 | - [OpenStates.org 2019 rewrite](https://blog.openstates.org/introducing-the-new-openstates-org-64bcbd765f58/) - Q1 2019 126 | - [OpenStates GraphQL API](https://blog.openstates.org/more-ways-to-get-state-legislative-data-d9aece2245f0/) - Q4 2018 127 | - **Scraper Overhaul** - Throughout much of 2017 we reworked our 128 | scrapers to be more resilient and to use an updated tech stack, 129 | replacing the one that powered the site from 2011-2016. 130 | -------------------------------------------------------------------------------- /docs/contributing/local-database.md: -------------------------------------------------------------------------------- 1 | # Running a Local Database 2 | 3 | If you want to ensure your scraped data imports or work on OpenStates.org or API v3, you'll need a local database. 4 | 5 | This can be a bit cumbersome, since running Postgres locally varies a lot platform-to-platform, and you'll need to populate it as well. 6 | 7 | If you're comfortable with Postgres, most of these steps can be easily modified to use your own Postgres instance, but for the remainder of this guide we'll be using a dockerized postgres image. 
8 | 9 | ## Prerequisites 10 | 11 | Be sure you've already installed `docker` and `docker-compose`, as noted in [Installing Prerequisites](index.md#installing-prerequisites). 12 | 13 | You'll need [openstates-scrapers](https://github.com/openstates/openstates-scrapers) checked out, even if you aren't working on scrapers. This repository has the `docker-compose.yml` config and initialization scripts for the database. 14 | 15 | If you want to initialize the database for [openstates.org](https://github.com/openstates/openstates.org) work you'll need that project checked out as well. 16 | 17 | ## Initialize Database For Scraping 18 | 19 | 0. Run `init-db.sh` from within the `openstates-scrapers` directory: 20 | 21 | !!! warning 22 | 23 | If you've already run this before, running `scripts/init-db.sh` will reset your database to scratch! 24 | 25 | ``` console 26 | openstates-scrapers/$ ./scripts/init-db.sh 27 | + unset DATABASE_URL 28 | + docker-compose down 29 | Removing scrapers_db_1 ... done 30 | Removing network openstates-network 31 | + docker volume rm openstates-postgres 32 | openstates-postgres 33 | + docker-compose up -d db 34 | Creating network "openstates-network" with the default driver 35 | Creating volume "openstates-postgres" with default driver 36 | Creating scrapers_db_1 ... done 37 | + sleep 3 38 | + DATABASE_URL=postgis://openstates:openstates@db/openstatesorg 39 | + docker-compose run --rm --entrypoint 'poetry run os-initdb' scrape 40 | Creating scrapers_scrape_run ... done 41 | Operations to perform: 42 | Apply all migrations: contenttypes, data 43 | Running migrations: 44 | Applying contenttypes.0001_initial... OK 45 | Applying contenttypes.0002_remove_content_type_name... OK 46 | Applying data.0001_initial... OK 47 | Applying data.0002_auto_20200422_0028... OK 48 | Applying data.0003_auto_20200422_0031... OK 49 | 50 | ...TRUNCATED... 
51 | 52 | loading WY 53 | loading DC 54 | loading PR 55 | loading US 56 | ``` 57 | 58 | This will populate your database with the tables needed for scraping, as well as some basic static data such as the jurisdiction metadata. If all you want to do is run scrapers and import data into a database for inspection, you're good to go! 59 | 60 | ## Initialize Database for OpenStates.org 61 | 62 | !!! note 63 | 64 | You **must** run `openstates-scrapers`' init-db.sh as shown above first! 65 | 66 | From within the `openstates.org` directory run `docker/init-db.sh`: 67 | 68 | 69 | ``` console 70 | openstates.org/$ ./docker/init-db.sh 71 | + unset DATABASE_URL 72 | + docker-compose run --rm -e PYTHONPATH=docker/ --entrypoint 'poetry run ./manage.py migrate' django 73 | Creating openstatesorg_django_run ... done 74 | Operations to perform: 75 | Apply all migrations: account, admin, auth, bulk, bundles, contenttypes, dashboards, data, people_admin, profiles, sessions, sites, socialaccount 76 | Running migrations: 77 | Applying auth.0001_initial... OK 78 | 79 | ...TRUNCATED... 80 | 81 | docker-compose run --rm -e PYTHONPATH=docker/ --entrypoint 'poetry run ./manage.py shell -c "import testdata"' django 82 | ``` 83 | 84 | This creates the Django-specific tables, and also creates a local API key `testkey` that can be used for local development. 85 | 86 | ## Working with the Local Database 87 | 88 | The database will persist to disk, so for the most part once you run these steps you're good to go. 89 | 90 | ### Starting the Database 91 | 92 | You'll need to make sure that database is running whenever you're working on scrapers or OpenStates.org locally. 93 | 94 | You can do that by running `docker-compose up -d db` from the `openstates-scrapers` directory. 95 | 96 | ``` console 97 | openstates-scrapers$ docker-compose up -d db 98 | Starting scrapers_db_1 ... 
done 99 | ``` 100 | 101 | If it is already running, the output will look like: 102 | 103 | ``` console 104 | openstates-scrapers$ docker-compose up -d db 105 | scrapers_db_1 is up-to-date 106 | ``` 107 | 108 | ### Stopping the Database 109 | 110 | ``` console 111 | openstates-scrapers$ docker-compose stop db 112 | Stopping scrapers_db_1 ... done 113 | ``` 114 | 115 | ### Resetting the Database 116 | 117 | You can always run `scripts/init-db.sh` to reset your database. This can be good if you have some bad data, or just whenever you'd like a fresh start: 118 | 119 | ``` console 120 | openstates-scrapers/$ ./scripts/init-db.sh 121 | ``` 122 | -------------------------------------------------------------------------------- /docs/contributing/openstates-org.md: -------------------------------------------------------------------------------- 1 | # History of open.pluralpolicy.com / openstates.org 2 | 3 | open.pluralpolicy.com is the public-facing web app that has historically provided free democracy tools, API key registration, 4 | and access to bulk data. However, we are in the process of migrating functionality into the main Plural application. 5 | The Plural application provides continued [free legislative research and tracking tools](https://pluralpolicy.com/app/legislative-tracking) 6 | as well as the ability to [Find Your Legislators](https://pluralpolicy.com/open/). The Plural app is not open source, 7 | because our business model to support continued free democracy tools and expanded open data depends on providing 8 | premium policy intelligence features to organizational customers. 9 | 10 | For now, open.pluralpolicy.com will continue to provide a subset of related features: 11 | 12 | - Register and manage your API key 13 | - Bulk data downloads 14 | - v2 of the API until it is taken out of service (already deprecated, so please use the [v3 API](https://v3.openstates.org/docs)) 15 | 16 | The application is built in Django.
Even after migration is complete, we will keep the repository up for anyone who 17 | is interested in the open source code. The rest of this documentation is maintained here for reference. 18 | 19 | ## Checking out 20 | 21 | Fork and clone the openstates.org repository: 22 | 23 | - Visit <https://github.com/openstates/openstates.org> and click the 24 | 'Fork' button. 25 | 26 | - Clone your fork using your tool of choice or the command line: 27 | 28 | $ git clone git@github.com:yourname/openstates.org.git 29 | Cloning into 'openstates.org'... 30 | 31 | - Be sure to run `poetry install` to fetch the correct version of dependencies. 32 | - And remember to [install pre-commit](index.md#pre-commit) hooks: 33 | 34 | $ pre-commit install 35 | pre-commit installed at .git/hooks/pre-commit 36 | 37 | ## Getting a working database 38 | 39 | See [Running a Local Database](local-database.md) to get your database ready for OpenStates.org. 40 | 41 | ## Running Tests 42 | 43 | You can run the tests for the project via: 44 | 45 | ./docker/run-tests.sh 46 | 47 | You can also append standard pytest arguments such as `-x` to bail on first failure. 48 | 49 | Example of running just the graphapi tests, bailing on error: 50 | 51 | ./docker/run-tests.sh graphapi -x 52 | 53 | ## Repository overview 54 | 55 | The project is rather large, with quite a few Django apps; here's a 56 | quick guide: 57 | 58 | Django Apps: 59 | 60 | - bulk/ - handles bulk downloads on the website 61 | - dashboards/ - dashboards for viewing various statistics 62 | - geo/ - geography services for legislator lookup 63 | - graphapi/ - powers GraphQL API 64 | - profiles/ - user and subscription management 65 | - public/ - public-facing pages (bulk of the site) 66 | - utils/ - utilities shared by the other applications 67 | 68 | Other Stuff: 69 | 70 | - ansible/ - the files used to deploy OpenStates.org are here 71 | - docker/ - special scripts for running tests, etc.
within docker 72 | - openstates/ - core Django settings files 73 | - static/ - various static assets, including frontend code 74 | - templates/ - Django templates 75 | 76 | ## Running openstates.org 77 | 78 | Simply running `docker-compose up` should start Django & the database, 79 | then browse to <http://localhost:8000> and you'll be looking at your 80 | own local copy of openstates.org. In a separate terminal window, run `npm run build` and `npm run start` to see the 81 | site's React and style components. 82 | 83 | !!! note 84 | 85 | If you're running into issues with models not being found or an incorrectly configured virtual environment, running 86 | `docker-compose build` should help to fix it. 87 | 88 | If you have issues getting your instance up and running, please document the 89 | errors you're seeing and [reach out](../index.md#communication). 90 | 91 | ## Running outside of Docker 92 | 93 | It might be desirable to test outside of docker sometimes to bypass 94 | caching or other issues that make development within the docker 95 | environment difficult. If so, you can install 96 | [goreman](https://github.com/mattn/goreman) (or any foreman clone) and 97 | run `goreman start`. 98 | -------------------------------------------------------------------------------- /docs/contributing/people.md: -------------------------------------------------------------------------------- 1 | # Contributing People Data 2 | 3 | Person data is maintained in the 4 | [openstates/people](https://github.com/openstates/people) repository. 5 | This repository contains YAML files with all the information on given 6 | individuals and committees. 7 | 8 | !!! info 9 | 10 | Please note that this portion of the project is in the public 11 | domain in the United States with all copyright waived via a 12 | [CC0](https://creativecommons.org/publicdomain/zero/1.0/) dedication. By 13 | contributing you agree to waive all copyright claims.
14 | 15 | ## Checking out 16 | 17 | Fork and clone the people repository: 18 | 19 | - Visit <https://github.com/openstates/people> and click the 'Fork' button. 20 | 21 | - Clone your fork using your tool of choice or the command line: 22 | 23 | $ git clone git@github.com:yourname/people.git 24 | Cloning into 'people'... 25 | 26 | - Build the environment with `poetry`: 27 | 28 | $ poetry install 29 | Installing dependencies from lock file 30 | ... 31 | 32 | - And remember to install `pre-commit` hooks: 33 | 34 | $ pre-commit install 35 | pre-commit installed at .git/hooks/pre-commit 36 | 37 | ## Repository overview 38 | 39 | The repository consists of a few key components: 40 | 41 | - `settings.yml` Settings for state legislatures, including the number of seats, and current vacancies. 42 | - `data/` Data files in YAML format on legislators, organized by state & status. 43 | 44 | You can use the `os-people` and `os-committees` commands to manage the data: 45 | 46 | poetry run os-people --help 47 | 48 | or 49 | 50 | poetry run os-committees --help 51 | 52 | ## Common tasks 53 | 54 | ### Updating legislator data by hand 55 | 56 | Let's say you call a legislator and find out that they have a new phone 57 | number. Contribute it back! 58 | 59 | See [schema.md](https://github.com/openstates/people/blob/master/schema.md) 60 | for details on the acceptable fields. If you're looking to add a lot of 61 | data but are unsure where it fits, feel free to ask via an issue and we can 62 | either amend the schema or make a recommendation. 63 | 64 | 0. Start a new branch for this work 65 | 1. Make the edits you need in the appropriate YAML file. Please keep 66 | edits to a minimum (e.g. don't re-order fields) 67 | 2. Submit a PR; please describe how you came across this information to 68 | expedite review. 69 | 70 | ### Retiring a legislator 71 | 72 | 0. Start a new branch for this work 73 | 1. Add an `end_date` to their most recent role within the appropriate legislator's YAML file 74 | 2.
Run `poetry run os-people retire` with the appropriate legislator file(s) from the root directory 75 | 3. Review the automatically edited files & submit a PR. 76 | 77 | ### Updating an entire state's legislators via a scrape 78 | 79 | Let's say North Carolina has had an election & it makes sense to 80 | re-scrape everything for that state. 81 | 82 | 0. Start a new branch for this work (e.g. `nc-2021-people-update`) 83 | 1. Scrape data using [Open States' Scrapers](https://github.com/openstates/openstates-scrapers) 84 | 2. Run `poetry run os-people merge nc _scrapes/2021-01-01/001` against the generated JSON data from the scrape 85 | 4. Manually reconcile remaining changes; this will often require some retirements as well. 86 | 5. Check that data looks clean with `poetry run os-people lint nc --summary` 87 | 6. Commit your changes and prepare a PR. 88 | 89 | Example of the process: 90 | 91 | (In this example, we assume the `people` repo is stored at `~/gitroot/people` and the openstates-scrapers repo is stored at `~/gitroot/openstates-scrapers`) 92 | 93 | ```bash 94 | :#~/gitroot/openstates-scrapers$ poetry run spatula scrape scrapers_next.de.people.House 95 | INFO:scrapers_next.de.people.House:fetching https://legis.delaware.gov/json/House/GetRepresentatives 96 | INFO:scrapers_next.de.people.LegDetail:fetching https://legis.delaware.gov/LegislatorDetail?personId=13589 97 | INFO:scrapers_next.de.people.LegDetail:fetching https://legis.delaware.gov/LegislatorDetail?personId=332 98 | ... 99 | success: wrote 41 objects to _scrapes/2022-06-17/001 100 | :#~/gitroot/openstates-scrapers$ OS_PEOPLE_DIRECTORY=~/gitroot/people poetry run os-people merge de _scrapes/2022-06-17/001/ 101 | analyzing 120 existing people and 41 scraped 102 | perfect match 103 | perfect match 104 | perfect match 105 | other_names: append {'start_date': '', 'end_date': '', 'name': 'Gerald L. Brady'} 106 | name: Gerald L. Brady => Charles "Bud" M.
Freel 107 | email: Gerald.Brady@delaware.gov => Bud.Freel@delaware.gov 108 | offices changed from: 109 | Capitol Office address=411 Legislative Ave. Dover, DE 19901 voice=302-744-4351 110 | to 111 | Capitol Office address=411 Legislative Avenue Dover, DE 19901 voice=302-744-4351 112 | links: append {'url': 'https://legis.delaware.gov/LegislatorDetail?personId=24197', 'note': 'homepage'} 113 | sources: append {'url': 'https://legis.delaware.gov/LegislatorDetail?personId=24197', 'note': ''} 114 | (m)erge? (r)etire Gerald L. Brady? (s)kip? (a)bort? 115 | 116 | Aborted! 117 | ``` 118 | 119 | This example stops at step #4 because the final steps all require manual work. We _can_ use `--retirement <date>` to automatically retire members during this process, but this requires knowing that all potentially retired members left office on the same date. 120 | -------------------------------------------------------------------------------- /docs/contributing/scrapers.md: -------------------------------------------------------------------------------- 1 | # Contributing to Scrapers 2 | 3 | Scrapers are at the core of what Open States does. Each state requires 4 | several custom scrapers designed to extract bills, legislators, 5 | committees, and votes from the state website. Altogether there are 6 | around 200 scrapers, each one essentially independent, which means that 7 | there is always more work to do, but fortunately plenty of prior work to 8 | learn from. 9 | 10 | ## Checking Out 11 | 12 | Fork and clone the main scraper repository: 13 | 14 | - Visit <https://github.com/openstates/openstates-scrapers> and 15 | click the 'Fork' button. 16 | 17 | - Clone your fork using your tool of choice or the command line: 18 | 19 | $ git clone git@github.com:yourname/openstates-scrapers.git 20 | Cloning into 'openstates-scrapers'...
21 | 22 | - And remember to [install pre-commit](index.md#pre-commit) hooks: 23 | 24 | $ pre-commit install 25 | pre-commit installed at .git/hooks/pre-commit 26 | 27 | - Be sure to run `poetry install` to fetch the correct version of dependencies. 28 | 29 | !!! warning 30 | 31 | Before cloning on a Windows computer, you will need to disable 32 | line-ending conversion with `git config --global core.autocrlf false`. After 33 | cloning and entering the repo, you'll likely want to set global 34 | line-ending conversion back to true, and set local conversion to false. 35 | 36 | ## Repository Overview 37 | 38 | At this point you'll have a local `openstates-scrapers` directory. 39 | Within it, you'll find a directory called `scrapers`; let's take a look 40 | at it: 41 | 42 | $ ls scrapers 43 | __init__.py dc in mn nj pr va 44 | ak de ks mo nm ri vi 45 | al fl ky ms nv sc vt 46 | ar ga la mt ny sd wa 47 | az hi ma nc oh tn wi 48 | ca ia md nd ok tx wv 49 | co id me ne or ut wy 50 | ct il mi nh pa utils 51 | 52 | This directory has 50+ Python modules, one for each state. 53 | 54 | Let's look inside one: 55 | 56 | $ ls scrapers/nc 57 | __init__.py bills.py votes.py 58 | 59 | Some states' directories will differ a bit, but all will have 60 | `__init__.py` and `bills.py`. 61 | 62 | The `__init__.py` file for each state has basic metadata on the state 63 | including a list of sessions. 64 | 65 | Other files contain the scrapers, typically named `bills`, `votes`, etc. 66 | 67 | At the root, you'll also find a directory called `scrapers_next`. This directory also has Python modules for each state. 68 | 69 | Inside a state, you'll find `people` and potentially `committee` scrapers written using 70 | [spatula](https://jamesturk.github.io/spatula/). The plan is to port all scrapers to this framework and have 71 | `scrapers_next` replace the `scrapers` directory.
72 | 73 | ## Running Your First Scraper 74 | 75 | Let's run your state's bills scraper (substitute your state for 'nc' below) : 76 | 77 | $ docker-compose run --rm scrape nc bills --fastmode --scrape 78 | 79 | The parameters you pass after `docker-compose run --rm scrape` are 80 | passed to `os-update`. Here we're saying that we're running NC's 81 | scrapers, and that we want to do it in "fast mode". By default, 82 | `os-update` imports results into a postgres database; the `--scrape` 83 | flag skips that step. 84 | 85 | The following arguments are optional: To bring up a list of the optional arguments in the CLI use `-h`, `--help`\ 86 | `-h`, `--help` show this help message and exit\ 87 | `--debug` open debugger on error\ 88 | `--loglevel {LOGLEVEL}` set log level. options are: `DEBUG|INFO|WARNING|ERROR|CRITICAL (default is INFO)`\ 89 | `--scrape` only run scrape post-scrape step\ 90 | `--import` only run import post-scrape step\ 91 | `--nonstrict` skip validation on save\ 92 | `--fastmode` use cache and turn off throttling\ 93 | `--datadir {SCRAPED_DATA_DIR}` data directory\ 94 | `--cachedir {CACHE_DIR}` cache directory\ 95 | `-r {SCRAPELIB_RPM}` scraper rpm\ 96 | `--rpm {SCRAPELIB_RPM}` scraper rpm\ 97 | `--timeout {SCRAPELIB_TIMEOUT}` scraper timeout\ 98 | `--no-verify` skip tls verification\ 99 | `--retries {SCRAPELIB_RETRIES}` scraper retries\ 100 | `--retry_wait {SCRAPELIB_RETRY_WAIT_SECONDS}` scraper retry wait\ 101 | `--realtime` loads bills in realtime to database, this requires configuring an AWS S3 bucket and using the lambda function: [openstates-realtime](https://github.com/openstates/openstates-realtime) 102 | 103 | You'll see the *run plan*, which is what the update aims to capture; in 104 | this case we're scraping the state website's data into JSON files: 105 | 106 | nc (scrape) 107 | bills: {} 108 | 109 | Then legislative posts and organizations get created, which is mostly 110 | boilerplate: 111 | 112 | 08:46:35 INFO openstates: save 
jurisdiction North Carolina as jurisdiction_ocd-jurisdiction-country:us-state:nc-government.json 113 | 08:46:35 INFO openstates: save organization North Carolina General Assembly as organization_01d6327c-72d2-11e7-8df8-0242ac130003.json 114 | 08:46:35 INFO openstates: save organization Executive Office of the Governor as organization_01d63560-72d2-11e7-8df8-0242ac130003.json 115 | 08:46:35 INFO openstates: save organization Senate as organization_01d636e6-72d2-11e7-8df8-0242ac130003.json 116 | 08:46:35 INFO openstates: save post 1 as post_01d63a06-72d2-11e7-8df8-0242ac130003.json 117 | 08:46:35 INFO openstates: save post 2 as post_01d63b96-72d2-11e7-8df8-0242ac130003.json 118 | 08:46:35 INFO openstates: save post 3 as post_01d63cea-72d2-11e7-8df8-0242ac130003.json 119 | 08:46:35 INFO openstates: save post 4 as post_01d63e34-72d2-11e7-8df8-0242ac130003.json 120 | 08:46:35 INFO openstates: save post 5 as post_01d63f74-72d2-11e7-8df8-0242ac130003.json 121 | 122 | And then the actual data scraping begins, defaulting to the most recent 123 | legislative session: 124 | 125 | 08:46:36 INFO openstates: no session specified, using 2017 126 | 08:46:36 INFO scrapelib: GET - http://www.ncga.state.nc.us/gascripts/SimpleBillInquiry/displaybills.pl?Session=2017&tab=Chamber&Chamber=Senate 127 | 08:46:38 INFO scrapelib: GET - http://www.ncga.state.nc.us/gascripts/BillLookUp/BillLookUp.pl?Session=2017&BillID=S1 128 | 08:46:39 INFO openstates: save bill SR 1 in 2017 as bill_03c7edb4-72d2-11e7-8df8-0242ac130003.json 129 | 08:46:39 INFO scrapelib: GET - http://www.ncga.state.nc.us/gascripts/BillLookUp/BillLookUp.pl?Session=2017&BillID=S2 130 | 08:46:39 INFO openstates: save bill SJR 2 in 2017 as bill_044a5fc4-72d2-11e7-8df8-0242ac130003.json 131 | 08:46:39 INFO scrapelib: GET - http://www.ncga.state.nc.us/gascripts/BillLookUp/BillLookUp.pl?Session=2017&BillID=S3 132 | 08:46:40 INFO openstates: save bill SB 3 in 2017 as bill_04e8c66e-72d2-11e7-8df8-0242ac130003.json 133 | 08:46:40 INFO 
scrapelib: GET - http://www.ncga.state.nc.us/gascripts/BillLookUp/BillLookUp.pl?Session=2017&BillID=S4 134 | 08:46:41 INFO openstates: save bill SB 4 in 2017 as bill_05781f08-72d2-11e7-8df8-0242ac130003.json 135 | 08:46:41 INFO scrapelib: GET - http://www.ncga.state.nc.us/gascripts/BillLookUp/BillLookUp.pl?Session=2017&BillID=S5 136 | 137 | Depending on the scraper you run, this part takes a while. Some scrapers 138 | can take hours to run depending on the number of bills and speed of the 139 | state's website. 140 | 141 | !!! note 142 | 143 | It is often desirable to bail out of running the whole scrape (Ctrl-C) 144 | after it has gotten a bit of data, instead of letting it run the entire 145 | scrape. 146 | 147 | To review the data you just fetched, you can browse the \_data/nc/ 148 | directory and inspect the JSON files. If you're trying to make a small 149 | fix this is often sufficient, you can confirm that the scraped data 150 | looks correct and move on. 151 | 152 | Please see our document on [Querying Scraper Output Data](../data/query-scraper-output-data.md) 153 | for tools you can use to investigate data issues across a set of many 154 | scraped data output files. 155 | 156 | 157 | !!! note 158 | It is of course possible that the scrape fails. If so, there's a good 159 | chance that isn't your fault, especially if it starts to run and then 160 | errors out. Scrapers do break, and there's no guarantee North Carolina 161 | didn't change their legislator page yesterday, breaking our tutorial 162 | here. 163 | 164 | If that's the case and you think the issue is with the scraper, feel 165 | free to get in touch with us or [file an 166 | issue](https://github.com/openstates/openstates/issues). 167 | 168 | 169 | At this point you're ready to run scrapers and contribute fixes. Hop 170 | onto [our GitHub ticket queue](https://github.com/openstates/openstates/issues), pick an issue 171 | to solve, and then submit a Pull Request! 
172 | 173 | ## Importing Data 174 | 175 | Optionally, if you'd like to see how your scraped data imports into the 176 | database, perhaps to diagnose an issue that is happening after the 177 | scrape, pop over to 178 | [Running a Local Database](local-database.md) to see how to get a local database that you can import data 179 | into. 180 | 181 | Once that's done, make sure that the db image from openstates.org is running: 182 | 183 | $ docker ps 184 | CONTAINER ID IMAGE COMMAND CREATED STATUS PORTS NAMES 185 | 27fe691ad7c5 mdillon/postgis:11-alpine "docker-entrypoint.s…" 3 hours ago Up 3 hours 0.0.0.0:5405->5432/tcp openstatesorg_db_1 186 | 187 | Your output will vary, but if you don't see something named 188 | openstatesorg_db running you should run this command (from the 189 | openstates.org directory, not your scraper directory): 190 | 191 | $ docker-compose up -d db 192 | 193 | Now, when you want to run imports, you can drop the `--scrape` portion 194 | of the command you've been running. Or if you just want to test the 195 | import of already scraped data you can replace it with `--import`. 196 | 197 | An import looks something like this: 198 | 199 | $ docker-compose run --rm scrape fl bills --fast 200 | ... (truncated) ... 201 | 23:03:34 ERROR openstates: cannot resolve pseudo id to Person: ~{"name": "Grant, M."} 202 | 23:03:36 ERROR openstates: cannot resolve pseudo id to Person: ~{"name": "Rodrigues, R."} 203 | fl (import) 204 | bills: {} 205 | import: 206 | bill: 0 new 0 updated 2620 noop 207 | jurisdiction: 0 new 0 updated 1 noop 208 | vote_event: 21 new 12 updated 533 noop 209 | 210 | The errors about unresolved pseudo-ids can safely be ignored; as long as 211 | you see the final run report, the data you scraped is available in your 212 | database.
213 | 214 | The number of objects of each type that were created & updated are 215 | available for spot checking, as well as the total number of items that 216 | were seen that already exactly matched what was in the database. These 217 | can be useful stats as you try to see if your local changes to a scraper 218 | have the impact you expect. 219 | 220 | ## Running Spatula Scrapers 221 | 222 | Let's run a people scraper: 223 | 224 | $ poetry run spatula scrape scrapers_next.nc.people.SenList 225 | 226 | The command to run these scrapers is structured differently, as the parameters 227 | are set by giving the exact location of the function you want to run: `directory.state.file.function`. 228 | 229 | !!! note 230 | Function names do vary and scrapes for legislators are commonly split by chamber, so make sure to check you're 231 | passing the right function in your command. 232 | 233 | The actual data scraping should look something like: 234 | 235 | INFO:scrapers_next.nc.people.SenList:fetching https://www.ncleg.gov/Members/MemberTable/S 236 | INFO:scrapers_next.nc.people.LegDetail:fetching https://www.ncleg.gov/Members/Biography/S/430 237 | INFO:scrapers_next.nc.people.LegDetail:fetching https://www.ncleg.gov/Members/Biography/S/431 238 | INFO:scrapers_next.nc.people.LegDetail:fetching https://www.ncleg.gov/Members/Biography/S/432 239 | INFO:scrapers_next.nc.people.LegDetail:fetching https://www.ncleg.gov/Members/Biography/S/433 240 | INFO:scrapers_next.nc.people.LegDetail:fetching https://www.ncleg.gov/Members/Biography/S/434 241 | 242 | To review the data you scraped, you can inspect the JSON files in the dated directory within `_scrapes/`. Each time you 243 | run a scrape, a new numbered folder will be within the dated directory, so you can compare older data to new easily. 244 | 245 | !!! note 246 | If a scrape fails, it's likely an issue with the scraper. 
Feel free to get in touch with us or [file an 247 | issue](https://github.com/openstates/openstates/issues). 248 | 249 | [Spatula](https://jamesturk.github.io/spatula/) is incredibly powerful with lots of flexibility and useful [CLI 250 | commands](https://jamesturk.github.io/spatula/cli/) that are worth checking out as well. 251 | 252 | At this point you're ready to run spatula scrapers and contribute fixes. Hop 253 | onto [our GitHub ticket queue](https://github.com/openstates/openstates/issues), pick an issue 254 | to solve, and then submit a Pull Request! 255 | -------------------------------------------------------------------------------- /docs/contributing/state-specific.md: -------------------------------------------------------------------------------- 1 | # State-Specific Scraper Info 2 | 3 | ## California MySQL 4 | 5 | California is a unique state that takes a couple of extra steps to get 6 | working locally. 7 | 8 | California provides MySQL dumps of their data, and in order to use those 9 | we start up a local MySQL instance and read from that. 10 | 11 | To download the data for the current session: 12 | 13 | docker-compose run --rm ca-download 14 | 15 | (You can append --year YYYY to instead select data for a given year.) 16 | 17 | This will start a local MySQL image as well, that image will need to 18 | stay up for the next step, which is running a scrape like normal: 19 | 20 | docker-compose run --rm ca-scrape ca bills --fast 21 | 22 | ## State API Keys 23 | 24 | Unfortunately, some states find it necessary to require API Keys (or 25 | other credentials) to access their best data. 26 | 27 | Despite the difficulties this creates for contributors, in the interest 28 | of ensuring we have the best possible data we've made the decision that 29 | we will use this data where possible. 30 | 31 | Our policy: 32 | 33 | - We will maintain (when possible) two copies of credentials, one for 34 | development and one for production. 
(Thus minimizing the chance that 35 | a mistake made w/ a development key will jeopardize our ability to 36 | scrape.) 37 | - We encourage developers to get an API key of their own, but if 38 | necessary we can share our testing key in limited circumstances. 39 | 40 | Currently only a few states require API keys: 41 | 42 | ### New York 43 | 44 | 45 | 46 | - Request Form: 47 | - Set in environment prior to running scrape: `NEW_YORK_API_KEY` 48 | 49 | ### Indiana 50 | 51 | 52 | 53 | - API Key Request Process: Email Bob Amos ( 54 | or ), and include your name, address, phone, 55 | email address and company. Also indicate that you have read the 56 | terms of service at the link above. 57 | - Set in environment prior to running scrape: `INDIANA_API_KEY` 58 | - As a side note, Indiana also requires a [user-agent string](https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/User-Agent), 59 | so set that in your environment as well, prior to running 60 | scrape: `USER_AGENT` 61 | 62 | ### Virginia 63 | 64 | 65 | 66 | - API Credentials Request Process: To acquire access, please 67 | contact the Virginia Legislative Information System help desk 68 | at (804) 786-9631 for a user id. 69 | - Set in environment prior to running scrape: `VIRGINIA_FTP_USER`, 70 | `VIRGINIA_FTP_PASSWORD` 71 | 72 | ### District of Columbia 73 | 74 | 75 | 76 | - API Key Request Form: 77 | 78 | - Set in environment prior to running scrape: `DC_API_KEY` 79 | -------------------------------------------------------------------------------- /docs/contributing/testing-scrapers.md: -------------------------------------------------------------------------------- 1 | # Testing Scrapers 2 | 3 | One of the first things people new to the project tend to notice is that 4 | there aren't a lot of tests in the scrapers. 5 | 6 | Over the years we've evolved a de facto policy of somewhat discouraging 7 | tests, which is definitely an unusual stance to take and warrants 8 | explanation. 
9 | 10 | ## Intentionally Fragile Scrapers 11 | 12 | When it comes to scrapers, there are two major types of breakage: 13 | 14 | 1) the scraper collects bad information and inserts it into the 15 | database 16 | 2) the scraper encounters an error and quits without importing data 17 | 18 | Given a choice, the second is greatly preferable. Once bad data makes it 19 | into the database, it can be difficult to detect and remove. On the 20 | other hand, the second can be triggered to alert us immediately and 21 | someone can evaluate the proper fix. 22 | 23 | The best way to favor the second over the first is to write "intentionally 24 | fragile" scrapers. That is, scrapers that raise an exception when they 25 | see unexpected input. 26 | 27 | While it is possible to try to write a resilient scraper that recovers, 28 | by nature these scrapers are more likely to produce the first kind of 29 | error, and so we encourage scraper writers to be conservative in what 30 | errors are suppressed. 31 | 32 | Here's an example of an overly permissive scraper: 33 | 34 | party_abbr = doc.xpath('//span[@class="partyabbr"]') 35 | if party_abbr == 'D': 36 | party = 'Democratic' 37 | elif party_abbr == 'R': 38 | party = 'Republican' 39 | else: 40 | # haven't seen this yet, but let's just keep things moving 41 | party = party_abbr 42 | 43 | The following would be preferred: 44 | 45 | party_abbr = doc.xpath('//span[@class="partyabbr"]') 46 | party = {'D': 'Democratic', 'R': 'Republican'}[party_abbr] 47 | 48 | This code would raise a `KeyError` the first time a new party is found. 49 | This forces someone to take a look, fix the scraper with an entry for 50 | the new party, and then the scraper will be able to run again with 51 | correct data. 52 | 53 | ## Testing Scrapers Is Hard 54 | 55 | On most software projects a failing test means that something is broken, 56 | and passing tests should mean that things are working just fine. 
57 | 58 | In our experience however, the majority of the "breaks" that occur in 59 | scrapers are due to upstream site changes. 60 | 61 | In the past the fragile nature of scrapers has led to people writing a 62 | lot of bad tests, which is where our stance of somewhat discouraging 63 | tests has come from. An example of a bad test: 64 | 65 | def extract_name(doc): 66 | return doc.xpath('//h2[@class="legislatorName"]').text_content().strip() 67 | 68 | 69 | def test_extract_name(): 70 | # probably a snapshot of the page at some point in time 71 | EXAMPLE_LEGISLATOR_HTML = '...' 72 | 73 | doc = lxml.html.fromstring(EXAMPLE_LEGISLATOR_HTML) 74 | assert extract_name(doc) == 'Erica Example' 75 | 76 | With a test like this: 77 | 78 | - As soon as the HTML changes, the scraper will start failing, but the tests will still pass. 79 | - The scraper will then be updated, breaking the test. 80 | - The test HTML will be updated, fixing the test. 81 | 82 | But since the initial scraper breakage isn't predicted by a failing 83 | test, this type of test really doesn't serve us any purpose and just 84 | results in extra code to maintain every time the scraper needs a slight 85 | change. 86 | 87 | ## Other Strategies 88 | 89 | Of course this isn't to say that we just abandon the idea of testing, 90 | altogether. 91 | 92 | If you're more comfortable writing tests, say you're parsing a 93 | particularly nasty PDF and want to run it against some test data: a test 94 | might make sense there as a way to be confident in your own code, by all 95 | means, write a test. 96 | 97 | We also have some other strategies to help ensure data quality: 98 | 99 | ### Validate Scraper Output 100 | 101 | Scraper output is verified against JSON schemas that protect against 102 | common regressions (missing sources, invalid formatted districts, etc.) 
103 | - most of these tests can be written effectively against scraper output 104 | across the board, and in doing so also applies universally across all 50 105 | states. 106 | 107 | We also aim for our underlying libraries like 108 | [openstates-core](https://github.com/openstates/openstates-core) to be 109 | as well tested as possible. (To be 100% clear, our lax testing 110 | philosophy only applies to site-specific scraper code, not these support 111 | libraries.) 112 | 113 | ### Run Scrapers Regularly 114 | 115 | In a sense, the scrapers are tested every night by being run. This is 116 | why the intentionally fragile approach is so important; those failures 117 | are in essence the same as integration test failures. Of course, this 118 | doesn't tell us if the scraper is picking up bad data, etc., but 119 | combined with validation we can be fairly confident in our data. 120 | 121 | ### Test Utilities 122 | 123 | One area we can definitely improve upon is our use of (and then thorough 124 | testing of) common functions. Right now (largely because of the great 125 | variety of authors, etc.) many scrapers do similar things like 126 | conversion of party abbreviations and whitespace normalization in 127 | slightly different ways. We should be making a push to use common 128 | utility functions and thoroughly test those. 129 | -------------------------------------------------------------------------------- /docs/contributing/text-extraction.md: -------------------------------------------------------------------------------- 1 | # Text Extraction 2 | 3 | The `bill scrapers ` scrape the web and pull down metadata, including links to 4 | various versions of the bills. As a later step, we extract the actual 5 | text of the bill so that it can be indexed for search and other uses. 6 | 7 | ## Checking out 8 | 9 | Fork and clone the text-extraction repository: 10 | 11 | - Visit and click 12 | the 'Fork' button. 
13 | 14 | - Clone your fork using your tool of choice or the command line: 15 | 16 | $ git clone git@github.com:yourname/text-extraction.git 17 | Cloning into 'text-extraction'... 18 | 19 | - And remember to 20 | `install pre-commit `: 21 | 22 | $ pre-commit install 23 | pre-commit installed at .git/hooks/pre-commit 24 | 25 | ## Repository overview 26 | 27 | The text extraction code itself is written as a standalone Python script 28 | `text_extract.py` that uses configuration and utility functions from 29 | within `extract/`. 30 | 31 | You'll also notice a directory called `raw/` -- this contains a 32 | sampling of bills for each state that we can use to test 33 | text-extraction. 34 | 35 | Typically if you're making changes in the repository you'll be editing 36 | files within `extract/`, we'll come back to that later. 37 | 38 | ## Running text_extract 39 | 40 | Just like in other repositories, we'll use docker-compose to run the 41 | code. In this case docker-compose is running `text_extract.py`, an 42 | all-in-one tool that has a few useful subcommands: 43 | 44 | Usage: text_extract.py [OPTIONS] COMMAND [ARGS]... 45 | 46 | Options: 47 | --help Show this message and exit. 48 | 49 | Commands: 50 | reindex-state rebuild the search index objects for a given state 51 | sample obtain a sample of bills to extract text from 52 | status print a status table showing the current condition of... 53 | test run sample on all states, used for CI 54 | update update the saved bill text in the database 55 | 56 | For the purposes of development, `sample` and `update` are the only two 57 | commands that you'll need to look at. 
58 | 59 | Let's go ahead and run sample against NC: 60 | 61 | $ docker-compose run --rm text-extract sample nc 62 | raw/nc/2017-HR 924-Edition 1.pdf => text/nc/2017-HR 924-Edition 1.pdf.txt (1507 bytes) 63 | raw/nc/2017-HB 1034-Edition 1.pdf => text/nc/2017-HB 1034-Edition 1.pdf.txt (3096 bytes) 64 | raw/nc/2019-SB 421-Edition 1.pdf => text/nc/2019-SB 421-Edition 1.pdf.txt (961 bytes) 65 | raw/nc/2019-HB 430-Edition 1.pdf => text/nc/2019-HB 430-Edition 1.pdf.txt (4831 bytes) 66 | raw/nc/2017-SB 753-Edition 1.pdf => text/nc/2017-SB 753-Edition 1.pdf.txt (719 bytes) 67 | raw/nc/2019-HB 788-Edition 1.pdf => text/nc/2019-HB 788-Edition 1.pdf.txt (2674 bytes) 68 | raw/nc/2017-SB 373-Filed.pdf => text/nc/2017-SB 373-Filed.pdf.txt (18538 bytes) 69 | raw/nc/2019-SB 574-Filed.pdf => text/nc/2019-SB 574-Filed.pdf.txt (1712 bytes) 70 | raw/nc/2017-SJR 686-Resolution 2017-12.pdf => text/nc/2017-SJR 686-Resolution 2017-12.pdf.txt (15928 bytes) 71 | raw/nc/2017-HB 1007-Filed.pdf => text/nc/2017-HB 1007-Filed.pdf.txt (6248 bytes) 72 | nc: processed 10, 0 skipped, 0 missing, 0 empty 73 | 74 | The exact output and number of bills will vary across states, but should 75 | be pretty similar. 76 | 77 | This command just did a lot: 78 | 79 | > - Read in the file `raw/nc.csv` to get a list of bills to sample. 80 | > - Downloaded those files (assuming this was the first run) to 81 | > `raw/nc/` so future runs will be faster. 82 | > - Used the extraction function(s) defined in `extract/__init__.py` 83 | > for NC to extract text from the given documents. 84 | > - Wrote that output to `text/nc/` so you can compare. 85 | 86 | You'll also notice that it helpfully prints the number of bytes of text 87 | extracted, this is useful as a first check. Let's go ahead and look at 88 | the shortest one, `text/nc/2017-SB 753-Edition 1.pdf.txt`. (Your run may 89 | differ, pick whichever you prefer.) 
: 90 | 91 | $ cat "text/nc/2017-SB 753-Edition 1.pdf.txt" 92 | A BILL TO BE ENTITLED 93 | AN ACT PROVIDING THAT THE DEPOSIT OF CURRENCY AND COINS INTO A CASH 94 | VAULT THAT PHYSICALLY SECURES THE CASH AND ELECTRONICALLY 95 | RECORDS THE DEPOSIT DAILY IN AN OFFICIAL DEPOSITORY BANK QUALIFIES 96 | AS A DAILY DEPOSIT UNDER THE LOCAL GOVERNMENT BUDGET AND FISCAL 97 | CONTROL ACT FOR FRANKLIN AND WAKE COUNTIES AND THE 98 | MUNICIPALITIES IN THOSE COUNTIES. 99 | The General Assembly of North Carolina enacts: 100 | SECTION 1. Section 2 of S.L. 2011-89 reads as rewritten: 101 | "SECTION 2. This act applies only to the City of Winston-Salem only.Winston-Salem, 102 | Franklin County and the municipalities in Franklin County, and Wake County and the 103 | municipalities in Wake County." 104 | SECTION 2. This act is effective when it becomes law. 105 | 106 | This looks complete, but to check, go ahead and open the equivalent 107 | source file, in this case `raw/nc/2017-SB 753-Edition 1.pdf` and confirm 108 | visually that all the text was extracted. Don't worry about formatting, 109 | or the preamble, as we'll often exclude that and just aim for the 110 | interesting bits of the text. 111 | 112 | ## Making changes 113 | 114 | Let's say that we discover that a state has started publishing their 115 | bills in a new format. Perhaps Alabama switches from PDF to HTML. It'd 116 | first be good to add some of these new bills to the sample csv, which 117 | you can do manually or by invoking sample with the `--resample` flag: 118 | 119 | docker-compose run --rm text-extract sample --resample al 120 | 121 | Running would result in some warnings being printed and some zero byte 122 | files. 123 | 124 | To actually handle the HTML documents we'd open up 125 | `extract/__init__.py` and find the `CONVERSION_FUNCTIONS` dictionary, 126 | you'll see a line like: 127 | 128 | CONVERSION_FUNCTIONS = { 129 | "al": {"application/pdf": extract_line_numbered_pdf}, 130 | ... 
131 | 132 | The way extraction works is by matching a document found in a scrape to 133 | an appropriate function, in this case PDFs will be sent through the 134 | `extract_line_numbered_pdf` function. 135 | 136 | If the new HTML was wrapped in a given element, perhaps with 137 | `
` we could just update that line to look like: 138 | 139 | CONVERSION_FUNCTIONS = { 140 | "al": { 141 | "application/pdf": extract_line_numbered_pdf, 142 | "text/html": extractor_for_element_by_id("billtext"), 143 | }, 144 | ... 145 | 146 | And we'd be good to go. 147 | 148 | ## Tips & Tricks 149 | 150 | - Functions already exist for common configurations of PDF, HTML, Word 151 | Doc, and even OCR. Rarely will you need to write a custom function, 152 | always look at the options first. 153 | - When dealing with PDFs, most are either handled by 154 | `extract_line_numbered_pdf` or `extract_sometimes_numbered_pdf`, the 155 | difference is that "sometimes numbered PDF" accounts for cases 156 | where 90% or so of bills are numbered, but a few (often resolutions) 157 | are not numbered. 158 | 159 | ## Formatting Guidelines 160 | 161 | **How far do we go? Should we strip punctuation? Newlines? Whitespace? 162 | Section headings?** 163 | 164 | - Try not to be too aggressive with punctuation stripping, search 165 | indices/etc. can easily strip it later, but it can be handy if 166 | someone decides they want to search for things like "§ 143C-4-8.b" 167 | - Ideally leave newlines as-is since it makes looking at changes a lot 168 | nicer for humans and stripping newlines out for final products 169 | (search/text comparison/etc.) is trivial. 170 | - Collapsing spaces/etc. is recommended, but not required. 171 | - Removal of section headers/etc. is fine, but if the only reason 172 | you're writing a new function instead of using a common one is to 173 | do this, reconsider. 174 | 175 | When in doubt, **ask**, you may have encountered something we haven't 176 | considered yet and we can discuss the best practice and add it here. 177 | 178 | **Should we include bill digests?** 179 | 180 | There isn't a need to, but it doesn't hurt if separating the two is difficult. 
181 | -------------------------------------------------------------------------------- /docs/contributing/writing-a-committee-scraper.md: -------------------------------------------------------------------------------- 1 | # Writing a Committee Scraper 2 | 3 | ## For Those Completely New to Writing Scrapers 4 | - [What do we mean when we use the term “web scraping”? What is a scraper?](https://data-lessons.github.io/library-webscraping-DEPRECATED/01-introduction/) 5 | - [How do you write a web scraper?](https://www.edureka.co/blog/web-scraping-with-python/) 6 | * An overview from the source above: 7 | * When you run the code for web scraping, a request is sent to the URL that you have mentioned. As a response to the request, the server sends the data and allows you to read the HTML or XML page. The code then parses the HTML or XML page, finds the data and extracts it. 8 | * To extract data using web scraping with python, you need to follow these basic steps: 9 | * Find the URL that you want to scrape 10 | * Find the data you want to extract 11 | * Write the code 12 | * Run the code and extract the data 13 | * Store the data in the required format 14 | 15 | 16 | ## For those new to Plural Open / Open States: 17 | - [What is Plural Open?](https://docs.openstates.org/#about-plural-open) 18 | - [What is an open source project?](https://opensource.com/resources/what-open-source) 19 | - [How do you contribute to Plural Open?](https://docs.openstates.org/contributing/#getting-started) 20 | - What projects are we looking for contributions on? 21 | * [Committee Scrapers](https://github.com/openstates/issues/issues?q=is%3Aissue+is%3Aopen+label%3A%22good+first+scraper%22) 22 | * [Other issues](https://github.com/openstates/issues/issues) 23 | 24 | 25 | ## Writing Committee Scrapers 26 | ### Background: What are legislative committees? 
27 | This information *isn’t necessary for writing scrapers*, but if you’re interested in more context or want to know why committee data is important, this is where you’ll find it! 28 | 29 | Legislative committees are groups of legislators selected by House and Senate leadership to consider legislation concerning a certain subject or set of subjects (ex: Appropriations, Environment & Natural Resources, Criminal Justice & Public Safety). While most state legislatures have separate House and Senate committees, a few states (CT, ME, MA, and NE) have just one set of committees. Most legislators serve on between 2 and 5 committees each session. 30 | 31 | Standing committees are permanent committees that are created at the beginning of the legislative session in the House, Senate, or Joint Rules. Other committee types (select committees, interim committees, study committees) are time-limited and often created by legislation or resolutions. 32 | 33 | Because the vast majority of legislation goes through the committee process, and because the committee membership has such an outsized role on whether that legislation lives or dies, it's incredibly important that people have accurate and up to date information about committees, their membership, and their work. 34 | 35 | ### Writing a Committee Scraper, Step by Step 36 | - How do I set up my environment? 37 | - Make sure you've walked through our [installation prerequisites](https://docs.openstates.org/contributing/#installing-prerequisites) 38 | - Where should my code live? 39 | - All committee scrapers live in the `scrapers_next` directory of the [openstates-scrapers](https://github.com/openstates/openstates-scrapers) repository 40 | - When you're ready to commit your code, this is where your new committee scraper will live too! 41 | - What are the steps for working on my committee scraper locally? 42 | 1. 
Fork the openstates-scrapers repository ([what is a fork](https://docs.github.com/en/get-started/quickstart/fork-a-repo#forking-a-repository)?) 43 | 2. Find the state you want to write a committee scraper for under the `scrapers_next` directory 44 | 3. Create a `committees.py` file inside of that state's directory 45 | 4. Write your committee scraper code! 46 | - What information does a committee scraper need to grab? 47 | * Name of the committee 48 | * Chamber (upper/lower/etc) 49 | * Classification (committee, subcommittee) 50 | * Parent (if it is a subcommittee) 51 | * Sources (home page for the list of committees, specific page for that committee, etc) 52 | * Members (name and role on the committee) 53 | * Example of complete data: 54 | ```json 55 | { 56 | "name": "Revenue", 57 | "chamber": "upper", 58 | "classification": "committee", 59 | "parent": null, 60 | "sources": [ 61 | {"url": "https://www.ilga.gov/senate/committees/default.asp", "note": "homepage"}, 62 | {"url": "https://www.ilga.gov/senate/committees/members.asp?CommitteeID=2688", "note": ""}], 63 | "links": [], 64 | "other_names": [], 65 | "members": [ 66 | {"name": "Mattie Hunter", "role": "Chair", "person_id": null}, 67 | {"name": "Steve Stadelman", "role": "Vice-Chair", "person_id": null}, 68 | {"name": "Robert F. Martwick", "role": "Member", "person_id": null}, 69 | {"name": "Cristina H. Pacione-Zayas", "role": "Member", "person_id": null}, 70 | {"name": "Robert Peters", "role": "Member", "person_id": null}, 71 | {"name": "Elgie R. Sims, Jr.", "role": "Member", "person_id": null}, 72 | {"name": "Donald P. DeWitte", "role": "Minority", "person_id": null}, 73 | {"name": "Dale Fowler", "role": "Member", "person_id": null}, 74 | {"name": "Jil Tracy", "role": "Member", "person_id": null}, 75 | {"name": "Sally J. Turner", "role": "Member", "person_id": null}], 76 | "extras": {} 77 | } 78 | ``` 79 | - What does a complete committee scraper look like? 
80 | * [An example](https://github.com/openstates/openstates-scrapers/blob/main/scrapers_next/mo/committees.py) committee scraper 81 | * The way committee scrapers look will vary depending on how the website for the state is set up. Most of them will have a list of committees for the House, a list of committees for the Senate, and individual pages for each committee that includes details about membership and/or subcommittees 82 | - I know what information I want to grab, but how do I grab it? 83 | * Grab the information you need using selectors (<- a guide to selectors) 84 | * Getting the selector/XPath on chrome: 85 | * Right click the item you want to grab 86 | * Click “inspect” 87 | * ![Selector Tutorial Image](./images/selector_ex1.png "Selector Example 1") 88 | * A panel will pop up and the element that you’re inspecting should be highlighted. Click on the three dots on the left side of the highlight 89 | * ![Selector Tutorial Image](./images/selector_ex2.png "Selector Example 2") 90 | * Hover over “copy” and click on either selector (for a CSS selector) or XPath to copy it 91 | * ![Selector Tutorial Image](./images/selector_ex3.png "Selector Example 3") 92 | * There aren’t any rules about whether XPath or CSS selectors are better. Use whichever helps you grab the information you need! 93 | 94 | ### General Scraper Writing Tips 95 | - What are helpful tools for writing scrapers? 96 | * The most helpful tool that you can use to write a committee scraper is the python package [spatula](https://jamesturk.github.io/spatula/) 97 | * [A walkthrough](https://jamesturk.github.io/spatula/scraper-basics/) on how to write a scraper using spatula 98 | * How do I run a scraper? 99 | * Follow the instructions [here](https://docs.openstates.org/contributing/scrapers/#running-spatula-scrapers). If you haven’t read the [Getting Started](https://docs.openstates.org/contributing/#getting-started), make sure you do that first! 
100 | * The command for running a spatula scraper is `poetry run spatula scrape {directory your python file is in}.{state your scraper is for}.committees.{function (optional)}` 101 | * Example: To run the entire committee scraper for Missouri, the command would be `poetry run spatula scrape scrapers_next.mo.committees` 102 | * Help! My code doesn't work! 103 | * Stuck? Think it through and ask us for help! 104 | * What’s not working? What do you think should happen? What’s actually happening? 105 | * Exception handling can be great for weird cases, like a specific committee’s information being unavailable 106 | * Spatula lets you easily skip over weird cases 107 | * Debug and quickly test if your CSS or Xpath selector is accurate with spatula test for a single page 108 | * Before running spatula scrape, you can test the logic of what you’ve already written (and essentially get a quick “preview”) in the terminal. This is especially helpful for making sure you’re using the correct CSS or Xpath selector for an element. 109 | * You can set an [example_source](https://jamesturk.github.io/spatula/data-models/#example_source) as a default URL to keep running spatula scrape on 110 | * Example use case: After scraping a list of committees on one page, I now need specific information about each committee. Each committee has its own webpage (that we navigate to from that list of committees), and the format for each webpage is fairly similar (this is most likely the case!). I can set the `example_source` as a single committee’s more-detailed webpage, and as I write that part of the scraper, I can keep running spatula test to see in my terminal that I’m grabbing the correct information from that specific webpage. 111 | * This should save time and prevent some headaches! 
112 | 113 | 114 | 115 | 116 | 117 | 118 | -------------------------------------------------------------------------------- /docs/data/categorization.md: -------------------------------------------------------------------------------- 1 | Categorization 2 | ============== 3 | 4 | One of the ways that we add value to the data we provide is by 5 | attempting to classify bills, actions, and votes across states. 6 | 7 | This allows us to let states use their own names for these things, but 8 | for us to try to provide some mapping to a common simplified view of the 9 | legislative process. 10 | 11 | Bill Types 12 | ---------- 13 | 14 | State legislatures deal with more than bills. Despite the name of the 15 | bill objects in our data we take in all types of legislation that a 16 | state might produce. Generally looking at the bill_id will help you 17 | determine the type of legislation, but to make things easier across 18 | states we provide a type field on bills. This field is a list with one 19 | (or more) of the following values: 20 | 21 | Common Values: 22 | 23 | - bill 24 | - resolution 25 | - joint resolution 26 | - concurrent resolution 27 | - constitutional amendment 28 | 29 | Some states also make use of additional types such as 'contract', 30 | 'nomination', 'memorial' and more. 31 | 32 | Action Types 33 | ------------ 34 | 35 | Although most states follow very similar parliamentary procedure the 36 | names that their bill status systems use for various actions almost 37 | never match up. To make analysis and the building of certain types of 38 | tools easier we attempt to classify common actions. In using our data 39 | you'll find these values in the classification field of actions. 
40 | 41 | - filing - bill is filed (where this is a separate action from 42 | introduction) 43 | - introduction - introduced, typically the first action 44 | - reading-1 - first reading (often same as introduction) 45 | - reading-2 - second reading 46 | - reading-3 - third reading (often same as passage) 47 | - passage - bill is passed by the chamber 48 | - failure - bill fails to proceed from the chamber 49 | - withdrawal - bill is withdrawn 50 | - substitution - a substitution is made to the bill text 51 | - deferral - consideration of the bill was deferred 52 | - receipt - a bill was received by another chamber 53 | - referral - a bill was sent somewhere for consideration 54 | - referral-committee - a bill was sent to a committee for consideration 55 | - became-law - the bill became law (through signature or inaction) 56 | - amendment-introduction - an amendment is introduced 57 | - amendment-passage - an amendment passes 58 | - amendment-withdrawal - an amendment is withdrawn 59 | - amendment-failure - an amendment fails to pass 60 | - amendment-amendment - an amendment is amended 61 | - amendment-deferral - consideration of an amendment is deferred 62 | - committee-passage - the bill passes the current committee (unknown 63 | outcome, typically favorable) 64 | - committee-passage-favorable - the bill passes the current committee 65 | favorably 66 | - committee-passage-unfavorable - the bill passes the current committee 67 | with an unfavorable report 68 | - committee-failure - the bill fails to advance out of committee 69 | - executive-receipt - the bill is sent to the governor 70 | - executive-signature - the governor signs the bill 71 | - executive-veto - the governor vetoes the bill 72 | - executive-veto-line-item - the governor uses a line-item veto to 73 | strike part of a bill 74 | - veto-override-passage - a veto override vote occurred and succeeded 75 | - veto-override-failure - a veto override vote occurred and failed 76 | 77 | Vote Types 78 | 
---------- 79 | 80 | Similarly to actions, we make an effort to categorize the motion being 81 | voted upon. You'll find these values in the categorization field on 82 | VoteEvents. 83 | 84 | Possible values: 85 | 86 | - **bill-passage** - This is a vote to pass (either out of committee or 87 | a chamber) 88 | - **amendment-passage** - Vote on amending a bill 89 | - **veto-override** - Vote to override an executive veto 90 | -------------------------------------------------------------------------------- /docs/data/index.md: -------------------------------------------------------------------------------- 1 | # Understanding the Data 2 | 3 | Open States data adheres to a schema that has evolved over our 11+ years 4 | of working with legislative data. Our goal is to provide as much 5 | uniformity across states as possible while still allowing for the wide 6 | diversity of legislative processes between the states. 7 | 8 | These docs both catalog the schema and attempt to explain some of those 9 | choices, particularly where they might be surprising. 10 | 11 | ## Main Concepts 12 | 13 | The main concepts are defined below. You'll notice these concepts mostly correspond to the v2 GraphQL root nodes. 14 | 15 | | Concept | Definition | 16 | |:-------------:|:-----------------------------------------------------------------:| 17 | | **Jurisdiction** | Essentially just another word for "State" in our context. (Includes DC and Puerto Rico.) | 18 | | **Session** | A period of time in a legislature where the same members serve together, typically punctuated by elections. All bills in a session will be uniquely numbered. (e.g. HB 1 in the 2017 session is typically not the same bill as in the 2019 session) | 19 | | **Bill** | Represents all types of legislation whether it is a bill, resolution, etc. | 20 | | **Vote** | A vote among members of the legislature, typically an entire chamber but can also be a committee vote. 
| 21 | | **Person** | Any person that is associated with the legislature. | 22 | | **Organization** | A generic term used to represent a few different concepts: legislatures, chambers, committees, and political parties. | 23 | | **Post** | A particular role within an organization, typically used to represent a seat in the legislature. (e.g. the District 4 post in the North Carolina Senate Organization) | 24 | | **Membership** | Ties a Person to a Post for a duration of time. | 25 | -------------------------------------------------------------------------------- /docs/data/query-scraper-output-data.md: -------------------------------------------------------------------------------- 1 | # How to query and examine data file output created by scrapers 2 | 3 | This document is aimed to help with scraper development and data debugging. 4 | 5 | Open States scrapers typically produce a set of output data files in the JSON format. Each file represents a primary 6 | entity that the scraper has produced, such as a Bill, an Event or a Vote Event. When working on scraper code, it can 7 | be really helpful to examine this output for data quality issues. 8 | 9 | ## Simple examination 10 | 11 | The JSON format is relatively easy to view in any text editor or Integrated Development Environment (IDE). This makes 12 | it easy to examine a specific output file. 13 | 14 | And you can take it a step further by using a tool like [jq](https://jqlang.github.io/jq/) to query the JSON document 15 | so that you only see the attributes that are relevant to you. For example: 16 | 17 | ```shell 18 | cat vote_event_ff4ae3ef-656f-11ef-8b4c-732c295f582c.json | jq .counts 19 | ``` 20 | 21 | ## Querying a full set of scraper output files 22 | 23 | But what if you want to check data quality issues that might be distributed across the output, which may consist of 24 | thousands of data points? 
You can put together shell scripts/commands that iterate through and find certain files, 25 | then parse them with `jq`. But this can be tricky, especially if you are not familiar with advanced shell scripting. 26 | 27 | The good news is that, if you are familiar with SQL, there is a way to treat a set of scraper output files like a 28 | database and use SQL to query that database. A tool called [DuckDB](https://duckdb.org/) makes this possible. 29 | 30 | Let's assume I've run one or more scrapers for Delaware. I open my terminal and look at the scraper output directory, 31 | `_data/de`. There are over 3000 JSON files in that folder, so it would be painful to spot check lots of files: 32 | 33 | ``` 34 | jesse@work:~/repo/openstates/openstates-scrapers/scrapers/_data/de$ ls -alh | grep ".json" | wc -l 35 | 3760 36 | ``` 37 | 38 | However, installing the DuckDB tool allows me to use it inside that folder to query the Bills data: 39 | 40 | ``` 41 | jesse@work:~/repo/openstates/openstates-scrapers/scrapers/_data/de$ duckdb 42 | v0.10.2 1601d94f94 43 | Enter ".help" for usage hints. 44 | Connected to a transient in-memory database. 45 | Use ".open FILENAME" to reopen on a persistent database. 46 | D SELECT * FROM read_json('bill_*.json'); 47 | ``` 48 | 49 | Running `duckdb` in this way opens a "transient" database, meaning that anything you do will be gone once you close 50 | the `duckdb` terminal (by using the `ctrl+d` keystroke). 
If you want to save anything you create in your session, 51 | instead open it with a file that will save those Views/Tables: `duckdb scraper_output.db` 52 | 53 | You can see the data schema that DuckDB has identified from the JSON files by using `DESCRIBE`: 54 | 55 | ``` 56 | D DESCRIBE SELECT * FROM read_json('bill_*.json'); 57 | ┌─────────────────────┬──────────────────────────────────────────────────────────────────────────────────────────────┬─────────┬─────────┬─────────┬─────────┐ 58 | │ column_name │ column_type │ null │ key │ default │ extra │ 59 | │ varchar │ varchar │ varchar │ varchar │ varchar │ varchar │ 60 | ├─────────────────────┼──────────────────────────────────────────────────────────────────────────────────────────────┼─────────┼─────────┼─────────┼─────────┤ 61 | │ legislative_session │ VARCHAR │ YES │ │ │ │ 62 | │ identifier │ VARCHAR │ YES │ │ │ │ 63 | │ title │ VARCHAR │ YES │ │ │ │ 64 | │ from_organization │ VARCHAR │ YES │ │ │ │ 65 | │ classification │ VARCHAR[] │ YES │ │ │ │ 66 | │ subject │ JSON[] │ YES │ │ │ │ 67 | │ abstracts │ STRUCT(note VARCHAR, abstract VARCHAR)[] │ YES │ │ │ │ 68 | │ other_titles │ STRUCT(note VARCHAR, title VARCHAR)[] │ YES │ │ │ │ 69 | │ other_identifiers │ JSON[] │ YES │ │ │ │ 70 | │ actions │ STRUCT(description VARCHAR, date DATE, organization_id VARCHAR, classification VARCHAR[], … │ YES │ │ │ │ 71 | │ sponsorships │ STRUCT("name" VARCHAR, classification VARCHAR, entity_type VARCHAR, "primary" BOOLEAN, per… │ YES │ │ │ │ 72 | │ related_bills │ JSON[] │ YES │ │ │ │ 73 | │ versions │ STRUCT(note VARCHAR, links STRUCT(url VARCHAR, media_type VARCHAR)[], date VARCHAR, classi… │ YES │ │ │ │ 74 | │ documents │ STRUCT(note VARCHAR, links STRUCT(url VARCHAR, media_type VARCHAR)[], date VARCHAR, classi… │ YES │ │ │ │ 75 | │ citations │ STRUCT("publication" VARCHAR, citation VARCHAR, citation_type VARCHAR, effective JSON, exp… │ YES │ │ │ │ 76 | │ sources │ STRUCT(url VARCHAR, note VARCHAR)[] │ YES │ │ │ │ 77 | │ extras 
│ JSON │ YES │ │ │ │ 78 | │ _id │ UUID │ YES │ │ │ │ 79 | ├─────────────────────┴──────────────────────────────────────────────────────────────────────────────────────────────┴─────────┴─────────┴─────────┴─────────┤ 80 | │ 18 rows 6 columns │ 81 | └────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘ 82 | ``` 83 | 84 | You'll notice that related entities like Bill Actions, Bill Sponsorships, etc... are embedded in the data as `STRUCT` 85 | data: `actions`, `sponsorships`, etc.. This is typically structured as a list of `STRUCT` data, which is like a dict 86 | or object (`STRUCT(property_a VARCHAR, property_b VARCHAR)[]`). We can drill down into those. 87 | 88 | ### Use views to drill into the data 89 | 90 | To make this data structure simpler to reason about, we can use Views. Here's a set of recommended views you can try. 91 | Please note that these views are used in many of the suggested queries below. In order to execute a query that uses 92 | a view, you need to create the view first! 
93 | 94 | ```sql 95 | -- Bills 96 | CREATE VIEW bills AS 97 | SELECT * 98 | FROM read_json('bill_*.json'); 99 | CREATE VIEW bill_actions AS 100 | SELECT legislative_session, identifier AS bill_identifier, _id AS bill_id, unnest(actions, recursive := true) 101 | FROM bills; 102 | CREATE VIEW bill_action_classifications AS 103 | SELECT legislative_session, bill_identifier, bill_id, unnest(classification) AS classification 104 | FROM bill_actions; 105 | CREATE VIEW bill_sponsorships AS 106 | SELECT legislative_session, identifier AS bill_identifier, _id AS bill_id, unnest(sponsorships, recursive := true) 107 | FROM bills; 108 | CREATE VIEW bill_versions AS 109 | SELECT legislative_session, identifier AS bill_identifier, _id AS bill_id, unnest(versions, recursive := true) 110 | FROM bills; 111 | CREATE VIEW bill_version_links AS 112 | SELECT legislative_session, bill_identifier, bill_id, unnest(links, recursive := true) 113 | FROM bill_versions; 114 | CREATE VIEW bill_subjects AS 115 | SELECT legislative_session, identifier AS bill_identifier, _id AS bill_id, unnest(subject) AS subject 116 | FROM bills; 117 | CREATE VIEW bill_documents AS 118 | SELECT legislative_session, identifier AS bill_identifier, _id AS bill_id, unnest(documents, recursive := true) 119 | FROM bills; 120 | CREATE VIEW bill_abstracts AS SELECT legislative_session, identifier AS bill_identifier, _id AS bill_id, 121 | unnest(abstracts, recursive := true) FROM bills; 122 | 123 | 124 | -- Vote Events 125 | CREATE VIEW vote_events AS 126 | SELECT * 127 | FROM read_json('vote_event_*.json'); 128 | CREATE VIEW vote_event_votes AS 129 | SELECT legislative_session, 130 | bill_identifier, 131 | identifier AS vote_identifier, 132 | _id AS vote_id, 133 | unnest(votes, recursive := true) 134 | FROM vote_events; 135 | CREATE VIEW vote_event_counts AS 136 | SELECT legislative_session, 137 | bill_identifier, 138 | identifier AS vote_identifier, 139 | _id AS vote_id, 140 | unnest(counts, recursive := true) 141 | 
FROM vote_events; 142 | CREATE VIEW vote_sources AS 143 | SELECT legislative_session, 144 | bill_identifier, 145 | identifier AS vote_identifier, 146 | _id AS vote_id, 147 | unnest(sources, recursive := true) 148 | FROM vote_events; 149 | 150 | -- Events 151 | CREATE VIEW events AS 152 | SELECT * 153 | FROM read_json('event*.json'); 154 | CREATE VIEW event_documents AS 155 | SELECT name AS event_name, _id AS event_id, unnest(documents, recursive := true) 156 | FROM events; 157 | CREATE VIEW event_document_links AS 158 | SELECT event_name, event_id, note AS document_note, unnest(links, recursive := true) 159 | FROM event_documents; 160 | CREATE VIEW event_media AS 161 | SELECT name AS event_name, _id AS event_id, unnest(media) 162 | FROM events; 163 | CREATE VIEW event_participants AS 164 | SELECT name AS event_name, _id AS event_id, unnest(participants, recursive := true) 165 | FROM events; 166 | CREATE VIEW event_agenda AS 167 | SELECT name AS event_name, _id AS event_id, unnest(agenda, recursive := true) 168 | FROM events; 169 | CREATE VIEW event_agenda_related_entities AS 170 | SELECT event_name, event_id, unnest(related_entities, recursive := true) 171 | FROM event_agenda; 172 | CREATE VIEW event_sources AS 173 | SELECT name AS event_name, _id AS event_id, unnest(sources, recursive := true) 174 | FROM events; 175 | ``` 176 | 177 | To view Views in your DuckDB database: `SHOW TABLES;` 178 | 179 | ### Useful queries 180 | 181 | These queries will assume you've created the Views listed above! Execute the Views queries above before trying these! 182 | 183 | General utility: describe the shape of a given dataset: 184 | 185 | ```sql 186 | DESCRIBE SELECT * FROM bills; 187 | DESCRIBE SELECT * FROM vote_events; 188 | ``` 189 | 190 | ### Bills 191 | 192 | #### Bills that are missing a property that is a list (abstracts, actions, sponsorships, etc.) 
193 | 194 | ```sql 195 | SELECT (len(abstracts) > 0) AS has_abstract, COUNT(*) 196 | FROM bills 197 | GROUP BY 1; 198 | SELECT (len(sponsorships) > 0) AS has_sponsorship, COUNT(*) 199 | FROM bills 200 | GROUP BY 1; 201 | SELECT (len(related_bills) > 0) AS has_related_bills, COUNT(*) 202 | FROM bills 203 | GROUP BY 1; 204 | ``` 205 | 206 | #### Distribution of bill classifications 207 | 208 | ```sql 209 | SELECT classification, COUNT(*) 210 | FROM bills 211 | GROUP BY classification; 212 | ``` 213 | 214 | #### Distribution of bill action classifications 215 | 216 | ```sql 217 | SELECT classification, COUNT(*) 218 | FROM bill_action_classifications 219 | GROUP BY 1; 220 | ``` 221 | 222 | #### Bill actions that are unclassified 223 | 224 | ```sql 225 | SELECT (classification = []) AS action_is_unclassified, COUNT(*) 226 | FROM bill_actions 227 | GROUP BY 1; 228 | ``` 229 | 230 | #### Bill version media type distribution 231 | 232 | ```sql 233 | SELECT media_type, COUNT(*) 234 | FROM bill_version_links 235 | GROUP BY 1; 236 | ``` 237 | 238 | ### Vote Events 239 | 240 | #### General summary 241 | 242 | ```sql 243 | SELECT bill_identifier AS bill, start_date, motion_text, 244 | counts[1].option AS opt1, counts[1].value AS opt1_num, 245 | counts[2].option AS opt2, counts[2].value AS opt2_num, 246 | sources[1].url AS source_url 247 | FROM vote_events 248 | ORDER BY bill_identifier, start_date; 249 | ``` 250 | 251 | #### Vote Events over time (by month) 252 | 253 | ```sql 254 | SELECT date_trunc('month', start_date::timestamp), COUNT(*) 255 | FROM vote_events 256 | GROUP BY 1 257 | ORDER BY 1; 258 | ``` 259 | 260 | #### Vote events with zero or unexpected number of votes 261 | 262 | ```sql 263 | SELECT len(votes) AS num_votes, COUNT(*) AS events_with_this_number 264 | FROM vote_events 265 | GROUP BY 1 266 | ORDER BY 1 DESC; 267 | ``` 268 | 269 | or check via count values for vote events where all counts are zero 270 | 271 | ```sql 272 | SELECT vote_id, SUM(value) 273 | 
FROM vote_event_counts 274 | GROUP BY 1 275 | HAVING SUM(value) = 0; 276 | ``` 277 | 278 | #### Vote event counts check distribution for anomalies 279 | 280 | ```sql 281 | SELECT 282 | option, value, COUNT (*) 283 | FROM vote_event_counts 284 | GROUP BY 1, 2 285 | ORDER BY 1, 2 DESC; 286 | ``` 287 | 288 | ### Events 289 | 290 | #### Date-sorted lists of events to compare to source 291 | 292 | Basic event info 293 | 294 | ```sql 295 | SELECT start_date, all_day, name, location.name, status, classification, description 296 | FROM events 297 | ORDER BY start_date; 298 | ``` 299 | 300 | Event info with source URL 301 | 302 | ```sql 303 | SELECT e.start_date, e.all_day, e.name, e.location.name, e.status, e.classification, e.description, s.url 304 | FROM events e 305 | LEFT JOIN event_sources s ON e._id = s.event_id 306 | ORDER BY start_date; 307 | ``` 308 | 309 | #### Check distribution of related entity counts 310 | 311 | ```sql 312 | SELECT len(media), COUNT(*) 313 | FROM events 314 | GROUP BY 1 315 | ORDER BY 2 DESC; 316 | SELECT len(documents), COUNT(*) 317 | FROM events 318 | GROUP BY 1 319 | ORDER BY 2 DESC; 320 | SELECT len(links), COUNT(*) 321 | FROM events 322 | GROUP BY 1 323 | ORDER BY 2 DESC; 324 | SELECT len(participants), COUNT(*) 325 | FROM events 326 | GROUP BY 1 327 | ORDER BY 2 DESC; 328 | SELECT len(agenda), COUNT(*) 329 | FROM events 330 | GROUP BY 1 331 | ORDER BY 2 DESC; 332 | SELECT len(sources), COUNT(*) 333 | FROM events 334 | GROUP BY 1 335 | ORDER BY 2 DESC; 336 | ``` 337 | 338 | #### Event Agenda Related Entities 339 | 340 | Distribution of entity types 341 | 342 | ```sql 343 | SELECT entity_type, COUNT(*) 344 | FROM event_agenda_related_entities 345 | GROUP BY 1; 346 | ``` 347 | 348 | List of events with related entities, sorted by event date 349 | 350 | ```sql 351 | SELECT e.start_date, re.event_name, re.name, entity_type 352 | FROM event_agenda_related_entities re 353 | INNER JOIN events e ON re.event_id = e._id 354 | ORDER BY 
e.start_date; 355 | ``` 356 | -------------------------------------------------------------------------------- /docs/data/session-naming.md: -------------------------------------------------------------------------------- 1 | # Session Naming 2 | 3 | States name their sessions drastically differently, and sometimes 4 | inconsistently even within their own site. (49th vs 2008 Regular 5 | Session). As our goal is to help smooth these inconsistencies we put 6 | forward this guide to naming sessions within state metadata. (See 7 | for discussion on 8 | the topic) 9 | 10 | ## Default Session Names 11 | 12 | The `sessions` list within `terms` is dangerous to change as all bill 13 | data is keyed off it. As a rule these should be short and generally 14 | useful for the scraper to make the appropriate decisions on what data to 15 | scrape. 16 | 17 | If a state calls its 1st special session in 2010 '2010E1' this is a 18 | perfectly acceptable name for the session in the metadata. Similarly 19 | 49th-regular, 2009-Special-B, etc. are fine names. Generally names with 20 | spaces should be avoided simply for ease of construction of URLs, etc. 21 | In states where spaces are already in use it is fine to continue to use 22 | them. 23 | 24 | The one caveat is that if a state uses a unique ID that has no bearing 25 | on the session itself such as '7323' for the 2011 session, this *should 26 | not* be used. Instead add some mapping that maps a session name that is 27 | descriptive to their internal ids. 28 | 29 | ## Session Display Names 30 | 31 | Because the most convenient name to refer to a session is often far from 32 | what a user might expect to see upon opening a mobile application, the 33 | `session_details` dict supports a `display_name` key. 34 | 35 | Suitable display names are descriptive but also short and obey a given 36 | style. 37 | 38 | ### General Rules 39 | 40 | - All sessions should be in title case. 41 | - Fewer than 20 characters is highly preferable. 
42 | - Months should be abbreviated to 3 letters (Jan., Feb., Jun., Dec.) 43 | 44 | ### Ordinals 45 | 46 | If no special sessions are present: 47 | 48 | : - \[Ordinal\] Legislature 49 | 50 | If special sessions are present: 51 | 52 | : - \[Ordinal\] Regular Session 53 | - \[Ordinal\], \[Ordinal\] Special Session 54 | 55 | Examples: 56 | 57 | > - 82nd Legislature 58 | > - 82nd Regular Session 59 | > - 82nd, 3rd Special Session 60 | 61 | ### Years 62 | 63 | - \[Year/Year-Range\] Regular Session 64 | - \[Year/Year-Range\], \[Ordinal\] Special Session 65 | - \[Mon. Year\] Special Session 66 | 67 | Examples: 68 | 69 | > - 2010 Regular Session 70 | > - 2011-2012, 4th Special Session 71 | > - Dec. 2011 Special Session 72 | -------------------------------------------------------------------------------- /docs/index.md: -------------------------------------------------------------------------------- 1 | # Introduction 2 | 3 | Welcome to the Documentation for the Plural Open Data project (formerly Open States). The sections below cover 4 | contributing to scrapers & data, and how to use the API. 5 | 6 | ## About Plural Open 7 | 8 | Plural Open strives to improve civic engagement at the state level by providing data and tools regarding state 9 | legislatures. We aim to serve members of the public, activist groups, journalists, and researchers with better data on 10 | what is happening in their state capital, and to provide tools to reduce barriers to participation and increase 11 | engagement. 12 | 13 | The project aggregates legislative information from all 50 states, Washington, D.C., and Puerto Rico. This information 14 | is then standardized, cleaned, and published to the public via PluralPolicy.com/open, a powerful API, and bulk 15 | downloads. 16 | 17 | This work was begun as the [Open States project and has a long history](https://open.pluralpolicy.com/about/). We moved 18 | the work under [Plural Open in 2023](https://blog.openstates.org/2023-june-changes/). 
19 | 20 | Our open data work is [done in the open](https://github.com/openstates/), and depends in part on contributors to make 21 | this 22 | important resource available. Thank you for your interest in our community, whether you are looking to use our bulk data 23 | or APIs or interested in contributing data or code, we're glad to have you here. 24 | 25 | ## Communication 26 | 27 | When joining a new community, it can be tough to figure out *where* to 28 | ask questions, provide feedback, or help out. Don't worry! As long as 29 | you're respectful and follow our [Code of Conduct](code-of-conduct.md), we're happy to have you! 30 | 31 | Here are some guidelines regarding the best way to get in touch or 32 | contribute. Do note that Open States is a volunteer-powered project, and 33 | all of the core developers have day jobs; we're excited to talk to you, 34 | but it may sometimes take a bit of time to get back to you. 35 | 36 | ### Recommendations 37 | 38 | **Want to ask a general question, have a conversation, or keep up with 39 | the community?** 40 | 41 | We have a Matrix chat space that you [can join if you're 42 | interested in being a part of the community](https://matrix.to/#/#open-states:matrix.org). The 43 | Matrix space (similar to Slack) is a good way to raise an emergency issue (API seems down, etc.) 44 | or ask questions about how to get involved/contribute. 45 | 46 | **Have a private question, or a security concern?** 47 | 48 | Email ; only the administrative team can see these. 49 | 50 | **Have you found an error or issue in the Open States data?** **Have a 51 | technical issue not related to the data itself?** 52 | 53 | File an issue on [our bug tracker](https://github.com/openstates/issues/issues). 54 | And before you do, quickly check whether anyone else there has already reported the same bug. 
55 | 56 | ### Discouraged Methods of Communication 57 | 58 | Please *avoid* using these channels to get in touch with us: 59 | 60 | **Personal email addresses of Open States developers** 61 | 62 | Please respect our boundaries & refrain from contacting any of the developers directly, unless we ask you to do so. 63 | 64 | **Twitter (or any other social media)** 65 | 66 | We mainly use the [@openstates twitter account](https://twitter.com/openstates) to make announcements, and 67 | don't have the resources to provide technical support or other feedback on Twitter. 68 | -------------------------------------------------------------------------------- /docs/openstates.org/scheduled-tasks.md: -------------------------------------------------------------------------------- 1 | # Openstates.org scheduled tasks 2 | 3 | Openstates.org is a django application that has some simple scheduled tasks that execute using [Django admin commands](https://docs.djangoproject.com/en/4.0/howto/custom-management-commands/). 4 | 5 | ## Jobs and Locations 6 | 7 | Containerized versions of the actual scheduled jobs are managed in [Github](https://github.com/openstates/openstates.org/tree/develop/docker/cron). These are _not_ the jobs executed in production. 8 | 9 | We deploy jobs to the Openstates.org host using [ansible](https://github.com/openstates/openstates.org/blob/develop/ansible/openstates/tasks/main.yml#L70). 10 | 11 | openstates.org runs in AWS. Access credentials/location/etc. can be found in the AWS console. 12 | 13 | ## Currently Scheduled Jobs 14 | 15 | ### Subscription Processing 16 | 17 | [Defined here](https://github.com/openstates/openstates.org/blob/develop/profiles/management/commands/process_subscriptions.py) 18 | 19 | Tool that processes search subscriptions for users. 
20 | 21 | Currently (2022-08-02) scheduled to run once a day (12:30 UTC) 22 | 23 | ### Aggregate API Usage 24 | 25 | [Defined here](https://github.com/openstates/openstates.org/blob/develop/profiles/management/commands/aggregate_api_usage.py) 26 | 27 | Tool that generates some internal stats for user interactions with Openstates. 28 | 29 | Currently (2022-08-02) scheduled to run every 2 hours (39 */2) 30 | 31 | ### System Maintenance Jobs 32 | 33 | * Let's Encrypt certificate collection/rotation 34 | * Nginx maintenance (tied to certificate rotation) 35 | -------------------------------------------------------------------------------- /mkdocs.yml: -------------------------------------------------------------------------------- 1 | site_name: Open States 2 | site_url: https://docs.openstates.org/ 3 | site_author: James Turk 4 | site_description: Documentation for the Open States project. 5 | copyright: Copyright © 2021 Open States 6 | repo_url: https://github.com/openstates/documentation 7 | repo_name: openstates/documentation 8 | edit_uri: edit/main/docs/ 9 | 10 | theme: 11 | logo: assets/openstates.svg 12 | name: material 13 | palette: 14 | - scheme: default 15 | primary: indigo 16 | accent: red 17 | toggle: 18 | icon: material/toggle-switch-off-outline 19 | name: Switch to dark mode 20 | - scheme: slate 21 | primary: indigo 22 | accent: red 23 | toggle: 24 | icon: material/toggle-switch 25 | name: Switch to light mode 26 | 27 | features: 28 | - navigation.expand 29 | - navigation.top 30 | - navigation.sections 31 | - content.tabs.link 32 | icon: 33 | repo: fontawesome/brands/github 34 | extra: 35 | social: 36 | - icon: fontawesome/brands/twitter 37 | link: https://twitter.com/openstates 38 | - icon: fontawesome/brands/github 39 | link: https://github.com/openstates 40 | markdown_extensions: 41 | - admonition 42 | - def_list 43 | - tables 44 | - pymdownx.highlight 45 | - pymdownx.tabbed 46 | - pymdownx.superfences 47 | - toc: 48 | permalink: true 49 | plugins: 50 
| - search 51 | extra_css: 52 | - assets/extra.css 53 | nav: 54 | - 'index.md' 55 | - 'code-of-conduct.md' 56 | - API v3: 57 | - 'api-v3/index.md' 58 | - 'api-v3/changelog.md' 59 | - GraphQL API: 60 | - 'api-v2/index.md' 61 | - 'api-v2/root-nodes.md' 62 | - 'api-v2/types.md' 63 | - 'api-v2/other.md' 64 | - 'api-v2/examples.md' 65 | - 'api-v2/changelog.md' 66 | - Contributing: 67 | - 'contributing/index.md' 68 | - 'contributing/people.md' 69 | - 'contributing/local-database.md' 70 | - 'contributing/scrapers.md' 71 | - 'contributing/writing-a-committee-scraper.md' 72 | - 'contributing/openstates-org.md' 73 | # - 'contributing/text-extraction.md' 74 | - 'contributing/testing-scrapers.md' 75 | - 'contributing/state-specific.md' 76 | - 'contributing/documentation.md' 77 | - Data: 78 | - 'data/index.md' 79 | - 'data/query-scraper-output-data.md' 80 | - 'data/categorization.md' 81 | - 'data/session-naming.md' 82 | - Enhancement Proposals: 83 | - 'enhancement-proposals/001-purpose-and-process.md' 84 | - 'enhancement-proposals/002-legal-citations.md' 85 | - 'enhancement-proposals/003-manual-people-data-tools.md' 86 | - 'enhancement-proposals/004-committee-data.md' 87 | - 'enhancement-proposals/005-dedupe-key.md' 88 | - 'enhancement-proposals/006-new-people-offices.md' 89 | - 'enhancement-proposals/007-events.md' 90 | - 'enhancement-proposals/008-active-sessions.md' 91 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "openstates-documentation" 3 | version = "2019.12" 4 | description = "" 5 | authors = ["James Turk "] 6 | 7 | [tool.poetry.dependencies] 8 | python = "^3.9" 9 | mkdocs-material = "^8.5.11" 10 | 11 | [tool.poetry.dev-dependencies] 12 | 13 | [build-system] 14 | requires = ["poetry-core>=1.0.0"] 15 | build-backend = "poetry.core.masonry.api" 16 | 
--------------------------------------------------------------------------------