├── .gitignore ├── LICENSE.txt ├── README.md ├── examples ├── case.html ├── count_without_table.html ├── multi_case_table.html ├── multi_count.html ├── no_counts.html └── number_not_used.html ├── oscn ├── __init__.py ├── _meta.py ├── find │ ├── __init__.py │ ├── parse.py │ └── searches.py ├── parse │ ├── __init__.py │ ├── _helpers.py │ ├── bs4_attorneys.py │ ├── bs4_cmids.py │ ├── bs4_counts.py │ ├── bs4_docket.py │ ├── bs4_events.py │ ├── bs4_issues.py │ ├── dates.py │ ├── docket_report.py │ ├── judge.py │ ├── lax_attorneys.py │ ├── lax_body.py │ ├── lax_cmids.py │ ├── lax_counts.py │ ├── lax_docket.py │ ├── lax_events.py │ ├── lax_issues.py │ ├── lax_parties.py │ ├── parties.py │ ├── party_addresses.py │ ├── party_profile.py │ ├── party_properties.py │ └── style.py ├── request │ ├── __init__.py │ ├── cases.py │ ├── docket.py │ └── parties.py ├── requirements.txt └── settings.py ├── requirements.txt ├── scripts ├── docket_test.py ├── events.py ├── example.py ├── find-attorneys.py ├── find-counts.py ├── find_party.py ├── parse_test.py ├── save-counts.py ├── save_cases.py ├── soup_test.py └── test_lists.py ├── setup.py └── tests ├── test_alpha_casenumber.py ├── test_alphacases.py ├── test_appellate.py ├── test_attorneys.py ├── test_body.py ├── test_caselist.py ├── test_cmids.py ├── test_counts.py ├── test_dates.py ├── test_events.py ├── test_find.py ├── test_get_party.py ├── test_issues.py ├── test_judge_docket.py ├── test_lax_docket.py ├── test_lax_events.py ├── test_lax_issues.py ├── test_meta_types.py ├── test_parse.py ├── test_parties.py ├── test_party_search.py ├── test_request.py ├── test_source.py └── test_style.py /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | .env 3 | *.pyc 4 | __pycache__/ 5 | data/ 6 | /*.egg-info 7 | /dist 8 | /build 9 | /examples -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | Copyright (c) 2016 The Python Packaging Authority (PyPA) 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of 4 | this software and associated documentation files (the "Software"), to deal in 5 | the Software without restriction, including without limitation the rights to 6 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 7 | of the Software, and to permit persons to whom the Software is furnished to do 8 | so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in all 11 | copies or substantial portions of the Software. 12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 19 | SOFTWARE. 20 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # OSCN utilities 2 | 3 | A python library for scraping case information from the [Oklahoma State Courts Network](https://www.oscn.net/dockets/). 4 | 5 | # Update: 6 | 7 | - 12/16/2024: Updated to use Selectolax instead of BeautifulSoup. Most parsing functions now use Selectolax which is faster and more memory efficient. Some BeautifulSoup functions are still used and old parsers are still in the codebase for testing. 8 | 9 | ## Contents 10 | 11 | oscn > Python package source to provide an api for retrieving and parsing case records. 12 | 13 | scripts > Python scripts showing use of the oscn package 14 | 15 | - example.py: demonstrates use of the request Case and Caselist 16 | - retrieve-counts.py: saves a list of all counts for a list of counties and years 17 | - find-counts.py: saves a list of counts passing a test for a list of counties and years 18 | - soup_test.py: a stub for testing parsing attempts using BeautifulSoup 19 | - parse_test.py: a stub for developing using saved examples 20 | 21 | ## OSCN package 22 | 23 | ### oscn 24 | 25 | - counties: Returns a list of counties. 26 | - courts: Same as counties but more a accurate description. 27 | - judges: Returns a list of objects formated as {'name': 'Bond, James', 'number': '007'} 28 | - types: returns a dict of case type codes and descriptons 29 | - type: function to return case type description. Usage: 30 | ``` 31 | >>> oscn.type("AO") 32 | 'CIVIL ADMINISTRATIVE' 33 | ``` 34 | 35 | ### oscn.request 36 | 37 | - Case: Returns a single case. Case can be saved as files using Case.save() and retrieved using Case.open(). 38 | 39 | - CaseList: Returns an iterator for retrieving cases for a county and year. CaseLists can be filtered using .find(). See scripts/example.py for details 40 | 41 | - Party: Returns information on parties available on OSCN. 42 | 43 | - Docket: Returns docket of cases for specific judges and date 44 | 45 | ### oscn.parse 46 | 47 | Parsers accept the html of an OSCN page and return python objects. 48 | 49 | #### Case Page Parsers 50 | 51 | - filed: returns a string of the filing date (e.g. 12/25/2017) 52 | - closed: returns a string of the date the case was closed. Return None if not closed. 53 | - counts: returns of list of count dicts found in a case. Keys include 'description' 54 | of the count. If available 'violation' and 'disposed' are added. 55 | - judge: returns a string of the judge's name 56 | - parties: returns a list of dicts with these keys: id, name, type 57 | - docket: returns a list of rows in a docket 58 | - events: returns a list of dicts with these keys: event, party, docket, reporter, date, description. The keys date and description are cleaner versions of the event text. The event key will be deprecated some day so use date and description if you are starting a project. 59 | - attorneys: returns a list of dicts with these keys: name, address, and representing 60 | - issues: returns a list of dicts with issue information. Each issues includes a list of dicts for each party 61 | 62 | #### Party Page Parsers 63 | 64 | - name: returns 'Requested Party' 65 | - alias: returns 'Alias or Alternate Names' 66 | - profile: returns dict of values in 'Personal Profile' 67 | - birth_month: returns string of 'Birth Month and Year' 68 | - addresses: returns a list of dicts for each address 69 | 70 | #### Docket Page Parsers 71 | 72 | - cases: returns a list of case indexes 73 | - tables: returns the html table for each case in the docket 74 | 75 | ### oscn.find 76 | 77 | - CaseIndexes: returns an iterator of case indexes (e.g. tulsa-CF-2019-12). 78 | 79 | #### Usage 80 | 81 | Create a CaseIndexes list using these key word arguments: 82 | 83 | - county: defaults to all, 84 | - last_name: use this for company or organization names 85 | - first_name: optional 86 | - middle_name: optional 87 | - filed_after: More readable than FiledDateL 88 | - filed_before: More readable than FiledDateH 89 | - closed_after: More readable than ClosedDateL 90 | - closed_before: More readable than ClosedDateH 91 | 92 | #### Notes 93 | 94 | - The % wild card is added to all words in name, first and middle 95 | - Date arguments use MM/DD/YYY strings. 96 | 97 | #### OSCN search parameters 98 | 99 | If you are familar with the OSCN search parameters you can initialize CaseIndexes using these as key word arguments: db, number, lname, fname, mname, DoBMin, DoBMax, partytype, apct, dcct, FiledDate, FiledDateH, ClosedDateL, ClosedDateH, iLC, iLCType, iYear, iNumber, and citation 100 | 101 | Using this will override init keyword values such as first or filed_after. 102 | 103 | ## Development Install 104 | 105 | 1. python3 -m venv ~/your_path/oscn 106 | 1. source ~/your_path/oscn/bin/activate 107 | 1. `git clone git@github.com:codefortulsa/oscn.git` 108 | 1. `cd oscn` 109 | 1. `pip install -e .` 110 | 111 | ## Usage 112 | 113 | Install with `pip install oscn` 114 | 115 | Script example: 116 | 117 | `import oscn` 118 | 119 | Request a single case: 120 | 121 | `oscn.request.Case(county='tulsa', year='2018', number=84)` 122 | 123 | or use case index notation: 124 | 125 | `oscn.request.Case('love-CF-2019-25')` 126 | 127 | To request a list of cases to iterate: 128 | 129 | `oscn.request.CaseList(county='adair', year='2016')` 130 | 131 | ## Run test scripts 132 | 133 | - `pytest tests/` 134 | 135 | or with ipdb: 136 | 137 | - `pytest -s tests/` 138 | 139 | specify a test: 140 | 141 | - `pytest -s tests/test_parse.py -k 'test_events'` 142 | 143 | ## Deployment steps 144 | 145 | 1. Edit setup.py 146 | 1. `python3 setup.py sdist bdist_wheel` 147 | 1. `twine upload dist/*` 148 | 149 | ## User Agent 150 | 151 | In some cases a custom user agent is required in the header of requests. 152 | Setting an environmental varialbe called OSCN_USER_AGENT will override the default. 153 | -------------------------------------------------------------------------------- /examples/count_without_table.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | OSCN Case Details 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 36 | 37 | 38 | 47 | 48 | 49 |
50 | 51 |
52 | 56 | 98 | 116 |
117 |
118 | 240 | 296 |

The information on this page is NOT an official 297 | record. Do not rely on the correctness or completeness of this 298 | information. Verify all information with the official record 300 | keeper. The information contained in this report is provided in 301 | compliance with the Oklahoma Open Records Act, 302 | 51 O.S. 24A.1. 303 | Use of this information is governed by this act, as well as other 304 | applicable state and federal laws. 305 |

306 |

In the District Court in and for tulsa County, Oklahoma

308 | 309 | 310 | 315 | 320 | 321 |
STATE OF OKLAHOMA,
311 | Plaintiff,
312 | v.
313 | SABRINA NICOLE WILSON,
314 | Defendant.
316 | No. CF-2018-14
317 | (Criminal Felony) 318 |


319 | Filed: 01/02/2018
Closed: 03/28/2018

Judge: CF B Docket
322 |

Parties

323 |

STATE OF OKLAHOMA, 324 | Plaintiff
Tulsa County Sheriff's Department, 325 | ARRESTING AGENCY
WILSON,  SABRINA  NICOLE, 326 | Defendant
  327 |

328 |

Attorneys

329 | 330 | 331 | 332 | 333 | 334 | 335 | 336 | 337 | 338 | 340 | 341 | 342 | 343 |
AttorneyRepresented Parties
TULSA COUNTY PUBLIC DEFENDER

  339 |
WILSON,   SABRINA  NICOLE
344 |

Events

345 | 346 | 347 | 348 | 349 | 350 | 351 | 352 | 353 | 354 | 355 | 356 | 360 | 361 | 362 | 364 | 365 | 366 | 370 | 371 | 372 | 374 | 375 | 376 | 380 | 381 | 382 | 384 | 385 | 386 | 390 | 391 | 392 | 394 | 395 | 396 |
EventPartyDocketReporter
357 | Wednesday, January 17, 2018 at 9:01 AM
PRELIMINARY HEARING NO ISSUE (PUBLIC DEFENDER)
WILSON,  SABRINA  NICOLE Preliminary Hearing Docket 363 |
367 | Wednesday, February 7, 2018 at 9:01 AM
PRELIMINARY HEARING STC OF CIVIL COMM (PUBLIC DEFENDER)
WILSON,  SABRINA  NICOLE Preliminary Hearing Docket 373 |
377 | Wednesday, March 21, 2018 at 9:01 AM
PRELIMINARY HEARING NO ISSUE/REVIEW OF CIVIL COMMIT PROGRAM
WILSON,  SABRINA  NICOLE Preliminary Hearing Docket 383 |
387 | Wednesday, March 28, 2018 at 9:01 AM
PRELIMINARY HEARING REVIEW OF CIVIL COMMIT (PUBLIC DEFENDER)
WILSON,  SABRINA  NICOLE Preliminary Hearing Docket 393 |
397 |

Counts

398 |

399 | Parties appear only under the counts with which they were charged. For complete sentence information, see the court minute on the docket.
 

400 |
401 | 402 | 403 | 404 | 408 | 409 |
Count # 1. 405 | Count as Filed: 406 | ABOFF, ASSAULT & BATTERY UPON A DETENTION OFFICER, 407 | in violation of 21 O.S. 649 B
Date of Offense: 12/28/2017
410 | 411 | 412 | 413 | 414 | 415 | 416 | 417 | 418 | 419 | 420 | 421 | 422 | 430 | 431 | 432 |
Party NameDisposition Information
WILSON, SABRINA NICOLE 423 | 424 | Disposed: DISMISSED, 425 | 03/28/2018. 426 | Dismissed- Request of the State
427 | Count as Disposed: ASSAULT & BATTERY UPON A DETENTION OFFICER(ABOFF)
428 | Violation of 429 | 21 O.S. 649 B
433 |
434 |
435 | 436 | 437 | 438 | 442 | 443 |
Count # 2. 439 | Count as Filed: 440 | ABOFF, ASSAULT & BATTERY UPON A DETENTION OFFICETR, 441 | in violation of 21 O.S. 649 B
Date of Offense: 12/28/2017
444 | 445 | 446 | 447 | 448 | 449 | 450 | 451 | 452 | 453 | 454 | 455 | 456 | 464 | 465 | 466 |
Party NameDisposition Information
WILSON, SABRINA NICOLE 457 | 458 | Disposed: DISMISSED, 459 | 03/28/2018. 460 | Dismissed- Request of the State
461 | Count as Disposed: ASSAULT & BATTERY UPON A DETENTION OFFICETR(ABOFF)
462 | Violation of 463 | 21 O.S. 649 B
467 |
468 |

Docket

469 | 470 | 471 | 472 | 473 | 474 | 475 | 476 | 477 | 478 | 479 | 480 | 481 | 482 | 485 | 488 | 494 | 496 | 497 | 498 | 499 | 500 | 503 | 506 | 511 | 513 | 514 | 515 | 516 | 517 | 520 | 523 | 528 | 530 | 531 | 532 | 533 | 534 | 537 | 540 | 545 | 546 | 547 | 548 | 549 | 550 | 553 | 556 | 561 | 562 | 563 | 564 | 565 | 566 | 569 | 572 | 577 | 578 | 579 | 580 | 581 | 582 | 585 | 588 | 598 | 599 | 600 | 601 | 602 | 603 | 606 | 609 | 619 | 620 | 621 | 622 | 623 | 624 | 627 | 630 | 640 | 641 | 642 | 643 | 644 | 645 | 648 | 651 | 661 | 662 | 663 | 664 | 665 | 666 | 669 | 672 | 682 | 683 | 684 | 685 | 686 | 687 | 690 | 693 | 699 | 700 | 701 | 702 | 703 | 704 | 707 | 710 | 715 | 716 | 717 | 718 | 719 | 720 | 723 | 726 | 736 | 737 | 738 | 739 | 740 | 741 | 744 | 747 | 752 | 753 | 754 | 755 | 756 | 757 | 760 | 763 | 768 | 769 | 770 | 771 | 772 | 773 | 776 | 779 | 789 | 790 | 791 | 794 | 795 | 796 | 799 | 802 | 812 | 813 | 814 | 815 | 816 | 817 | 820 | 823 | 828 | 829 | 830 | 831 | 832 | 833 | 836 | 839 | 844 | 846 | 847 | 848 | 849 | 850 |
DateCodeDescriptionCountPartyAmount
483 | 01-02-2018  484 | 486 | TEXT 487 | 489 |

490 | CRIMINAL FELONY INITIAL FILING. 491 |

492 |

Document Available at Court Clerk's Office

493 |
1   495 | WILSON, SABRINA NICOLE
501 | 01-02-2018  502 | 504 | INFORMATION 505 | 507 |

508 | DEFENDANT SABRINA NICOLE WILSON WAS CHARGED WITH COUNT #1, ASSAULT & BATTERY UPON A DETENTION OFFICER IN VIOLATION OF 21 O.S. 649 B 509 |

510 |
1   512 | WILSON, SABRINA NICOLE
518 | 01-02-2018  519 | 521 | INFORMATION 522 | 524 |

525 | DEFENDANT SABRINA NICOLE WILSON WAS CHARGED WITH COUNT #2, ASSAULT & BATTERY UPON A DETENTION OFFICETR IN VIOLATION OF 21 O.S. 649 B 526 |

527 |
2   529 | WILSON, SABRINA NICOLE
535 | 01-02-2018  536 | 538 | TEXT 539 | 541 |

542 | OCIS HAS AUTOMATICALLY ASSIGNED JUDGE CF D DOCKET TO THIS CASE. 543 |

544 |
551 | 01-03-2018  552 | 554 | CTFREE 555 | 557 |

558 | JUDGE DAWN MOODY: CASE REASSIGNED TO CF: B BASED ON CF-17-6443.
559 |

560 |
WILSON, SABRINA NICOLE
567 | 01-03-2018  568 | 570 | CTARRPL 571 | 573 |

574 | JUDGE DAWN MOODY: DEFENDANT PRESENT, IN CUSTODY AND PUBLIC DEFENDER APPOINTED AS COUNSEL OF RECORD. ARRAIGNMENT HELD. DEFENDANT WAIVES READING OF THE INFORMATION AND FURTHER TIME TO PLEAD. DEFENDANT ENTERS A PLEA OF NOT GUILTY. PRELIMINARY HEARING NO ISSUE SET FOR 1/17/18 @ 9AM IN ROOM 347. BOND $10,000.00 AGG BOND. DEFENDANT REMANDED TO CUSTODY. 575 |

576 |
WILSON, SABRINA NICOLE
583 | 01-03-2018  584 | 586 | DAINS 587 | 589 |

590 | DISTRICT ATTORNEY INSPECTION NOTIFICATION 591 |

592 |

593 | Document Available (#1039099589) 594 | Download document in TIFF format.TIFF 595 |    596 | Download document in PDF format.PDF

597 |
WILSON, SABRINA NICOLE
604 | 01-03-2018  605 | 607 | DAINS 608 | 610 |

611 | TULSA COUNTY PUBLIC DEFENDER'S OFFICE INSPECTION REQUEST 612 |

613 |

614 | Document Available (#1039096460) 615 | Download document in TIFF format.TIFF 616 |    617 | Download document in PDF format.PDF

618 |
WILSON, SABRINA NICOLE
625 | 01-04-2018  626 | 628 | PA 629 | 631 |

632 | PAUPER'S AFFIDAVIT 633 |

634 |

635 | Document Available (#1039096611) 636 | Download document in TIFF format.TIFF 637 |    638 | Download document in PDF format.PDF

639 |
WILSON, SABRINA NICOLE
646 | 01-05-2018  647 | 649 | RETCO 650 | 652 |

653 | RETURN COMMITMENT 654 |

655 |

656 | Document Available (#1039100255) 657 | Download document in TIFF format.TIFF 658 |    659 | Download document in PDF format.PDF

660 |
WILSON, SABRINA NICOLE
667 | 01-08-2018  668 | 670 | ORC 671 | 673 |

674 | ORDER FOR REASSIGNMENT OF CRIMINAL DISTRICT JUDGE 675 |

676 |

677 | Document Available (#1039210514) 678 | Download document in TIFF format.TIFF 679 |    680 | Download document in PDF format.PDF

681 |
WILSON, SABRINA NICOLE
688 | 01-09-2018  689 | 691 | AFPCA 692 | 694 |

695 | AFFIDAVIT & FINDING OF PROBABLE CAUSE T.R.A.C.I.S. (ARRESTED) 696 |

697 |

Document Available at Court Clerk's Office

698 |
WILSON, SABRINA NICOLE
705 | 01-17-2018  706 | 708 | CTPASS 709 | 711 |

712 | JUDGE JAMES KEELEY: DEFENDANT PRESENT, IN CUSTODY AND REPRESENTED BY KASEY BALDWIN. STATE REPRESENTED BY SEAN WATERS. PRELIMINARY HEARING PASSED FOR STATUS OF CIVIL COMMITMENT TO 2/28/18 AT 9:00 AM ROOM 347. BOND TO REMAIN; DEFENDANT REMANDED TO CUSTODY. 713 |

714 |
WILSON, SABRINA NICOLE
721 | 01-18-2018  722 | 724 | ADISC 725 | 727 |

728 | ACKNOWLEDGEMENT OF RECEIPT OF DISCOVERY 729 |

730 |

731 | Document Available (#1039206463) 732 | Download document in TIFF format.TIFF 733 |    734 | Download document in PDF format.PDF

735 |
742 | 02-01-2018  743 | 745 | CTFREE 746 | 748 |

749 | JUDGE JAMES KEELEY: COURT RESETS STATUS OF CIVIL COMMITMENT TO 2/7/18 AT 9:00 AM ROOM 347 750 |

751 |
WILSON, SABRINA NICOLE
758 | 02-07-2018  759 | 761 | CTFREE 762 | 764 |

765 | JUDGE JAMES KEELEY: DEFENDANT PRESENT, IN CUSTODY, AND REPRESENTED BY KASEY BALDWIN PD. STATE REPRESENTED BY SEAN WATERS. PRELIMINARY HEARING/REVIEW PASSED TO 3-21-2018 @ 9 AM IN ROOM 347. PR BOND AUTHORIZED, RELEASE ISSUED, DEFENDANT APPEARANCE IS WAIVED AT NEXT COURT DATE IF DEFENDANT IS INCOMPLIANT WITH CIVIL COMMIT PROGRAM. . 766 |

767 |
WILSON, SABRINA NICOLE
774 | 02-08-2018  775 | 777 | PR 778 | 780 |

781 | RECOGNIZANCE BOND FOR WILSON, SABRINA NICOLE POSTED BY WILSON, SABRINA NICOLE, POSTED 02/08/2018 782 |

783 |

784 | Document Available (#1039453123) 785 | Download document in TIFF format.TIFF 786 |    787 | Download document in PDF format.PDF

788 |
WILSON, SABRINA NICOLE 792 | $ 35.00 793 |
797 | 02-09-2018  798 | 800 | RETRL 801 | 803 |

804 | RETURN RELEASE 805 |

806 |

807 | Document Available (#1039525166) 808 | Download document in TIFF format.TIFF 809 |    810 | Download document in PDF format.PDF

811 |
WILSON, SABRINA NICOLE
818 | 03-21-2018  819 | 821 | CTPASS 822 | 824 |

825 | JUDGE JAMES KEELEY: DEFENDANT NOT PRESENT, BENCH WARRANT UNDER ADVISEMENT, AND REPRESENTED BY KASEY BALDWIN. STATE REPRESENTED BY MARY KNOPP. PRELIMINARY HEARING/REVIEW PASSED TO 3-28-2018 @ 9 AM IN ROOM 347. DEFENDANT'S APPEARANCE IS WAIVED AT NEXT COURT DATE IF DEFENDANT IS INCOMPLIANT WITH CIVIL COMMIT PROGRAM.
826 |

827 |
WILSON, SABRINA NICOLE
834 | 03-28-2018  835 | 837 | DISMISSED 838 | 840 |

841 | JUDGE JAMES KEELEY: DEFENDANT PRESENT, NOT IN CUSTODY AND REPRESENTED BY PUBLIC DEFENDER. STATE REPRESENTED BY MARY KNOPP. DSMISSED BY COURT, AT REQUEST OF STATE, COST TO STATE. BOND EXONERATED.
842 |

843 |
1   845 | WILSON, SABRINA NICOLE
851 |
852 | 910 |
911 | 912 | 913 | 914 | 915 | 916 | 917 | 918 | 924 | 925 | 926 | -------------------------------------------------------------------------------- /examples/multi_case_table.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | OSCN Case Details 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 36 | 37 | 38 | 47 | 48 | 49 |
50 | 51 |
52 | 56 | 98 | 116 |
117 |
118 | 240 | 241 | 245 | 246 | 247 | 248 | 249 | 250 | 251 | 252 | 253 | 254 | 255 | 256 | 257 | 258 | 260 | 261 | 262 | 263 | 264 | 265 | 267 | 268 | 269 |
delaware County Search Results 242 | 243 | Found 2 Records. 244 |
Case NumberDate FiledStyleDate Closed
CF-2018-00020A01/12/2018STATE OF OKLAHOMA V. COLTON WAYNE GIBSON 259 |
CF-2018-00020B01/12/2018STATE OF OKLAHOMA V. JUSTIN GIBSON 266 |
270 |
271 | 329 |
330 | 331 | 332 | 333 | 334 | 335 | 336 | 337 | 343 | 344 | 345 | -------------------------------------------------------------------------------- /examples/number_not_used.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | OSCN Case Details 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 36 | 37 | 38 | 47 | 48 | 49 |
50 | 51 |
52 | 56 | 98 | 116 |
117 |
118 | 240 | 296 |

The information on this page is NOT an official 297 | record. Do not rely on the correctness or completeness of this 298 | information. Verify all information with the official record 300 | keeper. The information contained in this report is provided in 301 | compliance with the Oklahoma Open Records Act, 302 | 51 O.S. 24A.1. 303 | Use of this information is governed by this act, as well as other 304 | applicable state and federal laws. 305 |

306 |

In the District Court in and for adair County, Oklahoma

308 | 309 | 310 | 311 | 316 | 317 |
THIS CASE NUMBER WAS NOT USED 312 | No. CF-2017-122
313 | (Criminal Felony) 314 |


315 | Filed:


Judge: Unassigned
318 |

Parties

319 |

   None   320 |

321 |

Attorneys

322 |

  None

323 |

Events

324 |

325 |   None

326 |

Counts

327 |

328 | Parties appear only under the counts with which they were charged. For complete sentence information, see the court minute on the docket.
 

329 |

Docket

   None
330 | 388 |
389 | 390 | 391 | 392 | 393 | 394 | 395 | 396 | 402 | 403 | 404 | -------------------------------------------------------------------------------- /oscn/__init__.py: -------------------------------------------------------------------------------- 1 | from . import _meta 2 | from . import request 3 | from . import parse 4 | from . import find 5 | 6 | counties = _meta.courts() 7 | courts = _meta.courts() 8 | judges = _meta.judges() 9 | 10 | 11 | type = _meta.get_type 12 | types = _meta.all_types() 13 | -------------------------------------------------------------------------------- /oscn/_meta.py: -------------------------------------------------------------------------------- 1 | import requests 2 | from requests.exceptions import ConnectionError 3 | 4 | import functools 5 | 6 | from bs4 import BeautifulSoup 7 | 8 | from . import settings 9 | 10 | OSCN_URL = settings.OSCN_SEARCH_URL 11 | OSCN_HEADER = settings.OSCN_REQUEST_HEADER 12 | OSCN_PARTY_URL = settings.OSCN_PARTY_URL 13 | OSCN_DOCKET_URL = settings.OSCN_DOCKET_URL 14 | 15 | 16 | def search_get(**kwargs): 17 | try: 18 | response = requests.get(OSCN_URL, kwargs, headers=OSCN_HEADER, verify=False) 19 | except ConnectionError: 20 | return "" 21 | return response 22 | 23 | 24 | def party_get(id, db="oklahoma"): 25 | party_params = {"db": db, "id": id} 26 | try: 27 | response = requests.get( 28 | OSCN_PARTY_URL, party_params, headers=OSCN_HEADER, verify=False 29 | ) 30 | except ConnectionError: 31 | return "" 32 | return response 33 | 34 | 35 | def docket_get(judge_id, start_date): 36 | 37 | params = { 38 | "report": "WebJudicialDocketJudgeAll", 39 | "errorcheck": "true", 40 | "Judge": judge_id, 41 | "database": "", 42 | "db": "Oklahoma", 43 | "StartDate": start_date, 44 | "GeneralNumber": "1", 45 | "generalnumber1": "1", 46 | "GeneralCheck": "on", 47 | } 48 | 49 | try: 50 | response = requests.get( 51 | OSCN_DOCKET_URL, params, headers=OSCN_HEADER, verify=False 52 | ) 53 | except ConnectionError: 54 | return "" 55 | return response 56 | 57 | 58 | @functools.lru_cache() 59 | def courts(): 60 | try: 61 | response = requests.get( 62 | "https://www.oscn.net/dockets/", 63 | headers=settings.OSCN_REQUEST_HEADER, 64 | verify=False, 65 | ) 66 | soup = BeautifulSoup(response.text, "html.parser") 67 | form = soup.find("form", action="Results.aspx") 68 | select = form.find("select", id="db") 69 | options = select.find_all("option") 70 | court_vals = [option["value"] for option in options] 71 | court_vals.remove("all") 72 | return court_vals 73 | except: 74 | return settings.ALL_COURTS 75 | 76 | 77 | @functools.lru_cache() 78 | def judges(): 79 | try: 80 | response = requests.get( 81 | "https://www.oscn.net/applications/oscn/report.asp?report=WebJudicialDocketJudgeAll", 82 | headers=settings.OSCN_REQUEST_HEADER, 83 | verify=False, 84 | ) 85 | soup = BeautifulSoup(response.text, "html.parser") 86 | form = soup.find("form") 87 | select = form.find("select") 88 | options = select.find_all("option") 89 | judge_numbers = [option["value"] for option in options] 90 | judge_names = [option.text for option in options] 91 | judges_dict = [ 92 | {"number": num, "name": name} 93 | for num, name in zip(judge_numbers, judge_names) 94 | ] 95 | return judges_dict 96 | 97 | except: 98 | return settings.ALL_JUDGES 99 | 100 | 101 | def get_type(type_code): 102 | get_type = settings.ALL_TYPES.get(type_code, "") 103 | return get_type 104 | 105 | 106 | def all_types(): 107 | return settings.ALL_TYPES 108 | -------------------------------------------------------------------------------- /oscn/find/__init__.py: -------------------------------------------------------------------------------- 1 | from .searches import CaseIndexes 2 | -------------------------------------------------------------------------------- /oscn/find/parse.py: -------------------------------------------------------------------------------- 1 | import urllib 2 | 3 | from bs4 import BeautifulSoup 4 | 5 | 6 | def is_case_url(lnk): 7 | pass_test = False 8 | try: 9 | parent_row = lnk.find_parent("tr") 10 | pass_test = "resultTableRow" in parent_row.attrs["class"] 11 | except KeyError: 12 | pass 13 | return pass_test 14 | 15 | 16 | def get_case_indexes(oscn_html): 17 | case_indexes = [] 18 | soup = BeautifulSoup(oscn_html, "html.parser") 19 | counties = soup.find_all("table", "caseCourtTable") 20 | 21 | for county in counties: 22 | found_county = "" 23 | county_indexes = [] 24 | links = county.find_all("a") 25 | case_urls = filter(is_case_url, links) 26 | for link in case_urls: 27 | oscn_query = link.attrs["href"] 28 | parsed_query = urllib.parse.urlparse(oscn_query) 29 | params = urllib.parse.parse_qs(parsed_query.query) 30 | found_county = params["db"][0] 31 | index = params["db"] + params["number"] 32 | case_index = "-".join(index) 33 | if case_index not in county_indexes: 34 | county_indexes.append(case_index) 35 | more_url = county.find("td", "moreResults") 36 | if more_url: 37 | case_indexes.append(f"{found_county}-more") 38 | case_indexes += county_indexes 39 | 40 | return case_indexes 41 | -------------------------------------------------------------------------------- /oscn/find/searches.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | 3 | from enum import Enum 4 | 5 | from oscn._meta import search_get 6 | 7 | from .parse import get_case_indexes 8 | 9 | # OSCN search wildcards '%' and '_' 10 | # % Smi% 11 | # _ Sm_th 12 | 13 | SEARCH_PARAMS = { 14 | "db": "all", 15 | "number": "", 16 | "lname": "", 17 | "fname": "", 18 | "mname": "", 19 | "DoBMin": "", 20 | "DoBMax": "", 21 | "partytype": "", 22 | "apct": "", 23 | "dcct": "", 24 | "FiledDateL": "01/01/2020", 25 | "FiledDateH": "", 26 | "ClosedDateL": "", 27 | "ClosedDateH": "", 28 | "iLC": "", 29 | "iLCType": "", 30 | "iYear": "", 31 | "iNumber": "", 32 | "citation": "", 33 | } 34 | 35 | 36 | class OSCN_SearchParams(Enum): 37 | county = "db" 38 | last_name = "lname" 39 | first_name = "fname" 40 | middle_name = "mname" 41 | filed_after = "FiledDateL" 42 | filed_before = "FiledDateH" 43 | closed_after = "ClosedDateL" 44 | closed_before = "ClosedDateH" 45 | 46 | 47 | class CaseIndexes(object): 48 | def __init__(self, **kwargs): 49 | self.search = SEARCH_PARAMS.copy() 50 | for kw in kwargs.keys(): 51 | if kw in OSCN_SearchParams.__members__: 52 | oscn_param = OSCN_SearchParams[kw].value 53 | self.search[oscn_param] = kwargs[kw] 54 | elif kw in self.search.keys(): 55 | self.search[kw] = kwargs[kw] 56 | 57 | name_params = ["lname", "fname", "mname"] 58 | add_wildcards = lambda nm: "%25".join(nm.split()) 59 | for param in name_params: 60 | self.search[param] = add_wildcards(self.search[param]) 61 | 62 | if "text" in kwargs.keys(): 63 | self.text = kwargs["text"] 64 | self.source = "" 65 | else: 66 | results = search_get(**self.search) 67 | self.text = results.text 68 | self.source = f"{results.request.url}?{results.request.body}" 69 | 70 | self._indexes = self._case_indexes() 71 | 72 | def __iter__(self): 73 | return self 74 | 75 | def __next__(self): 76 | return next(self._indexes) 77 | 78 | def _case_indexes(self): 79 | cases = get_case_indexes(self.text) 80 | skip_county = "" 81 | for case_index in cases: 82 | county, type = case_index.split("-")[:2] 83 | if type == "more": 84 | skip_county = county 85 | county_search = self.search.copy() 86 | county_search["db"] = county 87 | county_results = search_get(**county_search) 88 | county_cases = get_case_indexes(county_results.text) 89 | for county_idx in county_cases: 90 | yield county_idx 91 | else: 92 | if county != skip_county: 93 | yield case_index 94 | -------------------------------------------------------------------------------- /oscn/parse/__init__.py: -------------------------------------------------------------------------------- 1 | from os import listdir 2 | from os.path import splitext 3 | from importlib import import_module 4 | from types import FunctionType 5 | 6 | 7 | # The following code searches for parse functions to allow them to be 8 | # added to objects as properties using the append_parsers as a decorator 9 | 10 | # it also imports them so they are available from `oscn.parse` 11 | 12 | parse_functions = [] 13 | 14 | try: 15 | for py_file in listdir(__path__[0]): 16 | parse_module = import_module(f".{splitext(py_file)[0]}", package=__package__) 17 | for name in dir(parse_module): 18 | attr = getattr(parse_module, name) 19 | if isinstance(attr, FunctionType): 20 | if hasattr(attr, "target"): 21 | # replaces 'from .counts import counts' 22 | locals()[name] = attr 23 | parse_functions.append(attr) 24 | except NameError: 25 | pass 26 | 27 | 28 | def make_safe_parser(fn): 29 | on_error = fn._default_value if hasattr(fn, "_default_value") else False 30 | 31 | def safe_parser(self): 32 | try: 33 | return fn(self.text) 34 | except: 35 | return on_error 36 | 37 | return safe_parser 38 | 39 | 40 | def make_property(parse_function): 41 | return property(make_safe_parser(parse_function)) 42 | 43 | 44 | # this function accepts a class and searches for 45 | # parsers to be added to it. It is used as decortator for Case and Party 46 | def append_parsers(obj): 47 | for fn in parse_functions: 48 | if obj.__name__ in fn.target: 49 | setattr(obj, fn.__name__, make_property(fn)) 50 | return obj 51 | -------------------------------------------------------------------------------- /oscn/parse/_helpers.py: -------------------------------------------------------------------------------- 1 | import re 2 | import json 3 | from unicodedata import normalize 4 | 5 | 6 | def clean_string(some_string): 7 | # Normalize unicode characters 8 | normal_str = normalize("NFKD", some_string) 9 | # Remove all types of whitespace by splitting and rejoining 10 | condensed = ' '.join(normal_str.split()) 11 | return condensed 12 | 13 | def text_values(ResultSet): 14 | return [clean_string(el.text) for el in ResultSet] 15 | 16 | 17 | def column_titles(thead): 18 | return [hdr for hdr in map(lambda str: str.lower(), text_values(thead))] 19 | 20 | 21 | def add_properties(obj, names, values): 22 | for idx, value in enumerate(values): 23 | setattr(obj, names[idx], value) 24 | 25 | 26 | def lists2dict(keys, values): 27 | return {k: v for k, v in map(lambda k, v: (k, v), keys, values)} 28 | 29 | 30 | def old_find_values(soup, key_names): 31 | key_values = [] 32 | for key in key_names: 33 | key_found = soup.find(string=re.compile(f"{key}:")) 34 | key_value = key_found.split(":")[1] if key_found else "" 35 | key_values.append(clean_string(key_value)) 36 | return lists2dict(key_names, key_values) 37 | 38 | 39 | def find_values(soup, key_names): 40 | """ 41 | Find key word in soup and return a dictionary of key value pairs 42 | """ 43 | return { 44 | key: clean_string((match := soup.find(string=lambda text: text and f"{key}:" in text)) and match.split(":", 1)[1].strip() or "") 45 | for key in key_names 46 | } 47 | 48 | 49 | 50 | # class to allow adding metadata to returned lists 51 | class MetaList(list): 52 | saved_text = "" 53 | 54 | @property 55 | def text(self): 56 | return self.saved_text 57 | 58 | @text.setter 59 | def text(self, new_text): 60 | self.saved_text = clean_string(new_text) 61 | 62 | def add_text(self, more_text): 63 | self.text += clean_string(more_text) 64 | 65 | 66 | def table2dict(bs4_table): 67 | get_text = lambda el: clean_string(el.text) 68 | header = bs4_table.thead.find_all("th") 69 | data = bs4_table.tbody.find_all("td") 70 | keys = map(get_text, header) 71 | values = map(get_text, data) 72 | new_dict = {k: v for k, v in zip(keys, values)} 73 | return new_dict 74 | -------------------------------------------------------------------------------- /oscn/parse/bs4_attorneys.py: -------------------------------------------------------------------------------- 1 | from bs4 import BeautifulSoup 2 | from ._helpers import clean_string, MetaList 3 | 4 | 5 | def bs4_attorneys(oscn_html): 6 | attorney_list = MetaList() 7 | soup = BeautifulSoup(oscn_html, "html.parser") 8 | attorneys_h2 = soup.find("h2", "section attorneys") 9 | attorney_table = None 10 | 11 | attorney_table = attorneys_h2.find_next_sibling("table") 12 | 13 | current_h2 = attorney_table.find_previous_sibling("h2") 14 | if attorneys_h2 == current_h2: 15 | attorney_list.text = attorney_table.get_text(separator=" ") 16 | rows = attorney_table.find("tbody").find_all("tr") 17 | for row in rows: 18 | row_tds = row.find_all("td") 19 | attorney_with_address = [ 20 | clean_string(s) for s in row_tds[0].stripped_strings 21 | ] 22 | representing = clean_string(row_tds[1].text) 23 | attorney_list.append( 24 | { 25 | "name": attorney_with_address[0], 26 | "address": attorney_with_address[1:], 27 | "representing": representing, 28 | } 29 | ) 30 | return attorney_list 31 | 32 | 33 | -------------------------------------------------------------------------------- /oscn/parse/bs4_cmids.py: -------------------------------------------------------------------------------- 1 | # Some case URL's return a list of 'sbu' caseStyle 2 | # See an example in examples/multi_case_table.html 3 | # this function looks return a list of cmids for if found 4 | 5 | from bs4 import BeautifulSoup 6 | from urllib.parse import parse_qs 7 | 8 | 9 | def bs4_cmids(oscn_html): 10 | soup = BeautifulSoup(oscn_html, "html.parser") 11 | cmids = [] 12 | ref_table = soup.find("table", "multipleRecords") 13 | if ref_table: 14 | for row in ref_table.find("tbody").find_all("tr"): 15 | href = row.find("a")["href"] 16 | cmids += parse_qs(href)["cmid"] 17 | 18 | return cmids 19 | 20 | 21 | -------------------------------------------------------------------------------- /oscn/parse/bs4_counts.py: -------------------------------------------------------------------------------- 1 | from bs4 import BeautifulSoup 2 | 3 | from ._helpers import find_values, MetaList 4 | 5 | # count_re = r'Count as Filed:[^A-Z]*([A-Z|\d]*),\s(.*)' 6 | # count_details = re.compile(count_re, re.M) 7 | 8 | 9 | def bs4_counts(oscn_html): 10 | count_list = MetaList() 11 | 12 | soup = BeautifulSoup(oscn_html, "html.parser") 13 | counts = soup.find_all("div", "CountsContainer") 14 | if counts: 15 | for count in counts: 16 | count_list.add_text(count.get_text(separator=" ")) 17 | count_keys = [ 18 | "Count as Disposed", 19 | "Count as Filed", 20 | "Disposed", 21 | "Date of Offense", 22 | ] 23 | count_values = find_values(count, count_keys) 24 | count_desc = ( 25 | count_values["Count as Disposed"] 26 | if count_values["Count as Disposed"] 27 | else count_values["Count as Filed"] 28 | ) 29 | 30 | # find violation 31 | found_it = False 32 | violated_statute = "" 33 | for str in count.strings: 34 | if found_it: 35 | violated_statute = str 36 | break 37 | found_it = True if "violation" in str.lower() else False 38 | 39 | save_count_info = { 40 | "party": count.nobr.text, 41 | "offense": count_values["Date of Offense"], 42 | "description": count_desc, 43 | "disposed": count_values["Disposed"], 44 | "violation": violated_statute, 45 | } 46 | count_list.append(save_count_info) 47 | else: 48 | count_start = soup.find("h2", "section counts") 49 | next_sibling = count_start.find_next_sibling("p") 50 | if next_sibling: 51 | while next_sibling.name != "h2": 52 | if next_sibling.name == "p": 53 | count_list.add_text(next_sibling.get_text(separator=" ")) 54 | next_sibling.strong.extract() 55 | count_list.append({"description": next_sibling.text.strip()}) 56 | next_sibling = next_sibling.next_sibling 57 | 58 | return count_list 59 | 60 | 61 | -------------------------------------------------------------------------------- /oscn/parse/bs4_docket.py: -------------------------------------------------------------------------------- 1 | from bs4 import BeautifulSoup 2 | from ._helpers import text_values, lists2dict, MetaList 3 | 4 | 5 | def bs4_docket(oscn_html): 6 | soup = BeautifulSoup(oscn_html, "html.parser") 7 | docket_table = soup.find("table", "docketlist") 8 | thead = docket_table.find("thead").find_all("th") 9 | rows = docket_table.find("tbody").find_all("tr") 10 | 11 | minutes = MetaList() 12 | minutes.text = docket_table.text 13 | 14 | # make a lower case list of column headers 15 | columns = [hdr for hdr in map(lambda str: str.lower(), text_values(thead))] 16 | 17 | for row in rows: 18 | cells = row.find_all("td") 19 | values = text_values(cells) 20 | minute = lists2dict(columns, values) 21 | minute["html"] = row.decode() 22 | minutes.append(minute) 23 | 24 | # clean up blank dates 25 | saved_date = minutes[0]["date"] 26 | for idx, min in enumerate(minutes): 27 | if min["date"]: 28 | saved_date = min["date"] 29 | else: 30 | min["date"] = saved_date 31 | 32 | return minutes 33 | 34 | 35 | -------------------------------------------------------------------------------- /oscn/parse/bs4_events.py: -------------------------------------------------------------------------------- 1 | import json 2 | import re 3 | 4 | from bs4 import BeautifulSoup 5 | from ._helpers import text_values, column_titles, lists2dict, clean_string, MetaList 6 | 7 | event_pattern = re.compile( 8 | r'\{\s*"date":\s*".*?",\s*"description":\s*".*?"\s*\}', re.DOTALL 9 | ) 10 | 11 | def get_event_dict(event_string): 12 | # Extract the date and description using regex 13 | date_match = re.search(r'"date":\s*"(.*?)"', event_string) 14 | description_match = re.search(r'"description":\s*"(.*?)"', event_string) 15 | 16 | if date_match and description_match: 17 | date = date_match.group(1) 18 | description = description_match.group(1).replace("\\", "\\\\") 19 | return { 20 | "date": date, 21 | "description": description 22 | } 23 | return None 24 | 25 | def escape_description(match): 26 | description = match.group(2) 27 | escaped_description = description.replace("\\", "\\\\") 28 | return f'{match.group(1)}{escaped_description}{match.group(3)}' 29 | 30 | def find_events(some_string): 31 | # Find all event objects using the precompiled regex pattern 32 | event_objects = event_pattern.findall(some_string) 33 | 34 | # Initialize list to hold cleaned events as dictionaries 35 | events = [] 36 | 37 | for event in event_objects: 38 | # Clean up extra whitespace/newlines around the JSON object 39 | clean_event = re.sub(r'[\n\t\r]', '', event).strip() 40 | 41 | # Escape problematic characters in the description field 42 | clean_event = re.sub( 43 | r'("description":\s*")([^"]*?)(")', 44 | escape_description, 45 | clean_event 46 | ) 47 | 48 | # Parse the cleaned string into a dictionary 49 | event_dict = get_event_dict(clean_event) 50 | if event_dict: 51 | events.append(event_dict) 52 | 53 | return events 54 | 55 | def bs4_events(oscn_html): 56 | soup = BeautifulSoup(oscn_html, "html.parser") 57 | if json_script := soup.find("script", {"id": "json_events"}): 58 | return find_events(json_script.string) 59 | 60 | events = MetaList() 61 | events_start = soup.find("h2", "section events") 62 | events_table = events_start.find_next_sibling() 63 | if events_table.name == "table": 64 | events.text = events_table.get_text(separator=" ") 65 | thead = events_table.find("thead").find_all("th") 66 | event_keys = column_titles(thead) 67 | rows = events_table.find("tbody").find_all("tr") 68 | for row in rows: 69 | cells = row.find_all("td") 70 | values = text_values(cells) 71 | event = lists2dict(event_keys, values) 72 | event_font = cells[0].font.extract() 73 | event_date = clean_string(event_font.text) 74 | event["date"] = event_date 75 | event["description"] = clean_string(cells[0].text) 76 | events.append(event) 77 | 78 | return events 79 | 80 | -------------------------------------------------------------------------------- /oscn/parse/bs4_issues.py: -------------------------------------------------------------------------------- 1 | import re 2 | 3 | from bs4 import BeautifulSoup 4 | 5 | from ._helpers import find_values, clean_string, MetaList 6 | 7 | 8 | issue_keys = ["Filed Date", "Filed By", "Issue"] 9 | party_keys = ["Defendant", "Plaintiff", "Respondent", "Disposed"] 10 | 11 | 12 | def make_party_dict(**kwargs): 13 | # test for all keys are empty 14 | if all("" == v for v in kwargs.values()): 15 | return False 16 | 17 | party_type = "" 18 | party_name = "" 19 | 20 | if kwargs["Defendant"]: 21 | party_type = "defendant" 22 | party_name = kwargs["Defendant"] 23 | elif kwargs["Plaintiff"]: 24 | party_type = "plaintiff" 25 | party_name = kwargs["Plaintiff"] 26 | elif kwargs["Respondent"]: 27 | party_type = "respondent" 28 | party_name = kwargs["Respondent"] 29 | 30 | return {"type": party_type, "name": party_name, "disposed": kwargs["Disposed"]} 31 | 32 | 33 | def bs4_issues(oscn_html): 34 | issue_list = MetaList() 35 | soup = BeautifulSoup(oscn_html, "html.parser") 36 | start = soup.find("h2", "section issues") 37 | issue_table = start.find_next_sibling("table") 38 | 39 | # if the next table is the docket there are no issue tables 40 | is_docket = issue_table.find("thead") 41 | 42 | if is_docket: 43 | next_sibling = start.find_next_sibling("p") 44 | if next_sibling: 45 | while next_sibling.name != "h2": 46 | if next_sibling.name == "p": 47 | issue_list.append(clean_string(next_sibling.text)) 48 | next_sibling = next_sibling.next_sibling 49 | 50 | else: 51 | issue_list.text = issue_table.get_text(separator=" ") 52 | while re.search("Issue #", issue_table.text): 53 | # find the issue details 54 | issue_dict = find_values(issue_table, issue_keys) 55 | 56 | # the next table should be dispositions 57 | disp_table = issue_table.find_next_sibling("table") 58 | issue_list.add_text(disp_table.get_text(separator=" ")) 59 | 60 | issue_dict["parties"] = [] 61 | parties_rows = disp_table.find_all("tr") 62 | for row in parties_rows: 63 | # remove formatting elements from td 64 | for td in row.find_all("td"): 65 | td.string = " ".join(td.strings) 66 | party_values = find_values(row, party_keys) 67 | party_dict = make_party_dict(**party_values) 68 | if party_dict: 69 | issue_dict["parties"].append(party_dict) 70 | 71 | issue_list.append(issue_dict) 72 | 73 | # get the next table 74 | issue_table = disp_table.find_next_sibling("table") 75 | 76 | return issue_list 77 | -------------------------------------------------------------------------------- /oscn/parse/dates.py: -------------------------------------------------------------------------------- 1 | import re 2 | from functools import partial 3 | 4 | # Precompile regex patterns for efficiency 5 | FILED_PATTERN = re.compile(r"Filed:\s*([\/\d]*)", re.M) 6 | CLOSED_PATTERN = re.compile(r"Closed:\s*([\/\d]*)", re.M) 7 | OFFENSE_PATTERN = re.compile(r"Date.of.Offense:\s*([\/\d]*)", re.M) 8 | 9 | def make_date_finder(name, compiled_pattern, default="01/01/1970",target=["Case"]): 10 | def find_date(text): 11 | match = compiled_pattern.search(text) 12 | return match.group(1) if match else default 13 | 14 | find_date.__name__ = name 15 | find_date.target = target 16 | return find_date 17 | 18 | # Instantiate finders with precompiled patterns 19 | find_filed_date = make_date_finder("filed", FILED_PATTERN) 20 | find_closed_date = make_date_finder("closed", CLOSED_PATTERN) 21 | find_offense_date = make_date_finder("offense", OFFENSE_PATTERN) 22 | -------------------------------------------------------------------------------- /oscn/parse/docket_report.py: -------------------------------------------------------------------------------- 1 | import urllib.parse as urlparse 2 | 3 | from bs4 import BeautifulSoup 4 | 5 | 6 | def cases(oscn_html): 7 | case_list = [] 8 | soup = BeautifulSoup(oscn_html, "html.parser") 9 | case_tables = soup.findAll("table", "clspg") 10 | 11 | for case in case_tables: 12 | case_link = case.find("a") 13 | parsed = urlparse.urlparse(case_link["href"]) 14 | db = urlparse.parse_qs(parsed.query)["db"][0] 15 | cn = case_link.text 16 | case_index = f"{db}-{cn}" 17 | case_list.append(case_index) 18 | 19 | return case_list 20 | 21 | 22 | setattr(cases, "target", ["Docket"]) 23 | setattr(cases, "_default_value", []) 24 | 25 | 26 | def tables(oscn_html): 27 | case_list = [] 28 | soup = BeautifulSoup(oscn_html, "html.parser") 29 | case_tables = soup.findAll("table", "clspg") 30 | 31 | for case in case_tables: 32 | case_list.append(case.get_text) 33 | 34 | return case_list 35 | 36 | 37 | setattr(tables, "target", ["Docket"]) 38 | setattr(tables, "_default_value", []) 39 | -------------------------------------------------------------------------------- /oscn/parse/judge.py: -------------------------------------------------------------------------------- 1 | import re 2 | 3 | from .. import settings 4 | 5 | find_judge = re.compile(r"Judge:\s*([\w\s\,]*)", re.M) 6 | find_titles = re.compile("|".join(settings.REMOVE_TITLES)) 7 | find_name_parts = re.compile(r"(\w+\.*)", re.I) 8 | 9 | 10 | def judge(oscn_html): 11 | judge_search = find_judge.search(oscn_html) 12 | if judge_search.group: 13 | found_name = judge_search.group(1).upper() 14 | found_name = find_titles.sub("", found_name) 15 | if re.search(",", found_name): 16 | return found_name 17 | else: 18 | name_parts = find_name_parts.findall(found_name) 19 | last_name = name_parts.pop() 20 | if name_parts == []: 21 | return last_name 22 | found_name = last_name + ", " + " ".join(name_parts) 23 | return found_name 24 | else: 25 | return None 26 | 27 | 28 | setattr(judge, "target", ["Case"]) 29 | setattr(judge, "_default_value", []) 30 | -------------------------------------------------------------------------------- /oscn/parse/lax_attorneys.py: -------------------------------------------------------------------------------- 1 | from selectolax.parser import HTMLParser 2 | from ._helpers import clean_string, MetaList 3 | 4 | def attorneys(oscn_html): 5 | attorney_list = MetaList() 6 | tree = HTMLParser(oscn_html) 7 | 8 | attorneys_h2 = tree.css_first('h2.section.attorneys') 9 | if not attorneys_h2: 10 | return attorney_list 11 | 12 | # Loop through elements until a table is found or another h2 is found 13 | next_element = attorneys_h2.next 14 | while next_element: 15 | if next_element.tag == 'table': 16 | attorney_table = next_element 17 | attorney_list.text = attorney_table.text(strip=True) 18 | break 19 | if next_element.tag == 'h2': 20 | return attorney_list 21 | next_element = next_element.next 22 | else: 23 | return attorney_list 24 | 25 | # Extract text from the attorney table 26 | rows = attorney_table.css('tr') # Use 'tr' directly to avoid dependency on 'tbody' 27 | for row in rows: 28 | row_tds = row.css('td') 29 | if len(row_tds) < 2: 30 | continue 31 | 32 | # Extract name and address properly by splitting lines and cleaning each part 33 | attorney_with_address = [clean_string(line) for line in row_tds[0].text(separator="\n").split('\n') if line.strip()] 34 | if not attorney_with_address: 35 | continue 36 | 37 | name = attorney_with_address[0] 38 | address = attorney_with_address[1:] 39 | representing = clean_string(row_tds[1].text()) 40 | 41 | attorney_list.append( 42 | { 43 | "name": name, 44 | "address": address, 45 | "representing": representing, 46 | } 47 | ) 48 | return attorney_list 49 | 50 | # add this attribute to allow it to be added to request objects 51 | setattr(attorneys, "target", ["Case"]) 52 | setattr(attorneys, "_default_value", []) -------------------------------------------------------------------------------- /oscn/parse/lax_body.py: -------------------------------------------------------------------------------- 1 | from selectolax.parser import HTMLParser 2 | 3 | from ._helpers import clean_string 4 | 5 | def body(oscn_html): 6 | tree = HTMLParser(oscn_html) 7 | if not (body := tree.body): 8 | return "" 9 | 10 | excluded_tags = {"script", "style", "div"} 11 | body_tags = [] 12 | for node in body.traverse(): 13 | if node.tag not in excluded_tags: 14 | body_tags.append(node.text(separator=" ")) 15 | 16 | body_text = " ".join(body_tags) 17 | return clean_string(body_text) 18 | 19 | setattr(body, "target", ["Case"]) 20 | setattr(body, "_default_value", "") 21 | -------------------------------------------------------------------------------- /oscn/parse/lax_cmids.py: -------------------------------------------------------------------------------- 1 | from selectolax.parser import HTMLParser 2 | from urllib.parse import parse_qs 3 | 4 | def cmids(oscn_html): 5 | tree = HTMLParser(oscn_html) 6 | cmids = [] 7 | links = tree.css("table.multipleRecords tbody tr a[href]") 8 | seen_cmids = set() 9 | for link in links: 10 | href = link.attributes.get("href", "") 11 | cmid_values = parse_qs(href).get("cmid", []) 12 | for cmid in cmid_values: 13 | if cmid not in seen_cmids: 14 | seen_cmids.add(cmid) 15 | cmids.append(cmid) 16 | 17 | return cmids 18 | 19 | setattr(cmids, "target", ["Case"]) 20 | setattr(cmids, "_default_value", []) 21 | -------------------------------------------------------------------------------- /oscn/parse/lax_counts.py: -------------------------------------------------------------------------------- 1 | import re 2 | from selectolax.parser import HTMLParser 3 | from ._helpers import clean_string, MetaList 4 | 5 | def extract_text_after_keyword(keyword, stop_keywords=None): 6 | stop_pattern = "|".join(map(re.escape, stop_keywords)) if stop_keywords else "$" 7 | pattern = re.compile(rf"{re.escape(keyword)}(.*?)(?:{stop_pattern})", re.DOTALL) 8 | 9 | def extract(content): 10 | match = pattern.search(content) 11 | return clean_string(match.group(1)) if match else "" 12 | 13 | return extract 14 | 15 | # Curried extract functions with pre-compiled regex patterns 16 | extract_count_description = extract_text_after_keyword("Count as Filed:", [", in violation of"]) 17 | extract_count_as_disposed = extract_text_after_keyword("Count as Disposed:",["
"]) 18 | extract_offense_date = extract_text_after_keyword("Date of Offense:", ["
"]) 19 | extract_disposed_value = extract_text_after_keyword("Disposed:", ["<"]) 20 | 21 | def parse_count_container(counts_container): 22 | count_info = { 23 | "party": "", 24 | "offense": "", 25 | "description": "", 26 | "disposed": "", 27 | "violation": "" 28 | } 29 | 30 | # Extract Count Description and Date of Offense 31 | count_description_td = counts_container.css_first("td.CountDescription") 32 | if not count_description_td: 33 | return None 34 | 35 | count_description_text = clean_string(count_description_td.html) 36 | count_info["description"] = extract_count_description(count_description_text) 37 | count_info["offense"] = extract_offense_date(count_description_text) 38 | 39 | # Extract Violation Link Text 40 | violation_link = count_description_td.css_first("a[href]") 41 | if violation_link: 42 | count_info["violation"] = violation_link.text(separator=" ") 43 | 44 | # Extract Party Name and Disposed Information 45 | disposition_row = counts_container.css_first("table.Disposition tbody tr") 46 | if disposition_row: 47 | party_name_td = disposition_row.css_first("td.countpartyname nobr") 48 | if party_name_td: 49 | count_info["party"] = party_name_td.text().strip() 50 | 51 | count_disposition_td = disposition_row.css_first("td.countdisposition") 52 | if count_disposition_td: 53 | count_disposition_text = count_disposition_td.html 54 | count_info["disposed"] = extract_disposed_value(count_disposition_text) 55 | if count_as_disposed := extract_count_as_disposed(count_disposition_text): 56 | count_info["description"] = count_as_disposed 57 | return count_info 58 | 59 | def counts(oscn_html): 60 | count_list = MetaList() 61 | tree = HTMLParser(oscn_html) 62 | if counts := tree.css("div.CountsContainer"): 63 | for count in counts: 64 | count_list.add_text(count.text(separator=" ")) 65 | count_info = parse_count_container(count) 66 | if count_info: 67 | count_list.append(count_info) 68 | else: 69 | count_start = tree.css_first("h2.section.counts") 70 | next_element = count_start.next 71 | while next_element.tag != "h2": 72 | if next_element.tag == "p": 73 | count_description = clean_string(next_element.text()) 74 | count_list.append({"description": count_description}) 75 | 76 | next_element = next_element.next 77 | return count_list 78 | 79 | setattr(counts, "target", ["Case"]) 80 | setattr(counts, "_default_value", []) 81 | -------------------------------------------------------------------------------- /oscn/parse/lax_docket.py: -------------------------------------------------------------------------------- 1 | from selectolax.parser import HTMLParser 2 | from ._helpers import clean_string, MetaList 3 | 4 | 5 | def docket(oscn_html): 6 | tree = HTMLParser(oscn_html) 7 | docket_table = tree.css_first("table.docketlist") 8 | if not docket_table: 9 | return MetaList() 10 | 11 | thead = docket_table.css_first("thead") 12 | if not thead: 13 | return MetaList() 14 | 15 | columns = [th.text().strip().lower() for th in thead.css("th")] 16 | 17 | rows = docket_table.css("tbody tr") 18 | minutes = MetaList() 19 | minutes.text = docket_table.text(separator=" ") 20 | 21 | saved_date = "" 22 | for row in rows: 23 | cells = row.css("td") 24 | values = [clean_string(td.text()) if td.text().strip() else "" for td in cells] 25 | minute = dict(zip(columns, values)) 26 | minute["html"] = "".join(line.strip() for line in row.html.splitlines()) 27 | minute["date"] = minute["date"] or saved_date 28 | saved_date = minute["date"] or saved_date 29 | minutes.append(minute) 30 | 31 | return minutes 32 | 33 | setattr(docket, "target", ["Case"]) 34 | setattr(docket, "_default_value", []) -------------------------------------------------------------------------------- /oscn/parse/lax_events.py: -------------------------------------------------------------------------------- 1 | import re 2 | import json 3 | from selectolax.parser import HTMLParser 4 | from ._helpers import lists2dict, clean_string, MetaList 5 | from unicodedata import normalize 6 | 7 | def get_events(json_string: str) -> list[dict]: 8 | json_string = normalize('NFKD', json_string) 9 | date_values = [] 10 | description_values = [] 11 | 12 | date_pattern = r'"date"\s*:\s*"(.*?)"' 13 | date_matches = re.findall(date_pattern, json_string) 14 | date_values.extend(date_matches) 15 | 16 | description_pattern = r'"description"\s*:\s*"(.*?)"' 17 | description_matches = re.findall(description_pattern, json_string) 18 | description_values.extend(description_matches) 19 | 20 | events = [ 21 | {"date": date, "description": description} 22 | for date, description in zip(date_values, description_values) 23 | ] 24 | 25 | return events 26 | 27 | 28 | def column_names(events_table): 29 | thead = events_table.css_first('thead') 30 | if thead: 31 | return [th.text().strip().lower() for th in thead.css('th')] 32 | return [] 33 | 34 | def events(oscn_html): 35 | tree = HTMLParser(oscn_html) 36 | events = MetaList() 37 | # Extract JSON events from the script tag if present 38 | json_script = tree.css_first("script#json_events") 39 | if json_script: 40 | if events := get_events(json_script.text().strip()): 41 | return events 42 | 43 | # Proceed with table extraction if JSON is not found 44 | events_section = tree.css_first('h2.section.events') 45 | if not events_section: 46 | return events 47 | 48 | # Find the events table or stop at the next section 49 | next_element = events_section.next 50 | while next_element: 51 | if next_element.tag == 'table': 52 | events_table = next_element 53 | break 54 | if next_element.tag == 'h2': 55 | return events 56 | next_element = next_element.next 57 | else: 58 | return events 59 | 60 | # Extract table content 61 | events.text = events_table.text(separator=" ") 62 | event_keys = column_names(events_table) 63 | rows = events_table.css('tbody tr') 64 | for row in rows: 65 | cells = row.css('td') 66 | values = [clean_string(td.text()) for td in cells] 67 | event = lists2dict(event_keys, values) 68 | event_description_td = row.css_first('td.event_description') 69 | if event_description_td: 70 | event_font = event_description_td.css_first('font') 71 | if event_font: 72 | event_date = event_font.text().strip() 73 | event["date"] = event_date 74 | event["description"] = event_description_td.text().strip() 75 | events.append(event) 76 | 77 | return events 78 | 79 | # add this attribute to allow it to be added to request objects 80 | setattr(events, "target", ["Case"]) 81 | setattr(events, "_default_value", []) -------------------------------------------------------------------------------- /oscn/parse/lax_issues.py: -------------------------------------------------------------------------------- 1 | import re 2 | from selectolax.parser import HTMLParser 3 | 4 | from ._helpers import clean_string, MetaList 5 | 6 | issue_keys = ["Filed Date", "Filed By", "Issue"] 7 | party_keys = ["Defendant", "Plaintiff", "Respondent", "Disposed"] 8 | 9 | 10 | def find_values(node, key_names): 11 | """ 12 | Find key word in node_text and return a dictionary of key-value pairs. 13 | """ 14 | values = {} 15 | node_text = node.text() 16 | 17 | # Create a single regex pattern to match each key followed by its value 18 | key_pattern = "|".join(re.escape(key) for key in key_names) 19 | pattern = rf"({key_pattern}):\s*(.*?)(?=(?:\s*(?:{key_pattern})|$))" 20 | 21 | # Find all matches for key-value pairs in the text 22 | matches = re.finditer(pattern, node_text, re.DOTALL) 23 | 24 | for match in matches: 25 | key = match.group(1) 26 | value = clean_string(match.group(2).strip()) 27 | values[key] = value 28 | 29 | # Ensure all keys are present in the result, even if not found in the text 30 | for key in key_names: 31 | if key not in values: 32 | values[key] = "" 33 | 34 | return values 35 | 36 | 37 | def next_tag(node, tag): 38 | next_node = node.next 39 | while next_node: 40 | if next_node.tag == tag: 41 | return next_node 42 | next_node = next_node.next 43 | return None 44 | 45 | 46 | def issues(oscn_html): 47 | issue_list = MetaList() 48 | tree = HTMLParser(oscn_html) 49 | issues_header = tree.css_first("h2.section.issues") 50 | if not issues_header: 51 | return issue_list 52 | 53 | issue_table = next_tag(issues_header, "table") 54 | docket_header = tree.css_first("h2.section.dockets") 55 | docket_table = next_tag(docket_header, "table") 56 | 57 | if (docket_table == issue_table): 58 | next_element = issues_header.next 59 | while next_element != docket_header: 60 | if next_element.tag == "p": 61 | issue_list.append(clean_string(next_element.text())) 62 | next_element = next_element.next 63 | else: 64 | issue_list.text = issue_table.text() 65 | while issue_table and "Issue #" in issue_table.text(): 66 | issue_dict = find_values(issue_table, issue_keys) 67 | disp_table = next_tag(issue_table, "table") 68 | if disp_table != docket_table: 69 | issue_list.add_text(disp_table.text()) 70 | name_details = disp_table.css("td.countpartyname") 71 | dispositions = disp_table.css("td.countdisposition") 72 | issue_dict["parties"] = [] 73 | for name_detail, disposition in zip(name_details, dispositions): 74 | name_detail_text = clean_string(name_detail.text()) 75 | if not name_detail_text: 76 | continue 77 | if ":" in name_detail_text: 78 | party_type, party_name = clean_string(name_detail.text()).split( 79 | ": " 80 | ) 81 | else: 82 | party_type = "" 83 | party_name = name_detail_text 84 | disposition_text = disposition.text() 85 | disposed = ( 86 | disposition_text.split(": ", 1)[1] 87 | if (":" in disposition_text) 88 | else disposition.text() 89 | ) 90 | party = { 91 | "type": clean_string((party_type).lower()), 92 | "name": clean_string(party_name), 93 | "disposed": clean_string(disposed), 94 | } 95 | issue_dict["parties"].append(party) 96 | issue_list.append(issue_dict) 97 | issue_table = next_tag(disp_table, "table") 98 | 99 | return issue_list 100 | 101 | 102 | # add this attribute to allow it to be added to request objects 103 | setattr(issues, "target", ["Case"]) 104 | setattr(issues, "_default_value", []) 105 | -------------------------------------------------------------------------------- /oscn/parse/lax_parties.py: -------------------------------------------------------------------------------- 1 | import urllib.parse 2 | from selectolax.parser import HTMLParser 3 | from ._helpers import clean_string, MetaList 4 | 5 | def get_party_info(link): 6 | href = link.attributes.get("href", "") 7 | party_id = href.split("id=")[-1] if "id=" in href else "" 8 | return party_id, href 9 | 10 | def parties(oscn_html): 11 | party_list = MetaList() 12 | tree = HTMLParser(oscn_html) 13 | 14 | section_header = tree.css_first('h2.section.party') 15 | if not section_header: 16 | return party_list 17 | 18 | # Find the next paragraph or stop at the next section header 19 | next_element = section_header.next 20 | while next_element: 21 | if next_element.tag == 'p': 22 | party_paragraph = next_element 23 | break 24 | if next_element.tag == 'h2': 25 | return party_list 26 | next_element = next_element.next 27 | else: 28 | return party_list 29 | party_list.text = party_paragraph.text().strip() 30 | # Extract party details from spans within the paragraph 31 | party_spans = party_paragraph.css('span.parties_party') 32 | for party_span in party_spans: 33 | name_span = party_span.css_first('a.parties_partyname, span.parties_partyname') 34 | type_span = party_span.css_first('span.parties_type') 35 | 36 | if not name_span or not type_span: 37 | continue 38 | 39 | name = clean_string(name_span.text()) 40 | if name.lower() == 'and': 41 | continue # Skip 'AND' entries 42 | 43 | party_id, href = get_party_info(name_span) if name_span.tag == 'a' else ("", "") 44 | party_type = clean_string(type_span.text()) 45 | 46 | party_list.append({ 47 | "name": name, 48 | "type": party_type, 49 | "id": party_id, 50 | "href": href, 51 | }) 52 | return party_list 53 | 54 | # add this attribute to allow it to be added to request objects 55 | setattr(parties, "target", ["Case"]) 56 | setattr(parties, "_default_value", []) -------------------------------------------------------------------------------- /oscn/parse/parties.py: -------------------------------------------------------------------------------- 1 | import urllib.parse 2 | 3 | from bs4 import BeautifulSoup 4 | 5 | from ._helpers import clean_string, MetaList 6 | 7 | 8 | def get_party_id(link): 9 | href = link["href"] 10 | url = urllib.parse.urlparse(href) 11 | params = urllib.parse.parse_qs(url.query) 12 | try: 13 | party_id = params["id"][0] 14 | return party_id 15 | except KeyError: 16 | return "" 17 | 18 | 19 | def bs4_parties(oscn_html): 20 | names = [] 21 | types = [] 22 | party_ids = [] 23 | 24 | soup = BeautifulSoup(oscn_html, "html.parser") 25 | start = soup.find("h2", "section party") 26 | party_p = start.find_next_sibling("p") 27 | party_links = party_p.find_all("a") 28 | named_parties = MetaList() 29 | named_parties.text = party_p.get_text(separator=" ") 30 | 31 | if party_links: 32 | names = [link.text for link in party_links] 33 | party_ids = [get_party_id(link) for link in party_links] 34 | party_types = [link.find_next_sibling("span") for link in party_links] 35 | types = [party_type.text for party_type in party_types] 36 | 37 | else: 38 | 39 | def get_name_and_type(string): 40 | # separates a line like this into name and type 41 | # HEFFLIN,\xa0 ASHLEY\xa0 LAUREN,\r\nRespondent' 42 | # import ipdb; ipdb.set_trace() # fmt: skip 43 | more_strings = string.split(",") 44 | # take the last item off the list 45 | get_type = more_strings.pop(-1) 46 | # put it back together 47 | name = ",".join(more_strings) 48 | return name, get_type 49 | 50 | for party_text in party_p.strings: 51 | name, party_type = get_name_and_type(party_text) 52 | names.append(name) 53 | types.append(party_type) 54 | party_ids.append("") 55 | 56 | def Party(name, type_string, id_param): 57 | return { 58 | "name": clean_string(name), 59 | "type": clean_string(type_string), 60 | "id": id_param, 61 | } 62 | 63 | raw_parties = map(Party, names, types, party_ids) 64 | 65 | for party in raw_parties: 66 | if party["name"]: 67 | named_parties.append(party) 68 | 69 | return named_parties 70 | -------------------------------------------------------------------------------- /oscn/parse/party_addresses.py: -------------------------------------------------------------------------------- 1 | from bs4 import BeautifulSoup 2 | 3 | from ._helpers import clean_string 4 | 5 | 6 | def addresses(oscn_html): 7 | address_list = [] 8 | get_text = lambda el: clean_string(el.text) 9 | 10 | soup = BeautifulSoup(oscn_html, "html.parser") 11 | 12 | table = soup.find("table", "partytable addresses") 13 | header = table.thead.find_all("th") 14 | keys = list(map(get_text, header)) 15 | 16 | for row in table.tbody.find_all("tr"): 17 | data = row.find_all("td") 18 | values = map(get_text, data) 19 | new_dict = {k: v for k, v in zip(keys, values)} 20 | address_list.append(new_dict) 21 | 22 | return address_list 23 | 24 | 25 | setattr(addresses, "target", ["Party"]) 26 | setattr(addresses, "_default_value", []) 27 | -------------------------------------------------------------------------------- /oscn/parse/party_profile.py: -------------------------------------------------------------------------------- 1 | from bs4 import BeautifulSoup 2 | 3 | from ._helpers import table2dict 4 | 5 | 6 | def profile(oscn_html): 7 | soup = BeautifulSoup(oscn_html, "html.parser") 8 | 9 | profile_table = soup.find("table", "partytable personal") 10 | 11 | profile_dict = table2dict(profile_table) 12 | 13 | return profile_dict 14 | 15 | 16 | setattr(profile, "target", ["Party"]) 17 | setattr(profile, "_default_value", {}) 18 | -------------------------------------------------------------------------------- /oscn/parse/party_properties.py: -------------------------------------------------------------------------------- 1 | from bs4 import BeautifulSoup 2 | 3 | from ._helpers import table2dict 4 | 5 | 6 | def make_property_finder(table_class, key): 7 | def find_property(oscn_html): 8 | soup = BeautifulSoup(oscn_html, "html.parser") 9 | table = soup.find("table", table_class) 10 | property_dict = table2dict(table) 11 | return property_dict[key] 12 | 13 | return find_property 14 | 15 | 16 | find_name = make_property_finder("partytable partymain", "Requested Party") 17 | find_name.__name__ = "name" 18 | setattr(find_name, "target", ["Party"]) 19 | setattr(find_name, "_default_value", "") 20 | 21 | 22 | find_alias = make_property_finder("partytable partymain", "Alias or Alternate Names") 23 | find_alias.__name__ = "alias" 24 | setattr(find_alias, "target", ["Party"]) 25 | setattr(find_alias, "_default_value", "") 26 | 27 | 28 | find_birthmonth = make_property_finder("partytable personal", "Birth Month and Year") 29 | find_birthmonth.__name__ = "birth_month" 30 | setattr(find_birthmonth, "target", ["Party"]) 31 | setattr(find_birthmonth, "_default_value", "") 32 | -------------------------------------------------------------------------------- /oscn/parse/style.py: -------------------------------------------------------------------------------- 1 | from selectolax.parser import HTMLParser 2 | from unicodedata import normalize 3 | 4 | def clean_string(some_string): 5 | # Normalize unicode characters 6 | normal_str = normalize("NFKD", some_string) 7 | # Remove all types of whitespace by splitting and rejoining 8 | condensed = ' '.join(normal_str.split()) 9 | return condensed 10 | 11 | def style(oscn_html): 12 | tree = HTMLParser(oscn_html) 13 | if style_table := tree.css('table.caseStyle'): 14 | if style_cell := style_table[0].css_first('td'): 15 | if text := style_cell.text(): 16 | return clean_string(text) 17 | return "" 18 | 19 | setattr(style, "target", ["Case"]) 20 | setattr(style, "_default_value", "") 21 | -------------------------------------------------------------------------------- /oscn/request/__init__.py: -------------------------------------------------------------------------------- 1 | from .cases import CaseList, Case 2 | from .parties import Party 3 | from .docket import Docket 4 | -------------------------------------------------------------------------------- /oscn/request/cases.py: -------------------------------------------------------------------------------- 1 | import os 2 | import re 3 | import errno 4 | from io import BytesIO 5 | import gzip 6 | import json 7 | 8 | from types import FunctionType 9 | 10 | import logging 11 | import warnings 12 | 13 | import requests 14 | from requests.exceptions import ConnectionError 15 | 16 | import boto3 17 | import botocore 18 | 19 | from .. import settings 20 | from ..parse import append_parsers 21 | 22 | # Initialize s3 and s3_client as None by default 23 | s3 = None 24 | s3_client = None 25 | 26 | # Check if all required AWS credentials are set 27 | required_aws_vars = ["AWS_DEFAULT_REGION", "AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY"] 28 | 29 | if all(os.getenv(var) for var in required_aws_vars): 30 | try: 31 | s3 = boto3.resource("s3") 32 | s3_client = boto3.client("s3") 33 | except botocore.exceptions.ClientError: 34 | pass 35 | except botocore.exceptions.NoCredentialsError: 36 | pass 37 | 38 | oscn_url = settings.OSCN_CASE_URL 39 | warnings.filterwarnings("ignore") 40 | logger = logging.getLogger("oscn") 41 | 42 | logger.setLevel(logging.INFO) 43 | 44 | 45 | # regex for index parsing 46 | 47 | get_court = re.compile(r"^(?P\w+)-") 48 | get_type = re.compile(r"-(?P\w+)-") 49 | get_year = re.compile(r"-(?P\d{4})-") 50 | get_number = re.compile(r"0*(?P\d+\w*)$") 51 | 52 | 53 | # This decorators adds properties to the OSCNrequest as a shortcut 54 | # for parsing. This allows access to parse results such as: 55 | # name = Case.judge 56 | # or 57 | # counts = Case.counts 58 | @append_parsers 59 | class Case(object): 60 | headers = settings.OSCN_REQUEST_HEADER 61 | response = False 62 | 63 | def __init__( 64 | self, 65 | index=False, 66 | type=None, 67 | county=None, 68 | year=None, 69 | number=None, 70 | cmid=False, 71 | **kwargs, 72 | ): 73 | 74 | if index: 75 | self.county = get_court.match(index).group("court") 76 | self.number = get_number.search(index).group("number") 77 | 78 | try: 79 | self.type = get_type.search(index).group("type") 80 | except AttributeError as exc: 81 | self.type = None 82 | 83 | try: 84 | self.year = get_year.search(index).group("year") 85 | except AttributeError as exc: 86 | self.year = None 87 | 88 | else: 89 | self.county = county 90 | self.year = year 91 | self.number = number 92 | self.type = type 93 | 94 | if hasattr(self, "type"): 95 | self.cmid = self.type == "cmid" 96 | else: 97 | self.cmid = False 98 | 99 | self.source = kwargs["source"] if "source" in kwargs else False 100 | 101 | if "text" in kwargs: 102 | self.text = kwargs["text"] 103 | return 104 | else: 105 | self.directory = kwargs["directory"] if "directory" in kwargs else "" 106 | self.bucket = kwargs["bucket"] if "bucket" in kwargs else "" 107 | if self.directory: 108 | self._open_file() 109 | elif self.bucket: 110 | self._open_s3_object() 111 | else: 112 | # default for test 113 | self.text = "" 114 | self._request() 115 | 116 | @property 117 | def oscn_number(self): 118 | key_names = ["type", "year", "number"] 119 | number_parts = [] 120 | for ky in key_names: 121 | if hasattr(self, ky): 122 | ky_val = getattr(self, ky) 123 | if ky_val: 124 | number_parts.append(str(getattr(self, ky))) 125 | return "-".join(number_parts) 126 | 127 | @property 128 | def index(self): 129 | return f"{self.county}-{self.oscn_number}" 130 | 131 | @property 132 | def inner_path(self): 133 | path_parts = [] 134 | key_names = ["county", "type", "year"] 135 | for ky in key_names: 136 | if hasattr(self, ky): 137 | path_parts.append(getattr(self, ky)) 138 | return "/".join(path_parts) 139 | 140 | @property 141 | def path(self): 142 | return f"{self.directory}/{self.inner_path}" 143 | 144 | @property 145 | def file_name(self): 146 | return f"{self.path}/{self.number}.zip" 147 | 148 | @property 149 | def s3_key(self): 150 | return f"{self.inner_path}/{self.number}.zip" 151 | 152 | def save(self, **kwargs): 153 | case_data = { 154 | "source": self.source, 155 | "index": self.index, 156 | "text": self.text, 157 | } 158 | file_data = gzip.compress(bytes(json.dumps(case_data), "utf-8")) 159 | 160 | self.directory = kwargs["directory"] if "directory" in kwargs else "" 161 | self.bucket = kwargs["bucket"] if "bucket" in kwargs else "" 162 | if self.directory: 163 | if not os.path.exists(os.path.dirname(self.file_name)): 164 | try: 165 | os.makedirs(os.path.dirname(self.file_name)) 166 | except OSError as exc: # Guard against race condition 167 | if exc.errno != errno.EEXIST: 168 | raise 169 | with open(self.file_name, "wb") as open_file: 170 | open_file.write(file_data) 171 | 172 | if self.bucket and s3 is not None: 173 | try: 174 | s3.meta.client.head_bucket(Bucket=self.bucket) 175 | except botocore.exceptions.ClientError as e: 176 | error_code = e.response["Error"]["Code"] 177 | if error_code == "404" or error_code == "403": 178 | s3.create_bucket(Bucket=self.bucket) 179 | else: 180 | raise e 181 | s3.Bucket(self.bucket).put_object(Key=self.s3_key, Body=file_data) 182 | 183 | def _open_file(self): 184 | try: 185 | with gzip.GzipFile(self.file_name, "r") as open_file: 186 | saved_data = json.loads(open_file.read().decode("utf-8")) 187 | self.__init__(**saved_data) 188 | self.valid = True 189 | except FileNotFoundError: 190 | self.valid = False 191 | 192 | def _open_s3_object(self): 193 | # If S3 client is not available, mark as invalid and return 194 | if s3_client is None: 195 | self.valid = False 196 | return 197 | 198 | try: 199 | s3_object = s3_client.get_object(Bucket=self.bucket, Key=self.s3_key) 200 | bytes = BytesIO(s3_object["Body"].read()) 201 | unzipped_stream = ( 202 | gzip.GzipFile(None, "rb", fileobj=bytes).read().decode("utf-8") 203 | ) 204 | saved_data = json.loads(unzipped_stream) 205 | self.__init__(**saved_data) 206 | self.valid = True 207 | except botocore.exceptions.ClientError as e: 208 | error_code = e.response["Error"]["Code"] 209 | if error_code == "NoSuchKey": 210 | self.valid = False 211 | else: 212 | raise e 213 | 214 | def _valid_response(self, response): 215 | if not response.ok: 216 | return False 217 | for msg in settings.INVALID_CASE_MESSAGES: 218 | if msg in response.text: 219 | logger.info("Case %s is invalid", self.oscn_number) 220 | return False 221 | return True 222 | 223 | def _request(self, attempts_left=settings.MAX_EMPTY_CASES): 224 | params = {"db": self.county} 225 | if self.cmid: 226 | params["cmid"] = self.number 227 | else: 228 | params["number"] = self.oscn_number 229 | 230 | try: 231 | response = requests.post( 232 | oscn_url, params, headers=self.headers, verify=False 233 | ) 234 | except ConnectionError: 235 | if attempts_left > 0: 236 | return self._request(attempts_left=attempts_left - 1) 237 | else: 238 | raise ConnectionError 239 | 240 | if self._valid_response(response): 241 | self.valid = True 242 | self.source = f"{response.url}" 243 | self.text = response.text 244 | for msg in settings.UNUSED_CASE_MESSAGES: 245 | if msg in response.text: 246 | self.valid = False 247 | return 248 | else: 249 | self.valid = False 250 | 251 | 252 | class CaseList(object): 253 | def __init__(self, types=[], counties=[], years=[], start=1, stop=20000, **kwargs): 254 | 255 | self.start = start 256 | self.stop = stop 257 | self.filters = [] 258 | 259 | # allow kwargs to override certain args 260 | self.types = kwargs["type"] if "type" in kwargs else types 261 | self.counties = kwargs["county"] if "county" in kwargs else counties 262 | self.years = kwargs["year"] if "year" in kwargs else years 263 | 264 | # Allow passing a string to list keywords 265 | # make a str into a single element list otherwise return the value 266 | str_to_list = lambda val: [val] if type(val) is str else val 267 | self.types = str_to_list(self.types) 268 | self.counties = str_to_list(self.counties) 269 | self.years = str_to_list(self.years) 270 | 271 | # create case request based on storage option 272 | if "directory" in kwargs: 273 | self._request_case = self._make_case_requester( 274 | directory=kwargs["directory"] 275 | ) 276 | elif "bucket" in kwargs: 277 | self._request_case = self._make_case_requester(bucket=kwargs["bucket"]) 278 | else: 279 | self._request_case = self._make_case_requester() 280 | 281 | self.all_cases = self._case_generator() 282 | 283 | def _make_case_requester(self, **kwargs): 284 | def case_request(index=False): 285 | kwargs["index"] = index 286 | return Case(**kwargs) 287 | 288 | return case_request 289 | 290 | def _request_generator(self, start, stop): 291 | case_numbers = range(start, stop + 1) 292 | for county in self.counties: 293 | for case_type in self.types: 294 | for year in self.years: 295 | self.exit_year = False 296 | for num in case_numbers: 297 | case_index = f"{county}-{case_type}-{year}-{num}" 298 | yield self._request_case(case_index) 299 | if self.exit_year: 300 | break 301 | 302 | def _case_generator(self): 303 | request_attempts = 10 304 | for case in self._request_generator(self.start, self.stop): 305 | if case.valid: 306 | request_attempts = 10 307 | if case.cmids: 308 | for cmid in case.cmids: 309 | cmid_index = f"{case.county}-cmid-{case.year}-{cmid}" 310 | cmid_case = self._request_case(cmid_index) 311 | if cmid_case.valid: 312 | if self._passes_filters(cmid_case): 313 | yield cmid_case 314 | elif self._passes_filters(case): 315 | yield case 316 | else: 317 | if request_attempts > 0: 318 | request_attempts -= 1 319 | else: 320 | self.exit_year = True 321 | 322 | def __iter__(self): 323 | return self 324 | 325 | def __next__(self): 326 | return next(self.all_cases) 327 | 328 | def _passes_filters(self, case): 329 | def does_it_pass(filter): 330 | target, test = filter 331 | target_value = getattr(case, target) 332 | if isinstance(test, str): 333 | return test in target_value 334 | elif isinstance(test, FunctionType): 335 | return test(target_value) 336 | 337 | # run the tests 338 | test_results = map(does_it_pass, self.filters) 339 | 340 | # see if they are all true 341 | return all(test_results) 342 | 343 | def find(self, **kwargs): 344 | for kw in kwargs: 345 | self.filters.append((kw, kwargs[kw])) 346 | return self 347 | -------------------------------------------------------------------------------- /oscn/request/docket.py: -------------------------------------------------------------------------------- 1 | from .. import settings 2 | from ..parse import append_parsers 3 | 4 | from oscn._meta import docket_get 5 | 6 | 7 | @append_parsers 8 | class Docket(object): 9 | def __init__(self, judge_id, start_date): 10 | self.id = judge_id 11 | self.start_date = start_date 12 | self._request() 13 | 14 | def _request(self): 15 | response = docket_get(self.id, self.start_date) 16 | self.text = response.text 17 | -------------------------------------------------------------------------------- /oscn/request/parties.py: -------------------------------------------------------------------------------- 1 | from .. import settings 2 | from ..parse import append_parsers 3 | 4 | from oscn._meta import party_get 5 | 6 | 7 | # ?db=tulsa&cn=CF-2020-1&id=12576087 8 | 9 | 10 | @append_parsers 11 | class Party(object): 12 | def __init__(self, party_id, db="oklahoma"): 13 | self.id = party_id 14 | self.db = db 15 | self._request() 16 | 17 | def _request(self): 18 | response = party_get(self.id, self.db) 19 | self.source = response.request.url 20 | self.text = response.text 21 | -------------------------------------------------------------------------------- /oscn/requirements.txt: -------------------------------------------------------------------------------- 1 | beautifulsoup4>=4.11.1 2 | boto3>=1.24.48 3 | requests>=2.28.1 4 | python-decouple>=3.7 5 | selectolax==0.3.21 6 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | ipdb==0.12.2 2 | twine==1.13.0 3 | pytest==4.3.1 4 | wheel==0.34.2 5 | black==21.7b0 6 | -------------------------------------------------------------------------------- /scripts/docket_test.py: -------------------------------------------------------------------------------- 1 | import re 2 | import sys 3 | 4 | import oscn 5 | 6 | test_for = re.compile(r"reappear", re.I | re.M) 7 | 8 | 9 | def find_disp(html_doc): 10 | if test_for.search(html_doc): 11 | return True 12 | else: 13 | sys.stdout.write(".") 14 | sys.stdout.flush() 15 | 16 | return False 17 | 18 | 19 | # define the Case attr to test and the function to use 20 | 21 | cases = oscn.request.CaseList( 22 | county="tulsa", type="CF", year="2017", start=1001, stop=2000 23 | ) 24 | 25 | for c in cases: 26 | docket = c.docket 27 | print(docket.text) 28 | -------------------------------------------------------------------------------- /scripts/events.py: -------------------------------------------------------------------------------- 1 | import oscn 2 | 3 | 4 | cases = [ 5 | "tulsa-CF-2018-01048", 6 | "tulsa-CJ-2016-3196", 7 | "tulsa-CJ-2018-4616", 8 | "tulsa-CV-2019-0012", 9 | "mayes-CJ-2018-0104", 10 | ] 11 | 12 | for case_index in cases: 13 | case = oscn.request.Case(case_index) 14 | print(case.events) 15 | print(case.source) 16 | print("---") 17 | -------------------------------------------------------------------------------- /scripts/example.py: -------------------------------------------------------------------------------- 1 | import oscn 2 | 3 | # # single case request 4 | case = oscn.request.Case(county="mayes", year="2018", number=11) 5 | print(f"judge: {case.judge}") 6 | print(f"filed: {case.filed}") 7 | print(f"parties: {case.parties}") 8 | 9 | for min in case.docket: 10 | print(min) 11 | 12 | # # all cases for a one county and year 13 | cases = oscn.request.CaseList(county="tulsa", year="2017") 14 | # 15 | # 16 | # # multiple types, multiple counties, or multiple years 17 | types = ["CM", "CF"] 18 | counties = ["tulsa", "adair", "bryan"] 19 | years = ["2016", "2017"] 20 | cases = oscn.request.CaseList(county=counties, year=years, start=5, stop=7) 21 | 22 | 23 | for case in cases: 24 | print(f"county: {case.county} number: {case.oscn_number}") 25 | 26 | 27 | # example using find on CaseList 28 | 29 | cases = oscn.request.CaseList(county="bryan", year="2018", stop=60) 30 | 31 | # Create a test function 32 | count_text = "OBSTRUCT" 33 | 34 | 35 | def count_test(counts): 36 | for count in counts: 37 | if count_text in count["description"]: 38 | return True 39 | return False 40 | 41 | 42 | # define the Case attr to test and the function to use 43 | cases.find(counts=count_test) 44 | 45 | # this will print any cases with OBSTRUCT in the counts 46 | for case in cases: 47 | print(f"case: {case.oscn_number}") 48 | print(f"source: {case.source}") 49 | -------------------------------------------------------------------------------- /scripts/find-attorneys.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import time 3 | import csv 4 | 5 | import oscn 6 | 7 | counties = ["tulsa", "cimarron", "adair", "delaware"] 8 | years = ["2010"] 9 | 10 | 11 | for county in counties: 12 | csv_file = open(f"data/{county}-attorneys.csv", "w") 13 | # if this breaks, you may need to mkdir data 14 | writer = csv.writer(csv_file, delimiter=",") 15 | 16 | for year in years: 17 | sys.stdout.write(f"{county} {year}") 18 | case_iter = oscn.request.CaseList(county=county, year=year, stop=25) 19 | for case in case_iter: 20 | sys.stdout.write(case.oscn_number) 21 | sys.stdout.flush() 22 | writer.writerow([year, county, case.oscn_number]) 23 | writer.writerow(case.attorneys) 24 | sys.stdout.write(".") 25 | sys.stdout.flush() 26 | 27 | csv_file.close() 28 | -------------------------------------------------------------------------------- /scripts/find-counts.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import time 3 | import csv 4 | 5 | import oscn 6 | 7 | 8 | counties = ["adair", "washington"] 9 | years = ["2018"] 10 | 11 | find_descriptions = ["OBSTR", "RESIST"] 12 | 13 | 14 | def count_test(desc): 15 | for text in find_descriptions: 16 | if text in desc: 17 | return True 18 | return False 19 | 20 | 21 | csv_file = open("data/find-counts.csv", "w") 22 | writer = csv.writer(csv_file, delimiter=",") 23 | writer.writerow(["year", "county", "case", "description"]) 24 | 25 | cases = oscn.request.CaseList(county=counties, year=years) 26 | 27 | for case in cases: 28 | count = 0 29 | sys.stdout.write(case.oscn_number) 30 | sys.stdout.flush() 31 | for count in case.counts: 32 | if count_test(count["description"]): 33 | writer.writerow( 34 | [case.year, case.county, case.oscn_number, count["description"]] 35 | ) 36 | sys.stdout.write("*") 37 | else: 38 | sys.stdout.write(".") 39 | sys.stdout.flush() 40 | 41 | csv_file.close() 42 | -------------------------------------------------------------------------------- /scripts/find_party.py: -------------------------------------------------------------------------------- 1 | import oscn 2 | 3 | 4 | # searhing by name returns a list of CaseList 5 | 6 | cases = oscn.find.party(first="Jill", last="Webb") 7 | 8 | 9 | c = next(cases) 10 | 11 | import ipdb 12 | 13 | ipdb.set_trace() 14 | print(c.case_index) 15 | print(c.counts) 16 | -------------------------------------------------------------------------------- /scripts/parse_test.py: -------------------------------------------------------------------------------- 1 | import oscn 2 | 3 | fp = open("examples/number_not_used.html") 4 | 5 | find = oscn.parse.judge(fp.read()) 6 | 7 | print(find) 8 | 9 | x = oscn.request.Case(type="CF", county="cimarron", year="2017", number=122) 10 | 11 | cases = oscn.request.CaseList(type="CF", county="cimarron", year="2018") 12 | 13 | for case in cases: 14 | print(f"case: {case.number} found: {case.judge}") 15 | -------------------------------------------------------------------------------- /scripts/save-counts.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import time 3 | import csv 4 | 5 | import oscn 6 | 7 | counties = ["washington", "cimarron", "beckham"] 8 | years = ["2017", "2018"] 9 | 10 | cases = oscn.request.CaseList(county=counties, year=years) 11 | 12 | for case in cases: 13 | csv_file = open(f"data/counts.csv", "w") 14 | writer = csv.writer(csv_file, delimiter=",") 15 | writer.writerow(["year", "county", "case", "judge", "description", "source"]) 16 | sys.stdout.write(case.oscn_number) 17 | sys.stdout.flush() 18 | for count in case.counts: 19 | writer.writerow( 20 | [ 21 | case.year, 22 | case.county, 23 | case.oscn_number, 24 | case.judge, 25 | count["description"], 26 | case.source, 27 | ] 28 | ) 29 | sys.stdout.write(".") 30 | sys.stdout.flush() 31 | 32 | csv_file.close() 33 | -------------------------------------------------------------------------------- /scripts/save_cases.py: -------------------------------------------------------------------------------- 1 | import oscn 2 | 3 | 4 | def save_cases(cases): 5 | for case in cases: 6 | print(case.index) 7 | case.save(bucket="oscn-test-data") 8 | case.save(directory="data") 9 | 10 | 11 | cases = oscn.request.CaseList(counties="bryan", years="2018", types="CF", stop=60) 12 | 13 | save_cases(cases) 14 | 15 | cases = oscn.request.CaseList( 16 | types="CM", counties="carter", years="2019", start=13, stop=16 17 | ) 18 | 19 | save_cases(cases) 20 | -------------------------------------------------------------------------------- /scripts/soup_test.py: -------------------------------------------------------------------------------- 1 | import re 2 | from bs4 import BeautifulSoup 3 | 4 | fp = open("examples/multi_count.html") 5 | soup = BeautifulSoup(fp, "html.parser") 6 | count_list = [] 7 | 8 | count_start = soup.find("h2", "section counts") 9 | next_sibling = count_start.find_next_sibling("p") 10 | while next_sibling.name != "h2": 11 | if next_sibling.name == "p": 12 | codetags = next_sibling.find_all("strong") 13 | for codetag in codetags: 14 | codetag.extract() 15 | count_list.append({"description": next_sibling.text.strip()}) 16 | next_sibling = next_sibling.next_sibling 17 | -------------------------------------------------------------------------------- /scripts/test_lists.py: -------------------------------------------------------------------------------- 1 | import oscn 2 | 3 | # CM-2018-299 4 | 5 | # parties = case.parties 6 | 7 | # carter-CM-2019-14 has cmid references 8 | 9 | types = ["CM"] 10 | 11 | cases = oscn.request.CaseList( 12 | types=types, counties=["carter"], years=["2019"], start=13, stop=16 13 | ) 14 | 15 | print(f"---") 16 | 17 | for case in cases: 18 | print(f"case: {case.index}") 19 | case.save(directory="data") 20 | 21 | print(f"---") 22 | 23 | cases = oscn.request.CaseList( 24 | types=types, 25 | counties=["carter"], 26 | years=["2019"], 27 | start=13, 28 | stop=16, 29 | directory="data", 30 | ) 31 | 32 | 33 | for case in cases: 34 | print(f"case: {case.index}") 35 | case.save(bucket="oscn-test-data") 36 | 37 | print(f"---") 38 | 39 | cases = oscn.request.CaseList( 40 | types=types, 41 | counties=["carter"], 42 | years=["2019"], 43 | start=13, 44 | stop=16, 45 | bucket="oscn-test-data", 46 | ) 47 | 48 | for case in cases: 49 | print(f"case: {case.index}") 50 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, find_packages 2 | 3 | with open("README.md", "r") as fh: 4 | long_description = fh.read() 5 | 6 | setup( 7 | name="oscn", 8 | version="0.0.0.89", 9 | description="Oklahoma State Courts Network case parsing utilities", 10 | long_description=long_description, 11 | long_description_content_type="text/markdown", 12 | url="https://github.com/codefortulsa/oscn", 13 | author="John Dungan", 14 | author_email="john@docket2me.com", 15 | license="MIT", 16 | packages=find_packages(), 17 | zip_safe=False, 18 | install_requires=[ 19 | "requests", 20 | "beautifulsoup4", 21 | "boto3", 22 | "python-decouple", 23 | "selectolax", 24 | ], 25 | classifiers=[ 26 | "Programming Language :: Python :: 3", 27 | "Programming Language :: Python :: 3.8", 28 | "Programming Language :: Python :: 3.9", 29 | "Programming Language :: Python :: 3.10", 30 | "Programming Language :: Python :: 3.11", 31 | ], 32 | ) 33 | -------------------------------------------------------------------------------- /tests/test_alpha_casenumber.py: -------------------------------------------------------------------------------- 1 | import oscn 2 | 3 | 4 | def dont_test_request_withalpha(): 5 | case_params = {"number": "181A", "county": "pittsburg", "year": "2003"} 6 | 7 | case = oscn.request.Case(**case_params) 8 | 9 | assert case.valid 10 | assert case.county == "pittsburg" 11 | assert case.type == "F" 12 | assert case.year == "2003" 13 | -------------------------------------------------------------------------------- /tests/test_alphacases.py: -------------------------------------------------------------------------------- 1 | import oscn 2 | 3 | 4 | def test_case_with_alpha(): 5 | case_index = "stephens-CS-2021-00267R" 6 | 7 | case = oscn.request.Case(case_index) 8 | 9 | assert case.valid 10 | -------------------------------------------------------------------------------- /tests/test_appellate.py: -------------------------------------------------------------------------------- 1 | import oscn 2 | 3 | valid_cases = [ 4 | "appellate-118615", 5 | "appellate-SCBD-6889", 6 | "appellate-DF-118613", 7 | "appellate-F-2021-229", 8 | "appellate-118610", 9 | "appellate-MA-118610", 10 | "appellate-118609", 11 | "appellate-PR-118611", 12 | "appellate-DF-118616", 13 | "appellate-MA-118618", 14 | "appellate-DF-118620", 15 | "appellate-IN-118622", 16 | "appellate-DF-118617", 17 | "appellate-DF-118619", 18 | "appellate-DF-118623", 19 | "appellate-DF-118625", 20 | "appellate-SCBD-6893", 21 | "appellate-SCBD-6894", 22 | "appellate-HC-118629", 23 | "appellate-PR-118630", 24 | "appellate-DF-118627", 25 | "appellate-SD-118628", 26 | "appellate-DF-118635", 27 | "appellate-PR-118633", 28 | "appellate-SCBD-6895", 29 | "appellate-SD-118634", 30 | "appellate-CI-118636", 31 | "appellate-PR-118640", 32 | "appellate-PR-118646", 33 | "appellate-DF-118645", 34 | "appellate-MA-118637", 35 | "appellate-PR-118639", 36 | "appellate-IN-118641", 37 | "appellate-CQ-118638", 38 | "appellate-118642", 39 | "appellate-SD-118644", 40 | "appellate-SD-118643", 41 | "appellate-118648", 42 | "appellate-CI-118647", 43 | "appellate-118649", 44 | "appellate-SCBD-6896", 45 | "appellate-DF-118650", 46 | "appellate-DF-118654", 47 | "appellate-DF-118653", 48 | "appellate-118652", 49 | "appellate-DF-118651", 50 | ] 51 | 52 | 53 | def test_appellate_types(): 54 | for indx in valid_cases: 55 | print(indx) 56 | case = oscn.request.Case(indx) 57 | assert case.valid 58 | -------------------------------------------------------------------------------- /tests/test_attorneys.py: -------------------------------------------------------------------------------- 1 | import oscn 2 | import time 3 | 4 | from oscn import settings 5 | from oscn.parse.lax_attorneys import attorneys 6 | from oscn.parse.bs4_attorneys import bs4_attorneys 7 | 8 | OSCN_HEADER = settings.OSCN_REQUEST_HEADER 9 | 10 | def test_lax_versus_bs4(): 11 | cases = oscn.request.CaseList( 12 | types=["CJ","CM"], 13 | counties=["tulsa", "oklahoma","mayes","wagoner","creek","okmulgee","osage",], 14 | years=["2024","2018"], 15 | start=4, 16 | stop=5, 17 | ) 18 | 19 | 20 | for case in cases: 21 | bs4_result = bs4_attorneys(case.text) 22 | lax_result = attorneys(case.text) 23 | print("." * 100) 24 | print(f"Case: {case.source}") 25 | print(f"BS4: {bs4_result}") 26 | print(f"LAX: {lax_result}") 27 | print("case.attorneys: ", case.attorneys) 28 | assert case.attorneys == lax_result 29 | assert bs4_result == lax_result 30 | 31 | -------------------------------------------------------------------------------- /tests/test_body.py: -------------------------------------------------------------------------------- 1 | import oscn 2 | 3 | 4 | def test_body_text(): 5 | case = oscn.request.Case("cleveland-CF-2016-84") 6 | 7 | body_text = case.body 8 | 9 | assert isinstance(body_text, str) 10 | assert "Judge: Walkley, Lori" in body_text 11 | assert " 0 else 0 44 | print(f"\nAverage Difference in Processing Time: {average_diff_percentage:.2f}%") 45 | -------------------------------------------------------------------------------- /tests/test_dates.py: -------------------------------------------------------------------------------- 1 | import oscn 2 | 3 | class TestCaseDateFinders: 4 | 5 | def test_filed_date(self): 6 | # Set the HTML response containing the 'Filed' date 7 | html = """ 8 | WEBB, JON et al VS. CUPPS, GARY B et al 9 | 10 | No. CJ-2022-00141
11 | (Civil relief more than $10,000: JUDGMENT (DISMISSED)) 12 |


13 | Filed: 05/31/2022


Judge: PARISH, LAWRENCE
14 | """ 15 | case = oscn.request.Case(text=html) # Setting the HTML text during instantiation 16 | filed_date = case.filed 17 | assert filed_date == "05/31/2022" 18 | 19 | def test_closed_date(self): 20 | # Set the HTML response containing the 'Closed' date 21 | html = """ 22 | 23 | No. CM-2022-141
24 | (Criminal Misdemeanor) 25 |


26 | Filed: 01/13/2022
Closed: 03/09/2022

Judge: Traffic Court Judge (General)
27 | """ 28 | case = oscn.request.Case(text=html) # Setting the HTML text during instantiation 29 | closed_date = case.closed 30 | assert closed_date == "03/09/2022" 31 | 32 | def test_offense_date(self): 33 | # Set the HTML response containing the 'Date of Offense' 34 | html = """ 35 | Count as Filed: 36 | LA4, POSSESSION OF STOLEN VEHICLE, 37 | in violation of 47 O.S. 4-103
Date of Offense: 01/07/2022
38 | """ 39 | case = oscn.request.Case(text=html) # Setting the HTML text during instantiation 40 | offense_date = case.offense 41 | assert offense_date == "01/07/2022" 42 | -------------------------------------------------------------------------------- /tests/test_events.py: -------------------------------------------------------------------------------- 1 | import oscn 2 | 3 | 4 | def test_events(): 5 | case = oscn.request.Case("tulsa-FD-2022-945") 6 | events = case.events 7 | first = events[0] 8 | # {'date': 'Wednesday, May 25, 2022 at 9:30 AM', 'description': 'Parenting Plan Conference'}, 9 | assert first['date'] == 'Wednesday, May 25, 2022 at 9:30 AM' 10 | assert first['description'] == 'Parenting Plan Conference' 11 | 12 | case = oscn.request.Case("tulsa-FD-2016-3013") 13 | events = case.events 14 | second = events[1] 15 | # {'date': 'Thursday, January 5, 2017 at 9:00 AM', 'description': 'Parenting Plan Conference'} 16 | assert second['date'] == 'Thursday, January 5, 2017 at 9:00 AM' 17 | assert second['description'] == 'Parenting Plan Conference' 18 | 19 | -------------------------------------------------------------------------------- /tests/test_find.py: -------------------------------------------------------------------------------- 1 | import oscn 2 | 3 | search_for_text = "OBSTRUCT" 4 | known_good = ["bryan-CF-2018-4", "bryan-CF-2018-24", "bryan-CF-2018-51"] 5 | 6 | 7 | def test_find_text_in_text(): 8 | cases = oscn.request.CaseList(counties="bryan", years="2018", types="CF", stop=60) 9 | cases.find(text=search_for_text) 10 | 11 | found_index = [] 12 | for case in cases: 13 | found_index.append(case.index) 14 | assert case.county == "bryan" 15 | assert case.year == "2018" 16 | assert int(case.number) <= 60 17 | assert search_for_text in case.text 18 | 19 | assert found_index == known_good 20 | 21 | 22 | def test_find_function(): 23 | def count_test(counts): 24 | for count in counts: 25 | if search_for_text in count["description"]: 26 | return True 27 | return False 28 | 29 | cases = oscn.request.CaseList(counties="bryan", years="2018", types="CF", stop=60) 30 | cases.find(counts=count_test) 31 | 32 | found_index = [] 33 | for case in cases: 34 | found_index.append(case.index) 35 | assert case.county == "bryan" 36 | assert case.year == "2018" 37 | assert int(case.number) <= 60 38 | assert search_for_text in case.text 39 | assert found_index == known_good 40 | 41 | 42 | def test_find_bucket_text(): 43 | 44 | cases = oscn.request.CaseList( 45 | counties="bryan", years="2018", types="CF", stop=60, bucket="oscn-test-data" 46 | ) 47 | 48 | cases.find(text=search_for_text) 49 | 50 | found_index = [] 51 | 52 | for case in cases: 53 | found_index.append(case.index) 54 | assert case.county == "bryan" 55 | assert case.year == "2018" 56 | assert int(case.number) <= 60 57 | assert search_for_text in case.text 58 | assert found_index == known_good 59 | 60 | 61 | def test_find_directory_text(): 62 | 63 | cases = oscn.request.CaseList( 64 | counties="bryan", years="2018", types="CF", stop=60, directory="data" 65 | ) 66 | 67 | cases.find(text=search_for_text) 68 | 69 | found_index = [] 70 | for case in cases: 71 | found_index.append(case.index) 72 | assert case.county == "bryan" 73 | assert case.year == "2018" 74 | assert int(case.number) <= 60 75 | assert search_for_text in case.text 76 | assert found_index == known_good 77 | 78 | 79 | def test_find_multi_funcs(): 80 | source_calls = 0 81 | count_calls = 0 82 | case_count = 0 83 | 84 | def log_source(source): 85 | nonlocal source_calls 86 | source_calls = source_calls + 1 87 | return True 88 | 89 | def log_counts(counts): 90 | nonlocal count_calls 91 | count_calls += 1 92 | return True 93 | 94 | cases = oscn.request.CaseList( 95 | counties="bryan", years="2018", types="CF", stop=20, directory="data" 96 | ) 97 | 98 | cases.find(source=log_source, counts=log_counts) 99 | 100 | for case in cases: 101 | case_count += 1 102 | 103 | # import pdb; pdb.set_trace() 104 | 105 | assert source_calls == case_count 106 | assert count_calls == case_count 107 | assert source_calls == count_calls 108 | -------------------------------------------------------------------------------- /tests/test_get_party.py: -------------------------------------------------------------------------------- 1 | import oscn 2 | 3 | 4 | class TestPartyProperties: 5 | def setup_class(self): 6 | case = oscn.request.Case("tulsa-CF-2020-1") 7 | defendant_id = case.parties[0]["id"] 8 | defendant = oscn.request.Party(defendant_id) 9 | self.defendant = defendant 10 | 11 | def test_get_party_dob(self): 12 | this_profile = self.defendant.profile 13 | dob = this_profile["Birth Month and Year"] 14 | assert dob == "05/1991" 15 | 16 | def test_get_party_name(self): 17 | name = self.defendant.name 18 | assert name == "JONES, STUART CHANCE" 19 | 20 | def test_get_party_alias(self): 21 | alias = self.defendant.alias 22 | assert alias == "JONES, STEWART CHANCE" 23 | 24 | def test_get_party_birthmonth(self): 25 | mob = self.defendant.birth_month 26 | assert mob == "05/1991" 27 | 28 | def test_party_addreseses(self): 29 | party_addresses = self.defendant.addresses 30 | assert len(party_addresses) == 5 31 | assert party_addresses[2]["Address"] == "TULSA, Oklahoma 74115" 32 | 33 | def test_party_source(self): 34 | party_source = self.defendant.source 35 | assert ( 36 | party_source 37 | == "https://www.oscn.net/dockets/GetPartyRecord.aspx?db=oklahoma&id=12576087" 38 | ) 39 | 40 | # SKIPPING TO GET FIXES TO EVENTS AND PARTIES PUBLSIHED 41 | # class TestDifferentDB: 42 | # def setup_class(self): 43 | # case = oscn.request.Case("kingfisher-CF-2018-16") 44 | # defendant_id = case.parties[0]["id"] 45 | # defendant = oscn.request.Party(defendant_id, case.county) 46 | # self.defendant = defendant 47 | 48 | # def test_party_db(self): 49 | # party_name = self.defendant.name 50 | # assert party_name == "HUDSON, PHILLIP JOSEPH" 51 | -------------------------------------------------------------------------------- /tests/test_issues.py: -------------------------------------------------------------------------------- 1 | import oscn 2 | 3 | 4 | def test_two_issue_formats(): 5 | 6 | case1 = oscn.request.Case("choctaw-SC-2020-118") 7 | assert case1.issues[0] == "1. EVICTION SMALL CLAIMS(UNDER $5000.00)" 8 | 9 | case2 = oscn.request.Case("tulsa-SC-2020-118") 10 | assert case2.issues[0]["Issue"] == "FORCIBLE ENTRY & DETAINER <$5000.00. (SCFED1)" 11 | 12 | case3 = oscn.request.Case("oklahoma-CJ-2024-4") 13 | assert case3.issues[0]["Issue"] == "AUTO NEGLIGENCE (AUTONEG)" 14 | -------------------------------------------------------------------------------- /tests/test_judge_docket.py: -------------------------------------------------------------------------------- 1 | import oscn 2 | 3 | 4 | class TestGetDocket: 5 | def setup_class(self): 6 | self.judge = 1208 7 | self.start_date = "07/28/2020" 8 | self.docket = oscn.request.Docket(self.judge, self.start_date) 9 | 10 | def test_get_case_index_list(self): 11 | all_cases = self.docket.cases 12 | 13 | def test_get_case_tables(self): 14 | 15 | all_cases = self.docket.tables 16 | -------------------------------------------------------------------------------- /tests/test_lax_docket.py: -------------------------------------------------------------------------------- 1 | import oscn 2 | import time 3 | 4 | from oscn import settings 5 | from oscn.parse.lax_docket import docket 6 | from oscn.parse.bs4_docket import bs4_docket 7 | 8 | OSCN_HEADER = settings.OSCN_REQUEST_HEADER 9 | 10 | def test_lax_versus_bs4(): 11 | cases = oscn.request.CaseList( 12 | types=["CJ", "CM"], 13 | counties=["tulsa", "oklahoma", "cleveland", "texas", "bexar"], 14 | years=["2024", "2018"], 15 | start=4, 16 | stop=5, 17 | ) 18 | 19 | total_lax_time = 0 20 | total_bs4_time = 0 21 | for case in cases: 22 | # Measure BS4 parsing time 23 | start_time = time.time() 24 | bs4_result = bs4_docket(case.text) 25 | bs4_duration = time.time() - start_time 26 | 27 | # Measure LAX parsing time 28 | start_time = time.time() 29 | lax_result = docket(case.text) 30 | lax_duration = time.time() - start_time 31 | 32 | total_bs4_time += bs4_duration 33 | 34 | # Accumulate total LAX time 35 | total_lax_time += lax_duration 36 | 37 | # Calculate percentage difference 38 | if bs4_duration > 0: 39 | percentage = (lax_duration / bs4_duration) * 100 40 | else: 41 | percentage = float('inf') # Handle division by zero if bs4_duration is zero 42 | print("." * 100) 43 | print(f"Case: {case.source}") 44 | # print(f"BS4 Result: {bs4_result}") 45 | # print(f"LAX Result: {lax_result}") 46 | # print(f"BS4 Time: {bs4_duration:.6f} seconds") 47 | # print(f"LAX Time: {lax_duration:.6f} seconds") 48 | print(f"LAX is {percentage:.2f}% of BS4 time") 49 | 50 | # Ensure both results have the same number of rows 51 | assert len(bs4_result) == len(lax_result) 52 | 53 | # Compare each row 54 | for bs4_row, lax_row in zip(bs4_result, lax_result): 55 | assert bs4_row['date']==lax_row['date'] 56 | assert bs4_row['code']==lax_row['code'] 57 | assert bs4_row['description']==lax_row['description'] 58 | assert bs4_row['count']==lax_row['count'] 59 | assert bs4_row['party']==lax_row['party'] 60 | assert bs4_row['amount']==lax_row['amount'] 61 | 62 | # Check if lax_result matches the case docket 63 | assert lax_result == case.docket 64 | 65 | print("=" * 100) 66 | print(f"Total BS4 Time: {total_bs4_time:.6f} seconds") 67 | print(f"Total LAX Time: {total_lax_time:.6f} seconds") 68 | # Calculate percentage difference 69 | if total_bs4_time > 0: 70 | total_percentage = (total_lax_time / total_bs4_time) * 100 71 | else: 72 | total_percentage = float('inf') # Handle division by zero if bs4_duration is zero 73 | print(f"Total LAX is {total_percentage:.2f}% of BS4 time") 74 | -------------------------------------------------------------------------------- /tests/test_lax_events.py: -------------------------------------------------------------------------------- 1 | import oscn 2 | import time 3 | 4 | from oscn import settings 5 | from oscn.parse.lax_events import events 6 | from oscn.parse.bs4_events import bs4_events 7 | 8 | OSCN_HEADER = settings.OSCN_REQUEST_HEADER 9 | 10 | def test_lax_versus_bs4(): 11 | cases = oscn.request.CaseList( 12 | types=["CJ", "CM"], 13 | counties=["tulsa", "oklahoma", "cleveland", "texas", "bexar"], 14 | years=["2024", "2018"], 15 | start=4, 16 | stop=5, 17 | ) 18 | 19 | total_lax_time = 0 20 | 21 | for case in cases: 22 | # Measure BS4 parsing time 23 | start_time = time.time() 24 | bs4_result = bs4_events(case.text) 25 | bs4_duration = time.time() - start_time 26 | 27 | # Measure LAX parsing time 28 | start_time = time.time() 29 | lax_result = events(case.text) 30 | lax_duration = time.time() - start_time 31 | 32 | # Accumulate total LAX time 33 | total_lax_time += lax_duration 34 | 35 | # Calculate percentage difference 36 | if bs4_duration > 0: 37 | percentage = (lax_duration / bs4_duration) * 100 38 | else: 39 | percentage = float('inf') # Handle division by zero if bs4_duration is zero 40 | 41 | print("." * 100) 42 | print(f"Case: {case.source}") 43 | # print(f"BS4 Result: {bs4_result}") 44 | print("." * 100) 45 | print(f"LAX Result: {lax_result}") 46 | print("." * 100) 47 | # print(f"BS4 Time: {bs4_duration:.6f} seconds") 48 | # print(f"LAX Time: {lax_duration:.6f} seconds") 49 | print(f"LAX is {percentage:.2f}% of BS4 time") 50 | # assert bs4_result == lax_result 51 | assert lax_result == case.events 52 | print("=" * 100) 53 | print(f"Total LAX Time: {total_lax_time:.6f} seconds") 54 | -------------------------------------------------------------------------------- /tests/test_lax_issues.py: -------------------------------------------------------------------------------- 1 | import oscn 2 | import time 3 | from oscn.parse.lax_issues import issues 4 | from oscn.parse.bs4_issues import bs4_issues 5 | 6 | 7 | def test_counts(): 8 | 9 | cases = oscn.request.CaseList( 10 | types=["CJ", "SC"], 11 | counties=["oklahoma", "bexar", "tulsa"], 12 | years=["2024", "2020"], 13 | start=4, 14 | stop=6, 15 | ) 16 | total_diff_percentage = 0 17 | total_cases = 0 18 | 19 | for case in cases: 20 | print(f"source: {case.source}") 21 | bs4_text = case.text 22 | start_time_bs4 = time.perf_counter() 23 | bs4_result = bs4_issues(bs4_text) 24 | end_time_bs4 = time.perf_counter() 25 | bs4_time = end_time_bs4 - start_time_bs4 26 | 27 | lax_text = case.text 28 | start_time_lax = time.perf_counter() 29 | lax_result = issues(lax_text) 30 | end_time_lax = time.perf_counter() 31 | lax_time = end_time_lax - start_time_lax 32 | 33 | time_diff_percentage = (bs4_time - lax_time) / ((bs4_time + lax_time) / 2) * 100 34 | total_diff_percentage += time_diff_percentage 35 | total_cases += 1 36 | print(f"bs4 issues: {bs4_result}") 37 | print(f"lax issues: {lax_result}") 38 | print( 39 | f"Case {case.index} - BS4: {bs4_time:.4f}s, LAX: {lax_time:.4f}s, DIFF: {(lax_time-bs4_time):.4f} PCT:{time_diff_percentage:.2f}%" 40 | ) 41 | assert bs4_result == lax_result 42 | 43 | average_diff_percentage = ( 44 | total_diff_percentage / total_cases if total_cases > 0 else 0 45 | ) 46 | print(f"\nAverage Difference in Processing Time: {average_diff_percentage:.2f}%") 47 | -------------------------------------------------------------------------------- /tests/test_meta_types.py: -------------------------------------------------------------------------------- 1 | import oscn 2 | 3 | 4 | def test_get_type_desc(): 5 | cm_desc = oscn.type("CM") 6 | assert isinstance(cm_desc, str) 7 | assert "CRIMINAL MISDEMEANOR" in cm_desc 8 | 9 | 10 | def test_get_all_types(): 11 | dict_of_all = oscn.types 12 | assert isinstance(dict_of_all, dict) 13 | 14 | 15 | def test_judges(): 16 | judges = oscn.judges 17 | assert isinstance(judges, list) 18 | assert len(judges) != 0 19 | assert judges[0]["number"] 20 | assert judges[0]["name"] 21 | 22 | 23 | def test_get_judge(): 24 | judges = oscn.judges 25 | assert isinstance(judges, list) 26 | assert len(judges) != 0 27 | assert judges[0]["number"] 28 | assert judges[0]["name"] 29 | -------------------------------------------------------------------------------- /tests/test_parse.py: -------------------------------------------------------------------------------- 1 | import oscn 2 | 3 | 4 | def test_parse_string_response(): 5 | case = oscn.request.Case("cleveland-CF-2016-84") 6 | assert case.judge == "WALKLEY, LORI" 7 | assert case.filed == "01/19/2016" 8 | assert case.closed == "04/28/2016" 9 | assert case.county == "cleveland" 10 | assert case.year == "2016" 11 | assert case.type == "CF" 12 | 13 | 14 | def test_live_counts(): 15 | case1 = oscn.request.Case("tulsa-CF-2012-255") 16 | counts = case1.counts 17 | assert len(counts) == 2 18 | assert counts[0]["offense"] == "01/09/2012" 19 | assert counts[1]["description"] == "CHILD ABUSE BY INJURY(CHAB)" 20 | assert counts[1]["violation"] == "21 O.S. 843.5 (A)" 21 | assert counts[1]["party"] == "COTTON, JASON MACK" 22 | assert counts[1]["disposed"] == "CONVICTION, 06/25/2013. Guilty Plea" 23 | assert counts.text != "" 24 | 25 | 26 | def test_live_counts_list(): 27 | cases = oscn.request.CaseList(start=15, stop=17) 28 | for case in cases: 29 | assert case.counts[0]["party"] 30 | 31 | 32 | def test_docket(): 33 | case1 = oscn.request.Case("tulsa-CF-2019-03") 34 | docket = case1.docket 35 | assert isinstance(docket, list) 36 | assert "FELONY INITIAL FILING" in docket.text 37 | for minute in docket: 38 | assert isinstance(minute, dict) 39 | assert minute["date"] is not "" 40 | 41 | 42 | def test_issues(): 43 | case1 = oscn.request.Case("tulsa-CJ-2017-5021") 44 | issues = oscn.parse.issues(case1.text) 45 | assert isinstance(issues, list) 46 | assert "Disposition" in issues.text 47 | for issue in issues: 48 | assert isinstance(issue, dict) 49 | 50 | 51 | def test_get_parties(): 52 | case1 = oscn.request.Case("tulsa-CJ-2020-299") 53 | parties = oscn.parse.parties(case1.text) 54 | assert isinstance(parties, list) 55 | assert parties != [] 56 | assert "DISCOVER BANK" in parties.text 57 | 58 | 59 | def test_parties(): 60 | case1 = oscn.request.Case("tulsa-CJ-2020-299") 61 | issues = oscn.parse.issues(case1.text) 62 | assert isinstance(issues, list) 63 | for issue in issues: 64 | assert isinstance(issue, dict) 65 | assert isinstance(issue["parties"], list) 66 | for party in issue["parties"]: 67 | assert isinstance(party, dict) 68 | assert "name" in party.keys() 69 | assert "disposed" in party.keys() 70 | 71 | 72 | def test_attorneys(): 73 | case1 = oscn.request.Case("tulsa-CJ-2016-143") 74 | attorneys1 = oscn.parse.attorneys(case1.text) 75 | 76 | assert isinstance(attorneys1, list) 77 | assert len(attorneys1) == 1 78 | assert attorneys1[0]["representing"] == "BANK OF AMERICA NA," 79 | assert "KOZENY & MCCUBBIN" in case1.attorneys.text 80 | 81 | case2 = oscn.request.Case("mayes-PO-2015-1") 82 | attorneys2 = oscn.parse.attorneys(case2.text) 83 | assert isinstance(attorneys2, list) 84 | assert len(attorneys2) == 0 85 | assert attorneys2.text == "" 86 | 87 | 88 | def test_issue_list(): 89 | case_list = oscn.request.CaseList( 90 | counties=["tulsa", "oklahoma" "mayes"], types=["CJ", "PB", "CV"], stop=20 91 | ) 92 | 93 | for case in case_list: 94 | assert isinstance(case.issues, list) 95 | for issue in case.issues: 96 | assert isinstance(issue, dict) 97 | assert isinstance(issue["parties"], list) 98 | for party in issue["parties"]: 99 | assert isinstance(party, dict) 100 | assert "name" in party.keys() 101 | assert "disposed" in party.keys() 102 | 103 | 104 | def test_events(): 105 | case = oscn.request.Case("oklahoma-FD-2018-5") 106 | events = oscn.parse.events(case.text) 107 | assert events == [] 108 | 109 | case = oscn.request.Case("oklahoma-FD-2012-5") 110 | events = oscn.parse.events(case.text) 111 | found = any("PETITIONER'S APPLICATION" in event.get("description") for event in events) 112 | assert found 113 | -------------------------------------------------------------------------------- /tests/test_parties.py: -------------------------------------------------------------------------------- 1 | import oscn 2 | import time 3 | 4 | from oscn import settings 5 | from oscn.parse.lax_parties import parties 6 | from oscn.parse.parties import bs4_parties 7 | 8 | def test_lax_versus_bs4(): 9 | cases = oscn.request.CaseList( 10 | types=["CJ","CM"], 11 | counties=["tulsa", "oklahoma","cleveland","texas","bexar"], 12 | years=["2024","2018"], 13 | start=4, 14 | stop=5, 15 | ) 16 | 17 | 18 | for case in cases: 19 | bs4_result = bs4_parties(case.text) 20 | lax_result = parties(case.text) 21 | print("." * 100) 22 | print(f"Case: {case.source}") 23 | # print(f"BS4: {bs4_result}") 24 | print(f"LAX: {lax_result}") 25 | # print(f"cas: {case.parties}") 26 | assert case.parties == lax_result 27 | # assert bs4_result == lax_result 28 | 29 | -------------------------------------------------------------------------------- /tests/test_party_search.py: -------------------------------------------------------------------------------- 1 | import oscn 2 | 3 | 4 | def test_init_text(): 5 | # no text should return [] 6 | cases = oscn.find.CaseIndexes(text="") 7 | cases_list = list(cases) 8 | assert cases_list == [] 9 | 10 | 11 | def test_find_name(): 12 | search_params = { 13 | "filed_after": "12/31/2000", 14 | "filed_before": "01/01/2019", 15 | "last_name": "dungan", 16 | } 17 | 18 | cases = oscn.find.CaseIndexes(**search_params) 19 | cases_list = list(cases) 20 | assert len(cases_list) > 300 21 | 22 | search_params["first_name"] = "john" 23 | cases = oscn.find.CaseIndexes(**search_params) 24 | cases_list = list(cases) 25 | assert len(cases_list) == 2 26 | 27 | # text text init 28 | text_cases = oscn.find.CaseIndexes(text=cases.text) 29 | cases_list = list(text_cases) 30 | assert len(cases_list) == 2 31 | 32 | 33 | def test_find_company(): 34 | search_params = { 35 | "filed_after": "12/31/2018", 36 | "filed_before": "01/30/2019", 37 | "last_name": "DISCOVER BANK", 38 | } 39 | 40 | cases = oscn.find.CaseIndexes(**search_params) 41 | cases_list = list(cases) 42 | assert len(cases_list) > 300 43 | 44 | 45 | def test_find_district_type(): 46 | search_params = { 47 | "dcct": 2, 48 | "apct": 42, 49 | "db": "oklahoma", 50 | "filed_after": "02/15/2020", 51 | "filed_before": "02/19/2020", 52 | } 53 | 54 | cases = oscn.find.CaseIndexes(**search_params) 55 | cases_list = list(cases) 56 | assert len(cases_list) == 78 57 | 58 | 59 | def test_text_matches_live_query(): 60 | 61 | search_params = { 62 | "last_name": "discover bank", 63 | "first_name": "", 64 | "middle_name": "", 65 | "filed_before": "03/04/2020", 66 | "filed_after": "03/04/2020", 67 | } 68 | 69 | cases = oscn.find.CaseIndexes(**search_params) 70 | cases_list = list(cases) 71 | 72 | len_cases = len(cases_list) 73 | assert len(cases_list) == 27 74 | 75 | search_params["text"] = cases.text 76 | 77 | saved_cases = oscn.find.CaseIndexes(**search_params) 78 | cases_list = list(saved_cases) 79 | assert len(cases_list) == len_cases 80 | -------------------------------------------------------------------------------- /tests/test_request.py: -------------------------------------------------------------------------------- 1 | import oscn 2 | 3 | 4 | def test_live_request_properties(): 5 | case1 = oscn.request.Case(county="adair", type="CM", year="2019", number=6) 6 | assert case1.oscn_number == "CM-2019-6" 7 | assert case1.index == "adair-CM-2019-6" 8 | assert case1.path == "/adair/CM/2019" 9 | assert case1.s3_key == "adair/CM/2019/6.zip" 10 | assert case1.file_name == "/adair/CM/2019/6.zip" 11 | 12 | 13 | def test_init_number(): 14 | case1 = oscn.request.Case(county="adair", type="CM", year="2019", number=6) 15 | assert type(case1.number) == int 16 | assert case1.number == 6 17 | case2 = oscn.request.Case(county="adair", type="CM", year="2019", number="6") 18 | assert type(case2.number) == str 19 | assert case2.number == "6" 20 | case3 = oscn.request.Case("adair-CM-2019-6") 21 | assert type(case3.number) == str 22 | assert case3.number == "6" 23 | case5 = oscn.request.Case("oklahoma-cmid-2018-00001") 24 | assert type(case5.number) == str 25 | assert case5.number == "1" 26 | case5 = oscn.request.Case("oklahoma-cmid-2018-008001") 27 | assert type(case5.number) == str 28 | assert case5.number == "8001" 29 | case5 = oscn.request.Case("oklahoma-cmid-2018-00810") 30 | assert type(case5.number) == str 31 | assert case5.number == "810" 32 | case5 = oscn.request.Case("oklahoma-cmid-2018-000880100") 33 | assert type(case5.number) == str 34 | assert case5.number == "880100" 35 | 36 | 37 | def test_live_request_params_index(): 38 | case1 = oscn.request.Case(county="adair", type="CM", year="2019", number=6) 39 | case2 = oscn.request.Case("adair-CM-2019-6") 40 | assert case2.text == case1.text 41 | assert case2.county == case1.county 42 | assert case2.type == case1.type 43 | assert case2.year == case1.year 44 | 45 | 46 | def test_live_request_appellate(): 47 | case1 = oscn.request.Case("appellate-116264") 48 | assert case1.number == "116264" 49 | assert case1.county == "appellate" 50 | 51 | case2 = oscn.request.Case(county="appellate", number=116264) 52 | assert case2.county == "appellate" 53 | assert case2.source == case1.source 54 | assert case2.style == case1.style 55 | 56 | case3 = oscn.request.Case(county="appellate", type="F", year="2021", number=229) 57 | assert case3.valid == True 58 | 59 | 60 | def test_live_request_cmid(): 61 | case1 = oscn.request.Case("carter-cmid-2019-639922") 62 | assert case1.county == "carter" 63 | assert case1.type == "cmid" 64 | 65 | case2 = oscn.request.Case(county="carter", type="cmid", number=639922) 66 | assert case2.county == "carter" 67 | assert case2.type == "cmid" 68 | assert case2.style == case1.style 69 | -------------------------------------------------------------------------------- /tests/test_source.py: -------------------------------------------------------------------------------- 1 | import oscn 2 | 3 | import requests 4 | 5 | from oscn import settings 6 | 7 | OSCN_HEADER = settings.OSCN_REQUEST_HEADER 8 | 9 | 10 | def test_source_works(): 11 | 12 | cases = oscn.request.CaseList( 13 | types=["CM", "CF", "CJ"], 14 | counties=["tulsa", "carter", "cimarron", "oklahoma"], 15 | years=["2019", "2020"], 16 | start=5, 17 | stop=10, 18 | ) 19 | 20 | for case in cases: 21 | print(case.source) 22 | response = requests.get(case.source, headers=OSCN_HEADER, verify=False) 23 | assert response.status_code == 200 24 | -------------------------------------------------------------------------------- /tests/test_style.py: -------------------------------------------------------------------------------- 1 | import oscn 2 | 3 | from oscn import settings 4 | 5 | OSCN_HEADER = settings.OSCN_REQUEST_HEADER 6 | 7 | answer = [ 8 | "JULIE TURLINGTON, Plaintiff, v. LESLIE FINCH, Defendant.", 9 | "SRS DISTRIBUTION INC, Plaintiff, v. KELLY CAIN, D/B/A CAIN ROOFING, Defendant.", 10 | "BETTY SMITH, Plaintiff, v. H AND H VENTURES LLC, D/B/A LIFEWAY HOMES, Defendant.", 11 | "AUTO ADVANTAGE FINANCE LLC, Plaintiff, v. TATYANA KYRE BROWN, Defendant.", 12 | "UNITED AUTO CREDIT CORPORATION, Plaintiff, v. JEREMY ANTWON WILLIAMS, Defendant, and JASMIN JANAE BYRD, Defendant.", 13 | "TINKER FEDERAL CREDIT UNION, Plaintiff, v. SANDY KENNETH ANGEL, A/K/A KENNETH ANGEL, Defendant.", 14 | "STATE OF OKLAHOMA EX REL Oklahoma Tax Commission, Plaintiff, v. Brian Muirhead, Defendant.", 15 | "Amur Equipment Finance, Inc., Plaintiff, v. Traction Logistics Management, LLC and Judson Avery Cook, Defendant.", 16 | "STATE OF OKLAHOMA EX REL Oklahoma Tax Commission, Plaintiff, v. Donaciano Gutierrez, Defendant.", 17 | "Discover Bank, Plaintiff, v. Doug Dwayne Davis, Defendant.", 18 | "Discover Bank, Plaintiff, v. William Gempel, Defendant.", 19 | "Discover Bank, Plaintiff, v. Whitney Lane, Defendant.", 20 | ] 21 | 22 | def test_style(): 23 | 24 | cases = oscn.request.CaseList( 25 | types=[ "CJ"], 26 | counties=["tulsa", "oklahoma"], 27 | years=["2024"], 28 | start=5, 29 | stop=10, 30 | ) 31 | 32 | results = [] 33 | for case in cases: 34 | print(case.style) 35 | results.append(case.style) 36 | assert results == answer 37 | --------------------------------------------------------------------------------