├── .circleci └── config.yml ├── .coveragerc ├── .github └── workflows │ └── deploy.yml ├── .gitignore ├── .vscode └── settings.json ├── CHANGES.md ├── LICENSE ├── README.md ├── cassettes ├── test_comment_praw_ids ├── test_comment_praw_limit ├── test_comment_praw_mem_safe ├── test_comment_praw_query ├── test_comment_search_ids ├── test_comment_search_limit ├── test_comment_search_mem_safe ├── test_comment_search_query ├── test_submission_comment_ids_praw ├── test_submission_comment_ids_search ├── test_submission_praw_ids ├── test_submission_praw_limit ├── test_submission_praw_mem_safe ├── test_submission_praw_query ├── test_submission_search_ids ├── test_submission_search_limit ├── test_submission_search_mem_safe └── test_submission_search_query ├── examples ├── 01-ratelimitcomparison.csv ├── 02-ratelimitcomparison.csv ├── benchmark.ipynb ├── img │ ├── 01-comparison.png │ ├── 02-comparison.png │ ├── 02-requests-comparison.png │ ├── 03-cache-max-memory-comparison.png │ ├── 03-cache-memory-comparison.png │ └── 03-cache-time-comparison.png ├── search_comments.ipynb ├── search_submission_comment_ids.ipynb ├── search_submissions.ipynb └── test_data.csv ├── pmaw ├── Cache.py ├── Metadata.py ├── PushshiftAPI.py ├── PushshiftAPIBase.py ├── RateLimit.py ├── Request.py ├── Response.py ├── __init__.py ├── types │ ├── __init__.py │ └── exceptions.py └── utils │ ├── __init__.py │ ├── filter.py │ └── slices.py ├── setup.py └── tests ├── __init__.py ├── __mocks__ ├── __init__.py ├── comment.py ├── metadata.py └── submission.py ├── config.py ├── test_cache.py ├── test_filter_fn.py ├── test_metadata.py ├── test_request.py ├── test_response.py ├── test_search_comments.py ├── test_search_submission_comment_ids.py └── test_search_submissions.py /.circleci/config.yml: -------------------------------------------------------------------------------- 1 | version: 2.1 2 | orbs: 3 | codecov: codecov/codecov@1.0.2 4 | jobs: 5 | build: 6 | docker: 7 | - image: circleci/python:3.6.4 8 | steps: 
9 | - checkout 10 | - run: 11 | name: install dependencies 12 | command: | 13 | sudo pip install --upgrade pip 14 | python setup.py sdist bdist_wheel 15 | sudo pip install coverage pytest praw vcrpy python-dotenv 16 | sudo pip install . 17 | - run: 18 | name: run tests 19 | command: | 20 | mkdir test-results 21 | coverage run --source=. -m pytest --junitxml=test-results/junit.xml 22 | coverage html 23 | coverage xml 24 | - codecov/upload: 25 | file: coverage.xml 26 | - store_test_results: 27 | path: test-results 28 | - store_artifacts: 29 | path: htmlcov 30 | workflows: 31 | build_test: 32 | jobs: 33 | - build 34 | -------------------------------------------------------------------------------- /.coveragerc: -------------------------------------------------------------------------------- 1 | [run] 2 | omit = 3 | */__init__.py 4 | setup.py 5 | tests/* -------------------------------------------------------------------------------- /.github/workflows/deploy.yml: -------------------------------------------------------------------------------- 1 | name: Deploy 2 | 3 | on: 4 | release: 5 | types: [published] 6 | workflow_dispatch: 7 | 8 | jobs: 9 | deploy: 10 | runs-on: ubuntu-latest 11 | steps: 12 | - uses: actions/checkout@v2 13 | - name: Setup Python 14 | uses: actions/setup-python@v2 15 | with: 16 | python-version: '3.x' 17 | - name: Install dependencies 18 | run: | 19 | python -m pip install --upgrade pip 20 | pip install twine wheel 21 | - name: Build and publish 22 | env: 23 | TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }} 24 | TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }} 25 | run: | 26 | python setup.py sdist bdist_wheel 27 | twine upload dist/* 28 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /dist 2 | /build 3 | /examples/.ipynb_checkpoints 4 | /notebook-tests 5 | .pytest_cache 6 | .coverage 7 | .env 8 | pytest.ini 9 | pytest.log 
10 | htmlcov 11 | 12 | pmaw.code-workspace 13 | pmaw.egg-info 14 | 15 | /**/__pycache__ 16 | /**/cache 17 | .idea/ 18 | -------------------------------------------------------------------------------- /.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "python.testing.pytestArgs": [ 3 | "tests" 4 | ], 5 | "python.testing.unittestEnabled": false, 6 | "python.testing.pytestEnabled": true 7 | } -------------------------------------------------------------------------------- /CHANGES.md: -------------------------------------------------------------------------------- 1 | ## 3.0.0 (2022/12/24) 2 | 3 | - changed `before` and `after` to `until` and `since` 4 | - removed `metadata=true` as this is now always enabled 5 | - set `order='desc'` as this replaces `sort` 6 | - set `sort='created_utc'` so that slicing still works as expected 7 | - Read more on [COLO switchover](https://www.reddit.com/r/pushshift/comments/zkggt0/update_on_colo_switchover_bug_fixes_reindexing/) 8 | - refactored metadata usage 9 | - 🎅🎅🎅🎅🎅🎅🎅🎅🎅🎅🎅🎅🎅🎅 10 | 11 | ## 2.1.3 (2022/02/20) 12 | 13 | - Don't inherit from object in classes 14 | - Removed logging configuration to prevent unexpected results for users 15 | 16 | ## 2.1.2 (2022/01/07) 17 | 18 | - fix scenario where a result is reported but cannot be returned by Pushshift 19 | 20 | ## 2.1.1 (2021/11/29) 21 | 22 | - fix index error bug 23 | 24 | ## 2.1.0 (2021/10/01) 25 | 26 | - Updated logging and set default log level to INFO 27 | - Added `load_cache` static method to `Response` to load cached responses using cache key 28 | 29 | ## 2.0.0 (2021/09/11) 30 | 31 | - Added support for enriching result metadata using PRAW 32 | - Implemented functional tests 33 | - Reduced `max_ids_per_request` to 500 34 | - Added automated testing 35 | - Increased exception handling specificity 36 | - Added `filter_fn` for custom filtering 37 | 38 | ## 1.1.0 (2021/05/27) 39 | 40 | - Added gzip for cached pickle 
files 41 | - Exception handling is now slightly more specific 42 | - Updated many print statements to output via logging 43 | - Fixed issue with safe_exit not saving info 44 | 45 | ## 1.0.5 (2021/04/21) 46 | 47 | - Moved remaining limit logging to DEBUG from INFO 48 | - Fixed generator incorrect length after being partially iterated through 49 | - Reduced the number of debug logs 50 | - Fixed duplicate responses being returned if the number of responses for a provided window is less than expected 51 | 52 | ## 1.0.4 (2021/03/05) 53 | 54 | - None type comparison bug fixed 55 | - updated how limit was being updated for submission comment ids 56 | 57 | ## 1.0.3 (2021/02/19) 58 | 59 | - fixed early cache bug 60 | - fixed limit being retrieved from next search window when resuming from safe exit 61 | 62 | ## 1.0.2 (2021/02/16) 63 | 64 | - fixed comments returning 25 by default 65 | 66 | ## 1.0.1 (2021/02/16) 67 | 68 | - limit error in `trim` hot fix 69 | 70 | ## 1.0.0 (2021/02/14) 71 | 72 | - `search` methods now return a `Response` generator object 73 | - memory safety can now be enabled with `mem_safe` to cache responses during data retrieval and reduce the amount of memory used 74 | - safe exiting can now be enabled with `safe_exit` to safely exit when an interrupt signal is received during data retrieval 75 | - load unfinished requests and saved responses from `cache` when safe exiting is enabled 76 | - request details are now handled inside a `Request` object 77 | 78 | ## 0.1.3 (2021/02/08) 79 | 80 | - Fixed infinite while loop error 81 | - Checkpoint by batch 82 | - Removed erroneous pandas import 83 | 84 | ## 0.1.2 (2021/02/06) 85 | 86 | - Fixed timeslicing creating extra requests 87 | 88 | ## 0.1.1 (2021/02/06) 89 | 90 | - Fixed a bug with timeslicing causing duplicate results 91 | - Fixed a miscalculation error for remaining results for a timeslice 92 | 93 | ## 0.1.0 (2021/02/05) 94 | 95 | - General code improvements 96 | - Added exponential backoff and jitter 
rate-limiting 97 | - Added `non-id` search for submissions and comments 98 | 99 | ## 0.0.2 (2021/01/23) 100 | 101 | - Initial implementation of multithreading requests for `ids` queries, with support for: 102 | - comment ids by submission id 103 | - submissions by id 104 | - comments by id 105 | - Rate-limit based on rate averaging across previous requests 106 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 Matthew Podolak 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /cassettes/test_comment_search_ids: -------------------------------------------------------------------------------- 1 | interactions: 2 | - request: 3 | body: null 4 | headers: 5 | Accept: 6 | - '*/*' 7 | Accept-Encoding: 8 | - gzip, deflate 9 | Connection: 10 | - keep-alive 11 | User-Agent: 12 | - python-requests/2.25.1 13 | method: GET 14 | uri: https://api.pushshift.io/reddit/comment/search?ids=gjacwx5,gjad2l6,gjadatw,gjadc7w,gjadcwh,gjadgd7,gjadlbc,gjadnoc,gjadog1,gjadphb 15 | response: 16 | body: 17 | string: !!binary | 18 | H4sIAAAAAAAAA+2a32/bNhDH3/tX3NSHbEBiR3b8I3kpUmAFMjQYsBbbQ1MIlHQW2VA8lTxFMYr+ 19 | 7wMlO808a/Ewd45h5SGwzS9p8njkx3e6Ly8AAIJUsAgu4EP9zv99eXhVtwutI1EJmyqTOS/8eLwi 20 | KApLd5hGgqOSk+ACTKn1qsg5SpRgL/ODtahKlmSDCwguf7fCpJSXTiXBWlU000LZKBbJbWapNGmU 21 | kK47tw686JI4FyVaOLeB1qpEMt7z2oU/FjLmhRaMkUo3GHYx5CayjZfF8wK96eqxW4Sl1kbkjWwQ 22 | Tc4mKE5T2aIuBFsk0wwfXMBMaIctUou5KvM2kd9xtGu9JxbGPOE7MaVzP+M/JB05SMgbWhllsldw 23 | dZSDAJYqk+BYmPSH1bUkwkQ5pVFBjluml5DWonCYPtUexZiI0mGUWKq8vxm2pNdP+lsni8KRaVPl 24 | ORpe7t06hcX61DTWCcfh6Xg8HYanK7JUOVYmK5WT2OKAmCpuXWOm9PKAf/m60lZ7dJB9Ekl1P1q1 25 | r3KRK+NcMWObj2hlbptjEfAwur1Xg2q6Ooym5LZ1coaiGWlNVUt7Iaw34hNfUaDNhZ+LV/VtXxiV 26 | Y3+xA67fdOo7piJ65GORiKnkiCVGtZu5SJn+whb91a+wyFahvwvrDfe7NRmNh5NwRecSsn6/x6uf 27 | o/H+UmiFfifYlqtLdaySW9VqKVfGFtNUeVcP6gUGbYqluUbR4LMcDFZ13gzNuU3/wTuZWCzw4CKL 28 | Caq7em6r7sneixtPFwuKPAgeuduzQM8bIu2i3/BzqTD/t+wJXg5wFIpBsCmAguvLt8F3IFCA8Wgc 29 | h5PpyVk8TE/CEMOT8yniSTgYhsP4dCpGYRhswKhAMhfuot/P57VDaeW4Z5D7haWZ0tiv7bWJuf5C 30 | s0CrTHKwXaDNTT6kPaHZh48/vjQo+KfDINb5ToiVDvR418Rac4vvAljeFBsC62zaBqyzDljPD1iX 31 | 8dy5SnAi0caou3DpO4VLQ11OjSunewKYt7++PQZhEeZUgkOrqHRgPXXBUPXqxtyYd2TtHCqET6Vj 32 | qIRhcJQjS2UyYAIn7hC0ukN3EJAanU53AinBVQepBlKCqw0hNR6OWiB1EnaUen6UMrNIivTvN0mH 33 | py3hSVr1OYz3BE41cDLykGGJYLACV1/oQAYSW5pEzsGS1iBMConE5LYWLi+WHryXqCzgHdpKosWe 34 | 
7xgTS8BCOUrR9f4js9ZcE9tC1hJQUF9ZEKOmClhadJJ0GmyJZoPJTmiWTJ6i2RrL/r8wC6OWbOaW 35 | aZZMNqbZOGyl2aDD2fPD2TuRl6JC/3O5yxE+mSP82dK1MJl4I8yOU4ThGC2Fe0LJqxlcQSLMTTk4 36 | Dc99hMaJhNrEMCNbE5Glqtno751j/4mBSgpuurhaUpAyDMo0/X1s1wwhGITWvV7vMKK74Y54WMnu 37 | odkSiJXcFIiTYffQrIvuuuhuH6O7K5A+c2ioySSKTCjjGLS6rROLj4jVg3wOhaVYYw7+BEOlWDbU 38 | Qio0QiWpzmKmloqi6Y7gJFWwAAvQDBQfCMOm450wLEsnexHTrUulbhlhWTrZEGGT4bgFYV2C8hki 39 | 7FoU9J5m5fUTMcphR3QP7/qvhSXz2qp7VbpdB3Wq+GzH+Gl/8EgGHes5pCo1R+zTmMbjUiUIpWGl 40 | 4QqcqKA5sg9ZT6gvJU+7A4nZxqPdPJHTcdI9kWt4p+NkQ95NB2dtOcyubuQ51o10NfZdjf0Kmt6J 41 | HEGT7sFVUw/CBDECKpZoAW7MZc8/nvOXL8TzRR6xuTB8QQlZ//91DzLkh8J8LQoolL/W/MiHwa3z 42 | cCfcMvT8udUEamuyolsGl6GNwTUed/WOXa6xY9Z+5hqXMVTDKJaCjxxUcg6KQRhDcwc5giPIy0TW 43 | 7cvcos8r5qJOSjrfJiBWGaQo9LdI6yB4NTkd7oRXlIX7kVdcExBuGVeUhZvianLeVT52cVbHrD2O 44 | s35BV7rjutp+CaNFSSSzgEQqrcX9gbAn3E1dRiHj51+XsQiWJt/7qVYh403pcz5toc+ogw/A1/rV 45 | xxdf/wRFqEIVX0MAAA== 46 | headers: 47 | CF-Cache-Status: 48 | - MISS 49 | CF-RAY: 50 | - 68a9bdd89b0d5431-YYZ 51 | Connection: 52 | - keep-alive 53 | Content-Encoding: 54 | - gzip 55 | Content-Type: 56 | - application/json; charset=UTF-8 57 | Date: 58 | - Mon, 06 Sep 2021 18:25:45 GMT 59 | Expect-CT: 60 | - max-age=604800, report-uri="https://report-uri.cloudflare.com/cdn-cgi/beacon/expect-ct" 61 | Last-Modified: 62 | - Mon, 06 Sep 2021 18:25:45 GMT 63 | NEL: 64 | - '{"success_fraction":0,"report_to":"cf-nel","max_age":604800}' 65 | Report-To: 66 | - '{"endpoints":[{"url":"https:\/\/a.nel.cloudflare.com\/report\/v3?s=9GmsIAcKsg2YkkvprDUgzc38MdrQcugGbBdBZwJX7quTodBfuSczsx%2FH1tWXAiol%2F8HfsnJ6MDnl92ZMC1JZy%2Ffns8Fr1cfYpDL3ZDJNHdqPiva6HlIUqCgNeY52fgKGy9Mx"}],"group":"cf-nel","max_age":604800}' 67 | Server: 68 | - cloudflare 69 | Transfer-Encoding: 70 | - chunked 71 | Vary: 72 | - Accept-Encoding 73 | access-control-allow-origin: 74 | - '*' 75 | alt-svc: 76 | - h3=":443"; ma=86400, h3-29=":443"; ma=86400, h3-28=":443"; ma=86400, h3-27=":443"; 77 | ma=86400 78 | cache-control: 79 | - 
public, max-age=1, s-maxage=1 80 | status: 81 | code: 200 82 | message: OK 83 | - request: 84 | body: null 85 | headers: 86 | Accept: 87 | - '*/*' 88 | Accept-Encoding: 89 | - gzip, deflate, br 90 | Connection: 91 | - keep-alive 92 | User-Agent: 93 | - python-requests/2.27.1 94 | method: GET 95 | uri: https://api.pushshift.io/reddit/comment/search?ids=gjacwx5,gjad2l6,gjadatw,gjadc7w,gjadcwh,gjadgd7,gjadlbc,gjadnoc,gjadog1,gjadphb 96 | response: 97 | body: 98 | string: !!binary | 99 | 4YDIASB/S+3rdGXfhqS2iQJpQ0z+pULwZDGNQA2o3V6Xq+Zynft15n+NmRKPVAhJk/jRIIVvqMg/ 100 | RC1Bu0PEZpbWLJS9jnhcWrZbqaYaLYQ2G8PFd1jLG0N7tQ/wtlkY//cAG+NUBcWdCulaYfzfpw5s 101 | rdkF29CrxoMxHTF2EHch4Qg/+WexyecCoOrDUX9KuLwF6KejOVOtmZZoQ5mi3+pPibMnrbG+q3Wq 102 | wWLSOuKU4NaGH61bWU8SE6LqKXjj0GhyFJPbUSHbfUcYwQrK2hBHjMluu1nwSUuNlvp1cITebSuY 103 | kzVcMC42VtTAUXALx6YU5maLxzIC+Obs7zBCjRO4O1LzhqTsqvvks6M2cs2tWeLWEKP9OEEHzqbp 104 | GqKHsZUD/2tky34qGAJ2Bk9wWfhHZlOEaY7JlXzzlcu3LzmOAUorVUhYSs1v2zC1IYT1e6RWcoFi 105 | 52hjaHcYaQcTxh6zntG+N5rpDnyoLaTrEeqKU1l99KF1aIhriB69XXbXEKfSw+zx7CB4GOH6mfX7 106 | OkMHoU5JXeRrOMRdYkivE8TqoYnp9SPwm4EOYnavg8I+5SlCEBqEIXZbMDWz+0GoYZB6EFzKDnYs 107 | m40hvcII53K2KWx4HhKm9exEuHNteZ8S3t8bG1JI18nO+WhTW3Fqa7iudQrp3OmzP0MHb4ctNrWQ 108 | Rg6rxhCF2TjMd7O6XBBG0UHF5KeCewxYO5yztuBeg0Wj1mMu6H1oMELnpof/0YDhVxN/WzlX3WSy 109 | G057wSV8oIcRepatD472dNBwAaEddNDyDhUF0U8SVJsdQNU4dSroMLyPTwJw9lOzuMNVsJWA71PL 110 | 3Dhjqhe96eDY/WQUqzf9IHQ/zC6H/lRXy2AEtGbuOee854zpBdHMA18EFWqQ1Lq5x8WqgVvo4Ghu 111 | 8rZhCxtOtRUYgVPOXih7YZJwMSozCg3P7gE2xr6Ak1/KAfKfXlqm1Xqs4PiLWUt4Y/PxYP6G+ODT 112 | NxvB0FYspK22fbOS23onoRGbUr5XsiGpmZTLmJ0HPMJQD1uQ5EqKM2FtTuZwJdnQg3n1iUxCA9Vb 113 | ggpSOl8ZHBanwPUXZaiighPz+cruql7YQS5UL1hI3Qs5C4+LFWJZFFK7LNJQjigFRe+4GJiUmuSH 114 | i3MdHOfq/2Y3JDHHE/kNudnUSGVyMdBumFzST07ElaOu6Ml8JxyQLpkNd+KSLimXS7qkn57IFRux 115 | iv+DSbQ72UOM+UZijtzqI9EPjB6JlB0ho0ncXinJ+EDMp+yuqhTlikRXwYKKZdGUDf1iZ6TcCqc1 116 | 1YYbybyyWju5OO6ZIflhbLH8we7573k5/mDTfp6Y4oYbxYQEqyQgtmfwVY6KWQ6wRuNso/CHn/xe 117 | 
S6rcjo6As+pnps2LnIV/YQzZy2AQXxgXTMzUWMWYA9Dy0WBtba/j+bzd190SxlDbKWE7/7Ow55/a 118 | ktNPS/gIR7WI9GgHjuG6NkDu3wwL+1vp8TOa5DdkzQlri3fOeLJ3TCTlFhySvOJC/YZUeyPzhD5Z 119 | Z6V43lw/iRGQzjTBZixA0CGMmAFJlWSQFqxrjh5g+5ZQhh7NODsBAxxApxAnrAZxdmdXeC097GDP 120 | nHLPqOPLMDNuLdMKkffWaNorLji1Axo5QAcJa5tK219jGBl8v5PSozCSBqqdU/hTbmtI1/zJ8sdQ 121 | E3S/tgb3ygH7n8h2J8lBzQeJASu5hbaSVFtsbmuWPvj6kvfdirBI6ppvBDqf0CQv20dCmZ4e3avX 122 | 9IrpgUlpLvNXr6+mOOUOv56wK7EY6hhzXlE5eCe8F8ZzI9Fr6ryZtZ+9ppQ0ph95H/NvdjvsDWte 123 | XCFIS4Tv6JkcKKdGDBwtz/ws8tr2kpcQ8fyLkv9g09X+clWBsiWN+eNPpseSmUxj4t8s5DfE2fTl 124 | 5180kiM0klvYLLmAMAvHN9SRtmIit9W2Lz//ourBOWPbkpAsAAsCIPfoBLYRGyPrnOi/px1kXxrC 125 | 9+3VdLcVZBpw6XQKyYmZd7f1qmpAa7lgQjCnXnKxsEEj6x3yXvVi4JYqxTVb1KKGWTvvZsUgX5tS 126 | 3nAoKEIG0gJBPglvpHpnrZrkRFw5klvvpOQYm9+AaooeoBl7mM/qRP6+YigE37E0rVDAieREatby 127 | zwL3ULPHKpi4RRTH3ltY31jlT0oLyTQx7/TtruqF+dIxwakSCpgUvtfLYITkjGspNHJu1MClG9TA 128 | cRZaKqWCPaiRGrnPT+Z7rdzS1x6WGSP0/mZEPEyqhzm5/P5Pv+/ECRPe80EqlpCPSgoqsUmS8u1H 129 | l3RJf8ul3MktvBjwzSMh/9KTRpII8Y4khvfF4xlL5CAZdmlQAe+T07YblYFtKHo1RYUNMmNaHHPS 130 | YDFcUbbMdhFWamM87dniey+E7bWxZlkQhXKahc0jKt9dPL/MOdbpr/h2BNz42HlisleaStpTTDx7 131 | kD6AoZH4nkJw0ptpIn5ock+byKsKaewv9zf8v0/f+mpC274t030kxEDpycljD7JPvzz20tzaoFps 132 | BHOxWDYovXBESblh2movpRSSIWfzwASz2s/SkMaIkdFzHaHKOP+15m9W4osS7kfkN9/cSAAxXBSd 133 | K/FXcPKSYPRputuHgoBhcDCcOkCp/u72oa6qepIcFUcL5rNFVGJWfu6pRTkvM9du0MoZZVFo5ZH1 134 | 3rKeZsVGReH5qQMsBbNcN2w2CtiReABWGB/Qcn6FkQvRQQsb+ikfM6BHOrapfnoxcNpXW7F6YIaZ 135 | 6mqLt2FPmo0wGk47qIdzWOty/On2GvadWSBFfOoORvrsYA3NAosHVLQwAoyMdlCmJiCZGwff4NnB 136 | Zj+mJe6upiPG57MDrNPbgeUO4wNq+L9p8rdu6DxgcYMK4//EWzBuD4+huued7zArzg+K/wMCVaLf 137 | IqZnKBw72ogaOtgP1OibkFEp2KfnswN79e9uSc2lwfjgvnjAY3XQXyE4jPC4XHQuB93LIfYFxscF 138 | gq8/YA9expeDzuWgc/HP17k8YJfSDKbLBcZLj050gecTOrB7mMpOufNTCxvCyHrNpOZKsVPfG8MG 139 | rdG8YFGm9KeyVMHTOhjGuNYnqZhmWlJ5TH4j1pxUzw1lPYyRhNzcjhNjjBrJOdeDUHp4PgE= 140 | headers: 141 | CF-Cache-Status: 142 | - MISS 143 | CF-RAY: 144 | - 77e250a3cea8631b-ORD 145 | 
Connection: 146 | - keep-alive 147 | Content-Encoding: 148 | - br 149 | Content-Type: 150 | - application/json 151 | Date: 152 | - Fri, 23 Dec 2022 16:01:18 GMT 153 | Last-Modified: 154 | - Fri, 23 Dec 2022 16:01:18 GMT 155 | NEL: 156 | - '{"success_fraction":0,"report_to":"cf-nel","max_age":604800}' 157 | Report-To: 158 | - '{"endpoints":[{"url":"https:\/\/a.nel.cloudflare.com\/report\/v3?s=835NiN55EVxnuClXF%2BUSb2nsN5Yys18h5BmXbV4xZPcAGwBnvP4Tm5O7M75FjnT3Zr9db8p9LoLEXjb77ivbO7VccUUqCTnYbhfI1bUegIiPd2f5tfAGYYH5gbBUQnxwmO0d"}],"group":"cf-nel","max_age":604800}' 159 | Server: 160 | - cloudflare 161 | Transfer-Encoding: 162 | - chunked 163 | Vary: 164 | - Accept-Encoding 165 | alt-svc: 166 | - h3=":443"; ma=86400, h3-29=":443"; ma=86400 167 | cache-control: 168 | - public, max-age=2, s-maxage=2 169 | status: 170 | code: 200 171 | message: OK 172 | version: 1 173 | -------------------------------------------------------------------------------- /cassettes/test_submission_comment_ids_praw: -------------------------------------------------------------------------------- 1 | interactions: 2 | - request: 3 | body: null 4 | headers: 5 | Accept: 6 | - '*/*' 7 | Accept-Encoding: 8 | - gzip, deflate, br 9 | Connection: 10 | - keep-alive 11 | User-Agent: 12 | - python-requests/2.27.1 13 | method: GET 14 | uri: https://api.pushshift.io/reddit/submission/comment_ids/kxi2w8 15 | response: 16 | body: 17 | string: '{"detail":"Not Found"}' 18 | headers: 19 | Age: 20 | - '79' 21 | CF-Cache-Status: 22 | - HIT 23 | CF-RAY: 24 | - 77e28ee80e7c6399-ORD 25 | Connection: 26 | - keep-alive 27 | Content-Length: 28 | - '22' 29 | Content-Type: 30 | - application/json 31 | Date: 32 | - Fri, 23 Dec 2022 16:43:47 GMT 33 | NEL: 34 | - '{"success_fraction":0,"report_to":"cf-nel","max_age":604800}' 35 | Report-To: 36 | - 
'{"endpoints":[{"url":"https:\/\/a.nel.cloudflare.com\/report\/v3?s=GqSzydrSjDpkTx2dnuvZ1m%2BljSJQq7Ywp9zGPFzlm4CasI6wUt%2FVJLmIBvsUVaPHDDr3BwjRCORo7Xe9PWC7aAkmQMUb1Jsge8mrDYpY3Be2S9kq8%2FNQazAUzwlDTNan93UP"}],"group":"cf-nel","max_age":604800}' 37 | Server: 38 | - cloudflare 39 | Vary: 40 | - Accept-Encoding 41 | alt-svc: 42 | - h3=":443"; ma=86400, h3-29=":443"; ma=86400 43 | status: 44 | code: 404 45 | message: Not Found 46 | - request: 47 | body: null 48 | headers: 49 | Accept: 50 | - '*/*' 51 | Accept-Encoding: 52 | - gzip, deflate, br 53 | Connection: 54 | - keep-alive 55 | User-Agent: 56 | - python-requests/2.27.1 57 | method: GET 58 | uri: https://api.pushshift.io/reddit/submission/comment_ids/kxhwh0 59 | response: 60 | body: 61 | string: '{"detail":"Not Found"}' 62 | headers: 63 | Age: 64 | - '134' 65 | CF-Cache-Status: 66 | - HIT 67 | CF-RAY: 68 | - 77e28f073f40a1fe-YYZ 69 | Connection: 70 | - keep-alive 71 | Content-Length: 72 | - '22' 73 | Content-Type: 74 | - application/json 75 | Date: 76 | - Fri, 23 Dec 2022 16:43:52 GMT 77 | NEL: 78 | - '{"success_fraction":0,"report_to":"cf-nel","max_age":604800}' 79 | Report-To: 80 | - '{"endpoints":[{"url":"https:\/\/a.nel.cloudflare.com\/report\/v3?s=zNgpijbBjIWtpc%2BX4hNOLZdae%2BusBm3wOMEORDR4CXYb1DX7kPv8%2F2buN8G%2B38%2B%2Bh7RUXzbqvhOpX6WEu8kle%2FX6tDPgA4LM%2FiaHCs3ck9Kylr0K74Y0hC2%2FUz8A1rFNEXyX"}],"group":"cf-nel","max_age":604800}' 81 | Server: 82 | - cloudflare 83 | Vary: 84 | - Accept-Encoding 85 | alt-svc: 86 | - h3=":443"; ma=86400, h3-29=":443"; ma=86400 87 | status: 88 | code: 404 89 | message: Not Found 90 | - request: 91 | body: null 92 | headers: 93 | Accept: 94 | - '*/*' 95 | Accept-Encoding: 96 | - gzip, deflate, br 97 | Connection: 98 | - keep-alive 99 | User-Agent: 100 | - python-requests/2.27.1 101 | method: GET 102 | uri: https://api.pushshift.io/reddit/submission/comment_ids/kxhv53 103 | response: 104 | body: 105 | string: '{"detail":"Not Found"}' 106 | headers: 107 | Age: 108 | - '134' 109 
| CF-Cache-Status: 110 | - HIT 111 | CF-RAY: 112 | - 77e28f0d9adf62bd-ORD 113 | Connection: 114 | - keep-alive 115 | Content-Length: 116 | - '22' 117 | Content-Type: 118 | - application/json 119 | Date: 120 | - Fri, 23 Dec 2022 16:43:53 GMT 121 | NEL: 122 | - '{"success_fraction":0,"report_to":"cf-nel","max_age":604800}' 123 | Report-To: 124 | - '{"endpoints":[{"url":"https:\/\/a.nel.cloudflare.com\/report\/v3?s=%2FKbCincNhloTPKC0WfRxqDHbX28NFfYTVL3xkQE1mUHxq39RR6UpNTrdkRzzbXntH5zJ6oQdVYxwcEUYtxSLS8CjYV2p4qYo9UjNXE5auUsuGtDZZlzqPjQW0kPA2UfzX0l8"}],"group":"cf-nel","max_age":604800}' 125 | Server: 126 | - cloudflare 127 | Vary: 128 | - Accept-Encoding 129 | alt-svc: 130 | - h3=":443"; ma=86400, h3-29=":443"; ma=86400 131 | status: 132 | code: 404 133 | message: Not Found 134 | - request: 135 | body: null 136 | headers: 137 | Accept: 138 | - '*/*' 139 | Accept-Encoding: 140 | - gzip, deflate, br 141 | Connection: 142 | - keep-alive 143 | User-Agent: 144 | - python-requests/2.27.1 145 | method: GET 146 | uri: https://api.pushshift.io/reddit/submission/comment_ids/kxhm7b 147 | response: 148 | body: 149 | string: '{"detail":"Not Found"}' 150 | headers: 151 | Age: 152 | - '124' 153 | CF-Cache-Status: 154 | - HIT 155 | CF-RAY: 156 | - 77e28f138f0bf981-YYZ 157 | Connection: 158 | - keep-alive 159 | Content-Length: 160 | - '22' 161 | Content-Type: 162 | - application/json 163 | Date: 164 | - Fri, 23 Dec 2022 16:43:54 GMT 165 | NEL: 166 | - '{"success_fraction":0,"report_to":"cf-nel","max_age":604800}' 167 | Report-To: 168 | - '{"endpoints":[{"url":"https:\/\/a.nel.cloudflare.com\/report\/v3?s=YwpNPlssPVqiKOL1fImAENoMbltkmW26vvrosYelwSvajNgGmNdjyb0PL7pWzI6xBrKU%2FYbpYZ4dwPEEskndcwc0KyCTSIn6MlLFBQPeZ3BJ8340D%2BuVq100fXDPMJC7w9J2"}],"group":"cf-nel","max_age":604800}' 169 | Server: 170 | - cloudflare 171 | Vary: 172 | - Accept-Encoding 173 | alt-svc: 174 | - h3=":443"; ma=86400, h3-29=":443"; ma=86400 175 | status: 176 | code: 404 177 | message: Not Found 178 | - request: 179 
| body: null 180 | headers: 181 | Accept: 182 | - '*/*' 183 | Accept-Encoding: 184 | - gzip, deflate, br 185 | Connection: 186 | - keep-alive 187 | User-Agent: 188 | - python-requests/2.27.1 189 | method: GET 190 | uri: https://api.pushshift.io/reddit/submission/comment_ids/kxhm3s 191 | response: 192 | body: 193 | string: '{"detail":"Not Found"}' 194 | headers: 195 | Age: 196 | - '134' 197 | CF-Cache-Status: 198 | - HIT 199 | CF-RAY: 200 | - 77e28f19efd7a238-YYZ 201 | Connection: 202 | - keep-alive 203 | Content-Length: 204 | - '22' 205 | Content-Type: 206 | - application/json 207 | Date: 208 | - Fri, 23 Dec 2022 16:43:55 GMT 209 | NEL: 210 | - '{"success_fraction":0,"report_to":"cf-nel","max_age":604800}' 211 | Report-To: 212 | - '{"endpoints":[{"url":"https:\/\/a.nel.cloudflare.com\/report\/v3?s=2a5h%2BPZS5M%2BCd85sbdzDCDbyJqWoA9ErSwHOcQ5uo9jW7Mi2JQ%2BYSZROmfrlbRHq2IOvK0g5IWi4lgjIyMdvsqynhK44VSgq7dUqaZ2v%2FHLWATmpDLVp%2FBPQzcASQZK5HelA"}],"group":"cf-nel","max_age":604800}' 213 | Server: 214 | - cloudflare 215 | Vary: 216 | - Accept-Encoding 217 | alt-svc: 218 | - h3=":443"; ma=86400, h3-29=":443"; ma=86400 219 | status: 220 | code: 404 221 | message: Not Found 222 | - request: 223 | body: null 224 | headers: 225 | Accept: 226 | - '*/*' 227 | Accept-Encoding: 228 | - gzip, deflate, br 229 | Connection: 230 | - keep-alive 231 | User-Agent: 232 | - python-requests/2.27.1 233 | method: GET 234 | uri: https://api.pushshift.io/reddit/submission/comment_ids/kxhg37 235 | response: 236 | body: 237 | string: '{"detail":"Not Found"}' 238 | headers: 239 | Age: 240 | - '134' 241 | CF-Cache-Status: 242 | - HIT 243 | CF-RAY: 244 | - 77e28f204edc6356-ORD 245 | Connection: 246 | - keep-alive 247 | Content-Length: 248 | - '22' 249 | Content-Type: 250 | - application/json 251 | Date: 252 | - Fri, 23 Dec 2022 16:43:56 GMT 253 | NEL: 254 | - '{"success_fraction":0,"report_to":"cf-nel","max_age":604800}' 255 | Report-To: 256 | - 
'{"endpoints":[{"url":"https:\/\/a.nel.cloudflare.com\/report\/v3?s=4qCpAPjUWrybb4p%2FJQC4AE%2Bpy32WoxpwzUoUb%2FpnAkpdttLRQwmqxSgj%2FCUs%2BPsaBEsrKZ%2BGz4in%2B6vHY7gwnGSag0NxwmDwJh0%2B0rD9CaPVATUrQyWj4vT%2BMP%2F6NvopNvNm"}],"group":"cf-nel","max_age":604800}' 257 | Server: 258 | - cloudflare 259 | Vary: 260 | - Accept-Encoding 261 | alt-svc: 262 | - h3=":443"; ma=86400, h3-29=":443"; ma=86400 263 | status: 264 | code: 404 265 | message: Not Found 266 | - request: 267 | body: null 268 | headers: 269 | Accept: 270 | - '*/*' 271 | Accept-Encoding: 272 | - gzip, deflate, br 273 | Connection: 274 | - keep-alive 275 | User-Agent: 276 | - python-requests/2.27.1 277 | method: GET 278 | uri: https://api.pushshift.io/reddit/submission/comment_ids/kxhak9 279 | response: 280 | body: 281 | string: '{"detail":"Not Found"}' 282 | headers: 283 | Age: 284 | - '134' 285 | CF-Cache-Status: 286 | - HIT 287 | CF-RAY: 288 | - 77e28f268bcd6275-ORD 289 | Connection: 290 | - keep-alive 291 | Content-Length: 292 | - '22' 293 | Content-Type: 294 | - application/json 295 | Date: 296 | - Fri, 23 Dec 2022 16:43:57 GMT 297 | NEL: 298 | - '{"success_fraction":0,"report_to":"cf-nel","max_age":604800}' 299 | Report-To: 300 | - '{"endpoints":[{"url":"https:\/\/a.nel.cloudflare.com\/report\/v3?s=j5P1DmbE6Wl0Z9Be66igkyyQkm2cGK9OMHui8itShnbYoU%2BO5o5dQJaM9friSDkJvt29TzHADC0Z5vUKtHPioiQ%2Fjeji6Cu0kfhuaB39e6nHIeqsLohsp51%2B2sk7RlGZ2ViK"}],"group":"cf-nel","max_age":604800}' 301 | Server: 302 | - cloudflare 303 | Vary: 304 | - Accept-Encoding 305 | alt-svc: 306 | - h3=":443"; ma=86400, h3-29=":443"; ma=86400 307 | status: 308 | code: 404 309 | message: Not Found 310 | - request: 311 | body: null 312 | headers: 313 | Accept: 314 | - '*/*' 315 | Accept-Encoding: 316 | - gzip, deflate, br 317 | Connection: 318 | - keep-alive 319 | User-Agent: 320 | - python-requests/2.27.1 321 | method: GET 322 | uri: https://api.pushshift.io/reddit/submission/comment_ids/kxi2w8 323 | response: 324 | body: 325 | string: 
'{"detail":"Not Found"}' 326 | headers: 327 | Age: 328 | - '100' 329 | CF-Cache-Status: 330 | - HIT 331 | CF-RAY: 332 | - 77e28f668a296357-ORD 333 | Connection: 334 | - keep-alive 335 | Content-Length: 336 | - '22' 337 | Content-Type: 338 | - application/json 339 | Date: 340 | - Fri, 23 Dec 2022 16:44:08 GMT 341 | NEL: 342 | - '{"success_fraction":0,"report_to":"cf-nel","max_age":604800}' 343 | Report-To: 344 | - '{"endpoints":[{"url":"https:\/\/a.nel.cloudflare.com\/report\/v3?s=QiOm7YYHQvh1VEro6scgYKMQvBdWhWyYhgD9re3M4BPiabRArvGfwHFqiWLKz9RxaJrnwsaJXIqWGX57NqhjNCR1fweVR7cBMtegpG9ssn6wKqQfxdfkCekVsREenHCLsn8%2F"}],"group":"cf-nel","max_age":604800}' 345 | Server: 346 | - cloudflare 347 | Vary: 348 | - Accept-Encoding 349 | alt-svc: 350 | - h3=":443"; ma=86400, h3-29=":443"; ma=86400 351 | status: 352 | code: 404 353 | message: Not Found 354 | - request: 355 | body: null 356 | headers: 357 | Accept: 358 | - '*/*' 359 | Accept-Encoding: 360 | - gzip, deflate, br 361 | Connection: 362 | - keep-alive 363 | User-Agent: 364 | - python-requests/2.27.1 365 | method: GET 366 | uri: https://api.pushshift.io/reddit/submission/comment_ids/kxhwh0 367 | response: 368 | body: 369 | string: '{"detail":"Not Found"}' 370 | headers: 371 | Age: 372 | - '155' 373 | CF-Cache-Status: 374 | - HIT 375 | CF-RAY: 376 | - 77e28f85b9c3549d-YYZ 377 | Connection: 378 | - keep-alive 379 | Content-Length: 380 | - '22' 381 | Content-Type: 382 | - application/json 383 | Date: 384 | - Fri, 23 Dec 2022 16:44:13 GMT 385 | NEL: 386 | - '{"success_fraction":0,"report_to":"cf-nel","max_age":604800}' 387 | Report-To: 388 | - '{"endpoints":[{"url":"https:\/\/a.nel.cloudflare.com\/report\/v3?s=DM%2FeyGdP1yJob2TB4bMiFaWi2wmYeOYJVMrQsrT6ojXL1QIpZ%2B4B4WBUqzBgPV3MQKBnlT%2FutCioK%2BKX29bXXRQSif%2BV0sr%2FP4ZA2Jv3Apprpcj8yQ7%2BZKauB9P0DcfqDg56"}],"group":"cf-nel","max_age":604800}' 389 | Server: 390 | - cloudflare 391 | Vary: 392 | - Accept-Encoding 393 | alt-svc: 394 | - h3=":443"; ma=86400, h3-29=":443"; 
ma=86400 395 | status: 396 | code: 404 397 | message: Not Found 398 | - request: 399 | body: null 400 | headers: 401 | Accept: 402 | - '*/*' 403 | Accept-Encoding: 404 | - gzip, deflate, br 405 | Connection: 406 | - keep-alive 407 | User-Agent: 408 | - python-requests/2.27.1 409 | method: GET 410 | uri: https://api.pushshift.io/reddit/submission/comment_ids/kxhv53 411 | response: 412 | body: 413 | string: '{"detail":"Not Found"}' 414 | headers: 415 | Age: 416 | - '155' 417 | CF-Cache-Status: 418 | - HIT 419 | CF-RAY: 420 | - 77e28f8c1e96639e-ORD 421 | Connection: 422 | - keep-alive 423 | Content-Length: 424 | - '22' 425 | Content-Type: 426 | - application/json 427 | Date: 428 | - Fri, 23 Dec 2022 16:44:14 GMT 429 | NEL: 430 | - '{"success_fraction":0,"report_to":"cf-nel","max_age":604800}' 431 | Report-To: 432 | - '{"endpoints":[{"url":"https:\/\/a.nel.cloudflare.com\/report\/v3?s=8Z%2FXe4xAplmHzIqCEzsjVCa7WcfitdKufX7IFCrDoAzKjO5KKItJYTqRZ3Ywry3llEYuA2KXvBLpCIryS72ftycAuEHQwRZaEZQI3x0xMZ%2B6m7AQ%2BJCdqEMsr1jYBZTOGFqr"}],"group":"cf-nel","max_age":604800}' 433 | Server: 434 | - cloudflare 435 | Vary: 436 | - Accept-Encoding 437 | alt-svc: 438 | - h3=":443"; ma=86400, h3-29=":443"; ma=86400 439 | status: 440 | code: 404 441 | message: Not Found 442 | - request: 443 | body: null 444 | headers: 445 | Accept: 446 | - '*/*' 447 | Accept-Encoding: 448 | - gzip, deflate, br 449 | Connection: 450 | - keep-alive 451 | User-Agent: 452 | - python-requests/2.27.1 453 | method: GET 454 | uri: https://api.pushshift.io/reddit/submission/comment_ids/kxhm7b 455 | response: 456 | body: 457 | string: '{"detail":"Not Found"}' 458 | headers: 459 | Age: 460 | - '145' 461 | CF-Cache-Status: 462 | - HIT 463 | CF-RAY: 464 | - 77e28f923d1da217-YYZ 465 | Connection: 466 | - keep-alive 467 | Content-Length: 468 | - '22' 469 | Content-Type: 470 | - application/json 471 | Date: 472 | - Fri, 23 Dec 2022 16:44:15 GMT 473 | NEL: 474 | - '{"success_fraction":0,"report_to":"cf-nel","max_age":604800}' 
475 | Report-To: 476 | - '{"endpoints":[{"url":"https:\/\/a.nel.cloudflare.com\/report\/v3?s=joiT914836rLm4UgeJbkkSMnNSm%2FOYLefjkb9BbUwi1yTxI0YFcACwt9jlnWqPbV5ttUT5kSZFPs%2BeMLz1FICjTSVhb4BT%2ByoUDCHAvudUUlBtB8PIFDDtko1UKyvZ1tVyMr"}],"group":"cf-nel","max_age":604800}' 477 | Server: 478 | - cloudflare 479 | Vary: 480 | - Accept-Encoding 481 | alt-svc: 482 | - h3=":443"; ma=86400, h3-29=":443"; ma=86400 483 | status: 484 | code: 404 485 | message: Not Found 486 | - request: 487 | body: null 488 | headers: 489 | Accept: 490 | - '*/*' 491 | Accept-Encoding: 492 | - gzip, deflate, br 493 | Connection: 494 | - keep-alive 495 | User-Agent: 496 | - python-requests/2.27.1 497 | method: GET 498 | uri: https://api.pushshift.io/reddit/submission/comment_ids/kxhm3s 499 | response: 500 | body: 501 | string: '{"detail":"Not Found"}' 502 | headers: 503 | Age: 504 | - '145' 505 | CF-Cache-Status: 506 | - HIT 507 | CF-RAY: 508 | - 77e28f98af4d6368-ORD 509 | Connection: 510 | - keep-alive 511 | Content-Length: 512 | - '22' 513 | Content-Type: 514 | - application/json 515 | Date: 516 | - Fri, 23 Dec 2022 16:44:16 GMT 517 | NEL: 518 | - '{"success_fraction":0,"report_to":"cf-nel","max_age":604800}' 519 | Report-To: 520 | - '{"endpoints":[{"url":"https:\/\/a.nel.cloudflare.com\/report\/v3?s=XogA8GDIe1ZMO3%2FD3cVCsys8FeTbcFqiJKp8D6f3%2F7dRMZao48VZDRzsCGQ0R%2FMs5QFVH8kiKDRV8S2hKOmoPjqOor6mD4fHoUxwpHc8YUeYjbVfV%2B3sXtUmYQVG4tmiViPy"}],"group":"cf-nel","max_age":604800}' 521 | Server: 522 | - cloudflare 523 | Vary: 524 | - Accept-Encoding 525 | alt-svc: 526 | - h3=":443"; ma=86400, h3-29=":443"; ma=86400 527 | status: 528 | code: 404 529 | message: Not Found 530 | - request: 531 | body: null 532 | headers: 533 | Accept: 534 | - '*/*' 535 | Accept-Encoding: 536 | - gzip, deflate, br 537 | Connection: 538 | - keep-alive 539 | User-Agent: 540 | - python-requests/2.27.1 541 | method: GET 542 | uri: https://api.pushshift.io/reddit/submission/comment_ids/kxhg37 543 | response: 544 | body: 545 | 
string: '{"detail":"Not Found"}' 546 | headers: 547 | Age: 548 | - '155' 549 | CF-Cache-Status: 550 | - HIT 551 | CF-RAY: 552 | - 77e28f9f08b362b1-ORD 553 | Connection: 554 | - keep-alive 555 | Content-Length: 556 | - '22' 557 | Content-Type: 558 | - application/json 559 | Date: 560 | - Fri, 23 Dec 2022 16:44:17 GMT 561 | NEL: 562 | - '{"success_fraction":0,"report_to":"cf-nel","max_age":604800}' 563 | Report-To: 564 | - '{"endpoints":[{"url":"https:\/\/a.nel.cloudflare.com\/report\/v3?s=STXML6zH9EyrYv3wQFtC1%2BpJx8M%2FeSYl%2BFxo%2F%2FckpOdSqe53ewv5jzTyayr05lq%2Br6cIA4P1X3DcRD1M89PP%2BSKlLTrP3vTEJCbl0O29enj0kuOQjkE1m1Id9sdlbxf15pGM"}],"group":"cf-nel","max_age":604800}' 565 | Server: 566 | - cloudflare 567 | Vary: 568 | - Accept-Encoding 569 | alt-svc: 570 | - h3=":443"; ma=86400, h3-29=":443"; ma=86400 571 | status: 572 | code: 404 573 | message: Not Found 574 | - request: 575 | body: null 576 | headers: 577 | Accept: 578 | - '*/*' 579 | Accept-Encoding: 580 | - gzip, deflate, br 581 | Connection: 582 | - keep-alive 583 | User-Agent: 584 | - python-requests/2.27.1 585 | method: GET 586 | uri: https://api.pushshift.io/reddit/submission/comment_ids/kxhak9 587 | response: 588 | body: 589 | string: '{"detail":"Not Found"}' 590 | headers: 591 | Age: 592 | - '100' 593 | CF-Cache-Status: 594 | - HIT 595 | CF-RAY: 596 | - 77e28fa4fa0aa223-YYZ 597 | Connection: 598 | - keep-alive 599 | Content-Length: 600 | - '22' 601 | Content-Type: 602 | - application/json 603 | Date: 604 | - Fri, 23 Dec 2022 16:44:18 GMT 605 | NEL: 606 | - '{"success_fraction":0,"report_to":"cf-nel","max_age":604800}' 607 | Report-To: 608 | - '{"endpoints":[{"url":"https:\/\/a.nel.cloudflare.com\/report\/v3?s=xIj6uqcmVhd3nZFRD7NfEDly6aY4xOGezIRUbGSI7k4mZ6tf%2BpAyE%2FfBChBxiQSYCo97Ri%2FOo1o8BEkPGrw%2BYDu0LwsFTqxz7PWmjsjP%2FAn0EHFb9cGf%2FHUbfHEOAVX%2BD5tU"}],"group":"cf-nel","max_age":604800}' 609 | Server: 610 | - cloudflare 611 | Vary: 612 | - Accept-Encoding 613 | alt-svc: 614 | - h3=":443"; ma=86400, 
h3-29=":443"; ma=86400 615 | status: 616 | code: 404 617 | message: Not Found 618 | version: 1 619 | -------------------------------------------------------------------------------- /cassettes/test_submission_praw_ids: -------------------------------------------------------------------------------- 1 | interactions: 2 | - request: 3 | body: null 4 | headers: 5 | Accept: 6 | - '*/*' 7 | Accept-Encoding: 8 | - gzip, deflate 9 | Connection: 10 | - keep-alive 11 | User-Agent: 12 | - python-requests/2.25.1 13 | method: GET 14 | uri: https://api.pushshift.io/reddit/submission/search?ids=kxi2w8,kxi2g1,kxhzrl,kxhyh6,kxhwh0,kxhv53,kxhm7b,kxhm3s,kxhg37,kxhak9&filter=id 15 | response: 16 | body: 17 | string: !!binary | 18 | H4sIAAAAAAAAA6vmUlBQUFBKSSxJVLJSiAbzQKAazgLLZ6YoWSkoZVdk5BoXK8GlanUIqk/MtiRJ 19 | fbqxOSnqM43SDUkyv6oohyT1ueZJJKkvzzAgSX2ZqTFJ6iszzEgLn3ILJPVgVixXLQCfBu1g9gEA 20 | AA== 21 | headers: 22 | CF-Cache-Status: 23 | - EXPIRED 24 | CF-RAY: 25 | - 68aaa0060bc6cab8-YYZ 26 | Connection: 27 | - keep-alive 28 | Content-Encoding: 29 | - gzip 30 | Content-Type: 31 | - application/json; charset=UTF-8 32 | Date: 33 | - Mon, 06 Sep 2021 21:00:08 GMT 34 | Expect-CT: 35 | - max-age=604800, report-uri="https://report-uri.cloudflare.com/cdn-cgi/beacon/expect-ct" 36 | Last-Modified: 37 | - Mon, 06 Sep 2021 20:26:04 GMT 38 | NEL: 39 | - '{"success_fraction":0,"report_to":"cf-nel","max_age":604800}' 40 | Report-To: 41 | - '{"endpoints":[{"url":"https:\/\/a.nel.cloudflare.com\/report\/v3?s=lJkhTOgfpSwPle2Kp0LxXwExFwlX72xGLaKBZhJCQg2EFP2OUais5M5AEmVcJiWmFnHr7AxqIsKSVA3mEX3HkEstq8jT7P1U3T3t%2FPvRuxaBZesfNr8LNbF8NLx59bTckDRy"}],"group":"cf-nel","max_age":604800}' 42 | Server: 43 | - cloudflare 44 | Transfer-Encoding: 45 | - chunked 46 | Vary: 47 | - Accept-Encoding 48 | access-control-allow-origin: 49 | - '*' 50 | alt-svc: 51 | - h3-27=":443"; ma=86400, h3-28=":443"; ma=86400, h3-29=":443"; ma=86400, h3=":443"; 52 | ma=86400 53 | cache-control: 54 | - public, max-age=1, s-maxage=1 55 
| status: 56 | code: 200 57 | message: OK 58 | - request: 59 | body: null 60 | headers: 61 | Accept: 62 | - '*/*' 63 | Accept-Encoding: 64 | - gzip, deflate 65 | Connection: 66 | - keep-alive 67 | Cookie: 68 | - csv=1; edgebucket=rMnJXdgWbcPi5FKlAW; loid=0000000000edl180ip.2.1630961281900.Z0FBQUFBQmhOb0U4Z0dmdWZmMmJ5V2tGeExMekVuZ2syd29hQkxvRDZiYzNyckRJc3VfaC1zbHFhZWxDMnliNEZSWTJXd2c3T2VUaVlFMDJaYV9Kb2RvQ1FqQ2tPYUpiSVI3RjZOdTdqbTVONzY1QkxYM29PalF4T1h5LU5NdXdSM0owUWJnMTU2blE; 69 | session_tracker=jdhkgkbdkjemrqkpeo.0.1630962002066.Z0FBQUFBQmhOb0ZTbzVoTS1zSmxhOGZXejc0QVlRMW5yYmtCUkhNWTJ4bUNWYkNGbUlRS2otWjc1WGZzemlOUDFtLUY1cmV5TG9rZFFUY0tuWUNkdlVTNDRKWXZrd0tQSDZLRmtrOEJsWHFUeVAySG4ybm40d0NibmdGaGFuYU53bmY3N0NrVkVsazI 70 | User-Agent: 71 | - 'python: PMAW v2 endpoint testing (by u/potato-sword) PRAW/7.4.0 prawcore/2.3.0' 72 | method: GET 73 | uri: https://oauth.reddit.com/api/info/?id=t3_kxhm3s%2Ct3_kxhak9%2Ct3_kxhg37%2Ct3_kxi2g1%2Ct3_kxhzrl%2Ct3_kxhm7b%2Ct3_kxhwh0%2Ct3_kxhv53%2Ct3_kxhyh6%2Ct3_kxi2w8&raw_json=1 74 | response: 75 | body: 76 | string: !!binary | 77 | H4sIAF2BNmEC/+1daXPbRtL+K1jthyRVOggQBMlNuVJJvEmUjXOsk3L2XW+hBsCAHAsEaByi6VT+ 78 | +/t0z+CiaJuiJYVOuLXlENdM3/N0z6HfTq5UGp38wzr5ThWlSmcnp9ZJJEqBW7+diLiUOX6lVZLQ 79 | fbyCK3uA34ssmotiTl/SJzOZ+bFK9Ot8J5yrJMpliuv//tb0Ug77HSyXeXYtI1+UflWGbVdFFeQy 80 | ihT1dyJStZD0XSETUPSKb35bFaUVq1QVcxlZK1Giw3Rm/at6XVlpZn2TpVdCnVoijaxL6wW9vBJp 81 | iVfLzCrE2orzbGGVc2kVpchLK4txoQorl9dKrk5xIUpLxdY6q6y5uJZos7QKKVP9GpPED4t5ViWR 82 | BT4CESRra5ZpYixVnluXaAMvW2G2WCaylJZ8tZS5kmkoW5LbBkFuUmT4kr7KUqmpklYsV+hbpeid 83 | +AHlglrQRGRVSS/lUtMs8GOV5eW86eDc+pzaN6/zXZlr/oWVZEQB931phXiLZMMN6TapOXqLJVSl 84 | kcyjbFboNwoFrtZWlKXPK2dgT0trJkuSL4t1mZWJms35Q5KpzAsrqmT9PBYh9aE7MrTF4A5UgXlW 85 | WAxboK+XMoP09MuJupJFrQj0IvBZAGFdgyWSDf2A3FvFNbRpxiNw04iS+gxknBGP+JY+ofZrAZ/y 86 | HRIKlBbLsASzKi3zLKpCVn5OBqkZAs0zSeL5WLf+iRWst6n4/KaYG3PK4piEJECTqEoF9rv2otK2 87 | PVgIEcwN1DQmUuSp0ZUIyCpUSXYsrZ9zkRaJKFWW1halO0TPiYLPigScsZzCp2G10BIrrGfwLf4d 88 | 
cW+BTJSEJyyMKqidlORfLKW4KizIEV2SSE4tvJevgywigem3dY8hO0tAP9JCQVxQSIEuT62AzZic 89 | rrBW9N9LrQvNCvW1yonWUws/F4KsQFizXOLNF1lg4T8ROk2ypZa3tCJF4pRpSdorslSAVYWvjASM 90 | Uan0OksQgTSdFAAa50JUkbVFw+D4q4Tssua+aFoJRZ6v4bHWSsHx8GgBMVzL4tx62rE16jtmO+3q 91 | tVHiqvUjxLWImqMuYVZ4NWyayHviAW0h1Md9GhV+L/IKt/nx5/yRViIbQ85aZ/cqKkQpQcECKoFU 92 | TAc9o+rQSQKR6YvM2K1uohRrtseUw1UoSjnLIAkTFPt6FhwJc3aCZx0WFllRW2w/ehOlDTUdRUDp 93 | SS15ucjIrotTq8gWkgJZ7U3hXOSIMZC3JlVCQAiuNIixgtEYNaRKwxB1EguVsEizNJUUnzhWwTpr 94 | 62jjvrZkKQoF34mlTNAafIz4hKDIvbU2EIou4bhF4wiFKivti7jfHzsi6k6Uxue4Q/yfGTNhpFVN 95 | bSrUk6r9vtXiilUNA2ZTQg+lmGUYLMtC2+i1SCoWVQT5IwDUIb3Kl6IotP9a8wq+AB8vKxMh9TuN 96 | oBsrQZjtsPw5j0Ud9yA5k5T0kAuWlolIpeFTj9GtSBtptA2isTWPRLEKWXKNbo1pcxOXlkA8QNSe 97 | Zyuin24kK7GmIKc9RSAIrF/XrKj0ypgdO1Sk6CY60nFGyzKyfjU6L+EGDB5KCXejgWMNY0b4MfGm 98 | 4DgYKpDYMb3abkDG+fP0efq1LPmDQIRX3BjeuzrdYvfteE3awpVK0VukAoo2TfMm5C0oytH4MFdw 99 | 6Q1voRhC3RUleaaJfq0AmcnVnJzACFlLy1ydGgKsQL1GkGvGRv6MLZ68b66W8ECjbYzsxCKbFaSa 100 | yMVC4GmCoRm6gYuwO8HFQkRT0mbHs+rY1A89ELUeWLqIqwZaFLsBtZ6IK+pV0IhWgoqiQrdguKPh 101 | fj/mAbij/wKuqMQMrZfGErtQaPO7cpVZc8CbIpxnWWJdqch4EEkQ7BG3WnTkLgiOrVn2XEOrrJF9 102 | 78sOsOIAQ2Qw4IE1Gb0wSzTSqbBKurFFd7KNyqs0WyFuodeMxgNWpMbAMBhjUBBjV7faRTUynWU0 103 | rBmAJRJ4QGSADrlHK+FaiLkE+IWBRta/MkVd/IJ2AhmKiuir+eZwAzypEkEjvJFU5wtVdAdHDfrZ 104 | uWrNpIQ+9A0Y6Exo6MM/SWvcQSzA7qoL/vW3NRsag+C6glHkHHtLBW9fVkGCLtnT6VpLHZZ8bj3O 105 | tCAo8mgDIH3rdo0tb+JjwiZQTk6og6QzSxUJ+pTCP7XF6YZItXaMTAyhjTBrJfMoqgXNOvjM+hE4 106 | EJJFtkFBipR9TqmTZsknSE2YjXMxx58uX8TL+WDJyRW6pRwtxmAgdYbnQyxATn6p4FFtcjZDZsev 107 | UiIYJiq86n1Yv33yb86lKJPZCHAf17Do1Pp3N5f5hOiAF175cSJU7gP2zE2+hxxSE02X1AldPFbA 108 | jgXFkJPf/9fNGn3i0V/mMlavmLaT/IJ7oE/nKoo4L60JXq6SApdev/OwKPwwwWBIn0dtT5S/ZquU 109 | bhP/JQbJIAVu8OeS0p1WTGW29GFtOWTll+tlR4CgQPpFiLyjQ0WjlqF/9Wq+GBbU08sKcTrFkNF9 110 | s0MkicMPsyTjvJvzLfqsWl5npfRzMhOi83w4PG1tgL+kIWGWZ0jpmu83c++a6hO2/1A33PANRJFo 111 | /grYSSiVNh8uDmCgEr5cBHznt997YlqpqKS6gemtR1QpkVHCr3xFH5o3VOFnuZopDN4gFTl8SkIu 112 | 
84pkgSgCO5FLZLxE2X/ZDGQIwOIzFb1mDFuaPGSaQnXNAC8A2NAX9Z0az3Zk02m6z+CGUkhsHfPk 113 | xlKffGqZcRWl7qM2A5JbUw4JOl2CLMYZuE1hyxeQd6U6LRgJwtoXqlp0HjQyJ2Jo+CQyQHnJVNue 114 | PfC8sesNzrnvrh66tr9NT13HJIlTREB0pNdZGEZNvhEg0i568wS3ZF5C6gRYTuhDMMeENfokCSH0 115 | dZzFMN9Q7HkjlynuStwYakMXHrdODcEYqdYEdcQbCED9vsT7JmmarpttzIYlet5EFSDcbOUn8AJf 116 | x2i2x0bLpnLlz8sFq+N5NRgMv/zb2Zn19Ev/h6++ss7O+NY/9YNIXVusgEfPTxbRc/26ebbUF8cC 117 | mCHiWAA7FsCOBTBxLIAdC2DHAtixACYPtwDG8OXC4Bcqh/UAzbE2dqyN9bzmWBs71sbuuzZ2Iybp 118 | a+Rf3ZSrztO+b9I0Xam64sS2qZvMZrKgPL1A+nEjw9xc4kApAVLFiqsZdeaZA97qOorJh9PMjzNK 119 | LdtbXBDIioIKCRgnupUhYL+0V4ujbMC3J507GIh01cbk65y/6zJV3lzr+kaWJpQVby07des0J1KK 120 | OJSOOBvLeHRm23JyJlxPnA2kOxYimnqj8aAugVCpoJsYU2zhVSNNN9lGPXGjQsRVh7rERuMPRXgf 121 | o3tD/bUqTI2jbqKtALTfptWiUzgyNwkcQTAVO1F7uy2JaYbLke+8nDtOp7IKrQQ3SO/UULtdMzki 122 | MY/a+x0JbynPnfx9bE+HMVdxNBltnZCKW1mQgT6kYK+MVRh70Uyazm5UdKgdYPIUUQwQ6RoB5zX3 123 | 0FY8N4qXJLdOhYMqMQW8jmSZ6EKP6RfhG1ABAvVhp2VFT7T1RUwy14jwEDLqmrAWWa+eVxe4BNQN 124 | clhAnS9u2MdmQRNj5UKQbImCC1MJvqh5uNBSvNCFjwJW718BcvjwvDlDDl8ntL4Z3X39ObGwFJRe 125 | +e9gFBYVXqmeXVQ5F4HmZbks/nFxsVqtzrV9nYOqu6SwNVz8KpB7BNrLnfFgOHCpOFbXFnVs2iix 126 | sa7rWFNr+2Zd9RoJDtWZmb3ffz+17mPB13+NE/+v43Sb0xkqsYOrl4o/vfPpjCdZms3ASM7t32Ka 127 | 4huZLO97guIlkhka91ja9z89Ia6m1NPdTE/YNwvQW8LfpqG8cXbCZub+2NmJxtEPeHqCzZKbeevE 128 | BInzPicmIhkL4HOipJmb2BraD3lSYuRNh3cxKUFdSfgFLP70A56baGL1EV7fAbx2xnEwkp5zNh0J 129 | T8NrePYE8Ho0iEYDEQ3lEV7fAbyeyknk8rjWwGszzm2F1y3378LXo+HEHY1t9/uvf2A33RFaU+T9 130 | 4KE1JHixaJDTQ8PmLb3vB4mbAH9IkLjjHxtI2I6rII1X1/zFnSPhx1zlNOXSX0CRLArr6wyoFf/9 131 | +PustBaKqnpUM+JCOs2NzEXRll1Voef41rdd8vMUmBGw5J7RdKF7ITW9HUvbgxE9eB8oPRuOqaOD 132 | hNIOM7cdSlN02gCaNcSipvTIruKci7KEqgEXhiPv+YmlpYdLZzDAZZGH+N116nVWlVUg2au58Ytf 133 | s29//Wn+lXqSP/ksljwf8yjjR9SP48mUgvKLQizVIxttcrdBRqMu2qZeGAPhtwhD2GtOUzgy/5TK 134 | mRnG2vWngDRqGWTg8oznPz61ZBrm6yUCwRmz+ak1W8Pvw2yJZ0sVEg1nKj0zP+suyBERVSTNu7cI 135 | 50ILogU3dZ4BiZAmEFESKrF3dN/YGKS0PUex3RvKvp8U5bcTLWv+SdGKJjT9LdG4qzh22a0Ro9qI 136 | 
GM8rBN6Y/h1F9O/E5jsh/Tvm++GA/o2H9G/AI3Ifqx7N7A1mdtNo3AkZTde4blgjIjYvWoN4EU86 137 | g0wduJ6WOc2DfYXuabnK14gtYkHvNbZRv/mfrPoZIu6TsmE46nxdqsWM1XCtujq4mL802eH5iyVv 138 | TawDlR5ee602LA29jlu8w0jDOWUPycUvX/6U/Xz15Hr69PH6P//34ocfEZhfvc5/OmHPM512PuT7 139 | 95iyHyPpzib+7kja1c42g9BK5LdYUPxLS6sdnzfD8ZaCisEmrOa3llRoWP2rllS6KPTtNRXPdrxD 140 | qql03Z++ulVRpUlB36emAdv1qeSQM6QlNjF+sum9yef1xOf1o47XE+2HUhz5DRZFb4UZPDOX9Kgr 141 | nF5VoHWRVBJFdaORWAPfxH6Uq6UPicvUDF6cxFDDS1gLUWo7BNZ1bs9k+KPYdoZyGJ25sTs5c53Y 142 | PZuGtn0mp5Ng5HnRaDJl51/KNIWws1R0DVa3ATIbo/n6ux+++Pw7jgA9jogUBWfZjD6KY8+5Ki90 143 | W2ohYA8XVP9wQpm/vBip18G1G6eDReTa/rN5BuCEgHu+rHfqa97beMB5isIATtaRy5cVIlLUE7oh 144 | PpeFeo1HRJbRxQZxSz3f9B4kfqZHK9vTQ4sZrepLGjAerWTAxUHHKx5NnekQKvBGkLt0Q9sLbCfw 145 | pAjdUAzGkTOGpmw7ZETSYGDy+zYd8zjTvi9Ghk6PkfryBiOg0p24whlMIm8ihvYkiKeRG4fxcGzb 146 | Y1vYYeCJseR51mb4okGhxTDOvTLiTnqM1Jc3GIllEMXOJPDi4TQeR6A5sIfeaOiMgslIDoLAnYpp 147 | 7HFBowWYXUbcyb0y4rk9RurLG4wErjvwhBNNoIqJF4thFE6EOxIijMdyEI6DKAxDNw66jHhulxHP 148 | vVdGbKevkub6Biv2dDQaQQ1yKO2pF3iOlMHU9gaRLUI7soPh2InCYNBjBa313MSZcAGFQ1L9jjNg 149 | zVEtToUcFrY8ykuf1vacdMOxRoydcaOJNm0AKjN/RuUNP5CpjFWvJiypCLfkyUtI9tlcpmYxJK0V 150 | 4tHGErzI62yWZXrd4oz3tFFptE9NO2TcjL/1cMdV1Q6THWDHXNapS6Ml6qqOlp0Pj0FzN0aOQfMY 151 | NO+JkT8uaMZZvhAd8Lw1kmjUWKPPHmisAeMsyQKRUK/dkHSXKJFp5u/2n4CM3Elg21O3s75vOhU8 152 | ARnEdjAeDAOPQe9xAtJg6/0mIGGYg6FNnTYTkKb68J4TkP+8mqsoFc+oHPOa2vtLTUFChhcRL4xD 153 | qlTOpW8K0P5MF6CpDuHTjJUfVOWdz1DukI7vNyXZ1EfeOiV5rN1/qIXNY+3+7Ub6vrX7B5mi32HV 154 | 6jh64TnXL3iwufu5+idr66uK9689ViJfUye3mG7/856woZwZj7QHOe9+XMK6hcMtMy63OmHjuJB1 155 | h0mXie2NDmnS5biQ9VDmau4gjzzuE3uYPPLmPrF6tHvPPPLJ+jveQfxYXlXsp3+hNJJEeLFYA70R 156 | mvIjQlN3nixq62F4exsS9ksgm1j/1gTSyOfBAHMk6aAgDZi3Yo87h8jfZZneH0/01BuH623bfD6Q 157 | mOmDKay5iKygPgHEHGqhikyfe2F2kl/yFMXVZzr0b4Xa9wql6xB17xB6/jrn0uXtIHQk8ivW7eYZ 158 | daPx6YafbgtufXP/EAD0gYBl88m9geQ7w8IPCXfHU/uNh8ntDHVNxKJ3twDdw8GydWQ9YtmdsewR 159 | ierm9kSi1FszlWFGi/eEoD18sCv+pHVfD4M/3zbivRNikoguEo1G9CZ+n4CGr3yDRnxCIz7QiL4P 160 | 
NPLACHRfCvcDqE10PiSAevkRn+GVMmSkM7IsxAxgH6sk8KfPOaLgpU8HKzNAxGXF9+ht3MtB33VG 161 | EwgacZ5bP9N+KP5cn4dW0hoYkjKd6sfHEdGsiz75ayEiqc8cglpI8RvH4DABaI6O66zP7jFn/5Bp 162 | nNORnLoX7oDOLFzUpyr2WtKddAjic7+IJ2KF2mvPe6OO+dwgZq8h9uPiEz7PiidZiHs+6Y67kwWf 163 | h3/50bW05hKRmB4/WX9OTdGfItK9KzoyCBGlqA+g7ByTGGWySD8qO4coBaKQ1I6Rcn3qIfOtD99b 164 | 0dQ4i6A5anKmAF3NIU5ziDPSr4Oa/oFChTkwFLwb8UBytK9Nc09aN8eImkNT6bRVUv8Soz/8kcwl 165 | MWerWYTNL6h98E0tNl1tkaYm52ZHkN8vKc/F0WFI+vgto2/RsUUSE9lrwfNYC1pkgB7pjCyjWd6K 166 | p49B1Ec3cutEOR0yyVlMHduq/h7D0cvphMsAd5+SPWuJM/KAH5OJMmNMLy8Gg8OLtyVa2+Y0nmDQ 167 | OL/v6YwFOuGIc/9Z2GLMy1Zul4W9aSJjcM4reN6Vhm1GyzcmYtTYoSRiW6sJh5GcGaPkdt6an7Fy 168 | 9srP6gLZ2ycxOIE63SNr6z3frHU8ZEqH/93JURym+S0ZXd8OD3Xq4pL+43h/H04/PQKWuwEsm3lz 169 | //zSjsDfD9S07RzRzT2jmxsq1dcHWQrpjD1/SC2kN2r15vUGXmhPR0NJ83rmgJpJLKKzgQwmchSF 170 | w2DEkOBYTdHN7VlN+bvnetGYE9mmqGLA39aiirGhd9dUvnvtujo/3rGewrtBHqSecn/zeSS6Cw4T 171 | PoUJU80weYZPIcYPpC98yjN8yjMeutSyF337FVoazHRIhRYzDL5yBoMvPqXR9k37BwZg8mo9Gq+i 172 | wLO7O0+GrtnaoJfrP8IjfU2DRW9/wDiYjKJxKEQwlKPJZCi8wXQUDca2643teDwdRlPHdUfdBZUb 173 | 2XDwOl2k7j0dPflt9aIqi8r6l1BwO+srkdKZ1MHaWsi/WT8z7uKTt4cW7QjVg9PO+fAPX5oG7zsn 174 | jrmXs2yXxFifbXLLvFgPW5SYNfbXsw2+0zosQrDiWRuWySXtqqAryo1OeI/Fhdlmu9SiowhuO1Pc 175 | eEW/BhQFK3r5toaJT7XhwdWWj4oFpPIGuwwl7xsaSG/kTaZoJZh6Y8cbyWBkR8PQm3r22I7iiNa0 176 | agKdEZFFBDq8zWsvAp16u9k7CQzEeDoeBWNJG82GoR150nFGbuBOgtiLQjEcT0bgwW4JHE5o0xYR 177 | OHRIx3sRiE93JHA4jWLbnspB7AwC2tEdiQF+ieEIkc+xR0PhhJOR47YEjj2aLiACPTbCvQjEpzsS 178 | COVGsRxPPDsSwcAOQweyE07gxIErxtKd2mPPG4VOSyBtlDMUTnm99F4U4tMdKYT+pl4YT6JgHEBy 179 | I3c8Hg6g6dAdeC4EOwjdWHpx3KHQmdJEsnGTvUmkb3ekMZbxwBYjKSZT1xnZGOocOXADT7jDYeRO 180 | A1jB0J64ExPjTigSEKXugBfRszliuNiX0vsYaqhQYvB1L4jReLtRAlzNeWS6qxLglA8ouasS4Pht 181 | p4jtXAN8v8OuDDo95ApgOwxzW2+tArJI77MMWNt/cM63C4NVmRMGrD89KX/6djT84Qvffvn1jz9+ 182 | eTaRr0ZPvvj1mYt8dSoff//rN9+48+ifT4NZvf/jwy0pjr2Bcywp6osWkfOTN1bD9IWw5sCPnb1b 183 | tw+oYrH89EZQxb2NwEq3dg+uNXuHRpaWqeiK8Fices/i1GQycIbB4AxjMxenpmfCtd2zgZzEdjQd 184 | 
T6ahW4fcY3Fq/+LUFFDQZUk2xSkDS96vOFUomAP/9VCzNWzHGhVhuQ++RgUJXrzQyb9/xcm/r7No 185 | Gllo8Q1yf59zf3/oU+7/0EWq/Qjcr0rVDMOHVKX6A9arP5uvaT7kMboNS4Rk68sspb+omum/T5uT 186 | 4G67KOLPu9Fzfj1iZd9VcjTa5U+pH5ep750HPdCezg9zubo7vpsjMY9L1u8PCv/BS9bfjISP2y8f 187 | Bgnf3H5Zj0FbkXDL/bug8F5r323aVvUwQHjbQLr74nfI6GI1X5OKohra0DgFaywyv4U2Dw1xb0XX 188 | nsi2jusHhWyNOz4gsn2ytuivPpuFajMNAW6BYg9yKrMOgu+DYddzPmDtbjDsboeVHCHs3hD2NqX8 189 | vxqCHU+mfykEWwfRI4K9AwR7rOU+DILdUss1I9AfgmBJMB8AgIWI6HQOg2DosEkgmIdGq28mYk9o 190 | WgfsQ4Kml7rSCSmmtMibV40nGa9r5/fRe1pYc+BTWi3OWwVo3wB+hVf072xenFtf8KJ7WeCdXFp0 191 | sz7BY6bypDjlHQkUmvg53lzTknU0pVfrS30fbNKOCWs5Xxd68Tkt/U/K+dqC+YEJWl1/bl3yH76r 192 | FYVmeS16Kle4T95i0adEJ24vLLYE9DOzEjkraL+FVDn/5j4LSDRdn1prWdR7B4gNJrvzAi/7jzLa 193 | JDmTpbWowrklX8k8VHj5Y/kqlMuS5fKfrITMxSdUQi7brvSnLEX+dlEVIW0w+FFmywSch6VFI5Ol 194 | yo94mb5F2xTMOaGrLE+ihrj1RyAKPYEMinpaZbw4v5hnK0vxRk4WNZFDypP5tQr1Zgb6w4HCWpJ+ 195 | iSHeXhLOAYRD4BH9ZwWZZqBwCI7xyjkzTGMSiGv2LODhUp/rQjQI3XcgQ4HkplVHWUWKdgdE+l7T 196 | E3hkqQTgAe3CdimCQLsyou0EmoYgi9A995ZK/aChhPY7MBrReru0rlL0z4bEMkY4xbjOqSTayQK8 197 | o02brIzNO1D0At1NLXyRi5lsRVyTnIGZApEqIea1mQG8ZrQDRFsaWSzZTgkD560XUAMME+OsSiGY 198 | z6yed2nZ0RabWb42y0w3ZEUHcc6M7VpyTWae0PYRbQlPvuzIWG/1gTwr9pRCzVIVw230ZhKmFRpc 199 | n2/tguTZ6wGfBLDNjAQUAp9DdHUzC7wv2HEs7e+1LPhB67lasBQi0lqyLMwVbzrRn8QKsqilZ7gi 200 | 27lBJW2KytcsiQz3BGWzVZ8kvReHuyAaokybIm9mfZEF59pIA6nFSR1rN+4oyChBlNnCBBykEsi6 201 | mA3ilN6fZdrSLRhpyhsL6yGu6i1bnufqpc1bCO4+m39KrNwknGTR6uRlpWiQxvheKpHoy4pme+HE 202 | HFhSWmRB25Kijs/hxobPaZy8c6XgzzvfpZzVhHq6m1rB4Hy4y7FMmyP3G8sFzPeBlAu21rQOpIRw 203 | q1kwLufsVUMwInzHasC0iFdExq0rC73nf+QKvokzuMWmYJrh3V52MM1vqTj0jfFQV/AdgfNtgDPJ 204 | rN6Iuy+C7raxC5Ru338Xpm7ffBe47tJw4Ci7y/4Rbh/h9gcJt01t989f6zUf30ep97hY4WFKvTcX 205 | K9QJxNZS787LdtOYDn/TedmuhV6H8qjdK70YExYZMG2VIGszEPXWJd/7W7xLcryg+EIs1tHErIVF 206 | RKJd3QgfkO4Dl4hvR9d+VeMGb+9XNSb9BDLW2Q299fvv/w/0SvdiqrYAAA== 207 | headers: 208 | Accept-Ranges: 209 | - bytes 210 | Connection: 211 | - keep-alive 212 | Content-Length: 213 
| - '7396' 214 | Content-Type: 215 | - application/json; charset=UTF-8 216 | Date: 217 | - Mon, 06 Sep 2021 21:00:13 GMT 218 | Server: 219 | - snooserv 220 | Set-Cookie: 221 | - session_tracker=jdhkgkbdkjemrqkpeo.0.1630962013008.Z0FBQUFBQmhOb0ZkQVRxb3lCU3BETE4yRThYV1J3SGhmeWM0VEYtVWgxTWU4SU1aYVJUbmNtSThpZDFTSVc2eVdSTDlxZ210ZXZBb0VMdC1IMUJ1ZG1Pd2QxbnNCNTk4eHhhb0d4YkFza0t4cTJtNWYyaWt1X3d3SndKN0E3RjduaU1OLUpONEF3dkg; 222 | Domain=reddit.com; Max-Age=7199; Path=/; expires=Mon, 06-Sep-2021 23:00:13 223 | GMT; secure; SameSite=None; Secure 224 | Strict-Transport-Security: 225 | - max-age=15552000; includeSubDomains; preload 226 | Vary: 227 | - accept-encoding 228 | Via: 229 | - 1.1 varnish 230 | X-Clacks-Overhead: 231 | - GNU Terry Pratchett 232 | X-Moose: 233 | - majestic 234 | access-control-allow-origin: 235 | - '*' 236 | access-control-expose-headers: 237 | - X-Moose 238 | cache-control: 239 | - max-age=0, must-revalidate 240 | content-encoding: 241 | - gzip 242 | x-content-type-options: 243 | - nosniff 244 | x-frame-options: 245 | - SAMEORIGIN 246 | x-ratelimit-remaining: 247 | - '298' 248 | x-ratelimit-reset: 249 | - '587' 250 | x-ratelimit-used: 251 | - '2' 252 | x-ua-compatible: 253 | - IE=edge 254 | x-xss-protection: 255 | - 1; mode=block 256 | status: 257 | code: 200 258 | message: OK 259 | version: 1 260 | -------------------------------------------------------------------------------- /cassettes/test_submission_search_ids: -------------------------------------------------------------------------------- 1 | interactions: 2 | - request: 3 | body: null 4 | headers: 5 | Accept: 6 | - '*/*' 7 | Accept-Encoding: 8 | - gzip, deflate 9 | Connection: 10 | - keep-alive 11 | User-Agent: 12 | - python-requests/2.26.0 13 | method: GET 14 | uri: https://api.pushshift.io/reddit/submission/search?ids=ztksvq,ztksac,ztkr79,ztkr6z,ztkq2d,ztkp56,ztknht,ztklgp,ztkl5w,ztkj4p,ztkhvz,ztkgpe,ztkgpn,ztkgnk,ztkgf5,ztkfag 15 | response: 16 | body: 17 | string: !!binary | 18 | 
H4sIAAAAAAAAA+19244cN5bgr9A5g3XbnVmKe0ZUwxB0tUqtm1WyZXdXI8EgGZlURQRDJCNTKcPA 19 | YN/2D3aedr3z0OgB+mmwX1B/sl+wn7A4hxGRmXWxSppSr9xdDxKyMiPIw8NzP4eHP444tXS0/8cf 20 | R6bNteBc2tH+6I7SqqZLqVtzR2pWildCH4/GIyPKwoo38MhoPKKtXSg9K9qyrGklRvsjG8wMC/jc 21 | Xy5G49Fcllzw0b43HllpS3jgxUIaUolKkBU1pFHGCk5kTSgxbU7sglrCBS0NWUm7IKWq5xMrdEWW 22 | 9M0bIutXrZbC7JEfhBkTuxAkl3OSayprwcfEMClqJkihylKthDaEarH1quCE1pysBH5fKzsmuWC0 23 | NYIcjSjJtToWNWGlYsdEGqLlfGGJXUkmCCWcro9G+H6t7PAejMVsS8ty3U/LEbAeFnhB1HM6d+tk 24 | WlrJaEnsQtbHsp7vkf/7P/77f+3/jcajUtbHs6KkUs+0ZAuH7T/+abzZnxkge9ZoUcg3gN6RvnHR 25 | fi0k56Ie7Re0NGI8alalwe3YmoQZM2MlNcbtqV20VV5TWc4WAtY/2vfDYDyyqpnRFdVc8JldN2K0 26 | X7dliROImWFKi9G+1a0Yj163VNPayloM027NBsuZMVUqPdoflTjBeNQ2S2XFTFMr1Wjf3/M2pIUv 27 | 5ZQdz7Vqaz68+k8xnUaMAcDK0tLBZmZaMCGXHc1Vgks6E1UOf//40/baVpLbxWjfj05PZUXVlNSK 28 | mQTEFiKlAc/pxOdBPPF9kU+yOC4mnojzYsp4IIJiNB5JM1NazmVNyxlTtRW1HdZuBGu1mCEsPdIk 29 | AIo76UDkqqKy7hEozawSwJPdCIxaMVd63b+9PeL26k5hebQ/uqsqYYDcgMkWbUVrQmtZAXvlgpTy 30 | WOwDT7vd83FmpgU8PSu0qmaUm1krB0A6RDVaVLKthq8HrI72RwtrG7N/40a+h9+aPbdOhHWPqerG 31 | 5C77/Z0f7v7w7AWfHdLXz394/PjV7VcPWHnv948Wj2jw4LYtjl9Obvt37+29auaj8UhwaWGJu0Cc 32 | oV2Hm51fd9kHhJGs58YhCyTPbCFho0ayonMxGo+6rZt1CJfCbG0YSL7Npg6s6Hhh1LR5Kdku93Y/ 33 | IQTjUc95WlRqKfgsX89Ob+wuIe683VPISCJG9yR8SUHczEq5FDOmqkrU1mxBOJ8LEK4zo7TtZ2h1 34 | OVNLoTWKBYCBC2O39m0Y/obImnB6HAdTNaV+txVLKVYzptp6GJBqtnD81s1bq5kTg8M3SFTKGMA4 35 | zctBTjSyrrdeBLBmfroRVVrAdKP9H93+GKejVKuZgC9bvU1v3dMXQ3+TtlZ9tRJ5819o1fzOfEX5 36 | lLMsDSIxDZiXUlr4kVekjEYBmxbU96cJ8xNvClvnhEXmheNRLxTTOPkJNtOosrVS1Q689wUKR/7K 37 | 91IEimnVfGUqqi3+eQZi4SeeFxUxi6KIeSnnoReLKaN0mmbc97jn5VHspRuIfS/dQOx7wU/jDwUx 38 | 8JPLgBhlnHt0OgUI4yLJ0pizpMiKLAh5PhV+EXsZD3xvA2LgJxsQAy/6cBDDwLsMiD5nNPKnXsrj 39 | MA0SWvCUpmHg5UHoRVkRiDTjgRDZBsQw8DYghl744SAm0aVAjFIR+1HhFyz0wjzmcZYVcR7FYRoy 40 | P0pzTsU095jYgJhEWyAmXvLTn8ajJdWSokgAeYfqbHX4pL51V72VSfRq9qh+/ordXb+cvrjLX67v 41 | 8iS+rV89+nr+PJEN+2EEQ4gaGJY7jv0JBY5TtE6OglR1NoHu/nI6SdXl+nzdv61bszT3iiANJgWb 42 | 
ek635mkmJnEQppRyf0rTBIQyrWcgunuxYRolS6E34yt2fJF66NTg4zUBE3WSl2pVk+Wtg7uHZKXa 43 | kn++FCQXoiZGkaplC7JSGmw6aReqtWjGgfEIRgaoRZCwM0v7pW8keS8NuTRW1vNWmgWA1KnrQVfg 44 | sm08C+bTIK53lcV5Jo6gnheAAMIX39pjs3ztrA2tcmVnsubizbZIdYsf7Y8eHt7//pmWlYhGCBVr 45 | jZGq3rHc6rbaUhse2BU1n2nRlKj43IirhbSilMbOjKW2BRuxVmAX9OrS2FmluDhjI1Crhard4i7e 46 | nDOWYCN0RQEto/3RjQvM2hs91DccSm7YBZpMlZitqJk5n2Im6xmdmTafgU8xQ59iBht7A2ahGrby 47 | lxYHZtOx3CKsXY7/ZT252XLT5oZpmSOHxFHse+NRb2K1lo32/WTqp76fBnG3I72qNBsTdssKWUou 48 | 1ACTFlZLsTw9VpiASc3PToI/WDaDn6ysxMxYQH7gBcHEDyZBSPxk3/f2g3gEQm7bIeTC0JwWyuw6 49 | gbdVRbikY5IrSiyIAiLwc62kFYQqQ0ohrdLCEK6IG26PvBD1QpHAI7RWZo88Fi3hshQVHROmKkUK 50 | JQmXVpFaEXvyV9uWakxO/kKMyrUgr1tBGNVaSE2JEfNW6r2j+qi+15L65N8UsTg6JbJsDfzNwUdb 51 | UA0v7hNFrKY5LRfwYAXfaTJXxiqjiGqJOfkP0ihNuKwXQmq1R76tSCUkPK0rGKwRNRc1V7AgYezJ 52 | z3OpCKdkKTlFOJ5RbQXx4eN3tAIUnPy8T0TrpiFVCysDoGpqVSUpEaSQRqpSzSWFUZnSjXIGuxoD 53 | NJbW8KLgksma3sCnrdC0kZQsVWkpV6ShmhKACJ4XxAgtKalES2RthRbGiD3y8ORnwlS9FNoICagm 54 | tJy3FXjiwhhFDREIJzUWtAfxA9whN3YFjrWuqCa/mWvKW3ryZ8Dub4kWRvKTf68ZLKUawOxAkUtY 55 | oNJWNUDMXxABmJQUt9EKTRQpAEuKfHOwD19WRNZcMgp0AZNXtAQkSNNtbFsRqSuYGkdwm0lxNUVr 56 | Ra5KIJv65K9LURJz8hctBblDBLm7R747+Vm/FZSgz4h7dVdUSuNa57QGGkGEDLs/Jtsbjq+B6BGw 57 | fbkqDUWynoPxWqs9cqek8A5TVSNqQ8cEUOt2InWYBAcVhgqOB8jhT/JboL0J8dNFR32OlB7LekEB 58 | Pyd/LSV15H3yF6IRPYeqBTjQARKkOvkLh0eU23AJowjy+Os9cq+28J4RZKnYyb8jDLkgz/+ZhHue 59 | 5409j/CTn0kD3NQacIGoJkZUpNEqB640pK22oOCCRD3BIJTPcUBYJTxHX7UcH2orYqUCYNWYVLiS 60 | 6uTfQEJQAvhB3qSkQnrlAom1oZL8xiAVNtQY2BB78hcSeEEEeNUasQeDGoZM1AiwP4giR6Nc1KI4 61 | +SuT8McXCNkTReZCAwG5XWhrA6gHFFcn/26Af1uOOyAqwiSnXJC5pjUX5Le4GC2M0EtcjqiEnndk 62 | DpRXyLc7e8hbiHsIEjqpBtPfAWHmMIUCwKgWsNKolilSUWlgr7QyjWBWLgFLIGgqUdKanfxHKZki 63 | v2G0rIDLQNT8tWpLhSQn3nR7XAlSUc1o5RiAMlniMr4YbwvDWr1SRJB6Z1PhBUWKtmZSQUgJZMwg 64 | fSbE0HreCsAc0+Lkzyf/G2JuwrItGRfAx1sog2tOiWqcSOhkD7WiOvkZ4g/IEa2lOFG5R752ghAQ 65 | wHcf5I6rOQWZY8XpF5+CzG5w4wHd29NFjsGEQ7Axjv8HAS0qApHOIS7HheGCKNJoWbOTvzZS7RFU 66 | 
IoS+biVsVoc+GJu+bkUJMrUGtQl40q2sAAoj9MA0IIQF7E8lWpCZ0uyRO6p2GkpSYqWmumMEBquT 67 | 2im8Br5vK1Rq6nfdvnTkz1W/ZKqJqK2mnJIaSJNRQ3tihVdOftYCCZUtWq2ped3CBLhbLzqNWCuH 68 | MUCxY7LXrYQlFPSt2Og8UgtjqBtwnzRaMGlQRsKCgVvHnYjuqcIgB1aiXCi9hXwrWC0ZUhbQEc0F 69 | 0usS5KkeD8yjHXW3sEJQM6YCBoMYIvIjANtWTr4xtQXOmFhaLsVb1CKAEVE1WpiOO+nJn8E60Et5 70 | 8mdlQC4OPAP7zk7+V5UjszmBVXO1R54pffKXylEAWCumdbr15C+otakjWfCsTn5G7eIIlGp86H8C 71 | MVVolXScsHdRkLx9s2xfv1qx84Pkj0EKgGCTXPEPDQlvW2zvGQa+2716bjjY2aKfWDz4tKt1Oiz3 72 | nwkSnxec2/ZkB+P8SmLAm6jZVQWBN5t5lZFejId+rOjsJSKxjrg+drgV5trb5qT3ibmOarF676jp 73 | kAP4kKDp1QZoppEQke9NIlpggCad5H4WTzxBo2jq+0mYiisJ0HQS5SpDLGEWl6t3R1iKIoo9bzvC 74 | QtnlIizPBXiQJRf61xFhcRCdCq8MhL0bUqHsBtihs14DXU3IZLVaddkgTAS91+wXBFTCKIvi5IKI 75 | ihdcXUQl8C+IqOAPl4ioeMGZiMq3t+/sxlIetsaSlQLbSNZzIgtC67WqhcuK0wbIhoMLTig5vEvM 76 | Qq0MoWD8aJmX6AYcE1VD2FJqMK5qwFRjb5KD4W33M/Eju8DMtLSYh3eD9qYxUy2EQWXhZq7osTAA 77 | C+GyKISGZ/bIAdECs94rWltiFTmu1WpIix9AklwyzIVL41xUouBtQwTV5RpWIN7QCjwg8pDWLdXr 78 | McRhD3YHlAU5gKW2JScQlLDExdoBQ0rDLBfaWJV+Fflqdb6JdXiXPKaYIv8g48pt3rlmVXKRWdWJ 79 | ub+lQcXdEq/tqauzp7rH/8EsqeRqLCnHNu+ft/7ETahP0gQKXq+DV+80gbZsHz3NLmf7KEgs1hQ2 80 | 1r966wc34G9i/nx7+86O6aGn2Q3DZ6BHf9nk2YD4vjbPO6a8wM5JvcwPzzdzpvHVJY6myQWJI/fD 81 | u8wcL9v3zyaOunQDvU4cXSeOrhNH14mj68TRdeLoOnF0nTi6Thx95MTRhaGNH0db/hnYYvcAhcpg 82 | yd8vBT22Tbn3C32MRteBj199Iqknk3+M6Mcw7NVGQLZ56L1ySUzVheSggX7FKaU09eJ0yqLJlHIK 83 | KSU+ybM8mdCApl6ShnGceZ9qSmmxKlp1mZQST8J4O6ySvP1YKSX/EwyqDAS+G+dI3u5kdT5WfOXy 84 | s18QavEDb+pHF8Vaok8p1hKdibU8E3re1lY8d19cB1yuAy7XAZcrCLiIHXf9OuJyHXG5jrhcR1yu 85 | Iy7XEZediAsIAGfrfPGhdSVnLLhPPM5yXbH7EQIthyf/7fnB03/Aet0rirGcYaKPXXDyn+t0cKUR 86 | liDmSZixZBJEVECEJZtQP4GOJSL1PJr4Igg+1QjLsl4V9t0RlqmfhUWxFWF5HfD//0W7f7sIyy55 87 | 7wQ6Xgd8J9Ax6xXSx4q3fCgsF0RfpokfX3BCenqF9bzTi+p5p0FwmeBLuh+credthC4Es+WatdbA 88 | CipzuYZZ6WvaVOGr6nwD4+DzyvXC2m4qZQWtPvtQG+N8QK+mlhUbKv3jZHTe0T/qH62S9Z3dn/7A 89 | vja32Xrx8H7y7DsaTZ6++Y7Xd6NF8/RecPsBf2q+pf6rR4mXPTj+m3V/WmC/iH0nN96nCdTHtmVu 90 | 
nMuqOzK2iZMbssK+FjO7AOZC+TAD+XDj43SIWg6dL+bJMl6HaplC54tPujsUepM1LSenG/U8rddP 91 | nqk/LJ/m379eN/GLmT/j3oF4OVlGv+fL6P7yu+zerbcl4/fne009v4lRVftV86qZn99cKIqjIMvS 92 | jIY+D4s0Cv0insaRCKbTnAWcZn6e5HGx6dwTp9PtFk1R+M6uUle2mF9sPvWulXqJ4EHGcjGNCxHR 93 | MM6jnNFpwsKYRpHPs5wFAaXBRc2oMv9sG6UrXtkFPavetbLED1gcen4RMB75jIosSHguAh4keQYd 94 | wjIaedNCXNDDKkzDj76yC1pdvWtleRD7UeDnfpxR6otpnEKDoTSYTtMY+xwmccTSML2g9VWcpBf0 95 | lXr6ZPnGfP8ySxfm6dsH0/mrNy/iyQNx6+lq8jg/LBYP6+hhnD79w/ff3trtK4Xc/NPfUdF9WE8L 96 | 8z5F902cXM53eaDK9ez31NJGVjmWD/xqHZj/rFa7AkfmYi12UV449bIoCs73TZLEfx/fxG3B+a5J 97 | Mr0gL+x+eKdrMt2P/LN5Ya0oZC3EoVXs+JRbMpmQu4KBoahJEI4hfxMQaok33Q+8W4+PakLIzn/P 98 | BXUtbsGNIU/EypDDmjZmoSxZCC32yTCfrJfCWKUN+fbwFkExRO7QGuLi+N62X9mceWmP0RsYs62l 99 | MDdqsTI3fM8LfD/GPyamm3VS0Nq0tbRiAuFrbamsgZgmgho7MZKLyZxWwkxqpe1iJYydMNU0Qk9W 100 | oixzIev5hMu5tLScdLRmJt1Mewtblf3Kl5KS+0Lwizy5PIr8ulro8z05XHEP8z65T2vzLQANSb8N 101 | 0GNyjxpLDiUX5GuAekyeANgvhbHkDoI9Ji97uMldBzc5dHB/cNz5DIWc6w86Sr4+3nh9vPETCTxv 102 | O3LdRFcUi3avXEk4+gxvfdrx6CvyrW4loXn0zXerPzy8+zbk0Ssji2NrsqJ+e+fpA/nonvcokq9u 103 | yaUoHp/boNfLoHksCyI/8YI08uOU5jQJizQr4pCyMCwKFmbRVoNeP/C226CG3of7Uu8L/Hs18g1E 104 | 7GdhGlA/nRYszorcT6H/LOV5wESQJX5O/WkWXuA7Qefhj7yQS7f7ZTwFeyHJApFnmS98Nk0830un 105 | Qcam0yBnUezT4gJXyffDj76SS3YFFjzxssQL49j3M5oUaR6KzBciEyzibBrTJOFCJBd1BfaT9KOv 106 | 5JLNg8Mgz2LqJyyKQ5+L2Kc8nNIkiwo/jkXg8yDxeFH4FzQPDsOPT11ZcqmVpIxNPeYXmR/GRZiJ 107 | KKNhLLypFwdTHudxyoRf5PnWSrJk2109r5n01TP85ZpOsyLjWRHxLKJREbKCsTyIE+FleRZmBcuY 108 | F3s5zXc4fsf1nl7gesezl0o+vH18T01z+sO3vM5tWH7z/f0fxBNKbz37Wtx+M53fv6sfmR/+zl3v 109 | JC+zkr6P711j6cIlfO9Bg88Oet/kyv1v9/JHzx3u2iI7bne9sOhXzXofZTb4VbMdv2oGftUM/Kpf 110 | 9Ma7jXrfjOLVQ3iBSx9Mz/fmMSR7RZnGOMsucOfBVHm3O5/sB9OzvZilBoKrG1qe8uV/UC2BQyJW 111 | FmtCoTtQKQorjSWFqGQNH2SNjXtIKQuxR263lqxVS2hpoPzSQo8eaBJkVCVIzk1FclVzOocirdpS 112 | g5fvHNTENOC4qmIMxVp90x9VYFigkkbN1/Ua/sYS8u3vXDMhaYlYiposlLVC7xGA2+o1dv0RoiF2 113 | oYwgARYWQt8ggFHDOokRUMJlhXsnb2XJCSUrWpYkF3YFzdz7l40ol3hXkGqxlM9d/gMjkSUFz2tv 114 | 
q9lQrluL7Yzytiy5ett1XcKBJVQRmkpiYyZEwPCaqahZnJpxgJdYNRd2ITQg/XUrl66xU6E0NEZq 115 | SkFNqwVW/T0S1hDxpimVFg5ErAyuaQlV4EUpmTXDArsllC22ZeLEiDctLaFMUkL1Pt6mtKJrtwKm 116 | qhygdm/h1q+Uxm5Kup3PS9FdtUQJXxurGknr7oEVBHNc2tlqVc8J6jeyUpWo+zWSXJRSFA6OHiR4 117 | i1aCu4G75So9p6YyBLzInhyksWaPPBJ2C7hCiBJUEY5oVxIviBqID5YjdvBKa4c3I5dAZm/2IEJT 118 | Etg3IGyEbyngSfBz8PV87eaD2wAcX9SiQ6K0a0ewKwW35cArDdwkBfsGmIBdKUuBNz7BmGvVulrU 119 | A/IK2neVaulQZkUNmgGe4WoyVwr063j3fZRz0nQohSLLNc6jiKHdMLgTY5K31rGoUcS2unadvPI1 120 | PiOWsiSwJ7qFpcyR6ToSBRRYRcCPg0WugCKg5xf20+ruAELyF/0+WYUUpxUQ+IIC/yqyAHgQZ3BD 121 | giWiFhXsBnAzzgG7hldazZHQCFf1//mXf7U9n+Ag+AwOchbpyJ+5FvR4wDQX1GzIDK74QpJYCKph 122 | fd2KOiRtwaSKHp+tgcHwj//zL/9qcL83M+LJDqDsdU/jop7bxfD+HrlVlk6gKWMJtVY7xTTIVMcJ 123 | aF4ITtoG26jBC1wyS63SOOuCalHtkftA9/DjY6rfSGMn93uJ3GhVQN25BpD6pYMmlYwwVZZiLgZS 124 | ccSGEi1XfI04NW4XpTVkrjoCQpbgwnEb7dZxSvA4zG5Y8RQegY3ncgkDooaYU1kDHrrBBvA7ru93 125 | ahC03UZtYFWNcFso3jTKuFZ2HcVZoCM3sjSkrYFstkUkCvsCRkaZ1r3Yyb1hPRLb4Slt4Q93lYdd 126 | 0BrKu+s9ctdtdrceqKCe10AHqCaR3d00w3VuSPTIG90bjDYYy4b736wC5DjiB3QMQKiCsLaqkNtz 127 | d4nIGtjhc0tgT4GuDoUYoFgIrVA8a8HbHhsLacao+zqsIhuBOiEriepGVKTQQowB+AZ+ayG4ttkT 128 | vPuDPMY/QL/e7m4zERq68hEwjHaHBmFaq07ki9IIhF26boEgjwWczTEloMQRtKtVL0HyDGJlixod 129 | DcJbUK9k4HKV05IaIBCVwthMRycb7tyRvANhjR1VjTuiGzsp0b/cczDSWIVvgzEIxe7gQBC6ouvu 130 | 2W05LMHKkVWuCin4HnlaQ1PDhdOzjgiMrJnQAqwDp054jxrVNMpIKJkvVAu3BfYiYPMDo24kq9ty 131 | vYuUvJLoPziVk8v5XLi3C1qW4072dDp40VaylBDirp3KrIQzTDoyonz4Ghsnnr8nuXAqq3uytzgc 132 | 66G14IwNImskEnz4wFEKCISzBuV481upBeW9NKUa9rdaE3CX0GxENfk5zCQ65eyexJ/h4bPGas8d 133 | e+Rlb3UMOzcGpbAU0DYSt3pMdGsXpTAGjkSMB3IY9/IUhDC4050NtkGveN0OVhEqL0hJElXskVso 134 | cURNduF2hq7UZKG63psFtQBlpzGkdmOguQLTMU0NKEfCQQeBvnDTO6uTLEAzdtbzM2q1hOAuWHSd 135 | HjEL2eyR+4O4PJbG2UelZMduIKUssuS2XtwiGRh9QZcDMxVS6DUpSlqhIHgull2UtLPcOtPCrAS0 136 | HgUb83UrNaJOFb1xpU/ZdqDllXtn3Qn6BW3RgHOkR4ldgMMFf9J6DVrByVzmXDdgPLA7tMjBhOs0 137 | jpPWgEgwJtDCqihbwCofUyffgH0aoQ2cPAI10KjSXbPpmGj4E3/rnnu3fh/IMKd5pxC3lD2sAU6R 138 | 
GOypCsxmNroDzpiWJe2VP64RTE+4VqrbaOjfKrQZE65l06ClIiyIXUtqegyS6FbvrkDwhbCFgotI 139 | VQGDFBLncwK5l88OD/MWslakUgxsqXJNDC1bK3oTu99oqoE1wVOHzTZ07SxHAyYm0DijoLIgPUdW 140 | gjaqNtBvFeITcBSo5o6fj+oD+zn6Xs4Sg6wnIOxczxHGhSu3xFKgsc877wV0Yq+hQHAY5zmOHTHk 141 | wnmjjdI1IFgjd+dtRxiDzttd2kZyoOF17lNo5XRE1vRst+6tsF489xrPIRth2zzssHCrpwPMiKti 142 | M92G8twT0P9W1mBBoDiAVZzeww2bw9lBp7A5hZw0ogtPMCG2q87dHKw/Sr6TDMQp/dyQQzgKaEkB 143 | ggcc+oVadSK4kxB4oy0k+lFdFYVkeFbMYdW5ep1ligf6SElXa6EHc8MQUYNP06mWhTR7e3vkMbQJ 144 | frRjH6OjDyEC2EMt5sjFRWcNy17nE5q3cO75JfY6du6rIhjUIZUCZ2+hFP+MYJNkebGQA9/nNppX 145 | FA3a41NLysEpEMYQWMMeVCoMKyyd0NICz3jSvFyDFDcGWZEvac1632QIdzRazeERfLHb9KpTGyjx 146 | 0Cnkms7h+t+O+R1LI8KB9aiusE1y2/m8qNRpj5aFKEFS4V6hlMEmyJ3PjprXbg84JsW2iuvh29rY 147 | FhhL1HwzxCkx3pMEnJTdmPU7vsEYAaWELSD8sm0q/PL2oc3oxAsQ0CB7raZtRYHQJZzalNQKspSq 148 | HHYVQdhErDoO7SQdBgqc1Q/iFISM7jBPa5x7w4+srZ0+A7P8c2Rtjrf/URevwkW8AJMdjahB0eHB 149 | Wms21qOEk7CkMWu2gG4A67EzuFx4CwMP4MzukQdqteMr0F1ko84+BPHmmAXfcjuDXsu5PzvSKNeb 150 | X0tBIWBPSoU+DxognXsj69MSBndh24Pr9J0hq4XqpvDBsAPLNRekAbKtCZemKeka3Gq4R9t0Fwr3 151 | ysH2r7pgGAwFR22A6DusFcLZhLCwcY/Wsphwaea4WbYn0d4uHm/h85WSGE1ZtM4McY+ifzNhqgKh 152 | iEDQpZIc5yBouqwJU2bXNsJ96HWrG4IMQxSy3nh/qHggOktoiZ3YtwJPziUUYKjTAoageF05cirY 153 | SCDPcJpTATBZExddAfeHsgVBEkUAO3OTFEIau+52daB/SGQ0Y8QsrKCTU4I7Z8UAQzjHdAWIZqXT 154 | MRBWkg41AEEt34LoH/b++Y7WRMvUJUxwaU54d75Op+OYqrnsnLYDJBraWWxw2h3X0UeVnQ2Bzj0g 155 | DQ5/q9aUazcimEw104JLELW9a7pDq7Aq467o7LAI0hHdXlyvBP3n4nKmkdrJCy4KyJIR0wjBFrBw 156 | jTu6Vi0ANyx9R02NQYTg6We+pWZhOzQ4dkOICOK0su+xD7pQdiFm5BZElxFuuaDxSqscw92hjW3R 157 | md1C/rwVGs1EZ/DOpS7HvXRyK4YXYCzYVTW09ydct3Ow4UsQf2UJNLf14kAPYHIaAkmwc63yMT4k 158 | DeIRgsavW/AnnGGKUku4GCCpKdCj8wYgQ7AAd2uYr399QTUnRcuOuyAUCI4OeFMK0YxJW1vpbO7V 159 | QjYwvgs9NKg2jNkOotXijYWFbXNu58LgZuwSyoLWc9R6fZitp1bHoI7bcY+qTawJHGCmWwbmA4Ls 160 | QkcQBh+Dam5aa51ZTEAYwSi56Fw61NDkWFoLW9WyYwcQGJNg2YIE2DgQbn9Nm0M/F7l0JLEVAOxN 161 | Q+BtQDrmvhaqrIhZ11yDFEJJ4rIRzgfqD4XJnRie84jAYYahFNocvDNd+jcqVAriDehoBdutwQpt 162 | 
re0Nni6tsIthlA21oHYBFlaHhc7oh4e651eyM5IdaL2H5lwF6KoBVgXajBTdhVNugpOd5GvsO0Du 163 | SIhtv6R6yH30RhZC+LjLKo0dlW6smt4lxhBEn3qSFYRkYOtyrAnajsB0lhdGAjD8VHeR5Qr5SW2H 164 | d/qHnC2yyQUhYUnbh2wosES/KHgPbAtAw5dfHjg36+Xzp0++7n8awqCbcGbdRQJpH/cETA/TuLs5 165 | ILDWZdG+/BKxfL/jn568NlSFwdVCWGkgKLJtgEFQpQaTti6BzW7fPXy8HanqWQd04ni4NwSAGRze 166 | UnQpgj6BM1gpJNfquIv8mgUFUCG022I8rRevDH6Q9dwRygPH5pouMec/GGi9xL7Qm5PWMShQcH9Y 167 | 0rFCr319iKEPYZ2ewTuVPuhldIlQ0YPBOChPU8En+LqTfRAC2o7LvUALvVQrXLagx2bgkMHtx5YJ 168 | 5dqRSa+rVN1b1WZ43k0IYnBMoPOJ6NyTRlA7GHF1i4XbXTD3/m7EAo0wtGOcM4hu+cYd3+j1JWXr 169 | wc5YKKw6RgMa0G8cqAg3MMapdEKPWQx0gzktAa9QSItAokfAFhQbuahNTHXwU1A99W74zphjp8nA 170 | BBhIIfC8s/GZzvrIRdGlT286cYNcZtbGCvQuxoMF1EUegECsLCQDD6s/W4siYysZdWY2lx3CllSq 171 | C2PK+hhNza3MEK2RZbpLcqDxFvCZvbmtzubCkV+BMhasjSEDKN4w6YI2Y7TnNgK2F8iAHVmKIfy6 172 | yaY4EiFHeHaYIgkfjTCijst2H+a9FU4q3NiBk0FSuNk5LhI9brfEfiokjTPMiKyCID5xRBl6nter 173 | 88oZi4OnN3b3FhVrNFFrsQLjAHyueuxm711Y9Mg6mVad69ypWmz7bEjSg1/mjLnF2jVt64l8AP68 174 | +IwBCxq87heOopeis3AQglJCO7cSZZdqTe8kDyFQF6iWLktESQXUp/ugEqitNXDTNsALtXJkJNnx 175 | BSyjSVPSGidylIS/IWg957gUyxADJ5XQTOInAZEBiYmQLmyqioGaJz2d3+wkCnrFEJMDYerKKgoE 176 | GIQmRO7WsFvgVLDjodJhTBau2gQcoEZDjQbymFk5wToomkFaDgoCTIJygAIiB3uOddFGQF773PQC 177 | DDUdxCHQWRjU6yDR10ielRtKWhdzBSrC0oJOqeeKuxhGh7leQo9hOTu5XTeuUV10rc83DjSzQONy 178 | SM6DA7ALWHedV4nx0g5GczqBvMu8UBWOUaxbg4oBcWS61BeEPjv4+2RZn6wFvi4Vei1A9C7nuMkF 179 | 9RZKF4XqpzDKIc4575ifQJYpqUsBaFUO79DBWKvVUpQl3SNPnMsJnpPs8IvhXejuBCSHrIf81Sfw 180 | eys7FwvZ+btNW0vjnFWU+F3OtKN15ImnUL1A1+RgSGVtBwHPOANNI2pXcGF6mdFQKErsw2tWQmQA 181 | XFw0GEOv0/fotnPn6iCRtnBpGXw65S52LgVwC0ojjG+iLkMXxEWs4CSP86Yh7tGjosPAVoAfk+ZO 182 | 4UJlGw6+5TxLO8iNPkxwkb/jfCvYMwxbYEq8oRIthLa0mm4cbVkfr89EnyGc5GLTaF/mra670iFz 183 | LGuHrB2Tp/f1OyScBzO6x8Zq2aDz6AInXXjCOVUKMdzWfPCmu6DT2c2FkJoaqkSM1bQxEPV803+V 184 | Q6c2U0oMTkOeBqpxFgpsy4MtEw+hCL3TIaRhUyDQWvOtIKkFIdC4gPCO7MgpCECX+3QX6T2+9ege 185 | FtKUVFZdTFFtimXADN8JdzRaNUJj6ZHLk2mFVNgXKmH5xNlAnsNrpz6242YX9jLjXAh7QSOz6D65 186 | 
pfvg2G6h0qDi0RPChDIFRWhLCMLKEgILuWpPxe+W4mwKCO4iROtW1i756GLaC9p5uEOcdPCCXMEH 187 | zgmKWeNG3tp2efssqXu0Bff1tGm6iU32Ph0aPpj345XUYmufXZUQqpuagtMH9x5q+N/V133wfcy7 188 | VZvveSUzOEjXzd3+Dpq73ek6VKO6nJCntavZuuJub7UpVv8otzPv8tUnerauO1Fwtc30acqiOJl4 189 | Ho+h1VsySZOpP/FExBnPhMhF/Km2egtMni/f3eotnIYs3G71Vs6byx3ZABFXiOrtr/l25i3C3jkG 190 | Uc6bG1VUzKgWs7VqZ7SebavrWe/bzVYLNUPVOaMf6w7nq4bxgrMasRd7cAbyvPMaURZe3XkN74L2 191 | C+6Hd57XiPbj8JwrEKvOMDt1XOOBBA/7M+weviZgMoDB/uWXj9Zasi+/RIuoi+HAusE2WmmIPzhz 192 | xWkOKDuRhcWDAaIrntmK3YLRrl04ZRikkMxiehUyiJKW8i36yzX0JAC3ddy3EyD3+ycBlEcY+tDr 193 | /ls0qNH3oJAurtabGeai1sM8GBwAH1hw7OHbxVD0mtC56PLxc4WJlGrt8lxoKWDAT9akaksrm1J0 194 | qYp+1IrO6VusnUWHVUscymLcB3zi3wArGXKDcGGpBJt1SWUJ0oK0jYLQ3OtWGPsFFEewsuXOGxmG 195 | WPfnIiD8sVIazXGszmPH5Lmyi6I1rnX4wQaPiO9Gq6VzP1gJcZcuKcFaYxV2jna2LoTCO69ZdTCO 196 | u6o+5+EaaGfv/LjuKgC332Ki6olLRzunAg3lFbSXB9j6ZJcL+Yth+7G0EXuki8+6PERNKHfJXJfu 197 | 29q0MdwaXqLJDYHIIbABDo67lrzs/DD4yk9/Szqz4DNy4CJTGGe0F5BlD1Q/LvpaeCrBHaGAMHvZ 198 | uLhYq7tse1dUiS48DACDQ2QHkjFffunWdOi2AqIi+19+Se5i3hHulKAMGKdQDF0roLLfYVk90Keg 199 | yzVppGDC/K6vScYYX6UwE+F+glMVtHJYKSj6wB0nYnaiUU1bUmjQXXNVmW2yOoRy2JfoJz4CSupy 200 | Ac/BDBiTB1TrNXmGWRzMbUP4EXPcEErB0is9x+AtkMIwPEoHAIjRBqkaq+5p02C00W3bRvS4UzTd 201 | 5iLZHh1NJhOHtGfPD+4cPPm6wyH58stD5LTDrjb3Nz6ZkHgM4VxgBPMFYPafvT0vuQF/u1ceKXBL 202 | N+/A8/CeD7cWeL899Wa89eY9jrQAP3WlxrXArE8lOip3h3XJDQIG7cZN64hoTA4+x9Msw+KBdoAO 203 | 8cZ5ECrCTeGajEMpHeQxhiiBYbR0pYe2qxz7Z9/zoFKUBJtV7xHSo+cZXYOmA4i/E3WlxuQONYtb 204 | TTMmz+i6oaVD652nT17cuvOiRysWeUP4siuPAoahzFVyG8cu4OZBneXdx9hKB8B0rVU/gwH6MgOk 205 | wAoav8/hKgZ46i5csqBBqJAStIeBGuF/ytIs7Nji1uNnj+4d9qD88ejojwDOIwhaUEvuQl73c0ru 206 | qFbbo6M/kUNagcR9Wos//abX/lwxszdXal4KVP9csRbQcIPf8O88Ozie3fJfLx98U/EDf/kgfP1s 207 | eTD//dcP47j45jv/BuzAzdY0X3ENGWJ+jEejVSv5V77v+VPPi9IwioPY8zMv7foPaNuo4iswxdxB 208 | ao6fv9isAcKCX2NJ8QuIfG9B/mKlNpDbhUBLS/BS1AaBv9l85YdeEsT9aE8O77/sXt4nd5TWbQOs 209 | crnlL+/X371+M/9u+pp9v5o++e7hs/Tw+/tVaV4ezNbPpq9n32Q2Pl7J6Zvjs5j44qJwTbIM8sX5 210 | 
4Zo/3n/6nDw4eH7vT+RZryvHWwKW97r8xiGTk/uSuAkcRyDNu7S/VXq9JSXMZwTQAJIE8HH5OwQ3 211 | 8FziGsFtc+g94yDXFwn++kMgW7TyK4p6XG2rodPXC3pXdb3gNm990rGQq2rh6j/5Np9W4uGD9bc/ 212 | HN5/k2VP6+rBnR+K+tH9omi+e3T3LX1Qff39zJpz2wwlEQtYSJM4KeLcYwWlURAmGZ8Wvp97YUJp 213 | 4k1DEXyUNkPvC/x7tRnKEz/w/IAzmsY0DTKf8TRKUxGGqRd4KUu473lxPr2KNkMftpBLthlinie4 214 | n0bCC0LGk9yjIog5y6dZEk4jz8/DMAi8IrqSNkMftpJLthkKo8JLwoiHcTylUy8TPC6i3EtSb5ql 215 | SZqFOU2mXjq9kjZDH7aSS7YZSlgUxKk3DQJPUEFpFmcJDX2RxIE/LTwvT4swTvyraTP0YSu5ZJuh 216 | IPT8MAyySKRRESZBlHgB9bIE2j1lLMr8IqRBxviVtBn6YIa/XMekoAj9opjGMWNh4Hl+zmKeFTTw 217 | 8+k0EkmYFX6Whu/RZmik8qI1oI5Am3986ZyXrf6qo8B3tTOmXpZ63pRl08zjRcoLHvpp4QchLTjP 218 | woSy2EtZWnz6khtX7V9u1SkPaZgwL2dTaCrlFZnnB1nCMx4WjHNvmuWMFUnwyUl1XGTgX26RImJR 219 | mqSxH4dFkvAgF4JN82nMClrQhAGVJ1EQf3oSH1cZBpfroV54aZwHWTylSShymkZFnmYem+a+X2Rp 220 | HBVURH7KPz1t8D5smvhxGkAndY97BfX8KfeKaJrQPOI0TUNvmtKICq/49DTFe62S5jwP42I6DZIs 221 | T+k0yMNomvlgucRe6oVJFvs8ST9BLfI+ywwTL2KMxTTzeBYnURoIXsTTXLCMpVNB08D3pl5Bf1HD 222 | /DR2ntq1QrlWKNcK5VqhXCuUa4Xyn1MofTfUYP376nD5ZJ0WqU3f3HvUfrt4ax7Vh/cerpe3760e 223 | PrwteHyfKfW1d3XdUC8u0Emm06BI00keRB4U6LBJ6hfxxBMpn6a+8EThfbIFOu3xcfTuAh0RZNHO 224 | XYxlvLpcgc4LLeu39FdcnrMVat2tfIlXNwqlZwupxWyoY5htpWZm3ekAw2QhP1JhzhVCd0FJTjaN 225 | sosqcqbZlVXkRJl3fkWO++ESFTlhdqYipxRU14ffPDpVjiPKUn1G7iosKXE1Dl0phoWT+jfJU+0q 226 | lCmHLDL+rYVDL8eKTihw/rzqzynDgRh3nlCrFjL0w+FULD3OqZEMfnYfDr95BNdhu2ZZeDRDwFl7 227 | OCVusPJDFl0BQn9Ui0GRfNs3HZPda9t1GdjAALLh7uBAX7S9YeW9vlqjPxRbrsmTp1sPdAUtc00r 228 | PJ/5SB4LqA3hkkOtP554x0MmAL54g/06YVB39AbbD5Jnm+NYN8ihYt1ZrK1JhrIXV0khFKRwcbHb 229 | q8G72AV0EljIwnblGKzVwD3QQqDW0C4RQb6DDWM1dsL55hG+fRfulO8Lmxj+vqmh6Lqh4mFQqnEF 230 | 2K+y624zvIAk607Mu5P4opC1a4fgalhc01R3+B1PfMGed0egYHfX210B8ZCYo6gaesNh9hlLZRau 231 | b0jr+nlUbhegXIXhIQ13TubA1fiobnD59m1XDAWCz4y7lSloC4KnKbpBN12qmqZcd6urmWiG3gbd 232 | ZemlxMgflj8QLJZybR/cIZT1CroOVOvNL3AMaZ8c1f7+DhM5+qrXBNqgwqEedPqwnxSXkI+GIjCH 233 | X+OK+LH3pMBDZFatXJeX/tB2LuaQV9LmJrmt1LG5SZ66k7pugJvkqA72yV3J++mxnWHfoNQQA2V2 234 | 
UIHhjixsEYr7CAOE+8hJALOVDY4Z7WMvl+4Ia3esStT9JlE49uD2V2l34EqB2XCTvBQET83lWooC 235 | 0K1atnANXB08bf5KsKHZYkdn2H4RKkWG07fHghyN+pPYA10Ag4PwM2M3mjT92dyqLenRyB1P6/tN 236 | uSWfJoN+MCB6qDmzJCAL18DYnUelW1KlbYb33fE+6pZnJJZd/AZ+RCamZd5W0BIED2N/jiVyhWRH 237 | IwJlNnAWkC7FzS9cSwocohtwi567EirBiQ9fhz2SXRtqYiwcJXEH3V5DC0zXpMCd3Fm4s6BrdzKr 238 | bXrhC+dNOgd9b6tzr+N2PBGCp6wRM+dKYKjRWG/P5ETvATb+wFNkppNghhyNenI9Gm3xCVYfgi5y 239 | bR0GQeO6urmSQzmvoWhy3kKRDzXdWcAdXgPe3uLTvTOiDBp/uX05vPfo3p0X2PFNoDFASmG7rjYU 240 | m1sBrwm2qCVMAUMJiXzluNcd7ewKl+YKyX4XA0D3B1BWAkV82KuVbrO3E9yNax/Rus48To0SPJ/Z 241 | C4WhNfEcDAsnpDSU9FnBP4MD9u4c8mcX1czMj6MoOr9m5nm3qRv8IEn0GPrmEZmQWwNYNz/0oM+W 242 | cXE1lypfl7dc36D1qR/mSa6mfmWLdz528Ypb1QfekfX3c2Gpb9Ll+1ya8iq65Amc+9F99eaFVvbX 243 | fFVpT487PvSrqLnhDITZRpe460qdLpmZ1+WM1uuZ0yRXdGfpGR//yoC76IIUKKs538MP0+jqPHwv 244 | uMDD98LLePjhvhe5aJ7Wm40EYc4pCPQfRwJLGaxSx6P90AeDoBJ8ptqNwpgZ8Pq6xywtR/sRoIUx 245 | YUzRdn8ey6bplFhBJcYNIeGzkHb7xR9H2JwcVgErdk0sR/sj8Xr003hU0Te9sgY4IXIpzAwdB8yH 246 | ybfwpgdKvPtKOrBcx/PR/h8DP4mzLI6n0dh9jFKv/xiG2XT4mPYf/SyIu4/eNPTcxzSNs7D7GGeJ 247 | 33+Mp0H3McjCboTUD4Pu4zSbZlH/MfXC4dsg7T9G/WzTaZZ4GJyl817dOHn94y5RjbgwbLSNi2C0 248 | P/rxCNFxhPg4cgg5Gu3/eAQocR8cUo5+nVg5QrQcIV6OEDFuUVuoORrtHyFyjkY//TT66af/Bz+x 249 | Q3SuzwAA 250 | headers: 251 | CF-Cache-Status: 252 | - MISS 253 | CF-RAY: 254 | - 77e6d0b7fae3a1e1-YYZ 255 | Connection: 256 | - keep-alive 257 | Content-Encoding: 258 | - gzip 259 | Content-Type: 260 | - application/json 261 | Date: 262 | - Sat, 24 Dec 2022 05:07:48 GMT 263 | Last-Modified: 264 | - Sat, 24 Dec 2022 05:07:48 GMT 265 | NEL: 266 | - '{"success_fraction":0,"report_to":"cf-nel","max_age":604800}' 267 | Report-To: 268 | - '{"endpoints":[{"url":"https:\/\/a.nel.cloudflare.com\/report\/v3?s=RRQsXJGpcPzdq1fsElrMcHF2Ik7BErvy2TBajYxJoWpMnMsKkvMOgSWIK0ZlD7E7IX1PhjUGmMuPotLIc9WssMwNWDUvEvIoOZDJuni0Zi1piSGH%2BfIa%2F6Xqr8Yjav4pJyhW"}],"group":"cf-nel","max_age":604800}' 269 | Server: 270 | - cloudflare 271 | Transfer-Encoding: 272 | - 
chunked 273 | Vary: 274 | - Accept-Encoding 275 | alt-svc: 276 | - h3=":443"; ma=86400, h3-29=":443"; ma=86400 277 | cache-control: 278 | - public, max-age=2, s-maxage=2 279 | status: 280 | code: 200 281 | message: OK 282 | version: 1 283 | -------------------------------------------------------------------------------- /examples/01-ratelimitcomparison.csv: -------------------------------------------------------------------------------- 1 | ,api,time,limit,results,limit_type,jitter,iter 2 | 0,pmaw,2.9531655000000683,1,1,backoff,,0 3 | 1,pmaw,5.971504200000254,1,1,backoff,full,0 4 | 2,pmaw,3.246701799999755,1,1,backoff,equal,0 5 | 3,pmaw,8.310208400000192,1,1,backoff,decorr,0 6 | 4,pmaw,3.8984789000001,5,5,backoff,,0 7 | 5,pmaw,2.592994500000259,5,5,backoff,full,0 8 | 6,pmaw,8.032991200000197,5,5,backoff,equal,0 9 | 7,pmaw,12.001277299999856,5,5,backoff,decorr,0 10 | 8,pmaw,4.945224500000222,25,25,backoff,,0 11 | 9,pmaw,2.932161800000358,25,25,backoff,full,0 12 | 10,pmaw,7.953437499999836,25,25,backoff,equal,0 13 | 11,pmaw,10.249230100000204,25,25,backoff,decorr,0 14 | 12,pmaw,2.5214257000002362,125,125,backoff,,0 15 | 13,pmaw,9.804731700000048,125,125,backoff,full,0 16 | 14,pmaw,1.634954199999811,125,125,backoff,equal,0 17 | 15,pmaw,12.307522900000095,125,125,backoff,decorr,0 18 | 16,pmaw,2.125090199999704,625,625,backoff,,0 19 | 17,pmaw,8.700606899999912,625,625,backoff,full,0 20 | 18,pmaw,2.105794199999764,625,625,backoff,equal,0 21 | 19,pmaw,9.97245219999968,625,625,backoff,decorr,0 22 | 20,pmaw,44.46875450000016,3125,3125,backoff,,0 23 | 21,pmaw,29.83592060000001,3125,3125,backoff,full,0 24 | 22,pmaw,39.86004839999987,3125,3125,backoff,equal,0 25 | 23,pmaw,77.12840129999995,3125,3125,backoff,decorr,0 26 | 24,pmaw,1151.7659666,15625,15625,backoff,,0 27 | 25,pmaw,173.4849761000005,15625,15625,backoff,full,0 28 | 26,pmaw,193.84525649999978,15625,15625,backoff,equal,0 29 | 27,pmaw,653.624272,15625,15625,backoff,decorr,0 30 | 28,pmaw,1.236821399999826,1,1,average,,0 
31 | 29,pmaw,5.767700500000501,5,5,average,,0 32 | 30,pmaw,3.4298125999994227,25,25,average,,0 33 | 31,pmaw,2.4052159000002575,125,125,average,,0 34 | 32,pmaw,12.111757600000601,625,625,average,,0 35 | 33,pmaw,35.038121800000226,3125,3125,average,,0 36 | 34,pmaw,173.89691429999948,15625,15625,average,,0 37 | 35,psaw,1.2460128000002442,1,1,default,,0 38 | 36,psaw,0.5799336999998559,5,5,default,,0 39 | 37,psaw,1.0546879000003173,25,25,default,,0 40 | 38,psaw,8.293716799999856,125,125,default,,0 41 | 39,psaw,18.194302299999435,625,625,default,,0 42 | 40,psaw,55.99513619999925,3125,3125,default,,0 43 | 41,psaw,341.78206240000054,15625,15625,default,,0 44 | -------------------------------------------------------------------------------- /examples/02-ratelimitcomparison.csv: -------------------------------------------------------------------------------- 1 | ,api,time,limit,results,limit_type,jitter,iter 2 | 0,pmaw,3.603237600000284,1,1,backoff,full,0 3 | 1,pmaw,3.474622099998669,1,1,backoff,equal,0 4 | 2,pmaw,2.973867299999256,5,5,backoff,full,0 5 | 3,pmaw,6.878816799999186,5,5,backoff,equal,0 6 | 4,pmaw,3.021754500001407,25,25,backoff,full,0 7 | 5,pmaw,3.689710099999502,25,25,backoff,equal,0 8 | 6,pmaw,7.8126764000007825,125,125,backoff,full,0 9 | 7,pmaw,3.4567371000011917,125,125,backoff,equal,0 10 | 8,pmaw,8.3157312000003,625,625,backoff,full,0 11 | 9,pmaw,3.8506134999988717,625,625,backoff,equal,0 12 | 10,pmaw,35.69038869999713,3125,3125,backoff,full,0 13 | 11,pmaw,44.11949529999765,3125,3125,backoff,equal,0 14 | 12,pmaw,181.2667149000008,15625,15625,backoff,full,0 15 | 13,pmaw,191.5530276999998,15625,15625,backoff,equal,0 16 | 14,pmaw,945.6589474999964,78125,78125,backoff,full,0 17 | 15,pmaw,992.5847036000014,78125,78125,backoff,equal,0 18 | 16,pmaw,5079.366979799997,390625,390625,backoff,full,0 19 | 17,pmaw,5325.919546600002,390625,390625,backoff,equal,0 20 | 18,pmaw,1.908530200002133,1,1,average,,0 21 | 19,pmaw,9.96570060000522,5,5,average,,0 22 | 
20,pmaw,0.9519539999964763,25,25,average,,0 23 | 21,pmaw,1.2121703999946476,125,125,average,,0 24 | 22,pmaw,9.905716699999175,625,625,average,,0 25 | 23,pmaw,33.76039319999836,3125,3125,average,,0 26 | 24,pmaw,181.3356635999953,15625,15625,average,,0 27 | 25,pmaw,984.1716327999966,78125,78125,average,,0 28 | 26,pmaw,5522.755829499998,390625,390625,average,,0 29 | 27,psaw,2.6867573999988963,1,1,default,,0 30 | 28,psaw,1.8746405999991111,5,5,default,,0 31 | 29,psaw,8.099379700004647,25,25,default,,0 32 | 30,psaw,4.103474700001243,125,125,default,,0 33 | 31,psaw,21.7489052000019,625,625,default,,0 34 | 32,psaw,73.92517249999946,3125,3125,default,,0 35 | 33,psaw,319.6424377000003,15625,15625,default,,0 36 | 34,psaw,1766.3204124999975,78125,78125,default,,0 37 | 35,psaw,9508.719632100001,390625,390408,default,,0 38 | -------------------------------------------------------------------------------- /examples/img/01-comparison.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mattpodolak/pmaw/bb102c2316ded945805f2de1b5cae5aacb032e66/examples/img/01-comparison.png -------------------------------------------------------------------------------- /examples/img/02-comparison.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mattpodolak/pmaw/bb102c2316ded945805f2de1b5cae5aacb032e66/examples/img/02-comparison.png -------------------------------------------------------------------------------- /examples/img/02-requests-comparison.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mattpodolak/pmaw/bb102c2316ded945805f2de1b5cae5aacb032e66/examples/img/02-requests-comparison.png -------------------------------------------------------------------------------- /examples/img/03-cache-max-memory-comparison.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/mattpodolak/pmaw/bb102c2316ded945805f2de1b5cae5aacb032e66/examples/img/03-cache-max-memory-comparison.png -------------------------------------------------------------------------------- /examples/img/03-cache-memory-comparison.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mattpodolak/pmaw/bb102c2316ded945805f2de1b5cae5aacb032e66/examples/img/03-cache-memory-comparison.png -------------------------------------------------------------------------------- /examples/img/03-cache-time-comparison.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mattpodolak/pmaw/bb102c2316ded945805f2de1b5cae5aacb032e66/examples/img/03-cache-time-comparison.png -------------------------------------------------------------------------------- /examples/search_comments.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "separated-champagne", 6 | "metadata": {}, 7 | "source": [ 8 | "# Search Comments\n", 9 | "In this notebook, I will show you how to use the method `search_comments` from `PMAW` to retrieve comments from the Reddit Pushshift API. To view more details about the Search Comments endpoint you can view the Pushshift [documentation](https://github.com/pushshift/api#searching-comments)." 
10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": 1, 15 | "id": "threaded-dutch", 16 | "metadata": {}, 17 | "outputs": [], 18 | "source": [ 19 | "import pandas as pd\n", 20 | "from pmaw import PushshiftAPI" 21 | ] 22 | }, 23 | { 24 | "cell_type": "code", 25 | "execution_count": 2, 26 | "id": "legislative-shoulder", 27 | "metadata": {}, 28 | "outputs": [], 29 | "source": [ 30 | "# instantiate\n", 31 | "api = PushshiftAPI()" 32 | ] 33 | }, 34 | { 35 | "cell_type": "markdown", 36 | "id": "expensive-afghanistan", 37 | "metadata": {}, 38 | "source": [ 39 | "## Data Preparation" 40 | ] 41 | }, 42 | { 43 | "cell_type": "code", 44 | "execution_count": 9, 45 | "id": "continent-treat", 46 | "metadata": {}, 47 | "outputs": [ 48 | { 49 | "data": { 50 | "text/html": [ 51 | "
\n", 52 | "\n", 65 | "\n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | "
all_awardingsallow_live_commentsauthorauthor_flair_css_classauthor_flair_richtextauthor_flair_textauthor_flair_typeauthor_fullnameauthor_patreon_flairauthor_premium...author_cakedaydistinguishedsuggested_sortcrosspost_parentcrosspost_parent_listcategorytop_awarded_typepoll_datasteward_reportscomment_ids
0[]Falsenf_hadesNaN[]NaNtextt2_hriq1bFalseFalse...NaNNaNNaNNaNNaNNaNNaNNaNNaNgjacwx5,gjad2l6,gjadatw,gjadc7w,gjadcwh,gjadgd...
1[]FalseMyLittleDekuNaN[]NaNtextt2_7dj62vj2FalseFalse...NaNNaNNaNNaNNaNNaNNaNNaNNaNgjacn1r
2[]Falselilirucaarde12NaN[]NaNtextt2_6i04uaxwFalseFalse...NaNNaNNaNNaNNaNNaNNaNNaNNaNgjac5fb,gjacdy5,gjaco45,gjasj4f,gjbxfeg
\n", 167 | "

3 rows × 89 columns

\n", 168 | "
" 169 | ], 170 | "text/plain": [ 171 | " all_awardings allow_live_comments author author_flair_css_class \\\n", 172 | "0 [] False nf_hades NaN \n", 173 | "1 [] False MyLittleDeku NaN \n", 174 | "2 [] False lilirucaarde12 NaN \n", 175 | "\n", 176 | " author_flair_richtext author_flair_text author_flair_type author_fullname \\\n", 177 | "0 [] NaN text t2_hriq1b \n", 178 | "1 [] NaN text t2_7dj62vj2 \n", 179 | "2 [] NaN text t2_6i04uaxw \n", 180 | "\n", 181 | " author_patreon_flair author_premium ... author_cakeday distinguished \\\n", 182 | "0 False False ... NaN NaN \n", 183 | "1 False False ... NaN NaN \n", 184 | "2 False False ... NaN NaN \n", 185 | "\n", 186 | " suggested_sort crosspost_parent crosspost_parent_list category \\\n", 187 | "0 NaN NaN NaN NaN \n", 188 | "1 NaN NaN NaN NaN \n", 189 | "2 NaN NaN NaN NaN \n", 190 | "\n", 191 | " top_awarded_type poll_data steward_reports \\\n", 192 | "0 NaN NaN NaN \n", 193 | "1 NaN NaN NaN \n", 194 | "2 NaN NaN NaN \n", 195 | "\n", 196 | " comment_ids \n", 197 | "0 gjacwx5,gjad2l6,gjadatw,gjadc7w,gjadcwh,gjadgd... 
\n", 198 | "1 gjacn1r \n", 199 | "2 gjac5fb,gjacdy5,gjaco45,gjasj4f,gjbxfeg \n", 200 | "\n", 201 | "[3 rows x 89 columns]" 202 | ] 203 | }, 204 | "execution_count": 9, 205 | "metadata": {}, 206 | "output_type": "execute_result" 207 | } 208 | ], 209 | "source": [ 210 | "# import test data into a dataframe\n", 211 | "posts_df = pd.read_csv(f'./test_data.csv', delimiter=';', header=0)\n", 212 | "posts_df.head(3)" 213 | ] 214 | }, 215 | { 216 | "cell_type": "code", 217 | "execution_count": 10, 218 | "id": "qualified-legislation", 219 | "metadata": {}, 220 | "outputs": [ 221 | { 222 | "data": { 223 | "text/plain": [ 224 | "2500" 225 | ] 226 | }, 227 | "execution_count": 10, 228 | "metadata": {}, 229 | "output_type": "execute_result" 230 | } 231 | ], 232 | "source": [ 233 | "len(posts_df)" 234 | ] 235 | }, 236 | { 237 | "cell_type": "markdown", 238 | "id": "unexpected-uzbekistan", 239 | "metadata": {}, 240 | "source": [ 241 | "The data in `posts_df` contains 2500 submissions and their respective metadata extracted from a subreddit submission search; the comment_ids were added post-search with additional requests."
242 | ] 243 | }, 244 | { 245 | "cell_type": "code", 246 | "execution_count": 11, 247 | "id": "charged-september", 248 | "metadata": {}, 249 | "outputs": [ 250 | { 251 | "data": { 252 | "text/plain": [ 253 | "271" 254 | ] 255 | }, 256 | "execution_count": 11, 257 | "metadata": {}, 258 | "output_type": "execute_result" 259 | } 260 | ], 261 | "source": [ 262 | "posts_df.loc[:, 'comment_ids'].isna().sum()" 263 | ] 264 | }, 265 | { 266 | "cell_type": "code", 267 | "execution_count": 12, 268 | "id": "interpreted-dinner", 269 | "metadata": { 270 | "scrolled": true 271 | }, 272 | "outputs": [], 273 | "source": [ 274 | "# extract comment_ids\n", 275 | "comment_ids_str = list(posts_df.loc[posts_df['comment_ids'].notna(), 'comment_ids'])" 276 | ] 277 | }, 278 | { 279 | "cell_type": "code", 280 | "execution_count": 13, 281 | "id": "municipal-witch", 282 | "metadata": {}, 283 | "outputs": [ 284 | { 285 | "name": "stdout", 286 | "output_type": "stream", 287 | "text": [ 288 | "Ready to retrieve 43377 comments\n" 289 | ] 290 | } 291 | ], 292 | "source": [ 293 | "# convert strings to lists\n", 294 | "comment_ids = []\n", 295 | "for c_str in comment_ids_str:\n", 296 | " # exclude ending , since all entries include one\n", 297 | " comment_ids.extend(c_str[:-1].split(\",\"))\n", 298 | "num_comments = len(comment_ids)\n", 299 | "print(f'Ready to retrieve {num_comments} comments')" 300 | ] 301 | }, 302 | { 303 | "cell_type": "code", 304 | "execution_count": 14, 305 | "id": "twelve-chain", 306 | "metadata": { 307 | "scrolled": true 308 | }, 309 | "outputs": [ 310 | { 311 | "data": { 312 | "text/plain": [ 313 | "['gjacwx5', 'gjad2l6', 'gjadatw']" 314 | ] 315 | }, 316 | "execution_count": 14, 317 | "metadata": {}, 318 | "output_type": "execute_result" 319 | } 320 | ], 321 | "source": [ 322 | "comment_ids[:3]" 323 | ] 324 | }, 325 | { 326 | "cell_type": "markdown", 327 | "id": "ceramic-ordinary", 328 | "metadata": {}, 329 | "source": [ 330 | "## Search Comments" 331 | ] 332 | }, 333 | { 334 
| "cell_type": "code", 335 | "execution_count": 15, 336 | "id": "leading-planet", 337 | "metadata": { 338 | "scrolled": true 339 | }, 340 | "outputs": [ 341 | { 342 | "name": "stdout", 343 | "output_type": "stream", 344 | "text": [ 345 | "Total:: Success Rate: 100.00% - Requests: 60 - Batches: 6 - Items Remaining: 0\n", 346 | "Wall time: 1min 1s\n" 347 | ] 348 | } 349 | ], 350 | "source": [ 351 | "%%time\n", 352 | "comments = api.search_comments(subreddit=\"science\", limit=6000)" 353 | ] 354 | }, 355 | { 356 | "cell_type": "code", 357 | "execution_count": 16, 358 | "id": "sweet-dover", 359 | "metadata": {}, 360 | "outputs": [ 361 | { 362 | "data": { 363 | "text/plain": [ 364 | "6000" 365 | ] 366 | }, 367 | "execution_count": 16, 368 | "metadata": {}, 369 | "output_type": "execute_result" 370 | } 371 | ], 372 | "source": [ 373 | "len(comments)" 374 | ] 375 | }, 376 | { 377 | "cell_type": "markdown", 378 | "id": "municipal-manner", 379 | "metadata": {}, 380 | "source": [ 381 | "### Using a query string" 382 | ] 383 | }, 384 | { 385 | "cell_type": "code", 386 | "execution_count": 18, 387 | "id": "constitutional-modern", 388 | "metadata": { 389 | "scrolled": true 390 | }, 391 | "outputs": [ 392 | { 393 | "name": "stdout", 394 | "output_type": "stream", 395 | "text": [ 396 | "Total:: Success Rate: 90.91% - Requests: 11 - Batches: 2 - Items Remaining: 0\n", 397 | "Wall time: 12.4 s\n" 398 | ] 399 | } 400 | ], 401 | "source": [ 402 | "%%time\n", 403 | "# example with passing a query string\n", 404 | "comments = api.search_comments(q=\"GME\", subreddit=\"wallstreetbets\", limit=1000)" 405 | ] 406 | }, 407 | { 408 | "cell_type": "code", 409 | "execution_count": 19, 410 | "id": "curious-livestock", 411 | "metadata": {}, 412 | "outputs": [ 413 | { 414 | "data": { 415 | "text/plain": [ 416 | "1000" 417 | ] 418 | }, 419 | "execution_count": 19, 420 | "metadata": {}, 421 | "output_type": "execute_result" 422 | } 423 | ], 424 | "source": [ 425 | "len(comments)" 426 | ] 427 | }, 
428 | { 429 | "cell_type": "markdown", 430 | "id": "liquid-sperm", 431 | "metadata": {}, 432 | "source": [ 433 | "Since the `search_comments` method returns a `Response` object which is a generator we store the comments in a list using the following code: " 434 | ] 435 | }, 436 | { 437 | "cell_type": "code", 438 | "execution_count": 20, 439 | "id": "partial-hardware", 440 | "metadata": {}, 441 | "outputs": [], 442 | "source": [ 443 | "comment_list = [c for c in comments]" 444 | ] 445 | }, 446 | { 447 | "cell_type": "markdown", 448 | "id": "interested-guide", 449 | "metadata": {}, 450 | "source": [ 451 | "## Search Comments by ID" 452 | ] 453 | }, 454 | { 455 | "cell_type": "markdown", 456 | "id": "refined-peninsula", 457 | "metadata": {}, 458 | "source": [ 459 | "### Using a Single Comment ID" 460 | ] 461 | }, 462 | { 463 | "cell_type": "code", 464 | "execution_count": 21, 465 | "id": "covered-processing", 466 | "metadata": { 467 | "scrolled": true 468 | }, 469 | "outputs": [ 470 | { 471 | "name": "stdout", 472 | "output_type": "stream", 473 | "text": [ 474 | "Total:: Success Rate: 100.00% - Requests: 1 - Batches: 1 - Items Remaining: 0\n" 475 | ] 476 | } 477 | ], 478 | "source": [ 479 | "comment = api.search_comments(ids=comment_ids[0])" 480 | ] 481 | }, 482 | { 483 | "cell_type": "markdown", 484 | "id": "wired-mileage", 485 | "metadata": {}, 486 | "source": [ 487 | "### Using Multiple Comment IDs" 488 | ] 489 | }, 490 | { 491 | "cell_type": "code", 492 | "execution_count": 22, 493 | "id": "neutral-shakespeare", 494 | "metadata": { 495 | "scrolled": true 496 | }, 497 | "outputs": [ 498 | { 499 | "name": "stdout", 500 | "output_type": "stream", 501 | "text": [ 502 | "Total:: Success Rate: 68.75% - Requests: 64 - Batches: 7 - Items Remaining: 2229\n", 503 | "Wall time: 1min 9s\n" 504 | ] 505 | }, 506 | { 507 | "name": "stderr", 508 | "output_type": "stream", 509 | "text": [ 510 | "c:\\users\\mattp\\documents\\projects\\pmaw\\pmaw\\Request.py:119: UserWarning: 2229 
items were not found in Pushshift\n", 511 | " f'{self.limit} items were not found in Pushshift')\n" 512 | ] 513 | } 514 | ], 515 | "source": [ 516 | "%%time\n", 517 | "comments_arr = api.search_comments(ids=comment_ids)" 518 | ] 519 | }, 520 | { 521 | "cell_type": "markdown", 522 | "id": "tracked-dictionary", 523 | "metadata": {}, 524 | "source": [ 525 | "We can see that when searching for comments by ID, some items are no longer stored in Pushshift and could not be returned." 526 | ] 527 | }, 528 | { 529 | "cell_type": "code", 530 | "execution_count": 23, 531 | "id": "valued-newton", 532 | "metadata": {}, 533 | "outputs": [ 534 | { 535 | "name": "stdout", 536 | "output_type": "stream", 537 | "text": [ 538 | "41148 comments returned by Pushshift\n" 539 | ] 540 | } 541 | ], 542 | "source": [ 543 | "print(f'{len(comments_arr)} comments returned by Pushshift')" 544 | ] 545 | }, 546 | { 547 | "cell_type": "markdown", 548 | "id": "stylish-flight", 549 | "metadata": {}, 550 | "source": [ 551 | "### Save Comments to CSV" 552 | ] 553 | }, 554 | { 555 | "cell_type": "code", 556 | "execution_count": 24, 557 | "id": "peaceful-sentence", 558 | "metadata": {}, 559 | "outputs": [], 560 | "source": [ 561 | "# convert comments to dataframe\n", 562 | "comment_list = [c for c in comments_arr]\n", 563 | "comments_df = pd.DataFrame(comment_list)" 564 | ] 565 | }, 566 | { 567 | "cell_type": "code", 568 | "execution_count": 25, 569 | "id": "previous-missile", 570 | "metadata": {}, 571 | "outputs": [ 572 | { 573 | "data": { 574 | "text/html": [ 575 | "
\n", 576 | "\n", 589 | "\n", 590 | " \n", 591 | " \n", 592 | " \n", 593 | " \n", 594 | " \n", 595 | " \n", 596 | " \n", 597 | " \n", 598 | " \n", 599 | " \n", 600 | " \n", 601 | " \n", 602 | " \n", 603 | " \n", 604 | " \n", 605 | " \n", 606 | " \n", 607 | " \n", 608 | " \n", 609 | " \n", 610 | " \n", 611 | " \n", 612 | " \n", 613 | " \n", 614 | " \n", 615 | " \n", 616 | " \n", 617 | " \n", 618 | " \n", 619 | " \n", 620 | " \n", 621 | " \n", 622 | " \n", 623 | " \n", 624 | " \n", 625 | " \n", 626 | " \n", 627 | " \n", 628 | " \n", 629 | " \n", 630 | " \n", 631 | " \n", 632 | " \n", 633 | " \n", 634 | " \n", 635 | " \n", 636 | " \n", 637 | " \n", 638 | " \n", 639 | " \n", 640 | " \n", 641 | " \n", 642 | " \n", 643 | " \n", 644 | " \n", 645 | " \n", 646 | " \n", 647 | " \n", 648 | " \n", 649 | " \n", 650 | " \n", 651 | " \n", 652 | " \n", 653 | " \n", 654 | " \n", 655 | " \n", 656 | " \n", 657 | " \n", 658 | " \n", 659 | " \n", 660 | " \n", 661 | " \n", 662 | " \n", 663 | " \n", 664 | " \n", 665 | " \n", 666 | " \n", 667 | " \n", 668 | " \n", 669 | " \n", 670 | " \n", 671 | " \n", 672 | " \n", 673 | " \n", 674 | " \n", 675 | " \n", 676 | " \n", 677 | " \n", 678 | " \n", 679 | " \n", 680 | " \n", 681 | " \n", 682 | " \n", 683 | " \n", 684 | " \n", 685 | " \n", 686 | " \n", 687 | " \n", 688 | " \n", 689 | " \n", 690 | "
all_awardingsapproved_at_utcassociated_awardauthorauthor_flair_background_colorauthor_flair_css_classauthor_flair_richtextauthor_flair_template_idauthor_flair_textauthor_flair_text_color...retrieved_onscoresend_repliesstickiedsubredditsubreddit_idtop_awarded_typetotal_awards_receivedtreatment_tagsauthor_cakeday
0[]NoneNoneAutoModeratorNoneNone[]NoneNoneNone...16107310541FalseFalseanimet5_2qh22None0[]NaN
1[]NoneNoneNihhrtMAL[]Nonehttp://myanimelist.net/animelist/Nihhrtdark...16107313102TrueFalseanimet5_2qh22None0[]NaN
2[]NoneNone[deleted]NoneNaNNoneNonedark...16107313141TrueFalseanimet5_2qh22None0[]NaN
\n", 691 | "

3 rows × 43 columns

\n", 692 | "
" 693 | ], 694 | "text/plain": [ 695 | " all_awardings approved_at_utc associated_award author \\\n", 696 | "0 [] None None AutoModerator \n", 697 | "1 [] None None Nihhrt \n", 698 | "2 [] None None [deleted] \n", 699 | "\n", 700 | " author_flair_background_color author_flair_css_class author_flair_richtext \\\n", 701 | "0 None None [] \n", 702 | "1 MAL [] \n", 703 | "2 None NaN \n", 704 | "\n", 705 | " author_flair_template_id author_flair_text \\\n", 706 | "0 None None \n", 707 | "1 None http://myanimelist.net/animelist/Nihhrt \n", 708 | "2 None None \n", 709 | "\n", 710 | " author_flair_text_color ... retrieved_on score send_replies stickied \\\n", 711 | "0 None ... 1610731054 1 False False \n", 712 | "1 dark ... 1610731310 2 True False \n", 713 | "2 dark ... 1610731314 1 True False \n", 714 | "\n", 715 | " subreddit subreddit_id top_awarded_type total_awards_received \\\n", 716 | "0 anime t5_2qh22 None 0 \n", 717 | "1 anime t5_2qh22 None 0 \n", 718 | "2 anime t5_2qh22 None 0 \n", 719 | "\n", 720 | " treatment_tags author_cakeday \n", 721 | "0 [] NaN \n", 722 | "1 [] NaN \n", 723 | "2 [] NaN \n", 724 | "\n", 725 | "[3 rows x 43 columns]" 726 | ] 727 | }, 728 | "execution_count": 25, 729 | "metadata": {}, 730 | "output_type": "execute_result" 731 | } 732 | ], 733 | "source": [ 734 | "comments_df.head(3)" 735 | ] 736 | }, 737 | { 738 | "cell_type": "code", 739 | "execution_count": 26, 740 | "id": "novel-radar", 741 | "metadata": {}, 742 | "outputs": [], 743 | "source": [ 744 | "# store the extracted comments into a csv file for later use\n", 745 | "comments_df.to_csv('./test_comments.csv', header=True, index=False, columns=list(comments_df.axes[1]))" 746 | ] 747 | } 748 | ], 749 | "metadata": { 750 | "kernelspec": { 751 | "display_name": "Python 3", 752 | "language": "python", 753 | "name": "python3" 754 | }, 755 | "language_info": { 756 | "codemirror_mode": { 757 | "name": "ipython", 758 | "version": 3 759 | }, 760 | "file_extension": ".py", 761 | "mimetype": 
"text/x-python", 762 | "name": "python", 763 | "nbconvert_exporter": "python", 764 | "pygments_lexer": "ipython3", 765 | "version": "3.6.12" 766 | } 767 | }, 768 | "nbformat": 4, 769 | "nbformat_minor": 5 770 | } 771 | -------------------------------------------------------------------------------- /examples/search_submission_comment_ids.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "systematic-failure", 6 | "metadata": {}, 7 | "source": [ 8 | "# Find Comment IDs from Submission IDs\n", 9 | "In this notebook, I will show you how to use the `search_submission_comment_ids` method from `PMAW` to retrieve all the Reddit comment IDs for an array of submission IDs. You can view details about this endpoint in the Pushshift [documentation](https://github.com/pushshift/api#get-all-comment-ids-for-a-particular-submission)." 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": 1, 15 | "id": "equivalent-employer", 16 | "metadata": {}, 17 | "outputs": [], 18 | "source": [ 19 | "import pandas as pd\n", 20 | "from pmaw import PushshiftAPI" 21 | ] 22 | }, 23 | { 24 | "cell_type": "code", 25 | "execution_count": 2, 26 | "id": "weighted-robinson", 27 | "metadata": {}, 28 | "outputs": [], 29 | "source": [ 30 | "# instantiate\n", 31 | "api = PushshiftAPI()" 32 | ] 33 | }, 34 | { 35 | "cell_type": "markdown", 36 | "id": "related-federation", 37 | "metadata": {}, 38 | "source": [ 39 | "## Data Preparation" 40 | ] 41 | }, 42 | { 43 | "cell_type": "code", 44 | "execution_count": 3, 45 | "id": "frank-group", 46 | "metadata": {}, 47 | "outputs": [ 48 | { 49 | "data": { 50 | "text/html": [ 51 | "
\n", 52 | "\n", 65 | "\n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | " \n", 200 | " \n", 201 | " \n", 202 | " \n", 203 | " \n", 204 | " \n", 205 | " \n", 206 | " \n", 207 | " \n", 208 | " \n", 209 | " \n", 210 | " \n", 211 | " \n", 212 | " \n", 213 | " \n", 214 | "
all_awardingsallow_live_commentsauthorauthor_flair_css_classauthor_flair_richtextauthor_flair_textauthor_flair_typeauthor_fullnameauthor_patreon_flairauthor_premium...author_cakedaydistinguishedsuggested_sortcrosspost_parentcrosspost_parent_listcategorytop_awarded_typepoll_datasteward_reportscomment_ids
0[]Falsenf_hadesNaN[]NaNtextt2_hriq1bFalseFalse...NaNNaNNaNNaNNaNNaNNaNNaNNaNgjacwx5,gjad2l6,gjadatw,gjadc7w,gjadcwh,gjadgd...
1[]FalseMyLittleDekuNaN[]NaNtextt2_7dj62vj2FalseFalse...NaNNaNNaNNaNNaNNaNNaNNaNNaNgjacn1r
2[]Falselilirucaarde12NaN[]NaNtextt2_6i04uaxwFalseFalse...NaNNaNNaNNaNNaNNaNNaNNaNNaNgjac5fb,gjacdy5,gjaco45,gjasj4f,gjbxfeg
3[]False[deleted]NaNNaNNaNNaNNaNNaNNaN...NaNNaNNaNNaNNaNNaNNaNNaNNaNgjac9d6
4[]FalsesirdimpletonNaN[]NaNtextt2_bznmn4iFalseFalse...NaNNaNNaNNaNNaNNaNNaNNaNNaNgjaocmg,gjb2jsj,gjbisrw,gjbjbk8
\n", 215 | "

5 rows × 89 columns

\n", 216 | "
" 217 | ], 218 | "text/plain": [ 219 | " all_awardings allow_live_comments author author_flair_css_class \\\n", 220 | "0 [] False nf_hades NaN \n", 221 | "1 [] False MyLittleDeku NaN \n", 222 | "2 [] False lilirucaarde12 NaN \n", 223 | "3 [] False [deleted] NaN \n", 224 | "4 [] False sirdimpleton NaN \n", 225 | "\n", 226 | " author_flair_richtext author_flair_text author_flair_type author_fullname \\\n", 227 | "0 [] NaN text t2_hriq1b \n", 228 | "1 [] NaN text t2_7dj62vj2 \n", 229 | "2 [] NaN text t2_6i04uaxw \n", 230 | "3 NaN NaN NaN NaN \n", 231 | "4 [] NaN text t2_bznmn4i \n", 232 | "\n", 233 | " author_patreon_flair author_premium ... author_cakeday distinguished \\\n", 234 | "0 False False ... NaN NaN \n", 235 | "1 False False ... NaN NaN \n", 236 | "2 False False ... NaN NaN \n", 237 | "3 NaN NaN ... NaN NaN \n", 238 | "4 False False ... NaN NaN \n", 239 | "\n", 240 | " suggested_sort crosspost_parent crosspost_parent_list category \\\n", 241 | "0 NaN NaN NaN NaN \n", 242 | "1 NaN NaN NaN NaN \n", 243 | "2 NaN NaN NaN NaN \n", 244 | "3 NaN NaN NaN NaN \n", 245 | "4 NaN NaN NaN NaN \n", 246 | "\n", 247 | " top_awarded_type poll_data steward_reports \\\n", 248 | "0 NaN NaN NaN \n", 249 | "1 NaN NaN NaN \n", 250 | "2 NaN NaN NaN \n", 251 | "3 NaN NaN NaN \n", 252 | "4 NaN NaN NaN \n", 253 | "\n", 254 | " comment_ids \n", 255 | "0 gjacwx5,gjad2l6,gjadatw,gjadc7w,gjadcwh,gjadgd... 
\n", 256 | "1 gjacn1r \n", 257 | "2 gjac5fb,gjacdy5,gjaco45,gjasj4f,gjbxfeg \n", 258 | "3 gjac9d6 \n", 259 | "4 gjaocmg,gjb2jsj,gjbisrw,gjbjbk8 \n", 260 | "\n", 261 | "[5 rows x 89 columns]" 262 | ] 263 | }, 264 | "execution_count": 3, 265 | "metadata": {}, 266 | "output_type": "execute_result" 267 | } 268 | ], 269 | "source": [ 270 | "# import test data into a dataframe\n", 271 | "posts_df = pd.read_csv(f'./test_data.csv', delimiter=';', header=0)\n", 272 | "posts_df.head(5)" 273 | ] 274 | }, 275 | { 276 | "cell_type": "code", 277 | "execution_count": 4, 278 | "id": "labeled-diana", 279 | "metadata": {}, 280 | "outputs": [ 281 | { 282 | "data": { 283 | "text/plain": [ 284 | "2500" 285 | ] 286 | }, 287 | "execution_count": 4, 288 | "metadata": {}, 289 | "output_type": "execute_result" 290 | } 291 | ], 292 | "source": [ 293 | "len(posts_df)" 294 | ] 295 | }, 296 | { 297 | "cell_type": "markdown", 298 | "id": "returning-burden", 299 | "metadata": {}, 300 | "source": [ 301 | "The data in `posts_df` contains 2500 submissions and their respective metadata extracted from a subreddit submission search; the `comment_ids` column was added post-search with additional requests." 
302 | ] 303 | }, 304 | { 305 | "cell_type": "code", 306 | "execution_count": 5, 307 | "id": "floral-campus", 308 | "metadata": {}, 309 | "outputs": [ 310 | { 311 | "data": { 312 | "text/plain": [ 313 | "['kxi2w8', 'kxi2g1', 'kxhzrl']" 314 | ] 315 | }, 316 | "execution_count": 5, 317 | "metadata": {}, 318 | "output_type": "execute_result" 319 | } 320 | ], 321 | "source": [ 322 | "# create submission ID list\n", 323 | "post_ids = list(posts_df.loc[:, 'id'])\n", 324 | "post_ids[:3]" 325 | ] 326 | }, 327 | { 328 | "cell_type": "markdown", 329 | "id": "ultimate-gabriel", 330 | "metadata": {}, 331 | "source": [ 332 | "## Comment IDs for a Single Submission" 333 | ] 334 | }, 335 | { 336 | "cell_type": "code", 337 | "execution_count": 6, 338 | "id": "monthly-pillow", 339 | "metadata": {}, 340 | "outputs": [ 341 | { 342 | "name": "stdout", 343 | "output_type": "stream", 344 | "text": [ 345 | "Total:: Success Rate: 100.00% - Requests: 1 - Batches: 1 - Items Remaining: 0\n" 346 | ] 347 | } 348 | ], 349 | "source": [ 350 | "comment = api.search_submission_comment_ids(ids=post_ids[0])" 351 | ] 352 | }, 353 | { 354 | "cell_type": "markdown", 355 | "id": "absolute-cabin", 356 | "metadata": {}, 357 | "source": [ 358 | "## Comment IDs for Multiple Submissions" 359 | ] 360 | }, 361 | { 362 | "cell_type": "code", 363 | "execution_count": 7, 364 | "id": "criminal-perspective", 365 | "metadata": { 366 | "scrolled": true 367 | }, 368 | "outputs": [ 369 | { 370 | "name": "stdout", 371 | "output_type": "stream", 372 | "text": [ 373 | "Checkpoint:: Success Rate: 89.00% - Requests: 100 - Batches: 10 - Items Remaining: 1405\n", 374 | "Total:: Success Rate: 83.57% - Requests: 140 - Batches: 14 - Items Remaining: 0\n", 375 | "Wall time: 2min 17s\n" 376 | ] 377 | } 378 | ], 379 | "source": [ 380 | "%%time\n", 381 | "comment_ids = api.search_submission_comment_ids(ids=post_ids)" 382 | ] 383 | }, 384 | { 385 | "cell_type": "markdown", 386 | "id": "contained-microwave", 387 | "metadata": {}, 388 | 
"source": [ 389 | "### Save Comment IDs" 390 | ] 391 | }, 392 | { 393 | "cell_type": "markdown", 394 | "id": "specified-breed", 395 | "metadata": {}, 396 | "source": [ 397 | "Since the `search_submission_comment_ids` method returns a `Response` object which is a generator we need to store the comment ids in a list before we start working with them." 398 | ] 399 | }, 400 | { 401 | "cell_type": "code", 402 | "execution_count": 8, 403 | "id": "oriental-acceptance", 404 | "metadata": {}, 405 | "outputs": [], 406 | "source": [ 407 | "comment_id_list = [_id for _id in comment_ids]" 408 | ] 409 | }, 410 | { 411 | "cell_type": "code", 412 | "execution_count": 10, 413 | "id": "recorded-government", 414 | "metadata": {}, 415 | "outputs": [ 416 | { 417 | "data": { 418 | "text/plain": [ 419 | "2500" 420 | ] 421 | }, 422 | "execution_count": 10, 423 | "metadata": {}, 424 | "output_type": "execute_result" 425 | } 426 | ], 427 | "source": [ 428 | "len(comment_id_list)" 429 | ] 430 | } 431 | ], 432 | "metadata": { 433 | "kernelspec": { 434 | "display_name": "Python 3", 435 | "language": "python", 436 | "name": "python3" 437 | }, 438 | "language_info": { 439 | "codemirror_mode": { 440 | "name": "ipython", 441 | "version": 3 442 | }, 443 | "file_extension": ".py", 444 | "mimetype": "text/x-python", 445 | "name": "python", 446 | "nbconvert_exporter": "python", 447 | "pygments_lexer": "ipython3", 448 | "version": "3.6.12" 449 | } 450 | }, 451 | "nbformat": 4, 452 | "nbformat_minor": 5 453 | } 454 | -------------------------------------------------------------------------------- /examples/search_submissions.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "monetary-sphere", 6 | "metadata": {}, 7 | "source": [ 8 | "# Search Submissions\n", 9 | "In this notebook, I will show you how to use the method `search_submissions` from `PMAW` to retrieve submissions from the Reddit Pushshift 
API. To view more details about the Search Submissions endpoint you can view the Pushshift [documentation](https://github.com/pushshift/api#searching-submissions)." 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": 1, 15 | "id": "binary-marketplace", 16 | "metadata": {}, 17 | "outputs": [], 18 | "source": [ 19 | "import pandas as pd\n", 20 | "from pmaw import PushshiftAPI" 21 | ] 22 | }, 23 | { 24 | "cell_type": "code", 25 | "execution_count": 2, 26 | "id": "interim-radio", 27 | "metadata": {}, 28 | "outputs": [], 29 | "source": [ 30 | "# instantiate\n", 31 | "api = PushshiftAPI()" 32 | ] 33 | }, 34 | { 35 | "cell_type": "markdown", 36 | "id": "bacterial-field", 37 | "metadata": {}, 38 | "source": [ 39 | "## Data Preparation" 40 | ] 41 | }, 42 | { 43 | "cell_type": "code", 44 | "execution_count": 3, 45 | "id": "commercial-height", 46 | "metadata": {}, 47 | "outputs": [ 48 | { 49 | "data": { 50 | "text/html": [ 51 | "
\n", 52 | "\n", 65 | "\n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | " \n", 200 | " \n", 201 | " \n", 202 | " \n", 203 | " \n", 204 | " \n", 205 | " \n", 206 | " \n", 207 | " \n", 208 | " \n", 209 | " \n", 210 | " \n", 211 | " \n", 212 | " \n", 213 | " \n", 214 | "
all_awardingsallow_live_commentsauthorauthor_flair_css_classauthor_flair_richtextauthor_flair_textauthor_flair_typeauthor_fullnameauthor_patreon_flairauthor_premium...author_cakedaydistinguishedsuggested_sortcrosspost_parentcrosspost_parent_listcategorytop_awarded_typepoll_datasteward_reportscomment_ids
0[]Falsenf_hadesNaN[]NaNtextt2_hriq1bFalseFalse...NaNNaNNaNNaNNaNNaNNaNNaNNaNgjacwx5,gjad2l6,gjadatw,gjadc7w,gjadcwh,gjadgd...
1[]FalseMyLittleDekuNaN[]NaNtextt2_7dj62vj2FalseFalse...NaNNaNNaNNaNNaNNaNNaNNaNNaNgjacn1r
2[]Falselilirucaarde12NaN[]NaNtextt2_6i04uaxwFalseFalse...NaNNaNNaNNaNNaNNaNNaNNaNNaNgjac5fb,gjacdy5,gjaco45,gjasj4f,gjbxfeg
3[]False[deleted]NaNNaNNaNNaNNaNNaNNaN...NaNNaNNaNNaNNaNNaNNaNNaNNaNgjac9d6
4[]FalsesirdimpletonNaN[]NaNtextt2_bznmn4iFalseFalse...NaNNaNNaNNaNNaNNaNNaNNaNNaNgjaocmg,gjb2jsj,gjbisrw,gjbjbk8
\n", 215 | "

5 rows × 89 columns

\n", 216 | "
" 217 | ], 218 | "text/plain": [ 219 | " all_awardings allow_live_comments author author_flair_css_class \\\n", 220 | "0 [] False nf_hades NaN \n", 221 | "1 [] False MyLittleDeku NaN \n", 222 | "2 [] False lilirucaarde12 NaN \n", 223 | "3 [] False [deleted] NaN \n", 224 | "4 [] False sirdimpleton NaN \n", 225 | "\n", 226 | " author_flair_richtext author_flair_text author_flair_type author_fullname \\\n", 227 | "0 [] NaN text t2_hriq1b \n", 228 | "1 [] NaN text t2_7dj62vj2 \n", 229 | "2 [] NaN text t2_6i04uaxw \n", 230 | "3 NaN NaN NaN NaN \n", 231 | "4 [] NaN text t2_bznmn4i \n", 232 | "\n", 233 | " author_patreon_flair author_premium ... author_cakeday distinguished \\\n", 234 | "0 False False ... NaN NaN \n", 235 | "1 False False ... NaN NaN \n", 236 | "2 False False ... NaN NaN \n", 237 | "3 NaN NaN ... NaN NaN \n", 238 | "4 False False ... NaN NaN \n", 239 | "\n", 240 | " suggested_sort crosspost_parent crosspost_parent_list category \\\n", 241 | "0 NaN NaN NaN NaN \n", 242 | "1 NaN NaN NaN NaN \n", 243 | "2 NaN NaN NaN NaN \n", 244 | "3 NaN NaN NaN NaN \n", 245 | "4 NaN NaN NaN NaN \n", 246 | "\n", 247 | " top_awarded_type poll_data steward_reports \\\n", 248 | "0 NaN NaN NaN \n", 249 | "1 NaN NaN NaN \n", 250 | "2 NaN NaN NaN \n", 251 | "3 NaN NaN NaN \n", 252 | "4 NaN NaN NaN \n", 253 | "\n", 254 | " comment_ids \n", 255 | "0 gjacwx5,gjad2l6,gjadatw,gjadc7w,gjadcwh,gjadgd... 
\n", 256 | "1 gjacn1r \n", 257 | "2 gjac5fb,gjacdy5,gjaco45,gjasj4f,gjbxfeg \n", 258 | "3 gjac9d6 \n", 259 | "4 gjaocmg,gjb2jsj,gjbisrw,gjbjbk8 \n", 260 | "\n", 261 | "[5 rows x 89 columns]" 262 | ] 263 | }, 264 | "execution_count": 3, 265 | "metadata": {}, 266 | "output_type": "execute_result" 267 | } 268 | ], 269 | "source": [ 270 | "# import test data into a dataframe\n", 271 | "posts_df = pd.read_csv(f'./test_data.csv', delimiter=';', header=0)\n", 272 | "posts_df.head(5)" 273 | ] 274 | }, 275 | { 276 | "cell_type": "code", 277 | "execution_count": 4, 278 | "id": "desirable-register", 279 | "metadata": {}, 280 | "outputs": [ 281 | { 282 | "data": { 283 | "text/plain": [ 284 | "2500" 285 | ] 286 | }, 287 | "execution_count": 4, 288 | "metadata": {}, 289 | "output_type": "execute_result" 290 | } 291 | ], 292 | "source": [ 293 | "len(posts_df)" 294 | ] 295 | }, 296 | { 297 | "cell_type": "markdown", 298 | "id": "realistic-glasgow", 299 | "metadata": {}, 300 | "source": [ 301 | "The data in `posts_df` contains 2500 submissions and their respective metadata extracted from a subreddit submission search; the `comment_ids` were added post-search with additional requests. For the purpose of demonstration, submission ids will be used from this dataframe, even though the data has already been retrieved." 
302 | ] 303 | }, 304 | { 305 | "cell_type": "code", 306 | "execution_count": 5, 307 | "id": "underlying-bradford", 308 | "metadata": {}, 309 | "outputs": [ 310 | { 311 | "data": { 312 | "text/plain": [ 313 | "['kxi2w8', 'kxi2g1', 'kxhzrl']" 314 | ] 315 | }, 316 | "execution_count": 5, 317 | "metadata": {}, 318 | "output_type": "execute_result" 319 | } 320 | ], 321 | "source": [ 322 | "# create submission ID list\n", 323 | "post_ids = list(posts_df.loc[:, 'id'])\n", 324 | "post_ids[:3]" 325 | ] 326 | }, 327 | { 328 | "cell_type": "markdown", 329 | "id": "drawn-lebanon", 330 | "metadata": {}, 331 | "source": [ 332 | "## Search Submissions" 333 | ] 334 | }, 335 | { 336 | "cell_type": "code", 337 | "execution_count": 6, 338 | "id": "intensive-junior", 339 | "metadata": {}, 340 | "outputs": [ 341 | { 342 | "name": "stdout", 343 | "output_type": "stream", 344 | "text": [ 345 | "Total:: Success Rate: 90.91% - Requests: 11 - Batches: 2 - Items Remaining: 0\n", 346 | "Wall time: 12.4 s\n" 347 | ] 348 | } 349 | ], 350 | "source": [ 351 | "%%time\n", 352 | "posts = api.search_submissions(subreddit=\"science\", limit=1000)" 353 | ] 354 | }, 355 | { 356 | "cell_type": "markdown", 357 | "id": "abroad-median", 358 | "metadata": {}, 359 | "source": [ 360 | "### Using a query string" 361 | ] 362 | }, 363 | { 364 | "cell_type": "code", 365 | "execution_count": 7, 366 | "id": "usual-miami", 367 | "metadata": {}, 368 | "outputs": [ 369 | { 370 | "name": "stdout", 371 | "output_type": "stream", 372 | "text": [ 373 | "Total:: Success Rate: 100.00% - Requests: 10 - Batches: 1 - Items Remaining: 599\n", 374 | "Total:: Success Rate: 100.00% - Requests: 20 - Batches: 2 - Items Remaining: 175\n", 375 | "Total:: Success Rate: 100.00% - Requests: 25 - Batches: 3 - Items Remaining: 0\n", 376 | "Wall time: 37.7 s\n" 377 | ] 378 | } 379 | ], 380 | "source": [ 381 | "%%time\n", 382 | "# example with passing a query string\n", 383 | "posts = api.search_submissions(q=\"quantum\", 
subreddit=\"science\", limit=1000)" 384 | ] 385 | }, 386 | { 387 | "cell_type": "code", 388 | "execution_count": 9, 389 | "id": "wicked-duplicate", 390 | "metadata": {}, 391 | "outputs": [ 392 | { 393 | "name": "stdout", 394 | "output_type": "stream", 395 | "text": [ 396 | "1000 posts retrieved\n" 397 | ] 398 | } 399 | ], 400 | "source": [ 401 | "print(f'{len(posts)} posts retrieved')" 402 | ] 403 | }, 404 | { 405 | "cell_type": "markdown", 406 | "id": "valued-cooking", 407 | "metadata": {}, 408 | "source": [ 409 | "Since the `search_submissions` method returns a `Response` object which is a generator we store the posts in the list:" 410 | ] 411 | }, 412 | { 413 | "cell_type": "code", 414 | "execution_count": 10, 415 | "id": "satisfied-payment", 416 | "metadata": {}, 417 | "outputs": [], 418 | "source": [ 419 | "post_list = [p for p in posts]" 420 | ] 421 | }, 422 | { 423 | "cell_type": "markdown", 424 | "id": "super-interaction", 425 | "metadata": {}, 426 | "source": [ 427 | "## Search Submissions by ID" 428 | ] 429 | }, 430 | { 431 | "cell_type": "markdown", 432 | "id": "innovative-buddy", 433 | "metadata": {}, 434 | "source": [ 435 | "### Using a Single Submission ID" 436 | ] 437 | }, 438 | { 439 | "cell_type": "code", 440 | "execution_count": 11, 441 | "id": "express-matter", 442 | "metadata": { 443 | "scrolled": true 444 | }, 445 | "outputs": [ 446 | { 447 | "name": "stdout", 448 | "output_type": "stream", 449 | "text": [ 450 | "Total:: Success Rate: 100.00% - Requests: 1 - Batches: 1 - Items Remaining: 0\n" 451 | ] 452 | } 453 | ], 454 | "source": [ 455 | "post = api.search_submissions(ids=post_ids[0])" 456 | ] 457 | }, 458 | { 459 | "cell_type": "markdown", 460 | "id": "emotional-hobby", 461 | "metadata": {}, 462 | "source": [ 463 | "### Using Multiple Submission IDs" 464 | ] 465 | }, 466 | { 467 | "cell_type": "code", 468 | "execution_count": 12, 469 | "id": "creative-needle", 470 | "metadata": {}, 471 | "outputs": [ 472 | { 473 | "name": "stdout", 474 | 
"output_type": "stream", 475 | "text": [ 476 | "Total:: Success Rate: 100.00% - Requests: 3 - Batches: 1 - Items Remaining: 0\n", 477 | "Wall time: 4.36 s\n" 478 | ] 479 | } 480 | ], 481 | "source": [ 482 | "%%time\n", 483 | "posts = api.search_submissions(ids=post_ids)" 484 | ] 485 | }, 486 | { 487 | "cell_type": "code", 488 | "execution_count": 13, 489 | "id": "valid-techno", 490 | "metadata": {}, 491 | "outputs": [ 492 | { 493 | "name": "stdout", 494 | "output_type": "stream", 495 | "text": [ 496 | "2500 submissions returned by Pushshift\n" 497 | ] 498 | } 499 | ], 500 | "source": [ 501 | "print(f'{len(posts)} submissions returned by Pushshift')" 502 | ] 503 | }, 504 | { 505 | "cell_type": "markdown", 506 | "id": "completed-favor", 507 | "metadata": {}, 508 | "source": [ 509 | "### Convert to Dataframe" 510 | ] 511 | }, 512 | { 513 | "cell_type": "code", 514 | "execution_count": 15, 515 | "id": "dynamic-pizza", 516 | "metadata": {}, 517 | "outputs": [], 518 | "source": [ 519 | "# convert submissions to dataframe\n", 520 | "new_posts_df = pd.DataFrame(post_list)" 521 | ] 522 | }, 523 | { 524 | "cell_type": "code", 525 | "execution_count": 16, 526 | "id": "complete-state", 527 | "metadata": {}, 528 | "outputs": [ 529 | { 530 | "data": { 531 | "text/html": [ 532 | "
\n", 533 | "\n", 546 | "\n", 547 | " \n", 548 | " \n", 549 | " \n", 550 | " \n", 551 | " \n", 552 | " \n", 553 | " \n", 554 | " \n", 555 | " \n", 556 | " \n", 557 | " \n", 558 | " \n", 559 | " \n", 560 | " \n", 561 | " \n", 562 | " \n", 563 | " \n", 564 | " \n", 565 | " \n", 566 | " \n", 567 | " \n", 568 | " \n", 569 | " \n", 570 | " \n", 571 | " \n", 572 | " \n", 573 | " \n", 574 | " \n", 575 | " \n", 576 | " \n", 577 | " \n", 578 | " \n", 579 | " \n", 580 | " \n", 581 | " \n", 582 | " \n", 583 | " \n", 584 | " \n", 585 | " \n", 586 | " \n", 587 | " \n", 588 | " \n", 589 | " \n", 590 | " \n", 591 | " \n", 592 | " \n", 593 | " \n", 594 | " \n", 595 | " \n", 596 | " \n", 597 | " \n", 598 | " \n", 599 | " \n", 600 | " \n", 601 | " \n", 602 | " \n", 603 | " \n", 604 | " \n", 605 | " \n", 606 | " \n", 607 | " \n", 608 | " \n", 609 | " \n", 610 | " \n", 611 | " \n", 612 | " \n", 613 | " \n", 614 | " \n", 615 | " \n", 616 | " \n", 617 | " \n", 618 | " \n", 619 | " \n", 620 | " \n", 621 | " \n", 622 | " \n", 623 | " \n", 624 | " \n", 625 | " \n", 626 | " \n", 627 | " \n", 628 | " \n", 629 | " \n", 630 | " \n", 631 | " \n", 632 | " \n", 633 | " \n", 634 | " \n", 635 | " \n", 636 | " \n", 637 | " \n", 638 | " \n", 639 | " \n", 640 | " \n", 641 | " \n", 642 | " \n", 643 | " \n", 644 | " \n", 645 | " \n", 646 | " \n", 647 | "
all_awardingsallow_live_commentsauthorauthor_flair_css_classauthor_flair_richtextauthor_flair_textauthor_flair_typeauthor_fullnameauthor_patreon_flairauthor_premium...steward_reportsog_descriptionog_titleremoved_byrte_modeauthor_idview_countbrand_safecrosspost_parentcrosspost_parent_list
0[]FalseHeathenLemmingNone[]Nonetextt2_5on10d6uFalseFalse...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
1[]Falseclostridium_deadNone[]Nonetextt2_9uxh3FalseFalse...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
2[]FalseRomanTheOmenNone[]Nonetextt2_4r7zaFalseFalse...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
\n", 648 | "

3 rows × 87 columns

\n", 649 | "
" 650 | ], 651 | "text/plain": [ 652 | " all_awardings allow_live_comments author author_flair_css_class \\\n", 653 | "0 [] False HeathenLemming None \n", 654 | "1 [] False clostridium_dead None \n", 655 | "2 [] False RomanTheOmen None \n", 656 | "\n", 657 | " author_flair_richtext author_flair_text author_flair_type author_fullname \\\n", 658 | "0 [] None text t2_5on10d6u \n", 659 | "1 [] None text t2_9uxh3 \n", 660 | "2 [] None text t2_4r7za \n", 661 | "\n", 662 | " author_patreon_flair author_premium ... steward_reports og_description \\\n", 663 | "0 False False ... NaN NaN \n", 664 | "1 False False ... NaN NaN \n", 665 | "2 False False ... NaN NaN \n", 666 | "\n", 667 | " og_title removed_by rte_mode author_id view_count brand_safe \\\n", 668 | "0 NaN NaN NaN NaN NaN NaN \n", 669 | "1 NaN NaN NaN NaN NaN NaN \n", 670 | "2 NaN NaN NaN NaN NaN NaN \n", 671 | "\n", 672 | " crosspost_parent crosspost_parent_list \n", 673 | "0 NaN NaN \n", 674 | "1 NaN NaN \n", 675 | "2 NaN NaN \n", 676 | "\n", 677 | "[3 rows x 87 columns]" 678 | ] 679 | }, 680 | "execution_count": 16, 681 | "metadata": {}, 682 | "output_type": "execute_result" 683 | } 684 | ], 685 | "source": [ 686 | "new_posts_df.head(3)" 687 | ] 688 | } 689 | ], 690 | "metadata": { 691 | "kernelspec": { 692 | "display_name": "Python 3", 693 | "language": "python", 694 | "name": "python3" 695 | }, 696 | "language_info": { 697 | "codemirror_mode": { 698 | "name": "ipython", 699 | "version": 3 700 | }, 701 | "file_extension": ".py", 702 | "mimetype": "text/x-python", 703 | "name": "python", 704 | "nbconvert_exporter": "python", 705 | "pygments_lexer": "ipython3", 706 | "version": "3.6.12" 707 | } 708 | }, 709 | "nbformat": 4, 710 | "nbformat_minor": 5 711 | } 712 | -------------------------------------------------------------------------------- /pmaw/Cache.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pickle 3 | import json 4 | import hashlib 5 | import 
import logging
import re
import warnings
from pathlib import Path
import gzip

log = logging.getLogger(__name__)


class Cache:
    """Store and load request info and batched responses in an on-disk cache.

    Responses are written as gzip-compressed pickles named
    ``<checkpoint>-<key>-<num_responses>.pickle.gz`` inside ``self.folder``;
    request info is written to ``<key>_info.pickle.gz`` in the same folder.

    NOTE(security): cache files are read back with ``pickle.load``, which can
    execute arbitrary code. Only point ``cache_dir`` at directories whose
    contents you trust.
    """

    def __init__(self, payload, safe_exit, cache_dir=None, key=None):
        """Create the cache folder and, optionally, index existing cache files.

        Args:
            payload: JSON-serialisable request payload; hashed to derive the
                cache key when ``key`` is not given.
            safe_exit: when truthy, previously cached response files for this
                key are discovered via ``check_cache``.
            cache_dir: folder to store cache files in, defaults to "./cache".
            key: explicit cache key; skips hashing ``payload`` when provided.
        """
        if key is None:
            # sort_keys makes the key independent of the payload's key order
            key_str = json.dumps(payload, sort_keys=True).encode("utf-8")
            self.key = hashlib.md5(key_str).hexdigest()
            log.info(f"Response cache key: {self.key}")
        else:
            self.key = key

        # create cache folder
        self.folder = str(cache_dir) if cache_dir else "./cache"
        Path(self.folder).mkdir(exist_ok=True, parents=True)

        # filenames of cached response batches, in checkpoint order
        self.response_cache = []
        # total number of responses held in the cached batches
        self.size = 0
        if safe_exit:
            self.check_cache()

    @staticmethod
    def load_with_key(key, cache_dir=None):
        """Return a Cache for an existing ``key``, indexing its cached files."""
        return Cache({}, True, cache_dir, key)

    def cache_responses(self, responses):
        """Write ``responses`` to a new checkpoint file; no-op when empty."""
        if not responses:
            return

        num_resp = len(responses)
        checkpoint = len(self.response_cache) + 1
        self.size += num_resp
        log.debug(f"File Checkpoint {checkpoint}:: Caching {num_resp} Responses")

        filename = f"{checkpoint}-{self.key}-{num_resp}.pickle.gz"
        self.response_cache.append(filename)

        with gzip.open(f"{self.folder}/{filename}", "wb") as handle:
            pickle.dump(responses, handle, protocol=pickle.HIGHEST_PROTOCOL)

    def load_info(self):
        """Return the saved request info dict, or None when none was saved."""
        try:
            with gzip.open(f"{self.folder}/{self.key}_info.pickle.gz", "rb") as handle:
                return pickle.load(handle)
        except FileNotFoundError:
            log.info("No previous requests to load")
            return None

    def load_resp(self, cache_num):
        """Return the responses stored in the ``cache_num``-th cached file.

        Emits a warning and returns None when the file no longer exists.
        """
        filename = self.response_cache[cache_num]
        try:
            with gzip.open(f"{self.folder}/{filename}", "rb") as handle:
                return pickle.load(handle)
        except FileNotFoundError as exc:
            warnings.warn(f"Failed to load responses from {filename} - {exc}")
            return None

    def save_info(self, **kwargs):
        """Pickle the given keyword arguments as this key's request info."""
        filename = f"{self.folder}/{self.key}_info.pickle.gz"
        with gzip.open(filename, "wb") as handle:
            pickle.dump(kwargs, handle, protocol=pickle.HIGHEST_PROTOCOL)

    def check_cache(self):
        """Index response files already present in the cache folder for this key.

        Only exact ``<checkpoint>-<key>-<count>.pickle.gz`` names are accepted
        (the key and the dots are matched literally), and files are registered
        in ascending checkpoint order because ``os.listdir`` returns entries
        in arbitrary order.
        """
        pattern = re.compile(rf"(\d+)-{re.escape(self.key)}-(\d+)\.pickle\.gz")

        found = []
        for filename in os.listdir(self.folder):
            m = pattern.fullmatch(filename)
            if m:
                found.append((int(m.group(1)), filename, int(m.group(2))))

        for _, filename, num_resp in sorted(found):
            self.response_cache.append(filename)
            self.size += num_resp
from typing import Optional, Tuple


class Metadata:
    """Helper class for working with request metadata."""

    def __init__(self, metadata) -> None:
        # raw metadata mapping taken from a response; may be empty
        self._metadata = metadata

    @property
    def shards_are_down(self) -> bool:
        """True when shard info is missing or not every shard succeeded."""
        try:
            shards = self._metadata["es"].get("_shards")
        except KeyError:
            return True

        if shards is None:
            return True

        return shards["successful"] != shards["total"]

    @property
    def total_results(self) -> int:
        """Total hit count reported in the metadata, or 0 when unavailable."""
        try:
            return self._metadata["es"]["hits"]["total"]["value"]
        except KeyError:
            return 0

    @property
    def ranges(self) -> Tuple[Optional[float], Optional[float]]:
        """Return the ``(after, before)`` ``created_utc`` bounds of the query.

        Each bound is the stored ``gte`` / ``lt`` value divided by 1000
        (presumably milliseconds to epoch seconds), or None when the query
        carries no such bound. Id-based searches have no "bool" query section
        and therefore yield ``(None, None)``, as does metadata without an
        "es_query" section.
        """
        after, before = None, None
        es_query = self._metadata.get("es_query") or {}
        query_params = (es_query.get("query") or {}).get("bool")

        # if searching by ids, before and after timestamps won't exist
        # in the metadata, which instead looks like ['query']['ids']
        if not query_params:
            return after, before

        for condition in query_params.get("must", ()):
            if "bool" not in condition or "must" not in condition["bool"]:
                continue
            for nested_cond in condition["bool"]["must"]:
                if "range" not in nested_cond:
                    continue
                if "created_utc" not in nested_cond["range"]:
                    continue
                timestamp = nested_cond["range"]["created_utc"]
                # a single range condition may carry either or both bounds
                if "gte" in timestamp:
                    after = int(timestamp["gte"]) / 1000
                if "lt" in timestamp:
                    before = int(timestamp["lt"]) / 1000
        return after, before
from pmaw.PushshiftAPIBase import PushshiftAPIBase


class PushshiftAPI(PushshiftAPIBase):
    """Public search interface built on top of PushshiftAPIBase.

    Every search method forwards its keyword arguments to ``_search`` with a
    fixed ``kind`` and returns its result unchanged (documented by the
    project as a Response generator object).
    """

    def __init__(self, *args, **kwargs):
        """
        Helper class for interacting with the PushShift API for searching public reddit archival data using multiple threads.

        All arguments are passed straight through to the base class.

        Input:
            num_workers (int, optional) - Number of workers to use for multithreading, defaults to 10.
            max_sleep (int, optional) - Maximum rate-limit sleep time (in seconds) between requests, defaults to 60s.
            rate_limit (int, optional) - Target number of requests per minute for rate-averaging, defaults to 60 requests per minute.
            base_backoff (float, optional) - Base delay in seconds for exponential backoff, defaults to 0.5s
            batch_size (int, optional) - Size of batches for multithreading, defaults to number of workers.
            shards_down_behavior (str, optional) - Specifies how PMAW will respond if some shards are down during a query. Options are "warn" to only emit a warning, "stop" to throw a RuntimeError, or None to take no action. Defaults to "warn".
            limit_type (str, optional) - Type of rate limiting to use, default value is 'average' for rate averaging, use 'backoff' for exponential backoff
            jitter (str, optional) - Jitter to use with backoff, defaults to None, options are None, full, equal, decorr
            checkpoint (int, optional) - Size of interval in batches to print a checkpoint with stats, defaults to 10
            file_checkpoint (int, optional) - Size of interval in batches to cache responses when using mem_safe, defaults to 20
            praw (praw.Reddit, optional) - Used to enrich the Pushshift items retrieved with metadata directly from Reddit
        """
        super().__init__(*args, **kwargs)

    def search_submission_comment_ids(self, ids, **kwargs):
        """
        Look up the comment ids belonging to one or more submissions.

        Input:
            ids (str, list) - Submission id(s) to return the comment ids of
            max_ids_per_request (int, optional) - Maximum number of ids to use in a single request, defaults to 500, maximum 500.
            mem_safe (boolean, optional) - If True, stores responses in cache during operation, defaults to False
            safe_exit (boolean, optional) - If True, will safely exit if interrupted by storing current responses and requests in the cache. Will also load previous requests / responses if found in cache, defaults to False
            cache_dir (str, optional) - An absolute or relative folder path to cache responses in when mem_safe or safe_exit is enabled
        Output:
            Response generator object
        Raises:
            ValueError - if filter_fn is supplied, since it is not supported by this search
        """
        # reject the unsupported option up front, before touching kwargs
        if "filter_fn" in kwargs:
            raise ValueError(
                "filter_fn not supported for search_submission_comment_ids"
            )
        kwargs["ids"] = ids
        return self._search(kind="submission_comment_ids", **kwargs)

    def search_comments(self, **kwargs):
        """
        Search comments; all keyword arguments are forwarded to the search.

        Input:
            max_ids_per_request (int, optional) - Maximum number of ids to use in a single request, defaults to 500, maximum 500.
            max_results_per_request (int, optional) - Maximum number of items to return in a single non-id based request, defaults to 100, maximum 100.
            mem_safe (boolean, optional) - If True, stores responses in cache during operation, defaults to False
            search_window (int, optional) - Size in days for search window for submissions / comments in non-id based search, defaults to 365
            safe_exit (boolean, optional) - If True, will safely exit if interrupted by storing current responses and requests in the cache. Will also load previous requests / responses if found in cache, defaults to False
            filter_fn (function, optional) - A function used for custom filtering the results before saving them. Accepts a single comment parameter and returns False to filter out the item, otherwise returns True.
            cache_dir (str, optional) - An absolute or relative folder path to cache responses in when mem_safe or safe_exit is enabled
        Output:
            Response generator object
        """
        return self._search(kind="comment", **kwargs)

    def search_submissions(self, **kwargs):
        """
        Search submissions; all keyword arguments are forwarded to the search.

        Input:
            max_ids_per_request (int, optional) - Maximum number of ids to use in a single request, defaults to 500, maximum 500.
            max_results_per_request (int, optional) - Maximum number of items to return in a single non-id based request, defaults to 100, maximum 100.
            mem_safe (boolean, optional) - If True, stores responses in cache during operation, defaults to False
            search_window (int, optional) - Size in days for search window for submissions / comments in non-id based search, defaults to 365
            safe_exit (boolean, optional) - If True, will safely exit if interrupted by storing current responses and requests in the cache. Will also load previous requests / responses if found in cache, defaults to False
            filter_fn (function, optional) - A function used for custom filtering the results before saving them. Accepts a single submission parameter and returns False to filter out the item, otherwise returns True.
            cache_dir (str, optional) - An absolute or relative folder path to cache responses in when mem_safe or safe_exit is enabled
        Output:
            Response generator object
        """
        return self._search(kind="submission", **kwargs)
class PushshiftAPIBase:
    """Request engine behind the public Pushshift wrapper.

    Builds endpoint URLs, issues rate limited GET requests from a thread
    pool, hands the returned items to the active ``Request`` and keeps the
    success statistics that are written to the log.
    """

    # `{domain}` is filled in by `base_url`; the doubled braces leave a
    # literal `{endpoint}` placeholder behind for `_search` to format
    _base_url = "https://{domain}.pushshift.io/{{endpoint}}"

    def __init__(
        self,
        num_workers=10,
        max_sleep=60,
        rate_limit=60,
        base_backoff=0.5,
        batch_size=None,
        shards_down_behavior="warn",
        limit_type="average",
        jitter=None,
        checkpoint=10,
        file_checkpoint=20,
        praw=None,
    ):
        """
        Store the worker / checkpoint configuration and build the rate limiter

        Input:
            num_workers (int, optional) - Size of the thread pool, defaults to 10
            max_sleep (int, optional) - Passed to RateLimit as the maximum delay, defaults to 60
            rate_limit (int, optional) - Passed to RateLimit as the target rate, defaults to 60
            base_backoff (float, optional) - Passed to RateLimit as the backoff base, defaults to 0.5
            batch_size (int, optional) - Requests started per batch, falls back to num_workers when not set
            shards_down_behavior (str, optional) - 'warn' logs a warning, 'stop' raises RuntimeError, None ignores shards being down
            limit_type (str, optional) - Passed to RateLimit, defaults to 'average'
            jitter (str, optional) - Passed to RateLimit, defaults to None
            checkpoint (int, optional) - Interval in batches between logged checkpoints, defaults to 10
            file_checkpoint (int, optional) - Interval in batches between cache saves, defaults to 20
            praw (optional) - Handed to every Request created by _search
        """
        self.num_workers = num_workers
        self.domain = "api"
        self.shards_down_behavior = shards_down_behavior
        self.meta = Metadata({})
        self.resp_dict = {}
        self.checkpoint = checkpoint
        self.file_checkpoint = file_checkpoint
        self.praw = praw

        if batch_size:
            self.batch_size = batch_size
        else:
            self.batch_size = num_workers

        # instantiate rate limiter
        self._rate_limit = RateLimit(
            rate_limit, base_backoff, limit_type, max_sleep, jitter
        )

    @property
    def base_url(self):
        """Return the URL template with the domain filled in and `{endpoint}` left open."""
        return self._base_url.format(domain=self.domain)

    def _impose_rate_limit(self):
        """Ask the rate limiter for a delay and spend it in the request's idle task."""
        interval = self._rate_limit.delay()
        if interval > 0:
            self.req._idle_task(interval)

    def _get(self, url, payload=None):
        """
        Perform one rate limited GET request and return the `data` member of the JSON body

        Input:
            url (str) - Full request url
            payload (dict, optional) - Query parameters, defaults to no parameters
        Output:
            The value stored under "data" in the decoded response
        Raises:
            HTTPNotFoundError - the response status was 404
            HTTPError - any other status that is not 200
        """
        # None instead of a shared mutable `{}` default
        if payload is None:
            payload = {}

        self._impose_rate_limit()
        r = requests.get(url, params=payload)
        status = r.status_code
        reason = r.reason

        if status == 404:
            raise HTTPNotFoundError(f"HTTP {status} - {reason}")
        if status != 200:
            # TODO: add custom error types for rate limit and other errors
            raise HTTPError(f"HTTP {status} - {reason}")

        body = json.loads(r.text)

        # keep the metadata of the latest response, it is used to check if shards are down
        self.meta = Metadata(body.get("metadata", {}))
        total_results = self.meta.total_results
        if total_results:
            after, before = self.meta.ranges
            if after and before:
                # remember the total for this time range so the futures
                # handler can work out how many results are still missing
                self.resp_dict[(after, before)] = total_results

        return body["data"]

    def _multithread(self, check_total=False):
        """
        Drain the pending request list in batches using a thread pool

        Input:
            check_total (boolean, optional) - If True only a single request is sent
                and no statistics are updated, used by _search to learn how many
                results are available, defaults to False
        Raises:
            RuntimeError - shards are reported down and shards_down_behavior is 'stop'
        """
        with ThreadPoolExecutor(max_workers=self.num_workers) as executor:

            while len(self.req.req_list) > 0 and not self.req.exit.is_set():
                # reset resp_dict which tracks remaining responses for timeslices
                self.resp_dict = {}

                # set number of futures created to batch size
                num_reqs = 1 if check_total else min(
                    len(self.req.req_list), self.batch_size
                )
                reqs = [self.req.req_list.popleft() for _ in range(num_reqs)]

                futures = {
                    executor.submit(self._get, url_pay[0], url_pay[1]): url_pay
                    for url_pay in reqs
                }

                self._futures_handler(futures, check_total)

                # reset attempts if no failures
                self._rate_limit._check_fail()

                # check if shards are down
                if self.meta.shards_are_down and (
                    self.shards_down_behavior is not None
                ):
                    shards_down_message = "Not all PushShift shards are active. Query results may be incomplete."
                    if self.shards_down_behavior == "warn":
                        log.warning(shards_down_message)
                    if self.shards_down_behavior == "stop":
                        self._shutdown(executor)
                        raise RuntimeError(
                            shards_down_message
                            + f" {len(self.req.req_list)} unfinished requests."
                        )

                if check_total:
                    # a total check is a single request, never loop
                    break

                self.num_batches += 1
                if self.num_batches % self.file_checkpoint == 0:
                    # cache current results; done inline because the worker
                    # threads append to the same response list
                    self.req.save_cache()
                self._print_stats("Checkpoint")

            if not check_total:
                self._print_stats("Total")
            self._shutdown(executor)

    def _futures_handler(self, futures, check_total):
        """
        Consume completed futures, store their results and queue follow-up requests

        Input:
            futures (dict) - Maps each future to the (url, payload) tuple it was created from
            check_total (boolean) - If True results are discarded and statistics are not updated
        """
        for future in as_completed(futures):
            url_pay = futures[future]
            self.num_req += int(not check_total)
            try:
                data = future.result()
                self.num_suc += int(not check_total)
                url = url_pay[0]
                payload = url_pay[1]
                if not check_total:
                    self.req.save_resp(data)

                    log.debug(f"Remaining limit {self.req.limit}")
                    if self.req.limit <= 0:
                        log.debug(
                            f"Cancelling {len(self.req.req_list)} unfinished requests"
                        )
                        self.req.req_list.clear()
                        break

                    # handle time slicing logic
                    if "until" in payload and "since" in payload:
                        until = payload["until"]
                        since = payload["since"]
                        log.debug(
                            f"Time slice from {since} - {until} returned {len(data)} results"
                        )
                        total_results = self.resp_dict.get((since, until), 0)
                        log.debug(f"{total_results} total results for this time slice")
                        # calculate remaining results
                        remaining = total_results - len(data)

                        # number of timeslices is depending on remaining results
                        if remaining > self.req.max_results_per_request * 2:
                            num = 2
                        elif remaining > 0:
                            num = 1
                        else:
                            num = 0

                        # Pushshift occasionally reports remaining results that it is
                        # unable to return - len(data) == 0 when this happens, so new
                        # slices are only generated when something came back
                        if num > 0 and len(data) > 0:
                            # the last item's `created_utc` becomes the upper
                            # bound of the next timeslices
                            until = data[-1]["created_utc"]
                            # generate payloads
                            self.req.gen_slices(url, payload, since, until, num)

            except HTTPNotFoundError as exc:
                log.debug(f"Request Failed -- {exc}")
                # dont retry ids not found
                # it looks like submission/comment_ids/ returns 404s now
                if "ids" not in self.req.payload:
                    self.req.req_list.appendleft(url_pay)

            except HTTPError as exc:
                log.debug(f"Request Failed -- {exc}")
                self._rate_limit._req_fail()
                self.req.req_list.appendleft(url_pay)

    def _shutdown(self, exc, wait=False, cancel_futures=True):
        """
        Shut down an executor, cancelling pending futures when the interpreter supports it

        Input:
            exc - Executor to shut down
            wait (boolean, optional) - Forwarded to shutdown, defaults to False
            cancel_futures (boolean, optional) - Forwarded to shutdown when accepted, defaults to True
        """
        try:
            # pass cancel_futures keywords avail in python 3.9
            exc.shutdown(wait=wait, cancel_futures=cancel_futures)
        except TypeError:
            # TODO: manually cancel pending futures
            exc.shutdown(wait=wait)

    def _print_stats(self, prefix):
        """
        Log the success rate and progress counters

        Input:
            prefix (str) - "Checkpoint" logs only every `checkpoint` batches, "Total" always logs
        """
        # no request may have completed yet (e.g. the exit event was already
        # set), report 0% instead of dividing by zero
        rate = self.num_suc / self.num_req * 100 if self.num_req else 0.0
        remaining = self.req.limit
        if (self.num_batches % self.checkpoint == 0) and prefix == "Checkpoint":
            log.info(
                f"{prefix}:: Success Rate: {rate:.2f}% - Requests: {self.num_req} - Batches: {self.num_batches} - Items Remaining: {remaining}"
            )
        elif prefix == "Total":
            if remaining is not None and remaining < 0:
                remaining = 0  # don't print a neg number
            log.info(
                f"{prefix}:: Success Rate: {rate:.2f}% - Requests: {self.num_req} - Batches: {self.num_batches} - Items Remaining: {remaining}"
            )
            if self.req.praw and len(self.req.enrich_list) > 0:
                # let the user know praw enrichment is still in progress so it doesnt appear to hang after
                # finishing retrieval from Pushshift
                log.info(f"Finishing enrichment for {len(self.req.enrich_list)} items")

    def _reset(self):
        """Zero the success, request and batch counters."""
        self.num_suc = 0
        self.num_req = 0
        self.num_batches = 0

    def _search(
        self,
        kind,
        max_ids_per_request=500,
        max_results_per_request=100,
        mem_safe=False,
        search_window=365,
        dataset="reddit",
        safe_exit=False,
        cache_dir=None,
        filter_fn=None,
        **kwargs,
    ):
        """
        Run a complete search and return the collected responses

        Input:
            kind (str) - "comment", "submission" or "submission_comment_ids"
            max_ids_per_request (int, optional) - Forwarded to Request, defaults to 500
            max_results_per_request (int, optional) - Forwarded to Request, defaults to 100
            mem_safe (boolean, optional) - Forwarded to Request, defaults to False
            search_window (int, optional) - Size in days of the search window used when generating payloads, defaults to 365
            dataset (str, optional) - First path segment of the endpoint, defaults to "reddit"
            safe_exit (boolean, optional) - Forwarded to Request, defaults to False
            cache_dir (str, optional) - Forwarded to Request
            filter_fn (function, optional) - Forwarded to Request
            **kwargs - Query parameters, deep-copied into the Request payload
        Output:
            The Response object owned by the Request
        Raises:
            NotImplementedError - an `aggs` parameter was supplied
        """

        # TODO: remove this warning once 404s stop happening
        if kind == "submission_comment_ids":
            log.warning(
                "submission comment id search may return no results due to COLO switchover"
            )

        # raise error if aggs are requested
        if "aggs" in kwargs:
            err_msg = "Aggregations support for {} has not yet been implemented, please use the PSAW package for your request"
            raise NotImplementedError(err_msg.format(kwargs["aggs"]))

        self.meta = Metadata({})
        self.resp_dict = {}
        self.req = Request(
            copy.deepcopy(kwargs),
            filter_fn,
            kind,
            max_results_per_request,
            max_ids_per_request,
            mem_safe,
            safe_exit,
            cache_dir,
            self.praw,
        )

        # reset stat tracking
        self._reset()

        if kind == "submission_comment_ids":
            endpoint = f"{dataset}/submission/comment_ids/"
        else:
            endpoint = f"{dataset}/{kind}/search"

        url = self.base_url.format(endpoint=endpoint)

        while (
            self.req.limit is None or self.req.limit > 0
        ) and not self.req.exit.is_set():
            # set/update limit
            if "ids" not in self.req.payload and len(self.req.req_list) == 0:
                # check to see how many results are remaining
                self.req.req_list.appendleft((url, self.req.payload))
                self._multithread(check_total=True)
                total_avail = self.meta.total_results

                if self.req.limit is None:
                    log.info(f"{total_avail} result(s) available in Pushshift")
                    self.req.limit = total_avail
                elif total_avail < self.req.limit:
                    log.info(
                        f"{self.req.limit - total_avail} result(s) not found in Pushshift"
                    )
                    log.info(f"{total_avail} total available")
                    self.req.limit = total_avail

            # generate payloads
            self.req.gen_url_payloads(url, self.batch_size, search_window)

            # check for exit signals
            self.req.check_sigs()

            if self.req.limit > 0 and len(self.req.req_list) > 0:
                self._multithread()

        self.req.save_cache()
        return self.req.resp
class RateLimit:
    """RateLimit: Implements different rate-limiting strategies for concurrent requests"""

    # length in seconds of the sliding window used by rate averaging
    _WINDOW = 60

    def __init__(
        self,
        rate_limit=60,
        base_backoff=0.5,
        limit_type="average",
        max_sleep=60,
        jitter=None,
    ):
        """
        Input:
            rate_limit (int, optional) - Target number of requests per 60 seconds for rate averaging, defaults to 60
            base_backoff (float, optional) - Base delay in seconds for exponential backoff, defaults to 0.5
            limit_type (str, optional) - 'average' or 'backoff', a falsy value disables limiting, defaults to 'average'
            max_sleep (int, optional) - Upper bound in seconds for any returned delay, defaults to 60
            jitter (str, optional) - Jitter used with backoff: None, 'full', 'equal' or 'decorr', defaults to None
        """
        self.rate_limit = rate_limit
        # timestamps of the requests seen inside the sliding window
        self.cache = []
        self.base = base_backoff
        self.limit_type = limit_type
        self.max_sleep = max_sleep
        self.jitter = jitter
        # last delay handed out by decorrelated jitter
        self.sleep = self.base

        # track failures and attempts
        self.last_batch = 0
        self.attempts = 0
        self.num_fail = 0

    def delay(self):
        """
        Return the number of seconds to wait before the next request

        Output:
            0 when limiting is disabled, otherwise the delay of the configured strategy
        Raises:
            ValueError - limit_type is set to something other than 'average' or 'backoff'
        """
        if not self.limit_type:
            return 0
        if self.limit_type == "average":
            return min(self.max_sleep, self._average())
        if self.limit_type == "backoff":
            return self._backoff()
        # previously fell through and returned None, which only surfaced
        # later as a TypeError when the caller compared the delay with 0
        raise ValueError(
            f"Unknown limit_type {self.limit_type!r}, expected 'average', 'backoff' or None"
        )

    def _req_fail(self):
        """Record one failed request."""
        self.num_fail += 1

    def _check_fail(self):
        """Reset the backoff state after a batch without new failures, otherwise count another attempt."""
        # reset attempts if no new failures
        if self.last_batch == self.num_fail:
            self.num_fail = 0
            self.last_batch = 0
            self.attempts = 0
            self.sleep = self.base
        else:
            # store last batch num failures
            self.last_batch = self.num_fail

            # increase number of attempted batches
            self.attempts += 1

    def _expo(self):
        """Return base * 2^attempts, capped at max_sleep."""
        return min(self.max_sleep, self.base * pow(2, self.attempts))

    def _backoff(self):
        """
        Return the exponential backoff delay with the configured jitter applied

        Raises:
            ValueError - jitter is set to something other than 'equal', 'full' or 'decorr'
        """
        if not self.jitter:
            return self._expo()
        if self.jitter == "equal":
            # half of the delay is fixed, the other half is random
            v = self._expo()
            return v / 2 + random.uniform(0, v / 2)
        if self.jitter == "full":
            return random.uniform(0, self._expo())
        if self.jitter == "decorr":
            # next delay is drawn relative to the previous one
            self.sleep = min(
                self.max_sleep, random.uniform(self.base, self.sleep * 3)
            )
            return self.sleep
        # previously fell through and returned None
        raise ValueError(
            f"Unknown jitter {self.jitter!r}, expected 'equal', 'full', 'decorr' or None"
        )

    def _average(self):
        """
        Return the delay needed to keep the request rate inside rate_limit

        Registers the current time as a request, drops requests older than
        the window and projects the rate over the remaining timestamps.
        """
        # calculating delay required based on rate averaging
        curr_time = time.time()
        self.cache.append(curr_time)

        first_req = min(self.cache)
        last_req = max(self.cache)

        # remove requests older than 60 seconds old
        while curr_time - first_req > self._WINDOW:
            try:
                self.cache.remove(first_req)
            except ValueError:
                log.debug(f"{first_req} has already been removed RL cache")
            first_req = min(self.cache)

        num_req = len(self.cache)

        # return 0 if no other requests on cache
        if last_req == first_req:
            return 0

        period = last_req - first_req

        # project rate with no delay
        proj_rate = self._WINDOW * num_req / period

        # check if projected rate is too high
        if proj_rate < self.rate_limit or num_req < 5:
            return 0
        return self._WINDOW * num_req / self.rate_limit - period
class Request:
    """Request: Handles request information, response saving, and cache usage."""

    def __init__(
        self,
        payload,
        filter_fn,
        kind,
        max_results_per_request,
        max_ids_per_request,
        mem_safe,
        safe_exit,
        cache_dir=None,
        praw=None,
    ):
        """
        Input:
            payload (dict) - Query parameters, modified in place while the search progresses
            filter_fn (function) - Optional callable applied to results before they are stored
            kind (str) - "comment", "submission" or "submission_comment_ids"
            max_results_per_request (int) - Capped at 100
            max_ids_per_request (int) - Capped at 500
            mem_safe (boolean) - If True responses are moved to the cache by save_cache
            safe_exit (boolean) - If True request info is cached and previously cached info is loaded
            cache_dir (str, optional) - Folder handed to the cache
            praw (optional) - When set, only ids are requested and items are enriched through praw.info
        Raises:
            ValueError - filter_fn is set but not callable
            NotImplementedError - safe_exit is combined with praw, or a non-default order is requested
        """
        self.kind = kind
        self.max_ids_per_request = min(500, max_ids_per_request)
        self.max_results_per_request = min(100, max_results_per_request)
        self.safe_exit = safe_exit
        self.mem_safe = mem_safe
        self.req_list = deque()
        self.payload = payload
        self.limit = payload.get("limit", None)
        self.exit = Event()
        self.praw = praw
        self._filter = filter_fn

        if filter_fn is not None and not callable(filter_fn):
            raise ValueError("filter_fn must be a callable function")

        if safe_exit and self.payload.get("until", None) is None:
            # warn the user not to use safe_exit without setting until,
            # doing otherwise will make it impossible to resume without modifying
            # future query to use until value from first run
            before = int(dt.datetime.now().timestamp())
            payload["until"] = before
            warnings.warn(
                f"Using safe_exit without setting until value is not recommended. Setting until to {before}"
            )

        if self.praw is not None:
            if safe_exit:
                raise NotImplementedError(
                    "safe_exit is not implemented when PRAW is used for metadata enrichment"
                )

            # fullnames waiting to be enriched through praw
            self.enrich_list = deque()

            if not kind == "submission_comment_ids":
                # id filter causes an error for submission_comment_ids endpoint
                self.payload["filter"] = "id"

            # fullname prefix: t3_ for submissions, t1_ for comments
            if kind == "submission":
                self.prefix = "t3_"
            else:
                self.prefix = "t1_"

        if "ids" not in self.payload:
            # add necessary args
            self._add_nec_args(self.payload)

        if mem_safe or safe_exit:
            # instantiate cache
            _tmp = copy.deepcopy(payload)
            _tmp["kind"] = kind
            self._cache = Cache(_tmp, safe_exit, cache_dir=cache_dir)
            if safe_exit:
                info = self._cache.load_info()
                if info is not None:
                    self.req_list.extend(info["req_list"])
                    self.payload = info["payload"]
                    self.limit = info["limit"]
                    log.info(
                        f"Loaded Cache:: Responses: {self._cache.size} - Pending Requests: {len(self.req_list)} - Items Remaining: {self.limit}"
                    )
        else:
            self._cache = None

        # instantiate response
        self.resp = Response(self._cache)

    def check_sigs(self):
        """Install _exit as handler for SIGTERM, SIGINT and, where the platform defines it, SIGHUP."""
        if hasattr(signal, "SIGHUP"):
            sigs = ("TERM", "HUP", "INT")
        else:
            sigs = ("TERM", "INT")

        for sig in sigs:
            signal.signal(getattr(signal, "SIG" + sig), self._exit)

    def _enrich_data(self):
        """Enrich up to 100 queued fullnames through praw and store the filtered results."""
        # create batch of fullnames up to 100
        fullnames = []
        while len(fullnames) < 100:
            try:
                fullnames.append(self.enrich_list.popleft())
            except IndexError:
                break

        # nothing to enrich
        if len(fullnames) == 0:
            return

        try:
            # TODO: may need to change praw usage based on multithread performance
            resp_gen = self.praw.info(fullnames=fullnames)
            praw_data = [vars(obj) for obj in resp_gen]
            results = self._apply_filter(praw_data)
            self.resp.responses.extend(results)

        except RedditAPIException:
            # put the batch back so it is retried later
            self.enrich_list.extend(fullnames)

    def _idle_task(self, interval):
        """
        Spend a rate limit delay, using the time for praw enrichment when possible

        Input:
            interval (float) - Number of seconds to wait
        """
        start = time.time()
        current = time.time()

        if self.praw:
            # make multiple enrich requests based on sleep interval
            while current - start < interval and len(self.enrich_list) > 0:
                self._enrich_data()
                current = time.time()

        current = time.time()
        diff = current - start

        # sleep for whatever is left of the interval
        if diff < interval and diff >= 0:
            time.sleep(interval - diff)

    def save_cache(self):
        """Trim surplus results, finish pending enrichment and move responses to the cache when enabled."""
        # trim extra responses
        self.trim()

        # enrich if needed
        if self.praw:
            while len(self.enrich_list) > 0:
                self._enrich_data()

        if (
            self.safe_exit
            and self.limit is not None
            and (self.limit == 0 or self.exit.is_set())
        ):
            # save request info to cache
            self._cache.save_info(
                req_list=self.req_list, payload=self.payload, limit=self.limit
            )
            # save responses to cache
            self.resp.to_cache()
        elif self.mem_safe:
            self.resp.to_cache()

    def _exit(self, signo, _frame):
        """Signal handler, sets the exit event."""
        self.exit.set()

    def _apply_filter(self, results):
        """Return results passed through the user defined filter, or unchanged when no filter is set."""
        # apply user defined filter function before storing
        if self._filter is not None:
            return apply_filter(results, self._filter)
        return results

    def save_resp(self, results):
        """
        Account for a batch of results against the limit and store or queue them

        Input:
            results (list) - Items returned by one request
        """
        # dont filter results before updating limit: limit is the max number of results
        # extracted from Pushshift, filtering can reduce the results < limit
        if self.kind == "submission_comment_ids":
            self.limit -= 1
        else:
            self.limit -= len(results)

        if self.praw:
            # save fullnames of objects to be enriched with metadata by PRAW
            if self.kind == "submission_comment_ids":
                self.enrich_list.extend([self.prefix + res for res in results])
            else:
                self.enrich_list.extend([self.prefix + res["id"] for res in results])
        else:
            results = self._apply_filter(results)
            self.resp.responses.extend(results)

    def _add_nec_args(self, payload):
        """Adds arguments to the payload as necessary."""

        payload["size"] = self.max_results_per_request

        # set to true to get a real count,
        # otherwise `total_results` estimate maxes out at 10000
        payload["track_total_hits"] = True

        # we need to sort by created_utc for slicing to work
        payload["sort"] = "created_utc"

        if "order" not in payload:
            payload["order"] = "desc"
        elif payload.get("order") != "desc":
            err_msg = "Support for non-default order has not been implemented as it may cause unexpected results"
            raise NotImplementedError(err_msg)

        if "until" not in payload:
            payload["until"] = int(dt.datetime.now().timestamp())

        if "filter" in payload:
            if not isinstance(payload["filter"], list):
                if isinstance(payload["filter"], str):
                    payload["filter"] = [payload["filter"]]
                else:
                    payload["filter"] = list(payload["filter"])

            # make sure that the created_utc field is returned
            if "created_utc" not in payload["filter"]:
                payload["filter"].append("created_utc")

            # there is a bug where multiple filters like:
            # filter=&filter=
            # only returns field2
            payload["filter"] = ",".join(payload["filter"])

    def gen_slices(self, url, payload, after, before, num):
        """
        Queue `num` requests that split the range after..before into equal time slices

        Input:
            url (str) - Request url shared by all slices
            payload (dict) - Payload copied for every slice
            after (int) - Lower bound of the range
            before (int) - Upper bound of the range
            num (int) - Number of slices to create
        """
        # create time slices
        ts = timeslice(after, before, num)
        url_payloads = [
            (url, mapslice(copy.deepcopy(payload), ts[i], ts[i + 1]))
            for i in range(num)
        ]
        self.req_list.extend(url_payloads)

    def gen_url_payloads(self, url, batch_size, search_window):
        """Creates a list of url payload tuples"""
        # new payloads are only made once the pending list is empty
        if len(self.req_list) > 0:
            return

        # paging for ids
        if "ids" in self.payload:

            # convert ids to list
            self._id_list(self.payload)

            all_ids = self.payload["ids"]
            if len(all_ids) == 0 and (self.limit and self.limit > 0):
                warnings.warn(f"{self.limit} items were not found in Pushshift")
            self.limit = len(all_ids)

            # remove ids from payload to prevent , -> %2C and increasing query length
            # beyond the max length of 8190
            self.payload["ids"] = []

            # if searching for submission comment ids
            if self.kind == "submission_comment_ids":
                url_payloads = [(url + sub_id, self.payload) for sub_id in all_ids]
            else:
                # split ids into comma joined groups of size max_ids_per_request
                max_len = self.max_ids_per_request
                ids_split = [
                    ",".join(all_ids[start : start + max_len])
                    for start in range(0, len(all_ids), max_len)
                ]

                log.debug(f"Created {len(ids_split)} id slices")

                # create url payload tuples
                url_payloads = [
                    (url + "?ids=" + id_str, self.payload) for id_str in ids_split
                ]
            # add payloads to req_list
            self.req_list.extend(url_payloads)

        else:
            num = batch_size
            before = self.payload["until"]
            if "since" not in self.payload:
                window = dt.timedelta(days=search_window)
                after = int((dt.datetime.fromtimestamp(before) - window).timestamp())
            else:
                after = self.payload["since"]

            # the next round continues below this one, which also avoids
            # repeated time slices when there are missed responses
            self.payload["until"] = after

            # generate payloads
            self.gen_slices(url, self.payload, after, before, num)

    def _id_list(self, payload):
        """Make sure payload["ids"] is a list, wrapping a single string id."""
        if not isinstance(payload["ids"], list):
            if isinstance(payload["ids"], str):
                payload["ids"] = [payload["ids"]]
            else:
                payload["ids"] = list(payload["ids"])

    def trim(self):
        """Drop the results collected beyond the limit, a negative limit counts the surplus."""
        if self.limit:
            if self.praw:
                # first drop surplus items that are still waiting for enrichment
                while self.limit < 0:
                    try:
                        self.enrich_list.pop()
                        self.limit += 1
                    except IndexError:
                        break
            if self.limit < 0:
                log.debug(f"Trimming {self.limit*-1} requests")
                self.resp.responses = self.resp.responses[: self.limit]
                self.limit = 0
class Response(Generator):
    """Response: A generator which contains the responses from the request, loads from cache if needed."""

    def __init__(self, cache=None):
        """
        Input:
            cache (optional) - Cache object that further responses are loaded from once the in-memory ones are used up
        """
        self.responses = []
        self._cache = cache
        # track length of remainder
        self.num_returned = 0
        # indexing for returning responses
        self.i = 0
        # number of cache chunks loaded so far
        self.num_cache = 0

    @staticmethod
    def load_cache(key, cache_dir=None):
        """
        Return an instance of Response with the results stored with the provided key

        Input:
            key (str) - Cache key for results to load into Response
            cache_dir (str, optional) - An absolute or relative folder path to load cached responses from, defaults to './cache'
        Output:
            Response generator object
        """
        cache = Cache.load_with_key(key, cache_dir)
        return Response(cache)

    def to_cache(self):
        """Hand the in-memory responses to the cache and empty the in-memory list."""
        self._cache.cache_responses(self.responses)
        self.responses.clear()

    def _next_resp(self):
        """Return the response at the current index and advance the index."""
        resp = self.responses[self.i]
        self.i += 1
        return resp

    def send(self, ignored_arg):
        """
        Return the next response, loading the next cache chunk when memory is used up

        Input:
            ignored_arg - Required by the generator protocol, not used
        Raises:
            StopIteration - no responses are left in memory or in the cache
        """
        if self.i < len(self.responses):
            return self._next_resp()

        # keep loading chunks until one has something to return, an empty
        # chunk used to fail with IndexError instead of being skipped
        while self._cache and self.num_cache < len(self._cache.response_cache):
            self.responses = self._cache.load_resp(self.num_cache)
            self.num_cache += 1
            # increase num returned to reflect responses retrieved from cache
            # as well as previously returned responses
            self.num_returned += self.i + len(self.responses)
            self.i = 0
            if len(self.responses) > 0:
                return self._next_resp()

        self.responses.clear()
        raise StopIteration

    def __del__(self):
        self.close()

    def throw(self, type=None, value=None, traceback=None):
        """Drop the in-memory responses and end iteration, the arguments are ignored."""
        log.debug("Cleaning up responses")
        self.responses.clear()
        raise StopIteration

    def __len__(self):
        """Return the number of responses that have not been returned yet, never negative."""
        if self._cache:
            length = (
                len(self.responses) + self._cache.size - (self.i + self.num_returned)
            )
        else:
            length = len(self.responses) - self.i

        return max(length, 0)
def apply_filter(array, filter_fn):
    """
    Return the items of `array` for which `filter_fn` returns a truthy value

    Input:
        array (iterable) - Items to filter
        filter_fn (function) - Called with a single item, a falsy return value filters the item out
    Output:
        List of the items that were kept, in their original order
    Raises:
        TypeError - filter_fn raised a TypeError, the original error is chained as the cause
        KeyError - filter_fn accessed a key that an item does not have, the original error is chained as the cause
    """
    filtered_array = []
    for item in array:
        try:
            keep = filter_fn(item)
        except TypeError as exc:
            # a single formatted message instead of a (message, exc) tuple,
            # which rendered as a tuple repr
            raise TypeError(f"An error occurred while filtering:\n{exc}") from exc
        except KeyError as exc:
            raise KeyError(
                f"The {exc} key does not exist for the item you are filtering"
            ) from exc

        if keep:
            filtered_array.append(item)

    return filtered_array
def timeslice(since, until, num):
    """Return the num + 1 integer boundaries that split since..until into num equal slices."""
    log.debug(f"Generating {num} slices between {since} and {until}")
    span = until - since
    boundaries = []
    for step in range(num + 1):
        # boundaries are truncated to whole numbers
        boundaries.append(int(span * step / num + since))
    return boundaries


def mapslice(payload, since, until):
    """Write the slice bounds into payload in place and return the same payload."""
    payload.update(until=until, since=since)
    return payload
'Programming Language :: Python :: 3.7', 36 | 'Programming Language :: Python :: 3.8', 37 | 'Programming Language :: Python :: 3.9', 38 | "Intended Audience :: Developers", 39 | "Topic :: Utilities", 40 | "License :: OSI Approved :: MIT License", 41 | "Operating System :: OS Independent", 42 | ], 43 | python_requires='>=3.5', 44 | ) 45 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mattpodolak/pmaw/bb102c2316ded945805f2de1b5cae5aacb032e66/tests/__init__.py -------------------------------------------------------------------------------- /tests/__mocks__/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mattpodolak/pmaw/bb102c2316ded945805f2de1b5cae5aacb032e66/tests/__mocks__/__init__.py -------------------------------------------------------------------------------- /tests/__mocks__/comment.py: -------------------------------------------------------------------------------- 1 | ids = [ 2 | 'gjacwx5', 3 | 'gjad2l6', 4 | 'gjadatw', 5 | 'gjadc7w', 6 | 'gjadcwh', 7 | 'gjadgd7', 8 | 'gjadlbc', 9 | 'gjadnoc', 10 | 'gjadog1', 11 | 'gjadphb' 12 | ] -------------------------------------------------------------------------------- /tests/__mocks__/metadata.py: -------------------------------------------------------------------------------- 1 | # https://api.pushshift.io/reddit/comment/search?until=1629990795&track_total_hits=true&since=1629960795 2 | before_after_query = { 3 | "es": { 4 | "took": 129, 5 | "timed_out": False, 6 | "_shards": { 7 | "total": 820, 8 | "successful": 820, 9 | "skipped": 816, 10 | "failed": 0 11 | }, 12 | "hits": { 13 | "total": { 14 | "value": 2184259, 15 | "relation": "eq" 16 | }, 17 | "max_score": None # null 18 | } 19 | }, 20 | "es_query": { 21 | "track_total_hits": True, 22 | "size": 10, 23 | "query": 
# Mock metadata payload for:
# https://api.pushshift.io/reddit/submission/search?ids=zhzaea
submission_id = {
    "es": {
        "took": 14,
        "timed_out": False,
        # every shard answered: successful == total
        "_shards": {"total": 4, "successful": 4, "skipped": 0, "failed": 0},
        "hits": {
            "total": {"value": 1, "relation": "eq"},
            "max_score": None,  # null
        },
    },
    "es_query": {
        "size": 10,
        "query": {"ids": {"values": [2146516066]}},
        "aggs": {},
        "sort": {"created_utc": "desc"},
    },
    # the same query as "es_query", held as a compact JSON string
    "es_query2": '{"size":10,"query":{"ids":{"values":[2146516066]}},"aggs":{},"sort":{"created_utc":"desc"}}',
}
# Don't record responses that weren't successful, usually due to rate limiting.
def bad_status(response):
    """Return ``response`` when its status code is recordable, else ``None``.

    Passed to ``vcr.VCR`` as the ``before_record_response`` hook in this
    module.

    Args:
        response: mapping with a nested ``["status"]["code"]`` entry.

    Returns:
        The same ``response`` object for status 200 or 404, otherwise ``None``.
    """
    status_code = response["status"]["code"]
    # TODO: remove 404 once submission comment ids endpoint is working
    if status_code in (200, 404):
        return response
    return None
@tape.use_cassette("test_submission_search_ids")
def test_filter_callable():
    """A non-callable ``filter_fn`` must be rejected with ``ValueError``."""
    api = PushshiftAPI()
    # Keep only the call under test inside ``pytest.raises``: a ValueError
    # raised while constructing the client must not make this test pass.
    with pytest.raises(ValueError):
        api.search_submissions(ids=post_ids, filter_fn="fxn")
def test_submission_id():
    """Metadata for an id lookup reports one result and no time range."""
    metadata = Metadata(mock_data.submission_id)
    total_results = metadata.total_results
    after, before = metadata.ranges
    # Identity checks: ``== None`` can be satisfied by any object whose
    # ``__eq__`` happens to accept None, ``is None`` cannot.
    assert before is None
    assert after is None
    assert total_results == 1
@tape.use_cassette("test_comment_search_limit")
def test_asc_sort():
    """Searching comments with ``order="asc"`` must raise NotImplementedError."""
    api = PushshiftAPI()
    # Only the search call belongs inside ``pytest.raises`` so that an error
    # from building the client is reported as a failure, not a pass.
    with pytest.raises(NotImplementedError):
        api.search_comments(
            subreddit="science", limit=100, until=1629990795, order="asc"
        )
@tape.use_cassette()
def test_comment_search_ids():
    """Searching comments by id returns as many items as ids requested."""
    pushshift = PushshiftAPI(file_checkpoint=1)
    found = pushshift.search_comments(ids=comment_ids)
    returned_count = len(found)
    assert returned_count == len(comment_ids)
@tape.use_cassette()
def test_submission_comment_ids_search():
    """Comment-id lookup without a PRAW client yields ``expected_length`` items."""
    client = PushshiftAPI(file_checkpoint=1)
    comment_id_results = client.search_submission_comment_ids(ids=ids)
    result_count = len(comment_id_results)
    assert result_count == expected_length
@tape.use_cassette()
def test_submission_search_ids():
    """Searching submissions by id returns the 10 posts that still exist."""
    client = PushshiftAPI(file_checkpoint=1)
    found_posts = client.search_submissions(ids=post_ids)
    # 6 out of 16 items not found (expected)
    found_count = len(found_posts)
    assert found_count == 10
@tape.use_cassette()
def test_submission_praw_ids():
    """Searching submissions by id with a PRAW client returns 10 posts."""
    praw_client = PushshiftAPI(file_checkpoint=1, praw=reddit)
    found_posts = praw_client.search_submissions(ids=post_ids)
    # 6 out of 16 items not found (expected)
    found_count = len(found_posts)
    assert found_count == 10