├── .github ├── ISSUE_TEMPLATE │ ├── BUG.yml │ ├── FEATURE-REQUEST.yml │ └── config.yml └── workflows │ ├── publish.yml │ ├── security.yml │ └── test.yml ├── .gitignore ├── CHANGELOG.md ├── LICENSE ├── README.md ├── faapi ├── __init__.py ├── __version__.py ├── base.py ├── comment.py ├── connection.py ├── exceptions.py ├── journal.py ├── parse.py ├── submission.py └── user.py ├── poetry.lock ├── pyproject.toml └── tests ├── test_connection.py ├── test_faapi.py └── test_parse.py /.github/ISSUE_TEMPLATE/BUG.yml: -------------------------------------------------------------------------------- 1 | name: Bug Report 2 | description: Open a bug issue 3 | title: "[Bug]: " 4 | labels: ["bug"] 5 | assignees: 6 | - MatteoCampinoti94 7 | body: 8 | - type: markdown 9 | attributes: 10 | value: | 11 | Thanks for taking the time to fill out this bug report! 12 | - type: input 13 | id: version 14 | attributes: 15 | label: Version 16 | description: What version of the program where you running? 17 | validations: 18 | required: true 19 | - type: textarea 20 | id: summary 21 | attributes: 22 | label: What happened? 23 | description: Summarize the bug encountered concisely 24 | validations: 25 | required: true 26 | - type: textarea 27 | id: reproduce 28 | attributes: 29 | label: How to reproduce the bug? 30 | description: Summarize the steps to encounter the bug 31 | validations: 32 | required: true 33 | - type: textarea 34 | id: logs 35 | attributes: 36 | label: Relevant log output 37 | description: Please copy and paste any relevant log output. This will be automatically formatted into code, so no need for backticks. 38 | render: shell -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/FEATURE-REQUEST.yml: -------------------------------------------------------------------------------- 1 | name: Feature Request 2 | description: Open a feature request 3 | title: "[Feature Request]: " 4 | labels: ["enhancement"] 5 | assignees: 6 | - MatteoCampinoti94 7 | body: 8 | - type: markdown 9 | attributes: 10 | value: | 11 | Thanks for taking the time to fill out this feature request! 12 | - type: textarea 13 | id: summary 14 | attributes: 15 | label: The idea 16 | description: Summarize your idea, including UX/UI changes 17 | validations: 18 | required: true 19 | - type: textarea 20 | id: implementation 21 | attributes: 22 | label: Implementation ideas 23 | description: How would you implement the feature? 
24 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/config.yml: -------------------------------------------------------------------------------- 1 | blank_issues_enabled: true 2 | -------------------------------------------------------------------------------- /.github/workflows/publish.yml: -------------------------------------------------------------------------------- 1 | name: Publish 2 | 3 | on: 4 | push: 5 | tags: 6 | - "v*.*.*" 7 | workflow_dispatch: 8 | 9 | env: 10 | PYTHON_VERSION: 3.11.0 11 | POETRY_VERSION: 1.3.1 12 | 13 | jobs: 14 | wait_static_tests: 15 | name: Wait for Static Tests 16 | runs-on: ubuntu-latest 17 | strategy: 18 | matrix: 19 | test: [ "Flake8", "mypy" ] 20 | python: [ "3.11", "3.10", "3.9" ] 21 | steps: 22 | - uses: actions/checkout@v4 23 | - name: Wait for Tests 24 | id: wait 25 | uses: fountainhead/action-wait-for-check@v1.0.0 26 | with: 27 | token: ${{ secrets.GITHUB_TOKEN }} 28 | checkName: "${{ matrix.test }} (${{ matrix.python }})" 29 | ref: ${{ github.event.push_request.head.sha || github.sha }} 30 | - if: ${{ steps.wait.outputs.conclusion != 'success' }} 31 | run: exit 1 32 | 33 | wait_unit_tests: 34 | name: Wait for Unit Tests 35 | runs-on: ubuntu-latest 36 | steps: 37 | - uses: actions/checkout@v4 38 | - name: Wait for Tests 39 | id: wait 40 | uses: fountainhead/action-wait-for-check@v1.0.0 41 | with: 42 | token: ${{ secrets.GITHUB_TOKEN }} 43 | checkName: "pytest" 44 | ref: ${{ github.event.push_request.head.sha || github.sha }} 45 | - if: ${{ steps.wait.outputs.conclusion != 'success' }} 46 | run: exit 1 47 | 48 | publish: 49 | name: Publish 50 | runs-on: ubuntu-latest 51 | needs: [ wait_static_tests, wait_unit_tests ] 52 | steps: 53 | - uses: actions/checkout@v4 54 | - uses: actions/setup-python@v5 55 | with: 56 | python-version: ${{ env.PYTHON_VERSION }} 57 | - uses: abatilo/actions-poetry@v2.0.0 58 | with: 59 | poetry-version: ${{ env.POETRY_VERSION }} 60 | - name: Build and publish 61 | env: 62 | PYPI_USERNAME: __token__ 63 | PYPI_PASSWORD: ${{ secrets.PYPI_API_TOKEN }} 64 | run: | 65 | poetry config virtualenvs.in-project true 66 | poetry install --no-root 67 | poetry publish --build --username $PYPI_USERNAME --password $PYPI_PASSWORD 68 | - name: Save dist 69 | uses: actions/upload-artifact@v4 70 | with: 71 | name: dist 72 | path: dist 73 | 74 | release: 75 | name: Release 76 | runs-on: ubuntu-latest 77 | needs: publish 78 | steps: 79 | - uses: actions/checkout@v4 80 | with: 81 | fetch-depth: 0 82 | - name: Get dist 83 | uses: actions/download-artifact@v4 84 | with: 85 | name: dist 86 | path: dist 87 | - name: Get Tag 88 | id: tag 89 | uses: olegtarasov/get-tag@v2.1.1 90 | - name: Clean CHANGELOG.md 91 | run: | 92 | touch CHANGELOG.md.tmp 93 | npm install -g prettier 94 | prettier --parser markdown --tab-width 4 --prose-wrap never CHANGELOG.md > CHANGELOG.md.tmp 95 | cat CHANGELOG.md.tmp > CHANGELOG.md 96 | - name: Build Release 97 | id: release 98 | uses: MatteoCampinoti94/changelog-to-release@v1.0.2 99 | with: 100 | version-name: ${{ steps.tag.outputs.tag }} 101 | - name: Build Release File 102 | env: 103 | TAG: ${{ steps.tag.outputs.tag }} 104 | RELEASE: ${{ steps.release.outputs.body }} 105 | run: | 106 | touch RELEASE.md 107 | PREVIOUS_TAG="$(git tag -l --sort=-version:refname | head -2 | tail -1)" 108 | printf "%s\n" "$RELEASE" > RELEASE.md 109 | printf "\n## 🔗 Links\n" >> RELEASE.md 110 | printf "\n* %s" "PyPi release: https://pypi.org/project/${GITHUB_REPOSITORY#*/}/${TAG#v}" >> 
RELEASE.md 111 | printf "\n* %s" "Full changelog: https://github.com/$GITHUB_REPOSITORY/compare/$PREVIOUS_TAG...$TAG" >> RELEASE.md 112 | cat RELEASE.md 113 | printf "\n\n## Dist Files\n" 114 | ls -l dist || echo " no files" 115 | - name: Create Release 116 | uses: softprops/action-gh-release@v1 117 | with: 118 | token: ${{ secrets.GITHUB_TOKEN }} 119 | tag_name: ${{ steps.tag.outputs.tag }} 120 | name: ${{ steps.release.outputs.title }} 121 | body_path: RELEASE.md 122 | files: | 123 | dist/* -------------------------------------------------------------------------------- /.github/workflows/security.yml: -------------------------------------------------------------------------------- 1 | name: "CodeQL" 2 | 3 | on: 4 | push: 5 | branches: [ main ] 6 | pull_request: 7 | branches: [ main ] 8 | schedule: 9 | - cron: '0 18 * * 6' 10 | workflow_dispatch: 11 | 12 | jobs: 13 | analyze: 14 | name: Analyze 15 | runs-on: ubuntu-latest 16 | permissions: 17 | actions: read 18 | contents: read 19 | security-events: write 20 | steps: 21 | - name: Checkout repository 22 | uses: actions/checkout@v3 23 | - name: Initialize CodeQL 24 | uses: github/codeql-action/init@v2 25 | with: 26 | languages: "python" 27 | - name: Perform CodeQL Analysis 28 | uses: github/codeql-action/analyze@v2 29 | -------------------------------------------------------------------------------- /.github/workflows/test.yml: -------------------------------------------------------------------------------- 1 | name: Test 2 | 3 | on: 4 | push: 5 | branches: [ main, dev ] 6 | pull_request: 7 | branches: [ main, dev ] 8 | schedule: 9 | - cron: '0 7 * * *' 10 | workflow_dispatch: 11 | 12 | env: 13 | PYTHON_VERSION: 3.11.0 14 | POETRY_VERSION: 1.3.1 15 | 16 | jobs: 17 | flake8: 18 | if: github.event_name != 'schedule' 19 | name: Flake8 20 | runs-on: ubuntu-latest 21 | strategy: 22 | matrix: 23 | python: [ "3.11", "3.10", "3.9" ] 24 | steps: 25 | - uses: actions/checkout@v4 26 | - uses: actions/setup-python@v5 27 | with: 28 | python-version: ${{ matrix.python }} 29 | - name: Run pip install 30 | run: python -m pip install 'flake8>=5.0.4' 31 | - name: Style Test 32 | id: test 33 | run: | 34 | python -m flake8 --max-line-length=120 faapi 35 | 36 | mypy: 37 | if: github.event_name != 'schedule' 38 | name: mypy 39 | runs-on: ubuntu-latest 40 | strategy: 41 | matrix: 42 | python: [ "3.11", "3.10", "3.9" ] 43 | steps: 44 | - uses: actions/checkout@v4 45 | - uses: actions/setup-python@v5 46 | with: 47 | python-version: ${{ matrix.python }} 48 | - name: Run pip install 49 | run: python -m pip install 'mypy>=0.991' 'pytest>=7.2.0' 'types-beautifulsoup4>=4.11.6' 50 | - name: Types test 51 | id: test 52 | run: | 53 | python -m mypy --install-types --non-interactive --check-untyped-defs faapi 54 | python -m mypy --install-types --non-interactive --check-untyped-defs tests 55 | 56 | pytest: 57 | name: pytest 58 | runs-on: ubuntu-latest 59 | steps: 60 | - uses: actions/checkout@v4 61 | - uses: actions/setup-python@v5 62 | with: 63 | python-version: ${{ env.PYTHON_VERSION }} 64 | - uses: abatilo/actions-poetry@v2.0.0 65 | with: 66 | poetry-version: ${{ env.POETRY_VERSION }} 67 | - run: | 68 | poetry install 69 | - name: Unit test 70 | env: 71 | TEST_DATA: ${{ secrets.TEST_DATA }} 72 | TEST_USER: ${{ secrets.TEST_USER }} 73 | TEST_SUBMISSION: ${{ secrets.TEST_SUBMISSION }} 74 | TEST_JOURNAL: ${{ secrets.TEST_JOURNAL }} 75 | run: | 76 | echo "$TEST_DATA" > tests/test_data.json 77 | echo "$TEST_USER" > tests/test_user.json 78 | echo "$TEST_SUBMISSION" > 
tests/test_submission.json 79 | echo "$TEST_JOURNAL" > tests/test_journal.json 80 | poetry run coverage run -m pytest tests/test_connection.py tests/test_parse.py tests/test_faapi.py -v --tb=line -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | 4 | # Distribution / packaging 5 | .Python 6 | env/ 7 | build/ 8 | develop-eggs/ 9 | dist/ 10 | downloads/ 11 | eggs/ 12 | .eggs/ 13 | lib/ 14 | lib64/ 15 | parts/ 16 | sdist/ 17 | var/ 18 | wheels/ 19 | *.egg-info/ 20 | .installed.cfg 21 | *.egg 22 | .coverage 23 | 24 | # virtualenv 25 | .venv 26 | venv/ 27 | ENV/ 28 | 29 | 30 | # mypy 31 | .mypy_cache/ 32 | 33 | # Editors folders 34 | .idea/ 35 | 36 | # System folders/files 37 | .DS_Store 38 | 39 | # Local test data 40 | tests/test_*.json -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | ## v3.11.9 4 | 5 | ### Fixes 6 | 7 | * Fix `KeyError` raised when parsing a submission or journal with hidden comments 8 | 9 | ## v3.11.8 10 | 11 | ### Changes 12 | 13 | * Update parser to support FurAffinity's new display names feature 14 | * Added `UserPartial.display_name` and `User.display_name` 15 | * Full support coming in next minor update 16 | 17 | ## v3.11.7 18 | 19 | ### Changes 20 | 21 | * Updated parser to work correctly with FurAffinity's new tag-blocking feature on submission pages 22 | 23 | ## v3.11.6 24 | 25 | ### Fixes 26 | 27 | * Fix next page of favorites 28 | * The selector path to the "Next" button had changed 29 | * The presence of the button is now tested 30 | 31 | ## v3.11.5 32 | 33 | ### Fixes 34 | 35 | * Fix a possible issue were cookie value could be set as `None` when using a `http.cookiejar.CookieJar` object 36 | 37 | ### Dependencies 38 | 39 | * Use [requests ^2.32.3](https://pypi.org/project/requests/2.32.3) 40 | * Use [beautifulsoup4 ^4.12.3](https://pypi.org/project/beautifulsoup4/4.12.3) 41 | * Use [lxml ^5.3.0](https://pypi.org/project/lxml/5.3.0) 42 | * Use [python-dateutil ^2.9.0](https://pypi.org/project/python-dateutil/2.9.0) 43 | 44 | ## v3.11.4 45 | 46 | ### Fixes 47 | 48 | * Fix square brackets [] being removed from usernames 49 | 50 | ## v3.11.3 51 | 52 | ### Fixes 53 | 54 | * Fix recursion limit error with chains of journal comments longer than ~1/6 of the recursion limit 55 | 56 | ## v3.11.2 57 | 58 | ### Changes 59 | 60 | * HTML content is not minified beyond basic stripping of whitespace characters 61 | 62 | ### Fixes 63 | 64 | * Fix recursion limit error with chains of submission comments longer than ~1/6 of the recursion limit 65 | * Fix incorrectly parsed usernames in comments with the OP (Original Poster) tag 66 | 67 | ### Dependencies 68 | 69 | * Use [lxml ^4.9.3](https://pypi.org/project/lxml/4.9.3) 70 | * Remove [htmlmin](https://pypi.org/project/htmlmin) 71 | 72 | ## v3.11.1 73 | 74 | ### Changes 75 | 76 | * Support submissions with no or partial category 77 | 78 | ## v3.11.0 79 | 80 | ### New Features 81 | 82 | * Session class for requests can be customized with new `session_class` argument for `FAAPI` 83 | 84 | ### Changes 85 | 86 | * Remove [cfscrape](https://pypi.org/project/cfscrape) dependency 87 | * Was not updated in years and all requests succeeded with a normal `requests.Session` object 88 | 89 | ### Dependencies 
90 | 91 | * Use [requests ^2.31.0](https://pypi.org/project/requests/2.31.0) 92 | * Fix [CVE-2023-32681](https://cve.report/CVE-2022-42969) issue 93 | * Use [beautifulsoup4 ^4.12.2](https://pypi.org/project/beautifulsoup4/4.12.2) 94 | * Use [lxml ^4.9.2](https://pypi.org/project/lxml/4.9.2) 95 | 96 | ## v3.10.1 97 | 98 | ### Fixes 99 | 100 | * Fix parsed URLs not being properly encoded if they contained non-allowed URL characters 101 | 102 | ## v3.10.0 103 | 104 | ### New Features 105 | 106 | * Fur Affinity UI update 107 | * Support the new UI introduced on November 26, 2022 108 | * *Note:* the new UI does not show comment parents yet, but the parent comment link is still present in the HTML and 109 | just commented out, so the parser uses regex to extract the parent ID; this could cause unforeseen issues so be 110 | careful when parsing comments 111 | * User banners 112 | * Parse new user banners (when set) 113 | * New `User.banner_url` variable holds the banner URL 114 | 115 | ### Changes 116 | 117 | * Rename `User.user_icon_url` and `UserPartial.user_icon_url` to `User.avatar_url` and `UserPartial.avatar_url` 118 | 119 | ### Dependencies 120 | 121 | * Use [flake ^6.0.0](https://pypi.org/project/flake/6.0.0) for testing 122 | 123 | ## v3.9.6 124 | 125 | ### Changes 126 | 127 | * Remove implicit `Optional` types to comply with [PEP 484](https://peps.python.org/pep-0484/) 128 | 129 | ### Fixes 130 | 131 | * Fix selectors for date tags in journals and submissions which sometimes caused the incorrect date to be selected 132 | 133 | ### Dependencies 134 | 135 | * Use [mypy ^0.991](https://pypi.org/project/mypy/0.991) 136 | * Complies with [PEP 484](https://peps.python.org/pep-0484/) 137 | 138 | ## v3.9.5 139 | 140 | ### Changes 141 | 142 | * Improve parsing of usernames and statuses 143 | * Thanks to PR [#7](https://github.com/FurryCoders/FAAPI/pull/7) by @Xraydylan 144 | 145 | ### Fixes 146 | 147 | * Fix parsing of user tags for folders when the user had no title set, or used bars (`|`) in their title 148 | 149 | ## v3.9.4 150 | 151 | ### Fixes 152 | 153 | * Fix admins' username and status not being parsed correctly in watchlists and users tags 154 | * Fix issue [#6](https://github.com/FurryCoders/FAAPI/issues/6) 155 | 156 | ## v3.9.3 157 | 158 | ### Changes 159 | 160 | * Users with non-alphanumeric characters in their name are now escaped in URLs 161 | * From suggestion in issue [#5](https://github.com/FurryCoders/FAAPI/issues/5) 162 | 163 | ### Fixes 164 | 165 | * Fix admins' username and status not being parsed correctly 166 | * Fix issue [#6](https://github.com/FurryCoders/FAAPI/issues/6) 167 | 168 | ## v3.9.2 169 | 170 | ### Fixes 171 | 172 | * Fix ` being removed from usernames 173 | 174 | ## v3.9.1 175 | 176 | ### Fixes 177 | 178 | * Fix incorrect user icon URLs when converting BBCode to HTML 179 | 180 | ### Dependencies 181 | 182 | * Use [pytest ^7.2.0](https://pypi.org/project/pytest/7.2.0) 183 | * Fix [CVE-2022-42969](https://cve.report/CVE-2022-42969) issue 184 | 185 | ## v3.9.0 186 | 187 | ### New Features 188 | 189 | * Submission footers 190 | * Submission footers are now separated from the submission description and stored in the `Submission.footer` field 191 | * The BBCode of the footer can be accessed with the `Submission.footer_bbcode` property 192 | * Generate user icon URLs 193 | * New `generate_user_icon_url()` method added to `UserPartial` and `User` to create the URL for the current user 194 | icon 195 | * BBCode to HTML conversion 196 | * Work-in-progress version of a 
BBCode converter based on the [bbcode](https://pypi.org/project/bbcode) library 197 | * Converter function is located in the `parse` submodule: `faapi.parse.bbcode_to_html()` 198 | * The majority of HTML fields (submission descriptions, journal contents, comments, etc.) can be converted back and 199 | forth between HTML and BBCode without loosing information 200 | * If a submission contains incorrect or very unusual BBCode tags or text, the BBCode to HTML conversion may create 201 | artifacts and tags that did not exist in the original content 202 | 203 | ### Changes 204 | 205 | * Added `Journal.header_bbcode` and `Journal.footer_bbcode` properties to convert `Journal.header` and `Journal.footer` 206 | to BBCode 207 | * Return `None` instead of 0 (or `""` for favorites) when reaching the last page with `FAAPI.gallery()` 208 | , `FAAPI.scraps()`, `FAAPI.journals()`, `FAAPI.favorites()`, `FAAPI.watchlist_by()`, and `FAAPI.watchlist_to()` 209 | * Added `__hash__` method to `User`, `UserPartial`, `Submission`, `SubmissionPartial`, `Journal`, `JournalPartial`, 210 | and `Comment`; the hash value is calculated using the same fields used for equality comparisons 211 | * Improved cleanup of HTML fields by using [htmlmin](https://pypi.org/project/htmlmin) 212 | * Fur Affinity URLs are now properly converted to relative `[url=]` tags in BBCode 213 | * Unknown tags are converted to `[tag=.]` in BBCode 214 | * Added `CookieDict(TypedDict)` notation for cookies dictionary (alternative to `CookieJar`) to provide intellisense and 215 | type checking information 216 | 217 | ### Fixes 218 | 219 | * Fix comments being considered equal even if they had different parents but the same ID 220 | * Fix break lines tags (`
`) not always being converted to newlines when converting to BBCode 221 | * Fix errors when converting nav links (e.g. `[2,1,3]`) to BBCode 222 | * Fix incorrect detection of last page in `FAAPI.watchlist_by()` and `FAAPI.watchlist_by()` 223 | * Fix errors when converting special characters (e.g. `&`) 224 | * Fix trailing spaces around newlines remaining after converting to BBCode 225 | * Fix horizontal lines not being correctly converted from BBCode if the dashes (`-----` or longer) were not surrounded 226 | by newlines 227 | 228 | ### Dependencies 229 | 230 | * Added [htmlmin ^0.1.12](https://pypi.org/project/htmlmin/0.1.12) 231 | * Added [bbcode ^1.1.0](https://pypi.org/project/bbcode/1.1.0) 232 | 233 | ## v3.8.1 234 | 235 | ### Changes 236 | 237 | * Improved HTML extraction for specific tags to avoid encoding issues 238 | * HTML fields are cleaned up (i.e., removed newlines, carriage returns, and extra spaces) 239 | * None of the parsed pages use tags with _pre_ white space rendering, so no information is lost 240 | * Improvements to BBCode conversion 241 | * Do not quote URLs when converting to BBCode 242 | * Support nested quote blocks 243 | * Support non-specific tags (e.g. `div.submission-footer`) and convert them 244 | to `[tag..][/tag.]` 245 | 246 | ### Fixes 247 | 248 | * Fix incorrect encoding of special characters (`<`, `>`, etc.) in HTML fields 249 | * Was caused by the previous method of extracting the inner HTML of a tag 250 | * Fix URLs automatically shortened by Fur Affinity being converted to BBCode with the wrong text content 251 | * Fix HTML paragraph tags (`

`) sometimes appearing in BBCode-converted content 252 | * Fix BBCode conversion of `:usernameicon:` links (i.e., user icon links without the username) 253 | 254 | ## v3.8.0 255 | 256 | ### New Features 257 | 258 | * Submission user folders 259 | * Submission folders are now parsed and stored in a dedicated `user_folders` field in the `Submission` object 260 | * Each folder is stored in a `namedtuple` with fields for `name`, `url`, and `group` (if any) 261 | * BBCode conversion 262 | * New properties have been added to the `User`, `Submission`, `Journal`, `JournalPartial`, and `Comment` objects to 263 | provide BBCode versions of HTML fields 264 | * The generated BBCode tags follow the Fur Affinity standard found on 265 | their [support page](https://www.furaffinity.net/help/#tags-and-codes) 266 | 267 | ## v3.7.4 268 | 269 | ### Dependencies 270 | 271 | * Use [lxml ^4.9.1](https://pypi.org/project/lxml/4.9.1) 272 | * Fix [CVE-2022-2309](https://cve.report/CVE-2022-2309) issue 273 | 274 | ## v3.7.3 275 | 276 | ### Fixes 277 | 278 | * Fix error when parsing journals folders and journal pages caused by date format set to full on Fur Affinity's site 279 | settings 280 | 281 | ## v3.7.2 282 | 283 | ### New Features 284 | 285 | * Requests timeout 286 | * New `FAAPI.timeout: int | None` variable to set request timeout in seconds 287 | * Timeout is used for both page requests (e.g. submissions) and file requests 288 | 289 | ### Fixes 290 | 291 | * Fix possible parsing error arising from multiple attributes in one tag 292 | 293 | ## v3.7.1 294 | 295 | ### New Features 296 | 297 | * Frontpage 298 | * New `FAAPI.frontpage()` method to get submissions from Fur Affinity's front page 299 | * Sorting of `Journal`, `Submission`, and `User` objects 300 | * All data objects now support greater than, greater or equal, lower than, and lower or equal operations for easy 301 | sorting 302 | 303 | ### Fixes 304 | 305 | * Fix equality comparison between `Journal` and `JournalPartial` 306 | * Fix parsing of usernames from user pages returning the title instead 307 | * Caused by a change in Fur Affinity's DOM 308 | 309 | ## v3.7.0 310 | 311 | ### New Features 312 | 313 | * Journal headers and footers 314 | * The `Journal` class now contains header and footer fields which are parsed from journal pages (`FAAPI.journal`) 315 | * Submission favorite status and link 316 | * The `Submission` class now contains a boolean `favorite` field that is set to `True` if the submission is a 317 | favorite, and a `favorite_toggle_link` containing the link to toggle the favorite status (`/fav/` or `/unfav/`) 318 | * User watch and block statuses and links 319 | * The `User` class now contains boolean `watched` and `blocked` fields that are set to `True` if the user is watched 320 | or blocked respectively, and `watched_toggle_link` and `blocked_toggle_link` fields containing the links to toggle 321 | the watched (`/watch/` or `/unwatch/`) and blocked (`/block/` or `/unblock/`) statuses respectively. 
322 | 323 | ### Changes 324 | 325 | * Remove `parse.check_page` function which had no usage in the library anymore 326 | * Remove `parse.parse_search_submissions` function and `FAAPI.search` method 327 | * They will be reintroduced once Fur Affinity allows scraping search pages again 328 | 329 | ### Fixes 330 | 331 | * Fix an incorrect regular expression that parsed mentions in journals, submissions, and profiles which could cause 332 | non-Fur Affinity links to be matched as valid 333 | * Security issue [#3](https://github.com/FurryCoders/FAAPI/issues/3) 334 | 335 | ## v3.6.1 336 | 337 | ### Fixes 338 | 339 | * Fix `FAAPI.journals` not detecting the next page correctly 340 | * Caused by a change in Fur Affinity's journals page 341 | 342 | ## v3.6.0 343 | 344 | ### New Features 345 | 346 | * Comments! 💬 347 | * A new `Comment` object is now used to store comments for submissions and journals 348 | * The comments are organised in a tree structure, and each one contains references to both its parent 349 | object (`Submission` or `Journal`) and, if the comment is a reply, to its parent comment too 350 | * The auxiliary functions `faapi.comment.flatten_comments` and `faapi.comment.sort_comments` allow to flatten the 351 | comment tree or reorganise it 352 | 353 | * Separate `JournalPartial` and `Journal` objects 354 | * The new `JournalPartial` class takes the place of the previous `Journal` class, and it is now used only to parse 355 | journal from a user's journals folder 356 | * The new `Journal` class contains the same fields as `JournalPartial` with the addition of comments, and it is only 357 | used to parse journal pages 358 | 359 | * Comparisons 360 | * All objects can now be used with the comparison (==) operator with other objects of the same type or the type of 361 | their key property (`id: int` for submissions and journals, and `name_url: str` for users) 362 | 363 | ### Changes 364 | 365 | * The `cookies` argument of `FAAPI` is now mandatory, and an `Unauthorized` exception is raised if `FAAPI` is 366 | initialised with an empty cookies list 367 | * The list of `Submission`/`Journal` objects returned by `FAAPI.gallery`, `FAAPI.scraps`, and `FAAPI.journals` now uses 368 | a shared `UserPartial` object in the `author` variable (i.e. changing a property of the author in one object of the 369 | list will change it for the others as well) 370 | 371 | ### Fixes 372 | 373 | * Fix path checking against robots.txt not working correctly with paths missing a leading forward slash 374 | 375 | ## v3.5.0 376 | 377 | ### New Features 378 | 379 | * New `Submission.stats` field for submission statistics stored in a named tuple (`views`, `comments` (count) 380 | , `favorites`) 381 | * Pull request [#2](https://github.com/FurryCoders/FAAPI/pull/2), thanks 382 | to [@warpKaiba](https://github.com/warpKaiba)! 
383 | * New `Journal.stats` field for journal statistics stored in a named tuple (`comments` (count)) 384 | 385 | ### Changes 386 | 387 | * Rename `UserStats.favs` to `UserStats.favorites` 388 | 389 | ### Fixes 390 | 391 | * Fix links in PyPi metadata pointing to previous hosting at GitLab 392 | 393 | ## v3.4.3 394 | 395 | ### Changes 396 | 397 | * Better and more resilient robots.txt parsing 398 | 399 | ### Fixes 400 | 401 | * Fix spaces around slash (/) not being preserved for submission categories 402 | 403 | ## v3.4.2 404 | 405 | ### Changes 406 | 407 | * Raise `DisabledAccount` for users pending deletion 408 | * Error messages from server are not lowercase 409 | 410 | ## v3.4.1 411 | 412 | ### Fixes 413 | 414 | * Fix rare occurrence of error message not being parsed if inside a `section.notice-message` 415 | 416 | ## v3.4.0 (was 3.3.8) 417 | 418 | ### New Features 419 | 420 | * New `NotFound` exception inheriting from `ParsingError` 421 | 422 | ### Changes 423 | 424 | * Removed `FAAPI.submission_exists`, `FAAPI.journal_exists`, and `FAAPI.user_exists` methods 425 | * Improved reliability of error pages' parser 426 | 427 | ### Fixes 428 | 429 | * Custom exceptions inherit from `Exception` instead of `BaseException` 430 | 431 | ## v3.3.7 432 | 433 | ### Changes 434 | 435 | * No changes to code; migrated repository to GitHub and updated README and PyPi metadata 436 | 437 | ## v3.3.6 438 | 439 | ### Changes 440 | 441 | * Allow empty info/contacts when parsing user profiles 442 | 443 | ## v3.3.5 444 | 445 | ### Changes 446 | 447 | * Fix last page check when parsing galleries 448 | 449 | ## v3.3.4 450 | 451 | ### Changes 452 | 453 | * Use BaseException as base class of custom exceptions 454 | 455 | ### Dependencies 456 | 457 | * Use [requests ^2.27.1](https://pypi.org/project/requests/2.27.1) 458 | 459 | ## v3.3.3 460 | 461 | ### Changes 462 | 463 | * Allow submission thumbnail tag to be null 464 | 465 | ## v3.3.2 466 | 467 | ### Changes 468 | 469 | * Use `UserStats` class to hold user statistics instead of namedtuple 470 | * Add watched by and watching stats to `UserStats` 471 | 472 | ## v3.3.1 473 | 474 | ### Changes 475 | 476 | * Safer parsing 477 | 478 | ## v3.3.0 479 | 480 | ### New Features 481 | 482 | * Add docstrings 483 | * Handle robots.txt parsing with `urllib.RobotFileParser` 484 | * `User-Agent` header is exposed as `FAAPI.user_agent` property 485 | 486 | ### Changes 487 | 488 | * `FAAPI.last_get` uses UNIX time 489 | * `FAAPI.check_path` doesn't raise an exception by default 490 | * `FAAPI.login_status` does not raise an exception on unauthorized 491 | * Remove crawl delay error 492 | * Improve download of files 493 | 494 | ## v3.2.0 495 | 496 | ### New Features 497 | 498 | * `FAAPI.get_parsed` checks login status and checks the page for errors directly (both can be manually skipped) 499 | * Add `Unauthorized` exception 500 | 501 | ## v3.1.2 502 | 503 | ### New Features 504 | 505 | * `FAAPI.submission` and `FAAPI.submission_file` support setting the chunk size for the binary file download 506 | 507 | ### Changes 508 | 509 | * The file downloader uses chunk size instead of speed 510 | 511 | ## v3.1.1 512 | 513 | ### Changes 514 | 515 | * When raising `ServerError` and `NoticeMessage`, the actual messages appearing on the page are use as exception 516 | arguments 517 | 518 | ## v3.1.0 519 | 520 | ### New feature 521 | 522 | * Add support for `http.cookiejar.CookieJar` (and inheriting classes, like `requests.cookies.RequestsCookieJar`) for 523 | cookies. 
524 | * Add `FAAPI.me()` method to get the logged-in user 525 | * Add `FAAPI.login_status` property to get the current login status 526 | 527 | ### Dependencies 528 | 529 | * Use [lxml ^4.7.1](https://pypi.org/project/lxml/4.7.1) 530 | * Fix [CVE-2021-43818](https://cve.report/CVE-2021-43818) issue 531 | 532 | ## v3.0.2 533 | 534 | ### Fixes 535 | 536 | * Fix rare error when parsing the info section of a userpage 537 | 538 | ## v3.0.1 539 | 540 | ### Fixes 541 | 542 | * Fix a key error in `Submission` when assigning the parsed results 543 | 544 | ## v3.0.0 545 | 546 | ### New Features 547 | 548 | * Upgrade to Python 3.9+ 549 | * Update type annotations 550 | * `Submission` parses next and previous submission IDs 551 | * `FAAPI.watchlist_by()` and `FAAPI.watchlist_to()` methods support multiple watchlist pages 552 | 553 | ### Changes 554 | 555 | * Renamed `FAAPI.get_parse` to `get_parsed` 556 | * Removed _get_ prefix from `FAAPI` methods (e.g. `get_submission` to `submission`) and return a list of `UserPartials` 557 | objects instead of `Users` 558 | * Added `__all__` declarations to allow importing exceptions and secondary functions from `connection` and `parse` 559 | * `datetime` fields are not serialised on `__iter__` (e.g. when casting a `Submission` object to `dict`) 560 | 561 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | European Union Public Licence 2 | V. 1.2 3 | 4 | EUPL © the European Union 2007, 2016 5 | 6 | This European Union Public Licence (the ‘EUPL’) applies to the Work (as 7 | defined below) which is provided under the terms of this Licence. Any use of 8 | the Work, other than as authorised under this Licence is prohibited (to the 9 | extent such use is covered by a right of the copyright holder of the Work). 10 | 11 | The Work is provided under the terms of this Licence when the Licensor (as 12 | defined below) has placed the following notice immediately following the 13 | copyright notice for the Work: “Licensed under the EUPL”, or has expressed by 14 | any other means his willingness to license under the EUPL. 15 | 16 | 1. Definitions 17 | 18 | In this Licence, the following terms have the following meaning: 19 | — ‘The Licence’: this Licence. 20 | — ‘The Original Work’: the work or software distributed or communicated by the 21 | ‘Licensor under this Licence, available as Source Code and also as 22 | ‘Executable Code as the case may be. 23 | — ‘Derivative Works’: the works or software that could be created by the 24 | ‘Licensee, based upon the Original Work or modifications thereof. This 25 | ‘Licence does not define the extent of modification or dependence on the 26 | ‘Original Work required in order to classify a work as a Derivative Work; 27 | ‘this extent is determined by copyright law applicable in the country 28 | ‘mentioned in Article 15. 29 | — ‘The Work’: the Original Work or its Derivative Works. 30 | — ‘The Source Code’: the human-readable form of the Work which is the most 31 | convenient for people to study and modify. 32 | 33 | — ‘The Executable Code’: any code which has generally been compiled and which 34 | is meant to be interpreted by a computer as a program. 35 | — ‘The Licensor’: the natural or legal person that distributes or communicates 36 | the Work under the Licence. 
37 | — ‘Contributor(s)’: any natural or legal person who modifies the Work under 38 | the Licence, or otherwise contributes to the creation of a Derivative Work. 39 | — ‘The Licensee’ or ‘You’: any natural or legal person who makes any usage of 40 | the Work under the terms of the Licence. 41 | — ‘Distribution’ or ‘Communication’: any act of selling, giving, lending, 42 | renting, distributing, communicating, transmitting, or otherwise making 43 | available, online or offline, copies of the Work or providing access to its 44 | essential functionalities at the disposal of any other natural or legal 45 | person. 46 | 47 | 2. Scope of the rights granted by the Licence 48 | 49 | The Licensor hereby grants You a worldwide, royalty-free, non-exclusive, 50 | sublicensable licence to do the following, for the duration of copyright 51 | vested in the Original Work: 52 | 53 | — use the Work in any circumstance and for all usage, 54 | — reproduce the Work, 55 | — modify the Work, and make Derivative Works based upon the Work, 56 | — communicate to the public, including the right to make available or display 57 | the Work or copies thereof to the public and perform publicly, as the case 58 | may be, the Work, 59 | — distribute the Work or copies thereof, 60 | — lend and rent the Work or copies thereof, 61 | — sublicense rights in the Work or copies thereof. 62 | 63 | Those rights can be exercised on any media, supports and formats, whether now 64 | known or later invented, as far as the applicable law permits so. 65 | 66 | In the countries where moral rights apply, the Licensor waives his right to 67 | exercise his moral right to the extent allowed by law in order to make 68 | effective the licence of the economic rights here above listed. 69 | 70 | The Licensor grants to the Licensee royalty-free, non-exclusive usage rights 71 | to any patents held by the Licensor, to the extent necessary to make use of 72 | the rights granted on the Work under this Licence. 73 | 74 | 3. Communication of the Source Code 75 | 76 | The Licensor may provide the Work either in its Source Code form, or as 77 | Executable Code. If the Work is provided as Executable Code, the Licensor 78 | provides in addition a machine-readable copy of the Source Code of the Work 79 | along with each copy of the Work that the Licensor distributes or indicates, 80 | in a notice following the copyright notice attached to the Work, a repository 81 | where the Source Code is easily and freely accessible for as long as the 82 | Licensor continues to distribute or communicate the Work. 83 | 84 | 4. Limitations on copyright 85 | 86 | Nothing in this Licence is intended to deprive the Licensee of the benefits 87 | from any exception or limitation to the exclusive rights of the rights owners 88 | in the Work, of the exhaustion of those rights or of other applicable 89 | limitations thereto. 90 | 91 | 5. Obligations of the Licensee 92 | 93 | The grant of the rights mentioned above is subject to some restrictions and 94 | obligations imposed on the Licensee. Those obligations are the following: 95 | 96 | Attribution right: The Licensee shall keep intact all copyright, patent or 97 | trademarks notices and all notices that refer to the Licence and to the 98 | disclaimer of warranties. The Licensee must include a copy of such notices and 99 | a copy of the Licence with every copy of the Work he/she distributes or 100 | communicates. 
The Licensee must cause any Derivative Work to carry prominent 101 | notices stating that the Work has been modified and the date of modification. 102 | 103 | Copyleft clause: If the Licensee distributes or communicates copies of the 104 | Original Works or Derivative Works, this Distribution or Communication will be 105 | done under the terms of this Licence or of a later version of this Licence 106 | unless the Original Work is expressly distributed only under this version of 107 | the Licence — for example by communicating ‘EUPL v. 1.2 only’. The Licensee 108 | (becoming Licensor) cannot offer or impose any additional terms or conditions 109 | on the Work or Derivative Work that alter or restrict the terms of the 110 | Licence. 111 | 112 | Compatibility clause: If the Licensee Distributes or Communicates Derivative 113 | Works or copies thereof based upon both the Work and another work licensed 114 | under a Compatible Licence, this Distribution or Communication can be done 115 | under the terms of this Compatible Licence. For the sake of this clause, 116 | ‘Compatible Licence’ refers to the licences listed in the appendix attached to 117 | this Licence. Should the Licensee's obligations under the Compatible Licence 118 | conflict with his/her obligations under this Licence, the obligations of the 119 | Compatible Licence shall prevail. 120 | 121 | Provision of Source Code: When distributing or communicating copies of the 122 | Work, the Licensee will provide a machine-readable copy of the Source Code or 123 | indicate a repository where this Source will be easily and freely available 124 | for as long as the Licensee continues to distribute or communicate the Work. 125 | 126 | Legal Protection: This Licence does not grant permission to use the trade 127 | names, trademarks, service marks, or names of the Licensor, except as required 128 | for reasonable and customary use in describing the origin of the Work and 129 | reproducing the content of the copyright notice. 130 | 131 | 6. Chain of Authorship 132 | 133 | The original Licensor warrants that the copyright in the Original Work granted 134 | hereunder is owned by him/her or licensed to him/her and that he/she has the 135 | power and authority to grant the Licence. 136 | 137 | Each Contributor warrants that the copyright in the modifications he/she 138 | brings to the Work are owned by him/her or licensed to him/her and that he/she 139 | has the power and authority to grant the Licence. 140 | 141 | Each time You accept the Licence, the original Licensor and subsequent 142 | Contributors grant You a licence to their contributions to the Work, under the 143 | terms of this Licence. 144 | 145 | 7. Disclaimer of Warranty 146 | 147 | The Work is a work in progress, which is continuously improved by numerous 148 | Contributors. It is not a finished work and may therefore contain defects or 149 | ‘bugs’ inherent to this type of development. 150 | 151 | For the above reason, the Work is provided under the Licence on an ‘as is’ 152 | basis and without warranties of any kind concerning the Work, including 153 | without limitation merchantability, fitness for a particular purpose, absence 154 | of defects or errors, accuracy, non-infringement of intellectual property 155 | rights other than copyright as stated in Article 6 of this Licence. 156 | 157 | This disclaimer of warranty is an essential part of the Licence and a 158 | condition for the grant of any rights to the Work. 159 | 160 | 8. 
Disclaimer of Liability 161 | 162 | Except in the cases of wilful misconduct or damages directly caused to natural 163 | persons, the Licensor will in no event be liable for any direct or indirect, 164 | material or moral, damages of any kind, arising out of the Licence or of the 165 | use of the Work, including without limitation, damages for loss of goodwill, 166 | work stoppage, computer failure or malfunction, loss of data or any commercial 167 | damage, even if the Licensor has been advised of the possibility of such 168 | damage. However, the Licensor will be liable under statutory product liability 169 | laws as far such laws apply to the Work. 170 | 171 | 9. Additional agreements 172 | 173 | While distributing the Work, You may choose to conclude an additional 174 | agreement, defining obligations or services consistent with this Licence. 175 | However, if accepting obligations, You may act only on your own behalf and on 176 | your sole responsibility, not on behalf of the original Licensor or any other 177 | Contributor, and only if You agree to indemnify, defend, and hold each 178 | Contributor harmless for any liability incurred by, or claims asserted against 179 | such Contributor by the fact You have accepted any warranty or additional 180 | liability. 181 | 182 | 10. Acceptance of the Licence 183 | 184 | The provisions of this Licence can be accepted by clicking on an icon ‘I 185 | agree’ placed under the bottom of a window displaying the text of this Licence 186 | or by affirming consent in any other similar way, in accordance with the rules 187 | of applicable law. Clicking on that icon indicates your clear and irrevocable 188 | acceptance of this Licence and all of its terms and conditions. 189 | 190 | Similarly, you irrevocably accept this Licence and all of its terms and 191 | conditions by exercising any rights granted to You by Article 2 of this 192 | Licence, such as the use of the Work, the creation by You of a Derivative Work 193 | or the Distribution or Communication by You of the Work or copies thereof. 194 | 195 | 11. Information to the public 196 | 197 | In case of any Distribution or Communication of the Work by means of 198 | electronic communication by You (for example, by offering to download the Work 199 | from a remote location) the distribution channel or media (for example, a 200 | website) must at least provide to the public the information requested by the 201 | applicable law regarding the Licensor, the Licence and the way it may be 202 | accessible, concluded, stored and reproduced by the Licensee. 203 | 204 | 12. Termination of the Licence 205 | 206 | The Licence and the rights granted hereunder will terminate automatically upon 207 | any breach by the Licensee of the terms of the Licence. Such a termination 208 | will not terminate the licences of any person who has received the Work from 209 | the Licensee under the Licence, provided such persons remain in full 210 | compliance with the Licence. 211 | 212 | 13. Miscellaneous 213 | 214 | Without prejudice of Article 9 above, the Licence represents the complete 215 | agreement between the Parties as to the Work. 216 | 217 | If any provision of the Licence is invalid or unenforceable under applicable 218 | law, this will not affect the validity or enforceability of the Licence as a 219 | whole. Such provision will be construed or reformed so as necessary to make it 220 | valid and enforceable. 
221 | 222 | The European Commission may publish other linguistic versions or new versions 223 | of this Licence or updated versions of the Appendix, so far this is required 224 | and reasonable, without reducing the scope of the rights granted by the 225 | Licence. New versions of the Licence will be published with a unique version 226 | number. 227 | 228 | All linguistic versions of this Licence, approved by the European Commission, 229 | have identical value. Parties can take advantage of the linguistic version of 230 | their choice. 231 | 232 | 14. Jurisdiction 233 | 234 | Without prejudice to specific agreement between parties, 235 | — any litigation resulting from the interpretation of this License, arising 236 | between the European Union institutions, bodies, offices or agencies, as a 237 | Licensor, and any Licensee, will be subject to the jurisdiction of the Court 238 | of Justice of the European Union, as laid down in article 272 of the Treaty 239 | on the Functioning of the European Union, 240 | — any litigation arising between other parties and resulting from the 241 | interpretation of this License, will be subject to the exclusive 242 | jurisdiction of the competent court where the Licensor resides or conducts 243 | its primary business. 244 | 245 | 15. Applicable Law 246 | 247 | Without prejudice to specific agreement between parties, 248 | — this Licence shall be governed by the law of the European Union Member State 249 | where the Licensor has his seat, resides or has his registered office, 250 | — this licence shall be governed by Belgian law if the Licensor has no seat, 251 | residence or registered office inside a European Union Member State. 252 | 253 | Appendix 254 | 255 | ‘Compatible Licences’ according to Article 5 EUPL are: 256 | — GNU General Public License (GPL) v. 2, v. 3 257 | — GNU Affero General Public License (AGPL) v. 3 258 | — Open Software License (OSL) v. 2.1, v. 3.0 259 | — Eclipse Public License (EPL) v. 1.0 260 | — CeCILL v. 2.0, v. 2.1 261 | — Mozilla Public Licence (MPL) v. 2 262 | — GNU Lesser General Public Licence (LGPL) v. 2.1, v. 3 263 | — Creative Commons Attribution-ShareAlike v. 3.0 Unported (CC BY-SA 3.0) for 264 | works other than software 265 | — European Union Public Licence (EUPL) v. 1.1, v. 1.2 266 | — Québec Free and Open-Source Licence — Reciprocity (LiLiQ-R) or 267 | Strong Reciprocity (LiLiQ-R+) 268 | 269 | — The European Commission may update this Appendix to later versions of the 270 | above licences without producing a new version of the EUPL, as long as they 271 | provide the rights granted in Article 2 of this Licence and protect the 272 | covered Source Code from exclusive appropriation. 273 | — All other changes or additions to this Appendix require the production of a 274 | new EUPL version. 275 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |

2 | 3 | logo 4 | 5 | # Fur Affinity API 6 | 7 | Python library to implement API-like functionality for the [Fur Affinity](https://furaffinity.net) website. 8 | 9 | [![](https://img.shields.io/pypi/v/faapi?logo=pypi)](https://pypi.org/project/faapi/) 10 | [![](https://img.shields.io/pypi/pyversions/faapi?logo=Python)](https://www.python.org) 11 | 12 | [![](https://img.shields.io/github/v/tag/FurryCoders/faapi?label=github&sort=date&logo=github&color=blue)](https://github.com/FurryCoders/faapi) 13 | [![](https://img.shields.io/github/issues/FurryCoders/faapi?logo=github&color=blue)](https://github.com/FurryCoders/FAAPI/issues) 14 | [![](https://img.shields.io/github/actions/workflow/status/FurryCoders/FAAPI/test.yml?label=Test&logo=githubactions)](https://github.com/FurryCoders/FAAPI/actions/workflows/test.yml) 15 | [![](https://coveralls.io/repos/github/FurryCoders/FAAPI/badge.svg)](https://coveralls.io/github/FurryCoders/FAAPI) 16 | 17 |
18 | 19 | ## Requirements 20 | 21 | Python 3.9+ is necessary to run this 22 | library. [ Poetry](https://python-poetry.org) 23 | is used for packaging and dependency management. 24 | 25 | ## Usage 26 | 27 | The API comprises a main class `FAAPI`, two submission classes `Submission` and `SubmissionPartial`, a journal 28 | class `Journal`, and a user class `User`. 29 | 30 | Once `FAAPI` is initialized, its methods can be used to crawl FA and return parsed objects. 31 | 32 | ```python 33 | from requests.cookies import RequestsCookieJar 34 | import faapi 35 | import orjson 36 | 37 | cookies = RequestsCookieJar() 38 | cookies.set("a", "38565475-3421-3f21-7f63-3d341339737") 39 | cookies.set("b", "356f5962-5a60-0922-1c11-65003b70308") 40 | 41 | api = faapi.FAAPI(cookies) 42 | sub, sub_file = api.submission(12345678, get_file=True) 43 | 44 | print(sub.id, sub.title, sub.author, f"{len(sub_file) / 1024:02f}KiB") 45 | 46 | with open(f"{sub.id}.json", "wb") as f: 47 | f.write(orjson.dumps(dict(sub))) 48 | 49 | with open(sub.file_url.split("/")[-1], "wb") as f: 50 | f.write(sub_file) 51 | 52 | gallery, _ = api.gallery("user_name", 1) 53 | with open("user_name-gallery.json", "wb") as f: 54 | f.write(orjson.dumps(list(map(dict, gallery)))) 55 | ``` 56 | 57 | ### robots.txt 58 | 59 | At init, the `FAAPI` object downloads the [robots.txt](https://www.furaffinity.net/robots.txt) file from FA to determine 60 | the `Crawl-delay` and `disallow` values set therein. If not set in the robots.txt file, a crawl delay value of 1 second 61 | is used. 62 | 63 | To respect this value, the default behaviour of the `FAAPI` object is to wait when a get request is made if the last 64 | request was performed more recently then the crawl delay value. 65 | 66 | See under [FAAPI](#faapi) for more details on this behaviour. 67 | 68 | Furthermore, any get operation that points to a disallowed path from robots.txt will raise an exception. This check 69 | should not be circumvented, and the developer of this library does not take responsibility for violations of the TOS of 70 | Fur Affinity. 71 | 72 | ### Cookies 73 | 74 | To access protected pages, cookies from an active session are needed. These cookies can be given to the FAAPI object as 75 | a list of dictionaries - each containing a `name` and a `value` field -, or as a `http.cookiejar.CookieJar` 76 | object (`requests.cookies.RequestsCookieJar` and other objects inheriting from `CookieJar` are also supported). The 77 | cookies list should look like the following example: 78 | 79 | ```python 80 | cookies = [ 81 | {"name": "a", "value": "38565475-3421-3f21-7f63-3d3413397537"}, 82 | {"name": "b", "value": "356f5962-5a60-0922-1c11-65003b703038"}, 83 | ] 84 | ``` 85 | 86 | ```python 87 | from requests.cookies import RequestsCookieJar 88 | 89 | cookies = RequestsCookieJar() 90 | cookies.set("a", "38565475-3421-3f21-7f63-3d3413397537") 91 | cookies.set("b", "356f5962-5a60-0922-1c11-65003b703038") 92 | ``` 93 | 94 | To access session cookies, consult the manual of the browser used to log in. 95 | 96 | *Note:* it is important to not logout of the session the cookies belong to, otherwise they will no longer work.
97 | *Note:* as of April 2022 only cookies `a` and `b` are needed. 98 | 99 | ### User Agent 100 | 101 | `FAAPI` attaches a `User-Agent` header to every request. The user agent string is generated at startup in the following 102 | format: `faapi/{library version} Python/{python version} {system name}/{system release}`. 103 | 104 | ## Objects 105 | 106 | ### FAAPI 107 | 108 | This is the main object that handles all the calls to scrape pages and get submissions. 109 | 110 | It holds the following fields: 111 | 112 | * `session: requests.Session` The session used for all requests. 113 | * `robots: urllib.robotparser.RobotFileParser` robots.txt handler 114 | * `user_agent: str` user agent used by the session (property, cannot be set) 115 | * `crawl_delay: float` crawl delay from robots.txt (property, cannot be set) 116 | * `last_get: float` time of last get (UNIX time) 117 | * `raise_for_unauthorized: bool = True` if set to `True`, raises an exception if a request is made and the resulting 118 | page is not from a login session 119 | * `timeout: int | None = None` requests timeout in seconds for both page requests (e.g. submissions) and files 120 | 121 | #### Init 122 | 123 | `__init__(cookies: list[dict[str, str]] | CookieJar, session_class: Type[Session] = Session)` 124 | 125 | A FAAPI object must be initialised with a cookies object in the format mentioned above in [#Cookies](#cookies). 126 | 127 | An optional `session_class` argument can be given to modify the class used by `FAAPI.session`. Any class based 128 | on `requests.Session` is accepted.
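For example, a minimal sketch of a custom session that adds retry behaviour (the retry settings and cookie values are illustrative placeholders, not part of FAAPI):

```python
import faapi
from requests import Session
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry


class RetrySession(Session):
    # Any class based on requests.Session is accepted by the session_class argument
    def __init__(self):
        super().__init__()
        self.mount("https://", HTTPAdapter(max_retries=Retry(total=3, backoff_factor=1)))


cookies = [
    {"name": "a", "value": "38565475-3421-3f21-7f63-3d3413397537"},
    {"name": "b", "value": "356f5962-5a60-0922-1c11-65003b703038"},
]

api = faapi.FAAPI(cookies, session_class=RetrySession)
api.timeout = 30  # optional request timeout in seconds
print(api.login_status)  # True if the cookies belong to a login session
```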
129 | 130 | #### Methods & Properties 131 | 132 | * `load_cookies(cookies: list[dict[str, str]] | CookieJar)` 133 | Loads new cookies and creates a new session.
134 | *Note:* This method removes any cookies currently in use; to update or add individual cookies, access them from the session 135 | object. 136 | * `handle_delay()`
137 | Handles the crawl delay as set in the robots.txt file. 138 | * `check_path(path: str, *, raise_for_disallowed: bool = False) -> bool`
139 | Checks whether a given path is allowed by the robots.txt. If `raise_for_disallowed` is set to `True`, 140 | a `DisallowedPath` exception is raised on disallowed paths. 141 | * `connection_status -> bool`
142 | Returns the status of the connection. 143 | * `login_status -> bool`
144 | Returns the login status. 145 | * `get(path: str, **params) -> requests.Response`
146 | This returns a response object containing the result of the get operation on the given URL with the 147 | optional `**params` added to it (the path provided is treated as relative to 'https://www.furaffinity.net/'). 148 | * `get_parsed(path: str, *, skip_page_check: bool = False, skip_auth_check: bool = False, **params) -> bs4.BeautifulSoup`
149 | Similar to `get()` but returns the parsed HTML from the normal get operation. If the GET request encounters an error, 150 | an `HTTPError` exception is raised. If `skip_page_check` is set to `True`, the parsed page is not checked for errors 151 | (e.g. a non-existing submission). If `skip_auth_check` is set to `True`, the page is not checked for login status. 152 | * `me() -> User | None`
153 | Returns the logged-in user as a `User` object if the cookies are from a login session. 154 | * `frontpage() -> list[SubmissionPartial]`
155 | Fetches the latest submissions from Fur Affinity's front page. 156 | * `submission(submission_id: int, get_file: bool = False, *, chunk_size: int = None) -> tuple[Submission, bytes | None]`
157 | Given a submission ID, it returns a `Submission` object containing the various metadata of the submission itself and 158 | a `bytes` object with the submission file if `get_file` is passed as `True`. The optional `chunk_size` argument is 159 | used for the request; if left as `None` or set to 0, the download is performed directly without streaming.
160 | *Note:* the author `UserPartial` object of the submission does not contain the `join_date` field as it does not appear 161 | on submission pages. 162 | * `submission_file(submission: Submission, *, chunk_size: int = None) -> bytes`
163 | Given a submission object, it downloads its file and returns it as a `bytes` object. The optional `chunk_size` 164 | argument is used for the request; if left as `None` or set to 0, the download is performed directly without streaming. 165 | * `journal(journal_id: int) -> Journal`
166 | Given a journal ID, it returns a `Journal` object containing the various metadata of the journal. 167 | * `user(user: str) -> User`
168 | Given a username, it returns a `User` object containing information regarding the user. 169 | * `gallery(user: str, page: int = 1) -> tuple[list[SubmissionPartial], int | None]`
170 | Returns the list of submissions found on a specific gallery page, and the number of the next page. The returned page 171 | number is set to `None` if it is the last page. 172 | * `scraps(user: str, page: int = 1) -> tuple[list[SubmissionPartial], int | None]`
173 | Returns the list of submissions found on a specific scraps page, and the number of the next page. The returned page 174 | number is set to `None` if it is the last page. 175 | * `favorites(user: str, page: str = "") -> tuple[list[SubmissionPartial], str | None]`
176 | Downloads a user's favorites page. Because of how favorites pages work on FA, the `page` argument (and the one 177 | returned) are strings. If the favorites page is the last one, `None` is returned as the next page. An empty page 178 | value as argument is equivalent to page 1.
179 | *Note:* favorites page "numbers" do not follow any scheme and are only generated server-side. 180 | * `journals(user: str, page: int = 1) -> tuple[list[JournalPartial], int | None]`
181 | Returns the list of journals found on a specific journals page, and the number of the next page. The returned page 182 | number is set to `None` if it is the last page. 183 | * `watchlist_to(self, user: str, page: int = 1) -> tuple[list[UserPartial], int | None]`
184 | Given a username, returns a list of `UserPartial` objects for each user that is watching the given user, together with the 185 | next page number (`None` if it is the last page). 186 | * `watchlist_by(self, user: str, page: int = 1) -> tuple[list[UserPartial], int | None]`
187 | Given a username, returns a list of `UserPartial` objects for each user that is watched by the given user, together with the 188 | next page number (`None` if it is the last page). 189 | 190 | *Note:* The last page returned by `watchlist_to` and `watchlist_by` may not be correct, as Fur Affinity doesn't seem 191 | to render the next-page button consistently; it is safer to use an external check that stops when the method keeps 192 | advancing the page but returns the same users, or none, as shown in the sketch below. 193 |
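A minimal pagination sketch, assuming the `api` object from the Usage example above and `"user_name"` as a placeholder username; the stop-on-repeat check for watchlists is a suggestion rather than part of the library:

```python
# Gallery: follow the returned next-page number until None is returned
submissions = []
page = 1
while page is not None:
    page_submissions, page = api.gallery("user_name", page)
    submissions.extend(page_submissions)

# Watchlist: stop as soon as a page returns no new users
watchers, seen = [], set()
page = 1
while page is not None:
    users, page = api.watchlist_to("user_name", page)
    new_names = {user.name_url for user in users} - seen
    if not new_names:
        break  # the page is not advancing or is empty
    seen |= new_names
    watchers.extend(users)
```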
224 | Property method that returns the URL-safe username. 225 | * `url -> str`
226 | Property method that returns the Fur Affinity URL to the user (`https://www.furaffinity.net/user/{name_url}`). 227 | * `generate_avatar_url() -> str`
228 | Generates the URL for the current user icon. 229 | * `parse(user_page: bs4.BeautifulSoup = None)`
230 | Parses the stored user page for metadata. If `user_page` is passed, it overwrites the existing `user_page` value. 231 | 232 | ### User 233 | 234 | The main class storing all of a user's metadata. 235 | 236 | * `name: str` display name with capital letters and extra characters such as "_" 237 | * `status: str` user status (~, !, etc.) 238 | * `title: str` the user title as it appears on their userpage 239 | * `join_date: datetime` the date the user joined (defaults to timestamp 0) 240 | * `profile: str` profile text in HTML format 241 | * `profile_bbcode: str` profile text in BBCode format 242 | * `stats: UserStats` user statistics stored in a `namedtuple` (`views`, `submissions`, `favorites`, `comments_earned` 243 | , `comments_made`, `journals`, `watched_by`, `watching`) 244 | * `info: dict[str, str]` profile information (e.g. "Accepting Trades", "Accepting Commissions", "Character Species", 245 | etc.) 246 | * `contacts: dict[str, str]` contact links (e.g. Twitter, Steam, etc.) 247 | * `avatar_url: str` the URL to the user icon 248 | * `banner_url: str | None` the URL to the user banner (if any is set, otherwise `None`) 249 | * `watched: bool` `True` if the user is watched, `False` otherwise 250 | * `watched_toggle_link: str | None` The link to toggle the watch status (`/watch/` or `/unwatch/` type link) 251 | * `blocked: bool` `True` if the user is blocked, `False` otherwise 252 | * `blocked_toggle_link: str | None` The link to toggle the block status (`/block/` or `/unblock/` type link) 253 | * `user_page: bs4.BeautifulSoup` the user page used to parse the object fields 254 | 255 | `User` objects can be directly cast to a dict object and iterated through. 256 | 257 | Comparison with `User` can be made with either another `User` or `UserPartial` object (the URL names are compared), or a 258 | string (the URL name is compared to the given string). 259 | 260 | #### Init 261 | 262 | `__init__(user_page: bs4.BeautifulSoup = None)` 263 | 264 | To initialise the object, an optional `bs4.BeautifulSoup` object is needed containing the parsed HTML of a user 265 | page. 266 | 267 | If no `user_page` is passed then the object fields will remain at their default - empty - value. 268 | 269 | #### Methods 270 | 271 | * `name_url -> str`
272 | Property method that returns the URL-safe username. 273 | * `url -> str`
274 | Property method that returns the Fur Affinity URL to the user (`https://www.furaffinity.net/user/{name_url}`). 275 | * `generate_avatar_url() -> str`
276 | Generates the URL for the current user icon. 277 | * `parse(user_page: bs4.BeautifulSoup = None)`
278 | Parses the stored user page for metadata. If `user_page` is passed, it overwrites the existing `user_page` value. 279 | 280 | ### JournalPartial 281 | 282 | This object contains partial information gathered when parsing a journals folder. It contains the following fields: 283 | 284 | * `id: int` journal ID 285 | * `title: str` journal title 286 | * `date: datetime` upload date as a [`datetime` object](https://docs.python.org/3/library/datetime.html) (defaults to 287 | timestamp 0) 288 | * `author: UserPartial` journal author (filled only if the journal is parsed from a `bs4.BeautifulSoup` page) 289 | * `stats: JournalStats` journal statistics stored in a named tuple (`comments` (count)) 290 | * `content: str` journal content in HTML format 291 | * `content_bbcode: str` journal content in BBCode format 292 | * `mentions: list[str]` the users mentioned in the content (if they were mentioned as links, e.g. `:iconusername:`, 293 | `@username`, etc.) 294 | * `journal_tag: bs4.element.Tag` the journal tag used to parse the object fields 295 | 296 | `JournalPartial` objects can be directly cast to a dict object or iterated through. 297 | 298 | Comparison with `JournalPartial` can be made with either another `JournalPartial` or `Journal` object (the IDs are 299 | compared), or an integer (the `JournalPartial.id` value is compared to the given integer). 300 | 301 | #### Init 302 | 303 | `__init__(journal_tag: bs4.element.Tag = None)` 304 | 305 | `JournalPartial` takes one optional parameter: a journal section tag from a journals page. 306 | 307 | If no `journal_tag` is passed then the object fields will remain at their default - empty - value. 308 | 309 | #### Methods 310 | 311 | * `url -> str`
312 | Property method that returns the Fur Affinity URL to the journal (`https://www.furaffinity.net/journal/{id}`). 313 | * `parse(journal_tag: bs4.element.Tag = None)`
314 | Parses the stored journal tag for information. If `journal_tag` is passed, it overwrites the existing `journal_tag` 315 | value. 316 | 317 | ### Journal 318 | 319 | This object contains full information gathered when parsing a journal page. It contains the same fields 320 | as `JournalPartial`, with the addition of the header, footer, and comments: 321 | 322 | * `id: int` journal ID 323 | * `title: str` journal title 324 | * `date: datetime` upload date as a [`datetime` object](https://docs.python.org/3/library/datetime.html) (defaults to 325 | timestamp 0) 326 | * `author: UserPartial` journal author (filled only if the journal is parsed from a `bs4.BeautifulSoup` page) 327 | * `stats: JournalStats` journal statistics stored in a named tuple (`comments` (count)) 328 | * `content: str` journal content in HTML format 329 | * `content_bbcode: str` journal content in BBCode format 330 | * `header: str` journal header in HTML format (if present) 331 | * `footer: str` journal footer in HTML format (if present) 332 | * `mentions: list[str]` the users mentioned in the content (if they were mentioned as links, e.g. `:iconusername:`, 333 | `@username`, etc.) 334 | * `comments: list[Comment]` the comments to the journal, organised in a tree structure 335 | * `journal_page: bs4.BeautifulSoup` the journal page used to parse the object fields 336 | 337 | `Journal` objects can be directly cast to a dict object or iterated through. 338 | 339 | Comparison with `Journal` can be made with either another `Journal` or `JournalPartial` object (the IDs are compared), 340 | or an integer (the `Journal.id` value is compared to the given integer). 341 | 342 | #### Init 343 | 344 | `__init__(journal_page: bs4.BeautifulSoup = None)` 345 | 346 | `Journal` takes one optional journal page argument. 347 | 348 | If no `journal_page` is passed then the object fields will remain at their default - empty - value. 349 | 350 | #### Methods 351 | 352 | * `url -> str`
353 | Property method that returns the Fur Affinity URL to the journal (`https://www.furaffinity.net/journal/{id}`). 354 | * `parse(journal_page: bs4.BeautifulSoup = None)`
355 | Parses the stored journal page for information. If `journal_page` is passed, it overwrites the existing `journal_page` 356 | value. 357 | 358 | ### SubmissionPartial 359 | 360 | This lightweight submission object is used to contain the information gathered when parsing gallery, scraps, and 361 | favorites pages. It contains only the following fields: 362 | 363 | * `id: int` submission ID 364 | * `title: str` submission title 365 | * `author: UserPartial` submission author (only the `name` field is filled) 366 | * `rating: str` submission rating [general, mature, adult] 367 | * `type: str` submission type [text, image, etc...] 368 | * `thumbnail_url: str` the URL to the submission thumbnail 369 | * `submission_figure: bs4.element.Tag` the figure tag used to parse the object fields 370 | 371 | `SubmissionPartial` objects can be directly cast to a dict object or iterated through. 372 | 373 | Comparison with `SubmissionPartial` can be made with either another `SubmissionPartial` or `Submission` object (the IDs are 374 | compared), or an integer (the `SubmissionPartial.id` value is compared to the given integer). 375 | 376 | #### Init 377 | 378 | `__init__(submission_figure: bs4.element.Tag = None)` 379 | 380 | To initialise the object, an optional `bs4.element.Tag` object is needed containing the parsed HTML of a submission 381 | figure tag. 382 | 383 | If no `submission_figure` is passed then the object fields will remain at their default - empty - value. 384 | 385 | #### Methods 386 | 387 | * `url -> str`
388 | Property method that returns the Fur Affinity URL to the submission (`https://www.furaffinity.net/view/{id}`). 389 | * `parse(submission_figure: bs4.element.Tag = None)`
390 | Parses the stored submission figure tag for information. If `submission_figure` is passed, it overwrites the 391 | existing `submission_figure` value. 392 | 393 | ### Submission 394 | 395 | The main class that parses and holds submission metadata. 396 | 397 | * `id: int` submission ID 398 | * `title: str` submission title 399 | * `author: UserPartial` submission author (only the `name`, `title`, and `avatar_url` fields are filled) 400 | * `date: datetime` upload date as a [`datetime` object](https://docs.python.org/3/library/datetime.html) (defaults to 401 | timestamp 0) 402 | * `tags: list[str]` tags list 403 | * `category: str` category 404 | * `species: str` species 405 | * `gender: str` gender 406 | * `rating: str` rating 407 | * `stats: SubmissionStats` submission statistics stored in a named tuple (`views`, `comments` (count), `favorites`) 408 | * `type: str` submission type (text, image, etc...) 409 | * `description: str` description in HTML format 410 | * `description_bbcode: str` description in BBCode format 411 | * `footer: str` footer in HTML format 412 | * `mentions: list[str]` the users mentioned in the description (if they were mentioned as links, e.g. `:iconusername:`, 413 | `@username`, etc.) 414 | * `folder: str` the submission folder (gallery or scraps) 415 | * `user_folders: list[SubmissionUserFolder]` user folders stored in a list of named tuples (`name`, `url`, `group` ( 416 | if any)) 417 | * `file_url: str` the URL to the submission file 418 | * `thumbnail_url: str` the URL to the submission thumbnail 419 | * `prev: int` the ID of the previous submission (if any) 420 | * `next: int` the ID of the next submission (if any) 421 | * `favorite: bool` `True` if the submission is a favorite, `False` otherwise 422 | * `favorite_toggle_link: str` the link to toggle the favorite status (`/fav/` or `/unfav/` type URL) 423 | * `comments: list[Comment]` the comments to the submission, organised in a tree structure 424 | * `submission_page: bs4.BeautifulSoup` the submission page used to parse the object fields 425 | 426 | `Submission` objects can be directly cast to a dict object and iterated through. 427 | 428 | Comparison with `Submission` can be made with either another `Submission` or `SubmissionPartial` object (the IDs are 429 | compared), or an integer (the `Submission.id` value is compared to the given integer). 430 | 431 | #### Init 432 | 433 | `__init__(submission_page: bs4.BeautifulSoup = None)` 434 | 435 | To initialise the object, an optional `bs4.BeautifulSoup` object is needed containing the parsed HTML of a submission 436 | page. 437 | 438 | If no `submission_page` is passed then the object fields will remain at their default - empty - value. 439 | 440 | #### Methods 441 | 442 | * `url -> str`
443 | Property method that returns the Fur Affinity URL to the submission (`https://www.furaffinity.net/view/{id}`). 444 | * `parse(submission_page: bs4.BeautifulSoup = None)`
445 | Parses the stored submission page for metadata. If `submission_page` is passed, it overwrites the 446 | existing `submission_page` value. 447 | 448 | ### Comment 449 | 450 | This class contains comment metadata and is used to build a tree structure with the comments and their replies. 451 | 452 | * `id: int` the comment ID 453 | * `author: UserPartial` the user who posted the comment 454 | * `date: datetime` the date the comment was posted 455 | * `text: str` the comment text in HTML format 456 | * `text_bbcode: str` the comment text in BBCode format 457 | * `replies: list[Comment]` list of replies to the comment 458 | * `reply_to: Comment | int | None` the parent comment, if the comment is a reply. The variable type is `int` only if the 459 | comment is parsed outside the parse method of a `Submission` or `Journal` (e.g. by creating a new comment with a 460 | comment tag), and when iterating over the parent object (to avoid infinite recursion errors), be it `Submission` 461 | , `Journal` or another `Comment`. 462 | * `edited: bool` `True` if the comment was edited, `False` otherwise 463 | * `hidden: bool` `True` if the comment was hidden, `False` otherwise (if the comment was hidden, the author and date 464 | fields will default to their empty values) 465 | * `parent: Submission | Journal | None` the `Submission` or `Journal` object the comment is connected to 466 | * `comment_tag: bs4.element.Tag` the comment tag used to parse the object fields 467 | 468 | `Comment` objects can be directly cast to a dict object and iterated through. 469 | 470 | Comparison with `Comment` can be made with either another `Comment` (the IDs are compared), or an integer ( 471 | the `Comment.id` value is compared to the given integer). 472 | 473 | *Note:* The `__iter__` method of `Comment` objects automatically removes recursion. The `parent` variable is set 474 | to `None` and `reply_to` is set to the comment's ID.
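For example, a minimal sketch of working with a parsed comment tree (assuming an already-authenticated `FAAPI` instance named `api`; the submission ID is a placeholder):

```python
import faapi

submission, _ = api.submission(12345678)  # placeholder submission ID

# Casting a Comment to a dict serialises its replies and reply_to value
# without the infinite recursion described in the note above.
for comment in submission.comments:
    comment_dict = dict(comment)
    print(comment_dict["id"], len(comment_dict["replies"]))

# flatten_comments returns a flat list of the whole tree, replies included.
for comment in faapi.comment.flatten_comments(submission.comments):
    print(comment.id, comment.author.name, comment.edited)
```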
475 | *Note:* Because each comment contains the parent `Submission` or `Journal` object (which contains the comment itself) 476 | and the replied comment object, some iterations may cause infinite recursion errors, for example when using 477 | the `copy.deepcopy` function. If such iterations are needed, simply set the `parent` variable to `None` and 478 | the `reply_to` variable to `None` or the comment's ID (this can be done easily after flattening the comments list 479 | with `faapi.comment.flatten_comments`; the comments can then be sorted again with `faapi.comment.sort_comments`, which 480 | will also restore the `reply_to` values to `Comment` objects). 481 | 482 | #### Init 483 | 484 | `__init__(tag: bs4.element.Tag = None, parent: Submission | Journal = None)` 485 | 486 | To initialise the object, an optional `bs4.element.Tag` object is needed containing the comment tag as taken from a 487 | submission/journal page. 488 | 489 | The optional `parent` argument sets the `parent` variable described above. 490 | 491 | If no `tag` is passed then the object fields will remain at their default - empty - value. 492 | 493 | #### Methods 494 | 495 | * `url -> str`
496 | Property method that returns the Fur Affinity URL to the comment ( 497 | e.g. `https://www.furaffinity.net/view/12345678#cid:1234567890`). If the `parent` variable is `None`, the property 498 | returns an empty string. 499 | * `parse(tag: bs4.element.Tag = None)`
500 | Parses the stored tag for metadata. If `tag` is passed, it overwrites the existing `tag` value. 501 | 502 | #### Extra Functions 503 | 504 | These extra functions can be used to operate on a list of comments. They only alter the order and structure, but they do 505 | not touch any of the metadata. 506 | 507 | * `faapi.comment.sort_comments(comments: list[Comment]) -> list[Comment]`
508 | Sorts a list of comments into a tree structure. Replies are overwritten. 509 | * `faapi.comment.flatten_comments(comments: list[Comment]) -> list[Comment]`
510 | Flattens a list of comments. Replies are not modified. 511 | 512 | #### Comment Tree Graphs 513 | 514 | Using the tree structure generated by the library, it is trivial to build a graph visualisation of the comment tree 515 | using the [DOT](https://www.graphviz.org/doc/info/lang.html) language. 516 | 517 | ```python 518 | submission, _ = api.submission(12345678) 519 | comments = faapi.comment.flatten_comments(submission.comments) 520 | with open("comments.dot", "w") as f: 521 | f.write("digraph {\n") 522 | for comment in [c for c in comments if c.reply_to is None]: 523 | f.write(f" parent -> {comment.id}\n") 524 | for comment in comments: 525 | for reply in comment.replies: 526 | f.write(f" {comment.id} -> {reply.id}\n") 527 | f.write("}") 528 | ``` 529 | 530 | ```dot 531 | digraph { 532 | parent -> 157990848 533 | parent -> 157993838 534 | parent -> 157997294 535 | 157990848 -> 158014077 536 | 158014077 -> 158014816 537 | 158014816 -> 158093180 538 | 158093180 -> 158097024 539 | 157993838 -> 157998464 540 | 157993838 -> 158014126 541 | 157997294 -> 158014135 542 | 158014135 -> 158014470 543 | 158014135 -> 158030074 544 | 158014470 -> 158093185 545 | 158030074 -> 158093199 546 | } 547 | ``` 548 | 549 | comments tree graph 550 | 551 | _The graph above was generated with [quickchart.io](https://quickchart.io/documentation/graphviz-api/)_ 552 | 553 | ## BBCode Conversion 554 | 555 | Using the BBCode fields allows converting between the raw HTML recovered from Fur Affinity and BBCode tags that follow 556 | FA's guidelines. Conversion from HTML to BBCode covers all known tags and preserves all newlines and spacing. 557 | 558 | BBCode text can be converted to Fur Affinity's HTML using the `faapi.parse.bbcode_to_html()` function. The majority of 559 | submissions can be converted back and forth between HTML and BBCode without any information loss; however, the parser 560 | rules are still a work in progress and there are many edge cases where unusual text and formatting cause the parser to 561 | generate incorrect HTML. 562 | 563 | ## Exceptions 564 | 565 | The following are the exceptions explicitly raised by the FAAPI functions. The exceptions deriving from `ParsingError` 566 | are chosen depending on the content of the page. Because Fur Affinity doesn't use HTTP status codes besides 404, the 567 | page is checked against a static list of known error messages/page titles in order to determine the specific error to be 568 | used. If no match is found, then the `ServerError` (if the page has the "Server Error" title) or the more 569 | general `NoticeMessage` exceptions are used instead. The actual error message parsed from the page is used as the argument 570 | for the exceptions, so that it can be analysed when caught. 571 | 572 | * `DisallowedPath(Exception)` The path is not allowed by the robots.txt. 573 | * `Unauthorized(Exception)` The user is not logged-in. 574 | * `ParsingError(Exception)` An error occurred while parsing the page. 575 | * `NonePage(ParsingError)` The parsed page is `None`. 576 | * `NotFound(ParsingError)` The resource could not be found (general 404 page or non-existing submission, user, or 577 | journal). 578 | * `NoTitle(ParsingError)` The parsed page is missing a title. 579 | * `DisabledAccount(ParsingError)` The resource belongs to a disabled account. 580 | * `ServerError(ParsingError)` The page contains a server error notice. 581 | * `NoticeMessage(ParsingError)` A notice of unknown type was found in the page.
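For example, a minimal sketch of how these exceptions might be caught (assuming an already-authenticated `FAAPI` instance named `api`; the username is a placeholder):

```python
from faapi.exceptions import DisabledAccount, NotFound, ParsingError

try:
    user = api.user("some_username")  # placeholder username
except NotFound:
    print("The user does not exist")
except DisabledAccount:
    print("The account is disabled or pending deletion")
except ParsingError as err:
    # Catch-all for the remaining parsing errors; the message parsed
    # from the page is available in the exception arguments.
    print("Could not parse the page:", *err.args)
```

Because `NotFound` and `DisabledAccount` derive from `ParsingError`, the more specific handlers must come before the general one.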
582 | 583 | ## Beautiful Soup Warnings 584 | 585 | When parsing some pages or converting HTML to BBCode, the [Beautiful Soup](https://pypi.org/project/beautifulsoup4/) 586 | library may give some warnings, for example `MarkupResemblesLocatorWarning`. These warnings are left enabled for 587 | clarity, but can be disabled manually using the `warnings.filterwarnings` function. 588 | 589 | ## Contributing 590 | 591 | All contributions and suggestions are welcome! 592 | 593 | If you have suggestions for fixes or improvements, you can open an issue with your idea, see [#Issues](#issues) for 594 | details. 595 | 596 | ## Issues 597 | 598 | If any problem is encountered during usage of the program, an issue can be opened 599 | on [GitHub](https://github.com/FurryCoders/FAAPI/issues). 600 | 601 | Issues can also be used to suggest improvements and features. 602 | 603 | When opening an issue for a problem, please copy the error message and describe the operation in progress when the error 604 | occurred. 605 | -------------------------------------------------------------------------------- /faapi/__init__.py: -------------------------------------------------------------------------------- 1 | from .__version__ import __version__ 2 | from .base import FAAPI 3 | from .comment import Comment 4 | from .journal import Journal 5 | from .journal import JournalPartial 6 | from .submission import Submission 7 | from .submission import SubmissionPartial 8 | from .user import User 9 | from .user import UserPartial 10 | 11 | __all__ = [ 12 | "__version__", 13 | "FAAPI", 14 | "Comment", 15 | "Journal", 16 | "JournalPartial", 17 | "Submission", 18 | "SubmissionPartial", 19 | "User", 20 | "UserPartial", 21 | "exceptions", 22 | "connection", 23 | "parse" 24 | ] 25 | -------------------------------------------------------------------------------- /faapi/__version__.py: -------------------------------------------------------------------------------- 1 | __version__ = "3.11.9" 2 | -------------------------------------------------------------------------------- /faapi/base.py: -------------------------------------------------------------------------------- 1 | from http.cookiejar import CookieJar 2 | from time import sleep 3 | from time import time 4 | from typing import Any 5 | from typing import Optional 6 | from typing import Type 7 | from typing import Union 8 | from urllib.parse import quote 9 | from urllib.robotparser import RobotFileParser 10 | 11 | from requests import Session 12 | 13 | from .connection import CookieDict 14 | from .connection import Response 15 | from .connection import get 16 | from .connection import get_robots 17 | from .connection import join_url 18 | from .connection import make_session 19 | from .connection import stream_binary 20 | from .exceptions import DisallowedPath 21 | from .exceptions import Unauthorized 22 | from .journal import Journal 23 | from .journal import JournalPartial 24 | from .parse import BeautifulSoup 25 | from .parse import check_page_raise 26 | from .parse import parse_loggedin_user 27 | from .parse import parse_page 28 | from .parse import parse_submission_figures 29 | from .parse import parse_user_favorites 30 | from .parse import parse_user_journals 31 | from .parse import parse_user_submissions 32 | from .parse import parse_watchlist 33 | from .parse import username_url 34 | from .submission import Submission 35 | from .submission import SubmissionPartial 36 | from .user import User 37 | from .user import UserPartial 38 | 39 | 40 | # noinspection 
GrazieInspection 41 | class FAAPI: 42 | """ 43 | This class provides the methods to access and parse Fur Affinity pages and retrieve objects. 44 | """ 45 | 46 | def __init__(self, cookies: Union[list[CookieDict], CookieJar], session_class: Type[Session] = Session): 47 | """ 48 | :param cookies: The cookies for the session. 49 | :param session_class: The class to use for the session (defaults to requests.Session). 50 | """ 51 | 52 | self.session: Session = make_session(cookies, session_class) # Session used for get requests 53 | self.robots: RobotFileParser = get_robots(self.session) # robots.txt handler 54 | self.last_get: float = time() - self.crawl_delay # Time of last get (UNIX time) 55 | self.raise_for_unauthorized: bool = True # Control login checks 56 | self.timeout: Optional[int] = None # Timeout for requests 57 | 58 | @property 59 | def user_agent(self) -> str: 60 | """ 61 | The user agent of the session 62 | """ 63 | return ua.decode() if isinstance(ua := self.session.headers["User-Agent"], bytes) else ua 64 | 65 | @property 66 | def crawl_delay(self) -> float: 67 | """ 68 | Crawl delay from robots.txt 69 | """ 70 | return float(self.robots.crawl_delay(self.user_agent) or 1) 71 | 72 | def load_cookies(self, cookies: Union[list[CookieDict], CookieJar]): 73 | """ 74 | Load new cookies and create a new session. 75 | 76 | :param cookies: The cookies for the session. 77 | """ 78 | self.session = make_session(cookies, self.session.__class__) 79 | 80 | def handle_delay(self): 81 | """ 82 | Handles the crawl delay as set in the robots.txt 83 | """ 84 | if (d := time() - self.last_get) < self.crawl_delay: 85 | sleep(self.crawl_delay - d) 86 | self.last_get = time() 87 | 88 | def check_path(self, path: str, *, raise_for_disallowed: bool = False) -> bool: 89 | """ 90 | Checks whether a given path is allowed by the robots.txt. 91 | 92 | :param path: The path to check. 93 | :param raise_for_disallowed: Whether to raise an exception for a non-allowed path. 94 | :return: True if the path is allowed in the robots.txt, False otherwise. 95 | """ 96 | if not (allowed := self.robots.can_fetch(self.user_agent, "/" + path.lstrip("/"))) and raise_for_disallowed: 97 | raise DisallowedPath(f"Path {path!r} is not allowed by robots.txt") 98 | return allowed 99 | 100 | @property 101 | def connection_status(self) -> bool: 102 | """ 103 | Check the status of the connection to Fur Affinity. 104 | 105 | :return: True if it can connect, False otherwise. 106 | """ 107 | try: 108 | return self.get("/").ok 109 | except ConnectionError: 110 | return False 111 | 112 | @property 113 | def login_status(self) -> bool: 114 | """ 115 | Check the login status of the given cookies. 116 | 117 | :return: True if the cookies belong to a login session, False otherwise. 118 | """ 119 | return parse_loggedin_user(self.get_parsed("login", skip_auth_check=True)) is not None 120 | 121 | def get(self, path: str, **params: Union[str, bytes, int, float]) -> Response: 122 | """ 123 | Fetch a path with a GET request. 124 | The path is checked against the robots.txt before the request is made. 125 | The crawl-delay setting is enforced wth a wait time. 126 | 127 | :param path: The path to fetch. 128 | :param params: Query parameters for the request. 129 | :return: A Response object from the request. 
130 | """ 131 | self.check_path(path, raise_for_disallowed=True) 132 | self.handle_delay() 133 | return get(self.session, path, timeout=self.timeout, params=params) 134 | 135 | def get_parsed(self, path: str, *, skip_page_check: bool = False, skip_auth_check: bool = False, 136 | **params: Union[str, bytes, int, float]) -> BeautifulSoup: 137 | """ 138 | Fetch a path with a GET request and parse it using BeautifulSoup. 139 | 140 | :param path: The path to fetch. 141 | :param skip_page_check: Whether to skip checking the parsed page for errors. 142 | :param skip_auth_check: Whether to skip checking the parsed page for login status. 143 | :param params: Query parameters for the request. 144 | :return: A BeautifulSoup object containing the parsed content of the request response. 145 | """ 146 | response: Response = self.get(path, **params) 147 | response.raise_for_status() 148 | page: BeautifulSoup = parse_page(response.text) 149 | if not skip_page_check: 150 | check_page_raise(page) 151 | if not skip_auth_check and self.raise_for_unauthorized and not parse_loggedin_user(page): 152 | raise Unauthorized("Not logged in") 153 | return page 154 | 155 | def me(self) -> Optional[User]: 156 | """ 157 | Fetch the information of the logged-in user. 158 | 159 | :return: A User object for the logged-in user, or None if the cookies are not from a login session. 160 | """ 161 | return self.user(user) if (user := parse_loggedin_user(self.get_parsed("login"))) else None 162 | 163 | def frontpage(self) -> list[SubmissionPartial]: 164 | """ 165 | Fetch latest submissions from Fur Affinity's front page 166 | 167 | :return: A list of SubmissionPartial objects 168 | """ 169 | page_parsed: BeautifulSoup = self.get_parsed("/") 170 | submissions: list[SubmissionPartial] = [SubmissionPartial(f) for f in parse_submission_figures(page_parsed)] 171 | return sorted({s for s in submissions}, reverse=True) 172 | 173 | def submission(self, submission_id: int, get_file: bool = False, *, chunk_size: Optional[int] = None 174 | ) -> tuple[Submission, Optional[bytes]]: 175 | """ 176 | Fetch a submission and, optionally, its file. 177 | 178 | :param submission_id: The ID of the submission. 179 | :param get_file: Whether to download the submission file. 180 | :param chunk_size: The chunk_size to be used for the download (does not override get_file). 181 | :return: A Submission object and a bytes object (if the submission file is downloaded). 182 | """ 183 | sub: Submission = Submission(self.get_parsed(join_url("view", int(submission_id)))) 184 | sub_file: Optional[bytes] = self.submission_file(sub, chunk_size=chunk_size) if get_file and sub.id else None 185 | return sub, sub_file 186 | 187 | def submission_file(self, submission: Submission, *, chunk_size: Optional[int] = None) -> bytes: 188 | """ 189 | Fetch a submission file from a Submission object. 190 | 191 | :param submission: A Submission object. 192 | :param chunk_size: The chunk_size to be used for the download. 193 | :return: The submission file as a bytes object. 194 | """ 195 | self.handle_delay() 196 | return stream_binary(self.session, submission.file_url, chunk_size=chunk_size, timeout=self.timeout) 197 | 198 | def journal(self, journal_id: int) -> Journal: 199 | """ 200 | Fetch a journal. 201 | 202 | :param journal_id: The ID of the journal. 203 | :return: A Journal object. 204 | """ 205 | return Journal(self.get_parsed(join_url("journal", int(journal_id)))) 206 | 207 | def user(self, user: str) -> User: 208 | """ 209 | Fetch a user. 
210 | 211 | :param user: The name of the user (_ characters are allowed). 212 | :return: A User object. 213 | """ 214 | return User(self.get_parsed(join_url("user", quote(username_url(user))))) 215 | 216 | # noinspection DuplicatedCode 217 | def gallery(self, user: str, page: int = 1) -> tuple[list[SubmissionPartial], Optional[int]]: 218 | """ 219 | Fetch a user's gallery page. 220 | 221 | :param user: The name of the user (_ characters are allowed). 222 | :param page: The page to fetch. 223 | :return: A list of SubmissionPartial objects and the next page (None if it is the last). 224 | """ 225 | page_parsed: BeautifulSoup = self.get_parsed(join_url("gallery", quote(username_url(user)), int(page))) 226 | info_parsed: dict[str, Any] = parse_user_submissions(page_parsed) 227 | author: UserPartial = UserPartial() 228 | author.name, author.status, author.title, author.join_date, author.avatar_url = [ 229 | info_parsed["name"], info_parsed["status"], 230 | info_parsed["title"], info_parsed["join_date"], 231 | info_parsed["avatar_url"] 232 | ] 233 | for s in (submissions := list(map(SubmissionPartial, info_parsed["figures"]))): 234 | s.author = author 235 | return submissions, (page + 1) if not info_parsed["last_page"] else None 236 | 237 | # noinspection DuplicatedCode 238 | def scraps(self, user: str, page: int = 1) -> tuple[list[SubmissionPartial], Optional[int]]: 239 | """ 240 | Fetch a user's scraps page. 241 | 242 | :param user: The name of the user (_ characters are allowed). 243 | :param page: The page to fetch. 244 | :return: A list of SubmissionPartial objects and the next page (None if it is the last). 245 | """ 246 | page_parsed: BeautifulSoup = self.get_parsed(join_url("scraps", quote(username_url(user)), int(page))) 247 | info_parsed: dict[str, Any] = parse_user_submissions(page_parsed) 248 | author: UserPartial = UserPartial() 249 | author.name, author.status, author.title, author.join_date, author.avatar_url = [ 250 | info_parsed["name"], info_parsed["status"], 251 | info_parsed["title"], info_parsed["join_date"], 252 | info_parsed["avatar_url"] 253 | ] 254 | for s in (submissions := list(map(SubmissionPartial, info_parsed["figures"]))): 255 | s.author = author 256 | return submissions, (page + 1) if not info_parsed["last_page"] else None 257 | 258 | def favorites(self, user: str, page: str = "") -> tuple[list[SubmissionPartial], Optional[str]]: 259 | """ 260 | Fetch a user's favorites page. 261 | 262 | :param user: The name of the user (_ characters are allowed). 263 | :param page: The page to fetch. 264 | :return: A list of SubmissionPartial objects and the next page (None if it is the last). 265 | """ 266 | page_parsed: BeautifulSoup = self.get_parsed(join_url("favorites", quote(username_url(user)), page.strip())) 267 | info_parsed: dict[str, Any] = parse_user_favorites(page_parsed) 268 | submissions: list[SubmissionPartial] = list(map(SubmissionPartial, info_parsed["figures"])) 269 | return submissions, info_parsed["next_page"] or None 270 | 271 | def journals(self, user: str, page: int = 1) -> tuple[list[JournalPartial], Optional[int]]: 272 | """ 273 | Fetch a user's journals page. 274 | 275 | :param user: The name of the user (_ characters are allowed). 276 | :param page: The page to fetch. 277 | :return: A list of Journal objects and the next page (None if it is the last). 
278 | """ 279 | page_parsed: BeautifulSoup = self.get_parsed(join_url("journals", quote(username_url(user)), int(page))) 280 | info_parsed: dict[str, Any] = parse_user_journals(page_parsed) 281 | author: UserPartial = UserPartial() 282 | author.name, author.status, author.title, author.join_date, author.avatar_url = [ 283 | info_parsed["name"], info_parsed["status"], 284 | info_parsed["title"], info_parsed["join_date"], 285 | info_parsed["avatar_url"] 286 | ] 287 | for j in (journals := list(map(JournalPartial, info_parsed["sections"]))): 288 | j.author = author 289 | return journals, (page + 1) if not info_parsed["last_page"] else None 290 | 291 | def watchlist_to(self, user: str, page: int = 1) -> tuple[list[UserPartial], Optional[int]]: 292 | """ 293 | Fetch a page from the list of users watching the user. 294 | 295 | :param user: The name of the user (_ characters are allowed). 296 | :param page: The page to fetch. 297 | :return: A list of UserPartial objects and the next page (None if it is the last). 298 | """ 299 | users: list[UserPartial] = [] 300 | us, np = parse_watchlist( 301 | self.get_parsed(join_url("watchlist", "to", quote(username_url(user)), page), skip_auth_check=True)) 302 | for s, u in us: 303 | _user: UserPartial = UserPartial() 304 | _user.name = u 305 | _user.status = s 306 | users.append(_user) 307 | return users, np if np and np != page else None 308 | 309 | def watchlist_by(self, user: str, page: int = 1) -> tuple[list[UserPartial], Optional[int]]: 310 | """ 311 | Fetch a page from the list of users watched by the user. 312 | :param user: The name of the user (_ characters are allowed). 313 | :param page: The page to fetch. 314 | :return: A list of UserPartial objects and the next page (None if it is the last). 315 | """ 316 | users: list[UserPartial] = [] 317 | us, np = parse_watchlist( 318 | self.get_parsed(join_url("watchlist", "by", quote(username_url(user)), page), skip_auth_check=True)) 319 | for s, u in us: 320 | _user: UserPartial = UserPartial() 321 | _user.name = u 322 | _user.status = s 323 | users.append(_user) 324 | return users, np if np and np != page else None 325 | -------------------------------------------------------------------------------- /faapi/comment.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime 2 | from functools import reduce 3 | from typing import Optional 4 | from typing import Union 5 | 6 | from bs4.element import Tag 7 | 8 | import faapi 9 | from .exceptions import _raise_exception 10 | from .parse import html_to_bbcode 11 | from .parse import parse_comment_tag 12 | 13 | 14 | class Comment: 15 | """ 16 | Contains comment information and references to replies and parent objects. 
17 | """ 18 | 19 | def __init__(self, tag: Optional[Tag] = None, 20 | parent: Optional[Union[faapi.submission.Submission, faapi.journal.Journal]] = None): 21 | """ 22 | :param tag: The comment tag from which to parse information 23 | :param parent: The parent object of the comment 24 | """ 25 | assert tag is None or isinstance(tag, Tag), _raise_exception(TypeError(f"tag must be {None} or {Tag.__name__}")) 26 | 27 | self.comment_tag: Optional[Tag] = tag 28 | 29 | self.id: int = 0 30 | self.author: faapi.user.UserPartial = faapi.user.UserPartial() 31 | self.date: datetime = datetime.fromtimestamp(0) 32 | self.text: str = "" 33 | self.replies: list[Comment] = [] 34 | self.reply_to: Optional[Union[Comment, int]] = None 35 | self.edited: bool = False 36 | self.hidden: bool = False 37 | self.parent: Optional[Union[faapi.submission.Submission, faapi.journal.Journal]] = parent 38 | 39 | self.parse() 40 | 41 | def __hash__(self) -> int: 42 | return hash((self.id, type(self.parent), self.parent)) 43 | 44 | def __eq__(self, other) -> bool: 45 | if isinstance(other, Comment): 46 | return other.id == self.id and self.parent == other.parent 47 | elif isinstance(other, int): 48 | return other == self.id 49 | return False 50 | 51 | def __gt__(self, other) -> bool: 52 | if isinstance(other, Comment): 53 | return self.id > other.id 54 | elif isinstance(other, int): 55 | return self.id > other 56 | return False 57 | 58 | def __ge__(self, other) -> bool: 59 | if isinstance(other, Comment): 60 | return self.id >= other.id 61 | elif isinstance(other, int): 62 | return self.id >= other 63 | return False 64 | 65 | def __lt__(self, other) -> bool: 66 | if isinstance(other, Comment): 67 | return self.id < other.id 68 | elif isinstance(other, int): 69 | return self.id < other 70 | return False 71 | 72 | def __le__(self, other) -> bool: 73 | if isinstance(other, Comment): 74 | return self.id <= other.id 75 | elif isinstance(other, int): 76 | return self.id <= other 77 | return False 78 | 79 | def __iter__(self): 80 | yield "id", self.id 81 | yield "author", dict(self.author) 82 | yield "date", self.date 83 | yield "text", self.text 84 | yield "replies", _sort_comments_dict(self.replies) 85 | yield "reply_to", dict(_remove_recursion(self.reply_to)) if isinstance(self.reply_to, Comment) \ 86 | else self.reply_to 87 | yield "edited", self.edited 88 | yield "hidden", self.hidden 89 | yield "parent", None if self.parent is None else dict(self.parent) 90 | 91 | def __repr__(self): 92 | return self.__str__() 93 | 94 | def __str__(self): 95 | return f"{self.id} {self.author}".rstrip() 96 | 97 | @property 98 | def text_bbcode(self) -> str: 99 | """ 100 | The comment text formatted to BBCode 101 | 102 | :return: BBCode text 103 | """ 104 | return html_to_bbcode(self.text) 105 | 106 | @property 107 | def url(self): 108 | """ 109 | Compose the full URL to the comment. 110 | 111 | :return: The URL to the comment. 112 | """ 113 | return "" if self.parent is None else f"{self.parent.url}#cid:{self.id}" 114 | 115 | def parse(self, comment_tag: Optional[Tag] = None): 116 | """ 117 | Parse a comment tag, overrides any information already present in the object. 
118 | 119 | :param comment_tag: The comment tag from which to parse information 120 | """ 121 | assert comment_tag is None or isinstance(comment_tag, Tag), \ 122 | _raise_exception(TypeError(f"tag must be {None} or {Tag.__name__}")) 123 | 124 | self.comment_tag = comment_tag or self.comment_tag 125 | if self.comment_tag is None: 126 | return 127 | 128 | parsed: dict = parse_comment_tag(self.comment_tag) 129 | 130 | self.id = parsed["id"] 131 | self.date = datetime.fromtimestamp(parsed["timestamp"]) 132 | self.author = faapi.user.UserPartial() 133 | self.author.name = parsed["user_name"] 134 | self.author.display_name = parsed["user_display_name"] 135 | self.author.title = parsed["user_title"] 136 | self.author.avatar_url = parsed["avatar_url"] 137 | self.text = parsed["text"] 138 | self.replies = [] 139 | self.reply_to = parsed["parent"] 140 | self.edited = parsed["edited"] 141 | self.hidden = parsed["hidden"] 142 | 143 | 144 | def sort_comments(comments: list[Comment]) -> list[Comment]: 145 | """ 146 | Sort a list of comments into a tree structure. Replies are overwritten. 147 | 148 | :param comments: A list of Comment objects (flat or tree-structured) 149 | :return: A tree-structured list of comments with replies 150 | """ 151 | for comment in (comments := flatten_comments(comments)): 152 | comment.replies = [_set_reply_to(c, comment) for c in comments if c.reply_to == comment] 153 | return [c for c in comments if c.reply_to is None] 154 | 155 | 156 | def flatten_comments(comments: list[Comment]) -> list[Comment]: 157 | """ 158 | Flattens a list of comments. Replies are not modified. 159 | 160 | :param comments: A list of Comment objects (flat or tree-structured) 161 | :return: A flat date-sorted (ascending) list of comments 162 | """ 163 | replies: list[Comment] = comments 164 | comments_flat: list[Comment] = [] 165 | 166 | while replies: 167 | comments_flat.extend(replies) 168 | replies = [r for c in replies for r in c.replies] 169 | 170 | return sorted(set(comments_flat)) 171 | 172 | 173 | def _set_reply_to(comment: Comment, reply_to: Union[Comment, int]) -> Comment: 174 | comment.reply_to = reply_to 175 | return comment 176 | 177 | 178 | def _sort_comments_dict(comments: list[Comment]) -> list[dict]: 179 | comments_flat = flatten_comments(comments) 180 | comments_levels: list[list[Comment]] = [[c for c in comments_flat if not c.reply_to]] 181 | 182 | comments_flat = [c for c in comments_flat if c not in comments_levels[-1]] 183 | 184 | while comments_flat: 185 | comments_levels.append([c for c in comments_flat if c.reply_to in comments_levels[-1]]) 186 | comments_flat = [c for c in comments_flat if c not in comments_levels[-1]] 187 | 188 | comments_levels.reverse() 189 | 190 | comments_dicts: list[dict] = reduce( 191 | lambda prev, curr: [ 192 | dict(_remove_recursion(c)) | {"replies": [cd for cd in prev if cd["reply_to"] == c]} 193 | for c in curr 194 | ], 195 | comments_levels, 196 | [] 197 | ) 198 | return comments_dicts 199 | 200 | 201 | def _remove_recursion(comment: Comment) -> Comment: 202 | comment_new: Comment = Comment() 203 | 204 | comment_new.comment_tag = comment.comment_tag 205 | comment_new.id = comment.id 206 | comment_new.author = comment.author 207 | comment_new.date = comment.date 208 | comment_new.text = comment.text 209 | comment_new.replies = [] 210 | comment_new.reply_to = comment.reply_to.id if isinstance(comment.reply_to, Comment) else comment.reply_to 211 | comment_new.edited = comment.edited 212 | comment_new.hidden = comment.hidden 213 | comment_new.parent 
= None 214 | 215 | return comment_new 216 | -------------------------------------------------------------------------------- /faapi/connection.py: -------------------------------------------------------------------------------- 1 | from http.client import IncompleteRead 2 | from http.cookiejar import Cookie 3 | from http.cookiejar import CookieJar 4 | from platform import python_version 5 | from platform import uname 6 | from re import compile as re_compile 7 | from typing import Optional 8 | from typing import Type 9 | from typing import TypedDict 10 | from typing import Union 11 | from urllib.robotparser import RobotFileParser 12 | 13 | from requests import Response 14 | from requests import Session 15 | 16 | from .__version__ import __version__ 17 | from .exceptions import Unauthorized 18 | from .exceptions import _raise_exception 19 | 20 | root: str = "https://www.furaffinity.net" 21 | 22 | 23 | class CookieDict(TypedDict): 24 | name: str 25 | value: str 26 | 27 | 28 | def join_url(*url_comps: Union[str, int]) -> str: 29 | return "/".join(map(lambda e: str(e).strip(" /"), url_comps)) 30 | 31 | 32 | def make_session(cookies: Union[list[CookieDict], CookieJar], cls: Type[Session]) -> Session: 33 | assert len(cookies), _raise_exception(Unauthorized("No cookies for session")) 34 | session: Session = cls() 35 | session.headers["User-Agent"] = f"faapi/{__version__} Python/{python_version()} {(u := uname()).system}/{u.release}" 36 | 37 | for cookie in cookies: 38 | if isinstance(cookie, Cookie): 39 | session.cookies.set(cookie.name, cookie.value or "") 40 | else: 41 | session.cookies.set(cookie["name"], cookie["value"]) 42 | 43 | return session 44 | 45 | 46 | def get_robots(session: Session) -> RobotFileParser: 47 | robots: RobotFileParser = RobotFileParser(url := join_url(root, "robots.txt")) 48 | robots.parse(filter(re_compile(r"^[^#\s].+").match, map(str.strip, session.get(url).text.splitlines()))) 49 | return robots 50 | 51 | 52 | def get(session: Session, path: str, *, timeout: Optional[int] = None, 53 | params: Optional[dict[str, Union[str, bytes, int, float]]] = None) -> Response: 54 | return session.get(join_url(root, path), params=params, timeout=timeout) 55 | 56 | 57 | def stream_binary(session: Session, url: str, *, chunk_size: Optional[int] = None, 58 | timeout: Optional[int] = None) -> bytes: 59 | stream: Response = session.get(url, stream=True, timeout=timeout) 60 | stream.raise_for_status() 61 | 62 | file_binary: bytes = bytes().join(stream.iter_content(chunk_size)) 63 | 64 | if (length := int(stream.headers.get("Content-Length", 0))) > 0 and length != len(file_binary): 65 | raise IncompleteRead(file_binary, length - len(file_binary)) 66 | 67 | return file_binary 68 | -------------------------------------------------------------------------------- /faapi/exceptions.py: -------------------------------------------------------------------------------- 1 | class DisallowedPath(Exception): 2 | """ 3 | The path is not allowed by the robots.txt. 4 | """ 5 | 6 | 7 | class Unauthorized(Exception): 8 | """ 9 | The user is not logged-in. 10 | """ 11 | 12 | 13 | class ParsingError(Exception): 14 | """ 15 | An error occurred while parsing the page. 16 | """ 17 | 18 | 19 | class NonePage(ParsingError): 20 | """ 21 | The parsed page is None. 22 | """ 23 | 24 | 25 | class NoTitle(ParsingError): 26 | """ 27 | The parsed paged is missing a title. 28 | """ 29 | 30 | 31 | class NotFound(ParsingError): 32 | """ 33 | The resource could not be found. 
34 | """ 35 | 36 | 37 | class DisabledAccount(ParsingError): 38 | """ 39 | The resource belongs to a disabled account. 40 | """ 41 | 42 | 43 | class ServerError(ParsingError): 44 | """ 45 | The page contains a server error notice. 46 | """ 47 | 48 | 49 | class NoticeMessage(ParsingError): 50 | """ 51 | A notice of unknown type was found in the page. 52 | """ 53 | 54 | 55 | def _raise_exception(err: BaseException): 56 | raise err 57 | -------------------------------------------------------------------------------- /faapi/journal.py: -------------------------------------------------------------------------------- 1 | from collections import namedtuple 2 | from datetime import datetime 3 | from typing import Optional 4 | from typing import Union 5 | 6 | from .connection import join_url 7 | from .connection import root 8 | from .exceptions import _raise_exception 9 | from .parse import BeautifulSoup 10 | from .parse import check_page_raise 11 | from .parse import html_to_bbcode 12 | from .parse import parse_comments 13 | from .parse import parse_journal_page 14 | from .parse import parse_journal_section 15 | from .parse import Tag 16 | from .user import UserPartial 17 | 18 | 19 | class JournalStats(namedtuple("JournalStats", ["comments"])): 20 | """ 21 | This object contains the journal's statistics: 22 | * comments 23 | """ 24 | 25 | 26 | class JournalBase: 27 | def __init__(self): 28 | self.id: int = 0 29 | self.title: str = "" 30 | self.date: datetime = datetime.fromtimestamp(0) 31 | self.author: UserPartial = UserPartial() 32 | self.stats: JournalStats = JournalStats(0) 33 | self.content: str = "" 34 | self.mentions: list[str] = [] 35 | 36 | def __hash__(self) -> int: 37 | return hash(self.id) 38 | 39 | def __eq__(self, other) -> bool: 40 | if isinstance(other, JournalBase): 41 | return other.id == self.id 42 | elif isinstance(other, int): 43 | return other == self.id 44 | return False 45 | 46 | def __gt__(self, other) -> bool: 47 | if isinstance(other, JournalBase): 48 | return self.id > other.id 49 | elif isinstance(other, int): 50 | return self.id > other 51 | return False 52 | 53 | def __ge__(self, other) -> bool: 54 | if isinstance(other, JournalBase): 55 | return self.id >= other.id 56 | elif isinstance(other, int): 57 | return self.id >= other 58 | return False 59 | 60 | def __lt__(self, other) -> bool: 61 | if isinstance(other, JournalBase): 62 | return self.id < other.id 63 | elif isinstance(other, int): 64 | return self.id < other 65 | return False 66 | 67 | def __le__(self, other) -> bool: 68 | if isinstance(other, JournalBase): 69 | return self.id <= other.id 70 | elif isinstance(other, int): 71 | return self.id <= other 72 | return False 73 | 74 | def __iter__(self): 75 | yield "id", self.id 76 | yield "title", self.title 77 | yield "date", self.date 78 | yield "author", dict(self.author) 79 | yield "stats", self.stats._asdict() 80 | yield "content", self.content 81 | yield "mentions", self.mentions 82 | 83 | def __repr__(self): 84 | return self.__str__() 85 | 86 | def __str__(self): 87 | return f"{self.id} {self.author} {self.title}" 88 | 89 | @property 90 | def content_bbcode(self) -> str: 91 | """ 92 | The journal content formatted to BBCode 93 | 94 | :return: BBCode content 95 | """ 96 | return html_to_bbcode(self.content) 97 | 98 | @property 99 | def url(self) -> str: 100 | """ 101 | Compose the full URL to the journal. 102 | 103 | :return: The URL to the journal. 
104 | """ 105 | return join_url(root, "journal", self.id) 106 | 107 | 108 | class JournalPartial(JournalBase): 109 | """ 110 | Contains partial journal information gathered from journals pages. 111 | """ 112 | 113 | def __init__(self, journal_tag: Optional[Tag] = None): 114 | """ 115 | :param journal_tag: The tag from which to parse the journal. 116 | """ 117 | assert journal_tag is None or isinstance(journal_tag, Tag), \ 118 | _raise_exception(TypeError(f"journal_item must be {None} or {Tag.__name__}")) 119 | self.journal_tag: Optional[Tag] = journal_tag 120 | 121 | super(JournalPartial, self).__init__() 122 | 123 | self.parse() 124 | 125 | def parse(self, journal_tag: Optional[Union[Tag, BeautifulSoup]] = None): 126 | """ 127 | Parse a journal tag, overrides any information already present in the object. 128 | 129 | :param journal_tag: The tag from which to parse the journal. 130 | """ 131 | assert journal_tag is None or isinstance(journal_tag, BeautifulSoup), \ 132 | _raise_exception(TypeError(f"journal_item must be {None} or {BeautifulSoup.__name__}")) 133 | 134 | self.journal_tag = journal_tag or self.journal_tag 135 | if self.journal_tag is None: 136 | return 137 | 138 | parsed: dict = parse_journal_section(self.journal_tag) 139 | 140 | # noinspection DuplicatedCode 141 | self.id = parsed["id"] 142 | self.title = parsed["title"] 143 | self.author.name = parsed.get("user_name", "") 144 | self.author.display_name = parsed.get("user_display_name", "") 145 | self.author.status = parsed.get("user_status", "") 146 | self.author.title = parsed.get("user_title", "") 147 | self.author.join_date = parsed.get("user_join_date", "") 148 | self.author.avatar_url = parsed.get("avatar_url", "") 149 | self.stats = JournalStats(parsed["comments"]) 150 | self.date = parsed["date"] 151 | self.content = parsed["content"] 152 | self.mentions = parsed["mentions"] 153 | 154 | 155 | class Journal(JournalBase): 156 | """ 157 | Contains complete journal information gathered from journal pages, including comments. 158 | """ 159 | 160 | def __init__(self, journal_page: Optional[BeautifulSoup] = None): 161 | """ 162 | :param journal_page: The page from which to parse the journal. 163 | """ 164 | assert journal_page is None or isinstance(journal_page, BeautifulSoup), \ 165 | _raise_exception(TypeError(f"journal_item must be {None} or {BeautifulSoup.__name__}")) 166 | self.journal_page: Optional[BeautifulSoup] = journal_page 167 | 168 | super(Journal, self).__init__() 169 | 170 | self.header: str = "" 171 | self.footer: str = "" 172 | from .comment import Comment 173 | self.comments: list[Comment] = [] 174 | 175 | self.parse() 176 | 177 | def __iter__(self): 178 | for k, v in super(Journal, self).__iter__(): 179 | yield k, v 180 | yield "header", self.header 181 | yield "footer", self.footer 182 | from .comment import _sort_comments_dict 183 | yield "comments", _sort_comments_dict(self.comments) 184 | 185 | @property 186 | def header_bbcode(self) -> str: 187 | """ 188 | The journal header formatted to BBCode 189 | 190 | :return: BBCode header 191 | """ 192 | return html_to_bbcode(self.header) 193 | 194 | @property 195 | def footer_bbcode(self) -> str: 196 | """ 197 | The journal footer formatted to BBCode 198 | 199 | :return: BBCode footer 200 | """ 201 | return html_to_bbcode(self.footer) 202 | 203 | def parse(self, journal_page: Optional[Union[Tag, BeautifulSoup]] = None): 204 | """ 205 | Parse a journal page, overrides any information already present in the object. 
206 | 207 | :param journal_page: The page from which to parse the journal. 208 | """ 209 | assert journal_page is None or isinstance(journal_page, BeautifulSoup), \ 210 | _raise_exception(TypeError(f"journal_item must be {None} or {BeautifulSoup.__name__}")) 211 | 212 | self.journal_page = journal_page or self.journal_page 213 | if self.journal_page is None: 214 | return 215 | 216 | check_page_raise(self.journal_page) 217 | 218 | parsed: dict = parse_journal_page(self.journal_page) 219 | 220 | # noinspection DuplicatedCode 221 | self.id = parsed["id"] 222 | self.title = parsed["title"] 223 | self.author.name = parsed["user_info"]["name"] 224 | self.author.display_name = parsed["user_info"]["display_name"] 225 | self.author.status = parsed["user_info"]["status"] 226 | self.author.title = parsed["user_info"]["title"] 227 | self.author.join_date = parsed["user_info"]["join_date"] 228 | self.author.avatar_url = parsed["user_info"]["avatar_url"] 229 | self.stats = JournalStats(parsed["comments"]) 230 | self.date = parsed["date"] 231 | self.content = parsed["content"] 232 | self.header = parsed["header"] 233 | self.footer = parsed["footer"] 234 | self.mentions = parsed["mentions"] 235 | from .comment import sort_comments, Comment 236 | self.comments = sort_comments([Comment(t, self) for t in parse_comments(self.journal_page)]) 237 | -------------------------------------------------------------------------------- /faapi/parse.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime 2 | from re import compile as re_compile 3 | from re import Match 4 | from re import match 5 | from re import MULTILINE 6 | from re import Pattern 7 | from re import search 8 | from re import sub 9 | from typing import Any 10 | from typing import Optional 11 | from typing import Union 12 | from urllib.parse import quote 13 | 14 | from bbcode import Parser as BBCodeParser # type:ignore 15 | from bs4 import BeautifulSoup 16 | from bs4.element import NavigableString 17 | from bs4.element import Tag 18 | from dateutil.parser import parse as parse_date 19 | from urllib3.util import parse_url 20 | 21 | from .connection import root 22 | from .exceptions import _raise_exception 23 | from .exceptions import DisabledAccount 24 | from .exceptions import NonePage 25 | from .exceptions import NotFound 26 | from .exceptions import NoticeMessage 27 | from .exceptions import NoTitle 28 | from .exceptions import ParsingError 29 | from .exceptions import ServerError 30 | 31 | relative_url: Pattern = re_compile(r"^(?:https?://(?:www\.)?furaffinity\.net)?(.*)") 32 | mentions_regexp: Pattern = re_compile(r"^(?:(?:https?://)?(?:www\.)?furaffinity\.net)?/user/([^/#]+).*$") 33 | url_username_regexp: Pattern = re_compile(r"/(?:user|gallery|scraps|favorites|journals|commissions)/([^/]+)(/.*)?") 34 | watchlist_next_regexp: Pattern = re_compile(r"/watchlist/(?:by|to)/[^/]+/(\d+)") 35 | not_found_messages: tuple[str, ...] = ("not in our database", "cannot be found", "could not be found", "user not found") 36 | deactivated_messages: tuple[str, ...] = ("deactivated", "pending deletion") 37 | smilie_icons: tuple[str, ...] 
= ( 38 | "crying", "derp", "dunno", "embarrassed", "evil", "gift", "huh", "lmao", "love", "nerd", "note", "oooh", "pleased", 39 | "rollingeyes", "sad", "sarcastic", "serious", "sleepy", "smile", "teeth", "tongue", "veryhappy", "wink", "yelling", 40 | "zipped", "angel", "badhairday", "cd", "coffee", "cool", "whatever" 41 | ) 42 | 43 | 44 | def get_attr(tag: Tag, attr: str) -> str: 45 | return value[0] if isinstance(value := tag.attrs[attr], list) else value 46 | 47 | 48 | def parse_page(text: str) -> BeautifulSoup: 49 | return BeautifulSoup(text, "lxml") 50 | 51 | 52 | def check_page_raise(page: BeautifulSoup) -> None: 53 | if page is None: 54 | raise NonePage 55 | elif not (title := page.title.text.lower() if page.title else ""): 56 | raise NoTitle 57 | elif title.startswith("account disabled"): 58 | raise DisabledAccount 59 | elif title == "system error": 60 | error_text: str = error.text if (error := page.select_one("div.section-body")) else "" 61 | if any(m in error_text.lower() for m in not_found_messages): 62 | raise NotFound 63 | else: 64 | raise ServerError(*filter(bool, map(str.strip, error_text.splitlines()))) 65 | elif notice := page.select_one("section.notice-message"): 66 | notice_text: str = notice.text 67 | if any(m in notice_text.lower() for m in deactivated_messages): 68 | raise DisabledAccount 69 | elif any(m in notice_text.lower() for m in not_found_messages): 70 | raise NotFound 71 | else: 72 | raise NoticeMessage(*filter(bool, map(str.strip, notice_text.splitlines()))) 73 | 74 | 75 | def username_url(username: str) -> str: 76 | return sub(r"[^a-z\d.~`\[\]-]", "", username.lower()) 77 | 78 | 79 | def inner_html(tag: Tag) -> str: 80 | return tag.decode_contents() 81 | 82 | 83 | def clean_html(html: str) -> str: 84 | return html.strip().replace("\r", "") 85 | 86 | 87 | def html_to_bbcode(html: str) -> str: 88 | body: Optional[Tag] = parse_page(f"{html}").select_one("html > body") 89 | if not body: 90 | return "" 91 | 92 | for linkusername in body.select("a.linkusername"): 93 | linkusername.replaceWith(f"@{linkusername.text.strip()}") 94 | 95 | for iconusername in body.select("a.iconusername,a.usernameicon"): 96 | username: str = iconusername.text.strip() or iconusername.attrs.get('href', '').strip('/').split('/')[-1] 97 | if icon := iconusername.select_one("img"): 98 | username = icon.attrs.get('alt', '').strip() or username 99 | iconusername.replaceWith(f":icon{username}:" if iconusername.text.strip() else f":{username}icon:") 100 | 101 | for img in body.select("img"): 102 | img.replaceWith(f"[img={img.attrs.get('src', '')}/]") 103 | 104 | for hr in body.select("hr"): 105 | hr.replaceWith("-----") 106 | 107 | for smilie in body.select("i.smilie"): 108 | smilie_class: list[str] = list(smilie.attrs.get("class", [])) 109 | smilie_name: str = next(filter(lambda c: c not in ["smilie", ""], smilie_class), "") 110 | smilie.replaceWith(f":{smilie_name or 'smilie'}:") 111 | 112 | for span in body.select("span.bbcode[style*=color]"): 113 | if m := match(r".*color: ?([^ ;]+).*", span.attrs["style"]): 114 | span.replaceWith(f"[color={m[1]}]", *span.children, "[/color]") 115 | else: 116 | span.replaceWith(*span.children) 117 | 118 | for nav_link in body.select("span.parsed_nav_links"): 119 | a_tags = nav_link.select("a") 120 | a_prev_tag: Optional[Tag] = next((a for a in a_tags if "prev" in a.text.lower()), None) 121 | a_frst_tag: Optional[Tag] = next((a for a in a_tags if "first" in a.text.lower()), None) 122 | a_next_tag: Optional[Tag] = next((a for a in a_tags if "next" in 
a.text.lower()), None) 123 | a_prev = a_prev_tag.attrs.get("href", "").strip("/").split("/")[-1] if a_prev_tag else "" 124 | a_frst = a_frst_tag.attrs.get("href", "").strip("/").split("/")[-1] if a_frst_tag else "" 125 | a_next = a_next_tag.attrs.get("href", "").strip("/").split("/")[-1] if a_next_tag else "" 126 | nav_link.replaceWith(f"[{a_prev or '-'},{a_frst or '-'},{a_next or '-'}]") 127 | 128 | for a in body.select("a.auto_link_shortened:not(.named_url), a.auto_link:not(.named_url)"): 129 | a.replaceWith(a.attrs.get('href', '')) 130 | 131 | for a in body.select("a"): 132 | href_match: Optional[Match] = relative_url.match(a.attrs.get('href', '')) 133 | a.replaceWith( 134 | f"[url={href_match[1] if href_match else a.attrs.get('href', '')}]", 135 | *a.children, 136 | "[/url]" 137 | ) 138 | 139 | for yt in body.select("iframe[src*='youtube.com/embed']"): 140 | yt.replaceWith(f"[yt]https://youtube.com/embed/{yt.attrs.get('src', '').strip('/').split('/')}[/yt]") 141 | 142 | for quote_name_tag in body.select("span.bbcode.bbcode_quote > span.bbcode_quote_name"): 143 | quote_author: str = quote_name_tag.text.strip().removesuffix('wrote:').strip() 144 | quote_tag = quote_name_tag.parent 145 | if not quote_tag: 146 | quote_name_tag.replaceWith(quote_author) 147 | continue 148 | quote_name_tag.decompose() 149 | quote_tag.replaceWith( 150 | f"[quote{('=' + quote_author) if quote_author else ''}]", 151 | *quote_tag.children, 152 | "[/quote]" 153 | ) 154 | 155 | for quote_tag in body.select("span.bbcode.bbcode_quote"): 156 | quote_tag.replaceWith("[quote]", *quote_tag.children, "[/quote]") 157 | 158 | for [selector, bbcode_tag] in ( 159 | ("i", "i"), 160 | ("b", "b"), 161 | ("strong", "b"), 162 | ("u", "u"), 163 | ("s", "s"), 164 | ("code.bbcode_left", "left"), 165 | ("code.bbcode_center", "center"), 166 | ("code.bbcode_right", "right"), 167 | ("span.bbcode_spoiler", "spoiler"), 168 | ("sub", "sub"), 169 | ("sup", "sup"), 170 | ("h1", "h1"), 171 | ("h2", "h2"), 172 | ("h3", "h3"), 173 | ("h4", "h4"), 174 | ("h5", "h5"), 175 | ("h6", "h6"), 176 | ): 177 | for tag in body.select(selector): 178 | tag.replaceWith(f"[{bbcode_tag}]", *tag.children, f"[/{bbcode_tag}]") 179 | 180 | for br in body.select("br"): 181 | br.replaceWith("\n") 182 | 183 | for p in body.select("p"): 184 | p.replaceWith(*p.children) 185 | 186 | for tag in body.select("*"): 187 | if not (div_class := tag.attrs.get("class", None)): 188 | tag.replaceWith(f"[tag={tag.name}]", *tag.children, "[/tag.{tag.name}]") 189 | else: 190 | tag.replaceWith( 191 | f"[tag={tag.name}.{' '.join(div_class) if isinstance(div_class, list) else div_class}]", 192 | *tag.children, 193 | "[/tag]" 194 | ) 195 | 196 | bbcode: str = body.decode_contents() 197 | 198 | bbcode = sub(" *$", "", bbcode, flags=MULTILINE) 199 | bbcode = sub("^ *", "", bbcode, flags=MULTILINE) 200 | 201 | for char, substitution in ( 202 | ("©", "(c)"), 203 | ("™", "(tm)"), 204 | ("®", "(r)"), 205 | ("©", "(c)"), 206 | ("®", "(tm)"), 207 | ("™", "(r)"), 208 | ("<", "<"), 209 | (">", ">"), 210 | ("&", "&"), 211 | ): 212 | bbcode = bbcode.replace(char, substitution) 213 | 214 | return bbcode.strip(" ") 215 | 216 | 217 | def bbcode_to_html(bbcode: str) -> str: 218 | def render_url(_tag_name, value: str, options: dict[str, str], _parent, _context) -> str: 219 | return f'{value}' 220 | 221 | def render_color(_tag_name, value, options, _parent, _context) -> str: 222 | return f'{value}' 223 | 224 | def render_quote(_tag_name, value: str, options: dict[str, str], _parent, _context) -> str: 225 
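# html_to_bbcode() above works by replacing tags in place with their BBCode
# markers, trimming per-line whitespace, and finally substituting special
# characters such as "©" -> "(c)". A hedged sketch of the intended behaviour
# for a hypothetical snippet (derived from the rules above, not a captured
# output):
#
#   html_to_bbcode('<strong>Hi</strong> <a href="https://www.furaffinity.net/view/123/">here</a>')
#   ->  '[b]Hi[/b] [url=/view/123/]here[/url]'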
| author: str = options.get("quote", "") 226 | author = f"{author} wrote:" if author else "" 227 | return f'{author}{value}' 228 | 229 | def render_tags(tag_name: str, value: str, options: dict[str, str], _parent, _context) -> str: 230 | if not options and tag_name.islower(): 231 | return f"<{tag_name}>{value}" 232 | return f"[{tag_name} {' '.join(f'{k}={v}' if v else k for k, v in options.items())}]{value}" 233 | 234 | def render_tag(_tag_name, value: str, options: dict[str, str], _parent, _context) -> str: 235 | name, *classes = options["tag"].split(".") 236 | return f'<{name} class="{" ".join(classes)}">{value}' 237 | 238 | def parse_extra(page: BeautifulSoup) -> BeautifulSoup: 239 | child: NavigableString 240 | child_new: Tag 241 | has_match: bool = True 242 | while has_match: 243 | has_match = False 244 | for child in [c for e in page.select("*:not(a)") for c in e.children if isinstance(c, NavigableString)]: 245 | if m_ := match(rf"(.*):({'|'.join(smilie_icons)}):(.*)", child): 246 | has_match = True 247 | child_new = Tag(name="i", attrs={"class": f"smilie {m_[2]}"}) 248 | child.replaceWith(m_[1], child_new, m_[3]) 249 | elif m_ := match(r"(.*)(?:@([a-zA-Z0-9.~_-]+)|:link([a-zA-Z0-9.~_-]+):)(.*)", child): 250 | has_match = True 251 | child_new = Tag(name="a", attrs={"class": "linkusername", "href": f"/user/{m_[2] or m_[3]}"}) 252 | child_new.insert(0, m_[2] or m_[3]) 253 | child.replaceWith(m_[1], child_new, m_[4]) 254 | elif m_ := match(r"(.*):(?:icon([a-zA-Z0-9.~_-]+)|([a-zA-Z0-9.~_-]+)icon):(.*)", child): 255 | has_match = True 256 | user: str = m_[2] or m_[3] or "" 257 | child_new = Tag(name="a", attrs={"class": "iconusername", "href": f"/user/{user}"}) 258 | child_new_img: Tag = Tag( 259 | name="img", 260 | attrs={ 261 | "alt": user, "title": user, 262 | "src": f"//a.furaffinity.net/{datetime.now():%Y%m%d}/{username_url(user)}.gif" 263 | } 264 | ) 265 | child_new.insert(0, child_new_img) 266 | if m_[2]: 267 | child_new.insert(1, f"\xA0{m_[2]}") 268 | child.replaceWith(m_[1], child_new, m_[4]) 269 | elif m_ := match(r"(.*)\[ *(?:(\d+)|-)?, *(?:(\d+)|-)? *, *(?:(\d+)|-)? 
*](.*)", child): 270 | has_match = True 271 | child_new = Tag(name="span", attrs={"class": "parsed_nav_links"}) 272 | child_new_1: Union[Tag, str] = "<<<\xA0PREV" 273 | child_new_2: Union[Tag, str] = "FIRST" 274 | child_new_3: Union[Tag, str] = "NEXT\xA0>>>" 275 | if m_[2]: 276 | child_new_1 = Tag(name="a", attrs={"href": f"/view/{m_[2]}"}) 277 | child_new_1.insert(0, "<<<\xA0PREV") 278 | if m_[3]: 279 | child_new_2 = Tag(name="a", attrs={"href": f"/view/{m_[3]}"}) 280 | child_new_2.insert(0, "<<<\xA0FIRST") 281 | if m_[4]: 282 | child_new_3 = Tag(name="a", attrs={"href": f"/view/{m_[4]}"}) 283 | child_new_3.insert(0, "NEXT\xA0>>>") 284 | child_new.insert(0, child_new_1) 285 | child_new.insert(1, "\xA0|\xA0") 286 | child_new.insert(2, child_new_2) 287 | child_new.insert(3, "\xA0|\xA0") 288 | child_new.insert(4, child_new_3) 289 | child.replaceWith(m_[1], child_new, m_[5]) 290 | 291 | for p in page.select("p"): 292 | p.replaceWith(*p.children) 293 | 294 | return page 295 | 296 | parser: BBCodeParser = BBCodeParser(install_defaults=False, replace_links=False, replace_cosmetic=True) 297 | parser.REPLACE_ESCAPE = ( 298 | ("&", "&"), 299 | ("<", "<"), 300 | (">", ">"), 301 | ) 302 | parser.REPLACE_COSMETIC = ( 303 | ("(c)", "©"), 304 | ("(r)", "®"), 305 | ("(tm)", "™"), 306 | ) 307 | 308 | for tag in ("i", "b", "u", "s", "sub", "sup", "h1", "h2", "h3", "h3", "h4", "h5", "h6"): 309 | parser.add_formatter(tag, render_tags) 310 | for align in ("left", "center", "right"): 311 | parser.add_simple_formatter(align, f'%(value)s') 312 | 313 | parser.add_simple_formatter("spoiler", '%(value)s') 314 | parser.add_simple_formatter("url", '%(value)s') 315 | parser.add_simple_formatter( 316 | "iconusername", 317 | f'' 318 | f'%(value)s' 319 | f'%(value)s' 320 | f'' 321 | ) 322 | parser.add_simple_formatter( 323 | "usernameicon", 324 | f'' 325 | f'%(value)s' 326 | f'' 327 | ) 328 | parser.add_simple_formatter("linkusername", '%(value)s') 329 | parser.add_simple_formatter("hr", "
", standalone=True) 330 | 331 | parser.add_formatter("url", render_url) 332 | parser.add_formatter("color", render_color) 333 | parser.add_formatter("quote", render_quote) 334 | parser.add_formatter("tag", render_tag) 335 | 336 | bbcode = sub(r"-{5,}", "[hr]", bbcode) 337 | 338 | result_page: BeautifulSoup = parse_extra(parse_page(parser.format(bbcode))) 339 | return (result_page.select_one("html > body") or result_page).decode_contents() 340 | 341 | 342 | def parse_username_from_url(url: str) -> Optional[str]: 343 | return m[1] if (m := url_username_regexp.match(parse_url(url).path or "")) else None 344 | 345 | 346 | def parse_mentions(tag: Tag) -> list[str]: 347 | mentions: list[str] = [username_url(m[1]) for a in tag.select("a") 348 | if (m := match(mentions_regexp, get_attr(a, "href")))] 349 | return sorted(set([m for m in mentions if m]), key=mentions.index) 350 | 351 | 352 | def parse_loggedin_user(page: BeautifulSoup) -> Optional[str]: 353 | return get_attr(avatar, "alt") if (avatar := page.select_one("img.loggedin_user_avatar")) else None 354 | 355 | 356 | def parse_journal_section(section_tag: Tag) -> dict[str, Any]: 357 | id_: int = int(section_tag.attrs.get("id", "00000")[4:]) 358 | tag_title: Optional[Tag] = section_tag.select_one("h2") 359 | tag_date: Optional[Tag] = section_tag.select_one("div.section-header span.popup_date") 360 | tag_content: Optional[Tag] = section_tag.select_one("div.journal-body") 361 | tag_comments: Optional[Tag] = section_tag.select_one("div.section-footer > a > span") 362 | 363 | assert id_ != 0, _raise_exception(ParsingError("Missing ID")) 364 | assert tag_title is not None, _raise_exception(ParsingError("Missing title tag")) 365 | assert tag_date is not None, _raise_exception(ParsingError("Missing date tag")) 366 | assert tag_content is not None, _raise_exception(ParsingError("Missing content tag")) 367 | assert tag_comments is not None, _raise_exception(ParsingError("Missing comments tag")) 368 | 369 | # noinspection DuplicatedCode 370 | title: str = tag_title.text.strip() 371 | date: datetime = parse_date( 372 | get_attr(tag_date, "title").strip() 373 | if match(r"^[A-Za-z]+ \d+,.*$", get_attr(tag_date, "title")) 374 | else tag_date.text.strip() 375 | ) 376 | content: str = clean_html(inner_html(tag_content)) 377 | mentions: list[str] = parse_mentions(tag_content) 378 | comments: int = int(tag_comments.text.strip()) 379 | 380 | return { 381 | "id": id_, 382 | "title": title, 383 | "date": date, 384 | "content": content, 385 | "mentions": mentions, 386 | "comments": comments, 387 | } 388 | 389 | 390 | def parse_journal_page(journal_page: BeautifulSoup) -> dict[str, Any]: 391 | user_info: dict[str, str] = parse_user_folder(journal_page) 392 | tag_id: Optional[Tag] = journal_page.select_one("meta[property='og:url']") 393 | tag_title: Optional[Tag] = journal_page.select_one("h2.journal-title") 394 | tag_date: Optional[Tag] = journal_page.select_one("div.content div.section-header span.popup_date") 395 | tag_header: Optional[Tag] = journal_page.select_one("div.journal-header") 396 | tag_footer: Optional[Tag] = journal_page.select_one("div.journal-footer") 397 | tag_content: Optional[Tag] = journal_page.select_one("div.journal-content") 398 | tag_comments: Optional[Tag] = journal_page.select_one("div.section-footer > span") 399 | 400 | assert tag_id is not None, _raise_exception(ParsingError("Missing ID tag")) 401 | assert tag_title is not None, _raise_exception(ParsingError("Missing title tag")) 402 | assert tag_date is not None, 
_raise_exception(ParsingError("Missing date tag")) 403 | assert tag_content is not None, _raise_exception(ParsingError("Missing content tag")) 404 | assert tag_comments is not None, _raise_exception(ParsingError("Missing comments tag")) 405 | 406 | id_: int = int(tag_id.attrs.get("content", "0").strip("/").split("/")[-1]) 407 | # noinspection DuplicatedCode 408 | title: str = tag_title.text.strip() 409 | date: datetime = parse_date( 410 | get_attr(tag_date, "title").strip() 411 | if match(r"^[A-Za-z]+ \d+,.*$", get_attr(tag_date, "title")) 412 | else tag_date.text.strip() 413 | ) 414 | header: str = clean_html(inner_html(tag_header)) if tag_header else "" 415 | footer: str = clean_html(inner_html(tag_footer)) if tag_footer else "" 416 | content: str = clean_html(inner_html(tag_content)) 417 | mentions: list[str] = parse_mentions(tag_content) 418 | comments: int = int(tag_comments.text.strip()) 419 | 420 | assert id_ != 0, _raise_exception(ParsingError("Missing ID")) 421 | 422 | return { 423 | "user_info": user_info, 424 | "id": id_, 425 | "title": title, 426 | "date": date, 427 | "content": content, 428 | "header": header, 429 | "footer": footer, 430 | "mentions": mentions, 431 | "comments": comments, 432 | } 433 | 434 | 435 | def parse_submission_figure(figure_tag: Tag) -> dict[str, Any]: 436 | id_: int = int(get_attr(figure_tag, "id")[4:]) 437 | tag_title: Optional[Tag] = figure_tag.select_one("figcaption a[href^='/view/']") 438 | tag_author: Optional[Tag] = figure_tag.select_one("figcaption a[href^='/user/']") 439 | tag_thumbnail: Optional[Tag] = figure_tag.select_one("img") 440 | 441 | assert tag_title is not None, _raise_exception(ParsingError("Missing title tag")) 442 | assert tag_author is not None, _raise_exception(ParsingError("Missing author tag")) 443 | assert tag_thumbnail is not None, _raise_exception(ParsingError("Missing thumbnail tag")) 444 | 445 | title: str = get_attr(tag_title, "title") 446 | author: str = get_attr(tag_author, "title") 447 | rating: str = next(c for c in figure_tag["class"] if c.startswith("r-"))[2:] 448 | type_: str = next(c for c in figure_tag["class"] if c.startswith("t-"))[2:] 449 | thumbnail_url: str = "https:" + get_attr(tag_thumbnail, "src") 450 | thumbnail_url = f"{thumbnail_url.rsplit('/', 1)[0]}/{quote(thumbnail_url.rsplit('/', 1)[1])}" 451 | 452 | return { 453 | "id": id_, 454 | "title": title, 455 | "author": author, 456 | "rating": rating, 457 | "type": type_, 458 | "thumbnail_url": thumbnail_url, 459 | } 460 | 461 | 462 | def parse_submission_author(author_tag: Tag) -> dict[str, Any]: 463 | tag_author: Optional[Tag] = author_tag.select_one("div.submission-id-sub-container") 464 | 465 | assert tag_author is not None, _raise_exception(ParsingError("Missing author tag")) 466 | 467 | tag_author_name: Optional[Tag] = tag_author.select_one("span.c-usernameBlockSimple__displayName") 468 | tag_author_icon: Optional[Tag] = author_tag.select_one("img.submission-user-icon") 469 | 470 | assert tag_author_name is not None, _raise_exception(ParsingError("Missing author name tag")) 471 | assert tag_author_icon is not None, _raise_exception(ParsingError("Missing author icon tag")) 472 | 473 | author_name: str = tag_author_name.attrs["title"].strip() 474 | author_display_name: str = tag_author_name.text.strip() 475 | author_title: str = ([*filter( 476 | bool, [child.strip() 477 | for child in tag_author.children 478 | if isinstance(child, NavigableString)][3:] 479 | )] or [""])[-1] 480 | author_title = author_title if 
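# parse_submission_figure() above reduces one <figure id="sid-..."> element from
# a gallery/scraps page to a plain dict; a hedged sketch of its shape for a
# hypothetical figure (every value depends on the page markup):
#
#   {"id": 12345678, "title": "...", "author": "...",
#    "rating": "general", "type": "image", "thumbnail_url": "https://..."}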
tag_author.select_one('a[href$="/#tip"]') is None else sub(r"\|$", "", author_title) 481 | author_title = author_title.strip("\xA0 ") # NBSP 482 | author_icon_url: str = "https:" + get_attr(tag_author_icon, "src") 483 | 484 | return { 485 | "author": author_name, 486 | "author_display_name": author_display_name, 487 | "author_title": author_title, 488 | "author_icon_url": author_icon_url, 489 | } 490 | 491 | 492 | def parse_submission_page(sub_page: BeautifulSoup) -> dict[str, Any]: 493 | tag_id: Optional[Tag] = sub_page.select_one("meta[property='og:url']") 494 | tag_sub_info: Optional[Tag] = sub_page.select_one("div.submission-id-sub-container") 495 | 496 | assert tag_sub_info is not None, _raise_exception(ParsingError("Missing info tag")) 497 | 498 | tag_title: Optional[Tag] = tag_sub_info.select_one("div.submission-title") 499 | tag_author: Optional[Tag] = sub_page.select_one("div.submission-id-container") 500 | tag_date: Optional[Tag] = sub_page.select_one("div.submission-id-container span.popup_date") 501 | tag_tags: list[Tag] = sub_page.select('section.tags-row a[href^="/"]') 502 | tag_views: Optional[Tag] = sub_page.select_one("div.views span") 503 | tag_comment_count: Optional[Tag] = sub_page.select_one("section.stats-container div.comments span") 504 | tag_favorites: Optional[Tag] = sub_page.select_one("div.favorites span") 505 | tag_rating: Optional[Tag] = sub_page.select_one("div.rating span.rating-box") 506 | tag_type: Optional[Tag] = sub_page.select_one("div#submission_page[class^='page-content-type']") 507 | tag_fav: Optional[Tag] = sub_page.select_one("div.fav > a") 508 | tag_info: Optional[Tag] = sub_page.select_one("section.info.text") 509 | tag_user_folders: list[Tag] = sub_page.select("section.folder-list-container > div > a") 510 | 511 | assert tag_info is not None, _raise_exception(ParsingError("Missing info tag")) 512 | 513 | tag_category1: Optional[Tag] = tag_info.select_one("span.category-name") 514 | tag_category2: Optional[Tag] = tag_info.select_one("span.type-name") 515 | tag_species: Optional[Tag] = (info_spans := tag_info.select("span"))[bool(tag_category1) + bool(tag_category2)] 516 | tag_gender: Optional[Tag] = info_spans[1 + bool(tag_category1) + bool(tag_category2)] 517 | tag_description: Optional[Tag] = sub_page.select_one("div.submission-description") 518 | tag_folder: Optional[Tag] = sub_page.select_one("a.button[href^='/scraps/'],a.button[href^='/gallery/']") 519 | tag_file_url: Optional[Tag] = sub_page.select_one("div.download a") 520 | tag_thumbnail_url: Optional[Tag] = sub_page.select_one("img#submissionImg") 521 | tag_prev: Optional[Tag] = sub_page.select_one("div.submission-content div.favorite-nav a:nth-child(1)") 522 | tag_next: Optional[Tag] = sub_page.select_one("div.submission-content div.favorite-nav a:last-child") 523 | 524 | assert tag_id is not None, _raise_exception(ParsingError("Missing id tag")) 525 | assert tag_title is not None, _raise_exception(ParsingError("Missing title tag")) 526 | assert tag_author is not None, _raise_exception(ParsingError("Missing author tag")) 527 | assert tag_date is not None, _raise_exception(ParsingError("Missing date tag")) 528 | assert tag_views is not None, _raise_exception(ParsingError("Missing views tag")) 529 | assert tag_comment_count is not None, _raise_exception(ParsingError("Missing comment count tag")) 530 | assert tag_favorites is not None, _raise_exception(ParsingError("Missing favorites tag")) 531 | assert tag_rating is not None, _raise_exception(ParsingError("Missing rating tag")) 532 | 
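# Note on the assertion style used throughout this module: the message operand
# is _raise_exception(ParsingError(...)), which raises the ParsingError the
# moment the condition fails, so callers get a ParsingError rather than a bare
# AssertionError. As with any assert, the checks are skipped entirely when
# Python runs with -O.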
assert tag_type is not None, _raise_exception(ParsingError("Missing type tag")) 533 | assert tag_fav is not None, _raise_exception(ParsingError("Missing fav tag")) 534 | assert tag_species is not None, _raise_exception(ParsingError("Missing species tag")) 535 | assert tag_gender is not None, _raise_exception(ParsingError("Missing gender tag")) 536 | assert tag_description is not None, _raise_exception(ParsingError("Missing description tag")) 537 | assert tag_folder is not None, _raise_exception(ParsingError("Missing folder tag")) 538 | assert tag_file_url is not None, _raise_exception(ParsingError("Missing file URL tag")) 539 | assert tag_prev is not None, _raise_exception(ParsingError("Missing prev tag")) 540 | assert tag_next is not None, _raise_exception(ParsingError("Missing next tag")) 541 | 542 | tag_footer: Optional[Tag] = tag_description.select_one("div.submission-footer") 543 | 544 | id_: int = int(get_attr(tag_id, "content").strip("/").split("/")[-1]) 545 | title: str = tag_title.text.strip() 546 | date: datetime = parse_date( 547 | get_attr(tag_date, "title").strip() 548 | if match(r"^[A-Za-z]+ \d+,.*$", get_attr(tag_date, "title")) 549 | else tag_date.text.strip() 550 | ) 551 | tags: list[str] = [t.text.strip() for t in tag_tags] 552 | category: str = "" 553 | if tag_category1: 554 | category += tag_category1.text.strip() 555 | if tag_category2: 556 | category += " / " + tag_category2.text.strip() 557 | category.strip() 558 | species: str = tag_species.text.strip() 559 | gender: str = tag_gender.text.strip() 560 | rating: str = tag_rating.text.strip() 561 | views: int = int(tag_views.text.strip()) 562 | comment_count: int = int(tag_comment_count.text.strip()) 563 | favorites: int = int(tag_favorites.text.strip()) 564 | type_: str = tag_type["class"][0][18:] 565 | footer: str = "" 566 | if tag_footer: 567 | if tag_footer_hr := tag_footer.select_one("hr"): 568 | tag_footer_hr.decompose() 569 | footer = clean_html(inner_html(tag_footer)) 570 | tag_footer.decompose() 571 | description: str = clean_html(inner_html(tag_description)) 572 | mentions: list[str] = parse_mentions(tag_description) 573 | folder: str = m.group(1).lower() if (m := match(r"^/(scraps|gallery)/.*$", get_attr(tag_folder, "href"))) else "" 574 | file_url: str = "https:" + get_attr(tag_file_url, "href") 575 | file_url = f"{file_url.rsplit('/', 1)[0]}/{quote(file_url.rsplit('/', 1)[1])}" 576 | thumbnail_url: str = ("https:" + get_attr(tag_thumbnail_url, "data-preview-src")) if tag_thumbnail_url else "" 577 | thumbnail_url = f"{thumbnail_url.rsplit('/', 1)[0]}/{quote(thumbnail_url.rsplit('/', 1)[1])}" \ 578 | if thumbnail_url else "" 579 | prev_sub: Optional[int] = int( 580 | get_attr(tag_prev, "href").split("/")[-2] 581 | ) if tag_prev and tag_prev.text.lower() == "prev" else None 582 | next_sub: Optional[int] = int( 583 | get_attr(tag_next, "href").split("/")[-2] 584 | ) if tag_next and tag_next.text.lower() == "next" else None 585 | fav_link: Optional[str] = f"{root}{href}" if (href := get_attr(tag_fav, "href")).startswith("/fav/") else None 586 | unfav_link: Optional[str] = f"{root}{href}" if (href := get_attr(tag_fav, "href")).startswith("/unfav/") else None 587 | user_folders: list[tuple[str, str, str]] = [] 588 | for a in tag_user_folders: 589 | tag_folder_name: Optional[Tag] = a.select_one("span") 590 | tag_folder_group: Optional[Tag] = a.select_one("strong") 591 | assert tag_folder_name is not None, _raise_exception(ParsingError("Missing folder name tag")) 592 | user_folders.append( 593 | ( 594 | 
tag_folder_name.text.strip(), 595 | (root + href) if (href := a.attrs.get("href", "")) else "", 596 | tag_folder_group.text.strip() if tag_folder_group else "" 597 | ) 598 | ) 599 | 600 | return { 601 | "id": id_, 602 | "title": title, 603 | **parse_submission_author(tag_author), 604 | "date": date, 605 | "tags": tags, 606 | "category": category, 607 | "species": species, 608 | "gender": gender, 609 | "rating": rating, 610 | "views": views, 611 | "comment_count": comment_count, 612 | "favorites": favorites, 613 | "type": type_, 614 | "footer": footer, 615 | "description": description, 616 | "mentions": mentions, 617 | "folder": folder, 618 | "user_folders": user_folders, 619 | "file_url": file_url, 620 | "thumbnail_url": thumbnail_url, 621 | "prev": prev_sub, 622 | "next": next_sub, 623 | "fav_link": fav_link, 624 | "unfav_link": unfav_link, 625 | } 626 | 627 | 628 | def parse_user_header(user_header: Tag) -> dict[str, Any]: 629 | tag_user_name: Optional[Tag] = user_header.select_one("a.c-usernameBlock__userName") 630 | tag_user_display_name: Optional[Tag] = user_header.select_one("a.c-usernameBlock__displayName") 631 | tag_title_join_date: Optional[Tag] = user_header.select_one("userpage-nav-user-details span.user-title") 632 | tag_avatar: Optional[Tag] = user_header.select_one("userpage-nav-avatar img") 633 | 634 | assert tag_user_name is not None, _raise_exception(ParsingError("Missing user name tag")) 635 | assert tag_user_display_name is not None, _raise_exception(ParsingError("Missing user display name tag")) 636 | assert tag_title_join_date is not None, _raise_exception(ParsingError("Missing join date tag")) 637 | assert tag_avatar is not None, _raise_exception(ParsingError("Missing user icon tag")) 638 | 639 | tag_user_symbol: Optional[Tag] = tag_user_name.select_one("span.c-usernameBlock__symbol") 640 | 641 | status: str = tag_user_symbol.text.strip() if tag_user_symbol else "" 642 | name: str = tag_user_name.text.strip().removeprefix(status).strip() 643 | display_name: str = tag_user_display_name.text.strip() 644 | 645 | title: str = ttd[0].strip() if len(ttd := tag_title_join_date.text.rsplit("|", 1)) > 1 else "" 646 | join_date: datetime = parse_date(ttd[-1].strip().split(":", 1)[1]) 647 | avatar_url: str = "https:" + get_attr(tag_avatar, "src") 648 | avatar_url = f"{avatar_url.rsplit('/', 1)[0]}/{quote(avatar_url.rsplit('/', 1)[1])}" 649 | 650 | return { 651 | "status": status, 652 | "name": name, 653 | "display_name": display_name, 654 | "title": title, 655 | "join_date": join_date, 656 | "avatar_url": avatar_url, 657 | } 658 | 659 | 660 | def parse_user_page(user_page: BeautifulSoup) -> dict[str, Any]: 661 | tag_user_header: Optional[Tag] = user_page.select_one("userpage-nav-header") 662 | tag_user_banner: Optional[Tag] = user_page.select_one("site-banner picture img") 663 | tag_profile: Optional[Tag] = user_page.select_one("div.userpage-profile") 664 | tag_stats: Optional[Tag] = user_page.select_one("div.userpage-section-right div.table") 665 | tag_watchlist_to: Optional[Tag] = user_page.select_one("a[href*='watchlist/to']") 666 | tag_watchlist_by: Optional[Tag] = user_page.select_one("a[href*='watchlist/by']") 667 | tag_infos: list[Tag] = user_page.select("div#userpage-contact-item div.table-row") 668 | tag_contacts: list[Tag] = user_page.select("div#userpage-contact div.user-contact-user-info") 669 | tag_user_nav_controls: Optional[Tag] = user_page.select_one("userpage-nav-interface-buttons") 670 | tag_meta_url: Optional[Tag] = 
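# parse_user_header() above is the shared header parser: it returns "status",
# "name", "display_name", "title", "join_date" and "avatar_url", and both
# parse_user_page() below and parse_user_folder() merge that mapping into their
# own results.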
user_page.select_one('meta[property="og:url"]') 671 | 672 | assert tag_user_header is not None, _raise_exception(ParsingError("Missing user header tag")) 673 | assert tag_profile is not None, _raise_exception(ParsingError("Missing profile tag")) 674 | assert tag_stats is not None, _raise_exception(ParsingError("Missing stats tag")) 675 | assert tag_watchlist_to is not None, _raise_exception(ParsingError("Missing watchlist to tag")) 676 | assert tag_watchlist_by is not None, _raise_exception(ParsingError("Missing watchlist by tag")) 677 | assert tag_meta_url is not None, _raise_exception(ParsingError("Missing meta tag")) 678 | 679 | tag_watch: Optional[Tag] = None 680 | tag_block: Optional[Tag] = None 681 | 682 | if tag_user_nav_controls: 683 | tag_watch = tag_user_nav_controls.select_one("a[href^='/watch/'], a[href^='/unwatch/']") 684 | tag_block = tag_user_nav_controls.select_one("a[href^='/block/'], a[href^='/unblock/']") 685 | 686 | profile: str = clean_html(inner_html(tag_profile)) 687 | stats: tuple[int, ...] = ( 688 | *map(lambda s: int(s.split(":")[1]), filter(bool, map(str.strip, tag_stats.text.split("\n")))), 689 | int(m[1]) if (m := search(r"(\d+)", tag_watchlist_to.text)) else 0, 690 | int(m[1]) if (m := search(r"(\d+)", tag_watchlist_by.text)) else 0, 691 | ) 692 | 693 | tag_key: Optional[Tag] 694 | info: dict[str, str] = {} 695 | contacts: dict[str, str] = {} 696 | for tb in tag_infos: 697 | if (tag_key := tb.select_one("div")) is None: 698 | continue 699 | elif "profile-empty" in tb.attrs.get("class", []): 700 | continue 701 | elif not (val := [*filter(bool, [c.strip() for c in tb.children if isinstance(c, NavigableString)])][-1:]): 702 | continue 703 | info[tag_key.text.strip()] = val[0] 704 | for pc in tag_contacts: 705 | if (tag_key := pc.select_one("span")) is None: 706 | continue 707 | contacts[tag_key.text.strip()] = get_attr(a, "href") if (a := pc.select_one("a")) else \ 708 | [*filter(bool, map(str.strip, pc.text.split("\n")))][-1] 709 | tag_watch_href: str = get_attr(tag_watch, "href") if tag_watch else "" 710 | watch: Optional[str] = f"{root}{tag_watch_href}" if tag_watch_href.startswith("/watch/") else None 711 | unwatch: Optional[str] = f"{root}{tag_watch_href}" if tag_watch_href.startswith("/unwatch/") else None 712 | tag_block_href: str = get_attr(tag_block, "href") if tag_block else "" 713 | block: Optional[str] = f"{root}{tag_block_href}" if tag_block_href.startswith("/block/") else None 714 | unblock: Optional[str] = f"{root}{tag_block_href}" if tag_block_href.startswith("/unblock/") else None 715 | user_banner_url: Optional[str] = ("https:" + get_attr(tag_user_banner, "src")) if tag_user_banner else None 716 | user_banner_url = f"{user_banner_url.rsplit('/', 1)[0]}/{quote(user_banner_url.rsplit('/', 1)[1])}" \ 717 | if user_banner_url else None 718 | 719 | return { 720 | **parse_user_header(tag_user_header), 721 | "banner_url": user_banner_url, 722 | "profile": profile, 723 | "stats": stats, 724 | "info": info, 725 | "contacts": contacts, 726 | "watch": watch, 727 | "unwatch": unwatch, 728 | "block": block, 729 | "unblock": unblock, 730 | } 731 | 732 | 733 | def parse_comment_tag(tag: Tag) -> dict: 734 | tag_id: Optional[Tag] = tag.select_one("a.comment_anchor") 735 | tag_user_name: Optional[Tag] = tag.select_one("comment-username a.c-usernameBlock__userName") 736 | tag_user_symbol: Optional[Tag] = tag_user_name.select_one(".c-usernameBlock__symbol") if tag_user_name else None 737 | tag_user_display_name: Optional[Tag] = tag.select_one("comment-username 
a.c-usernameBlock__displayName") 738 | tag_avatar: Optional[Tag] = tag.select_one("div.avatar img.comment_useravatar") 739 | tag_user_title: Optional[Tag] = tag.select_one("comment-title") 740 | tag_body: Optional[Tag] = tag.select_one("comment-user-text") 741 | # TODO: update when they implement parent link 742 | # tag_parent_link: Optional[Tag] = tag.select_one("a.comment-parent") 743 | tag_edited: Optional[Tag] = tag.select_one("img.edited") 744 | 745 | assert tag_id is not None, _raise_exception(ParsingError("Missing link tag")) 746 | assert tag_body is not None, _raise_exception(ParsingError("Missing body tag")) 747 | 748 | attr_id: Optional[str] = tag_id.attrs.get("id") 749 | 750 | assert attr_id is not None, _raise_exception(ParsingError("Missing id attribute")) 751 | 752 | comment_id: int = int(attr_id.removeprefix("cid:")) 753 | comment_text: str = clean_html(inner_html(tag_body)) 754 | 755 | if tag_user_name is None or tag_user_display_name is None: 756 | return { 757 | "id": comment_id, 758 | "user_name": "", 759 | "user_display_name": "", 760 | "user_title": "", 761 | "avatar_url": "", 762 | "timestamp": 0, 763 | "text": comment_text, 764 | "parent": None, 765 | "edited": tag_edited is not None, 766 | "hidden": True, 767 | } 768 | 769 | assert tag_avatar is not None, _raise_exception(ParsingError("Missing user icon tag")) 770 | assert tag_user_title is not None, _raise_exception(ParsingError("Missing user title tag")) 771 | 772 | attr_timestamp: Optional[str] = tag.attrs.get("data-timestamp") 773 | attr_avatar: Optional[str] = tag_avatar.attrs.get("src") 774 | # TODO: update when they implement parent link 775 | # attr_parent_href: Optional[str] = tag_parent_link.attrs.get("href") if tag_parent_link is not None else None 776 | # TODO: remove when they implement parent link 777 | attr_parent_href: Optional[str] = None 778 | if m := search(r' list[Tag]: 805 | return page.select("div.comment_container") 806 | 807 | 808 | def parse_user_tag(user_tag: Tag) -> dict[str, Any]: 809 | tag_status: Optional[Tag] = user_tag.select_one("h2") 810 | tag_title: Optional[Tag] = user_tag.select_one("span") 811 | 812 | assert tag_status, _raise_exception(ParsingError("Missing status and username tag")) 813 | assert tag_title, _raise_exception(ParsingError("Missing title and join date tag")) 814 | 815 | status: str = "" 816 | name: str = tag_status.text.strip() 817 | title: str 818 | join_date_str: str 819 | 820 | if not user_tag.select_one("img.type-admin"): 821 | status, name = name[0], name[1:] 822 | 823 | if "|" in (tag_title_text := tag_title.text.strip()): 824 | title, join_date_str = tag_title_text.rsplit("|", 1) 825 | else: 826 | title, join_date_str = "", tag_title_text 827 | join_date: datetime = parse_date(join_date_str.split(":", 1)[1].strip()) 828 | 829 | return { 830 | "user_name": name, 831 | "user_status": status, 832 | "user_title": title, 833 | "user_join_date": join_date, 834 | } 835 | 836 | 837 | def parse_user_folder(folder_page: BeautifulSoup) -> dict[str, Any]: 838 | tag_user_header: Optional[Tag] = folder_page.select_one("userpage-nav-header") 839 | assert tag_user_header is not None, _raise_exception(ParsingError("Missing user header tag")) 840 | return { 841 | **parse_user_header(tag_user_header), 842 | } 843 | 844 | 845 | def parse_submission_figures(figures_page: BeautifulSoup) -> list[Tag]: 846 | return figures_page.select("figure[id^='sid-']") 847 | 848 | 849 | def parse_user_submissions(submissions_page: BeautifulSoup) -> dict[str, Any]: 850 | user_info: dict[str, 
str] = parse_user_folder(submissions_page) 851 | last_page: bool = not any(b.text.lower() == "next" for b in submissions_page.select("form button.button")) 852 | 853 | return { 854 | **user_info, 855 | "figures": parse_submission_figures(submissions_page), 856 | "last_page": last_page, 857 | } 858 | 859 | 860 | def parse_user_favorites(favorites_page: BeautifulSoup) -> dict[str, Any]: 861 | parsed_submissions = parse_user_submissions(favorites_page) 862 | tag_next_page: Optional[Tag] = favorites_page.select_one('form[action^="/favorites/"][action$="/next"]') 863 | next_page: str = get_attr(tag_next_page, "action").split("/", 3)[-1] if tag_next_page else "" 864 | 865 | return { 866 | **parsed_submissions, 867 | "next_page": next_page, 868 | } 869 | 870 | 871 | def parse_user_journals(journals_page: BeautifulSoup) -> dict[str, Any]: 872 | user_info: dict[str, str] = parse_user_folder(journals_page) 873 | sections: list[Tag] = journals_page.select("section[id^='jid:']") 874 | next_page_tag: Optional[Tag] = journals_page.select_one("div.mini-nav > div.mini-nav-cell:first-child > a.button") 875 | 876 | return { 877 | **user_info, 878 | "sections": sections, 879 | "last_page": next_page_tag is None, 880 | } 881 | 882 | 883 | def parse_watchlist(watch_page: BeautifulSoup) -> tuple[list[tuple[str, str]], int]: 884 | tag_next: Optional[Tag] = watch_page.select_one("section div.floatright form[method=get]") 885 | match_next: Optional[Match] = watchlist_next_regexp.match(get_attr(tag_next, "action")) if tag_next else None 886 | 887 | watches: list[tuple[str, str]] = [] 888 | 889 | for tag_user in watch_page.select("div.watch-list-items"): 890 | user_link: Optional[Tag] = tag_user.select_one("a") 891 | assert user_link, _raise_exception(ParsingError("Missing user link")) 892 | 893 | username: str = user_link.text.strip() 894 | user_link.decompose() 895 | 896 | status: str = tag_user.text.strip() 897 | 898 | watches.append((status, username)) 899 | 900 | return watches, int(match_next[1]) if match_next else 0 901 | -------------------------------------------------------------------------------- /faapi/submission.py: -------------------------------------------------------------------------------- 1 | from collections import namedtuple 2 | from datetime import datetime 3 | from typing import Optional 4 | 5 | from .connection import join_url 6 | from .connection import root 7 | from .exceptions import _raise_exception 8 | from .parse import BeautifulSoup 9 | from .parse import Tag 10 | from .parse import check_page_raise 11 | from .parse import html_to_bbcode 12 | from .parse import parse_comments 13 | from .parse import parse_submission_figure 14 | from .parse import parse_submission_page 15 | from .user import UserPartial 16 | 17 | 18 | class SubmissionStats(namedtuple("SubmissionStats", ["views", "comments", "favorites"])): 19 | """ 20 | This object contains the submission's statistics: 21 | * views 22 | * comments 23 | * favorites 24 | """ 25 | 26 | 27 | class SubmissionUserFolder(namedtuple("SubmissionUserFolder", ["name", "url", "group"])): 28 | """ 29 | This object contains a submission's folder details: 30 | * name: str the name of the folder 31 | * url: str the URL to the folder 32 | * group: str the group the folder belongs to 33 | """ 34 | 35 | 36 | class SubmissionBase: 37 | """ 38 | Base class for the submission objects. 
39 | """ 40 | 41 | def __init__(self): 42 | self.id: int = 0 43 | self.title: str = "" 44 | self.author: UserPartial = UserPartial() 45 | 46 | def __hash__(self) -> int: 47 | return hash(self.id) 48 | 49 | def __eq__(self, other) -> bool: 50 | if isinstance(other, SubmissionBase): 51 | return other.id == self.id 52 | elif isinstance(other, int): 53 | return other == self.id 54 | return False 55 | 56 | def __gt__(self, other) -> bool: 57 | if isinstance(other, SubmissionBase): 58 | return self.id > other.id 59 | elif isinstance(other, int): 60 | return self.id > other 61 | return False 62 | 63 | def __ge__(self, other) -> bool: 64 | if isinstance(other, SubmissionBase): 65 | return self.id >= other.id 66 | elif isinstance(other, int): 67 | return self.id >= other 68 | return False 69 | 70 | def __lt__(self, other) -> bool: 71 | if isinstance(other, SubmissionBase): 72 | return self.id < other.id 73 | elif isinstance(other, int): 74 | return self.id < other 75 | return False 76 | 77 | def __le__(self, other) -> bool: 78 | if isinstance(other, SubmissionBase): 79 | return self.id <= other.id 80 | elif isinstance(other, int): 81 | return self.id <= other 82 | return False 83 | 84 | def __iter__(self): 85 | yield "id", self.id 86 | yield "title", self.title 87 | yield "author", dict(self.author) 88 | 89 | def __repr__(self): 90 | return self.__str__() 91 | 92 | def __str__(self): 93 | return f"{self.id} {self.author} {self.title}" 94 | 95 | @property 96 | def url(self): 97 | """ 98 | Compose the full URL to the submission. 99 | 100 | :return: The URL to the submission. 101 | """ 102 | return join_url(root, "view", self.id) 103 | 104 | 105 | class SubmissionPartial(SubmissionBase): 106 | """ 107 | Contains partial submission information gathered from submissions pages (gallery, scraps, etc.). 108 | """ 109 | 110 | def __init__(self, submission_figure: Optional[Tag] = None): 111 | """ 112 | :param submission_figure: The figure tag from which to parse the submission information. 113 | """ 114 | assert submission_figure is None or isinstance(submission_figure, Tag), \ 115 | _raise_exception(TypeError(f"submission_figure must be {None} or {BeautifulSoup.__name__}")) 116 | 117 | super().__init__() 118 | 119 | self.submission_figure: Optional[Tag] = submission_figure 120 | self.rating: str = "" 121 | self.type: str = "" 122 | self.thumbnail_url: str = "" 123 | 124 | self.parse() 125 | 126 | def __iter__(self): 127 | yield "id", self.id 128 | yield "title", self.title 129 | yield "author", dict(self.author) 130 | yield "rating", self.rating 131 | yield "type", self.type 132 | yield "thumbnail_url", self.thumbnail_url 133 | 134 | def parse(self, submission_figure: Optional[Tag] = None): 135 | """ 136 | Parse a submission figure Tag, overrides any information already present in the object. 137 | 138 | :param submission_figure: The optional figure tag from which to parse the submission. 
139 | """ 140 | assert submission_figure is None or isinstance(submission_figure, Tag), \ 141 | _raise_exception(TypeError(f"submission_figure must be {None} or {BeautifulSoup.__name__}")) 142 | 143 | self.submission_figure = submission_figure or self.submission_figure 144 | if self.submission_figure is None: 145 | return 146 | 147 | parsed: dict = parse_submission_figure(self.submission_figure) 148 | 149 | self.id = parsed["id"] 150 | self.title = parsed["title"] 151 | self.author.name = parsed["author"] 152 | self.rating = parsed["rating"] 153 | self.type = parsed["type"] 154 | self.thumbnail_url = parsed["thumbnail_url"] 155 | 156 | 157 | class Submission(SubmissionBase): 158 | """ 159 | Contains complete submission information gathered from submission pages, including comments. 160 | """ 161 | 162 | def __init__(self, submission_page: Optional[BeautifulSoup] = None): 163 | """ 164 | :param submission_page: The page from which to parse the submission information. 165 | """ 166 | assert submission_page is None or isinstance(submission_page, BeautifulSoup), \ 167 | _raise_exception(TypeError(f"submission_page must be {None} or {BeautifulSoup.__name__}")) 168 | 169 | super().__init__() 170 | 171 | self.submission_page: Optional[BeautifulSoup] = submission_page 172 | self.date: datetime = datetime.fromtimestamp(0) 173 | self.tags: list[str] = [] 174 | self.category: str = "" 175 | self.species: str = "" 176 | self.gender: str = "" 177 | self.rating: str = "" 178 | self.stats: SubmissionStats = SubmissionStats(0, 0, 0) 179 | self.type: str = "" 180 | self.description: str = "" 181 | self.footer: str = "" 182 | self.mentions: list[str] = [] 183 | self.folder: str = "" 184 | self.user_folders: list[SubmissionUserFolder] = [] 185 | self.file_url: str = "" 186 | self.thumbnail_url: str = "" 187 | self.prev: Optional[int] = None 188 | self.next: Optional[int] = None 189 | self.favorite: bool = False 190 | self.favorite_toggle_link: str = "" 191 | from .comment import Comment 192 | self.comments: list[Comment] = [] 193 | 194 | self.parse() 195 | 196 | def __iter__(self): 197 | yield "id", self.id 198 | yield "title", self.title 199 | yield "author", dict(self.author) 200 | yield "date", self.date 201 | yield "tags", self.tags 202 | yield "category", self.category 203 | yield "species", self.species 204 | yield "gender", self.gender 205 | yield "rating", self.rating 206 | yield "stats", self.stats._asdict() 207 | yield "type", self.type 208 | yield "description", self.description 209 | yield "footer", self.footer 210 | yield "mentions", self.mentions 211 | yield "folder", self.folder 212 | yield "user_folders", [f._asdict() for f in self.user_folders] 213 | yield "file_url", self.file_url 214 | yield "thumbnail_url", self.thumbnail_url 215 | yield "prev", self.prev 216 | yield "next", self.next 217 | yield "favorite", self.favorite 218 | yield "favorite_toggle_link", self.favorite_toggle_link 219 | from .comment import _sort_comments_dict 220 | yield "comments", _sort_comments_dict(self.comments) 221 | 222 | @property 223 | def description_bbcode(self) -> str: 224 | """ 225 | The submission description formatted to BBCode 226 | 227 | :return: BBCode description 228 | """ 229 | return html_to_bbcode(self.description) 230 | 231 | @property 232 | def footer_bbcode(self) -> str: 233 | """ 234 | The submission footer formatted to BBCode 235 | 236 | :return: BBCode footer 237 | """ 238 | return html_to_bbcode(self.footer) 239 | 240 | def parse(self, submission_page: Optional[BeautifulSoup] = None): 241 | 
""" 242 | Parse a submission page, overrides any information already present in the object. 243 | 244 | :param submission_page: The optional page from which to parse the submission. 245 | """ 246 | assert submission_page is None or isinstance(submission_page, BeautifulSoup), \ 247 | _raise_exception(TypeError(f"submission_page must be {None} or {BeautifulSoup.__name__}")) 248 | 249 | self.submission_page = submission_page or self.submission_page 250 | if self.submission_page is None: 251 | return 252 | 253 | check_page_raise(self.submission_page) 254 | 255 | parsed: dict = parse_submission_page(self.submission_page) 256 | 257 | self.id = parsed["id"] 258 | self.title = parsed["title"] 259 | self.author.name = parsed["author"] 260 | self.author.display_name = parsed["author_display_name"] 261 | self.author.title = parsed["author_title"] 262 | self.author.avatar_url = parsed["author_icon_url"] 263 | self.date = parsed["date"] 264 | self.tags = parsed["tags"] 265 | self.category = parsed["category"] 266 | self.species = parsed["species"] 267 | self.gender = parsed["gender"] 268 | self.rating = parsed["rating"] 269 | self.stats = SubmissionStats(parsed["views"], parsed["comment_count"], parsed["favorites"]) 270 | self.type = parsed["type"] 271 | self.description = parsed["description"] 272 | self.footer = parsed["footer"] 273 | self.mentions = parsed["mentions"] 274 | self.folder = parsed["folder"] 275 | self.user_folders = [SubmissionUserFolder(*f) for f in parsed["user_folders"]] 276 | self.file_url = parsed["file_url"] 277 | self.thumbnail_url = parsed["thumbnail_url"] 278 | self.prev = parsed["prev"] 279 | self.next = parsed["next"] 280 | self.favorite = parsed["unfav_link"] is not None 281 | self.favorite_toggle_link = parsed["fav_link"] or parsed["unfav_link"] 282 | from .comment import sort_comments, Comment 283 | self.comments = sort_comments([Comment(t, self) for t in parse_comments(self.submission_page)]) 284 | -------------------------------------------------------------------------------- /faapi/user.py: -------------------------------------------------------------------------------- 1 | from collections import namedtuple 2 | from datetime import datetime 3 | from typing import Optional 4 | from urllib.parse import quote 5 | 6 | from .connection import join_url 7 | from .connection import root 8 | from .exceptions import _raise_exception 9 | from .parse import BeautifulSoup 10 | from .parse import Tag 11 | from .parse import check_page_raise 12 | from .parse import html_to_bbcode 13 | from .parse import parse_user_page 14 | from .parse import parse_user_tag 15 | from .parse import username_url 16 | 17 | 18 | class UserStats(namedtuple("UserStats", ["views", "submissions", "favorites", "comments_earned", 19 | "comments_made", "journals", "watched_by", "watching"])): 20 | """ 21 | This object contains a user's statistics: 22 | * views 23 | * submissions 24 | * favorites 25 | * comments_earned 26 | * comments_made 27 | * journals 28 | * watched_by 29 | * watching 30 | """ 31 | 32 | 33 | class UserBase: 34 | """ 35 | Base class for the user objects. 
36 | """ 37 | 38 | def __init__(self): 39 | self.name: str = "" 40 | self.display_name: str = "" 41 | self.status: str = "" 42 | 43 | def __hash__(self) -> int: 44 | return hash(self.name_url) 45 | 46 | def __eq__(self, other) -> bool: 47 | if isinstance(other, UserBase): 48 | return other.name_url == self.name_url 49 | elif isinstance(other, str): 50 | return username_url(other) == self.name_url 51 | return False 52 | 53 | def __gt__(self, other) -> bool: 54 | if isinstance(other, UserBase): 55 | return self.name_url > other.name_url 56 | elif isinstance(other, str): 57 | return self.name_url > username_url(other) 58 | return False 59 | 60 | def __ge__(self, other) -> bool: 61 | if isinstance(other, UserBase): 62 | return self.name_url >= other.name_url 63 | elif isinstance(other, str): 64 | return self.name_url >= username_url(other) 65 | return False 66 | 67 | def __lt__(self, other) -> bool: 68 | if isinstance(other, UserBase): 69 | return self.name_url < other.name_url 70 | elif isinstance(other, str): 71 | return self.name_url < username_url(other) 72 | return False 73 | 74 | def __le__(self, other) -> bool: 75 | if isinstance(other, UserBase): 76 | return self.name_url <= other.name_url 77 | elif isinstance(other, str): 78 | return self.name_url <= username_url(other) 79 | return False 80 | 81 | def __iter__(self): 82 | yield "name", self.name 83 | yield "display_name", self.display_name 84 | yield "status", self.status 85 | 86 | def __repr__(self): 87 | return self.__str__() 88 | 89 | def __str__(self): 90 | return self.status + self.name 91 | 92 | @property 93 | def name_url(self): 94 | """ 95 | Compose the URL-safe username. 96 | 97 | :return: The cleaned username. 98 | """ 99 | return username_url(self.name) 100 | 101 | @property 102 | def url(self): 103 | """ 104 | Compose the full URL to the user. 105 | 106 | :return: The URL to the user. 107 | """ 108 | return join_url(root, "user", quote(self.name_url)) 109 | 110 | def generate_avatar_url(self) -> str: 111 | """ 112 | Generate the URl for the current user icon. 113 | 114 | :return: The URL to the user icon 115 | """ 116 | return f"https://a.furaffinity.net/{datetime.now():%Y%m%d}/{self.name_url}.gif" 117 | 118 | 119 | class UserPartial(UserBase): 120 | """ 121 | Contains partial user information gathered from user folders (gallery, journals, etc.) and submission/journal pages. 122 | """ 123 | 124 | def __init__(self, user_tag: Optional[Tag] = None): 125 | """ 126 | :param user_tag: The tag from which to parse the user information. 127 | """ 128 | assert user_tag is None or isinstance(user_tag, Tag), \ 129 | _raise_exception(TypeError(f"user_tag must be {None} or {Tag.__name__}")) 130 | 131 | super().__init__() 132 | 133 | self.user_tag: Optional[Tag] = user_tag 134 | self.title: str = "" 135 | self.join_date: datetime = datetime.fromtimestamp(0) 136 | self.avatar_url: str = "" 137 | 138 | self.parse() 139 | 140 | def __iter__(self): 141 | yield "name", self.name 142 | yield "status", self.status 143 | yield "title", self.title 144 | yield "join_date", self.join_date 145 | yield "avatar_url", self.avatar_url 146 | 147 | def parse(self, user_tag: Optional[Tag] = None): 148 | """ 149 | Parse a user page, overrides any information already present in the object. 150 | 151 | :param user_tag: The tag from which to parse the user information. 
152 | """ 153 | assert user_tag is None or isinstance(user_tag, Tag), \ 154 | _raise_exception(TypeError(f"user_tag must be {None} or {Tag.__name__}")) 155 | 156 | self.user_tag = user_tag or self.user_tag 157 | if self.user_tag is None: 158 | return 159 | 160 | parsed: dict = parse_user_tag(self.user_tag) 161 | 162 | self.name = parsed["name"] 163 | self.status = parsed["status"] 164 | self.title = parsed["title"] 165 | self.join_date = parsed["join_date"] 166 | 167 | 168 | class User(UserBase): 169 | """ 170 | Contains complete user information gathered from userpages. 171 | """ 172 | 173 | def __init__(self, user_page: Optional[BeautifulSoup] = None): 174 | """ 175 | :param user_page: The page from which to parse the user information. 176 | """ 177 | assert user_page is None or isinstance(user_page, BeautifulSoup), \ 178 | _raise_exception(TypeError(f"user_page must be {None} or {BeautifulSoup.__name__}")) 179 | 180 | super().__init__() 181 | 182 | self.user_page: Optional[BeautifulSoup] = user_page 183 | self.title: str = "" 184 | self.join_date: datetime = datetime.fromtimestamp(0) 185 | self.profile: str = "" 186 | self.stats: UserStats = UserStats(0, 0, 0, 0, 0, 0, 0, 0) 187 | self.info: dict[str, str] = {} 188 | self.contacts: dict[str, str] = {} 189 | self.avatar_url: str = "" 190 | self.banner_url: Optional[str] = None 191 | self.watched: bool = False 192 | self.watched_toggle_link: Optional[str] = None 193 | self.blocked: bool = False 194 | self.blocked_toggle_link: Optional[str] = None 195 | 196 | self.parse() 197 | 198 | def __iter__(self): 199 | yield "name", self.name 200 | yield "display_name", self.display_name 201 | yield "status", self.status 202 | yield "title", self.title 203 | yield "join_date", self.join_date 204 | yield "profile", self.profile 205 | yield "stats", self.stats._asdict() 206 | yield "info", self.info 207 | yield "contacts", self.contacts 208 | yield "avatar_url", self.avatar_url 209 | yield "banner_url", self.banner_url 210 | yield "watched", self.watched 211 | yield "watched_toggle_link", self.watched_toggle_link 212 | yield "blocked", self.blocked 213 | yield "blocked_toggle_link", self.blocked_toggle_link 214 | 215 | @property 216 | def profile_bbcode(self) -> str: 217 | """ 218 | The user profile text formatted to BBCode 219 | 220 | :return: BBCode profile 221 | """ 222 | return html_to_bbcode(self.profile) 223 | 224 | def parse(self, user_page: Optional[BeautifulSoup] = None): 225 | """ 226 | Parse a user page, overrides any information already present in the object. 227 | 228 | :param user_page: The page from which to parse the user information. 
229 | """ 230 | assert user_page is None or isinstance(user_page, BeautifulSoup), \ 231 | _raise_exception(TypeError(f"user_page must be {None} or {BeautifulSoup.__name__}")) 232 | 233 | self.user_page = user_page or self.user_page 234 | if self.user_page is None: 235 | return 236 | 237 | check_page_raise(self.user_page) 238 | 239 | parsed: dict = parse_user_page(self.user_page) 240 | 241 | self.name = parsed["name"] 242 | self.display_name = parsed["display_name"] 243 | self.status = parsed["status"] 244 | self.profile = parsed["profile"] 245 | self.title = parsed["title"] 246 | self.join_date = parsed["join_date"] 247 | self.stats = UserStats(*parsed["stats"]) 248 | self.info = parsed["info"] 249 | self.contacts = parsed["contacts"] 250 | self.avatar_url = parsed["avatar_url"] 251 | self.banner_url = parsed["banner_url"] 252 | self.watched = parsed["watch"] is None and parsed["unwatch"] is not None 253 | self.watched_toggle_link = parsed["watch"] or parsed["unwatch"] or None 254 | self.blocked = parsed["block"] is None and parsed["unblock"] is not None 255 | self.blocked_toggle_link = parsed["block"] or parsed["unblock"] or None 256 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "faapi" 3 | version = "3.11.9" 4 | description = "Python module to implement API-like functionality for the FurAffinity.net website." 5 | authors = ["Matteo Campinoti "] 6 | license = "EUPL-1.2" 7 | readme = "README.md" 8 | homepage = "https://github.com/FurryCoders/FAAPI" 9 | repository = "https://github.com/FurryCoders/FAAPI" 10 | classifiers = [ 11 | "Programming Language :: Python :: 3.9", 12 | "Programming Language :: Python :: 3.10", 13 | "Operating System :: OS Independent", 14 | "License :: OSI Approved :: European Union Public Licence 1.2 (EUPL 1.2)", 15 | "Development Status :: 5 - Production/Stable", 16 | "Intended Audience :: Developers", 17 | "Topic :: Internet :: WWW/HTTP :: Indexing/Search", 18 | "Typing :: Typed", 19 | ] 20 | 21 | [tool.poetry.urls] 22 | "Changelog" = "https://github.com/FurryCoders/FAAPI/blob/main/CHANGELOG.md" 23 | "Source" = "https://github.com/FurryCoders/FAAPI" 24 | "Download" = "https://pypi.org/project/faapi/#files" 25 | "Bug Reports" = "https://github.com/FurryCoders/FAAPI/issues" 26 | 27 | [tool.poetry.dependencies] 28 | python = "^3.9" 29 | requests = "^2.32.3" 30 | beautifulsoup4 = "^4.12.3" 31 | lxml = "^5.3.0" 32 | python-dateutil = "^2.9.0" 33 | bbcode = "^1.1.0" 34 | 35 | [tool.poetry.group.test.dependencies] 36 | pytest = "^7.2.0" 37 | mypy = "^0.991" 38 | types-beautifulsoup4 = "^4.11.6" 39 | flake8 = "^6.0.0" 40 | coverage = "^7.3.1" 41 | 42 | [build-system] 43 | requires = ["poetry>=0.12"] 44 | build-backend = "poetry.masonry.api" 45 | -------------------------------------------------------------------------------- /tests/test_connection.py: -------------------------------------------------------------------------------- 1 | from json import load 2 | from pathlib import Path 3 | from urllib.robotparser import RobotFileParser 4 | 5 | from pytest import fixture 6 | from pytest import raises 7 | from requests import Response, Session 8 | from requests.cookies import RequestsCookieJar 9 | 10 | from faapi.connection import get_robots 11 | from faapi.connection import join_url 12 | from faapi.connection import make_session 13 | from faapi.connection import root 14 | from faapi.exceptions import Unauthorized 
15 | 16 | __root__: Path = Path(__file__).resolve().parent 17 | 18 | 19 | @fixture 20 | def data() -> dict: 21 | return load((__root__ / "test_data.json").open()) 22 | 23 | 24 | @fixture 25 | def cookies(data: dict) -> RequestsCookieJar: 26 | return data["cookies"] 27 | 28 | 29 | def test_make_session_cookie_jar(): 30 | cookie_jar = RequestsCookieJar() 31 | cookie_jar.set("a", "a") 32 | result = make_session(cookie_jar, Session) 33 | assert isinstance(result, Session) 34 | 35 | 36 | def test_make_session_list_dict(): 37 | result = make_session([{"name": "a", "value": "a"}], Session) 38 | assert isinstance(result, Session) 39 | 40 | 41 | def test_make_session_error(): 42 | with raises(Unauthorized): 43 | make_session([], Session) 44 | 45 | 46 | def test_get_robots(cookies: RequestsCookieJar): 47 | result = get_robots(make_session(cookies, Session)) 48 | assert isinstance(result, RobotFileParser) 49 | assert getattr(result, "default_entry", None) is not None 50 | 51 | 52 | def test_get(cookies: RequestsCookieJar): 53 | res: Response = make_session(cookies, Session).get(join_url(root, "view", 1)) 54 | assert res.ok 55 | -------------------------------------------------------------------------------- /tests/test_faapi.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime 2 | from datetime import timedelta 3 | from json import load 4 | from pathlib import Path 5 | from re import sub 6 | from typing import Optional 7 | 8 | from pytest import fixture 9 | from pytest import raises 10 | from requests.cookies import RequestsCookieJar 11 | 12 | import faapi 13 | from faapi import Comment 14 | from faapi import FAAPI 15 | from faapi import JournalPartial 16 | from faapi import SubmissionPartial 17 | from faapi import UserPartial 18 | from faapi.exceptions import DisallowedPath 19 | from faapi.exceptions import Unauthorized 20 | from faapi.parse import username_url 21 | from test_parse import clean_html 22 | 23 | __root__: Path = Path(__file__).resolve().parent 24 | 25 | 26 | @fixture 27 | def data() -> dict: 28 | return load((__root__ / "test_data.json").open()) 29 | 30 | 31 | @fixture 32 | def cookies(data: dict) -> RequestsCookieJar: 33 | return data["cookies"] 34 | 35 | 36 | @fixture 37 | def user_test_data() -> dict: 38 | return load((__root__ / "test_user.json").open()) 39 | 40 | 41 | @fixture 42 | def submission_test_data() -> dict: 43 | return load((__root__ / "test_submission.json").open()) 44 | 45 | 46 | @fixture 47 | def journal_test_data() -> dict: 48 | return load((__root__ / "test_journal.json").open()) 49 | 50 | 51 | def dst_us() -> timedelta: 52 | now: datetime = datetime.now() 53 | 54 | if now.month < 3 or now.month >= 12: 55 | return timedelta(0) 56 | 57 | m1 = datetime(now.year, 3, 1) 58 | 59 | if now < datetime(now.year, 3, 7 + (6 - m1.weekday() + 1)): 60 | return timedelta(0) 61 | 62 | n1 = datetime(now.year, 11, 1) 63 | 64 | if now > datetime(now.year, 11, 6 - n1.weekday() + 1): 65 | return timedelta(0) 66 | 67 | return timedelta(hours=-1) 68 | 69 | 70 | def remove_user_icons(html: str) -> str: 71 | return sub(r"a\.furaffinity\.net/\d{8}/[^. 
]+.gif", "", html) 72 | 73 | 74 | def test_robots(cookies: RequestsCookieJar): 75 | api: FAAPI = FAAPI(cookies) 76 | assert getattr(api.robots, "default_entry") is not None 77 | assert api.crawl_delay >= 1 78 | assert api.check_path("/login") 79 | assert api.check_path("/view") 80 | assert api.check_path("/journal") 81 | assert api.check_path("/user") 82 | assert api.check_path("/gallery") 83 | assert api.check_path("/scraps") 84 | assert api.check_path("/favorite") 85 | assert api.check_path("/journals") 86 | assert api.check_path("/watchlist/to") 87 | assert api.check_path("/watchlist/by") 88 | with raises(DisallowedPath): 89 | assert not api.check_path("/fav/", raise_for_disallowed=True) 90 | 91 | 92 | def test_login(cookies: RequestsCookieJar): 93 | api: FAAPI = FAAPI(cookies) 94 | assert api.login_status 95 | assert api.connection_status 96 | 97 | api.load_cookies([{"name": "a", "value": "1"}]) 98 | with raises(Unauthorized): 99 | api.me() 100 | 101 | 102 | # noinspection DuplicatedCode 103 | def test_frontpage(cookies: RequestsCookieJar): 104 | api: FAAPI = FAAPI(cookies) 105 | 106 | ss = api.frontpage() 107 | 108 | assert len({s.id for s in ss}) == len(ss) 109 | 110 | for submission in ss: 111 | assert submission.id > 0 112 | assert submission.type != "" 113 | assert submission.rating != "" 114 | assert submission.thumbnail_url != "" 115 | 116 | 117 | def test_user(cookies: RequestsCookieJar, user_test_data: dict): 118 | api: FAAPI = FAAPI(cookies) 119 | 120 | user = api.user(user_test_data["name"]) 121 | user_dict = dict(user) 122 | 123 | assert user.name.lower() == user_dict["name"].lower() == user_test_data["name"].lower() 124 | assert user.status == user_dict["status"] == user_test_data["status"] 125 | assert user.title == user_dict["title"] == user_test_data["title"] 126 | assert user.join_date == user_dict["join_date"] == datetime.fromisoformat(user_test_data["join_date"]) + dst_us() 127 | assert user.stats.views == user_dict["stats"]["views"] 128 | assert user_dict["stats"]["views"] >= user_test_data["stats"]["views"] 129 | assert user.stats.submissions == user_dict["stats"]["submissions"] 130 | assert user_dict["stats"]["submissions"] >= user_test_data["stats"]["submissions"] 131 | assert user.stats.favorites == user_dict["stats"]["favorites"] 132 | assert user_dict["stats"]["favorites"] >= user_test_data["stats"]["favorites"] 133 | assert user.stats.comments_earned == user_dict["stats"]["comments_earned"] 134 | assert user_dict["stats"]["comments_earned"] >= user_test_data["stats"]["comments_earned"] 135 | assert user.stats.comments_made == user_dict["stats"]["comments_made"] 136 | assert user_dict["stats"]["comments_made"] >= user_test_data["stats"]["comments_made"] 137 | assert user.stats.journals == user_dict["stats"]["journals"] 138 | assert user_dict["stats"]["journals"] >= user_test_data["stats"]["journals"] 139 | assert user.info == user_dict["info"] == user_test_data["info"] 140 | assert user.contacts == user_dict["contacts"] == user_test_data["contacts"] 141 | assert user.avatar_url == user_dict["avatar_url"] != "" 142 | assert user.banner_url == user_dict["banner_url"] != "" 143 | assert remove_user_icons(clean_html(user.profile)) == \ 144 | remove_user_icons(clean_html(user_dict["profile"])) == \ 145 | remove_user_icons(clean_html(user_test_data["profile"])) 146 | assert user.profile_bbcode == user_test_data["profile_bbcode"] 147 | 148 | 149 | # noinspection DuplicatedCode 150 | def test_submission(cookies: RequestsCookieJar, submission_test_data: dict): 151 | 
api: FAAPI = FAAPI(cookies) 152 | 153 | submission, file = api.submission(submission_test_data["id"], get_file=True) 154 | submission_dict = dict(submission) 155 | 156 | assert submission.id == submission_dict["id"] == submission_test_data["id"] 157 | assert submission.title == submission_dict["title"] == submission_test_data["title"] 158 | assert submission.author.name.lower() == submission_dict["author"]["name"].lower() == submission_test_data["author"]["name"].lower() 159 | assert submission.author.avatar_url == submission_dict["author"]["avatar_url"] != "" 160 | assert submission.date == submission_dict["date"] == datetime.fromisoformat(submission_test_data["date"]) + dst_us() 161 | assert submission.tags == submission_dict["tags"] == submission_test_data["tags"] 162 | assert submission.category == submission_dict["category"] == submission_test_data["category"] 163 | assert submission.species == submission_dict["species"] == submission_test_data["species"] 164 | assert submission.gender == submission_dict["gender"] == submission_test_data["gender"] 165 | assert submission.rating == submission_dict["rating"] == submission_test_data["rating"] 166 | assert submission.stats.views == submission_dict["stats"]["views"] 167 | assert submission.stats.views >= submission_test_data["stats"]["views"] 168 | assert submission.stats.comments == submission_dict["stats"]["comments"] 169 | assert submission.stats.comments >= submission_test_data["stats"]["comments"] 170 | assert submission.stats.favorites == submission_dict["stats"]["favorites"] 171 | assert submission.stats.favorites >= submission_test_data["stats"]["favorites"] 172 | assert submission.type == submission_dict["type"] == submission_test_data["type"] 173 | assert submission.mentions == submission_dict["mentions"] == submission_test_data["mentions"] 174 | assert submission.folder == submission_dict["folder"] == submission_test_data["folder"] 175 | assert submission.file_url == submission_dict["file_url"] != "" 176 | assert submission.thumbnail_url == submission_dict["thumbnail_url"] != "" 177 | assert submission.prev == submission_dict["prev"] == submission_test_data["prev"] 178 | assert submission.next == submission_dict["next"] == submission_test_data["next"] 179 | assert submission.favorite == submission_dict["favorite"] == submission_test_data["favorite"] 180 | assert bool(submission.favorite_toggle_link) == bool(submission_dict["favorite_toggle_link"]) == \ 181 | bool(submission_test_data["favorite_toggle_link"]) 182 | assert remove_user_icons(clean_html(submission.description)) == \ 183 | remove_user_icons(clean_html(submission_dict["description"])) == \ 184 | remove_user_icons(clean_html(submission_test_data["description"])) 185 | assert remove_user_icons(clean_html(submission.footer)) == \ 186 | remove_user_icons(clean_html(submission_dict["footer"])) == \ 187 | remove_user_icons(clean_html(submission_test_data["footer"])) 188 | assert submission.description_bbcode == submission_test_data["description_bbcode"] 189 | assert submission.footer_bbcode == submission_test_data["footer_bbcode"] 190 | 191 | assert file is not None and len(file) > 0 192 | 193 | assert len(faapi.comment.flatten_comments(submission.comments)) == submission.stats.comments 194 | 195 | comments: dict[int, Comment] = {c.id: c for c in faapi.comment.flatten_comments(submission.comments)} 196 | 197 | for comment in comments.values(): 198 | assert comment.reply_to is None or isinstance(comment.reply_to, Comment) 199 | 200 | if comment.reply_to: 201 | assert 
comment.reply_to.id in comments 202 | assert comment in comments[comment.reply_to.id].replies 203 | 204 | if comment.replies: 205 | for reply in comment.replies: 206 | assert reply.reply_to == comment 207 | 208 | 209 | # noinspection DuplicatedCode 210 | def test_journal(cookies: RequestsCookieJar, journal_test_data: dict): 211 | api: FAAPI = FAAPI(cookies) 212 | 213 | journal = api.journal(journal_test_data["id"]) 214 | journal_dict = dict(journal) 215 | 216 | assert journal.id == journal_dict["id"] == journal_test_data["id"] 217 | assert journal.title == journal_dict["title"] == journal_test_data["title"] 218 | assert journal.author.name.lower() == journal_dict["author"]["name"].lower() == journal_test_data["author"]["name"].lower() 219 | assert journal.author.join_date == journal_dict["author"]["join_date"] == \ 220 | datetime.fromisoformat(journal_test_data["author"]["join_date"]) + dst_us() 221 | assert journal.author.avatar_url == journal_dict["author"]["avatar_url"] != "" 222 | assert journal.date == journal_dict["date"] == datetime.fromisoformat(journal_test_data["date"]) + dst_us() 223 | assert journal.stats.comments == journal_dict["stats"]["comments"] >= journal_test_data["stats"]["comments"] 224 | assert journal.mentions == journal_dict["mentions"] == journal_test_data["mentions"] 225 | assert remove_user_icons(clean_html(journal.content)) == \ 226 | remove_user_icons(clean_html(journal_dict["content"])) == \ 227 | remove_user_icons(clean_html(journal_test_data["content"])) 228 | assert remove_user_icons(clean_html(journal.header)) == \ 229 | remove_user_icons(clean_html(journal_dict["header"])) == \ 230 | remove_user_icons(clean_html(journal_test_data["header"])) 231 | assert remove_user_icons(clean_html(journal.footer)) == \ 232 | remove_user_icons(clean_html(journal_dict["footer"])) == \ 233 | remove_user_icons(clean_html(journal_test_data["footer"])) 234 | assert journal.content_bbcode == journal_test_data["content_bbcode"] 235 | assert journal.header_bbcode == journal_test_data["header_bbcode"] 236 | assert journal.footer_bbcode == journal_test_data["footer_bbcode"] 237 | 238 | assert len(faapi.comment.flatten_comments(journal.comments)) == journal.stats.comments 239 | 240 | comments: dict[int, Comment] = {c.id: c for c in faapi.comment.flatten_comments(journal.comments)} 241 | 242 | for comment in comments.values(): 243 | assert comment.reply_to is None or isinstance(comment.reply_to, Comment) 244 | 245 | if comment.reply_to: 246 | assert comment.reply_to.id in comments 247 | assert comment in comments[comment.reply_to.id].replies 248 | 249 | if comment.replies: 250 | for reply in comment.replies: 251 | assert reply.reply_to == comment 252 | 253 | 254 | # noinspection DuplicatedCode 255 | def test_gallery(cookies: RequestsCookieJar, data: dict): 256 | api: FAAPI = FAAPI(cookies) 257 | 258 | ss: list[SubmissionPartial] = [] 259 | p: Optional[int] = 1 260 | 261 | while p: 262 | ss_, p_ = api.gallery(data["gallery"]["user"], p) 263 | assert isinstance(ss, list) 264 | assert all(isinstance(s, SubmissionPartial) for s in ss_) 265 | assert p_ is None or isinstance(p_, int) 266 | assert p_ is None or p_ > p 267 | assert len(ss) or p == 1 268 | assert len(ss_) or p_ is None 269 | 270 | ss.extend(ss_) 271 | p = p_ 272 | 273 | assert len(ss) >= data["gallery"]["length"] 274 | assert len({s.id for s in ss}) == len(ss) 275 | 276 | for submission in ss: 277 | assert submission.id > 0 278 | assert submission.type != "" 279 | assert submission.rating != "" 280 | assert 
submission.thumbnail_url != "" 281 | assert submission.author.name_url == username_url(data["gallery"]["user"]) 282 | 283 | 284 | # noinspection DuplicatedCode 285 | def test_scraps(cookies: RequestsCookieJar, data: dict): 286 | api: FAAPI = FAAPI(cookies) 287 | 288 | ss: list[SubmissionPartial] = [] 289 | p: Optional[int] = 1 290 | 291 | while p: 292 | ss_, p_ = api.scraps(data["scraps"]["user"], p) 293 | assert isinstance(ss, list) 294 | assert all(isinstance(s, SubmissionPartial) for s in ss_) 295 | assert p_ is None or isinstance(p_, int) 296 | assert p_ is None or p_ > p 297 | assert len(ss) or p == 1 298 | assert len(ss_) or p_ is None 299 | 300 | ss.extend(ss_) 301 | p = p_ 302 | 303 | assert len(ss) >= data["scraps"]["length"] 304 | assert len({s.id for s in ss}) == len(ss) 305 | 306 | for submission in ss: 307 | assert submission.id > 0 308 | assert submission.type != "" 309 | assert submission.rating != "" 310 | assert submission.thumbnail_url != "" 311 | assert submission.author.name_url == username_url(data["scraps"]["user"]) 312 | 313 | 314 | # noinspection DuplicatedCode 315 | def test_favorites(cookies: RequestsCookieJar, data: dict): 316 | api: FAAPI = FAAPI(cookies) 317 | 318 | ss: list[SubmissionPartial] = [] 319 | p: Optional[str] = "/" 320 | 321 | while p and len(ss) < data["favorites"]["max_length"]: 322 | ss_, p_ = api.favorites(data["favorites"]["user"], p) 323 | assert isinstance(ss, list) 324 | assert all(isinstance(s, SubmissionPartial) for s in ss_) 325 | assert p_ is None or isinstance(p_, str) 326 | assert p_ is None or (p == "/" and p_ > p) or p_ < p 327 | assert len(ss) or p == "/" 328 | assert len(ss_) or p_ is None 329 | 330 | ss.extend(ss_) 331 | p = p_ 332 | 333 | assert not data["favorites"]["next_page"] or p is not None 334 | assert len(ss) >= data["favorites"]["length"] 335 | assert len({s.id for s in ss}) == len(ss) 336 | 337 | for submission in ss: 338 | assert submission.id > 0 339 | assert submission.type != "" 340 | assert submission.rating != "" 341 | assert submission.thumbnail_url != "" 342 | 343 | 344 | # noinspection DuplicatedCode 345 | def test_journals(cookies: RequestsCookieJar, data: dict): 346 | api: FAAPI = FAAPI(cookies) 347 | 348 | js: list[JournalPartial] = [] 349 | p: Optional[int] = 1 350 | 351 | while p: 352 | js_, p_ = api.journals(data["journals"]["user"], p) 353 | assert isinstance(js, list) 354 | assert all(isinstance(s, JournalPartial) for s in js_) 355 | assert p_ is None or isinstance(p_, int) 356 | assert p_ is None or p_ > p 357 | assert len(js) or p == 1 358 | assert len(js_) or p_ is None 359 | 360 | js.extend(js_) 361 | p = p_ 362 | 363 | assert len(js) >= data["journals"]["length"] 364 | assert len({j.id for j in js}) == len(js) 365 | 366 | for journal in js: 367 | assert journal.id > 0 368 | assert journal.author.join_date.timestamp() > 0 369 | assert journal.date.timestamp() > 0 370 | assert journal.author.name_url == username_url(data["scraps"]["user"]) 371 | 372 | 373 | # noinspection DuplicatedCode 374 | def test_watchlist_to(cookies: RequestsCookieJar, data: dict): 375 | api: FAAPI = FAAPI(cookies) 376 | assert api.login_status 377 | 378 | ws: list[UserPartial] = [] 379 | p: Optional[int] = 1 380 | 381 | while p: 382 | ws_, p_ = api.watchlist_to(data["watchlist"]["user"], p) 383 | assert isinstance(ws, list) 384 | assert all(isinstance(s, UserPartial) for s in ws_) 385 | assert p_ is None or isinstance(p_, int) 386 | assert p_ is None or p_ > p 387 | assert len(ws) or p == 1 388 | assert len(ws_) or p_ is None 
389 | 390 | ws.extend(ws_) 391 | p = p_ 392 | 393 | assert len({w.name_url for w in ws}) == len(ws) 394 | 395 | 396 | # noinspection DuplicatedCode 397 | def test_watchlist_by(cookies: RequestsCookieJar, data: dict): 398 | api: FAAPI = FAAPI(cookies) 399 | assert api.login_status 400 | 401 | ws: list[UserPartial] = [] 402 | p: Optional[int] = 1 403 | 404 | while p: 405 | ws_, p_ = api.watchlist_by(data["watchlist"]["user"], p) 406 | assert isinstance(ws, list) 407 | assert all(isinstance(s, UserPartial) for s in ws_) 408 | assert p_ is None or isinstance(p_, int) 409 | assert p_ is None or p_ > p 410 | assert len(ws) or p == 1 411 | assert len(ws_) or p_ is None 412 | 413 | ws.extend(ws_) 414 | p = p_ 415 | 416 | assert len({w.name_url for w in ws}) == len(ws) 417 | -------------------------------------------------------------------------------- /tests/test_parse.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime 2 | from datetime import timedelta 3 | from json import load 4 | from pathlib import Path 5 | from re import sub 6 | from typing import Optional 7 | 8 | from pytest import fixture 9 | from pytest import raises 10 | from requests import Response 11 | from requests import Session 12 | 13 | from faapi.connection import join_url 14 | from faapi.connection import make_session 15 | from faapi.connection import root 16 | from faapi.exceptions import DisabledAccount 17 | from faapi.exceptions import NotFound 18 | from faapi.parse import bbcode_to_html 19 | from faapi.parse import check_page_raise 20 | from faapi.parse import clean_html 21 | from faapi.parse import html_to_bbcode 22 | from faapi.parse import parse_journal_page 23 | from faapi.parse import parse_loggedin_user 24 | from faapi.parse import parse_page 25 | from faapi.parse import parse_submission_page 26 | from faapi.parse import parse_user_page 27 | from faapi.parse import username_url 28 | 29 | __root__: Path = Path(__file__).resolve().parent 30 | 31 | 32 | @fixture 33 | def data() -> dict: 34 | return load((__root__ / "test_data.json").open()) 35 | 36 | 37 | @fixture 38 | def session(data: dict) -> Session: 39 | return make_session(data["cookies"], Session) 40 | 41 | 42 | @fixture 43 | def user_test_data() -> dict: 44 | return load((__root__ / "test_user.json").open()) 45 | 46 | 47 | @fixture 48 | def submission_test_data() -> dict: 49 | return load((__root__ / "test_submission.json").open()) 50 | 51 | 52 | @fixture 53 | def journal_test_data() -> dict: 54 | return load((__root__ / "test_journal.json").open()) 55 | 56 | 57 | def dst_us() -> timedelta: 58 | now: datetime = datetime.now() 59 | 60 | if now.month < 3 or now.month >= 12: 61 | return timedelta(0) 62 | 63 | m1 = datetime(now.year, 3, 1) 64 | 65 | if now < datetime(now.year, 3, 7 + (6 - m1.weekday() + 1)): 66 | return timedelta(0) 67 | 68 | n1 = datetime(now.year, 11, 1) 69 | 70 | if now > datetime(now.year, 11, 6 - n1.weekday() + 1): 71 | return timedelta(0) 72 | 73 | return timedelta(hours=-1) 74 | 75 | 76 | def remove_user_icons(html: str) -> str: 77 | return sub(r"a\.furaffinity\.net/\d{8}/[^. 
]+.gif", "", html) 78 | 79 | 80 | def test_check_page_disabled_account(session: Session, data: dict): 81 | res: Response = session.get(join_url(root, "user", data["disabled"]["user"])) 82 | assert res.ok 83 | 84 | page = parse_page(res.text) 85 | 86 | with raises(DisabledAccount): 87 | check_page_raise(page) 88 | 89 | 90 | def test_check_page_not_found(session: Session): 91 | res: Response = session.get(join_url(root, "user", "_")) 92 | assert res.ok 93 | 94 | page = parse_page(res.text) 95 | 96 | with raises(NotFound): 97 | check_page_raise(page) 98 | 99 | 100 | def test_parse_loggedin_user(session: Session, data: dict): 101 | res: Response = session.get(join_url(root, "user", data["login"]["user"])) 102 | assert res.ok 103 | 104 | page = parse_page(res.text) 105 | login_user: Optional[str] = parse_loggedin_user(page) 106 | assert login_user is not None 107 | 108 | assert username_url(login_user) == username_url(data["login"]["user"]) 109 | 110 | 111 | def test_parse_user_page(session: Session, user_test_data: dict): 112 | res: Response = session.get(join_url(root, "user", username_url(user_test_data["name"]))) 113 | assert res.ok 114 | 115 | page = parse_page(res.text) 116 | result = parse_user_page(page) 117 | 118 | assert result["name"].lower() == user_test_data["name"].lower() 119 | assert result["status"] == user_test_data["status"] 120 | assert result["title"] == user_test_data["title"] 121 | assert result["join_date"] == datetime.fromisoformat(user_test_data["join_date"]) + dst_us() 122 | assert result["stats"][0] >= user_test_data["stats"]["views"] 123 | assert result["stats"][1] >= user_test_data["stats"]["submissions"] 124 | assert result["stats"][2] >= user_test_data["stats"]["favorites"] 125 | assert result["stats"][3] >= user_test_data["stats"]["comments_earned"] 126 | assert result["stats"][4] >= user_test_data["stats"]["comments_made"] 127 | assert result["stats"][5] >= user_test_data["stats"]["journals"] 128 | assert result["info"] == user_test_data["info"] 129 | assert result["contacts"] == user_test_data["contacts"] 130 | assert result["avatar_url"] == user_test_data["avatar_url"] != "" 131 | assert result["banner_url"] == user_test_data["banner_url"] != "" 132 | assert remove_user_icons(clean_html(result["profile"])) == remove_user_icons(clean_html(user_test_data["profile"])) 133 | assert html_to_bbcode(result["profile"]) == user_test_data["profile_bbcode"] 134 | assert user_test_data["profile_bbcode"] == html_to_bbcode(bbcode_to_html(user_test_data["profile_bbcode"])) 135 | 136 | 137 | def test_parse_submission_page(session: Session, submission_test_data: dict): 138 | res: Response = session.get(join_url(root, "view", submission_test_data["id"])) 139 | assert res.ok 140 | 141 | page = parse_page(res.text) 142 | result = parse_submission_page(page) 143 | 144 | assert result["id"] == submission_test_data["id"] 145 | assert result["title"] == submission_test_data["title"] 146 | assert result["author"].lower() == submission_test_data["author"]["name"].lower() 147 | assert result["author_icon_url"] != "" 148 | assert result["date"] == datetime.fromisoformat(submission_test_data["date"]) + dst_us() 149 | assert result["tags"] == submission_test_data["tags"] 150 | assert result["category"] == submission_test_data["category"] 151 | assert result["species"] == submission_test_data["species"] 152 | assert result["gender"] == submission_test_data["gender"] 153 | assert result["rating"] == submission_test_data["rating"] 154 | assert result["views"] >= 
submission_test_data["stats"]["views"] 155 | assert result["comment_count"] >= submission_test_data["stats"]["comments"] 156 | assert result["favorites"] >= submission_test_data["stats"]["favorites"] 157 | assert result["type"] == submission_test_data["type"] 158 | assert result["mentions"] == submission_test_data["mentions"] 159 | assert result["folder"] == submission_test_data["folder"] 160 | assert [list(f) for f in result["user_folders"]] == submission_test_data["user_folders_tuples"] 161 | assert result["file_url"] != "" 162 | assert result["thumbnail_url"] != "" 163 | assert result["prev"] == submission_test_data["prev"] 164 | assert result["next"] == submission_test_data["next"] 165 | assert bool(result["unfav_link"]) == submission_test_data["favorite"] 166 | assert (("/fav/" in submission_test_data["favorite_toggle_link"]) and bool(result["fav_link"])) or \ 167 | (("/unfav/" in submission_test_data["favorite_toggle_link"]) and bool(result["unfav_link"])) 168 | assert remove_user_icons(clean_html(result["description"])) == \ 169 | remove_user_icons(clean_html(submission_test_data["description"])) 170 | assert remove_user_icons(clean_html(result["footer"])) == \ 171 | remove_user_icons(clean_html(submission_test_data["footer"])) 172 | assert html_to_bbcode(result["description"]) == submission_test_data["description_bbcode"] 173 | assert html_to_bbcode(result["footer"]) == submission_test_data["footer_bbcode"] 174 | assert submission_test_data["description_bbcode"] == \ 175 | html_to_bbcode(bbcode_to_html(submission_test_data["description_bbcode"])) 176 | assert submission_test_data["footer_bbcode"] == \ 177 | html_to_bbcode(bbcode_to_html(submission_test_data["footer_bbcode"])) 178 | 179 | 180 | def test_parse_journal_page(session: Session, journal_test_data: dict): 181 | res: Response = session.get(join_url(root, "journal", journal_test_data["id"])) 182 | assert res.ok 183 | 184 | page = parse_page(res.text) 185 | result = parse_journal_page(page) 186 | 187 | assert result["id"] == journal_test_data["id"] 188 | assert result["title"] == journal_test_data["title"] 189 | assert result["user_info"]["name"].lower() == journal_test_data["author"]["name"].lower() 190 | assert result["user_info"]["join_date"] == \ 191 | datetime.fromisoformat(journal_test_data["author"]["join_date"]) + dst_us() 192 | assert result["user_info"]["avatar_url"] != "" 193 | assert result["date"] == datetime.fromisoformat(journal_test_data["date"]) + dst_us() 194 | assert result["comments"] >= journal_test_data["stats"]["comments"] 195 | assert result["mentions"] == journal_test_data["mentions"] 196 | assert remove_user_icons(clean_html(result["content"])) == remove_user_icons( 197 | clean_html(journal_test_data["content"])) 198 | assert remove_user_icons(clean_html(result["header"])) == remove_user_icons( 199 | clean_html(journal_test_data["header"])) 200 | assert remove_user_icons(clean_html(result["footer"])) == remove_user_icons( 201 | clean_html(journal_test_data["footer"])) 202 | assert html_to_bbcode(result["content"]) == journal_test_data["content_bbcode"] 203 | assert html_to_bbcode(result["header"]) == journal_test_data["header_bbcode"] 204 | assert html_to_bbcode(result["footer"]) == journal_test_data["footer_bbcode"] 205 | assert journal_test_data["content"] == html_to_bbcode(bbcode_to_html(journal_test_data["content"])) 206 | assert journal_test_data["header"] == html_to_bbcode(bbcode_to_html(journal_test_data["header"])) 207 | assert journal_test_data["footer"] == 
html_to_bbcode(bbcode_to_html(journal_test_data["footer"])) 208 | --------------------------------------------------------------------------------
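For quick reference, the snippet below sketches how the FAAPI client exercised by the tests above is typically driven. It is an illustration only: the cookie values are placeholders that must come from a logged-in FurAffinity session (commonly its "a" and "b" cookies), and the username and IDs are arbitrary examples rather than values taken from this repository.

    from faapi import FAAPI
    from requests.cookies import RequestsCookieJar

    # Placeholder cookies: replace the values with those of a logged-in FurAffinity session.
    cookies = RequestsCookieJar()
    cookies.set("a", "<cookie a value>")
    cookies.set("b", "<cookie b value>")

    api = FAAPI(cookies)
    print(api.login_status)  # True once the cookies are accepted

    # Single objects; the username and IDs below are arbitrary examples.
    user = api.user("fender")
    print(user.display_name, user.stats.views)

    submission, file_bytes = api.submission(12345678, get_file=True)
    print(submission.title, submission.author.name, len(file_bytes or b""))

    journal = api.journal(12345678)
    print(journal.title, journal.date)

    # Paged listings return (results, next_page); next_page is None after the last page.
    page = 1
    while page:
        results, page = api.gallery("fender", page)
        for partial in results:
            print(partial.id, partial.rating, partial.thumbnail_url)

Scraps, journals, and the watchlist helpers follow the same integer-page pattern shown for the gallery, while favorites pages with an opaque string token that starts at "/".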