├── .github ├── ISSUE_TEMPLATE │ ├── BUG.yml │ ├── FEATURE-REQUEST.yml │ └── config.yml └── workflows │ ├── publish.yml │ ├── security.yml │ └── test.yml ├── .gitignore ├── CHANGELOG.md ├── LICENSE ├── README.md ├── faapi ├── __init__.py ├── __version__.py ├── base.py ├── comment.py ├── connection.py ├── exceptions.py ├── journal.py ├── parse.py ├── submission.py └── user.py ├── poetry.lock ├── pyproject.toml └── tests ├── test_connection.py ├── test_faapi.py └── test_parse.py /.github/ISSUE_TEMPLATE/BUG.yml: -------------------------------------------------------------------------------- 1 | name: Bug Report 2 | description: Open a bug issue 3 | title: "[Bug]: " 4 | labels: ["bug"] 5 | assignees: 6 | - MatteoCampinoti94 7 | body: 8 | - type: markdown 9 | attributes: 10 | value: | 11 | Thanks for taking the time to fill out this bug report! 12 | - type: input 13 | id: version 14 | attributes: 15 | label: Version 16 | description: What version of the program where you running? 17 | validations: 18 | required: true 19 | - type: textarea 20 | id: summary 21 | attributes: 22 | label: What happened? 23 | description: Summarize the bug encountered concisely 24 | validations: 25 | required: true 26 | - type: textarea 27 | id: reproduce 28 | attributes: 29 | label: How to reproduce the bug? 30 | description: Summarize the steps to encounter the bug 31 | validations: 32 | required: true 33 | - type: textarea 34 | id: logs 35 | attributes: 36 | label: Relevant log output 37 | description: Please copy and paste any relevant log output. This will be automatically formatted into code, so no need for backticks. 38 | render: shell -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/FEATURE-REQUEST.yml: -------------------------------------------------------------------------------- 1 | name: Feature Request 2 | description: Open a feature request 3 | title: "[Feature Request]: " 4 | labels: ["enhancement"] 5 | assignees: 6 | - MatteoCampinoti94 7 | body: 8 | - type: markdown 9 | attributes: 10 | value: | 11 | Thanks for taking the time to fill out this feature request! 12 | - type: textarea 13 | id: summary 14 | attributes: 15 | label: The idea 16 | description: Summarize your idea, including UX/UI changes 17 | validations: 18 | required: true 19 | - type: textarea 20 | id: implementation 21 | attributes: 22 | label: Implementation ideas 23 | description: How would you implement the feature? 
24 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/config.yml: -------------------------------------------------------------------------------- 1 | blank_issues_enabled: true 2 | -------------------------------------------------------------------------------- /.github/workflows/publish.yml: -------------------------------------------------------------------------------- 1 | name: Publish 2 | 3 | on: 4 | push: 5 | tags: 6 | - "v*.*.*" 7 | workflow_dispatch: 8 | 9 | env: 10 | PYTHON_VERSION: 3.11.0 11 | POETRY_VERSION: 1.3.1 12 | 13 | jobs: 14 | wait_static_tests: 15 | name: Wait for Static Tests 16 | runs-on: ubuntu-latest 17 | strategy: 18 | matrix: 19 | test: [ "Flake8", "mypy" ] 20 | python: [ "3.11", "3.10", "3.9" ] 21 | steps: 22 | - uses: actions/checkout@v4 23 | - name: Wait for Tests 24 | id: wait 25 | uses: fountainhead/action-wait-for-check@v1.0.0 26 | with: 27 | token: ${{ secrets.GITHUB_TOKEN }} 28 | checkName: "${{ matrix.test }} (${{ matrix.python }})" 29 | ref: ${{ github.event.push_request.head.sha || github.sha }} 30 | - if: ${{ steps.wait.outputs.conclusion != 'success' }} 31 | run: exit 1 32 | 33 | wait_unit_tests: 34 | name: Wait for Unit Tests 35 | runs-on: ubuntu-latest 36 | steps: 37 | - uses: actions/checkout@v4 38 | - name: Wait for Tests 39 | id: wait 40 | uses: fountainhead/action-wait-for-check@v1.0.0 41 | with: 42 | token: ${{ secrets.GITHUB_TOKEN }} 43 | checkName: "pytest" 44 | ref: ${{ github.event.push_request.head.sha || github.sha }} 45 | - if: ${{ steps.wait.outputs.conclusion != 'success' }} 46 | run: exit 1 47 | 48 | publish: 49 | name: Publish 50 | runs-on: ubuntu-latest 51 | needs: [ wait_static_tests, wait_unit_tests ] 52 | steps: 53 | - uses: actions/checkout@v4 54 | - uses: actions/setup-python@v5 55 | with: 56 | python-version: ${{ env.PYTHON_VERSION }} 57 | - uses: abatilo/actions-poetry@v2.0.0 58 | with: 59 | poetry-version: ${{ env.POETRY_VERSION }} 60 | - name: Build and publish 61 | env: 62 | PYPI_USERNAME: __token__ 63 | PYPI_PASSWORD: ${{ secrets.PYPI_API_TOKEN }} 64 | run: | 65 | poetry config virtualenvs.in-project true 66 | poetry install --no-root 67 | poetry publish --build --username $PYPI_USERNAME --password $PYPI_PASSWORD 68 | - name: Save dist 69 | uses: actions/upload-artifact@v4 70 | with: 71 | name: dist 72 | path: dist 73 | 74 | release: 75 | name: Release 76 | runs-on: ubuntu-latest 77 | needs: publish 78 | steps: 79 | - uses: actions/checkout@v4 80 | with: 81 | fetch-depth: 0 82 | - name: Get dist 83 | uses: actions/download-artifact@v4 84 | with: 85 | name: dist 86 | path: dist 87 | - name: Get Tag 88 | id: tag 89 | uses: olegtarasov/get-tag@v2.1.1 90 | - name: Clean CHANGELOG.md 91 | run: | 92 | touch CHANGELOG.md.tmp 93 | npm install -g prettier 94 | prettier --parser markdown --tab-width 4 --prose-wrap never CHANGELOG.md > CHANGELOG.md.tmp 95 | cat CHANGELOG.md.tmp > CHANGELOG.md 96 | - name: Build Release 97 | id: release 98 | uses: MatteoCampinoti94/changelog-to-release@v1.0.2 99 | with: 100 | version-name: ${{ steps.tag.outputs.tag }} 101 | - name: Build Release File 102 | env: 103 | TAG: ${{ steps.tag.outputs.tag }} 104 | RELEASE: ${{ steps.release.outputs.body }} 105 | run: | 106 | touch RELEASE.md 107 | PREVIOUS_TAG="$(git tag -l --sort=-version:refname | head -2 | tail -1)" 108 | printf "%s\n" "$RELEASE" > RELEASE.md 109 | printf "\n## 🔗 Links\n" >> RELEASE.md 110 | printf "\n* %s" "PyPi release: https://pypi.org/project/${GITHUB_REPOSITORY#*/}/${TAG#v}" >> 
RELEASE.md 111 | printf "\n* %s" "Full changelog: https://github.com/$GITHUB_REPOSITORY/compare/$PREVIOUS_TAG...$TAG" >> RELEASE.md 112 | cat RELEASE.md 113 | printf "\n\n## Dist Files\n" 114 | ls -l dist || echo " no files" 115 | - name: Create Release 116 | uses: softprops/action-gh-release@v1 117 | with: 118 | token: ${{ secrets.GITHUB_TOKEN }} 119 | tag_name: ${{ steps.tag.outputs.tag }} 120 | name: ${{ steps.release.outputs.title }} 121 | body_path: RELEASE.md 122 | files: | 123 | dist/* -------------------------------------------------------------------------------- /.github/workflows/security.yml: -------------------------------------------------------------------------------- 1 | name: "CodeQL" 2 | 3 | on: 4 | push: 5 | branches: [ main ] 6 | pull_request: 7 | branches: [ main ] 8 | schedule: 9 | - cron: '0 18 * * 6' 10 | workflow_dispatch: 11 | 12 | jobs: 13 | analyze: 14 | name: Analyze 15 | runs-on: ubuntu-latest 16 | permissions: 17 | actions: read 18 | contents: read 19 | security-events: write 20 | steps: 21 | - name: Checkout repository 22 | uses: actions/checkout@v3 23 | - name: Initialize CodeQL 24 | uses: github/codeql-action/init@v2 25 | with: 26 | languages: "python" 27 | - name: Perform CodeQL Analysis 28 | uses: github/codeql-action/analyze@v2 29 | -------------------------------------------------------------------------------- /.github/workflows/test.yml: -------------------------------------------------------------------------------- 1 | name: Test 2 | 3 | on: 4 | push: 5 | branches: [ main, dev ] 6 | pull_request: 7 | branches: [ main, dev ] 8 | schedule: 9 | - cron: '0 7 * * *' 10 | workflow_dispatch: 11 | 12 | env: 13 | PYTHON_VERSION: 3.11.0 14 | POETRY_VERSION: 1.3.1 15 | 16 | jobs: 17 | flake8: 18 | if: github.event_name != 'schedule' 19 | name: Flake8 20 | runs-on: ubuntu-latest 21 | strategy: 22 | matrix: 23 | python: [ "3.11", "3.10", "3.9" ] 24 | steps: 25 | - uses: actions/checkout@v4 26 | - uses: actions/setup-python@v5 27 | with: 28 | python-version: ${{ matrix.python }} 29 | - name: Run pip install 30 | run: python -m pip install 'flake8>=5.0.4' 31 | - name: Style Test 32 | id: test 33 | run: | 34 | python -m flake8 --max-line-length=120 faapi 35 | 36 | mypy: 37 | if: github.event_name != 'schedule' 38 | name: mypy 39 | runs-on: ubuntu-latest 40 | strategy: 41 | matrix: 42 | python: [ "3.11", "3.10", "3.9" ] 43 | steps: 44 | - uses: actions/checkout@v4 45 | - uses: actions/setup-python@v5 46 | with: 47 | python-version: ${{ matrix.python }} 48 | - name: Run pip install 49 | run: python -m pip install 'mypy>=0.991' 'pytest>=7.2.0' 'types-beautifulsoup4>=4.11.6' 50 | - name: Types test 51 | id: test 52 | run: | 53 | python -m mypy --install-types --non-interactive --check-untyped-defs faapi 54 | python -m mypy --install-types --non-interactive --check-untyped-defs tests 55 | 56 | pytest: 57 | name: pytest 58 | runs-on: ubuntu-latest 59 | steps: 60 | - uses: actions/checkout@v4 61 | - uses: actions/setup-python@v5 62 | with: 63 | python-version: ${{ env.PYTHON_VERSION }} 64 | - uses: abatilo/actions-poetry@v2.0.0 65 | with: 66 | poetry-version: ${{ env.POETRY_VERSION }} 67 | - run: | 68 | poetry install 69 | - name: Unit test 70 | env: 71 | TEST_DATA: ${{ secrets.TEST_DATA }} 72 | TEST_USER: ${{ secrets.TEST_USER }} 73 | TEST_SUBMISSION: ${{ secrets.TEST_SUBMISSION }} 74 | TEST_JOURNAL: ${{ secrets.TEST_JOURNAL }} 75 | run: | 76 | echo "$TEST_DATA" > tests/test_data.json 77 | echo "$TEST_USER" > tests/test_user.json 78 | echo "$TEST_SUBMISSION" > 
tests/test_submission.json 79 | echo "$TEST_JOURNAL" > tests/test_journal.json 80 | poetry run coverage run -m pytest tests/test_connection.py tests/test_parse.py tests/test_faapi.py -v --tb=line -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | 4 | # Distribution / packaging 5 | .Python 6 | env/ 7 | build/ 8 | develop-eggs/ 9 | dist/ 10 | downloads/ 11 | eggs/ 12 | .eggs/ 13 | lib/ 14 | lib64/ 15 | parts/ 16 | sdist/ 17 | var/ 18 | wheels/ 19 | *.egg-info/ 20 | .installed.cfg 21 | *.egg 22 | .coverage 23 | 24 | # virtualenv 25 | .venv 26 | venv/ 27 | ENV/ 28 | 29 | 30 | # mypy 31 | .mypy_cache/ 32 | 33 | # Editors folders 34 | .idea/ 35 | 36 | # System folders/files 37 | .DS_Store 38 | 39 | # Local test data 40 | tests/test_*.json -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | ## v3.11.9 4 | 5 | ### Fixes 6 | 7 | * Fix `KeyError` raised when parsing a submission or journal with hidden comments 8 | 9 | ## v3.11.8 10 | 11 | ### Changes 12 | 13 | * Update parser to support FurAffinity's new display names feature 14 | * Added `UserPartial.display_name` and `User.display_name` 15 | * Full support coming in next minor update 16 | 17 | ## v3.11.7 18 | 19 | ### Changes 20 | 21 | * Updated parser to work correctly with FurAffinity's new tag-blocking feature on submission pages 22 | 23 | ## v3.11.6 24 | 25 | ### Fixes 26 | 27 | * Fix next page of favorites 28 | * The selector path to the "Next" button had changed 29 | * The presence of the button is now tested 30 | 31 | ## v3.11.5 32 | 33 | ### Fixes 34 | 35 | * Fix a possible issue were cookie value could be set as `None` when using a `http.cookiejar.CookieJar` object 36 | 37 | ### Dependencies 38 | 39 | * Use [requests ^2.32.3](https://pypi.org/project/requests/2.32.3) 40 | * Use [beautifulsoup4 ^4.12.3](https://pypi.org/project/beautifulsoup4/4.12.3) 41 | * Use [lxml ^5.3.0](https://pypi.org/project/lxml/5.3.0) 42 | * Use [python-dateutil ^2.9.0](https://pypi.org/project/python-dateutil/2.9.0) 43 | 44 | ## v3.11.4 45 | 46 | ### Fixes 47 | 48 | * Fix square brackets [] being removed from usernames 49 | 50 | ## v3.11.3 51 | 52 | ### Fixes 53 | 54 | * Fix recursion limit error with chains of journal comments longer than ~1/6 of the recursion limit 55 | 56 | ## v3.11.2 57 | 58 | ### Changes 59 | 60 | * HTML content is not minified beyond basic stripping of whitespace characters 61 | 62 | ### Fixes 63 | 64 | * Fix recursion limit error with chains of submission comments longer than ~1/6 of the recursion limit 65 | * Fix incorrectly parsed usernames in comments with the OP (Original Poster) tag 66 | 67 | ### Dependencies 68 | 69 | * Use [lxml ^4.9.3](https://pypi.org/project/lxml/4.9.3) 70 | * Remove [htmlmin](https://pypi.org/project/htmlmin) 71 | 72 | ## v3.11.1 73 | 74 | ### Changes 75 | 76 | * Support submissions with no or partial category 77 | 78 | ## v3.11.0 79 | 80 | ### New Features 81 | 82 | * Session class for requests can be customized with new `session_class` argument for `FAAPI` 83 | 84 | ### Changes 85 | 86 | * Remove [cfscrape](https://pypi.org/project/cfscrape) dependency 87 | * Was not updated in years and all requests succeeded with a normal `requests.Session` object 88 | 89 | ### Dependencies 
90 | 91 | * Use [requests ^2.31.0](https://pypi.org/project/requests/2.31.0) 92 | * Fix [CVE-2023-32681](https://cve.report/CVE-2022-42969) issue 93 | * Use [beautifulsoup4 ^4.12.2](https://pypi.org/project/beautifulsoup4/4.12.2) 94 | * Use [lxml ^4.9.2](https://pypi.org/project/lxml/4.9.2) 95 | 96 | ## v3.10.1 97 | 98 | ### Fixes 99 | 100 | * Fix parsed URLs not being properly encoded if they contained non-allowed URL characters 101 | 102 | ## v3.10.0 103 | 104 | ### New Features 105 | 106 | * Fur Affinity UI update 107 | * Support the new UI introduced on November 26, 2022 108 | * *Note:* the new UI does not show comment parents yet, but the parent comment link is still present in the HTML and 109 | just commented out, so the parser uses regex to extract the parent ID; this could cause unforeseen issues so be 110 | careful when parsing comments 111 | * User banners 112 | * Parse new user banners (when set) 113 | * New `User.banner_url` variable holds the banner URL 114 | 115 | ### Changes 116 | 117 | * Rename `User.user_icon_url` and `UserPartial.user_icon_url` to `User.avatar_url` and `UserPartial.avatar_url` 118 | 119 | ### Dependencies 120 | 121 | * Use [flake ^6.0.0](https://pypi.org/project/flake/6.0.0) for testing 122 | 123 | ## v3.9.6 124 | 125 | ### Changes 126 | 127 | * Remove implicit `Optional` types to comply with [PEP 484](https://peps.python.org/pep-0484/) 128 | 129 | ### Fixes 130 | 131 | * Fix selectors for date tags in journals and submissions which sometimes caused the incorrect date to be selected 132 | 133 | ### Dependencies 134 | 135 | * Use [mypy ^0.991](https://pypi.org/project/mypy/0.991) 136 | * Complies with [PEP 484](https://peps.python.org/pep-0484/) 137 | 138 | ## v3.9.5 139 | 140 | ### Changes 141 | 142 | * Improve parsing of usernames and statuses 143 | * Thanks to PR [#7](https://github.com/FurryCoders/FAAPI/pull/7) by @Xraydylan 144 | 145 | ### Fixes 146 | 147 | * Fix parsing of user tags for folders when the user had no title set, or used bars (`|`) in their title 148 | 149 | ## v3.9.4 150 | 151 | ### Fixes 152 | 153 | * Fix admins' username and status not being parsed correctly in watchlists and users tags 154 | * Fix issue [#6](https://github.com/FurryCoders/FAAPI/issues/6) 155 | 156 | ## v3.9.3 157 | 158 | ### Changes 159 | 160 | * Users with non-alphanumeric characters in their name are now escaped in URLs 161 | * From suggestion in issue [#5](https://github.com/FurryCoders/FAAPI/issues/5) 162 | 163 | ### Fixes 164 | 165 | * Fix admins' username and status not being parsed correctly 166 | * Fix issue [#6](https://github.com/FurryCoders/FAAPI/issues/6) 167 | 168 | ## v3.9.2 169 | 170 | ### Fixes 171 | 172 | * Fix ` being removed from usernames 173 | 174 | ## v3.9.1 175 | 176 | ### Fixes 177 | 178 | * Fix incorrect user icon URLs when converting BBCode to HTML 179 | 180 | ### Dependencies 181 | 182 | * Use [pytest ^7.2.0](https://pypi.org/project/pytest/7.2.0) 183 | * Fix [CVE-2022-42969](https://cve.report/CVE-2022-42969) issue 184 | 185 | ## v3.9.0 186 | 187 | ### New Features 188 | 189 | * Submission footers 190 | * Submission footers are now separated from the submission description and stored in the `Submission.footer` field 191 | * The BBCode of the footer can be accessed with the `Submission.footer_bbcode` property 192 | * Generate user icon URLs 193 | * New `generate_user_icon_url()` method added to `UserPartial` and `User` to create the URL for the current user 194 | icon 195 | * BBCode to HTML conversion 196 | * Work-in-progress version of a 
BBCode converter based on the [bbcode](https://pypi.org/project/bbcode) library 197 | * Converter function is located in the `parse` submodule: `faapi.parse.bbcode_to_html()` 198 | * The majority of HTML fields (submission descriptions, journal contents, comments, etc.) can be converted back and 199 | forth between HTML and BBCode without loosing information 200 | * If a submission contains incorrect or very unusual BBCode tags or text, the BBCode to HTML conversion may create 201 | artifacts and tags that did not exist in the original content 202 | 203 | ### Changes 204 | 205 | * Added `Journal.header_bbcode` and `Journal.footer_bbcode` properties to convert `Journal.header` and `Journal.footer` 206 | to BBCode 207 | * Return `None` instead of 0 (or `""` for favorites) when reaching the last page with `FAAPI.gallery()` 208 | , `FAAPI.scraps()`, `FAAPI.journals()`, `FAAPI.favorites()`, `FAAPI.watchlist_by()`, and `FAAPI.watchlist_to()` 209 | * Added `__hash__` method to `User`, `UserPartial`, `Submission`, `SubmissionPartial`, `Journal`, `JournalPartial`, 210 | and `Comment`; the hash value is calculated using the same fields used for equality comparisons 211 | * Improved cleanup of HTML fields by using [htmlmin](https://pypi.org/project/htmlmin) 212 | * Fur Affinity URLs are now properly converted to relative `[url=]` tags in BBCode 213 | * Unknown tags are converted to `[tag=.]` in BBCode 214 | * Added `CookieDict(TypedDict)` notation for cookies dictionary (alternative to `CookieJar`) to provide intellisense and 215 | type checking information 216 | 217 | ### Fixes 218 | 219 | * Fix comments being considered equal even if they had different parents but the same ID 220 | * Fix break lines tags (`
`) not always being converted to newlines when converting to BBCode 221 | * Fix errors when converting nav links (e.g. `[2,1,3]`) to BBCode 222 | * Fix incorrect detection of last page in `FAAPI.watchlist_by()` and `FAAPI.watchlist_by()` 223 | * Fix errors when converting special characters (e.g. `&`) 224 | * Fix trailing spaces around newlines remaining after converting to BBCode 225 | * Fix horizontal lines not being correctly converted from BBCode if the dashes (`-----` or longer) were not surrounded 226 | by newlines 227 | 228 | ### Dependencies 229 | 230 | * Added [htmlmin ^0.1.12](https://pypi.org/project/htmlmin/0.1.12) 231 | * Added [bbcode ^1.1.0](https://pypi.org/project/bbcode/1.1.0) 232 | 233 | ## v3.8.1 234 | 235 | ### Changes 236 | 237 | * Improved HTML extraction for specific tags to avoid encoding issues 238 | * HTML fields are cleaned up (i.e., removed newlines, carriage returns, and extra spaces) 239 | * None of the parsed pages use tags with _pre_ white space rendering, so no information is lost 240 | * Improvements to BBCode conversion 241 | * Do not quote URLs when converting to BBCode 242 | * Support nested quote blocks 243 | * Support non-specific tags (e.g. `div.submission-footer`) and convert them 244 | to `[tag..][/tag.]` 245 | 246 | ### Fixes 247 | 248 | * Fix incorrect encoding of special characters (`<`, `>`, etc.) in HTML fields 249 | * Was caused by the previous method of extracting the inner HTML of a tag 250 | * Fix URLs automatically shortened by Fur Affinity being converted to BBCode with the wrong text content 251 | * Fix HTML paragraph tags (`

`) sometimes appearing in BBCode-converted content 252 | * Fix BBCode conversion of `:usernameicon:` links (i.e., user icon links without the username) 253 | 254 | ## v3.8.0 255 | 256 | ### New Features 257 | 258 | * Submission user folders 259 | * Submission folders are now parsed and stored in a dedicated `user_folders` field in the `Submission` object 260 | * Each folder is stored in a `namedtuple` with fields for `name`, `url`, and `group` (if any) 261 | * BBCode conversion 262 | * New properties have been added to the `User`, `Submission`, `Journal`, `JournalPartial`, and `Comment` objects to 263 | provide BBCode versions of HTML fields 264 | * The generated BBCode tags follow the Fur Affinity standard found on 265 | their [support page](https://www.furaffinity.net/help/#tags-and-codes) 266 | 267 | ## v3.7.4 268 | 269 | ### Dependencies 270 | 271 | * Use [lxml ^4.9.1](https://pypi.org/project/lxml/4.9.1) 272 | * Fix [CVE-2022-2309](https://cve.report/CVE-2022-2309) issue 273 | 274 | ## v3.7.3 275 | 276 | ### Fixes 277 | 278 | * Fix error when parsing journals folders and journal pages caused by date format set to full on Fur Affinity's site 279 | settings 280 | 281 | ## v3.7.2 282 | 283 | ### New Features 284 | 285 | * Requests timeout 286 | * New `FAAPI.timeout: int | None` variable to set request timeout in seconds 287 | * Timeout is used for both page requests (e.g. submissions) and file requests 288 | 289 | ### Fixes 290 | 291 | * Fix possible parsing error arising from multiple attributes in one tag 292 | 293 | ## v3.7.1 294 | 295 | ### New Features 296 | 297 | * Frontpage 298 | * New `FAAPI.frontpage()` method to get submissions from Fur Affinity's front page 299 | * Sorting of `Journal`, `Submission`, and `User` objects 300 | * All data objects now support greater than, greater or equal, lower than, and lower or equal operations for easy 301 | sorting 302 | 303 | ### Fixes 304 | 305 | * Fix equality comparison between `Journal` and `JournalPartial` 306 | * Fix parsing of usernames from user pages returning the title instead 307 | * Caused by a change in Fur Affinity's DOM 308 | 309 | ## v3.7.0 310 | 311 | ### New Features 312 | 313 | * Journal headers and footers 314 | * The `Journal` class now contains header and footer fields which are parsed from journal pages (`FAAPI.journal`) 315 | * Submission favorite status and link 316 | * The `Submission` class now contains a boolean `favorite` field that is set to `True` if the submission is a 317 | favorite, and a `favorite_toggle_link` containing the link to toggle the favorite status (`/fav/` or `/unfav/`) 318 | * User watch and block statuses and links 319 | * The `User` class now contains boolean `watched` and `blocked` fields that are set to `True` if the user is watched 320 | or blocked respectively, and `watched_toggle_link` and `blocked_toggle_link` fields containing the links to toggle 321 | the watched (`/watch/` or `/unwatch/`) and blocked (`/block/` or `/unblock/`) statuses respectively. 
322 | 323 | ### Changes 324 | 325 | * Remove `parse.check_page` function which had no usage in the library anymore 326 | * Remove `parse.parse_search_submissions` function and `FAAPI.search` method 327 | * They will be reintroduced once Fur Affinity allows scraping search pages again 328 | 329 | ### Fixes 330 | 331 | * Fix an incorrect regular expression that parsed mentions in journals, submissions, and profiles which could cause 332 | non-Fur Affinity links to be matched as valid 333 | * Security issue [#3](https://github.com/FurryCoders/FAAPI/issues/3) 334 | 335 | ## v3.6.1 336 | 337 | ### Fixes 338 | 339 | * Fix `FAAPI.journals` not detecting the next page correctly 340 | * Caused by a change in Fur Affinity's journals page 341 | 342 | ## v3.6.0 343 | 344 | ### New Features 345 | 346 | * Comments! 💬 347 | * A new `Comment` object is now used to store comments for submissions and journals 348 | * The comments are organised in a tree structure, and each one contains references to both its parent 349 | object (`Submission` or `Journal`) and, if the comment is a reply, to its parent comment too 350 | * The auxiliary functions `faapi.comment.flatten_comments` and `faapi.comment.sort_comments` allow to flatten the 351 | comment tree or reorganise it 352 | 353 | * Separate `JournalPartial` and `Journal` objects 354 | * The new `JournalPartial` class takes the place of the previous `Journal` class, and it is now used only to parse 355 | journal from a user's journals folder 356 | * The new `Journal` class contains the same fields as `JournalPartial` with the addition of comments, and it is only 357 | used to parse journal pages 358 | 359 | * Comparisons 360 | * All objects can now be used with the comparison (==) operator with other objects of the same type or the type of 361 | their key property (`id: int` for submissions and journals, and `name_url: str` for users) 362 | 363 | ### Changes 364 | 365 | * The `cookies` argument of `FAAPI` is now mandatory, and an `Unauthorized` exception is raised if `FAAPI` is 366 | initialised with an empty cookies list 367 | * The list of `Submission`/`Journal` objects returned by `FAAPI.gallery`, `FAAPI.scraps`, and `FAAPI.journals` now uses 368 | a shared `UserPartial` object in the `author` variable (i.e. changing a property of the author in one object of the 369 | list will change it for the others as well) 370 | 371 | ### Fixes 372 | 373 | * Fix path checking against robots.txt not working correctly with paths missing a leading forward slash 374 | 375 | ## v3.5.0 376 | 377 | ### New Features 378 | 379 | * New `Submission.stats` field for submission statistics stored in a named tuple (`views`, `comments` (count) 380 | , `favorites`) 381 | * Pull request [#2](https://github.com/FurryCoders/FAAPI/pull/2), thanks 382 | to [@warpKaiba](https://github.com/warpKaiba)! 
383 | * New `Journal.stats` field for journal statistics stored in a named tuple (`comments` (count)) 384 | 385 | ### Changes 386 | 387 | * Rename `UserStats.favs` to `UserStats.favorites` 388 | 389 | ### Fixes 390 | 391 | * Fix links in PyPi metadata pointing to previous hosting at GitLab 392 | 393 | ## v3.4.3 394 | 395 | ### Changes 396 | 397 | * Better and more resilient robots.txt parsing 398 | 399 | ### Fixes 400 | 401 | * Fix spaces around slash (/) not being preserved for submission categories 402 | 403 | ## v3.4.2 404 | 405 | ### Changes 406 | 407 | * Raise `DisabledAccount` for users pending deletion 408 | * Error messages from server are not lowercase 409 | 410 | ## v3.4.1 411 | 412 | ### Fixes 413 | 414 | * Fix rare occurrence of error message not being parsed if inside a `section.notice-message` 415 | 416 | ## v3.4.0 (was 3.3.8) 417 | 418 | ### New Features 419 | 420 | * New `NotFound` exception inheriting from `ParsingError` 421 | 422 | ### Changes 423 | 424 | * Removed `FAAPI.submission_exists`, `FAAPI.journal_exists`, and `FAAPI.user_exists` methods 425 | * Improved reliability of error pages' parser 426 | 427 | ### Fixes 428 | 429 | * Custom exceptions inherit from `Exception` instead of `BaseException` 430 | 431 | ## v3.3.7 432 | 433 | ### Changes 434 | 435 | * No changes to code; migrated repository to GitHub and updated README and PyPi metadata 436 | 437 | ## v3.3.6 438 | 439 | ### Changes 440 | 441 | * Allow empty info/contacts when parsing user profiles 442 | 443 | ## v3.3.5 444 | 445 | ### Changes 446 | 447 | * Fix last page check when parsing galleries 448 | 449 | ## v3.3.4 450 | 451 | ### Changes 452 | 453 | * Use BaseException as base class of custom exceptions 454 | 455 | ### Dependencies 456 | 457 | * Use [requests ^2.27.1](https://pypi.org/project/requests/2.27.1) 458 | 459 | ## v3.3.3 460 | 461 | ### Changes 462 | 463 | * Allow submission thumbnail tag to be null 464 | 465 | ## v3.3.2 466 | 467 | ### Changes 468 | 469 | * Use `UserStats` class to hold user statistics instead of namedtuple 470 | * Add watched by and watching stats to `UserStats` 471 | 472 | ## v3.3.1 473 | 474 | ### Changes 475 | 476 | * Safer parsing 477 | 478 | ## v3.3.0 479 | 480 | ### New Features 481 | 482 | * Add docstrings 483 | * Handle robots.txt parsing with `urllib.RobotFileParser` 484 | * `User-Agent` header is exposed as `FAAPI.user_agent` property 485 | 486 | ### Changes 487 | 488 | * `FAAPI.last_get` uses UNIX time 489 | * `FAAPI.check_path` doesn't raise an exception by default 490 | * `FAAPI.login_status` does not raise an exception on unauthorized 491 | * Remove crawl delay error 492 | * Improve download of files 493 | 494 | ## v3.2.0 495 | 496 | ### New Features 497 | 498 | * `FAAPI.get_parsed` checks login status and checks the page for errors directly (both can be manually skipped) 499 | * Add `Unauthorized` exception 500 | 501 | ## v3.1.2 502 | 503 | ### New Features 504 | 505 | * `FAAPI.submission` and `FAAPI.submission_file` support setting the chunk size for the binary file download 506 | 507 | ### Changes 508 | 509 | * The file downloader uses chunk size instead of speed 510 | 511 | ## v3.1.1 512 | 513 | ### Changes 514 | 515 | * When raising `ServerError` and `NoticeMessage`, the actual messages appearing on the page are use as exception 516 | arguments 517 | 518 | ## v3.1.0 519 | 520 | ### New feature 521 | 522 | * Add support for `http.cookiejar.CookieJar` (and inheriting classes, like `requests.cookies.RequestsCookieJar`) for 523 | cookies. 
524 | * Add `FAAPI.me()` method to get the logged-in user 525 | * Add `FAAPI.login_status` property to get the current login status 526 | 527 | ### Dependencies 528 | 529 | * Use [lxml ^4.7.1](https://pypi.org/project/lxml/4.7.1) 530 | * Fix [CVE-2021-43818](https://cve.report/CVE-2021-43818) issue 531 | 532 | ## v3.0.2 533 | 534 | ### Fixes 535 | 536 | * Fix rare error when parsing the info section of a userpage 537 | 538 | ## v3.0.1 539 | 540 | ### Fixes 541 | 542 | * Fix a key error in `Submission` when assigning the parsed results 543 | 544 | ## v3.0.0 545 | 546 | ### New Features 547 | 548 | * Upgrade to Python 3.9+ 549 | * Update type annotations 550 | * `Submission` parses next and previous submission IDs 551 | * `FAAPI.watchlist_by()` and `FAAPI.watchlist_to()` methods support multiple watchlist pages 552 | 553 | ### Changes 554 | 555 | * Renamed `FAAPI.get_parse` to `get_parsed` 556 | * Removed _get_ prefix from `FAAPI` methods (e.g. `get_submission` to `submission`) and return a list of `UserPartials` 557 | objects instead of `Users` 558 | * Added `__all__` declarations to allow importing exceptions and secondary functions from `connection` and `parse` 559 | * `datetime` fields are not serialised on `__iter__` (e.g. when casting a `Submission` object to `dict`) 560 | 561 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | European Union Public Licence 2 | V. 1.2 3 | 4 | EUPL © the European Union 2007, 2016 5 | 6 | This European Union Public Licence (the ‘EUPL’) applies to the Work (as 7 | defined below) which is provided under the terms of this Licence. Any use of 8 | the Work, other than as authorised under this Licence is prohibited (to the 9 | extent such use is covered by a right of the copyright holder of the Work). 10 | 11 | The Work is provided under the terms of this Licence when the Licensor (as 12 | defined below) has placed the following notice immediately following the 13 | copyright notice for the Work: “Licensed under the EUPL”, or has expressed by 14 | any other means his willingness to license under the EUPL. 15 | 16 | 1. Definitions 17 | 18 | In this Licence, the following terms have the following meaning: 19 | — ‘The Licence’: this Licence. 20 | — ‘The Original Work’: the work or software distributed or communicated by the 21 | ‘Licensor under this Licence, available as Source Code and also as 22 | ‘Executable Code as the case may be. 23 | — ‘Derivative Works’: the works or software that could be created by the 24 | ‘Licensee, based upon the Original Work or modifications thereof. This 25 | ‘Licence does not define the extent of modification or dependence on the 26 | ‘Original Work required in order to classify a work as a Derivative Work; 27 | ‘this extent is determined by copyright law applicable in the country 28 | ‘mentioned in Article 15. 29 | — ‘The Work’: the Original Work or its Derivative Works. 30 | — ‘The Source Code’: the human-readable form of the Work which is the most 31 | convenient for people to study and modify. 32 | 33 | — ‘The Executable Code’: any code which has generally been compiled and which 34 | is meant to be interpreted by a computer as a program. 35 | — ‘The Licensor’: the natural or legal person that distributes or communicates 36 | the Work under the Licence. 
37 | — ‘Contributor(s)’: any natural or legal person who modifies the Work under 38 | the Licence, or otherwise contributes to the creation of a Derivative Work. 39 | — ‘The Licensee’ or ‘You’: any natural or legal person who makes any usage of 40 | the Work under the terms of the Licence. 41 | — ‘Distribution’ or ‘Communication’: any act of selling, giving, lending, 42 | renting, distributing, communicating, transmitting, or otherwise making 43 | available, online or offline, copies of the Work or providing access to its 44 | essential functionalities at the disposal of any other natural or legal 45 | person. 46 | 47 | 2. Scope of the rights granted by the Licence 48 | 49 | The Licensor hereby grants You a worldwide, royalty-free, non-exclusive, 50 | sublicensable licence to do the following, for the duration of copyright 51 | vested in the Original Work: 52 | 53 | — use the Work in any circumstance and for all usage, 54 | — reproduce the Work, 55 | — modify the Work, and make Derivative Works based upon the Work, 56 | — communicate to the public, including the right to make available or display 57 | the Work or copies thereof to the public and perform publicly, as the case 58 | may be, the Work, 59 | — distribute the Work or copies thereof, 60 | — lend and rent the Work or copies thereof, 61 | — sublicense rights in the Work or copies thereof. 62 | 63 | Those rights can be exercised on any media, supports and formats, whether now 64 | known or later invented, as far as the applicable law permits so. 65 | 66 | In the countries where moral rights apply, the Licensor waives his right to 67 | exercise his moral right to the extent allowed by law in order to make 68 | effective the licence of the economic rights here above listed. 69 | 70 | The Licensor grants to the Licensee royalty-free, non-exclusive usage rights 71 | to any patents held by the Licensor, to the extent necessary to make use of 72 | the rights granted on the Work under this Licence. 73 | 74 | 3. Communication of the Source Code 75 | 76 | The Licensor may provide the Work either in its Source Code form, or as 77 | Executable Code. If the Work is provided as Executable Code, the Licensor 78 | provides in addition a machine-readable copy of the Source Code of the Work 79 | along with each copy of the Work that the Licensor distributes or indicates, 80 | in a notice following the copyright notice attached to the Work, a repository 81 | where the Source Code is easily and freely accessible for as long as the 82 | Licensor continues to distribute or communicate the Work. 83 | 84 | 4. Limitations on copyright 85 | 86 | Nothing in this Licence is intended to deprive the Licensee of the benefits 87 | from any exception or limitation to the exclusive rights of the rights owners 88 | in the Work, of the exhaustion of those rights or of other applicable 89 | limitations thereto. 90 | 91 | 5. Obligations of the Licensee 92 | 93 | The grant of the rights mentioned above is subject to some restrictions and 94 | obligations imposed on the Licensee. Those obligations are the following: 95 | 96 | Attribution right: The Licensee shall keep intact all copyright, patent or 97 | trademarks notices and all notices that refer to the Licence and to the 98 | disclaimer of warranties. The Licensee must include a copy of such notices and 99 | a copy of the Licence with every copy of the Work he/she distributes or 100 | communicates. 
The Licensee must cause any Derivative Work to carry prominent 101 | notices stating that the Work has been modified and the date of modification. 102 | 103 | Copyleft clause: If the Licensee distributes or communicates copies of the 104 | Original Works or Derivative Works, this Distribution or Communication will be 105 | done under the terms of this Licence or of a later version of this Licence 106 | unless the Original Work is expressly distributed only under this version of 107 | the Licence — for example by communicating ‘EUPL v. 1.2 only’. The Licensee 108 | (becoming Licensor) cannot offer or impose any additional terms or conditions 109 | on the Work or Derivative Work that alter or restrict the terms of the 110 | Licence. 111 | 112 | Compatibility clause: If the Licensee Distributes or Communicates Derivative 113 | Works or copies thereof based upon both the Work and another work licensed 114 | under a Compatible Licence, this Distribution or Communication can be done 115 | under the terms of this Compatible Licence. For the sake of this clause, 116 | ‘Compatible Licence’ refers to the licences listed in the appendix attached to 117 | this Licence. Should the Licensee's obligations under the Compatible Licence 118 | conflict with his/her obligations under this Licence, the obligations of the 119 | Compatible Licence shall prevail. 120 | 121 | Provision of Source Code: When distributing or communicating copies of the 122 | Work, the Licensee will provide a machine-readable copy of the Source Code or 123 | indicate a repository where this Source will be easily and freely available 124 | for as long as the Licensee continues to distribute or communicate the Work. 125 | 126 | Legal Protection: This Licence does not grant permission to use the trade 127 | names, trademarks, service marks, or names of the Licensor, except as required 128 | for reasonable and customary use in describing the origin of the Work and 129 | reproducing the content of the copyright notice. 130 | 131 | 6. Chain of Authorship 132 | 133 | The original Licensor warrants that the copyright in the Original Work granted 134 | hereunder is owned by him/her or licensed to him/her and that he/she has the 135 | power and authority to grant the Licence. 136 | 137 | Each Contributor warrants that the copyright in the modifications he/she 138 | brings to the Work are owned by him/her or licensed to him/her and that he/she 139 | has the power and authority to grant the Licence. 140 | 141 | Each time You accept the Licence, the original Licensor and subsequent 142 | Contributors grant You a licence to their contributions to the Work, under the 143 | terms of this Licence. 144 | 145 | 7. Disclaimer of Warranty 146 | 147 | The Work is a work in progress, which is continuously improved by numerous 148 | Contributors. It is not a finished work and may therefore contain defects or 149 | ‘bugs’ inherent to this type of development. 150 | 151 | For the above reason, the Work is provided under the Licence on an ‘as is’ 152 | basis and without warranties of any kind concerning the Work, including 153 | without limitation merchantability, fitness for a particular purpose, absence 154 | of defects or errors, accuracy, non-infringement of intellectual property 155 | rights other than copyright as stated in Article 6 of this Licence. 156 | 157 | This disclaimer of warranty is an essential part of the Licence and a 158 | condition for the grant of any rights to the Work. 159 | 160 | 8. 
Disclaimer of Liability 161 | 162 | Except in the cases of wilful misconduct or damages directly caused to natural 163 | persons, the Licensor will in no event be liable for any direct or indirect, 164 | material or moral, damages of any kind, arising out of the Licence or of the 165 | use of the Work, including without limitation, damages for loss of goodwill, 166 | work stoppage, computer failure or malfunction, loss of data or any commercial 167 | damage, even if the Licensor has been advised of the possibility of such 168 | damage. However, the Licensor will be liable under statutory product liability 169 | laws as far such laws apply to the Work. 170 | 171 | 9. Additional agreements 172 | 173 | While distributing the Work, You may choose to conclude an additional 174 | agreement, defining obligations or services consistent with this Licence. 175 | However, if accepting obligations, You may act only on your own behalf and on 176 | your sole responsibility, not on behalf of the original Licensor or any other 177 | Contributor, and only if You agree to indemnify, defend, and hold each 178 | Contributor harmless for any liability incurred by, or claims asserted against 179 | such Contributor by the fact You have accepted any warranty or additional 180 | liability. 181 | 182 | 10. Acceptance of the Licence 183 | 184 | The provisions of this Licence can be accepted by clicking on an icon ‘I 185 | agree’ placed under the bottom of a window displaying the text of this Licence 186 | or by affirming consent in any other similar way, in accordance with the rules 187 | of applicable law. Clicking on that icon indicates your clear and irrevocable 188 | acceptance of this Licence and all of its terms and conditions. 189 | 190 | Similarly, you irrevocably accept this Licence and all of its terms and 191 | conditions by exercising any rights granted to You by Article 2 of this 192 | Licence, such as the use of the Work, the creation by You of a Derivative Work 193 | or the Distribution or Communication by You of the Work or copies thereof. 194 | 195 | 11. Information to the public 196 | 197 | In case of any Distribution or Communication of the Work by means of 198 | electronic communication by You (for example, by offering to download the Work 199 | from a remote location) the distribution channel or media (for example, a 200 | website) must at least provide to the public the information requested by the 201 | applicable law regarding the Licensor, the Licence and the way it may be 202 | accessible, concluded, stored and reproduced by the Licensee. 203 | 204 | 12. Termination of the Licence 205 | 206 | The Licence and the rights granted hereunder will terminate automatically upon 207 | any breach by the Licensee of the terms of the Licence. Such a termination 208 | will not terminate the licences of any person who has received the Work from 209 | the Licensee under the Licence, provided such persons remain in full 210 | compliance with the Licence. 211 | 212 | 13. Miscellaneous 213 | 214 | Without prejudice of Article 9 above, the Licence represents the complete 215 | agreement between the Parties as to the Work. 216 | 217 | If any provision of the Licence is invalid or unenforceable under applicable 218 | law, this will not affect the validity or enforceability of the Licence as a 219 | whole. Such provision will be construed or reformed so as necessary to make it 220 | valid and enforceable. 
221 | 222 | The European Commission may publish other linguistic versions or new versions 223 | of this Licence or updated versions of the Appendix, so far this is required 224 | and reasonable, without reducing the scope of the rights granted by the 225 | Licence. New versions of the Licence will be published with a unique version 226 | number. 227 | 228 | All linguistic versions of this Licence, approved by the European Commission, 229 | have identical value. Parties can take advantage of the linguistic version of 230 | their choice. 231 | 232 | 14. Jurisdiction 233 | 234 | Without prejudice to specific agreement between parties, 235 | — any litigation resulting from the interpretation of this License, arising 236 | between the European Union institutions, bodies, offices or agencies, as a 237 | Licensor, and any Licensee, will be subject to the jurisdiction of the Court 238 | of Justice of the European Union, as laid down in article 272 of the Treaty 239 | on the Functioning of the European Union, 240 | — any litigation arising between other parties and resulting from the 241 | interpretation of this License, will be subject to the exclusive 242 | jurisdiction of the competent court where the Licensor resides or conducts 243 | its primary business. 244 | 245 | 15. Applicable Law 246 | 247 | Without prejudice to specific agreement between parties, 248 | — this Licence shall be governed by the law of the European Union Member State 249 | where the Licensor has his seat, resides or has his registered office, 250 | — this licence shall be governed by Belgian law if the Licensor has no seat, 251 | residence or registered office inside a European Union Member State. 252 | 253 | Appendix 254 | 255 | ‘Compatible Licences’ according to Article 5 EUPL are: 256 | — GNU General Public License (GPL) v. 2, v. 3 257 | — GNU Affero General Public License (AGPL) v. 3 258 | — Open Software License (OSL) v. 2.1, v. 3.0 259 | — Eclipse Public License (EPL) v. 1.0 260 | — CeCILL v. 2.0, v. 2.1 261 | — Mozilla Public Licence (MPL) v. 2 262 | — GNU Lesser General Public Licence (LGPL) v. 2.1, v. 3 263 | — Creative Commons Attribution-ShareAlike v. 3.0 Unported (CC BY-SA 3.0) for 264 | works other than software 265 | — European Union Public Licence (EUPL) v. 1.1, v. 1.2 266 | — Québec Free and Open-Source Licence — Reciprocity (LiLiQ-R) or 267 | Strong Reciprocity (LiLiQ-R+) 268 | 269 | — The European Commission may update this Appendix to later versions of the 270 | above licences without producing a new version of the EUPL, as long as they 271 | provide the rights granted in Article 2 of this Licence and protect the 272 | covered Source Code from exclusive appropriation. 273 | — All other changes or additions to this Appendix require the production of a 274 | new EUPL version. 275 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |

2 | 3 | logo 4 | 5 | # Fur Affinity API 6 | 7 | Python library to implement API-like functionality for the [Fur Affinity](https://furaffinity.net) website. 8 | 9 | [![](https://img.shields.io/pypi/v/faapi?logo=pypi)](https://pypi.org/project/faapi/) 10 | [![](https://img.shields.io/pypi/pyversions/faapi?logo=Python)](https://www.python.org) 11 | 12 | [![](https://img.shields.io/github/v/tag/FurryCoders/faapi?label=github&sort=date&logo=github&color=blue)](https://github.com/FurryCoders/faapi) 13 | [![](https://img.shields.io/github/issues/FurryCoders/faapi?logo=github&color=blue)](https://github.com/FurryCoders/FAAPI/issues) 14 | [![](https://img.shields.io/github/actions/workflow/status/FurryCoders/FAAPI/test.yml?label=Test&logo=githubactions)](https://github.com/FurryCoders/FAAPI/actions/workflows/test.yml) 15 | [![](https://coveralls.io/repos/github/FurryCoders/FAAPI/badge.svg)](https://coveralls.io/github/FurryCoders/FAAPI) 16 | 17 |
18 | 19 | ## Requirements 20 | 21 | Python 3.9+ is necessary to run this 22 | library. [ Poetry](https://python-poetry.org) 23 | is used for packaging and dependency management. 24 | 25 | ## Usage 26 | 27 | The API comprises a main class `FAAPI`, two submission classes `Submission` and `SubmissionPartial`, a journal 28 | class `Journal`, and a user class `User`. 29 | 30 | Once `FAAPI` is initialized, its methods can be used to crawl FA and return parsed objects. 31 | 32 | ```python 33 | from requests.cookies import RequestsCookieJar 34 | import faapi 35 | import orjson 36 | 37 | cookies = RequestsCookieJar() 38 | cookies.set("a", "38565475-3421-3f21-7f63-3d341339737") 39 | cookies.set("b", "356f5962-5a60-0922-1c11-65003b70308") 40 | 41 | api = faapi.FAAPI(cookies) 42 | sub, sub_file = api.submission(12345678, get_file=True) 43 | 44 | print(sub.id, sub.title, sub.author, f"{len(sub_file) / 1024:02f}KiB") 45 | 46 | with open(f"{sub.id}.json", "wb") as f: 47 | f.write(orjson.dumps(dict(sub))) 48 | 49 | with open(sub.file_url.split("/")[-1], "wb") as f: 50 | f.write(sub_file) 51 | 52 | gallery, _ = api.gallery("user_name", 1) 53 | with open("user_name-gallery.json", "wb") as f: 54 | f.write(orjson.dumps(list(map(dict, gallery)))) 55 | ``` 56 | 57 | ### robots.txt 58 | 59 | At init, the `FAAPI` object downloads the [robots.txt](https://www.furaffinity.net/robots.txt) file from FA to determine 60 | the `Crawl-delay` and `disallow` values set therein. If not set in the robots.txt file, a crawl delay value of 1 second 61 | is used. 62 | 63 | To respect this value, the default behaviour of the `FAAPI` object is to wait when a get request is made if the last 64 | request was performed more recently then the crawl delay value. 65 | 66 | See under [FAAPI](#faapi) for more details on this behaviour. 67 | 68 | Furthermore, any get operation that points to a disallowed path from robots.txt will raise an exception. This check 69 | should not be circumvented, and the developer of this library does not take responsibility for violations of the TOS of 70 | Fur Affinity. 71 | 72 | ### Cookies 73 | 74 | To access protected pages, cookies from an active session are needed. These cookies can be given to the FAAPI object as 75 | a list of dictionaries - each containing a `name` and a `value` field -, or as a `http.cookiejar.CookieJar` 76 | object (`requests.cookies.RequestsCookieJar` and other objects inheriting from `CookieJar` are also supported). The 77 | cookies list should look like the following example: 78 | 79 | ```python 80 | cookies = [ 81 | {"name": "a", "value": "38565475-3421-3f21-7f63-3d3413397537"}, 82 | {"name": "b", "value": "356f5962-5a60-0922-1c11-65003b703038"}, 83 | ] 84 | ``` 85 | 86 | ```python 87 | from requests.cookies import RequestsCookieJar 88 | 89 | cookies = RequestsCookieJar() 90 | cookies.set("a", "38565475-3421-3f21-7f63-3d3413397537") 91 | cookies.set("b", "356f5962-5a60-0922-1c11-65003b703038") 92 | ``` 93 | 94 | To access session cookies, consult the manual of the browser used to log in. 95 | 96 | *Note:* it is important to not logout of the session the cookies belong to, otherwise they will no longer work.
97 | *Note:* as of April 2022 only cookies `a` and `b` are needed. 98 | 99 | ### User Agent 100 | 101 | `FAAPI` attaches a `User-Agent` header to every request. The user agent string is generated at startup in the following 102 | format: `faapi/{library version} Python/{python version} {system name}/{system release}`. 103 | 104 | ## Objects 105 | 106 | ### FAAPI 107 | 108 | This is the main object that handles all the calls to scrape pages and get submissions. 109 | 110 | It holds the following fields: 111 | 112 | * `session: requests.Session` The session used for all requests. 113 | * `robots: urllib.robotparser.RobotFileParser` robots.txt handler 114 | * `user_agent: str` user agent used by the session (property, cannot be set) 115 | * `crawl_delay: float` crawl delay from robots.txt (property, cannot be set) 116 | * `last_get: float` time of last get (UNIX time) 117 | * `raise_for_unauthorized: bool = True` if set to `True`, raises an exception if a request is made and the resulting 118 | page is not from a login session 119 | * `timeout: int | None = None` requests timeout in seconds for both page requests (e.g. submissions) and files 120 | 121 | #### Init 122 | 123 | `__init__(cookies: list[dict[str, str]] | CookieJar, session_class: Type[Session] = Session)` 124 | 125 | A FAAPI object must be initialised with a cookies object in the format mentioned above in [#Cookies](#cookies). 126 | 127 | An optional `session_class` argument can be given to modify the class used by `FAAPI.session`. Any class based 128 | on `requests.Session` is accepted.
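For example, a minimal sketch of a custom session that adds retry behaviour (the retry settings and cookie values are illustrative placeholders, not part of FAAPI):

```python
import faapi
from requests import Session
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry


class RetrySession(Session):
    # Any class based on requests.Session is accepted by the session_class argument
    def __init__(self):
        super().__init__()
        self.mount("https://", HTTPAdapter(max_retries=Retry(total=3, backoff_factor=1)))


cookies = [
    {"name": "a", "value": "38565475-3421-3f21-7f63-3d3413397537"},
    {"name": "b", "value": "356f5962-5a60-0922-1c11-65003b703038"},
]

api = faapi.FAAPI(cookies, session_class=RetrySession)
api.timeout = 30  # optional request timeout in seconds
print(api.login_status)  # True if the cookies belong to a login session
```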
129 | 130 | #### Methods & Properties 131 | 132 | * `load_cookies(cookies: list[dict[str, str]] | CookieJar)` 133 | Loads new cookies and creates a new session.
134 | *Note:* This method removes any cookies currently in use; to update or add individual cookies, access them from the session 135 | object. 136 | * `handle_delay()`
137 | Handles the crawl delay as set in the robots.txt file. 138 | * `check_path(path: str, *, raise_for_disallowed: bool = False) -> bool`
139 | Checks whether a given path is allowed by the robots.txt. If `raise_for_disallowed` is set to `True`, 140 | a `DisallowedPath` exception is raised on disallowed paths. 141 | * `connection_status -> bool`
142 | Returns the status of the connection. 143 | * `login_status -> bool`
144 | Returns the login status. 145 | * `get(path: str, **params) -> requests.Response`
146 | This returns a response object containing the result of the get operation on the given URL with the 147 | optional `**params` added to it (the path provided is treated as relative to 'https://www.furaffinity.net/'). 148 | * `get_parsed(path: str, *, skip_page_check: bool = False, skip_auth_check: bool = False, **params) -> bs4.BeautifulSoup`
149 | Similar to `get()` but returns the parsed HTML from the normal get operation. If the GET request encounters an error, 150 | an `HTTPError` exception is raised. If `skip_page_check` is set to `True`, the parsed page is not checked for errors 151 | (e.g. a non-existing submission). If `skip_auth_check` is set to `True`, the page is not checked for login status. 152 | * `me() -> User | None`
153 | Returns the logged-in user as a `User` object if the cookies are from a login session. 154 | * `frontpage() -> list[SubmissionPartial]`
155 | Fetches the latest submissions from Fur Affinity's front page. 156 | * `submission(submission_id: int, get_file: bool = False, *, chunk_size: int = None) -> tuple[Submission, bytes | None]`
157 | Given a submission ID, it returns a `Submission` object containing the various metadata of the submission itself and 158 | a `bytes` object with the submission file if `get_file` is passed as `True`. The optional `chunk_size` argument is 159 | used for the request; if left as `None` or set to 0, the download is performed directly without streaming.
160 | *Note:* the author `UserPartial` object of the submission does not contain the `join_date` field as it does not appear 161 | on submission pages. 162 | * `submission_file(submission: Submission, *, chunk_size: int = None) -> bytes`
163 | Given a submission object, it downloads its file and returns it as a `bytes` object. The optional `chunk_size` 164 | argument is used for the request; if left as `None` or set to 0, the download is performed directly without streaming. 165 | * `journal(journal_id: int) -> Journal`
166 | Given a journal ID, it returns a `Journal` object containing the various metadata of the journal. 167 | * `user(user: str) -> User`
168 | Given a username, it returns a `User` object containing information regarding the user. 169 | * `gallery(user: str, page: int = 1) -> tuple[list[SubmissionPartial], int | None]`
170 | Returns the list of submissions found on a specific gallery page, and the number of the next page. The returned page 171 | number is set to `None` if it is the last page. 172 | * `scraps(user: str, page: int = 1) -> tuple[list[SubmissionPartial], int | None]`
173 | Returns the list of submissions found on a specific scraps page, and the number of the next page. The returned page 174 | number is set to `None` if it is the last page. 175 | * `favorites(user: str, page: str = "") -> tuple[list[SubmissionPartial], str | None]`
176 | Downloads a user's favorites page. Because of how favorites pages work on FA, the `page` argument (and the one 177 | returned) are strings. If the favorites page is the last one, `None` is returned as the next page. An empty page 178 | value as argument is equivalent to page 1.
179 | *Note:* favorites page "numbers" do not follow any scheme and are only generated server-side. 180 | * `journals(user: str, page: int = 1) -> tuple[list[JournalPartial], int | None]`
181 | Returns the list of journals found on a specific journals page, and the number of the next page. The returned page 182 | number is set to `None` if it is the last page. 183 | * `watchlist_to(self, user: str, page: int = 1) -> tuple[list[UserPartial], int | None]`
184 | Given a username, returns a list of `UserPartial` objects for each user that is watching the given user, together with the 185 | next page number (`None` if it is the last page). 186 | * `watchlist_by(self, user: str, page: int = 1) -> tuple[list[UserPartial], int | None]`
187 | Given a username, returns a list of `UserPartial` objects for each user that is watched by the given user, together with the 188 | next page number (`None` if it is the last page). 189 | 190 | *Note:* The last page returned by `watchlist_to` and `watchlist_by` may not be correct, as Fur Affinity doesn't seem 191 | to render the next-page button consistently; it is safer to use an external check that stops when the method keeps 192 | advancing the page but returns the same users, or none, as shown in the sketch below. 193 |
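A minimal pagination sketch, assuming the `api` object from the Usage example above and `"user_name"` as a placeholder username; the stop-on-repeat check for watchlists is a suggestion rather than part of the library:

```python
# Gallery: follow the returned next-page number until None is returned
submissions = []
page = 1
while page is not None:
    page_submissions, page = api.gallery("user_name", page)
    submissions.extend(page_submissions)

# Watchlist: stop as soon as a page returns no new users
watchers, seen = [], set()
page = 1
while page is not None:
    users, page = api.watchlist_to("user_name", page)
    new_names = {user.name_url for user in users} - seen
    if not new_names:
        break  # the page is not advancing or is empty
    seen |= new_names
    watchers.extend(users)
```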
224 | Property method that returns the URL-safe username. 225 | * `url -> str`
226 | Property method that returns the Fur Affinity URL to the user (`https://www.furaffinity.net/user/{name_url}`). 227 | * `generate_avatar_url() -> str`
228 | Generates the URL for the current user icon. 229 | * `parse(user_page: bs4.BeautifulSoup = None)`
230 | Parses the stored user page for metadata. If `user_page` is passed, it overwrites the existing `user_page` value. 231 | 232 | ### User 233 | 234 | The main class storing all of a user's metadata. 235 | 236 | * `name: str` display name with capital letters and extra characters such as "_" 237 | * `status: str` user status (~, !, etc.) 238 | * `title: str` the user title as it appears on their userpage 239 | * `join_date: datetime` the date the user joined (defaults to timestamp 0) 240 | * `profile: str` profile text in HTML format 241 | * `profile_bbcode: str` profile text in BBCode format 242 | * `stats: UserStats` user statistics stored in a `namedtuple` (`views`, `submissions`, `favorites`, `comments_earned` 243 | , `comments_made`, `journals`, `watched_by`, `watching`) 244 | * `info: dict[str, str]` profile information (e.g. "Accepting Trades", "Accepting Commissions", "Character Species", 245 | etc.) 246 | * `contacts: dict[str, str]` contact links (e.g. Twitter, Steam, etc.) 247 | * `avatar_url: str` the URL to the user icon 248 | * `banner_url: str | None` the URL to the user banner (if any is set, otherwise `None`) 249 | * `watched: bool` `True` if the user is watched, `False` otherwise 250 | * `watched_toggle_link: str | None` The link to toggle the watch status (`/watch/` or `/unwatch/` type link) 251 | * `blocked: bool` `True` if the user is blocked, `False` otherwise 252 | * `blocked_toggle_link: str | None` The link to toggle the block status (`/block/` or `/unblock/` type link) 253 | * `user_page: bs4.BeautifulSoup` the user page used to parse the object fields 254 | 255 | `User` objects can be directly cast to a dict object and iterated through. 256 | 257 | Comparison with `User` can be made with either another `User` or `UserPartial` object (the URL names are compared), or a 258 | string (the URL name is compared to the given string). 259 | 260 | #### Init 261 | 262 | `__init__(user_page: bs4.BeautifulSoup = None)` 263 | 264 | To initialise the object, an optional `bs4.BeautifulSoup` object is needed containing the parsed HTML of a user 265 | page. 266 | 267 | If no `user_page` is passed then the object fields will remain at their default - empty - value. 268 | 269 | #### Methods 270 | 271 | * `name_url -> str`
272 | Property method that returns the URL-safe username. 273 | * `url -> str`
274 | Property method that returns the Fur Affinity URL to the user (`https://www.furaffinity.net/user/{name_url}`). 275 | * `generate_avatar_url() -> str`
276 | Generates the URL for the current user icon. 277 | * `parse(user_page: bs4.BeautifulSoup = None)`
278 | Parses the stored user page for metadata. If `user_page` is passed, it overwrites the existing `user_page` value. 279 | 280 | ### JournalPartial 281 | 282 | This object contains partial information gathered when parsing a journals folder. It contains the following fields: 283 | 284 | * `id: int` journal ID 285 | * `title: str` journal title 286 | * `date: datetime` upload date as a [`datetime` object](https://docs.python.org/3/library/datetime.html) (defaults to 287 | timestamp 0) 288 | * `author: UserPartial` journal author (filled only if the journal is parsed from a `bs4.BeautifulSoup` page) 289 | * `stats: JournalStats` journal statistics stored in a named tuple (`comments` (count)) 290 | * `content: str` journal content in HTML format 291 | * `content_bbcode: str` journal content in BBCode format 292 | * `mentions: list[str]` the users mentioned in the content (if they were mentioned as links, e.g. `:iconusername:`, 293 | `@username`, etc.) 294 | * `journal_tag: bs4.element.Tag` the journal tag used to parse the object fields 295 | 296 | `JournalPartial` objects can be directly cast to a dict object or iterated through. 297 | 298 | Comparison with `JournalPartial` can be made with either another `JournalPartial` or `Journal` object (the IDs are 299 | compared), or an integer (the `JournalPartial.id` value is compared to the given integer). 300 | 301 | #### Init 302 | 303 | `__init__(journal_tag: bs4.element.Tag = None)` 304 | 305 | `JournalPartial` takes one optional parameter: a journal section tag from a journals page. 306 | 307 | If no `journal_tag` is passed then the object fields will remain at their default - empty - value. 308 | 309 | #### Methods 310 | 311 | * `url -> str`
312 | Property method that returns the Fur Affinity URL to the journal (`https://www.furaffinity.net/journal/{id}`). 313 | * `parse(journal_tag: bs4.element.Tag = None)`
314 | Parses the stored journal tag for information. If `journal_tag` is passed, it overwrites the existing `journal_tag` 315 | value. 316 | 317 | ### Journal 318 | 319 | This object contains full information gathered when parsing a journal page. It contains the same fields 320 | as `JournalPartial`, with the addition of the header, footer, and comments: 321 | 322 | * `id: int` journal ID 323 | * `title: str` journal title 324 | * `date: datetime` upload date as a [`datetime` object](https://docs.python.org/3/library/datetime.html) (defaults to 325 | timestamp 0) 326 | * `author: UserPartial` journal author (filled only if the journal is parsed from a `bs4.BeautifulSoup` page) 327 | * `stats: JournalStats` journal statistics stored in a named tuple (`comments` (count)) 328 | * `content: str` journal content in HTML format 329 | * `content_bbcode: str` journal content in BBCode format 330 | * `header: str` journal header in HTML format (if present) 331 | * `footer: str` journal footer in HTML format (if present) 332 | * `mentions: list[str]` the users mentioned in the content (if they were mentioned as links, e.g. `:iconusername:`, 333 | `@username`, etc.) 334 | * `comments: list[Comment]` the comments to the journal, organised in a tree structure 335 | * `journal_page: bs4.BeautifulSoup` the journal page used to parse the object fields 336 | 337 | `Journal` objects can be directly cast to a dict object or iterated through. 338 | 339 | Comparison with `Journal` can be made with either another `Journal` or `JournalPartial` object (the IDs are compared), 340 | or an integer (the `Journal.id` value is compared to the given integer). 341 | 342 | #### Init 343 | 344 | `__init__(journal_page: bs4.BeautifulSoup = None)` 345 | 346 | `Journal` takes one optional journal page argument. 347 | 348 | If no `journal_page` is passed then the object fields will remain at their default - empty - value. 349 | 350 | #### Methods 351 | 352 | * `url -> str`
353 | Property method that returns the Fur Affinity URL to the journal (`https://www.furaffinity.net/journal/{id}`). 354 | * `parse(journal_page: bs4.BeautifulSoup = None)`
355 | Parses the stored journal page for information. If `journal_page` is passed, it overwrites the existing `journal_page` 356 | value. 357 | 358 | ### SubmissionPartial 359 | 360 | This lightweight submission object is used to contain the information gathered when parsing gallery, scraps, and 361 | favorites pages. It contains only the following fields: 362 | 363 | * `id: int` submission ID 364 | * `title: str` submission title 365 | * `author: UserPartial` submission author (only the `name` field is filled) 366 | * `rating: str` submission rating [general, mature, adult] 367 | * `type: str` submission type [text, image, etc...] 368 | * `thumbnail_url: str` the URL to the submission thumbnail 369 | * `submission_figure: bs4.element.Tag` the figure tag used to parse the object fields 370 | 371 | `SubmissionPartial` objects can be directly cast to a dict object or iterated through. 372 | 373 | Comparison with `SubmissionPartial` can be made with either another `SubmissionPartial` or `Submission` object (the IDs are 374 | compared), or an integer (the `SubmissionPartial.id` value is compared to the given integer). 375 | 376 | #### Init 377 | 378 | `__init__(submission_figure: bs4.element.Tag = None)` 379 | 380 | To initialise the object, an optional `bs4.element.Tag` object is needed containing the parsed HTML of a submission 381 | figure tag. 382 | 383 | If no `submission_figure` is passed then the object fields will remain at their default - empty - value. 384 | 385 | #### Methods 386 | 387 | * `url -> str`
388 | Property method that returns the Fur Affinity URL to the submission (`https://www.furaffinity.net/view/{id}`). 389 | * `parse(submission_figure: bs4.element.Tag = None)`
390 | Parses the stored submission figure tag for information. If `submission_figure` is passed, it overwrites the 391 | existing `submission_figure` value. 392 | 393 | ### Submission 394 | 395 | The main class that parses and holds submission metadata. 396 | 397 | * `id: int` submission ID 398 | * `title: str` submission title 399 | * `author: UserPartial` submission author (only the `name`, `title`, and `avatar_url` fields are filled) 400 | * `date: datetime` upload date as a [`datetime` object](https://docs.python.org/3/library/datetime.html) (defaults to 401 | timestamp 0) 402 | * `tags: list[str]` tags list 403 | * `category: str` category 404 | * `species: str` species 405 | * `gender: str` gender 406 | * `rating: str` rating 407 | * `stats: SubmissionStats` submission statistics stored in a named tuple (`views`, `comments` (count), `favorites`) 408 | * `type: str` submission type (text, image, etc...) 409 | * `description: str` description in HTML format 410 | * `description_bbcode: str` description in BBCode format 411 | * `footer: str` footer in HTML format 412 | * `mentions: list[str]` the users mentioned in the description (if they were mentioned as links, e.g. `:iconusername:`, 413 | `@username`, etc.) 414 | * `folder: str` the submission folder (gallery or scraps) 415 | * `user_folders: list[SubmissionUserFolder]` user folders stored in a list of named tuples (`name`, `url`, `group` ( 416 | if any)) 417 | * `file_url: str` the URL to the submission file 418 | * `thumbnail_url: str` the URL to the submission thumbnail 419 | * `prev: int` the ID of the previous submission (if any) 420 | * `next: int` the ID of the next submission (if any) 421 | * `favorite: bool` `True` if the submission is a favorite, `False` otherwise 422 | * `favorite_toggle_link: str` the link to toggle the favorite status (`/fav/` or `/unfav/` type URL) 423 | * `comments: list[Comment]` the comments to the submission, organised in a tree structure 424 | * `submission_page: bs4.BeautifulSoup` the submission page used to parse the object fields 425 | 426 | `Submission` objects can be directly cast to a dict object and iterated through. 427 | 428 | Comparison with `Submission` can be made with either another `Submission` or `SubmissionPartial` object (the IDs are 429 | compared), or an integer (the `Submission.id` value is compared to the given integer). 430 | 431 | #### Init 432 | 433 | `__init__(submission_page: bs4.BeautifulSoup = None)` 434 | 435 | To initialise the object, an optional `bs4.BeautifulSoup` object is needed containing the parsed HTML of a submission 436 | page. 437 | 438 | If no `submission_page` is passed then the object fields will remain at their default - empty - value. 439 | 440 | #### Methods 441 | 442 | * `url -> str`
443 | Property method that returns the Fur Affinity URL to the submission (`https://www.furaffinity.net/view/{id}`). 444 | * `parse(submission_page: bs4.BeautifulSoup = None)`
445 | Parses the stored submission page for metadata. If `submission_page` is passed, it overwrites the 446 | existing `submission_page` value. 447 | 448 | ### Comment 449 | 450 | This class contains comment metadata and is used to build a tree structure with the comments and their replies. 451 | 452 | * `id: int` the comment ID 453 | * `author: UserPartial` the user who posted the comment 454 | * `date: datetime` the date the comment was posted 455 | * `text: str` the comment text in HTML format 456 | * `text_bbcode: str` the comment text in BBCode format 457 | * `replies: list[Comment]` list of replies to the comment 458 | * `reply_to: Comment | int | None` the parent comment, if the comment is a reply. The variable type is `int` only if the 459 | comment is parsed outside the parse method of a `Submission` or `Journal` (e.g. by creating a new comment with a 460 | comment tag), and when iterating over the parent object (to avoid infinite recursion errors), be it `Submission` 461 | , `Journal` or another `Comment`. 462 | * `edited: bool` `True` if the comment was edited, `False` otherwise 463 | * `hidden: bool` `True` if the comment was hidden, `False` otherwise (if the comment was hidden, the author and date 464 | fields will default to their empty values) 465 | * `parent: Submission | Journal | None` the `Submission` or `Journal` object the comment is connected to 466 | * `comment_tag: bs4.element.Tag` the comment tag used to parse the object fields 467 | 468 | `Comment` objects can be directly cast to a dict object and iterated through. 469 | 470 | Comparison with `Comment` can be made with either another `Comment` (the IDs are compared), or an integer ( 471 | the `Comment.id` value is compared to the given integer). 472 | 473 | *Note:* The `__iter__` method of `Comment` objects automatically removes recursion. The `parent` variable is set 474 | to `None` and `reply_to` is set to the comment's ID.
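For example, a minimal sketch of working with a parsed comment tree (assuming an already-authenticated `FAAPI` instance named `api`; the submission ID is a placeholder):

```python
import faapi

submission, _ = api.submission(12345678)  # placeholder submission ID

# Casting a Comment to a dict serialises its replies and reply_to value
# without the infinite recursion described in the note above.
for comment in submission.comments:
    comment_dict = dict(comment)
    print(comment_dict["id"], len(comment_dict["replies"]))

# flatten_comments returns a flat list of the whole tree, replies included.
for comment in faapi.comment.flatten_comments(submission.comments):
    print(comment.id, comment.author.name, comment.edited)
```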
475 | *Note:* Because each comment contains the parent `Submission` or `Journal` object (which contains the comment itself) 476 | and the replied comment object, some iterations may cause infinite recursion errors, for example when using 477 | the `copy.deepcopy` function. If such iterations are needed, simply set the `parent` variable to `None` and 478 | the `reply_to` variable to `None` or the comment's ID (this can be done easily after flattening the comments list 479 | with `faapi.comment.flatten_comments`; the comments can then be sorted again with `faapi.comment.sort_comments`, which 480 | will also restore the `reply_to` values to `Comment` objects). 481 | 482 | #### Init 483 | 484 | `__init__(tag: bs4.element.Tag = None, parent: Submission | Journal = None)` 485 | 486 | To initialise the object, an optional `bs4.element.Tag` object is needed containing the comment tag as taken from a 487 | submission/journal page. 488 | 489 | The optional `parent` argument sets the `parent` variable described above. 490 | 491 | If no `tag` is passed then the object fields will remain at their default - empty - value. 492 | 493 | #### Methods 494 | 495 | * `url -> str`
496 | Property method that returns the Fur Affinity URL to the comment ( 497 | e.g. `https://www.furaffinity.net/view/12345678#cid:1234567890`). If the `parent` variable is `None`, the property 498 | returns an empty string. 499 | * `parse(tag: bs4.element.Tag = None)`
500 | Parses the stored tag for metadata. If `tag` is passed, it overwrites the existing `tag` value. 501 | 502 | #### Extra Functions 503 | 504 | These extra functions can be used to operate on a list of comments. They only alter the order and structure, but they do 505 | not touch any of the metadata. 506 | 507 | * `faapi.comment.sort_comments(comments: list[Comment]) -> list[Comment]`
508 | Sorts a list of comments into a tree structure. Replies are overwritten. 509 | * `faapi.comment.flatten_comments(comments: list[Comment]) -> list[Comment]`
510 | Flattens a list of comments. Replies are not modified. 511 | 512 | #### Comment Tree Graphs 513 | 514 | Using the tree structure generated by the library, it is trivial to build a graph visualisation of the comment tree 515 | using the [DOT](https://www.graphviz.org/doc/info/lang.html) language. 516 | 517 | ```python 518 | submission, _ = api.submission(12345678) 519 | comments = faapi.comment.flatten_comments(submission.comments) 520 | with open("comments.dot", "w") as f: 521 | f.write("digraph {\n") 522 | for comment in [c for c in comments if c.reply_to is None]: 523 | f.write(f" parent -> {comment.id}\n") 524 | for comment in comments: 525 | for reply in comment.replies: 526 | f.write(f" {comment.id} -> {reply.id}\n") 527 | f.write("}") 528 | ``` 529 | 530 | ```dot 531 | digraph { 532 | parent -> 157990848 533 | parent -> 157993838 534 | parent -> 157997294 535 | 157990848 -> 158014077 536 | 158014077 -> 158014816 537 | 158014816 -> 158093180 538 | 158093180 -> 158097024 539 | 157993838 -> 157998464 540 | 157993838 -> 158014126 541 | 157997294 -> 158014135 542 | 158014135 -> 158014470 543 | 158014135 -> 158030074 544 | 158014470 -> 158093185 545 | 158030074 -> 158093199 546 | } 547 | ``` 548 | 549 | comments tree graph 550 | 551 | _The graph above was generated with [quickchart.io](https://quickchart.io/documentation/graphviz-api/)_ 552 | 553 | ## BBCode Conversion 554 | 555 | Using the BBCode fields allows converting between the raw HTML recovered from Fur Affinity and BBCode tags that follow 556 | FA's guidelines. Conversion from HTML to BBCode covers all known tags and preserves all newlines and spacing. 557 | 558 | BBCode text can be converted to Fur Affinity's HTML using the `faapi.parse.bbcode_to_html()` function. The majority of 559 | submissions can be converted back and forth between HTML and BBCode without any information loss; however, the parser 560 | rules are still a work in progress and there are many edge cases where unusual text and formatting cause the parser to 561 | generate incorrect HTML. 562 | 563 | ## Exceptions 564 | 565 | The following are the exceptions explicitly raised by the FAAPI functions. The exceptions deriving from `ParsingError` 566 | are chosen depending on the content of the page. Because Fur Affinity doesn't use HTTP status codes besides 404, the 567 | page is checked against a static list of known error messages/page titles in order to determine the specific error to be 568 | used. If no match is found, then the `ServerError` (if the page has the "Server Error" title) or the more 569 | general `NoticeMessage` exceptions are used instead. The actual error message parsed from the page is used as the argument 570 | for the exceptions, so that it can be analysed when caught. 571 | 572 | * `DisallowedPath(Exception)` The path is not allowed by the robots.txt. 573 | * `Unauthorized(Exception)` The user is not logged-in. 574 | * `ParsingError(Exception)` An error occurred while parsing the page. 575 | * `NonePage(ParsingError)` The parsed page is `None`. 576 | * `NotFound(ParsingError)` The resource could not be found (general 404 page or non-existing submission, user, or 577 | journal). 578 | * `NoTitle(ParsingError)` The parsed page is missing a title. 579 | * `DisabledAccount(ParsingError)` The resource belongs to a disabled account. 580 | * `ServerError(ParsingError)` The page contains a server error notice. 581 | * `NoticeMessage(ParsingError)` A notice of unknown type was found in the page.
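For example, a minimal sketch of how these exceptions might be caught (assuming an already-authenticated `FAAPI` instance named `api`; the username is a placeholder):

```python
from faapi.exceptions import DisabledAccount, NotFound, ParsingError

try:
    user = api.user("some_username")  # placeholder username
except NotFound:
    print("The user does not exist")
except DisabledAccount:
    print("The account is disabled or pending deletion")
except ParsingError as err:
    # Catch-all for the remaining parsing errors; the message parsed
    # from the page is available in the exception arguments.
    print("Could not parse the page:", *err.args)
```

Because `NotFound` and `DisabledAccount` derive from `ParsingError`, the more specific handlers must come before the general one.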
582 | 583 | ## Beautiful Soup Warnings 584 | 585 | When parsing some pages or converting HTML to BBCode, the [Beautiful Soup](https://pypi.org/project/beautifulsoup4/) 586 | library may give some warnings, for example `MarkupResemblesLocatorWarning`. These warnings are left enabled for 587 | clarity, but can be disabled manually using the `warnings.filterwarnings` function. 588 | 589 | ## Contributing 590 | 591 | All contributions and suggestions are welcome! 592 | 593 | If you have suggestions for fixes or improvements, you can open an issue with your idea, see [#Issues](#issues) for 594 | details. 595 | 596 | ## Issues 597 | 598 | If any problem is encountered during usage of the program, an issue can be opened 599 | on [GitHub](https://github.com/FurryCoders/FAAPI/issues). 600 | 601 | Issues can also be used to suggest improvements and features. 602 | 603 | When opening an issue for a problem, please copy the error message and describe the operation in progress when the error 604 | occurred. 605 | -------------------------------------------------------------------------------- /faapi/__init__.py: -------------------------------------------------------------------------------- 1 | from .__version__ import __version__ 2 | from .base import FAAPI 3 | from .comment import Comment 4 | from .journal import Journal 5 | from .journal import JournalPartial 6 | from .submission import Submission 7 | from .submission import SubmissionPartial 8 | from .user import User 9 | from .user import UserPartial 10 | 11 | __all__ = [ 12 | "__version__", 13 | "FAAPI", 14 | "Comment", 15 | "Journal", 16 | "JournalPartial", 17 | "Submission", 18 | "SubmissionPartial", 19 | "User", 20 | "UserPartial", 21 | "exceptions", 22 | "connection", 23 | "parse" 24 | ] 25 | -------------------------------------------------------------------------------- /faapi/__version__.py: -------------------------------------------------------------------------------- 1 | __version__ = "3.11.9" 2 | -------------------------------------------------------------------------------- /faapi/base.py: -------------------------------------------------------------------------------- 1 | from http.cookiejar import CookieJar 2 | from time import sleep 3 | from time import time 4 | from typing import Any 5 | from typing import Optional 6 | from typing import Type 7 | from typing import Union 8 | from urllib.parse import quote 9 | from urllib.robotparser import RobotFileParser 10 | 11 | from requests import Session 12 | 13 | from .connection import CookieDict 14 | from .connection import Response 15 | from .connection import get 16 | from .connection import get_robots 17 | from .connection import join_url 18 | from .connection import make_session 19 | from .connection import stream_binary 20 | from .exceptions import DisallowedPath 21 | from .exceptions import Unauthorized 22 | from .journal import Journal 23 | from .journal import JournalPartial 24 | from .parse import BeautifulSoup 25 | from .parse import check_page_raise 26 | from .parse import parse_loggedin_user 27 | from .parse import parse_page 28 | from .parse import parse_submission_figures 29 | from .parse import parse_user_favorites 30 | from .parse import parse_user_journals 31 | from .parse import parse_user_submissions 32 | from .parse import parse_watchlist 33 | from .parse import username_url 34 | from .submission import Submission 35 | from .submission import SubmissionPartial 36 | from .user import User 37 | from .user import UserPartial 38 | 39 | 40 | # noinspection 
GrazieInspection 41 | class FAAPI: 42 | """ 43 | This class provides the methods to access and parse Fur Affinity pages and retrieve objects. 44 | """ 45 | 46 | def __init__(self, cookies: Union[list[CookieDict], CookieJar], session_class: Type[Session] = Session): 47 | """ 48 | :param cookies: The cookies for the session. 49 | :param session_class: The class to use for the session (defaults to requests.Session). 50 | """ 51 | 52 | self.session: Session = make_session(cookies, session_class) # Session used for get requests 53 | self.robots: RobotFileParser = get_robots(self.session) # robots.txt handler 54 | self.last_get: float = time() - self.crawl_delay # Time of last get (UNIX time) 55 | self.raise_for_unauthorized: bool = True # Control login checks 56 | self.timeout: Optional[int] = None # Timeout for requests 57 | 58 | @property 59 | def user_agent(self) -> str: 60 | """ 61 | The user agent of the session 62 | """ 63 | return ua.decode() if isinstance(ua := self.session.headers["User-Agent"], bytes) else ua 64 | 65 | @property 66 | def crawl_delay(self) -> float: 67 | """ 68 | Crawl delay from robots.txt 69 | """ 70 | return float(self.robots.crawl_delay(self.user_agent) or 1) 71 | 72 | def load_cookies(self, cookies: Union[list[CookieDict], CookieJar]): 73 | """ 74 | Load new cookies and create a new session. 75 | 76 | :param cookies: The cookies for the session. 77 | """ 78 | self.session = make_session(cookies, self.session.__class__) 79 | 80 | def handle_delay(self): 81 | """ 82 | Handles the crawl delay as set in the robots.txt 83 | """ 84 | if (d := time() - self.last_get) < self.crawl_delay: 85 | sleep(self.crawl_delay - d) 86 | self.last_get = time() 87 | 88 | def check_path(self, path: str, *, raise_for_disallowed: bool = False) -> bool: 89 | """ 90 | Checks whether a given path is allowed by the robots.txt. 91 | 92 | :param path: The path to check. 93 | :param raise_for_disallowed: Whether to raise an exception for a non-allowed path. 94 | :return: True if the path is allowed in the robots.txt, False otherwise. 95 | """ 96 | if not (allowed := self.robots.can_fetch(self.user_agent, "/" + path.lstrip("/"))) and raise_for_disallowed: 97 | raise DisallowedPath(f"Path {path!r} is not allowed by robots.txt") 98 | return allowed 99 | 100 | @property 101 | def connection_status(self) -> bool: 102 | """ 103 | Check the status of the connection to Fur Affinity. 104 | 105 | :return: True if it can connect, False otherwise. 106 | """ 107 | try: 108 | return self.get("/").ok 109 | except ConnectionError: 110 | return False 111 | 112 | @property 113 | def login_status(self) -> bool: 114 | """ 115 | Check the login status of the given cookies. 116 | 117 | :return: True if the cookies belong to a login session, False otherwise. 118 | """ 119 | return parse_loggedin_user(self.get_parsed("login", skip_auth_check=True)) is not None 120 | 121 | def get(self, path: str, **params: Union[str, bytes, int, float]) -> Response: 122 | """ 123 | Fetch a path with a GET request. 124 | The path is checked against the robots.txt before the request is made. 125 | The crawl-delay setting is enforced wth a wait time. 126 | 127 | :param path: The path to fetch. 128 | :param params: Query parameters for the request. 129 | :return: A Response object from the request. 
130 | """ 131 | self.check_path(path, raise_for_disallowed=True) 132 | self.handle_delay() 133 | return get(self.session, path, timeout=self.timeout, params=params) 134 | 135 | def get_parsed(self, path: str, *, skip_page_check: bool = False, skip_auth_check: bool = False, 136 | **params: Union[str, bytes, int, float]) -> BeautifulSoup: 137 | """ 138 | Fetch a path with a GET request and parse it using BeautifulSoup. 139 | 140 | :param path: The path to fetch. 141 | :param skip_page_check: Whether to skip checking the parsed page for errors. 142 | :param skip_auth_check: Whether to skip checking the parsed page for login status. 143 | :param params: Query parameters for the request. 144 | :return: A BeautifulSoup object containing the parsed content of the request response. 145 | """ 146 | response: Response = self.get(path, **params) 147 | response.raise_for_status() 148 | page: BeautifulSoup = parse_page(response.text) 149 | if not skip_page_check: 150 | check_page_raise(page) 151 | if not skip_auth_check and self.raise_for_unauthorized and not parse_loggedin_user(page): 152 | raise Unauthorized("Not logged in") 153 | return page 154 | 155 | def me(self) -> Optional[User]: 156 | """ 157 | Fetch the information of the logged-in user. 158 | 159 | :return: A User object for the logged-in user, or None if the cookies are not from a login session. 160 | """ 161 | return self.user(user) if (user := parse_loggedin_user(self.get_parsed("login"))) else None 162 | 163 | def frontpage(self) -> list[SubmissionPartial]: 164 | """ 165 | Fetch latest submissions from Fur Affinity's front page 166 | 167 | :return: A list of SubmissionPartial objects 168 | """ 169 | page_parsed: BeautifulSoup = self.get_parsed("/") 170 | submissions: list[SubmissionPartial] = [SubmissionPartial(f) for f in parse_submission_figures(page_parsed)] 171 | return sorted({s for s in submissions}, reverse=True) 172 | 173 | def submission(self, submission_id: int, get_file: bool = False, *, chunk_size: Optional[int] = None 174 | ) -> tuple[Submission, Optional[bytes]]: 175 | """ 176 | Fetch a submission and, optionally, its file. 177 | 178 | :param submission_id: The ID of the submission. 179 | :param get_file: Whether to download the submission file. 180 | :param chunk_size: The chunk_size to be used for the download (does not override get_file). 181 | :return: A Submission object and a bytes object (if the submission file is downloaded). 182 | """ 183 | sub: Submission = Submission(self.get_parsed(join_url("view", int(submission_id)))) 184 | sub_file: Optional[bytes] = self.submission_file(sub, chunk_size=chunk_size) if get_file and sub.id else None 185 | return sub, sub_file 186 | 187 | def submission_file(self, submission: Submission, *, chunk_size: Optional[int] = None) -> bytes: 188 | """ 189 | Fetch a submission file from a Submission object. 190 | 191 | :param submission: A Submission object. 192 | :param chunk_size: The chunk_size to be used for the download. 193 | :return: The submission file as a bytes object. 194 | """ 195 | self.handle_delay() 196 | return stream_binary(self.session, submission.file_url, chunk_size=chunk_size, timeout=self.timeout) 197 | 198 | def journal(self, journal_id: int) -> Journal: 199 | """ 200 | Fetch a journal. 201 | 202 | :param journal_id: The ID of the journal. 203 | :return: A Journal object. 204 | """ 205 | return Journal(self.get_parsed(join_url("journal", int(journal_id)))) 206 | 207 | def user(self, user: str) -> User: 208 | """ 209 | Fetch a user. 
210 | 211 | :param user: The name of the user (_ characters are allowed). 212 | :return: A User object. 213 | """ 214 | return User(self.get_parsed(join_url("user", quote(username_url(user))))) 215 | 216 | # noinspection DuplicatedCode 217 | def gallery(self, user: str, page: int = 1) -> tuple[list[SubmissionPartial], Optional[int]]: 218 | """ 219 | Fetch a user's gallery page. 220 | 221 | :param user: The name of the user (_ characters are allowed). 222 | :param page: The page to fetch. 223 | :return: A list of SubmissionPartial objects and the next page (None if it is the last). 224 | """ 225 | page_parsed: BeautifulSoup = self.get_parsed(join_url("gallery", quote(username_url(user)), int(page))) 226 | info_parsed: dict[str, Any] = parse_user_submissions(page_parsed) 227 | author: UserPartial = UserPartial() 228 | author.name, author.status, author.title, author.join_date, author.avatar_url = [ 229 | info_parsed["name"], info_parsed["status"], 230 | info_parsed["title"], info_parsed["join_date"], 231 | info_parsed["avatar_url"] 232 | ] 233 | for s in (submissions := list(map(SubmissionPartial, info_parsed["figures"]))): 234 | s.author = author 235 | return submissions, (page + 1) if not info_parsed["last_page"] else None 236 | 237 | # noinspection DuplicatedCode 238 | def scraps(self, user: str, page: int = 1) -> tuple[list[SubmissionPartial], Optional[int]]: 239 | """ 240 | Fetch a user's scraps page. 241 | 242 | :param user: The name of the user (_ characters are allowed). 243 | :param page: The page to fetch. 244 | :return: A list of SubmissionPartial objects and the next page (None if it is the last). 245 | """ 246 | page_parsed: BeautifulSoup = self.get_parsed(join_url("scraps", quote(username_url(user)), int(page))) 247 | info_parsed: dict[str, Any] = parse_user_submissions(page_parsed) 248 | author: UserPartial = UserPartial() 249 | author.name, author.status, author.title, author.join_date, author.avatar_url = [ 250 | info_parsed["name"], info_parsed["status"], 251 | info_parsed["title"], info_parsed["join_date"], 252 | info_parsed["avatar_url"] 253 | ] 254 | for s in (submissions := list(map(SubmissionPartial, info_parsed["figures"]))): 255 | s.author = author 256 | return submissions, (page + 1) if not info_parsed["last_page"] else None 257 | 258 | def favorites(self, user: str, page: str = "") -> tuple[list[SubmissionPartial], Optional[str]]: 259 | """ 260 | Fetch a user's favorites page. 261 | 262 | :param user: The name of the user (_ characters are allowed). 263 | :param page: The page to fetch. 264 | :return: A list of SubmissionPartial objects and the next page (None if it is the last). 265 | """ 266 | page_parsed: BeautifulSoup = self.get_parsed(join_url("favorites", quote(username_url(user)), page.strip())) 267 | info_parsed: dict[str, Any] = parse_user_favorites(page_parsed) 268 | submissions: list[SubmissionPartial] = list(map(SubmissionPartial, info_parsed["figures"])) 269 | return submissions, info_parsed["next_page"] or None 270 | 271 | def journals(self, user: str, page: int = 1) -> tuple[list[JournalPartial], Optional[int]]: 272 | """ 273 | Fetch a user's journals page. 274 | 275 | :param user: The name of the user (_ characters are allowed). 276 | :param page: The page to fetch. 277 | :return: A list of Journal objects and the next page (None if it is the last). 
278 | """ 279 | page_parsed: BeautifulSoup = self.get_parsed(join_url("journals", quote(username_url(user)), int(page))) 280 | info_parsed: dict[str, Any] = parse_user_journals(page_parsed) 281 | author: UserPartial = UserPartial() 282 | author.name, author.status, author.title, author.join_date, author.avatar_url = [ 283 | info_parsed["name"], info_parsed["status"], 284 | info_parsed["title"], info_parsed["join_date"], 285 | info_parsed["avatar_url"] 286 | ] 287 | for j in (journals := list(map(JournalPartial, info_parsed["sections"]))): 288 | j.author = author 289 | return journals, (page + 1) if not info_parsed["last_page"] else None 290 | 291 | def watchlist_to(self, user: str, page: int = 1) -> tuple[list[UserPartial], Optional[int]]: 292 | """ 293 | Fetch a page from the list of users watching the user. 294 | 295 | :param user: The name of the user (_ characters are allowed). 296 | :param page: The page to fetch. 297 | :return: A list of UserPartial objects and the next page (None if it is the last). 298 | """ 299 | users: list[UserPartial] = [] 300 | us, np = parse_watchlist( 301 | self.get_parsed(join_url("watchlist", "to", quote(username_url(user)), page), skip_auth_check=True)) 302 | for s, u in us: 303 | _user: UserPartial = UserPartial() 304 | _user.name = u 305 | _user.status = s 306 | users.append(_user) 307 | return users, np if np and np != page else None 308 | 309 | def watchlist_by(self, user: str, page: int = 1) -> tuple[list[UserPartial], Optional[int]]: 310 | """ 311 | Fetch a page from the list of users watched by the user. 312 | :param user: The name of the user (_ characters are allowed). 313 | :param page: The page to fetch. 314 | :return: A list of UserPartial objects and the next page (None if it is the last). 315 | """ 316 | users: list[UserPartial] = [] 317 | us, np = parse_watchlist( 318 | self.get_parsed(join_url("watchlist", "by", quote(username_url(user)), page), skip_auth_check=True)) 319 | for s, u in us: 320 | _user: UserPartial = UserPartial() 321 | _user.name = u 322 | _user.status = s 323 | users.append(_user) 324 | return users, np if np and np != page else None 325 | -------------------------------------------------------------------------------- /faapi/comment.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime 2 | from functools import reduce 3 | from typing import Optional 4 | from typing import Union 5 | 6 | from bs4.element import Tag 7 | 8 | import faapi 9 | from .exceptions import _raise_exception 10 | from .parse import html_to_bbcode 11 | from .parse import parse_comment_tag 12 | 13 | 14 | class Comment: 15 | """ 16 | Contains comment information and references to replies and parent objects. 
17 | """ 18 | 19 | def __init__(self, tag: Optional[Tag] = None, 20 | parent: Optional[Union[faapi.submission.Submission, faapi.journal.Journal]] = None): 21 | """ 22 | :param tag: The comment tag from which to parse information 23 | :param parent: The parent object of the comment 24 | """ 25 | assert tag is None or isinstance(tag, Tag), _raise_exception(TypeError(f"tag must be {None} or {Tag.__name__}")) 26 | 27 | self.comment_tag: Optional[Tag] = tag 28 | 29 | self.id: int = 0 30 | self.author: faapi.user.UserPartial = faapi.user.UserPartial() 31 | self.date: datetime = datetime.fromtimestamp(0) 32 | self.text: str = "" 33 | self.replies: list[Comment] = [] 34 | self.reply_to: Optional[Union[Comment, int]] = None 35 | self.edited: bool = False 36 | self.hidden: bool = False 37 | self.parent: Optional[Union[faapi.submission.Submission, faapi.journal.Journal]] = parent 38 | 39 | self.parse() 40 | 41 | def __hash__(self) -> int: 42 | return hash((self.id, type(self.parent), self.parent)) 43 | 44 | def __eq__(self, other) -> bool: 45 | if isinstance(other, Comment): 46 | return other.id == self.id and self.parent == other.parent 47 | elif isinstance(other, int): 48 | return other == self.id 49 | return False 50 | 51 | def __gt__(self, other) -> bool: 52 | if isinstance(other, Comment): 53 | return self.id > other.id 54 | elif isinstance(other, int): 55 | return self.id > other 56 | return False 57 | 58 | def __ge__(self, other) -> bool: 59 | if isinstance(other, Comment): 60 | return self.id >= other.id 61 | elif isinstance(other, int): 62 | return self.id >= other 63 | return False 64 | 65 | def __lt__(self, other) -> bool: 66 | if isinstance(other, Comment): 67 | return self.id < other.id 68 | elif isinstance(other, int): 69 | return self.id < other 70 | return False 71 | 72 | def __le__(self, other) -> bool: 73 | if isinstance(other, Comment): 74 | return self.id <= other.id 75 | elif isinstance(other, int): 76 | return self.id <= other 77 | return False 78 | 79 | def __iter__(self): 80 | yield "id", self.id 81 | yield "author", dict(self.author) 82 | yield "date", self.date 83 | yield "text", self.text 84 | yield "replies", _sort_comments_dict(self.replies) 85 | yield "reply_to", dict(_remove_recursion(self.reply_to)) if isinstance(self.reply_to, Comment) \ 86 | else self.reply_to 87 | yield "edited", self.edited 88 | yield "hidden", self.hidden 89 | yield "parent", None if self.parent is None else dict(self.parent) 90 | 91 | def __repr__(self): 92 | return self.__str__() 93 | 94 | def __str__(self): 95 | return f"{self.id} {self.author}".rstrip() 96 | 97 | @property 98 | def text_bbcode(self) -> str: 99 | """ 100 | The comment text formatted to BBCode 101 | 102 | :return: BBCode text 103 | """ 104 | return html_to_bbcode(self.text) 105 | 106 | @property 107 | def url(self): 108 | """ 109 | Compose the full URL to the comment. 110 | 111 | :return: The URL to the comment. 112 | """ 113 | return "" if self.parent is None else f"{self.parent.url}#cid:{self.id}" 114 | 115 | def parse(self, comment_tag: Optional[Tag] = None): 116 | """ 117 | Parse a comment tag, overrides any information already present in the object. 
118 | 119 | :param comment_tag: The comment tag from which to parse information 120 | """ 121 | assert comment_tag is None or isinstance(comment_tag, Tag), \ 122 | _raise_exception(TypeError(f"tag must be {None} or {Tag.__name__}")) 123 | 124 | self.comment_tag = comment_tag or self.comment_tag 125 | if self.comment_tag is None: 126 | return 127 | 128 | parsed: dict = parse_comment_tag(self.comment_tag) 129 | 130 | self.id = parsed["id"] 131 | self.date = datetime.fromtimestamp(parsed["timestamp"]) 132 | self.author = faapi.user.UserPartial() 133 | self.author.name = parsed["user_name"] 134 | self.author.display_name = parsed["user_display_name"] 135 | self.author.title = parsed["user_title"] 136 | self.author.avatar_url = parsed["avatar_url"] 137 | self.text = parsed["text"] 138 | self.replies = [] 139 | self.reply_to = parsed["parent"] 140 | self.edited = parsed["edited"] 141 | self.hidden = parsed["hidden"] 142 | 143 | 144 | def sort_comments(comments: list[Comment]) -> list[Comment]: 145 | """ 146 | Sort a list of comments into a tree structure. Replies are overwritten. 147 | 148 | :param comments: A list of Comment objects (flat or tree-structured) 149 | :return: A tree-structured list of comments with replies 150 | """ 151 | for comment in (comments := flatten_comments(comments)): 152 | comment.replies = [_set_reply_to(c, comment) for c in comments if c.reply_to == comment] 153 | return [c for c in comments if c.reply_to is None] 154 | 155 | 156 | def flatten_comments(comments: list[Comment]) -> list[Comment]: 157 | """ 158 | Flattens a list of comments. Replies are not modified. 159 | 160 | :param comments: A list of Comment objects (flat or tree-structured) 161 | :return: A flat date-sorted (ascending) list of comments 162 | """ 163 | replies: list[Comment] = comments 164 | comments_flat: list[Comment] = [] 165 | 166 | while replies: 167 | comments_flat.extend(replies) 168 | replies = [r for c in replies for r in c.replies] 169 | 170 | return sorted(set(comments_flat)) 171 | 172 | 173 | def _set_reply_to(comment: Comment, reply_to: Union[Comment, int]) -> Comment: 174 | comment.reply_to = reply_to 175 | return comment 176 | 177 | 178 | def _sort_comments_dict(comments: list[Comment]) -> list[dict]: 179 | comments_flat = flatten_comments(comments) 180 | comments_levels: list[list[Comment]] = [[c for c in comments_flat if not c.reply_to]] 181 | 182 | comments_flat = [c for c in comments_flat if c not in comments_levels[-1]] 183 | 184 | while comments_flat: 185 | comments_levels.append([c for c in comments_flat if c.reply_to in comments_levels[-1]]) 186 | comments_flat = [c for c in comments_flat if c not in comments_levels[-1]] 187 | 188 | comments_levels.reverse() 189 | 190 | comments_dicts: list[dict] = reduce( 191 | lambda prev, curr: [ 192 | dict(_remove_recursion(c)) | {"replies": [cd for cd in prev if cd["reply_to"] == c]} 193 | for c in curr 194 | ], 195 | comments_levels, 196 | [] 197 | ) 198 | return comments_dicts 199 | 200 | 201 | def _remove_recursion(comment: Comment) -> Comment: 202 | comment_new: Comment = Comment() 203 | 204 | comment_new.comment_tag = comment.comment_tag 205 | comment_new.id = comment.id 206 | comment_new.author = comment.author 207 | comment_new.date = comment.date 208 | comment_new.text = comment.text 209 | comment_new.replies = [] 210 | comment_new.reply_to = comment.reply_to.id if isinstance(comment.reply_to, Comment) else comment.reply_to 211 | comment_new.edited = comment.edited 212 | comment_new.hidden = comment.hidden 213 | comment_new.parent 
= None 214 | 215 | return comment_new 216 | -------------------------------------------------------------------------------- /faapi/connection.py: -------------------------------------------------------------------------------- 1 | from http.client import IncompleteRead 2 | from http.cookiejar import Cookie 3 | from http.cookiejar import CookieJar 4 | from platform import python_version 5 | from platform import uname 6 | from re import compile as re_compile 7 | from typing import Optional 8 | from typing import Type 9 | from typing import TypedDict 10 | from typing import Union 11 | from urllib.robotparser import RobotFileParser 12 | 13 | from requests import Response 14 | from requests import Session 15 | 16 | from .__version__ import __version__ 17 | from .exceptions import Unauthorized 18 | from .exceptions import _raise_exception 19 | 20 | root: str = "https://www.furaffinity.net" 21 | 22 | 23 | class CookieDict(TypedDict): 24 | name: str 25 | value: str 26 | 27 | 28 | def join_url(*url_comps: Union[str, int]) -> str: 29 | return "/".join(map(lambda e: str(e).strip(" /"), url_comps)) 30 | 31 | 32 | def make_session(cookies: Union[list[CookieDict], CookieJar], cls: Type[Session]) -> Session: 33 | assert len(cookies), _raise_exception(Unauthorized("No cookies for session")) 34 | session: Session = cls() 35 | session.headers["User-Agent"] = f"faapi/{__version__} Python/{python_version()} {(u := uname()).system}/{u.release}" 36 | 37 | for cookie in cookies: 38 | if isinstance(cookie, Cookie): 39 | session.cookies.set(cookie.name, cookie.value or "") 40 | else: 41 | session.cookies.set(cookie["name"], cookie["value"]) 42 | 43 | return session 44 | 45 | 46 | def get_robots(session: Session) -> RobotFileParser: 47 | robots: RobotFileParser = RobotFileParser(url := join_url(root, "robots.txt")) 48 | robots.parse(filter(re_compile(r"^[^#\s].+").match, map(str.strip, session.get(url).text.splitlines()))) 49 | return robots 50 | 51 | 52 | def get(session: Session, path: str, *, timeout: Optional[int] = None, 53 | params: Optional[dict[str, Union[str, bytes, int, float]]] = None) -> Response: 54 | return session.get(join_url(root, path), params=params, timeout=timeout) 55 | 56 | 57 | def stream_binary(session: Session, url: str, *, chunk_size: Optional[int] = None, 58 | timeout: Optional[int] = None) -> bytes: 59 | stream: Response = session.get(url, stream=True, timeout=timeout) 60 | stream.raise_for_status() 61 | 62 | file_binary: bytes = bytes().join(stream.iter_content(chunk_size)) 63 | 64 | if (length := int(stream.headers.get("Content-Length", 0))) > 0 and length != len(file_binary): 65 | raise IncompleteRead(file_binary, length - len(file_binary)) 66 | 67 | return file_binary 68 | -------------------------------------------------------------------------------- /faapi/exceptions.py: -------------------------------------------------------------------------------- 1 | class DisallowedPath(Exception): 2 | """ 3 | The path is not allowed by the robots.txt. 4 | """ 5 | 6 | 7 | class Unauthorized(Exception): 8 | """ 9 | The user is not logged-in. 10 | """ 11 | 12 | 13 | class ParsingError(Exception): 14 | """ 15 | An error occurred while parsing the page. 16 | """ 17 | 18 | 19 | class NonePage(ParsingError): 20 | """ 21 | The parsed page is None. 22 | """ 23 | 24 | 25 | class NoTitle(ParsingError): 26 | """ 27 | The parsed paged is missing a title. 28 | """ 29 | 30 | 31 | class NotFound(ParsingError): 32 | """ 33 | The resource could not be found. 
34 | """ 35 | 36 | 37 | class DisabledAccount(ParsingError): 38 | """ 39 | The resource belongs to a disabled account. 40 | """ 41 | 42 | 43 | class ServerError(ParsingError): 44 | """ 45 | The page contains a server error notice. 46 | """ 47 | 48 | 49 | class NoticeMessage(ParsingError): 50 | """ 51 | A notice of unknown type was found in the page. 52 | """ 53 | 54 | 55 | def _raise_exception(err: BaseException): 56 | raise err 57 | -------------------------------------------------------------------------------- /faapi/journal.py: -------------------------------------------------------------------------------- 1 | from collections import namedtuple 2 | from datetime import datetime 3 | from typing import Optional 4 | from typing import Union 5 | 6 | from .connection import join_url 7 | from .connection import root 8 | from .exceptions import _raise_exception 9 | from .parse import BeautifulSoup 10 | from .parse import check_page_raise 11 | from .parse import html_to_bbcode 12 | from .parse import parse_comments 13 | from .parse import parse_journal_page 14 | from .parse import parse_journal_section 15 | from .parse import Tag 16 | from .user import UserPartial 17 | 18 | 19 | class JournalStats(namedtuple("JournalStats", ["comments"])): 20 | """ 21 | This object contains the journal's statistics: 22 | * comments 23 | """ 24 | 25 | 26 | class JournalBase: 27 | def __init__(self): 28 | self.id: int = 0 29 | self.title: str = "" 30 | self.date: datetime = datetime.fromtimestamp(0) 31 | self.author: UserPartial = UserPartial() 32 | self.stats: JournalStats = JournalStats(0) 33 | self.content: str = "" 34 | self.mentions: list[str] = [] 35 | 36 | def __hash__(self) -> int: 37 | return hash(self.id) 38 | 39 | def __eq__(self, other) -> bool: 40 | if isinstance(other, JournalBase): 41 | return other.id == self.id 42 | elif isinstance(other, int): 43 | return other == self.id 44 | return False 45 | 46 | def __gt__(self, other) -> bool: 47 | if isinstance(other, JournalBase): 48 | return self.id > other.id 49 | elif isinstance(other, int): 50 | return self.id > other 51 | return False 52 | 53 | def __ge__(self, other) -> bool: 54 | if isinstance(other, JournalBase): 55 | return self.id >= other.id 56 | elif isinstance(other, int): 57 | return self.id >= other 58 | return False 59 | 60 | def __lt__(self, other) -> bool: 61 | if isinstance(other, JournalBase): 62 | return self.id < other.id 63 | elif isinstance(other, int): 64 | return self.id < other 65 | return False 66 | 67 | def __le__(self, other) -> bool: 68 | if isinstance(other, JournalBase): 69 | return self.id <= other.id 70 | elif isinstance(other, int): 71 | return self.id <= other 72 | return False 73 | 74 | def __iter__(self): 75 | yield "id", self.id 76 | yield "title", self.title 77 | yield "date", self.date 78 | yield "author", dict(self.author) 79 | yield "stats", self.stats._asdict() 80 | yield "content", self.content 81 | yield "mentions", self.mentions 82 | 83 | def __repr__(self): 84 | return self.__str__() 85 | 86 | def __str__(self): 87 | return f"{self.id} {self.author} {self.title}" 88 | 89 | @property 90 | def content_bbcode(self) -> str: 91 | """ 92 | The journal content formatted to BBCode 93 | 94 | :return: BBCode content 95 | """ 96 | return html_to_bbcode(self.content) 97 | 98 | @property 99 | def url(self) -> str: 100 | """ 101 | Compose the full URL to the journal. 102 | 103 | :return: The URL to the journal. 
104 | """ 105 | return join_url(root, "journal", self.id) 106 | 107 | 108 | class JournalPartial(JournalBase): 109 | """ 110 | Contains partial journal information gathered from journals pages. 111 | """ 112 | 113 | def __init__(self, journal_tag: Optional[Tag] = None): 114 | """ 115 | :param journal_tag: The tag from which to parse the journal. 116 | """ 117 | assert journal_tag is None or isinstance(journal_tag, Tag), \ 118 | _raise_exception(TypeError(f"journal_item must be {None} or {Tag.__name__}")) 119 | self.journal_tag: Optional[Tag] = journal_tag 120 | 121 | super(JournalPartial, self).__init__() 122 | 123 | self.parse() 124 | 125 | def parse(self, journal_tag: Optional[Union[Tag, BeautifulSoup]] = None): 126 | """ 127 | Parse a journal tag, overrides any information already present in the object. 128 | 129 | :param journal_tag: The tag from which to parse the journal. 130 | """ 131 | assert journal_tag is None or isinstance(journal_tag, BeautifulSoup), \ 132 | _raise_exception(TypeError(f"journal_item must be {None} or {BeautifulSoup.__name__}")) 133 | 134 | self.journal_tag = journal_tag or self.journal_tag 135 | if self.journal_tag is None: 136 | return 137 | 138 | parsed: dict = parse_journal_section(self.journal_tag) 139 | 140 | # noinspection DuplicatedCode 141 | self.id = parsed["id"] 142 | self.title = parsed["title"] 143 | self.author.name = parsed.get("user_name", "") 144 | self.author.display_name = parsed.get("user_display_name", "") 145 | self.author.status = parsed.get("user_status", "") 146 | self.author.title = parsed.get("user_title", "") 147 | self.author.join_date = parsed.get("user_join_date", "") 148 | self.author.avatar_url = parsed.get("avatar_url", "") 149 | self.stats = JournalStats(parsed["comments"]) 150 | self.date = parsed["date"] 151 | self.content = parsed["content"] 152 | self.mentions = parsed["mentions"] 153 | 154 | 155 | class Journal(JournalBase): 156 | """ 157 | Contains complete journal information gathered from journal pages, including comments. 158 | """ 159 | 160 | def __init__(self, journal_page: Optional[BeautifulSoup] = None): 161 | """ 162 | :param journal_page: The page from which to parse the journal. 163 | """ 164 | assert journal_page is None or isinstance(journal_page, BeautifulSoup), \ 165 | _raise_exception(TypeError(f"journal_item must be {None} or {BeautifulSoup.__name__}")) 166 | self.journal_page: Optional[BeautifulSoup] = journal_page 167 | 168 | super(Journal, self).__init__() 169 | 170 | self.header: str = "" 171 | self.footer: str = "" 172 | from .comment import Comment 173 | self.comments: list[Comment] = [] 174 | 175 | self.parse() 176 | 177 | def __iter__(self): 178 | for k, v in super(Journal, self).__iter__(): 179 | yield k, v 180 | yield "header", self.header 181 | yield "footer", self.footer 182 | from .comment import _sort_comments_dict 183 | yield "comments", _sort_comments_dict(self.comments) 184 | 185 | @property 186 | def header_bbcode(self) -> str: 187 | """ 188 | The journal header formatted to BBCode 189 | 190 | :return: BBCode header 191 | """ 192 | return html_to_bbcode(self.header) 193 | 194 | @property 195 | def footer_bbcode(self) -> str: 196 | """ 197 | The journal footer formatted to BBCode 198 | 199 | :return: BBCode footer 200 | """ 201 | return html_to_bbcode(self.footer) 202 | 203 | def parse(self, journal_page: Optional[Union[Tag, BeautifulSoup]] = None): 204 | """ 205 | Parse a journal page, overrides any information already present in the object. 
206 | 207 | :param journal_page: The page from which to parse the journal. 208 | """ 209 | assert journal_page is None or isinstance(journal_page, BeautifulSoup), \ 210 | _raise_exception(TypeError(f"journal_item must be {None} or {BeautifulSoup.__name__}")) 211 | 212 | self.journal_page = journal_page or self.journal_page 213 | if self.journal_page is None: 214 | return 215 | 216 | check_page_raise(self.journal_page) 217 | 218 | parsed: dict = parse_journal_page(self.journal_page) 219 | 220 | # noinspection DuplicatedCode 221 | self.id = parsed["id"] 222 | self.title = parsed["title"] 223 | self.author.name = parsed["user_info"]["name"] 224 | self.author.display_name = parsed["user_info"]["display_name"] 225 | self.author.status = parsed["user_info"]["status"] 226 | self.author.title = parsed["user_info"]["title"] 227 | self.author.join_date = parsed["user_info"]["join_date"] 228 | self.author.avatar_url = parsed["user_info"]["avatar_url"] 229 | self.stats = JournalStats(parsed["comments"]) 230 | self.date = parsed["date"] 231 | self.content = parsed["content"] 232 | self.header = parsed["header"] 233 | self.footer = parsed["footer"] 234 | self.mentions = parsed["mentions"] 235 | from .comment import sort_comments, Comment 236 | self.comments = sort_comments([Comment(t, self) for t in parse_comments(self.journal_page)]) 237 | -------------------------------------------------------------------------------- /faapi/parse.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime 2 | from re import compile as re_compile 3 | from re import Match 4 | from re import match 5 | from re import MULTILINE 6 | from re import Pattern 7 | from re import search 8 | from re import sub 9 | from typing import Any 10 | from typing import Optional 11 | from typing import Union 12 | from urllib.parse import quote 13 | 14 | from bbcode import Parser as BBCodeParser # type:ignore 15 | from bs4 import BeautifulSoup 16 | from bs4.element import NavigableString 17 | from bs4.element import Tag 18 | from dateutil.parser import parse as parse_date 19 | from urllib3.util import parse_url 20 | 21 | from .connection import root 22 | from .exceptions import _raise_exception 23 | from .exceptions import DisabledAccount 24 | from .exceptions import NonePage 25 | from .exceptions import NotFound 26 | from .exceptions import NoticeMessage 27 | from .exceptions import NoTitle 28 | from .exceptions import ParsingError 29 | from .exceptions import ServerError 30 | 31 | relative_url: Pattern = re_compile(r"^(?:https?://(?:www\.)?furaffinity\.net)?(.*)") 32 | mentions_regexp: Pattern = re_compile(r"^(?:(?:https?://)?(?:www\.)?furaffinity\.net)?/user/([^/#]+).*$") 33 | url_username_regexp: Pattern = re_compile(r"/(?:user|gallery|scraps|favorites|journals|commissions)/([^/]+)(/.*)?") 34 | watchlist_next_regexp: Pattern = re_compile(r"/watchlist/(?:by|to)/[^/]+/(\d+)") 35 | not_found_messages: tuple[str, ...] = ("not in our database", "cannot be found", "could not be found", "user not found") 36 | deactivated_messages: tuple[str, ...] = ("deactivated", "pending deletion") 37 | smilie_icons: tuple[str, ...] 
= ( 38 | "crying", "derp", "dunno", "embarrassed", "evil", "gift", "huh", "lmao", "love", "nerd", "note", "oooh", "pleased", 39 | "rollingeyes", "sad", "sarcastic", "serious", "sleepy", "smile", "teeth", "tongue", "veryhappy", "wink", "yelling", 40 | "zipped", "angel", "badhairday", "cd", "coffee", "cool", "whatever" 41 | ) 42 | 43 | 44 | def get_attr(tag: Tag, attr: str) -> str: 45 | return value[0] if isinstance(value := tag.attrs[attr], list) else value 46 | 47 | 48 | def parse_page(text: str) -> BeautifulSoup: 49 | return BeautifulSoup(text, "lxml") 50 | 51 | 52 | def check_page_raise(page: BeautifulSoup) -> None: 53 | if page is None: 54 | raise NonePage 55 | elif not (title := page.title.text.lower() if page.title else ""): 56 | raise NoTitle 57 | elif title.startswith("account disabled"): 58 | raise DisabledAccount 59 | elif title == "system error": 60 | error_text: str = error.text if (error := page.select_one("div.section-body")) else "" 61 | if any(m in error_text.lower() for m in not_found_messages): 62 | raise NotFound 63 | else: 64 | raise ServerError(*filter(bool, map(str.strip, error_text.splitlines()))) 65 | elif notice := page.select_one("section.notice-message"): 66 | notice_text: str = notice.text 67 | if any(m in notice_text.lower() for m in deactivated_messages): 68 | raise DisabledAccount 69 | elif any(m in notice_text.lower() for m in not_found_messages): 70 | raise NotFound 71 | else: 72 | raise NoticeMessage(*filter(bool, map(str.strip, notice_text.splitlines()))) 73 | 74 | 75 | def username_url(username: str) -> str: 76 | return sub(r"[^a-z\d.~`\[\]-]", "", username.lower()) 77 | 78 | 79 | def inner_html(tag: Tag) -> str: 80 | return tag.decode_contents() 81 | 82 | 83 | def clean_html(html: str) -> str: 84 | return html.strip().replace("\r", "") 85 | 86 | 87 | def html_to_bbcode(html: str) -> str: 88 | body: Optional[Tag] = parse_page(f"{html}").select_one("html > body") 89 | if not body: 90 | return "" 91 | 92 | for linkusername in body.select("a.linkusername"): 93 | linkusername.replaceWith(f"@{linkusername.text.strip()}") 94 | 95 | for iconusername in body.select("a.iconusername,a.usernameicon"): 96 | username: str = iconusername.text.strip() or iconusername.attrs.get('href', '').strip('/').split('/')[-1] 97 | if icon := iconusername.select_one("img"): 98 | username = icon.attrs.get('alt', '').strip() or username 99 | iconusername.replaceWith(f":icon{username}:" if iconusername.text.strip() else f":{username}icon:") 100 | 101 | for img in body.select("img"): 102 | img.replaceWith(f"[img={img.attrs.get('src', '')}/]") 103 | 104 | for hr in body.select("hr"): 105 | hr.replaceWith("-----") 106 | 107 | for smilie in body.select("i.smilie"): 108 | smilie_class: list[str] = list(smilie.attrs.get("class", [])) 109 | smilie_name: str = next(filter(lambda c: c not in ["smilie", ""], smilie_class), "") 110 | smilie.replaceWith(f":{smilie_name or 'smilie'}:") 111 | 112 | for span in body.select("span.bbcode[style*=color]"): 113 | if m := match(r".*color: ?([^ ;]+).*", span.attrs["style"]): 114 | span.replaceWith(f"[color={m[1]}]", *span.children, "[/color]") 115 | else: 116 | span.replaceWith(*span.children) 117 | 118 | for nav_link in body.select("span.parsed_nav_links"): 119 | a_tags = nav_link.select("a") 120 | a_prev_tag: Optional[Tag] = next((a for a in a_tags if "prev" in a.text.lower()), None) 121 | a_frst_tag: Optional[Tag] = next((a for a in a_tags if "first" in a.text.lower()), None) 122 | a_next_tag: Optional[Tag] = next((a for a in a_tags if "next" in 
a.text.lower()), None) 123 | a_prev = a_prev_tag.attrs.get("href", "").strip("/").split("/")[-1] if a_prev_tag else "" 124 | a_frst = a_frst_tag.attrs.get("href", "").strip("/").split("/")[-1] if a_frst_tag else "" 125 | a_next = a_next_tag.attrs.get("href", "").strip("/").split("/")[-1] if a_next_tag else "" 126 | nav_link.replaceWith(f"[{a_prev or '-'},{a_frst or '-'},{a_next or '-'}]") 127 | 128 | for a in body.select("a.auto_link_shortened:not(.named_url), a.auto_link:not(.named_url)"): 129 | a.replaceWith(a.attrs.get('href', '')) 130 | 131 | for a in body.select("a"): 132 | href_match: Optional[Match] = relative_url.match(a.attrs.get('href', '')) 133 | a.replaceWith( 134 | f"[url={href_match[1] if href_match else a.attrs.get('href', '')}]", 135 | *a.children, 136 | "[/url]" 137 | ) 138 | 139 | for yt in body.select("iframe[src*='youtube.com/embed']"): 140 | yt.replaceWith(f"[yt]https://youtube.com/embed/{yt.attrs.get('src', '').strip('/').split('/')}[/yt]") 141 | 142 | for quote_name_tag in body.select("span.bbcode.bbcode_quote > span.bbcode_quote_name"): 143 | quote_author: str = quote_name_tag.text.strip().removesuffix('wrote:').strip() 144 | quote_tag = quote_name_tag.parent 145 | if not quote_tag: 146 | quote_name_tag.replaceWith(quote_author) 147 | continue 148 | quote_name_tag.decompose() 149 | quote_tag.replaceWith( 150 | f"[quote{('=' + quote_author) if quote_author else ''}]", 151 | *quote_tag.children, 152 | "[/quote]" 153 | ) 154 | 155 | for quote_tag in body.select("span.bbcode.bbcode_quote"): 156 | quote_tag.replaceWith("[quote]", *quote_tag.children, "[/quote]") 157 | 158 | for [selector, bbcode_tag] in ( 159 | ("i", "i"), 160 | ("b", "b"), 161 | ("strong", "b"), 162 | ("u", "u"), 163 | ("s", "s"), 164 | ("code.bbcode_left", "left"), 165 | ("code.bbcode_center", "center"), 166 | ("code.bbcode_right", "right"), 167 | ("span.bbcode_spoiler", "spoiler"), 168 | ("sub", "sub"), 169 | ("sup", "sup"), 170 | ("h1", "h1"), 171 | ("h2", "h2"), 172 | ("h3", "h3"), 173 | ("h4", "h4"), 174 | ("h5", "h5"), 175 | ("h6", "h6"), 176 | ): 177 | for tag in body.select(selector): 178 | tag.replaceWith(f"[{bbcode_tag}]", *tag.children, f"[/{bbcode_tag}]") 179 | 180 | for br in body.select("br"): 181 | br.replaceWith("\n") 182 | 183 | for p in body.select("p"): 184 | p.replaceWith(*p.children) 185 | 186 | for tag in body.select("*"): 187 | if not (div_class := tag.attrs.get("class", None)): 188 | tag.replaceWith(f"[tag={tag.name}]", *tag.children, "[/tag.{tag.name}]") 189 | else: 190 | tag.replaceWith( 191 | f"[tag={tag.name}.{' '.join(div_class) if isinstance(div_class, list) else div_class}]", 192 | *tag.children, 193 | "[/tag]" 194 | ) 195 | 196 | bbcode: str = body.decode_contents() 197 | 198 | bbcode = sub(" *$", "", bbcode, flags=MULTILINE) 199 | bbcode = sub("^ *", "", bbcode, flags=MULTILINE) 200 | 201 | for char, substitution in ( 202 | ("©", "(c)"), 203 | ("™", "(tm)"), 204 | ("®", "(r)"), 205 | ("©", "(c)"), 206 | ("®", "(tm)"), 207 | ("™", "(r)"), 208 | ("<", "<"), 209 | (">", ">"), 210 | ("&", "&"), 211 | ): 212 | bbcode = bbcode.replace(char, substitution) 213 | 214 | return bbcode.strip(" ") 215 | 216 | 217 | def bbcode_to_html(bbcode: str) -> str: 218 | def render_url(_tag_name, value: str, options: dict[str, str], _parent, _context) -> str: 219 | return f'{value}' 220 | 221 | def render_color(_tag_name, value, options, _parent, _context) -> str: 222 | return f'{value}' 223 | 224 | def render_quote(_tag_name, value: str, options: dict[str, str], _parent, _context) -> str: 225 
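# html_to_bbcode() above works by replacing tags in place with their BBCode
# markers, trimming per-line whitespace, and finally substituting special
# characters such as "©" -> "(c)". A hedged sketch of the intended behaviour
# for a hypothetical snippet (derived from the rules above, not a captured
# output):
#
#   html_to_bbcode('<strong>Hi</strong> <a href="https://www.furaffinity.net/view/123/">here</a>')
#   ->  '[b]Hi[/b] [url=/view/123/]here[/url]'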
| author: str = options.get("quote", "") 226 | author = f"{author} wrote:" if author else "" 227 | return f'{author}{value}' 228 | 229 | def render_tags(tag_name: str, value: str, options: dict[str, str], _parent, _context) -> str: 230 | if not options and tag_name.islower(): 231 | return f"<{tag_name}>{value}" 232 | return f"[{tag_name} {' '.join(f'{k}={v}' if v else k for k, v in options.items())}]{value}" 233 | 234 | def render_tag(_tag_name, value: str, options: dict[str, str], _parent, _context) -> str: 235 | name, *classes = options["tag"].split(".") 236 | return f'<{name} class="{" ".join(classes)}">{value}' 237 | 238 | def parse_extra(page: BeautifulSoup) -> BeautifulSoup: 239 | child: NavigableString 240 | child_new: Tag 241 | has_match: bool = True 242 | while has_match: 243 | has_match = False 244 | for child in [c for e in page.select("*:not(a)") for c in e.children if isinstance(c, NavigableString)]: 245 | if m_ := match(rf"(.*):({'|'.join(smilie_icons)}):(.*)", child): 246 | has_match = True 247 | child_new = Tag(name="i", attrs={"class": f"smilie {m_[2]}"}) 248 | child.replaceWith(m_[1], child_new, m_[3]) 249 | elif m_ := match(r"(.*)(?:@([a-zA-Z0-9.~_-]+)|:link([a-zA-Z0-9.~_-]+):)(.*)", child): 250 | has_match = True 251 | child_new = Tag(name="a", attrs={"class": "linkusername", "href": f"/user/{m_[2] or m_[3]}"}) 252 | child_new.insert(0, m_[2] or m_[3]) 253 | child.replaceWith(m_[1], child_new, m_[4]) 254 | elif m_ := match(r"(.*):(?:icon([a-zA-Z0-9.~_-]+)|([a-zA-Z0-9.~_-]+)icon):(.*)", child): 255 | has_match = True 256 | user: str = m_[2] or m_[3] or "" 257 | child_new = Tag(name="a", attrs={"class": "iconusername", "href": f"/user/{user}"}) 258 | child_new_img: Tag = Tag( 259 | name="img", 260 | attrs={ 261 | "alt": user, "title": user, 262 | "src": f"//a.furaffinity.net/{datetime.now():%Y%m%d}/{username_url(user)}.gif" 263 | } 264 | ) 265 | child_new.insert(0, child_new_img) 266 | if m_[2]: 267 | child_new.insert(1, f"\xA0{m_[2]}") 268 | child.replaceWith(m_[1], child_new, m_[4]) 269 | elif m_ := match(r"(.*)\[ *(?:(\d+)|-)?, *(?:(\d+)|-)? *, *(?:(\d+)|-)? 
*](.*)", child): 270 | has_match = True 271 | child_new = Tag(name="span", attrs={"class": "parsed_nav_links"}) 272 | child_new_1: Union[Tag, str] = "<<<\xA0PREV" 273 | child_new_2: Union[Tag, str] = "FIRST" 274 | child_new_3: Union[Tag, str] = "NEXT\xA0>>>" 275 | if m_[2]: 276 | child_new_1 = Tag(name="a", attrs={"href": f"/view/{m_[2]}"}) 277 | child_new_1.insert(0, "<<<\xA0PREV") 278 | if m_[3]: 279 | child_new_2 = Tag(name="a", attrs={"href": f"/view/{m_[3]}"}) 280 | child_new_2.insert(0, "<<<\xA0FIRST") 281 | if m_[4]: 282 | child_new_3 = Tag(name="a", attrs={"href": f"/view/{m_[4]}"}) 283 | child_new_3.insert(0, "NEXT\xA0>>>") 284 | child_new.insert(0, child_new_1) 285 | child_new.insert(1, "\xA0|\xA0") 286 | child_new.insert(2, child_new_2) 287 | child_new.insert(3, "\xA0|\xA0") 288 | child_new.insert(4, child_new_3) 289 | child.replaceWith(m_[1], child_new, m_[5]) 290 | 291 | for p in page.select("p"): 292 | p.replaceWith(*p.children) 293 | 294 | return page 295 | 296 | parser: BBCodeParser = BBCodeParser(install_defaults=False, replace_links=False, replace_cosmetic=True) 297 | parser.REPLACE_ESCAPE = ( 298 | ("&", "&"), 299 | ("<", "<"), 300 | (">", ">"), 301 | ) 302 | parser.REPLACE_COSMETIC = ( 303 | ("(c)", "©"), 304 | ("(r)", "®"), 305 | ("(tm)", "™"), 306 | ) 307 | 308 | for tag in ("i", "b", "u", "s", "sub", "sup", "h1", "h2", "h3", "h3", "h4", "h5", "h6"): 309 | parser.add_formatter(tag, render_tags) 310 | for align in ("left", "center", "right"): 311 | parser.add_simple_formatter(align, f'%(value)s') 312 | 313 | parser.add_simple_formatter("spoiler", '%(value)s') 314 | parser.add_simple_formatter("url", '%(value)s') 315 | parser.add_simple_formatter( 316 | "iconusername", 317 | f'' 318 | f'%(value)s' 319 | f'%(value)s' 320 | f'' 321 | ) 322 | parser.add_simple_formatter( 323 | "usernameicon", 324 | f'' 325 | f'%(value)s' 326 | f'' 327 | ) 328 | parser.add_simple_formatter("linkusername", '%(value)s') 329 | parser.add_simple_formatter("hr", "
", standalone=True) 330 | 331 | parser.add_formatter("url", render_url) 332 | parser.add_formatter("color", render_color) 333 | parser.add_formatter("quote", render_quote) 334 | parser.add_formatter("tag", render_tag) 335 | 336 | bbcode = sub(r"-{5,}", "[hr]", bbcode) 337 | 338 | result_page: BeautifulSoup = parse_extra(parse_page(parser.format(bbcode))) 339 | return (result_page.select_one("html > body") or result_page).decode_contents() 340 | 341 | 342 | def parse_username_from_url(url: str) -> Optional[str]: 343 | return m[1] if (m := url_username_regexp.match(parse_url(url).path or "")) else None 344 | 345 | 346 | def parse_mentions(tag: Tag) -> list[str]: 347 | mentions: list[str] = [username_url(m[1]) for a in tag.select("a") 348 | if (m := match(mentions_regexp, get_attr(a, "href")))] 349 | return sorted(set([m for m in mentions if m]), key=mentions.index) 350 | 351 | 352 | def parse_loggedin_user(page: BeautifulSoup) -> Optional[str]: 353 | return get_attr(avatar, "alt") if (avatar := page.select_one("img.loggedin_user_avatar")) else None 354 | 355 | 356 | def parse_journal_section(section_tag: Tag) -> dict[str, Any]: 357 | id_: int = int(section_tag.attrs.get("id", "00000")[4:]) 358 | tag_title: Optional[Tag] = section_tag.select_one("h2") 359 | tag_date: Optional[Tag] = section_tag.select_one("div.section-header span.popup_date") 360 | tag_content: Optional[Tag] = section_tag.select_one("div.journal-body") 361 | tag_comments: Optional[Tag] = section_tag.select_one("div.section-footer > a > span") 362 | 363 | assert id_ != 0, _raise_exception(ParsingError("Missing ID")) 364 | assert tag_title is not None, _raise_exception(ParsingError("Missing title tag")) 365 | assert tag_date is not None, _raise_exception(ParsingError("Missing date tag")) 366 | assert tag_content is not None, _raise_exception(ParsingError("Missing content tag")) 367 | assert tag_comments is not None, _raise_exception(ParsingError("Missing comments tag")) 368 | 369 | # noinspection DuplicatedCode 370 | title: str = tag_title.text.strip() 371 | date: datetime = parse_date( 372 | get_attr(tag_date, "title").strip() 373 | if match(r"^[A-Za-z]+ \d+,.*$", get_attr(tag_date, "title")) 374 | else tag_date.text.strip() 375 | ) 376 | content: str = clean_html(inner_html(tag_content)) 377 | mentions: list[str] = parse_mentions(tag_content) 378 | comments: int = int(tag_comments.text.strip()) 379 | 380 | return { 381 | "id": id_, 382 | "title": title, 383 | "date": date, 384 | "content": content, 385 | "mentions": mentions, 386 | "comments": comments, 387 | } 388 | 389 | 390 | def parse_journal_page(journal_page: BeautifulSoup) -> dict[str, Any]: 391 | user_info: dict[str, str] = parse_user_folder(journal_page) 392 | tag_id: Optional[Tag] = journal_page.select_one("meta[property='og:url']") 393 | tag_title: Optional[Tag] = journal_page.select_one("h2.journal-title") 394 | tag_date: Optional[Tag] = journal_page.select_one("div.content div.section-header span.popup_date") 395 | tag_header: Optional[Tag] = journal_page.select_one("div.journal-header") 396 | tag_footer: Optional[Tag] = journal_page.select_one("div.journal-footer") 397 | tag_content: Optional[Tag] = journal_page.select_one("div.journal-content") 398 | tag_comments: Optional[Tag] = journal_page.select_one("div.section-footer > span") 399 | 400 | assert tag_id is not None, _raise_exception(ParsingError("Missing ID tag")) 401 | assert tag_title is not None, _raise_exception(ParsingError("Missing title tag")) 402 | assert tag_date is not None, 
_raise_exception(ParsingError("Missing date tag")) 403 | assert tag_content is not None, _raise_exception(ParsingError("Missing content tag")) 404 | assert tag_comments is not None, _raise_exception(ParsingError("Missing comments tag")) 405 | 406 | id_: int = int(tag_id.attrs.get("content", "0").strip("/").split("/")[-1]) 407 | # noinspection DuplicatedCode 408 | title: str = tag_title.text.strip() 409 | date: datetime = parse_date( 410 | get_attr(tag_date, "title").strip() 411 | if match(r"^[A-Za-z]+ \d+,.*$", get_attr(tag_date, "title")) 412 | else tag_date.text.strip() 413 | ) 414 | header: str = clean_html(inner_html(tag_header)) if tag_header else "" 415 | footer: str = clean_html(inner_html(tag_footer)) if tag_footer else "" 416 | content: str = clean_html(inner_html(tag_content)) 417 | mentions: list[str] = parse_mentions(tag_content) 418 | comments: int = int(tag_comments.text.strip()) 419 | 420 | assert id_ != 0, _raise_exception(ParsingError("Missing ID")) 421 | 422 | return { 423 | "user_info": user_info, 424 | "id": id_, 425 | "title": title, 426 | "date": date, 427 | "content": content, 428 | "header": header, 429 | "footer": footer, 430 | "mentions": mentions, 431 | "comments": comments, 432 | } 433 | 434 | 435 | def parse_submission_figure(figure_tag: Tag) -> dict[str, Any]: 436 | id_: int = int(get_attr(figure_tag, "id")[4:]) 437 | tag_title: Optional[Tag] = figure_tag.select_one("figcaption a[href^='/view/']") 438 | tag_author: Optional[Tag] = figure_tag.select_one("figcaption a[href^='/user/']") 439 | tag_thumbnail: Optional[Tag] = figure_tag.select_one("img") 440 | 441 | assert tag_title is not None, _raise_exception(ParsingError("Missing title tag")) 442 | assert tag_author is not None, _raise_exception(ParsingError("Missing author tag")) 443 | assert tag_thumbnail is not None, _raise_exception(ParsingError("Missing thumbnail tag")) 444 | 445 | title: str = get_attr(tag_title, "title") 446 | author: str = get_attr(tag_author, "title") 447 | rating: str = next(c for c in figure_tag["class"] if c.startswith("r-"))[2:] 448 | type_: str = next(c for c in figure_tag["class"] if c.startswith("t-"))[2:] 449 | thumbnail_url: str = "https:" + get_attr(tag_thumbnail, "src") 450 | thumbnail_url = f"{thumbnail_url.rsplit('/', 1)[0]}/{quote(thumbnail_url.rsplit('/', 1)[1])}" 451 | 452 | return { 453 | "id": id_, 454 | "title": title, 455 | "author": author, 456 | "rating": rating, 457 | "type": type_, 458 | "thumbnail_url": thumbnail_url, 459 | } 460 | 461 | 462 | def parse_submission_author(author_tag: Tag) -> dict[str, Any]: 463 | tag_author: Optional[Tag] = author_tag.select_one("div.submission-id-sub-container") 464 | 465 | assert tag_author is not None, _raise_exception(ParsingError("Missing author tag")) 466 | 467 | tag_author_name: Optional[Tag] = tag_author.select_one("span.c-usernameBlockSimple__displayName") 468 | tag_author_icon: Optional[Tag] = author_tag.select_one("img.submission-user-icon") 469 | 470 | assert tag_author_name is not None, _raise_exception(ParsingError("Missing author name tag")) 471 | assert tag_author_icon is not None, _raise_exception(ParsingError("Missing author icon tag")) 472 | 473 | author_name: str = tag_author_name.attrs["title"].strip() 474 | author_display_name: str = tag_author_name.text.strip() 475 | author_title: str = ([*filter( 476 | bool, [child.strip() 477 | for child in tag_author.children 478 | if isinstance(child, NavigableString)][3:] 479 | )] or [""])[-1] 480 | author_title = author_title if 
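# parse_submission_figure() above reduces one <figure id="sid-..."> element from
# a gallery/scraps page to a plain dict; a hedged sketch of its shape for a
# hypothetical figure (every value depends on the page markup):
#
#   {"id": 12345678, "title": "...", "author": "...",
#    "rating": "general", "type": "image", "thumbnail_url": "https://..."}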
tag_author.select_one('a[href$="/#tip"]') is None else sub(r"\|$", "", author_title) 481 | author_title = author_title.strip("\xA0 ") # NBSP 482 | author_icon_url: str = "https:" + get_attr(tag_author_icon, "src") 483 | 484 | return { 485 | "author": author_name, 486 | "author_display_name": author_display_name, 487 | "author_title": author_title, 488 | "author_icon_url": author_icon_url, 489 | } 490 | 491 | 492 | def parse_submission_page(sub_page: BeautifulSoup) -> dict[str, Any]: 493 | tag_id: Optional[Tag] = sub_page.select_one("meta[property='og:url']") 494 | tag_sub_info: Optional[Tag] = sub_page.select_one("div.submission-id-sub-container") 495 | 496 | assert tag_sub_info is not None, _raise_exception(ParsingError("Missing info tag")) 497 | 498 | tag_title: Optional[Tag] = tag_sub_info.select_one("div.submission-title") 499 | tag_author: Optional[Tag] = sub_page.select_one("div.submission-id-container") 500 | tag_date: Optional[Tag] = sub_page.select_one("div.submission-id-container span.popup_date") 501 | tag_tags: list[Tag] = sub_page.select('section.tags-row a[href^="/"]') 502 | tag_views: Optional[Tag] = sub_page.select_one("div.views span") 503 | tag_comment_count: Optional[Tag] = sub_page.select_one("section.stats-container div.comments span") 504 | tag_favorites: Optional[Tag] = sub_page.select_one("div.favorites span") 505 | tag_rating: Optional[Tag] = sub_page.select_one("div.rating span.rating-box") 506 | tag_type: Optional[Tag] = sub_page.select_one("div#submission_page[class^='page-content-type']") 507 | tag_fav: Optional[Tag] = sub_page.select_one("div.fav > a") 508 | tag_info: Optional[Tag] = sub_page.select_one("section.info.text") 509 | tag_user_folders: list[Tag] = sub_page.select("section.folder-list-container > div > a") 510 | 511 | assert tag_info is not None, _raise_exception(ParsingError("Missing info tag")) 512 | 513 | tag_category1: Optional[Tag] = tag_info.select_one("span.category-name") 514 | tag_category2: Optional[Tag] = tag_info.select_one("span.type-name") 515 | tag_species: Optional[Tag] = (info_spans := tag_info.select("span"))[bool(tag_category1) + bool(tag_category2)] 516 | tag_gender: Optional[Tag] = info_spans[1 + bool(tag_category1) + bool(tag_category2)] 517 | tag_description: Optional[Tag] = sub_page.select_one("div.submission-description") 518 | tag_folder: Optional[Tag] = sub_page.select_one("a.button[href^='/scraps/'],a.button[href^='/gallery/']") 519 | tag_file_url: Optional[Tag] = sub_page.select_one("div.download a") 520 | tag_thumbnail_url: Optional[Tag] = sub_page.select_one("img#submissionImg") 521 | tag_prev: Optional[Tag] = sub_page.select_one("div.submission-content div.favorite-nav a:nth-child(1)") 522 | tag_next: Optional[Tag] = sub_page.select_one("div.submission-content div.favorite-nav a:last-child") 523 | 524 | assert tag_id is not None, _raise_exception(ParsingError("Missing id tag")) 525 | assert tag_title is not None, _raise_exception(ParsingError("Missing title tag")) 526 | assert tag_author is not None, _raise_exception(ParsingError("Missing author tag")) 527 | assert tag_date is not None, _raise_exception(ParsingError("Missing date tag")) 528 | assert tag_views is not None, _raise_exception(ParsingError("Missing views tag")) 529 | assert tag_comment_count is not None, _raise_exception(ParsingError("Missing comment count tag")) 530 | assert tag_favorites is not None, _raise_exception(ParsingError("Missing favorites tag")) 531 | assert tag_rating is not None, _raise_exception(ParsingError("Missing rating tag")) 532 | 
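# Note on the assertion style used throughout this module: the message operand
# is _raise_exception(ParsingError(...)), which raises the ParsingError the
# moment the condition fails, so callers get a ParsingError rather than a bare
# AssertionError. As with any assert, the checks are skipped entirely when
# Python runs with -O.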
assert tag_type is not None, _raise_exception(ParsingError("Missing type tag")) 533 | assert tag_fav is not None, _raise_exception(ParsingError("Missing fav tag")) 534 | assert tag_species is not None, _raise_exception(ParsingError("Missing species tag")) 535 | assert tag_gender is not None, _raise_exception(ParsingError("Missing gender tag")) 536 | assert tag_description is not None, _raise_exception(ParsingError("Missing description tag")) 537 | assert tag_folder is not None, _raise_exception(ParsingError("Missing folder tag")) 538 | assert tag_file_url is not None, _raise_exception(ParsingError("Missing file URL tag")) 539 | assert tag_prev is not None, _raise_exception(ParsingError("Missing prev tag")) 540 | assert tag_next is not None, _raise_exception(ParsingError("Missing next tag")) 541 | 542 | tag_footer: Optional[Tag] = tag_description.select_one("div.submission-footer") 543 | 544 | id_: int = int(get_attr(tag_id, "content").strip("/").split("/")[-1]) 545 | title: str = tag_title.text.strip() 546 | date: datetime = parse_date( 547 | get_attr(tag_date, "title").strip() 548 | if match(r"^[A-Za-z]+ \d+,.*$", get_attr(tag_date, "title")) 549 | else tag_date.text.strip() 550 | ) 551 | tags: list[str] = [t.text.strip() for t in tag_tags] 552 | category: str = "" 553 | if tag_category1: 554 | category += tag_category1.text.strip() 555 | if tag_category2: 556 | category += " / " + tag_category2.text.strip() 557 | category.strip() 558 | species: str = tag_species.text.strip() 559 | gender: str = tag_gender.text.strip() 560 | rating: str = tag_rating.text.strip() 561 | views: int = int(tag_views.text.strip()) 562 | comment_count: int = int(tag_comment_count.text.strip()) 563 | favorites: int = int(tag_favorites.text.strip()) 564 | type_: str = tag_type["class"][0][18:] 565 | footer: str = "" 566 | if tag_footer: 567 | if tag_footer_hr := tag_footer.select_one("hr"): 568 | tag_footer_hr.decompose() 569 | footer = clean_html(inner_html(tag_footer)) 570 | tag_footer.decompose() 571 | description: str = clean_html(inner_html(tag_description)) 572 | mentions: list[str] = parse_mentions(tag_description) 573 | folder: str = m.group(1).lower() if (m := match(r"^/(scraps|gallery)/.*$", get_attr(tag_folder, "href"))) else "" 574 | file_url: str = "https:" + get_attr(tag_file_url, "href") 575 | file_url = f"{file_url.rsplit('/', 1)[0]}/{quote(file_url.rsplit('/', 1)[1])}" 576 | thumbnail_url: str = ("https:" + get_attr(tag_thumbnail_url, "data-preview-src")) if tag_thumbnail_url else "" 577 | thumbnail_url = f"{thumbnail_url.rsplit('/', 1)[0]}/{quote(thumbnail_url.rsplit('/', 1)[1])}" \ 578 | if thumbnail_url else "" 579 | prev_sub: Optional[int] = int( 580 | get_attr(tag_prev, "href").split("/")[-2] 581 | ) if tag_prev and tag_prev.text.lower() == "prev" else None 582 | next_sub: Optional[int] = int( 583 | get_attr(tag_next, "href").split("/")[-2] 584 | ) if tag_next and tag_next.text.lower() == "next" else None 585 | fav_link: Optional[str] = f"{root}{href}" if (href := get_attr(tag_fav, "href")).startswith("/fav/") else None 586 | unfav_link: Optional[str] = f"{root}{href}" if (href := get_attr(tag_fav, "href")).startswith("/unfav/") else None 587 | user_folders: list[tuple[str, str, str]] = [] 588 | for a in tag_user_folders: 589 | tag_folder_name: Optional[Tag] = a.select_one("span") 590 | tag_folder_group: Optional[Tag] = a.select_one("strong") 591 | assert tag_folder_name is not None, _raise_exception(ParsingError("Missing folder name tag")) 592 | user_folders.append( 593 | ( 594 | 
tag_folder_name.text.strip(), 595 | (root + href) if (href := a.attrs.get("href", "")) else "", 596 | tag_folder_group.text.strip() if tag_folder_group else "" 597 | ) 598 | ) 599 | 600 | return { 601 | "id": id_, 602 | "title": title, 603 | **parse_submission_author(tag_author), 604 | "date": date, 605 | "tags": tags, 606 | "category": category, 607 | "species": species, 608 | "gender": gender, 609 | "rating": rating, 610 | "views": views, 611 | "comment_count": comment_count, 612 | "favorites": favorites, 613 | "type": type_, 614 | "footer": footer, 615 | "description": description, 616 | "mentions": mentions, 617 | "folder": folder, 618 | "user_folders": user_folders, 619 | "file_url": file_url, 620 | "thumbnail_url": thumbnail_url, 621 | "prev": prev_sub, 622 | "next": next_sub, 623 | "fav_link": fav_link, 624 | "unfav_link": unfav_link, 625 | } 626 | 627 | 628 | def parse_user_header(user_header: Tag) -> dict[str, Any]: 629 | tag_user_name: Optional[Tag] = user_header.select_one("a.c-usernameBlock__userName") 630 | tag_user_display_name: Optional[Tag] = user_header.select_one("a.c-usernameBlock__displayName") 631 | tag_title_join_date: Optional[Tag] = user_header.select_one("userpage-nav-user-details span.user-title") 632 | tag_avatar: Optional[Tag] = user_header.select_one("userpage-nav-avatar img") 633 | 634 | assert tag_user_name is not None, _raise_exception(ParsingError("Missing user name tag")) 635 | assert tag_user_display_name is not None, _raise_exception(ParsingError("Missing user display name tag")) 636 | assert tag_title_join_date is not None, _raise_exception(ParsingError("Missing join date tag")) 637 | assert tag_avatar is not None, _raise_exception(ParsingError("Missing user icon tag")) 638 | 639 | tag_user_symbol: Optional[Tag] = tag_user_name.select_one("span.c-usernameBlock__symbol") 640 | 641 | status: str = tag_user_symbol.text.strip() if tag_user_symbol else "" 642 | name: str = tag_user_name.text.strip().removeprefix(status).strip() 643 | display_name: str = tag_user_display_name.text.strip() 644 | 645 | title: str = ttd[0].strip() if len(ttd := tag_title_join_date.text.rsplit("|", 1)) > 1 else "" 646 | join_date: datetime = parse_date(ttd[-1].strip().split(":", 1)[1]) 647 | avatar_url: str = "https:" + get_attr(tag_avatar, "src") 648 | avatar_url = f"{avatar_url.rsplit('/', 1)[0]}/{quote(avatar_url.rsplit('/', 1)[1])}" 649 | 650 | return { 651 | "status": status, 652 | "name": name, 653 | "display_name": display_name, 654 | "title": title, 655 | "join_date": join_date, 656 | "avatar_url": avatar_url, 657 | } 658 | 659 | 660 | def parse_user_page(user_page: BeautifulSoup) -> dict[str, Any]: 661 | tag_user_header: Optional[Tag] = user_page.select_one("userpage-nav-header") 662 | tag_user_banner: Optional[Tag] = user_page.select_one("site-banner picture img") 663 | tag_profile: Optional[Tag] = user_page.select_one("div.userpage-profile") 664 | tag_stats: Optional[Tag] = user_page.select_one("div.userpage-section-right div.table") 665 | tag_watchlist_to: Optional[Tag] = user_page.select_one("a[href*='watchlist/to']") 666 | tag_watchlist_by: Optional[Tag] = user_page.select_one("a[href*='watchlist/by']") 667 | tag_infos: list[Tag] = user_page.select("div#userpage-contact-item div.table-row") 668 | tag_contacts: list[Tag] = user_page.select("div#userpage-contact div.user-contact-user-info") 669 | tag_user_nav_controls: Optional[Tag] = user_page.select_one("userpage-nav-interface-buttons") 670 | tag_meta_url: Optional[Tag] = 
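# parse_user_header() above is the shared header parser: it returns "status",
# "name", "display_name", "title", "join_date" and "avatar_url", and both
# parse_user_page() below and parse_user_folder() merge that mapping into their
# own results.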
user_page.select_one('meta[property="og:url"]') 671 | 672 | assert tag_user_header is not None, _raise_exception(ParsingError("Missing user header tag")) 673 | assert tag_profile is not None, _raise_exception(ParsingError("Missing profile tag")) 674 | assert tag_stats is not None, _raise_exception(ParsingError("Missing stats tag")) 675 | assert tag_watchlist_to is not None, _raise_exception(ParsingError("Missing watchlist to tag")) 676 | assert tag_watchlist_by is not None, _raise_exception(ParsingError("Missing watchlist by tag")) 677 | assert tag_meta_url is not None, _raise_exception(ParsingError("Missing meta tag")) 678 | 679 | tag_watch: Optional[Tag] = None 680 | tag_block: Optional[Tag] = None 681 | 682 | if tag_user_nav_controls: 683 | tag_watch = tag_user_nav_controls.select_one("a[href^='/watch/'], a[href^='/unwatch/']") 684 | tag_block = tag_user_nav_controls.select_one("a[href^='/block/'], a[href^='/unblock/']") 685 | 686 | profile: str = clean_html(inner_html(tag_profile)) 687 | stats: tuple[int, ...] = ( 688 | *map(lambda s: int(s.split(":")[1]), filter(bool, map(str.strip, tag_stats.text.split("\n")))), 689 | int(m[1]) if (m := search(r"(\d+)", tag_watchlist_to.text)) else 0, 690 | int(m[1]) if (m := search(r"(\d+)", tag_watchlist_by.text)) else 0, 691 | ) 692 | 693 | tag_key: Optional[Tag] 694 | info: dict[str, str] = {} 695 | contacts: dict[str, str] = {} 696 | for tb in tag_infos: 697 | if (tag_key := tb.select_one("div")) is None: 698 | continue 699 | elif "profile-empty" in tb.attrs.get("class", []): 700 | continue 701 | elif not (val := [*filter(bool, [c.strip() for c in tb.children if isinstance(c, NavigableString)])][-1:]): 702 | continue 703 | info[tag_key.text.strip()] = val[0] 704 | for pc in tag_contacts: 705 | if (tag_key := pc.select_one("span")) is None: 706 | continue 707 | contacts[tag_key.text.strip()] = get_attr(a, "href") if (a := pc.select_one("a")) else \ 708 | [*filter(bool, map(str.strip, pc.text.split("\n")))][-1] 709 | tag_watch_href: str = get_attr(tag_watch, "href") if tag_watch else "" 710 | watch: Optional[str] = f"{root}{tag_watch_href}" if tag_watch_href.startswith("/watch/") else None 711 | unwatch: Optional[str] = f"{root}{tag_watch_href}" if tag_watch_href.startswith("/unwatch/") else None 712 | tag_block_href: str = get_attr(tag_block, "href") if tag_block else "" 713 | block: Optional[str] = f"{root}{tag_block_href}" if tag_block_href.startswith("/block/") else None 714 | unblock: Optional[str] = f"{root}{tag_block_href}" if tag_block_href.startswith("/unblock/") else None 715 | user_banner_url: Optional[str] = ("https:" + get_attr(tag_user_banner, "src")) if tag_user_banner else None 716 | user_banner_url = f"{user_banner_url.rsplit('/', 1)[0]}/{quote(user_banner_url.rsplit('/', 1)[1])}" \ 717 | if user_banner_url else None 718 | 719 | return { 720 | **parse_user_header(tag_user_header), 721 | "banner_url": user_banner_url, 722 | "profile": profile, 723 | "stats": stats, 724 | "info": info, 725 | "contacts": contacts, 726 | "watch": watch, 727 | "unwatch": unwatch, 728 | "block": block, 729 | "unblock": unblock, 730 | } 731 | 732 | 733 | def parse_comment_tag(tag: Tag) -> dict: 734 | tag_id: Optional[Tag] = tag.select_one("a.comment_anchor") 735 | tag_user_name: Optional[Tag] = tag.select_one("comment-username a.c-usernameBlock__userName") 736 | tag_user_symbol: Optional[Tag] = tag_user_name.select_one(".c-usernameBlock__symbol") if tag_user_name else None 737 | tag_user_display_name: Optional[Tag] = tag.select_one("comment-username 
a.c-usernameBlock__displayName") 738 | tag_avatar: Optional[Tag] = tag.select_one("div.avatar img.comment_useravatar") 739 | tag_user_title: Optional[Tag] = tag.select_one("comment-title") 740 | tag_body: Optional[Tag] = tag.select_one("comment-user-text") 741 | # TODO: update when they implement parent link 742 | # tag_parent_link: Optional[Tag] = tag.select_one("a.comment-parent") 743 | tag_edited: Optional[Tag] = tag.select_one("img.edited") 744 | 745 | assert tag_id is not None, _raise_exception(ParsingError("Missing link tag")) 746 | assert tag_body is not None, _raise_exception(ParsingError("Missing body tag")) 747 | 748 | attr_id: Optional[str] = tag_id.attrs.get("id") 749 | 750 | assert attr_id is not None, _raise_exception(ParsingError("Missing id attribute")) 751 | 752 | comment_id: int = int(attr_id.removeprefix("cid:")) 753 | comment_text: str = clean_html(inner_html(tag_body)) 754 | 755 | if tag_user_name is None or tag_user_display_name is None: 756 | return { 757 | "id": comment_id, 758 | "user_name": "", 759 | "user_display_name": "", 760 | "user_title": "", 761 | "avatar_url": "", 762 | "timestamp": 0, 763 | "text": comment_text, 764 | "parent": None, 765 | "edited": tag_edited is not None, 766 | "hidden": True, 767 | } 768 | 769 | assert tag_avatar is not None, _raise_exception(ParsingError("Missing user icon tag")) 770 | assert tag_user_title is not None, _raise_exception(ParsingError("Missing user title tag")) 771 | 772 | attr_timestamp: Optional[str] = tag.attrs.get("data-timestamp") 773 | attr_avatar: Optional[str] = tag_avatar.attrs.get("src") 774 | # TODO: update when they implement parent link 775 | # attr_parent_href: Optional[str] = tag_parent_link.attrs.get("href") if tag_parent_link is not None else None 776 | # TODO: remove when they implement parent link 777 | attr_parent_href: Optional[str] = None 778 | if m := search(r' list[Tag]: 805 | return page.select("div.comment_container") 806 | 807 | 808 | def parse_user_tag(user_tag: Tag) -> dict[str, Any]: 809 | tag_status: Optional[Tag] = user_tag.select_one("h2") 810 | tag_title: Optional[Tag] = user_tag.select_one("span") 811 | 812 | assert tag_status, _raise_exception(ParsingError("Missing status and username tag")) 813 | assert tag_title, _raise_exception(ParsingError("Missing title and join date tag")) 814 | 815 | status: str = "" 816 | name: str = tag_status.text.strip() 817 | title: str 818 | join_date_str: str 819 | 820 | if not user_tag.select_one("img.type-admin"): 821 | status, name = name[0], name[1:] 822 | 823 | if "|" in (tag_title_text := tag_title.text.strip()): 824 | title, join_date_str = tag_title_text.rsplit("|", 1) 825 | else: 826 | title, join_date_str = "", tag_title_text 827 | join_date: datetime = parse_date(join_date_str.split(":", 1)[1].strip()) 828 | 829 | return { 830 | "user_name": name, 831 | "user_status": status, 832 | "user_title": title, 833 | "user_join_date": join_date, 834 | } 835 | 836 | 837 | def parse_user_folder(folder_page: BeautifulSoup) -> dict[str, Any]: 838 | tag_user_header: Optional[Tag] = folder_page.select_one("userpage-nav-header") 839 | assert tag_user_header is not None, _raise_exception(ParsingError("Missing user header tag")) 840 | return { 841 | **parse_user_header(tag_user_header), 842 | } 843 | 844 | 845 | def parse_submission_figures(figures_page: BeautifulSoup) -> list[Tag]: 846 | return figures_page.select("figure[id^='sid-']") 847 | 848 | 849 | def parse_user_submissions(submissions_page: BeautifulSoup) -> dict[str, Any]: 850 | user_info: dict[str, 
str] = parse_user_folder(submissions_page) 851 | last_page: bool = not any(b.text.lower() == "next" for b in submissions_page.select("form button.button")) 852 | 853 | return { 854 | **user_info, 855 | "figures": parse_submission_figures(submissions_page), 856 | "last_page": last_page, 857 | } 858 | 859 | 860 | def parse_user_favorites(favorites_page: BeautifulSoup) -> dict[str, Any]: 861 | parsed_submissions = parse_user_submissions(favorites_page) 862 | tag_next_page: Optional[Tag] = favorites_page.select_one('form[action^="/favorites/"][action$="/next"]') 863 | next_page: str = get_attr(tag_next_page, "action").split("/", 3)[-1] if tag_next_page else "" 864 | 865 | return { 866 | **parsed_submissions, 867 | "next_page": next_page, 868 | } 869 | 870 | 871 | def parse_user_journals(journals_page: BeautifulSoup) -> dict[str, Any]: 872 | user_info: dict[str, str] = parse_user_folder(journals_page) 873 | sections: list[Tag] = journals_page.select("section[id^='jid:']") 874 | next_page_tag: Optional[Tag] = journals_page.select_one("div.mini-nav > div.mini-nav-cell:first-child > a.button") 875 | 876 | return { 877 | **user_info, 878 | "sections": sections, 879 | "last_page": next_page_tag is None, 880 | } 881 | 882 | 883 | def parse_watchlist(watch_page: BeautifulSoup) -> tuple[list[tuple[str, str]], int]: 884 | tag_next: Optional[Tag] = watch_page.select_one("section div.floatright form[method=get]") 885 | match_next: Optional[Match] = watchlist_next_regexp.match(get_attr(tag_next, "action")) if tag_next else None 886 | 887 | watches: list[tuple[str, str]] = [] 888 | 889 | for tag_user in watch_page.select("div.watch-list-items"): 890 | user_link: Optional[Tag] = tag_user.select_one("a") 891 | assert user_link, _raise_exception(ParsingError("Missing user link")) 892 | 893 | username: str = user_link.text.strip() 894 | user_link.decompose() 895 | 896 | status: str = tag_user.text.strip() 897 | 898 | watches.append((status, username)) 899 | 900 | return watches, int(match_next[1]) if match_next else 0 901 | -------------------------------------------------------------------------------- /faapi/submission.py: -------------------------------------------------------------------------------- 1 | from collections import namedtuple 2 | from datetime import datetime 3 | from typing import Optional 4 | 5 | from .connection import join_url 6 | from .connection import root 7 | from .exceptions import _raise_exception 8 | from .parse import BeautifulSoup 9 | from .parse import Tag 10 | from .parse import check_page_raise 11 | from .parse import html_to_bbcode 12 | from .parse import parse_comments 13 | from .parse import parse_submission_figure 14 | from .parse import parse_submission_page 15 | from .user import UserPartial 16 | 17 | 18 | class SubmissionStats(namedtuple("SubmissionStats", ["views", "comments", "favorites"])): 19 | """ 20 | This object contains the submission's statistics: 21 | * views 22 | * comments 23 | * favorites 24 | """ 25 | 26 | 27 | class SubmissionUserFolder(namedtuple("SubmissionUserFolder", ["name", "url", "group"])): 28 | """ 29 | This object contains a submission's folder details: 30 | * name: str the name of the folder 31 | * url: str the URL to the folder 32 | * group: str the group the folder belongs to 33 | """ 34 | 35 | 36 | class SubmissionBase: 37 | """ 38 | Base class for the submission objects. 
39 | """ 40 | 41 | def __init__(self): 42 | self.id: int = 0 43 | self.title: str = "" 44 | self.author: UserPartial = UserPartial() 45 | 46 | def __hash__(self) -> int: 47 | return hash(self.id) 48 | 49 | def __eq__(self, other) -> bool: 50 | if isinstance(other, SubmissionBase): 51 | return other.id == self.id 52 | elif isinstance(other, int): 53 | return other == self.id 54 | return False 55 | 56 | def __gt__(self, other) -> bool: 57 | if isinstance(other, SubmissionBase): 58 | return self.id > other.id 59 | elif isinstance(other, int): 60 | return self.id > other 61 | return False 62 | 63 | def __ge__(self, other) -> bool: 64 | if isinstance(other, SubmissionBase): 65 | return self.id >= other.id 66 | elif isinstance(other, int): 67 | return self.id >= other 68 | return False 69 | 70 | def __lt__(self, other) -> bool: 71 | if isinstance(other, SubmissionBase): 72 | return self.id < other.id 73 | elif isinstance(other, int): 74 | return self.id < other 75 | return False 76 | 77 | def __le__(self, other) -> bool: 78 | if isinstance(other, SubmissionBase): 79 | return self.id <= other.id 80 | elif isinstance(other, int): 81 | return self.id <= other 82 | return False 83 | 84 | def __iter__(self): 85 | yield "id", self.id 86 | yield "title", self.title 87 | yield "author", dict(self.author) 88 | 89 | def __repr__(self): 90 | return self.__str__() 91 | 92 | def __str__(self): 93 | return f"{self.id} {self.author} {self.title}" 94 | 95 | @property 96 | def url(self): 97 | """ 98 | Compose the full URL to the submission. 99 | 100 | :return: The URL to the submission. 101 | """ 102 | return join_url(root, "view", self.id) 103 | 104 | 105 | class SubmissionPartial(SubmissionBase): 106 | """ 107 | Contains partial submission information gathered from submissions pages (gallery, scraps, etc.). 108 | """ 109 | 110 | def __init__(self, submission_figure: Optional[Tag] = None): 111 | """ 112 | :param submission_figure: The figure tag from which to parse the submission information. 113 | """ 114 | assert submission_figure is None or isinstance(submission_figure, Tag), \ 115 | _raise_exception(TypeError(f"submission_figure must be {None} or {BeautifulSoup.__name__}")) 116 | 117 | super().__init__() 118 | 119 | self.submission_figure: Optional[Tag] = submission_figure 120 | self.rating: str = "" 121 | self.type: str = "" 122 | self.thumbnail_url: str = "" 123 | 124 | self.parse() 125 | 126 | def __iter__(self): 127 | yield "id", self.id 128 | yield "title", self.title 129 | yield "author", dict(self.author) 130 | yield "rating", self.rating 131 | yield "type", self.type 132 | yield "thumbnail_url", self.thumbnail_url 133 | 134 | def parse(self, submission_figure: Optional[Tag] = None): 135 | """ 136 | Parse a submission figure Tag, overrides any information already present in the object. 137 | 138 | :param submission_figure: The optional figure tag from which to parse the submission. 
139 | """ 140 | assert submission_figure is None or isinstance(submission_figure, Tag), \ 141 | _raise_exception(TypeError(f"submission_figure must be {None} or {BeautifulSoup.__name__}")) 142 | 143 | self.submission_figure = submission_figure or self.submission_figure 144 | if self.submission_figure is None: 145 | return 146 | 147 | parsed: dict = parse_submission_figure(self.submission_figure) 148 | 149 | self.id = parsed["id"] 150 | self.title = parsed["title"] 151 | self.author.name = parsed["author"] 152 | self.rating = parsed["rating"] 153 | self.type = parsed["type"] 154 | self.thumbnail_url = parsed["thumbnail_url"] 155 | 156 | 157 | class Submission(SubmissionBase): 158 | """ 159 | Contains complete submission information gathered from submission pages, including comments. 160 | """ 161 | 162 | def __init__(self, submission_page: Optional[BeautifulSoup] = None): 163 | """ 164 | :param submission_page: The page from which to parse the submission information. 165 | """ 166 | assert submission_page is None or isinstance(submission_page, BeautifulSoup), \ 167 | _raise_exception(TypeError(f"submission_page must be {None} or {BeautifulSoup.__name__}")) 168 | 169 | super().__init__() 170 | 171 | self.submission_page: Optional[BeautifulSoup] = submission_page 172 | self.date: datetime = datetime.fromtimestamp(0) 173 | self.tags: list[str] = [] 174 | self.category: str = "" 175 | self.species: str = "" 176 | self.gender: str = "" 177 | self.rating: str = "" 178 | self.stats: SubmissionStats = SubmissionStats(0, 0, 0) 179 | self.type: str = "" 180 | self.description: str = "" 181 | self.footer: str = "" 182 | self.mentions: list[str] = [] 183 | self.folder: str = "" 184 | self.user_folders: list[SubmissionUserFolder] = [] 185 | self.file_url: str = "" 186 | self.thumbnail_url: str = "" 187 | self.prev: Optional[int] = None 188 | self.next: Optional[int] = None 189 | self.favorite: bool = False 190 | self.favorite_toggle_link: str = "" 191 | from .comment import Comment 192 | self.comments: list[Comment] = [] 193 | 194 | self.parse() 195 | 196 | def __iter__(self): 197 | yield "id", self.id 198 | yield "title", self.title 199 | yield "author", dict(self.author) 200 | yield "date", self.date 201 | yield "tags", self.tags 202 | yield "category", self.category 203 | yield "species", self.species 204 | yield "gender", self.gender 205 | yield "rating", self.rating 206 | yield "stats", self.stats._asdict() 207 | yield "type", self.type 208 | yield "description", self.description 209 | yield "footer", self.footer 210 | yield "mentions", self.mentions 211 | yield "folder", self.folder 212 | yield "user_folders", [f._asdict() for f in self.user_folders] 213 | yield "file_url", self.file_url 214 | yield "thumbnail_url", self.thumbnail_url 215 | yield "prev", self.prev 216 | yield "next", self.next 217 | yield "favorite", self.favorite 218 | yield "favorite_toggle_link", self.favorite_toggle_link 219 | from .comment import _sort_comments_dict 220 | yield "comments", _sort_comments_dict(self.comments) 221 | 222 | @property 223 | def description_bbcode(self) -> str: 224 | """ 225 | The submission description formatted to BBCode 226 | 227 | :return: BBCode description 228 | """ 229 | return html_to_bbcode(self.description) 230 | 231 | @property 232 | def footer_bbcode(self) -> str: 233 | """ 234 | The submission footer formatted to BBCode 235 | 236 | :return: BBCode footer 237 | """ 238 | return html_to_bbcode(self.footer) 239 | 240 | def parse(self, submission_page: Optional[BeautifulSoup] = None): 241 | 
""" 242 | Parse a submission page, overrides any information already present in the object. 243 | 244 | :param submission_page: The optional page from which to parse the submission. 245 | """ 246 | assert submission_page is None or isinstance(submission_page, BeautifulSoup), \ 247 | _raise_exception(TypeError(f"submission_page must be {None} or {BeautifulSoup.__name__}")) 248 | 249 | self.submission_page = submission_page or self.submission_page 250 | if self.submission_page is None: 251 | return 252 | 253 | check_page_raise(self.submission_page) 254 | 255 | parsed: dict = parse_submission_page(self.submission_page) 256 | 257 | self.id = parsed["id"] 258 | self.title = parsed["title"] 259 | self.author.name = parsed["author"] 260 | self.author.display_name = parsed["author_display_name"] 261 | self.author.title = parsed["author_title"] 262 | self.author.avatar_url = parsed["author_icon_url"] 263 | self.date = parsed["date"] 264 | self.tags = parsed["tags"] 265 | self.category = parsed["category"] 266 | self.species = parsed["species"] 267 | self.gender = parsed["gender"] 268 | self.rating = parsed["rating"] 269 | self.stats = SubmissionStats(parsed["views"], parsed["comment_count"], parsed["favorites"]) 270 | self.type = parsed["type"] 271 | self.description = parsed["description"] 272 | self.footer = parsed["footer"] 273 | self.mentions = parsed["mentions"] 274 | self.folder = parsed["folder"] 275 | self.user_folders = [SubmissionUserFolder(*f) for f in parsed["user_folders"]] 276 | self.file_url = parsed["file_url"] 277 | self.thumbnail_url = parsed["thumbnail_url"] 278 | self.prev = parsed["prev"] 279 | self.next = parsed["next"] 280 | self.favorite = parsed["unfav_link"] is not None 281 | self.favorite_toggle_link = parsed["fav_link"] or parsed["unfav_link"] 282 | from .comment import sort_comments, Comment 283 | self.comments = sort_comments([Comment(t, self) for t in parse_comments(self.submission_page)]) 284 | -------------------------------------------------------------------------------- /faapi/user.py: -------------------------------------------------------------------------------- 1 | from collections import namedtuple 2 | from datetime import datetime 3 | from typing import Optional 4 | from urllib.parse import quote 5 | 6 | from .connection import join_url 7 | from .connection import root 8 | from .exceptions import _raise_exception 9 | from .parse import BeautifulSoup 10 | from .parse import Tag 11 | from .parse import check_page_raise 12 | from .parse import html_to_bbcode 13 | from .parse import parse_user_page 14 | from .parse import parse_user_tag 15 | from .parse import username_url 16 | 17 | 18 | class UserStats(namedtuple("UserStats", ["views", "submissions", "favorites", "comments_earned", 19 | "comments_made", "journals", "watched_by", "watching"])): 20 | """ 21 | This object contains a user's statistics: 22 | * views 23 | * submissions 24 | * favorites 25 | * comments_earned 26 | * comments_made 27 | * journals 28 | * watched_by 29 | * watching 30 | """ 31 | 32 | 33 | class UserBase: 34 | """ 35 | Base class for the user objects. 
36 | """ 37 | 38 | def __init__(self): 39 | self.name: str = "" 40 | self.display_name: str = "" 41 | self.status: str = "" 42 | 43 | def __hash__(self) -> int: 44 | return hash(self.name_url) 45 | 46 | def __eq__(self, other) -> bool: 47 | if isinstance(other, UserBase): 48 | return other.name_url == self.name_url 49 | elif isinstance(other, str): 50 | return username_url(other) == self.name_url 51 | return False 52 | 53 | def __gt__(self, other) -> bool: 54 | if isinstance(other, UserBase): 55 | return self.name_url > other.name_url 56 | elif isinstance(other, str): 57 | return self.name_url > username_url(other) 58 | return False 59 | 60 | def __ge__(self, other) -> bool: 61 | if isinstance(other, UserBase): 62 | return self.name_url >= other.name_url 63 | elif isinstance(other, str): 64 | return self.name_url >= username_url(other) 65 | return False 66 | 67 | def __lt__(self, other) -> bool: 68 | if isinstance(other, UserBase): 69 | return self.name_url < other.name_url 70 | elif isinstance(other, str): 71 | return self.name_url < username_url(other) 72 | return False 73 | 74 | def __le__(self, other) -> bool: 75 | if isinstance(other, UserBase): 76 | return self.name_url <= other.name_url 77 | elif isinstance(other, str): 78 | return self.name_url <= username_url(other) 79 | return False 80 | 81 | def __iter__(self): 82 | yield "name", self.name 83 | yield "display_name", self.display_name 84 | yield "status", self.status 85 | 86 | def __repr__(self): 87 | return self.__str__() 88 | 89 | def __str__(self): 90 | return self.status + self.name 91 | 92 | @property 93 | def name_url(self): 94 | """ 95 | Compose the URL-safe username. 96 | 97 | :return: The cleaned username. 98 | """ 99 | return username_url(self.name) 100 | 101 | @property 102 | def url(self): 103 | """ 104 | Compose the full URL to the user. 105 | 106 | :return: The URL to the user. 107 | """ 108 | return join_url(root, "user", quote(self.name_url)) 109 | 110 | def generate_avatar_url(self) -> str: 111 | """ 112 | Generate the URl for the current user icon. 113 | 114 | :return: The URL to the user icon 115 | """ 116 | return f"https://a.furaffinity.net/{datetime.now():%Y%m%d}/{self.name_url}.gif" 117 | 118 | 119 | class UserPartial(UserBase): 120 | """ 121 | Contains partial user information gathered from user folders (gallery, journals, etc.) and submission/journal pages. 122 | """ 123 | 124 | def __init__(self, user_tag: Optional[Tag] = None): 125 | """ 126 | :param user_tag: The tag from which to parse the user information. 127 | """ 128 | assert user_tag is None or isinstance(user_tag, Tag), \ 129 | _raise_exception(TypeError(f"user_tag must be {None} or {Tag.__name__}")) 130 | 131 | super().__init__() 132 | 133 | self.user_tag: Optional[Tag] = user_tag 134 | self.title: str = "" 135 | self.join_date: datetime = datetime.fromtimestamp(0) 136 | self.avatar_url: str = "" 137 | 138 | self.parse() 139 | 140 | def __iter__(self): 141 | yield "name", self.name 142 | yield "status", self.status 143 | yield "title", self.title 144 | yield "join_date", self.join_date 145 | yield "avatar_url", self.avatar_url 146 | 147 | def parse(self, user_tag: Optional[Tag] = None): 148 | """ 149 | Parse a user page, overrides any information already present in the object. 150 | 151 | :param user_tag: The tag from which to parse the user information. 
152 | """ 153 | assert user_tag is None or isinstance(user_tag, Tag), \ 154 | _raise_exception(TypeError(f"user_tag must be {None} or {Tag.__name__}")) 155 | 156 | self.user_tag = user_tag or self.user_tag 157 | if self.user_tag is None: 158 | return 159 | 160 | parsed: dict = parse_user_tag(self.user_tag) 161 | 162 | self.name = parsed["name"] 163 | self.status = parsed["status"] 164 | self.title = parsed["title"] 165 | self.join_date = parsed["join_date"] 166 | 167 | 168 | class User(UserBase): 169 | """ 170 | Contains complete user information gathered from userpages. 171 | """ 172 | 173 | def __init__(self, user_page: Optional[BeautifulSoup] = None): 174 | """ 175 | :param user_page: The page from which to parse the user information. 176 | """ 177 | assert user_page is None or isinstance(user_page, BeautifulSoup), \ 178 | _raise_exception(TypeError(f"user_page must be {None} or {BeautifulSoup.__name__}")) 179 | 180 | super().__init__() 181 | 182 | self.user_page: Optional[BeautifulSoup] = user_page 183 | self.title: str = "" 184 | self.join_date: datetime = datetime.fromtimestamp(0) 185 | self.profile: str = "" 186 | self.stats: UserStats = UserStats(0, 0, 0, 0, 0, 0, 0, 0) 187 | self.info: dict[str, str] = {} 188 | self.contacts: dict[str, str] = {} 189 | self.avatar_url: str = "" 190 | self.banner_url: Optional[str] = None 191 | self.watched: bool = False 192 | self.watched_toggle_link: Optional[str] = None 193 | self.blocked: bool = False 194 | self.blocked_toggle_link: Optional[str] = None 195 | 196 | self.parse() 197 | 198 | def __iter__(self): 199 | yield "name", self.name 200 | yield "display_name", self.display_name 201 | yield "status", self.status 202 | yield "title", self.title 203 | yield "join_date", self.join_date 204 | yield "profile", self.profile 205 | yield "stats", self.stats._asdict() 206 | yield "info", self.info 207 | yield "contacts", self.contacts 208 | yield "avatar_url", self.avatar_url 209 | yield "banner_url", self.banner_url 210 | yield "watched", self.watched 211 | yield "watched_toggle_link", self.watched_toggle_link 212 | yield "blocked", self.blocked 213 | yield "blocked_toggle_link", self.blocked_toggle_link 214 | 215 | @property 216 | def profile_bbcode(self) -> str: 217 | """ 218 | The user profile text formatted to BBCode 219 | 220 | :return: BBCode profile 221 | """ 222 | return html_to_bbcode(self.profile) 223 | 224 | def parse(self, user_page: Optional[BeautifulSoup] = None): 225 | """ 226 | Parse a user page, overrides any information already present in the object. 227 | 228 | :param user_page: The page from which to parse the user information. 
229 | """ 230 | assert user_page is None or isinstance(user_page, BeautifulSoup), \ 231 | _raise_exception(TypeError(f"user_page must be {None} or {BeautifulSoup.__name__}")) 232 | 233 | self.user_page = user_page or self.user_page 234 | if self.user_page is None: 235 | return 236 | 237 | check_page_raise(self.user_page) 238 | 239 | parsed: dict = parse_user_page(self.user_page) 240 | 241 | self.name = parsed["name"] 242 | self.display_name = parsed["display_name"] 243 | self.status = parsed["status"] 244 | self.profile = parsed["profile"] 245 | self.title = parsed["title"] 246 | self.join_date = parsed["join_date"] 247 | self.stats = UserStats(*parsed["stats"]) 248 | self.info = parsed["info"] 249 | self.contacts = parsed["contacts"] 250 | self.avatar_url = parsed["avatar_url"] 251 | self.banner_url = parsed["banner_url"] 252 | self.watched = parsed["watch"] is None and parsed["unwatch"] is not None 253 | self.watched_toggle_link = parsed["watch"] or parsed["unwatch"] or None 254 | self.blocked = parsed["block"] is None and parsed["unblock"] is not None 255 | self.blocked_toggle_link = parsed["block"] or parsed["unblock"] or None 256 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "faapi" 3 | version = "3.11.9" 4 | description = "Python module to implement API-like functionality for the FurAffinity.net website." 5 | authors = ["Matteo Campinoti "] 6 | license = "EUPL-1.2" 7 | readme = "README.md" 8 | homepage = "https://github.com/FurryCoders/FAAPI" 9 | repository = "https://github.com/FurryCoders/FAAPI" 10 | classifiers = [ 11 | "Programming Language :: Python :: 3.9", 12 | "Programming Language :: Python :: 3.10", 13 | "Operating System :: OS Independent", 14 | "License :: OSI Approved :: European Union Public Licence 1.2 (EUPL 1.2)", 15 | "Development Status :: 5 - Production/Stable", 16 | "Intended Audience :: Developers", 17 | "Topic :: Internet :: WWW/HTTP :: Indexing/Search", 18 | "Typing :: Typed", 19 | ] 20 | 21 | [tool.poetry.urls] 22 | "Changelog" = "https://github.com/FurryCoders/FAAPI/blob/main/CHANGELOG.md" 23 | "Source" = "https://github.com/FurryCoders/FAAPI" 24 | "Download" = "https://pypi.org/project/faapi/#files" 25 | "Bug Reports" = "https://github.com/FurryCoders/FAAPI/issues" 26 | 27 | [tool.poetry.dependencies] 28 | python = "^3.9" 29 | requests = "^2.32.3" 30 | beautifulsoup4 = "^4.12.3" 31 | lxml = "^5.3.0" 32 | python-dateutil = "^2.9.0" 33 | bbcode = "^1.1.0" 34 | 35 | [tool.poetry.group.test.dependencies] 36 | pytest = "^7.2.0" 37 | mypy = "^0.991" 38 | types-beautifulsoup4 = "^4.11.6" 39 | flake8 = "^6.0.0" 40 | coverage = "^7.3.1" 41 | 42 | [build-system] 43 | requires = ["poetry>=0.12"] 44 | build-backend = "poetry.masonry.api" 45 | -------------------------------------------------------------------------------- /tests/test_connection.py: -------------------------------------------------------------------------------- 1 | from json import load 2 | from pathlib import Path 3 | from urllib.robotparser import RobotFileParser 4 | 5 | from pytest import fixture 6 | from pytest import raises 7 | from requests import Response, Session 8 | from requests.cookies import RequestsCookieJar 9 | 10 | from faapi.connection import get_robots 11 | from faapi.connection import join_url 12 | from faapi.connection import make_session 13 | from faapi.connection import root 14 | from faapi.exceptions import Unauthorized 
15 | 16 | __root__: Path = Path(__file__).resolve().parent 17 | 18 | 19 | @fixture 20 | def data() -> dict: 21 | return load((__root__ / "test_data.json").open()) 22 | 23 | 24 | @fixture 25 | def cookies(data: dict) -> RequestsCookieJar: 26 | return data["cookies"] 27 | 28 | 29 | def test_make_session_cookie_jar(): 30 | cookie_jar = RequestsCookieJar() 31 | cookie_jar.set("a", "a") 32 | result = make_session(cookie_jar, Session) 33 | assert isinstance(result, Session) 34 | 35 | 36 | def test_make_session_list_dict(): 37 | result = make_session([{"name": "a", "value": "a"}], Session) 38 | assert isinstance(result, Session) 39 | 40 | 41 | def test_make_session_error(): 42 | with raises(Unauthorized): 43 | make_session([], Session) 44 | 45 | 46 | def test_get_robots(cookies: RequestsCookieJar): 47 | result = get_robots(make_session(cookies, Session)) 48 | assert isinstance(result, RobotFileParser) 49 | assert getattr(result, "default_entry", None) is not None 50 | 51 | 52 | def test_get(cookies: RequestsCookieJar): 53 | res: Response = make_session(cookies, Session).get(join_url(root, "view", 1)) 54 | assert res.ok 55 | -------------------------------------------------------------------------------- /tests/test_faapi.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime 2 | from datetime import timedelta 3 | from json import load 4 | from pathlib import Path 5 | from re import sub 6 | from typing import Optional 7 | 8 | from pytest import fixture 9 | from pytest import raises 10 | from requests.cookies import RequestsCookieJar 11 | 12 | import faapi 13 | from faapi import Comment 14 | from faapi import FAAPI 15 | from faapi import JournalPartial 16 | from faapi import SubmissionPartial 17 | from faapi import UserPartial 18 | from faapi.exceptions import DisallowedPath 19 | from faapi.exceptions import Unauthorized 20 | from faapi.parse import username_url 21 | from test_parse import clean_html 22 | 23 | __root__: Path = Path(__file__).resolve().parent 24 | 25 | 26 | @fixture 27 | def data() -> dict: 28 | return load((__root__ / "test_data.json").open()) 29 | 30 | 31 | @fixture 32 | def cookies(data: dict) -> RequestsCookieJar: 33 | return data["cookies"] 34 | 35 | 36 | @fixture 37 | def user_test_data() -> dict: 38 | return load((__root__ / "test_user.json").open()) 39 | 40 | 41 | @fixture 42 | def submission_test_data() -> dict: 43 | return load((__root__ / "test_submission.json").open()) 44 | 45 | 46 | @fixture 47 | def journal_test_data() -> dict: 48 | return load((__root__ / "test_journal.json").open()) 49 | 50 | 51 | def dst_us() -> timedelta: 52 | now: datetime = datetime.now() 53 | 54 | if now.month < 3 or now.month >= 12: 55 | return timedelta(0) 56 | 57 | m1 = datetime(now.year, 3, 1) 58 | 59 | if now < datetime(now.year, 3, 7 + (6 - m1.weekday() + 1)): 60 | return timedelta(0) 61 | 62 | n1 = datetime(now.year, 11, 1) 63 | 64 | if now > datetime(now.year, 11, 6 - n1.weekday() + 1): 65 | return timedelta(0) 66 | 67 | return timedelta(hours=-1) 68 | 69 | 70 | def remove_user_icons(html: str) -> str: 71 | return sub(r"a\.furaffinity\.net/\d{8}/[^. 
]+.gif", "", html) 72 | 73 | 74 | def test_robots(cookies: RequestsCookieJar): 75 | api: FAAPI = FAAPI(cookies) 76 | assert getattr(api.robots, "default_entry") is not None 77 | assert api.crawl_delay >= 1 78 | assert api.check_path("/login") 79 | assert api.check_path("/view") 80 | assert api.check_path("/journal") 81 | assert api.check_path("/user") 82 | assert api.check_path("/gallery") 83 | assert api.check_path("/scraps") 84 | assert api.check_path("/favorite") 85 | assert api.check_path("/journals") 86 | assert api.check_path("/watchlist/to") 87 | assert api.check_path("/watchlist/by") 88 | with raises(DisallowedPath): 89 | assert not api.check_path("/fav/", raise_for_disallowed=True) 90 | 91 | 92 | def test_login(cookies: RequestsCookieJar): 93 | api: FAAPI = FAAPI(cookies) 94 | assert api.login_status 95 | assert api.connection_status 96 | 97 | api.load_cookies([{"name": "a", "value": "1"}]) 98 | with raises(Unauthorized): 99 | api.me() 100 | 101 | 102 | # noinspection DuplicatedCode 103 | def test_frontpage(cookies: RequestsCookieJar): 104 | api: FAAPI = FAAPI(cookies) 105 | 106 | ss = api.frontpage() 107 | 108 | assert len({s.id for s in ss}) == len(ss) 109 | 110 | for submission in ss: 111 | assert submission.id > 0 112 | assert submission.type != "" 113 | assert submission.rating != "" 114 | assert submission.thumbnail_url != "" 115 | 116 | 117 | def test_user(cookies: RequestsCookieJar, user_test_data: dict): 118 | api: FAAPI = FAAPI(cookies) 119 | 120 | user = api.user(user_test_data["name"]) 121 | user_dict = dict(user) 122 | 123 | assert user.name.lower() == user_dict["name"].lower() == user_test_data["name"].lower() 124 | assert user.status == user_dict["status"] == user_test_data["status"] 125 | assert user.title == user_dict["title"] == user_test_data["title"] 126 | assert user.join_date == user_dict["join_date"] == datetime.fromisoformat(user_test_data["join_date"]) + dst_us() 127 | assert user.stats.views == user_dict["stats"]["views"] 128 | assert user_dict["stats"]["views"] >= user_test_data["stats"]["views"] 129 | assert user.stats.submissions == user_dict["stats"]["submissions"] 130 | assert user_dict["stats"]["submissions"] >= user_test_data["stats"]["submissions"] 131 | assert user.stats.favorites == user_dict["stats"]["favorites"] 132 | assert user_dict["stats"]["favorites"] >= user_test_data["stats"]["favorites"] 133 | assert user.stats.comments_earned == user_dict["stats"]["comments_earned"] 134 | assert user_dict["stats"]["comments_earned"] >= user_test_data["stats"]["comments_earned"] 135 | assert user.stats.comments_made == user_dict["stats"]["comments_made"] 136 | assert user_dict["stats"]["comments_made"] >= user_test_data["stats"]["comments_made"] 137 | assert user.stats.journals == user_dict["stats"]["journals"] 138 | assert user_dict["stats"]["journals"] >= user_test_data["stats"]["journals"] 139 | assert user.info == user_dict["info"] == user_test_data["info"] 140 | assert user.contacts == user_dict["contacts"] == user_test_data["contacts"] 141 | assert user.avatar_url == user_dict["avatar_url"] != "" 142 | assert user.banner_url == user_dict["banner_url"] != "" 143 | assert remove_user_icons(clean_html(user.profile)) == \ 144 | remove_user_icons(clean_html(user_dict["profile"])) == \ 145 | remove_user_icons(clean_html(user_test_data["profile"])) 146 | assert user.profile_bbcode == user_test_data["profile_bbcode"] 147 | 148 | 149 | # noinspection DuplicatedCode 150 | def test_submission(cookies: RequestsCookieJar, submission_test_data: dict): 151 | 
api: FAAPI = FAAPI(cookies) 152 | 153 | submission, file = api.submission(submission_test_data["id"], get_file=True) 154 | submission_dict = dict(submission) 155 | 156 | assert submission.id == submission_dict["id"] == submission_test_data["id"] 157 | assert submission.title == submission_dict["title"] == submission_test_data["title"] 158 | assert submission.author.name.lower() == submission_dict["author"]["name"].lower() == submission_test_data["author"]["name"].lower() 159 | assert submission.author.avatar_url == submission_dict["author"]["avatar_url"] != "" 160 | assert submission.date == submission_dict["date"] == datetime.fromisoformat(submission_test_data["date"]) + dst_us() 161 | assert submission.tags == submission_dict["tags"] == submission_test_data["tags"] 162 | assert submission.category == submission_dict["category"] == submission_test_data["category"] 163 | assert submission.species == submission_dict["species"] == submission_test_data["species"] 164 | assert submission.gender == submission_dict["gender"] == submission_test_data["gender"] 165 | assert submission.rating == submission_dict["rating"] == submission_test_data["rating"] 166 | assert submission.stats.views == submission_dict["stats"]["views"] 167 | assert submission.stats.views >= submission_test_data["stats"]["views"] 168 | assert submission.stats.comments == submission_dict["stats"]["comments"] 169 | assert submission.stats.comments >= submission_test_data["stats"]["comments"] 170 | assert submission.stats.favorites == submission_dict["stats"]["favorites"] 171 | assert submission.stats.favorites >= submission_test_data["stats"]["favorites"] 172 | assert submission.type == submission_dict["type"] == submission_test_data["type"] 173 | assert submission.mentions == submission_dict["mentions"] == submission_test_data["mentions"] 174 | assert submission.folder == submission_dict["folder"] == submission_test_data["folder"] 175 | assert submission.file_url == submission_dict["file_url"] != "" 176 | assert submission.thumbnail_url == submission_dict["thumbnail_url"] != "" 177 | assert submission.prev == submission_dict["prev"] == submission_test_data["prev"] 178 | assert submission.next == submission_dict["next"] == submission_test_data["next"] 179 | assert submission.favorite == submission_dict["favorite"] == submission_test_data["favorite"] 180 | assert bool(submission.favorite_toggle_link) == bool(submission_dict["favorite_toggle_link"]) == \ 181 | bool(submission_test_data["favorite_toggle_link"]) 182 | assert remove_user_icons(clean_html(submission.description)) == \ 183 | remove_user_icons(clean_html(submission_dict["description"])) == \ 184 | remove_user_icons(clean_html(submission_test_data["description"])) 185 | assert remove_user_icons(clean_html(submission.footer)) == \ 186 | remove_user_icons(clean_html(submission_dict["footer"])) == \ 187 | remove_user_icons(clean_html(submission_test_data["footer"])) 188 | assert submission.description_bbcode == submission_test_data["description_bbcode"] 189 | assert submission.footer_bbcode == submission_test_data["footer_bbcode"] 190 | 191 | assert file is not None and len(file) > 0 192 | 193 | assert len(faapi.comment.flatten_comments(submission.comments)) == submission.stats.comments 194 | 195 | comments: dict[int, Comment] = {c.id: c for c in faapi.comment.flatten_comments(submission.comments)} 196 | 197 | for comment in comments.values(): 198 | assert comment.reply_to is None or isinstance(comment.reply_to, Comment) 199 | 200 | if comment.reply_to: 201 | assert 
comment.reply_to.id in comments 202 | assert comment in comments[comment.reply_to.id].replies 203 | 204 | if comment.replies: 205 | for reply in comment.replies: 206 | assert reply.reply_to == comment 207 | 208 | 209 | # noinspection DuplicatedCode 210 | def test_journal(cookies: RequestsCookieJar, journal_test_data: dict): 211 | api: FAAPI = FAAPI(cookies) 212 | 213 | journal = api.journal(journal_test_data["id"]) 214 | journal_dict = dict(journal) 215 | 216 | assert journal.id == journal_dict["id"] == journal_test_data["id"] 217 | assert journal.title == journal_dict["title"] == journal_test_data["title"] 218 | assert journal.author.name.lower() == journal_dict["author"]["name"].lower() == journal_test_data["author"]["name"].lower() 219 | assert journal.author.join_date == journal_dict["author"]["join_date"] == \ 220 | datetime.fromisoformat(journal_test_data["author"]["join_date"]) + dst_us() 221 | assert journal.author.avatar_url == journal_dict["author"]["avatar_url"] != "" 222 | assert journal.date == journal_dict["date"] == datetime.fromisoformat(journal_test_data["date"]) + dst_us() 223 | assert journal.stats.comments == journal_dict["stats"]["comments"] >= journal_test_data["stats"]["comments"] 224 | assert journal.mentions == journal_dict["mentions"] == journal_test_data["mentions"] 225 | assert remove_user_icons(clean_html(journal.content)) == \ 226 | remove_user_icons(clean_html(journal_dict["content"])) == \ 227 | remove_user_icons(clean_html(journal_test_data["content"])) 228 | assert remove_user_icons(clean_html(journal.header)) == \ 229 | remove_user_icons(clean_html(journal_dict["header"])) == \ 230 | remove_user_icons(clean_html(journal_test_data["header"])) 231 | assert remove_user_icons(clean_html(journal.footer)) == \ 232 | remove_user_icons(clean_html(journal_dict["footer"])) == \ 233 | remove_user_icons(clean_html(journal_test_data["footer"])) 234 | assert journal.content_bbcode == journal_test_data["content_bbcode"] 235 | assert journal.header_bbcode == journal_test_data["header_bbcode"] 236 | assert journal.footer_bbcode == journal_test_data["footer_bbcode"] 237 | 238 | assert len(faapi.comment.flatten_comments(journal.comments)) == journal.stats.comments 239 | 240 | comments: dict[int, Comment] = {c.id: c for c in faapi.comment.flatten_comments(journal.comments)} 241 | 242 | for comment in comments.values(): 243 | assert comment.reply_to is None or isinstance(comment.reply_to, Comment) 244 | 245 | if comment.reply_to: 246 | assert comment.reply_to.id in comments 247 | assert comment in comments[comment.reply_to.id].replies 248 | 249 | if comment.replies: 250 | for reply in comment.replies: 251 | assert reply.reply_to == comment 252 | 253 | 254 | # noinspection DuplicatedCode 255 | def test_gallery(cookies: RequestsCookieJar, data: dict): 256 | api: FAAPI = FAAPI(cookies) 257 | 258 | ss: list[SubmissionPartial] = [] 259 | p: Optional[int] = 1 260 | 261 | while p: 262 | ss_, p_ = api.gallery(data["gallery"]["user"], p) 263 | assert isinstance(ss, list) 264 | assert all(isinstance(s, SubmissionPartial) for s in ss_) 265 | assert p_ is None or isinstance(p_, int) 266 | assert p_ is None or p_ > p 267 | assert len(ss) or p == 1 268 | assert len(ss_) or p_ is None 269 | 270 | ss.extend(ss_) 271 | p = p_ 272 | 273 | assert len(ss) >= data["gallery"]["length"] 274 | assert len({s.id for s in ss}) == len(ss) 275 | 276 | for submission in ss: 277 | assert submission.id > 0 278 | assert submission.type != "" 279 | assert submission.rating != "" 280 | assert 
submission.thumbnail_url != "" 281 | assert submission.author.name_url == username_url(data["gallery"]["user"]) 282 | 283 | 284 | # noinspection DuplicatedCode 285 | def test_scraps(cookies: RequestsCookieJar, data: dict): 286 | api: FAAPI = FAAPI(cookies) 287 | 288 | ss: list[SubmissionPartial] = [] 289 | p: Optional[int] = 1 290 | 291 | while p: 292 | ss_, p_ = api.scraps(data["scraps"]["user"], p) 293 | assert isinstance(ss, list) 294 | assert all(isinstance(s, SubmissionPartial) for s in ss_) 295 | assert p_ is None or isinstance(p_, int) 296 | assert p_ is None or p_ > p 297 | assert len(ss) or p == 1 298 | assert len(ss_) or p_ is None 299 | 300 | ss.extend(ss_) 301 | p = p_ 302 | 303 | assert len(ss) >= data["scraps"]["length"] 304 | assert len({s.id for s in ss}) == len(ss) 305 | 306 | for submission in ss: 307 | assert submission.id > 0 308 | assert submission.type != "" 309 | assert submission.rating != "" 310 | assert submission.thumbnail_url != "" 311 | assert submission.author.name_url == username_url(data["scraps"]["user"]) 312 | 313 | 314 | # noinspection DuplicatedCode 315 | def test_favorites(cookies: RequestsCookieJar, data: dict): 316 | api: FAAPI = FAAPI(cookies) 317 | 318 | ss: list[SubmissionPartial] = [] 319 | p: Optional[str] = "/" 320 | 321 | while p and len(ss) < data["favorites"]["max_length"]: 322 | ss_, p_ = api.favorites(data["favorites"]["user"], p) 323 | assert isinstance(ss, list) 324 | assert all(isinstance(s, SubmissionPartial) for s in ss_) 325 | assert p_ is None or isinstance(p_, str) 326 | assert p_ is None or (p == "/" and p_ > p) or p_ < p 327 | assert len(ss) or p == "/" 328 | assert len(ss_) or p_ is None 329 | 330 | ss.extend(ss_) 331 | p = p_ 332 | 333 | assert not data["favorites"]["next_page"] or p is not None 334 | assert len(ss) >= data["favorites"]["length"] 335 | assert len({s.id for s in ss}) == len(ss) 336 | 337 | for submission in ss: 338 | assert submission.id > 0 339 | assert submission.type != "" 340 | assert submission.rating != "" 341 | assert submission.thumbnail_url != "" 342 | 343 | 344 | # noinspection DuplicatedCode 345 | def test_journals(cookies: RequestsCookieJar, data: dict): 346 | api: FAAPI = FAAPI(cookies) 347 | 348 | js: list[JournalPartial] = [] 349 | p: Optional[int] = 1 350 | 351 | while p: 352 | js_, p_ = api.journals(data["journals"]["user"], p) 353 | assert isinstance(js, list) 354 | assert all(isinstance(s, JournalPartial) for s in js_) 355 | assert p_ is None or isinstance(p_, int) 356 | assert p_ is None or p_ > p 357 | assert len(js) or p == 1 358 | assert len(js_) or p_ is None 359 | 360 | js.extend(js_) 361 | p = p_ 362 | 363 | assert len(js) >= data["journals"]["length"] 364 | assert len({j.id for j in js}) == len(js) 365 | 366 | for journal in js: 367 | assert journal.id > 0 368 | assert journal.author.join_date.timestamp() > 0 369 | assert journal.date.timestamp() > 0 370 | assert journal.author.name_url == username_url(data["scraps"]["user"]) 371 | 372 | 373 | # noinspection DuplicatedCode 374 | def test_watchlist_to(cookies: RequestsCookieJar, data: dict): 375 | api: FAAPI = FAAPI(cookies) 376 | assert api.login_status 377 | 378 | ws: list[UserPartial] = [] 379 | p: Optional[int] = 1 380 | 381 | while p: 382 | ws_, p_ = api.watchlist_to(data["watchlist"]["user"], p) 383 | assert isinstance(ws, list) 384 | assert all(isinstance(s, UserPartial) for s in ws_) 385 | assert p_ is None or isinstance(p_, int) 386 | assert p_ is None or p_ > p 387 | assert len(ws) or p == 1 388 | assert len(ws_) or p_ is None 
389 | 390 | ws.extend(ws_) 391 | p = p_ 392 | 393 | assert len({w.name_url for w in ws}) == len(ws) 394 | 395 | 396 | # noinspection DuplicatedCode 397 | def test_watchlist_by(cookies: RequestsCookieJar, data: dict): 398 | api: FAAPI = FAAPI(cookies) 399 | assert api.login_status 400 | 401 | ws: list[UserPartial] = [] 402 | p: Optional[int] = 1 403 | 404 | while p: 405 | ws_, p_ = api.watchlist_by(data["watchlist"]["user"], p) 406 | assert isinstance(ws, list) 407 | assert all(isinstance(s, UserPartial) for s in ws_) 408 | assert p_ is None or isinstance(p_, int) 409 | assert p_ is None or p_ > p 410 | assert len(ws) or p == 1 411 | assert len(ws_) or p_ is None 412 | 413 | ws.extend(ws_) 414 | p = p_ 415 | 416 | assert len({w.name_url for w in ws}) == len(ws) 417 | -------------------------------------------------------------------------------- /tests/test_parse.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime 2 | from datetime import timedelta 3 | from json import load 4 | from pathlib import Path 5 | from re import sub 6 | from typing import Optional 7 | 8 | from pytest import fixture 9 | from pytest import raises 10 | from requests import Response 11 | from requests import Session 12 | 13 | from faapi.connection import join_url 14 | from faapi.connection import make_session 15 | from faapi.connection import root 16 | from faapi.exceptions import DisabledAccount 17 | from faapi.exceptions import NotFound 18 | from faapi.parse import bbcode_to_html 19 | from faapi.parse import check_page_raise 20 | from faapi.parse import clean_html 21 | from faapi.parse import html_to_bbcode 22 | from faapi.parse import parse_journal_page 23 | from faapi.parse import parse_loggedin_user 24 | from faapi.parse import parse_page 25 | from faapi.parse import parse_submission_page 26 | from faapi.parse import parse_user_page 27 | from faapi.parse import username_url 28 | 29 | __root__: Path = Path(__file__).resolve().parent 30 | 31 | 32 | @fixture 33 | def data() -> dict: 34 | return load((__root__ / "test_data.json").open()) 35 | 36 | 37 | @fixture 38 | def session(data: dict) -> Session: 39 | return make_session(data["cookies"], Session) 40 | 41 | 42 | @fixture 43 | def user_test_data() -> dict: 44 | return load((__root__ / "test_user.json").open()) 45 | 46 | 47 | @fixture 48 | def submission_test_data() -> dict: 49 | return load((__root__ / "test_submission.json").open()) 50 | 51 | 52 | @fixture 53 | def journal_test_data() -> dict: 54 | return load((__root__ / "test_journal.json").open()) 55 | 56 | 57 | def dst_us() -> timedelta: 58 | now: datetime = datetime.now() 59 | 60 | if now.month < 3 or now.month >= 12: 61 | return timedelta(0) 62 | 63 | m1 = datetime(now.year, 3, 1) 64 | 65 | if now < datetime(now.year, 3, 7 + (6 - m1.weekday() + 1)): 66 | return timedelta(0) 67 | 68 | n1 = datetime(now.year, 11, 1) 69 | 70 | if now > datetime(now.year, 11, 6 - n1.weekday() + 1): 71 | return timedelta(0) 72 | 73 | return timedelta(hours=-1) 74 | 75 | 76 | def remove_user_icons(html: str) -> str: 77 | return sub(r"a\.furaffinity\.net/\d{8}/[^. 
]+.gif", "", html) 78 | 79 | 80 | def test_check_page_disabled_account(session: Session, data: dict): 81 | res: Response = session.get(join_url(root, "user", data["disabled"]["user"])) 82 | assert res.ok 83 | 84 | page = parse_page(res.text) 85 | 86 | with raises(DisabledAccount): 87 | check_page_raise(page) 88 | 89 | 90 | def test_check_page_not_found(session: Session): 91 | res: Response = session.get(join_url(root, "user", "_")) 92 | assert res.ok 93 | 94 | page = parse_page(res.text) 95 | 96 | with raises(NotFound): 97 | check_page_raise(page) 98 | 99 | 100 | def test_parse_loggedin_user(session: Session, data: dict): 101 | res: Response = session.get(join_url(root, "user", data["login"]["user"])) 102 | assert res.ok 103 | 104 | page = parse_page(res.text) 105 | login_user: Optional[str] = parse_loggedin_user(page) 106 | assert login_user is not None 107 | 108 | assert username_url(login_user) == username_url(data["login"]["user"]) 109 | 110 | 111 | def test_parse_user_page(session: Session, user_test_data: dict): 112 | res: Response = session.get(join_url(root, "user", username_url(user_test_data["name"]))) 113 | assert res.ok 114 | 115 | page = parse_page(res.text) 116 | result = parse_user_page(page) 117 | 118 | assert result["name"].lower() == user_test_data["name"].lower() 119 | assert result["status"] == user_test_data["status"] 120 | assert result["title"] == user_test_data["title"] 121 | assert result["join_date"] == datetime.fromisoformat(user_test_data["join_date"]) + dst_us() 122 | assert result["stats"][0] >= user_test_data["stats"]["views"] 123 | assert result["stats"][1] >= user_test_data["stats"]["submissions"] 124 | assert result["stats"][2] >= user_test_data["stats"]["favorites"] 125 | assert result["stats"][3] >= user_test_data["stats"]["comments_earned"] 126 | assert result["stats"][4] >= user_test_data["stats"]["comments_made"] 127 | assert result["stats"][5] >= user_test_data["stats"]["journals"] 128 | assert result["info"] == user_test_data["info"] 129 | assert result["contacts"] == user_test_data["contacts"] 130 | assert result["avatar_url"] == user_test_data["avatar_url"] != "" 131 | assert result["banner_url"] == user_test_data["banner_url"] != "" 132 | assert remove_user_icons(clean_html(result["profile"])) == remove_user_icons(clean_html(user_test_data["profile"])) 133 | assert html_to_bbcode(result["profile"]) == user_test_data["profile_bbcode"] 134 | assert user_test_data["profile_bbcode"] == html_to_bbcode(bbcode_to_html(user_test_data["profile_bbcode"])) 135 | 136 | 137 | def test_parse_submission_page(session: Session, submission_test_data: dict): 138 | res: Response = session.get(join_url(root, "view", submission_test_data["id"])) 139 | assert res.ok 140 | 141 | page = parse_page(res.text) 142 | result = parse_submission_page(page) 143 | 144 | assert result["id"] == submission_test_data["id"] 145 | assert result["title"] == submission_test_data["title"] 146 | assert result["author"].lower() == submission_test_data["author"]["name"].lower() 147 | assert result["author_icon_url"] != "" 148 | assert result["date"] == datetime.fromisoformat(submission_test_data["date"]) + dst_us() 149 | assert result["tags"] == submission_test_data["tags"] 150 | assert result["category"] == submission_test_data["category"] 151 | assert result["species"] == submission_test_data["species"] 152 | assert result["gender"] == submission_test_data["gender"] 153 | assert result["rating"] == submission_test_data["rating"] 154 | assert result["views"] >= 
submission_test_data["stats"]["views"] 155 | assert result["comment_count"] >= submission_test_data["stats"]["comments"] 156 | assert result["favorites"] >= submission_test_data["stats"]["favorites"] 157 | assert result["type"] == submission_test_data["type"] 158 | assert result["mentions"] == submission_test_data["mentions"] 159 | assert result["folder"] == submission_test_data["folder"] 160 | assert [list(f) for f in result["user_folders"]] == submission_test_data["user_folders_tuples"] 161 | assert result["file_url"] != "" 162 | assert result["thumbnail_url"] != "" 163 | assert result["prev"] == submission_test_data["prev"] 164 | assert result["next"] == submission_test_data["next"] 165 | assert bool(result["unfav_link"]) == submission_test_data["favorite"] 166 | assert (("/fav/" in submission_test_data["favorite_toggle_link"]) and bool(result["fav_link"])) or \ 167 | (("/unfav/" in submission_test_data["favorite_toggle_link"]) and bool(result["unfav_link"])) 168 | assert remove_user_icons(clean_html(result["description"])) == \ 169 | remove_user_icons(clean_html(submission_test_data["description"])) 170 | assert remove_user_icons(clean_html(result["footer"])) == \ 171 | remove_user_icons(clean_html(submission_test_data["footer"])) 172 | assert html_to_bbcode(result["description"]) == submission_test_data["description_bbcode"] 173 | assert html_to_bbcode(result["footer"]) == submission_test_data["footer_bbcode"] 174 | assert submission_test_data["description_bbcode"] == \ 175 | html_to_bbcode(bbcode_to_html(submission_test_data["description_bbcode"])) 176 | assert submission_test_data["footer_bbcode"] == \ 177 | html_to_bbcode(bbcode_to_html(submission_test_data["footer_bbcode"])) 178 | 179 | 180 | def test_parse_journal_page(session: Session, journal_test_data: dict): 181 | res: Response = session.get(join_url(root, "journal", journal_test_data["id"])) 182 | assert res.ok 183 | 184 | page = parse_page(res.text) 185 | result = parse_journal_page(page) 186 | 187 | assert result["id"] == journal_test_data["id"] 188 | assert result["title"] == journal_test_data["title"] 189 | assert result["user_info"]["name"].lower() == journal_test_data["author"]["name"].lower() 190 | assert result["user_info"]["join_date"] == \ 191 | datetime.fromisoformat(journal_test_data["author"]["join_date"]) + dst_us() 192 | assert result["user_info"]["avatar_url"] != "" 193 | assert result["date"] == datetime.fromisoformat(journal_test_data["date"]) + dst_us() 194 | assert result["comments"] >= journal_test_data["stats"]["comments"] 195 | assert result["mentions"] == journal_test_data["mentions"] 196 | assert remove_user_icons(clean_html(result["content"])) == remove_user_icons( 197 | clean_html(journal_test_data["content"])) 198 | assert remove_user_icons(clean_html(result["header"])) == remove_user_icons( 199 | clean_html(journal_test_data["header"])) 200 | assert remove_user_icons(clean_html(result["footer"])) == remove_user_icons( 201 | clean_html(journal_test_data["footer"])) 202 | assert html_to_bbcode(result["content"]) == journal_test_data["content_bbcode"] 203 | assert html_to_bbcode(result["header"]) == journal_test_data["header_bbcode"] 204 | assert html_to_bbcode(result["footer"]) == journal_test_data["footer_bbcode"] 205 | assert journal_test_data["content"] == html_to_bbcode(bbcode_to_html(journal_test_data["content"])) 206 | assert journal_test_data["header"] == html_to_bbcode(bbcode_to_html(journal_test_data["header"])) 207 | assert journal_test_data["footer"] == 
html_to_bbcode(bbcode_to_html(journal_test_data["footer"])) 208 | --------------------------------------------------------------------------------
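For quick reference, the snippet below sketches how the FAAPI client exercised by the tests above is typically driven. It is an illustration only: the cookie values are placeholders that must come from a logged-in FurAffinity session (commonly its "a" and "b" cookies), and the username and IDs are arbitrary examples rather than values taken from this repository.

    from faapi import FAAPI
    from requests.cookies import RequestsCookieJar

    # Placeholder cookies: replace the values with those of a logged-in FurAffinity session.
    cookies = RequestsCookieJar()
    cookies.set("a", "<cookie a value>")
    cookies.set("b", "<cookie b value>")

    api = FAAPI(cookies)
    print(api.login_status)  # True once the cookies are accepted

    # Single objects; the username and IDs below are arbitrary examples.
    user = api.user("fender")
    print(user.display_name, user.stats.views)

    submission, file_bytes = api.submission(12345678, get_file=True)
    print(submission.title, submission.author.name, len(file_bytes or b""))

    journal = api.journal(12345678)
    print(journal.title, journal.date)

    # Paged listings return (results, next_page); next_page is None after the last page.
    page = 1
    while page:
        results, page = api.gallery("fender", page)
        for partial in results:
            print(partial.id, partial.rating, partial.thumbnail_url)

Scraps, journals, and the watchlist helpers follow the same integer-page pattern shown for the gallery, while favorites pages with an opaque string token that starts at "/".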