├── .dockerignore ├── .gitignore ├── CODE_OF_CONDUCT.md ├── Cargo.lock ├── Cargo.toml ├── LICENSE.md ├── Makefile ├── README.md ├── arc-disk-cache ├── Cargo.toml ├── README.md └── src │ ├── error.rs │ └── lib.rs ├── cargo_config ├── opensuse-proxy-cache ├── Cargo.lock ├── Cargo.toml ├── Dockerfile ├── src │ ├── auth.rs │ ├── cache.rs │ ├── constants.rs │ ├── main.rs │ └── memcache.rs └── templates │ └── ipxe.menu.html └── redis-server ├── Cargo.toml ├── Dockerfile └── src ├── codec.rs ├── main.rs └── parser.rs /.dockerignore: -------------------------------------------------------------------------------- 1 | .git 2 | .github 3 | .gitignore 4 | .mypy_cache 5 | .vscode 6 | *.md 7 | **/__pycache__/** 8 | docs 9 | Makefile 10 | target 11 | */target 12 | test.db 13 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | /insecure 3 | /vendor 4 | 5 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | ## Our Pledge 2 | 3 | In the interest of fostering an open and welcoming environment, we as contributors and maintainers pledge to making participation in our project and our community a harassment-free experience for everyone, regardless of age, body size, disability, ethnicity, sex characteristics, gender identity and expression, level of experience, education, socio-economic status, nationality, personal appearance, race, religion, or sexual identity and orientation. 4 | 5 | ## Our Standards 6 | 7 | Examples of behavior that contributes to creating a positive environment include: 8 | 9 | - Using welcoming and inclusive language 10 | - Being respectful of differing viewpoints and experiences 11 | - Gracefully accepting constructive criticism 12 | - Focusing on what is best for the community 13 | - Showing empathy towards other community members 14 | 15 | Examples of unacceptable behavior by participants include: 16 | 17 | - The use of sexualized language or imagery and unwelcome sexual attention or advances 18 | - Trolling, insulting/derogatory comments, and personal or political attacks 19 | - Public or private harassment 20 | - Publishing others’ private information, such as a physical or electronic address, without explicit permission 21 | - Other conduct which could reasonably be considered inappropriate in a professional setting 22 | 23 | ## Our Responsibilities 24 | 25 | Project maintainers are responsible for clarifying the standards of acceptable behavior and are expected to take appropriate and fair corrective action in response to any instances of unacceptable behavior. 26 | 27 | Project maintainers have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, or to ban temporarily or permanently any contributor for other behaviors that they deem inappropriate, threatening, offensive, or harmful. 28 | Scope 29 | 30 | This Code of Conduct applies both within project spaces and in public spaces when an individual is representing the project or its community. Examples of representing a project or community include using an official project e-mail address, posting via an official social media account, or acting as an appointed representative at an online or offline event. 
Representation of a project may be further defined and clarified by project maintainers. 31 | 32 | ## Enforcement 33 | 34 | Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by contacting the project team at: 35 | 36 | * william at blackhats.net.au 37 | * charcol at redhat.com 38 | 39 | All complaints will be reviewed and investigated and will result in a response that is deemed necessary and appropriate to the circumstances. The project team is obligated to maintain confidentiality with regard to the reporter of an incident. Further details of specific enforcement policies may be posted separately. 40 | 41 | Project maintainers who do not follow or enforce the Code of Conduct in good faith may face temporary or permanent repercussions as determined by other members of the project’s leadership. 42 | 43 | ## Attribution 44 | 45 | This Code of Conduct is adapted from the Contributor Covenant, version 1.4, available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html 46 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | 2 | [workspace] 3 | resolver = "2" 4 | 5 | members = [ 6 | "opensuse-proxy-cache", 7 | "arc-disk-cache", 8 | "redis-server" 9 | ] 10 | 11 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | Mozilla Public License Version 2.0 2 | ================================== 3 | 4 | 1. Definitions 5 | -------------- 6 | 7 | 1.1. "Contributor" 8 | means each individual or legal entity that creates, contributes to 9 | the creation of, or owns Covered Software. 10 | 11 | 1.2. "Contributor Version" 12 | means the combination of the Contributions of others (if any) used 13 | by a Contributor and that particular Contributor's Contribution. 14 | 15 | 1.3. "Contribution" 16 | means Covered Software of a particular Contributor. 17 | 18 | 1.4. "Covered Software" 19 | means Source Code Form to which the initial Contributor has attached 20 | the notice in Exhibit A, the Executable Form of such Source Code 21 | Form, and Modifications of such Source Code Form, in each case 22 | including portions thereof. 23 | 24 | 1.5. "Incompatible With Secondary Licenses" 25 | means 26 | 27 | (a) that the initial Contributor has attached the notice described 28 | in Exhibit B to the Covered Software; or 29 | 30 | (b) that the Covered Software was made available under the terms of 31 | version 1.1 or earlier of the License, but not also under the 32 | terms of a Secondary License. 33 | 34 | 1.6. "Executable Form" 35 | means any form of the work other than Source Code Form. 36 | 37 | 1.7. "Larger Work" 38 | means a work that combines Covered Software with other material, in 39 | a separate file or files, that is not Covered Software. 40 | 41 | 1.8. "License" 42 | means this document. 43 | 44 | 1.9. "Licensable" 45 | means having the right to grant, to the maximum extent possible, 46 | whether at the time of the initial grant or subsequently, any and 47 | all of the rights conveyed by this License. 48 | 49 | 1.10. "Modifications" 50 | means any of the following: 51 | 52 | (a) any file in Source Code Form that results from an addition to, 53 | deletion from, or modification of the contents of Covered 54 | Software; or 55 | 56 | (b) any new file in Source Code Form that contains any Covered 57 | Software. 
58 | 59 | 1.11. "Patent Claims" of a Contributor 60 | means any patent claim(s), including without limitation, method, 61 | process, and apparatus claims, in any patent Licensable by such 62 | Contributor that would be infringed, but for the grant of the 63 | License, by the making, using, selling, offering for sale, having 64 | made, import, or transfer of either its Contributions or its 65 | Contributor Version. 66 | 67 | 1.12. "Secondary License" 68 | means either the GNU General Public License, Version 2.0, the GNU 69 | Lesser General Public License, Version 2.1, the GNU Affero General 70 | Public License, Version 3.0, or any later versions of those 71 | licenses. 72 | 73 | 1.13. "Source Code Form" 74 | means the form of the work preferred for making modifications. 75 | 76 | 1.14. "You" (or "Your") 77 | means an individual or a legal entity exercising rights under this 78 | License. For legal entities, "You" includes any entity that 79 | controls, is controlled by, or is under common control with You. For 80 | purposes of this definition, "control" means (a) the power, direct 81 | or indirect, to cause the direction or management of such entity, 82 | whether by contract or otherwise, or (b) ownership of more than 83 | fifty percent (50%) of the outstanding shares or beneficial 84 | ownership of such entity. 85 | 86 | 2. License Grants and Conditions 87 | -------------------------------- 88 | 89 | 2.1. Grants 90 | 91 | Each Contributor hereby grants You a world-wide, royalty-free, 92 | non-exclusive license: 93 | 94 | (a) under intellectual property rights (other than patent or trademark) 95 | Licensable by such Contributor to use, reproduce, make available, 96 | modify, display, perform, distribute, and otherwise exploit its 97 | Contributions, either on an unmodified basis, with Modifications, or 98 | as part of a Larger Work; and 99 | 100 | (b) under Patent Claims of such Contributor to make, use, sell, offer 101 | for sale, have made, import, and otherwise transfer either its 102 | Contributions or its Contributor Version. 103 | 104 | 2.2. Effective Date 105 | 106 | The licenses granted in Section 2.1 with respect to any Contribution 107 | become effective for each Contribution on the date the Contributor first 108 | distributes such Contribution. 109 | 110 | 2.3. Limitations on Grant Scope 111 | 112 | The licenses granted in this Section 2 are the only rights granted under 113 | this License. No additional rights or licenses will be implied from the 114 | distribution or licensing of Covered Software under this License. 115 | Notwithstanding Section 2.1(b) above, no patent license is granted by a 116 | Contributor: 117 | 118 | (a) for any code that a Contributor has removed from Covered Software; 119 | or 120 | 121 | (b) for infringements caused by: (i) Your and any other third party's 122 | modifications of Covered Software, or (ii) the combination of its 123 | Contributions with other software (except as part of its Contributor 124 | Version); or 125 | 126 | (c) under Patent Claims infringed by Covered Software in the absence of 127 | its Contributions. 128 | 129 | This License does not grant any rights in the trademarks, service marks, 130 | or logos of any Contributor (except as may be necessary to comply with 131 | the notice requirements in Section 3.4). 132 | 133 | 2.4. 
Subsequent Licenses 134 | 135 | No Contributor makes additional grants as a result of Your choice to 136 | distribute the Covered Software under a subsequent version of this 137 | License (see Section 10.2) or under the terms of a Secondary License (if 138 | permitted under the terms of Section 3.3). 139 | 140 | 2.5. Representation 141 | 142 | Each Contributor represents that the Contributor believes its 143 | Contributions are its original creation(s) or it has sufficient rights 144 | to grant the rights to its Contributions conveyed by this License. 145 | 146 | 2.6. Fair Use 147 | 148 | This License is not intended to limit any rights You have under 149 | applicable copyright doctrines of fair use, fair dealing, or other 150 | equivalents. 151 | 152 | 2.7. Conditions 153 | 154 | Sections 3.1, 3.2, 3.3, and 3.4 are conditions of the licenses granted 155 | in Section 2.1. 156 | 157 | 3. Responsibilities 158 | ------------------- 159 | 160 | 3.1. Distribution of Source Form 161 | 162 | All distribution of Covered Software in Source Code Form, including any 163 | Modifications that You create or to which You contribute, must be under 164 | the terms of this License. You must inform recipients that the Source 165 | Code Form of the Covered Software is governed by the terms of this 166 | License, and how they can obtain a copy of this License. You may not 167 | attempt to alter or restrict the recipients' rights in the Source Code 168 | Form. 169 | 170 | 3.2. Distribution of Executable Form 171 | 172 | If You distribute Covered Software in Executable Form then: 173 | 174 | (a) such Covered Software must also be made available in Source Code 175 | Form, as described in Section 3.1, and You must inform recipients of 176 | the Executable Form how they can obtain a copy of such Source Code 177 | Form by reasonable means in a timely manner, at a charge no more 178 | than the cost of distribution to the recipient; and 179 | 180 | (b) You may distribute such Executable Form under the terms of this 181 | License, or sublicense it under different terms, provided that the 182 | license for the Executable Form does not attempt to limit or alter 183 | the recipients' rights in the Source Code Form under this License. 184 | 185 | 3.3. Distribution of a Larger Work 186 | 187 | You may create and distribute a Larger Work under terms of Your choice, 188 | provided that You also comply with the requirements of this License for 189 | the Covered Software. If the Larger Work is a combination of Covered 190 | Software with a work governed by one or more Secondary Licenses, and the 191 | Covered Software is not Incompatible With Secondary Licenses, this 192 | License permits You to additionally distribute such Covered Software 193 | under the terms of such Secondary License(s), so that the recipient of 194 | the Larger Work may, at their option, further distribute the Covered 195 | Software under the terms of either this License or such Secondary 196 | License(s). 197 | 198 | 3.4. Notices 199 | 200 | You may not remove or alter the substance of any license notices 201 | (including copyright notices, patent notices, disclaimers of warranty, 202 | or limitations of liability) contained within the Source Code Form of 203 | the Covered Software, except that You may alter any license notices to 204 | the extent required to remedy known factual inaccuracies. 205 | 206 | 3.5. 
Application of Additional Terms 207 | 208 | You may choose to offer, and to charge a fee for, warranty, support, 209 | indemnity or liability obligations to one or more recipients of Covered 210 | Software. However, You may do so only on Your own behalf, and not on 211 | behalf of any Contributor. You must make it absolutely clear that any 212 | such warranty, support, indemnity, or liability obligation is offered by 213 | You alone, and You hereby agree to indemnify every Contributor for any 214 | liability incurred by such Contributor as a result of warranty, support, 215 | indemnity or liability terms You offer. You may include additional 216 | disclaimers of warranty and limitations of liability specific to any 217 | jurisdiction. 218 | 219 | 4. Inability to Comply Due to Statute or Regulation 220 | --------------------------------------------------- 221 | 222 | If it is impossible for You to comply with any of the terms of this 223 | License with respect to some or all of the Covered Software due to 224 | statute, judicial order, or regulation then You must: (a) comply with 225 | the terms of this License to the maximum extent possible; and (b) 226 | describe the limitations and the code they affect. Such description must 227 | be placed in a text file included with all distributions of the Covered 228 | Software under this License. Except to the extent prohibited by statute 229 | or regulation, such description must be sufficiently detailed for a 230 | recipient of ordinary skill to be able to understand it. 231 | 232 | 5. Termination 233 | -------------- 234 | 235 | 5.1. The rights granted under this License will terminate automatically 236 | if You fail to comply with any of its terms. However, if You become 237 | compliant, then the rights granted under this License from a particular 238 | Contributor are reinstated (a) provisionally, unless and until such 239 | Contributor explicitly and finally terminates Your grants, and (b) on an 240 | ongoing basis, if such Contributor fails to notify You of the 241 | non-compliance by some reasonable means prior to 60 days after You have 242 | come back into compliance. Moreover, Your grants from a particular 243 | Contributor are reinstated on an ongoing basis if such Contributor 244 | notifies You of the non-compliance by some reasonable means, this is the 245 | first time You have received notice of non-compliance with this License 246 | from such Contributor, and You become compliant prior to 30 days after 247 | Your receipt of the notice. 248 | 249 | 5.2. If You initiate litigation against any entity by asserting a patent 250 | infringement claim (excluding declaratory judgment actions, 251 | counter-claims, and cross-claims) alleging that a Contributor Version 252 | directly or indirectly infringes any patent, then the rights granted to 253 | You by any and all Contributors for the Covered Software under Section 254 | 2.1 of this License shall terminate. 255 | 256 | 5.3. In the event of termination under Sections 5.1 or 5.2 above, all 257 | end user license agreements (excluding distributors and resellers) which 258 | have been validly granted by You or Your distributors under this License 259 | prior to termination shall survive termination. 260 | 261 | ************************************************************************ 262 | * * 263 | * 6. 
Disclaimer of Warranty * 264 | * ------------------------- * 265 | * * 266 | * Covered Software is provided under this License on an "as is" * 267 | * basis, without warranty of any kind, either expressed, implied, or * 268 | * statutory, including, without limitation, warranties that the * 269 | * Covered Software is free of defects, merchantable, fit for a * 270 | * particular purpose or non-infringing. The entire risk as to the * 271 | * quality and performance of the Covered Software is with You. * 272 | * Should any Covered Software prove defective in any respect, You * 273 | * (not any Contributor) assume the cost of any necessary servicing, * 274 | * repair, or correction. This disclaimer of warranty constitutes an * 275 | * essential part of this License. No use of any Covered Software is * 276 | * authorized under this License except under this disclaimer. * 277 | * * 278 | ************************************************************************ 279 | 280 | ************************************************************************ 281 | * * 282 | * 7. Limitation of Liability * 283 | * -------------------------- * 284 | * * 285 | * Under no circumstances and under no legal theory, whether tort * 286 | * (including negligence), contract, or otherwise, shall any * 287 | * Contributor, or anyone who distributes Covered Software as * 288 | * permitted above, be liable to You for any direct, indirect, * 289 | * special, incidental, or consequential damages of any character * 290 | * including, without limitation, damages for lost profits, loss of * 291 | * goodwill, work stoppage, computer failure or malfunction, or any * 292 | * and all other commercial damages or losses, even if such party * 293 | * shall have been informed of the possibility of such damages. This * 294 | * limitation of liability shall not apply to liability for death or * 295 | * personal injury resulting from such party's negligence to the * 296 | * extent applicable law prohibits such limitation. Some * 297 | * jurisdictions do not allow the exclusion or limitation of * 298 | * incidental or consequential damages, so this exclusion and * 299 | * limitation may not apply to You. * 300 | * * 301 | ************************************************************************ 302 | 303 | 8. Litigation 304 | ------------- 305 | 306 | Any litigation relating to this License may be brought only in the 307 | courts of a jurisdiction where the defendant maintains its principal 308 | place of business and such litigation shall be governed by laws of that 309 | jurisdiction, without reference to its conflict-of-law provisions. 310 | Nothing in this Section shall prevent a party's ability to bring 311 | cross-claims or counter-claims. 312 | 313 | 9. Miscellaneous 314 | ---------------- 315 | 316 | This License represents the complete agreement concerning the subject 317 | matter hereof. If any provision of this License is held to be 318 | unenforceable, such provision shall be reformed only to the extent 319 | necessary to make it enforceable. Any law or regulation which provides 320 | that the language of a contract shall be construed against the drafter 321 | shall not be used to construe this License against a Contributor. 322 | 323 | 10. Versions of the License 324 | --------------------------- 325 | 326 | 10.1. New Versions 327 | 328 | Mozilla Foundation is the license steward. Except as provided in Section 329 | 10.3, no one other than the license steward has the right to modify or 330 | publish new versions of this License. 
Each version will be given a 331 | distinguishing version number. 332 | 333 | 10.2. Effect of New Versions 334 | 335 | You may distribute the Covered Software under the terms of the version 336 | of the License under which You originally received the Covered Software, 337 | or under the terms of any subsequent version published by the license 338 | steward. 339 | 340 | 10.3. Modified Versions 341 | 342 | If you create software not governed by this License, and you want to 343 | create a new license for such software, you may create and use a 344 | modified version of this License if you rename the license and remove 345 | any references to the name of the license steward (except to note that 346 | such modified license differs from this License). 347 | 348 | 10.4. Distributing Source Code Form that is Incompatible With Secondary 349 | Licenses 350 | 351 | If You choose to distribute Source Code Form that is Incompatible With 352 | Secondary Licenses under the terms of this version of the License, the 353 | notice described in Exhibit B of this License must be attached. 354 | 355 | Exhibit A - Source Code Form License Notice 356 | ------------------------------------------- 357 | 358 | This Source Code Form is subject to the terms of the Mozilla Public 359 | License, v. 2.0. If a copy of the MPL was not distributed with this 360 | file, You can obtain one at http://mozilla.org/MPL/2.0/. 361 | 362 | If it is not possible or desirable to put the notice in a particular 363 | file, then You may include the notice in a location (such as a LICENSE 364 | file in a relevant directory) where a recipient would be likely to look 365 | for such a notice. 366 | 367 | You may add additional accurate notices of copyright ownership. 368 | 369 | Exhibit B - "Incompatible With Secondary Licenses" Notice 370 | --------------------------------------------------------- 371 | 372 | This Source Code Form is "Incompatible With Secondary Licenses", as 373 | defined by the Mozilla Public License, v. 2.0. 374 | 375 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: clean 2 | 3 | vendor: 4 | cargo vendor 1> ./cargo_config 5 | 6 | proxy: vendor 7 | docker buildx build --pull --push --platform "linux/amd64,linux/arm64" \ 8 | -f ./opensuse-proxy-cache/Dockerfile \ 9 | -t firstyear/opensuse_proxy_cache:latest . 10 | 11 | redis: vendor 12 | docker buildx build --pull --push --platform "linux/amd64" \ 13 | -f ./redis-server/Dockerfile \ 14 | -t firstyear/redis-server:latest . 15 | 16 | clean: 17 | rm -rf ./vendor ./cargo_config 18 | 19 | all: proxy redis 20 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## An OpenSUSE mirror aware RPM caching proxy 2 | 3 | This is a small service that allows caching RPM's, Metadata, disk images and more from 4 | download.opensuse.org and it's network of mirrors. This allows faster content refresh, 5 | lower latency, and better use of bandwidth especially if you have multiple systems. In 6 | some early tests (see technical details section) this has been shown to reduce zypper 7 | metadata refresh times by 75% on a hot cache, and 25% on a warm cache. Additionally 8 | repeat installs of packages are significantly faster, with tests showing this proxy is 9 | able to provide data at 850 MegaBytes Per Second. 
Generally it is limited by your disk and network
10 | performance.
11 | 
12 | Effectively this lets you "host your own mirror" at a fraction of the resources of a traditional
13 | rsync-based mirror system. If you have more than one OpenSUSE machine on your network, this will
14 | help make all your downloads and installs much faster!
15 | 
16 | ### Usage
17 | 
18 | #### Container
19 | 
20 |     docker run -p 8080:8080 -v /your/storage/:/tmp/osuse_cache/ -u X:X firstyear/opensuse_proxy_cache:latest
21 | 
22 | Docker containers are configured through environment variables. These variables affect the image:
23 | 
24 | * `CACHE_LARGE_OBJECTS` - Should we cache large objects like ISOs, VM images, and boot images?
25 | * `WONDER_GUARD` - If set to true, enables a bloom filter that avoids caching one-hit-wonder items, to prevent disk churn.
26 | * `CACHE_SIZE` - Disk size for cache content in bytes. Defaults to 16GiB.
27 | * `CACHE_PATH` - Path where cache content should be stored. Defaults to `/tmp/osuse_cache`
28 | * `BIND_ADDRESS` - Address to listen on. Defaults to `[::]:8080`
29 | * `VERBOSE` - Enable verbose logging.
30 | * `TLS_BIND_ADDRESS` - Address to listen on for HTTPS. Defaults off.
31 | * `TLS_PEM_KEY` - Path to the key in PEM format.
32 | * `TLS_PEM_CHAIN` - Path to the CA chain in PEM format.
33 | * `MIRROR_CHAIN` - URL of an upstream mirror you would like to use directly (may be another opensuse-proxy-cache instance).
34 | * `BOOT_SERVICES` - Enable a read-only tftp server that contains ipxe bootroms.
35 | 
36 | #### From Source (advanced)
37 | 
38 |     cargo run -- --help
39 | 
40 | ### How To's
41 | 
42 | * [opensuse proxy cache on TrueNas](https://sfalken.tech/posts/2024-03-07-docker-container-truenas-scale/)
43 | 
44 | ### Change Your System Repositories
45 | 
46 | For your systems to use this proxy, they need to be configured to send their traffic through this
47 | cache. The following can update your repository locations. Change IPADDRESS to your cache's hostname
48 | or IP.
49 | 
50 |     sed -i -E 's/https?:\/\/download.opensuse.org/http:\/\/IPADDRESS:8080/g' /etc/zypp/repos.d/*.repo
51 | 
52 | HINT: This also works with obs:// repos :)
53 | 
54 | ### Boot From IPXE
55 | 
56 | It is possible to boot from this mirror, allowing system recovery or installation.
57 | 
58 | > ⚠️ You must ensure your upstream mirrors are served by HTTPS and are trustworthy, as IPXE cannot
59 | > trivially validate boot image signatures.
60 | 
61 | Your DHCP server must be capable of serving different boot images based on client tags. Depending
62 | on the client type, you need to provide different files and values to dhcpd.
63 | 
64 | * MBR PXE
65 | 
66 |       next-server: proxy-ip
67 |       filename: undionly.kpxe
68 | 
69 | * EFI PXE
70 | 
71 |       next-server: proxy-ip
72 |       filename: ipxe-x86_64.efi
73 | 
74 | * EFI HTTP PXE
75 | 
76 |       next-server: unset
77 |       filename: http://proxy-ip/ipxe/ipxe-x86_64.efi
78 | 
79 | 
80 | An example dnsmasq.conf supporting all three device classes is:
81 | 
82 |     # Trigger PXE Boot support on HTTP Boot client request
83 |     dhcp-pxe-vendor=PXEClient,HTTPClient
84 | 
85 |     # Set tag ipxe if 175 is set.
86 |     dhcp-match=set:ipxe,175
87 |     # Set tag if client is http efi
88 |     dhcp-match=set:http-efi-x64,option:client-arch,16
89 |     # Or if it is pxe efi native
90 |     dhcp-match=set:efi-x64,option:client-arch,7
91 |     # Or finally, legacy bios
92 |     dhcp-match=set:bios-x86,option:client-arch,0
93 | 
94 |     # Menu for ipxe clients, this is served by http.
95 |     dhcp-boot=tag:ipxe,http://proxy-ip:8080/menu.ipxe
96 | 
97 |     # This provides a boot-option in EFI boot menus
98 |     pxe-service=tag:http-efi-x64,x86-64_EFI,"Network Boot"
99 | 
100 |     # Specify bootfile-name option via PXE Boot setting
101 |     dhcp-boot=tag:http-efi-x64,http://proxy-ip:8080/ipxe/ipxe-x86_64.efi
102 | 
103 |     # Force required vendor class in the response, even if not requested
104 |     dhcp-option-force=tag:http-efi-x64,option:vendor-class,HTTPClient
105 | 
106 |     # ipxe via tftp for everyone else. Requires BOOT_SERVICES to be enabled.
107 |     dhcp-boot=tag:!ipxe,tag:efi-x64,ipxe-x86_64.efi,,
108 |     dhcp-boot=tag:!ipxe,tag:bios-x86,undionly.kpxe,,
109 | 
110 | 
111 | ### Known Issues
112 | 
113 | #### Unknown Content Types
114 | 
115 | Some types of content are "not classified" yet, meaning we do not cache them as we don't know what policy
116 | to use. If you see these in your logs, please report them! The log lines appear as:
117 | 
118 |     opensuse_proxy_cache::cache ⚠️ Classification::Unknown - /demo_unknown.content
119 | 
120 | This information will help us adjust the content classifier and what policies we can apply to cached
121 | items.
122 | 
123 | #### Disk Usage May Exceed Configured Capacity
124 | 
125 | Due to the way the cache works in this service, content that is currently in the miss
126 | process is not accounted for in the maximum capacity, which may cause disk usage to exceed the amount
127 | you have allocated. It's a safe rule to assume that you may have 15% above capacity as a buffer
128 | "just in case" of this occurrence. Also note that when the cache *does* evict items, it will not
129 | remove them from disk until all active downloads of that item are complete. This again may cause
130 | disk usage to appear greater than capacity.
131 | 
--------------------------------------------------------------------------------
/arc-disk-cache/Cargo.toml:
--------------------------------------------------------------------------------
1 | [package]
2 | name = "arc-disk-cache"
3 | version = "0.1.1"
4 | edition = "2021"
5 | description = "Concurrently Readable Disk Cache"
6 | repository = "https://github.com/Firstyear/opensuse-proxy-cache"
7 | readme = "README.md"
8 | keywords = ["concurrency", "lru", "mvcc", "copy-on-write", "transactional-memory"]
9 | categories = ["data-structures", "memory-management", "caching", "concurrency"]
10 | license = "MPL-2.0"
11 | 
12 | [dependencies]
13 | concread = "^0.5.0"
14 | thiserror = "1.0"
15 | tempfile = "3.10"
16 | time = { version = "0.3", features = ["serde"]}
17 | tracing = "0.1"
18 | serde = { version = "1.0", features = ["derive"]}
19 | serde_json = "1.0"
20 | crc32c = "0.6"
21 | rand = "^0.8.5"
22 | hex = "0.4"
23 | 
24 | sha2 = "0.10"
25 | 
26 | [dev-dependencies]
27 | tracing = { version = "0.1", features = ["attributes"] }
28 | tracing-subscriber = { version = "0.3", features = ["env-filter"] }
29 | 
30 | 
--------------------------------------------------------------------------------
/arc-disk-cache/README.md:
--------------------------------------------------------------------------------
1 | # A Concurrently Readable Disk Cache
2 | 
3 | WIP
4 | 
5 | 
6 | 
--------------------------------------------------------------------------------
/arc-disk-cache/src/error.rs:
--------------------------------------------------------------------------------
1 | use thiserror::Error;
2 | 
3 | #[derive(Error, Debug)]
4 | pub enum CacheError {
5 |     #[error("unknown")]
6 |     Unknown,
7 | }
8 | 
--------------------------------------------------------------------------------
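The arc-disk-cache README above is still just a stub, so here is a minimal usage sketch of the crate's public API, modelled on the `disk_cache_test_basic` test at the end of `src/lib.rs` below. The `Vec<u8>` key type, the `()` userdata, the 1 MiB capacity, and the extra `tempfile` dependency are illustrative assumptions rather than anything the crate prescribes.

    // Minimal sketch: stage bytes in a NamedTempFile, insert them, then read them back.
    // Assumptions: key = Vec<u8>, userdata = (), capacity = 1 MiB, and the `tempfile`
    // crate as an extra dependency for the temporary directory.
    use arc_disk_cache::ArcDiskCache;
    use std::io::{Read, Write};

    fn main() -> std::io::Result<()> {
        let dir = tempfile::tempdir()?;

        // Capacity is in bytes; `false` means the filesystem is not treated as durable,
        // so smaller entries are CRC-checked again when they are read back.
        let dc: ArcDiskCache<Vec<u8>, ()> = ArcDiskCache::new(1024 * 1024, dir.path(), false)?;

        // Stage content through a tempfile in the cache's tmp dir, then insert it.
        let mut fh = dc.new_tempfile().expect("unable to create tempfile");
        fh.as_file_mut().write_all(b"Hello From Cache")?;
        dc.insert(b"some/key".to_vec(), (), fh);

        // Look the entry up later and reopen the backing file from disk.
        if let Some(obj) = dc.get(b"some/key".as_slice()) {
            let mut buf = String::new();
            obj.fhandle.reopen()?.read_to_string(&mut buf)?;
            println!("cached content: {buf}");
        }

        Ok(())
    }

`insert` moves the staged tempfile into the content directory, writes a `.meta` JSON file next to it, and records a CRC32C checksum, which `get` later uses to detect corruption on non-durable filesystems.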
/arc-disk-cache/src/lib.rs: -------------------------------------------------------------------------------- 1 | #[macro_use] 2 | extern crate tracing; 3 | 4 | use concread::arcache::stats::ARCacheWriteStat; 5 | use concread::arcache::{ARCache, ARCacheBuilder}; 6 | use concread::CowCell; 7 | use serde::de::DeserializeOwned; 8 | use serde::{Deserialize, Serialize}; 9 | 10 | use tempfile::NamedTempFile; 11 | 12 | use std::collections::BTreeSet; 13 | 14 | use std::borrow::Borrow; 15 | use std::fmt::Debug; 16 | use std::fs::{self, File}; 17 | use std::hash::Hash; 18 | use std::io::{self, BufRead, BufReader, BufWriter, Seek, Write}; 19 | use std::num::NonZeroUsize; 20 | use std::path::{Path, PathBuf}; 21 | use std::sync::atomic::{AtomicBool, Ordering}; 22 | use std::sync::Arc; 23 | 24 | use rand::prelude::*; 25 | 26 | use sha2::{Digest, Sha256}; 27 | 28 | static CHECK_INLINE: usize = 536870912; 29 | 30 | pub mod prelude { 31 | pub use tempfile::NamedTempFile; 32 | } 33 | 34 | pub mod error; 35 | 36 | #[derive(Clone, Debug, Default)] 37 | pub struct CacheStats { 38 | pub ops: u32, 39 | pub hits: u32, 40 | pub ratio: f64, 41 | 42 | // As of last write. 43 | pub p_weight: u64, 44 | pub freq: u64, 45 | pub recent: u64, 46 | pub shared_max: u64, 47 | pub all_seen_keys: u64, 48 | } 49 | 50 | impl CacheStats { 51 | fn update(&mut self, tstats: TraceStat) { 52 | self.p_weight = tstats.p_weight; 53 | self.shared_max = tstats.shared_max; 54 | self.freq = tstats.freq; 55 | self.recent = tstats.recent; 56 | self.all_seen_keys = tstats.all_seen_keys; 57 | } 58 | } 59 | 60 | #[derive(Debug, Default)] 61 | pub struct TraceStat { 62 | /// The current cache weight between recent and frequent. 63 | pub p_weight: u64, 64 | 65 | /// The maximum number of items in the shared cache. 66 | pub shared_max: u64, 67 | /// The number of items in the frequent set at this point in time. 68 | pub freq: u64, 69 | /// The number of items in the recent set at this point in time. 70 | pub recent: u64, 71 | 72 | /// The number of total keys seen through the cache's lifetime. 
73 |     pub all_seen_keys: u64,
74 | }
75 | 
76 | impl<K> ARCacheWriteStat<K> for TraceStat
77 | where
78 |     K: Debug,
79 | {
80 |     fn include(&mut self, k: &K) {
81 |         tracing::trace!(?k, "arc-disk include");
82 |     }
83 | 
84 |     fn include_haunted(&mut self, k: &K) {
85 |         tracing::warn!(?k, "arc-disk include_haunted");
86 |     }
87 | 
88 |     fn modify(&mut self, k: &K) {
89 |         tracing::debug!(?k, "arc-disk modify");
90 |     }
91 | 
92 |     fn ghost_frequent_revive(&mut self, k: &K) {
93 |         tracing::warn!(?k, "arc-disk ghost_frequent_revive");
94 |     }
95 | 
96 |     fn ghost_recent_revive(&mut self, k: &K) {
97 |         tracing::warn!(?k, "arc-disk ghost_recent_revive");
98 |     }
99 | 
100 |     fn evict_from_recent(&mut self, k: &K) {
101 |         tracing::debug!(?k, "arc-disk evict_from_recent");
102 |     }
103 | 
104 |     fn evict_from_frequent(&mut self, k: &K) {
105 |         tracing::debug!(?k, "arc-disk evict_from_frequent");
106 |     }
107 | 
108 |     fn p_weight(&mut self, p: u64) {
109 |         self.p_weight = p;
110 |     }
111 | 
112 |     fn shared_max(&mut self, i: u64) {
113 |         self.shared_max = i;
114 |     }
115 | 
116 |     fn freq(&mut self, i: u64) {
117 |         self.freq = i;
118 |     }
119 | 
120 |     fn recent(&mut self, i: u64) {
121 |         self.recent = i;
122 |     }
123 | 
124 |     fn all_seen_keys(&mut self, i: u64) {
125 |         self.all_seen_keys = i;
126 |     }
127 | }
128 | 
129 | #[derive(Debug, Serialize, Deserialize)]
130 | pub struct CacheObjMeta<K, D> {
131 |     pub key: K,
132 |     path: PathBuf,
133 |     pub crc: u32,
134 |     pub userdata: D,
135 | }
136 | 
137 | #[derive(Clone, Debug)]
138 | pub struct CacheObj<K, D>
139 | where
140 |     K: Serialize
141 |         + DeserializeOwned
142 |         + AsRef<[u8]>
143 |         + Hash
144 |         + Eq
145 |         + Ord
146 |         + Clone
147 |         + Debug
148 |         + Sync
149 |         + Send
150 |         + 'static,
151 |     D: Serialize + DeserializeOwned + Clone + Debug + Sync + Send + 'static,
152 | {
153 |     pub key: K,
154 |     pub fhandle: Arc<FileHandle>,
155 |     pub userdata: D,
156 | }
157 | 
158 | #[derive(Clone, Debug)]
159 | pub struct FileHandle {
160 |     pub meta_path: PathBuf,
161 |     pub path: PathBuf,
162 |     pub amt: usize,
163 |     pub crc: u32,
164 |     running: Arc<AtomicBool>,
165 | }
166 | 
167 | impl Drop for FileHandle {
168 |     fn drop(&mut self) {
169 |         if self.running.load(Ordering::Acquire) {
170 |             info!("🗑 remove fhandle -> {:?}", self.path);
171 |             let _ = std::fs::remove_file(&self.meta_path);
172 |             let _ = std::fs::remove_file(&self.path);
173 |         }
174 |     }
175 | }
176 | 
177 | impl FileHandle {
178 |     pub fn reopen(&self) -> io::Result<File> {
179 |         File::open(&self.path)
180 |     }
181 | }
182 | 
183 | #[instrument(level = "trace")]
184 | fn crc32c_len(file: &mut File) -> io::Result<u32> {
185 |     file.seek(std::io::SeekFrom::Start(0)).inspect_err(|err| {
186 |         error!(?err, "Unable to seek tempfile");
187 |     })?;
188 | 
189 |     /*
190 |     let amt = file.metadata().map(|m| m.len() as usize).map_err(|e| {
191 |         error!("Unable to access metadata -> {:?}", e);
192 |     })?;
193 |     */
194 | 
195 |     let mut buf_file = BufReader::with_capacity(8192, file);
196 |     let mut crc = 0;
197 |     loop {
198 |         let buffer = buf_file
199 |             .fill_buf()
200 |             .inspect_err(|err| error!(?err, "crc32c_len error"))?;
201 | 
202 |         let length = buffer.len();
203 |         if length == 0 {
204 |             // We are done!
205 |             break;
206 |         } else {
207 |             // we have content, proceed.
208 |             crc = crc32c::crc32c_append(crc, buffer);
209 |             buf_file.consume(length);
210 |         }
211 |     }
212 |     debug!("crc32c is: {:x}", crc);
213 | 
214 |     Ok(crc)
215 | }
216 | 
217 | #[derive(Clone)]
218 | pub struct ArcDiskCache<K, D>
219 | where
220 |     K: Serialize
221 |         + DeserializeOwned
222 |         + AsRef<[u8]>
223 |         + Hash
224 |         + Eq
225 |         + Ord
226 |         + Clone
227 |         + Debug
228 |         + Sync
229 |         + Send
230 |         + 'static,
231 |     D: Serialize + DeserializeOwned + Clone + Debug + Sync + Send + 'static,
232 | {
233 |     cache: Arc<ARCache<K, CacheObj<K, D>>>,
234 |     stats: Arc<CowCell<CacheStats>>,
235 |     running: Arc<AtomicBool>,
236 |     durable_fs: bool,
237 |     u8_to_path: Vec<PathBuf>,
238 |     temp_path: PathBuf,
239 |     content_dir: PathBuf,
240 | }
241 | 
242 | impl<K, D> Drop for ArcDiskCache<K, D>
243 | where
244 |     K: Serialize
245 |         + DeserializeOwned
246 |         + AsRef<[u8]>
247 |         + Hash
248 |         + Eq
249 |         + Ord
250 |         + Clone
251 |         + Debug
252 |         + Sync
253 |         + Send
254 |         + 'static,
255 |     D: Serialize + DeserializeOwned + Clone + Debug + Sync + Send + 'static,
256 | {
257 |     fn drop(&mut self) {
258 |         trace!("ArcDiskCache - setting running to false");
259 |         self.running.store(false, Ordering::Release);
260 |     }
261 | }
262 | 
263 | impl<K, D> ArcDiskCache<K, D>
264 | where
265 |     K: Serialize
266 |         + DeserializeOwned
267 |         + AsRef<[u8]>
268 |         + Hash
269 |         + Eq
270 |         + Ord
271 |         + Clone
272 |         + Debug
273 |         + Sync
274 |         + Send
275 |         + 'static,
276 |     D: Serialize + DeserializeOwned + Clone + Debug + Sync + Send + 'static,
277 | {
278 |     pub fn new<P>(capacity: usize, content_dir: P, durable_fs: bool) -> io::Result<Self>
279 |     where
280 |         P: AsRef<Path>,
281 |     {
282 |         let content_dir: &Path = content_dir.as_ref();
283 | 
284 |         info!("capacity: {} content_dir: {:?}", capacity, content_dir);
285 | 
286 |         let cache = Arc::new(
287 |             ARCacheBuilder::new()
288 |                 .set_size(capacity, 0)
289 |                 .set_watermark(0)
290 |                 .set_reader_quiesce(false)
291 |                 .build()
292 |                 .ok_or_else(|| {
293 |                     io::Error::new(
294 |                         io::ErrorKind::Other,
295 |                         "Failed to build Arc Disk Cache - Invalid Parameters",
296 |                     )
297 |                 })?,
298 |         );
299 | 
300 |         let running = Arc::new(AtomicBool::new(true));
301 |         // Clean up the legacy content structure.
302 |         let entries = std::fs::read_dir(content_dir)?
303 |             .filter_map(|dir_ent| dir_ent.ok().map(|d| d.path()))
304 |             .filter(|dir_ent| !dir_ent.is_dir())
305 |             .collect::<Vec<_>>();
306 | 
307 |         let garbage_path = content_dir.join("garbage");
308 | 
309 |         if !garbage_path.exists() {
310 |             let _ = fs::create_dir(&garbage_path);
311 |         }
312 | 
313 |         let temp_path = content_dir.join("tmp");
314 | 
315 |         if !temp_path.exists() {
316 |             let _ = fs::create_dir(&temp_path);
317 |         }
318 | 
319 |         debug!("Start cleanup");
320 | 
321 |         for dir_ent in entries {
322 |             if let Some(fname) = dir_ent.file_name() {
323 |                 let dir_ent_gbg = garbage_path.join(fname);
324 |                 info!("{:?}", dir_ent_gbg);
325 |                 if !dir_ent_gbg.exists() {
326 |                     fs::rename(dir_ent, dir_ent_gbg).inspect_err(|err| error!(?err))?
327 |                 } else {
328 |                     warn!("Unable to cleanup {:?}", dir_ent_gbg);
329 |                 }
330 |             }
331 |         }
332 | 
333 |         debug!("start new content dirs");
334 | 
335 |         // Make a map for the u8 -> hex str.
336 |         let u8_to_path: Vec<_> = (0..=u8::MAX)
337 |             .map(|i| {
338 |                 let h = hex::encode([i]);
339 |                 content_dir.join(h)
340 |             })
341 |             .collect();
342 | 
343 |         for i in 0..=u8::MAX {
344 |             let c_path = &u8_to_path[i as usize];
345 |             trace!("content path {:?}", c_path);
346 |             if !c_path.exists() {
347 |                 fs::create_dir(c_path)?;
348 |             }
349 |         }
350 | 
351 |         // Now for everything in content dir, look if we have valid metadata
352 |         // and everything that isn't metadata.
353 |         let mut entries = Vec::with_capacity(u8::MAX as usize);
354 |         for i in 0..=u8::MAX {
355 |             let c_path = &u8_to_path[i as usize];
356 |             let read_dir = std::fs::read_dir(c_path)?;
357 | 
358 |             for dir_ent in read_dir {
359 |                 let de = dir_ent?;
360 |                 entries.push(de.path())
361 |             }
362 |         }
363 | 
364 |         entries.sort();
365 | 
366 |         debug!(?entries);
367 | 
368 |         let (meta, files): (Vec<_>, Vec<_>) = entries
369 |             .into_iter()
370 |             .partition(|p| p.extension() == Some(std::ffi::OsStr::new("meta")));
371 | 
372 |         let meta_len = meta.len();
373 |         info!("Will process {} metadata", meta_len);
374 | 
375 |         // Now we read each metadata in.
376 |         let meta: Vec<(PathBuf, CacheObjMeta<K, D>)> = meta
377 |             .into_iter()
378 |             .enumerate()
379 |             .filter_map(|(i, p)| {
380 |                 if i % 1000 == 0 {
381 |                     info!("{} of {}", i, meta_len);
382 |                 }
383 |                 trace!(?p, "meta read");
384 |                 File::open(&p)
385 |                     .ok()
386 |                     .map(BufReader::new)
387 |                     .and_then(|rdr| serde_json::from_reader(rdr).ok())
388 |                     .map(|m| (p.to_path_buf(), m))
389 |             })
390 |             .collect();
391 | 
392 |         let meta: Vec<CacheObj<K, D>> = meta
393 |             .into_iter()
394 |             .enumerate()
395 |             .filter_map(|(i, (meta_path, m))| {
396 |                 if i % 1000 == 0 {
397 |                     info!("{} of {}", i, meta_len);
398 |                 }
399 |                 let CacheObjMeta {
400 |                     key,
401 |                     path,
402 |                     crc,
403 |                     userdata,
404 |                 } = m;
405 | 
406 |                 if !path.exists() {
407 |                     return None;
408 |                 }
409 | 
410 |                 let mut file = File::open(&path).ok()?;
411 | 
412 |                 let amt = match file.metadata().map(|m| m.len() as usize) {
413 |                     Ok(a) => a,
414 |                     Err(e) => {
415 |                         error!("Unable to access metadata -> {:?}", e);
416 |                         return None;
417 |                     }
418 |                 };
419 | 
420 |                 if !durable_fs && amt >= CHECK_INLINE {
421 |                     // Check large files on startup ONLY
422 |                     let crc_ck = crc32c_len(&mut file).ok()?;
423 |                     if crc_ck != crc {
424 |                         warn!("file potentially corrupted - {:?}", meta_path);
425 |                         return None;
426 |                     }
427 |                 }
428 | 
429 |                 Some(CacheObj {
430 |                     key,
431 |                     userdata,
432 |                     fhandle: Arc::new(FileHandle {
433 |                         meta_path,
434 |                         path,
435 |                         amt,
436 |                         crc,
437 |                         running: running.clone(),
438 |                     }),
439 |                 })
440 |             })
441 |             .collect();
442 | 
443 |         info!("Found {:?} existing metadata", meta.len());
444 | 
445 |         // Now we prune any files that ARENT in our valid cache meta set.
446 |         let mut files: BTreeSet<_> = files.into_iter().collect();
447 |         meta.iter().for_each(|co| {
448 |             files.remove(&co.fhandle.path);
449 |         });
450 | 
451 |         files.iter().for_each(|p| {
452 |             trace!("🗑 -> {:?}", p);
453 |             let _ = std::fs::remove_file(p);
454 |         });
455 | 
456 |         // Finally setup the cache.
457 |         let mut wrtxn = cache.write();
458 |         meta.into_iter().for_each(|co| {
459 |             let key = co.key.clone();
460 |             let amt = NonZeroUsize::new(co.fhandle.amt)
461 |                 .unwrap_or(unsafe { NonZeroUsize::new_unchecked(1) });
462 |             wrtxn.insert_sized(key, co, amt);
463 |         });
464 |         wrtxn.commit();
465 | 
466 |         let stats = Arc::new(CowCell::new(CacheStats::default()));
467 | 
468 |         debug!("ArcDiskCache Ready!");
469 | 
470 |         Ok(ArcDiskCache {
471 |             cache,
472 |             running,
473 |             durable_fs,
474 |             stats,
475 |             u8_to_path,
476 |             temp_path,
477 |             // Only used for metadata.
478 |             content_dir: content_dir.to_owned(),
479 |         })
480 |     }
481 | 
482 |     pub fn get<Q>(&self, q: &Q) -> Option<CacheObj<K, D>>
483 |     where
484 |         K: Borrow<Q>,
485 |         Q: Hash + Eq + Ord + ?Sized,
486 |     {
487 |         let mut rtxn = self.cache.read();
488 |         let maybe_obj = rtxn
489 |             .get(q)
490 |             .and_then(|obj| {
491 |                 let mut file = File::open(&obj.fhandle.path).ok()?;
492 | 
493 |                 let amt = file
494 |                     .metadata()
495 |                     .map(|m| m.len() as usize)
496 |                     .map_err(|e| {
497 |                         error!("Unable to access metadata -> {:?}", e);
498 |                     })
499 |                     .ok()?;
500 | 
501 |                 if !self.durable_fs {
502 |                     if amt < CHECK_INLINE {
503 |                         let crc_ck = crc32c_len(&mut file).ok()?;
504 |                         if crc_ck != obj.fhandle.crc {
505 |                             warn!("file potentially corrupted - {:?}", obj.fhandle.meta_path);
506 |                             return None;
507 |                         }
508 |                     } else {
509 |                         info!("Skipping crc check, file too large");
510 |                     }
511 |                 }
512 | 
513 |                 Some(obj)
514 |             })
515 |             .cloned();
516 | 
517 |         // We manually quiesce and finish for stat management here
518 |         // In theory, this should only affect hit counts since evict / include
519 |         // should only occur in a write with how this is setup
520 |         rtxn.finish();
521 | 
522 |         let mut stat_guard = self.stats.write();
523 |         stat_guard.ops += 1;
524 |         if maybe_obj.is_some() {
525 |             stat_guard.hits += 1;
526 |         }
527 |         stat_guard.ratio = (f64::from(stat_guard.hits) / f64::from(stat_guard.ops)) * 100.0;
528 | 
529 |         let stats = self.cache.try_quiesce_stats(TraceStat::default());
530 |         stat_guard.update(stats);
531 |         stat_guard.commit();
532 | 
533 |         maybe_obj
534 |     }
535 | 
536 |     pub fn path(&self) -> &Path {
537 |         &self.content_dir
538 |     }
539 | 
540 |     pub fn view_stats(&self) -> CacheStats {
541 |         let read_stats = self.stats.read();
542 |         (*read_stats).clone()
543 |     }
544 | 
545 |     pub fn insert_bytes(&self, k: K, d: D, bytes: &[u8]) {
546 |         let mut fh = match self.new_tempfile() {
547 |             Some(fh) => fh,
548 |             None => return,
549 |         };
550 | 
551 |         if let Err(e) = fh.write(bytes) {
552 |             error!(?e, "failed to write bytes to file");
553 |             return;
554 |         };
555 | 
556 |         if let Err(e) = fh.flush() {
557 |             error!(?e, "failed to flush bytes to file");
558 |             return;
559 |         }
560 | 
561 |         self.insert(k, d, fh)
562 |     }
563 | 
564 |     // Add an item?
565 |     pub fn insert(&self, k: K, d: D, mut fh: NamedTempFile) {
566 |         let file = fh.as_file_mut();
567 | 
568 |         let amt = match file.metadata().map(|m| m.len() as usize) {
569 |             Ok(a) => a,
570 |             Err(e) => {
571 |                 error!("Unable to access metadata -> {:?}", e);
572 |                 return;
573 |             }
574 |         };
575 | 
576 |         let crc = match crc32c_len(file) {
577 |             Ok(v) => v,
578 |             Err(_) => return,
579 |         };
580 | 
581 |         // Need to salt the file path so that we don't accidentally collide.
582 |         let mut rng = rand::thread_rng();
583 |         let mut salt: [u8; 16] = [0; 16];
584 |         rng.fill(&mut salt);
585 | 
586 |         let k_slice: &[u8] = k.as_ref();
587 | 
588 |         let mut hasher = Sha256::new();
589 | 
590 |         hasher.update(k_slice);
591 |         hasher.update(salt);
592 | 
593 |         let adapted_k = hasher.finalize();
594 | 
595 |         trace!(adapted_k_len = %adapted_k.len());
596 | 
597 |         let i: u8 = adapted_k[0];
598 |         let key_str = hex::encode(adapted_k);
599 | 
600 |         let c_path = &self.u8_to_path[i as usize];
601 | 
602 |         let path = c_path.join(&key_str);
603 |         let mut meta_str = key_str.clone();
604 |         meta_str.push_str(".meta");
605 |         let meta_path = c_path.join(&meta_str);
606 | 
607 |         trace!(?path);
608 |         trace!(?meta_path);
609 | 
610 |         let objmeta = CacheObjMeta {
611 |             key: k.clone(),
612 |             path: path.clone(),
613 |             crc,
614 |             userdata: d.clone(),
615 |         };
616 | 
617 |         if meta_path.exists() {
618 |             warn!(
619 |                 immediate = true,
620 |                 "file collision detected, skipping write of {}", meta_str
621 |             );
622 |             return;
623 |         }
624 | 
625 |         let m_file = match File::create(&meta_path).map(BufWriter::new) {
626 |             Ok(f) => f,
627 |             Err(e) => {
628 |                 error!(
629 |                     immediate = true,
630 |                     "CRITICAL! Failed to open metadata {:?}", e
631 |                 );
632 |                 return;
633 |             }
634 |         };
635 | 
636 |         if let Err(e) = serde_json::to_writer(m_file, &objmeta) {
637 |             error!(
638 |                 immediate = true,
639 |                 "CRITICAL! Failed to write metadata {:?}", e
640 |             );
641 |             return;
642 |         } else {
643 |             // trace!("Persisted metadata for {:?}", &meta_path);
644 | 
645 |             if let Err(e) = fh.persist(&path) {
646 |                 error!(immediate = true, "CRITICAL! Failed to persist file {:?}", e);
647 |                 return;
648 |             }
649 |         }
650 | 
651 |         info!("Persisted data for {:?}", &path);
652 | 
653 |         // Can not fail from this point!
654 |         let co = CacheObj {
655 |             key: k.clone(),
656 |             userdata: d,
657 |             fhandle: Arc::new(FileHandle {
658 |                 meta_path,
659 |                 path,
660 |                 amt,
661 |                 crc,
662 |                 running: self.running.clone(),
663 |             }),
664 |         };
665 | 
666 |         let amt = NonZeroUsize::new(amt).unwrap_or(unsafe { NonZeroUsize::new_unchecked(1) });
667 | 
668 |         let mut wrtxn = self.cache.write_stats(TraceStat::default());
669 |         wrtxn.insert_sized(k, co, amt);
670 |         trace!("commit");
671 |         let stats = wrtxn.commit();
672 | 
673 |         let mut stat_guard = self.stats.write();
674 |         (*stat_guard).update(stats);
675 |         stat_guard.commit();
676 |     }
677 | 
678 |     // Given key, update the ud.
679 |     pub fn update_userdata<Q, F>(&self, q: &Q, mut func: F)
680 |     where
681 |         K: Borrow<Q>,
682 |         Q: Hash + Eq + Ord + ?Sized,
683 |         F: FnMut(&mut D),
684 |     {
685 |         let mut wrtxn = self.cache.write_stats(TraceStat::default());
686 | 
687 |         if let Some(mref) = wrtxn.get_mut(q, false) {
688 |             func(&mut mref.userdata);
689 | 
690 |             let objmeta = CacheObjMeta {
691 |                 key: mref.key.clone(),
692 |                 path: mref.fhandle.path.clone(),
693 |                 crc: mref.fhandle.crc,
694 |                 userdata: mref.userdata.clone(),
695 |             };
696 | 
697 |             // This will truncate the metadata if it does exist.
698 |             let m_file = File::create(&mref.fhandle.meta_path)
699 |                 .map(BufWriter::new)
700 |                 .map_err(|e| {
701 |                     error!("Failed to open metadata {:?}", e);
702 |                 })
703 |                 .unwrap();
704 | 
705 |             serde_json::to_writer(m_file, &objmeta)
706 |                 .map_err(|e| {
707 |                     error!("Failed to write metadata {:?}", e);
708 |                 })
709 |                 .unwrap();
710 | 
711 |             info!("Persisted metadata for {:?}", &mref.fhandle.meta_path);
712 |         }
713 | 
714 |         debug!("commit");
715 |         let stats = wrtxn.commit();
716 |         let mut stat_guard = self.stats.write();
717 |         (*stat_guard).update(stats);
718 |         stat_guard.commit();
719 |     }
720 | 
721 |     pub fn update_all_userdata<C, F>(&self, check: C, mut func: F)
722 |     where
723 |         C: Fn(&D) -> bool,
724 |         F: FnMut(&mut D),
725 |     {
726 |         let mut wrtxn = self.cache.write_stats(TraceStat::default());
727 | 
728 |         let keys: Vec<_> = wrtxn
729 |             .iter()
730 |             .filter_map(|(k, mref)| {
731 |                 if check(&mref.userdata) {
732 |                     Some(k.clone())
733 |                 } else {
734 |                     None
735 |                 }
736 |             })
737 |             .collect();
738 | 
739 |         for k in keys {
740 |             if let Some(mref) = wrtxn.get_mut(&k, false) {
741 |                 func(&mut mref.userdata);
742 | 
743 |                 let objmeta = CacheObjMeta {
744 |                     key: mref.key.clone(),
745 |                     path: mref.fhandle.path.clone(),
746 |                     crc: mref.fhandle.crc,
747 |                     userdata: mref.userdata.clone(),
748 |                 };
749 | 
750 |                 // This will truncate the metadata if it does exist.
751 |                 let m_file = File::create(&mref.fhandle.meta_path)
752 |                     .map(BufWriter::new)
753 |                     .map_err(|e| {
754 |                         error!("Failed to open metadata {:?}", e);
755 |                     })
756 |                     .unwrap();
757 | 
758 |                 serde_json::to_writer(m_file, &objmeta)
759 |                     .map_err(|e| {
760 |                         error!("Failed to write metadata {:?}", e);
761 |                     })
762 |                     .unwrap();
763 | 
764 |                 info!("Persisted metadata for {:?}", &mref.fhandle.meta_path);
765 |             }
766 |         }
767 | 
768 |         debug!("commit");
769 |         let stats = wrtxn.commit();
770 |         let mut stat_guard = self.stats.write();
771 |         (*stat_guard).update(stats);
772 |         stat_guard.commit();
773 |     }
774 | 
775 |     // Remove a key
776 |     pub fn remove(&self, k: K) {
777 |         let mut wrtxn = self.cache.write_stats(TraceStat::default());
778 |         wrtxn.remove(k);
779 |         // This causes the handles to be dropped and binned.
780 |         debug!("commit");
781 |         let stats = wrtxn.commit();
782 |         let mut stat_guard = self.stats.write();
783 |         (*stat_guard).update(stats);
784 |         stat_guard.commit();
785 |     }
786 | 
787 |     //
788 |     pub fn new_tempfile(&self) -> Option<NamedTempFile> {
789 |         NamedTempFile::new_in(&self.temp_path)
790 |             .map_err(|e| error!(?e))
791 |             .ok()
792 |     }
793 | }
794 | 
795 | #[cfg(test)]
796 | mod tests {
797 |     use super::ArcDiskCache;
798 |     use std::io::Write;
799 |     use tempfile::tempdir;
800 | 
801 |     #[test]
802 |     fn disk_cache_test_basic() {
803 |         let _ = tracing_subscriber::fmt::try_init();
804 | 
805 |         let dir = tempdir().expect("Failed to build tempdir");
806 |         // let dir = std::path::PathBuf::from("/tmp/dc");
807 | 
808 |         // Need a new temp dir
809 |         let dc: ArcDiskCache<Vec<u8>, ()> = ArcDiskCache::new(1024, &dir, false).unwrap();
810 | 
811 |         let mut fh = dc.new_tempfile().unwrap();
812 |         let k = vec![0, 1, 2, 3, 4, 5];
813 | 
814 |         let file = fh.as_file_mut();
815 |         file.write_all(b"Hello From Cache").unwrap();
816 | 
817 |         dc.insert(k, (), fh);
818 |     }
819 | }
820 | 
--------------------------------------------------------------------------------
/cargo_config:
--------------------------------------------------------------------------------
1 | [source.crates-io]
2 | replace-with = "vendored-sources"
3 | 
4 | [source.vendored-sources]
5 | directory = "vendor"
6 | 
--------------------------------------------------------------------------------
/opensuse-proxy-cache/Cargo.toml:
--------------------------------------------------------------------------------
1 | [package]
2 | name = "opensuse-proxy-cache"
3 | version = "0.1.0"
4 | edition = "2018"
5 | 
6 | [profile.release]
7 | debug = 2
8 | lto = "thin"
9 | 
10 | [features]
11 | default = []
12 | dhat-heap = [ "dhat" ] # if you are doing heap profiling
13 | dhat-ad-hoc = [ "dhat" ] # if you are doing ad hoc profiling
14 | 
15 | [dependencies]
16 | 
17 | # rypper_reader = { path = "/Users/william/development/rypper/rypper-reader" }
18 | # rypper_reader = { git = "https://codeberg.org/Rusty-Geckos/rypper.git" }
19 | 
20 | askama = "0.12.1"
21 | async-tftp = "0.3"
22 | bloomfilter = "1.0"
23 | dhat = { version = "0.3", optional = true }
24 | clap = { version = "4.5", features = ["derive", "env"] }
25 | 
26 | bytes = "1.6"
27 | 
28 | tokio = { version = "1", features = ["full", "tracing"] }
29 | tokio-util = { version = "0.7", features = ["io"] }
30 | tokio-stream = { version = "0.1" }
31 | url = "2"
32 | mime = "0.3"
33 | pin-project-lite = "0.2"
34 | tempfile = "3.10"
35 | # hex = "0.4"
36 | serde = "1.0"
37 | serde_json = "1.0"
38 | # httpdate = "1"
39 | time = { version = "0.3", features = ["serde"]}
40 | regex = "1"
41 | lazy_static = "1.4"
42 | 
43 | axum = { version = "0.7", features = [ "http1", "tokio", "macros" ] }
44 | axum-server = { version = "0.6", features = ["tls-rustls"] }
45 | futures-util = "0.3"
46 | hyper = { version = "1.2", features = ["full"] }
47 | tower = { version = "0.4", features = ["make"] }
48 | reqwest = { version = "0.12", features = ["stream", "rustls-tls-native-roots"] }
49 | 
50 | 
51 | console-subscriber = "0.2.0"
52 | tracing = { version = "0.1", features = ["attributes"] }
53 | tracing-subscriber = { version = "0.3", features = ["env-filter"] }
54 | tracing-forest = { version = "0.1.6", features = ["smallvec", "tokio", "uuid"] }
55 | 
56 | arc-disk-cache = { path = "../arc-disk-cache" }
57 | 
58 | # Oauth
59 | # rand = "0.8"
60 | # async-trait = "0.1"
61 | # anyhow = "1"
62 | # chrono = "0.4"
63 | # oauth2 = { version = "4.0", default-features =
false, features = ["reqwest", "native-tls"] } 64 | 65 | lru = "0.12" 66 | 67 | -------------------------------------------------------------------------------- /opensuse-proxy-cache/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM opensuse/tumbleweed:latest AS ref_repo 2 | 3 | # RUN zypper mr -d repo-non-oss && \ 4 | # zypper mr -d repo-oss && \ 5 | # zypper mr -d repo-update && \ 6 | # zypper ar http://dl.suse.blackhats.net.au:8080/update/tumbleweed/ repo-update-https && \ 7 | # zypper ar http://dl.suse.blackhats.net.au:8080/tumbleweed/repo/oss/ repo-oss-https && \ 8 | # zypper ar http://dl.suse.blackhats.net.au:8080/tumbleweed/repo/non-oss/ repo-non-oss-https && \ 9 | # zypper --gpg-auto-import-keys ref --force 10 | 11 | # FROM opensuse/leap:latest AS ref_repo 12 | # RUN sed -i -E 's/https?:\/\/download.opensuse.org/https:\/\/mirrorcache.firstyear.id.au/g' /etc/zypp/repos.d/*.repo && \ 13 | 14 | RUN sed -i -E 's/https?:\/\/download.opensuse.org/http:\/\/os.int.firstyear.id.au/g' /etc/zypp/repos.d/*.repo && \ 15 | zypper mr -d repo-openh264 && \ 16 | zypper --gpg-auto-import-keys ref --force 17 | 18 | # // setup the builder pkgs 19 | FROM ref_repo AS build_base 20 | RUN zypper install -y cargo rust gcc sqlite3-devel libopenssl-3-devel sccache perl make 21 | 22 | # // setup the runner pkgs 23 | FROM ref_repo AS run_base 24 | RUN zypper install -y sqlite3 openssl-3 timezone iputils iproute2 curl \ 25 | ipxe-bootimgs 26 | 27 | # memtest86+ 28 | 29 | # COPY ipxe-bootimgs-1.21.1+git20240329.764e34f-0.x86_64.rpm /usr/share/ipxe-bootimgs.rpm 30 | # RUN zypper install -y --allow-unsigned-rpm /usr/share/ipxe-bootimgs.rpm 31 | 32 | COPY SUSE_CA_Root.pem /etc/pki/trust/anchors/ 33 | RUN /usr/sbin/update-ca-certificates 34 | 35 | # RUN cp /boot/efi/EFI/memtest86/memtest.efi /usr/share/ipxe/memtest.efi 36 | COPY memtest64.7.00.efi /usr/share/ipxe/memtest.efi 37 | 38 | # // build artifacts 39 | FROM build_base AS builder 40 | 41 | COPY . /home/proxy/ 42 | RUN mkdir /home/proxy/.cargo 43 | COPY cargo_config /home/proxy/.cargo/config.toml 44 | WORKDIR /home/proxy/opensuse-proxy-cache 45 | 46 | # RUSTFLAGS="-Ctarget-cpu=x86-64-v3" 47 | # 48 | # SCCACHE_REDIS=redis://redis.dev.blackhats.net.au:6379 \ 49 | # RUSTC_WRAPPER=sccache \ 50 | 51 | RUN if [ "$(uname -i)" = "x86_64" ]; then export RUSTFLAGS="-Ctarget-cpu=x86-64-v3 --cfg tokio_unstable"; fi && \ 52 | RUST_BACKTRACE=full \ 53 | cargo build --release 54 | 55 | # == end builder setup, we now have static artifacts. 
56 | FROM run_base 57 | MAINTAINER william@blackhats.net.au 58 | EXPOSE 8080 59 | EXPOSE 8443 60 | WORKDIR / 61 | 62 | # RUN cd /etc && \ 63 | # ln -sf ../usr/share/zoneinfo/Australia/Brisbane localtime 64 | 65 | COPY --from=builder /home/proxy/target/release/opensuse-proxy-cache /bin/ 66 | 67 | HEALTHCHECK --interval=15s --timeout=2s --start-period=8m CMD curl -f http://localhost:8080/_status || exit 1 68 | STOPSIGNAL SIGINT 69 | 70 | ENV RUST_BACKTRACE 1 71 | CMD ["/bin/opensuse-proxy-cache"] 72 | -------------------------------------------------------------------------------- /opensuse-proxy-cache/src/auth.rs: -------------------------------------------------------------------------------- 1 | use crate::AppState; 2 | pub(crate) use oauth2::basic::BasicClient; 3 | 4 | use oauth2::reqwest::http_client; 5 | use oauth2::{ 6 | AuthUrl, AuthorizationCode, ClientId, ClientSecret, CsrfToken, IntrospectionUrl, 7 | PkceCodeChallenge, PkceCodeVerifier, RedirectUrl, Scope, TokenIntrospectionResponse, 8 | TokenResponse, TokenUrl, 9 | }; 10 | use serde::{Deserialize, Serialize}; 11 | use std::sync::Arc; 12 | 13 | pub struct AuthMiddleware; 14 | 15 | impl AuthMiddleware { 16 | pub fn new() -> Self { 17 | AuthMiddleware {} 18 | } 19 | } 20 | 21 | #[async_trait::async_trait] 22 | impl tide::Middleware for AuthMiddleware { 23 | async fn handle( 24 | &self, 25 | mut request: tide::Request, 26 | next: tide::Next<'_, State>, 27 | ) -> tide::Result { 28 | let maybe_exp: Option> = request.session().get("exp"); 29 | if let Some(exp) = maybe_exp { 30 | let now = chrono::offset::Utc::now(); 31 | if exp > now { 32 | info!("authenticated session found"); 33 | let response = next.run(request).await; 34 | Ok(response) 35 | } else { 36 | info!("expired session, redirecting"); 37 | request.session_mut().remove("exp"); 38 | 39 | Ok(tide::Redirect::new("/oauth/login").into()) 40 | } 41 | } else { 42 | info!("authenticated NOT found, redirecting"); 43 | Ok(tide::Redirect::new("/oauth/login").into()) 44 | } 45 | } 46 | } 47 | 48 | pub fn configure_oauth( 49 | client_id: &str, 50 | client_secret: &str, 51 | client_url: &str, 52 | server_url: &str, 53 | ) -> BasicClient { 54 | let client_id = ClientId::new(client_id.to_string()); 55 | let secret = ClientSecret::new(client_secret.to_string()); 56 | let auth_url = AuthUrl::new(format!("{}/ui/oauth2", server_url)).unwrap(); 57 | let token_url = TokenUrl::new(format!("{}/oauth2/token", server_url)).unwrap(); 58 | let intro_url = 59 | IntrospectionUrl::new(format!("{}/oauth2/token/introspect", server_url)).unwrap(); 60 | let redir_url = RedirectUrl::new(format!("{}/oauth/response", client_url)).unwrap(); 61 | 62 | BasicClient::new(client_id, Some(secret), auth_url, Some(token_url)) 63 | .set_redirect_uri(redir_url) 64 | .set_introspection_uri(intro_url) 65 | } 66 | 67 | // Not authenticated - kick of the redir to oauth. 68 | pub(crate) async fn login_view(mut request: tide::Request>) -> tide::Result { 69 | let (pkce_code_challenge, pkce_code_verifier) = PkceCodeChallenge::new_random_sha256(); 70 | 71 | debug!("challenge -> {:?}", pkce_code_challenge.as_str()); 72 | debug!("secret -> {:?}", pkce_code_verifier.secret()); 73 | 74 | let (auth_url, csrf_token) = request 75 | .state() 76 | .oauth 77 | .as_ref() 78 | .ok_or_else(|| tide::Error::new(403 as u16, anyhow::Error::msg("Forbidden")))? 
79 | .authorize_url(CsrfToken::new_random) 80 | .add_scope(Scope::new("read".to_string())) 81 | .set_pkce_challenge(pkce_code_challenge) 82 | .url(); 83 | 84 | // We can stash the verifier in the session. 85 | let session = request.session_mut(); 86 | session 87 | .insert("pkce_code_verifier", &pkce_code_verifier) 88 | .unwrap(); 89 | session.insert("csrf_token", &csrf_token).unwrap(); 90 | 91 | info!("starting oauth"); 92 | Ok(tide::Redirect::new(auth_url.as_str()).into()) 93 | } 94 | 95 | #[derive(Debug, Serialize, Deserialize)] 96 | struct OauthResp { 97 | state: CsrfToken, 98 | code: AuthorizationCode, 99 | } 100 | 101 | // Handle the response 102 | pub(crate) async fn oauth_view(mut request: tide::Request>) -> tide::Result { 103 | // How do we get the params out? 104 | let resp: OauthResp = request.query().map_err(|e| { 105 | error!("{:?}", e); 106 | e 107 | })?; 108 | 109 | debug!("resp -> {:?}", resp); 110 | 111 | // get the verifier and csrf token 112 | let session = request.session(); 113 | let pkce_code_verifier: PkceCodeVerifier = 114 | session.get("pkce_code_verifier").ok_or_else(|| { 115 | error!("pkce"); 116 | tide::Error::new(500 as u16, anyhow::Error::msg("pkce")) 117 | })?; 118 | debug!("secret -> {:?}", pkce_code_verifier.secret()); 119 | let csrf_token: CsrfToken = session.get("csrf_token").ok_or_else(|| { 120 | error!("csrf"); 121 | tide::Error::new(500 as u16, anyhow::Error::msg("csrf")) 122 | })?; 123 | 124 | // Compare state to csrf token. 125 | if csrf_token.secret() != resp.state.secret() { 126 | error!("csrf validation"); 127 | // give an error? 128 | return Ok(tide::Response::builder(tide::StatusCode::Conflict) 129 | .body("csrf failure") 130 | .build()); 131 | } 132 | 133 | let r_token = request 134 | .state() 135 | .oauth 136 | .as_ref() 137 | .ok_or_else(|| tide::Error::new(403 as u16, anyhow::Error::msg("Forbidden")))? 138 | .exchange_code(resp.code) 139 | .set_pkce_verifier(pkce_code_verifier) 140 | // .request_async(async_http_client) 141 | .request(http_client); 142 | 143 | let tr = match r_token { 144 | Ok(tr) => { 145 | debug!("{:?}", tr.access_token()); 146 | debug!("{:?}", tr.token_type()); 147 | debug!("{:?}", tr.scopes()); 148 | tr 149 | } 150 | Err(e) => { 151 | error!("oauth2 token request failure - {:?}", e); 152 | return Ok( 153 | tide::Response::builder(tide::StatusCode::InternalServerError) 154 | .body("token request failure") 155 | .build(), 156 | ); 157 | } 158 | }; 159 | 160 | let intro_result = request 161 | .state() 162 | .oauth 163 | .as_ref() 164 | .ok_or_else(|| tide::Error::new(403 as u16, anyhow::Error::msg("Forbidden")))? 
165 | .introspect(tr.access_token()) 166 | .unwrap() 167 | .request(http_client); 168 | 169 | info!("{:?}", intro_result); 170 | 171 | match intro_result { 172 | Ok(ir) => { 173 | let exp = ir.exp().unwrap(); 174 | let username = ir.username().unwrap(); 175 | 176 | if let Err(e) = request.session_mut().insert("exp", exp) 177 | .and_then(|_| request.session_mut().insert("username", username)) 178 | { 179 | error!(?e, "Failed to setup request session"); 180 | Ok( 181 | tide::Response::builder(tide::StatusCode::InternalServerError) 182 | .body("session failure") 183 | .build() 184 | ) 185 | } else { 186 | Ok(tide::Redirect::new("/_admin").into()) 187 | } 188 | 189 | } 190 | Err(e) => { 191 | error!("oauth2 token request failure - {:?}", e); 192 | Ok( 193 | tide::Response::builder(tide::StatusCode::InternalServerError) 194 | .body("token request failure") 195 | .build(), 196 | ) 197 | } 198 | } 199 | } 200 | -------------------------------------------------------------------------------- /opensuse-proxy-cache/src/cache.rs: -------------------------------------------------------------------------------- 1 | use crate::constants::*; 2 | use bloomfilter::Bloom; 3 | use std::collections::BTreeMap; 4 | use std::path::Path; 5 | use std::sync::atomic::Ordering; 6 | use std::sync::Mutex; 7 | use tempfile::NamedTempFile; 8 | use time::OffsetDateTime; 9 | use tokio::sync::mpsc::{channel, Receiver, Sender}; 10 | use tokio::time::{sleep, Duration}; 11 | use url::Url; 12 | 13 | use arc_disk_cache::{ArcDiskCache, CacheObj}; 14 | 15 | use serde::{Deserialize, Serialize}; 16 | 17 | const PENDING_ADDS: usize = 8; 18 | 19 | #[derive(Debug, Serialize, Deserialize, Clone)] 20 | pub struct Status { 21 | pub req_path: String, 22 | pub headers: BTreeMap, 23 | // Soft Hard 24 | pub expiry: Option<(OffsetDateTime, OffsetDateTime)>, 25 | pub cls: Classification, 26 | pub nxtime: Option, 27 | } 28 | 29 | #[derive(Debug)] 30 | pub struct CacheMeta { 31 | // Clippy will whinge about variant sizes here. 32 | pub req_path: String, 33 | // Add the time this was added 34 | pub etime: OffsetDateTime, 35 | pub action: Action, 36 | } 37 | 38 | /* 39 | #[derive(Clone, Debug)] 40 | pub struct CacheObj { 41 | pub req_path: String, 42 | pub fhandle: Arc, 43 | pub headers: BTreeMap, 44 | pub soft_expiry: Option, 45 | pub expiry: Option, 46 | pub cls: Classification, 47 | } 48 | */ 49 | 50 | #[derive(Debug)] 51 | pub enum Action { 52 | Submit { 53 | file: NamedTempFile, 54 | // These need to be extracted 55 | headers: BTreeMap, 56 | // amt: usize, 57 | // hash_str: String, 58 | cls: Classification, 59 | }, 60 | Update, 61 | NotFound { 62 | cls: Classification, 63 | }, 64 | } 65 | 66 | pub enum CacheDecision { 67 | // We can't cache this, stream it from a remote. 68 | Stream(Url), 69 | // We have this item, and can send from our cache. 70 | FoundObj(CacheObj), 71 | // We don't have this item but we want it, so please dl it to this location 72 | // then notify this cache. 73 | MissObj( 74 | Url, 75 | NamedTempFile, 76 | Sender, 77 | Classification, 78 | Option>, 79 | ), 80 | // Refresh - we can also prefetch some paths in the background. 81 | Refresh( 82 | Url, 83 | NamedTempFile, 84 | Sender, 85 | CacheObj, 86 | Option>, 87 | ), 88 | // We found it, but we also want to refresh in the background. 89 | AsyncRefresh( 90 | Url, 91 | NamedTempFile, 92 | Sender, 93 | CacheObj, 94 | Option>, 95 | ), 96 | NotFound, 97 | // Can't proceed, something is wrong. 
98 | Invalid, 99 | } 100 | 101 | #[derive(Debug, Copy, Clone, PartialEq, Serialize, Deserialize)] 102 | pub enum Classification { 103 | // The more major repos 104 | RepomdXmlSlow, 105 | // Stuff from obs 106 | RepomdXmlFast, 107 | // Metadata, related to repos. 108 | Metadata, 109 | // Large blobs that need a slower rate of refresh. Some proxies 110 | // may choose not to cache this at all. 111 | Blob, 112 | // Content that has inbuilt version strings, that we can 113 | // keep forever. 114 | Static, 115 | // 🤔 116 | Unknown, 117 | // Spam ... ffs 118 | Spam, 119 | } 120 | 121 | impl Classification { 122 | fn prefetch( 123 | &self, 124 | path: &Path, 125 | pri_cache: &ArcDiskCache, 126 | complete: bool, 127 | ) -> Option> { 128 | match self { 129 | Classification::RepomdXmlSlow | Classification::RepomdXmlFast => { 130 | path.parent().and_then(|p| p.parent()).map(|p| { 131 | let mut v = vec![]; 132 | if let Some(temp_file) = pri_cache.new_tempfile() { 133 | v.push(( 134 | p.join("media.1/media") 135 | .to_str() 136 | .map(str::to_string) 137 | .unwrap(), 138 | temp_file, 139 | Classification::Metadata, 140 | )) 141 | }; 142 | 143 | if let Some(temp_file) = pri_cache.new_tempfile() { 144 | v.push(( 145 | p.join("repodata/repomd.xml.asc") 146 | .to_str() 147 | .map(str::to_string) 148 | .unwrap(), 149 | temp_file, 150 | Classification::Metadata, 151 | )) 152 | }; 153 | 154 | if let Some(temp_file) = pri_cache.new_tempfile() { 155 | v.push(( 156 | p.join("repodata/repomd.xml.key") 157 | .to_str() 158 | .map(str::to_string) 159 | .unwrap(), 160 | temp_file, 161 | Classification::Metadata, 162 | )) 163 | }; 164 | if complete { 165 | if let Some(temp_file) = pri_cache.new_tempfile() { 166 | v.push(( 167 | p.join("repodata/repomd.xml") 168 | .to_str() 169 | .map(str::to_string) 170 | .unwrap(), 171 | temp_file, 172 | Classification::Metadata, 173 | )) 174 | }; 175 | }; 176 | v 177 | }) 178 | } 179 | /* 180 | Classification::Metadata => { 181 | 182 | } 183 | */ 184 | _ => None, 185 | } 186 | } 187 | 188 | pub fn expiry(&self, etime: OffsetDateTime) -> Option<(OffsetDateTime, OffsetDateTime)> { 189 | match self { 190 | // We can now do async prefetching on bg refreshes so this keeps everything in sync. 191 | Classification::RepomdXmlSlow => Some(( 192 | etime + time::Duration::minutes(10), 193 | etime + time::Duration::hours(180), 194 | )), 195 | Classification::RepomdXmlFast => Some(( 196 | etime + time::Duration::minutes(1), 197 | etime + time::Duration::minutes(180), 198 | )), 199 | Classification::Metadata => Some(( 200 | etime + time::Duration::minutes(15), 201 | etime + time::Duration::hours(24), 202 | )), 203 | Classification::Blob => Some(( 204 | // etime + time::Duration::hours(2), 205 | etime + time::Duration::minutes(15), 206 | etime + time::Duration::hours(336), 207 | )), 208 | Classification::Static => Some(( 209 | // Because OBS keeps publishing incorrect shit ... 
210 | // etime + time::Duration::hours(2), 211 | etime + time::Duration::minutes(15), 212 | etime + time::Duration::hours(336), 213 | )), 214 | Classification::Unknown => Some((etime, etime + time::Duration::minutes(5))), 215 | Classification::Spam => None, 216 | } 217 | } 218 | } 219 | 220 | pub struct Cache { 221 | pri_cache: ArcDiskCache, 222 | clob: bool, 223 | wonder_guard: bool, 224 | mirror_chain: Option, 225 | bloom: Mutex>, 226 | pub submit_tx: Sender, 227 | } 228 | 229 | impl Cache { 230 | pub fn new( 231 | capacity: usize, 232 | content_dir: &Path, 233 | clob: bool, 234 | wonder_guard: bool, 235 | durable_fs: bool, 236 | mirror_chain: Option, 237 | ) -> std::io::Result { 238 | let pri_cache = ArcDiskCache::new(capacity, content_dir, durable_fs)?; 239 | let (submit_tx, submit_rx) = channel(PENDING_ADDS); 240 | let pri_cache_cln = pri_cache.clone(); 241 | 242 | let bloom = Mutex::new(Bloom::new_for_fp_rate(65536, 0.001)); 243 | 244 | let _ = tokio::task::spawn_blocking(move || cache_mgr(submit_rx, pri_cache_cln)); 245 | 246 | let pri_cache_cln = pri_cache.clone(); 247 | let _ = tokio::task::spawn(async move { cache_stats(pri_cache_cln).await }); 248 | 249 | Ok(Cache { 250 | pri_cache, 251 | bloom, 252 | clob, 253 | wonder_guard, 254 | mirror_chain, 255 | submit_tx, 256 | }) 257 | } 258 | 259 | fn url(&self, cls: &Classification, req_path: &str) -> Url { 260 | let mut url = if let Some(m_url) = self.mirror_chain.as_ref() { 261 | m_url.clone() 262 | } else { 263 | match cls { 264 | Classification::RepomdXmlSlow 265 | | Classification::Metadata 266 | | Classification::RepomdXmlFast 267 | | Classification::Spam => MCS_OS_URL.clone(), 268 | Classification::Blob | Classification::Static | Classification::Unknown => { 269 | DL_OS_URL.clone() 270 | } 271 | } 272 | }; 273 | 274 | url.set_path(req_path); 275 | url 276 | } 277 | 278 | /* 279 | pub fn contains(&self, req_path: &str) -> bool { 280 | let req_path = req_path.replace("//", "/"); 281 | let req_path_trim = req_path.as_str(); 282 | self.pri_cache.get(req_path_trim).is_some() 283 | } 284 | */ 285 | 286 | pub fn decision(&self, req_path: &str, head_req: bool) -> CacheDecision { 287 | let req_path = req_path.replace("//", "/"); 288 | let req_path_trim = req_path.as_str(); 289 | info!("🤔 contemplating req -> {:?}", req_path_trim); 290 | 291 | let path = Path::new(req_path_trim); 292 | 293 | // If the path fails some validations, refuse to proceed. 294 | if !path.is_absolute() { 295 | error!("path not absolute"); 296 | return CacheDecision::Invalid; 297 | } 298 | 299 | let fname = if req_path_trim.ends_with("/") { 300 | "index.html".to_string() 301 | } else { 302 | path.file_name() 303 | .and_then(|f| f.to_str().map(str::to_string)) 304 | .unwrap_or_else(|| "index.html".to_string()) 305 | }; 306 | 307 | debug!(" fname --> {:?}", fname); 308 | 309 | let cls = self.classify(&fname, req_path_trim); 310 | 311 | // Just go away. 312 | if cls == Classification::Spam { 313 | debug!("SPAM"); 314 | return CacheDecision::NotFound; 315 | } 316 | 317 | let now = time::OffsetDateTime::now_utc(); 318 | match self.pri_cache.get(req_path_trim) { 319 | Some(cache_obj) => { 320 | match &cache_obj.userdata.nxtime { 321 | None => { 322 | // If we hit, we need to decide if this 323 | // is a found item or something that may need 324 | // a refresh. 
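// Expiry is a (soft, hard) pair: once `now` passes the soft expiry we
// revalidate in the background (AsyncRefresh) while still serving the cached
// copy; once it passes the hard expiry we block and revalidate inline
// (Refresh). Both paths are only taken while the upstream is online.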
325 | if let Some((softexp, hardexp)) = cache_obj.userdata.expiry { 326 | debug!("now: {} - {} {}", now, softexp, hardexp); 327 | 328 | let temp_file = match self.pri_cache.new_tempfile() { 329 | Some(f) => f, 330 | None => { 331 | error!("TEMP FILE COULD NOT BE CREATED - FORCE STREAM"); 332 | return CacheDecision::Stream(self.url(&cls, req_path_trim)); 333 | } 334 | }; 335 | 336 | if now > softexp && UPSTREAM_ONLINE.load(Ordering::Relaxed) { 337 | if now > hardexp { 338 | debug!("EXPIRED INLINE REFRESH"); 339 | return CacheDecision::Refresh( 340 | self.url(&cls, req_path_trim), 341 | temp_file, 342 | self.submit_tx.clone(), 343 | cache_obj, 344 | cls.prefetch(&path, &self.pri_cache, head_req), 345 | ); 346 | } else { 347 | debug!("EXPIRED ASYNC REFRESH"); 348 | return CacheDecision::AsyncRefresh( 349 | self.url(&cls, req_path_trim), 350 | temp_file, 351 | self.submit_tx.clone(), 352 | cache_obj, 353 | cls.prefetch(&path, &self.pri_cache, head_req), 354 | ); 355 | } 356 | } 357 | } 358 | 359 | debug!("HIT"); 360 | CacheDecision::FoundObj(cache_obj) 361 | } 362 | Some(etime) => { 363 | // When we refresh this, we treat it as a MissObj, not a refresh. 364 | if &now > etime && UPSTREAM_ONLINE.load(Ordering::Relaxed) { 365 | debug!("NX EXPIRED"); 366 | let temp_file = match self.pri_cache.new_tempfile() { 367 | Some(f) => f, 368 | None => { 369 | error!("TEMP FILE COULD NOT BE CREATED - FORCE 404"); 370 | return CacheDecision::NotFound; 371 | } 372 | }; 373 | 374 | return CacheDecision::MissObj( 375 | self.url(&cls, req_path_trim), 376 | temp_file, 377 | self.submit_tx.clone(), 378 | cls, 379 | cls.prefetch(&path, &self.pri_cache, head_req), 380 | ); 381 | } 382 | 383 | debug!("NX VALID - force notfound to 404"); 384 | return CacheDecision::NotFound; 385 | } 386 | } 387 | } 388 | None => { 389 | // NEED TO MOVE NX HERE 390 | 391 | // Is it in the bloom filter? We want to check if it's a "one hit wonder". 392 | let can_cache = if cls == Classification::Blob && !self.clob { 393 | // It's a blob, and cache large object is false 394 | info!("cache_large_object=false - skip caching of blob item"); 395 | false 396 | } else if self.wonder_guard { 397 | // Lets check it's in the wonder guard? 398 | let x = { 399 | let mut bguard = self.bloom.lock().unwrap(); 400 | bguard.check_and_set(&req_path) 401 | }; 402 | if !x { 403 | info!("wonder_guard - skip caching of one hit item"); 404 | } 405 | x 406 | } else { 407 | // Yep, we can cache it as we aren't wonder guarding. 408 | true 409 | }; 410 | 411 | // If miss, we need to choose between stream and 412 | // miss. 
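// On a miss we either cache-and-stream (MissObj) or just proxy (Stream):
// large blobs with caching disabled and one-hit wonders are streamed, and if
// no tempfile can be created we also fall back to streaming. With the
// upstream offline a miss is answered with a 404 instead.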
413 | debug!("MISS"); 414 | 415 | if UPSTREAM_ONLINE.load(Ordering::Relaxed) { 416 | match (cls, can_cache, self.pri_cache.new_tempfile()) { 417 | (_, false, _) => CacheDecision::Stream(self.url(&cls, req_path_trim)), 418 | (cls, _, Some(temp_file)) => CacheDecision::MissObj( 419 | self.url(&cls, req_path_trim), 420 | temp_file, 421 | self.submit_tx.clone(), 422 | cls, 423 | cls.prefetch(&path, &self.pri_cache, head_req), 424 | ), 425 | (cls, _, None) => { 426 | error!("TEMP FILE COULD NOT BE CREATED - FORCE STREAM"); 427 | CacheDecision::Stream(self.url(&cls, req_path_trim)) 428 | } 429 | } 430 | } else { 431 | warn!("upstream offline - force miss to 404"); 432 | // If we are offline, just give a 404 433 | CacheDecision::NotFound 434 | } // end upstream online 435 | } 436 | } 437 | } 438 | 439 | fn classify(&self, fname: &str, req_path: &str) -> Classification { 440 | if fname == "repomd.xml" { 441 | if req_path.starts_with("/repositories/") { 442 | // These are obs 443 | info!("Classification::RepomdXmlFast"); 444 | Classification::RepomdXmlFast 445 | } else { 446 | info!("Classification::RepomdXmlSlow"); 447 | Classification::RepomdXmlSlow 448 | } 449 | } else if fname == "media" 450 | || fname == "products" 451 | || fname == "repomd.xml.key" 452 | || fname == "ARCHIVES.gz" 453 | || fname.ends_with("asc") 454 | || fname.ends_with("sha256") 455 | || fname.ends_with("mirrorlist") 456 | || fname.ends_with("metalink") 457 | || fname.ends_with(".repo") 458 | // Arch 459 | || fname.ends_with("Arch.key") 460 | || fname.ends_with("Arch.db") 461 | || fname.ends_with("Arch.db.tar.gz") 462 | || fname.ends_with("Arch.files") 463 | || fname.ends_with("Arch.files.tar.gz") 464 | || fname.ends_with(".sig") 465 | || fname.ends_with(".files") 466 | // Deb 467 | || fname == "Packages" 468 | || fname == "Packages.gz" 469 | || fname == "Release" 470 | || fname == "Release.gpg" 471 | || fname == "Release.key" 472 | || fname == "Sources" 473 | || fname == "Sources.gz" 474 | || fname.ends_with(".dsc") 475 | // Html 476 | || fname.ends_with("html") 477 | || fname.ends_with("js") 478 | || fname.ends_with("css") 479 | // Html assets - we make this metadata because else it's inconsistent between 480 | // MC and DL.O.O 481 | || fname.ends_with("svg") 482 | || fname.ends_with("png") 483 | || fname.ends_with("jpg") 484 | || fname.ends_with("gif") 485 | || fname.ends_with("ttf") 486 | || fname.ends_with("woff") 487 | || fname.ends_with("woff2") 488 | || fname == "favicon.ico" 489 | // -- 490 | // Related to live boots of tumbleweed. 491 | // These are in metadata to get them to sync with the repo prefetch since 492 | // they can change aggressively. 493 | || fname == "config" 494 | // /tumbleweed/repo/oss/boot/x86_64/config is the first req. 495 | // All of these will come after. 
496 | || fname == "add_on_products.xml" 497 | || fname == "add_on_products" 498 | || fname == "directory.yast" 499 | || fname == "CHECKSUMS" 500 | || fname == "content" 501 | || fname == "bind" 502 | || fname == "control.xml" 503 | || fname == "autoinst.xml" 504 | || fname == "license.tar.gz" 505 | || fname == "info.txt" 506 | || fname == "part.info" 507 | || fname == "README.BETA" 508 | || fname == "driverupdate" 509 | || fname == "linux" 510 | || fname == "initrd" 511 | || fname == "common" 512 | || fname == "root" 513 | || fname == "cracklib-dict-full.rpm" 514 | || fname.starts_with("yast2-trans") 515 | { 516 | info!("Classification::Metadata"); 517 | Classification::Metadata 518 | } else if fname.ends_with("iso") 519 | || fname.ends_with("qcow2") 520 | || fname.ends_with("raw") 521 | || fname.ends_with("raw.xz") 522 | || fname.ends_with("raw.zst") 523 | || fname.ends_with("tar.xz") 524 | // looks to be used in some ubuntu repos? Not sure if metadata. 525 | || fname.ends_with("tar.gz") 526 | || fname.ends_with("tar.zst") 527 | || fname.ends_with("diff.gz") 528 | || fname.ends_with("diff.zst") 529 | // wsl 530 | || fname.ends_with("appx") 531 | // Random bits 532 | || fname.ends_with("txt") 533 | { 534 | info!("Classification::Blob"); 535 | Classification::Blob 536 | } else if fname.ends_with("rpm") 537 | || fname.ends_with("deb") 538 | || fname.ends_with("primary.xml.gz") 539 | || fname.ends_with("primary.xml.zst") 540 | || fname.ends_with("suseinfo.xml.gz") 541 | || fname.ends_with("suseinfo.xml.zst") 542 | || fname.ends_with("deltainfo.xml.gz") 543 | || fname.ends_with("deltainfo.xml.zst") 544 | || fname.ends_with("filelists.xml.gz") 545 | || fname.ends_with("filelists.xml.zst") 546 | || fname.ends_with("filelists-ext.xml.gz") 547 | || fname.ends_with("filelists-ext.xml.zst") 548 | || fname.ends_with("filelists.sqlite.bz2") 549 | || fname.ends_with("filelists.sqlite.gz") 550 | || fname.ends_with("filelists.sqlite.zst") 551 | || fname.ends_with("other.xml.gz") 552 | || fname.ends_with("other.xml.zst") 553 | || fname.ends_with("other.sqlite.bz2") 554 | || fname.ends_with("other.sqlite.gz") 555 | || fname.ends_with("other.sqlite.zst") 556 | || fname.ends_with("updateinfo.xml.gz") 557 | || fname.ends_with("updateinfo.xml.zst") 558 | || (fname.contains("susedata") && fname.ends_with(".xml.gz")) 559 | || (fname.contains("susedata") && fname.ends_with(".xml.zst")) 560 | || fname.ends_with("appdata-icons.tar.gz") 561 | || fname.ends_with("appdata-icons.tar.zst") 562 | || fname.ends_with("app-icons.tar.gz") 563 | || fname.ends_with("app-icons.tar.zst") 564 | || fname.ends_with("appdata.xml.gz") 565 | || fname.ends_with("appdata.xml.zst") 566 | || fname.ends_with("license.tar.gz") 567 | || fname.ends_with("license.tar.zst") 568 | || fname.ends_with("pkg.tar.zst") 569 | || fname.ends_with("pkg.tar.zst.sig") 570 | { 571 | info!("Classification::Static"); 572 | Classification::Static 573 | } else if fname == "login" 574 | || fname == "not.found" 575 | || fname.ends_with(".php") 576 | || fname.ends_with(".drpm") 577 | || fname.ends_with(".aspx") 578 | { 579 | error!("🥓 Classification::Spam - {}", req_path); 580 | Classification::Spam 581 | } else { 582 | error!("⚠️ Classification::Unknown - {}", req_path); 583 | Classification::Unknown 584 | } 585 | } 586 | 587 | pub fn clear_nxcache(&self, etime: OffsetDateTime) { 588 | warn!("NXCACHE CLEAR REQUESTED"); 589 | self.pri_cache.update_all_userdata( 590 | |d: &Status| d.nxtime.is_some(), 591 | |d: &mut Status| { 592 | if d.nxtime.is_some() { 593 | 
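// Rather than removing the entries, rewind every negative-cache (404)
// record's expiry to `etime`, so the next request for it is treated as
// expired and goes back to the upstream.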
d.nxtime = Some(etime); 594 | } 595 | }, 596 | ) 597 | } 598 | } 599 | 600 | async fn cache_stats(pri_cache: ArcDiskCache) { 601 | // let zero = CacheStats::default(); 602 | loop { 603 | let stats = pri_cache.view_stats(); 604 | warn!("cache stats - {:?}", stats); 605 | // stats.change_since(&zero)); 606 | if cfg!(debug_assertions) { 607 | sleep(Duration::from_secs(5)).await; 608 | } else { 609 | sleep(Duration::from_secs(300)).await; 610 | } 611 | } 612 | } 613 | 614 | fn cache_mgr(mut submit_rx: Receiver, pri_cache: ArcDiskCache) { 615 | // Wait on the channel, and when we get something proceed from there. 616 | while let Some(meta) = submit_rx.blocking_recv() { 617 | info!( 618 | "✨ Cache Manager Got -> {:?} {} {:?}", 619 | meta.req_path, meta.etime, meta.action 620 | ); 621 | 622 | let CacheMeta { 623 | req_path, 624 | etime, 625 | action, 626 | } = meta; 627 | 628 | // Req path sometimes has dup //, so we replace them. 629 | let req_path = req_path.replace("//", "/"); 630 | 631 | match action { 632 | Action::Submit { file, headers, cls } => { 633 | let expiry = cls.expiry(etime); 634 | let key = req_path.clone(); 635 | 636 | pri_cache.insert( 637 | key, 638 | Status { 639 | req_path, 640 | headers, 641 | expiry, 642 | cls, 643 | nxtime: None, 644 | }, 645 | file, 646 | ) 647 | } 648 | Action::Update => pri_cache.update_userdata(&req_path, |d: &mut Status| { 649 | d.expiry = d.cls.expiry(etime); 650 | if let Some(exp) = d.expiry.as_ref() { 651 | debug!("⏰ expiry updated to soft {} hard {}", exp.0, exp.1); 652 | } 653 | }), 654 | Action::NotFound { cls } => { 655 | match pri_cache.new_tempfile() { 656 | Some(file) => { 657 | let key = req_path.clone(); 658 | 659 | pri_cache.insert( 660 | key, 661 | Status { 662 | req_path, 663 | headers: BTreeMap::default(), 664 | expiry: None, 665 | cls, 666 | nxtime: Some(etime + time::Duration::minutes(1)), 667 | }, 668 | file, 669 | ) 670 | } 671 | None => { 672 | error!("TEMP FILE COULD NOT BE CREATED - SKIP CACHING"); 673 | } 674 | }; 675 | } 676 | } 677 | } 678 | error!("CRITICAL: CACHE MANAGER STOPPED."); 679 | } 680 | -------------------------------------------------------------------------------- /opensuse-proxy-cache/src/constants.rs: -------------------------------------------------------------------------------- 1 | use regex::Regex; 2 | use std::sync::atomic::AtomicBool; 3 | use url::Url; 4 | 5 | pub static UPSTREAM_ONLINE: AtomicBool = AtomicBool::new(false); 6 | pub const ALLOW_REDIRECTS: usize = 4; 7 | // Should be about 16Mb worst case. 8 | pub const CHANNEL_MAX_OUTSTANDING: usize = 2048; 9 | 10 | pub const BUFFER_WRITE_PAGE: usize = 8 * 1024 * 1024; 11 | // Match zypper default range reqs. Finally now 4MB! 12 | pub const BUFFER_READ_PAGE: usize = 8 * 1024 * 1024; 13 | 14 | pub static DEBOUNCE: u64 = 5 * 60; 15 | 16 | // If we go to https we are booted to mirrorcache. If we use http we get the content 17 | // that we want 😈 18 | // You can alternately go to downloadcontent.opensuse.org if you want from the primary mirror. 19 | // but that will likely break mirrorcache behaviour in the future. 20 | 21 | lazy_static::lazy_static! 
{ 22 | pub static ref DL_OS_URL: Url = 23 | Url::parse("https://downloadcontent2.opensuse.org").expect("Invalid base url"); 24 | pub static ref MCS_OS_URL: Url = 25 | Url::parse("https://downloadcontent2.opensuse.org").expect("Invalid base url"); 26 | pub static ref ETAG_NGINIX_RE: Regex = { 27 | Regex::new("(?P[a-fA-F0-9]+)-(?P[a-fA-F0-9]+)").expect("Invalid etag regex") 28 | }; 29 | pub static ref ETAG_APACHE_RE: Regex = { 30 | Regex::new("(?P[a-fA-F0-9]+)-(?P[a-fA-F0-9]+)").expect("Invalid etag regex") 31 | }; 32 | } 33 | -------------------------------------------------------------------------------- /opensuse-proxy-cache/src/main.rs: -------------------------------------------------------------------------------- 1 | #[macro_use] 2 | extern crate tracing; 3 | 4 | mod cache; 5 | mod constants; 6 | 7 | use askama::Template; 8 | 9 | use std::num::NonZeroUsize; 10 | use std::time::Instant; 11 | use tracing::Instrument; 12 | 13 | use crate::cache::*; 14 | use arc_disk_cache::CacheObj; 15 | 16 | use crate::constants::*; 17 | 18 | use clap::Parser; 19 | use lru::LruCache; 20 | use std::path::Path; 21 | use std::path::PathBuf; 22 | use std::sync::atomic::Ordering; 23 | 24 | use axum::{ 25 | body::Body, 26 | extract, 27 | http::{HeaderMap, HeaderName, HeaderValue, StatusCode}, 28 | response::{Html, IntoResponse, Response}, 29 | routing::get, 30 | Router, 31 | }; 32 | use std::net::SocketAddr; 33 | use std::str::FromStr; 34 | 35 | use tokio::sync::broadcast; 36 | 37 | use tokio::sync::mpsc::{channel, Receiver, Sender}; 38 | 39 | use bytes::Bytes; 40 | use futures_util::stream::Stream; 41 | use futures_util::task::{Context, Poll}; 42 | use pin_project_lite::pin_project; 43 | use std::convert::TryInto; 44 | use std::io::{BufWriter, Write}; 45 | use std::pin::Pin; 46 | use std::sync::Arc; 47 | use tempfile::NamedTempFile; 48 | use tokio::sync::mpsc::error::TryRecvError; 49 | use tokio::time::{sleep, Duration}; 50 | use url::Url; 51 | 52 | use tokio::fs::File; 53 | use tokio::io::BufReader; 54 | use tokio::io::{AsyncReadExt, AsyncSeekExt}; 55 | use tokio_stream::StreamExt; 56 | use tokio_util::io::InspectReader; 57 | use tokio_util::io::ReaderStream; 58 | use tokio_util::io::StreamReader; 59 | 60 | // use axum_server::accept::NoDelayAcceptor; 61 | use axum_server::tls_rustls::RustlsConfig; 62 | use axum_server::Handle; 63 | 64 | struct AppState { 65 | cache: Cache, 66 | client: reqwest::Client, 67 | // oauth: Option, 68 | prefetch_tx: Sender, 69 | boot_origin: Url, 70 | } 71 | 72 | impl AppState { 73 | pub fn new( 74 | capacity: usize, 75 | content_dir: &Path, 76 | clob: bool, 77 | wonder_guard: bool, 78 | durable_fs: bool, 79 | mirror_chain: Option, 80 | client: reqwest::Client, 81 | prefetch_tx: Sender, 82 | boot_origin: Url, 83 | ) -> std::io::Result { 84 | let cache = Cache::new( 85 | capacity, 86 | content_dir, 87 | clob, 88 | wonder_guard, 89 | durable_fs, 90 | mirror_chain, 91 | )?; 92 | 93 | Ok(AppState { 94 | cache, 95 | client, 96 | prefetch_tx, 97 | boot_origin, 98 | }) 99 | } 100 | } 101 | 102 | #[instrument(skip_all)] 103 | async fn head_view( 104 | headers: HeaderMap, 105 | extract::State(state): extract::State>, 106 | extract::OriginalUri(req_uri): extract::OriginalUri, 107 | ) -> Response { 108 | let req_path = req_uri.path(); 109 | let req_path = format!("/{}", req_path.replace("//", "/")); 110 | trace!("{:?}", req_path); 111 | info!("request_headers -> {:?}", headers); 112 | let decision = state.cache.decision(&req_path, true); 113 | // Based on the decision, take an appropriate 
path. Generally with head reqs 114 | // we try to stream this if we don't have it, and we prefetch in the BG. 115 | match decision { 116 | CacheDecision::Stream(url) => stream(state, url, true, None).await, 117 | CacheDecision::NotFound => missing().await, 118 | CacheDecision::FoundObj(meta) => found(meta, true, None).await, 119 | CacheDecision::Refresh(url, _, submit_tx, _, prefetch_paths) 120 | | CacheDecision::MissObj(url, _, submit_tx, _, prefetch_paths) => { 121 | // Submit all our BG prefetch reqs 122 | prefetch(state.prefetch_tx.clone(), &url, &submit_tx, prefetch_paths); 123 | // Now we just stream. 124 | stream(state, url, true, None).await 125 | } 126 | CacheDecision::AsyncRefresh(url, file, submit_tx, meta, prefetch_paths) => { 127 | // Submit all our BG prefetch reqs 128 | async_refresh( 129 | state.client.clone(), 130 | state.prefetch_tx.clone(), 131 | &url, 132 | &submit_tx, 133 | file, 134 | &meta, 135 | prefetch_paths, 136 | ); 137 | // Send our current head data. 138 | found(meta, true, None).await 139 | } 140 | CacheDecision::Invalid => { 141 | (StatusCode::INTERNAL_SERVER_ERROR, "Invalid Request").into_response() 142 | } 143 | } 144 | } 145 | 146 | // https://github.com/tokio-rs/axum/discussions/608 147 | 148 | #[instrument(skip_all)] 149 | async fn get_view( 150 | headers: HeaderMap, 151 | extract::State(state): extract::State>, 152 | extract::OriginalUri(req_uri): extract::OriginalUri, 153 | ) -> Response { 154 | let req_path = req_uri.path(); 155 | let req_path = format!("/{}", req_path.replace("//", "/")); 156 | trace!("{:?}", req_path); 157 | info!("request_headers -> {:?}", headers); 158 | let decision = state.cache.decision(&req_path, false); 159 | // Req path sometimes has dup //, so we replace them. 160 | 161 | // We have a hit, with our cache meta! Hooray! 162 | // Let's setup the response, and then stream from the file! 163 | let range = headers 164 | .get("range") 165 | .and_then(|hv| hv.to_str().ok()) 166 | .and_then(|sr| { 167 | if sr.starts_with("bytes=") { 168 | sr.strip_prefix("bytes=") 169 | .and_then(|v| v.split_once('-')) 170 | .and_then(|(range_start, range_end)| { 171 | let r_end = u64::from_str_radix(range_end, 10).ok(); 172 | u64::from_str_radix(range_start, 10) 173 | .ok() 174 | .map(|s| (s, r_end)) 175 | }) 176 | } else { 177 | None 178 | } 179 | }); 180 | 181 | // Based on the decision, take an appropriate path. 182 | match decision { 183 | CacheDecision::Stream(url) => stream(state, url, false, range).await, 184 | CacheDecision::NotFound => missing().await, 185 | CacheDecision::FoundObj(meta) => found(meta, false, range).await, 186 | CacheDecision::MissObj(url, file, submit_tx, cls, prefetch_paths) => { 187 | // Submit all our BG prefetch reqs 188 | prefetch(state.prefetch_tx.clone(), &url, &submit_tx, prefetch_paths); 189 | 190 | miss(state, url, req_path, file, submit_tx, cls, range).await 191 | } 192 | CacheDecision::Refresh(url, file, submit_tx, meta, prefetch_paths) => { 193 | // Do a head req - on any error, stream what we have if possible. 194 | // if head etag OR last update match, serve what we have. 195 | // else follow the miss path. 
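// refresh() issues a HEAD and compares the upstream etag with the cached one.
// On a mismatch we fall through to miss() and re-download; on a match we push
// Action::Update for this object (and its prefetch companions) to bump the
// expiry windows, then serve the cached copy.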
196 | debug!("prefetch {:?}", prefetch_paths); 197 | if refresh(&state.client, url.clone(), &meta).await { 198 | info!("👉 refresh required"); 199 | // Submit all our BG prefetch reqs 200 | prefetch(state.prefetch_tx.clone(), &url, &submit_tx, prefetch_paths); 201 | 202 | miss( 203 | state, 204 | url, 205 | req_path, 206 | file, 207 | submit_tx, 208 | meta.userdata.cls, 209 | range, 210 | ) 211 | .await 212 | } else { 213 | info!("👉 cache valid"); 214 | let etime = time::OffsetDateTime::now_utc(); 215 | // If we can't submit, we are probably shutting down so just finish up cleanly. 216 | // That's why we ignore these errors. 217 | // 218 | // If this item is valid we can update all the related prefetch items. 219 | let _ = submit_tx 220 | .send(CacheMeta { 221 | req_path, 222 | etime, 223 | action: Action::Update, 224 | }) 225 | .await; 226 | if let Some(pre) = prefetch_paths { 227 | for p in pre.into_iter() { 228 | let _ = submit_tx 229 | .send(CacheMeta { 230 | req_path: p.0, 231 | etime, 232 | action: Action::Update, 233 | }) 234 | .await; 235 | } 236 | } 237 | found(meta, false, range).await 238 | } 239 | } 240 | CacheDecision::AsyncRefresh(url, file, submit_tx, meta, prefetch_paths) => { 241 | // Submit all our BG prefetch reqs 242 | async_refresh( 243 | state.client.clone(), 244 | state.prefetch_tx.clone(), 245 | &url, 246 | &submit_tx, 247 | file, 248 | &meta, 249 | prefetch_paths, 250 | ); 251 | // Send our current cached data. 252 | found(meta, false, range).await 253 | } 254 | CacheDecision::Invalid => { 255 | (StatusCode::INTERNAL_SERVER_ERROR, "Invalid Request").into_response() 256 | } 257 | } 258 | } 259 | 260 | #[instrument(skip_all)] 261 | fn async_refresh( 262 | client: reqwest::Client, 263 | prefetch_tx: Sender, 264 | url: &Url, 265 | submit_tx: &Sender, 266 | file: NamedTempFile, 267 | obj: &CacheObj, 268 | prefetch_paths: Option>, 269 | ) { 270 | let u = url.clone(); 271 | let tx = submit_tx.clone(); 272 | let obj = obj.clone(); 273 | tokio::spawn(async move { 274 | async_refresh_task(client, prefetch_tx, u, tx, file, obj, prefetch_paths).await 275 | }); 276 | } 277 | 278 | #[instrument(skip_all)] 279 | async fn async_refresh_task( 280 | client: reqwest::Client, 281 | prefetch_tx: Sender, 282 | url: Url, 283 | submit_tx: Sender, 284 | file: NamedTempFile, 285 | obj: CacheObj, 286 | prefetch_paths: Option>, 287 | ) { 288 | info!("🥺 start async refresh {}", obj.userdata.req_path); 289 | 290 | if !refresh(&client, url.clone(), &obj).await { 291 | info!( 292 | "🥰 async prefetch, content still valid {}", 293 | obj.userdata.req_path 294 | ); 295 | let etime = time::OffsetDateTime::now_utc(); 296 | // If we can't submit, we are probably shutting down so just finish up cleanly. 297 | // That's why we ignore these errors. 298 | // 299 | // If this item is valid we can update all the related prefetch items. 
300 | let _ = submit_tx 301 | .send(CacheMeta { 302 | req_path: obj.userdata.req_path.clone(), 303 | etime, 304 | action: Action::Update, 305 | }) 306 | .await; 307 | return; 308 | } 309 | 310 | info!( 311 | "😵 async refresh, need to refresh {}", 312 | obj.userdata.req_path 313 | ); 314 | 315 | prefetch(prefetch_tx.clone(), &url, &submit_tx, prefetch_paths); 316 | 317 | // Fetch our actual file too 318 | 319 | if let Err(_) = prefetch_tx 320 | .send(PrefetchReq { 321 | req_path: obj.userdata.req_path.clone(), 322 | url, 323 | submit_tx: submit_tx, 324 | file, 325 | cls: obj.userdata.cls.clone(), 326 | }) 327 | .await 328 | { 329 | error!("Prefetch task may have died!"); 330 | } else { 331 | debug!("Prefetch submitted"); 332 | } 333 | } 334 | 335 | fn send_headers(range: Option<(u64, Option)>) -> HeaderMap { 336 | let mut h = HeaderMap::new(); 337 | h.append("user-agent", "opensuse-proxy-cache".try_into().unwrap()); 338 | // h.append("x-ospc-uuid", tracing_forest::id().to_string()); 339 | h.append( 340 | "x-zypp-anonymousid", 341 | "dd27909d-1c87-4640-b006-ef604d302f92".try_into().unwrap(), 342 | ); 343 | 344 | if let Some((lower, maybe_upper)) = range { 345 | if let Some(upper) = maybe_upper { 346 | h.append( 347 | "range", 348 | format!("bytes={}-{}", lower, upper).try_into().unwrap(), 349 | ); 350 | } else { 351 | h.append("range", format!("bytes={}-", lower).try_into().unwrap()); 352 | } 353 | }; 354 | 355 | h 356 | } 357 | 358 | fn filter_headers(headers: &HeaderMap, metadata: bool) -> HeaderMap { 359 | debug!(?headers); 360 | 361 | headers 362 | .iter() 363 | .filter_map(|(hv, hk)| { 364 | let hvs = hv.as_str(); 365 | if hvs == "etag" 366 | || hvs == "accept-ranges" 367 | || hvs == "content-type" 368 | || hvs == "content-range" 369 | || hvs == "last-modified" 370 | || hvs == "expires" 371 | || hvs == "cache-control" 372 | // If it's metadata then nix the content-length else curl has a sad. 373 | || (hvs == "content-length" && !metadata) 374 | { 375 | Some((hv.clone(), hk.clone())) 376 | } else { 377 | debug!("discarding -> {}: {:?}", hvs, hk); 378 | None 379 | } 380 | }) 381 | .collect() 382 | } 383 | 384 | #[instrument(skip_all)] 385 | async fn stream( 386 | state: Arc, 387 | url: Url, 388 | metadata: bool, 389 | range: Option<(u64, Option)>, 390 | ) -> Response { 391 | let send_headers = send_headers(range); 392 | 393 | let client_response = if metadata { 394 | info!("🍍 start stream -> HEAD {}", url.as_str()); 395 | state.client.head(url).headers(send_headers).send().await 396 | } else { 397 | info!("🍍 start stream -> GET {}", url.as_str()); 398 | state.client.get(url).headers(send_headers).send().await 399 | }; 400 | 401 | // Handle this error properly. Shortcut return a 500? 
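// For now any transport error from reqwest is collapsed into a bare 500
// response with no body.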
402 | let client_response = match client_response { 403 | Ok(cr) => cr, 404 | Err(e) => { 405 | error!(?e, "Error handling client response"); 406 | return (StatusCode::INTERNAL_SERVER_ERROR).into_response(); 407 | } 408 | }; 409 | 410 | let headers = filter_headers(client_response.headers(), metadata); 411 | // Filter the headers 412 | let status = client_response.status(); 413 | 414 | if metadata { 415 | (status, headers).into_response() 416 | } else { 417 | let stream = client_response.bytes_stream(); 418 | let body = Body::from_stream(stream); 419 | (status, headers, body).into_response() 420 | } 421 | } 422 | 423 | #[instrument(skip_all)] 424 | async fn miss( 425 | state: Arc, 426 | url: Url, 427 | req_path: String, 428 | file: NamedTempFile, 429 | submit_tx: Sender, 430 | cls: Classification, 431 | range: Option<(u64, Option)>, 432 | ) -> Response { 433 | info!("❄️ start miss "); 434 | debug!("range -> {:?}", range); 435 | 436 | if range.is_some() { 437 | info!("Range request, submitting bg dl with rangestream"); 438 | 439 | if let Err(_) = state 440 | .prefetch_tx 441 | .send(PrefetchReq { 442 | req_path, 443 | url: url.clone(), 444 | submit_tx, 445 | file, 446 | cls, 447 | }) 448 | .await 449 | { 450 | error!("Prefetch task may have died!"); 451 | } 452 | 453 | // Stream. metadata=false because we want the body. 454 | return stream(state, url, false, range).await; 455 | } 456 | 457 | // Not a range, go on. 458 | info!("Not a range request, as you were."); 459 | 460 | // Start the dl. 461 | let send_headers = send_headers(None); 462 | let client_response = state.client.get(url).headers(send_headers).send().await; 463 | 464 | let client_response = match client_response { 465 | Ok(cr) => cr, 466 | Err(e) => { 467 | error!(?e, "Error handling client response"); 468 | return (StatusCode::INTERNAL_SERVER_ERROR).into_response(); 469 | } 470 | }; 471 | 472 | let headers = filter_headers(client_response.headers(), false); 473 | // Filter the headers 474 | let status = client_response.status(); 475 | 476 | if status == StatusCode::OK || status == StatusCode::FORBIDDEN { 477 | let (io_tx, io_rx) = channel(CHANNEL_MAX_OUTSTANDING); 478 | 479 | let headers_clone = headers.clone(); 480 | let _ = tokio::task::spawn_blocking(move || { 481 | write_file(io_rx, req_path, headers_clone, file, submit_tx, cls) 482 | }); 483 | 484 | let stream = CacheDownloader::new(client_response.bytes_stream(), io_tx); 485 | let body = Body::from_stream(stream); 486 | (status, headers, body).into_response() 487 | } else if status == StatusCode::NOT_FOUND { 488 | info!("👻 rewrite -> NotFound"); 489 | let etime = time::OffsetDateTime::now_utc(); 490 | let _ = submit_tx 491 | .send(CacheMeta { 492 | req_path, 493 | etime, 494 | action: Action::NotFound { cls }, 495 | }) 496 | .await; 497 | 498 | // Send back the 404 499 | missing().await 500 | } else { 501 | error!( 502 | "Response returned {:?}, aborting miss to stream -> {}", 503 | status, req_path 504 | ); 505 | let stream = client_response.bytes_stream(); 506 | let body = Body::from_stream(stream); 507 | (status, headers, body).into_response() 508 | } 509 | } 510 | 511 | pin_project! 
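// CacheDownloader tees the upstream body: every chunk is yielded to the HTTP
// response as-is and, on a best-effort basis, cloned into the mpsc channel
// that feeds write_file. If the channel rejects a send we stop mirroring into
// the cache but keep streaming to the client.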
{ 512 | struct CacheDownloader 513 | where T: Stream> 514 | { 515 | #[pin] 516 | dlos_reader: T, 517 | #[pin] 518 | io_send: bool, 519 | #[pin] 520 | io_tx: Sender, 521 | } 522 | } 523 | 524 | impl CacheDownloader 525 | where 526 | T: Stream>, 527 | { 528 | pub fn new(dlos_reader: T, io_tx: Sender) -> Self { 529 | CacheDownloader { 530 | dlos_reader, 531 | io_send: true, 532 | io_tx, 533 | } 534 | } 535 | } 536 | 537 | impl Stream for CacheDownloader 538 | where 539 | T: Stream>, 540 | { 541 | type Item = Result; 542 | 543 | // Required method 544 | fn poll_next(self: Pin<&mut Self>, ctx: &mut Context<'_>) -> Poll> { 545 | let mut this = self.project(); 546 | match this.dlos_reader.poll_next(ctx) { 547 | Poll::Ready(Some(Ok(buf))) => { 548 | // We don't care if this errors - it won't be written to the cache so we'll 549 | // try again and correct it later 550 | if *this.io_send { 551 | // Write the content of the buffer here into the channel. 552 | let bytes = buf.clone(); 553 | 554 | if let Err(_e) = this.io_tx.try_send(bytes) { 555 | error!("🚨 poll_read io_tx blocking_send error."); 556 | error!( 557 | "🚨 io_rx has likely died or is backlogged. continuing to stream ..." 558 | ); 559 | *this.io_send = false; 560 | } 561 | } 562 | 563 | Poll::Ready(Some(Ok(buf))) 564 | } 565 | Poll::Ready(Some(Err(e))) => Poll::Ready(Some(Err(e))), 566 | Poll::Ready(None) => Poll::Ready(None), 567 | Poll::Pending => Poll::Pending, 568 | } 569 | } 570 | 571 | fn size_hint(&self) -> (usize, Option) { 572 | self.dlos_reader.size_hint() 573 | } 574 | } 575 | 576 | #[instrument(skip_all)] 577 | fn write_file( 578 | mut io_rx: Receiver, 579 | req_path: String, 580 | mut headers: HeaderMap, 581 | file: NamedTempFile, 582 | submit_tx: Sender, 583 | cls: Classification, 584 | ) { 585 | let mut amt = 0; 586 | 587 | let cnt_amt = headers 588 | .remove("content-length") 589 | .and_then(|hk| hk.to_str().ok().and_then(|i| usize::from_str(i).ok())) 590 | .unwrap_or(0); 591 | 592 | let etag_nginix_len = headers 593 | .get("etag") 594 | .and_then(|hk| { 595 | hk.to_str().ok().and_then(|t| { 596 | ETAG_NGINIX_RE.captures(t).and_then(|caps| { 597 | let etcap = caps.name("len"); 598 | etcap.map(|s| s.as_str()).and_then(|len_str| { 599 | let r = usize::from_str_radix(len_str, 16).ok(); 600 | r 601 | }) 602 | }) 603 | }) 604 | }) 605 | .unwrap_or(0); 606 | 607 | let etag_apache_len = headers 608 | .get("etag") 609 | .and_then(|hk| { 610 | hk.to_str().ok().and_then(|t| { 611 | ETAG_APACHE_RE.captures(t).and_then(|caps| { 612 | let etcap = caps.name("len"); 613 | etcap.map(|s| s.as_str()).and_then(|len_str| { 614 | let r = usize::from_str_radix(len_str, 16).ok(); 615 | r 616 | }) 617 | }) 618 | }) 619 | }) 620 | .unwrap_or(0); 621 | 622 | // At least *one* etag length has to make sense ... 623 | // Does this length make sense? Can we get an etag length? 624 | 625 | if cnt_amt != 0 626 | && ((etag_nginix_len != 0 && cnt_amt != etag_nginix_len) 627 | && (etag_apache_len != 0 && cnt_amt != etag_apache_len)) 628 | { 629 | error!( 630 | "content-length and etag do not agree - {} != a {} && n {}", 631 | cnt_amt, etag_apache_len, etag_nginix_len 632 | ); 633 | return; 634 | } else { 635 | info!( 636 | "content-length and etag agree - {} == a {} || n {}", 637 | cnt_amt, etag_apache_len, etag_nginix_len 638 | ); 639 | }; 640 | 641 | let mut buf_file = BufWriter::with_capacity(BUFFER_WRITE_PAGE, file); 642 | let mut count = 0; 643 | 644 | loop { 645 | match io_rx.try_recv() { 646 | Ok(bytes) => { 647 | // Path? 
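// Chunks forwarded from the in-flight download are appended to the
// BufWriter-wrapped tempfile. Note that `write` may report a short write;
// `write_all` would be the stricter choice if truncated cache entries ever
// show up here.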
648 | if let Err(e) = buf_file.write(&bytes) { 649 | error!("Error writing to tempfile -> {:?}", e); 650 | return; 651 | } 652 | amt += bytes.len(); 653 | if bytes.len() > 0 { 654 | // We actually progressed. 655 | if count >= 10 { 656 | warn!("Download has become unstuck."); 657 | eprintln!("Download has become unstuck."); 658 | } 659 | count = 0; 660 | } 661 | } 662 | Err(TryRecvError::Empty) => { 663 | // pending 664 | std::thread::sleep(std::time::Duration::from_millis(100)); 665 | count += 1; 666 | if count >= 200 { 667 | eprintln!("No activity in {}ms seconds, cancelling task.", count * 100); 668 | error!("No activity in {}ms seconds, cancelling task.", count * 100); 669 | return; 670 | } else if count == 10 { 671 | warn!("Download may be stuck!!!"); 672 | eprintln!("Download may be stuck!!!"); 673 | } 674 | } 675 | Err(TryRecvError::Disconnected) => { 676 | debug!("Channel closed, download may be complete."); 677 | break; 678 | } 679 | } 680 | } 681 | 682 | // Check the content len is ok. 683 | // We have to check that amt >= cnt_amt (aka cnt_amt < amt) 684 | if amt == 0 || (cnt_amt != 0 && cnt_amt > amt) { 685 | warn!( 686 | "transfer interupted, ending - received: {} expect: {}", 687 | amt, cnt_amt 688 | ); 689 | return; 690 | } 691 | 692 | info!("final sizes - amt {} cnt_amt {}", amt, cnt_amt); 693 | 694 | if cnt_amt != 0 { 695 | // Header map overwrites content-length on insert. 696 | headers.insert("content-length", amt.into()); 697 | } 698 | 699 | let file = match buf_file.into_inner() { 700 | Ok(f) => f, 701 | Err(e) => { 702 | error!("error processing -> {}, {} -> {:?}", req_path, amt, e); 703 | return; 704 | } 705 | }; 706 | 707 | // event time 708 | 709 | let etime = time::OffsetDateTime::now_utc(); 710 | 711 | // Don't touch etag! We need it to check if upstreams content is still 712 | // valid! 713 | 714 | let headers = headers 715 | .into_iter() 716 | .filter_map(|(k, v)| { 717 | if let Some(k) = k.map(|ik| ik.as_str().to_string()) { 718 | v.to_str().ok().map(|iv| (k, iv.to_string())) 719 | } else { 720 | None 721 | } 722 | }) 723 | .collect(); 724 | 725 | // TODO HERE 726 | // Now if the FILE is a repomd xml we need to parse it and indicate prefetch on 727 | // the sha-sum locations of the actual repodata. 728 | 729 | let meta = CacheMeta { 730 | req_path, 731 | etime, 732 | action: Action::Submit { file, headers, cls }, 733 | }; 734 | // Send the file + metadata to the main cache. 735 | if let Err(e) = submit_tx.try_send(meta) { 736 | error!("failed to submit to cache channel -> {:?}", e); 737 | } 738 | } 739 | 740 | #[instrument(skip_all)] 741 | fn prefetch( 742 | prefetch_tx: Sender, 743 | url: &Url, 744 | submit_tx: &Sender, 745 | prefetch_paths: Option>, 746 | ) { 747 | if let Some(prefetch) = prefetch_paths { 748 | for (path, file, cls) in prefetch.into_iter() { 749 | if let Err(_) = prefetch_tx.try_send(PrefetchReq { 750 | req_path: path, 751 | url: url.clone(), 752 | submit_tx: submit_tx.clone(), 753 | file, 754 | cls, 755 | }) { 756 | error!("Prefetch task may have died!"); 757 | } 758 | } 759 | } 760 | } 761 | 762 | #[instrument(skip_all)] 763 | async fn prefetch_dl_task( 764 | client: reqwest::Client, 765 | mut url: Url, 766 | submit_tx: Sender, 767 | req_path: String, 768 | file: NamedTempFile, 769 | cls: Classification, 770 | ) { 771 | info!("🚅 start prefetch {}", req_path); 772 | 773 | let send_headers = send_headers(None); 774 | // Add the path to our base mirror url. 
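// The URL handed to us is the bare mirror origin; the request path from the
// original decision is appended here. The body is then drained through an
// InspectReader into tokio::io::sink, with each chunk cloned into the same
// write_file routine that the miss path uses.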
775 | url.set_path(&req_path); 776 | 777 | let client_response = client.get(url).headers(send_headers).send().await; 778 | 779 | let client_response = match client_response { 780 | Ok(cr) => cr, 781 | Err(e) => { 782 | error!(?e, "Error handling client response"); 783 | return; 784 | } 785 | }; 786 | 787 | let status = client_response.status(); 788 | if status == StatusCode::NOT_FOUND { 789 | info!("👻 prefetch rewrite -> NotFound"); 790 | let etime = time::OffsetDateTime::now_utc(); 791 | let _ = submit_tx 792 | .send(CacheMeta { 793 | req_path, 794 | etime, 795 | action: Action::NotFound { cls }, 796 | }) 797 | .await; 798 | return; 799 | } else if status != StatusCode::OK { 800 | error!("Response returned {:?}, aborting prefetch", status); 801 | return; 802 | } 803 | 804 | let headers = filter_headers(client_response.headers(), false); 805 | 806 | let (io_tx, io_rx) = channel(CHANNEL_MAX_OUTSTANDING); 807 | let _ = tokio::task::spawn_blocking(move || { 808 | write_file(io_rx, req_path, headers, file, submit_tx, cls) 809 | }); 810 | 811 | let mut byte_reader = InspectReader::new( 812 | StreamReader::new( 813 | client_response 814 | .bytes_stream() 815 | .map(|item| item.map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, e))), 816 | ), 817 | move |bytes| { 818 | let b: Bytes = bytes.to_vec().into(); 819 | let _ = io_tx.try_send(b); 820 | }, 821 | ); 822 | 823 | let mut sink = tokio::io::sink(); 824 | 825 | if let Err(e) = tokio::io::copy(&mut byte_reader, &mut sink).await { 826 | error!("prefetch tokio::io::copy error -> {:?}", e); 827 | } 828 | // That's it! 829 | } 830 | 831 | #[instrument(skip_all)] 832 | async fn found( 833 | obj: CacheObj, 834 | metadata: bool, 835 | range: Option<(u64, Option)>, 836 | ) -> Response { 837 | info!( 838 | "🔥 start found -> {:?} : range: {:?}", 839 | obj.fhandle.path, range 840 | ); 841 | 842 | let amt: u64 = obj.fhandle.amt as u64; 843 | 844 | // rebuild headers. 845 | let mut headers = HeaderMap::new(); 846 | 847 | obj.userdata.headers.iter().for_each(|(k, v)| { 848 | headers.insert( 849 | HeaderName::from_str(k.as_str()).unwrap(), 850 | HeaderValue::from_str(v.as_str()).unwrap(), 851 | ); 852 | }); 853 | 854 | let mut headers = filter_headers(&headers, metadata); 855 | 856 | if metadata { 857 | return (StatusCode::OK, headers).into_response(); 858 | } 859 | 860 | // Not a head req - send the file! 861 | let mut n_file = match File::open(&obj.fhandle.path).await { 862 | Ok(f) => f, 863 | Err(e) => { 864 | error!("{:?}", e); 865 | return StatusCode::INTERNAL_SERVER_ERROR.into_response(); 866 | } 867 | }; 868 | 869 | let (start, end) = match range { 870 | Some((start, None)) => { 871 | // If some clients already have the whole file, they'll send the byte range like this, so we 872 | // just ignore it and send the file again. 873 | if start == amt { 874 | (0, amt) 875 | } else { 876 | (start, amt) 877 | } 878 | } 879 | Some((start, Some(end))) => (start, end + 1), 880 | None => (0, amt), 881 | }; 882 | 883 | // Sanity check! 
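// `end` is exclusive at this point (range end + 1, or the full file size), so
// a satisfiable request needs start < end <= amt; anything else is a 416.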
884 | if end <= start || end > amt { 885 | error!("Range failed {} <= {} || {} > {}", end, start, end, amt); 886 | return StatusCode::RANGE_NOT_SATISFIABLE.into_response(); 887 | } 888 | 889 | if start != 0 { 890 | if let Err(e) = n_file.seek(std::io::SeekFrom::Start(start)).await { 891 | error!("Range start not satisfiable -> {:?}", e); 892 | return StatusCode::RANGE_NOT_SATISFIABLE.into_response(); 893 | } 894 | } 895 | 896 | // 0 - 1024, we want 1024 - 0 = 1024 897 | // 1024 - 2048, we want 2048 - 1024 = 1024 898 | let limit_bytes = end - start; 899 | 900 | // UPDATE HEADER WITH LIMIT_BYTES AS LEN 901 | headers.insert( 902 | "content-length", 903 | HeaderValue::from_str(format!("{}", limit_bytes).as_str()).unwrap(), 904 | ); 905 | 906 | let limit_file = n_file.take(limit_bytes); 907 | 908 | /* 909 | let stream = Body::from_stream(ReaderStream::new(BufReader::with_capacity( 910 | BUFFER_READ_PAGE, 911 | limit_file, 912 | ))); 913 | */ 914 | 915 | let stream = Body::from_stream(ReaderStream::with_capacity(limit_file, BUFFER_READ_PAGE)); 916 | 917 | if start == 0 && end == amt { 918 | assert!(limit_bytes == amt); 919 | (StatusCode::OK, headers, stream).into_response() 920 | } else { 921 | headers.insert( 922 | "content-range", 923 | HeaderValue::from_str(format!("bytes {}-{}/{}", start, end - 1, amt).as_str()).unwrap(), 924 | ); 925 | 926 | (StatusCode::PARTIAL_CONTENT, headers, stream).into_response() 927 | } 928 | } 929 | 930 | #[instrument(skip_all)] 931 | async fn refresh(client: &reqwest::Client, url: Url, obj: &CacheObj) -> bool { 932 | info!("💸 start refresh "); 933 | // If we don't have an etag and/or last mod, treat as miss. 934 | // If we don't have a content-len we may have corrupt content, 935 | // so force the refresh. 936 | 937 | // First do a head request. 938 | let send_headers = send_headers(None); 939 | let client_response = client.head(url).headers(send_headers).send().await; 940 | 941 | let client_response = match client_response { 942 | Ok(cr) => cr, 943 | Err(e) => { 944 | error!(?e, "Error handling client response"); 945 | // For now assume we can't proceed anyway 946 | return false; 947 | } 948 | }; 949 | 950 | let etag: Option<&str> = client_response 951 | .headers() 952 | .get("etag") 953 | .and_then(|hv| hv.to_str().ok()); 954 | let x_etag = obj.userdata.headers.get("etag"); 955 | 956 | debug!("etag -> {:?} == {:?}", etag, x_etag); 957 | if etag.is_some() && etag == x_etag.map(|s| s.as_str()) { 958 | // No need to refresh, continue. 959 | info!("💸 refresh not required"); 960 | false 961 | } else { 962 | // No etag present from head request. Assume we need to refresh. 
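// (Only the etag is consulted here; when the upstream omits one we
// conservatively refetch rather than trusting last-modified.)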
963 | info!("💸 refresh is required"); 964 | true 965 | } 966 | } 967 | 968 | #[instrument(skip_all)] 969 | async fn missing() -> Response { 970 | info!("👻 start force missing"); 971 | 972 | StatusCode::NOT_FOUND.into_response() 973 | } 974 | 975 | async fn monitor_upstream( 976 | client: reqwest::Client, 977 | mirror_chain: Option, 978 | mut rx: broadcast::Receiver, 979 | ) { 980 | info!(immediate = true, "Spawning upstream monitor task ..."); 981 | 982 | loop { 983 | match rx.try_recv() { 984 | Err(broadcast::error::TryRecvError::Empty) => { 985 | async { 986 | let r = if let Some(mc_url) = mirror_chain.as_ref() { 987 | info!("upstream checking -> {}", mc_url.as_str()); 988 | client 989 | .head(mc_url.as_str()) 990 | .timeout(std::time::Duration::from_secs(8)) 991 | .send() 992 | .await 993 | .map(|resp| { 994 | info!("upstream check {} -> {:?}", mc_url.as_str(), resp.status()); 995 | resp.status() == StatusCode::OK 996 | || resp.status() == StatusCode::FORBIDDEN 997 | }) 998 | .unwrap_or_else(|resp| { 999 | info!(?resp); 1000 | info!( 1001 | "upstream err check {} -> {:?}", 1002 | mc_url.as_str(), 1003 | resp.status() 1004 | ); 1005 | resp.status() == Some(StatusCode::OK) 1006 | || resp.status() == Some(StatusCode::FORBIDDEN) 1007 | }) 1008 | } else { 1009 | info!("upstream checking -> {:?}", DL_OS_URL.as_str()); 1010 | info!("upstream checking -> {:?}", MCS_OS_URL.as_str()); 1011 | client 1012 | .head(DL_OS_URL.as_str()) 1013 | .timeout(std::time::Duration::from_secs(8)) 1014 | .send() 1015 | .await 1016 | .map(|resp| { 1017 | info!( 1018 | "upstream check {} -> {:?}", 1019 | DL_OS_URL.as_str(), 1020 | resp.status() 1021 | ); 1022 | resp.status() == StatusCode::OK 1023 | || resp.status() == StatusCode::FORBIDDEN 1024 | }) 1025 | .unwrap_or_else(|resp| { 1026 | info!( 1027 | "upstream err check {} -> {:?}", 1028 | DL_OS_URL.as_str(), 1029 | resp.status() 1030 | ); 1031 | resp.status() == Some(StatusCode::OK) 1032 | || resp.status() == Some(StatusCode::FORBIDDEN) 1033 | }) 1034 | && client 1035 | .head(MCS_OS_URL.as_str()) 1036 | .timeout(std::time::Duration::from_secs(8)) 1037 | .send() 1038 | .await 1039 | .map(|resp| { 1040 | info!( 1041 | "upstream check {} -> {:?}", 1042 | MCS_OS_URL.as_str(), 1043 | resp.status() 1044 | ); 1045 | resp.status() == StatusCode::OK 1046 | || resp.status() == StatusCode::FORBIDDEN 1047 | }) 1048 | .unwrap_or_else(|resp| { 1049 | info!( 1050 | "upstream err check {} -> {:?}", 1051 | MCS_OS_URL.as_str(), 1052 | resp.status() 1053 | ); 1054 | resp.status() == Some(StatusCode::OK) 1055 | || resp.status() == Some(StatusCode::FORBIDDEN) 1056 | }) 1057 | }; 1058 | UPSTREAM_ONLINE.store(r, Ordering::Relaxed); 1059 | warn!("upstream online -> {}", r); 1060 | } 1061 | .instrument(tracing::info_span!("monitor_upstream")) 1062 | .await; 1063 | 1064 | sleep(Duration::from_secs(5)).await; 1065 | } 1066 | _ => { 1067 | break; 1068 | } 1069 | } 1070 | } 1071 | 1072 | info!(immediate = true, "Stopping upstream monitor task."); 1073 | } 1074 | 1075 | struct PrefetchReq { 1076 | req_path: String, 1077 | url: Url, 1078 | file: NamedTempFile, 1079 | submit_tx: Sender, 1080 | cls: Classification, 1081 | } 1082 | 1083 | async fn prefetch_task( 1084 | state: Arc, 1085 | mut prefetch_rx: Receiver, 1086 | mut rx: broadcast::Receiver, 1087 | ) { 1088 | info!(immediate = true, "Spawning prefetch task ..."); 1089 | 1090 | let mut req_cache = LruCache::new(NonZeroUsize::new(64).unwrap()); 1091 | 1092 | while matches!(rx.try_recv(), Err(broadcast::error::TryRecvError::Empty)) { 1093 | 
async { 1094 | tokio::select! { 1095 | _ = sleep(Duration::from_secs(5)) => { 1096 | // Do nothing, this is to make us loop and check the running state. 1097 | info!("prefetch loop idle"); 1098 | } 1099 | got = prefetch_rx.recv() => { 1100 | match got { 1101 | Some(PrefetchReq { 1102 | req_path, 1103 | url, 1104 | file, 1105 | submit_tx, 1106 | cls 1107 | }) => { 1108 | trace!("received a prefetch operation"); 1109 | let debounce_t = req_cache.get(&req_path) 1110 | .map(|inst: &Instant| inst.elapsed().as_secs()) 1111 | .unwrap_or(DEBOUNCE + 1); 1112 | let debounce = debounce_t < DEBOUNCE; 1113 | 1114 | if debounce { 1115 | info!(immediate = true, "Skipping debounce item {}", req_path); 1116 | } else { 1117 | prefetch_dl_task(state.client.clone(), url, submit_tx, req_path.clone(), file, cls).await; 1118 | // Sometimes if the dl is large, we can accidentally trigger a second dl because the cache 1119 | // hasn't finished crc32c yet. So we need a tiny cache to debounce repeat dl's. 1120 | req_cache.put(req_path, Instant::now()); 1121 | } 1122 | } 1123 | None => { 1124 | // channels dead. 1125 | warn!("prefetch channel has died"); 1126 | return; 1127 | } 1128 | } 1129 | } 1130 | } 1131 | } 1132 | .instrument(tracing::info_span!("prefetch_task")) 1133 | .await; 1134 | } 1135 | 1136 | info!(immediate = true, "Stopping prefetch task."); 1137 | } 1138 | 1139 | async fn ipxe_static(extract::Path(fname): extract::Path) -> Response { 1140 | let Some(rel_fname) = fname.file_name() else { 1141 | return StatusCode::NOT_FOUND.into_response(); 1142 | }; 1143 | 1144 | // Get the abs path. 1145 | let abs_path = Path::new("/usr/share/ipxe").join(rel_fname); 1146 | 1147 | let n_file = match File::open(&abs_path).await { 1148 | Ok(f) => f, 1149 | Err(e) => { 1150 | error!("{:?}", e); 1151 | return StatusCode::INTERNAL_SERVER_ERROR.into_response(); 1152 | } 1153 | }; 1154 | 1155 | let stream = Body::from_stream(ReaderStream::new(BufReader::with_capacity( 1156 | BUFFER_READ_PAGE, 1157 | n_file, 1158 | ))); 1159 | 1160 | (StatusCode::OK, stream).into_response() 1161 | } 1162 | 1163 | #[derive(Template)] 1164 | #[template(path = "ipxe.menu.html")] 1165 | struct IpxeMenuTemplate<'a> { 1166 | mirror_uri: &'a str, 1167 | } 1168 | 1169 | #[axum::debug_handler] 1170 | async fn ipxe_menu_view( 1171 | headers: HeaderMap, 1172 | extract::State(state): extract::State>, 1173 | ) -> Response { 1174 | let menu = IpxeMenuTemplate { 1175 | mirror_uri: state.boot_origin.as_str(), 1176 | } 1177 | .render() 1178 | .unwrap(); 1179 | 1180 | // error!("ipxe request_headers -> {:?}", headers); 1181 | // ipxe request_headers -> {"connection": "keep-alive", "user-agent": "iPXE/1.21.1+git20231006.ff0f8604", "host": "172.24.11.130:8080"} 1182 | 1183 | // https://ipxe.org/cfg 1184 | // https://ipxe.org/cmd/ 1185 | 1186 | // set mirror-uri ${cwduri} 1187 | 1188 | menu.into_response() 1189 | } 1190 | 1191 | async fn robots_view() -> Html<&'static str> { 1192 | Html( 1193 | r#" 1194 | User-agent: * 1195 | Disallow: / 1196 | "#, 1197 | ) 1198 | } 1199 | 1200 | async fn status_view() -> Html<&'static str> { 1201 | Html(r#"Ok"#) 1202 | } 1203 | 1204 | #[derive(Debug, clap::Parser)] 1205 | #[clap(about = "OpenSUSE Caching Mirror Tool")] 1206 | struct Config { 1207 | #[arg(short = 's', default_value = "17179869184", env = "CACHE_SIZE")] 1208 | /// Disk size for cache content in bytes. 
Defaults to 16GiB 1209 | cache_size: usize, 1210 | #[arg(short = 'p', default_value = "/tmp/osuse_cache", env = "CACHE_PATH")] 1211 | /// Path where cache content should be stored 1212 | cache_path: PathBuf, 1213 | #[arg(short = 'c', long = "cache_large_objects", env = "CACHE_LARGE_OBJECTS")] 1214 | /// Should we cache large objects like ISO/vm images/boot images? 1215 | cache_large_objects: bool, 1216 | #[arg(short = 'w', long = "wonder_guard", env = "WONDER_GUARD")] 1217 | /// Enables a bloom filter to prevent pre-emptive caching of one-hit-wonders 1218 | wonder_guard: bool, 1219 | #[arg(short = 'Z', long = "durable_fs", env = "DURABLE_FS")] 1220 | /// Is this running on a consistent and checksummed fs? If yes, then we can skip 1221 | /// internal crc32c sums on get(). 1222 | durable_fs: bool, 1223 | #[arg(default_value = "[::]:8080", env = "BIND_ADDRESS", long = "addr")] 1224 | /// Address to listen to for http 1225 | bind_addr: String, 1226 | 1227 | #[arg(long = "boot-services", env = "BOOT_SERVICES")] 1228 | /// Enable a tftp server for pxe boot services 1229 | boot_services: bool, 1230 | 1231 | #[arg( 1232 | env = "BOOT_ORIGIN", 1233 | default_value = "http://localhost:8080", 1234 | long = "boot_origin" 1235 | )] 1236 | /// The external URL of this server as seen by boot service clients 1237 | boot_origin: Url, 1238 | 1239 | #[arg(env = "TLS_BIND_ADDRESS", long = "tlsaddr")] 1240 | /// Address to listen to for https (optional) 1241 | tls_bind_addr: Option, 1242 | #[arg(env = "TLS_PEM_KEY", long = "tlskey")] 1243 | /// Path to the TLS Key file in PEM format. 1244 | tls_pem_key: Option, 1245 | #[arg(env = "TLS_PEM_CHAIN", long = "tlschain")] 1246 | /// Path to the TLS Chain file in PEM format. 1247 | tls_pem_chain: Option, 1248 | #[arg(env = "MIRROR_CHAIN", long = "mirrorchain")] 1249 | /// Url to another proxy-cache instance to chain through. 1250 | mirror_chain: Option, 1251 | #[arg(env = "ACME_CHALLENGE_DIR", long = "acmechallengedir")] 1252 | /// Location to store acme challenges for lets encrypt if in use. 1253 | acme_challenge_dir: Option, 1254 | 1255 | #[arg(env = "OAUTH2_CLIENT_ID", long = "oauth_client_id")] 1256 | /// Oauth client id 1257 | oauth_client_id: Option, 1258 | #[arg(env = "OAUTH2_CLIENT_SECRET", long = "oauth_client_secret")] 1259 | /// Oauth client secret 1260 | oauth_client_secret: Option, 1261 | #[arg( 1262 | env = "OAUTH2_CLIENT_URL", 1263 | default_value = "http://localhost:8080", 1264 | long = "oauth_client_url" 1265 | )] 1266 | /// Oauth client url - this is the url of THIS server 1267 | oauth_client_url: String, 1268 | #[arg(env = "OAUTH2_SERVER_URL", long = "oauth_server_url")] 1269 | /// Oauth server url - the url of the authorisation provider 1270 | oauth_server_url: Option, 1271 | } 1272 | 1273 | async fn do_main() { 1274 | let config = Config::parse(); 1275 | 1276 | // This affects a bunch of things, may need to override in the upstream check. 
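// The 2 hour timeout below is the default for every request made through this shared
// reqwest client, so very large downloads (ISOs, boot and vm images) are not cut off
// mid-transfer. Callers that need a tighter bound override it per request with
// `RequestBuilder::timeout` - the upstream monitor task uses an 8 second limit.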
1277 | let timeout = std::time::Duration::from_secs(7200); 1278 | 1279 | let client = reqwest::ClientBuilder::new() 1280 | .no_gzip() 1281 | .no_brotli() 1282 | .no_deflate() 1283 | .no_proxy() 1284 | .timeout(timeout) 1285 | .redirect(reqwest::redirect::Policy::limited(ALLOW_REDIRECTS)) 1286 | .build() 1287 | .expect("Unable to build client"); 1288 | 1289 | trace!("Trace working!"); 1290 | debug!("Debug working!"); 1291 | 1292 | let (tx, mut rx1) = broadcast::channel(1); 1293 | let (prefetch_tx, prefetch_rx) = channel(2048); 1294 | 1295 | let mirror_chain = config 1296 | .mirror_chain 1297 | .as_ref() 1298 | .map(|s| Url::parse(s).expect("Invalid mirror_chain url")); 1299 | 1300 | let app_state_res = AppState::new( 1301 | config.cache_size, 1302 | &config.cache_path, 1303 | config.cache_large_objects, 1304 | config.wonder_guard, 1305 | config.durable_fs, 1306 | mirror_chain.clone(), 1307 | client.clone(), 1308 | prefetch_tx, 1309 | config.boot_origin.clone(), 1310 | ); 1311 | 1312 | let app_state = match app_state_res { 1313 | Ok(state) => Arc::new(state), 1314 | Err(err) => { 1315 | error!(?err, "Unable to configure cache"); 1316 | return; 1317 | } 1318 | }; 1319 | 1320 | let app = Router::new() 1321 | .route("/", get(get_view).head(head_view)) 1322 | .route("/*req_path", get(get_view).head(head_view)) 1323 | .route("/_status", get(status_view)) 1324 | .route("/robots.txt", get(robots_view)) 1325 | .route("/menu.ipxe", get(ipxe_menu_view)) 1326 | .route("/ipxe/:fname", get(ipxe_static)) 1327 | .with_state(app_state.clone()); 1328 | 1329 | // Later need to add acme well-known if needed. 1330 | 1331 | let svc = app 1332 | // .into_make_service(); 1333 | .into_make_service_with_connect_info::(); 1334 | 1335 | let tls_server_handle = match ( 1336 | config.tls_bind_addr.as_ref(), 1337 | config.tls_pem_key.as_ref(), 1338 | config.tls_pem_chain.as_ref(), 1339 | ) { 1340 | (Some(tba), Some(tpk), Some(tpc)) => { 1341 | info!("Binding -> https://{}", tba); 1342 | 1343 | let p_tpk = Path::new(tpk); 1344 | let p_tpc = Path::new(tpc); 1345 | 1346 | if !p_tpk.exists() { 1347 | error!("key does not exist -> {}", tpk); 1348 | } 1349 | 1350 | if !p_tpc.exists() { 1351 | error!("chain does not exist -> {}", tpc); 1352 | } 1353 | 1354 | if !p_tpc.exists() || !p_tpk.exists() { 1355 | return; 1356 | } 1357 | 1358 | let tls_addr = SocketAddr::from_str(&tba).expect("Invalid config bind address"); 1359 | 1360 | let tls_svc = svc.clone(); 1361 | let mut tls_rx1 = tx.subscribe(); 1362 | 1363 | let tls_config = RustlsConfig::from_pem_chain_file(p_tpc, p_tpk) 1364 | .await 1365 | .expect("Invalid TLS configuration"); 1366 | 1367 | let server_handle = Handle::new(); 1368 | 1369 | let server_fut = axum_server::bind_rustls(tls_addr, tls_config) 1370 | // .acceptor(NoDelayAcceptor::new()) 1371 | .handle(server_handle.clone()) 1372 | .serve(tls_svc); 1373 | 1374 | tokio::task::spawn(async move { 1375 | let _ = tls_rx1.recv().await; 1376 | server_handle.shutdown(); 1377 | }); 1378 | 1379 | Some(tokio::task::spawn(async move { 1380 | server_fut.await.unwrap(); 1381 | info!("TLS Server has stopped!"); 1382 | })) 1383 | } 1384 | (None, None, None) => { 1385 | info!("TLS not configured"); 1386 | None 1387 | } 1388 | _ => { 1389 | error!("Inconsistent TLS config. 
Must specify tls_bind_addr, tls_pem_key and tls_pem_chain"); 1390 | return; 1391 | } 1392 | }; 1393 | 1394 | let addr = SocketAddr::from_str(&config.bind_addr).expect("Invalid config bind address"); 1395 | info!("Binding -> http://{}", config.bind_addr); 1396 | 1397 | let monitor_rx = tx.subscribe(); 1398 | let monitor_client = client.clone(); 1399 | let monitor_handle = tokio::task::spawn(async move { 1400 | monitor_upstream(monitor_client, mirror_chain, monitor_rx).await 1401 | }); 1402 | 1403 | let prefetch_bcast_rx = tx.subscribe(); 1404 | let prefetch_app_state = app_state.clone(); 1405 | let prefetch_handle = tokio::task::spawn(async move { 1406 | prefetch_task(prefetch_app_state, prefetch_rx, prefetch_bcast_rx).await 1407 | }); 1408 | 1409 | let server_handle = tokio::task::spawn(async move { 1410 | tokio::select! { 1411 | _ = rx1.recv() => { 1412 | return 1413 | } 1414 | _ = axum_server::bind(addr) 1415 | // .acceptor(NoDelayAcceptor::new()) 1416 | .serve(svc) => {} 1417 | } 1418 | info!("Server has stopped!"); 1419 | }); 1420 | 1421 | let mut boot_services_rx = tx.subscribe(); 1422 | 1423 | let maybe_tftp_handle = if config.boot_services { 1424 | let tftp_handle = tokio::task::spawn(async move { 1425 | let tftpd = async_tftp::server::TftpServerBuilder::with_dir_ro("/usr/share/ipxe/") 1426 | .expect("Unable to build tftp server") 1427 | .build() 1428 | .await 1429 | .expect("Unable to build tftp server"); 1430 | info!("Starting TFTP"); 1431 | tokio::select! { 1432 | _ = boot_services_rx.recv() => { 1433 | return 1434 | } 1435 | _ = tftpd.serve() => {} 1436 | } 1437 | info!("TFTP Server has stopped!"); 1438 | }); 1439 | Some(tftp_handle) 1440 | } else { 1441 | None 1442 | }; 1443 | 1444 | // Block for signals now 1445 | 1446 | tokio::select!
{ 1447 | Ok(()) = tokio::signal::ctrl_c() => {} 1448 | Some(()) = async move { 1449 | let sigterm = tokio::signal::unix::SignalKind::terminate(); 1450 | tokio::signal::unix::signal(sigterm).unwrap().recv().await 1451 | } => {} 1452 | } 1453 | 1454 | info!("Stopping ..."); 1455 | tx.send(true).expect("Failed to signal workes to stop"); 1456 | 1457 | let _ = server_handle.await; 1458 | 1459 | if let Some(tls_server_handle) = tls_server_handle { 1460 | let _ = tls_server_handle; 1461 | } 1462 | 1463 | let _ = monitor_handle.await; 1464 | let _ = prefetch_handle.await; 1465 | } 1466 | 1467 | #[tokio::main(flavor = "multi_thread", worker_threads = 20)] 1468 | async fn main() { 1469 | #[cfg(feature = "dhat-heap")] 1470 | let file_name = format!("/tmp/dhat/heap-{}.json", std::process::id()); 1471 | #[cfg(feature = "dhat-heap")] 1472 | let _profiler = dhat::Profiler::builder() 1473 | .trim_backtraces(Some(4)) 1474 | .file_name(file_name) 1475 | .build(); 1476 | 1477 | use tracing_subscriber::{layer::SubscriberExt, util::SubscriberInitExt, EnvFilter, Registry}; 1478 | let filter_layer = EnvFilter::try_from_default_env() 1479 | .or_else(|_| EnvFilter::try_new("info")) 1480 | .unwrap(); 1481 | 1482 | let fmt_layer = tracing_forest::ForestLayer::default(); 1483 | // let fmt_layer = tracing_subscriber::fmt::layer(); 1484 | // .with_target(true); 1485 | 1486 | // let console_layer = ConsoleLayer::builder().with_default_env().spawn(); 1487 | 1488 | Registry::default() 1489 | // .with(console_layer) 1490 | .with(filter_layer) 1491 | .with(fmt_layer) 1492 | .init(); 1493 | 1494 | do_main().await; 1495 | } 1496 | -------------------------------------------------------------------------------- /opensuse-proxy-cache/src/memcache.rs: -------------------------------------------------------------------------------- 1 | use std::sync::Arc; 2 | use concread::hashmap::HashMap; 3 | use tokio::sync::mpsc::{channel, Receiver, Sender}; 4 | use tide::log; 5 | use std::time::Instant; 6 | 7 | const PENDING_ADDS: usize = 8; 8 | 9 | #[derive(Debug, Clone, PartialEq, Eq)] 10 | pub struct MemCacheObj { 11 | pub etag: String, 12 | // Time to refresh 13 | pub refresh: Instant, 14 | pub headers: Vec<(String, String)>, 15 | pub content: Option, 16 | pub blob: Vec, 17 | } 18 | 19 | #[derive(Debug, Clone)] 20 | pub struct MemCacheMeta { 21 | pub req_path: String, 22 | pub headers: Vec<(String, String)>, 23 | pub content: Option, 24 | pub blob: Vec, 25 | pub refresh: Instant, 26 | } 27 | 28 | async fn cache_mgr( 29 | map: Arc>>, 30 | mut submit_rx: Receiver, 31 | ) { 32 | // Wait on the channel, and when we get something proceed from there. 
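// Every insert funnels through this single task via the bounded mpsc channel, so at
// most one write transaction is ever open on the concread HashMap while readers keep
// getting consistent snapshots from `map.read()`. The channel capacity (PENDING_ADDS)
// also applies back-pressure to producers instead of queueing unbounded cache writes.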
33 | while let Some(meta) = submit_rx.recv().await { 34 | log::error!("mem_cache_mgr got -> {:?}", meta.req_path); 35 | let MemCacheMeta { 36 | req_path, headers, content, blob, refresh 37 | } = meta; 38 | 39 | let etag = headers 40 | .iter() 41 | .find_map(|(hv, hk)| { 42 | if hv == "etag" { 43 | Some(hk.clone()) 44 | } else { 45 | None 46 | } 47 | }) 48 | .unwrap_or_else(|| "".to_string()); 49 | 50 | let obj = Arc::new(MemCacheObj { 51 | etag, refresh, headers, content, blob 52 | }); 53 | 54 | let mut wrtxn = map.write(); 55 | let _prev = wrtxn.insert(req_path, obj); 56 | wrtxn.commit(); 57 | } 58 | } 59 | 60 | pub struct MemCache { 61 | map: Arc>>, 62 | pub submit_tx: Sender, 63 | } 64 | 65 | impl MemCache { 66 | pub fn new() -> MemCache { 67 | let map = Arc::new(HashMap::new()); 68 | let map_cln = map.clone(); 69 | let (submit_tx, submit_rx) = channel(PENDING_ADDS); 70 | // This launches our task too. 71 | let _ = tokio::task::spawn(async move { 72 | cache_mgr(map_cln, submit_rx).await 73 | }); 74 | 75 | MemCache { 76 | map, 77 | submit_tx, 78 | } 79 | } 80 | 81 | pub fn get(&self, req_path: &str) -> Option> { 82 | let rtxn = self.map.read(); 83 | rtxn.get(req_path).cloned() 84 | } 85 | } 86 | 87 | 88 | -------------------------------------------------------------------------------- /opensuse-proxy-cache/templates/ipxe.menu.html: -------------------------------------------------------------------------------- 1 | #!ipxe 2 | 3 | set mirror-uri {{mirror_uri}} 4 | 5 | :start 6 | menu Boot Menu (${mirror-uri}) 7 | item --gap == openSUSE 8 | item tumbleweed Tumbleweed (Latest) 9 | item leap15_6 Leap 15.6 10 | item leap15_5 Leap 15.5 11 | item leap_micro_5_4 Leap Micro 5.4 12 | item --gap == Utilities 13 | item memtest86 Memtest 86+ (EFI Only) 14 | item shell Drop to iPXE shell 15 | item reboot Reboot 16 | item exit Exit 17 | 18 | choose target && goto ${target} 19 | 20 | :failed 21 | echo Booting failed, dropping to shell 22 | goto shell 23 | 24 | :reboot 25 | reboot 26 | 27 | :exit 28 | exit 29 | 30 | :shell 31 | echo Type 'exit' to get the back to the menu 32 | shell 33 | set menu-timeout 0 34 | set submenu-timeout 0 35 | goto start 36 | 37 | :memtest86 38 | kernel ${mirror-uri}ipxe/memtest.efi 39 | boot || goto failed 40 | 41 | :tumbleweed 42 | set repo ${mirror-uri}tumbleweed/repo/oss 43 | kernel ${repo}/boot/x86_64/loader/linux initrd=initrd install=${repo} 44 | initrd ${repo}/boot/x86_64/loader/initrd 45 | boot || goto failed 46 | 47 | :leap15_6 48 | set repo ${mirror-uri}distribution/leap/15.6/repo/oss 49 | kernel ${repo}/boot/x86_64/loader/linux initrd=initrd install=${repo} 50 | initrd ${repo}/boot/x86_64/loader/initrd 51 | boot || goto failed 52 | 53 | :leap15_5 54 | set repo ${mirror-uri}distribution/leap/15.5/repo/oss 55 | kernel ${repo}/boot/x86_64/loader/linux initrd=initrd install=${repo} 56 | initrd ${repo}/boot/x86_64/loader/initrd 57 | boot || goto failed 58 | 59 | :leap_micro_5_4 60 | set repo ${mirror-uri}distribution/leap-micro/5.4/product/repo/Leap-Micro-5.4-x86_64-Media 61 | kernel ${repo}/boot/x86_64/loader/linux initrd=initrd install=${repo} 62 | initrd ${repo}/boot/x86_64/loader/initrd 63 | boot || goto failed 64 | 65 | -------------------------------------------------------------------------------- /redis-server/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "redis-server" 3 | version = "0.1.0" 4 | edition = "2021" 5 | 6 | [profile.release] 7 | debug = true 8 | lto = "thin" 9 | 10 | 
[dependencies] 11 | tokio-util = { version = "0.7", features = ["codec"] } 12 | tokio = { version = "1", features = ["full"] } 13 | nom = "7.1.3" 14 | bytes = "1" 15 | tempfile = "3.10" 16 | 17 | tracing = { version = "0.1", features = ["attributes"] } 18 | tracing-subscriber = { version = "0.3", features = ["env-filter"] } 19 | 20 | tracing-forest = { version = "0.1.6", features = ["uuid", "smallvec", "tokio"] } 21 | # tracing-forest = { git = "https://github.com/QnnOkabayashi/tracing-forest.git", features = ["uuid", "smallvec", "tokio"] } 22 | 23 | structopt = { version = "0.3", default-features = false } 24 | futures = "0.3" 25 | 26 | arc-disk-cache = { path = "../arc-disk-cache" } 27 | 28 | [dev-dependencies] 29 | redis = "0.25.2" 30 | 31 | tracing-subscriber = "0.3" 32 | 33 | -------------------------------------------------------------------------------- /redis-server/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM opensuse/tumbleweed:latest AS ref_repo 2 | 3 | # RUN zypper mr -d repo-non-oss && \ 4 | # zypper mr -d repo-oss && \ 5 | # zypper mr -d repo-update && \ 6 | # zypper ar http://dl.suse.blackhats.net.au:8080/update/tumbleweed/ repo-update-https && \ 7 | # zypper ar http://dl.suse.blackhats.net.au:8080/tumbleweed/repo/oss/ repo-oss-https && \ 8 | # zypper ar http://dl.suse.blackhats.net.au:8080/tumbleweed/repo/non-oss/ repo-non-oss-https && \ 9 | # zypper --gpg-auto-import-keys ref --force 10 | 11 | # FROM opensuse/leap:latest AS ref_repo 12 | RUN sed -i -E 's/https?:\/\/download.opensuse.org/http:\/\/os.int.firstyear.id.au/g' /etc/zypp/repos.d/*.repo && \ 13 | zypper --gpg-auto-import-keys ref --force 14 | 15 | # // setup the builder pkgs 16 | FROM ref_repo AS build_base 17 | RUN zypper install -y cargo rust gcc sqlite3-devel libopenssl-3-devel sccache 18 | 19 | # // setup the runner pkgs 20 | FROM ref_repo AS run_base 21 | RUN zypper install -y sqlite3 openssl-3 timezone iputils iproute2 curl 22 | 23 | # // build artifacts 24 | FROM build_base AS builder 25 | 26 | COPY . /home/proxy/ 27 | RUN mkdir /home/proxy/.cargo 28 | COPY cargo_config /home/proxy/.cargo/config 29 | WORKDIR /home/proxy/ 30 | WORKDIR /home/proxy/redis-server 31 | 32 | # SCCACHE_REDIS=redis://redis.dev.blackhats.net.au:6379 \ 33 | # RUSTC_WRAPPER=sccache \ 34 | 35 | RUN RUSTFLAGS="-Ctarget-cpu=x86-64-v3" \ 36 | cargo build --release 37 | 38 | # == end builder setup, we now have static artifacts. 
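# The runtime stage below starts from run_base and only copies the compiled binary out
# of the builder stage, so compilers and -devel packages never ship in the final image.
# A rough sketch of building and running it from the repository root (the image tag,
# port mapping and volume name are illustrative, not part of this repo):
#
#   docker build -f redis-server/Dockerfile -t redis-cache .
#   docker run --rm -e BIND_ADDRESS="[::]:6379" -p 6379:6379 \
#       -v redis-cache-data:/var/cache/redis redis-cache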
39 | FROM run_base 40 | MAINTAINER william@blackhats.net.au 41 | EXPOSE 8080 42 | EXPOSE 8443 43 | WORKDIR / 44 | 45 | # RUN cd /etc && \ 46 | # ln -sf ../usr/share/zoneinfo/Australia/Brisbane localtime 47 | 48 | COPY --from=builder /home/proxy/target/release/redis-server /bin/ 49 | 50 | STOPSIGNAL SIGINT 51 | 52 | ENV RUST_BACKTRACE 1 53 | CMD ["/bin/redis-server"] 54 | -------------------------------------------------------------------------------- /redis-server/src/codec.rs: -------------------------------------------------------------------------------- 1 | use crate::parser::*; 2 | use crate::CacheT; 3 | use bytes::{Buf, BufMut, BytesMut}; 4 | use nom::Err::Incomplete; 5 | use std::io; 6 | use std::io::Write; 7 | use std::sync::Arc; 8 | use tempfile::NamedTempFile; 9 | use tokio_util::codec::{Decoder, Encoder}; 10 | 11 | #[derive(Debug)] 12 | pub enum RedisClientMsg { 13 | Auth(Vec), 14 | Info, 15 | Disconnect, 16 | ConfigGet(String), 17 | ClientSetInfo(String, Option), 18 | Get(Vec), 19 | Set(Vec, usize, NamedTempFile), 20 | } 21 | 22 | #[allow(dead_code)] 23 | pub enum RedisServerMsg<'a> { 24 | Ok, 25 | Error(String), 26 | // Info returns a single bulk string. 27 | Info { used_memory: u64 }, 28 | KvPair { k: String, v: String }, 29 | DataHdr { sz: usize }, 30 | DataChunk { slice: &'a [u8] }, 31 | DataEof, 32 | Null, 33 | } 34 | 35 | #[derive(Debug)] 36 | enum DecodeState { 37 | Cmd, 38 | Set { 39 | fh: NamedTempFile, 40 | key: Vec, 41 | dsz: usize, 42 | rem: usize, 43 | }, 44 | } 45 | 46 | pub struct RedisCodec { 47 | cache: Arc, 48 | d_state: DecodeState, 49 | } 50 | 51 | const MAX_CMD_LEN: usize = MAXIMUM_KEY_SIZE_BYTES + 256; 52 | 53 | impl RedisCodec { 54 | pub fn new(cache: Arc) -> Self { 55 | RedisCodec { 56 | cache, 57 | d_state: DecodeState::Cmd, 58 | } 59 | } 60 | 61 | fn decode_cmd(&mut self, buf: &mut BytesMut) -> Result, io::Error> { 62 | trace!("cap: {}: len: {}", buf.capacity(), buf.len()); 63 | // trace!("buf_raw: {:?}", String::from_utf8(buf.to_vec())) 64 | 65 | let (_rem, (cmd, sz)) = match cmd_parser(buf.as_ref()) { 66 | Ok(r) => r, 67 | Err(Incomplete(_)) => { 68 | if buf.len() >= MAX_CMD_LEN { 69 | return Err(io::Error::new(io::ErrorKind::Other, "Command too long")); 70 | } else { 71 | debug!("Need more data"); 72 | return Ok(None); 73 | } 74 | } 75 | Err(e) => { 76 | error!(?e, "Malformed input"); 77 | return Err(io::Error::new(io::ErrorKind::Other, "Malformed Input")); 78 | } 79 | }; 80 | 81 | trace!(?cmd); 82 | 83 | let r = match cmd { 84 | Cmd::Wait => { 85 | buf.clear(); 86 | trace!("WAIT cap: {}: len: {}", buf.capacity(), buf.len()); 87 | None 88 | } 89 | Cmd::Auth(pw) => Some(RedisClientMsg::Auth(pw.to_vec())), 90 | Cmd::Get(key) => Some(RedisClientMsg::Get(key.to_vec())), 91 | Cmd::Set(key, dsz) => { 92 | // Okay, this is the fun one. We basicly need to setup for the next iter. 
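// A SET is handled in two phases: this arm only consumes the command header and the
// "$<len>\r\n" size line, switches the codec into DecodeState::Set with a fresh temp
// file, and then hands the rest of the buffer to process_set(). Subsequent decode()
// calls stay in that state, streaming payload bytes to disk until `rem` hits zero and
// the trailing CRLF is read, so the value never has to be held in memory at once.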
93 | let fh = self.cache.new_tempfile().ok_or_else(|| { 94 | error!("Unable to allocate temp file"); 95 | io::Error::new(io::ErrorKind::Other, "Server Error") 96 | })?; 97 | self.d_state = DecodeState::Set { 98 | fh, 99 | key: key.to_vec(), 100 | dsz: dsz as usize, 101 | rem: dsz as usize, 102 | }; 103 | buf.advance(sz); 104 | return self.process_set(buf); 105 | } 106 | Cmd::ConfigGet(key) => { 107 | let skey = String::from_utf8(key.to_vec()) 108 | .map_err(|_| io::Error::new(io::ErrorKind::Other, "Invalid UTF8"))?; 109 | Some(RedisClientMsg::ConfigGet(skey)) 110 | } 111 | 112 | Cmd::ClientSetInfo(name, None) => { 113 | let name = String::from_utf8(name.to_vec()) 114 | .map_err(|_| io::Error::new(io::ErrorKind::Other, "Invalid UTF8"))?; 115 | Some(RedisClientMsg::ClientSetInfo(name, None)) 116 | } 117 | Cmd::ClientSetInfo(name, Some(version)) => { 118 | let name = String::from_utf8(name.to_vec()) 119 | .map_err(|_| io::Error::new(io::ErrorKind::Other, "Invalid UTF8"))?; 120 | let version = String::from_utf8(version.to_vec()) 121 | .map_err(|_| io::Error::new(io::ErrorKind::Other, "Invalid UTF8"))?; 122 | Some(RedisClientMsg::ClientSetInfo(name, Some(version))) 123 | } 124 | 125 | Cmd::Info => Some(RedisClientMsg::Info), 126 | Cmd::Disconnect => Some(RedisClientMsg::Disconnect), 127 | }; 128 | 129 | if sz == buf.len() { 130 | buf.clear(); 131 | } else { 132 | buf.advance(sz); 133 | } 134 | 135 | Ok(r) 136 | } 137 | 138 | fn process_set(&mut self, buf: &mut BytesMut) -> Result, io::Error> { 139 | if let DecodeState::Set { 140 | fh, 141 | key: _, 142 | dsz: _, 143 | rem, 144 | } = &mut self.d_state 145 | { 146 | trace!("START PROCESS SET"); 147 | trace!("cap: {}: len: {}", buf.capacity(), buf.len()); 148 | // trace!("buf_raw: {:?}", String::from_utf8(buf.to_vec())); 149 | 150 | // How much is remaining? 151 | trace!("rem: {}", rem); 152 | 153 | if *rem > 0 { 154 | let r_buf = if *rem <= buf.len() { 155 | // There could be excess bytes. 156 | // Can finish and consume everything. 157 | let (a, _) = buf.split_at(*rem); 158 | a 159 | } else { 160 | // Not enough to finish, just take as much as we can. 161 | &buf 162 | }; 163 | 164 | let wr_b = fh.write(r_buf).map_err(|e| { 165 | error!(?e, "Failed to write to fh"); 166 | io::Error::new(io::ErrorKind::Other, "Server Error") 167 | })?; 168 | *rem -= wr_b; 169 | trace!("wrote: {} rem: {} buflen: {}", wr_b, rem, buf.len()); 170 | 171 | if wr_b == buf.len() { 172 | buf.clear(); 173 | } else { 174 | buf.advance(wr_b); 175 | } 176 | } 177 | 178 | let r = if *rem == 0 { 179 | // We don't need to read anything but we still need the crlf. 180 | match tag_eol(buf) { 181 | Ok(_) => { 182 | // We ignore the OK inners since this is the remaining / trailing bytes. 183 | // Since we'll advance by the correct len here, we don't need to 184 | // do anything else. 185 | let wr_b = 2; 186 | trace!("COMPLETE!!! 
{} {}", wr_b, buf.len()); 187 | if wr_b == buf.len() { 188 | buf.clear(); 189 | } else { 190 | buf.advance(wr_b); 191 | }; 192 | 193 | // (2, Some(...)) 194 | let mut n_state = DecodeState::Cmd; 195 | std::mem::swap(&mut n_state, &mut self.d_state); 196 | 197 | if let DecodeState::Set { 198 | fh, 199 | key, 200 | dsz, 201 | rem: _, 202 | } = n_state 203 | { 204 | Some(RedisClientMsg::Set(key, dsz, fh)) 205 | } else { 206 | error!("Invalid state transition"); 207 | return Err(io::Error::new(io::ErrorKind::Other, "Server Error")); 208 | } 209 | } 210 | Err(Incomplete(_)) => { 211 | debug!("Need more data"); 212 | None 213 | } 214 | Err(e) => { 215 | error!(?e, "Malformed input"); 216 | return Err(io::Error::new(io::ErrorKind::Other, "Malformed Input")); 217 | } 218 | } 219 | } else { 220 | // Need more data! 221 | None 222 | }; 223 | 224 | Ok(r) 225 | } else { 226 | error!("Invalid state transition"); 227 | Err(io::Error::new(io::ErrorKind::Other, "Server Error")) 228 | } 229 | } 230 | } 231 | 232 | impl Decoder for RedisCodec { 233 | type Item = RedisClientMsg; 234 | type Error = io::Error; 235 | 236 | fn decode(&mut self, buf: &mut BytesMut) -> Result, Self::Error> { 237 | match &self.d_state { 238 | DecodeState::Cmd => self.decode_cmd(buf), 239 | DecodeState::Set { .. } => self.process_set(buf), 240 | } 241 | } 242 | } 243 | 244 | impl Encoder> for RedisCodec { 245 | type Error = io::Error; 246 | 247 | fn encode(&mut self, msg: RedisServerMsg<'_>, buf: &mut BytesMut) -> io::Result<()> { 248 | // error!("wframe - cap: {}: len: {}", buf.capacity(), buf.len()); 249 | match msg { 250 | RedisServerMsg::Ok => { 251 | buf.put(&b"+OK\r\n"[..]); 252 | } 253 | RedisServerMsg::Null => { 254 | buf.put(&b"$-1\r\n"[..]); 255 | } 256 | RedisServerMsg::Info { used_memory } => { 257 | // Build the raw buffer. 258 | let r = format!("# Memory\r\nused_memory:{}\r\n", used_memory); 259 | let b = r.as_bytes(); 260 | let bs_hdr = format!("${}\r\n", b.len()); 261 | buf.put_slice(bs_hdr.as_bytes()); 262 | buf.put_slice(b); 263 | buf.put(&b"\r\n"[..]); 264 | 265 | debug!("buf {:?}", String::from_utf8(buf.to_vec())); 266 | } 267 | RedisServerMsg::KvPair { k, v } => { 268 | // Identify our response has 2 values. 269 | buf.put(&b"*2\r\n"[..]); 270 | // How long is the key? 271 | let kb = k.as_bytes(); 272 | let kbs_hdr = format!("${}\r\n", k.len()); 273 | buf.put_slice(kbs_hdr.as_bytes()); 274 | buf.put_slice(kb); 275 | buf.put(&b"\r\n"[..]); 276 | // Add the value 277 | let vb = v.as_bytes(); 278 | let vbs_hdr = format!("${}\r\n", v.len()); 279 | buf.put_slice(vbs_hdr.as_bytes()); 280 | buf.put_slice(vb); 281 | buf.put(&b"\r\n"[..]); 282 | } 283 | // We split this up into "parts". 284 | RedisServerMsg::DataHdr { sz } => { 285 | let kbs_hdr = format!("${}\r\n", sz); 286 | buf.put_slice(kbs_hdr.as_bytes()); 287 | } 288 | RedisServerMsg::DataChunk { slice } => { 289 | // extend from slice will auto-resize. 
290 | buf.extend_from_slice(slice) 291 | } 292 | RedisServerMsg::DataEof => { 293 | buf.put(&b"\r\n"[..]); 294 | } 295 | RedisServerMsg::Error(err) => { 296 | buf.put(&b"-SERVER_ERROR "[..]); 297 | buf.put_slice(err.as_bytes()); 298 | buf.put(&b"\r\n"[..]); 299 | } 300 | } 301 | 302 | Ok(()) 303 | } 304 | } 305 | -------------------------------------------------------------------------------- /redis-server/src/main.rs: -------------------------------------------------------------------------------- 1 | #[macro_use] 2 | extern crate tracing; 3 | 4 | use structopt::StructOpt; 5 | 6 | use tokio::io::{AsyncRead, AsyncWrite}; 7 | use tokio::net::TcpListener; 8 | use tokio::sync::broadcast; 9 | use tokio::sync::oneshot; 10 | use tokio::time::sleep; 11 | use tokio_util::codec::{FramedRead, FramedWrite}; 12 | 13 | use futures::SinkExt; 14 | use futures::StreamExt; 15 | 16 | use std::net; 17 | use std::path::PathBuf; 18 | use std::str::FromStr; 19 | use std::time::Duration; 20 | 21 | use crate::tracing::Instrument; 22 | 23 | use std::io::Read; 24 | use std::sync::Arc; 25 | 26 | use arc_disk_cache::ArcDiskCache; 27 | 28 | mod codec; 29 | mod parser; 30 | 31 | use crate::codec::{RedisClientMsg, RedisCodec, RedisServerMsg}; 32 | 33 | pub(crate) type CacheT = ArcDiskCache, ()>; 34 | 35 | async fn client_process( 36 | mut r: FramedRead, 37 | mut w: FramedWrite, 38 | client_address: net::SocketAddr, 39 | mut shutdown_rx: broadcast::Receiver<()>, 40 | cache: Arc, 41 | ) { 42 | info!(?client_address, "connect"); 43 | 44 | 'outer: loop { 45 | tokio::select! { 46 | Ok(()) = (&mut shutdown_rx).recv() => { 47 | break; 48 | } 49 | res = r.next() => { 50 | let rmsg = 51 | match res { 52 | None => { 53 | info!(?client_address, "disconnect"); 54 | break; 55 | } 56 | Some(Err(e)) => { 57 | error!(?e); 58 | info!(?client_address, "disconnect"); 59 | break; 60 | } 61 | Some(Ok(rmsg)) => { 62 | rmsg 63 | } 64 | }; 65 | 66 | match rmsg { 67 | RedisClientMsg::Auth(_passwd) => { 68 | debug!("Handling Auth"); 69 | if let Err(e) = w.send(RedisServerMsg::Ok).await { 70 | error!(?e); 71 | break; 72 | } 73 | } 74 | 75 | RedisClientMsg::Info => { 76 | debug!("Handling Info"); 77 | let stats = cache.view_stats(); 78 | let used_memory = stats.freq + stats.recent; 79 | if let Err(e) = w.send( 80 | RedisServerMsg::Info { used_memory } 81 | ).await { 82 | error!(?e); 83 | break; 84 | } 85 | } 86 | RedisClientMsg::ConfigGet(skey) => { 87 | debug!("Handling Config Get"); 88 | let v = cache.view_stats().shared_max.to_string(); 89 | if let Err(e) = w.send( 90 | RedisServerMsg::KvPair { k: skey, v } 91 | ).await { 92 | error!(?e); 93 | break; 94 | } 95 | } 96 | 97 | RedisClientMsg::Get(key) => { 98 | debug!("Handling Get"); 99 | match cache.get(&key) { 100 | Some(cobj) => { 101 | if let Err(e) = w.send( 102 | RedisServerMsg::DataHdr { sz: cobj.fhandle.amt } 103 | ).await { 104 | error!(?e); 105 | break; 106 | }; 107 | 108 | let mut f = match cobj.fhandle.reopen() { 109 | Ok(f) => f, 110 | Err(e) => { 111 | error!(?e); 112 | break; 113 | } 114 | }; 115 | let mut buffer = [0; 8192]; 116 | 117 | 'inner: loop { 118 | if let Ok(n) = f.read(&mut buffer) { 119 | if n == 0 { 120 | break 'inner; 121 | } else { 122 | let (slice, _) = buffer.split_at(n); 123 | if let Err(e) = w.send( 124 | RedisServerMsg::DataChunk { slice } 125 | ).await { 126 | error!(?e); 127 | break 'outer; 128 | }; 129 | 130 | } 131 | } else { 132 | info!(?client_address, "disconnect"); 133 | break 'outer; 134 | } 135 | } 136 | 137 | if let Err(e) = w.send( 138 | 
RedisServerMsg::DataEof 139 | ).await { 140 | error!(?e); 141 | break; 142 | }; 143 | } 144 | None => { 145 | if let Err(e) = w.send( 146 | RedisServerMsg::Null 147 | ).await { 148 | error!(?e); 149 | break; 150 | } 151 | } 152 | } 153 | } 154 | RedisClientMsg::Set(key, _dsz, fh) => { 155 | debug!("Handling Set"); 156 | cache.insert(key, (), fh); 157 | if let Err(e) = w.send( 158 | RedisServerMsg::Null 159 | ).await { 160 | error!(?e); 161 | break; 162 | } 163 | } 164 | RedisClientMsg::ClientSetInfo(_name, _maybe_version) => { 165 | debug!("Handling Client SetInfo"); 166 | if let Err(e) = w.send(RedisServerMsg::Ok).await { 167 | error!(?e); 168 | break; 169 | } 170 | } 171 | RedisClientMsg::Disconnect => { 172 | info!(?client_address, "disconnect"); 173 | break; 174 | } 175 | } 176 | } 177 | } 178 | .instrument(tracing::info_span!("client_request")); 179 | } 180 | trace!(?client_address, "client process stopped cleanly."); 181 | } 182 | 183 | async fn run_server( 184 | cache_size: usize, 185 | cache_path: PathBuf, 186 | durable_fs: bool, 187 | addr: net::SocketAddr, 188 | mut shutdown_rx: oneshot::Receiver<()>, 189 | ) { 190 | info!(%cache_size, ?cache_path, %addr, "Starting with parameters."); 191 | 192 | // Setup the cache here. 193 | let cache = match ArcDiskCache::new(cache_size, &cache_path, durable_fs) { 194 | Ok(l) => Arc::new(l), 195 | Err(err) => { 196 | error!(?err, "Could not create Arc Disk Cache"); 197 | return; 198 | } 199 | }; 200 | 201 | let listener = match TcpListener::bind(&addr).await { 202 | Ok(l) => l, 203 | Err(e) => { 204 | error!("Could not bind to redis server address {} -> {:?}", addr, e); 205 | return; 206 | } 207 | }; 208 | 209 | let (tx, _rx) = broadcast::channel(1); 210 | 211 | trace!("Listening on {:?}", addr); 212 | 213 | loop { 214 | tokio::select! { 215 | Ok(()) = &mut shutdown_rx => { 216 | tx.send(()) 217 | .expect("Unable to broadcast shutdown!"); 218 | break; 219 | } 220 | res = listener.accept() => { 221 | match res { 222 | Ok((tcpstream, client_socket_addr)) => { 223 | tcpstream.set_nodelay(true); 224 | // Start the event 225 | let (r, w) = tokio::io::split(tcpstream); 226 | let r = FramedRead::new(r, RedisCodec::new(cache.clone())); 227 | let w = FramedWrite::new(w, RedisCodec::new(cache.clone())); 228 | let c_rx = tx.subscribe(); 229 | let c_cache = cache.clone(); 230 | // Let it rip. 231 | tokio::spawn(client_process(r, w, client_socket_addr, c_rx, c_cache)); 232 | } 233 | Err(e) => { 234 | error!("TCP acceptor error, continuing -> {:?}", e); 235 | } 236 | } 237 | } 238 | } 239 | } 240 | 241 | while tx.receiver_count() > 1 { 242 | trace!("Waiting for {} tasks", tx.receiver_count()); 243 | sleep(Duration::from_millis(250)).await; 244 | } 245 | } 246 | 247 | #[derive(StructOpt)] 248 | struct Config { 249 | #[structopt(default_value = "17179869184", env = "CACHE_SIZE")] 250 | /// Disk size for cache content in bytes. Defaults to 16GiB 251 | cache_size: usize, 252 | #[structopt( 253 | parse(from_os_str), 254 | default_value = "/var/cache/redis/", 255 | env = "CACHE_PATH" 256 | )] 257 | /// Path where cache content should be stored 258 | cache_path: PathBuf, 259 | #[structopt(default_value = "[::1]:6379", env = "BIND_ADDRESS", long = "addr")] 260 | /// Address to listen to for http 261 | bind_addr: String, 262 | #[structopt(short = "Z", long = "durable_fs", env = "DURABLE_FS")] 263 | /// Is this running on a consistent and checksummed fs? If yes, then we can skip 264 | /// internal crc32c sums on get(). 
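/// (For example btrfs or ZFS with data checksums; this is an operator assertion that
/// the server takes on trust rather than verifying itself.)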
265 | durable_fs: bool, 266 | } 267 | 268 | async fn do_main() { 269 | debug!("Starting"); 270 | let Config { 271 | cache_size, 272 | cache_path, 273 | bind_addr, 274 | durable_fs, 275 | } = Config::from_args(); 276 | 277 | let addr = match net::SocketAddr::from_str(&bind_addr) { 278 | Ok(a) => a, 279 | Err(e) => { 280 | error!( 281 | "Could not parse redis server address {} -> {:?}", 282 | bind_addr, e 283 | ); 284 | return; 285 | } 286 | }; 287 | 288 | let (shutdown_tx, shutdown_rx) = oneshot::channel(); 289 | 290 | let mut handle = tokio::spawn(async move { 291 | run_server(cache_size, cache_path, durable_fs, addr, shutdown_rx).await; 292 | }); 293 | 294 | tokio::select! { 295 | Some(()) = async move { 296 | let sigterm = tokio::signal::unix::SignalKind::terminate(); 297 | tokio::signal::unix::signal(sigterm).unwrap().recv().await 298 | } => {} 299 | Ok(()) = tokio::signal::ctrl_c() => { 300 | } 301 | _ = &mut handle => { 302 | warn!("Server has unexpectedly stopped!"); 303 | return; 304 | } 305 | } 306 | 307 | info!("Starting shutdown process ..."); 308 | shutdown_tx 309 | .send(()) 310 | .expect("Could not send shutdown signal!"); 311 | // Ignore if there is an error from the handler on return. 312 | let _ = handle.await; 313 | info!("Server has stopped!"); 314 | } 315 | 316 | #[tokio::main] 317 | async fn main() { 318 | tracing_forest::worker_task() 319 | .set_global(true) 320 | .build_with(|layer: tracing_forest::ForestLayer<_, _>| { 321 | use tracing_subscriber::{layer::SubscriberExt, EnvFilter, Registry}; 322 | 323 | let filter_layer = EnvFilter::try_from_default_env() 324 | .or_else(|_| EnvFilter::try_new("info")) 325 | .unwrap(); 326 | 327 | Registry::default().with(filter_layer).with(layer) 328 | }) 329 | .on(async { do_main().await }) 330 | .await 331 | } 332 | 333 | #[cfg(test)] 334 | mod tests { 335 | use crate::run_server; 336 | use tokio::sync::oneshot; 337 | 338 | use std::collections::HashMap; 339 | use std::fs; 340 | use std::net::{IpAddr, Ipv4Addr, SocketAddr}; 341 | use std::path::PathBuf; 342 | use std::sync::atomic::{AtomicU16, Ordering}; 343 | 344 | use redis::{cmd, InfoDict}; 345 | 346 | static PORT_ALLOC: AtomicU16 = AtomicU16::new(19080); 347 | 348 | #[tokio::test] 349 | async fn server_it_works() { 350 | let _ = tracing_subscriber::fmt::try_init(); 351 | 352 | let port = PORT_ALLOC.fetch_add(1, Ordering::SeqCst); 353 | 354 | let localhost_v4 = IpAddr::V4(Ipv4Addr::new(127, 0, 0, 1)); 355 | let addr = SocketAddr::new(localhost_v4, port); 356 | 357 | let (shutdown_tx, shutdown_rx) = oneshot::channel(); 358 | let cache_path = PathBuf::from(format!( 359 | "{}/redis-test/{}/", 360 | option_env!("CARGO_TARGET_TMPDIR").unwrap_or("/tmp"), 361 | port 362 | )); 363 | let cache_size = 1048576; 364 | 365 | info!(?cache_path); 366 | fs::remove_dir_all(&cache_path); 367 | fs::create_dir_all(&cache_path).unwrap(); 368 | 369 | let mut handle = tokio::spawn(async move { 370 | run_server(cache_size, cache_path, false, addr, shutdown_rx).await; 371 | }); 372 | 373 | // Do the test 374 | let blocking_task = tokio::task::spawn_blocking(move || { 375 | let client = redis::Client::open( 376 | format!("redis://username:password@127.0.0.1:{}/", port).as_str(), 377 | ) 378 | .expect("failed to launch redis client"); 379 | 380 | let mut con = client.get_connection().expect("failed to get connection"); 381 | 382 | let v: InfoDict = cmd("INFO").query(&mut con).expect("Failed to get info"); 383 | 384 | let r = v.get::("used_memory"); 385 | error!(?r, "used memory"); 386 | 387 | let h: 
HashMap = cmd("CONFIG") 388 | .arg("GET") 389 | .arg("maxmemory") 390 | .query(&mut con) 391 | .expect("Failed to get config"); 392 | 393 | let mm = h 394 | .get("maxmemory") 395 | .and_then(|&s| if s != 0 { Some(s as u64) } else { None }) 396 | .expect("Failed to get maxmemory"); 397 | 398 | let key = b"test_key"; 399 | let d = b"test_data"; 400 | 401 | let d1: Vec = cmd("GET") 402 | .arg(key) 403 | .query(&mut con) 404 | .expect("Failed to get key"); 405 | 406 | let d2: Vec = cmd("SET") 407 | .arg(key) 408 | .arg(d) 409 | .query(&mut con) 410 | .expect("Failed to set key"); 411 | 412 | let d3: Vec = cmd("GET") 413 | .arg(key) 414 | .query(&mut con) 415 | .expect("Failed to get key"); 416 | 417 | assert!(d3 == d); 418 | info!("Success!"); 419 | }); 420 | 421 | blocking_task.await; 422 | 423 | shutdown_tx.send(()); 424 | handle.await; 425 | } 426 | } 427 | -------------------------------------------------------------------------------- /redis-server/src/parser.rs: -------------------------------------------------------------------------------- 1 | use nom::branch::alt; 2 | use nom::bytes::streaming::tag; 3 | use nom::bytes::streaming::{take, take_until}; 4 | use nom::combinator::eof; 5 | use nom::IResult; 6 | 7 | pub(crate) const MAXIMUM_KEY_SIZE_BYTES: usize = 1024; 8 | 9 | #[derive(Debug, PartialEq)] 10 | pub enum Cmd<'a> { 11 | Wait, 12 | Auth(&'a [u8]), 13 | Get(&'a [u8]), 14 | Set(&'a [u8], u32), 15 | ConfigGet(&'a [u8]), 16 | ClientSetInfo(&'a [u8], Option<&'a [u8]>), 17 | Info, 18 | Disconnect, 19 | } 20 | 21 | // get 22 | // set_noreply 23 | 24 | enum IType<'a> { 25 | // Array(usize), 26 | BulkString(&'a [u8]), 27 | } 28 | 29 | fn wait_parser(input: &[u8]) -> IResult<&[u8], (Cmd, usize)> { 30 | eof(input).map(|(a, _)| (a, (Cmd::Wait, 0))) 31 | } 32 | 33 | fn line_parser(input: &[u8]) -> IResult<&[u8], (&[u8], usize)> { 34 | let (rem, ln) = take_until("\r\n")(input)?; 35 | // alt((auth_parser, ver_parser, set_parser))(i) 36 | 37 | let (rem, _) = take(2u32)(rem)?; 38 | 39 | // trace!("ln {:?}", String::from_utf8(ln.to_vec())); 40 | // trace!("rem {:?}", String::from_utf8(rem.to_vec())); 41 | 42 | Ok((rem, (ln, ln.len() + 2))) 43 | } 44 | 45 | fn bulkstrln_parser(input: &[u8]) -> IResult<&[u8], (u32, usize)> { 46 | let mut taken = 0; 47 | let (rem, (ln, sz)) = line_parser(input)?; 48 | taken += sz; 49 | 50 | let (strln, _tag) = tag(b"$")(ln)?; 51 | 52 | let a = unsafe { std::str::from_utf8_unchecked(&strln) }; 53 | 54 | let strln = u32::from_str_radix(a, 10).expect("Invalid str"); 55 | Ok((rem, (strln, taken))) 56 | } 57 | 58 | fn bulkstr_parser(input: &[u8]) -> IResult<&[u8], (IType<'_>, usize)> { 59 | let mut taken = 0; 60 | let (rem, (strln, sz)) = bulkstrln_parser(input)?; 61 | taken += sz; 62 | 63 | let (rem, tgt_bytes) = take(strln)(rem)?; 64 | 65 | // get rid of the \r\n 66 | let (rem, _) = tag(b"\r\n")(rem)?; 67 | 68 | taken += strln as usize + 2; 69 | 70 | Ok((rem, (IType::BulkString(tgt_bytes), taken))) 71 | } 72 | 73 | fn type_parser(input: &[u8]) -> IResult<&[u8], (IType<'_>, usize)> { 74 | alt((bulkstr_parser,))(input) 75 | } 76 | 77 | fn array4_parser(input: &[u8]) -> IResult<&[u8], (Cmd<'_>, usize)> { 78 | let mut taken = 0; 79 | 80 | let (rem, (ln, sz)) = line_parser(input)?; 81 | 82 | taken += sz; 83 | 84 | tag("*4")(ln)?; 85 | 86 | // What is the next value? That tells us if we should proceed now or not. 
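// RESP frames are an array header ("*4\r\n") followed by one bulk string per element
// ("$<len>\r\n<bytes>\r\n"). A client hello such as
//   *4\r\n$6\r\nCLIENT\r\n$7\r\nSETINFO\r\n$8\r\nlib-name\r\n$5\r\nredis\r\n
// is consumed element by element below; any four-element command other than
// CLIENT SETINFO falls through to Cmd::Disconnect.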
87 | let (rem, (itype1, sz)) = type_parser(rem)?; 88 | taken += sz; 89 | 90 | match itype1 { 91 | IType::BulkString(b"CLIENT") => { 92 | let (rem, (itype2, sz)) = type_parser(rem)?; 93 | taken += sz; 94 | let (rem, (itype3, sz)) = type_parser(rem)?; 95 | taken += sz; 96 | let (rem, (itype4, sz)) = type_parser(rem)?; 97 | taken += sz; 98 | 99 | trace!("array4_parser - taken {:?}", taken); 100 | 101 | match (itype2, itype3, itype4) { 102 | ( 103 | IType::BulkString(b"SETINFO"), 104 | IType::BulkString(name), 105 | IType::BulkString(version), 106 | ) => Ok((rem, (Cmd::ClientSetInfo(name, Some(version)), taken))), 107 | _ => Ok((rem, (Cmd::Disconnect, taken))), 108 | } 109 | } 110 | _ => Ok((rem, (Cmd::Disconnect, taken))), 111 | } 112 | } 113 | 114 | fn array3_parser(input: &[u8]) -> IResult<&[u8], (Cmd<'_>, usize)> { 115 | let mut taken = 0; 116 | 117 | let (rem, (ln, sz)) = line_parser(input)?; 118 | 119 | taken += sz; 120 | 121 | tag("*3")(ln)?; 122 | 123 | // What is the next value? That tells us if we should proceed now or not. 124 | let (rem, (itype1, sz)) = type_parser(rem)?; 125 | taken += sz; 126 | 127 | match itype1 { 128 | IType::BulkString(b"CONFIG") => { 129 | let (rem, (itype2, sz)) = type_parser(rem)?; 130 | taken += sz; 131 | let (rem, (itype3, sz)) = type_parser(rem)?; 132 | taken += sz; 133 | 134 | trace!("array3_parser - taken {:?}", taken); 135 | 136 | match (itype2, itype3) { 137 | (IType::BulkString(b"GET"), IType::BulkString(key)) => { 138 | Ok((rem, (Cmd::ConfigGet(key), taken))) 139 | } 140 | _ => Ok((rem, (Cmd::Disconnect, taken))), 141 | } 142 | } 143 | IType::BulkString(b"SET") => { 144 | let (rem, (itype2, sz)) = type_parser(rem)?; 145 | taken += sz; 146 | 147 | trace!("array3_parser - taken {:?}", taken); 148 | 149 | match itype2 { 150 | IType::BulkString(key) => { 151 | let (rem, (strln, sz)) = bulkstrln_parser(rem)?; 152 | taken += sz; 153 | trace!("array3_parser - SET - taken {:?}", taken); 154 | 155 | Ok((rem, (Cmd::Set(key, strln), taken))) 156 | } // _ => Ok((rem, (Cmd::Disconnect, taken))), 157 | } 158 | } 159 | _ => Ok((rem, (Cmd::Disconnect, taken))), 160 | } 161 | 162 | // do a "first value" parse, capable of reading if it's bulk or simple string. 163 | // once we know the first value, can understand the second. 164 | } 165 | 166 | fn array2_parser(input: &[u8]) -> IResult<&[u8], (Cmd<'_>, usize)> { 167 | // The *2 parser then checks the first element 168 | // AUTH 169 | // then it can stuff the second into the resp. 170 | // etc 171 | // 172 | let mut taken = 0; 173 | 174 | let (rem, (ln, sz)) = line_parser(input)?; 175 | 176 | taken += sz; 177 | 178 | tag("*2")(ln)?; 179 | 180 | // What is the next value? That tells us if we should proceed now or not. 
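// Two-element commands are either AUTH or GET. For example the frame
//   *2\r\n$3\r\nGET\r\n$8\r\ntest_key\r\n
// (see get_test below) parses to Cmd::Get(b"test_key") plus the number of bytes
// consumed, which decode_cmd in codec.rs uses to advance its read buffer.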
181 | let (rem, (itype1, sz)) = type_parser(rem)?; 182 | taken += sz; 183 | 184 | match itype1 { 185 | IType::BulkString(b"AUTH") => { 186 | let (rem, (itype2, sz)) = type_parser(rem)?; 187 | taken += sz; 188 | 189 | trace!("array2_parser - taken {:?}", taken); 190 | 191 | match itype2 { 192 | IType::BulkString(pw) => Ok((rem, (Cmd::Auth(pw), taken))), 193 | // _ => Ok((rem, (Cmd::Disconnect, taken))), 194 | } 195 | } 196 | IType::BulkString(b"GET") => { 197 | let (rem, (itype2, sz)) = type_parser(rem)?; 198 | taken += sz; 199 | 200 | trace!("array2_parser - taken {:?}", taken); 201 | 202 | match itype2 { 203 | IType::BulkString(k) => Ok((rem, (Cmd::Get(k), taken))), 204 | // _ => Ok((rem, (Cmd::Disconnect, taken))), 205 | } 206 | } 207 | _ => Ok((rem, (Cmd::Disconnect, taken))), 208 | } 209 | 210 | // do a "first value" parse, capable of reading if it's bulk or simple string. 211 | // once we know the first value, can understand the second. 212 | } 213 | 214 | fn array1_parser(input: &[u8]) -> IResult<&[u8], (Cmd<'_>, usize)> { 215 | let mut taken = 0; 216 | 217 | let (rem, (ln, sz)) = line_parser(input)?; 218 | 219 | taken += sz; 220 | 221 | tag("*1")(ln)?; 222 | 223 | // What is the next value? That tells us if we should proceed now or not. 224 | let (rem, (itype1, sz)) = type_parser(rem)?; 225 | taken += sz; 226 | 227 | match itype1 { 228 | IType::BulkString(b"INFO") => Ok((rem, (Cmd::Info, taken))), 229 | _ => Ok((rem, (Cmd::Disconnect, taken))), 230 | } 231 | } 232 | 233 | // For the set command we can just return the size and start of bytes? 234 | 235 | pub fn cmd_parser(input: &[u8]) -> IResult<&[u8], (Cmd<'_>, usize)> { 236 | trace!(?input); 237 | alt(( 238 | wait_parser, 239 | array1_parser, 240 | array2_parser, 241 | array3_parser, 242 | array4_parser, 243 | ))(input) 244 | } 245 | 246 | pub fn tag_eol(input: &[u8]) -> IResult<&[u8], &[u8]> { 247 | tag(b"\r\n")(input) 248 | } 249 | 250 | #[cfg(test)] 251 | mod tests { 252 | use super::*; 253 | use nom::Err::Incomplete; 254 | 255 | #[test] 256 | fn auth_test() { 257 | let _ = tracing_subscriber::fmt::try_init(); 258 | assert!( 259 | cmd_parser(b"*2\r\n$4\r\nAUTH\r\n$8\r\npassword\r\n") 260 | == Ok((b"", (Cmd::Auth(b"password"), 28))) 261 | ); 262 | 263 | assert!(matches!(cmd_parser(b"*"), Err(Incomplete(_)))); 264 | 265 | assert!(matches!(cmd_parser(b"*2\r"), Err(Incomplete(_)))); 266 | 267 | assert!(matches!(cmd_parser(b"*2\r\n$"), Err(Incomplete(_)))); 268 | 269 | assert!(matches!( 270 | cmd_parser(b"*2\r\n$4\r\nAUTH\r\n$8\r\npass"), 271 | Err(Incomplete(_)) 272 | )); 273 | 274 | eprintln!("{:?}", cmd_parser(b"*2\r\n$4\r\nAUTH\r\n$8\r\npassword\r")); 275 | 276 | assert!(matches!( 277 | cmd_parser(b"*2\r\n$4\r\nAUTH\r\n$8\r\npassword\r"), 278 | Err(Incomplete(_)) 279 | )); 280 | } 281 | 282 | #[test] 283 | fn config_get_test() { 284 | let _ = tracing_subscriber::fmt::try_init(); 285 | assert!( 286 | cmd_parser(b"*3\r\n$6\r\nCONFIG\r\n$3\r\nGET\r\n$9\r\nmaxmemory\r\n") 287 | == Ok((b"", (Cmd::ConfigGet(b"maxmemory"), 40))) // 288 | // *2 289 | // $9 290 | // maxmemory 291 | // $10 292 | // 3758096384 293 | ); 294 | } 295 | 296 | #[test] 297 | fn get_test() { 298 | let _ = tracing_subscriber::fmt::try_init(); 299 | assert!( 300 | cmd_parser(b"*2\r\n$3\r\nGET\r\n$8\r\ntest_key\r\n") 301 | == Ok((b"", (Cmd::Get(b"test_key"), 27))) 302 | ) 303 | 304 | // $-1 305 | 306 | // $1 307 | // a 308 | } 309 | 310 | #[test] 311 | fn set_test() { 312 | let _ = tracing_subscriber::fmt::try_init(); 313 | assert!( 314 | 
cmd_parser(b"*3\r\n$3\r\nSET\r\n$8\r\ntest_key\r\n$9\r\ntest_data\r\n") 315 | == Ok((b"test_data\r\n", (Cmd::Set(b"test_key", 9), 31))) 316 | ) 317 | 318 | // +OK 319 | // 320 | } 321 | 322 | #[test] 323 | fn cmd_test() { 324 | let r = cmd_parser(b"set "); 325 | eprintln!("{:?}", r); 326 | assert!(matches!(r, Err(Incomplete(_)))); 327 | } 328 | } 329 | --------------------------------------------------------------------------------