├── .githooks └── pre-commit ├── .github ├── FUNDING.yml └── ISSUE_TEMPLATE │ └── bug_report.md ├── .gitignore ├── .golangci.yml ├── LICENSE ├── Makefile ├── README.md ├── cmd ├── extractor │ └── extractor.go ├── webserver │ ├── ipranges.go │ └── webserver.go └── worker │ └── worker.go ├── deploy ├── .env ├── Dockerfile_webserver ├── Dockerfile_worker ├── docker-compose.yml ├── nats_config.conf └── seccomp_profile.json ├── frontend └── wizard-vue │ ├── .editorconfig │ ├── .env.development │ ├── .env.production │ ├── .gitattributes │ ├── .gitignore │ ├── .prettierrc.json │ ├── embed.go │ ├── env.d.ts │ ├── eslint.config.ts │ ├── index.html │ ├── package-lock.json │ ├── package.json │ ├── src │ ├── App.vue │ ├── assets │ │ ├── base.scss │ │ └── logo.png │ ├── common │ │ └── enum.ts │ ├── components │ │ ├── Btn.vue │ │ ├── Copyable.vue │ │ ├── EditUrlModal.vue │ │ ├── ForkMe.vue │ │ ├── Modal.vue │ │ ├── SpecsForm.vue │ │ └── inputs │ │ │ ├── RadioButtons.vue │ │ │ └── TextField.vue │ ├── main.ts │ ├── pages │ │ └── WizardPage.vue │ ├── router │ │ └── index.ts │ ├── stores │ │ └── wizard.ts │ └── urlmaker │ │ ├── index.ts │ │ ├── proto │ │ └── specs.ts │ │ ├── specs.ts │ │ ├── utils.ts │ │ └── validators.ts │ ├── tsconfig.app.json │ ├── tsconfig.json │ ├── tsconfig.node.json │ └── vite.config.ts ├── go.mod ├── go.sum ├── internal ├── adapters │ ├── adapters.go │ └── natsadapter │ │ └── natsadapter.go ├── api │ └── http │ │ ├── handler.go │ │ └── pb │ │ └── specs.pb.go ├── config │ └── config.go ├── cookiemgr │ ├── dummy │ │ └── dummycookies.go │ ├── nats │ │ └── natscookies.go │ └── utils.go ├── dateparser │ └── dateparser.go ├── extractors │ └── pwextractor │ │ ├── adblock.go │ │ ├── blocklists │ │ ├── easylist.txt │ │ └── easyprivacy.txt │ │ ├── extract_post.js │ │ ├── pageparser.go │ │ ├── pwextractor.go │ │ ├── utils.go │ │ └── utils_test.go ├── limiter │ ├── dummy │ │ └── dummy.go │ ├── limiter.go │ └── redisleaky │ │ └── redisleaky.go ├── models │ └── models.go 
└── validators │ └── validators.go ├── presets ├── README.md └── cookies.md └── proto └── specs.proto /.githooks/pre-commit: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | go vet ./... 4 | go test ./... 5 | -------------------------------------------------------------------------------- /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | # These are supported funding model platforms 2 | 3 | custom: https://nc.efprojects.com/s/KXWCeFPa2PBkQ8x 4 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Describe the bug** 11 | A clear and concise description of what the bug is. 12 | 13 | **To Reproduce** 14 | Steps to reproduce the behavior: 15 | 1. Go to '...' 16 | 2. Click on '....' 17 | 3. Scroll down to '....' 18 | 4. See error 19 | 20 | **Expected behavior** 21 | A clear and concise description of what you expected to happen. 22 | 23 | **Version info** 24 | If it's a deployment from master, specify the commit hash, which I'll use to identify what version you use 25 | 26 | **Logs** 27 | Please provide logs with debugging information. (set environment variable DEBUG=1) 28 | 29 | **Additional context** 30 | Add any other context about the problem here. 
31 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /.idea/ 2 | /trash/ 3 | /todo.md 4 | /.env.dev 5 | /test_tasks/ 6 | /compare_revs.sh 7 | /task*.json 8 | /screenshot*.png 9 | node_modules 10 | .vite 11 | *~ 12 | *.prof 13 | /fgtrace*.json 14 | -------------------------------------------------------------------------------- /.golangci.yml: -------------------------------------------------------------------------------- 1 | linters: 2 | fast: true 3 | presets: 4 | - bugs 5 | - error 6 | - metalinter 7 | - performance 8 | - sql 9 | - test 10 | - unused 11 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | GNU AFFERO GENERAL PUBLIC LICENSE 2 | Version 3, 19 November 2007 3 | 4 | Copyright (C) 2007 Free Software Foundation, Inc. 5 | Everyone is permitted to copy and distribute verbatim copies 6 | of this license document, but changing it is not allowed. 7 | 8 | Preamble 9 | 10 | The GNU Affero General Public License is a free, copyleft license for 11 | software and other kinds of works, specifically designed to ensure 12 | cooperation with the community in the case of network server software. 13 | 14 | The licenses for most software and other practical works are designed 15 | to take away your freedom to share and change the works. By contrast, 16 | our General Public Licenses are intended to guarantee your freedom to 17 | share and change all versions of a program--to make sure it remains free 18 | software for all its users. 19 | 20 | When we speak of free software, we are referring to freedom, not 21 | price. 
Our General Public Licenses are designed to make sure that you 22 | have the freedom to distribute copies of free software (and charge for 23 | them if you wish), that you receive source code or can get it if you 24 | want it, that you can change the software or use pieces of it in new 25 | free programs, and that you know you can do these things. 26 | 27 | Developers that use our General Public Licenses protect your rights 28 | with two steps: (1) assert copyright on the software, and (2) offer 29 | you this License which gives you legal permission to copy, distribute 30 | and/or modify the software. 31 | 32 | A secondary benefit of defending all users' freedom is that 33 | improvements made in alternate versions of the program, if they 34 | receive widespread use, become available for other developers to 35 | incorporate. Many developers of free software are heartened and 36 | encouraged by the resulting cooperation. However, in the case of 37 | software used on network servers, this result may fail to come about. 38 | The GNU General Public License permits making a modified version and 39 | letting the public access it on a server without ever releasing its 40 | source code to the public. 41 | 42 | The GNU Affero General Public License is designed specifically to 43 | ensure that, in such cases, the modified source code becomes available 44 | to the community. It requires the operator of a network server to 45 | provide the source code of the modified version running there to the 46 | users of that server. Therefore, public use of a modified version, on 47 | a publicly accessible server, gives the public access to the source 48 | code of the modified version. 49 | 50 | An older license, called the Affero General Public License and 51 | published by Affero, was designed to accomplish similar goals. 
This is 52 | a different license, not a version of the Affero GPL, but Affero has 53 | released a new version of the Affero GPL which permits relicensing under 54 | this license. 55 | 56 | The precise terms and conditions for copying, distribution and 57 | modification follow. 58 | 59 | TERMS AND CONDITIONS 60 | 61 | 0. Definitions. 62 | 63 | "This License" refers to version 3 of the GNU Affero General Public License. 64 | 65 | "Copyright" also means copyright-like laws that apply to other kinds of 66 | works, such as semiconductor masks. 67 | 68 | "The Program" refers to any copyrightable work licensed under this 69 | License. Each licensee is addressed as "you". "Licensees" and 70 | "recipients" may be individuals or organizations. 71 | 72 | To "modify" a work means to copy from or adapt all or part of the work 73 | in a fashion requiring copyright permission, other than the making of an 74 | exact copy. The resulting work is called a "modified version" of the 75 | earlier work or a work "based on" the earlier work. 76 | 77 | A "covered work" means either the unmodified Program or a work based 78 | on the Program. 79 | 80 | To "propagate" a work means to do anything with it that, without 81 | permission, would make you directly or secondarily liable for 82 | infringement under applicable copyright law, except executing it on a 83 | computer or modifying a private copy. Propagation includes copying, 84 | distribution (with or without modification), making available to the 85 | public, and in some countries other activities as well. 86 | 87 | To "convey" a work means any kind of propagation that enables other 88 | parties to make or receive copies. Mere interaction with a user through 89 | a computer network, with no transfer of a copy, is not conveying. 
90 | 91 | An interactive user interface displays "Appropriate Legal Notices" 92 | to the extent that it includes a convenient and prominently visible 93 | feature that (1) displays an appropriate copyright notice, and (2) 94 | tells the user that there is no warranty for the work (except to the 95 | extent that warranties are provided), that licensees may convey the 96 | work under this License, and how to view a copy of this License. If 97 | the interface presents a list of user commands or options, such as a 98 | menu, a prominent item in the list meets this criterion. 99 | 100 | 1. Source Code. 101 | 102 | The "source code" for a work means the preferred form of the work 103 | for making modifications to it. "Object code" means any non-source 104 | form of a work. 105 | 106 | A "Standard Interface" means an interface that either is an official 107 | standard defined by a recognized standards body, or, in the case of 108 | interfaces specified for a particular programming language, one that 109 | is widely used among developers working in that language. 110 | 111 | The "System Libraries" of an executable work include anything, other 112 | than the work as a whole, that (a) is included in the normal form of 113 | packaging a Major Component, but which is not part of that Major 114 | Component, and (b) serves only to enable use of the work with that 115 | Major Component, or to implement a Standard Interface for which an 116 | implementation is available to the public in source code form. A 117 | "Major Component", in this context, means a major essential component 118 | (kernel, window system, and so on) of the specific operating system 119 | (if any) on which the executable work runs, or a compiler used to 120 | produce the work, or an object code interpreter used to run it. 
121 | 122 | The "Corresponding Source" for a work in object code form means all 123 | the source code needed to generate, install, and (for an executable 124 | work) run the object code and to modify the work, including scripts to 125 | control those activities. However, it does not include the work's 126 | System Libraries, or general-purpose tools or generally available free 127 | programs which are used unmodified in performing those activities but 128 | which are not part of the work. For example, Corresponding Source 129 | includes interface definition files associated with source files for 130 | the work, and the source code for shared libraries and dynamically 131 | linked subprograms that the work is specifically designed to require, 132 | such as by intimate data communication or control flow between those 133 | subprograms and other parts of the work. 134 | 135 | The Corresponding Source need not include anything that users 136 | can regenerate automatically from other parts of the Corresponding 137 | Source. 138 | 139 | The Corresponding Source for a work in source code form is that 140 | same work. 141 | 142 | 2. Basic Permissions. 143 | 144 | All rights granted under this License are granted for the term of 145 | copyright on the Program, and are irrevocable provided the stated 146 | conditions are met. This License explicitly affirms your unlimited 147 | permission to run the unmodified Program. The output from running a 148 | covered work is covered by this License only if the output, given its 149 | content, constitutes a covered work. This License acknowledges your 150 | rights of fair use or other equivalent, as provided by copyright law. 151 | 152 | You may make, run and propagate covered works that you do not 153 | convey, without conditions so long as your license otherwise remains 154 | in force. 
You may convey covered works to others for the sole purpose 155 | of having them make modifications exclusively for you, or provide you 156 | with facilities for running those works, provided that you comply with 157 | the terms of this License in conveying all material for which you do 158 | not control copyright. Those thus making or running the covered works 159 | for you must do so exclusively on your behalf, under your direction 160 | and control, on terms that prohibit them from making any copies of 161 | your copyrighted material outside their relationship with you. 162 | 163 | Conveying under any other circumstances is permitted solely under 164 | the conditions stated below. Sublicensing is not allowed; section 10 165 | makes it unnecessary. 166 | 167 | 3. Protecting Users' Legal Rights From Anti-Circumvention Law. 168 | 169 | No covered work shall be deemed part of an effective technological 170 | measure under any applicable law fulfilling obligations under article 171 | 11 of the WIPO copyright treaty adopted on 20 December 1996, or 172 | similar laws prohibiting or restricting circumvention of such 173 | measures. 174 | 175 | When you convey a covered work, you waive any legal power to forbid 176 | circumvention of technological measures to the extent such circumvention 177 | is effected by exercising rights under this License with respect to 178 | the covered work, and you disclaim any intention to limit operation or 179 | modification of the work as a means of enforcing, against the work's 180 | users, your or third parties' legal rights to forbid circumvention of 181 | technological measures. 182 | 183 | 4. Conveying Verbatim Copies. 
184 | 185 | You may convey verbatim copies of the Program's source code as you 186 | receive it, in any medium, provided that you conspicuously and 187 | appropriately publish on each copy an appropriate copyright notice; 188 | keep intact all notices stating that this License and any 189 | non-permissive terms added in accord with section 7 apply to the code; 190 | keep intact all notices of the absence of any warranty; and give all 191 | recipients a copy of this License along with the Program. 192 | 193 | You may charge any price or no price for each copy that you convey, 194 | and you may offer support or warranty protection for a fee. 195 | 196 | 5. Conveying Modified Source Versions. 197 | 198 | You may convey a work based on the Program, or the modifications to 199 | produce it from the Program, in the form of source code under the 200 | terms of section 4, provided that you also meet all of these conditions: 201 | 202 | a) The work must carry prominent notices stating that you modified 203 | it, and giving a relevant date. 204 | 205 | b) The work must carry prominent notices stating that it is 206 | released under this License and any conditions added under section 207 | 7. This requirement modifies the requirement in section 4 to 208 | "keep intact all notices". 209 | 210 | c) You must license the entire work, as a whole, under this 211 | License to anyone who comes into possession of a copy. This 212 | License will therefore apply, along with any applicable section 7 213 | additional terms, to the whole of the work, and all its parts, 214 | regardless of how they are packaged. This License gives no 215 | permission to license the work in any other way, but it does not 216 | invalidate such permission if you have separately received it. 
217 | 218 | d) If the work has interactive user interfaces, each must display 219 | Appropriate Legal Notices; however, if the Program has interactive 220 | interfaces that do not display Appropriate Legal Notices, your 221 | work need not make them do so. 222 | 223 | A compilation of a covered work with other separate and independent 224 | works, which are not by their nature extensions of the covered work, 225 | and which are not combined with it such as to form a larger program, 226 | in or on a volume of a storage or distribution medium, is called an 227 | "aggregate" if the compilation and its resulting copyright are not 228 | used to limit the access or legal rights of the compilation's users 229 | beyond what the individual works permit. Inclusion of a covered work 230 | in an aggregate does not cause this License to apply to the other 231 | parts of the aggregate. 232 | 233 | 6. Conveying Non-Source Forms. 234 | 235 | You may convey a covered work in object code form under the terms 236 | of sections 4 and 5, provided that you also convey the 237 | machine-readable Corresponding Source under the terms of this License, 238 | in one of these ways: 239 | 240 | a) Convey the object code in, or embodied in, a physical product 241 | (including a physical distribution medium), accompanied by the 242 | Corresponding Source fixed on a durable physical medium 243 | customarily used for software interchange. 
244 | 245 | b) Convey the object code in, or embodied in, a physical product 246 | (including a physical distribution medium), accompanied by a 247 | written offer, valid for at least three years and valid for as 248 | long as you offer spare parts or customer support for that product 249 | model, to give anyone who possesses the object code either (1) a 250 | copy of the Corresponding Source for all the software in the 251 | product that is covered by this License, on a durable physical 252 | medium customarily used for software interchange, for a price no 253 | more than your reasonable cost of physically performing this 254 | conveying of source, or (2) access to copy the 255 | Corresponding Source from a network server at no charge. 256 | 257 | c) Convey individual copies of the object code with a copy of the 258 | written offer to provide the Corresponding Source. This 259 | alternative is allowed only occasionally and noncommercially, and 260 | only if you received the object code with such an offer, in accord 261 | with subsection 6b. 262 | 263 | d) Convey the object code by offering access from a designated 264 | place (gratis or for a charge), and offer equivalent access to the 265 | Corresponding Source in the same way through the same place at no 266 | further charge. You need not require recipients to copy the 267 | Corresponding Source along with the object code. If the place to 268 | copy the object code is a network server, the Corresponding Source 269 | may be on a different server (operated by you or a third party) 270 | that supports equivalent copying facilities, provided you maintain 271 | clear directions next to the object code saying where to find the 272 | Corresponding Source. Regardless of what server hosts the 273 | Corresponding Source, you remain obligated to ensure that it is 274 | available for as long as needed to satisfy these requirements. 
275 | 276 | e) Convey the object code using peer-to-peer transmission, provided 277 | you inform other peers where the object code and Corresponding 278 | Source of the work are being offered to the general public at no 279 | charge under subsection 6d. 280 | 281 | A separable portion of the object code, whose source code is excluded 282 | from the Corresponding Source as a System Library, need not be 283 | included in conveying the object code work. 284 | 285 | A "User Product" is either (1) a "consumer product", which means any 286 | tangible personal property which is normally used for personal, family, 287 | or household purposes, or (2) anything designed or sold for incorporation 288 | into a dwelling. In determining whether a product is a consumer product, 289 | doubtful cases shall be resolved in favor of coverage. For a particular 290 | product received by a particular user, "normally used" refers to a 291 | typical or common use of that class of product, regardless of the status 292 | of the particular user or of the way in which the particular user 293 | actually uses, or expects or is expected to use, the product. A product 294 | is a consumer product regardless of whether the product has substantial 295 | commercial, industrial or non-consumer uses, unless such uses represent 296 | the only significant mode of use of the product. 297 | 298 | "Installation Information" for a User Product means any methods, 299 | procedures, authorization keys, or other information required to install 300 | and execute modified versions of a covered work in that User Product from 301 | a modified version of its Corresponding Source. The information must 302 | suffice to ensure that the continued functioning of the modified object 303 | code is in no case prevented or interfered with solely because 304 | modification has been made. 
305 | 306 | If you convey an object code work under this section in, or with, or 307 | specifically for use in, a User Product, and the conveying occurs as 308 | part of a transaction in which the right of possession and use of the 309 | User Product is transferred to the recipient in perpetuity or for a 310 | fixed term (regardless of how the transaction is characterized), the 311 | Corresponding Source conveyed under this section must be accompanied 312 | by the Installation Information. But this requirement does not apply 313 | if neither you nor any third party retains the ability to install 314 | modified object code on the User Product (for example, the work has 315 | been installed in ROM). 316 | 317 | The requirement to provide Installation Information does not include a 318 | requirement to continue to provide support service, warranty, or updates 319 | for a work that has been modified or installed by the recipient, or for 320 | the User Product in which it has been modified or installed. Access to a 321 | network may be denied when the modification itself materially and 322 | adversely affects the operation of the network or violates the rules and 323 | protocols for communication across the network. 324 | 325 | Corresponding Source conveyed, and Installation Information provided, 326 | in accord with this section must be in a format that is publicly 327 | documented (and with an implementation available to the public in 328 | source code form), and must require no special password or key for 329 | unpacking, reading or copying. 330 | 331 | 7. Additional Terms. 332 | 333 | "Additional permissions" are terms that supplement the terms of this 334 | License by making exceptions from one or more of its conditions. 335 | Additional permissions that are applicable to the entire Program shall 336 | be treated as though they were included in this License, to the extent 337 | that they are valid under applicable law. 
If additional permissions 338 | apply only to part of the Program, that part may be used separately 339 | under those permissions, but the entire Program remains governed by 340 | this License without regard to the additional permissions. 341 | 342 | When you convey a copy of a covered work, you may at your option 343 | remove any additional permissions from that copy, or from any part of 344 | it. (Additional permissions may be written to require their own 345 | removal in certain cases when you modify the work.) You may place 346 | additional permissions on material, added by you to a covered work, 347 | for which you have or can give appropriate copyright permission. 348 | 349 | Notwithstanding any other provision of this License, for material you 350 | add to a covered work, you may (if authorized by the copyright holders of 351 | that material) supplement the terms of this License with terms: 352 | 353 | a) Disclaiming warranty or limiting liability differently from the 354 | terms of sections 15 and 16 of this License; or 355 | 356 | b) Requiring preservation of specified reasonable legal notices or 357 | author attributions in that material or in the Appropriate Legal 358 | Notices displayed by works containing it; or 359 | 360 | c) Prohibiting misrepresentation of the origin of that material, or 361 | requiring that modified versions of such material be marked in 362 | reasonable ways as different from the original version; or 363 | 364 | d) Limiting the use for publicity purposes of names of licensors or 365 | authors of the material; or 366 | 367 | e) Declining to grant rights under trademark law for use of some 368 | trade names, trademarks, or service marks; or 369 | 370 | f) Requiring indemnification of licensors and authors of that 371 | material by anyone who conveys the material (or modified versions of 372 | it) with contractual assumptions of liability to the recipient, for 373 | any liability that these contractual assumptions directly impose on 
374 | those licensors and authors. 375 | 376 | All other non-permissive additional terms are considered "further 377 | restrictions" within the meaning of section 10. If the Program as you 378 | received it, or any part of it, contains a notice stating that it is 379 | governed by this License along with a term that is a further 380 | restriction, you may remove that term. If a license document contains 381 | a further restriction but permits relicensing or conveying under this 382 | License, you may add to a covered work material governed by the terms 383 | of that license document, provided that the further restriction does 384 | not survive such relicensing or conveying. 385 | 386 | If you add terms to a covered work in accord with this section, you 387 | must place, in the relevant source files, a statement of the 388 | additional terms that apply to those files, or a notice indicating 389 | where to find the applicable terms. 390 | 391 | Additional terms, permissive or non-permissive, may be stated in the 392 | form of a separately written license, or stated as exceptions; 393 | the above requirements apply either way. 394 | 395 | 8. Termination. 396 | 397 | You may not propagate or modify a covered work except as expressly 398 | provided under this License. Any attempt otherwise to propagate or 399 | modify it is void, and will automatically terminate your rights under 400 | this License (including any patent licenses granted under the third 401 | paragraph of section 11). 402 | 403 | However, if you cease all violation of this License, then your 404 | license from a particular copyright holder is reinstated (a) 405 | provisionally, unless and until the copyright holder explicitly and 406 | finally terminates your license, and (b) permanently, if the copyright 407 | holder fails to notify you of the violation by some reasonable means 408 | prior to 60 days after the cessation. 
409 | 410 | Moreover, your license from a particular copyright holder is 411 | reinstated permanently if the copyright holder notifies you of the 412 | violation by some reasonable means, this is the first time you have 413 | received notice of violation of this License (for any work) from that 414 | copyright holder, and you cure the violation prior to 30 days after 415 | your receipt of the notice. 416 | 417 | Termination of your rights under this section does not terminate the 418 | licenses of parties who have received copies or rights from you under 419 | this License. If your rights have been terminated and not permanently 420 | reinstated, you do not qualify to receive new licenses for the same 421 | material under section 10. 422 | 423 | 9. Acceptance Not Required for Having Copies. 424 | 425 | You are not required to accept this License in order to receive or 426 | run a copy of the Program. Ancillary propagation of a covered work 427 | occurring solely as a consequence of using peer-to-peer transmission 428 | to receive a copy likewise does not require acceptance. However, 429 | nothing other than this License grants you permission to propagate or 430 | modify any covered work. These actions infringe copyright if you do 431 | not accept this License. Therefore, by modifying or propagating a 432 | covered work, you indicate your acceptance of this License to do so. 433 | 434 | 10. Automatic Licensing of Downstream Recipients. 435 | 436 | Each time you convey a covered work, the recipient automatically 437 | receives a license from the original licensors, to run, modify and 438 | propagate that work, subject to this License. You are not responsible 439 | for enforcing compliance by third parties with this License. 440 | 441 | An "entity transaction" is a transaction transferring control of an 442 | organization, or substantially all assets of one, or subdividing an 443 | organization, or merging organizations. 
If propagation of a covered 444 | work results from an entity transaction, each party to that 445 | transaction who receives a copy of the work also receives whatever 446 | licenses to the work the party's predecessor in interest had or could 447 | give under the previous paragraph, plus a right to possession of the 448 | Corresponding Source of the work from the predecessor in interest, if 449 | the predecessor has it or can get it with reasonable efforts. 450 | 451 | You may not impose any further restrictions on the exercise of the 452 | rights granted or affirmed under this License. For example, you may 453 | not impose a license fee, royalty, or other charge for exercise of 454 | rights granted under this License, and you may not initiate litigation 455 | (including a cross-claim or counterclaim in a lawsuit) alleging that 456 | any patent claim is infringed by making, using, selling, offering for 457 | sale, or importing the Program or any portion of it. 458 | 459 | 11. Patents. 460 | 461 | A "contributor" is a copyright holder who authorizes use under this 462 | License of the Program or a work on which the Program is based. The 463 | work thus licensed is called the contributor's "contributor version". 464 | 465 | A contributor's "essential patent claims" are all patent claims 466 | owned or controlled by the contributor, whether already acquired or 467 | hereafter acquired, that would be infringed by some manner, permitted 468 | by this License, of making, using, or selling its contributor version, 469 | but do not include claims that would be infringed only as a 470 | consequence of further modification of the contributor version. For 471 | purposes of this definition, "control" includes the right to grant 472 | patent sublicenses in a manner consistent with the requirements of 473 | this License. 
474 | 475 | Each contributor grants you a non-exclusive, worldwide, royalty-free 476 | patent license under the contributor's essential patent claims, to 477 | make, use, sell, offer for sale, import and otherwise run, modify and 478 | propagate the contents of its contributor version. 479 | 480 | In the following three paragraphs, a "patent license" is any express 481 | agreement or commitment, however denominated, not to enforce a patent 482 | (such as an express permission to practice a patent or covenant not to 483 | sue for patent infringement). To "grant" such a patent license to a 484 | party means to make such an agreement or commitment not to enforce a 485 | patent against the party. 486 | 487 | If you convey a covered work, knowingly relying on a patent license, 488 | and the Corresponding Source of the work is not available for anyone 489 | to copy, free of charge and under the terms of this License, through a 490 | publicly available network server or other readily accessible means, 491 | then you must either (1) cause the Corresponding Source to be so 492 | available, or (2) arrange to deprive yourself of the benefit of the 493 | patent license for this particular work, or (3) arrange, in a manner 494 | consistent with the requirements of this License, to extend the patent 495 | license to downstream recipients. "Knowingly relying" means you have 496 | actual knowledge that, but for the patent license, your conveying the 497 | covered work in a country, or your recipient's use of the covered work 498 | in a country, would infringe one or more identifiable patents in that 499 | country that you have reason to believe are valid. 
500 | 501 | If, pursuant to or in connection with a single transaction or 502 | arrangement, you convey, or propagate by procuring conveyance of, a 503 | covered work, and grant a patent license to some of the parties 504 | receiving the covered work authorizing them to use, propagate, modify 505 | or convey a specific copy of the covered work, then the patent license 506 | you grant is automatically extended to all recipients of the covered 507 | work and works based on it. 508 | 509 | A patent license is "discriminatory" if it does not include within 510 | the scope of its coverage, prohibits the exercise of, or is 511 | conditioned on the non-exercise of one or more of the rights that are 512 | specifically granted under this License. You may not convey a covered 513 | work if you are a party to an arrangement with a third party that is 514 | in the business of distributing software, under which you make payment 515 | to the third party based on the extent of your activity of conveying 516 | the work, and under which the third party grants, to any of the 517 | parties who would receive the covered work from you, a discriminatory 518 | patent license (a) in connection with copies of the covered work 519 | conveyed by you (or copies made from those copies), or (b) primarily 520 | for and in connection with specific products or compilations that 521 | contain the covered work, unless you entered into that arrangement, 522 | or that patent license was granted, prior to 28 March 2007. 523 | 524 | Nothing in this License shall be construed as excluding or limiting 525 | any implied license or other defenses to infringement that may 526 | otherwise be available to you under applicable patent law. 527 | 528 | 12. No Surrender of Others' Freedom. 529 | 530 | If conditions are imposed on you (whether by court order, agreement or 531 | otherwise) that contradict the conditions of this License, they do not 532 | excuse you from the conditions of this License. 
If you cannot convey a 533 | covered work so as to satisfy simultaneously your obligations under this 534 | License and any other pertinent obligations, then as a consequence you may 535 | not convey it at all. For example, if you agree to terms that obligate you 536 | to collect a royalty for further conveying from those to whom you convey 537 | the Program, the only way you could satisfy both those terms and this 538 | License would be to refrain entirely from conveying the Program. 539 | 540 | 13. Remote Network Interaction; Use with the GNU General Public License. 541 | 542 | Notwithstanding any other provision of this License, if you modify the 543 | Program, your modified version must prominently offer all users 544 | interacting with it remotely through a computer network (if your version 545 | supports such interaction) an opportunity to receive the Corresponding 546 | Source of your version by providing access to the Corresponding Source 547 | from a network server at no charge, through some standard or customary 548 | means of facilitating copying of software. This Corresponding Source 549 | shall include the Corresponding Source for any work covered by version 3 550 | of the GNU General Public License that is incorporated pursuant to the 551 | following paragraph. 552 | 553 | Notwithstanding any other provision of this License, you have 554 | permission to link or combine any covered work with a work licensed 555 | under version 3 of the GNU General Public License into a single 556 | combined work, and to convey the resulting work. The terms of this 557 | License will continue to apply to the part which is the covered work, 558 | but the work with which it is combined will remain governed by version 559 | 3 of the GNU General Public License. 560 | 561 | 14. Revised Versions of this License. 562 | 563 | The Free Software Foundation may publish revised and/or new versions of 564 | the GNU Affero General Public License from time to time. 
Such new versions 565 | will be similar in spirit to the present version, but may differ in detail to 566 | address new problems or concerns. 567 | 568 | Each version is given a distinguishing version number. If the 569 | Program specifies that a certain numbered version of the GNU Affero General 570 | Public License "or any later version" applies to it, you have the 571 | option of following the terms and conditions either of that numbered 572 | version or of any later version published by the Free Software 573 | Foundation. If the Program does not specify a version number of the 574 | GNU Affero General Public License, you may choose any version ever published 575 | by the Free Software Foundation. 576 | 577 | If the Program specifies that a proxy can decide which future 578 | versions of the GNU Affero General Public License can be used, that proxy's 579 | public statement of acceptance of a version permanently authorizes you 580 | to choose that version for the Program. 581 | 582 | Later license versions may give you additional or different 583 | permissions. However, no additional obligations are imposed on any 584 | author or copyright holder as a result of your choosing to follow a 585 | later version. 586 | 587 | 15. Disclaimer of Warranty. 588 | 589 | THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY 590 | APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT 591 | HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY 592 | OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, 593 | THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 594 | PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM 595 | IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF 596 | ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 597 | 598 | 16. Limitation of Liability. 
599 | 600 | IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING 601 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS 602 | THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY 603 | GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE 604 | USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF 605 | DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD 606 | PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), 607 | EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF 608 | SUCH DAMAGES. 609 | 610 | 17. Interpretation of Sections 15 and 16. 611 | 612 | If the disclaimer of warranty and limitation of liability provided 613 | above cannot be given local legal effect according to their terms, 614 | reviewing courts shall apply local law that most closely approximates 615 | an absolute waiver of all civil liability in connection with the 616 | Program, unless a warranty or assumption of liability accompanies a 617 | copy of the Program in return for a fee. 618 | 619 | END OF TERMS AND CONDITIONS 620 | 621 | How to Apply These Terms to Your New Programs 622 | 623 | If you develop a new program, and you want it to be of the greatest 624 | possible use to the public, the best way to achieve this is to make it 625 | free software which everyone can redistribute and change under these terms. 626 | 627 | To do so, attach the following notices to the program. It is safest 628 | to attach them to the start of each source file to most effectively 629 | state the exclusion of warranty; and each file should have at least 630 | the "copyright" line and a pointer to where the full notice is found. 
631 | 632 | 633 | Copyright (C) 634 | 635 | This program is free software: you can redistribute it and/or modify 636 | it under the terms of the GNU Affero General Public License as published 637 | by the Free Software Foundation, either version 3 of the License, or 638 | (at your option) any later version. 639 | 640 | This program is distributed in the hope that it will be useful, 641 | but WITHOUT ANY WARRANTY; without even the implied warranty of 642 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 643 | GNU Affero General Public License for more details. 644 | 645 | You should have received a copy of the GNU Affero General Public License 646 | along with this program. If not, see . 647 | 648 | Also add information on how to contact you by electronic and paper mail. 649 | 650 | If your software can interact with users remotely through a computer 651 | network, you should also make sure that it provides a way for users to 652 | get its source. For example, if your program is a web application, its 653 | interface could display a "Source" link that leads users to an archive 654 | of the code. There are many ways you could offer source, and different 655 | solutions will be better for different programs; see section 13 for the 656 | specific requirements. 657 | 658 | You should also get your employer (if you work as a programmer) or school, 659 | if any, to sign a "copyright disclaimer" for the program, if necessary. 660 | For more information on this, and how to apply and follow the GNU AGPL, see 661 | . 662 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | PROTOBUF_TAGGER_PATH := ${GOPATH}/pkg/mod/github.com/srikrsna/protoc-gen-gotag@v1.0.2 2 | 3 | all: 4 | 5 | js_proto: 6 | protoc -I=. 
-I=${PROTOBUF_TAGGER_PATH} --ts_out=./frontend/wizard-vue/src/urlmaker ./proto/specs.proto 7 | # Remove unneeded code left from GO plugin (and corresponsing unused import) 8 | sed -i -E '/import.+tagger\/tagger/d' ./frontend/wizard-vue/src/urlmaker/proto/specs.ts 9 | rm -rf ./frontend/wizard-vue/src/urlmaker/google 10 | rm -rf ./frontend/wizard-vue/src/urlmaker/tagger 11 | 12 | go_proto: 13 | protoc -I=. -I=${PROTOBUF_TAGGER_PATH} --go_out=. ./proto/specs.proto 14 | protoc -I=. -I=${PROTOBUF_TAGGER_PATH} --gotag_out=. ./proto/specs.proto 15 | 16 | proto: js_proto go_proto 17 | 18 | update_adblock: 19 | wget -O internal/extractors/pwextractor/blocklists/easylist.txt https://easylist.to/easylist/easylist.txt 20 | wget -O internal/extractors/pwextractor/blocklists/easyprivacy.txt https://easylist.to/easylist/easyprivacy.txt 21 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 |

3 | logo 4 |

5 | 
6 | ---
7 | 
8 | **RSSAlchemy** is a website-to-rss converter, like RSSHub, RSS-bridge or Rss.app. Here are the main features:
9 | 
10 | - Convert arbitrary website to RSS feed using CSS selectors
11 | - Dynamic websites are supported using headless chrome (playwright)
12 | - Cookies[^1] (supports scraping private feeds, e.g. youtube subscriptions)
13 | - Proxy
14 | - Results caching
15 | - Adblock (primarily for loading speedup)
16 | - Screenshots (primarily for debugging)
17 | - [Presets](presets) for sharing configurations
18 | - Stateless[^2] (all task parameters are encoded into url, no database needed)
19 | - Distributed by design (deploy as many workers as you need)
20 | - Self-hosted; easy to deploy; docker-compose provided
21 | - Relatively small codebase, written in go + typescript
22 | - Security and reliability:
23 |   - Rate-limit by source client IP
24 |   - Rate-limit by target domain (to prevent 429 if many tasks target the same site)
25 |   - Block service workers
26 |   - Prevent WebRTC leak if using proxy
27 |   - Block localhost and private IPs (including proxy server's internal services)
28 |   - Chrome is sandboxed; container is UNprivileged
29 | 
30 | [^1]: Cookies require support from your RSS reader/aggregator. Miniflux works, others are not checked yet.
31 | [^2]: Nats KV is used to store cookies permanently, it's required for sites that update cookies on every request, like 32 | youtube 33 | 34 | | feature/program | RSS Alchemy | RSS Hub | RSS-Bridge | RSS.app | 35 | |----------------------|---------------------------|------------------------------|-------------------------|---------------| 36 | | Custom websites | ✅ (using CSS selectors) | ❌ (only hardcoded site list) | ✅ (using CSS selectors) | ✅ | 37 | | Render dynamic sites | ✅ (using headless chrome) | ❌ | ❌ | ✅ | 38 | | Hosting | Self-hosting | Self-hosting | Self-hosting | Only cloud | 39 | | Price | Free and open-source | Free and open-source | Free and open-source | Paid ($8/mon) | 40 | 41 | 42 | ## Demo instance 43 | 44 | [rssalchemy.efprojects.com](https://rssalchemy.efprojects.com) 45 | 46 | 47 | ## Deployment 48 | 49 | ```bash 50 | git clone https://github.com/egor3f/rssalchemy 51 | cd rssalchemy/deploy 52 | docker-compose up -d 53 | ``` 54 | 55 | Then open your browser targeting to port 8080. 56 | 57 | For SSL, authentication, domains, etc. - use Caddy or Nginx (no specific configuration required). Personally I recommend Caddy, if you haven't used it before - give it a try :) 58 | 59 | 60 | ### Configuration 61 | 62 | Configuration is done using environment variables 63 | 64 | You can see all available options in [config.go file](internal/config/config.go) (struct Config) 65 | 66 | Docker-compose deployment uses [deploy/.env file](deploy/.env) 67 | 68 | 69 | ### Scaling 70 | 71 | Each worker can process 1 page at a time, so to scale you should run multiple worker instances. This is done using replicas parameter in worker section in [docker-compose.yml file](deploy/docker-compose.yml) 72 | 73 | 74 | ### Troubleshooting FAQ 75 | 76 | **Q: My RSS software shows timeout error, but rssalchemy logs are ok**
77 | A: Increase timeout. For miniflux it's HTTP_CLIENT_TIMEOUT, for other clients - read their documentation
78 | 79 | 80 | ## Development 81 | 82 | You need 83 | - Go 1.23 (most of application) 84 | - Node.js 20 (frontend) 85 | - Nats (with jetstream) 86 | - Redis 87 | 88 | Instaling dependencies example for MacOS: 89 | 90 | ```bash 91 | brew install go@1.23 92 | brew install node@20 93 | brew install redis 94 | brew install nats-server # Don't use brew services to manage nats because it lacks config support 95 | go mod download 96 | cd frontend/wizard-vue && npm install 97 | nats -js 98 | ``` 99 | 100 | Also this repository contains some useful git hooks. To enable them, use: 101 | ```bash 102 | git config --local core.hooksPath .githooks/ 103 | ``` 104 | -------------------------------------------------------------------------------- /cmd/extractor/extractor.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "encoding/json" 5 | "flag" 6 | "fmt" 7 | "github.com/egor3f/rssalchemy/internal/config" 8 | dummycookies "github.com/egor3f/rssalchemy/internal/cookiemgr/dummy" 9 | "github.com/egor3f/rssalchemy/internal/dateparser" 10 | "github.com/egor3f/rssalchemy/internal/extractors/pwextractor" 11 | "github.com/egor3f/rssalchemy/internal/limiter/dummy" 12 | "github.com/egor3f/rssalchemy/internal/models" 13 | "github.com/felixge/fgprof" 14 | "github.com/labstack/gommon/log" 15 | "io" 16 | "os" 17 | "time" 18 | ) 19 | 20 | func main() { 21 | log.SetLevel(log.DEBUG) 22 | log.SetHeader(`${time_rfc3339_nano} ${level}`) 23 | 24 | outFile := flag.String("o", "", "Output file name") 25 | skipOutput := flag.Bool("s", false, "Skip json output; show just logs") 26 | useProfiler := flag.Bool("p", false, "Use profiler") 27 | flag.Parse() 28 | 29 | if *useProfiler { 30 | //goland:noinspection GoUnhandledErrorResult 31 | //defer fgtrace.Config{Dst: fgtrace.File(fmt.Sprintf("fgtrace_%d.json", time.Now().Unix()))}.Trace().Stop() 32 | w, err := os.Create(fmt.Sprintf("fgprof_%d.prof", time.Now().Unix())) 33 | if err != nil { 
34 | panic(fmt.Sprintf("frprof create file: %v", err)) 35 | } 36 | stop := fgprof.Start(w, fgprof.FormatPprof) 37 | defer stop() 38 | } 39 | 40 | taskFileName := "task.json" 41 | if flag.NArg() > 0 { 42 | taskFileName = flag.Arg(0) 43 | } 44 | 45 | out := os.Stdout 46 | if len(*outFile) > 0 { 47 | var err error 48 | out, err = os.OpenFile(*outFile, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0644) 49 | if err != nil { 50 | log.Panicf("open output file: %v", err) 51 | } 52 | //goland:noinspection GoUnhandledErrorResult 53 | defer out.Close() 54 | } 55 | 56 | task, err := loadTask(taskFileName) 57 | if err != nil { 58 | log.Panicf("load task: %v", err) 59 | } 60 | 61 | cfg, err := config.Read() 62 | if err != nil { 63 | log.Panicf("read config: %v", err) 64 | } 65 | 66 | pwe, err := pwextractor.New(pwextractor.Config{ 67 | Proxy: cfg.Proxy, 68 | DateParser: &dateparser.DateParser{ 69 | CurrentTimeFunc: func() time.Time { 70 | return time.Date(2025, 01, 10, 10, 00, 00, 00, time.UTC) 71 | }, 72 | }, 73 | CookieManager: dummycookies.New(), 74 | Limiter: &dummy.Limiter{}, 75 | }) 76 | if err != nil { 77 | log.Panicf("create pw extractor: %v", err) 78 | } 79 | defer func() { 80 | if err := pwe.Stop(); err != nil { 81 | log.Errorf("stop pw extractor: %v", err) 82 | } 83 | }() 84 | 85 | start := time.Now() 86 | result, err := pwe.Extract(task) 87 | log.Infof("Extract took %v ms", time.Since(start).Milliseconds()) 88 | if err != nil { 89 | log.Errorf("extract: %v", err) 90 | scrResult, err := pwe.Screenshot(task) 91 | if err != nil { 92 | log.Errorf("screenshot failed: %v", err) 93 | panic(err) 94 | } 95 | err = os.WriteFile("screenshot.png", scrResult.Image, 0600) 96 | if err != nil { 97 | log.Errorf("screenshot save failed: %v", err) 98 | } 99 | panic(err) 100 | } 101 | 102 | if !*skipOutput { 103 | resultStr, err := json.MarshalIndent(result, "", "\t") 104 | if err != nil { 105 | log.Panicf("marshal result: %v", err) 106 | } 107 | n, err := out.Write(resultStr) 108 | if err != 
nil { 109 | log.Panicf("write output: %v", err) 110 | } 111 | log.Infof("Result written (%d bytes)", n) 112 | } 113 | } 114 | 115 | func loadTask(taskFileName string) (models.Task, error) { 116 | taskFile, err := os.Open(taskFileName) 117 | if err != nil { 118 | return models.Task{}, fmt.Errorf("open task file: %w", err) 119 | } 120 | defer taskFile.Close() 121 | 122 | fileContents, err := io.ReadAll(taskFile) 123 | if err != nil { 124 | return models.Task{}, fmt.Errorf("read file: %w", err) 125 | } 126 | 127 | var task models.Task 128 | if err := json.Unmarshal(fileContents, &task); err != nil { 129 | return models.Task{}, fmt.Errorf("unmarshal task: %w", err) 130 | } 131 | 132 | return task, err 133 | } 134 | -------------------------------------------------------------------------------- /cmd/webserver/ipranges.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | var IpRanges = []string{ 4 | // Cloudflare: 5 | "173.245.48.0/20", 6 | "103.21.244.0/22", 7 | "103.22.200.0/22", 8 | "103.31.4.0/22", 9 | "141.101.64.0/18", 10 | "108.162.192.0/18", 11 | "190.93.240.0/20", 12 | "188.114.96.0/20", 13 | "197.234.240.0/22", 14 | "198.41.128.0/17", 15 | "162.158.0.0/15", 16 | "104.16.0.0/13", 17 | "104.24.0.0/14", 18 | "172.64.0.0/13", 19 | "131.0.72.0/22", 20 | } 21 | -------------------------------------------------------------------------------- /cmd/webserver/webserver.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | wizard_vue "github.com/egor3f/rssalchemy/frontend/wizard-vue" 7 | "github.com/egor3f/rssalchemy/internal/adapters/natsadapter" 8 | httpApi "github.com/egor3f/rssalchemy/internal/api/http" 9 | "github.com/egor3f/rssalchemy/internal/config" 10 | "github.com/labstack/echo/v4" 11 | "github.com/labstack/echo/v4/middleware" 12 | "github.com/labstack/gommon/log" 13 | "github.com/nats-io/nats.go" 14 | 
// main wires up and runs the web server process: it connects to NATS (the
// task queue / result transport), serves the embedded Vue frontend with
// long-lived cache headers, mounts the JSON API under /api/v1, and shuts
// everything down gracefully on SIGINT.
func main() {
	cfg, err := config.Read()
	if err != nil {
		log.Panicf("reading config failed: %v", err)
	}

	log.SetHeader(`${time_rfc3339_nano} ${level}`)
	if cfg.Debug {
		log.SetLevel(log.DEBUG)
	}

	// Cancelled on SIGINT; used below to trigger graceful shutdown.
	baseCtx, stop := signal.NotifyContext(context.Background(), os.Interrupt)
	defer stop()

	natsc, err := nats.Connect(cfg.NatsUrl)
	if err != nil {
		log.Panicf("nats connect failed: %v", err)
	}
	defer func() {
		// Drain flushes pending messages before closing the connection.
		if err := natsc.Drain(); err != nil {
			log.Errorf("nats drain failed: %v", err)
		}
	}()

	// Same stream name as the worker ("RENDER_TASKS") — producer side here.
	na, err := natsadapter.New(natsc, "RENDER_TASKS")
	if err != nil {
		log.Panicf("create nats adapter: %v", err)
	}

	e := echo.New()
	e.Use(middleware.Logger())
	if !cfg.Debug {
		// In debug mode, let panics propagate for easier diagnosis.
		e.Use(middleware.Recover())
	}

	setIPExtractor(e, cfg)

	// Static frontend assets (embedded at build time) are publicly cacheable.
	cacheGroup := e.Group("", addCacheControlHeader(1*time.Hour))
	cacheGroup.Use(middleware.StaticWithConfig(middleware.StaticConfig{
		Root:       wizard_vue.FSPrefix,
		Filesystem: http.FS(wizard_vue.EmbedFS),
	}))

	// The nats adapter serves as both task queue and result cache here
	// (passed twice for the two interfaces the handler consumes).
	apiHandler := httpApi.New(
		na,
		na,
		rate.Every(time.Duration(float64(time.Second)*cfg.TaskRateLimitEvery)),
		cfg.TaskRateLimitBurst,
		cfg.Debug,
	)
	apiHandler.SetupRoutes(e.Group("/api/v1"))

	// Run the server in the background; block until SIGINT, then shut down
	// with a 10s grace period for in-flight requests.
	go func() {
		if err := e.Start(cfg.WebserverAddress); err != nil && err != http.ErrServerClosed {
			e.Logger.Errorf("http server error, shutting down: %v", err)
		}
	}()
	<-baseCtx.Done()
	log.Infof("stopping webserver gracefully")
	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
	defer cancel()
	if err := e.Shutdown(ctx); err != nil {
		e.Logger.Errorf("failed to shutdown server: %v", err)
	}
}
echo.MiddlewareFunc { 90 | return func(next echo.HandlerFunc) echo.HandlerFunc { 91 | return func(c echo.Context) error { 92 | c.Response().Header().Set( 93 | echo.HeaderCacheControl, 94 | fmt.Sprintf("public, max-age=%d", int(ttl.Seconds())), 95 | ) 96 | return next(c) 97 | } 98 | } 99 | } 100 | 101 | func setIPExtractor(e *echo.Echo, cfg config.Config) { 102 | if len(cfg.RealIpHeader) > 0 { 103 | // Real ip header 104 | e.IPExtractor = func(req *http.Request) string { 105 | if len(req.Header.Get(cfg.RealIpHeader)) > 0 { 106 | return req.Header.Get(cfg.RealIpHeader) 107 | } 108 | // fallback 109 | ra, _, _ := net.SplitHostPort(req.RemoteAddr) 110 | return ra 111 | } 112 | } else { 113 | // X-Forwarded-For with trusted ip ranges 114 | var trustOptions []echo.TrustOption 115 | for _, ipRange := range slices.Concat(IpRanges, cfg.TrustedIpRanges) { 116 | _, network, err := net.ParseCIDR(ipRange) 117 | if err != nil { 118 | log.Panicf("Invalid ip range: %s", ipRange) 119 | } 120 | trustOptions = append(trustOptions, echo.TrustIPRange(network)) 121 | } 122 | e.IPExtractor = echo.ExtractIPFromXFFHeader(trustOptions...) 
123 | } 124 | } 125 | -------------------------------------------------------------------------------- /cmd/worker/worker.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "context" 5 | "encoding/json" 6 | "fmt" 7 | "github.com/egor3f/rssalchemy/internal/adapters/natsadapter" 8 | "github.com/egor3f/rssalchemy/internal/config" 9 | natscookies "github.com/egor3f/rssalchemy/internal/cookiemgr/nats" 10 | "github.com/egor3f/rssalchemy/internal/dateparser" 11 | "github.com/egor3f/rssalchemy/internal/extractors/pwextractor" 12 | "github.com/egor3f/rssalchemy/internal/limiter/redisleaky" 13 | "github.com/egor3f/rssalchemy/internal/models" 14 | "github.com/labstack/gommon/log" 15 | "github.com/nats-io/nats.go" 16 | "github.com/redis/go-redis/v9" 17 | "golang.org/x/time/rate" 18 | "os" 19 | "os/signal" 20 | "time" 21 | ) 22 | 23 | func main() { 24 | cfg, err := config.Read() 25 | if err != nil { 26 | log.Panicf("reading config failed: %v", err) 27 | } 28 | 29 | log.SetHeader(`${time_rfc3339_nano} ${level}`) 30 | if cfg.Debug { 31 | log.SetLevel(log.DEBUG) 32 | } 33 | 34 | defer func() { 35 | log.Infof("worker gracefully stopped") 36 | }() 37 | 38 | baseCtx, stop := signal.NotifyContext(context.Background(), os.Interrupt) 39 | defer stop() 40 | 41 | natsc, err := nats.Connect(cfg.NatsUrl) 42 | if err != nil { 43 | log.Panicf("nats connect failed: %v", err) 44 | } 45 | defer func() { 46 | if err := natsc.Drain(); err != nil { 47 | log.Errorf("nats drain failed: %v", err) 48 | } 49 | }() 50 | 51 | qc, err := natsadapter.New(natsc, "RENDER_TASKS") 52 | if err != nil { 53 | log.Panicf("create nats adapter: %v", err) 54 | } 55 | 56 | cookieManager, err := natscookies.New(natsc) 57 | if err != nil { 58 | log.Panicf("create cookie manager: %v", err) 59 | } 60 | 61 | redisClient := redis.NewClient(&redis.Options{ 62 | Addr: cfg.RedisUrl, 63 | }) 64 | defer func() { 65 | if err := redisClient.Close(); err != nil 
{ 66 | log.Errorf("close redis client: %v", err) 67 | } 68 | }() 69 | if err := redisClient.Ping(baseCtx).Err(); err != nil { 70 | log.Panicf("redis ping: %v", err) 71 | } 72 | 73 | perDomainLimiter := redisleaky.New( 74 | rate.Every(time.Duration(float64(time.Second)*cfg.PerDomainRateLimitEvery)), 75 | int64(cfg.PerDomainRateLimitCapacity), 76 | redisClient, 77 | "per_domain_limiter", 78 | ) 79 | 80 | pwe, err := pwextractor.New(pwextractor.Config{ 81 | Proxy: cfg.Proxy, 82 | DateParser: &dateparser.DateParser{ 83 | CurrentTimeFunc: time.Now, 84 | }, 85 | CookieManager: cookieManager, 86 | Limiter: perDomainLimiter, 87 | }) 88 | if err != nil { 89 | log.Panicf("create pw extractor: %v", err) 90 | } 91 | defer func() { 92 | if err := pwe.Stop(); err != nil { 93 | log.Errorf("stop pw extractor: %v", err) 94 | } 95 | }() 96 | 97 | err = qc.ConsumeQueue(baseCtx, func(taskPayload []byte) (cacheKey string, resultPayoad []byte, errRet error) { 98 | var task models.Task 99 | if err := json.Unmarshal(taskPayload, &task); err != nil { 100 | errRet = fmt.Errorf("unmarshal task: %w", err) 101 | return 102 | } 103 | var result any 104 | switch task.TaskType { 105 | case models.TaskTypeExtract: 106 | result, err = pwe.Extract(task) 107 | case models.TaskTypePageScreenshot: 108 | result, err = pwe.Screenshot(task) 109 | } 110 | if err != nil { 111 | errRet = fmt.Errorf("task processing: %w", err) 112 | return 113 | } 114 | resultPayoad, err = json.Marshal(result) 115 | if err != nil { 116 | errRet = fmt.Errorf("marshal result: %w", err) 117 | return 118 | } 119 | return task.CacheKey(), resultPayoad, errRet 120 | }) 121 | if err != nil { 122 | log.Panicf("consume queue: %v", err) 123 | } 124 | } 125 | -------------------------------------------------------------------------------- /deploy/.env: -------------------------------------------------------------------------------- 1 | WEBSERVER_ADDRESS=0.0.0.0:8080 2 | NATS_URL=nats://nats:4222 3 | REDIS_URL=redis:6379 4 | DEBUG=false 
5 | -------------------------------------------------------------------------------- /deploy/Dockerfile_webserver: -------------------------------------------------------------------------------- 1 | FROM node:20 AS frontend 2 | 3 | WORKDIR /buildfront 4 | COPY frontend/wizard-vue/package.json frontend/wizard-vue/package-lock.json ./ 5 | RUN npm install 6 | COPY frontend/wizard-vue ./ 7 | RUN npm run build 8 | 9 | FROM golang:1.23 10 | 11 | WORKDIR /app 12 | 13 | COPY go.mod go.sum ./ 14 | RUN go mod download 15 | 16 | COPY . . 17 | COPY --from=frontend /buildfront/dist ./frontend/wizard-vue/dist 18 | RUN go build -o bin/webserver github.com/egor3f/rssalchemy/cmd/webserver 19 | 20 | EXPOSE 8080 21 | CMD ["/app/bin/webserver"] 22 | -------------------------------------------------------------------------------- /deploy/Dockerfile_worker: -------------------------------------------------------------------------------- 1 | # todo: multi-stage build 2 | # todo: let playwright install all deps by itself 3 | 4 | FROM golang:1.23 5 | 6 | RUN apt-get update && apt-get install -y ca-certificates tzdata libasound2 libatk-bridge2.0-0 libatk1.0-0 \ 7 | libatspi2.0-0 libcairo2 libcups2 libdbus-1-3 libdrm2 libgbm1 libglib2.0-0 libnspr4 libnss3 \ 8 | libpango-1.0-0 libx11-6 libxcb1 libxcomposite1 libxdamage1 libxext6 libxfixes3 libxkbcommon0 \ 9 | libxrandr2 xvfb fonts-noto-color-emoji fonts-unifont libfontconfig1 libfreetype6 xfonts-scalable \ 10 | fonts-liberation fonts-ipafont-gothic fonts-wqy-zenhei fonts-tlwg-loma-otf fonts-freefont-ttf && \ 11 | rm -rf /var/lib/apt/lists/* 12 | 13 | RUN useradd -ms /bin/bash pwuser 14 | WORKDIR /app 15 | 16 | COPY go.mod go.sum ./ 17 | RUN go mod download 18 | 19 | RUN PWGO_VER=$(grep -oE "playwright-go v\S+" go.mod | sed 's/playwright-go //g') \ 20 | && go install github.com/playwright-community/playwright-go/cmd/playwright@${PWGO_VER} 21 | RUN playwright install --with-deps --no-shell chromium && \ 22 | mkdir /home/pwuser/.cache && \ 23 | 
mv /root/.cache/ms-playwright* /home/pwuser/.cache/ && \ 24 | chown -R pwuser:pwuser /home/pwuser/.cache && \ 25 | rm -rf /var/lib/apt/lists/* 26 | 27 | COPY . . 28 | RUN go build -o bin/worker github.com/egor3f/rssalchemy/cmd/worker 29 | 30 | CMD ["/app/bin/worker"] 31 | -------------------------------------------------------------------------------- /deploy/docker-compose.yml: -------------------------------------------------------------------------------- 1 | services: 2 | webserver: 3 | build: 4 | context: ../ 5 | dockerfile: deploy/Dockerfile_webserver 6 | env_file: .env 7 | depends_on: 8 | - nats 9 | ports: 10 | - "8080:8080" 11 | restart: unless-stopped 12 | 13 | worker: 14 | build: 15 | context: ../ 16 | dockerfile: deploy/Dockerfile_worker 17 | env_file: .env 18 | depends_on: 19 | - nats 20 | - redis 21 | ipc: host 22 | user: pwuser 23 | security_opt: 24 | - seccomp:seccomp_profile.json 25 | deploy: 26 | replicas: 1 27 | restart: unless-stopped 28 | 29 | nats: 30 | image: nats:2.10 31 | command: "-config /nats_config.conf" 32 | volumes: 33 | - ./nats_config.conf:/nats_config.conf:ro 34 | - natsdata:/data 35 | restart: unless-stopped 36 | 37 | redis: 38 | image: redis:7.4 39 | restart: unless-stopped 40 | 41 | volumes: 42 | natsdata: 43 | -------------------------------------------------------------------------------- /deploy/nats_config.conf: -------------------------------------------------------------------------------- 1 | jetstream: enabled 2 | max_payload: 4MB 3 | 4 | jetstream { 5 | store_dir: /data 6 | max_memory_store: 1GB 7 | max_file_store: 10GB 8 | } 9 | -------------------------------------------------------------------------------- /deploy/seccomp_profile.json: -------------------------------------------------------------------------------- 1 | { 2 | "defaultAction": "SCMP_ACT_ERRNO", 3 | "archMap": [ 4 | { 5 | "architecture": "SCMP_ARCH_X86_64", 6 | "subArchitectures": [ 7 | "SCMP_ARCH_X86", 8 | "SCMP_ARCH_X32" 9 | ] 10 | }, 11 | { 12 | 
"architecture": "SCMP_ARCH_AARCH64", 13 | "subArchitectures": [ 14 | "SCMP_ARCH_ARM" 15 | ] 16 | }, 17 | { 18 | "architecture": "SCMP_ARCH_MIPS64", 19 | "subArchitectures": [ 20 | "SCMP_ARCH_MIPS", 21 | "SCMP_ARCH_MIPS64N32" 22 | ] 23 | }, 24 | { 25 | "architecture": "SCMP_ARCH_MIPS64N32", 26 | "subArchitectures": [ 27 | "SCMP_ARCH_MIPS", 28 | "SCMP_ARCH_MIPS64" 29 | ] 30 | }, 31 | { 32 | "architecture": "SCMP_ARCH_MIPSEL64", 33 | "subArchitectures": [ 34 | "SCMP_ARCH_MIPSEL", 35 | "SCMP_ARCH_MIPSEL64N32" 36 | ] 37 | }, 38 | { 39 | "architecture": "SCMP_ARCH_MIPSEL64N32", 40 | "subArchitectures": [ 41 | "SCMP_ARCH_MIPSEL", 42 | "SCMP_ARCH_MIPSEL64" 43 | ] 44 | }, 45 | { 46 | "architecture": "SCMP_ARCH_S390X", 47 | "subArchitectures": [ 48 | "SCMP_ARCH_S390" 49 | ] 50 | } 51 | ], 52 | "syscalls": [ 53 | { 54 | "comment": "Allow create user namespaces", 55 | "names": [ 56 | "clone", 57 | "setns", 58 | "unshare" 59 | ], 60 | "action": "SCMP_ACT_ALLOW", 61 | "args": [], 62 | "includes": {}, 63 | "excludes": {} 64 | }, 65 | { 66 | "names": [ 67 | "accept", 68 | "accept4", 69 | "access", 70 | "adjtimex", 71 | "alarm", 72 | "bind", 73 | "brk", 74 | "capget", 75 | "capset", 76 | "chdir", 77 | "chmod", 78 | "chown", 79 | "chown32", 80 | "clock_adjtime", 81 | "clock_adjtime64", 82 | "clock_getres", 83 | "clock_getres_time64", 84 | "clock_gettime", 85 | "clock_gettime64", 86 | "clock_nanosleep", 87 | "clock_nanosleep_time64", 88 | "close", 89 | "connect", 90 | "copy_file_range", 91 | "creat", 92 | "dup", 93 | "dup2", 94 | "dup3", 95 | "epoll_create", 96 | "epoll_create1", 97 | "epoll_ctl", 98 | "epoll_ctl_old", 99 | "epoll_pwait", 100 | "epoll_wait", 101 | "epoll_wait_old", 102 | "eventfd", 103 | "eventfd2", 104 | "execve", 105 | "execveat", 106 | "exit", 107 | "exit_group", 108 | "faccessat", 109 | "fadvise64", 110 | "fadvise64_64", 111 | "fallocate", 112 | "fanotify_mark", 113 | "fchdir", 114 | "fchmod", 115 | "fchmodat", 116 | "fchown", 117 | "fchown32", 118 | "fchownat", 
119 | "fcntl", 120 | "fcntl64", 121 | "fdatasync", 122 | "fgetxattr", 123 | "flistxattr", 124 | "flock", 125 | "fork", 126 | "fremovexattr", 127 | "fsetxattr", 128 | "fstat", 129 | "fstat64", 130 | "fstatat64", 131 | "fstatfs", 132 | "fstatfs64", 133 | "fsync", 134 | "ftruncate", 135 | "ftruncate64", 136 | "futex", 137 | "futex_time64", 138 | "futimesat", 139 | "getcpu", 140 | "getcwd", 141 | "getdents", 142 | "getdents64", 143 | "getegid", 144 | "getegid32", 145 | "geteuid", 146 | "geteuid32", 147 | "getgid", 148 | "getgid32", 149 | "getgroups", 150 | "getgroups32", 151 | "getitimer", 152 | "getpeername", 153 | "getpgid", 154 | "getpgrp", 155 | "getpid", 156 | "getppid", 157 | "getpriority", 158 | "getrandom", 159 | "getresgid", 160 | "getresgid32", 161 | "getresuid", 162 | "getresuid32", 163 | "getrlimit", 164 | "get_robust_list", 165 | "getrusage", 166 | "getsid", 167 | "getsockname", 168 | "getsockopt", 169 | "get_thread_area", 170 | "gettid", 171 | "gettimeofday", 172 | "getuid", 173 | "getuid32", 174 | "getxattr", 175 | "inotify_add_watch", 176 | "inotify_init", 177 | "inotify_init1", 178 | "inotify_rm_watch", 179 | "io_cancel", 180 | "ioctl", 181 | "io_destroy", 182 | "io_getevents", 183 | "io_pgetevents", 184 | "io_pgetevents_time64", 185 | "ioprio_get", 186 | "ioprio_set", 187 | "io_setup", 188 | "io_submit", 189 | "io_uring_enter", 190 | "io_uring_register", 191 | "io_uring_setup", 192 | "ipc", 193 | "kill", 194 | "lchown", 195 | "lchown32", 196 | "lgetxattr", 197 | "link", 198 | "linkat", 199 | "listen", 200 | "listxattr", 201 | "llistxattr", 202 | "_llseek", 203 | "lremovexattr", 204 | "lseek", 205 | "lsetxattr", 206 | "lstat", 207 | "lstat64", 208 | "madvise", 209 | "membarrier", 210 | "memfd_create", 211 | "mincore", 212 | "mkdir", 213 | "mkdirat", 214 | "mknod", 215 | "mknodat", 216 | "mlock", 217 | "mlock2", 218 | "mlockall", 219 | "mmap", 220 | "mmap2", 221 | "mprotect", 222 | "mq_getsetattr", 223 | "mq_notify", 224 | "mq_open", 225 | 
"mq_timedreceive", 226 | "mq_timedreceive_time64", 227 | "mq_timedsend", 228 | "mq_timedsend_time64", 229 | "mq_unlink", 230 | "mremap", 231 | "msgctl", 232 | "msgget", 233 | "msgrcv", 234 | "msgsnd", 235 | "msync", 236 | "munlock", 237 | "munlockall", 238 | "munmap", 239 | "nanosleep", 240 | "newfstatat", 241 | "_newselect", 242 | "open", 243 | "openat", 244 | "pause", 245 | "pipe", 246 | "pipe2", 247 | "poll", 248 | "ppoll", 249 | "ppoll_time64", 250 | "prctl", 251 | "pread64", 252 | "preadv", 253 | "preadv2", 254 | "prlimit64", 255 | "pselect6", 256 | "pselect6_time64", 257 | "pwrite64", 258 | "pwritev", 259 | "pwritev2", 260 | "read", 261 | "readahead", 262 | "readlink", 263 | "readlinkat", 264 | "readv", 265 | "recv", 266 | "recvfrom", 267 | "recvmmsg", 268 | "recvmmsg_time64", 269 | "recvmsg", 270 | "remap_file_pages", 271 | "removexattr", 272 | "rename", 273 | "renameat", 274 | "renameat2", 275 | "restart_syscall", 276 | "rmdir", 277 | "rseq", 278 | "rt_sigaction", 279 | "rt_sigpending", 280 | "rt_sigprocmask", 281 | "rt_sigqueueinfo", 282 | "rt_sigreturn", 283 | "rt_sigsuspend", 284 | "rt_sigtimedwait", 285 | "rt_sigtimedwait_time64", 286 | "rt_tgsigqueueinfo", 287 | "sched_getaffinity", 288 | "sched_getattr", 289 | "sched_getparam", 290 | "sched_get_priority_max", 291 | "sched_get_priority_min", 292 | "sched_getscheduler", 293 | "sched_rr_get_interval", 294 | "sched_rr_get_interval_time64", 295 | "sched_setaffinity", 296 | "sched_setattr", 297 | "sched_setparam", 298 | "sched_setscheduler", 299 | "sched_yield", 300 | "seccomp", 301 | "select", 302 | "semctl", 303 | "semget", 304 | "semop", 305 | "semtimedop", 306 | "semtimedop_time64", 307 | "send", 308 | "sendfile", 309 | "sendfile64", 310 | "sendmmsg", 311 | "sendmsg", 312 | "sendto", 313 | "setfsgid", 314 | "setfsgid32", 315 | "setfsuid", 316 | "setfsuid32", 317 | "setgid", 318 | "setgid32", 319 | "setgroups", 320 | "setgroups32", 321 | "setitimer", 322 | "setpgid", 323 | "setpriority", 324 | 
"setregid", 325 | "setregid32", 326 | "setresgid", 327 | "setresgid32", 328 | "setresuid", 329 | "setresuid32", 330 | "setreuid", 331 | "setreuid32", 332 | "setrlimit", 333 | "set_robust_list", 334 | "setsid", 335 | "setsockopt", 336 | "set_thread_area", 337 | "set_tid_address", 338 | "setuid", 339 | "setuid32", 340 | "setxattr", 341 | "shmat", 342 | "shmctl", 343 | "shmdt", 344 | "shmget", 345 | "shutdown", 346 | "sigaltstack", 347 | "signalfd", 348 | "signalfd4", 349 | "sigprocmask", 350 | "sigreturn", 351 | "socket", 352 | "socketcall", 353 | "socketpair", 354 | "splice", 355 | "stat", 356 | "stat64", 357 | "statfs", 358 | "statfs64", 359 | "statx", 360 | "symlink", 361 | "symlinkat", 362 | "sync", 363 | "sync_file_range", 364 | "syncfs", 365 | "sysinfo", 366 | "tee", 367 | "tgkill", 368 | "time", 369 | "timer_create", 370 | "timer_delete", 371 | "timer_getoverrun", 372 | "timer_gettime", 373 | "timer_gettime64", 374 | "timer_settime", 375 | "timer_settime64", 376 | "timerfd_create", 377 | "timerfd_gettime", 378 | "timerfd_gettime64", 379 | "timerfd_settime", 380 | "timerfd_settime64", 381 | "times", 382 | "tkill", 383 | "truncate", 384 | "truncate64", 385 | "ugetrlimit", 386 | "umask", 387 | "uname", 388 | "unlink", 389 | "unlinkat", 390 | "utime", 391 | "utimensat", 392 | "utimensat_time64", 393 | "utimes", 394 | "vfork", 395 | "vmsplice", 396 | "wait4", 397 | "waitid", 398 | "waitpid", 399 | "write", 400 | "writev" 401 | ], 402 | "action": "SCMP_ACT_ALLOW", 403 | "args": [], 404 | "comment": "", 405 | "includes": {}, 406 | "excludes": {} 407 | }, 408 | { 409 | "names": [ 410 | "ptrace" 411 | ], 412 | "action": "SCMP_ACT_ALLOW", 413 | "args": null, 414 | "comment": "", 415 | "includes": { 416 | "minKernel": "4.8" 417 | }, 418 | "excludes": {} 419 | }, 420 | { 421 | "names": [ 422 | "personality" 423 | ], 424 | "action": "SCMP_ACT_ALLOW", 425 | "args": [ 426 | { 427 | "index": 0, 428 | "value": 0, 429 | "valueTwo": 0, 430 | "op": "SCMP_CMP_EQ" 431 | } 432 | ], 
433 | "comment": "", 434 | "includes": {}, 435 | "excludes": {} 436 | }, 437 | { 438 | "names": [ 439 | "personality" 440 | ], 441 | "action": "SCMP_ACT_ALLOW", 442 | "args": [ 443 | { 444 | "index": 0, 445 | "value": 8, 446 | "valueTwo": 0, 447 | "op": "SCMP_CMP_EQ" 448 | } 449 | ], 450 | "comment": "", 451 | "includes": {}, 452 | "excludes": {} 453 | }, 454 | { 455 | "names": [ 456 | "personality" 457 | ], 458 | "action": "SCMP_ACT_ALLOW", 459 | "args": [ 460 | { 461 | "index": 0, 462 | "value": 131072, 463 | "valueTwo": 0, 464 | "op": "SCMP_CMP_EQ" 465 | } 466 | ], 467 | "comment": "", 468 | "includes": {}, 469 | "excludes": {} 470 | }, 471 | { 472 | "names": [ 473 | "personality" 474 | ], 475 | "action": "SCMP_ACT_ALLOW", 476 | "args": [ 477 | { 478 | "index": 0, 479 | "value": 131080, 480 | "valueTwo": 0, 481 | "op": "SCMP_CMP_EQ" 482 | } 483 | ], 484 | "comment": "", 485 | "includes": {}, 486 | "excludes": {} 487 | }, 488 | { 489 | "names": [ 490 | "personality" 491 | ], 492 | "action": "SCMP_ACT_ALLOW", 493 | "args": [ 494 | { 495 | "index": 0, 496 | "value": 4294967295, 497 | "valueTwo": 0, 498 | "op": "SCMP_CMP_EQ" 499 | } 500 | ], 501 | "comment": "", 502 | "includes": {}, 503 | "excludes": {} 504 | }, 505 | { 506 | "names": [ 507 | "sync_file_range2" 508 | ], 509 | "action": "SCMP_ACT_ALLOW", 510 | "args": [], 511 | "comment": "", 512 | "includes": { 513 | "arches": [ 514 | "ppc64le" 515 | ] 516 | }, 517 | "excludes": {} 518 | }, 519 | { 520 | "names": [ 521 | "arm_fadvise64_64", 522 | "arm_sync_file_range", 523 | "sync_file_range2", 524 | "breakpoint", 525 | "cacheflush", 526 | "set_tls" 527 | ], 528 | "action": "SCMP_ACT_ALLOW", 529 | "args": [], 530 | "comment": "", 531 | "includes": { 532 | "arches": [ 533 | "arm", 534 | "arm64" 535 | ] 536 | }, 537 | "excludes": {} 538 | }, 539 | { 540 | "names": [ 541 | "arch_prctl" 542 | ], 543 | "action": "SCMP_ACT_ALLOW", 544 | "args": [], 545 | "comment": "", 546 | "includes": { 547 | "arches": [ 548 | "amd64", 
549 | "x32" 550 | ] 551 | }, 552 | "excludes": {} 553 | }, 554 | { 555 | "names": [ 556 | "modify_ldt" 557 | ], 558 | "action": "SCMP_ACT_ALLOW", 559 | "args": [], 560 | "comment": "", 561 | "includes": { 562 | "arches": [ 563 | "amd64", 564 | "x32", 565 | "x86" 566 | ] 567 | }, 568 | "excludes": {} 569 | }, 570 | { 571 | "names": [ 572 | "s390_pci_mmio_read", 573 | "s390_pci_mmio_write", 574 | "s390_runtime_instr" 575 | ], 576 | "action": "SCMP_ACT_ALLOW", 577 | "args": [], 578 | "comment": "", 579 | "includes": { 580 | "arches": [ 581 | "s390", 582 | "s390x" 583 | ] 584 | }, 585 | "excludes": {} 586 | }, 587 | { 588 | "names": [ 589 | "open_by_handle_at" 590 | ], 591 | "action": "SCMP_ACT_ALLOW", 592 | "args": [], 593 | "comment": "", 594 | "includes": { 595 | "caps": [ 596 | "CAP_DAC_READ_SEARCH" 597 | ] 598 | }, 599 | "excludes": {} 600 | }, 601 | { 602 | "names": [ 603 | "bpf", 604 | "clone", 605 | "fanotify_init", 606 | "lookup_dcookie", 607 | "mount", 608 | "name_to_handle_at", 609 | "perf_event_open", 610 | "quotactl", 611 | "setdomainname", 612 | "sethostname", 613 | "setns", 614 | "syslog", 615 | "umount", 616 | "umount2", 617 | "unshare" 618 | ], 619 | "action": "SCMP_ACT_ALLOW", 620 | "args": [], 621 | "comment": "", 622 | "includes": { 623 | "caps": [ 624 | "CAP_SYS_ADMIN" 625 | ] 626 | }, 627 | "excludes": {} 628 | }, 629 | { 630 | "names": [ 631 | "clone" 632 | ], 633 | "action": "SCMP_ACT_ALLOW", 634 | "args": [ 635 | { 636 | "index": 0, 637 | "value": 2114060288, 638 | "valueTwo": 0, 639 | "op": "SCMP_CMP_MASKED_EQ" 640 | } 641 | ], 642 | "comment": "", 643 | "includes": {}, 644 | "excludes": { 645 | "caps": [ 646 | "CAP_SYS_ADMIN" 647 | ], 648 | "arches": [ 649 | "s390", 650 | "s390x" 651 | ] 652 | } 653 | }, 654 | { 655 | "names": [ 656 | "clone" 657 | ], 658 | "action": "SCMP_ACT_ALLOW", 659 | "args": [ 660 | { 661 | "index": 1, 662 | "value": 2114060288, 663 | "valueTwo": 0, 664 | "op": "SCMP_CMP_MASKED_EQ" 665 | } 666 | ], 667 | "comment": 
"s390 parameter ordering for clone is different", 668 | "includes": { 669 | "arches": [ 670 | "s390", 671 | "s390x" 672 | ] 673 | }, 674 | "excludes": { 675 | "caps": [ 676 | "CAP_SYS_ADMIN" 677 | ] 678 | } 679 | }, 680 | { 681 | "names": [ 682 | "reboot" 683 | ], 684 | "action": "SCMP_ACT_ALLOW", 685 | "args": [], 686 | "comment": "", 687 | "includes": { 688 | "caps": [ 689 | "CAP_SYS_BOOT" 690 | ] 691 | }, 692 | "excludes": {} 693 | }, 694 | { 695 | "names": [ 696 | "chroot" 697 | ], 698 | "action": "SCMP_ACT_ALLOW", 699 | "args": [], 700 | "comment": "", 701 | "includes": { 702 | "caps": [ 703 | "CAP_SYS_CHROOT" 704 | ] 705 | }, 706 | "excludes": {} 707 | }, 708 | { 709 | "names": [ 710 | "delete_module", 711 | "init_module", 712 | "finit_module" 713 | ], 714 | "action": "SCMP_ACT_ALLOW", 715 | "args": [], 716 | "comment": "", 717 | "includes": { 718 | "caps": [ 719 | "CAP_SYS_MODULE" 720 | ] 721 | }, 722 | "excludes": {} 723 | }, 724 | { 725 | "names": [ 726 | "acct" 727 | ], 728 | "action": "SCMP_ACT_ALLOW", 729 | "args": [], 730 | "comment": "", 731 | "includes": { 732 | "caps": [ 733 | "CAP_SYS_PACCT" 734 | ] 735 | }, 736 | "excludes": {} 737 | }, 738 | { 739 | "names": [ 740 | "kcmp", 741 | "process_vm_readv", 742 | "process_vm_writev", 743 | "ptrace" 744 | ], 745 | "action": "SCMP_ACT_ALLOW", 746 | "args": [], 747 | "comment": "", 748 | "includes": { 749 | "caps": [ 750 | "CAP_SYS_PTRACE" 751 | ] 752 | }, 753 | "excludes": {} 754 | }, 755 | { 756 | "names": [ 757 | "iopl", 758 | "ioperm" 759 | ], 760 | "action": "SCMP_ACT_ALLOW", 761 | "args": [], 762 | "comment": "", 763 | "includes": { 764 | "caps": [ 765 | "CAP_SYS_RAWIO" 766 | ] 767 | }, 768 | "excludes": {} 769 | }, 770 | { 771 | "names": [ 772 | "settimeofday", 773 | "stime", 774 | "clock_settime" 775 | ], 776 | "action": "SCMP_ACT_ALLOW", 777 | "args": [], 778 | "comment": "", 779 | "includes": { 780 | "caps": [ 781 | "CAP_SYS_TIME" 782 | ] 783 | }, 784 | "excludes": {} 785 | }, 786 | { 787 | 
"names": [ 788 | "vhangup" 789 | ], 790 | "action": "SCMP_ACT_ALLOW", 791 | "args": [], 792 | "comment": "", 793 | "includes": { 794 | "caps": [ 795 | "CAP_SYS_TTY_CONFIG" 796 | ] 797 | }, 798 | "excludes": {} 799 | }, 800 | { 801 | "names": [ 802 | "get_mempolicy", 803 | "mbind", 804 | "set_mempolicy" 805 | ], 806 | "action": "SCMP_ACT_ALLOW", 807 | "args": [], 808 | "comment": "", 809 | "includes": { 810 | "caps": [ 811 | "CAP_SYS_NICE" 812 | ] 813 | }, 814 | "excludes": {} 815 | }, 816 | { 817 | "names": [ 818 | "syslog" 819 | ], 820 | "action": "SCMP_ACT_ALLOW", 821 | "args": [], 822 | "comment": "", 823 | "includes": { 824 | "caps": [ 825 | "CAP_SYSLOG" 826 | ] 827 | }, 828 | "excludes": {} 829 | } 830 | ] 831 | } 832 | -------------------------------------------------------------------------------- /frontend/wizard-vue/.editorconfig: -------------------------------------------------------------------------------- 1 | [*.{js,jsx,mjs,cjs,ts,tsx,mts,cts,vue}] 2 | charset = utf-8 3 | indent_size = 2 4 | indent_style = space 5 | insert_final_newline = true 6 | trim_trailing_whitespace = true 7 | 8 | end_of_line = lf 9 | max_line_length = 100 10 | -------------------------------------------------------------------------------- /frontend/wizard-vue/.env.development: -------------------------------------------------------------------------------- 1 | VITE_API_BASE=http://localhost:5000 2 | -------------------------------------------------------------------------------- /frontend/wizard-vue/.env.production: -------------------------------------------------------------------------------- 1 | VITE_API_BASE= 2 | -------------------------------------------------------------------------------- /frontend/wizard-vue/.gitattributes: -------------------------------------------------------------------------------- 1 | * text=auto eol=lf 2 | -------------------------------------------------------------------------------- /frontend/wizard-vue/.gitignore: 
-------------------------------------------------------------------------------- 1 | # Logs 2 | logs 3 | *.log 4 | npm-debug.log* 5 | yarn-debug.log* 6 | yarn-error.log* 7 | pnpm-debug.log* 8 | lerna-debug.log* 9 | 10 | node_modules 11 | .DS_Store 12 | dist 13 | dist-ssr 14 | coverage 15 | *.local 16 | 17 | /cypress/videos/ 18 | /cypress/screenshots/ 19 | 20 | # Editor directories and files 21 | .vscode/* 22 | !.vscode/extensions.json 23 | .idea 24 | *.suo 25 | *.ntvs* 26 | *.njsproj 27 | *.sln 28 | *.sw? 29 | 30 | *.tsbuildinfo 31 | -------------------------------------------------------------------------------- /frontend/wizard-vue/.prettierrc.json: -------------------------------------------------------------------------------- 1 | 2 | { 3 | "$schema": "https://json.schemastore.org/prettierrc", 4 | "semi": false, 5 | "singleQuote": true, 6 | "printWidth": 100 7 | } 8 | -------------------------------------------------------------------------------- /frontend/wizard-vue/embed.go: -------------------------------------------------------------------------------- 1 | package wizard_vue 2 | 3 | import "embed" 4 | 5 | //go:embed dist 6 | var EmbedFS embed.FS 7 | 8 | const FSPrefix = "dist" 9 | -------------------------------------------------------------------------------- /frontend/wizard-vue/env.d.ts: -------------------------------------------------------------------------------- 1 | /// 2 | -------------------------------------------------------------------------------- /frontend/wizard-vue/eslint.config.ts: -------------------------------------------------------------------------------- 1 | import pluginVue from 'eslint-plugin-vue' 2 | import { defineConfigWithVueTs, vueTsConfigs } from '@vue/eslint-config-typescript' 3 | import skipFormatting from '@vue/eslint-config-prettier/skip-formatting' 4 | 5 | // To allow more languages other than `ts` in `.vue` files, uncomment the following lines: 6 | // import { configureVueProject } from '@vue/eslint-config-typescript' 
7 | // configureVueProject({ scriptLangs: ['ts', 'tsx'] }) 8 | // More info at https://github.com/vuejs/eslint-config-typescript/#advanced-setup 9 | 10 | export default defineConfigWithVueTs( 11 | { 12 | name: 'app/files-to-lint', 13 | files: ['**/*.{ts,mts,tsx,vue}'], 14 | }, 15 | 16 | { 17 | name: 'app/files-to-ignore', 18 | ignores: ['**/dist/**', '**/dist-ssr/**', '**/coverage/**'], 19 | }, 20 | 21 | pluginVue.configs['flat/essential'], 22 | vueTsConfigs.recommended, 23 | skipFormatting, 24 | ) 25 | -------------------------------------------------------------------------------- /frontend/wizard-vue/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | RSS Alchemy 8 | 9 | 10 |
11 | 12 | 13 | 14 | -------------------------------------------------------------------------------- /frontend/wizard-vue/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "wizard-vue", 3 | "version": "0.0.0", 4 | "private": true, 5 | "type": "module", 6 | "scripts": { 7 | "dev": "vite", 8 | "build": "run-p type-check \"build-only {@}\" --", 9 | "preview": "vite preview", 10 | "build-only": "vite build", 11 | "type-check": "vue-tsc --build", 12 | "lint": "eslint . --fix", 13 | "format": "prettier --write src/" 14 | }, 15 | "dependencies": { 16 | "@kalimahapps/vue-icons": "^1.7.1", 17 | "es-toolkit": "^1.32.0", 18 | "google-protobuf": "^3.21.4", 19 | "pinia": "^2.3.1", 20 | "vue": "^3.5.13", 21 | "vue-router": "^4.5.0" 22 | }, 23 | "devDependencies": { 24 | "@tsconfig/node22": "^22.0.0", 25 | "@types/google-protobuf": "^3.15.12", 26 | "@types/node": "^22.10.7", 27 | "@vitejs/plugin-vue": "^5.2.1", 28 | "@vue/eslint-config-prettier": "^10.1.0", 29 | "@vue/eslint-config-typescript": "^14.3.0", 30 | "@vue/tsconfig": "^0.7.0", 31 | "eslint": "^9.18.0", 32 | "eslint-plugin-vue": "^9.32.0", 33 | "jiti": "^2.4.2", 34 | "npm-run-all2": "^7.0.2", 35 | "prettier": "^3.4.2", 36 | "sass-embedded": "^1.83.4", 37 | "typescript": "~5.7.3", 38 | "vite": "^6.0.11", 39 | "vite-plugin-vue-devtools": "^7.7.0", 40 | "vue-tsc": "^2.2.0" 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /frontend/wizard-vue/src/App.vue: -------------------------------------------------------------------------------- 1 | 5 | 6 | 15 | 16 | 31 | -------------------------------------------------------------------------------- /frontend/wizard-vue/src/assets/base.scss: -------------------------------------------------------------------------------- 1 | body { 2 | font-family: "system-ui", "Segoe UI", Helvetica, Arial, sans-serif; 3 | } 4 | 
-------------------------------------------------------------------------------- /frontend/wizard-vue/src/assets/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Egor3f/rssalchemy/a839d87ee6b5517a40789990069317f1b03518c5/frontend/wizard-vue/src/assets/logo.png -------------------------------------------------------------------------------- /frontend/wizard-vue/src/common/enum.ts: -------------------------------------------------------------------------------- 1 | export type EnumValue = { 2 | label: string 3 | value: number 4 | } 5 | export type Enum = EnumValue[] 6 | -------------------------------------------------------------------------------- /frontend/wizard-vue/src/components/Btn.vue: -------------------------------------------------------------------------------- 1 | 9 | 10 | 15 | 16 | 45 | -------------------------------------------------------------------------------- /frontend/wizard-vue/src/components/Copyable.vue: -------------------------------------------------------------------------------- 1 | 21 | 22 | 29 | 30 | 61 | -------------------------------------------------------------------------------- /frontend/wizard-vue/src/components/EditUrlModal.vue: -------------------------------------------------------------------------------- 1 | 48 | 49 | 61 | 62 | 65 | -------------------------------------------------------------------------------- /frontend/wizard-vue/src/components/ForkMe.vue: -------------------------------------------------------------------------------- 1 | 8 | 9 | 21 | -------------------------------------------------------------------------------- /frontend/wizard-vue/src/components/Modal.vue: -------------------------------------------------------------------------------- 1 | 4 | 5 | 14 | 15 | 37 | -------------------------------------------------------------------------------- /frontend/wizard-vue/src/components/SpecsForm.vue: 
-------------------------------------------------------------------------------- 1 | 19 | 20 | 46 | 47 | 52 | -------------------------------------------------------------------------------- /frontend/wizard-vue/src/components/inputs/RadioButtons.vue: -------------------------------------------------------------------------------- 1 | 17 | 18 | 33 | 34 | 52 | -------------------------------------------------------------------------------- /frontend/wizard-vue/src/components/inputs/TextField.vue: -------------------------------------------------------------------------------- 1 | 19 | 20 | 28 | 29 | 47 | -------------------------------------------------------------------------------- /frontend/wizard-vue/src/main.ts: -------------------------------------------------------------------------------- 1 | import './assets/base.scss'; 2 | 3 | import { createApp } from 'vue' 4 | import { createPinia } from 'pinia' 5 | 6 | import App from './App.vue' 7 | import router from './router' 8 | 9 | const app = createApp(App) 10 | 11 | app.use(createPinia()) 12 | app.use(router) 13 | 14 | app.mount('#app') 15 | -------------------------------------------------------------------------------- /frontend/wizard-vue/src/pages/WizardPage.vue: -------------------------------------------------------------------------------- 1 | 59 | 60 | 73 | 74 | 90 | -------------------------------------------------------------------------------- /frontend/wizard-vue/src/router/index.ts: -------------------------------------------------------------------------------- 1 | import { createRouter, createWebHistory } from 'vue-router' 2 | import WizardPage from "@/pages/WizardPage.vue"; 3 | 4 | const router = createRouter({ 5 | history: createWebHistory(import.meta.env.BASE_URL), 6 | routes: [ 7 | { 8 | path: '/', 9 | name: 'wizard', 10 | component: WizardPage, 11 | }, 12 | ], 13 | }) 14 | 15 | export default router 16 | -------------------------------------------------------------------------------- 
/frontend/wizard-vue/src/stores/wizard.ts: -------------------------------------------------------------------------------- 1 | import {defineStore} from "pinia"; 2 | import {defaultSpecs, type SpecField, fields, type Specs, type SpecValue} from "@/urlmaker/specs.ts"; 3 | import {computed, reactive} from "vue"; 4 | import {debounce} from "es-toolkit"; 5 | 6 | const LOCAL_STORAGE_KEY = 'rssalchemy_store_wizard'; 7 | 8 | export const useWizardStore = defineStore('wizard', () => { 9 | 10 | const locStorageContent = localStorage.getItem(LOCAL_STORAGE_KEY); 11 | const initialSpecs = locStorageContent ? JSON.parse(locStorageContent) as Specs : defaultSpecs; 12 | 13 | const specs = reactive(Object.assign({}, initialSpecs)); 14 | 15 | const formValid = computed(() => { 16 | return fields.every(field => ( 17 | !specs[field.name] && !(field as SpecField).required || field.validate(specs[field.name]!) 18 | )); 19 | }); 20 | 21 | const updateLocalStorage = debounce(() => { 22 | localStorage.setItem(LOCAL_STORAGE_KEY, JSON.stringify(specs)); 23 | }, 100); 24 | 25 | function updateSpec(fieldName: K, newValue: Specs[K]) { 26 | specs[fieldName] = newValue; 27 | updateLocalStorage(); 28 | } 29 | function updateSpecs(newValue: Specs) { 30 | Object.assign(specs, newValue); 31 | updateLocalStorage(); 32 | } 33 | function reset() { 34 | Object.assign(specs, defaultSpecs); 35 | updateLocalStorage(); 36 | } 37 | 38 | return {specs, formValid, updateSpec, updateSpecs, reset}; 39 | }); 40 | -------------------------------------------------------------------------------- /frontend/wizard-vue/src/urlmaker/index.ts: -------------------------------------------------------------------------------- 1 | import {type Specs} from "@/urlmaker/specs.ts"; 2 | import {b64decode, b64encode, compress, decompress, decompressString} from "@/urlmaker/utils.ts"; 3 | import {rssalchemy, rssalchemy as pb} from '@/urlmaker/proto/specs.ts'; 4 | 5 | const apiBase = import.meta.env.VITE_API_BASE || 
document.location.origin; 6 | const renderEndpoint = '/api/v1/render/'; // trailing slash 7 | const screenshotEndpoint = '/api/v1/screenshot'; // no trailing slash 8 | export const presetPrefix = 'rssalchemy:'; 9 | 10 | export async function decodeUrl(url: string): Promise { 11 | const splitUrl = url.split(renderEndpoint); 12 | if(splitUrl.length !== 2) { 13 | throw 'Split failed'; 14 | } 15 | let encodedData = splitUrl[1]; 16 | return decodeSpecsPart(encodedData); 17 | } 18 | 19 | export async function decodePreset(preset: string): Promise { 20 | if(!preset.startsWith(presetPrefix)) { 21 | throw 'Invalid preset'; 22 | } 23 | let encodedData = preset.substring(presetPrefix.length); 24 | return decodeSpecsPart(encodedData); 25 | } 26 | 27 | export async function decodeSpecsPart(encodedData: string): Promise { 28 | console.log('Decoded data len=' + encodedData.length); 29 | const m = encodedData.match(/(\d*):?([A-Za-z0-9+/=]+)/); 30 | if(!m) { 31 | throw 'Regex failed'; 32 | } 33 | const version = m[1] ? 
parseInt(m[1]) : 0; 34 | console.log('Decoding url using version: ' + version); 35 | encodedData = m[2]; 36 | 37 | let buf = b64decode(encodedData); 38 | switch (version) { 39 | case 0: 40 | const jsonData = await decompressString(buf); 41 | return JSON.parse(jsonData); 42 | case 1: 43 | const data = await decompress(buf); 44 | //@ts-ignore 45 | return pb.Specs.deserializeBinary(data).toObject(); 46 | default: 47 | throw 'Unknown version' 48 | } 49 | } 50 | 51 | export async function encodeUrl(specs: Specs): Promise { 52 | return `${apiBase}${renderEndpoint}${await encodeSpecsPart(specs)}` 53 | } 54 | 55 | export async function encodePreset(specs: Specs): Promise { 56 | return `${presetPrefix}${await encodeSpecsPart(specs)}`; 57 | } 58 | 59 | export async function encodeSpecsPart(specs: Specs): Promise { 60 | const pbSpecs = pb.Specs.fromObject(specs as ReturnType); 61 | let data = pbSpecs.serializeBinary(); 62 | data = await compress(data); 63 | const encodedData = b64encode(data); 64 | console.log('Encoded data len=' + encodedData.length); 65 | const version = 1; 66 | return `${version}:${encodedData}`; 67 | } 68 | 69 | export function getScreenshotUrl(url: string): string { 70 | return `${apiBase}${screenshotEndpoint}?url=${encodeURIComponent(url)}`; 71 | } 72 | -------------------------------------------------------------------------------- /frontend/wizard-vue/src/urlmaker/proto/specs.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * Generated by the protoc-gen-ts. DO NOT EDIT! 
3 | * compiler version: 5.29.3 4 | * source: proto/specs.proto 5 | * git: https://github.com/thesayyn/protoc-gen-ts */ 6 | import * as pb_1 from "google-protobuf"; 7 | export namespace rssalchemy { 8 | export enum ExtractFrom { 9 | InnerText = 0, 10 | Attribute = 1 11 | } 12 | export class Specs extends pb_1.Message { 13 | #one_of_decls: number[][] = []; 14 | constructor(data?: any[] | { 15 | url?: string; 16 | selector_post?: string; 17 | selector_title?: string; 18 | selector_link?: string; 19 | selector_description?: string; 20 | selector_author?: string; 21 | selector_created?: string; 22 | created_extract_from?: ExtractFrom; 23 | created_attribute_name?: string; 24 | selector_content?: string; 25 | selector_enclosure?: string; 26 | cache_lifetime?: string; 27 | }) { 28 | super(); 29 | pb_1.Message.initialize(this, Array.isArray(data) ? data : [], 0, -1, [], this.#one_of_decls); 30 | if (!Array.isArray(data) && typeof data == "object") { 31 | if ("url" in data && data.url != undefined) { 32 | this.url = data.url; 33 | } 34 | if ("selector_post" in data && data.selector_post != undefined) { 35 | this.selector_post = data.selector_post; 36 | } 37 | if ("selector_title" in data && data.selector_title != undefined) { 38 | this.selector_title = data.selector_title; 39 | } 40 | if ("selector_link" in data && data.selector_link != undefined) { 41 | this.selector_link = data.selector_link; 42 | } 43 | if ("selector_description" in data && data.selector_description != undefined) { 44 | this.selector_description = data.selector_description; 45 | } 46 | if ("selector_author" in data && data.selector_author != undefined) { 47 | this.selector_author = data.selector_author; 48 | } 49 | if ("selector_created" in data && data.selector_created != undefined) { 50 | this.selector_created = data.selector_created; 51 | } 52 | if ("created_extract_from" in data && data.created_extract_from != undefined) { 53 | this.created_extract_from = data.created_extract_from; 54 | } 55 | if 
("created_attribute_name" in data && data.created_attribute_name != undefined) { 56 | this.created_attribute_name = data.created_attribute_name; 57 | } 58 | if ("selector_content" in data && data.selector_content != undefined) { 59 | this.selector_content = data.selector_content; 60 | } 61 | if ("selector_enclosure" in data && data.selector_enclosure != undefined) { 62 | this.selector_enclosure = data.selector_enclosure; 63 | } 64 | if ("cache_lifetime" in data && data.cache_lifetime != undefined) { 65 | this.cache_lifetime = data.cache_lifetime; 66 | } 67 | } 68 | } 69 | get url() { 70 | return pb_1.Message.getFieldWithDefault(this, 1, "") as string; 71 | } 72 | set url(value: string) { 73 | pb_1.Message.setField(this, 1, value); 74 | } 75 | get selector_post() { 76 | return pb_1.Message.getFieldWithDefault(this, 2, "") as string; 77 | } 78 | set selector_post(value: string) { 79 | pb_1.Message.setField(this, 2, value); 80 | } 81 | get selector_title() { 82 | return pb_1.Message.getFieldWithDefault(this, 3, "") as string; 83 | } 84 | set selector_title(value: string) { 85 | pb_1.Message.setField(this, 3, value); 86 | } 87 | get selector_link() { 88 | return pb_1.Message.getFieldWithDefault(this, 4, "") as string; 89 | } 90 | set selector_link(value: string) { 91 | pb_1.Message.setField(this, 4, value); 92 | } 93 | get selector_description() { 94 | return pb_1.Message.getFieldWithDefault(this, 5, "") as string; 95 | } 96 | set selector_description(value: string) { 97 | pb_1.Message.setField(this, 5, value); 98 | } 99 | get selector_author() { 100 | return pb_1.Message.getFieldWithDefault(this, 6, "") as string; 101 | } 102 | set selector_author(value: string) { 103 | pb_1.Message.setField(this, 6, value); 104 | } 105 | get selector_created() { 106 | return pb_1.Message.getFieldWithDefault(this, 7, "") as string; 107 | } 108 | set selector_created(value: string) { 109 | pb_1.Message.setField(this, 7, value); 110 | } 111 | get created_extract_from() { 112 | return 
pb_1.Message.getFieldWithDefault(this, 11, ExtractFrom.InnerText) as ExtractFrom; 113 | } 114 | set created_extract_from(value: ExtractFrom) { 115 | pb_1.Message.setField(this, 11, value); 116 | } 117 | get created_attribute_name() { 118 | return pb_1.Message.getFieldWithDefault(this, 12, "") as string; 119 | } 120 | set created_attribute_name(value: string) { 121 | pb_1.Message.setField(this, 12, value); 122 | } 123 | get selector_content() { 124 | return pb_1.Message.getFieldWithDefault(this, 8, "") as string; 125 | } 126 | set selector_content(value: string) { 127 | pb_1.Message.setField(this, 8, value); 128 | } 129 | get selector_enclosure() { 130 | return pb_1.Message.getFieldWithDefault(this, 9, "") as string; 131 | } 132 | set selector_enclosure(value: string) { 133 | pb_1.Message.setField(this, 9, value); 134 | } 135 | get cache_lifetime() { 136 | return pb_1.Message.getFieldWithDefault(this, 10, "") as string; 137 | } 138 | set cache_lifetime(value: string) { 139 | pb_1.Message.setField(this, 10, value); 140 | } 141 | static fromObject(data: { 142 | url?: string; 143 | selector_post?: string; 144 | selector_title?: string; 145 | selector_link?: string; 146 | selector_description?: string; 147 | selector_author?: string; 148 | selector_created?: string; 149 | created_extract_from?: ExtractFrom; 150 | created_attribute_name?: string; 151 | selector_content?: string; 152 | selector_enclosure?: string; 153 | cache_lifetime?: string; 154 | }): Specs { 155 | const message = new Specs({}); 156 | if (data.url != null) { 157 | message.url = data.url; 158 | } 159 | if (data.selector_post != null) { 160 | message.selector_post = data.selector_post; 161 | } 162 | if (data.selector_title != null) { 163 | message.selector_title = data.selector_title; 164 | } 165 | if (data.selector_link != null) { 166 | message.selector_link = data.selector_link; 167 | } 168 | if (data.selector_description != null) { 169 | message.selector_description = data.selector_description; 170 | 
} 171 | if (data.selector_author != null) { 172 | message.selector_author = data.selector_author; 173 | } 174 | if (data.selector_created != null) { 175 | message.selector_created = data.selector_created; 176 | } 177 | if (data.created_extract_from != null) { 178 | message.created_extract_from = data.created_extract_from; 179 | } 180 | if (data.created_attribute_name != null) { 181 | message.created_attribute_name = data.created_attribute_name; 182 | } 183 | if (data.selector_content != null) { 184 | message.selector_content = data.selector_content; 185 | } 186 | if (data.selector_enclosure != null) { 187 | message.selector_enclosure = data.selector_enclosure; 188 | } 189 | if (data.cache_lifetime != null) { 190 | message.cache_lifetime = data.cache_lifetime; 191 | } 192 | return message; 193 | } 194 | toObject() { 195 | const data: { 196 | url?: string; 197 | selector_post?: string; 198 | selector_title?: string; 199 | selector_link?: string; 200 | selector_description?: string; 201 | selector_author?: string; 202 | selector_created?: string; 203 | created_extract_from?: ExtractFrom; 204 | created_attribute_name?: string; 205 | selector_content?: string; 206 | selector_enclosure?: string; 207 | cache_lifetime?: string; 208 | } = {}; 209 | if (this.url != null) { 210 | data.url = this.url; 211 | } 212 | if (this.selector_post != null) { 213 | data.selector_post = this.selector_post; 214 | } 215 | if (this.selector_title != null) { 216 | data.selector_title = this.selector_title; 217 | } 218 | if (this.selector_link != null) { 219 | data.selector_link = this.selector_link; 220 | } 221 | if (this.selector_description != null) { 222 | data.selector_description = this.selector_description; 223 | } 224 | if (this.selector_author != null) { 225 | data.selector_author = this.selector_author; 226 | } 227 | if (this.selector_created != null) { 228 | data.selector_created = this.selector_created; 229 | } 230 | if (this.created_extract_from != null) { 231 | 
data.created_extract_from = this.created_extract_from; 232 | } 233 | if (this.created_attribute_name != null) { 234 | data.created_attribute_name = this.created_attribute_name; 235 | } 236 | if (this.selector_content != null) { 237 | data.selector_content = this.selector_content; 238 | } 239 | if (this.selector_enclosure != null) { 240 | data.selector_enclosure = this.selector_enclosure; 241 | } 242 | if (this.cache_lifetime != null) { 243 | data.cache_lifetime = this.cache_lifetime; 244 | } 245 | return data; 246 | } 247 | serialize(): Uint8Array; 248 | serialize(w: pb_1.BinaryWriter): void; 249 | serialize(w?: pb_1.BinaryWriter): Uint8Array | void { 250 | const writer = w || new pb_1.BinaryWriter(); 251 | if (this.url.length) 252 | writer.writeString(1, this.url); 253 | if (this.selector_post.length) 254 | writer.writeString(2, this.selector_post); 255 | if (this.selector_title.length) 256 | writer.writeString(3, this.selector_title); 257 | if (this.selector_link.length) 258 | writer.writeString(4, this.selector_link); 259 | if (this.selector_description.length) 260 | writer.writeString(5, this.selector_description); 261 | if (this.selector_author.length) 262 | writer.writeString(6, this.selector_author); 263 | if (this.selector_created.length) 264 | writer.writeString(7, this.selector_created); 265 | if (this.created_extract_from != ExtractFrom.InnerText) 266 | writer.writeEnum(11, this.created_extract_from); 267 | if (this.created_attribute_name.length) 268 | writer.writeString(12, this.created_attribute_name); 269 | if (this.selector_content.length) 270 | writer.writeString(8, this.selector_content); 271 | if (this.selector_enclosure.length) 272 | writer.writeString(9, this.selector_enclosure); 273 | if (this.cache_lifetime.length) 274 | writer.writeString(10, this.cache_lifetime); 275 | if (!w) 276 | return writer.getResultBuffer(); 277 | } 278 | static deserialize(bytes: Uint8Array | pb_1.BinaryReader): Specs { 279 | const reader = bytes instanceof 
pb_1.BinaryReader ? bytes : new pb_1.BinaryReader(bytes), message = new Specs(); 280 | while (reader.nextField()) { 281 | if (reader.isEndGroup()) 282 | break; 283 | switch (reader.getFieldNumber()) { 284 | case 1: 285 | message.url = reader.readString(); 286 | break; 287 | case 2: 288 | message.selector_post = reader.readString(); 289 | break; 290 | case 3: 291 | message.selector_title = reader.readString(); 292 | break; 293 | case 4: 294 | message.selector_link = reader.readString(); 295 | break; 296 | case 5: 297 | message.selector_description = reader.readString(); 298 | break; 299 | case 6: 300 | message.selector_author = reader.readString(); 301 | break; 302 | case 7: 303 | message.selector_created = reader.readString(); 304 | break; 305 | case 11: 306 | message.created_extract_from = reader.readEnum(); 307 | break; 308 | case 12: 309 | message.created_attribute_name = reader.readString(); 310 | break; 311 | case 8: 312 | message.selector_content = reader.readString(); 313 | break; 314 | case 9: 315 | message.selector_enclosure = reader.readString(); 316 | break; 317 | case 10: 318 | message.cache_lifetime = reader.readString(); 319 | break; 320 | default: reader.skipField(); 321 | } 322 | } 323 | return message; 324 | } 325 | serializeBinary(): Uint8Array { 326 | return this.serialize(); 327 | } 328 | static deserializeBinary(bytes: Uint8Array): Specs { 329 | return Specs.deserialize(bytes); 330 | } 331 | } 332 | } 333 | -------------------------------------------------------------------------------- /frontend/wizard-vue/src/urlmaker/specs.ts: -------------------------------------------------------------------------------- 1 | import { 2 | validateAttribute, 3 | validateDuration, 4 | validateSelector, 5 | validateUrl, 6 | type validator 7 | } from "@/urlmaker/validators.ts"; 8 | import {rssalchemy} from "@/urlmaker/proto/specs.ts"; 9 | import type {Enum} from "@/common/enum.ts"; 10 | 11 | export const defaultSpecs = { 12 | url: '', 13 | selector_post: '', 14 
| selector_title: '', 15 | selector_link: '', 16 | selector_description: '', 17 | selector_author: '', 18 | selector_content: '', 19 | selector_enclosure: '', 20 | selector_created: '', 21 | created_extract_from: rssalchemy.ExtractFrom.InnerText, 22 | created_attribute_name: '', 23 | cache_lifetime: '10m' 24 | }; 25 | 26 | export type SpecValue = string | number; 27 | export type Specs = typeof defaultSpecs; 28 | 29 | export enum InputType { 30 | Url = 'url', 31 | Text = 'text', 32 | Radio = 'radio' 33 | } 34 | 35 | export interface SpecField { 36 | name: keyof Specs 37 | input_type: InputType 38 | enum?: Enum, 39 | label: string 40 | validate: validator 41 | required?: boolean 42 | group?: string 43 | show_if?: (specs: Specs) => boolean 44 | } 45 | 46 | export const fields: SpecField[] = [ 47 | { 48 | name: 'url', 49 | input_type: InputType.Url, 50 | label: 'URL of page for converting', 51 | validate: validateUrl, 52 | required: true, 53 | }, 54 | { 55 | name: 'selector_post', 56 | input_type: InputType.Text, 57 | label: 'CSS Selector for post', 58 | validate: validateSelector, 59 | }, 60 | { 61 | name: 'selector_title', 62 | input_type: InputType.Text, 63 | label: 'CSS Selector for title', 64 | validate: validateSelector, 65 | }, 66 | { 67 | name: 'selector_link', 68 | input_type: InputType.Text, 69 | label: 'CSS Selector for link', 70 | validate: validateSelector, 71 | }, 72 | { 73 | name: 'selector_description', 74 | input_type: InputType.Text, 75 | label: 'CSS Selector for description', 76 | validate: validateSelector, 77 | }, 78 | { 79 | name: 'selector_author', 80 | input_type: InputType.Text, 81 | label: 'CSS Selector for author', 82 | validate: validateSelector, 83 | }, 84 | 85 | { 86 | name: 'selector_created', 87 | input_type: InputType.Text, 88 | label: 'CSS Selector for created date', 89 | validate: validateSelector, 90 | group: 'created', 91 | }, 92 | { 93 | name: 'created_extract_from', 94 | input_type: InputType.Radio, 95 | enum: [ 96 | {label: 
'Inner Text', value: rssalchemy.ExtractFrom.InnerText}, 97 | {label: 'Attribute', value: rssalchemy.ExtractFrom.Attribute}, 98 | ], 99 | label: 'Extract from', 100 | validate: value => Object.values(rssalchemy.ExtractFrom).includes(value), 101 | group: 'created', 102 | show_if: specs => !!specs.selector_created, 103 | }, 104 | { 105 | name: 'created_attribute_name', 106 | input_type: InputType.Text, 107 | label: 'Attribute name', 108 | validate: validateAttribute, 109 | show_if: specs => 110 | !!specs.selector_created && specs.created_extract_from === rssalchemy.ExtractFrom.Attribute, 111 | group: 'created', 112 | }, 113 | 114 | { 115 | name: 'selector_content', 116 | input_type: InputType.Text, 117 | label: 'CSS Selector for content', 118 | validate: validateSelector, 119 | }, 120 | { 121 | name: 'selector_enclosure', 122 | input_type: InputType.Text, 123 | label: 'CSS Selector for enclosure (e.g. image url)', 124 | validate: validateSelector, 125 | }, 126 | { 127 | name: 'cache_lifetime', 128 | input_type: InputType.Text, 129 | label: 'Cache lifetime (format examples: 10s, 1m, 2h)', 130 | validate: validateDuration, 131 | }, 132 | ]; 133 | -------------------------------------------------------------------------------- /frontend/wizard-vue/src/urlmaker/utils.ts: -------------------------------------------------------------------------------- 1 | export async function compress(s: string|Uint8Array): Promise { 2 | if(typeof s === 'string') { 3 | s = new TextEncoder().encode(s); 4 | } 5 | let cs = new CompressionStream('deflate-raw'); 6 | let writer = cs.writable.getWriter(); 7 | // noinspection ES6MissingAwait 8 | writer.write(s); 9 | // noinspection ES6MissingAwait 10 | writer.close(); 11 | let response = new Response(cs.readable); 12 | return new Uint8Array(await response.arrayBuffer()); 13 | } 14 | 15 | export async function decompress(buf: Uint8Array): Promise { 16 | let ds = new DecompressionStream('deflate-raw'); 17 | let writer = ds.writable.getWriter(); 18 
| // noinspection ES6MissingAwait 19 | writer.write(buf); 20 | // noinspection ES6MissingAwait 21 | writer.close(); 22 | let response = new Response(ds.readable); 23 | return response.bytes(); 24 | } 25 | 26 | export async function decompressString(buf: Uint8Array): Promise { 27 | const binary = await decompress(buf); 28 | // @ts-ignore 29 | return String.fromCharCode.apply(null, binary); 30 | } 31 | 32 | export function b64encode(buf: Uint8Array): string { 33 | // @ts-ignore 34 | const b64str = btoa(String.fromCharCode.apply(null, buf)); 35 | // @ts-ignore 36 | return b64str.replaceAll('=', ''); 37 | } 38 | 39 | export function b64decode(s: string): Uint8Array { 40 | return Uint8Array.from(atob(s), c => c.charCodeAt(0)); 41 | } 42 | -------------------------------------------------------------------------------- /frontend/wizard-vue/src/urlmaker/validators.ts: -------------------------------------------------------------------------------- 1 | import {presetPrefix} from "@/urlmaker/index.ts"; 2 | import type {SpecValue} from "@/urlmaker/specs.ts"; 3 | 4 | export type validator = (v: SpecValue) => boolean; 5 | 6 | export function validateUrl(s: SpecValue): boolean { 7 | let url; 8 | try { 9 | url = new URL(s as string); 10 | return url.protocol === "http:" || url.protocol === "https:" 11 | } catch { 12 | return false; 13 | } 14 | } 15 | 16 | export function validatePreset(s: SpecValue): boolean { 17 | return (s as string).startsWith(presetPrefix); 18 | } 19 | 20 | export function validateSelector(s: SpecValue): boolean { 21 | try { 22 | document.createDocumentFragment().querySelector(s as string); 23 | return true; 24 | } catch { 25 | return false; 26 | } 27 | } 28 | 29 | export function validateAttribute(s: SpecValue): boolean { 30 | return /([^\t\n\f \/>"'=]+)/.test(s as string); 31 | } 32 | 33 | export function validateDuration(s: SpecValue): boolean { 34 | return /^\d+[smh]$/.test(s as string); 35 | } 36 | 
-------------------------------------------------------------------------------- /frontend/wizard-vue/tsconfig.app.json: -------------------------------------------------------------------------------- 1 | { 2 | "extends": "@vue/tsconfig/tsconfig.dom.json", 3 | "include": ["env.d.ts", "src/**/*", "src/**/*.vue"], 4 | "exclude": ["src/**/__tests__/*"], 5 | "compilerOptions": { 6 | "tsBuildInfoFile": "./node_modules/.tmp/tsconfig.app.tsbuildinfo", 7 | 8 | "paths": { 9 | "@/*": ["./src/*"] 10 | } 11 | } 12 | } 13 | -------------------------------------------------------------------------------- /frontend/wizard-vue/tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "files": [], 3 | "references": [ 4 | { 5 | "path": "./tsconfig.node.json" 6 | }, 7 | { 8 | "path": "./tsconfig.app.json" 9 | } 10 | ], 11 | "compilerOptions": { 12 | "lib": [ 13 | "es2021" 14 | ] 15 | } 16 | } 17 | -------------------------------------------------------------------------------- /frontend/wizard-vue/tsconfig.node.json: -------------------------------------------------------------------------------- 1 | { 2 | "extends": "@tsconfig/node22/tsconfig.json", 3 | "include": [ 4 | "vite.config.*", 5 | "vitest.config.*", 6 | "cypress.config.*", 7 | "nightwatch.conf.*", 8 | "playwright.config.*", 9 | "eslint.config.*" 10 | ], 11 | "compilerOptions": { 12 | "noEmit": true, 13 | "tsBuildInfoFile": "./node_modules/.tmp/tsconfig.node.tsbuildinfo", 14 | 15 | "module": "ESNext", 16 | "moduleResolution": "Bundler", 17 | "types": ["node"] 18 | } 19 | } 20 | -------------------------------------------------------------------------------- /frontend/wizard-vue/vite.config.ts: -------------------------------------------------------------------------------- 1 | import { fileURLToPath, URL } from 'node:url' 2 | 3 | import { defineConfig } from 'vite' 4 | import vue from '@vitejs/plugin-vue' 5 | import vueDevTools from 'vite-plugin-vue-devtools' 6 | 7 | // 
https://vite.dev/config/ 8 | export default defineConfig({ 9 | plugins: [ 10 | vue(), 11 | vueDevTools(), 12 | ], 13 | resolve: { 14 | alias: { 15 | '@': fileURLToPath(new URL('./src', import.meta.url)) 16 | }, 17 | }, 18 | }) 19 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/egor3f/rssalchemy 2 | 3 | go 1.23.2 4 | 5 | toolchain go1.24.0 6 | 7 | require ( 8 | github.com/AdguardTeam/urlfilter v0.20.0 9 | github.com/ericchiang/css v1.4.0 10 | github.com/felixge/fgprof v0.9.5 11 | github.com/go-playground/validator/v10 v10.26.0 12 | github.com/go-redsync/redsync/v4 v4.8.1 13 | github.com/gorilla/feeds v1.2.0 14 | github.com/ilyakaznacheev/cleanenv v1.5.0 15 | github.com/jellydator/ttlcache/v3 v3.3.0 16 | github.com/labstack/echo/v4 v4.13.3 17 | github.com/labstack/gommon v0.4.2 18 | github.com/markusmobius/go-dateparser v1.2.3 19 | github.com/mennanov/limiters v1.11.0 20 | github.com/nats-io/nats.go v1.38.0 21 | github.com/playwright-community/playwright-go v0.5001.0 22 | github.com/redis/go-redis/v9 v9.7.0 23 | github.com/srikrsna/protoc-gen-gotag v1.0.2 24 | github.com/stretchr/testify v1.10.0 25 | golang.org/x/time v0.8.0 26 | google.golang.org/protobuf v1.35.2 27 | ) 28 | 29 | require ( 30 | github.com/AdguardTeam/golibs v0.29.0 // indirect 31 | github.com/BurntSushi/toml v1.2.1 // indirect 32 | github.com/alessandro-c/gomemcached-lock v1.0.0 // indirect 33 | github.com/armon/go-metrics v0.4.1 // indirect 34 | github.com/aws/aws-sdk-go-v2 v1.32.5 // indirect 35 | github.com/aws/aws-sdk-go-v2/feature/dynamodb/attributevalue v1.15.17 // indirect 36 | github.com/aws/aws-sdk-go-v2/internal/configsources v1.3.24 // indirect 37 | github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.6.24 // indirect 38 | github.com/aws/aws-sdk-go-v2/service/dynamodb v1.37.1 // indirect 39 | 
github.com/aws/aws-sdk-go-v2/service/dynamodbstreams v1.24.6 // indirect 40 | github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.12.1 // indirect 41 | github.com/aws/aws-sdk-go-v2/service/internal/endpoint-discovery v1.10.5 // indirect 42 | github.com/aws/smithy-go v1.22.1 // indirect 43 | github.com/bradfitz/gomemcache v0.0.0-20230905024940-24af94b03874 // indirect 44 | github.com/cenkalti/backoff/v3 v3.2.2 // indirect 45 | github.com/cespare/xxhash/v2 v2.3.0 // indirect 46 | github.com/coreos/go-semver v0.3.1 // indirect 47 | github.com/coreos/go-systemd/v22 v22.5.0 // indirect 48 | github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect 49 | github.com/deckarep/golang-set/v2 v2.7.0 // indirect 50 | github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f // indirect 51 | github.com/elliotchance/pie/v2 v2.7.0 // indirect 52 | github.com/fatih/color v1.16.0 // indirect 53 | github.com/gabriel-vasile/mimetype v1.4.8 // indirect 54 | github.com/go-jose/go-jose/v3 v3.0.3 // indirect 55 | github.com/go-playground/locales v0.14.1 // indirect 56 | github.com/go-playground/universal-translator v0.18.1 // indirect 57 | github.com/go-stack/stack v1.8.1 // indirect 58 | github.com/gogo/protobuf v1.3.2 // indirect 59 | github.com/golang/protobuf v1.5.4 // indirect 60 | github.com/google/pprof v0.0.0-20240227163752-401108e1b7e7 // indirect 61 | github.com/hablullah/go-hijri v1.0.2 // indirect 62 | github.com/hablullah/go-juliandays v1.0.0 // indirect 63 | github.com/hashicorp/consul/api v1.30.0 // indirect 64 | github.com/hashicorp/errwrap v1.1.0 // indirect 65 | github.com/hashicorp/go-cleanhttp v0.5.2 // indirect 66 | github.com/hashicorp/go-hclog v1.5.0 // indirect 67 | github.com/hashicorp/go-immutable-radix v1.3.1 // indirect 68 | github.com/hashicorp/go-multierror v1.1.1 // indirect 69 | github.com/hashicorp/go-rootcerts v1.0.2 // indirect 70 | github.com/hashicorp/golang-lru v0.5.4 // indirect 71 | github.com/hashicorp/serf 
v0.10.1 // indirect 72 | github.com/jalaali/go-jalaali v0.0.0-20210801064154-80525e88d958 // indirect 73 | github.com/jmespath/go-jmespath v0.4.0 // indirect 74 | github.com/joho/godotenv v1.5.1 // indirect 75 | github.com/klauspost/compress v1.17.9 // indirect 76 | github.com/leodido/go-urn v1.4.0 // indirect 77 | github.com/lib/pq v1.10.9 // indirect 78 | github.com/magefile/mage v1.14.0 // indirect 79 | github.com/mattn/go-colorable v0.1.14 // indirect 80 | github.com/mattn/go-isatty v0.0.20 // indirect 81 | github.com/miekg/dns v1.1.61 // indirect 82 | github.com/mitchellh/go-homedir v1.1.0 // indirect 83 | github.com/mitchellh/mapstructure v1.5.0 // indirect 84 | github.com/nats-io/nkeys v0.4.9 // indirect 85 | github.com/nats-io/nuid v1.0.1 // indirect 86 | github.com/pkg/errors v0.9.1 // indirect 87 | github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect 88 | github.com/samuel/go-zookeeper v0.0.0-20201211165307-7117e9ea2414 // indirect 89 | github.com/tetratelabs/wazero v1.2.1 // indirect 90 | github.com/thanhpk/randstr v1.0.4 // indirect 91 | github.com/valyala/bytebufferpool v1.0.0 // indirect 92 | github.com/valyala/fasttemplate v1.2.2 // indirect 93 | github.com/wasilibs/go-re2 v1.3.0 // indirect 94 | go.etcd.io/etcd/api/v3 v3.5.17 // indirect 95 | go.etcd.io/etcd/client/pkg/v3 v3.5.17 // indirect 96 | go.etcd.io/etcd/client/v3 v3.5.17 // indirect 97 | go.uber.org/atomic v1.10.0 // indirect 98 | go.uber.org/multierr v1.9.0 // indirect 99 | go.uber.org/zap v1.24.0 // indirect 100 | golang.org/x/crypto v0.33.0 // indirect 101 | golang.org/x/exp v0.0.0-20240909161429-701f63a606c0 // indirect 102 | golang.org/x/mod v0.21.0 // indirect 103 | golang.org/x/net v0.34.0 // indirect 104 | golang.org/x/sync v0.11.0 // indirect 105 | golang.org/x/sys v0.30.0 // indirect 106 | golang.org/x/text v0.22.0 // indirect 107 | golang.org/x/tools v0.25.0 // indirect 108 | google.golang.org/genproto/googleapis/api v0.0.0-20240814211410-ddb44dafa142 // 
indirect 109 | google.golang.org/genproto/googleapis/rpc v0.0.0-20240814211410-ddb44dafa142 // indirect 110 | google.golang.org/grpc v1.67.1 // indirect 111 | gopkg.in/yaml.v3 v3.0.1 // indirect 112 | olympos.io/encoding/edn v0.0.0-20201019073823-d3554ca0b0a3 // indirect 113 | ) 114 | 115 | replace github.com/ericchiang/css => github.com/egor3f/css v0.0.0-20250507004805-bfefe22b74a4 116 | -------------------------------------------------------------------------------- /internal/adapters/adapters.go: -------------------------------------------------------------------------------- 1 | package adapters 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | "time" 7 | ) 8 | 9 | type WorkQueue interface { 10 | Enqueue(ctx context.Context, key string, payload []byte) (result []byte, err error) 11 | } 12 | 13 | var ErrKeyNotFound = fmt.Errorf("key not found") 14 | 15 | type Cache interface { 16 | Get(key string) (result []byte, ts time.Time, err error) 17 | Set(key string, payload []byte) (err error) 18 | } 19 | 20 | type QueueConsumer interface { 21 | ConsumeQueue( 22 | ctx context.Context, 23 | taskFunc func(taskPayload []byte) (cacheKey string, result []byte, err error), 24 | ) error 25 | } 26 | -------------------------------------------------------------------------------- /internal/adapters/natsadapter/natsadapter.go: -------------------------------------------------------------------------------- 1 | package natsadapter 2 | 3 | import ( 4 | "context" 5 | "errors" 6 | "fmt" 7 | "github.com/egor3f/rssalchemy/internal/adapters" 8 | "github.com/labstack/gommon/log" 9 | "github.com/nats-io/nats.go" 10 | "github.com/nats-io/nats.go/jetstream" 11 | "sync" 12 | "time" 13 | ) 14 | 15 | type NatsAdapter struct { 16 | jets jetstream.JetStream 17 | jstream jetstream.Stream 18 | kv jetstream.KeyValue 19 | streamName string 20 | 21 | runningMu sync.Mutex 22 | running map[string]struct{} 23 | } 24 | 25 | func New(natsc *nats.Conn, streamName string) (*NatsAdapter, error) { 26 | na := 
NatsAdapter{} 27 | var err error 28 | 29 | if len(streamName) == 0 { 30 | return nil, fmt.Errorf("stream name is empty") 31 | } 32 | na.streamName = streamName 33 | 34 | na.jets, err = jetstream.New(natsc) 35 | if err != nil { 36 | return nil, fmt.Errorf("create jetstream: %w", err) 37 | } 38 | 39 | na.jstream, err = na.jets.CreateOrUpdateStream(context.TODO(), jetstream.StreamConfig{ 40 | Name: streamName, 41 | Subjects: []string{fmt.Sprintf("%s.>", streamName)}, 42 | Retention: jetstream.WorkQueuePolicy, 43 | AllowDirect: true, 44 | }) 45 | if err != nil { 46 | return nil, fmt.Errorf("create js stream: %w", err) 47 | } 48 | 49 | na.kv, err = na.jets.CreateKeyValue(context.TODO(), jetstream.KeyValueConfig{ 50 | Bucket: "render_cache", 51 | }) 52 | if err != nil { 53 | return nil, fmt.Errorf("create nats kv: %w", err) 54 | } 55 | 56 | na.running = make(map[string]struct{}) 57 | 58 | return &na, nil 59 | } 60 | 61 | func (na *NatsAdapter) Enqueue(ctx context.Context, key string, payload []byte) ([]byte, error) { 62 | // prevent resubmitting already running task 63 | na.runningMu.Lock() 64 | _, alreadyRunning := na.running[key] 65 | na.running[key] = struct{}{} 66 | na.runningMu.Unlock() 67 | defer func() { 68 | na.runningMu.Lock() 69 | delete(na.running, key) 70 | na.runningMu.Unlock() 71 | }() 72 | 73 | watcher, err := na.kv.Watch(ctx, key) 74 | if err != nil { 75 | return nil, fmt.Errorf("nats watch failed: %w", err) 76 | } 77 | defer watcher.Stop() 78 | 79 | var taskEnqueued bool 80 | for { 81 | select { 82 | case upd := <-watcher.Updates(): 83 | if upd != nil { 84 | if !taskEnqueued { 85 | // old value from cache, skipping 86 | continue 87 | } 88 | log.Infof("got value for task: %s, payload=%.100s", key, upd.Value()) 89 | return upd.Value(), nil 90 | } 91 | taskEnqueued = true 92 | if alreadyRunning { 93 | log.Infof("already running: %s", key) 94 | continue 95 | } 96 | log.Infof("sending task to queue: %s", key) 97 | _, err = na.jets.Publish( 98 | ctx, 99 | 
fmt.Sprintf("%s.%s", na.streamName, key), 100 | payload, 101 | ) 102 | if err != nil { 103 | return nil, fmt.Errorf("nats publish error: %v", err) 104 | } 105 | case <-ctx.Done(): 106 | log.Warnf("task cancelled by context: %s", key) 107 | return nil, ctx.Err() 108 | } 109 | } 110 | } 111 | 112 | func (na *NatsAdapter) Get(key string) (result []byte, ts time.Time, err error) { 113 | entry, err := na.kv.Get(context.TODO(), key) 114 | if err != nil { 115 | if errors.Is(err, jetstream.ErrKeyNotFound) { 116 | return nil, time.Time{}, adapters.ErrKeyNotFound 117 | } 118 | return nil, time.Time{}, fmt.Errorf("nats: %w", err) 119 | } 120 | return entry.Value(), entry.Created(), nil 121 | } 122 | 123 | func (na *NatsAdapter) Set(key string, payload []byte) error { 124 | _, err := na.kv.Put(context.TODO(), key, payload) 125 | if err != nil { 126 | return fmt.Errorf("nats: %w", err) 127 | } 128 | return nil 129 | } 130 | 131 | func (na *NatsAdapter) ConsumeQueue( 132 | ctx context.Context, 133 | taskFunc func(taskPayload []byte) (cacheKey string, result []byte, err error), 134 | ) error { 135 | cons, err := na.jstream.CreateOrUpdateConsumer(ctx, jetstream.ConsumerConfig{ 136 | Durable: "worker", 137 | }) 138 | if err != nil { 139 | return fmt.Errorf("create js consumer: %w", err) 140 | } 141 | consCtx, err := cons.Consume(func(msg jetstream.Msg) { 142 | metadata, err := msg.Metadata() 143 | if err != nil { 144 | log.Errorf("msg metadata: %v", err) 145 | return 146 | } 147 | seq := metadata.Sequence.Stream 148 | if err := msg.InProgress(); err != nil { 149 | log.Errorf("task seq=%d inProgress: %v", seq, err) 150 | } 151 | log.Infof("got task seq=%d payload=%.100s", seq, msg.Data()) 152 | 153 | defer func() { 154 | if err := recover(); err != nil { 155 | log.Errorf("recovered panic from consumer: %v", err) 156 | if err := msg.Term(); err != nil { 157 | log.Errorf("term in recover: %v", err) 158 | } 159 | } 160 | }() 161 | cacheKey, resultPayload, taskErr := 
taskFunc(msg.Data()) 162 | 163 | if err := msg.DoubleAck(ctx); err != nil { 164 | log.Errorf("double ack seq=%d: %v", seq, err) 165 | } 166 | 167 | if taskErr != nil { 168 | log.Errorf("taskFunc seq=%d error: %v", seq, taskErr) 169 | return 170 | } 171 | 172 | log.Infof("task finished seq=%d cachekey=%s payload=%.100s", seq, cacheKey, resultPayload) 173 | if _, err := na.kv.Put(ctx, cacheKey, resultPayload); err != nil { 174 | log.Errorf("put seq=%d to cache: %v", seq, err) 175 | return 176 | } 177 | }) 178 | if err != nil { 179 | return fmt.Errorf("consume context: %w", err) 180 | } 181 | log.Infof("ready to consume tasks") 182 | <-ctx.Done() 183 | log.Infof("stopping consumer") 184 | consCtx.Stop() 185 | return nil 186 | } 187 | -------------------------------------------------------------------------------- /internal/api/http/handler.go: -------------------------------------------------------------------------------- 1 | package http 2 | 3 | import ( 4 | "bytes" 5 | "compress/flate" 6 | "context" 7 | "encoding/base64" 8 | "encoding/json" 9 | "errors" 10 | "fmt" 11 | "github.com/egor3f/rssalchemy/internal/adapters" 12 | "github.com/egor3f/rssalchemy/internal/api/http/pb" 13 | "github.com/egor3f/rssalchemy/internal/models" 14 | "github.com/egor3f/rssalchemy/internal/validators" 15 | "github.com/go-playground/validator/v10" 16 | "github.com/gorilla/feeds" 17 | "github.com/labstack/echo/v4" 18 | "github.com/labstack/gommon/log" 19 | "golang.org/x/time/rate" 20 | "google.golang.org/protobuf/proto" 21 | "html" 22 | "io" 23 | "net/url" 24 | "strconv" 25 | "strings" 26 | "sync" 27 | "time" 28 | ) 29 | 30 | const ( 31 | taskTimeout = 1 * time.Minute 32 | minLifetime = time.Duration(0) 33 | maxLifetime = 24 * time.Hour 34 | ) 35 | 36 | type Handler struct { 37 | validate *validator.Validate 38 | workQueue adapters.WorkQueue 39 | cache adapters.Cache 40 | rateLimit rate.Limit 41 | rateLimitBurst int 42 | limits map[string]*rate.Limiter 43 | limitsMu sync.RWMutex 44 | debug 
bool
}

// New constructs a Handler with its dependencies and registers the custom
// "selector" validation used by decodeSpecs. Panics when a required
// dependency is nil (wiring bug, not a runtime condition).
func New(wq adapters.WorkQueue, cache adapters.Cache, rateLimit rate.Limit, rateLimitBurst int, debug bool) *Handler {
	if wq == nil || cache == nil {
		panic("you fckd up with di again")
	}
	h := Handler{
		workQueue:      wq,
		cache:          cache,
		rateLimit:      rateLimit,
		rateLimitBurst: rateLimitBurst,
		limits:         make(map[string]*rate.Limiter),
		debug:          debug,
	}
	h.validate = validator.New(validator.WithRequiredStructEnabled())
	if err := h.validate.RegisterValidation("selector", validators.ValidateSelector); err != nil {
		log.Panicf("register validation: %v", err)
	}
	return &h
}

// SetupRoutes attaches the handler's endpoints to the given route group.
func (h *Handler) SetupRoutes(g *echo.Group) {
	g.GET("/render/:specs", h.handleRender)
	g.GET("/screenshot", h.handlePageScreenshot)
}

// handleRender decodes extraction specs from the URL path, serves a cached
// result when it is still fresh, otherwise (subject to per-IP rate limiting)
// enqueues an extraction task, and renders the result as an Atom feed.
func (h *Handler) handleRender(c echo.Context) error {
	specsParam := c.Param("specs")
	specs, err := h.decodeSpecs(specsParam)
	if err != nil {
		return echo.NewHTTPError(400, fmt.Errorf("decode specs: %w", err))
	}

	// Translate the wire enum into the internal model enum; unknown values
	// are rejected rather than silently defaulted.
	extractFrom, ok := map[pb.ExtractFrom]models.ExtractFrom{
		pb.ExtractFrom_InnerText: models.ExtractFrom_InnerText,
		pb.ExtractFrom_Attribute: models.ExtractFrom_Attribute,
	}[specs.CreatedExtractFrom]
	if !ok {
		return echo.NewHTTPError(400, "invalid extract from")
	}

	task := models.Task{
		TaskType:             models.TaskTypeExtract,
		URL:                  specs.Url,
		SelectorPost:         specs.SelectorPost,
		SelectorTitle:        specs.SelectorTitle,
		SelectorLink:         specs.SelectorLink,
		SelectorDescription:  specs.SelectorDescription,
		SelectorAuthor:       specs.SelectorAuthor,
		SelectorCreated:      specs.SelectorCreated,
		CreatedExtractFrom:   extractFrom,
		CreatedAttributeName: specs.CreatedAttributeName,
		SelectorContent:      specs.SelectorContent,
		SelectorEnclosure:    specs.SelectorEnclosure,
		Headers:              extractHeaders(c),
	}

	// Client-requested cache lifetime, clamped to [minLifetime, maxLifetime].
	// Debug mode forces 0 so every request re-runs the extraction.
	cacheLifetime, err := time.ParseDuration(specs.CacheLifetime)
	if err != nil {
		return echo.NewHTTPError(400, "invalid cache lifetime")
	}
	if cacheLifetime < minLifetime {
		cacheLifetime = minLifetime
	}
	if cacheLifetime > maxLifetime {
		cacheLifetime = maxLifetime
	}
	if h.debug {
		cacheLifetime = 0
	}

	timeoutCtx, cancel := context.WithTimeout(context.Background(), taskTimeout)
	defer cancel()

	encodedTask, err := json.Marshal(task)
	if err != nil {
		return echo.NewHTTPError(500, fmt.Errorf("task marshal error: %v", err))
	}
	log.Debugf("Encoded task: %s", encodedTask)

	taskResultBytes, cachedTS, err := h.cache.Get(task.CacheKey())
	if err != nil && !errors.Is(err, adapters.ErrKeyNotFound) {
		return echo.NewHTTPError(500, fmt.Errorf("cache failed: %v", err))
	}
	// Cache miss or stale entry: only then is the rate limit consumed and a
	// worker task enqueued.
	if errors.Is(err, adapters.ErrKeyNotFound) || time.Since(cachedTS) > cacheLifetime {
		if !h.checkRateLimit(c) {
			return echo.ErrTooManyRequests
		}
		taskResultBytes, err = h.workQueue.Enqueue(timeoutCtx, task.CacheKey(), encodedTask)
		if err != nil {
			return echo.NewHTTPError(500, fmt.Errorf("task enqueue failed: %v", err))
		}
	}

	var result models.TaskResult
	if err := json.Unmarshal(taskResultBytes, &result); err != nil {
		return echo.NewHTTPError(500, fmt.Errorf("cached value unmarshal failed: %v", err))
	}

	atom, err := makeFeed(task, result)
	if err != nil {
		log.Errorf("make feed failed: %v", err)
		return echo.NewHTTPError(500)
	}

	c.Response().Header().Set("Content-Type", "text/xml")
	return c.String(200, atom)
}

// handlePageScreenshot enqueues a screenshot task for the page given in the
// "url" query parameter and returns the resulting PNG. Screenshot results are
// not cached, so the rate limit is always consumed.
func (h *Handler) handlePageScreenshot(c echo.Context) error {
	pageUrl := c.QueryParam("url")
	// BUG FIX: url.Parse succeeds even for an empty string, so the previous
	// err-only check never rejected a missing/relative URL. Require an
	// absolute URL (scheme and host present).
	if u, err := url.Parse(pageUrl); err != nil || u.Scheme == "" || u.Host == "" {
		return echo.NewHTTPError(400, "url is invalid or missing")
	}

	task := models.Task{
		TaskType: models.TaskTypePageScreenshot,
		URL:      pageUrl,
		Headers:  extractHeaders(c),
	}

	timeoutCtx, cancel := context.WithTimeout(context.Background(), taskTimeout)
	defer cancel()

	encodedTask, err := json.Marshal(task)
	if err != nil {
		return echo.NewHTTPError(500, fmt.Errorf("task marshal error: %v", err))
	}

	if !h.checkRateLimit(c) {
		return echo.ErrTooManyRequests
	}

	taskResultBytes, err := h.workQueue.Enqueue(timeoutCtx, task.CacheKey(), encodedTask)
	if err != nil {
		return echo.NewHTTPError(500, fmt.Errorf("queued cache failed: %v", err))
	}

	var result models.ScreenshotTaskResult
	if err := json.Unmarshal(taskResultBytes, &result); err != nil {
		return echo.NewHTTPError(500, fmt.Errorf("task result unmarshal failed: %v", err))
	}
	return c.Blob(200, "image/png", result.Image)
}

// checkRateLimit reports whether the request's client IP may consume a task
// token. A limiter is created lazily per IP using double-checked locking so
// the common path only takes the read lock.
func (h *Handler) checkRateLimit(c echo.Context) bool {
	h.limitsMu.RLock()
	limiter, ok := h.limits[c.RealIP()]
	h.limitsMu.RUnlock()
	if !ok {
		h.limitsMu.Lock()
		// Re-check under the write lock: another goroutine may have created
		// the limiter between RUnlock and Lock.
		limiter, ok = h.limits[c.RealIP()]
		if !ok {
			limiter = rate.NewLimiter(h.rateLimit, h.rateLimitBurst)
			h.limits[c.RealIP()] = limiter
		}
		h.limitsMu.Unlock()
	}
	log.Debugf("Rate limiter for ip=%s tokens=%f", c.RealIP(), limiter.Tokens())
	return limiter.Allow()
}

// decodeSpecs parses the path parameter into Specs. The parameter is
// optionally prefixed with "<version>:"; the payload is unpadded base64 of a
// DEFLATE stream containing either JSON (version 0, legacy) or protobuf
// (version 1). The decoded specs are validated before being returned.
func (h *Handler) decodeSpecs(specsParam string) (*pb.Specs, error) {
	var err error
	version := 0
	paramSplit := strings.Split(specsParam, ":")
	if len(paramSplit) == 2 {
		version, err = strconv.Atoi(paramSplit[0])
		if err != nil {
			return nil, fmt.Errorf("invalid version: %s", paramSplit[0])
		}
		specsParam = paramSplit[1]
	}

	decodedSpecsParam, err := base64.StdEncoding.WithPadding(base64.NoPadding).DecodeString(specsParam)
	if err != nil {
		return nil, fmt.Errorf("failed to decode specs: %w", err)
	}
	rc := flate.NewReader(bytes.NewReader(decodedSpecsParam))
	// FIX: the flate reader was never closed.
	defer rc.Close()
	decodedSpecsParam, err = io.ReadAll(rc)
	if err != nil {
		return nil, fmt.Errorf("failed to unzip specs: %w", err)
	}

	specs := &pb.Specs{}
	switch version {
	case 0:
		if err := json.Unmarshal(decodedSpecsParam, specs); err != nil {
			return nil, fmt.Errorf("failed to unmarshal json specs: %w", err)
		}
	case 1:
		if err := proto.Unmarshal(decodedSpecsParam, specs); err != nil {
			return nil, fmt.Errorf("failed to unmarshal proto specs: %w", err)
		}
	default:
		return nil, fmt.Errorf("unknown version: %d", version)
	}

	if err := h.validate.Struct(specs); err != nil {
		return nil, fmt.Errorf("specs are invalid: %w", err)
	}
	return specs, nil
}

// makeFeed converts a task result into an Atom feed document. Items with an
// unparseable link are skipped; an error is returned when no items remain.
func makeFeed(task models.Task, result models.TaskResult) (string, error) {
	feedTS := time.Now()
	if len(result.Items) > 0 {
		feedTS = result.Items[0].Created
	}
	feed := feeds.Feed{
		Title:   html.EscapeString(result.Title),
		Link:    &feeds.Link{Href: task.URL},
		Updated: feedTS,
	}
	// keptIdx records, for every feed item actually emitted, the index of its
	// source in result.Items, so post-processing below stays aligned even
	// when some items are skipped.
	var keptIdx []int
	for i, item := range result.Items {
		itemUrl, err := url.Parse(item.Link)
		if err != nil {
			log.Errorf("Invalid item link, item=%+v", item)
			continue
		}
		// RFC 4151 tag URI as a stable entry id.
		id := fmt.Sprintf(
			"tag:%s,%s:%s",
			itemUrl.Host,
			anyTimeFormat("2006-01-02", item.Created, item.Updated),
			itemUrl.Path,
		)
		if len(itemUrl.RawQuery) > 0 {
			id += "?" + itemUrl.RawQuery
		}
		feed.Items = append(feed.Items, &feeds.Item{
			Id:          id,
			Title:       html.EscapeString(item.Title),
			Link:        &feeds.Link{Href: item.Link},
			Author:      &feeds.Author{Name: item.AuthorName},
			Description: item.Description,
			Created:     item.Created,
			Updated:     item.Updated,
			Content:     item.Content,
		})
		keptIdx = append(keptIdx, i)
	}
	if len(feed.Items) == 0 {
		return "", fmt.Errorf("empty feed")
	}
	atomFeed := (&feeds.Atom{Feed: &feed}).AtomFeed()
	atomFeed.Icon = result.Icon
	// BUG FIX: entries were previously matched to result.Items purely by
	// position, which drifts after any skipped item above and attached the
	// wrong AuthorLink. Use the recorded source indexes instead.
	for i, entry := range atomFeed.Entries {
		if entry.Author != nil {
			entry.Author.Uri = result.Items[keptIdx[i]].AuthorLink
		}
	}
	atom, err := feeds.ToXML(atomFeed)
	if err != nil {
		return "", fmt.Errorf("feed to xml: %w", err)
	}
	return atom, nil
}

// extractHeaders copies the allow-listed client headers that should be
// forwarded to the extraction worker.
func extractHeaders(c echo.Context) map[string]string {
	headers := make(map[string]string)
	for _, hName := range []string{"Accept-Language", "Cookie"} {
		if len(c.Request().Header.Get(hName)) > 0 {
			headers[hName] = c.Request().Header.Get(hName)
		}
	}
	return headers
}

// returns the first non-zero time formatted as a string or ""
func anyTimeFormat(format string, times ...time.Time) string {
	for _, t := range times {
		if !t.IsZero() {
			return t.Format(format)
		}
	}
	return ""
}

--------------------------------------------------------------------------------
/internal/api/http/pb/specs.pb.go:
--------------------------------------------------------------------------------
// Code generated by protoc-gen-go. DO NOT EDIT.
2 | // versions: 3 | // protoc-gen-go v1.36.4 4 | // protoc v5.29.3 5 | // source: proto/specs.proto 6 | 7 | package pb 8 | 9 | import ( 10 | _ "github.com/srikrsna/protoc-gen-gotag/tagger" 11 | protoreflect "google.golang.org/protobuf/reflect/protoreflect" 12 | protoimpl "google.golang.org/protobuf/runtime/protoimpl" 13 | reflect "reflect" 14 | sync "sync" 15 | unsafe "unsafe" 16 | ) 17 | 18 | const ( 19 | // Verify that this generated code is sufficiently up-to-date. 20 | _ = protoimpl.EnforceVersion(20 - protoimpl.MinVersion) 21 | // Verify that runtime/protoimpl is sufficiently up-to-date. 22 | _ = protoimpl.EnforceVersion(protoimpl.MaxVersion - 20) 23 | ) 24 | 25 | type ExtractFrom int32 26 | 27 | const ( 28 | ExtractFrom_InnerText ExtractFrom = 0 29 | ExtractFrom_Attribute ExtractFrom = 1 30 | ) 31 | 32 | // Enum value maps for ExtractFrom. 33 | var ( 34 | ExtractFrom_name = map[int32]string{ 35 | 0: "InnerText", 36 | 1: "Attribute", 37 | } 38 | ExtractFrom_value = map[string]int32{ 39 | "InnerText": 0, 40 | "Attribute": 1, 41 | } 42 | ) 43 | 44 | func (x ExtractFrom) Enum() *ExtractFrom { 45 | p := new(ExtractFrom) 46 | *p = x 47 | return p 48 | } 49 | 50 | func (x ExtractFrom) String() string { 51 | return protoimpl.X.EnumStringOf(x.Descriptor(), protoreflect.EnumNumber(x)) 52 | } 53 | 54 | func (ExtractFrom) Descriptor() protoreflect.EnumDescriptor { 55 | return file_proto_specs_proto_enumTypes[0].Descriptor() 56 | } 57 | 58 | func (ExtractFrom) Type() protoreflect.EnumType { 59 | return &file_proto_specs_proto_enumTypes[0] 60 | } 61 | 62 | func (x ExtractFrom) Number() protoreflect.EnumNumber { 63 | return protoreflect.EnumNumber(x) 64 | } 65 | 66 | // Deprecated: Use ExtractFrom.Descriptor instead. 
67 | func (ExtractFrom) EnumDescriptor() ([]byte, []int) { 68 | return file_proto_specs_proto_rawDescGZIP(), []int{0} 69 | } 70 | 71 | type Specs struct { 72 | state protoimpl.MessageState `protogen:"open.v1"` 73 | Url string `protobuf:"bytes,1,opt,name=url,proto3" json:"url" validate:"url"` 74 | SelectorPost string `protobuf:"bytes,2,opt,name=selector_post,json=selectorPost,proto3" json:"selector_post" validate:"selector"` 75 | SelectorTitle string `protobuf:"bytes,3,opt,name=selector_title,json=selectorTitle,proto3" json:"selector_title" validate:"selector"` 76 | SelectorLink string `protobuf:"bytes,4,opt,name=selector_link,json=selectorLink,proto3" json:"selector_link" validate:"selector"` 77 | SelectorDescription string `protobuf:"bytes,5,opt,name=selector_description,json=selectorDescription,proto3" json:"selector_description" validate:"omitempty,selector"` 78 | SelectorAuthor string `protobuf:"bytes,6,opt,name=selector_author,json=selectorAuthor,proto3" json:"selector_author" validate:"omitempty,selector"` 79 | SelectorCreated string `protobuf:"bytes,7,opt,name=selector_created,json=selectorCreated,proto3" json:"selector_created" validate:"selector"` 80 | CreatedExtractFrom ExtractFrom `protobuf:"varint,11,opt,name=created_extract_from,json=createdExtractFrom,proto3,enum=rssalchemy.ExtractFrom" json:"created_extract_from"` 81 | CreatedAttributeName string `protobuf:"bytes,12,opt,name=created_attribute_name,json=createdAttributeName,proto3" json:"created_attribute_name"` 82 | SelectorContent string `protobuf:"bytes,8,opt,name=selector_content,json=selectorContent,proto3" json:"selector_content" validate:"omitempty,selector"` 83 | SelectorEnclosure string `protobuf:"bytes,9,opt,name=selector_enclosure,json=selectorEnclosure,proto3" json:"selector_enclosure" validate:"selector"` 84 | CacheLifetime string `protobuf:"bytes,10,opt,name=cache_lifetime,json=cacheLifetime,proto3" json:"cache_lifetime"` 85 | unknownFields protoimpl.UnknownFields 86 | sizeCache 
protoimpl.SizeCache 87 | } 88 | 89 | func (x *Specs) Reset() { 90 | *x = Specs{} 91 | mi := &file_proto_specs_proto_msgTypes[0] 92 | ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) 93 | ms.StoreMessageInfo(mi) 94 | } 95 | 96 | func (x *Specs) String() string { 97 | return protoimpl.X.MessageStringOf(x) 98 | } 99 | 100 | func (*Specs) ProtoMessage() {} 101 | 102 | func (x *Specs) ProtoReflect() protoreflect.Message { 103 | mi := &file_proto_specs_proto_msgTypes[0] 104 | if x != nil { 105 | ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) 106 | if ms.LoadMessageInfo() == nil { 107 | ms.StoreMessageInfo(mi) 108 | } 109 | return ms 110 | } 111 | return mi.MessageOf(x) 112 | } 113 | 114 | // Deprecated: Use Specs.ProtoReflect.Descriptor instead. 115 | func (*Specs) Descriptor() ([]byte, []int) { 116 | return file_proto_specs_proto_rawDescGZIP(), []int{0} 117 | } 118 | 119 | func (x *Specs) GetUrl() string { 120 | if x != nil { 121 | return x.Url 122 | } 123 | return "" 124 | } 125 | 126 | func (x *Specs) GetSelectorPost() string { 127 | if x != nil { 128 | return x.SelectorPost 129 | } 130 | return "" 131 | } 132 | 133 | func (x *Specs) GetSelectorTitle() string { 134 | if x != nil { 135 | return x.SelectorTitle 136 | } 137 | return "" 138 | } 139 | 140 | func (x *Specs) GetSelectorLink() string { 141 | if x != nil { 142 | return x.SelectorLink 143 | } 144 | return "" 145 | } 146 | 147 | func (x *Specs) GetSelectorDescription() string { 148 | if x != nil { 149 | return x.SelectorDescription 150 | } 151 | return "" 152 | } 153 | 154 | func (x *Specs) GetSelectorAuthor() string { 155 | if x != nil { 156 | return x.SelectorAuthor 157 | } 158 | return "" 159 | } 160 | 161 | func (x *Specs) GetSelectorCreated() string { 162 | if x != nil { 163 | return x.SelectorCreated 164 | } 165 | return "" 166 | } 167 | 168 | func (x *Specs) GetCreatedExtractFrom() ExtractFrom { 169 | if x != nil { 170 | return x.CreatedExtractFrom 171 | } 172 | return ExtractFrom_InnerText 
173 | } 174 | 175 | func (x *Specs) GetCreatedAttributeName() string { 176 | if x != nil { 177 | return x.CreatedAttributeName 178 | } 179 | return "" 180 | } 181 | 182 | func (x *Specs) GetSelectorContent() string { 183 | if x != nil { 184 | return x.SelectorContent 185 | } 186 | return "" 187 | } 188 | 189 | func (x *Specs) GetSelectorEnclosure() string { 190 | if x != nil { 191 | return x.SelectorEnclosure 192 | } 193 | return "" 194 | } 195 | 196 | func (x *Specs) GetCacheLifetime() string { 197 | if x != nil { 198 | return x.CacheLifetime 199 | } 200 | return "" 201 | } 202 | 203 | var File_proto_specs_proto protoreflect.FileDescriptor 204 | 205 | var file_proto_specs_proto_rawDesc = string([]byte{ 206 | 0x0a, 0x11, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2f, 0x73, 0x70, 0x65, 0x63, 0x73, 0x2e, 0x70, 0x72, 207 | 0x6f, 0x74, 0x6f, 0x12, 0x0a, 0x72, 0x73, 0x73, 0x61, 0x6c, 0x63, 0x68, 0x65, 0x6d, 0x79, 0x1a, 208 | 0x13, 0x74, 0x61, 0x67, 0x67, 0x65, 0x72, 0x2f, 0x74, 0x61, 0x67, 0x67, 0x65, 0x72, 0x2e, 0x70, 209 | 0x72, 0x6f, 0x74, 0x6f, 0x22, 0xc0, 0x08, 0x0a, 0x05, 0x53, 0x70, 0x65, 0x63, 0x73, 0x12, 0x30, 210 | 0x0a, 0x03, 0x75, 0x72, 0x6c, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x42, 0x1e, 0x9a, 0x84, 0x9e, 211 | 0x03, 0x19, 0x6a, 0x73, 0x6f, 0x6e, 0x3a, 0x22, 0x75, 0x72, 0x6c, 0x22, 0x20, 0x76, 0x61, 0x6c, 212 | 0x69, 0x64, 0x61, 0x74, 0x65, 0x3a, 0x22, 0x75, 0x72, 0x6c, 0x22, 0x52, 0x03, 0x75, 0x72, 0x6c, 213 | 0x12, 0x52, 0x0a, 0x0d, 0x73, 0x65, 0x6c, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x5f, 0x70, 0x6f, 0x73, 214 | 0x74, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x42, 0x2d, 0x9a, 0x84, 0x9e, 0x03, 0x28, 0x6a, 0x73, 215 | 0x6f, 0x6e, 0x3a, 0x22, 0x73, 0x65, 0x6c, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x5f, 0x70, 0x6f, 0x73, 216 | 0x74, 0x22, 0x20, 0x76, 0x61, 0x6c, 0x69, 0x64, 0x61, 0x74, 0x65, 0x3a, 0x22, 0x73, 0x65, 0x6c, 217 | 0x65, 0x63, 0x74, 0x6f, 0x72, 0x22, 0x52, 0x0c, 0x73, 0x65, 0x6c, 0x65, 0x63, 0x74, 0x6f, 0x72, 218 | 0x50, 0x6f, 0x73, 0x74, 0x12, 0x55, 0x0a, 0x0e, 0x73, 
0x65, 0x6c, 0x65, 0x63, 0x74, 0x6f, 0x72, 219 | 0x5f, 0x74, 0x69, 0x74, 0x6c, 0x65, 0x18, 0x03, 0x20, 0x01, 0x28, 0x09, 0x42, 0x2e, 0x9a, 0x84, 220 | 0x9e, 0x03, 0x29, 0x6a, 0x73, 0x6f, 0x6e, 0x3a, 0x22, 0x73, 0x65, 0x6c, 0x65, 0x63, 0x74, 0x6f, 221 | 0x72, 0x5f, 0x74, 0x69, 0x74, 0x6c, 0x65, 0x22, 0x20, 0x76, 0x61, 0x6c, 0x69, 0x64, 0x61, 0x74, 222 | 0x65, 0x3a, 0x22, 0x73, 0x65, 0x6c, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x22, 0x52, 0x0d, 0x73, 0x65, 223 | 0x6c, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x54, 0x69, 0x74, 0x6c, 0x65, 0x12, 0x52, 0x0a, 0x0d, 0x73, 224 | 0x65, 0x6c, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x5f, 0x6c, 0x69, 0x6e, 0x6b, 0x18, 0x04, 0x20, 0x01, 225 | 0x28, 0x09, 0x42, 0x2d, 0x9a, 0x84, 0x9e, 0x03, 0x28, 0x6a, 0x73, 0x6f, 0x6e, 0x3a, 0x22, 0x73, 226 | 0x65, 0x6c, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x5f, 0x6c, 0x69, 0x6e, 0x6b, 0x22, 0x20, 0x76, 0x61, 227 | 0x6c, 0x69, 0x64, 0x61, 0x74, 0x65, 0x3a, 0x22, 0x73, 0x65, 0x6c, 0x65, 0x63, 0x74, 0x6f, 0x72, 228 | 0x22, 0x52, 0x0c, 0x73, 0x65, 0x6c, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x4c, 0x69, 0x6e, 0x6b, 0x12, 229 | 0x71, 0x0a, 0x14, 0x73, 0x65, 0x6c, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x5f, 0x64, 0x65, 0x73, 0x63, 230 | 0x72, 0x69, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x18, 0x05, 0x20, 0x01, 0x28, 0x09, 0x42, 0x3e, 0x9a, 231 | 0x84, 0x9e, 0x03, 0x39, 0x6a, 0x73, 0x6f, 0x6e, 0x3a, 0x22, 0x73, 0x65, 0x6c, 0x65, 0x63, 0x74, 232 | 0x6f, 0x72, 0x5f, 0x64, 0x65, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x22, 0x20, 233 | 0x76, 0x61, 0x6c, 0x69, 0x64, 0x61, 0x74, 0x65, 0x3a, 0x22, 0x6f, 0x6d, 0x69, 0x74, 0x65, 0x6d, 234 | 0x70, 0x74, 0x79, 0x2c, 0x73, 0x65, 0x6c, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x22, 0x52, 0x13, 0x73, 235 | 0x65, 0x6c, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x44, 0x65, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x69, 236 | 0x6f, 0x6e, 0x12, 0x62, 0x0a, 0x0f, 0x73, 0x65, 0x6c, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x5f, 0x61, 237 | 0x75, 0x74, 0x68, 0x6f, 0x72, 0x18, 0x06, 0x20, 0x01, 0x28, 0x09, 0x42, 0x39, 0x9a, 0x84, 0x9e, 238 | 0x03, 0x34, 
0x6a, 0x73, 0x6f, 0x6e, 0x3a, 0x22, 0x73, 0x65, 0x6c, 0x65, 0x63, 0x74, 0x6f, 0x72, 239 | 0x5f, 0x61, 0x75, 0x74, 0x68, 0x6f, 0x72, 0x22, 0x20, 0x76, 0x61, 0x6c, 0x69, 0x64, 0x61, 0x74, 240 | 0x65, 0x3a, 0x22, 0x6f, 0x6d, 0x69, 0x74, 0x65, 0x6d, 0x70, 0x74, 0x79, 0x2c, 0x73, 0x65, 0x6c, 241 | 0x65, 0x63, 0x74, 0x6f, 0x72, 0x22, 0x52, 0x0e, 0x73, 0x65, 0x6c, 0x65, 0x63, 0x74, 0x6f, 0x72, 242 | 0x41, 0x75, 0x74, 0x68, 0x6f, 0x72, 0x12, 0x5b, 0x0a, 0x10, 0x73, 0x65, 0x6c, 0x65, 0x63, 0x74, 243 | 0x6f, 0x72, 0x5f, 0x63, 0x72, 0x65, 0x61, 0x74, 0x65, 0x64, 0x18, 0x07, 0x20, 0x01, 0x28, 0x09, 244 | 0x42, 0x30, 0x9a, 0x84, 0x9e, 0x03, 0x2b, 0x6a, 0x73, 0x6f, 0x6e, 0x3a, 0x22, 0x73, 0x65, 0x6c, 245 | 0x65, 0x63, 0x74, 0x6f, 0x72, 0x5f, 0x63, 0x72, 0x65, 0x61, 0x74, 0x65, 0x64, 0x22, 0x20, 0x76, 246 | 0x61, 0x6c, 0x69, 0x64, 0x61, 0x74, 0x65, 0x3a, 0x22, 0x73, 0x65, 0x6c, 0x65, 0x63, 0x74, 0x6f, 247 | 0x72, 0x22, 0x52, 0x0f, 0x73, 0x65, 0x6c, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x43, 0x72, 0x65, 0x61, 248 | 0x74, 0x65, 0x64, 0x12, 0x6b, 0x0a, 0x14, 0x63, 0x72, 0x65, 0x61, 0x74, 0x65, 0x64, 0x5f, 0x65, 249 | 0x78, 0x74, 0x72, 0x61, 0x63, 0x74, 0x5f, 0x66, 0x72, 0x6f, 0x6d, 0x18, 0x0b, 0x20, 0x01, 0x28, 250 | 0x0e, 0x32, 0x17, 0x2e, 0x72, 0x73, 0x73, 0x61, 0x6c, 0x63, 0x68, 0x65, 0x6d, 0x79, 0x2e, 0x45, 251 | 0x78, 0x74, 0x72, 0x61, 0x63, 0x74, 0x46, 0x72, 0x6f, 0x6d, 0x42, 0x20, 0x9a, 0x84, 0x9e, 0x03, 252 | 0x1b, 0x6a, 0x73, 0x6f, 0x6e, 0x3a, 0x22, 0x63, 0x72, 0x65, 0x61, 0x74, 0x65, 0x64, 0x5f, 0x65, 253 | 0x78, 0x74, 0x72, 0x61, 0x63, 0x74, 0x5f, 0x66, 0x72, 0x6f, 0x6d, 0x22, 0x52, 0x12, 0x63, 0x72, 254 | 0x65, 0x61, 0x74, 0x65, 0x64, 0x45, 0x78, 0x74, 0x72, 0x61, 0x63, 0x74, 0x46, 0x72, 0x6f, 0x6d, 255 | 0x12, 0x58, 0x0a, 0x16, 0x63, 0x72, 0x65, 0x61, 0x74, 0x65, 0x64, 0x5f, 0x61, 0x74, 0x74, 0x72, 256 | 0x69, 0x62, 0x75, 0x74, 0x65, 0x5f, 0x6e, 0x61, 0x6d, 0x65, 0x18, 0x0c, 0x20, 0x01, 0x28, 0x09, 257 | 0x42, 0x22, 0x9a, 0x84, 0x9e, 0x03, 0x1d, 0x6a, 0x73, 0x6f, 0x6e, 0x3a, 
0x22, 0x63, 0x72, 0x65, 258 | 0x61, 0x74, 0x65, 0x64, 0x5f, 0x61, 0x74, 0x74, 0x72, 0x69, 0x62, 0x75, 0x74, 0x65, 0x5f, 0x6e, 259 | 0x61, 0x6d, 0x65, 0x22, 0x52, 0x14, 0x63, 0x72, 0x65, 0x61, 0x74, 0x65, 0x64, 0x41, 0x74, 0x74, 260 | 0x72, 0x69, 0x62, 0x75, 0x74, 0x65, 0x4e, 0x61, 0x6d, 0x65, 0x12, 0x65, 0x0a, 0x10, 0x73, 0x65, 261 | 0x6c, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x5f, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74, 0x18, 0x08, 262 | 0x20, 0x01, 0x28, 0x09, 0x42, 0x3a, 0x9a, 0x84, 0x9e, 0x03, 0x35, 0x6a, 0x73, 0x6f, 0x6e, 0x3a, 263 | 0x22, 0x73, 0x65, 0x6c, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x5f, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 264 | 0x74, 0x22, 0x20, 0x76, 0x61, 0x6c, 0x69, 0x64, 0x61, 0x74, 0x65, 0x3a, 0x22, 0x6f, 0x6d, 0x69, 265 | 0x74, 0x65, 0x6d, 0x70, 0x74, 0x79, 0x2c, 0x73, 0x65, 0x6c, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x22, 266 | 0x52, 0x0f, 0x73, 0x65, 0x6c, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x43, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 267 | 0x74, 0x12, 0x61, 0x0a, 0x12, 0x73, 0x65, 0x6c, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x5f, 0x65, 0x6e, 268 | 0x63, 0x6c, 0x6f, 0x73, 0x75, 0x72, 0x65, 0x18, 0x09, 0x20, 0x01, 0x28, 0x09, 0x42, 0x32, 0x9a, 269 | 0x84, 0x9e, 0x03, 0x2d, 0x6a, 0x73, 0x6f, 0x6e, 0x3a, 0x22, 0x73, 0x65, 0x6c, 0x65, 0x63, 0x74, 270 | 0x6f, 0x72, 0x5f, 0x65, 0x6e, 0x63, 0x6c, 0x6f, 0x73, 0x75, 0x72, 0x65, 0x22, 0x20, 0x76, 0x61, 271 | 0x6c, 0x69, 0x64, 0x61, 0x74, 0x65, 0x3a, 0x22, 0x73, 0x65, 0x6c, 0x65, 0x63, 0x74, 0x6f, 0x72, 272 | 0x22, 0x52, 0x11, 0x73, 0x65, 0x6c, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x45, 0x6e, 0x63, 0x6c, 0x6f, 273 | 0x73, 0x75, 0x72, 0x65, 0x12, 0x41, 0x0a, 0x0e, 0x63, 0x61, 0x63, 0x68, 0x65, 0x5f, 0x6c, 0x69, 274 | 0x66, 0x65, 0x74, 0x69, 0x6d, 0x65, 0x18, 0x0a, 0x20, 0x01, 0x28, 0x09, 0x42, 0x1a, 0x9a, 0x84, 275 | 0x9e, 0x03, 0x15, 0x6a, 0x73, 0x6f, 0x6e, 0x3a, 0x22, 0x63, 0x61, 0x63, 0x68, 0x65, 0x5f, 0x6c, 276 | 0x69, 0x66, 0x65, 0x74, 0x69, 0x6d, 0x65, 0x22, 0x52, 0x0d, 0x63, 0x61, 0x63, 0x68, 0x65, 0x4c, 277 | 0x69, 0x66, 0x65, 0x74, 0x69, 
0x6d, 0x65, 0x2a, 0x2b, 0x0a, 0x0b, 0x45, 0x78, 0x74, 0x72, 0x61, 278 | 0x63, 0x74, 0x46, 0x72, 0x6f, 0x6d, 0x12, 0x0d, 0x0a, 0x09, 0x49, 0x6e, 0x6e, 0x65, 0x72, 0x54, 279 | 0x65, 0x78, 0x74, 0x10, 0x00, 0x12, 0x0d, 0x0a, 0x09, 0x41, 0x74, 0x74, 0x72, 0x69, 0x62, 0x75, 280 | 0x74, 0x65, 0x10, 0x01, 0x42, 0x16, 0x5a, 0x14, 0x69, 0x6e, 0x74, 0x65, 0x72, 0x6e, 0x61, 0x6c, 281 | 0x2f, 0x61, 0x70, 0x69, 0x2f, 0x68, 0x74, 0x74, 0x70, 0x2f, 0x70, 0x62, 0x62, 0x06, 0x70, 0x72, 282 | 0x6f, 0x74, 0x6f, 0x33, 283 | }) 284 | 285 | var ( 286 | file_proto_specs_proto_rawDescOnce sync.Once 287 | file_proto_specs_proto_rawDescData []byte 288 | ) 289 | 290 | func file_proto_specs_proto_rawDescGZIP() []byte { 291 | file_proto_specs_proto_rawDescOnce.Do(func() { 292 | file_proto_specs_proto_rawDescData = protoimpl.X.CompressGZIP(unsafe.Slice(unsafe.StringData(file_proto_specs_proto_rawDesc), len(file_proto_specs_proto_rawDesc))) 293 | }) 294 | return file_proto_specs_proto_rawDescData 295 | } 296 | 297 | var file_proto_specs_proto_enumTypes = make([]protoimpl.EnumInfo, 1) 298 | var file_proto_specs_proto_msgTypes = make([]protoimpl.MessageInfo, 1) 299 | var file_proto_specs_proto_goTypes = []any{ 300 | (ExtractFrom)(0), // 0: rssalchemy.ExtractFrom 301 | (*Specs)(nil), // 1: rssalchemy.Specs 302 | } 303 | var file_proto_specs_proto_depIdxs = []int32{ 304 | 0, // 0: rssalchemy.Specs.created_extract_from:type_name -> rssalchemy.ExtractFrom 305 | 1, // [1:1] is the sub-list for method output_type 306 | 1, // [1:1] is the sub-list for method input_type 307 | 1, // [1:1] is the sub-list for extension type_name 308 | 1, // [1:1] is the sub-list for extension extendee 309 | 0, // [0:1] is the sub-list for field type_name 310 | } 311 | 312 | func init() { file_proto_specs_proto_init() } 313 | func file_proto_specs_proto_init() { 314 | if File_proto_specs_proto != nil { 315 | return 316 | } 317 | type x struct{} 318 | out := protoimpl.TypeBuilder{ 319 | File: protoimpl.DescBuilder{ 320 | 
GoPackagePath: reflect.TypeOf(x{}).PkgPath(), 321 | RawDescriptor: unsafe.Slice(unsafe.StringData(file_proto_specs_proto_rawDesc), len(file_proto_specs_proto_rawDesc)), 322 | NumEnums: 1, 323 | NumMessages: 1, 324 | NumExtensions: 0, 325 | NumServices: 0, 326 | }, 327 | GoTypes: file_proto_specs_proto_goTypes, 328 | DependencyIndexes: file_proto_specs_proto_depIdxs, 329 | EnumInfos: file_proto_specs_proto_enumTypes, 330 | MessageInfos: file_proto_specs_proto_msgTypes, 331 | }.Build() 332 | File_proto_specs_proto = out.File 333 | file_proto_specs_proto_goTypes = nil 334 | file_proto_specs_proto_depIdxs = nil 335 | } 336 | -------------------------------------------------------------------------------- /internal/config/config.go: -------------------------------------------------------------------------------- 1 | package config 2 | 3 | import ( 4 | "fmt" 5 | "github.com/go-playground/validator/v10" 6 | "github.com/ilyakaznacheev/cleanenv" 7 | "net/url" 8 | "reflect" 9 | "slices" 10 | ) 11 | 12 | type Config struct { 13 | // Format: host:port 14 | WebserverAddress string `env:"WEBSERVER_ADDRESS" env-default:"0.0.0.0:5000" validate:"hostname_port"` 15 | NatsUrl string `env:"NATS_URL" env-default:"nats://localhost:4222" validate:"url"` 16 | RedisUrl string `env:"REDIS_URL" env-default:"localhost:6379" validate:"url"` 17 | Debug bool `env:"DEBUG"` 18 | // Format: scheme://user:pass@host:port (supported schemes: http, https, socks) 19 | Proxy string `env:"PROXY" env-default:"" validate:"omitempty,proxy"` 20 | // TaskRateLimitEvery and TaskRateLimitBurst are parameters for Token Bucket algorithm 21 | // for task rate limiter (don't apply to cache). 22 | // A token is added to the bucket every TaskRateLimitEvery seconds. 
23 | TaskRateLimitEvery float64 `env:"TASK_RATE_LIMIT_EVERY" env-default:"60" validate:"number,gt=0"` 24 | TaskRateLimitBurst int `env:"TASK_RATE_LIMIT_BURST" env-default:"10" validate:"number,gte=0"` 25 | // PerDomainRateLimitEvery and PerDomainRateLimitCapacity are params for LeakyBucket alrogithm 26 | // for per-domain rate limiting of outgoing queries. 27 | // Request to domain limited to 1 per PerDomainRateLimitEvery seconds. 28 | PerDomainRateLimitEvery float64 `env:"PER_DOMAIN_RATE_LIMIT_EVERY" env-default:"2" validate:"number,gt=0"` 29 | PerDomainRateLimitCapacity int `env:"PER_DOMAIN_RATE_LIMIT_CAPACITY" env-default:"10" validate:"number,gt=0"` 30 | // IP ranges of reverse proxies for correct real ip detection (cidr format, sep. by comma) 31 | TrustedIpRanges []string `env:"TRUSTED_IP_RANGES" env-default:"" validate:"omitempty,dive,cidr"` 32 | RealIpHeader string `env:"REAL_IP_HEADER" env-default:"" validate:"omitempty"` 33 | } 34 | 35 | func Read() (Config, error) { 36 | var cfg Config 37 | err := cleanenv.ReadEnv(&cfg) 38 | if err != nil { 39 | return Config{}, err 40 | } 41 | validate := validator.New() 42 | if err := validate.RegisterValidation("proxy", validateProxy); err != nil { 43 | panic(fmt.Errorf("register validation: %w", err)) 44 | } 45 | err = validate.Struct(cfg) 46 | if err == nil { 47 | fmt.Printf("Config: %+v\n", cfg) 48 | } 49 | return cfg, err 50 | } 51 | 52 | func validateProxy(fl validator.FieldLevel) bool { 53 | if fl.Field().Kind() != reflect.String { 54 | return false 55 | } 56 | validSchemes := []string{"http", "https", "socks"} 57 | pUrl, err := url.Parse(fl.Field().String()) 58 | return err == nil && slices.Contains(validSchemes, pUrl.Scheme) && pUrl.Opaque == "" && pUrl.Path == "" 59 | } 60 | -------------------------------------------------------------------------------- /internal/cookiemgr/dummy/dummycookies.go: -------------------------------------------------------------------------------- 1 | package dummy 2 | 3 | import 
"github.com/egor3f/rssalchemy/internal/cookiemgr" 4 | 5 | type CookieManager struct { 6 | } 7 | 8 | func New() *CookieManager { 9 | m := CookieManager{} 10 | return &m 11 | } 12 | 13 | func (m *CookieManager) GetCookies(key string, cookieHeader string) ([][2]string, error) { 14 | return cookiemgr.ParseCookieHeader(cookieHeader) 15 | } 16 | 17 | func (m *CookieManager) UpdateCookies(key string, cookieHeader string, cookies [][2]string) error { 18 | return nil 19 | } 20 | -------------------------------------------------------------------------------- /internal/cookiemgr/nats/natscookies.go: -------------------------------------------------------------------------------- 1 | package nats 2 | 3 | import ( 4 | "context" 5 | "crypto/sha256" 6 | "errors" 7 | "fmt" 8 | "github.com/egor3f/rssalchemy/internal/cookiemgr" 9 | "github.com/labstack/gommon/log" 10 | "github.com/nats-io/nats.go" 11 | "github.com/nats-io/nats.go/jetstream" 12 | ) 13 | 14 | type CookieManager struct { 15 | kv jetstream.KeyValue 16 | } 17 | 18 | func New(natsc *nats.Conn) (*CookieManager, error) { 19 | m := CookieManager{} 20 | 21 | jets, err := jetstream.New(natsc) 22 | if err != nil { 23 | return nil, fmt.Errorf("create jetstream: %w", err) 24 | } 25 | 26 | m.kv, err = jets.CreateKeyValue(context.TODO(), jetstream.KeyValueConfig{ 27 | Bucket: "cookie_manager_store", 28 | }) 29 | if err != nil { 30 | return nil, fmt.Errorf("create nats kv: %w", err) 31 | } 32 | 33 | return &m, nil 34 | } 35 | 36 | func (m *CookieManager) GetCookies(key string, cookieHeader string) ([][2]string, error) { 37 | cookies, err := cookiemgr.ParseCookieHeader(cookieHeader) 38 | if err != nil { 39 | return nil, fmt.Errorf("parse cookie header: %w", err) 40 | } 41 | storeKey := m.storeKey(key, cookies) 42 | log.Debugf("Store key = %s", storeKey) 43 | value, err := m.kv.Get(context.TODO(), storeKey) 44 | if err != nil { 45 | if errors.Is(err, jetstream.ErrKeyNotFound) { 46 | return cookies, nil 47 | } 48 | return nil, 
fmt.Errorf("kv: %w", err) 49 | } 50 | cookies, err = cookiemgr.ParseCookieHeader(string(value.Value())) 51 | if err != nil { 52 | return nil, fmt.Errorf("parse cookies from kv: %w", err) 53 | } 54 | return cookies, nil 55 | } 56 | 57 | func (m *CookieManager) UpdateCookies(key string, oldCookieHeader string, cookies [][2]string) error { 58 | if len(cookies) == 0 { 59 | return nil 60 | } 61 | newCookieValue := cookiemgr.EncodeCookieHeader(cookies) 62 | log.Debugf("Updating cookies: %.100s", newCookieValue) 63 | oldCookies, err := cookiemgr.ParseCookieHeader(oldCookieHeader) 64 | if err != nil { 65 | return fmt.Errorf("parse cookie header: %w", err) 66 | } 67 | storeKey := m.storeKey(key, oldCookies) 68 | _, err = m.kv.PutString(context.TODO(), storeKey, newCookieValue) 69 | if err != nil { 70 | return fmt.Errorf("kv: %w", err) 71 | } 72 | return nil 73 | } 74 | 75 | func (m *CookieManager) storeKey(key string, cookies [][2]string) string { 76 | hash := cookiemgr.CookiesHash(cookies) 77 | keyHash := sha256.New() 78 | keyHash.Write([]byte(key)) 79 | return fmt.Sprintf("%x_%s", keyHash.Sum(nil), hash) 80 | } 81 | -------------------------------------------------------------------------------- /internal/cookiemgr/utils.go: -------------------------------------------------------------------------------- 1 | package cookiemgr 2 | 3 | import ( 4 | "crypto/sha256" 5 | "fmt" 6 | "net/url" 7 | "strings" 8 | ) 9 | 10 | func ParseCookieHeader(cookieStr string) ([][2]string, error) { 11 | var result [][2]string 12 | 13 | for _, cook := range strings.Split(cookieStr, ";") { 14 | kv := strings.Split(cook, "=") 15 | if len(kv) < 2 { 16 | return nil, fmt.Errorf("failed to parse cookies: split by =: count<2") 17 | } 18 | k, err1 := url.QueryUnescape(kv[0]) 19 | v, err2 := url.QueryUnescape(strings.Join(kv[1:], "=")) 20 | if err1 != nil || err2 != nil { 21 | return nil, fmt.Errorf("failed to parse cookies: unescape k=%w v=%w", err1, err2) 22 | } 23 | result = append(result, 
[2]string{strings.TrimSpace(k), strings.TrimSpace(v)}) 24 | } 25 | 26 | return result, nil 27 | } 28 | 29 | func EncodeCookieHeader(cookies [][2]string) string { 30 | result := make([]string, len(cookies)) 31 | for i, cook := range cookies { 32 | result[i] = fmt.Sprintf("%s=%s", url.QueryEscape(cook[0]), url.QueryEscape(cook[1])) 33 | } 34 | return strings.Join(result, "; ") 35 | } 36 | 37 | func CookiesHash(cookies [][2]string) string { 38 | hash := sha256.New() 39 | hash.Write([]byte(fmt.Sprintf("%v", cookies))) 40 | return fmt.Sprintf("%x", hash.Sum(nil)) 41 | } 42 | -------------------------------------------------------------------------------- /internal/dateparser/dateparser.go: -------------------------------------------------------------------------------- 1 | package dateparser 2 | 3 | import ( 4 | "fmt" 5 | godateparser "github.com/markusmobius/go-dateparser" 6 | "strings" 7 | "time" 8 | ) 9 | 10 | type DateParser struct { 11 | CurrentTimeFunc func() time.Time 12 | } 13 | 14 | func (d *DateParser) ParseDate(str string) (time.Time, error) { 15 | str = strings.TrimSpace(str) 16 | 17 | if len(str) == 0 { 18 | return time.Time{}, fmt.Errorf("date string is empty") 19 | } 20 | 21 | dt, err := godateparser.Parse(&godateparser.Configuration{ 22 | CurrentTime: d.CurrentTimeFunc(), 23 | }, str) 24 | if err == nil { 25 | return dt.Time, nil 26 | } 27 | 28 | parts := strings.Split(str, " ") 29 | for len(parts) > 1 { 30 | newStr := strings.Join(parts, " ") 31 | dt, err = godateparser.Parse(nil, newStr) 32 | if err == nil { 33 | return dt.Time, err 34 | } 35 | parts = parts[1:] 36 | } 37 | 38 | return time.Time{}, err 39 | } 40 | -------------------------------------------------------------------------------- /internal/extractors/pwextractor/adblock.go: -------------------------------------------------------------------------------- 1 | package pwextractor 2 | 3 | import ( 4 | _ "embed" 5 | "fmt" 6 | "github.com/AdguardTeam/urlfilter" 7 | 
"github.com/AdguardTeam/urlfilter/filterlist" 8 | "github.com/AdguardTeam/urlfilter/rules" 9 | "github.com/labstack/gommon/log" 10 | "net/url" 11 | ) 12 | 13 | //go:embed blocklists/easylist.txt 14 | var easyList string 15 | 16 | //go:embed blocklists/easyprivacy.txt 17 | var easyPrivacy string 18 | 19 | var ruleLists = []string{ 20 | easyList, 21 | easyPrivacy, 22 | } 23 | 24 | var engine *urlfilter.Engine 25 | 26 | func init() { 27 | lists := make([]filterlist.RuleList, len(ruleLists)) 28 | for i, rulesStr := range ruleLists { 29 | lists[i] = &filterlist.StringRuleList{ 30 | RulesText: rulesStr, 31 | ID: i, 32 | IgnoreCosmetic: true, 33 | } 34 | } 35 | storage, err := filterlist.NewRuleStorage(lists) 36 | if err != nil { 37 | panic(fmt.Sprintf("initialize adblock: NewRuleStorage: %v", err)) 38 | } 39 | engine = urlfilter.NewEngine(storage) 40 | } 41 | 42 | func allowAdblock(url *url.URL, sourceUrl *url.URL) bool { 43 | req := rules.NewRequest(url.String(), sourceUrl.String(), rules.TypeOther) 44 | res := engine.MatchRequest(req) 45 | rule := res.GetBasicResult() 46 | allow := rule == nil || rule.Whitelist 47 | if !allow { 48 | log.Infof("Adblock blocked %s from %s by rule %s", url, sourceUrl, rule.String()) 49 | } 50 | return allow 51 | } 52 | -------------------------------------------------------------------------------- /internal/extractors/pwextractor/extract_post.js: -------------------------------------------------------------------------------- 1 | // let fnc = // for autocomplete 2 | el => { 3 | let content = ""; 4 | let paragraph = ""; 5 | 6 | const finishParagraph = () => { 7 | content += "

" + paragraph + "

"; 8 | paragraph = ""; 9 | } 10 | 11 | const addImage = img => { 12 | let imgSrc = img.getAttribute('src'); 13 | if (imgSrc.startsWith('/')) { 14 | imgSrc = `${document.location.origin}/${imgSrc}`; 15 | } 16 | content += ``; 17 | }; 18 | 19 | let traverse = (node) => { 20 | // node = document.getRootNode(); // for autocomplete 21 | 22 | if (node.childNodes.length === 0) { 23 | return 24 | } 25 | 26 | for (let child of node.childNodes) { 27 | switch (child.nodeType) { 28 | case child.ELEMENT_NODE: 29 | // child = document.getElementById(''); // for autocomplete 30 | 31 | let tag = child.tagName.toLowerCase(); 32 | 33 | const allowedMarkupTags = ['b', 'i', 'strong']; 34 | if (allowedMarkupTags.includes(tag)) { 35 | paragraph += `<${tag}>` 36 | } 37 | 38 | if (tag === 'img') { 39 | finishParagraph(); 40 | addImage(child); 41 | break; 42 | } 43 | 44 | traverse(child); 45 | 46 | if (allowedMarkupTags.includes(tag)) { 47 | paragraph += `` 48 | } 49 | 50 | break; 51 | case child.TEXT_NODE: 52 | if (child.nodeValue.length > 0) { 53 | paragraph += child.nodeValue + " "; 54 | } 55 | break; 56 | } 57 | } 58 | }; 59 | 60 | traverse(el); 61 | return content; 62 | } 63 | -------------------------------------------------------------------------------- /internal/extractors/pwextractor/pageparser.go: -------------------------------------------------------------------------------- 1 | package pwextractor 2 | 3 | import ( 4 | "context" 5 | _ "embed" 6 | "fmt" 7 | "github.com/egor3f/rssalchemy/internal/models" 8 | "github.com/labstack/gommon/log" 9 | "github.com/playwright-community/playwright-go" 10 | ) 11 | 12 | // Timeouts 13 | var ( 14 | defTimeout = "100ms" 15 | ) 16 | 17 | type pageParser struct { 18 | task models.Task 19 | page playwright.Page 20 | dateParser DateParser 21 | 22 | // next fields only for debugging. 
Shit code, to do better later 23 | postIdx int 24 | fieldIdx int 25 | } 26 | 27 | func (p *pageParser) parse() (*models.TaskResult, error) { 28 | var result models.TaskResult 29 | var err error 30 | 31 | p.waitFullLoad() 32 | 33 | result.Title, err = p.page.Title() 34 | if err != nil { 35 | return nil, fmt.Errorf("page title: %w", err) 36 | } 37 | 38 | iconUrl, err := p.page.Locator("link[rel=apple-touch-icon]").First(). 39 | GetAttribute("href", playwright.LocatorGetAttributeOptions{Timeout: pwDuration("100ms")}) 40 | if err != nil { 41 | log.Warnf("page icon url: %v", err) 42 | } else { 43 | result.Icon = absUrl(iconUrl, p.page) 44 | } 45 | 46 | posts, err := p.page.Locator(p.task.SelectorPost).All() 47 | if err != nil { 48 | return nil, fmt.Errorf("post locator: %w", err) 49 | } 50 | if len(posts) == 0 { 51 | return nil, fmt.Errorf("no posts on page") 52 | } 53 | log.Debugf("Posts count=%d", len(posts)) 54 | 55 | for _, post := range posts { 56 | item, err := p.extractPost(post) 57 | if err != nil { 58 | log.Errorf("extract post fields: %v", err) 59 | continue 60 | } 61 | if len(item.Title) == 0 || len(item.Link) == 0 || item.Created.IsZero() { 62 | log.Warnf("post has no required fields, skip") 63 | continue 64 | } 65 | result.Items = append(result.Items, item) 66 | } 67 | if len(result.Items) == 0 { 68 | return nil, fmt.Errorf("extract failed for all posts") 69 | } 70 | 71 | return &result, nil 72 | } 73 | 74 | func (p *pageParser) waitFullLoad() { 75 | timeout := pwDuration("5s") 76 | ctx, cancel := context.WithCancel(context.Background()) 77 | 78 | go func() { 79 | err := p.page.WaitForLoadState(playwright.PageWaitForLoadStateOptions{ 80 | State: playwright.LoadStateNetworkidle, 81 | Timeout: timeout, 82 | }) 83 | log.Debugf("WaitFor LoadState finished with %v", err) 84 | cancel() 85 | }() 86 | 87 | <-ctx.Done() 88 | } 89 | 90 | func (p *pageParser) extractPost(post playwright.Locator) (models.FeedItem, error) { 91 | p.fieldIdx = 0 92 | p.postIdx++ 93 | var 
item models.FeedItem 94 | 95 | item.Title = newLocator(post, p.task.SelectorTitle).First().InnerText() 96 | log.Debugf("---- POST: %s ----", item.Title) 97 | 98 | item.Link = newLocator(post, p.task.SelectorLink).First().GetAttribute("href") 99 | page, _ := post.Page() 100 | item.Link = absUrl(item.Link, page) 101 | 102 | if len(p.task.SelectorDescription) > 0 { 103 | item.Description = newLocator(post, p.task.SelectorDescription).First().InnerText() 104 | } 105 | 106 | if len(p.task.SelectorAuthor) > 0 { 107 | item.AuthorName = newLocator(post, p.task.SelectorAuthor).First().InnerText() 108 | item.AuthorLink = newLocator(post, p.task.SelectorAuthor).First().GetAttribute("href") 109 | item.AuthorLink = absUrl(item.AuthorLink, page) 110 | } 111 | 112 | if len(p.task.SelectorContent) > 0 { 113 | item.Content = p.extractContent(post) 114 | } 115 | 116 | item.Enclosure = newLocator(post, p.task.SelectorEnclosure).First().GetAttribute("src") 117 | 118 | var createdDateStr string 119 | switch p.task.CreatedExtractFrom { 120 | case models.ExtractFrom_InnerText: 121 | createdDateStr = newLocator(post, p.task.SelectorCreated).First().InnerText() 122 | case models.ExtractFrom_Attribute: 123 | createdDateStr = newLocator(post, p.task.SelectorCreated).First().GetAttribute(p.task.CreatedAttributeName) 124 | default: 125 | return models.FeedItem{}, fmt.Errorf("invalid task.CreatedExtractFrom") 126 | } 127 | log.Debugf("date=%s", createdDateStr) 128 | createdDate, err := p.dateParser.ParseDate(createdDateStr) 129 | if err != nil { 130 | log.Errorf("dateparser: %v", err) 131 | } else { 132 | item.Created = createdDate 133 | } 134 | 135 | return item, nil 136 | } 137 | 138 | //go:embed extract_post.js 139 | var extractPostScript string 140 | 141 | func (p *pageParser) extractContent(post playwright.Locator) string { 142 | postContent := newLocator(post, p.task.SelectorContent) 143 | result, err := postContent.Evaluate( 144 | extractPostScript, 145 | nil, 146 | 
playwright.LocatorEvaluateOptions{Timeout: pwDuration("1s")}, 147 | ) 148 | if err != nil { 149 | log.Errorf("extract post content: evaluate: %v", err) 150 | return postContent.TextContent() 151 | } 152 | resString, ok := result.(string) 153 | if !ok { 154 | log.Errorf("extract post content: result type mismatch: %v", result) 155 | } 156 | return resString 157 | } 158 | 159 | type locator struct { 160 | selector string 161 | playwright.Locator 162 | } 163 | 164 | func newLocator(parent playwright.Locator, selector string) *locator { 165 | return &locator{ 166 | selector: selector, 167 | Locator: parent.Locator(selector), 168 | } 169 | } 170 | 171 | func (l *locator) String() string { 172 | return l.selector 173 | } 174 | 175 | func (l *locator) checkVisible() bool { 176 | visible, err := l.IsVisible() 177 | if err != nil { 178 | log.Errorf("locator %s isVisible: %v", l, err) 179 | return false 180 | } 181 | if !visible { 182 | log.Warnf("locator %s is not visible", l) 183 | } 184 | return visible 185 | } 186 | 187 | func (l *locator) First() *locator { 188 | return &locator{l.selector, l.Locator.First()} 189 | } 190 | 191 | func (l *locator) InnerText() string { 192 | if !l.checkVisible() { 193 | return "" 194 | } 195 | t, err := l.Locator.InnerText(playwright.LocatorInnerTextOptions{Timeout: pwDuration(defTimeout)}) 196 | if err != nil { 197 | log.Errorf("locator %s innerText: %v", l, err) 198 | return "" 199 | } 200 | return t 201 | } 202 | 203 | func (l *locator) GetAttribute(name string) string { 204 | if !l.checkVisible() { 205 | return "" 206 | } 207 | t, err := l.Locator.GetAttribute(name, playwright.LocatorGetAttributeOptions{Timeout: pwDuration(defTimeout)}) 208 | if err != nil { 209 | log.Errorf("locator %s getAttribute %s: %v", l, name, err) 210 | return "" 211 | } 212 | return t 213 | } 214 | 215 | func (l *locator) TextContent() string { 216 | if !l.checkVisible() { 217 | return "" 218 | } 219 | t, err := 
l.Locator.TextContent(playwright.LocatorTextContentOptions{Timeout: pwDuration(defTimeout)}) 220 | if err != nil { 221 | log.Errorf("locator %s textContent: %v", l, err) 222 | return "" 223 | } 224 | return t 225 | } 226 | -------------------------------------------------------------------------------- /internal/extractors/pwextractor/pwextractor.go: -------------------------------------------------------------------------------- 1 | package pwextractor 2 | 3 | import ( 4 | "context" 5 | "errors" 6 | "fmt" 7 | "github.com/egor3f/rssalchemy/internal/limiter" 8 | "github.com/egor3f/rssalchemy/internal/models" 9 | "github.com/labstack/gommon/log" 10 | "github.com/playwright-community/playwright-go" 11 | "maps" 12 | "net" 13 | "net/url" 14 | "strings" 15 | "time" 16 | ) 17 | 18 | var userAgent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/132.0.0.0 Safari/537.36" 19 | var secChUa = `"Chromium";v="132", "Google Chrome";v="132", "Not-A.Brand";v="99"` 20 | 21 | type DateParser interface { 22 | ParseDate(string) (time.Time, error) 23 | } 24 | 25 | type CookieManager interface { 26 | GetCookies(key string, cookieHeader string) ([][2]string, error) 27 | UpdateCookies(key string, cookieHeader string, cookies [][2]string) error 28 | } 29 | 30 | type PwExtractor struct { 31 | pw *playwright.Playwright 32 | chrome playwright.Browser 33 | dateParser DateParser 34 | cookieManager CookieManager 35 | limiter limiter.Limiter 36 | proxyIP net.IP 37 | allowed int 38 | blocked int 39 | } 40 | 41 | type Config struct { 42 | Proxy string 43 | DateParser DateParser 44 | CookieManager CookieManager 45 | Limiter limiter.Limiter 46 | } 47 | 48 | func New(cfg Config) (*PwExtractor, error) { 49 | e := PwExtractor{} 50 | var err error 51 | e.pw, err = playwright.Run() 52 | if err != nil { 53 | return nil, fmt.Errorf("run playwright: %w", err) 54 | } 55 | proxy, err := parseProxy(cfg.Proxy) 56 | if err != nil { 57 | return nil, fmt.Errorf("parse proxy: 
%w", err) 58 | } 59 | if proxy != nil { 60 | proxyIPs, err := getIPs(proxy.Server) 61 | if err != nil { 62 | return nil, fmt.Errorf("get proxy ip: %w", err) 63 | } 64 | e.proxyIP = proxyIPs[0] 65 | } 66 | e.chrome, err = e.pw.Chromium.Launch(playwright.BrowserTypeLaunchOptions{ 67 | Args: []string{ 68 | "--webrtc-ip-handling-policy=disable_non_proxied_udp", 69 | "--force-webrtc-ip-handling-policy", 70 | }, 71 | Channel: playwright.String("chromium"), 72 | ChromiumSandbox: playwright.Bool(true), 73 | HandleSIGINT: playwright.Bool(false), 74 | Proxy: proxy, 75 | Timeout: pwDuration("5s"), 76 | }) 77 | if err != nil { 78 | return nil, fmt.Errorf("run chromium: %w", err) 79 | } 80 | 81 | e.dateParser = cfg.DateParser 82 | e.cookieManager = cfg.CookieManager 83 | e.limiter = cfg.Limiter 84 | if e.dateParser == nil || e.cookieManager == nil || e.limiter == nil { 85 | panic("you fckd up with di again") 86 | } 87 | 88 | return &e, nil 89 | } 90 | 91 | func (e *PwExtractor) Stop() error { 92 | if err := e.chrome.Close(); err != nil { 93 | return fmt.Errorf("closing chrome: %w", err) 94 | } 95 | if err := e.pw.Stop(); err != nil { 96 | return fmt.Errorf("stopping playwright: %w", err) 97 | } 98 | return nil 99 | } 100 | 101 | const MAX_RETRIES = 3 // todo: config 102 | 103 | func (e *PwExtractor) visitPage(task models.Task, cb func(page playwright.Page) error) (errRet error) { 104 | 105 | taskUrl, err := url.Parse(task.URL) 106 | if err != nil { 107 | return fmt.Errorf("parse task url: %w", err) 108 | } 109 | 110 | baseDomain, scheme, err := parseBaseDomain(task.URL) 111 | if err != nil { 112 | return fmt.Errorf("parse base domain: %w", err) 113 | } 114 | 115 | waitFor, err := e.limiter.Limit(context.TODO(), baseDomain) 116 | if err != nil { 117 | return fmt.Errorf("bydomain limiter: %w", err) 118 | } 119 | if waitFor > 0 { 120 | log.Infof("Bydomain limiter domain=%s wait=%v", baseDomain, waitFor) 121 | time.Sleep(waitFor) // todo: task timeouts 122 | } 123 | 124 | headers 
:= maps.Clone(task.Headers) 125 | headers["Sec-Ch-Ua"] = secChUa 126 | 127 | var cookieStr string 128 | var cookies [][2]string 129 | if v, ok := headers["Cookie"]; ok { 130 | cookieStr = v 131 | var err error 132 | cookies, err = e.cookieManager.GetCookies(task.URL, v) 133 | if err != nil { 134 | log.Errorf("cookie manager get: %v", err) 135 | cookies = make([][2]string, 0) 136 | } 137 | log.Debugf("Found cookies, count=%d", len(cookies)) 138 | delete(headers, "Cookie") 139 | } 140 | 141 | bCtx, err := e.chrome.NewContext(playwright.BrowserNewContextOptions{ 142 | ExtraHttpHeaders: headers, 143 | UserAgent: &userAgent, 144 | ServiceWorkers: playwright.ServiceWorkerPolicyBlock, 145 | AcceptDownloads: playwright.Bool(false), 146 | }) 147 | if err != nil { 148 | return fmt.Errorf("create browser context: %w", err) 149 | } 150 | defer func() { 151 | if err := bCtx.Close(); err != nil { 152 | errRet = fmt.Errorf("close context: %w; other error=%w", err, errRet) 153 | } 154 | }() 155 | 156 | if err := e.setupInterceptors(bCtx, taskUrl); err != nil { 157 | return fmt.Errorf("setup interceptors: %w", err) 158 | } 159 | 160 | if len(cookies) > 0 { 161 | var pwCookies []playwright.OptionalCookie 162 | for _, cook := range cookies { 163 | pwCookies = append(pwCookies, playwright.OptionalCookie{ 164 | Name: cook[0], 165 | Value: cook[1], 166 | Domain: playwright.String(fmt.Sprintf(".%s", baseDomain)), 167 | Path: playwright.String("/"), 168 | Secure: playwright.Bool(strings.HasPrefix(cook[0], "__Secure")), 169 | }) 170 | } 171 | 172 | if err := bCtx.AddCookies(pwCookies); err != nil { 173 | return fmt.Errorf("add cookies: %w", err) 174 | } 175 | } 176 | 177 | page, err := bCtx.NewPage() 178 | if err != nil { 179 | return fmt.Errorf("browser new page: %w", err) 180 | } 181 | defer func() { 182 | if err := page.Close(); err != nil { 183 | errRet = fmt.Errorf("close page: %w; other error=%w", err, errRet) 184 | } 185 | }() 186 | log.Debugf("Page created") 187 | 188 | if 
len(task.Headers) > 0 { 189 | if err := page.SetExtraHTTPHeaders(task.Headers); err != nil { 190 | return fmt.Errorf("set headers: %w", err) 191 | } 192 | } 193 | 194 | for retry := 0; retry < MAX_RETRIES; retry++ { 195 | _, err = page.Goto(task.URL, playwright.PageGotoOptions{Timeout: pwDuration("10s")}) 196 | if !errors.Is(err, playwright.ErrTimeout) { 197 | break 198 | } 199 | log.Infof("Retrying page goto (%d of %d) %s", retry, MAX_RETRIES, task.URL) 200 | } 201 | if err != nil { 202 | return fmt.Errorf("goto page: %w", err) 203 | } 204 | log.Debugf("Url %s visited, starting cb", task.URL) 205 | 206 | start := time.Now() 207 | err = cb(page) 208 | log.Infof( 209 | "Visiting page %s finished, time=%f secs, allowed hosts=%d, blocked hosts=%d, err=%v", 210 | task.URL, 211 | time.Since(start).Seconds(), 212 | e.allowed, e.blocked, 213 | err, 214 | ) 215 | 216 | if len(cookies) > 0 { 217 | bCookies, err := bCtx.Cookies(fmt.Sprintf("%s://%s", scheme, baseDomain)) 218 | if err != nil { 219 | log.Errorf("browser context get cookies: %v", err) 220 | } else { 221 | newCookies := make([][2]string, len(bCookies)) 222 | for i, cook := range bCookies { 223 | newCookies[i] = [2]string{cook.Name, cook.Value} 224 | } 225 | err = e.cookieManager.UpdateCookies(task.URL, cookieStr, newCookies) 226 | if err != nil { 227 | log.Errorf("cookie manager update: %v", err) 228 | } 229 | } 230 | } 231 | 232 | return err 233 | } 234 | 235 | func (e *PwExtractor) setupInterceptors(bCtx playwright.BrowserContext, sourceUrl *url.URL) error { 236 | if err := bCtx.Route("**", func(route playwright.Route) { 237 | log.Debugf("Route: %s", route.Request().URL()) 238 | allowHost, err := e.allowHost(route.Request().URL()) 239 | if err != nil { 240 | log.Errorf("Allow host: %v", err) 241 | allowHost = false 242 | } 243 | URL, err := url.Parse(route.Request().URL()) 244 | if err != nil { 245 | log.Errorf("Interceptor parse url: %v", err) 246 | allowHost = false 247 | } 248 | allowHost = allowHost && 
allowAdblock(URL, sourceUrl) 249 | if allowHost { 250 | e.allowed++ 251 | if err := route.Continue(); err != nil { 252 | log.Warnf("Route continue error: %v", err) 253 | } 254 | } else { 255 | e.blocked++ 256 | if err := route.Abort(); err != nil { 257 | log.Warnf("Route abort error: %v", err) 258 | } 259 | } 260 | }); err != nil { 261 | return fmt.Errorf("set route: %w", err) 262 | } 263 | 264 | if err := bCtx.RouteWebSocket("**", func(route playwright.WebSocketRoute) { 265 | log.Debugf("Websocket route: %s", route.URL()) 266 | allowHost, err := e.allowHost(route.URL()) 267 | if err != nil { 268 | log.Errorf("Allow host: %v", err) 269 | allowHost = false 270 | } 271 | URL, err := url.Parse(route.URL()) 272 | if err != nil { 273 | log.Errorf("Interceptor websocket parse url: %v", err) 274 | allowHost = false 275 | } 276 | allowHost = allowHost && allowAdblock(URL, sourceUrl) 277 | if allowHost { 278 | e.allowed++ 279 | if _, err := route.ConnectToServer(); err != nil { 280 | log.Warnf("Websocket connect error: %v", err) 281 | } 282 | } else { 283 | e.blocked++ 284 | route.Close() 285 | } 286 | }); err != nil { 287 | return fmt.Errorf("websocket set route: %w", err) 288 | } 289 | return nil 290 | } 291 | 292 | func (e *PwExtractor) allowHost(rawUrl string) (bool, error) { 293 | ips, err := getIPs(rawUrl) 294 | if err != nil { 295 | return false, fmt.Errorf("allow host get ips: %w", err) 296 | } 297 | for _, ip := range ips { 298 | deny := ip.IsPrivate() || ip.IsLoopback() || ip.IsUnspecified() || ip.IsLinkLocalUnicast() || ip.IsMulticast() 299 | deny = deny || e.proxyIP.Equal(ip) 300 | if deny { 301 | log.Warnf("Banned address: %s", rawUrl) 302 | return false, nil 303 | } 304 | } 305 | return true, nil 306 | } 307 | 308 | func (e *PwExtractor) Extract(task models.Task) (result *models.TaskResult, errRet error) { 309 | errRet = e.visitPage(task, func(page playwright.Page) error { 310 | parser := pageParser{ 311 | task: task, 312 | page: page, 313 | dateParser: 
e.dateParser, 314 | } 315 | var err error 316 | result, err = parser.parse() 317 | if err != nil { 318 | return fmt.Errorf("parse page: %w", err) 319 | } 320 | return nil 321 | }) 322 | return 323 | } 324 | 325 | func (e *PwExtractor) Screenshot(task models.Task) (result *models.ScreenshotTaskResult, errRet error) { 326 | errRet = e.visitPage(task, func(page playwright.Page) error { 327 | err := page.WaitForLoadState(playwright.PageWaitForLoadStateOptions{ 328 | State: playwright.LoadStateNetworkidle, 329 | Timeout: pwDuration("5s"), 330 | }) 331 | if err != nil { 332 | log.Debugf("Wait for network idle: %v", err) 333 | } 334 | if err := page.SetViewportSize(1280, 5000); err != nil { 335 | return fmt.Errorf("set viewport size: %w", err) 336 | } 337 | screenshot, err := page.Screenshot(playwright.PageScreenshotOptions{ 338 | Animations: playwright.ScreenshotAnimationsDisabled, 339 | Timeout: pwDuration("5s"), 340 | }) 341 | if err != nil { 342 | return fmt.Errorf("make screenshot: %w", err) 343 | } 344 | log.Infof("Screenshot finished; total size: %d bytes", len(screenshot)) 345 | result = &models.ScreenshotTaskResult{Image: screenshot} 346 | return nil 347 | }) 348 | return 349 | } 350 | -------------------------------------------------------------------------------- /internal/extractors/pwextractor/utils.go: -------------------------------------------------------------------------------- 1 | package pwextractor 2 | 3 | import ( 4 | "fmt" 5 | "github.com/jellydator/ttlcache/v3" 6 | "github.com/playwright-community/playwright-go" 7 | "net" 8 | "net/url" 9 | "slices" 10 | "strings" 11 | "time" 12 | ) 13 | 14 | func absUrl(link string, page playwright.Page) string { 15 | if len(link) == 0 { 16 | return "" 17 | } 18 | if strings.HasPrefix(link, "/") { 19 | pageUrl, _ := url.Parse(page.URL()) 20 | link = fmt.Sprintf("%s://%s%s", pageUrl.Scheme, pageUrl.Host, link) 21 | } 22 | //log.Debugf("link=%s", link) 23 | return link 24 | } 25 | 26 | // pwDuration converts string 
like "10s" to milliseconds float64 pointer 27 | // needed for Playwright timeouts (wtf? why they don't use normal Durations?) 28 | func pwDuration(s string) *float64 { 29 | dur, err := time.ParseDuration(s) 30 | if err != nil { 31 | panic(fmt.Errorf("failed to parse duration %s: %w", s, err)) 32 | } 33 | f64 := float64(dur.Milliseconds()) 34 | return &f64 35 | } 36 | 37 | func parseProxy(s string) (*playwright.Proxy, error) { 38 | var proxy *playwright.Proxy 39 | if len(s) > 0 { 40 | proxyUrl, err := url.Parse(s) 41 | if err != nil { 42 | return nil, err 43 | } 44 | urlWithoutUser := *proxyUrl 45 | urlWithoutUser.User = nil 46 | proxy = &playwright.Proxy{Server: urlWithoutUser.String()} 47 | if proxyUrl.User != nil { 48 | user := proxyUrl.User.Username() 49 | proxy.Username = &user 50 | if pass, exist := proxyUrl.User.Password(); exist { 51 | proxy.Password = &pass 52 | } 53 | } 54 | } 55 | return proxy, nil 56 | } 57 | 58 | // parseBaseDomain extracts second-level domain from url, e.g. 
// parseBaseDomain extracts the second-level domain from urlStr, e.g.
// https://kek.example.com/lol becomes example.com. IP-address hosts and
// single-label hosts (e.g. "localhost") are returned as-is. If the url is
// invalid or the scheme is not http(s), an error is returned; otherwise the
// scheme and domain are returned.
func parseBaseDomain(urlStr string) (domain string, scheme string, err error) {
	pageUrl, err := url.Parse(urlStr)
	if err != nil {
		return "", "", fmt.Errorf("task url parsing: %w", err)
	}
	scheme = pageUrl.Scheme // url.Parse already lower-cases the scheme
	if !slices.Contains([]string{"https", "http"}, scheme) {
		return "", "", fmt.Errorf("bad scheme: %s", scheme)
	}
	hostname := strings.ToLower(pageUrl.Hostname())
	if ipHost := net.ParseIP(hostname); ipHost != nil {
		return ipHost.String(), scheme, nil
	}
	parts := strings.Split(hostname, ".")
	// BUGFIX: single-label hostnames ("localhost") previously caused an
	// index-out-of-range panic; return them unchanged instead.
	if len(parts) < 2 {
		if hostname == "" {
			return "", "", fmt.Errorf("empty hostname")
		}
		return hostname, scheme, nil
	}
	return parts[len(parts)-2] + "." + parts[len(parts)-1], scheme, nil
}
121 | return nil, fmt.Errorf("lookip ip: not resolved") 122 | } 123 | return ips, nil 124 | } 125 | -------------------------------------------------------------------------------- /internal/extractors/pwextractor/utils_test.go: -------------------------------------------------------------------------------- 1 | package pwextractor 2 | 3 | import ( 4 | "fmt" 5 | "testing" 6 | 7 | "github.com/stretchr/testify/assert" 8 | "github.com/stretchr/testify/require" 9 | ) 10 | 11 | var testDomain = "dns.google" 12 | var testResult = []string{ 13 | "2001:4860:4860::8844", "2001:4860:4860::8888", "8.8.8.8", "8.8.4.4", 14 | } 15 | 16 | func TestGetIPs(t *testing.T) { 17 | tests := []struct { 18 | name string 19 | input string 20 | expectedIPStrings []string 21 | wantErr bool 22 | }{ 23 | { 24 | name: "valid IPv4", 25 | input: "192.0.2.1", 26 | expectedIPStrings: []string{"192.0.2.1"}, 27 | wantErr: false, 28 | }, 29 | { 30 | name: "valid IPv6", 31 | input: "2001:db8::1", 32 | expectedIPStrings: []string{"2001:db8::1"}, 33 | wantErr: false, 34 | }, 35 | { 36 | name: "URL with IPv4 host", 37 | input: "http://192.0.2.1", 38 | expectedIPStrings: []string{"192.0.2.1"}, 39 | wantErr: false, 40 | }, 41 | { 42 | name: "URL with IPv6 host", 43 | input: "http://[2001:db8::1]", 44 | expectedIPStrings: []string{"2001:db8::1"}, 45 | wantErr: false, 46 | }, 47 | { 48 | name: "URL with hostname", 49 | input: fmt.Sprintf("https://%s:8080", testDomain), 50 | expectedIPStrings: testResult, 51 | wantErr: false, 52 | }, 53 | { 54 | name: "hostname", 55 | input: testDomain, 56 | expectedIPStrings: testResult, 57 | wantErr: false, 58 | }, 59 | { 60 | name: "invalid IP address", 61 | input: "256.0.0.0", 62 | wantErr: true, 63 | }, 64 | { 65 | name: "empty input", 66 | input: "", 67 | wantErr: true, 68 | }, 69 | { 70 | name: "invalid URL format", 71 | input: "://invalid", 72 | wantErr: true, 73 | }, 74 | { 75 | name: "unresolvable hostname", 76 | input: "nonexistent.invalid", 77 | wantErr: true, 78 | 
}, 79 | } 80 | 81 | for _, tc := range tests { 82 | t.Run(tc.name, func(t *testing.T) { 83 | ips, err := getIPs(tc.input) 84 | 85 | if tc.wantErr { 86 | require.Error(t, err) 87 | assert.Nil(t, ips) 88 | } else { 89 | require.NoError(t, err) 90 | require.NotEmpty(t, ips, "result slice should not be empty when error is nil") 91 | 92 | if tc.expectedIPStrings != nil { 93 | ipStrings := make([]string, len(ips)) 94 | for i, ip := range ips { 95 | ipStrings[i] = ip.String() 96 | } 97 | assert.ElementsMatch(t, tc.expectedIPStrings, ipStrings, "IPs do not match expected") 98 | } 99 | } 100 | }) 101 | } 102 | t.Run("cache set", func(t *testing.T) { 103 | dnsCache.DeleteAll() 104 | require.False(t, dnsCache.Has(testDomain)) 105 | ips, _ := getIPs(testDomain) 106 | require.True(t, dnsCache.Has(testDomain)) 107 | require.ElementsMatch(t, ips, dnsCache.Get(testDomain).Value()) 108 | }) 109 | } 110 | 111 | func Test_parseBaseDomain(t *testing.T) { 112 | tests := []struct { 113 | name string 114 | urlStr string 115 | expectedDomain string 116 | expectedScheme string 117 | expectErr bool 118 | }{ 119 | { 120 | name: "valid https with subdomain", 121 | urlStr: "https://kek.example.com/lol", 122 | expectedDomain: "example.com", 123 | expectedScheme: "https", 124 | }, 125 | { 126 | name: "valid http with www subdomain", 127 | urlStr: "http://www.example.com/path", 128 | expectedDomain: "example.com", 129 | expectedScheme: "http", 130 | }, 131 | { 132 | name: "valid http with no subdomain", 133 | urlStr: "http://example.com", 134 | expectedDomain: "example.com", 135 | expectedScheme: "http", 136 | }, 137 | { 138 | name: "url with port in host", 139 | urlStr: "http://example.com:8080/path", 140 | expectedDomain: "example.com", 141 | expectedScheme: "http", 142 | }, 143 | { 144 | name: "url with ip address host", 145 | urlStr: "http://192.168.1.1", 146 | expectedDomain: "192.168.1.1", 147 | expectedScheme: "http", 148 | }, 149 | { 150 | name: "url with uppercase http scheme", 151 | 
urlStr: "HTTP://EXAMPLE.COM", 152 | expectedDomain: "example.com", 153 | expectedScheme: "http", 154 | }, 155 | { 156 | name: "invalid scheme (ftp)", 157 | urlStr: "ftp://example.com", 158 | expectErr: true, 159 | }, 160 | { 161 | name: "no scheme", 162 | urlStr: "example.com", 163 | expectErr: true, 164 | }, 165 | { 166 | name: "invalid url format", 167 | urlStr: "http//example.com", 168 | expectErr: true, 169 | }, 170 | { 171 | name: "empty url string", 172 | urlStr: "", 173 | expectErr: true, 174 | }, 175 | { 176 | name: "url with user info", 177 | urlStr: "http://user:pass@example.com", 178 | expectedDomain: "example.com", 179 | expectedScheme: "http", 180 | }, 181 | { 182 | name: "url with multiple subdomains", 183 | urlStr: "https://a.b.c.example.com", 184 | expectedDomain: "example.com", 185 | expectedScheme: "https", 186 | }, 187 | { 188 | name: "url with leading/trailing whitespace", 189 | urlStr: " https://example.com ", 190 | expectErr: true, 191 | }, 192 | } 193 | 194 | for _, tt := range tests { 195 | t.Run(tt.name, func(t *testing.T) { 196 | domain, scheme, err := parseBaseDomain(tt.urlStr) 197 | 198 | if tt.expectErr { 199 | require.Error(t, err) 200 | assert.Empty(t, domain) 201 | assert.Empty(t, scheme) 202 | } else { 203 | require.NoError(t, err) 204 | assert.Equal(t, tt.expectedDomain, domain) 205 | assert.Equal(t, tt.expectedScheme, scheme) 206 | } 207 | }) 208 | } 209 | } 210 | -------------------------------------------------------------------------------- /internal/limiter/dummy/dummy.go: -------------------------------------------------------------------------------- 1 | package dummy 2 | 3 | import ( 4 | "context" 5 | "time" 6 | ) 7 | 8 | type Limiter struct { 9 | } 10 | 11 | func (l *Limiter) Limit(context.Context, string) (time.Duration, error) { 12 | return 0, nil 13 | } 14 | -------------------------------------------------------------------------------- /internal/limiter/limiter.go: 
-------------------------------------------------------------------------------- 1 | package limiter 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | "time" 7 | ) 8 | 9 | var ErrLimitReached = fmt.Errorf("limit reached") 10 | 11 | type Limiter interface { 12 | Limit(ctx context.Context, key string) (waitFor time.Duration, err error) 13 | } 14 | -------------------------------------------------------------------------------- /internal/limiter/redisleaky/redisleaky.go: -------------------------------------------------------------------------------- 1 | package redisleaky 2 | 3 | import ( 4 | "context" 5 | "errors" 6 | "fmt" 7 | "github.com/egor3f/rssalchemy/internal/limiter" 8 | rsredis "github.com/go-redsync/redsync/v4/redis" 9 | rsgoredis "github.com/go-redsync/redsync/v4/redis/goredis/v9" 10 | "github.com/labstack/gommon/log" 11 | "github.com/mennanov/limiters" 12 | "github.com/redis/go-redis/v9" 13 | "golang.org/x/time/rate" 14 | "time" 15 | ) 16 | 17 | type Limiter struct { 18 | rate time.Duration 19 | capacity int64 20 | 21 | redisClient *redis.Client 22 | redisPool rsredis.Pool 23 | prefix string 24 | } 25 | 26 | func New( 27 | rateLimit rate.Limit, 28 | capacity int64, 29 | redisClient *redis.Client, 30 | prefix string, 31 | ) *Limiter { 32 | l := Limiter{ 33 | rate: time.Duration(float64(time.Second) / float64(rateLimit)), 34 | capacity: capacity, 35 | redisClient: redisClient, 36 | redisPool: rsgoredis.NewPool(redisClient), 37 | prefix: prefix, 38 | } 39 | return &l 40 | } 41 | 42 | func (l *Limiter) Limit(ctx context.Context, key string) (time.Duration, error) { 43 | limiterKey := fmt.Sprintf("limiter_%s_%s", l.prefix, key) 44 | bucket := limiters.NewLeakyBucket( 45 | l.capacity, 46 | l.rate, 47 | limiters.NewLockRedis(l.redisPool, fmt.Sprintf("%s_lock", limiterKey)), 48 | limiters.NewLeakyBucketRedis( 49 | l.redisClient, 50 | fmt.Sprintf("%s_state", limiterKey), 51 | time.Duration(l.capacity*int64(l.rate)), 52 | true, 53 | ), 54 | limiters.NewSystemClock(), 
// TaskType discriminates what kind of work a Task describes.
type TaskType string

const (
	TaskTypeExtract        = "extract"
	TaskTypePageScreenshot = "page_screenshot"
)

// ExtractFrom selects where a post's creation date is read from:
// the element's inner text or one of its attributes.
type ExtractFrom int

const (
	ExtractFrom_InnerText ExtractFrom = 0
	ExtractFrom_Attribute ExtractFrom = 1
)

// Task describes a single scraping job: the page URL plus the CSS selectors
// used to locate each feed field, and extra request headers (e.g. cookies).
type Task struct {
	// While adding new fields, don't forget to alter CacheKey below —
	// any field that changes the result must participate in the hash.
	TaskType             TaskType
	URL                  string
	SelectorPost         string
	SelectorTitle        string
	SelectorLink         string
	SelectorDescription  string
	SelectorAuthor       string
	SelectorCreated      string
	CreatedExtractFrom   ExtractFrom
	CreatedAttributeName string
	SelectorContent      string
	SelectorEnclosure    string
	Headers              map[string]string
}

// CacheKey returns a deterministic key covering every field that influences
// the task's output, so two tasks share a cache entry only when their results
// would be identical.
//
// Fix: CreatedExtractFrom and CreatedAttributeName were previously omitted
// from the hash, so tasks differing only in date-extraction settings collided
// on one cache entry and served each other's cached results.
func (t Task) CacheKey() string {
	h := sha256.New()
	h.Write([]byte(t.URL))
	h.Write([]byte(t.SelectorPost))
	h.Write([]byte(t.SelectorTitle))
	h.Write([]byte(t.SelectorLink))
	h.Write([]byte(t.SelectorDescription))
	h.Write([]byte(t.SelectorAuthor))
	h.Write([]byte(t.SelectorCreated))
	h.Write([]byte(fmt.Sprintf("%d", t.CreatedExtractFrom)))
	h.Write([]byte(t.CreatedAttributeName))
	h.Write([]byte(t.SelectorContent))
	h.Write([]byte(t.SelectorEnclosure))
	// fmt prints map keys in sorted order (Go >= 1.12), so this is stable.
	h.Write([]byte(fmt.Sprintf("%+v", t.Headers)))
	return fmt.Sprintf("%s_%x", t.TaskType, h.Sum(nil))
}
time.Time 58 | Updated time.Time 59 | AuthorName string 60 | Link string 61 | Description string 62 | Content string 63 | Enclosure string 64 | AuthorLink string 65 | } 66 | 67 | type TaskResult struct { 68 | Title string 69 | Items []FeedItem 70 | Icon string 71 | } 72 | 73 | type ScreenshotTaskResult struct { 74 | Image []byte // png 75 | } 76 | -------------------------------------------------------------------------------- /internal/validators/validators.go: -------------------------------------------------------------------------------- 1 | package validators 2 | 3 | import ( 4 | "github.com/ericchiang/css" 5 | "github.com/go-playground/validator/v10" 6 | "github.com/labstack/gommon/log" 7 | "reflect" 8 | ) 9 | 10 | func ValidateSelector(fl validator.FieldLevel) bool { 11 | if fl.Field().Kind() != reflect.String { 12 | return false 13 | } 14 | _, err := css.Parse(fl.Field().String()) 15 | if err != nil { 16 | log.Debugf("selector %s invalid: %v", fl.Field().String(), err) 17 | } 18 | return err == nil 19 | } 20 | -------------------------------------------------------------------------------- /presets/README.md: -------------------------------------------------------------------------------- 1 | # RSS Alchemy presets 2 | 3 | To share task configuration, copy "preset for sharing" field value. 4 | To import preset, use the same dialog as for editing an existing link. 5 | 6 | ## Presets list 7 | 8 | List is provided "as is" and is not maintained. Pull requests are welcome. 
9 | 10 | ### Content sites 11 | 12 | #### vombat.su new posts 13 | ``` 14 | rssalchemy:0:nVDNasMwDH4Vo1MLTbLs6ENfpSiK2nhVLGMrHWXs3UvwxYPtspv4/vRJX7BlAQ+LWSp+GB66Tmh92YbInwOKwAkKC5NpviQtBh7m8Oj3sZt0fra8BRPew0aHLS4h3n+BZy6UQ7KgsU0ljcbRukmU7i61Dtxs0Qwe0C9YDmdXEsbqqtSxVVNmNJ5rto+2dHrt7Jn4MB7d2f0ffP+5pvb984RWy5FEy5b3L2G2QMIurLdepw+m6sMQ4QSEtPBFwpUtrLt6fFvh+wU 15 | ``` 16 | 17 | #### youtube.com subscriptions (requires cookies [^1]) 18 | 19 | ``` 20 | rssalchemy:0:XY/RasMwDEV/pWgvG1QJ26N/pij2TWNqW8FWCqHs30daGN4epXMlHT1oq4kcLWZrc+O462bbhMFrHmcgjG2bmq9xtail0ZkaErxpvazajByFeB92C1yjXzgaMleUgIrahy1aAjl6u8cA5VfZ8RTLjRxJz/nZ7EIBvybkqCey2aK1s/GLlILERTJO0kd9hRjC4ZJhEsTkOIRTW6U8Z18KB+Qpqb+5YgvrzLaveP/6+LNMi6HYPxsUn7Rt9Xg45uuwG3ut4JjlCuakEhDoTF78gkuKMyzmI/yZ6fsH 21 | ``` 22 | 23 | #### reddit.com r/selfhosted 24 | ``` 25 | rssalchemy:1:dcyxCsIwEADQQUFwEHRw6FidhPTUsdAfcHQtFUJzaY8mJlyujZ8vuLu+4W3Po0hMNUDOuWI0hqTqgweGhM6OIQkaOGw0C/UOi6NukwvS2Nk5FUMS5eg9deUfvxSGlsoiGtVrNkrwIyoyLoT5vtftyGhfTQlzQoayq9dCHh878sPpl5DXAz5Xt6v/Ag 26 | ``` 27 | 28 | #### Associated Press (technology) 29 | ``` 30 | rssalchemy:1:ZcuxCsIwEIBhpOAguBQXC4I4CmkUtzyCU3FyPZMjDfRyR+9QfHtHEcf/g3+1G81Eg/cgFV/aRyZvGMfKE+d3u07l2Q+QcZiZuGvHy7ecFZvwsPm3PRy3P6NLqHEuYoVrWAaNLHhfPLoEBk5YDZNLYOisEKoBybUplG/N+UQf 31 | ``` 32 | 33 | [^1]: To capture cookies, use [this manual](presets/cookies.md) 34 | -------------------------------------------------------------------------------- /presets/cookies.md: -------------------------------------------------------------------------------- 1 | # Capturing cookies 2 | 3 | 1. Open incognito window (don't use existing session, otherwise you will be kicked from account) 4 | 2. Log in to website you want to convert (e.g. youtube) 5 | 3. Be sure that desired page opens successfully (e.g. you can see your youtube subscriptions/posts/...) 6 | 4. Open devtools (F12). Go to network tab 7 | 5. Refresh page 8 | 6. Open the very first entry 9 | 7. In "request headers" find `Cookie:`. 
Copy the entire value to your RSS reader 10 | - Miniflux: Feed settings page, field `Set Cookies` 11 | - Other RSS readers are not tested yet 12 | 13 | That's all! :) 14 | -------------------------------------------------------------------------------- /proto/specs.proto: -------------------------------------------------------------------------------- 1 | syntax = "proto3"; 2 | 3 | package rssalchemy; 4 | 5 | import "tagger/tagger.proto"; 6 | 7 | option go_package = "internal/api/http/pb"; 8 | 9 | enum ExtractFrom { 10 | InnerText = 0; 11 | Attribute = 1; 12 | } 13 | 14 | message Specs { 15 | string url = 1 [(tagger.tags) = "json:\"url\" validate:\"url\""]; 16 | string selector_post = 2 [(tagger.tags) = "json:\"selector_post\" validate:\"selector\""]; 17 | string selector_title = 3 [(tagger.tags) = "json:\"selector_title\" validate:\"selector\""]; 18 | string selector_link = 4 [(tagger.tags) = "json:\"selector_link\" validate:\"selector\""]; 19 | string selector_description = 5 [(tagger.tags) = "json:\"selector_description\" validate:\"omitempty,selector\""]; 20 | string selector_author = 6 [(tagger.tags) = "json:\"selector_author\" validate:\"omitempty,selector\""]; 21 | 22 | string selector_created = 7 [(tagger.tags) = "json:\"selector_created\" validate:\"selector\""]; 23 | ExtractFrom created_extract_from = 11 [(tagger.tags) = "json:\"created_extract_from\""]; 24 | string created_attribute_name = 12 [(tagger.tags) = "json:\"created_attribute_name\""]; 25 | 26 | string selector_content = 8 [(tagger.tags) = "json:\"selector_content\" validate:\"omitempty,selector\""]; 27 | string selector_enclosure = 9 [(tagger.tags) = "json:\"selector_enclosure\" validate:\"selector\""]; 28 | string cache_lifetime = 10 [(tagger.tags) = "json:\"cache_lifetime\""]; 29 | } 30 | --------------------------------------------------------------------------------