├── .gitignore ├── .gitmodules ├── LICENSE ├── README.md ├── archmanweb ├── __init__.py ├── management │ ├── commands │ │ ├── man_drop_cache.py │ │ └── man_update.py │ └── utils │ │ └── finder.py ├── migrations │ ├── 0001_initial.py │ └── __init__.py ├── models.py ├── static │ └── archmanweb │ │ ├── base.css │ │ └── man_page.css ├── templates │ ├── 404.html │ ├── 500.html │ ├── base.html │ ├── index.html │ ├── listing.html │ ├── man_404.html │ ├── man_page.html │ └── search.html ├── templatetags │ ├── __init__.py │ ├── make_table.py │ ├── pagination_links.py │ ├── query_transform.py │ └── reverse_man_url.py ├── urls.py ├── utils │ ├── __init__.py │ ├── django.py │ ├── encodings.py │ └── mandoc.py └── views │ ├── __init__.py │ ├── listing.py │ ├── man_page.py │ └── search.py ├── local_settings.py.example ├── manage.py ├── settings.py ├── urls.py └── wsgi.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Python 2 | __pycache__/ 3 | 4 | # Django 5 | local_settings.py 6 | 7 | # pacman 8 | /.cache/ 9 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "archlinux-common-style"] 2 | path = archlinux-common-style 3 | url = https://gitlab.archlinux.org/archlinux/archlinux-common-style.git 4 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | GNU GENERAL PUBLIC LICENSE 2 | Version 3, 29 June 2007 3 | 4 | Copyright (C) 2007 Free Software Foundation, Inc. 5 | Everyone is permitted to copy and distribute verbatim copies 6 | of this license document, but changing it is not allowed. 7 | 8 | Preamble 9 | 10 | The GNU General Public License is a free, copyleft license for 11 | software and other kinds of works. 
12 | 13 | The licenses for most software and other practical works are designed 14 | to take away your freedom to share and change the works. By contrast, 15 | the GNU General Public License is intended to guarantee your freedom to 16 | share and change all versions of a program--to make sure it remains free 17 | software for all its users. We, the Free Software Foundation, use the 18 | GNU General Public License for most of our software; it applies also to 19 | any other work released this way by its authors. You can apply it to 20 | your programs, too. 21 | 22 | When we speak of free software, we are referring to freedom, not 23 | price. Our General Public Licenses are designed to make sure that you 24 | have the freedom to distribute copies of free software (and charge for 25 | them if you wish), that you receive source code or can get it if you 26 | want it, that you can change the software or use pieces of it in new 27 | free programs, and that you know you can do these things. 28 | 29 | To protect your rights, we need to prevent others from denying you 30 | these rights or asking you to surrender the rights. Therefore, you have 31 | certain responsibilities if you distribute copies of the software, or if 32 | you modify it: responsibilities to respect the freedom of others. 33 | 34 | For example, if you distribute copies of such a program, whether 35 | gratis or for a fee, you must pass on to the recipients the same 36 | freedoms that you received. You must make sure that they, too, receive 37 | or can get the source code. And you must show them these terms so they 38 | know their rights. 39 | 40 | Developers that use the GNU GPL protect your rights with two steps: 41 | (1) assert copyright on the software, and (2) offer you this License 42 | giving you legal permission to copy, distribute and/or modify it. 43 | 44 | For the developers' and authors' protection, the GPL clearly explains 45 | that there is no warranty for this free software. 
For both users' and 46 | authors' sake, the GPL requires that modified versions be marked as 47 | changed, so that their problems will not be attributed erroneously to 48 | authors of previous versions. 49 | 50 | Some devices are designed to deny users access to install or run 51 | modified versions of the software inside them, although the manufacturer 52 | can do so. This is fundamentally incompatible with the aim of 53 | protecting users' freedom to change the software. The systematic 54 | pattern of such abuse occurs in the area of products for individuals to 55 | use, which is precisely where it is most unacceptable. Therefore, we 56 | have designed this version of the GPL to prohibit the practice for those 57 | products. If such problems arise substantially in other domains, we 58 | stand ready to extend this provision to those domains in future versions 59 | of the GPL, as needed to protect the freedom of users. 60 | 61 | Finally, every program is threatened constantly by software patents. 62 | States should not allow patents to restrict development and use of 63 | software on general-purpose computers, but in those that do, we wish to 64 | avoid the special danger that patents applied to a free program could 65 | make it effectively proprietary. To prevent this, the GPL assures that 66 | patents cannot be used to render the program non-free. 67 | 68 | The precise terms and conditions for copying, distribution and 69 | modification follow. 70 | 71 | TERMS AND CONDITIONS 72 | 73 | 0. Definitions. 74 | 75 | "This License" refers to version 3 of the GNU General Public License. 76 | 77 | "Copyright" also means copyright-like laws that apply to other kinds of 78 | works, such as semiconductor masks. 79 | 80 | "The Program" refers to any copyrightable work licensed under this 81 | License. Each licensee is addressed as "you". "Licensees" and 82 | "recipients" may be individuals or organizations. 
83 | 84 | To "modify" a work means to copy from or adapt all or part of the work 85 | in a fashion requiring copyright permission, other than the making of an 86 | exact copy. The resulting work is called a "modified version" of the 87 | earlier work or a work "based on" the earlier work. 88 | 89 | A "covered work" means either the unmodified Program or a work based 90 | on the Program. 91 | 92 | To "propagate" a work means to do anything with it that, without 93 | permission, would make you directly or secondarily liable for 94 | infringement under applicable copyright law, except executing it on a 95 | computer or modifying a private copy. Propagation includes copying, 96 | distribution (with or without modification), making available to the 97 | public, and in some countries other activities as well. 98 | 99 | To "convey" a work means any kind of propagation that enables other 100 | parties to make or receive copies. Mere interaction with a user through 101 | a computer network, with no transfer of a copy, is not conveying. 102 | 103 | An interactive user interface displays "Appropriate Legal Notices" 104 | to the extent that it includes a convenient and prominently visible 105 | feature that (1) displays an appropriate copyright notice, and (2) 106 | tells the user that there is no warranty for the work (except to the 107 | extent that warranties are provided), that licensees may convey the 108 | work under this License, and how to view a copy of this License. If 109 | the interface presents a list of user commands or options, such as a 110 | menu, a prominent item in the list meets this criterion. 111 | 112 | 1. Source Code. 113 | 114 | The "source code" for a work means the preferred form of the work 115 | for making modifications to it. "Object code" means any non-source 116 | form of a work. 
117 | 118 | A "Standard Interface" means an interface that either is an official 119 | standard defined by a recognized standards body, or, in the case of 120 | interfaces specified for a particular programming language, one that 121 | is widely used among developers working in that language. 122 | 123 | The "System Libraries" of an executable work include anything, other 124 | than the work as a whole, that (a) is included in the normal form of 125 | packaging a Major Component, but which is not part of that Major 126 | Component, and (b) serves only to enable use of the work with that 127 | Major Component, or to implement a Standard Interface for which an 128 | implementation is available to the public in source code form. A 129 | "Major Component", in this context, means a major essential component 130 | (kernel, window system, and so on) of the specific operating system 131 | (if any) on which the executable work runs, or a compiler used to 132 | produce the work, or an object code interpreter used to run it. 133 | 134 | The "Corresponding Source" for a work in object code form means all 135 | the source code needed to generate, install, and (for an executable 136 | work) run the object code and to modify the work, including scripts to 137 | control those activities. However, it does not include the work's 138 | System Libraries, or general-purpose tools or generally available free 139 | programs which are used unmodified in performing those activities but 140 | which are not part of the work. For example, Corresponding Source 141 | includes interface definition files associated with source files for 142 | the work, and the source code for shared libraries and dynamically 143 | linked subprograms that the work is specifically designed to require, 144 | such as by intimate data communication or control flow between those 145 | subprograms and other parts of the work. 
146 | 147 | The Corresponding Source need not include anything that users 148 | can regenerate automatically from other parts of the Corresponding 149 | Source. 150 | 151 | The Corresponding Source for a work in source code form is that 152 | same work. 153 | 154 | 2. Basic Permissions. 155 | 156 | All rights granted under this License are granted for the term of 157 | copyright on the Program, and are irrevocable provided the stated 158 | conditions are met. This License explicitly affirms your unlimited 159 | permission to run the unmodified Program. The output from running a 160 | covered work is covered by this License only if the output, given its 161 | content, constitutes a covered work. This License acknowledges your 162 | rights of fair use or other equivalent, as provided by copyright law. 163 | 164 | You may make, run and propagate covered works that you do not 165 | convey, without conditions so long as your license otherwise remains 166 | in force. You may convey covered works to others for the sole purpose 167 | of having them make modifications exclusively for you, or provide you 168 | with facilities for running those works, provided that you comply with 169 | the terms of this License in conveying all material for which you do 170 | not control copyright. Those thus making or running the covered works 171 | for you must do so exclusively on your behalf, under your direction 172 | and control, on terms that prohibit them from making any copies of 173 | your copyrighted material outside their relationship with you. 174 | 175 | Conveying under any other circumstances is permitted solely under 176 | the conditions stated below. Sublicensing is not allowed; section 10 177 | makes it unnecessary. 178 | 179 | 3. Protecting Users' Legal Rights From Anti-Circumvention Law. 
180 | 181 | No covered work shall be deemed part of an effective technological 182 | measure under any applicable law fulfilling obligations under article 183 | 11 of the WIPO copyright treaty adopted on 20 December 1996, or 184 | similar laws prohibiting or restricting circumvention of such 185 | measures. 186 | 187 | When you convey a covered work, you waive any legal power to forbid 188 | circumvention of technological measures to the extent such circumvention 189 | is effected by exercising rights under this License with respect to 190 | the covered work, and you disclaim any intention to limit operation or 191 | modification of the work as a means of enforcing, against the work's 192 | users, your or third parties' legal rights to forbid circumvention of 193 | technological measures. 194 | 195 | 4. Conveying Verbatim Copies. 196 | 197 | You may convey verbatim copies of the Program's source code as you 198 | receive it, in any medium, provided that you conspicuously and 199 | appropriately publish on each copy an appropriate copyright notice; 200 | keep intact all notices stating that this License and any 201 | non-permissive terms added in accord with section 7 apply to the code; 202 | keep intact all notices of the absence of any warranty; and give all 203 | recipients a copy of this License along with the Program. 204 | 205 | You may charge any price or no price for each copy that you convey, 206 | and you may offer support or warranty protection for a fee. 207 | 208 | 5. Conveying Modified Source Versions. 209 | 210 | You may convey a work based on the Program, or the modifications to 211 | produce it from the Program, in the form of source code under the 212 | terms of section 4, provided that you also meet all of these conditions: 213 | 214 | a) The work must carry prominent notices stating that you modified 215 | it, and giving a relevant date. 
216 | 217 | b) The work must carry prominent notices stating that it is 218 | released under this License and any conditions added under section 219 | 7. This requirement modifies the requirement in section 4 to 220 | "keep intact all notices". 221 | 222 | c) You must license the entire work, as a whole, under this 223 | License to anyone who comes into possession of a copy. This 224 | License will therefore apply, along with any applicable section 7 225 | additional terms, to the whole of the work, and all its parts, 226 | regardless of how they are packaged. This License gives no 227 | permission to license the work in any other way, but it does not 228 | invalidate such permission if you have separately received it. 229 | 230 | d) If the work has interactive user interfaces, each must display 231 | Appropriate Legal Notices; however, if the Program has interactive 232 | interfaces that do not display Appropriate Legal Notices, your 233 | work need not make them do so. 234 | 235 | A compilation of a covered work with other separate and independent 236 | works, which are not by their nature extensions of the covered work, 237 | and which are not combined with it such as to form a larger program, 238 | in or on a volume of a storage or distribution medium, is called an 239 | "aggregate" if the compilation and its resulting copyright are not 240 | used to limit the access or legal rights of the compilation's users 241 | beyond what the individual works permit. Inclusion of a covered work 242 | in an aggregate does not cause this License to apply to the other 243 | parts of the aggregate. 244 | 245 | 6. Conveying Non-Source Forms. 
246 | 247 | You may convey a covered work in object code form under the terms 248 | of sections 4 and 5, provided that you also convey the 249 | machine-readable Corresponding Source under the terms of this License, 250 | in one of these ways: 251 | 252 | a) Convey the object code in, or embodied in, a physical product 253 | (including a physical distribution medium), accompanied by the 254 | Corresponding Source fixed on a durable physical medium 255 | customarily used for software interchange. 256 | 257 | b) Convey the object code in, or embodied in, a physical product 258 | (including a physical distribution medium), accompanied by a 259 | written offer, valid for at least three years and valid for as 260 | long as you offer spare parts or customer support for that product 261 | model, to give anyone who possesses the object code either (1) a 262 | copy of the Corresponding Source for all the software in the 263 | product that is covered by this License, on a durable physical 264 | medium customarily used for software interchange, for a price no 265 | more than your reasonable cost of physically performing this 266 | conveying of source, or (2) access to copy the 267 | Corresponding Source from a network server at no charge. 268 | 269 | c) Convey individual copies of the object code with a copy of the 270 | written offer to provide the Corresponding Source. This 271 | alternative is allowed only occasionally and noncommercially, and 272 | only if you received the object code with such an offer, in accord 273 | with subsection 6b. 274 | 275 | d) Convey the object code by offering access from a designated 276 | place (gratis or for a charge), and offer equivalent access to the 277 | Corresponding Source in the same way through the same place at no 278 | further charge. You need not require recipients to copy the 279 | Corresponding Source along with the object code. 
If the place to 280 | copy the object code is a network server, the Corresponding Source 281 | may be on a different server (operated by you or a third party) 282 | that supports equivalent copying facilities, provided you maintain 283 | clear directions next to the object code saying where to find the 284 | Corresponding Source. Regardless of what server hosts the 285 | Corresponding Source, you remain obligated to ensure that it is 286 | available for as long as needed to satisfy these requirements. 287 | 288 | e) Convey the object code using peer-to-peer transmission, provided 289 | you inform other peers where the object code and Corresponding 290 | Source of the work are being offered to the general public at no 291 | charge under subsection 6d. 292 | 293 | A separable portion of the object code, whose source code is excluded 294 | from the Corresponding Source as a System Library, need not be 295 | included in conveying the object code work. 296 | 297 | A "User Product" is either (1) a "consumer product", which means any 298 | tangible personal property which is normally used for personal, family, 299 | or household purposes, or (2) anything designed or sold for incorporation 300 | into a dwelling. In determining whether a product is a consumer product, 301 | doubtful cases shall be resolved in favor of coverage. For a particular 302 | product received by a particular user, "normally used" refers to a 303 | typical or common use of that class of product, regardless of the status 304 | of the particular user or of the way in which the particular user 305 | actually uses, or expects or is expected to use, the product. A product 306 | is a consumer product regardless of whether the product has substantial 307 | commercial, industrial or non-consumer uses, unless such uses represent 308 | the only significant mode of use of the product. 
309 | 310 | "Installation Information" for a User Product means any methods, 311 | procedures, authorization keys, or other information required to install 312 | and execute modified versions of a covered work in that User Product from 313 | a modified version of its Corresponding Source. The information must 314 | suffice to ensure that the continued functioning of the modified object 315 | code is in no case prevented or interfered with solely because 316 | modification has been made. 317 | 318 | If you convey an object code work under this section in, or with, or 319 | specifically for use in, a User Product, and the conveying occurs as 320 | part of a transaction in which the right of possession and use of the 321 | User Product is transferred to the recipient in perpetuity or for a 322 | fixed term (regardless of how the transaction is characterized), the 323 | Corresponding Source conveyed under this section must be accompanied 324 | by the Installation Information. But this requirement does not apply 325 | if neither you nor any third party retains the ability to install 326 | modified object code on the User Product (for example, the work has 327 | been installed in ROM). 328 | 329 | The requirement to provide Installation Information does not include a 330 | requirement to continue to provide support service, warranty, or updates 331 | for a work that has been modified or installed by the recipient, or for 332 | the User Product in which it has been modified or installed. Access to a 333 | network may be denied when the modification itself materially and 334 | adversely affects the operation of the network or violates the rules and 335 | protocols for communication across the network. 
336 | 337 | Corresponding Source conveyed, and Installation Information provided, 338 | in accord with this section must be in a format that is publicly 339 | documented (and with an implementation available to the public in 340 | source code form), and must require no special password or key for 341 | unpacking, reading or copying. 342 | 343 | 7. Additional Terms. 344 | 345 | "Additional permissions" are terms that supplement the terms of this 346 | License by making exceptions from one or more of its conditions. 347 | Additional permissions that are applicable to the entire Program shall 348 | be treated as though they were included in this License, to the extent 349 | that they are valid under applicable law. If additional permissions 350 | apply only to part of the Program, that part may be used separately 351 | under those permissions, but the entire Program remains governed by 352 | this License without regard to the additional permissions. 353 | 354 | When you convey a copy of a covered work, you may at your option 355 | remove any additional permissions from that copy, or from any part of 356 | it. (Additional permissions may be written to require their own 357 | removal in certain cases when you modify the work.) You may place 358 | additional permissions on material, added by you to a covered work, 359 | for which you have or can give appropriate copyright permission. 
360 | 361 | Notwithstanding any other provision of this License, for material you 362 | add to a covered work, you may (if authorized by the copyright holders of 363 | that material) supplement the terms of this License with terms: 364 | 365 | a) Disclaiming warranty or limiting liability differently from the 366 | terms of sections 15 and 16 of this License; or 367 | 368 | b) Requiring preservation of specified reasonable legal notices or 369 | author attributions in that material or in the Appropriate Legal 370 | Notices displayed by works containing it; or 371 | 372 | c) Prohibiting misrepresentation of the origin of that material, or 373 | requiring that modified versions of such material be marked in 374 | reasonable ways as different from the original version; or 375 | 376 | d) Limiting the use for publicity purposes of names of licensors or 377 | authors of the material; or 378 | 379 | e) Declining to grant rights under trademark law for use of some 380 | trade names, trademarks, or service marks; or 381 | 382 | f) Requiring indemnification of licensors and authors of that 383 | material by anyone who conveys the material (or modified versions of 384 | it) with contractual assumptions of liability to the recipient, for 385 | any liability that these contractual assumptions directly impose on 386 | those licensors and authors. 387 | 388 | All other non-permissive additional terms are considered "further 389 | restrictions" within the meaning of section 10. If the Program as you 390 | received it, or any part of it, contains a notice stating that it is 391 | governed by this License along with a term that is a further 392 | restriction, you may remove that term. 
If a license document contains 393 | a further restriction but permits relicensing or conveying under this 394 | License, you may add to a covered work material governed by the terms 395 | of that license document, provided that the further restriction does 396 | not survive such relicensing or conveying. 397 | 398 | If you add terms to a covered work in accord with this section, you 399 | must place, in the relevant source files, a statement of the 400 | additional terms that apply to those files, or a notice indicating 401 | where to find the applicable terms. 402 | 403 | Additional terms, permissive or non-permissive, may be stated in the 404 | form of a separately written license, or stated as exceptions; 405 | the above requirements apply either way. 406 | 407 | 8. Termination. 408 | 409 | You may not propagate or modify a covered work except as expressly 410 | provided under this License. Any attempt otherwise to propagate or 411 | modify it is void, and will automatically terminate your rights under 412 | this License (including any patent licenses granted under the third 413 | paragraph of section 11). 414 | 415 | However, if you cease all violation of this License, then your 416 | license from a particular copyright holder is reinstated (a) 417 | provisionally, unless and until the copyright holder explicitly and 418 | finally terminates your license, and (b) permanently, if the copyright 419 | holder fails to notify you of the violation by some reasonable means 420 | prior to 60 days after the cessation. 421 | 422 | Moreover, your license from a particular copyright holder is 423 | reinstated permanently if the copyright holder notifies you of the 424 | violation by some reasonable means, this is the first time you have 425 | received notice of violation of this License (for any work) from that 426 | copyright holder, and you cure the violation prior to 30 days after 427 | your receipt of the notice. 
428 | 429 | Termination of your rights under this section does not terminate the 430 | licenses of parties who have received copies or rights from you under 431 | this License. If your rights have been terminated and not permanently 432 | reinstated, you do not qualify to receive new licenses for the same 433 | material under section 10. 434 | 435 | 9. Acceptance Not Required for Having Copies. 436 | 437 | You are not required to accept this License in order to receive or 438 | run a copy of the Program. Ancillary propagation of a covered work 439 | occurring solely as a consequence of using peer-to-peer transmission 440 | to receive a copy likewise does not require acceptance. However, 441 | nothing other than this License grants you permission to propagate or 442 | modify any covered work. These actions infringe copyright if you do 443 | not accept this License. Therefore, by modifying or propagating a 444 | covered work, you indicate your acceptance of this License to do so. 445 | 446 | 10. Automatic Licensing of Downstream Recipients. 447 | 448 | Each time you convey a covered work, the recipient automatically 449 | receives a license from the original licensors, to run, modify and 450 | propagate that work, subject to this License. You are not responsible 451 | for enforcing compliance by third parties with this License. 452 | 453 | An "entity transaction" is a transaction transferring control of an 454 | organization, or substantially all assets of one, or subdividing an 455 | organization, or merging organizations. 
If propagation of a covered 456 | work results from an entity transaction, each party to that 457 | transaction who receives a copy of the work also receives whatever 458 | licenses to the work the party's predecessor in interest had or could 459 | give under the previous paragraph, plus a right to possession of the 460 | Corresponding Source of the work from the predecessor in interest, if 461 | the predecessor has it or can get it with reasonable efforts. 462 | 463 | You may not impose any further restrictions on the exercise of the 464 | rights granted or affirmed under this License. For example, you may 465 | not impose a license fee, royalty, or other charge for exercise of 466 | rights granted under this License, and you may not initiate litigation 467 | (including a cross-claim or counterclaim in a lawsuit) alleging that 468 | any patent claim is infringed by making, using, selling, offering for 469 | sale, or importing the Program or any portion of it. 470 | 471 | 11. Patents. 472 | 473 | A "contributor" is a copyright holder who authorizes use under this 474 | License of the Program or a work on which the Program is based. The 475 | work thus licensed is called the contributor's "contributor version". 476 | 477 | A contributor's "essential patent claims" are all patent claims 478 | owned or controlled by the contributor, whether already acquired or 479 | hereafter acquired, that would be infringed by some manner, permitted 480 | by this License, of making, using, or selling its contributor version, 481 | but do not include claims that would be infringed only as a 482 | consequence of further modification of the contributor version. For 483 | purposes of this definition, "control" includes the right to grant 484 | patent sublicenses in a manner consistent with the requirements of 485 | this License. 
486 | 487 | Each contributor grants you a non-exclusive, worldwide, royalty-free 488 | patent license under the contributor's essential patent claims, to 489 | make, use, sell, offer for sale, import and otherwise run, modify and 490 | propagate the contents of its contributor version. 491 | 492 | In the following three paragraphs, a "patent license" is any express 493 | agreement or commitment, however denominated, not to enforce a patent 494 | (such as an express permission to practice a patent or covenant not to 495 | sue for patent infringement). To "grant" such a patent license to a 496 | party means to make such an agreement or commitment not to enforce a 497 | patent against the party. 498 | 499 | If you convey a covered work, knowingly relying on a patent license, 500 | and the Corresponding Source of the work is not available for anyone 501 | to copy, free of charge and under the terms of this License, through a 502 | publicly available network server or other readily accessible means, 503 | then you must either (1) cause the Corresponding Source to be so 504 | available, or (2) arrange to deprive yourself of the benefit of the 505 | patent license for this particular work, or (3) arrange, in a manner 506 | consistent with the requirements of this License, to extend the patent 507 | license to downstream recipients. "Knowingly relying" means you have 508 | actual knowledge that, but for the patent license, your conveying the 509 | covered work in a country, or your recipient's use of the covered work 510 | in a country, would infringe one or more identifiable patents in that 511 | country that you have reason to believe are valid. 
512 | 513 | If, pursuant to or in connection with a single transaction or 514 | arrangement, you convey, or propagate by procuring conveyance of, a 515 | covered work, and grant a patent license to some of the parties 516 | receiving the covered work authorizing them to use, propagate, modify 517 | or convey a specific copy of the covered work, then the patent license 518 | you grant is automatically extended to all recipients of the covered 519 | work and works based on it. 520 | 521 | A patent license is "discriminatory" if it does not include within 522 | the scope of its coverage, prohibits the exercise of, or is 523 | conditioned on the non-exercise of one or more of the rights that are 524 | specifically granted under this License. You may not convey a covered 525 | work if you are a party to an arrangement with a third party that is 526 | in the business of distributing software, under which you make payment 527 | to the third party based on the extent of your activity of conveying 528 | the work, and under which the third party grants, to any of the 529 | parties who would receive the covered work from you, a discriminatory 530 | patent license (a) in connection with copies of the covered work 531 | conveyed by you (or copies made from those copies), or (b) primarily 532 | for and in connection with specific products or compilations that 533 | contain the covered work, unless you entered into that arrangement, 534 | or that patent license was granted, prior to 28 March 2007. 535 | 536 | Nothing in this License shall be construed as excluding or limiting 537 | any implied license or other defenses to infringement that may 538 | otherwise be available to you under applicable patent law. 539 | 540 | 12. No Surrender of Others' Freedom. 541 | 542 | If conditions are imposed on you (whether by court order, agreement or 543 | otherwise) that contradict the conditions of this License, they do not 544 | excuse you from the conditions of this License. 
If you cannot convey a 545 | covered work so as to satisfy simultaneously your obligations under this 546 | License and any other pertinent obligations, then as a consequence you may 547 | not convey it at all. For example, if you agree to terms that obligate you 548 | to collect a royalty for further conveying from those to whom you convey 549 | the Program, the only way you could satisfy both those terms and this 550 | License would be to refrain entirely from conveying the Program. 551 | 552 | 13. Use with the GNU Affero General Public License. 553 | 554 | Notwithstanding any other provision of this License, you have 555 | permission to link or combine any covered work with a work licensed 556 | under version 3 of the GNU Affero General Public License into a single 557 | combined work, and to convey the resulting work. The terms of this 558 | License will continue to apply to the part which is the covered work, 559 | but the special requirements of the GNU Affero General Public License, 560 | section 13, concerning interaction through a network will apply to the 561 | combination as such. 562 | 563 | 14. Revised Versions of this License. 564 | 565 | The Free Software Foundation may publish revised and/or new versions of 566 | the GNU General Public License from time to time. Such new versions will 567 | be similar in spirit to the present version, but may differ in detail to 568 | address new problems or concerns. 569 | 570 | Each version is given a distinguishing version number. If the 571 | Program specifies that a certain numbered version of the GNU General 572 | Public License "or any later version" applies to it, you have the 573 | option of following the terms and conditions either of that numbered 574 | version or of any later version published by the Free Software 575 | Foundation. If the Program does not specify a version number of the 576 | GNU General Public License, you may choose any version ever published 577 | by the Free Software Foundation. 
578 | 579 | If the Program specifies that a proxy can decide which future 580 | versions of the GNU General Public License can be used, that proxy's 581 | public statement of acceptance of a version permanently authorizes you 582 | to choose that version for the Program. 583 | 584 | Later license versions may give you additional or different 585 | permissions. However, no additional obligations are imposed on any 586 | author or copyright holder as a result of your choosing to follow a 587 | later version. 588 | 589 | 15. Disclaimer of Warranty. 590 | 591 | THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY 592 | APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT 593 | HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY 594 | OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, 595 | THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 596 | PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM 597 | IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF 598 | ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 599 | 600 | 16. Limitation of Liability. 601 | 602 | IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING 603 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS 604 | THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY 605 | GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE 606 | USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF 607 | DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD 608 | PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), 609 | EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF 610 | SUCH DAMAGES. 611 | 612 | 17. Interpretation of Sections 15 and 16. 
613 | 614 | If the disclaimer of warranty and limitation of liability provided 615 | above cannot be given local legal effect according to their terms, 616 | reviewing courts shall apply local law that most closely approximates 617 | an absolute waiver of all civil liability in connection with the 618 | Program, unless a warranty or assumption of liability accompanies a 619 | copy of the Program in return for a fee. 620 | 621 | END OF TERMS AND CONDITIONS 622 | 623 | How to Apply These Terms to Your New Programs 624 | 625 | If you develop a new program, and you want it to be of the greatest 626 | possible use to the public, the best way to achieve this is to make it 627 | free software which everyone can redistribute and change under these terms. 628 | 629 | To do so, attach the following notices to the program. It is safest 630 | to attach them to the start of each source file to most effectively 631 | state the exclusion of warranty; and each file should have at least 632 | the "copyright" line and a pointer to where the full notice is found. 633 | 634 | <one line to give the program's name and a brief idea of what it does.> 635 | Copyright (C) <year> <name of author> 636 | 637 | This program is free software: you can redistribute it and/or modify 638 | it under the terms of the GNU General Public License as published by 639 | the Free Software Foundation, either version 3 of the License, or 640 | (at your option) any later version. 641 | 642 | This program is distributed in the hope that it will be useful, 643 | but WITHOUT ANY WARRANTY; without even the implied warranty of 644 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 645 | GNU General Public License for more details. 646 | 647 | You should have received a copy of the GNU General Public License 648 | along with this program. If not, see <https://www.gnu.org/licenses/>. 649 | 650 | Also add information on how to contact you by electronic and paper mail.
651 | 652 | If the program does terminal interaction, make it output a short 653 | notice like this when it starts in an interactive mode: 654 | 655 | <program> Copyright (C) <year> <name of author> 656 | This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'. 657 | This is free software, and you are welcome to redistribute it 658 | under certain conditions; type `show c' for details. 659 | 660 | The hypothetical commands `show w' and `show c' should show the appropriate 661 | parts of the General Public License. Of course, your program's commands 662 | might be different; for a GUI interface, you would use an "about box". 663 | 664 | You should also get your employer (if you work as a programmer) or school, 665 | if any, to sign a "copyright disclaimer" for the program, if necessary. 666 | For more information on this, and how to apply and follow the GNU GPL, see 667 | <https://www.gnu.org/licenses/>. 668 | 669 | The GNU General Public License does not permit incorporating your program 670 | into proprietary programs. If your program is a subroutine library, you 671 | may consider it more useful to permit linking proprietary applications with 672 | the library. If this is what you want to do, use the GNU Lesser General 673 | Public License instead of this License. But first, please read 674 | <https://www.gnu.org/licenses/why-not-lgpl.html>. 675 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Arch manual pages 2 | 3 | ## Git submodules 4 | 5 | Make sure that git submodules are initialized after cloning the repository: 6 | 7 | git submodule update --init --recursive 8 | 9 | Or initialize the submodules while cloning: 10 | 11 | git clone --recurse-submodules ssh://git@gitlab.archlinux.org:222/archlinux/archmanweb.git 12 | 13 | ## Dependencies 14 | 15 | pacman -S pyalpm python-chardet python-django python-django-csp python-psycopg2 python-requests python-xtarfile 16 | 17 | ## Installation 18 | 19 | 1.
Copy `local_settings.py.example` to `local_settings.py` and edit `DEBUG = True` and the `SECRET_KEY` variable. 20 | 21 | 2. Configure a connection to a [PostgreSQL](https://wiki.archlinux.org/index.php/PostgreSQL) database 22 | in the [Django database settings](https://docs.djangoproject.com/en/3.1/ref/settings/#databases) 23 | in the `local_settings.py` file. 24 | 25 | 3. Make sure that the [pg_trgm](https://www.postgresql.org/docs/current/pgtrgm.html) 26 | extension is [created](https://www.postgresql.org/docs/current/sql-createextension.html) 27 | in the database. For example: 28 | 29 | psql --username=<username> --dbname=<dbname> --command "create extension if not exists pg_trgm;" 30 | 31 | 4. Make migrations. 32 | 33 | ./manage.py makemigrations 34 | 35 | 5. Migrate changes. 36 | 37 | ./manage.py migrate 38 | 39 | 6. Build the [archlinux-common-style](https://gitlab.archlinux.org/archlinux/archlinux-common-style) 40 | submodule. 41 | 42 | A SASS compiler is needed. For example, install [sassc](https://archlinux.org/packages/community/x86_64/sassc/) 43 | and run 44 | 45 | cd archlinux-common-style 46 | make SASS=sassc 47 | 48 | 7. Start the development web server with `./manage.py runserver`. The site 49 | should be available at http://localhost:8000, saying that there are 0 man 50 | pages and 0 packages (because they were not imported yet). The server will 51 | automatically reload when you make changes to the webapp code or templates. 52 | 53 | 8. Run the `./manage.py man_update` command to import some man pages. However, note that the 54 | full import requires downloading about 7.5 GiB of packages from a mirror of 55 | the Arch repos and then the extraction takes about 20-30 minutes. (The volume 56 | of all man pages is less than 300 MiB though.) If you won't need all man pages 57 | for the development, you can run e.g.
`./manage.py man_update --only-repos core` to import 58 | only man pages from the core repository (the smallest one, download size is 59 | about 160 MiB) or even `./manage.py man_update --only-packages coreutils man-pages`. 60 | 61 | ## About 62 | 63 | This website was created for the [man template](https://wiki.archlinux.org/index.php/Template:Man) 64 | on the Arch wiki. Originally, the template replaced plain text, unclickable 65 | references to man pages with links to [man7.org](https://man7.org/linux/man-pages/), 66 | which contains a handful of manuals taken directly from upstream. Later, we 67 | considered switching to another site providing more manuals. Since we did not 68 | find a suitable external site, we decided to build a new service to satisfy all 69 | our requirements: 70 | 71 | 1. All man pages from official Arch packages are available. Old versions and 72 | permalinks are not necessary. 73 | 2. Functionality does not require JavaScript. 74 | 3. Pages are addressable by their name and section, both occurring exactly once 75 | in the URL to avoid problems with pages such as 76 | [ar(1)](https://man.archlinux.org/man/ar.1) and 77 | [ar(1p)](https://man.archlinux.org/man/ar.1p). 78 | 4. The URLs used by the _man_ template should not redirect to permalinks, 79 | otherwise users would start copy-pasting them to the wiki and it would be 80 | hard to check if they are the same as the canonical URLs. 81 | 5. Human-readable subsection anchors. 82 | 6. The page should clearly indicate the Arch package version containing the 83 | page. 84 | 85 | See the [original discussion](https://wiki.archlinux.org/index.php/Template_talk:Man#Sources) 86 | for details. 87 | 88 | We used a dynamic approach instead of building a website consisting of 89 | completely static pages.
The main building blocks are the 90 | [Django web framework](https://www.djangoproject.com/), the 91 | [PostgreSQL](https://www.postgresql.org/) database server, the `mandoc` tool 92 | from the [mandoc toolset](http://mdocml.bsd.lv/) for the conversion to HTML and 93 | the [pyalpm](https://github.com/archlinux/pyalpm) library for data extraction 94 | from the Arch repositories. The code is available in the 95 | [archmanweb](https://gitlab.archlinux.org/archlinux/archmanweb) repository at 96 | GitLab. 97 | 98 | Overall, this approach allows us to provide the following features without 99 | rebuilding the whole website from scratch: 100 | 101 | - Listings with custom filters and orderings. 102 | - Links to other versions of the same manual provided by different packages. 103 | - Links to similar manuals available in other sections or languages. 104 | - Searching in the names and descriptions of packages and manuals, similarly to 105 | [apropos(1)](https://man.archlinux.org/man/apropos.1). 106 | 107 | ### Similar projects 108 | 109 | Some similar projects, each using a different approach, are: 110 | 111 | - [manned.org](https://manned.org/) ([code](https://g.blicky.net/manned.git/), 112 | [Arch BBS thread](https://bbs.archlinux.org/viewtopic.php?id=145382)) 113 | - [man7.org](http://man7.org/linux/man-pages/) (no idea about website scripts) 114 | - [manpages.debian.org](https://manpages.debian.org/) 115 | ([source](https://github.com/Debian/debiman/)) 116 | - [man.openbsd.org](http://man.openbsd.org/) (runs with the mandoc CGI script) 117 | 118 | ## Test cases 119 | 120 | These links serve as test cases to ensure that all features still work, they 121 | are not useful to regular users. 
122 | 123 | ### URLs with dots 124 | 125 | - intro 126 | - intro.1 127 | - intro.1.en 128 | - intro.en 129 | - systemd.service 130 | - systemd.service.5 131 | - systemd.service.5.en 132 | - systemd.service.en 133 | - gimp-2.8 134 | - gimp-2.8.1 135 | - gimp-2.8.1.en 136 | - gimp-2.8.en 137 | - CA.pl 138 | - CA.pl.1ssl 139 | - CA.pl.1ssl.en 140 | - CA.pl.en 141 | 142 | ### Best match lookup 143 | 144 | Ambiguous cases are ordered by section, package repository and package version, 145 | then the first manual is selected. 146 | 147 | - mount redirects to 148 | mount.8 149 | (not mount.2) 150 | - gv redirects to 151 | gv.1 152 | (not gv.3guile, 153 | gv.3lua etc.) 154 | - graphviz/gv redirects to 155 | graphviz/gv.3guile 156 | (not graphviz/gv.3lua etc.) 157 | - gv.3 redirects to 158 | gv.3guile 159 | (not gv.1, 160 | gv.3lua etc.) 161 | - aliases.5 displays 162 | extra/postfix/aliases.5 163 | (not community/opensmtpd/aliases.5) 164 | - mysqld.8 displays 165 | extra/mariadb/mysqld.8 166 | (not community/percona-server/mysqld.8) 167 | - mailx and 168 | mailx.1 redirect to 169 | mail.1.en as a symbolic link 170 | (not mailx.1p) 171 | 172 | ### Language fallback 173 | 174 | - nvidia-smi.cs → 175 | nvidia-smi.en → 176 | nvidia-smi.1.en 177 | (maybe we should try harder and avoid the double redirect) 178 | - nvidia-smi.1.cs → 179 | nvidia-smi.1.en 180 | - nvidia-smi.foo → 404 181 | - nvidia-smi.1.foo → 404 182 | 183 | ### Package filter 184 | 185 | - nvidia-utils/nvidia-smi.en 186 | - nvidia-340xx-utils/nvidia-smi.en 187 | - nvidia-utils/nvidia-smi.cs → 188 | nvidia-utils/nvidia-smi.en 189 | - nvidia-340xx-utils/nvidia-smi.cs → 190 | nvidia-340xx-utils/nvidia-smi.en 191 | - foo/nvidia-smi.cs → 404 192 | - foo/nvidia-smi.en → 404 193 | 194 | ### .so macros 195 | 196 | There is a groff(1) extension for the 197 | man(7) and 198 | mdoc(7) 199 | languages to include contents of other files using the `.so` macro.
In normal 200 | operation where manuals are stored as files on a file system, the 201 | soelim(1) 202 | pre-processor handles the inclusion. Our system is based on a database rather 203 | than a file system, so we need a custom `soelim` as well. 204 | 205 | Some pages which contain the `.so` macro: 206 | 207 | - [.1.zh_CN 208 | - pwunconv(8) 209 | - pam(8) 210 | - url(7) 211 | - xorg.conf.d(5) 212 | - glibc(7) 213 | - systemd-logind(8) 214 | - shorewall6.conf(5) 215 | points to a page contained in a different package (`shorewall` instead of `shorewall6`) 216 | - lsof(8) 217 | (not a "hardlink", includes an invalid file `./00DIALECTS`) 218 | -------------------------------------------------------------------------------- /archmanweb/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/archlinux/archmanweb/5fcd985314a8f07d151a46ace71ce152e67450a6/archmanweb/__init__.py -------------------------------------------------------------------------------- /archmanweb/management/commands/man_drop_cache.py: -------------------------------------------------------------------------------- 1 | #! 
# ------------------------------------------------------------------------------
# /archmanweb/management/commands/man_drop_cache.py
# (the "#!" of its shebang sits at the end of the previous dump line;
#  interpreter: /usr/bin/env python3)
# ------------------------------------------------------------------------------

from django.core.management.base import BaseCommand
from django.db import connection


class Command(BaseCommand):
    """Management command that clears the cached columns of the man page tables."""

    help = "Drops cached data from the database"

    def handle(self, *args, **kwargs):
        """Reset the cached columns to NULL, then compact the content table."""
        with connection.cursor() as c:
            c.execute("UPDATE archmanweb_content SET html = NULL WHERE html IS NOT NULL;")
            c.execute("UPDATE archmanweb_content SET txt = NULL WHERE txt IS NOT NULL;")
            c.execute("UPDATE archmanweb_content SET description = NULL WHERE description IS NOT NULL;")
            c.execute("UPDATE archmanweb_manpage SET converted_content_id = NULL WHERE converted_content_id IS NOT NULL;")
            c.execute("VACUUM FULL archmanweb_content;")


# ------------------------------------------------------------------------------
# /archmanweb/management/commands/man_update.py
# ------------------------------------------------------------------------------
#! /usr/bin/env python3

import argparse
import concurrent.futures
import os.path
import logging
import datetime
from pathlib import PurePath
import subprocess

import chardet
import pyalpm

from archmanweb.management.utils.finder import MANDIR, ManPagesFinder

from django.core.management.base import BaseCommand
import django
from django.db import connection, transaction
from django.db.models import Count
from archmanweb.models import Package, Content, ManPage, SymbolicLink, UpdateLog, SoelimError


logger = logging.getLogger(__name__)


class UnknownManPath(Exception):
    """Raised by :func:`parse_man_path` for paths that do not look like a man page."""


def decode(text, *, encoding_hint=None):
    """Decode raw man page bytes into a string.

    The charsets are tried in order and the first one that decodes without an
    error wins.  If none succeeds, the encoding is guessed with ``chardet`` and
    undecodable bytes are replaced.

    :param text: the raw content (``bytes``)
    :param encoding_hint: optional charset name to try first (e.g. ``KOI8-R``
        taken from a directory such as ``ru.KOI8-R``); unknown names are ignored
    :returns: the decoded ``str``
    """
    # a fresh list is built on every call, so inserting the hint is safe
    charsets = ["utf-8", "ascii", "iso-8859-1", "iso-8859-9", "iso-8859-15", "cp1250", "cp1252"]
    if encoding_hint is not None:
        charsets.insert(0, encoding_hint)

    for charset in charsets:
        try:
            return text.decode(charset)
        except UnicodeDecodeError:
            pass
        except LookupError:
            # ignore invalid encoding_hint
            pass

    # NOTE(review): iso-8859-1 maps every byte value, so with the order above
    # the entries after it and this fallback are effectively never reached.
    # The order is kept as-is because changing it would change how existing
    # pages are decoded.

    # fall back to chardet and errors="replace"; chardet may report no
    # encoding at all (None) or a codec unknown to Python
    encoding = chardet.detect(text)["encoding"] or "utf-8"
    try:
        return text.decode(encoding, errors="replace")
    except LookupError:
        return text.decode("utf-8", errors="replace")


def parse_man_path(path):
    """Extract ``(name, section, lang)`` from the path of a man page file.

    Recognized layouts, relative to ``MANDIR``, are ``man*/<name>.<section>``
    (language ``"en"``) and ``<lang>/man*/<name>.<section>``.  The returned
    language may still carry an encoding suffix (e.g. ``ru.KOI8-R``).

    :param path: path of the file as a string, expected to start with ``MANDIR``
    :raises UnknownManPath: if the path has no section suffix, lies outside of
        ``MANDIR`` or does not match one of the layouts above
    """
    pp = PurePath(path)
    man_name = pp.stem
    man_section = pp.suffix[1:]  # strip the dot

    if not man_section:
        raise UnknownManPath("empty section number")

    # relative_to can succeed only if path is a subdir of MANDIR
    if not path.startswith(MANDIR):
        raise UnknownManPath
    pp = pp.relative_to(MANDIR)

    # look only at the directories: a *file* placed directly in MANDIR (or in
    # a language directory) whose name happens to start with "man" must not be
    # mistaken for a section directory
    dirs = pp.parts[:-1]
    if dirs and dirs[0].startswith("man"):
        man_lang = "en"
    elif len(dirs) > 1 and dirs[1].startswith("man"):
        man_lang = dirs[0]
    else:
        raise UnknownManPath
    return man_name, man_section, man_lang


def _split_lang(lang):
    """Split ``"ru.KOI8-R"`` into ``("ru", "KOI8-R")``; the hint is ``None`` if absent."""
    if "." in lang:
        lang, encoding_hint = lang.split(".", maxsplit=1)
        return lang, encoding_hint
    return lang, None


def update_packages(finder, *, force=False, only_repos=None):
    """Synchronize the ``Package`` table with the pacman sync databases.

    :param finder: object providing ``sync_db`` and ``pkg_exists(repo, name)``
    :param force: treat every package as updated, even if its version did not grow
    :param only_repos: optional collection of repository names to process
    :returns: list of pyalpm packages that were added or updated
    """
    updated_pkgs = []

    # update packages in the django database
    for db in finder.sync_db.get_syncdbs():
        if only_repos and db.name not in only_repos:
            continue
        logger.info("Updating packages from repository '{}'...".format(db.name))
        for pkg in db.pkgcache:
            try:
                db_package = Package.objects.get(repo=db.name, name=pkg.name)
            except Package.DoesNotExist:
                db_package = Package()
                db_package.repo = db.name
                db_package.name = pkg.name
                db_package.arch = pkg.arch
            else:
                is_newer = pyalpm.vercmp(db_package.version, pkg.version) == -1
                if not is_newer and not force:
                    # skip void update of db_package
                    continue
            updated_pkgs.append(pkg)

            # update volatile fields (this is run iff the pkg was added to updated_pkgs)
            db_package.version = pkg.version
            db_package.description = pkg.desc
            db_package.url = pkg.url
            db_package.build_date = datetime.datetime.fromtimestamp(pkg.builddate, tz=datetime.timezone.utc)
            db_package.licenses = pkg.licenses
            db_package.save()

    # delete old packages from the django database
    # (a chained .order_by() call replaces the previous ordering, so both
    # fields have to be passed in a single call)
    for db_package in Package.objects.order_by("repo", "name"):
        if not finder.pkg_exists(db_package.repo, db_package.name):
            Package.objects.filter(repo=db_package.repo, name=db_package.name).delete()

    return updated_pkgs


def update_man_pages(finder, updated_pkgs):
    """Import man pages, hardlinks and symlinks of the given packages.

    :param finder: object providing ``get_man_files(pkg)`` and
        ``get_man_contents(pkg)``; the latter yields ``(type, v1, v2)`` tuples
        with type ``"file"`` (path, content), ``"hardlink"`` (source, target)
        or ``"symlink"`` (source, target)
    :param updated_pkgs: pyalpm packages whose pages should be (re)imported
    :returns: number of imported man pages, hardlinks included
    :raises NotImplementedError: for an unknown entry type
    """
    logger.info("Updating man pages from {} packages...".format(len(updated_pkgs)))
    updated_pages = 0

    for pkg in updated_pkgs:
        db_pkg = Package.objects.filter(repo=pkg.db.name, name=pkg.name)[0]
        files = set(finder.get_man_files(pkg))
        if not files:
            # NOTE(review): pages already stored for this package are kept even
            # though it no longer lists any man files - confirm this is intended
            continue

        # set of unique keys (tuples) of pages present in the package,
        # the rest will be deleted from the database
        keys = set()

        # insert/update man pages
        for t, v1, v2 in finder.get_man_contents(pkg):
            if t == "file":
                path, content = v1, v2
                # extract info from path, check if it makes sense
                try:
                    man_name, man_section, man_lang = parse_man_path(path)
                except UnknownManPath:
                    logger.warning("Skipping path with unrecognized structure: {}".format(path))
                    continue

                # extract the encoding hint (see e.g. evim.1.ru.KOI8-R)
                man_lang, encoding_hint = _split_lang(man_lang)

                # decode the content
                content = decode(content, encoding_hint=encoding_hint)
                # django complains, the DBMS would drop it anyway
                content = content.replace("\0", "")

                if not content:
                    logger.warning("Skipping empty man page: {}".format(path))
                    continue

                key = (man_name, man_section, man_lang)
                if key in keys:
                    logger.debug("Skipping duplicate man page (maybe duplicate encoding): {}".format(path))
                    continue
                keys.add(key)

                # find or create Content instance
                try:
                    db_man = ManPage.objects.get(package_id=db_pkg.id, name=man_name, section=man_section, lang=man_lang)
                    db_content = db_man.content
                except ManPage.DoesNotExist:
                    db_man = None
                    db_content = Content()

                # update content and invalidate the cached conversions
                # NOTE(review): man_drop_cache also resets Content.description
                # and ManPage.converted_content_id - confirm they need no reset here
                db_content.raw = content
                db_content.html = None
                db_content.txt = None
                db_content.save()

                # update newly-created ManPage instance
                if db_man is None:
                    db_man = ManPage()
                    db_man.package_id = db_pkg.id
                    db_man.name = man_name
                    db_man.section = man_section
                    db_man.lang = man_lang
                    db_man.content = db_content

                    # db_man has to be saved after db_content, because django's
                    # validation is not deferrable (and db_content.id is not
                    # known until the content is saved)
                    db_man.full_clean()
                    # TODO: this might still fail if there are multiple foo.1 in different directories and same language
                    db_man.save()

                updated_pages += 1

            elif t == "hardlink":
                # hardlinks can't point to non-existent files, so they can be stored in the ManPage table
                source, target = v1, v2

                # extract info from source, check if it makes sense
                try:
                    source_name, source_section, source_lang = parse_man_path(source)
                except UnknownManPath:
                    logger.warning("Skipping hardlink with unrecognized source path: {}".format(source))
                    continue

                # extract info from target, check if it makes sense
                try:
                    target_name, target_section, target_lang = parse_man_path(target)
                except UnknownManPath:
                    logger.warning("Skipping hardlink with unrecognized target path: {}".format(target))
                    continue

                # drop encoding from the lang (ru.KOI8-R)
                source_lang, _ = _split_lang(source_lang)
                target_lang, _ = _split_lang(target_lang)

                # drop useless redirects
                if target_lang == source_lang and target_section == source_section and target_name == source_name:
                    logger.warning("Skipping hardlink from {} to {} (the base name is the same).".format(source, target))
                    continue

                source_key = (source_name, source_section, source_lang)
                if source_key in keys:
                    logger.debug("Skipping duplicate hardlink: {}".format(source))
                    continue

                # the target may have been skipped above (e.g. an empty page);
                # skip the link as well instead of aborting the whole update
                try:
                    man_target = ManPage.objects.get(package_id=db_pkg.id, name=target_name, section=target_section, lang=target_lang)
                except ManPage.DoesNotExist:
                    logger.warning("Skipping hardlink from {} to {} (the target page was not imported).".format(source, target))
                    continue
                keys.add(source_key)

                # save into database
                try:
                    man_source = ManPage.objects.get(package_id=db_pkg.id, name=source_name, section=source_section, lang=source_lang)
                except ManPage.DoesNotExist:
                    man_source = ManPage(
                        package_id=db_pkg.id,
                        name=source_name,
                        section=source_section,
                        lang=source_lang
                    )
                man_source.content_id = man_target.content_id

                # validate and save
                man_source.full_clean()
                man_source.save()

                updated_pages += 1

            elif t == "symlink":
                source, target = v1, v2

                # extract info from source, check if it makes sense
                try:
                    source_name, source_section, source_lang = parse_man_path(source)
                except UnknownManPath:
                    logger.warning("Skipping symlink with unrecognized structure: {}".format(source))
                    continue

                if target.startswith("/"):
                    # make target relative to "/"
                    target = target[1:]
                else:
                    # make target full path
                    ppt = PurePath(source).parent / target
                    # normalize to remove any '..'
                    target = os.path.normpath(ppt)

                # extract info from target, check if it makes sense
                try:
                    target_name, target_section, target_lang = parse_man_path(target)
                except UnknownManPath:
                    logger.warning("Skipping symlink with unknown target: {}".format(target))
                    continue

                # drop encoding from the lang (ru.KOI8-R)
                source_lang, _ = _split_lang(source_lang)
                target_lang, _ = _split_lang(target_lang)

                # drop cross-language symlinks
                if target_lang != source_lang:
                    logger.warning("Skipping cross-language symlink from {} to {}".format(source, target))
                    continue

                # drop useless redirects
                if target_section == source_section and target_name == source_name:
                    logger.warning("Skipping symlink from {} to {} (the base name is the same).".format(source, target))
                    continue

                # save into database
                # NOTE(review): SymbolicLink rows of links that disappeared from
                # the package are never deleted here - confirm whether a cleanup is needed
                try:
                    db_link = SymbolicLink.objects.get(package_id=db_pkg.id, lang=source_lang, from_section=source_section, from_name=source_name)
                except SymbolicLink.DoesNotExist:
                    db_link = SymbolicLink(
                        package_id=db_pkg.id,
                        lang=source_lang,
                        from_section=source_section,
                        from_name=source_name,
                    )
                db_link.to_section = target_section
                db_link.to_name = target_name

                # validate and save
                db_link.full_clean()
                db_link.save()

            else:
                raise NotImplementedError("Unknown tarball entry type: {}".format(t))

        # delete man pages whose files no longer exist
        for db_man in ManPage.objects.filter(package_id=db_pkg.id):
            if (db_man.name, db_man.section, db_man.lang) not in keys:
                ManPage.objects.filter(package_id=db_pkg.id, name=db_man.name, section=db_man.section, lang=db_man.lang).delete()

    # delete unreferenced rows from Content
    Content.objects.filter(manpage_content__isnull=True).delete()

    return updated_pages


class Command(BaseCommand):
    """Management command importing packages and their man pages into the database."""

    help = "Update man pages in the Django database"

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

        # TODO: use Django settings to configure the logger
        # https://docs.djangoproject.com/en/3.1/topics/logging/
        # (configures the *root* logger; named differently from the
        # module-level ``logger`` to avoid shadowing it)
        root_logger = logging.getLogger()
        root_logger.setLevel(logging.INFO)
        handler = logging.StreamHandler()
        formatter = logging.Formatter("{levelname:8} {message}", style="{")
        handler.setFormatter(formatter)
        root_logger.addHandler(handler)

    def add_arguments(self, parser):
        """
        :param parser: an instance of :py:class:`argparse.ArgumentParser`
        """
        parser.add_argument("--force", action="store_true",
                            help="force an import of man pages from all packages, even if they were not updated recently")
        parser.add_argument("--only-repos", action="store", nargs="+", metavar="NAME",
                            help="import packages (and man pages) only from these repositories")
        parser.add_argument("--only-packages", action="store", nargs="+", metavar="NAME",
                            help="import man pages only from these packages")
        parser.add_argument("--cache-dir", action="store", default="./.cache/",
                            help="path to the cache directory (default: %(default)s)")
        parser.add_argument("--keep-tarballs", action="store_true",
                            help="keep downloaded package tarballs in the cache directory")
        parser.add_argument("--workers", type=int, default=0,
                            help="number of workers for parallel processing (0 = use 1 worker per CPU core; default: %(default)s)")

    def handle(self, *args, **kwargs):
        """Run the update and store its duration and statistics in ``UpdateLog``."""
        start = datetime.datetime.now(tz=datetime.timezone.utc)
        updated_pkgs, count_updated_pages = self.do_update(**kwargs)
        end = datetime.datetime.now(tz=datetime.timezone.utc)

        # log update
        log = UpdateLog()
        log.timestamp = start
        log.duration = end - start
        log.updated_pkgs = len(updated_pkgs)
        log.updated_pages = count_updated_pages
        log.stats_count_man_pages = ManPage.objects.count()
        log.stats_count_symlinks = SymbolicLink.objects.count()
        log.stats_count_all_pkgs = Package.objects.count()
        log.stats_count_pkgs_with_mans = ManPage.objects.aggregate(Count("package_id", distinct=True))["package_id__count"]
        log.save()

    def do_update(self, *, cache_dir, workers,
                  force=False,
                  only_repos=None,
                  only_packages=None,
                  keep_tarballs=False,
                  **kwargs):
        """Update packages and man pages, convert new pages to text and vacuum.

        :param cache_dir: path to the cache directory passed to ``ManPagesFinder``
        :param workers: number of conversion workers (0 lets the executor decide)
        :param force: passed to :func:`update_packages`
        :param only_repos: passed to :func:`update_packages`
        :param only_packages: if given, only pages of these packages are imported
        :param keep_tarballs: keep the downloaded packages in the cache
        :returns: tuple ``(updated_pkgs, count_updated_pages)``
        """
        finder = ManPagesFinder(cache_dir)
        finder.refresh()

        # everything in a single transaction
        with transaction.atomic():
            # NOTE(review): with only_packages set, the Package rows of *all*
            # packages are still brought up to date here - confirm this is intended
            updated_pkgs = update_packages(finder, force=force, only_repos=only_repos)
            if only_packages is None:
                count_updated_pages = update_man_pages(finder, updated_pkgs)
            else:
                count_updated_pages = update_man_pages(finder, [p for p in updated_pkgs if p.name in only_packages])

        # this is called outside of the transaction, so that the cache can be reused on errors
        if keep_tarballs is False:
            finder.clear_pkgcache()

        # convert manual pages to plain-text
        # (one transaction per update, otherwise we might hit memory allocation error)
        def worker(man_id):
            try:
                man = ManPage.objects.get(id=man_id)
                man.get_converted("txt")
            except SoelimError as e:
                logger.error("SoelimError ({}) while converting {}.{}.{} to txt".format(str(e), man.name, man.section, man.lang))
            except subprocess.CalledProcessError as e:
                logger.error("CalledProcessError while converting {}.{}.{} to txt:\nreturncode = {}\nstderr = {}"
                             .format(man.name, man.section, man.lang, e.returncode, e.stderr))
            except Exception:
                # the results of executor.map() were never inspected, so any
                # other error used to vanish silently - at least log it
                logger.exception("Unexpected error while converting man page with id {} to txt".format(man_id))

        # prepare man page IDs which need to be converted
        # (queryset needs to be a list for multiprocessing to work)
        queryset = ManPage.objects.only("package", "lang", "content_id", "converted_content_id").filter(content__txt=None).values_list("id", flat=True)
        queryset = list(queryset)

        # all existing database connections have to be closed before forking,
        # each process will then recreate its own connection:
        # https://stackoverflow.com/a/10684672
        django.db.connections.close_all()

        # parallel processing of the queryset
        # FIXME: a ProcessPoolExecutor deadlocked here after the code was moved
        # into the Command class (the database connections are closed just
        # above, which used to work before), hence the thread pool
        #with concurrent.futures.ProcessPoolExecutor(max_workers=workers or None) as executor:
        with concurrent.futures.ThreadPoolExecutor(max_workers=workers or None) as executor:
            # drain the iterator so that every task has finished (and logged
            # its errors) before we continue
            for _ in executor.map(worker, queryset):
                pass

        # VACUUM cannot run inside a transaction block
        if updated_pkgs or only_packages is not None:
            logger.info("Running VACUUM FULL ANALYZE on our tables...")
            for Model in [Package, Content, ManPage, SymbolicLink]:
                table = Model.objects.model._meta.db_table
                logger.info("--> {}".format(table))
                with connection.cursor() as cursor:
                    cursor.execute("VACUUM FULL ANALYZE {};".format(table))

        return updated_pkgs, count_updated_pages


# ------------------------------------------------------------------------------
# /archmanweb/management/utils/finder.py
# (starts here; the rest of its shebang continues on the next dump line)
# ------------------------------------------------------------------------------
# 1 | #!
import os
from email.utils import parsedate_to_datetime
from pathlib import Path
import shutil
import datetime
import logging
import gzip

import requests
import pycman
import pyalpm
import xtarfile as tarfile  # wrapper around tarfile - needed for zst packages

logger = logging.getLogger(__name__)

# Template of the private pacman.conf written into the database directory.
# The placeholders are filled in by ManPagesFinder.init_sync_db().
PACCONF = """
[options]
RootDir = /
DBPath = {pacdbpath}
CacheDir = {cachedir}
LogFile = {pacdbpath}
# Use system GPGDir so that we don't have to populate it
GPGDir = /etc/pacman.d/gnupg/
Architecture = {arch}

[core]
Include = /etc/pacman.d/mirrorlist

[extra]
Include = /etc/pacman.d/mirrorlist

[community]
Include = /etc/pacman.d/mirrorlist

[multilib]
Include = /etc/pacman.d/mirrorlist
"""

# Path prefix (relative to the package root) of files treated as man pages.
MANDIR = "usr/share/man/"


def _strip_gz(path):
    """
    Return `path` without a trailing ".gz" suffix (unchanged if there is none).
    """
    if path.endswith(".gz"):
        return path[:-3]
    return path


class ManPagesFinder:
    """
    Finds and extracts manual pages from packages in the pacman sync databases.

    All state lives below `topdir`: a private pacman database (including the
    downloaded "files" databases) and a cache of downloaded package tarballs.
    """

    def __init__(self, topdir):
        topdir = Path(topdir).resolve()
        self.dbpath = topdir / "pacdbpath"
        self.cachedir = topdir / "cached_packages"

        os.makedirs(self.dbpath, exist_ok=True)
        os.makedirs(self.cachedir, exist_ok=True)

        self.sync_db = self.init_sync_db(PACCONF, arch="x86_64")
        self.files_db = {}
        self.init_files_db(self.sync_db)
        # open tar handles of the files databases, keyed by the database path
        self._cached_tarfiles = {}

    def init_sync_db(self, config, arch):
        """
        Write the pacman configuration into the database directory and return
        the handle created by pycman from it.
        """
        confpath = self.dbpath / "pacman.conf"
        # the context manager closes the file even if formatting/writing fails
        with open(confpath, "w") as f:
            f.write(config.format(pacdbpath=self.dbpath,
                                  cachedir=self.cachedir,
                                  arch=arch))
        return pycman.config.init_with_config(confpath)

    def init_files_db(self, pacdb):
        """
        Register the local path and modification timestamp of the files
        database of every sync database (timestamp 0 if not downloaded yet).
        """
        dbpath = self.dbpath / "files"
        os.makedirs(dbpath, exist_ok=True)
        for db in pacdb.get_syncdbs():
            files_db = dbpath / "{}.files".format(db.name)
            if files_db.exists():
                local_timestamp = os.path.getmtime(files_db)
            else:
                local_timestamp = 0
            self.files_db.setdefault(db.name, {
                "path": files_db,
                "timestamp": local_timestamp,
            })

    @staticmethod
    def _parse_http_date(value):
        """
        Convert the value of an HTTP date header to a POSIX timestamp.

        The parser from `email.utils` is used instead of `strptime`, because
        the `%a`, `%b` and `%X` directives depend on the process locale.
        """
        parsed = parsedate_to_datetime(value)
        if parsed.tzinfo is None:
            # HTTP dates are always expressed in GMT
            parsed = parsed.replace(tzinfo=datetime.timezone.utc)
        return parsed.timestamp()

    @staticmethod
    def _download(url, path):
        """
        Download `url` to `path`.

        The data is streamed into a temporary ".part" file which replaces
        `path` only after the whole download succeeded, so a failed transfer
        never leaves a truncated database behind.
        """
        tmp_path = path.with_name(path.name + ".part")
        with requests.get(url, stream=True) as r:
            # do not store an error page as the database
            r.raise_for_status()
            with open(tmp_path, "wb") as f:
                for chunk in r.iter_content(chunk_size=4096):
                    f.write(chunk)
        os.replace(tmp_path, path)

    # TODO: working with the files databases is not implemented in pyalpm: https://gitlab.archlinux.org/archlinux/pyalpm/-/issues/6
    # TODO: check integrity of the downloaded files
    def _refresh_files_db(self, db):
        """
        Download the files database of `db` from the first server that
        provides it, if the remote copy is newer than the local one.

        Raises an exception if no server provides the files database.
        """
        for server in db.servers:
            for ext in [".tar.gz", ".tar.xz"]:
                url = server + "/" + db.name + ".files" + ext
                r = requests.head(url)
                if r.status_code != 200:
                    continue

                # parse remote timestamp
                remote_timestamp = self._parse_http_date(r.headers["last-modified"])

                # get local things
                local_db = self.files_db[db.name]
                local_timestamp = local_db["timestamp"]
                _path = Path(local_db["path"]).parent / (db.name + ".files" + ext)

                # check if we need to update
                if remote_timestamp > local_timestamp:
                    self._download(url, _path)

                    # update timestamp
                    local_db["timestamp"] = remote_timestamp

                    # drop from cache (and release the stale file handle)
                    stale = self._cached_tarfiles.pop(local_db["path"], None)
                    if stale is not None:
                        stale.close()

                # create or update the symlink
                if Path(local_db["path"]).is_symlink():
                    os.remove(local_db["path"])
                os.symlink(db.name + ".files" + ext, local_db["path"])

                # return on success
                return

        raise Exception("Failed to sync files database for '{}'.".format(db.name))

    # sync databases like pacman -Sy + -Fs
    def _refresh_sync_db(self, pacdb, force=False):
        for db in pacdb.get_syncdbs():
            # since this is private pacman database, there is no locking
            db.update(force)

            # update files database
            self._refresh_files_db(db)

    # sync all
    def refresh(self):
        """
        Synchronize the sync databases and the files databases.
        """
        try:
            logger.info("Syncing pacman database (x86_64)...")
            self._refresh_sync_db(self.sync_db)
        except pyalpm.error:
            logger.exception("Failed to sync pacman database.")
            raise

    def clear_pkgcache(self):
        """
        Remove all downloaded package tarballs.
        """
        # TODO: we should call pyalpm to do the equivalent of "pacman -Scc", but it's not implemented there
        shutil.rmtree(self.cachedir)
        # keep the invariant established in __init__: the directory exists
        os.makedirs(self.cachedir, exist_ok=True)

    def get_man_files(self, pkg, repo=None):
        """
        Yield the paths of all man files listed for `pkg` in the files
        database. The repository is looked up if `repo` is not given.
        """
        if repo is None:
            repo = [db for db in self.sync_db.get_syncdbs() if db.get_pkg(pkg.name)][0].name
        local_db = self.files_db[repo]["path"]

        # open the database only on a cache miss (the default argument of
        # dict.setdefault would be evaluated, i.e. opened, on every call)
        t = self._cached_tarfiles.get(local_db)
        if t is None:
            t = tarfile.open(str(local_db.resolve()), "r")
            self._cached_tarfiles[local_db] = t
        files = t.extractfile("{}-{}/files".format(pkg.name, pkg.version))

        for line in files.readlines():
            line = line.decode("utf-8").rstrip()
            if line.startswith(MANDIR) and not line.endswith("/"):
                yield line

    def get_all_man_files(self):
        """
        Yield `(pkg, list of man files)` for every package in all databases.
        """
        for db in self.sync_db.get_syncdbs():
            for pkg in db.pkgcache:
                yield pkg, list(self.get_man_files(pkg, db.name))

    def _download_package(self, pkg):
        """
        Download the tarball of `pkg` into the package cache.
        """
        class Options:
            downloadonly = True
            nodeps = True
        t = pycman.transaction.init_from_options(self.sync_db, Options)

        # reset callback functions which print lots of text into the logs
        def _void_cb(*args):
            pass
        self.sync_db.dlcb = _void_cb
        self.sync_db.eventcb = _void_cb
        self.sync_db.questioncb = _void_cb
        self.sync_db.progresscb = _void_cb

        t.add_pkg(pkg)
        if not pycman.transaction.finalize(t):
            raise Exception("Pycman transaction failed: {}".format(t))

    def _cached_tarballs(self, pattern):
        """
        Return the sorted list of completely downloaded tarballs in the
        package cache which match the glob `pattern`.
        """
        return sorted(f for f in self.cachedir.glob(pattern) if not str(f).endswith(".part"))

    def get_man_contents(self, pkg):
        """
        Yield `(kind, path, data)` for every man file of `pkg`, where `kind`
        is "file", "symlink" or "hardlink" and `data` is the content of the
        file or the target of the link. The ".gz" suffix is stripped from
        paths and link targets and gzipped contents are decompressed.

        Note: the content is yielded as `bytes`, its decoding is not a priori known
        """
        # first check if there are any man files at all to avoid useless downloads
        man_files = list(self.get_man_files(pkg))
        if not man_files:
            return

        # get the pkg tarball
        _pattern = "{}-{}-{}.pkg.tar.*".format(pkg.name, pkg.version, pkg.arch)
        if not self._cached_tarballs(_pattern):
            self._download_package(pkg)
        tarballs = self._cached_tarballs(_pattern)
        assert len(tarballs) > 0, _pattern
        tarball = tarballs[0]

        # extract man files
        with tarfile.open(str(tarball), "r") as t:
            hardlinks = []
            for file in man_files:
                info = t.getmember(file)
                # Hardlinks on the filesystem level are indifferentiable from normal files,
                # but in tar the first file added is "file" and the subsequent are hardlinks.
                # To make sure that normal files are processed first, we postpone yielding of
                # the hardlinks.
                if info.islnk():
                    hardlinks.append(("hardlink", _strip_gz(file), _strip_gz(info.linkname)))
                elif info.issym():
                    yield "symlink", _strip_gz(file), _strip_gz(info.linkname)
                else:
                    man = t.extractfile(file).read()
                    if file.endswith(".gz"):
                        file = file[:-3]
                        man = gzip.decompress(man)
                    yield "file", file, man
            yield from hardlinks

    def get_all_man_contents(self):
        """
        Yield `(pkg, kind, path, data)` for the man files of all packages.
        """
        for db in self.sync_db.get_syncdbs():
            for pkg in db.pkgcache:
                for v1, v2, v3 in self.get_man_contents(pkg):
                    yield pkg, v1, v2, v3

    def pkg_exists(self, repo, pkgname):
        """
        Return True if the sync database `repo` contains the package `pkgname`.
        """
        db = [db for db in self.sync_db.get_syncdbs() if db.name == repo][0]
        return db.get_pkg(pkgname) is not None
/archmanweb/migrations/0001_initial.py: -------------------------------------------------------------------------------- 1 | # Generated by Django 3.1.2 on 2020-10-25 10:26 2 | 3 | import archmanweb.models 4 | import django.contrib.postgres.fields 5 | import django.contrib.postgres.indexes 6 | from django.db import migrations, models 7 | import django.db.models.deletion 8 | 9 | 10 | class Migration(migrations.Migration): 11 | 12 | initial = True 13 | 14 | dependencies = [ 15 | ] 16 | 17 | operations = [ 18 | migrations.CreateModel( 19 | name='Content', 20 | fields=[ 21 | ('id', models.AutoField(primary_key=True, serialize=False)), 22 | ('raw', models.TextField()), 23 | ('html', models.TextField(blank=True, null=True)), 24 | ('txt', models.TextField(blank=True, null=True)), 25 | ('description', models.TextField(blank=True, null=True)), 26 | ], 27 | ), 28 | migrations.CreateModel( 29 | name='ManPage', 30 | fields=[ 31 | ('id', models.AutoField(primary_key=True, serialize=False)), 32 | ('name', models.TextField()), 33 | ('section', models.TextField()), 34 | ('lang', models.TextField(default='en')), 35 | ], 36 | ), 37 | migrations.CreateModel( 38 | name='Package', 39 | fields=[ 40 | ('id', models.AutoField(primary_key=True, serialize=False)), 41 | ('repo', models.TextField()), 42 | ('name', models.TextField()), 43 | ('version', models.TextField()), 44 | ('arch', models.TextField()), 45 | ('description', models.TextField()), 46 | ('url', models.TextField(null=True)), 47 | ('build_date', models.DateTimeField()), 48 | ('licenses', django.contrib.postgres.fields.ArrayField(base_field=models.TextField(), size=None)), 49 | ], 50 | ), 51 | migrations.CreateModel( 52 | name='UpdateLog', 53 | fields=[ 54 | ('id', models.AutoField(primary_key=True, serialize=False)), 55 | ('timestamp', models.DateTimeField()), 56 | ('duration', models.DurationField()), 57 | ('updated_pkgs', models.IntegerField()), 58 | ('updated_pages', models.IntegerField()), 59 | ('stats_count_man_pages', 
models.IntegerField()), 60 | ('stats_count_symlinks', models.IntegerField()), 61 | ('stats_count_all_pkgs', models.IntegerField()), 62 | ('stats_count_pkgs_with_mans', models.IntegerField()), 63 | ('convert_txt_returncode', models.IntegerField(null=True)), 64 | ], 65 | ), 66 | migrations.CreateModel( 67 | name='SymbolicLink', 68 | fields=[ 69 | ('id', models.AutoField(primary_key=True, serialize=False)), 70 | ('lang', models.TextField(default='en')), 71 | ('from_section', models.TextField()), 72 | ('from_name', models.TextField()), 73 | ('to_section', models.TextField()), 74 | ('to_name', models.TextField()), 75 | ('package', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='archmanweb.package')), 76 | ], 77 | ), 78 | migrations.AddIndex( 79 | model_name='package', 80 | index=django.contrib.postgres.indexes.GinIndex(fields=['name'], name='package_name', opclasses=['gin_trgm_ops']), 81 | ), 82 | migrations.AddIndex( 83 | model_name='package', 84 | index=archmanweb.models.SearchVectorIndex(fields=['description'], name='package_description_search'), 85 | ), 86 | migrations.AlterUniqueTogether( 87 | name='package', 88 | unique_together={('name', 'repo')}, 89 | ), 90 | migrations.AddField( 91 | model_name='manpage', 92 | name='content', 93 | field=models.ForeignKey(on_delete=django.db.models.deletion.DO_NOTHING, related_name='manpage_content', to='archmanweb.content'), 94 | ), 95 | migrations.AddField( 96 | model_name='manpage', 97 | name='converted_content', 98 | field=models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.SET_NULL, related_name='manpage_converted_content', to='archmanweb.content'), 99 | ), 100 | migrations.AddField( 101 | model_name='manpage', 102 | name='package', 103 | field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='archmanweb.package'), 104 | ), 105 | migrations.AddIndex( 106 | model_name='content', 107 | index=archmanweb.models.SearchVectorIndex(fields=['description'], 
name='content_description_search'), 108 | ), 109 | migrations.AddIndex( 110 | model_name='symboliclink', 111 | index=django.contrib.postgres.indexes.GinIndex(fields=['from_name'], name='symboliclink_from_name', opclasses=['gin_trgm_ops']), 112 | ), 113 | migrations.AlterUniqueTogether( 114 | name='symboliclink', 115 | unique_together={('package', 'lang', 'from_section', 'from_name')}, 116 | ), 117 | migrations.AlterIndexTogether( 118 | name='symboliclink', 119 | index_together={('from_name', 'lang'), ('from_section', 'from_name'), ('from_section', 'from_name', 'lang')}, 120 | ), 121 | migrations.AddIndex( 122 | model_name='manpage', 123 | index=django.contrib.postgres.indexes.GinIndex(fields=['name'], name='manpage_name', opclasses=['gin_trgm_ops']), 124 | ), 125 | migrations.AlterUniqueTogether( 126 | name='manpage', 127 | unique_together={('package', 'section', 'name', 'lang')}, 128 | ), 129 | migrations.AlterIndexTogether( 130 | name='manpage', 131 | index_together={('section', 'name', 'lang'), ('name', 'lang', 'section'), ('section', 'name'), ('name', 'lang'), ('lang', 'name', 'section')}, 132 | ), 133 | ] 134 | -------------------------------------------------------------------------------- /archmanweb/migrations/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/archlinux/archmanweb/5fcd985314a8f07d151a46ace71ce152e67450a6/archmanweb/migrations/__init__.py -------------------------------------------------------------------------------- /archmanweb/models.py: -------------------------------------------------------------------------------- 1 | import re 2 | from pathlib import PurePath 3 | 4 | from django.db import models 5 | from django.contrib.postgres.indexes import GinIndex 6 | from django.contrib.postgres.fields import ArrayField 7 | from django.core.exceptions import ValidationError 8 | 9 | from .utils import reverse_man_url, mandoc_convert, postprocess, extract_description 10 | 
# django does not support functional indexes (indexes on expressions) out of the box,
# otherwise we could use just this:
#       from django.contrib.postgres.search import SearchVector
#       GinIndex(fields=[SearchVector("description", config="english")])
# see https://code.djangoproject.com/ticket/26167
# this is a hack inspired by this blog post:
# https://vxlabs.com/2018/01/31/creating-a-django-migration-for-a-gist-gin-index-with-a-special-index-operator/
class SearchVectorIndex(GinIndex):
    """
    GIN index on `to_tsvector(config, column)` for full-text search.
    """

    def __init__(self, config="english", *args, **kwargs):
        self.config = config
        super().__init__(*args, **kwargs)

    def create_sql(self, model, schema_editor, **kwargs):
        statement = super().create_sql(model, schema_editor, **kwargs)
        # this works only for one column, otherwise we get a list inside to_tsvector
        # note: coalesce is used because Django uses it even for SearchVector on one column
        statement.template = "CREATE INDEX %(name)s ON %(table)s%(using)s (to_tsvector('" + self.config + "'::regconfig, COALESCE(%(columns)s, '')))%(extra)s"
        return statement


class Package(models.Model):
    id = models.AutoField(primary_key=True)
    repo = models.TextField()
    name = models.TextField()
    version = models.TextField()
    arch = models.TextField()

    # non-essential attributes (useful for search etc.)
    description = models.TextField()
    url = models.TextField(null=True)  # nullable in pacman
    build_date = models.DateTimeField()
    licenses = ArrayField(models.TextField())

    class Meta:
        unique_together = (
            ('name', 'repo'),
        )
        indexes = (
            GinIndex(name="package_name", fields=["name"], opclasses=["gin_trgm_ops"]),
            SearchVectorIndex(name="package_description_search", fields=["description"], config="english"),
        )

    def __str__(self):
        # the template must contain a placeholder for every formatted value,
        # an empty template always produces an empty string
        return "<Package: arch={}, repo={}, name={}, version={}>" \
                .format(self.arch, self.repo, self.name, self.version)


class SoelimError(Exception):
    """
    Raised when a .so macro cannot be resolved.
    """
    pass


class Content(models.Model):
    id = models.AutoField(primary_key=True)

    # raw content of the man page
    raw = models.TextField()

    # cached HTML version of the manual
    # (only a fragment, not the whole page served to users)
    html = models.TextField(blank=True, null=True)

    # plain-text version of the content - should be always present to make full-text search possible
    txt = models.TextField(blank=True, null=True)

    # short plain-text description for full-text ("apropos-like") search
    description = models.TextField(blank=True, null=True)

    class Meta:
        indexes = (
            SearchVectorIndex(name="content_description_search", fields=["description"], config="english"),
        )


class ManPage(models.Model):
    # would be created automatically anyway
    id = models.AutoField(primary_key=True)

    # package containing the man page
    # NOTE: django emulates ON DELETE, it is not added to the SQL
    package = models.ForeignKey(Package, on_delete=models.CASCADE)

    # man page name
    name = models.TextField()

    # section number (remember that there are multi-character sections like 3p, 3am, 3perl, ...)
    section = models.TextField()

    # language tag
    lang = models.TextField(default="en")

    # the original content of this manual
    content = models.ForeignKey(Content, on_delete=models.DO_NOTHING, related_name="manpage_content")

    # shortcut for "hardlinks" due to the .so macro
    # (this significantly reduces storage due to avoiding duplicate HTML and txt)
    converted_content = models.ForeignKey(Content, on_delete=models.SET_NULL, blank=True, null=True, related_name="manpage_converted_content")

    class Meta:
        unique_together = (
            ('package', 'section', 'name', 'lang'),
        )
        index_together = (
            # we need all orders for the listings' ordering
            ('name', 'lang', 'section'),
            ('section', 'name', 'lang'),
            ('lang', 'name', 'section'),
            # for optional 'language' and for filter in 'links to other sections'
            ('section', 'name'),
            # for optional 'section' and for filter in 'links to other sections'
            ('name', 'lang'),
        )
        indexes = [GinIndex(name="manpage_name", fields=["name"], opclasses=["gin_trgm_ops"])]

    def clean(self):
        if not self.name:
            raise ValidationError("Man name cannot be empty.")
        if not self.section:
            raise ValidationError("Man section cannot be empty.")
        if "." in self.section:
            raise ValidationError("Man section cannot contain dots.")
        if "." in self.lang:
            raise ValidationError("Language tag cannot contain dots.")

    # this should always be used instead of `self.content.<format>` to load only
    # the specified field (django does not support auto-defer fields)
    def get_content(self, format, from_converted=None):
        """
        Returns the field `format` of the Content row of this manual.

        The row referenced by `converted_content` is used by default for all
        formats except "raw"; `from_converted` overrides the choice.
        """
        assert hasattr(Content, format)
        if from_converted is None:
            from_converted = format != "raw"
        if from_converted is True:
            content_id = self.converted_content_id
        else:
            content_id = self.content_id
        return Content.objects.values_list(format, flat=True).get(id=content_id)

    def set_content(self, format, text):
        """
        Stores `text` in the field `format` of the converted Content row.
        """
        assert hasattr(Content, format)
        assert format != "raw", "the raw content should not be set with set_content"
        Content.objects.filter(id=self.converted_content_id).update(**{format: text})

    def resolve_so_link(self):
        """
        Detects if the manual is nothing but a "hardlink" to some different page
        using the .so macro.

        Effects:
        - updates self.converted_content_id as necessary
        - raises SoelimError if there is a .so macro which could not be resolved
        """
        if self.converted_content_id is None:
            self.converted_content_id = self.content_id
        else:
            return

        # strip comments, whitespace etc.
        stripped = re.sub(r'^\.\\".*', "", self.get_content("raw"), flags=re.MULTILINE)
        stripped = stripped.strip()

        # eliminate the '.so' macro
        if re.fullmatch(r"^\.so [A-Za-z0-9@._+\-:\[\]\/]+\s*$", stripped):
            path = stripped.split()[1]
            if path.endswith('.gz'):
                path = path[:-3]
            pp = PurePath(path)
            target_name = pp.stem
            target_section = pp.suffix[1:]  # strip the dot

            # There are actually packages redirecting their manuals to other packages,
            # e.g. shorewall6 -> shorewall. The attribution info provided on the page
            # isn't entirely correct, but that's what the authors intended...
            query = ManPage.objects.filter(section=target_section, name=target_name, lang=self.lang).values("content_id", "package_id")[:2]
            query = list(query)

            if len(query) == 0:
                raise SoelimError("unknown target page: {}".format(stripped.split()[1]))
            elif len(query) == 1:
                self.converted_content_id = query[0]["content_id"]
            else:
                # if the query is ambiguous, the only thing we can try is to check package_id
                try:
                    cid = ManPage.objects.values_list("content_id", flat=True) \
                            .get(section=target_section, name=target_name, lang=self.lang, package_id=self.package_id)
                except ManPage.DoesNotExist:
                    raise SoelimError("ambiguous target page: {}".format(stripped.split()[1]))
                self.converted_content_id = cid

        # save changes to converted_content_id
        self.save()

    def get_preprocessed_content(self, *, visited_ids=None, level=0):
        """
        Performs a recursive elimination of the .so macro and returns the final
        content.

        Effects:
        - calls self.resolve_so_link()
        - raises SoelimError if there is a .so macro pointing to an unknown
          page, there is an inclusion cycle or the recursion depth limit
          has been exceeded
        """
        if visited_ids is None:
            visited_ids = {self.id}
        else:
            if self.id in visited_ids:
                raise SoelimError("inclusion cycle detected")
            elif level > 100:
                raise SoelimError("recursion depth exceeded")

        # resolve "hardlinks" using the .so macro
        self.resolve_so_link()

        # always take from converted, even "hardlinks" may be included in other pages
        content = self.get_content("raw", from_converted=True)

        def repl(match):
            target = match.group("target")
            # pages are stored without the compression suffix
            # (consistent with resolve_so_link)
            path = target
            if path.endswith('.gz'):
                path = path[:-3]
            pp = PurePath(path)
            target_name = pp.stem
            target_section = pp.suffix[1:]  # strip the dot

            # mandoc uses this fallback for invalid references
            fallback = "See the file {}.".format(target)

            # There are actually packages redirecting their manuals to other packages,
            # e.g. shorewall6 -> shorewall. The attribution info provided on the page
            # isn't entirely correct, but that's what the authors intended...
            mans_count = ManPage.objects.filter(section=target_section, name=target_name, lang=self.lang).count()

            if mans_count == 0:
                return fallback
            elif mans_count == 1:
                man = ManPage.objects.get(section=target_section, name=target_name, lang=self.lang)
            else:
                # if the query is ambiguous, the only thing we can try is to check package_id
                try:
                    man = ManPage.objects.get(section=target_section, name=target_name, lang=self.lang, package_id=self.package_id)
                except ManPage.DoesNotExist:
                    return fallback

            return man.get_preprocessed_content(visited_ids=visited_ids | {self.id}, level=level + 1)

        # resolve the remaining .so file inclusions, apply mandoc-style fallback
        # (the named group "target" is read by repl)
        content = re.sub(r"^\.so (?P<target>[A-Za-z0-9@._+\-:\[\]\/]+)\s*$", repl, content, flags=re.MULTILINE)
        return content

    def get_converted(self, output_type):
        """
        Returns the manual converted to `output_type` ("html" or "txt").

        The conversion is done only if the result is not stored yet; the
        result (and for "txt" also the extracted description) is then saved
        in the converted Content row.
        """
        assert output_type in {"html", "txt"}

        self.resolve_so_link()

        # convert the man page to HTML/txt if not already done
        content = self.get_content(output_type)
        if content is None:
            content = self.get_preprocessed_content()
            content = mandoc_convert(content, output_type, self.lang)
            content = postprocess(content, output_type, self.lang)
            self.set_content(output_type, content)

            if output_type == "txt":
                # update plain-text description
                description = extract_description(content, self.lang)
                Content.objects.filter(id=self.converted_content_id).update(description=description)

        return content


class SymbolicLink(models.Model):
    # would be created automatically anyway
    id = models.AutoField(primary_key=True)

    # package containing the symlink
    # NOTE: django emulates ON DELETE, it is not added to the SQL
    package = models.ForeignKey(Package, on_delete=models.CASCADE)

    # language tag (same for the source and target)
    lang = models.TextField(default="en")

    # source section number
    from_section = models.TextField()

    # source man page name
    from_name = models.TextField()

    # target section number
    to_section = models.TextField()

    # target man page name
    to_name = models.TextField()

    class Meta:
        unique_together = (
            ('package', 'lang', 'from_section', 'from_name'),
        )
        index_together = (
            # for checks in _parse_man_name_section_lang
            ('from_section', 'from_name'),
            ('from_section', 'from_name', 'lang'),
            # for checks in try_symlink_or_404
            ('from_name', 'lang'),
        )
        indexes = [GinIndex(name="symboliclink_from_name", fields=["from_name"], opclasses=["gin_trgm_ops"])]

    def __str__(self):
        # the template must contain a placeholder for every formatted value,
        # an empty template always produces an empty string
        return "<SymbolicLink: package={}, lang={}, from_section={}, from_name={}, to_section={}, to_name={}>" \
                .format(self.package, self.lang, self.from_section, self.from_name, self.to_section, self.to_name)

    def clean(self):
        # either the section or name must be different
        if self.from_section == self.to_section and self.from_name == self.to_name:
            raise ValidationError("Symbolic link cannot be to the same name and section.")
        if "." in self.lang:
            raise ValidationError("Language tag cannot contain dots.")


class UpdateLog(models.Model):
    id = models.AutoField(primary_key=True)

    timestamp = models.DateTimeField()
    duration = models.DurationField()
    updated_pkgs = models.IntegerField()
    updated_pages = models.IntegerField()

    # record also history of statistics after each update
    stats_count_man_pages = models.IntegerField()
    stats_count_symlinks = models.IntegerField()
    stats_count_all_pkgs = models.IntegerField()
    stats_count_pkgs_with_mans = models.IntegerField()

    # return code of the convert_txt program
    convert_txt_returncode = models.IntegerField(null=True)

    class HtmlTableConfig:
        # columns[i] is labelled with descriptions[i]
        columns = (
            "timestamp",
            "duration",
            "updated_pkgs",
            "updated_pages",
        )
        descriptions = (
            "Time (UTC)",
            "Duration",
            "Updated packages",
            "Updated man pages",
        )
ul { 41 | list-style: square; 42 | } 43 | 44 | code { 45 | font-family: monospace, monospace; 46 | background: #ffd; 47 | padding: 0.15em 0.25em; 48 | } 49 | 50 | pre { 51 | font-family: monospace, monospace; 52 | border: 1px solid #bdb; 53 | background: #dfd; 54 | padding: 0.5em; 55 | margin: 1em; 56 | overflow: auto; 57 | white-space: pre-wrap; 58 | } 59 | 60 | pre code { 61 | display: block; 62 | background: none; 63 | overflow: auto; 64 | white-space: pre-wrap; 65 | } 66 | 67 | nav { 68 | margin-bottom: 1em; 69 | } 70 | 71 | /* forms and input styling */ 72 | form p { 73 | margin: 0.5em 0; 74 | } 75 | 76 | form ul.errorlist { 77 | color: red; 78 | margin: 0.5em 0; 79 | } 80 | 81 | fieldset { 82 | border: 0; 83 | } 84 | 85 | label { 86 | vertical-align: top; 87 | display: inline-block; 88 | } 89 | 90 | input { 91 | vertical-align: middle; 92 | } 93 | 94 | input[type=text], 95 | input[type=search] { 96 | padding: 0.10em; 97 | } 98 | 99 | input[type=submit] { 100 | padding: 0.1em 0.6em; 101 | } 102 | 103 | select[multiple] { 104 | padding: 1px 0; 105 | } 106 | 107 | select[multiple] option { 108 | padding: 0 0.5em 0 0.3em; 109 | } 110 | 111 | /* search and listing filter parameters */ 112 | .filter-parameters { 113 | margin-bottom: 1em; 114 | } 115 | 116 | .filter-parameters h3 { 117 | font-size: 1em; 118 | margin-top: 0; 119 | } 120 | 121 | .filter-parameters div.flex-container { 122 | display: flex; 123 | justify-content: flex-start; 124 | flex-wrap: wrap; 125 | /* compensate for the margin-bottom of the flex-container items */ 126 | margin-bottom: -0.85em; 127 | } 128 | 129 | .filter-parameters .flex-container div { 130 | margin-bottom: 1em; 131 | font-size: 0.85em; 132 | } 133 | 134 | .filter-parameters .flex-container div:not(:last-child) { 135 | margin-right: 1.65em; 136 | } 137 | 138 | .filter-parameters legend { 139 | display: none; 140 | } 141 | 142 | .filter-parameters label { 143 | display: block; 144 | } 145 | 146 | /* scale fonts down to a sane 
default (16 * .875 = 14px) */ 147 | #content { 148 | font-size: 0.875em; 149 | } 150 | 151 | /* Arch style for links */ 152 | a { 153 | text-decoration: none; 154 | } 155 | 156 | a:link, 157 | th a:visited { 158 | color: #07b; 159 | } 160 | 161 | a:visited { 162 | color: #666; 163 | } 164 | 165 | a:hover { 166 | text-decoration: underline; 167 | color: #666; 168 | } 169 | 170 | a:active { 171 | color: #e90; 172 | } 173 | 174 | /* headings */ 175 | h2 { 176 | font-size: 1.5em; 177 | margin-bottom: 0.5em; 178 | border-bottom: 1px solid #888; 179 | } 180 | 181 | h3 { 182 | font-size: 1.25em; 183 | margin-top: .5em; 184 | } 185 | 186 | h4 { 187 | font-size: 1.15em; 188 | margin-top: 1em; 189 | } 190 | 191 | h5 { 192 | font-size: 1em; 193 | margin-top: 1em; 194 | } 195 | 196 | /* general layout */ 197 | main#content { 198 | display: flex; 199 | margin: 1em; 200 | justify-content: center; 201 | } 202 | 203 | #content-left { 204 | display: block; 205 | order: 1; 206 | /* max-width of the content has to be specified here to make it play nice 207 | * with flex. Setting it to max-content would prevent growing beyond the 208 | * largest child element. */ 209 | max-width: 80ch; 210 | /* set min-width to prevent excessive shrinking 211 | * (and to force shrinking in case there is an element which refuses to 212 | * shrink, e.g. a very long line in a
<pre> tag) */
213 |     min-width: 40ch;
214 |     /* set initial/ideal width to 70% */
215 |     flex-basis: 70%;
216 |     /* shrink factor of 0 effectively means that it will not shrink below 70% of
217 |      * the available space (the sidebar will be shrunk instead) */
218 |     flex-shrink: 0;
219 |     /* increase grow factor to expand into the extra space at the cost of the
220 |      * sidebar */
221 |     flex-grow: 1;
222 | }
223 | 
#sidebar {
    /* placed after the main content inside the flex container */
    order: 2;
    display: block;
    /* ideal width: the 30% left over by the main content's 70% base */
    flex-basis: 30%;
    /* lower bound: stops excessive shrinking and forces shrinking when an
     * element (e.g. a very long word) refuses to shrink */
    min-width: 20ch;
    /* upper bound: stops excessive growing */
    max-width: 40ch;
    margin-left: 2em;
}
/* vertical spacing between the direct children of the sidebar */
#sidebar > * {
    margin-bottom: 1em;
}
240 | 
241 | /* responsive layout */
242 | @media only screen and (max-width: 700px) {
243 |     main#content {
244 |         /* place the items in vertical direction */
245 |         flex-direction: column;
246 |     }
247 | 
248 |     #content-left,
249 |     #sidebar {
250 |         margin: 0 0 1rem;
251 |     }
252 | }
253 | 
254 | /* footer */
255 | footer {
256 |     clear: both;
257 |     margin: 2em 1em 1em;
258 | }
259 | 
260 | footer p {
261 |     margin: .5em 0;
262 |     text-align: center;
263 |     font-size: 0.8em;
264 | }
265 | 
266 | .box {
267 |     margin-bottom: 1.5em;
268 |     padding: 0.65em;
269 |     background: #ecf2f5;
270 |     border: 1px solid #bcd;
271 | }
272 | 
273 | /* package info definition list */
274 | .package-info > dl {
275 |     padding-left: 1em;
276 | }
277 | .package-info dd {
278 |     padding-left: 1em;
279 |     word-break: break-word;
280 | }
281 | 
282 | /* workaround for multi-column lists in the listing view
283 |  * (width needs to be explicit because the main#content has display:flex) */
284 | article.single-column-content {
285 |     width: 100%;
286 | }
287 | 
288 | /* multi-column lists (used for listings) */
289 | ul.multi-column,
290 | ol.multi-column {
291 |     column-width: 20em;
292 |     column-gap: 1.5em;
293 |     padding-left: 0.5em;
294 | }
295 | ul.multi-column > li,
296 | ol.multi-column > li {
297 |     /* needed to preserve bullets/numbers in multi-column layout */
298 |     margin-left: 2em;
299 | }
300 | 
301 | /* styled tables */
302 | table.styled-table {
303 |     background: #ecf2f5 none repeat scroll 0 0;
304 |     border: 1px solid #bcd;
305 |     margin-bottom: 1.5em;
306 |     padding: 0.65em;
307 |     font-size: 1.00em;
308 |     border-collapse: collapse;
309 | }
310 | 
311 | table.styled-table tr td,
312 | table.styled-table th {
313 |     padding: 0.35em 0.90em;
314 |     text-align: left;
315 |     /*white-space: nowrap;*/
316 | }
317 | 
318 | table.styled-table th {
319 |     background: rgb(217, 230, 236) none repeat scroll 0 0;
320 |     border-bottom: 1px solid #bcd;
321 | }
322 | 
323 | table.styled-table tr:hover {
324 |     background-color: rgb(228, 237, 241)
325 | }
326 | 
327 | table.styled-table tr td {
328 |     border-top: 1px dotted #bcd;
329 | }
330 | 


--------------------------------------------------------------------------------
/archmanweb/static/archmanweb/man_page.css:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Basic style
  3 |  */
  4 | 
  5 | .man-page-content div.manual-text {
  6 |     text-align: justify;
  7 |     margin-left: 5ex;
  8 | }
  9 | 
 10 | /* don't align nested elements */
 11 | .man-page-content div.manual-text table table,
 12 | .man-page-content div.manual-text table dl,
 13 | .man-page-content div.manual-text dl table,
 14 | .man-page-content div.manual-text dl dl {
 15 |     text-align: initial;
 16 | }
 17 | 
 18 | /* fix top margin */
 19 | .man-page-content > :first-child,
 20 | .package-info > :first-child {
 21 |     margin-top: 0;
 22 | }
 23 | .man-page-content table.head,
 24 | .man-page-content table.foot {
 25 |     border-spacing: 0px;  /* chromium sets it to 2px by default */
 26 | }
 27 | .man-page-content table.head td,
 28 | .man-page-content table.foot td {
 29 |     padding: 0px;
 30 | }
 31 | 
 32 | /* Override top margin for paragraphs (needs to be the same as bottom margin,
 33 |  * because mandoc creates elements with text of the first paragraph outside 

. 34 | * The value 0.6em is consistent with the margins assigned to markup elements 35 | * such as ".Bl-tag > dd". 36 | */ 37 | .man-page-content p { 38 | margin-top: 0.6em; 39 | margin-bottom: 0.6em; 40 | } 41 | /* Override top margin of first children in elements for which mandoc sets zero 42 | * margin (cannot be done everywhere because of the note above). 43 | */ 44 | .Bl-compact > li > :first-child, 45 | .Bl-tag > dt > :first-child, 46 | .Bl-tag > dd > :first-child, 47 | .Bl-compact > dd > :first-child, 48 | .Bl-compact > dt > :first-child, 49 | .Bl-compact > tbody > tr > td > :first-child 50 | { 51 | margin-top: 0em; 52 | } 53 | /* Override bottom margin of all last children except headers. 54 | * (mandoc creates some sections where

or

is the only element and 55 | * the text is not inside

56 | */ 57 | .man-page-content :last-child:not(h1.Sh):not(h2.Ss) { 58 | margin-bottom: 0em; 59 | } 60 | 61 | 62 | /* 63 | * Custom styles for mandoc elements. 64 | * (upstream styles are overridden with the .man-page-content selector) 65 | */ 66 | 67 | /* font sizes */ 68 | .man-page-content h1.Sh { 69 | font-size: 1.25em; 70 | margin-left: -4ex; 71 | border-bottom: none; 72 | } 73 | .man-page-content h2.Ss { 74 | font-size: 1.1em; 75 | margin-left: -2ex; 76 | border-bottom: none; 77 | } 78 | 79 | /* fancy anchor links */ 80 | .man-page-content a.permalink { 81 | color: inherit; 82 | text-decoration: inherit; 83 | border-bottom: thin dotted; 84 | } 85 | .man-page-content a.permalink:only-child:hover:after { 86 | /* :only-child is necessary, otherwise the following text would be shifted */ 87 | /* (alternatively we could match only links inside

etc. */ 88 | content: " ¶"; 89 | } 90 | 91 | /* Arch-styled tables 92 | * style copied from base.css and changed: 93 | * - applied to the "tbl" class 94 | * - removed "white-space: nowrap" 95 | * - "th" style applied also to "td" in the first row 96 | * - top and bottom margins set to 1em for consistency with other markup elements 97 | * (they have to be the same because mandoc creates elements with text of the 98 | * first paragraph outside

) 99 | */ 100 | .man-page-content table.tbl { 101 | background: #ecf2f5 none repeat scroll 0 0; 102 | border: 1px solid #bcd; 103 | margin-top: 0.6em; 104 | margin-bottom: 0.6em; 105 | padding: 0.65em; 106 | font-size: 1.00em; 107 | border-collapse: collapse; 108 | } 109 | 110 | .man-page-content table.tbl tr td, 111 | .man-page-content table.tbl th { 112 | padding: 0.35em 0.90em; 113 | text-align: left; 114 | } 115 | 116 | .man-page-content table.tbl th, 117 | .man-page-content table.tbl tr:first-child td:not(:only-child) { 118 | background: rgb(217, 230, 236) none repeat scroll 0 0; 119 | border-bottom: 1px solid #bcd; 120 | font-weight: bold; 121 | word-break: initial; 122 | } 123 | 124 | .man-page-content table.tbl tr:hover { 125 | background-color: rgb(228, 237, 241) 126 | } 127 | 128 | .man-page-content table.tbl tr td { 129 | border-top: 1px dotted #bcd; 130 | border-bottom: 1px dotted #bcd; 131 | } 132 | 133 | /* 134 | * these were copied from mandoc.css and limited only to the children of our 135 | * man-page-content wrapper 136 | */ 137 | .man-page-content * { clear: both } 138 | .man-page-content td { vertical-align: top; } 139 | .man-page-content ul, 140 | .man-page-content ol, 141 | .man-page-content table, 142 | .man-page-content dl { 143 | margin-top: 1ex; 144 | margin-bottom: 1ex; 145 | } 146 | /* unlike upstream, only nested dl's have zero margins */ 147 | .man-page-content table table, 148 | .man-page-content table dl, 149 | .man-page-content dl table, 150 | .man-page-content dl dl { 151 | margin-top: 0ex; 152 | margin-bottom: 0ex; 153 | } 154 | .man-page-content li, 155 | .man-page-content dt { 156 | margin-top: 1em; 157 | } 158 | 159 | /* override overflow:auto -> overflow:initial (a scrollbar sometimes appeared with auto) */ 160 | .man-page-content dd.It-tag { overflow:initial; } 161 | 162 | /* hide
tags immediately following a block tag (pre or div) */ 163 | .man-page-content pre + br, 164 | .man-page-content div + br { 165 | display: none; 166 | } 167 | /* hide
tags which are the first or the last child node */ 168 | .man-page-content br:first-child, 169 | .man-page-content br:last-child { 170 | display: none; 171 | } 172 | 173 | 174 | /* 175 | * The following rules were copied from the upstream mandoc.css file and modified: 176 | * - Margins of ".Bl-hang > dd", ".Bl-tag" and ".Bl-tag > dt" were changed from 177 | * 5.5em to 3.8em to match ".Bd-indent". 178 | * - "font-family" was removed from all rules. The font family should be assigned 179 | * to tags, not to tag classes. 180 | * - ".Nd" is styled with "display: inline;" 181 | * - ".HP" is removed/disabled (deprecated macro, cannot be represented in HTML) 182 | * - added ".Bd-indent > pre:only-child" to cancel useless indentation of pre tags 183 | */ 184 | 185 | /* Header and footer lines. */ 186 | 187 | table.head { width: 100%; 188 | border-bottom: 1px dotted #808080; 189 | margin-bottom: 1em; 190 | font-size: smaller; } 191 | td.head-vol { text-align: center; } 192 | td.head-rtitle { 193 | text-align: right; } 194 | 195 | table.foot { width: 100%; 196 | border-top: 1px dotted #808080; 197 | margin-top: 1em; 198 | font-size: smaller; } 199 | td.foot-os { text-align: right; } 200 | 201 | /* Sections and paragraphs. */ 202 | 203 | .manual-text { 204 | margin-left: 3.8em; } 205 | .Nd { display: inline; } 206 | section.Sh { } 207 | h1.Sh { margin-top: 1.2em; 208 | margin-bottom: 0.6em; 209 | margin-left: -3.2em; 210 | font-size: 110%; } 211 | section.Ss { } 212 | h2.Ss { margin-top: 1.2em; 213 | margin-bottom: 0.6em; 214 | margin-left: -1.2em; 215 | font-size: 105%; } 216 | .Pp { margin: 0.6em 0em; } 217 | .Sx { } 218 | .Xr { } 219 | 220 | /* Displays and lists. 
*/ 221 | 222 | .Bd { } 223 | .Bd-indent { margin-left: 3.8em; } 224 | /* cancel useless indentation of pre tags (leave visual left margin of 1em) */ 225 | .Bd-indent > pre:only-child { 226 | margin-left: -2.8em; 227 | margin-top: 0em; } 228 | /* cancel useless double-indent */ 229 | .Bd-indent > .Bd-indent:only-child { 230 | margin-left: 0em; } 231 | 232 | .Bl-bullet { list-style-type: disc; 233 | padding-left: 1em; } 234 | .Bl-bullet > li { } 235 | .Bl-dash { list-style-type: none; 236 | padding-left: 0em; } 237 | .Bl-dash > li:before { 238 | content: "\2014 "; } 239 | .Bl-item { list-style-type: none; 240 | padding-left: 0em; } 241 | .Bl-item > li { } 242 | .Bl-compact > li { 243 | margin-top: 0em; } 244 | 245 | .Bl-enum { padding-left: 2em; } 246 | .Bl-enum > li { } 247 | .Bl-compact > li { 248 | margin-top: 0em; } 249 | 250 | .Bl-diag { } 251 | .Bl-diag > dt { 252 | font-style: normal; 253 | font-weight: bold; } 254 | .Bl-diag > dd { 255 | margin-left: 0em; } 256 | .Bl-hang { } 257 | .Bl-hang > dt { } 258 | .Bl-hang > dd { 259 | margin-left: 3.8em; } 260 | .Bl-inset { } 261 | .Bl-inset > dt { } 262 | .Bl-inset > dd { 263 | margin-left: 0em; } 264 | .Bl-ohang { } 265 | .Bl-ohang > dt { } 266 | .Bl-ohang > dd { 267 | margin-left: 0em; } 268 | .Bl-tag { margin-top: 0.6em; 269 | margin-left: 3.8em; } 270 | .Bl-tag > dt { 271 | float: left; 272 | width: 100%; 273 | margin-top: 0em; 274 | margin-left: -3.8em; 275 | padding-right: 0.5em; 276 | vertical-align: top; } 277 | .Bl-tag > dd { 278 | clear: right; 279 | width: 100%; 280 | margin-top: 0em; 281 | margin-left: 0em; 282 | margin-bottom: 0.6em; 283 | vertical-align: top; 284 | overflow: auto; } 285 | .Bl-compact { margin-top: 0em; } 286 | .Bl-compact > dd { 287 | margin-bottom: 0em; } 288 | .Bl-compact > dt { 289 | margin-top: 0em; } 290 | 291 | .Bl-column { } 292 | .Bl-column > tbody > tr { } 293 | .Bl-column > tbody > tr > td { 294 | margin-top: 1em; } 295 | .Bl-compact > tbody > tr > td { 296 | margin-top: 0em; 
} 297 | 298 | .Rs { font-style: normal; 299 | font-weight: normal; } 300 | .RsA { } 301 | .RsB { font-style: italic; 302 | font-weight: normal; } 303 | .RsC { } 304 | .RsD { } 305 | .RsI { font-style: italic; 306 | font-weight: normal; } 307 | .RsJ { font-style: italic; 308 | font-weight: normal; } 309 | .RsN { } 310 | .RsO { } 311 | .RsP { } 312 | .RsQ { } 313 | .RsR { } 314 | .RsT { text-decoration: underline; } 315 | .RsU { } 316 | .RsV { } 317 | 318 | .eqn { } 319 | .tbl td { vertical-align: middle; } 320 | 321 | /* NOTE: .HP is only placed on

tags, this selector conflicts with ".man-page-content p" 322 | and the .HP macro is deprecated anyway (its meaning cannot be represented exactly in HTML) 323 | .HP { margin-left: 3.8em; 324 | text-indent: -3.8em; } 325 | */ 326 | 327 | /* Semantic markup for command line utilities. */ 328 | 329 | table.Nm { } 330 | code.Nm { font-style: normal; 331 | font-weight: bold; } 332 | .Fl { font-style: normal; 333 | font-weight: bold; } 334 | .Cm { font-style: normal; 335 | font-weight: bold; } 336 | .Ar { font-style: italic; 337 | font-weight: normal; } 338 | .Op { display: inline; } 339 | .Ic { font-style: normal; 340 | font-weight: bold; } 341 | .Ev { font-style: normal; 342 | font-weight: normal; } 343 | .Pa { font-style: italic; 344 | font-weight: normal; } 345 | 346 | /* Semantic markup for function libraries. */ 347 | 348 | .Lb { } 349 | code.In { font-style: normal; 350 | font-weight: bold; } 351 | a.In { } 352 | .Fd { font-style: normal; 353 | font-weight: bold; } 354 | .Ft { font-style: italic; 355 | font-weight: normal; } 356 | .Fn { font-style: normal; 357 | font-weight: bold; } 358 | .Fa { font-style: italic; 359 | font-weight: normal; } 360 | .Vt { font-style: italic; 361 | font-weight: normal; } 362 | .Va { font-style: italic; 363 | font-weight: normal; } 364 | .Dv { font-style: normal; 365 | font-weight: normal; } 366 | .Er { font-style: normal; 367 | font-weight: normal; } 368 | 369 | /* Various semantic markup. */ 370 | 371 | .An { } 372 | .Lk { } 373 | .Mt { } 374 | .Cd { font-style: normal; 375 | font-weight: bold; } 376 | .Ad { font-style: italic; 377 | font-weight: normal; } 378 | .Ms { font-style: normal; 379 | font-weight: bold; } 380 | .St { } 381 | .Ux { } 382 | 383 | /* Physical markup. 
*/ 384 | 385 | .Bf { display: inline; } 386 | .No { font-style: normal; 387 | font-weight: normal; } 388 | .Em { font-style: italic; 389 | font-weight: normal; } 390 | .Sy { font-style: normal; 391 | font-weight: bold; } 392 | .Li { font-style: normal; 393 | font-weight: normal; } 394 | 395 | /* Responsive overrides to avoid excessive margins on small devices. */ 396 | 397 | @media (max-width: 33em) { 398 | .man-page-content div.manual-text { 399 | margin-left: 0em; 400 | } 401 | .man-page-content h1.Sh, 402 | .man-page-content h2.Ss { 403 | margin-left: 0em; 404 | } 405 | /* the following rules are copied from mandoc.css */ 406 | .Bd-indent { margin-left: 2em; } 407 | .Bl-hang > dd { 408 | margin-left: 2em; } 409 | .Bl-tag { margin-left: 2em; } 410 | .Bl-tag > dt { 411 | margin-left: -2em; } 412 | .HP { margin-left: 2em; 413 | text-indent: -2em; } 414 | } 415 | -------------------------------------------------------------------------------- /archmanweb/templates/404.html: -------------------------------------------------------------------------------- 1 | {% extends "base.html" %} 2 | 3 | {% block content %} 4 |

5 |

404 — Page not found

6 | {% if exception %} 7 | {{ exception }} 8 | {% else %} 9 | Sorry, the page you have requested does not exist. 10 | {% endif %} 11 |
12 | {% endblock %} 13 | -------------------------------------------------------------------------------- /archmanweb/templates/500.html: -------------------------------------------------------------------------------- 1 | {% extends "base.html" %} 2 | 3 | {% block content %} 4 |
5 |

500 — Internal server error

6 | The web application crashed. 7 |
8 | {% endblock %} 9 | -------------------------------------------------------------------------------- /archmanweb/templates/base.html: -------------------------------------------------------------------------------- 1 | {% load static %} 2 | 3 | 4 | 5 | 6 | 7 | 8 | {% block title %}Arch manual pages{% endblock %} 9 | 10 | 11 | 12 | 13 | {% block head %} 14 | {% endblock %} 15 | 16 | 17 |
18 | 39 | 49 |
50 |
51 | {% block content %} 52 |
53 | {% block content_left %} 54 | {% endblock %} 55 |
56 | 60 | {% endblock %} 61 |
62 |
63 |

Powered by archmanweb, 64 | using mandoc for the conversion of manual pages. 65 |

66 |

The website is available under the terms of the GPL-3.0 67 | license, except for the contents of the manual pages, which have their own license 68 | specified in the corresponding Arch Linux package. 69 |

70 |
71 | 72 | 73 | -------------------------------------------------------------------------------- /archmanweb/templates/index.html: -------------------------------------------------------------------------------- 1 | {% extends "base.html" %} 2 | {% load static %} 3 | {% load make_table %} 4 | 5 | {% block head %} 6 | 7 | {% endblock %} 8 | 9 | {% block content %} 10 |
11 |
12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 |
man.archlinux.org(7)Arch Linux manual pagesman.archlinux.org(7)
20 |
21 | 22 |
23 |

24 | man.archlinux.org — manual pages from Arch Linux packages 25 |
26 | 27 |
28 |

29 |

!archman KEYWORDS...

30 |
31 | 32 |
33 |

34 |

35 | This website is a repository of all manual pages available in the 36 | Arch Linux packages. 37 | There are {{ count_man_pages }} manual pages and {{ count_symlinks }} symbolic links from 38 | {{ count_pkgs_with_mans }} packages, another {{ count_pkgs_without_mans }} packages do not contain 39 | any indexable manual pages. 40 |

41 | 42 |

43 | Traditionally, manual pages are organized into several sections. Read the introduction 44 | pages for each section: 45 | intro(1), 46 | intro(2), 47 | intro(3), 48 | intro(4), 49 | intro(5), 50 | intro(6), 51 | intro(7), 52 | intro(8). 53 | Some packages provide their manuals in a subsection (e.g. 3ssl) and some use non-standard sections 54 | (e.g. 0 or n). 55 |

56 |
57 | 58 |
59 |

60 | There are several ways to find a specific manual page: 61 |
    62 |
  • Use the search form to search for keywords in the names and 63 | descriptions of manual pages and packages. You can also use the !archman DuckDuckGo 64 | bang to search Arch manual pages.
  • 65 |
  • Use the listing form to list all manual pages matching the 66 | specified filtering and sorting criteria.
  • 67 |
  • Manually use the addressing scheme as explained below.
  • 68 |
69 | 70 |

71 | The manual pages are addressable as 72 | /man/<repo>/<pkgname>/<page>.<section>.<language>.<format>. 73 | Any part except <page> is optional: 74 |

75 |
    76 |
  • <repo> and <pkgname> can be used to disambiguate the 77 | page version found in multiple packages. If omitted, manual pages are looked up in package 78 | repositories in the following order: core, extra, community, multilib.
  • 79 |
  • If <section> is missing, you will be redirected to the first manual page 80 | found in sections in the following order: 1, n, l, 8, 6, 3, 0, 2, 5, 7, 4, 9.
  • 81 |
  • The default language is en. Note that en is also the fallback 82 | language for pages, which are not available in any other language.
  • 83 |
  • The default format is html (other supported formats are txt and 84 | raw).
  • 85 |
86 |

87 | Note that symbolic links, such as bunzip2(1), are 88 | implemented as HTTP redirects with the 302 status code. Symbolic links are included in per-package 89 | listings, such as core/openssl. 90 |

91 |
92 | 93 |
94 |

95 | This is just a brief summary of the last updates to the internal database. 96 | {% make_table last_updates class_="styled-table" %} 97 |
98 | 99 |
100 |

101 | This page is not an actual manual page. It was just made to look like one. 102 |
103 | 104 |
105 |

106 | The project was initially created for the man template on the Arch wiki. 107 | The code is developed in a GitLab repository. 108 |
109 | 110 |
111 |

112 | There are other manual page websites using different sources and providing different features. 113 | See a list on the 114 | wiki. 115 |
116 | 117 |
118 | 119 | 120 | 121 | 122 | 123 | 124 |
{% now "Y-m-d" %}Arch Linux
125 | 126 |
127 |
128 | {% endblock %} 129 | -------------------------------------------------------------------------------- /archmanweb/templates/listing.html: -------------------------------------------------------------------------------- 1 | {% extends "base.html" %} 2 | {% load query_transform %} 3 | {% load reverse_man_url %} 4 | {% load pagination_links %} 5 | 6 | {% block head %} 7 | 8 | {% endblock %} 9 | 10 | {% block content %} 11 |
12 | 13 |
14 |

Listing parameters

15 | 16 | 40 |
41 | 42 |
43 |

List of manual pages

44 | 45 | {% if man_pages %} 46 | 58 | {% else %} 59 |

No manual pages matching these criteria.

60 | {% endif %} 61 | 62 | {% if man_pages.paginator.num_pages != 1 %} 63 | {% pagination_links request man_pages 'page' %} 64 | {% endif %} 65 |
66 | 67 | {% if pkg and symlinks %} 68 |
69 |

List of symbolic links

70 | 80 | 81 | {% if symlinks.paginator.num_pages != 1 %} 82 | {% pagination_links request symlinks 'page_symlinks' %} 83 | {% endif %} 84 |
85 | {% endif %} 86 | 87 |
88 | {% endblock %} 89 | -------------------------------------------------------------------------------- /archmanweb/templates/man_404.html: -------------------------------------------------------------------------------- 1 | {% extends "base.html" %} 2 | {% load static %} 3 | 4 | {% block title %}Page not found — Arch manual pages{% endblock %} 5 | 6 | {% block content %} 7 |
8 |

404 — Page not found

9 |

10 | The manual page {{ name }} was not found in 11 | {% if repo and pkgname %} 12 | the {{ repo }}/{{ pkgname }} 13 | {% elif pkgname %} 14 | the {{ pkgname }} 15 | {% else %} 16 | any 17 | {% endif %} 18 | package. 19 | {% if search_url %} 20 | You can search for similar manual pages. 21 | {% endif %} 22 |

23 |
24 | {% endblock %} 25 | -------------------------------------------------------------------------------- /archmanweb/templates/man_page.html: -------------------------------------------------------------------------------- 1 | {% extends "base.html" %} 2 | {% load static %} 3 | {% load reverse_man_url %} 4 | 5 | {% block title %}{{ man.name }}({{ man.section }}) — Arch manual pages{% endblock %} 6 | 7 | {% block head %} 8 | 9 | {% for lang in other_languages %} 10 | 11 | {% endfor %} 12 | {% endblock %} 13 | 14 | {% block content_left %} 15 |
16 | {{ man_page_content | safe }} 17 |
18 | {% endblock %} 19 | 20 | {% block sidebar %} 21 |
22 |

Package information:

23 |
24 |
Package name:
25 |
{{ pkg.repo }}/{{ pkg.name }}
26 |
Version:
27 |
{{ pkg.version }}
28 |
Upstream:
29 |
{{ pkg.url }}
30 |
Licenses:
31 |
{{ pkg.licenses | join:", " }}
32 |
Manuals:
33 |
{% url 'listing' %}/{{ pkg.repo }}/{{ pkg.name }}/
34 |
35 |
36 | 37 |
38 | Table of contents 39 | 46 |
47 | 48 | 58 | 59 | 69 | 70 | 80 | {% endblock %} 81 | -------------------------------------------------------------------------------- /archmanweb/templates/search.html: -------------------------------------------------------------------------------- 1 | {% extends "index.html" %} 2 | {% load query_transform %} 3 | {% load reverse_man_url %} 4 | {% load pagination_links %} 5 | 6 | {% block head %} 7 | 8 | {% endblock %} 9 | 10 | {% block current_search_term %}{{ request.GET.q }}{% endblock %} 11 | 12 | {% block content %} 13 |
14 | 15 |
16 |

Manual page search parameters

17 | 18 | 42 |
43 | 44 | {% if request.GET.q and search_form.is_valid %} 45 |
46 |

Results in manual names

47 |
    48 | {% for man in man_results %} 49 |
  1. 50 | {{ man.name }}({{ man.section }}) 51 | [{{ man.lang }}] 52 | (from the {{ man.package__repo }}/{{ man.package__name }} package) 53 |
  2. 54 | {% empty %} 55 | No results. 56 | {% endfor%} 57 |
58 | 59 | {% if man_results.paginator.num_pages != 1 %} 60 | {% pagination_links request man_results 'page_man' %} 61 | {% endif %} 62 |
63 | 64 |
65 |

Results in manual descriptions ("apropos")

66 |
    67 | {% for man in apropos_results %} 68 |
  1. 69 |
    70 |
    71 | {{ man.name }}({{ man.section }}) 72 | [{{ man.lang }}] 73 | (from the {{ man.package__repo }}/{{ man.package__name }} package) 74 |
    75 |
    {{ man.desc_snippet | safe }}
    76 |
    77 |
  2. 78 | {% empty %} 79 | No results. 80 | {% endfor%} 81 |
82 | 83 | {% if apropos_results.paginator.num_pages != 1 %} 84 | {% pagination_links request apropos_results 'page_apropos' %} 85 | {% endif %} 86 |
87 | 88 |
89 |

Results in package names and descriptions

90 |
    91 | {% for pkg in pkg_results %} 92 |
  1. 93 |
    94 |
    {{ pkg.repo }}/{{ pkg.name }}
    95 |
    {{ pkg.desc_snippet | safe }}
    96 |
    97 |
  2. 98 | {% empty %} 99 | No results. 100 | {% endfor%} 101 |
102 | 103 | {% if pkg_results.paginator.num_pages != 1 %} 104 | {% pagination_links request pkg_results 'page_pkg' %} 105 | {% endif %} 106 |
107 | {% endif %} 108 | 109 |
110 | {% endblock %} 111 | -------------------------------------------------------------------------------- /archmanweb/templatetags/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/archlinux/archmanweb/5fcd985314a8f07d151a46ace71ce152e67450a6/archmanweb/templatetags/__init__.py -------------------------------------------------------------------------------- /archmanweb/templatetags/make_table.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | 3 | from django import template 4 | from django.utils.html import format_html, mark_safe 5 | 6 | register = template.Library() 7 | 8 | # taken from https://stackoverflow.com/a/30339105 9 | def format_timedelta(value, time_format="{days} days, {hours2}:{minutes2}:{seconds2}"): 10 | if hasattr(value, 'seconds'): 11 | seconds = value.seconds + value.days * 24 * 3600 12 | else: 13 | seconds = int(value) 14 | 15 | seconds_total = seconds 16 | 17 | minutes = int(seconds / 60) 18 | minutes_total = minutes 19 | seconds -= minutes * 60 20 | 21 | hours = int(minutes / 60) 22 | hours_total = hours 23 | minutes -= hours * 60 24 | 25 | days = int(hours / 24) 26 | hours -= days * 24 27 | 28 | return time_format.format(**{ 29 | 'seconds': seconds, 30 | 'seconds2': str(seconds).zfill(2), 31 | 'minutes': minutes, 32 | 'minutes2': str(minutes).zfill(2), 33 | 'hours': hours, 34 | 'hours2': str(hours).zfill(2), 35 | 'days': days, 36 | 'seconds_total': seconds_total, 37 | 'minutes_total': minutes_total, 38 | 'hours_total': hours_total, 39 | }) 40 | 41 | @register.simple_tag 42 | def make_table(rows, class_=None): 43 | # we need to access the first row twice, so this way we avoid making separate SQL query 44 | rows = list(rows) 45 | 46 | if not rows: 47 | return "" 48 | 49 | config = rows[0].HtmlTableConfig 50 | columns = config.columns 51 | descriptions = config.descriptions 52 | 53 | if class_ is None: 54 | 
html = '\n' 55 | else: 56 | html = format_html('
\n', class_) 57 | 58 | # header 59 | html += '\n' 60 | html += '\n' 61 | for desc in descriptions: 62 | html += format_html('\n', desc) 63 | html += '\n' 64 | html += '\n' 65 | 66 | # body 67 | html += '\n' 68 | for row in rows: 69 | html += '\n' 70 | for col in columns: 71 | value = getattr(row, col) 72 | if isinstance(value, datetime.datetime): 73 | value = value.strftime("%F %T") 74 | elif isinstance(value, datetime.timedelta): 75 | value = format_timedelta(value, time_format="{hours2}:{minutes2}:{seconds2}") 76 | html += format_html('\n', value) 77 | html += '\n' 78 | html += '\n' 79 | 80 | html += '
{}
{}
\n' 81 | return mark_safe(html) 82 | -------------------------------------------------------------------------------- /archmanweb/templatetags/pagination_links.py: -------------------------------------------------------------------------------- 1 | from django import template 2 | from django.utils.html import format_html, mark_safe 3 | 4 | register = template.Library() 5 | 6 | @register.simple_tag 7 | def pagination_links(request, paginator, query_string_param): 8 | html = '\n' 38 | return mark_safe(html) 39 | -------------------------------------------------------------------------------- /archmanweb/templatetags/query_transform.py: -------------------------------------------------------------------------------- 1 | from django import template 2 | 3 | register = template.Library() 4 | 5 | @register.simple_tag 6 | def query_transform(request, **kwargs): 7 | updated = request.GET.copy() 8 | for k, v in kwargs.items(): 9 | updated[k] = v 10 | return updated.urlencode() 11 | -------------------------------------------------------------------------------- /archmanweb/templatetags/reverse_man_url.py: -------------------------------------------------------------------------------- 1 | from django import template 2 | 3 | from ..utils import reverse_man_url as _reverse 4 | 5 | register = template.Library() 6 | 7 | @register.simple_tag 8 | def reverse_man_url(repo, pkgname, man_name, man_section, man_lang, output_type): 9 | return _reverse(repo, pkgname, man_name, man_section, man_lang, output_type) 10 | -------------------------------------------------------------------------------- /archmanweb/urls.py: -------------------------------------------------------------------------------- 1 | from django.urls import re_path 2 | 3 | from . 
# URL routes of the application.
#
# The named groups are passed to the views as keyword arguments, so their
# names have to match the keyword-only parameters of the views:
#   listing(request, *, repo, pkgname)
#   man_page(request, *, repo, pkgname, name_section_lang, url_output_type)
#
# The "repo" group is optional and lazy ("??"), so a single path component is
# bound to "pkgname" rather than to "repo".
urlpatterns = [
    re_path(r"^$", views.index, name="index"),
    re_path(r"^listing(/(?P<repo>[A-Za-z0-9@._+\-]+))??(/(?P<pkgname>[A-Za-z0-9@._+\-]+))?/?$",
            views.listing, name="listing"),
    re_path(r"^man/"
            r"((?P<repo>[A-Za-z0-9@._+\-]+)/)??"
            r"((?P<pkgname>[A-Za-z0-9@._+\-]+)/)?"
            # lazy, so that a trailing ".html"/".txt"/".raw" is left for the next group
            r"(?P<name_section_lang>[A-Za-z0-9@._+\-:\[\]]+?)"
            r"(\.(?P<url_output_type>html|txt|raw))?$",
            views.man_page, name="man_page"),
    re_path(r"^search", views.search, name="search"),
]
def search_url(man_page, *, section=None, lang=None, repo=None, pkgname=None):
    """
    Build the URL of the search view for the given query.

    ``man_page`` is always sent as the ``q`` parameter; ``section``, ``lang``,
    ``repo`` and ``pkgname`` are appended (in this order) only when they are
    truthy. All values are percent-encoded, so names containing characters
    such as "+", "&" or "#" survive the round trip through the query string.
    """
    # imported locally: nothing else in this module needs urllib
    from urllib.parse import urlencode

    params = {"q": man_page}
    optional = (
        ("section", section),
        ("lang", lang),
        ("repo", repo),
        ("pkgname", pkgname),
    )
    params.update((key, value) for key, value in optional if value)
    return reverse("search") + "?" + urlencode(params)


def normalize_html_entities(s):
    """
    Replace numeric character references (e.g. ``&#65;`` or ``&#x41;``) in
    ``s`` with the characters they denote.

    References which cannot be converted (hexadecimal digits without the
    ``x`` marker, or a code point outside of the Unicode range) are left in
    the string unchanged instead of raising an exception.
    """
    def repl(match):
        base = 16 if match.group(1) else 10
        try:
            return chr(int(match.group(2), base))
        except (ValueError, OverflowError):
            # malformed or out-of-range reference: keep the original text
            return match.group(0)
    return re.sub(r"&#(x?)([0-9a-fA-F]+);", repl, s)


# escape sensitive characters when formatting an element attribute
# https://stackoverflow.com/a/7382028
def safe_escape_attribute(attribute):
    """
    Return ``attribute`` with the characters ``<``, ``>``, ``"``, ``'`` and
    ``&`` replaced by HTML entities, all other characters are kept as they are.
    """
    escape_map = {
        "<": "&lt;",
        ">": "&gt;",
        "\"": "&quot;",
        "'": "&#39;",
        "&": "&amp;",
    }
    return "".join(escape_map.get(c, c) for c in attribute)
anchor-encoding. See `MediaWiki`_ for 31 | details. 32 | 33 | .. _`MediaWiki`: https://www.mediawiki.org/wiki/Manual:PAGENAMEE_encoding 34 | """ 35 | # underscores are treated as spaces during this pre-processing, so they are 36 | # converted to spaces first (the encoding later converts them back) 37 | str_ = str_.replace("_", " ") 38 | # strip leading + trailing whitespace 39 | str_ = str_.strip() 40 | # squash *spaces* in the middle (other whitespace is preserved) 41 | str_ = re.sub("[ ]+", " ", str_) 42 | # leading colons are stripped, others preserved (colons in the middle preceded by 43 | # newline are supposed to be fucked up in MediaWiki, but this is pretty safe to ignore) 44 | str_ = str_.lstrip(":") 45 | return str_ 46 | 47 | # adapted from `anchorencode` in wiki-scripts (the "legacy" format was removed): 48 | # https://github.com/lahwaacz/wiki-scripts/blob/master/ws/parser_helpers/encodings.py#L119-L152 49 | def anchorencode_id(str_): 50 | """ 51 | anchorencode_id avoids percent-encoding to keep the id readable 52 | """ 53 | str_ = _anchor_preprocess(str_) 54 | # HTML5 specification says ids must not contain spaces 55 | str_ = re.sub("[ \t\n\r\f\v]", "_", str_) 56 | return str_ 57 | 58 | # adapted from `anchorencode` in wiki-scripts (the "legacy" format was removed): 59 | # https://github.com/lahwaacz/wiki-scripts/blob/master/ws/parser_helpers/encodings.py#L119-L152 60 | def anchorencode_href(str_, *, input_is_already_id=False): 61 | """ 62 | anchorencode_href does some percent-encoding on top of anchorencode_id to 63 | increase compatibility (The id can be linked with "#id" as well as with 64 | "#percent-encoded-id", since the browser does the percent-encoding in the 65 | former case. But if we used percent-encoded ids in the first place, only 66 | the links with percent-encoded fragments would work.) 
def mandoc_convert(content, output_type, lang=None):
    """
    Convert the source of a manual page with the external ``mandoc`` program.

    ``content`` is the manual source passed to mandoc on its standard input,
    ``output_type`` is either "html" or "txt", and ``lang`` is used only for
    the HTML output to build the URL pattern of cross-reference links.

    Returns the standard output of mandoc as a string.

    Raises ValueError for an unsupported ``output_type``,
    subprocess.CalledProcessError when mandoc exits with a non-zero status,
    and AssertionError when mandoc produces no output.
    """
    if output_type == "html":
        # "%N" and "%S" are passed literally to the "man=" output option
        url_pattern = reverse_man_url("", "", "%N", "%S", lang, "")
        cmd = ["mandoc", "-T", "html", "-O", "fragment,man={}".format(url_pattern)]
    elif output_type == "txt":
        cmd = ["mandoc", "-T", "utf8"]
    else:
        raise ValueError("unsupported output type: {!r}".format(output_type))
    # the command is an argument list executed without a shell, so the URL
    # pattern (which contains the language) can never be interpreted by a shell
    p = subprocess.run(cmd, check=True, input=content, encoding="utf-8", stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    assert p.stdout
    return p.stdout
| return f"{url}" 27 | 28 | skip_tags_pattern = r"\<(?Pa|pre)[^>]*\>.*?\" 29 | url_pattern = r"(?Phttps?://[^\s<>&]+(?<=[\w/]))" 30 | surrounding_tag_begin = r"(?P\<(?Pb|i|strong|em|mark)[^>]*\>\s*)?" 31 | surrounding_tag_end = r"(?(tag_begin)\s*\|)" 32 | surrounding_angle_begin = r"(?P<)?" 33 | surrounding_angle_end = r"(?(angle)>|)" 34 | html = re.sub(f"{skip_tags_pattern}|{surrounding_angle_begin}{surrounding_tag_begin}{url_pattern}{surrounding_tag_end}{surrounding_angle_end}", 35 | repl_url, html, flags=re.DOTALL) 36 | 37 | # if the URL is the only text in
 tags, it gets replaced
 38 |     html = re.sub(f"
\s*{url_pattern}\s*
", 39 | repl_url, html, flags=re.DOTALL) 40 | 41 | return html 42 | 43 | def _replace_section_heading_ids(html): 44 | """ 45 | Replace IDs for section headings and self-links with something sensible and wiki-compatible 46 | 47 | E.g. mandoc does not strip the "\&" roff(7) escape, may lead to duplicate underscores, 48 | and sometimes uses weird encoding for some chars. 49 | """ 50 | # section ID getter capable of handling duplicate titles 51 | ids = set() 52 | def get_id(title): 53 | base_id = anchorencode_id(title) 54 | id = base_id 55 | j = 2 56 | while id in ids: 57 | id = base_id + "_" + str(j) 58 | j += 1 59 | ids.add(id) 60 | return id 61 | 62 | def repl_heading(match): 63 | heading_tag = match.group("heading_tag") 64 | heading_attributes = match.group("heading_attributes") 65 | heading_attributes = " ".join(a for a in heading_attributes.split() if not a.startswith("id=")) 66 | title = match.group("title").replace("\n", " ") 67 | id = safe_escape_attribute(get_id(title)) 68 | href = anchorencode_href(id, input_is_already_id=True) 69 | return f"<{heading_tag} {heading_attributes} id='{id}'>" 70 | 71 | pattern = re.compile(r"\<(?Ph[1-6])(?P[^\>]*)\>[^\<\>]*" 72 | r"\]*\>" 73 | r"(?P.+?)" 74 | r"\<\/a\>[^\<\>]*" 75 | r"\<\/(?P=heading_tag)\>", re.DOTALL) 76 | return re.sub(pattern, repl_heading, html) 77 | 78 | def postprocess(text, content_type, lang): 79 | assert content_type in {"html", "txt"} 80 | if content_type == "html": 81 | # replace references with links 82 | xref_patterns = [ 83 | # section outside the tag 84 | r"\<(?P<tag>b|i|strong|em|mark)\>" 85 | r"(?P<man_name>[A-Za-z0-9@._+\-:\[\]]+)" 86 | r"\<\/\1\>" 87 | r"\((?P<section>\d[A-Za-z]{,3})\)", 88 | # section inside the tag 89 | r"\<(?P<tag>b|i|strong|em|mark)\>" 90 | r"(?P<man_name>[A-Za-z0-9@._+\-:\[\]]+)" 91 | r"\((?P<section>\d[A-Za-z]{,3})\)" 92 | r"\<\/\1\>", 93 | ] 94 | for pattern in xref_patterns: 95 | text = re.sub(pattern, 96 | "<a href='" + reverse("index") + "man/" + 
def extract_description(text, lang="en"):
    """
    Extracts the "description" from a plain-text version of a manual page.

    The description is taken from the NAME section (or a hard-coded list of
    translations for non-English manuals). At most 2 paragraphs, one of which
    is usually the one-line description of the manual, are taken to keep the
    description short.

    Note that NAME does not have to be the first section, see e.g. syslog.h(0P).

    ``lang`` may carry a territory or modifier suffix (e.g. "pt_BR" or
    "sr@latin"), only the part before "_" and "@" is used for the lookup.

    Returns the dedented description, or None if no NAME section was found.
    """
    dictionary = {
        "ar": "الاسم",
        "bn": "নাম",
        "ca": "NOM",
        "cs": "JMÉNO|NÁZEV",
        "da": "NAVN",
        "de": "BEZEICHNUNG",
        "el": "ΌΝΟΜΑ",
        "eo": "NOMO",
        "es": "NOMBRE",
        "et": "NIMI",
        "fi": "NIMI",
        "fr": "NOM",
        "gl": "NOME",
        "hr": "IME",
        "hu": "NÉV",
        "id": "NAMA",
        "it": "NOME",
        "ja": "名前",
        "ko": "이름",
        "lt": "PAVADINIMAS",
        "nb": "NAVN",
        "nl": "NAAM",
        "pl": "NAZWA",
        "pt": "NOME",
        "ro": "NUME",
        "ru": "ИМЯ|НАЗВАНИЕ",
        "sk": "NÁZOV",
        "sl": "IME",
        "sr": "НАЗИВ|ИМЕ|IME",
        "sv": "NAMN",
        "ta": "பெயர்",
        "tr": "İSİM|AD",
        "uk": "НАЗВА|НОМИ|NOMI",
        "vi": "TÊN",
        "zh": "名称|名字|名称|名稱",
    }
    lang = lang.split("_")[0].split("@")[0]
    name = dictionary.get(lang, "NAME")
    if name != "NAME":
        name = "NAME|" + name
    # The alternatives have to be grouped, otherwise "^" would bind only to the
    # first one and "$" only to the last one (and e.g. a line "NAMESPACES" would
    # be taken for the section heading). The section ends at the next line
    # starting in the first column, or at the end of the text.
    match = re.search(rf"^(?:{name})$(?P<description>.+?)(?=^\S|\Z)", text, flags=re.MULTILINE | re.DOTALL | re.IGNORECASE)
    if match is None:
        return None
    description = match.group("description")
    description = textwrap.dedent(description.strip("\n"))
    # keep max 2 paragraphs separated by a blank line
    # (some pages contain a lot of text in the NAME section, e.g. owncloud(1) or qwtlicense(3))
    description = "\n\n".join(description.split("\n\n")[:2])
    return description
def listing(request, *, repo=None, pkgname=None):
    """
    Render the listing of manual pages, filtered and sorted according to the
    listing form. When a package name is given, the symbolic links of that
    package are listed as well.

    ``repo`` and ``pkgname`` come from the URL path and are copied into the
    query data before the form is validated.

    Returns the rendered "listing.html" template (without results when the
    form is not valid), or a response with status 501 when the package name
    matches packages in multiple repositories. Raises Http404 when the
    requested package does not exist.
    """
    # move repo and pkgname from the URL path to the query string
    query = request.GET.copy()
    if repo is not None:
        query["repo"] = repo
    if pkgname is not None:
        query["pkgname"] = pkgname

    # init and validate the form
    listing_form = ListingForm(query)
    if not listing_form.is_valid():
        return render(request, "listing.html", {"listing_form": listing_form})

    # get parameters from the form
    repo = listing_form.cleaned_data["repo"]
    pkgname = listing_form.cleaned_data["pkgname"]
    section = listing_form.cleaned_data["section"]
    lang = listing_form.cleaned_data["lang"]
    sort_by = listing_form.cleaned_data["sort_by"] or "name"
    sort_order = listing_form.cleaned_data["sort_order"] or "asc"

    if sort_by == "name":
        sorting_columns = ("name", "lang", "section")
    elif sort_by == "section":
        sorting_columns = ("section", "name", "lang")
    elif sort_by == "lang":
        sorting_columns = ("lang", "name", "section")

    if sort_order == "desc":
        # this has to be a real sequence: the columns are iterated twice (for
        # the man pages and for the symlinks) and a generator would be
        # exhausted after the first use
        sorting_columns = tuple("-" + c for c in sorting_columns)

    db_pkg = None
    man_pages = ManPage.objects.order_by(*sorting_columns)

    if pkgname:
        # check that such package exists
        if repo:
            packages = Package.objects.filter(name=pkgname, repo__in=repo)
        else:
            packages = Package.objects.filter(name=pkgname)
        if len(packages) == 0:
            if len(repo) > 1:
                raise Http404("The package {} does not exist in the {} repositories.".format(pkgname, repo))
            elif len(repo) == 1:
                raise Http404("The package {} does not exist in the {} repository.".format(pkgname, repo[0]))
            else:
                raise Http404("The package {} does not exist in the database.".format(pkgname))
        elif len(packages) == 1:
            db_pkg = packages[0]
        else:
            # HttpResponse is not an exception, it has to be returned
            return HttpResponse(
                    "The package {} exists in multiple repositories ({}) and ambiguous listings are not implemented."
                    .format(pkgname, ", ".join(pkg.repo for pkg in packages)),
                    status=501)
        man_pages = man_pages.filter(package__name=pkgname)
    elif repo:
        man_pages = man_pages.filter(package__repo__in=repo)
    if section:
        assert isinstance(section, list)
        section_parts = []
        for q in section:
            # do prefix search only when given a single letter (e.g. "3p" should not match "3perl", "3python" etc.)
            # Note: section is matched case-insensitively due to
            # https://gitlab.archlinux.org/archlinux/archmanweb/-/issues/35
            if len(q) == 1:
                section_parts.append(Q(section__istartswith=q))
            else:
                section_parts.append(Q(section__iexact=q))
        section_filter = reduce(operator.__or__, section_parts)
        man_pages = man_pages.filter(section_filter)
    if lang:
        assert isinstance(lang, list)
        lang_filter = reduce(operator.__or__,
                             (Q(lang__startswith=q) for q in lang))
        man_pages = man_pages.filter(lang_filter)

    # list of symbolic links in a package
    if pkgname:
        # symlinks use "from_name" and "from_section" instead of "name" and
        # "section" (a possible "-" prefix for descending order is preserved)
        symlinks_sorting_columns = []
        for c in sorting_columns:
            if "name" in c:
                c = c.replace("name", "from_name")
            elif "section" in c:
                c = c.replace("section", "from_section")
            symlinks_sorting_columns.append(c)
        symlinks = SymbolicLink.objects.order_by(*symlinks_sorting_columns).filter(package__name=pkgname)
        symlinks_count = SymbolicLink.objects.filter(package__name=pkgname).count()
    else:
        symlinks = []
        symlinks_count = 0

    # template rendering time is dominated by the number of links, symlinks have 2 links per row
    if symlinks_count > 125:
        man_pages = paginate(request, "page", man_pages, 250)
        symlinks = paginate(request, "page_symlinks", symlinks, 125)
    else:
        man_pages = paginate(request, "page", man_pages, 500)
        symlinks = paginate(request, "page_symlinks", symlinks, 500)

    context = {
        "listing_form": listing_form,
        "pkg": db_pkg,
        "man_pages": man_pages,
        "symlinks": symlinks,
    }
    return render(request, "listing.html", context)
21 | def _exists_name_section(name, section): 22 | # Note: section is matched case-insensitively due to 23 | # https://gitlab.archlinux.org/archlinux/archmanweb/-/issues/35 24 | return ManPage.objects.filter(name=name, section__istartswith=section).exists() or \ 25 | SymbolicLink.objects.filter(from_name=name, from_section__istartswith=section).exists() 26 | 27 | def _exists_language(lang): 28 | # cross-language symlinks are not allowed 29 | return ManPage.objects.filter(lang=lang).exists() 30 | 31 | def _exists_name_language(name, lang): 32 | # cross-language symlinks are not allowed 33 | return ManPage.objects.filter(name=name, lang=lang).exists() 34 | 35 | def _exists_name_section_language(name, section, lang): 36 | # Note: section is matched case-insensitively due to 37 | # https://gitlab.archlinux.org/archlinux/archmanweb/-/issues/35 38 | return ManPage.objects.filter(name=name, section__istartswith=section, lang=lang).exists() or \ 39 | SymbolicLink.objects.filter(from_name=name, from_section__istartswith=section, lang=lang).exists() 40 | 41 | def _parse_man_name_section_lang(url_snippet, *, force_lang=None): 42 | # Man page names can contain dots, so we need to parse from the right. There are still 43 | # some ambiguities for shortcuts like gimp-2.8 (shortcut for gimp-2.8(1)), jclient.pl 44 | # (shortcut for jclient.pl.1.en) etc., but we'll either detect that the page given by 45 | # the greedy algorithm does not exist or the user can specify the section or language 46 | # to get the version they want. 47 | # NOTE: The force_lang parameter can be used to ignore the lang specified in the URL. 48 | # This is useful for redirections to the default language if we find out that there 49 | # is no version of the page in the user-specified language. 
def _get_section_key(section):
    """
    Return the sorting key (a tuple of an integer and a string) for a section
    name; smaller keys denote sections which are preferred.
    """
    # section sorting:
    # - based on mandoc: 1, 8, 6, 2, 3, 5, 7, 4, 9, 3p
    # - based on man-db: 1, n, l, 8, 3, 0, 2, 5, 4, 9, 6, 7
    order = ("1", "n", "l", "8", "6", "3", "0", "2", "5", "7", "4", "9")
    # sections in the list are ordered first
    if section in order:
        return (order.index(section), "")
    # sections which start with a letter in the list are sorted next
    # (following the same ordering of the first letter and lexical ordering of the rest)
    # Note: the slice is used instead of section[0] so that an empty section
    # does not raise IndexError and is ordered with the "other" sections.
    first = section[:1]
    if first in order:
        return (order.index(first) + len(order), section[1:])
    # other sections are ordered last (respecting the lexical order wrt each other)
    return (100, section)

def _get_repo_key(repo):
    """
    Return the sorting key for a repository name: the known repositories are
    ordered first (in the order given below), the others lexically after them.
    """
    order = ("core", "extra", "community", "multilib", "testing", "community-testing", "multilib-testing")
    if repo in order:
        return (order.index(repo), "")
    return (len(order), repo)

def _get_pkgver_key(version):
    """
    Return the sorting key for a package version which compares the versions
    with pyalpm.vercmp.
    """
    # arguments of vercmp are swapped to order the highest version first
    key_getter = functools.cmp_to_key(lambda a, b: pyalpm.vercmp(b, a))
    return key_getter(version)

def _get_best_match(query, section="section"):
    """
    Return the best object matched by ``query``, or None if nothing matches.

    ``section`` is the name of the attribute holding the section of the
    matched objects ("section" for manual pages, "from_section" for symlinks).
    """
    # prefetch the package object so that we don't hit the db repeatedly while sorting
    # (we can fetch all matches and do the comparison in Python since there are not many
    # ambiguous cases)
    queryset = query.select_related("package").all()
    if not queryset:
        return None

    # ordering for best match: section (custom order), repo (custom order), package version (vercmp)
    def sort_key(man):
        sec_key = _get_section_key(getattr(man, section))
        repo_key = _get_repo_key(man.package.repo)
        pkgver_key = _get_pkgver_key(man.package.version)
        return (sec_key, repo_key, pkgver_key)

    # only the first item of the ordering is needed, so min() is enough
    # (for equal keys it returns the first one, same as a stable sort)
    return min(queryset, key=sort_key)
and pkgname are not added, the target might be in a different package 140 | url = reverse_man_url("", "", symlink.to_name, symlink.to_section, symlink.lang, output_type) 141 | return HttpResponseRedirect(url) 142 | 143 | # Try the default language before using the fallback response. 144 | # This is important because we don't know if the user explicitly specified 145 | # the language or followed a link to a localized page, which does not exist. 146 | # 147 | # Note: if page "foo" does not exist in language "bar", we'll get "foo.bar" as the 148 | # man_name, so we need to re-parse the URL and force the default language. 149 | parsed_name, parsed_section, parsed_lang = _parse_man_name_section_lang(name_section_lang, force_lang="en") 150 | if (parsed_name != man_name or parsed_section != man_section) and parsed_lang == "en": 151 | url = reverse_man_url(repo, pkgname, parsed_name, parsed_section, "en", output_type) 152 | return HttpResponseRedirect(url) 153 | 154 | # this is used from the search view to redirect directly to the man page 155 | def quick_search(name_section_lang, *, repo=None, pkgname=None): 156 | man_name, man_section, url_lang = _parse_man_name_section_lang(name_section_lang) 157 | lang = url_lang or "en" 158 | 159 | # find the man page and package containing it 160 | if man_section is None: 161 | query = ManPage.objects.filter(name=man_name, lang=lang, **_get_package_filter(repo, pkgname)) 162 | else: 163 | # Note: section is matched case-insensitively due to 164 | # https://gitlab.archlinux.org/archlinux/archmanweb/-/issues/35 165 | query = ManPage.objects.filter(section__istartswith=man_section, name=man_name, lang=lang, **_get_package_filter(repo, pkgname)) 166 | db_man = _get_best_match(query) 167 | 168 | if db_man is None: 169 | return try_redirect(repo, pkgname, man_name, man_section, lang, "", name_section_lang) 170 | else: 171 | url = reverse_man_url(repo, pkgname, man_name, man_section, url_lang, "") 172 | return HttpResponseRedirect(url) 173 
def render_404(request, repo, pkgname, name_section_lang):
    """Render the "man page not found" page and return it with status 404.

    The template context contains a link to the search form that is
    pre-filled with the name, section and language guessed from
    ``name_section_lang`` (plus ``repo`` and ``pkgname`` as given).
    """
    # use naive splitting for the search URL parameters
    # (_parse_man_name_section_lang leaves everything in the first part when
    # the page does not exist. This is ambiguous since the section and lang
    # may get mixed up, but better than nothing.)
    parts = name_section_lang.rsplit(".", maxsplit=2)
    search_name = parts[0]
    search_section = None
    search_lang = None
    if len(parts) > 1:
        search_section = parts[1]
    if len(parts) > 2:
        search_lang = parts[2]

    context = {
        "repo": repo,
        "pkgname": pkgname,
        "name": name_section_lang,
        "search_url": search_url(search_name, section=search_section, lang=search_lang, repo=repo, pkgname=pkgname),
    }

    response = render(request, "man_404.html", context)
    response.status_code = 404
    return response

def man_page(request, *, repo=None, pkgname=None, name_section_lang=None, url_output_type=None):
    """Serve a single manual page as HTML, plain text or raw source.

    Parameters (all keyword-only except ``request``):
        repo, pkgname: optional restriction to a package; ``repo`` is only
            accepted together with ``pkgname``.
        name_section_lang: the page identifier, parsed by
            ``_parse_man_name_section_lang`` into name, section and language.
        url_output_type: ``"html"``, ``"txt"`` or ``"raw"``; a falsy value
            means ``"html"``.

    Returns:
        - a plain ``HttpResponse`` with status 500/400 for invalid parameters,
        - a redirect when the page is found under a different section than the
          one requested (or when ``try_redirect`` finds an alternative),
        - the 404 page from ``render_404`` when nothing matches,
        - otherwise the page content in the requested output type.

    Raises:
        Http404: when converting the page fails with ``SoelimError``.
    """
    # validate input parameters
    if repo is not None and pkgname is None:
        return HttpResponse("Specifying repo ({}) without a pkg name should not be allowed.".format(repo), status=500)
    if not name_section_lang:
        return HttpResponse("The name of the man page was not specified.", status=400)
    assert "/" not in name_section_lang
    man_name, man_section, url_lang = _parse_man_name_section_lang(name_section_lang)
    lang = url_lang or "en"
    serve_output_type = url_output_type or "html"
    if serve_output_type not in {"html", "txt", "raw"}:
        return HttpResponse("No data for the {} content type are available.".format(serve_output_type), status=400)

    # find the man page and package containing it
    if man_section is None:
        query = ManPage.objects.filter(name=man_name, lang=lang, **_get_package_filter(repo, pkgname))
    else:
        # Note: section is matched case-insensitively due to
        # https://gitlab.archlinux.org/archlinux/archmanweb/-/issues/35
        query = ManPage.objects.filter(section__istartswith=man_section, name=man_name, lang=lang, **_get_package_filter(repo, pkgname))
    db_man = _get_best_match(query)

    if db_man is None:
        response = try_redirect(repo, pkgname, man_name, man_section, lang, url_output_type, name_section_lang)
        if response:
            return response
        # page does not exist even in the default language, return a nice 404 page with
        # a link to the search form
        return render_404(request, repo, pkgname, name_section_lang)

    if man_section != db_man.section:
        # try a symlink and check if its section is a better match than the man section
        # (e.g. mailx.1 is a symlink to mail.1, which takes precedence over mailx.1p)
        db_symlink = get_symlink(repo, pkgname, man_name, man_section, lang, url_output_type)
        if db_symlink is not None and _get_section_key(db_symlink.from_section) < _get_section_key(db_man.section):
            # repo and pkgname are not added, the target might be in a different package
            url = reverse_man_url("", "", db_symlink.to_name, db_symlink.to_section, db_symlink.lang, url_output_type)
            return HttpResponseRedirect(url)
        # redirect if man_section is None or just a prefix
        url = reverse_man_url(repo, pkgname, man_name, db_man.section, url_lang, url_output_type)
        return HttpResponseRedirect(url)
    db_pkg = db_man.package

    if serve_output_type == "raw":
        return HttpResponse(db_man.content.raw, content_type="text/plain; charset=utf8")

    try:
        converted_content = db_man.get_converted(serve_output_type)
    except SoelimError as e:
        # keep the original exception chained for easier debugging
        raise Http404("The requested manual contains a .so reference to an unknown file. The error is: {}".format(e)) from e

    if serve_output_type == "txt":
        return HttpResponse(converted_content, content_type="text/plain; charset=utf8")

    # links to other packages providing the same manual
    other_packages = []
    query = ManPage.objects.values("package__repo", "package__name") \
                           .filter(section=db_man.section, name=man_name, lang=lang) \
                           .exclude(package__id=db_pkg.id) \
                           .union(SymbolicLink.objects.values("package__repo", "package__name") \
                                                      .filter(from_section=db_man.section, from_name=man_name, lang=lang) \
                                                      .exclude(package__id=db_pkg.id)) \
                           .order_by("package__repo", "package__name")
    for row in query:
        info = {
            "repo": row["package__repo"],
            "name": row["package__name"],
        }
        other_packages.append(info)

    # links to other languages - might lead to different package, even if the user specified repo or pkgname
    other_languages = set()
    query = ManPage.objects.values("lang") \
                           .filter(section=db_man.section, name=man_name) \
                           .exclude(lang=lang) \
                           .union(SymbolicLink.objects.values("lang") \
                                                      .filter(from_section=db_man.section, from_name=man_name) \
                                                      .exclude(lang=lang))
    for row in query:
        other_languages.add(row["lang"])

    # links to other sections - might lead to different package, even if the user specified repo or pkgname
    other_sections = set()
    query = ManPage.objects.values("section") \
                           .filter(name=man_name, lang=lang) \
                           .exclude(section=db_man.section) \
                           .union(SymbolicLink.objects.values("from_section") \
                                                      .filter(from_name=man_name, lang=lang) \
                                                      .exclude(from_section=db_man.section))
    for row in query:
        other_sections.add(row["section"])

    # this is pretty fast, no caching
    headings = extract_headings(converted_content)

    context = {
        "lang": lang,   # used in base.html
        "url_repo": repo,
        "url_pkgname": pkgname,
        "url_lang": url_lang,
        "url_output_type": url_output_type,
        "pkg": db_pkg,
        "man": db_man,
        "man_page_content": converted_content,
        "headings": headings,
        "other_packages": other_packages,
        "other_languages": sorted(other_languages),
        "other_sections": sorted(other_sections),
    }

    return render(request, "man_page.html", context)
# --------------------------------------------------------------------------------
# /archmanweb/views/search.py:
# --------------------------------------------------------------------------------
import copy
import operator
from functools import reduce

from django.shortcuts import render
from django import forms
from django.core.cache import cache
from django.db.models import Q
from django.contrib.postgres.search import TrigramSimilarity, SearchQuery, SearchVector, SearchHeadline, SearchRank

from ..models import Package, Content, ManPage, SymbolicLink
from ..utils import paginate
from .man_page import quick_search

class SearchForm(forms.Form):
    """Search form with keyword, section, language, repository and package filters.

    The choices of the ``section``, ``lang`` and ``repo`` fields are read from
    the database (through the cache) each time the form is instantiated.
    """
    error_css_class = "form-error"
    required_css_class = "form-required"

    q = forms.CharField(label="Keywords", help_text="Enter search keywords")
    # placeholders, replaced in __init__ by fields with the actual choices
    section = forms.MultipleChoiceField()
    lang = forms.MultipleChoiceField()
    repo = forms.MultipleChoiceField()
    pkgname = forms.CharField(
        label="Package name",
        help_text="Limit results to a specific package name",
        required=False
    )

    # hidden field for quick search
    go = forms.CharField(widget=forms.HiddenInput(), required=False)

    def __init__(self, querydict, *args, **kwargs):
        """Bind the form to ``querydict`` and populate the choice fields."""
        super().__init__(querydict, *args, **kwargs)

        # cache common database queries: https://docs.djangoproject.com/en/3.1/topics/cache/#the-low-level-cache-api
        manpage_distinct_section = cache.get_or_set("ManPage:section:distinct", ManPage.objects.values_list("section", flat=True).distinct("section").order_by("section"), timeout=600)
        manpage_distinct_lang = cache.get_or_set("ManPage:lang:distinct", ManPage.objects.values_list("lang", flat=True).distinct("lang").order_by("lang"), timeout=600)
        package_distinct_repo = cache.get_or_set("Package:repo:distinct", Package.objects.values_list("repo", flat=True).distinct("repo").order_by("repo"), timeout=600)

        section_descriptions = {
            "1": "1 - General commands",
            "2": "2 - System calls",
            "3": "3 - Library functions",
            "4": "4 - Device files",
            "5": "5 - File formats",
            "6": "6 - Games",
            "7": "7 - Miscellaneous",
            "8": "8 - Privileged commands",
            "9": "9 - Kernel internals",
        }

        # django does not support dynamic assignments into the field instances,
        # so the whole fields have to be recreated from scratch
        self.fields["section"] = forms.MultipleChoiceField(
            label="Section",
            help_text="Limit results to a specific manual section or subsection",
            choices=[(r, section_descriptions.get(r, r)) for r in manpage_distinct_section],
            required=False,
        )
        self.fields["lang"] = forms.MultipleChoiceField(
            label="Language",
            help_text="Limit results to a specific language",
            choices=[(r, r) for r in manpage_distinct_lang],
            required=False,
        )
        self.fields["repo"] = forms.MultipleChoiceField(
            label="Repository",
            help_text="Limit results to a specific package repository",
            choices=[(r, r) for r in package_distinct_repo],
            required=False,
        )

def build_apropos_filter(q):
    """Translate a ``Q`` object into a raw SQL condition and its parameters.

    Supported lookups are ``exact``, ``iexact``, ``in``, ``startswith`` and
    ``istartswith``. Keys starting with ``package__`` refer to columns of the
    ``Package`` table, all other keys to columns of the ``ManPage`` table.
    Nested ``Q`` objects are handled recursively and joined with
    ``q.connector``.

    NOTE: ``q.negated`` is not consulted, so negated expressions are not
    supported.

    Returns:
        A tuple ``(condition, values)`` where ``condition`` is an SQL string
        with ``%s`` placeholders (empty when ``q`` has no children) and
        ``values`` is the list of parameters in placeholder order.

    Raises:
        NotImplementedError: for any other lookup type.
    """
    def build_condition(key, value):
        # parse the Django syntax (hardcoded for current models)
        column, operation = key.rsplit("__", maxsplit=1)
        if column.startswith("package__"):
            column = column.split("__", maxsplit=1)[1]
            column = f"\"{Package.objects.model._meta.db_table}\".\"{column}\""
        else:
            column = f"\"{ManPage.objects.model._meta.db_table}\".\"{column}\""
        # select the correct operator
        if operation == "exact":
            op = "= %s::text"
        elif operation == "iexact":
            op = "= lower(%s::text)"
            column = f"lower({column})"
        elif operation == "in":
            # one placeholder per list element
            op = "IN ({})".format(", ".join(["%s::text"] * len(value)))
        elif operation == "startswith":
            # "~~" is the operator form of LIKE
            op = "~~ %s::text"
            value += "%"
        elif operation == "istartswith":
            op = "~~ lower(%s::text)"
            column = f"lower({column})"
            value += "%"
        else:
            raise NotImplementedError(f"Operation {operation} is not implemented for the apropos search.")
        # build the filter condition
        condition = f"{column} {op}"
        return condition, value

    conditions = []
    values = []
    for child in q.children:
        if isinstance(child, Q):
            c, v = build_apropos_filter(child)
            conditions.append(c)
            values += v
            continue
        key, value = child
        condition, value = build_condition(key, value)
        conditions.append(condition)
        if isinstance(value, list):
            values += value
        else:
            values.append(value)

    condition = f" {q.connector} ".join("({})".format(c) for c in conditions)
    return condition, values

# References:
# - https://www.postgresql.org/docs/current/static/pgtrgm.html
# - https://www.postgresql.org/docs/current/static/textsearch.html
# - https://www.postgresql.org/docs/current/static/functions-textsearch.html
# - https://www.postgresql.org/docs/current/static/textsearch-controls.html#textsearch-headline
def search(request):
    """Handle the search page.

    An invalid form is rendered back without results. For a quick search
    (hidden ``go`` field equal to ``"Go"``) the view first tries
    ``quick_search`` and returns its response when there is one. Otherwise
    three paginated result sets (20 items per page) are rendered:

    - ``man_results``: man pages and symlinks whose name is trigram-similar
      to the search term,
    - ``apropos_results``: full-text matches in the man page descriptions,
    - ``pkg_results``: packages matching by name or description.
    """
    search_form = SearchForm(request.GET)
    if not search_form.is_valid():
        return render(request, "search.html", {"search_form": search_form})

    term = search_form.cleaned_data["q"]
    filter_section = search_form.cleaned_data["section"]
    filter_lang = search_form.cleaned_data["lang"]
    filter_repo = search_form.cleaned_data["repo"]
    filter_pkgname = search_form.cleaned_data["pkgname"]

    # handle quick search
    # (it targets one specific page, so it is attempted only when each of the
    # multi-valued filters selects at most one value)
    go = search_form.cleaned_data["go"]
    if term and go == "Go" and len(filter_section) <= 1 and len(filter_lang) <= 1 and len(filter_repo) <= 1:
        # The filters are lists, so they cannot be appended to the string
        # directly; build the dotted "name[.section][.lang]" identifier.
        name_section_lang = term
        if filter_section:
            name_section_lang += "." + filter_section[0]
        if filter_lang:
            name_section_lang += "." + filter_lang[0]
        response = quick_search(repo=filter_repo[0] if len(filter_repo) == 1 else None,
                                pkgname=filter_pkgname or None,
                                name_section_lang=name_section_lang)
        if response:
            return response

    man_filter = Q()
    pkg_filter = Q()

    if filter_section:
        assert isinstance(filter_section, list)
        section_parts = []
        for q in filter_section:
            # do prefix search only when given a single letter (e.g. "3p" should not match "3perl", "3python" etc.)
            # Note: section is matched case-insensitively due to
            # https://gitlab.archlinux.org/archlinux/archmanweb/-/issues/35
            if len(q) == 1:
                section_parts.append(Q(section__istartswith=q))
            else:
                section_parts.append(Q(section__iexact=q))
        man_filter &= reduce(operator.__or__, section_parts)
    if filter_lang:
        assert isinstance(filter_lang, list)
        man_filter &= reduce(operator.__or__,
                             (Q(lang__startswith=q) for q in filter_lang))
    if filter_repo:
        assert isinstance(filter_repo, list)
        man_filter &= Q(package__repo__in=filter_repo)
        pkg_filter &= Q(repo__in=filter_repo)
    if filter_pkgname:
        man_filter &= Q(package__name__iexact=filter_pkgname)
        pkg_filter &= Q(name__iexact=filter_pkgname)

    # this is only because we cannot use .annotate() inside the union (Django would add another column)
    symlink_filter = copy.deepcopy(man_filter)
    def build_symlink_filter(q):
        # rename "section__*" lookups to "from_section__*" in place (recursively)
        for i, child in enumerate(q.children):
            if isinstance(child, Q):
                build_symlink_filter(child)
                continue
            key, value = child
            if key.startswith("section__"):
                key = "from_" + key
            q.children[i] = (key, value)
    build_symlink_filter(symlink_filter)

    man_results = ManPage.objects.values("name", "section", "lang", "package__repo", "package__name") \
                                 .filter(name__trigram_similar=term).filter(man_filter) \
                                 .annotate(similarity=TrigramSimilarity("name", term)) \
                                 .union(SymbolicLink.objects.values("from_name", "from_section", "lang", "package__repo", "package__name")
                                                            .filter(from_name__trigram_similar=term).filter(symlink_filter)
                                                            .annotate(similarity=TrigramSimilarity("from_name", term)),
                                        all=True) \
                                 .order_by("-similarity", "name", "section", "lang", "package__name", "package__repo")

    # full-text search objects: https://docs.djangoproject.com/en/3.1/ref/contrib/postgres/search/
    ts_query = SearchQuery(term)
    ts_vector = SearchVector("description", config="english")
    ts_headline = SearchHeadline("description", ts_query, start_sel="<b>", stop_sel="</b>")
    #ts_rank = SearchRank(ts_vector, ts_query, normalization=32)
    ts_sim_rank = TrigramSimilarity("name", term) + 2 * SearchRank(ts_vector, ts_query, normalization=32)

    # get table names for the models (needed for raw SQL)
    package_table = Package.objects.model._meta.db_table
    content_table = Content.objects.model._meta.db_table
    manpage_table = ManPage.objects.model._meta.db_table

    # build the WHERE clause (ugh)
    apropos_filter_conditions, apropos_filter_values = build_apropos_filter(man_filter)
    if apropos_filter_conditions:
        apropos_filter = f"WHERE {apropos_filter_conditions}"
    else:
        apropos_filter = ""

    # For the search in man page descriptions ("apropos") we need to perform a raw SQL query,
    # because it is not possible to express the same query with Django ORM.
    # Notes:
    # - the subquery (i.e. INNER JOIN (...) AS subquery) is necessary for good performance
    # - INNER JOIN instead of LEFT OUTER JOIN is needed on the subquery, otherwise PostgreSQL
    #   will not use the GIN index
    # - WITH is used for convenience to avoid repeating the ts_rank expression in the WHERE clause
    #   https://www.postgresql.org/docs/current/queries-with.html
    content_results = f"""
        WITH content_search AS (
            SELECT "{content_table}"."id",
                   ts_headline("{content_table}"."description", plainto_tsquery(%s), 'StartSel=''<b>'', StopSel=''</b>''') AS "desc_snippet",
                   ts_rank(to_tsvector('english'::regconfig, COALESCE("{content_table}"."description", '')), plainto_tsquery(%s), 32) AS "rank",
                   to_tsvector('english'::regconfig, COALESCE("{content_table}"."description", '')) AS "search"
            FROM "{content_table}"
        )
        SELECT *
        FROM "content_search" WHERE "search" @@ plainto_tsquery(%s) AND "rank" > 0.001"""
    apropos_results = ManPage.objects.raw(f"""
        SELECT "{manpage_table}"."id",
               "{manpage_table}"."name",
               "{manpage_table}"."section",
               "{manpage_table}"."lang",
               "{package_table}"."repo" AS "package__repo",
               "{package_table}"."name" AS "package__name",
               "desc_snippet",
               "rank"
        FROM "{manpage_table}" INNER JOIN "{package_table}" ON ("{manpage_table}"."package_id" = "{package_table}"."id")
                               INNER JOIN ({content_results}) AS subquery ON ("{manpage_table}"."converted_content_id" = "subquery"."id")
        {apropos_filter}
        ORDER BY "rank" DESC, "{manpage_table}"."name" ASC, "{manpage_table}"."section" ASC, "{manpage_table}"."lang" ASC, "package__name" ASC, "package__repo" ASC""",
        [term, term, term] + apropos_filter_values)
    # NOTE: Some other things that were tried with Django ORM (as of Django 3.1):
    # 1. We could do this if we did not need a subquery (this works, but is slow):
    #       apropos_results = ManPage.objects.values("name", "section", "lang", "package__repo", "package__name", "converted_content__description").extra(
    #           select={
    #               "desc_snippet": f"ts_headline('english', COALESCE({content_table}.description, ''), plainto_tsquery(%s))",
    #               "rank": f"ts_rank(to_tsvector('english', COALESCE({content_table}.description, '')), plainto_tsquery(%s), 32)",
    #           },
    #           where=[f"to_tsvector('english', COALESCE({content_table}.description, '')) @@ plainto_tsquery(%s)"],
    #           params=[term],
    #           select_params=[term, term],
    #           order_by=("-rank", "name", "section", "lang", "package__name", "package__repo"),
    #       )
    #
    # 2. A mostly equivalent query in pure Django ORM syntax (better parametrization, still no subquery, same performance):
    #       from django.db.models import F
    #       apropos_results = ManPage.objects.values("name", "section", "lang", "package__repo", "package__name", "converted_content__description") \
    #                                        .annotate(description=F("converted_content__description")) \
    #                                        .annotate(desc_snippet=ts_headline) \
    #                                        .annotate(rank=ts_rank) \
    #                                        .annotate(search=ts_vector) \
    #                                        .filter(search=ts_query) \
    #                                        .order_by("-rank", "name", "section", "lang", "package__name", "package__repo")
    # 3. We can define the subquery like this, but the real question is how to use it:
    #       content_results = Content.objects.only("id") \
    #                                        .annotate(desc_snippet=ts_headline) \
    #                                        .annotate(rank=ts_rank) \
    #                                        .annotate(search=ts_vector) \
    #                                        .filter(search=ts_query)
    #    Also note that we can't use the subquery even in the plain-text for a raw SQL query,
    #    because the ".query" attribute strips '' from the COALESCE function. [WTF!!!]
    #    3a) Django supports subqueries like this: https://docs.djangoproject.com/en/3.1/ref/models/expressions/#subquery-expressions
    #           SELECT "post"."id", (
    #               SELECT U0."email"
    #               FROM "comment" U0
    #               WHERE U0."post_id" = ("post"."id")
    #               ORDER BY U0."created_at" DESC LIMIT 1
    #           ) AS "newest_commenter_email" FROM "post"
    #        But this is not applicable here, because the subquery *must* return exactly one column
    #        (otherwise it is an SQL syntax error). Anyway, the code (which does not work) would
    #        be more or less like this:
    #           from django.db.models import OuterRef, Subquery
    #           content_results = Content.objects.only("id") \
    #                                            .annotate(desc_snippet=ts_headline) \
    #                                            .annotate(rank=ts_rank) \
    #                                            .annotate(search=ts_vector) \
    #                                            .filter(Q(search=ts_query) & Q(id=OuterRef("converted_content_id")))  # this is basically the join condition
    #           apropos_results = ManPage.objects.values("name", "section", "lang", "package__repo", "package__name") \
    #                                            .annotate(content_subquery=Subquery(content_results)) \
    #                                            .order_by("-rank", "name", "section", "lang", "package__name", "package__repo")
    #    3b) Django supports joins with simple subqueries via FilteredRelation objects, but it
    #        does not work with arbitrary subqueries, especially subqueries which add additional
    #        columns (like our "desc_snippet" and "rank").
    #        https://docs.djangoproject.com/en/3.1/ref/models/querysets/#filteredrelation-objects

    # Note: the "Q" objects allow more complicated expressions in the filter:
    # https://docs.djangoproject.com/en/3.1/topics/db/queries/#complex-lookups-with-q
    pkg_results = Package.objects.only("repo", "name") \
                                 .annotate(desc_snippet=ts_headline) \
                                 .annotate(rank=ts_sim_rank) \
                                 .annotate(search=ts_vector) \
                                 .filter(pkg_filter) \
                                 .filter(Q(name__trigram_similar=term) | Q(search=ts_query)) \
                                 .order_by("-rank", "name", "repo")

    man_results = paginate(request, "page_man", man_results, 20)
    apropos_results = paginate(request, "page_apropos", apropos_results, 20)
    pkg_results = paginate(request, "page_pkg", pkg_results, 20)

    context = {
        "search_form": search_form,
        "man_results": man_results,
        "apropos_results": apropos_results,
        "pkg_results": pkg_results,
    }

    return render(request, "search.html", context)
# --------------------------------------------------------------------------------
# /local_settings.py.example:
# --------------------------------------------------------------------------------
## Reference: https://docs.djangoproject.com/en/3.1/howto/deployment/checklist/

## Import the common settings, which may be overridden in this file.
from settings import *

DEBUG = False

## Make this unique, and don't share it with anybody.
## NOTE: the value below is only a placeholder of this example file.
SECRET_KEY = '00000000000000000000000000000000000000000000000'

## Must not be empty when DEBUG is False
ALLOWED_HOSTS = []


## PostgreSQL database settings
#DATABASES = {
#    'default': {
#        'ENGINE': 'django.db.backends.postgresql',
#        'NAME': 'archmanweb',
#        'USER': 'archmanweb',
#        'PASSWORD': 'secret',
#    }
#}
# --------------------------------------------------------------------------------
# /manage.py:
# --------------------------------------------------------------------------------
#! /usr/bin/env python3

# Command-line entry point: runs Django management commands with the
# "local_settings" module as the default settings.

import os
import sys

if __name__ == "__main__":
    # setdefault keeps a DJANGO_SETTINGS_MODULE that is already set in the environment
    os.environ.setdefault("DJANGO_SETTINGS_MODULE", "local_settings")
    try:
        from django.core.management import execute_from_command_line
    except ImportError:
        # The above import may fail for some other reason. Ensure that the
        # issue is really that Django is missing to avoid masking other
        # exceptions on Python 2.
        try:
            import django
        except ImportError:
            raise ImportError(
                "Couldn't import Django. Are you sure it's installed and "
                "available on your PYTHONPATH environment variable? Did you "
                "forget to activate a virtual environment?"
            )
        # Django itself is importable, so re-raise the original ImportError
        raise
    execute_from_command_line(sys.argv)
# --------------------------------------------------------------------------------
# /settings.py:
# --------------------------------------------------------------------------------
# Common settings of the project; local_settings.py.example imports everything
# from this module and overrides selected values.
import os

DEBUG = False

# Full path to the base project directory
BASE_DIR = os.path.dirname(os.path.abspath(__file__))

INSTALLED_APPS = [
    "django.contrib.staticfiles",
    "django.contrib.postgres",
    "archmanweb",
]

# https://docs.djangoproject.com/en/3.1/topics/http/middleware/
MIDDLEWARE = [
    # https://docs.djangoproject.com/en/3.1/ref/middleware/#django.middleware.common.CommonMiddleware
    "django.middleware.common.CommonMiddleware",
    # https://docs.djangoproject.com/en/3.1/ref/csrf/
    "django.middleware.csrf.CsrfViewMiddleware",
    # https://docs.djangoproject.com/en/3.1/ref/clickjacking/
    "django.middleware.clickjacking.XFrameOptionsMiddleware",
    # https://docs.djangoproject.com/en/3.1/ref/middleware/#django.middleware.security.SecurityMiddleware
    "django.middleware.security.SecurityMiddleware",
    # https://django-csp.readthedocs.io/en/latest/
    "csp.middleware.CSPMiddleware",
]

# Referrer Policy
SECURE_REFERRER_POLICY = 'no-referrer-when-downgrade'

# X-XSS-Protection, enables cross-site scripting filter in most browsers
SECURE_BROWSER_XSS_FILTER = True

# CSP Settings
CSP_DEFAULT_SRC = ("'self'",)
CSP_SCRIPT_SRC = ("'none'",)
CSP_IMG_SRC = ("'self'",)
CSP_BASE_URI = ("'none'",)
CSP_FORM_ACTION = ("'self'",)
CSP_FRAME_ANCESTORS = ("'none'",)

# Base of the URL hierarchy
ROOT_URLCONF = "urls"

# requires CommonMiddleware
APPEND_SLASH = True

# URL to serve static files
STATIC_URL = "/static/"

# Location to collect static files
STATIC_ROOT = os.path.join(BASE_DIR, "collected_static")

# Look for more static files in these locations
# (use a tuple to keep the directory namespaced)
# https://docs.djangoproject.com/en/3.1/ref/settings/#prefixes-optional
STATICFILES_DIRS = (
    ("archlinux-common", os.path.join(BASE_DIR, "archlinux-common-style/css")),
    ("archlinux-common", os.path.join(BASE_DIR, "archlinux-common-style/img")),
)

# Static files backend that appends the MD5 hash of the file’s content to the filename
# (this allows us to use far future Expires headers)
STATICFILES_STORAGE = "django.contrib.staticfiles.storage.ManifestStaticFilesStorage"

# Internationalization
# https://docs.djangoproject.com/en/3.1/topics/i18n/
LANGUAGE_CODE = "en-us"
TIME_ZONE = "UTC"
USE_I18N = False
USE_L10N = False
USE_TZ = True

TEMPLATES = [
    {
        "BACKEND": "django.template.backends.django.DjangoTemplates",
        "DIRS": [],
        # templates are looked up inside the installed applications
        "APP_DIRS": True,
        "OPTIONS": {
            # NOTE: uses the DEBUG value defined in this module
            "debug": DEBUG,
            "context_processors": [
                "django.template.context_processors.debug",
                "django.template.context_processors.request",
            ],
        },
    },
]
# --------------------------------------------------------------------------------
# /urls.py:
# --------------------------------------------------------------------------------
# Root URL configuration (see ROOT_URLCONF): everything is delegated to archmanweb.urls.
from django.urls import include, path

urlpatterns = [
    path("", include("archmanweb.urls")),
]
# --------------------------------------------------------------------------------
# /wsgi.py:
# --------------------------------------------------------------------------------
# WSGI entry point; exposes the WSGI callable as "application".
import os
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "local_settings")

from django.core.wsgi import get_wsgi_application
application = get_wsgi_application()
# --------------------------------------------------------------------------------