├── .gitignore ├── CHANGELOG.rst ├── LICENSE.txt ├── MANIFEST.in ├── README.rst ├── UPGRADING.rst ├── contrib ├── gitolite │ ├── get-grok-manifest.command │ └── grok-get-gl-manifest.sh ├── grok-fsck@.service ├── grok-fsck@.timer ├── grok-pull@.service ├── logrotate ├── pubsubv1.py ├── python-grokmirror.spec ├── ref-updated └── selinux │ └── el7 │ ├── grokmirror.fc │ └── grokmirror.te ├── grokmirror.conf ├── grokmirror ├── __init__.py ├── bundle.py ├── dumb_pull.py ├── fsck.py ├── manifest.py ├── pi_indexer.py ├── pi_piper.py └── pull.py ├── man ├── grok-bundle.1 ├── grok-bundle.1.rst ├── grok-dumb-pull.1 ├── grok-dumb-pull.1.rst ├── grok-fsck.1 ├── grok-fsck.1.rst ├── grok-manifest.1 ├── grok-manifest.1.rst ├── grok-pi-indexer.1 ├── grok-pi-indexer.1.rst ├── grok-pi-piper.1 ├── grok-pi-piper.1.rst ├── grok-pull.1 └── grok-pull.1.rst ├── pi-piper.conf ├── requirements.txt └── setup.py /.gitignore: -------------------------------------------------------------------------------- 1 | .idea 2 | *.pyc 3 | *.swp 4 | *.pdf 5 | *~ 6 | dist 7 | build 8 | *.egg-info 9 | -------------------------------------------------------------------------------- /CHANGELOG.rst: -------------------------------------------------------------------------------- 1 | v2.1.0 (TBD) 2 | ------------ 3 | - Add new hook post_work_complete_hook that fires after all work is 4 | complete and grokmirror goes idle 5 | - Add new command grok-pi-indexer for indexing public-inbox mirrored 6 | repositories 7 | 8 | v2.0.9 (2021-07-13) 9 | ------------------- 10 | - Add initial support for post_clone_complete_hook that fires only after 11 | all new clones have been completed. 12 | - Fix grok-manifest traceback due to unicode errors in the repo 13 | description file. 14 | - Minor code cleanups. 15 | 16 | v2.0.8 (2021-03-11) 17 | ------------------- 18 | - Fixes around symlink handling in manifest files. Adding and deleting 19 | symlinks should properly work again. 
20 | - Don't require [fsck] section in the config file (though you'd almost 21 | always want it there). 22 | 23 | v2.0.7 (2021-01-19) 24 | ------------------- 25 | - A slew of small fixes improving performance on very large repository 26 | collections (CAF internally is 32,500). 27 | 28 | v2.0.6 (2021-01-07) 29 | ------------------- 30 | - Use fsck.extra_repack_flags when doing quick post-clone repacks 31 | - Store objects in objstore after grok-dumb-pull call on a repo that uses 32 | objstore repositories 33 | 34 | v2.0.5 (2020-11-25) 35 | ------------------- 36 | - Prioritize baseline repositories when finding related objstore repos. 37 | - Minor fixes. 38 | 39 | v2.0.4 (2020-11-06) 40 | ------------------- 41 | - Add support for using git plumbing for objstore operations, via enabling 42 | core.objstore_uses_plumbing. This makes it possible to significantly speed up 43 | fetching objects into objstore during pull operations. Fsck operations 44 | will continue to use porcelain "git fetch", since speed is less important 45 | in those cases and it's best to opt for maximum safety. As a benchmark, 46 | with remote.preload_bundle_url and core.objstore_uses_plumbing settings 47 | enabled, cloning a full replica of git.kernel.org takes less than an hour 48 | as opposed to over a day. 49 | 50 | v2.0.3 (2020-11-04) 51 | ------------------- 52 | - Refuse to delete ffonly repos 53 | - Add new experimental bundle_preload feature for generating objstore 54 | repo bundles and using them to preload objstores on the mirrors 55 | 56 | v2.0.2 (2020-10-06) 57 | ------------------- 58 | - Provide pi-piper utility for piping new messages from public-inbox 59 | repositories. It can be specified as post_update_hook: 60 | post_update_hook = /usr/bin/grok-pi-piper -c ~/.config/pi-piper.conf 61 | - Add -r option to grok-manifest to ignore specific refs when calculating 62 | repository fingerprint. This is mostly useful for mirroring from gerrit. 
63 | 64 | v2.0.1 (2020-09-30) 65 | ------------------- 66 | - fix potential corruption when migrating repositories with existing 67 | alternates to new object storage format 68 | - improve grok-fsck console output to be less misleading for large repo 69 | collections (was misreporting obstrepo/total repo numbers) 70 | - use a faster repo search algorithm that doesn't needlessly recurse 71 | into git repos themselves, once found 72 | 73 | 74 | v2.0.0 (2020-09-21) 75 | ------------------- 76 | Major rewrite to improve shared object storage and replication for VERY 77 | LARGE repository collections (codeaurora.org is ~30,000 repositories, 78 | which are mostly various forks of Android). 79 | 80 | See UPGRADING.rst for the upgrade strategy. 81 | 82 | Below are some major highlights. 83 | 84 | - Drop support for python < 3.6 85 | - Introduce "object storage" repositories that benefit from git-pack 86 | delta islands and improve overall disk storage footprint (depending on 87 | the number of forks). 88 | - Drop dependency on GitPython, use git calls directly for all operations 89 | - Remove progress bars to slim down dependencies (drops enlighten) 90 | - Make grok-pull operate in daemon mode (with -o) (see contrib for 91 | systemd unit files). This is more efficient than the cron mode when 92 | run very frequently. 93 | - Provide a socket listener for pubsub push updates (see contrib for 94 | Google pubsubv1.py). 95 | - Merge fsck.conf and repos.conf into a single config file. This 96 | requires creating a new configuration file after the upgrade. See 97 | UPGRADING.rst for details. 98 | - Record and propagate HEAD position using the manifest file. 99 | - Add grok-bundle command to create clone.bundle files for CDN-offloaded 100 | cloning (mostly used by Android's repo command). 101 | - Add SELinux policy for EL7 (see contrib). 
102 | 103 | 104 | v1.2.2 (2019-10-23) 105 | ------------------- 106 | - Small bugfixes 107 | - Generate commit-graph file if the version of git is new 108 | enough to support it. This is done during grok-fsck any time we 109 | decide that the repository needs to be repacked. You can force 110 | this off by setting commitgraph=never in config. 111 | 112 | 113 | v1.2.1 (2019-03-11) 114 | ------------------- 115 | - Minor feature improvement changing how precious=yes works. 116 | Grokmirror will now turn preciousObjects off for the duration 117 | of the repack. We still protect shared repositories against 118 | inadvertent object pruning by outside processes, but this 119 | allows us to clean up loose objects and obsolete packs. 120 | To have the 1.2.0 behaviour back, set precious=always, but it 121 | is only really useful in very rare cases. 122 | 123 | 124 | v1.2.0 (2019-02-14) 125 | ------------------- 126 | - Make sure to set gc.auto=0 on repositories to avoid pruning repos 127 | that are acting as alternates to others. We run our own prune 128 | during fsck, so there is no need to auto-gc, ever (unless you 129 | didn't set up grok-fsck, in which case you're not doing it right). 130 | - Rework the repack code to be more clever -- instead of repacking 131 | based purely on dates, we now track the number of loose objects 132 | and the number of generated packs. Many of the settings are 133 | hardcoded for the moment while testing, but will probably end up 134 | settable via global and per-repository config settings. 135 | - The following fsck.conf settings have no further effect: 136 | - repack_flags (replaced with extra_repack_flags) 137 | - full_repack_flags (replaced with extra_repack_flags_full) 138 | - full_repack_every (we now figure it out ourselves) 139 | - Move git command invocation routines into a central function to 140 | reduce the amount of code duplication. 
You can also set the path 141 | to the git binary using the GITBIN env variable or by simply 142 | adding it to your path. 143 | - Add "reclone_on_errors" setting in fsck.conf. If fsck/repack/prune 144 | comes across a matching error, it will mark the repository for 145 | recloning and it will be cloned anew from the master the next time 146 | grok-pull runs. This is useful for auto-correcting corruption on the 147 | mirrors. You can also manually request a reclone by creating a 148 | "grokmirror.reclone" file in a repository. 149 | - Set extensions.preciousObjects for repositories used with git 150 | alternates if precious=yes is set in fsck.conf. This helps further 151 | protect shared repos from erroneous pruning (e.g. done manually by 152 | an administrator). 153 | 154 | 155 | v1.1.1 (2018-07-25) 156 | ------------------- 157 | - Quickfix a bug that was causing repositories to never be repacked 158 | due to miscalculated fingerprints. 159 | 160 | 161 | v1.1.0 (2018-04-24) 162 | ------------------- 163 | - Make Python3 compatible (thanks to QuLogic for most of the work) 164 | - Rework grok-fsck to improve functionality: 165 | 166 | - run repack and prune before fsck, for optimal safety 167 | - add --connectivity flag to run fsck with --connectivity-only 168 | - add --repack-all-quick to trigger a quick repack of all repos 169 | - add --repack-all-full to trigger a full repack of all repositories 170 | using the defined full_repack_flags from fsck.conf 171 | - always run fsck with --no-dangling, because mirror admins are not 172 | responsible for cleaning those up anyway 173 | - no longer locking repos when running repack/prune/fsck, because 174 | these operations are safe as long as they are done by git itself 175 | 176 | - fix grok-pull so it no longer purges repos that are providing 177 | alternates to others 178 | - fix grok-fsck so it's more paranoid when pruning repos providing 179 | alternates to others (checks all repos on disk, not just manifest) 180 | - 
in verbose mode, most commands will draw progress bars (handy with 181 | very large collections of repositories) 182 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | GNU GENERAL PUBLIC LICENSE 2 | Version 3, 29 June 2007 3 | 4 | Copyright (C) 2007 Free Software Foundation, Inc. 5 | Everyone is permitted to copy and distribute verbatim copies 6 | of this license document, but changing it is not allowed. 7 | 8 | Preamble 9 | 10 | The GNU General Public License is a free, copyleft license for 11 | software and other kinds of works. 12 | 13 | The licenses for most software and other practical works are designed 14 | to take away your freedom to share and change the works. By contrast, 15 | the GNU General Public License is intended to guarantee your freedom to 16 | share and change all versions of a program--to make sure it remains free 17 | software for all its users. We, the Free Software Foundation, use the 18 | GNU General Public License for most of our software; it applies also to 19 | any other work released this way by its authors. You can apply it to 20 | your programs, too. 21 | 22 | When we speak of free software, we are referring to freedom, not 23 | price. Our General Public Licenses are designed to make sure that you 24 | have the freedom to distribute copies of free software (and charge for 25 | them if you wish), that you receive source code or can get it if you 26 | want it, that you can change the software or use pieces of it in new 27 | free programs, and that you know you can do these things. 28 | 29 | To protect your rights, we need to prevent others from denying you 30 | these rights or asking you to surrender the rights. Therefore, you have 31 | certain responsibilities if you distribute copies of the software, or if 32 | you modify it: responsibilities to respect the freedom of others. 
33 | 34 | For example, if you distribute copies of such a program, whether 35 | gratis or for a fee, you must pass on to the recipients the same 36 | freedoms that you received. You must make sure that they, too, receive 37 | or can get the source code. And you must show them these terms so they 38 | know their rights. 39 | 40 | Developers that use the GNU GPL protect your rights with two steps: 41 | (1) assert copyright on the software, and (2) offer you this License 42 | giving you legal permission to copy, distribute and/or modify it. 43 | 44 | For the developers' and authors' protection, the GPL clearly explains 45 | that there is no warranty for this free software. For both users' and 46 | authors' sake, the GPL requires that modified versions be marked as 47 | changed, so that their problems will not be attributed erroneously to 48 | authors of previous versions. 49 | 50 | Some devices are designed to deny users access to install or run 51 | modified versions of the software inside them, although the manufacturer 52 | can do so. This is fundamentally incompatible with the aim of 53 | protecting users' freedom to change the software. The systematic 54 | pattern of such abuse occurs in the area of products for individuals to 55 | use, which is precisely where it is most unacceptable. Therefore, we 56 | have designed this version of the GPL to prohibit the practice for those 57 | products. If such problems arise substantially in other domains, we 58 | stand ready to extend this provision to those domains in future versions 59 | of the GPL, as needed to protect the freedom of users. 60 | 61 | Finally, every program is threatened constantly by software patents. 62 | States should not allow patents to restrict development and use of 63 | software on general-purpose computers, but in those that do, we wish to 64 | avoid the special danger that patents applied to a free program could 65 | make it effectively proprietary. 
To prevent this, the GPL assures that 66 | patents cannot be used to render the program non-free. 67 | 68 | The precise terms and conditions for copying, distribution and 69 | modification follow. 70 | 71 | TERMS AND CONDITIONS 72 | 73 | 0. Definitions. 74 | 75 | "This License" refers to version 3 of the GNU General Public License. 76 | 77 | "Copyright" also means copyright-like laws that apply to other kinds of 78 | works, such as semiconductor masks. 79 | 80 | "The Program" refers to any copyrightable work licensed under this 81 | License. Each licensee is addressed as "you". "Licensees" and 82 | "recipients" may be individuals or organizations. 83 | 84 | To "modify" a work means to copy from or adapt all or part of the work 85 | in a fashion requiring copyright permission, other than the making of an 86 | exact copy. The resulting work is called a "modified version" of the 87 | earlier work or a work "based on" the earlier work. 88 | 89 | A "covered work" means either the unmodified Program or a work based 90 | on the Program. 91 | 92 | To "propagate" a work means to do anything with it that, without 93 | permission, would make you directly or secondarily liable for 94 | infringement under applicable copyright law, except executing it on a 95 | computer or modifying a private copy. Propagation includes copying, 96 | distribution (with or without modification), making available to the 97 | public, and in some countries other activities as well. 98 | 99 | To "convey" a work means any kind of propagation that enables other 100 | parties to make or receive copies. Mere interaction with a user through 101 | a computer network, with no transfer of a copy, is not conveying. 
102 | 103 | An interactive user interface displays "Appropriate Legal Notices" 104 | to the extent that it includes a convenient and prominently visible 105 | feature that (1) displays an appropriate copyright notice, and (2) 106 | tells the user that there is no warranty for the work (except to the 107 | extent that warranties are provided), that licensees may convey the 108 | work under this License, and how to view a copy of this License. If 109 | the interface presents a list of user commands or options, such as a 110 | menu, a prominent item in the list meets this criterion. 111 | 112 | 1. Source Code. 113 | 114 | The "source code" for a work means the preferred form of the work 115 | for making modifications to it. "Object code" means any non-source 116 | form of a work. 117 | 118 | A "Standard Interface" means an interface that either is an official 119 | standard defined by a recognized standards body, or, in the case of 120 | interfaces specified for a particular programming language, one that 121 | is widely used among developers working in that language. 122 | 123 | The "System Libraries" of an executable work include anything, other 124 | than the work as a whole, that (a) is included in the normal form of 125 | packaging a Major Component, but which is not part of that Major 126 | Component, and (b) serves only to enable use of the work with that 127 | Major Component, or to implement a Standard Interface for which an 128 | implementation is available to the public in source code form. A 129 | "Major Component", in this context, means a major essential component 130 | (kernel, window system, and so on) of the specific operating system 131 | (if any) on which the executable work runs, or a compiler used to 132 | produce the work, or an object code interpreter used to run it. 
133 | 134 | The "Corresponding Source" for a work in object code form means all 135 | the source code needed to generate, install, and (for an executable 136 | work) run the object code and to modify the work, including scripts to 137 | control those activities. However, it does not include the work's 138 | System Libraries, or general-purpose tools or generally available free 139 | programs which are used unmodified in performing those activities but 140 | which are not part of the work. For example, Corresponding Source 141 | includes interface definition files associated with source files for 142 | the work, and the source code for shared libraries and dynamically 143 | linked subprograms that the work is specifically designed to require, 144 | such as by intimate data communication or control flow between those 145 | subprograms and other parts of the work. 146 | 147 | The Corresponding Source need not include anything that users 148 | can regenerate automatically from other parts of the Corresponding 149 | Source. 150 | 151 | The Corresponding Source for a work in source code form is that 152 | same work. 153 | 154 | 2. Basic Permissions. 155 | 156 | All rights granted under this License are granted for the term of 157 | copyright on the Program, and are irrevocable provided the stated 158 | conditions are met. This License explicitly affirms your unlimited 159 | permission to run the unmodified Program. The output from running a 160 | covered work is covered by this License only if the output, given its 161 | content, constitutes a covered work. This License acknowledges your 162 | rights of fair use or other equivalent, as provided by copyright law. 163 | 164 | You may make, run and propagate covered works that you do not 165 | convey, without conditions so long as your license otherwise remains 166 | in force. 
You may convey covered works to others for the sole purpose 167 | of having them make modifications exclusively for you, or provide you 168 | with facilities for running those works, provided that you comply with 169 | the terms of this License in conveying all material for which you do 170 | not control copyright. Those thus making or running the covered works 171 | for you must do so exclusively on your behalf, under your direction 172 | and control, on terms that prohibit them from making any copies of 173 | your copyrighted material outside their relationship with you. 174 | 175 | Conveying under any other circumstances is permitted solely under 176 | the conditions stated below. Sublicensing is not allowed; section 10 177 | makes it unnecessary. 178 | 179 | 3. Protecting Users' Legal Rights From Anti-Circumvention Law. 180 | 181 | No covered work shall be deemed part of an effective technological 182 | measure under any applicable law fulfilling obligations under article 183 | 11 of the WIPO copyright treaty adopted on 20 December 1996, or 184 | similar laws prohibiting or restricting circumvention of such 185 | measures. 186 | 187 | When you convey a covered work, you waive any legal power to forbid 188 | circumvention of technological measures to the extent such circumvention 189 | is effected by exercising rights under this License with respect to 190 | the covered work, and you disclaim any intention to limit operation or 191 | modification of the work as a means of enforcing, against the work's 192 | users, your or third parties' legal rights to forbid circumvention of 193 | technological measures. 194 | 195 | 4. Conveying Verbatim Copies. 
196 | 197 | You may convey verbatim copies of the Program's source code as you 198 | receive it, in any medium, provided that you conspicuously and 199 | appropriately publish on each copy an appropriate copyright notice; 200 | keep intact all notices stating that this License and any 201 | non-permissive terms added in accord with section 7 apply to the code; 202 | keep intact all notices of the absence of any warranty; and give all 203 | recipients a copy of this License along with the Program. 204 | 205 | You may charge any price or no price for each copy that you convey, 206 | and you may offer support or warranty protection for a fee. 207 | 208 | 5. Conveying Modified Source Versions. 209 | 210 | You may convey a work based on the Program, or the modifications to 211 | produce it from the Program, in the form of source code under the 212 | terms of section 4, provided that you also meet all of these conditions: 213 | 214 | a) The work must carry prominent notices stating that you modified 215 | it, and giving a relevant date. 216 | 217 | b) The work must carry prominent notices stating that it is 218 | released under this License and any conditions added under section 219 | 7. This requirement modifies the requirement in section 4 to 220 | "keep intact all notices". 221 | 222 | c) You must license the entire work, as a whole, under this 223 | License to anyone who comes into possession of a copy. This 224 | License will therefore apply, along with any applicable section 7 225 | additional terms, to the whole of the work, and all its parts, 226 | regardless of how they are packaged. This License gives no 227 | permission to license the work in any other way, but it does not 228 | invalidate such permission if you have separately received it. 
229 | 230 | d) If the work has interactive user interfaces, each must display 231 | Appropriate Legal Notices; however, if the Program has interactive 232 | interfaces that do not display Appropriate Legal Notices, your 233 | work need not make them do so. 234 | 235 | A compilation of a covered work with other separate and independent 236 | works, which are not by their nature extensions of the covered work, 237 | and which are not combined with it such as to form a larger program, 238 | in or on a volume of a storage or distribution medium, is called an 239 | "aggregate" if the compilation and its resulting copyright are not 240 | used to limit the access or legal rights of the compilation's users 241 | beyond what the individual works permit. Inclusion of a covered work 242 | in an aggregate does not cause this License to apply to the other 243 | parts of the aggregate. 244 | 245 | 6. Conveying Non-Source Forms. 246 | 247 | You may convey a covered work in object code form under the terms 248 | of sections 4 and 5, provided that you also convey the 249 | machine-readable Corresponding Source under the terms of this License, 250 | in one of these ways: 251 | 252 | a) Convey the object code in, or embodied in, a physical product 253 | (including a physical distribution medium), accompanied by the 254 | Corresponding Source fixed on a durable physical medium 255 | customarily used for software interchange. 
256 | 257 | b) Convey the object code in, or embodied in, a physical product 258 | (including a physical distribution medium), accompanied by a 259 | written offer, valid for at least three years and valid for as 260 | long as you offer spare parts or customer support for that product 261 | model, to give anyone who possesses the object code either (1) a 262 | copy of the Corresponding Source for all the software in the 263 | product that is covered by this License, on a durable physical 264 | medium customarily used for software interchange, for a price no 265 | more than your reasonable cost of physically performing this 266 | conveying of source, or (2) access to copy the 267 | Corresponding Source from a network server at no charge. 268 | 269 | c) Convey individual copies of the object code with a copy of the 270 | written offer to provide the Corresponding Source. This 271 | alternative is allowed only occasionally and noncommercially, and 272 | only if you received the object code with such an offer, in accord 273 | with subsection 6b. 274 | 275 | d) Convey the object code by offering access from a designated 276 | place (gratis or for a charge), and offer equivalent access to the 277 | Corresponding Source in the same way through the same place at no 278 | further charge. You need not require recipients to copy the 279 | Corresponding Source along with the object code. If the place to 280 | copy the object code is a network server, the Corresponding Source 281 | may be on a different server (operated by you or a third party) 282 | that supports equivalent copying facilities, provided you maintain 283 | clear directions next to the object code saying where to find the 284 | Corresponding Source. Regardless of what server hosts the 285 | Corresponding Source, you remain obligated to ensure that it is 286 | available for as long as needed to satisfy these requirements. 
287 | 288 | e) Convey the object code using peer-to-peer transmission, provided 289 | you inform other peers where the object code and Corresponding 290 | Source of the work are being offered to the general public at no 291 | charge under subsection 6d. 292 | 293 | A separable portion of the object code, whose source code is excluded 294 | from the Corresponding Source as a System Library, need not be 295 | included in conveying the object code work. 296 | 297 | A "User Product" is either (1) a "consumer product", which means any 298 | tangible personal property which is normally used for personal, family, 299 | or household purposes, or (2) anything designed or sold for incorporation 300 | into a dwelling. In determining whether a product is a consumer product, 301 | doubtful cases shall be resolved in favor of coverage. For a particular 302 | product received by a particular user, "normally used" refers to a 303 | typical or common use of that class of product, regardless of the status 304 | of the particular user or of the way in which the particular user 305 | actually uses, or expects or is expected to use, the product. A product 306 | is a consumer product regardless of whether the product has substantial 307 | commercial, industrial or non-consumer uses, unless such uses represent 308 | the only significant mode of use of the product. 309 | 310 | "Installation Information" for a User Product means any methods, 311 | procedures, authorization keys, or other information required to install 312 | and execute modified versions of a covered work in that User Product from 313 | a modified version of its Corresponding Source. The information must 314 | suffice to ensure that the continued functioning of the modified object 315 | code is in no case prevented or interfered with solely because 316 | modification has been made. 
317 | 318 | If you convey an object code work under this section in, or with, or 319 | specifically for use in, a User Product, and the conveying occurs as 320 | part of a transaction in which the right of possession and use of the 321 | User Product is transferred to the recipient in perpetuity or for a 322 | fixed term (regardless of how the transaction is characterized), the 323 | Corresponding Source conveyed under this section must be accompanied 324 | by the Installation Information. But this requirement does not apply 325 | if neither you nor any third party retains the ability to install 326 | modified object code on the User Product (for example, the work has 327 | been installed in ROM). 328 | 329 | The requirement to provide Installation Information does not include a 330 | requirement to continue to provide support service, warranty, or updates 331 | for a work that has been modified or installed by the recipient, or for 332 | the User Product in which it has been modified or installed. Access to a 333 | network may be denied when the modification itself materially and 334 | adversely affects the operation of the network or violates the rules and 335 | protocols for communication across the network. 336 | 337 | Corresponding Source conveyed, and Installation Information provided, 338 | in accord with this section must be in a format that is publicly 339 | documented (and with an implementation available to the public in 340 | source code form), and must require no special password or key for 341 | unpacking, reading or copying. 342 | 343 | 7. Additional Terms. 344 | 345 | "Additional permissions" are terms that supplement the terms of this 346 | License by making exceptions from one or more of its conditions. 347 | Additional permissions that are applicable to the entire Program shall 348 | be treated as though they were included in this License, to the extent 349 | that they are valid under applicable law. 
If additional permissions 350 | apply only to part of the Program, that part may be used separately 351 | under those permissions, but the entire Program remains governed by 352 | this License without regard to the additional permissions. 353 | 354 | When you convey a copy of a covered work, you may at your option 355 | remove any additional permissions from that copy, or from any part of 356 | it. (Additional permissions may be written to require their own 357 | removal in certain cases when you modify the work.) You may place 358 | additional permissions on material, added by you to a covered work, 359 | for which you have or can give appropriate copyright permission. 360 | 361 | Notwithstanding any other provision of this License, for material you 362 | add to a covered work, you may (if authorized by the copyright holders of 363 | that material) supplement the terms of this License with terms: 364 | 365 | a) Disclaiming warranty or limiting liability differently from the 366 | terms of sections 15 and 16 of this License; or 367 | 368 | b) Requiring preservation of specified reasonable legal notices or 369 | author attributions in that material or in the Appropriate Legal 370 | Notices displayed by works containing it; or 371 | 372 | c) Prohibiting misrepresentation of the origin of that material, or 373 | requiring that modified versions of such material be marked in 374 | reasonable ways as different from the original version; or 375 | 376 | d) Limiting the use for publicity purposes of names of licensors or 377 | authors of the material; or 378 | 379 | e) Declining to grant rights under trademark law for use of some 380 | trade names, trademarks, or service marks; or 381 | 382 | f) Requiring indemnification of licensors and authors of that 383 | material by anyone who conveys the material (or modified versions of 384 | it) with contractual assumptions of liability to the recipient, for 385 | any liability that these contractual assumptions directly impose on 
386 | those licensors and authors. 387 | 388 | All other non-permissive additional terms are considered "further 389 | restrictions" within the meaning of section 10. If the Program as you 390 | received it, or any part of it, contains a notice stating that it is 391 | governed by this License along with a term that is a further 392 | restriction, you may remove that term. If a license document contains 393 | a further restriction but permits relicensing or conveying under this 394 | License, you may add to a covered work material governed by the terms 395 | of that license document, provided that the further restriction does 396 | not survive such relicensing or conveying. 397 | 398 | If you add terms to a covered work in accord with this section, you 399 | must place, in the relevant source files, a statement of the 400 | additional terms that apply to those files, or a notice indicating 401 | where to find the applicable terms. 402 | 403 | Additional terms, permissive or non-permissive, may be stated in the 404 | form of a separately written license, or stated as exceptions; 405 | the above requirements apply either way. 406 | 407 | 8. Termination. 408 | 409 | You may not propagate or modify a covered work except as expressly 410 | provided under this License. Any attempt otherwise to propagate or 411 | modify it is void, and will automatically terminate your rights under 412 | this License (including any patent licenses granted under the third 413 | paragraph of section 11). 414 | 415 | However, if you cease all violation of this License, then your 416 | license from a particular copyright holder is reinstated (a) 417 | provisionally, unless and until the copyright holder explicitly and 418 | finally terminates your license, and (b) permanently, if the copyright 419 | holder fails to notify you of the violation by some reasonable means 420 | prior to 60 days after the cessation. 
421 | 422 | Moreover, your license from a particular copyright holder is 423 | reinstated permanently if the copyright holder notifies you of the 424 | violation by some reasonable means, this is the first time you have 425 | received notice of violation of this License (for any work) from that 426 | copyright holder, and you cure the violation prior to 30 days after 427 | your receipt of the notice. 428 | 429 | Termination of your rights under this section does not terminate the 430 | licenses of parties who have received copies or rights from you under 431 | this License. If your rights have been terminated and not permanently 432 | reinstated, you do not qualify to receive new licenses for the same 433 | material under section 10. 434 | 435 | 9. Acceptance Not Required for Having Copies. 436 | 437 | You are not required to accept this License in order to receive or 438 | run a copy of the Program. Ancillary propagation of a covered work 439 | occurring solely as a consequence of using peer-to-peer transmission 440 | to receive a copy likewise does not require acceptance. However, 441 | nothing other than this License grants you permission to propagate or 442 | modify any covered work. These actions infringe copyright if you do 443 | not accept this License. Therefore, by modifying or propagating a 444 | covered work, you indicate your acceptance of this License to do so. 445 | 446 | 10. Automatic Licensing of Downstream Recipients. 447 | 448 | Each time you convey a covered work, the recipient automatically 449 | receives a license from the original licensors, to run, modify and 450 | propagate that work, subject to this License. You are not responsible 451 | for enforcing compliance by third parties with this License. 452 | 453 | An "entity transaction" is a transaction transferring control of an 454 | organization, or substantially all assets of one, or subdividing an 455 | organization, or merging organizations. 
If propagation of a covered 456 | work results from an entity transaction, each party to that 457 | transaction who receives a copy of the work also receives whatever 458 | licenses to the work the party's predecessor in interest had or could 459 | give under the previous paragraph, plus a right to possession of the 460 | Corresponding Source of the work from the predecessor in interest, if 461 | the predecessor has it or can get it with reasonable efforts. 462 | 463 | You may not impose any further restrictions on the exercise of the 464 | rights granted or affirmed under this License. For example, you may 465 | not impose a license fee, royalty, or other charge for exercise of 466 | rights granted under this License, and you may not initiate litigation 467 | (including a cross-claim or counterclaim in a lawsuit) alleging that 468 | any patent claim is infringed by making, using, selling, offering for 469 | sale, or importing the Program or any portion of it. 470 | 471 | 11. Patents. 472 | 473 | A "contributor" is a copyright holder who authorizes use under this 474 | License of the Program or a work on which the Program is based. The 475 | work thus licensed is called the contributor's "contributor version". 476 | 477 | A contributor's "essential patent claims" are all patent claims 478 | owned or controlled by the contributor, whether already acquired or 479 | hereafter acquired, that would be infringed by some manner, permitted 480 | by this License, of making, using, or selling its contributor version, 481 | but do not include claims that would be infringed only as a 482 | consequence of further modification of the contributor version. For 483 | purposes of this definition, "control" includes the right to grant 484 | patent sublicenses in a manner consistent with the requirements of 485 | this License. 
486 | 487 | Each contributor grants you a non-exclusive, worldwide, royalty-free 488 | patent license under the contributor's essential patent claims, to 489 | make, use, sell, offer for sale, import and otherwise run, modify and 490 | propagate the contents of its contributor version. 491 | 492 | In the following three paragraphs, a "patent license" is any express 493 | agreement or commitment, however denominated, not to enforce a patent 494 | (such as an express permission to practice a patent or covenant not to 495 | sue for patent infringement). To "grant" such a patent license to a 496 | party means to make such an agreement or commitment not to enforce a 497 | patent against the party. 498 | 499 | If you convey a covered work, knowingly relying on a patent license, 500 | and the Corresponding Source of the work is not available for anyone 501 | to copy, free of charge and under the terms of this License, through a 502 | publicly available network server or other readily accessible means, 503 | then you must either (1) cause the Corresponding Source to be so 504 | available, or (2) arrange to deprive yourself of the benefit of the 505 | patent license for this particular work, or (3) arrange, in a manner 506 | consistent with the requirements of this License, to extend the patent 507 | license to downstream recipients. "Knowingly relying" means you have 508 | actual knowledge that, but for the patent license, your conveying the 509 | covered work in a country, or your recipient's use of the covered work 510 | in a country, would infringe one or more identifiable patents in that 511 | country that you have reason to believe are valid. 
512 | 513 | If, pursuant to or in connection with a single transaction or 514 | arrangement, you convey, or propagate by procuring conveyance of, a 515 | covered work, and grant a patent license to some of the parties 516 | receiving the covered work authorizing them to use, propagate, modify 517 | or convey a specific copy of the covered work, then the patent license 518 | you grant is automatically extended to all recipients of the covered 519 | work and works based on it. 520 | 521 | A patent license is "discriminatory" if it does not include within 522 | the scope of its coverage, prohibits the exercise of, or is 523 | conditioned on the non-exercise of one or more of the rights that are 524 | specifically granted under this License. You may not convey a covered 525 | work if you are a party to an arrangement with a third party that is 526 | in the business of distributing software, under which you make payment 527 | to the third party based on the extent of your activity of conveying 528 | the work, and under which the third party grants, to any of the 529 | parties who would receive the covered work from you, a discriminatory 530 | patent license (a) in connection with copies of the covered work 531 | conveyed by you (or copies made from those copies), or (b) primarily 532 | for and in connection with specific products or compilations that 533 | contain the covered work, unless you entered into that arrangement, 534 | or that patent license was granted, prior to 28 March 2007. 535 | 536 | Nothing in this License shall be construed as excluding or limiting 537 | any implied license or other defenses to infringement that may 538 | otherwise be available to you under applicable patent law. 539 | 540 | 12. No Surrender of Others' Freedom. 541 | 542 | If conditions are imposed on you (whether by court order, agreement or 543 | otherwise) that contradict the conditions of this License, they do not 544 | excuse you from the conditions of this License. 
If you cannot convey a 545 | covered work so as to satisfy simultaneously your obligations under this 546 | License and any other pertinent obligations, then as a consequence you may 547 | not convey it at all. For example, if you agree to terms that obligate you 548 | to collect a royalty for further conveying from those to whom you convey 549 | the Program, the only way you could satisfy both those terms and this 550 | License would be to refrain entirely from conveying the Program. 551 | 552 | 13. Use with the GNU Affero General Public License. 553 | 554 | Notwithstanding any other provision of this License, you have 555 | permission to link or combine any covered work with a work licensed 556 | under version 3 of the GNU Affero General Public License into a single 557 | combined work, and to convey the resulting work. The terms of this 558 | License will continue to apply to the part which is the covered work, 559 | but the special requirements of the GNU Affero General Public License, 560 | section 13, concerning interaction through a network will apply to the 561 | combination as such. 562 | 563 | 14. Revised Versions of this License. 564 | 565 | The Free Software Foundation may publish revised and/or new versions of 566 | the GNU General Public License from time to time. Such new versions will 567 | be similar in spirit to the present version, but may differ in detail to 568 | address new problems or concerns. 569 | 570 | Each version is given a distinguishing version number. If the 571 | Program specifies that a certain numbered version of the GNU General 572 | Public License "or any later version" applies to it, you have the 573 | option of following the terms and conditions either of that numbered 574 | version or of any later version published by the Free Software 575 | Foundation. If the Program does not specify a version number of the 576 | GNU General Public License, you may choose any version ever published 577 | by the Free Software Foundation. 
578 | 579 | If the Program specifies that a proxy can decide which future 580 | versions of the GNU General Public License can be used, that proxy's 581 | public statement of acceptance of a version permanently authorizes you 582 | to choose that version for the Program. 583 | 584 | Later license versions may give you additional or different 585 | permissions. However, no additional obligations are imposed on any 586 | author or copyright holder as a result of your choosing to follow a 587 | later version. 588 | 589 | 15. Disclaimer of Warranty. 590 | 591 | THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY 592 | APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT 593 | HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY 594 | OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, 595 | THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 596 | PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM 597 | IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF 598 | ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 599 | 600 | 16. Limitation of Liability. 601 | 602 | IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING 603 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS 604 | THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY 605 | GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE 606 | USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF 607 | DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD 608 | PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), 609 | EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF 610 | SUCH DAMAGES. 611 | 612 | 17. Interpretation of Sections 15 and 16. 
613 | 614 | If the disclaimer of warranty and limitation of liability provided 615 | above cannot be given local legal effect according to their terms, 616 | reviewing courts shall apply local law that most closely approximates 617 | an absolute waiver of all civil liability in connection with the 618 | Program, unless a warranty or assumption of liability accompanies a 619 | copy of the Program in return for a fee. 620 | 621 | END OF TERMS AND CONDITIONS 622 | 623 | How to Apply These Terms to Your New Programs 624 | 625 | If you develop a new program, and you want it to be of the greatest 626 | possible use to the public, the best way to achieve this is to make it 627 | free software which everyone can redistribute and change under these terms. 628 | 629 | To do so, attach the following notices to the program. It is safest 630 | to attach them to the start of each source file to most effectively 631 | state the exclusion of warranty; and each file should have at least 632 | the "copyright" line and a pointer to where the full notice is found. 633 | 634 | <one line to give the program's name and a brief idea of what it does.> 635 | Copyright (C) <year> <name of author> 636 | 637 | This program is free software: you can redistribute it and/or modify 638 | it under the terms of the GNU General Public License as published by 639 | the Free Software Foundation, either version 3 of the License, or 640 | (at your option) any later version. 641 | 642 | This program is distributed in the hope that it will be useful, 643 | but WITHOUT ANY WARRANTY; without even the implied warranty of 644 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 645 | GNU General Public License for more details. 646 | 647 | You should have received a copy of the GNU General Public License 648 | along with this program. If not, see <https://www.gnu.org/licenses/>. 649 | 650 | Also add information on how to contact you by electronic and paper mail.
651 | 652 | If the program does terminal interaction, make it output a short 653 | notice like this when it starts in an interactive mode: 654 | 655 | <program> Copyright (C) <year> <name of author> 656 | This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'. 657 | This is free software, and you are welcome to redistribute it 658 | under certain conditions; type `show c' for details. 659 | 660 | The hypothetical commands `show w' and `show c' should show the appropriate 661 | parts of the General Public License. Of course, your program's commands 662 | might be different; for a GUI interface, you would use an "about box". 663 | 664 | You should also get your employer (if you work as a programmer) or school, 665 | if any, to sign a "copyright disclaimer" for the program, if necessary. 666 | For more information on this, and how to apply and follow the GNU GPL, see 667 | <https://www.gnu.org/licenses/>. 668 | 669 | The GNU General Public License does not permit incorporating your program 670 | into proprietary programs. If your program is a subroutine library, you 671 | may consider it more useful to permit linking proprietary applications with 672 | the library. If this is what you want to do, use the GNU Lesser General 673 | Public License instead of this License. But first, please read 674 | <https://www.gnu.org/licenses/why-not-lgpl.html>.
675 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include LICENSE.txt 2 | include *.rst 3 | include *.conf 4 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | GROKMIRROR 2 | ========== 3 | -------------------------------------------- 4 | Framework to smartly mirror git repositories 5 | -------------------------------------------- 6 | 7 | :Author: konstantin@linuxfoundation.org 8 | :Date: 2020-09-18 9 | :Copyright: The Linux Foundation and contributors 10 | :License: GPLv3+ 11 | :Version: 2.0.0 12 | 13 | DESCRIPTION 14 | ----------- 15 | Grokmirror was written to make replicating large git repository 16 | collections more efficient. Grokmirror uses the manifest file published 17 | by the origin server in order to figure out which repositories to clone, 18 | and to track which repositories require updating. The process is 19 | lightweight and efficient both for the primary and for the replicas. 20 | 21 | CONCEPTS 22 | -------- 23 | The origin server publishes a json-formatted manifest file containing 24 | information about all git repositories that it carries. The format of 25 | the manifest file is as follows:: 26 | 27 | { 28 | "/path/to/bare/repository.git": { 29 | "description": "Repository description", 30 | "head": "ref: refs/heads/branchname", 31 | "reference": "/path/to/reference/repository.git", 32 | "forkgroup": "forkgroup-guid", 33 | "modified": timestamp, 34 | "fingerprint": sha1sum(git show-ref), 35 | "symlinks": [ 36 | "/location/to/symlink", 37 | ... 38 | ], 39 | } 40 | ... 41 | } 42 | 43 | The manifest file is usually gzip-compressed to preserve bandwidth. 
44 | 45 | Each time a commit is made to one of the git repositories, it 46 | automatically updates the manifest file using an appropriate git hook, 47 | so the manifest.js file should always contain the most up-to-date 48 | information about the state of all repositories. 49 | 50 | The mirroring clients will poll the manifest.js file and download the 51 | updated manifest if it is newer than the locally stored copy (using 52 | ``Last-Modified`` and ``If-Modified-Since`` http headers). After 53 | downloading the updated manifest.js file, the mirrors will parse it to 54 | find out which repositories have been updated and which new repositories 55 | have been added. 56 | 57 | Object Storage Repositories 58 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~ 59 | Grokmirror 2.0 introduces the concept of "object storage repositories", 60 | which aims to optimize how repository forks are stored on disk and 61 | served to the cloning clients. 62 | 63 | When grok-fsck runs, it will automatically recognize related 64 | repositories by analyzing their root commits. If it finds two or more 65 | related repositories, it will set up a unified "object storage" repo and 66 | fetch all refs from each related repository into it. 67 | 68 | For example, you can have two forks of linux.git: 69 | torvalds/linux.git: 70 | refs/heads/master 71 | refs/tags/v5.0-rc3 72 | ... 73 | 74 | and its fork: 75 | 76 | maintainer/linux.git: 77 | refs/heads/master 78 | refs/heads/devbranch 79 | refs/tags/v5.0-rc3 80 | ... 81 | 82 | Grok-fsck will set up an object storage repository and fetch all refs from 83 | both repositories: 84 | 85 | objstore/[random-guid-name].git 86 | refs/virtual/[sha1-of-torvalds/linux.git:12]/heads/master 87 | refs/virtual/[sha1-of-torvalds/linux.git:12]/tags/v5.0-rc3 88 | ... 89 | refs/virtual/[sha1-of-maintainer/linux.git:12]/heads/master 90 | refs/virtual/[sha1-of-maintainer/linux.git:12]/heads/devbranch 91 | refs/virtual/[sha1-of-maintainer/linux.git:12]/tags/v5.0-rc3 92 | ... 
93 | 94 | Then both torvalds/linux.git and maintainer/linux.git will be configured 95 | to use objstore/[random-guid-name].git via objects/info/alternates 96 | and repacked to just contain metadata and no objects. 97 | 98 | The alternates repository will be repacked with "delta islands" enabled, 99 | which should help optimize clone operations for each "sibling" 100 | repository. 101 | 102 | Please see the example grokmirror.conf for more details about 103 | configuring objstore repositories. 104 | 105 | 106 | ORIGIN SETUP 107 | ------------ 108 | Install grokmirror on the origin server using your preferred way. 109 | 110 | **IMPORTANT: Only bare git repositories are supported.** 111 | 112 | You will need to add a hook to each one of your repositories that would 113 | update the manifest upon repository modification. This can either be a 114 | post-receive hook, or a post-update hook. The hook must call the 115 | following command:: 116 | 117 | /usr/bin/grok-manifest -m /var/www/html/manifest.js.gz \ 118 | -t /var/lib/gitolite3/repositories -n `pwd` 119 | 120 | The **-m** flag is the path to the manifest.js file. The git process 121 | must be able to write to it and to the directory the file is in (it 122 | creates a manifest.js.randomstring file first, and then moves it in 123 | place of the old one for atomicity). 124 | 125 | The **-t** flag is to help grokmirror trim the irrelevant toplevel disk 126 | path, so it is trimmed from the top. 127 | 128 | The **-n** flag tells grokmirror to use the current timestamp instead of 129 | the exact timestamp of the commit (much faster this way). 130 | 131 | Before enabling the hook, you will need to generate the manifest.js of 132 | all your git repositories. In order to do that, run the same command, 133 | but omit the -n and the \`pwd\` argument.
E.g.:: 134 | 135 | /usr/bin/grok-manifest -m /var/www/html/manifest.js.gz \ 136 | -t /var/lib/gitolite3/repositories 137 | 138 | The last component you need to set up is to automatically purge deleted 139 | repositories from the manifest. As this can't be added to a git hook, 140 | you can either run the ``--purge`` command from cron:: 141 | 142 | /usr/bin/grok-manifest -m /var/www/html/manifest.js.gz \ 143 | -t /var/lib/gitolite3/repositories -p 144 | 145 | Or add it to your gitolite's ``D`` command using the ``--remove`` flag:: 146 | 147 | /usr/bin/grok-manifest -m /var/www/html/manifest.js.gz \ 148 | -t /var/lib/gitolite3/repositories -x $repo.git 149 | 150 | If you would like grok-manifest to honor the ``git-daemon-export-ok`` 151 | magic file and only add to the manifest those repositories specifically 152 | marked as exportable, pass the ``--check-export-ok`` flag. See 153 | ``git-daemon(1)`` for more info on ``git-daemon-export-ok`` file. 154 | 155 | You will need to have some kind of httpd server to serve the manifest 156 | file. 157 | 158 | REPLICA SETUP 159 | ------------- 160 | Install grokmirror on the replica using your preferred way. 161 | 162 | Locate grokmirror.conf and modify it to reflect your needs. The default 163 | configuration file is heavily commented to explain what each option 164 | does. 165 | 166 | Make sure the user "mirror" (or whichever user you specified) is able to 167 | write to the toplevel and log locations specified in grokmirror.conf. 168 | 169 | You can either run grok-pull manually, from cron, or as a 170 | systemd-managed daemon (see contrib). If you do it more frequently than 171 | once every few hours, you should definitely run it as a daemon in order 172 | to improve performance. 173 | 174 | GROK-FSCK 175 | --------- 176 | Git repositories should be routinely repacked and checked for 177 | corruption. 
This utility will perform the necessary optimizations and 178 | report any problems to the email defined via fsck.report_to ('root' by 179 | default). It should run weekly from cron or from the systemd timer (see 180 | contrib). 181 | 182 | Please examine the example grokmirror.conf file for various things you 183 | can tweak. 184 | 185 | FAQ 186 | --- 187 | Why is it called "grok mirror"? 188 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 189 | Because it's developed at kernel.org and "grok" is a mirror of "korg". 190 | Also, because it groks git mirroring. 191 | 192 | Why not just use rsync? 193 | ~~~~~~~~~~~~~~~~~~~~~~~ 194 | Rsync is extremely inefficient for the purpose of mirroring git trees 195 | that mostly consist of a lot of small files that very rarely change. 196 | Since rsync must calculate checksums on each file during each run, it 197 | mostly results in a lot of disk thrashing. 198 | 199 | Additionally, if several repositories share objects between each-other, 200 | unless the disk paths are exactly the same on both the remote and local 201 | mirror, this will result in broken git repositories. 202 | 203 | It is also a bit silly, considering git provides its own extremely 204 | efficient mechanism for specifying what changed between revision X and 205 | revision Y. 206 | -------------------------------------------------------------------------------- /UPGRADING.rst: -------------------------------------------------------------------------------- 1 | Upgrading from Grokmirror 1.x to 2.x 2 | ------------------------------------ 3 | Grokmirror-2.0 introduced major changes to how repositories are 4 | organized, so it deliberately breaks the upgrade path in order to force 5 | admins to make proper decisions. Installing the newer version on top of 6 | the old one will break replication, as it will refuse to work with old 7 | configuration files. 
8 | 9 | Manifest compatibility 10 | ---------------------- 11 | Manifest files generated by grokmirror-1.x will continue to work on 12 | grokmirror-2.x replicas. Similarly, manifest files generated by 13 | grokmirror-2.x origin servers will work on grokmirror-1.x replicas. 14 | 15 | In other words, upgrading the origin servers and replicas does not need 16 | to happen at the same time. While grokmirror-2.x adds more entries to 17 | the manifest file (e.g. "forkgroup" and "head" records), they will be 18 | ignored by grokmirror-1.x replicas. 19 | 20 | Upgrading the origin server 21 | --------------------------- 22 | Breaking changes affecting the origin server are related to grok-fsck 23 | runs. Existing grok-manifest hooks should continue to work without any 24 | changes required. 25 | 26 | Grok-fsck will now automatically recognize related repositories by 27 | comparing the output of ``git rev-list --max-parents=0 --all``. When two 28 | or more repositories are recognized as forks of each-other, a new 29 | "object storage" repository will be set up that will contain refs from 30 | all siblings. After that, individual repositories will be repacked to 31 | only contain repository metadata (and loose objects in need of pruning). 32 | 33 | Existing repositories that already use alternates will be automatically 34 | migrated to objstore repositories during the first grok-fsck run. If you 35 | have a small collection of repositories, or if the vast majority of them 36 | aren't forks of each-other, then the upgrade can be done live with 37 | little impact. 38 | 39 | If the opposite is true and most of your repositories are forks, then 40 | the initial grok-fsck run will take a lot of time and resources to 41 | complete, as repositories will be automatically repacked to take 42 | advantage of the new object storage layout. Doing so without preparation 43 | can significantly impact the availability of your server, so you should 44 | plan the upgrade appropriately. 
45 | 46 | Recommended scenario for large collections with lots of forks 47 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 48 | 1. Set up a temporary system with fast disk IO and plenty of CPUs 49 | and RAM. Repacking will go a lot faster on fast systems with plenty 50 | of IO cycles. 51 | 2. Install grokmirror-2 and configure it to replicate from the origin 52 | **INTO THE SAME PATH AS ON THE ORIGIN SERVER**. If your origin server 53 | is hosting repos out of /var/lib/gitolite3/repositories, then your 54 | migration replica should be configured with toplevel in 55 | /var/lib/gitolite3/repositories. This is important, because when the 56 | "alternates" file is created, it specifies a full path to the 57 | location of the object storage directory and moving repositories into 58 | different locations post-migration will result in breakage. *Avoid 59 | using symlinks for this purpose*, as grokmirror-2 will realpath them 60 | before using internally. 61 | 3. Perform initial grok-pull replication from the current origin server 62 | to the migration replica. This should set up all repositories 63 | currently using alternates as objstore repositories. 64 | 4. Once the initial replication is complete, run grok-fsck on the new 65 | hierarchy. This should properly repack all new object storage 66 | repositories to benefit from delta islands, plus automatically find 67 | all repositories that are forks of each-other but aren't already set 68 | up for alternates. The initial grok-fsck process may take a LONG time 69 | to run, depending on the size of your repository collection. 70 | 5. Schedule migration downtime. 71 | 6. Right before downtime, run grok-pull to get the latest updates. 72 | 7. At the start of downtime, block access to the origin server, so no 73 | pushes are allowed to go through. Run final grok-pull on the 74 | migration replica. 75 | 8. 
Back up your existing hierarchy, because you know you should, or move 76 | it out of the way if you have enough disk space for this. 77 | 9. Copy the new hierarchy from the migration replica (e.g. using rsync). 78 | 10. Run any necessary steps such as "gitolite setup" in order to set 79 | things up. 80 | 11. Rerun grok-manifest on the toplevel in order to generate the fresh 81 | manifest.js.gz file. 82 | 12. Create a new grokmirror.conf for fsck runs (grokmirror-1.x 83 | configuration files are purposefully not supported). 84 | 13. Enable the grok-fsck timer. 85 | 86 | Upgrading the replicas 87 | ---------------------- 88 | The above procedure should also be considered for upgrading the 89 | replicas, unless you have a small collection that doesn't use a lot of 90 | forks and alternates. You can find out if that is the case by running 91 | ``find . -name alternates`` at the top of your mirrored tree. If the 92 | number of returned hits is significant, then the first time grok-fsck 93 | runs, it will spend a lot of time repacking the repositories to benefit 94 | from the new layout. On the upside, you can expect significant storage 95 | use reduction after this conversion is completed. 96 | 97 | If your replica is providing continuous access for members of your 98 | development team, then you may want to perform this conversion prior to 99 | upgrading grokmirror on your production server, in order to reduce the 100 | impact on server load. Just follow the instructions from the section 101 | above. 102 | 103 | Converting the configuration file 104 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 105 | Grokmirror-1.x used two different config files -- one for grok-pull and 106 | another for grok-fsck. This separation only really made sense on the 107 | origin server and was cumbersome for the replicas, since they ended up 108 | duplicating a lot of configuration options between the two config files. 
109 | 110 | Grokmirror-1.x: 111 | - separate configuration files for grok-pull and grok-fsck 112 | - multiple origin servers can be listed in one file 113 | 114 | Grokmirror-2.x: 115 | - one configuration file for all grokmirror tools 116 | - one origin server per configuration file 117 | 118 | Grokmirror-2.x will refuse to run with configuration files created for 119 | the previous version, so you will need to create a new configuration 120 | file in order to continue using it after upgrading. Most configuration 121 | options will be familiar to you from version 1.x, and the rest are 122 | documented in the grokmirror.conf file provided with the distribution. 123 | 124 | Converting from cron to daemon operation 125 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 126 | Grokmirror-1.x expected grok-pull to run from cron, but this had a set 127 | of important limitations. In contrast, grokmirror-2.x is written to run 128 | grok-pull as a daemon. It is strongly recommended to switch away from 129 | cron-based regular runs if you do them more frequently than once every 130 | few hours, as this will result in more efficient operation. See the set 131 | of systemd unit files included in the contrib directory for where to get 132 | started. 133 | 134 | Grok-fsck can continue to run from cron if you prefer, or you can run it 135 | from a systemd timer as well. 136 | -------------------------------------------------------------------------------- /contrib/gitolite/get-grok-manifest.command: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # This is a command to install in gitolite's local-code. 3 | # Don't forget to enable it via .gitolite.rc 4 | # 5 | # Change this to where grok-manifest is writing manifest.js 6 | MANIFILE="/var/www/html/grokmirror/manifest.js.gz" 7 | 8 | if [[ -z "$GL_USER" ]]; then 9 | echo "ERROR: GL_USER is unset. Run me via ssh, please." 
10 | exit 1 11 | fi 12 | 13 | # Make sure we only accept credential replication from the mirrors 14 | for MIRROR in $(GL_USER='' gitolite mirror list slaves gitolite-admin); do 15 | if [[ $GL_USER == "server-${MIRROR}" ]]; then 16 | AOK="yes" 17 | break 18 | fi 19 | done 20 | 21 | if [[ -z "$AOK" ]]; then 22 | echo "You are not allowed to do this" 23 | exit 1 24 | fi 25 | 26 | if [[ ! -s $MANIFILE ]]; then 27 | echo "Manifest file not found" 28 | exit 1 29 | fi 30 | 31 | R_LASTMOD=$1 32 | if [[ -z "$R_LASTMOD" ]]; then 33 | R_LASTMOD=0 34 | fi 35 | 36 | L_LASTMOD=$(stat --printf='%Y' $MANIFILE) 37 | if [[ $L_LASTMOD -le $R_LASTMOD ]]; then 38 | exit 127 39 | fi 40 | 41 | if [[ $MANIFILE == *.gz ]]; then 42 | zcat $MANIFILE 43 | else 44 | cat $MANIFILE 45 | fi 46 | 47 | exit 0 48 | -------------------------------------------------------------------------------- /contrib/gitolite/grok-get-gl-manifest.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # This is executed by grok-pull if manifest_command is defined. 3 | # You should install the other file as one of your commands in local-code 4 | # and enable it in .gitolite.rc 5 | PRIMARY=$(gitolite mirror list master gitolite-admin) 6 | STATEFILE="$(gitolite query-rc GL_ADMIN_BASE)/.${PRIMARY}.manifest.lastupd" 7 | GL_COMMAND=get-grok-manifest 8 | 9 | if [[ -s $STATEFILE ]] && [[ $1 != '--force' ]]; then 10 | LASTUPD=$(cat $STATEFILE) 11 | fi 12 | NOWSTAMP=$(date +'%s') 13 | 14 | ssh $PRIMARY $GL_COMMAND $LASTUPD 15 | ECODE=$? 
16 | 17 | if [[ $ECODE == 0 ]]; then 18 | echo $NOWSTAMP > $STATEFILE 19 | fi 20 | exit $ECODE 21 | -------------------------------------------------------------------------------- /contrib/grok-fsck@.service: -------------------------------------------------------------------------------- 1 | [Unit] 2 | Description=Grok-fsck service for %I 3 | Documentation=https://github.com/mricon/grokmirror 4 | 5 | [Service] 6 | Type=oneshot 7 | Environment="EXTRA_FSCK_OPTS=" 8 | EnvironmentFile=-/etc/sysconfig/grokmirror.default 9 | EnvironmentFile=-/etc/sysconfig/grokmirror.%i 10 | ExecStart=/usr/bin/grok-fsck -c /etc/grokmirror/%i.conf $EXTRA_FSCK_OPTS 11 | CPUSchedulingPolicy=batch 12 | # To override these users, create a drop-in systemd conf file in 13 | # /etc/systemd/system/grok-fsck@[foo].service.d/10-usergroup.conf: 14 | # [Service] 15 | # User=yourpreference 16 | # Group=yourpreference 17 | User=mirror 18 | Group=mirror 19 | -------------------------------------------------------------------------------- /contrib/grok-fsck@.timer: -------------------------------------------------------------------------------- 1 | [Unit] 2 | Description=Grok-fsck timer for %I 3 | Documentation=https://github.com/mricon/grokmirror 4 | 5 | [Timer] 6 | OnCalendar=Sat 04:00 7 | 8 | [Install] 9 | WantedBy=timers.target 10 | -------------------------------------------------------------------------------- /contrib/grok-pull@.service: -------------------------------------------------------------------------------- 1 | [Unit] 2 | Description=Grok-pull service for %I 3 | After=network.target 4 | Documentation=https://github.com/mricon/grokmirror 5 | 6 | [Service] 7 | Environment="EXTRA_PULL_OPTS=" 8 | EnvironmentFile=-/etc/sysconfig/grokmirror.default 9 | EnvironmentFile=-/etc/sysconfig/grokmirror.%i 10 | ExecStart=/usr/bin/grok-pull -o -c /etc/grokmirror/%i.conf $EXTRA_PULL_OPTS 11 | Type=simple 12 | Restart=on-failure 13 | # To override these users, create a drop-in systemd conf file in 14 | 
# /etc/systemd/system/grok-pull@[foo].service.d/10-usergroup.conf: 15 | # [Service] 16 | # User=yourpreference 17 | # Group=yourpreference 18 | User=mirror 19 | Group=mirror 20 | 21 | [Install] 22 | WantedBy=multi-user.target 23 | -------------------------------------------------------------------------------- /contrib/logrotate: -------------------------------------------------------------------------------- 1 | /var/log/grokmirror/*.log { 2 | missingok 3 | notifempty 4 | delaycompress 5 | } 6 | -------------------------------------------------------------------------------- /contrib/pubsubv1.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Implements a Google pubsub v1 push listener, see: 3 | # https://cloud.google.com/pubsub/docs/push 4 | # 5 | # In order to work, grok-pull must be running as a daemon service with 6 | # the "socket" option enabled in the configuration. 7 | # 8 | # The pubsub message should contain two attributes: 9 | # { 10 | # "message": { 11 | # "attributes": { 12 | # "proj": "projname", 13 | # "repo": "/path/to/repo.git" 14 | # } 15 | # } 16 | # } 17 | # 18 | # "proj" value should map to a "$proj.conf" file in /etc/grokmirror 19 | # (you can override that default via the GROKMIRROR_CONFIG_DIR env var). 20 | # "repo" value should match a repo defined in the manifest file as understood 21 | # by the running grok-pull daemon (it will ignore anything else) 22 | # 23 | # Any other attributes or the "data" field are ignored. 
24 | 25 | import falcon 26 | import json 27 | import os 28 | import socket 29 | import re 30 | 31 | from configparser import ConfigParser, ExtendedInterpolation 32 | 33 | # Some sanity defaults 34 | MAX_PROJ_LEN = 32 35 | MAX_REPO_LEN = 1024 36 | 37 | # noinspection PyBroadException 38 | class PubsubListener(object): 39 | 40 | def on_get(self, req, resp): 41 | resp.status = falcon.HTTP_200 42 | resp.body = "We don't serve GETs here\n" 43 | 44 | def on_post(self, req, resp): 45 | if not req.content_length: 46 | resp.status = falcon.HTTP_500 47 | resp.body = 'Payload required\n' 48 | return 49 | 50 | try: 51 | doc = json.load(req.stream) 52 | except: 53 | resp.status = falcon.HTTP_500 54 | resp.body = 'Failed to parse payload as json\n' 55 | return 56 | 57 | try: 58 | proj = doc['message']['attributes']['proj'] 59 | repo = doc['message']['attributes']['repo'] 60 | except (KeyError, TypeError): 61 | resp.status = falcon.HTTP_500 62 | resp.body = 'Not a pubsub v1 payload\n' 63 | return 64 | 65 | if len(proj) > MAX_PROJ_LEN or len(repo) > MAX_REPO_LEN: 66 | resp.status = falcon.HTTP_500 67 | resp.body = 'Repo or project value too long\n' 68 | return 69 | 70 | # Proj shouldn't contain slashes or whitespace 71 | if re.search(r'[\s/]', proj): 72 | resp.status = falcon.HTTP_500 73 | resp.body = 'Invalid characters in project name\n' 74 | return 75 | 76 | # Repo shouldn't contain whitespace 77 | if re.search(r'\s', proj): 78 | resp.status = falcon.HTTP_500 79 | resp.body = 'Invalid characters in repo name\n' 80 | return 81 | 82 | confdir = os.environ.get('GROKMIRROR_CONFIG_DIR', '/etc/grokmirror') 83 | cfgfile = os.path.join(confdir, '{}.conf'.format(proj)) 84 | if not os.access(cfgfile, os.R_OK): 85 | resp.status = falcon.HTTP_500 86 | resp.body = 'Invalid project name\n' 87 | return 88 | config = ConfigParser(interpolation=ExtendedInterpolation()) 89 | config.read(cfgfile) 90 | if 'pull' not in config or not config['pull'].get('socket'): 91 | resp.status = falcon.HTTP_500 
92 | resp.body = 'Invalid project configuration (no socket defined)\n' 93 | return 94 | sockfile = config['pull'].get('socket') 95 | if not os.access(sockfile, os.W_OK): 96 | resp.status = falcon.HTTP_500 97 | resp.body = 'Invalid project configuration (socket does not exist or is not writable)\n' 98 | return 99 | 100 | try: 101 | with socket.socket(socket.AF_UNIX, socket.SOCK_STREAM) as client: 102 | client.connect(sockfile) 103 | client.send(repo.encode()) 104 | except: 105 | resp.status = falcon.HTTP_500 106 | resp.body = 'Unable to communicate with the socket\n' 107 | return 108 | 109 | resp.status = falcon.HTTP_204 110 | 111 | 112 | app = falcon.API() 113 | pl = PubsubListener() 114 | app.add_route('/pubsub_v1', pl) 115 | -------------------------------------------------------------------------------- /contrib/python-grokmirror.spec: -------------------------------------------------------------------------------- 1 | %global srcname grokmirror 2 | %global groupname mirror 3 | %global username mirror 4 | %global userhome %{_sharedstatedir}/grokmirror 5 | 6 | Name: python-%{srcname} 7 | Version: 2.0.8 8 | Release: 1%{?dist} 9 | Summary: Framework to smartly mirror git repositories 10 | 11 | License: GPLv3+ 12 | URL: https://git.kernel.org/pub/scm/utils/grokmirror/grokmirror.git 13 | Source0: https://www.kernel.org/pub/software/network/grokmirror/grokmirror-%{version}.tar.xz 14 | 15 | BuildArch: noarch 16 | 17 | %global _description %{expand: 18 | Grokmirror was written to make mirroring large git repository 19 | collections more efficient. Grokmirror uses the manifest file published 20 | by the master mirror in order to figure out which repositories to 21 | clone, and to track which repositories require updating. 
The process is 22 | extremely lightweight and efficient both for the master and for the 23 | mirrors.} 24 | 25 | %description %_description 26 | 27 | %package -n python3-%{srcname} 28 | Summary: %{summary} 29 | Requires(pre): shadow-utils 30 | Requires: git-core, python3-packaging, python3-requests 31 | BuildRequires: python3-devel, python3-setuptools 32 | BuildRequires: systemd 33 | Obsoletes: python-%{srcname} < 2, python2-%{srcname} < 2 34 | 35 | %description -n python3-%{srcname} %_description 36 | 37 | %prep 38 | %autosetup -n %{srcname}-%{version} 39 | 40 | %build 41 | %py3_build 42 | 43 | %install 44 | %py3_install 45 | 46 | %{__mkdir_p} -m 0755 \ 47 | %{buildroot}%{userhome} \ 48 | %{buildroot}%{_sysconfdir}/%{srcname} \ 49 | %{buildroot}%{_sysconfdir}/logrotate.d \ 50 | %{buildroot}%{_unitdir} \ 51 | %{buildroot}%{_bindir} \ 52 | %{buildroot}%{_tmpfilesdir} \ 53 | %{buildroot}%{_mandir}/man1 \ 54 | %{buildroot}%{_localstatedir}/log/%{srcname} \ 55 | %{buildroot}/run/%{srcname} 56 | 57 | %{__install} -m 0644 man/*.1 %{buildroot}/%{_mandir}/man1/ 58 | %{__install} -m 0644 contrib/*.service %{buildroot}/%{_unitdir}/ 59 | %{__install} -m 0644 contrib/*.timer %{buildroot}/%{_unitdir}/ 60 | %{__install} -m 0644 contrib/logrotate %{buildroot}/%{_sysconfdir}/logrotate.d/grokmirror 61 | %{__install} -m 0644 grokmirror.conf %{buildroot}/%{_sysconfdir}/%{srcname}/grokmirror.conf.example 62 | 63 | echo "d /run/%{srcname} 0755 %{username} %{groupname}" > %{buildroot}/%{_tmpfilesdir}/%{srcname}.conf 64 | 65 | %pre -n python3-%{srcname} 66 | getent group %{groupname} >/dev/null || groupadd -r %{groupname} 67 | getent passwd %{username} >/dev/null || \ 68 | useradd -r -g %{groupname} -d %{userhome} -s /sbin/nologin \ 69 | -c "Grokmirror user" %{username} 70 | exit 0 71 | 72 | %files -n python3-%{srcname} 73 | %license LICENSE.txt 74 | %doc README.rst grokmirror.conf pi-piper.conf 75 | %dir %attr(0750, %{username}, %{groupname}) %{userhome} 76 | %dir %attr(0755, 
%{username}, %{groupname}) %{_localstatedir}/log/%{srcname}/ 77 | %dir %attr(0755, %{username}, %{groupname}) /run/%{srcname}/ 78 | %config %{_sysconfdir}/%{srcname}/* 79 | %config %{_sysconfdir}/logrotate.d/* 80 | %{_tmpfilesdir}/%{srcname}.conf 81 | %{_unitdir}/* 82 | %{python3_sitelib}/%{srcname}-*.egg-info/ 83 | %{python3_sitelib}/%{srcname}/ 84 | %{_bindir}/* 85 | %{_mandir}/*/* 86 | 87 | %changelog 88 | * Thu Mar 11 2021 Konstantin Ryabitsev - 2.0.8-1 89 | - Update to 2.0.8 with fixes to symlink handling in manifests 90 | 91 | * Tue Jan 19 2021 Konstantin Ryabitsev - 2.0.7-1 92 | - Update to 2.0.7 with improvements for very large repo collections 93 | 94 | * Thu Jan 07 2021 Konstantin Ryabitsev - 2.0.6-1 95 | - Update to 2.0.6 with minor new features 96 | 97 | * Wed Nov 25 2020 Konstantin Ryabitsev - 2.0.5-1 98 | - Update to 2.0.5 with minor new features 99 | 100 | * Wed Nov 04 2020 Konstantin Ryabitsev - 2.0.4-1 101 | - Update to 2.0.4 with minor new features 102 | 103 | * Wed Nov 04 2020 Konstantin Ryabitsev - 2.0.3-1 104 | - Update to 2.0.3 with minor new features 105 | 106 | * Tue Oct 06 2020 Konstantin Ryabitsev - 2.0.2-1 107 | - Update to 2.0.2 108 | - Install pi-piper into bindir 109 | 110 | * Wed Sep 30 2020 Konstantin Ryabitsev - 2.0.1-1 111 | - Update to 2.0.1 112 | 113 | * Mon Sep 21 2020 Konstantin Ryabitsev - 2.0.0-1 114 | - Initial 2.0.0 packaging 115 | -------------------------------------------------------------------------------- /contrib/ref-updated: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Gerrit's hook system is very different from standard git, so 3 | # minor modifications to the hook are required to make it work. 4 | # Place this file in your gerrit/hooks/ref-updated and modify the 5 | # variables below to make it work for you. 
6 | 7 | GERRIT_HOME=/var/lib/gerrit 8 | GERRIT_GIT=/srv/gerrit/git 9 | 10 | GROK_MANIFEST_BIN=/usr/bin/grok-manifest 11 | GROK_MANIFEST_LOG=${GERRIT_HOME}/logs/grok-manifest.log 12 | # You'll need to place this where you can serve it with httpd 13 | # Make sure the gerrit process can write to this location 14 | GROK_MANIFEST=/var/www/html/grokmirror/manifest.js.gz 15 | 16 | # Yank out the project out of the passed params 17 | args=$(getopt -l "project:" -- "$@") 18 | 19 | eval set -- "$args" 20 | while [ $# -ge 1 ]; do 21 | case "$1" in 22 | --) 23 | # No more options left. 24 | shift 25 | break 26 | ;; 27 | --project) 28 | project="$2" 29 | shift 30 | ;; 31 | esac 32 | shift 33 | done 34 | 35 | ${GROK_MANIFEST_BIN} -y -w -l ${GROK_MANIFEST_LOG} \ 36 | -m ${GROK_MANIFEST} \ 37 | -t ${GERRIT_GIT} \ 38 | -n "${GERRIT_GIT}/${project}.git" 39 | -------------------------------------------------------------------------------- /contrib/selinux/el7/grokmirror.fc: -------------------------------------------------------------------------------- 1 | /usr/bin/grok-.* -- gen_context(system_u:object_r:grokmirror_exec_t,s0) 2 | 3 | /var/lib/grokmirror(/.*)? gen_context(system_u:object_r:grokmirror_var_lib_t,s0) 4 | /var/run/grokmirror(/.*)? gen_context(system_u:object_r:grokmirror_var_run_t,s0) 5 | /var/log/grokmirror(/.*)? 
gen_context(system_u:object_r:grokmirror_log_t,s0) 6 | -------------------------------------------------------------------------------- /contrib/selinux/el7/grokmirror.te: -------------------------------------------------------------------------------- 1 | ################## 2 | # Author: Konstantin Ryabitsev 3 | # 4 | policy_module(grokmirror, 1.1.1) 5 | 6 | require { 7 | type gitosis_var_lib_t; 8 | type git_sys_content_t; 9 | type net_conf_t; 10 | type httpd_t; 11 | type ssh_home_t; 12 | type passwd_file_t; 13 | type postfix_etc_t; 14 | } 15 | 16 | ################## 17 | # Declarations 18 | 19 | type grokmirror_t; 20 | type grokmirror_exec_t; 21 | init_daemon_domain(grokmirror_t, grokmirror_exec_t) 22 | 23 | type grokmirror_var_lib_t; 24 | files_type(grokmirror_var_lib_t) 25 | 26 | type grokmirror_log_t; 27 | logging_log_file(grokmirror_log_t) 28 | 29 | type grokmirror_var_run_t; 30 | files_pid_file(grokmirror_var_run_t) 31 | 32 | type grokmirror_tmpfs_t; 33 | files_tmpfs_file(grokmirror_tmpfs_t) 34 | 35 | gen_tunable(grokmirror_connect_ssh, false) 36 | gen_tunable(grokmirror_connect_all_unreserved, false) 37 | 38 | # Uncomment to put these domains into permissive mode 39 | permissive grokmirror_t; 40 | 41 | ################## 42 | # Daemons policy 43 | 44 | domain_use_interactive_fds(grokmirror_t) 45 | files_read_etc_files(grokmirror_t) 46 | miscfiles_read_localization(grokmirror_t) 47 | 48 | # Logging 49 | append_files_pattern(grokmirror_t, grokmirror_log_t, grokmirror_log_t) 50 | create_files_pattern(grokmirror_t, grokmirror_log_t, grokmirror_log_t) 51 | setattr_files_pattern(grokmirror_t, grokmirror_log_t, grokmirror_log_t) 52 | logging_log_filetrans(grokmirror_t, grokmirror_log_t, { file dir }) 53 | logging_send_syslog_msg(grokmirror_t) 54 | 55 | # Allow managing anything grokmirror_var_lib_t 56 | manage_dirs_pattern(grokmirror_t, grokmirror_var_lib_t, grokmirror_var_lib_t) 57 | manage_files_pattern(grokmirror_t, grokmirror_var_lib_t, grokmirror_var_lib_t) 
58 | manage_lnk_files_pattern(grokmirror_t, grokmirror_var_lib_t, grokmirror_var_lib_t) 59 | manage_sock_files_pattern(grokmirror_t, grokmirror_var_lib_t, grokmirror_var_lib_t) 60 | 61 | # Allow managing git repositories 62 | manage_files_pattern(grokmirror_t, gitosis_var_lib_t, gitosis_var_lib_t) 63 | manage_lnk_files_pattern(grokmirror_t, gitosis_var_lib_t, gitosis_var_lib_t) 64 | manage_dirs_pattern(grokmirror_t, gitosis_var_lib_t, gitosis_var_lib_t) 65 | manage_sock_files_pattern(grokmirror_t, gitosis_var_lib_t, gitosis_var_lib_t) 66 | 67 | manage_files_pattern(grokmirror_t, git_sys_content_t, git_sys_content_t) 68 | manage_lnk_files_pattern(grokmirror_t, git_sys_content_t, git_sys_content_t) 69 | manage_dirs_pattern(grokmirror_t, git_sys_content_t, git_sys_content_t) 70 | manage_sock_files_pattern(grokmirror_t, git_sys_content_t, git_sys_content_t) 71 | 72 | # Allow executing bin (for git, mostly) 73 | corecmd_exec_bin(grokmirror_t) 74 | libs_exec_ldconfig(grokmirror_t) 75 | 76 | # Allow managing httpd content in case the manifest is stored there 77 | apache_manage_sys_content(grokmirror_t) 78 | 79 | # git wants to access system state and other bits 80 | kernel_dontaudit_read_system_state(grokmirror_t) 81 | 82 | # Allow connecting to http, git 83 | corenet_tcp_connect_http_port(grokmirror_t) 84 | corenet_tcp_connect_git_port(grokmirror_t) 85 | corenet_tcp_bind_generic_node(grokmirror_t) 86 | corenet_tcp_sendrecv_generic_node(grokmirror_t) 87 | 88 | # git needs to dns-resolve 89 | sysnet_dns_name_resolve(grokmirror_t) 90 | 91 | # Allow reading .netrc files 92 | read_files_pattern(grokmirror_t, net_conf_t, net_conf_t) 93 | 94 | # Post-hooks can use grep, which requires execmem 95 | allow grokmirror_t self:process execmem; 96 | 97 | fs_getattr_tmpfs(grokmirror_t) 98 | manage_files_pattern(grokmirror_t, grokmirror_tmpfs_t, grokmirror_tmpfs_t) 99 | fs_tmpfs_filetrans(grokmirror_t, grokmirror_tmpfs_t, file) 100 | 101 | # Listener socket file 102 | 
manage_dirs_pattern(grokmirror_t, grokmirror_var_run_t, grokmirror_var_run_t) 103 | manage_files_pattern(grokmirror_t, grokmirror_var_run_t, grokmirror_var_run_t) 104 | manage_sock_files_pattern(grokmirror_t, grokmirror_var_run_t, grokmirror_var_run_t) 105 | files_pid_filetrans(grokmirror_t, grokmirror_var_run_t, { dir file sock_file }) 106 | 107 | # allow httpd to write to the listener socket 108 | allow httpd_t grokmirror_t:unix_stream_socket connectto; 109 | 110 | # Some bogus dontaudits 111 | # ssh tries to open /etc/mailname, which the postfix module labels oddly 112 | dontaudit grokmirror_t postfix_etc_t:file { getattr open read }; 113 | 114 | tunable_policy(`grokmirror_connect_all_unreserved',` 115 | corenet_sendrecv_all_client_packets(grokmirror_t) 116 | corenet_tcp_connect_all_unreserved_ports(grokmirror_t) 117 | ') 118 | 119 | tunable_policy(`grokmirror_connect_ssh',` 120 | corenet_sendrecv_ssh_client_packets(grokmirror_t) 121 | corenet_tcp_connect_ssh_port(grokmirror_t) 122 | corenet_tcp_sendrecv_ssh_port(grokmirror_t) 123 | 124 | ssh_exec(grokmirror_t) 125 | ssh_read_user_home_files(grokmirror_t) 126 | 127 | # for the controlmaster socket 128 | manage_sock_files_pattern(grokmirror_t, ssh_home_t, ssh_home_t) 129 | allow grokmirror_t self:unix_stream_socket connectto; 130 | allow grokmirror_t passwd_file_t:file { getattr open read }; 131 | ') 132 | -------------------------------------------------------------------------------- /grokmirror.conf: -------------------------------------------------------------------------------- 1 | # Grokmirror 2.x and above have a single config file per each set 2 | # of mirrored repos, instead of a separate repos.conf and fsck.conf 3 | # with multiple sections. 4 | # 5 | # You can use ${varname} interpolation within the same section 6 | # or ${sectname:varname} from any other section. 7 | [core] 8 | # 9 | # Where are our mirrored repositories kept? 
10 | toplevel = /var/lib/git/mirror 11 | # 12 | # Where should we keep our manifest file? 13 | manifest = ${toplevel}/manifest.js.gz 14 | # 15 | # Where should we put our log? Make sure it is logrotated, 16 | # otherwise it will grow indefinitely. 17 | log = ${toplevel}/log 18 | # 19 | # Options are "info" and "debug" for all the debug data (lots!) 20 | loglevel = info 21 | # 22 | # Grokmirror version 2.x and above can automatically recognize related repositories 23 | # by analyzing root commits. If it finds two or more related repositories, it can set 24 | # up a unified "object storage" repo and fetch all refs from each related repository. 25 | # For example, you can have two forks of linux.git: 26 | # foo/bar/linux.git: 27 | # refs/heads/master 28 | # refs/heads/devbranch 29 | # refs/tags/v5.0-rc3 30 | # ... 31 | # baz/quux/linux.git: 32 | # refs/heads/master 33 | # refs/heads/devbranch 34 | # refs/tags/v5.0-rc3 35 | # ... 36 | # Grokmirror will set up an object storage repository and fetch all refs from 37 | # both repositories: 38 | # objstore/[random-guid-name].git 39 | # refs/virtual/[sha1-of-foo/bar/linux.git:12]/heads/master 40 | # refs/virtual/[sha1-of-foo/bar/linux.git:12]/heads/devbranch 41 | # refs/virtual/[sha1-of-foo/bar/linux.git:12]/tags/v5.0-rc3 42 | # ... 43 | # refs/virtual/[sha1-of-baz/quux/linux.git:12]/heads/master 44 | # refs/virtual/[sha1-of-baz/quux/linux.git:12]/heads/devbranch 45 | # refs/virtual/[sha1-of-baz/quux/linux.git:12]/tags/v5.0-rc3 46 | # ... 47 | # 48 | # This will dramatically improve storage on disk, as original repositories will be 49 | # repacked to almost nothing. Grokmirror will repack the object storage repository 50 | # with --delta-islands to help optimize packs for efficient clones. 51 | objstore = ${toplevel}/objstore 52 | # 53 | # When copying objects into objstore repositories, we will use regular git 54 | # porcelain commands, such as git fetch. 
However, this tends to be slow due to 55 | # git erring on the side of caution when calculating haves and wants, so if you 56 | # are running a busy mirror and want to save a lot of cycles, you will want to 57 | # enable the setting below, which will use internal git plumbing for much more 58 | # direct object copying between repos. 59 | #objstore_uses_plumbing = yes 60 | # 61 | # Due to the nature of git alternates, if two repositories share all their objects 62 | # with an "object storage" repo, any object from repoA can be retrieved from repoB 63 | # via most web UIs if someone knows the object hash. 64 | # E.g. this is how this trick works on Github: 65 | # https://github.com/torvalds/linux/blob/b4061a10fc29010a610ff2b5b20160d7335e69bf/drivers/hid/hid-samsung.c#L113-L118 66 | # 67 | # If you have private repositories that should absolutely not reveal any objects, 68 | # add them here using shell-style globbing. They will still be set up for alternates 69 | # if we find common roots with public repositories, but we won't fetch any objects 70 | # from these repos into refs/virtual/*. 71 | # 72 | # Leave blank if you don't have any private repos (or don't offer a web UI). 73 | #private = */private/* 74 | 75 | # Used by grok-manifest (and others for "pretty"). These options can be 76 | # overridden using matching command-line switches to grok-manifest. 77 | [manifest] 78 | # Enable to save pretty-printed js (larger and slower, but easier to debug) 79 | pretty = no 80 | # List of repositories to ignore -- can take multiple entries with newline+tab 81 | # and accepts shell globbing. 82 | ignore = /testing/* 83 | /private/* 84 | # Enable to fetch objects into objstore repos after commit. This can be useful if 85 | # someone tries to push the same objects to a sibling repository, but may significantly 86 | # slow down post-commit hook operation, negating any speed gains. If set to no, the 87 | # objects will be fetched during regular grok-fsck runs. 
88 | fetch_objstore = no 89 | # Only include repositories that have git-daemon-export-ok. 90 | check_export_ok = no 91 | 92 | # Used by grok-pull, mostly 93 | [remote] 94 | # The host part of the mirror you're pulling from. 95 | site = https://git.kernel.org 96 | # 97 | # Where the grok manifest is published. The following protocols 98 | # are supported at this time: 99 | # http:// or https:// using If-Modified-Since http header 100 | # file:// (when manifest file is on NFS, for example) 101 | # NB: You can no longer specify username:password as part of the URL with 102 | # grokmirror 2.x and above. You can use a netrc file for this purpose. 103 | manifest = ${site}/manifest.js.gz 104 | # 105 | # As an alternative to setting a manifest URL, you can define a manifest_command. 106 | # It has three possible outcomes: 107 | # exit code 0 + full remote manifest on stdout (must be valid json) 108 | # exit code 1 + error message on stdout 109 | # exit code 127 + nothing on stdout if remote manifest hasn't changed 110 | # It should also accept '--force' as a single argument to force manifest retrieval 111 | # even if it hasn't changed. 112 | # See contrib/gitolite/* for example commands to use with gitolite. 113 | #manifest_command = /usr/local/bin/grok-get-gl-manifest.sh 114 | # 115 | # If the remote is providing pre-generated preload bundles, list the path 116 | # here. This is only useful if you're mirroring the entire repository 117 | # collection and not just a handful of select repos. 118 | #preload_bundle_url = https://some-cdn-site.com/preload/ 119 | 120 | # Used by grok-pull 121 | [pull] 122 | # 123 | # Write out projects.list that can be used by gitweb or cgit. 124 | # Leave blank if you don't want a projects.list. 125 | projectslist = ${core:toplevel}/projects.list 126 | # 127 | # When generating projects.list, start at this subpath instead 128 | # of at the toplevel. 
Useful when mirroring kernel or when generating 129 | # multiple gitweb/cgit configurations for the same tree. 130 | projectslist_trimtop = 131 | # 132 | # When generating projects.list, also create entries for symlinks. 133 | # Otherwise we assume they are just legacy and keep them out of 134 | # web interfaces. 135 | projectslist_symlinks = no 136 | # 137 | # A simple hook to execute whenever a repository is modified. 138 | # It receives the full path to the git repository modified as the only 139 | # argument. You can define multiple hooks if you separate them by 140 | # newline+whitespace. E.g.: 141 | #post_update_hook = /usr/bin/grok-pi-indexer -c /etc/public-inbox/config update 142 | post_update_hook = 143 | # 144 | # A hook to execute after all new repositories are done cloning. 145 | # It receives no arguments; full paths to freshly cloned repositories 146 | # are passed on stdin, newline-terminated. This hook is useful when you want 147 | # to make sure that certain jobs only run when there were fresh clones 148 | # and they have all completed cloning. 149 | # You can define multiple hooks if you separate them by newline+whitespace. 150 | #post_clone_complete_hook = /usr/bin/grok-pi-indexer -c /etc/public-inbox/config -j 4 --no-fsync init 151 | post_clone_complete_hook = 152 | # 153 | # A hook to execute after the work queue is completely cleared, in case 154 | # you want to run some tasks only after all updates are completed. Does not 155 | # receive any arguments or stdin contents. 156 | # You can define multiple hooks if you separate them by newline+whitespace. 157 | #post_work_complete_hook = /usr/bin/grok-pi-indexer -c /etc/public-inbox/config -j 4 --no-fsync extindex 158 | post_work_complete_hook = 159 | # 160 | # Should we purge repositories that are not present in the remote 161 | # manifest? 
If set to "no" this can be overridden via the -p flag to 162 | # grok-pull (useful if you have a very large collection of repos 163 | # and don't want to walk the entire tree on each manifest run). 164 | # See also: purgeprotect. 165 | purge = yes 166 | # 167 | # There may be repositories that aren't replicated with grokmirror that 168 | # you don't want to be purged. You can list them below using bash-style 169 | # globbing. Separate multiple entries using newline+whitespace. 170 | #nopurge = /gitolite-admin.git 171 | # 172 | # This prevents catastrophic mirror purges when our upstream gives us a 173 | # manifest that is dramatically smaller than ours. The default is to 174 | # refuse the purge if the remote manifest has over 5% fewer repositories 175 | # than what we have, or in other words, if we have 100 repos and the 176 | # remote manifest has shrunk to 95 repos or fewer, we refuse to purge, 177 | # suspecting that something has gone wrong. You can set purgeprotect to 178 | # a higher percentage, or override it entirely with --force-purge 179 | # commandline flag. 180 | purgeprotect = 5 181 | # 182 | # If owner is not specified in the manifest, who should be listed 183 | # as the default owner in tools like gitweb or cgit? 184 | #default_owner = Grokmirror User 185 | default_owner = Grokmirror User 186 | # 187 | # By default, we'll call the upstream origin "_grokmirror", but you can set your 188 | # own name here (e.g. just call it "origin") 189 | remotename = _grokmirror 190 | # 191 | # To speed up updates, grok-pull will use multiple threads. Please be 192 | # considerate to the mirror you're pulling from and don't set this very 193 | # high. You may also run into per-ip multiple session limits, so leave 194 | # this number at a nice low setting. 195 | pull_threads = 5 196 | # 197 | # If git fetch fails, we will retry up to this many times before 198 | # giving up and marking that repository as failed. 
199 | retries = 3 200 | # 201 | # Use shell-globbing to list the repositories you would like to mirror. 202 | # If you want to mirror everything, just say "*". Separate multiple entries 203 | # with newline plus tab. Examples: 204 | # 205 | # mirror everything: 206 | #include = * 207 | # 208 | # mirror just the main kernel sources: 209 | #include = /pub/scm/linux/kernel/git/torvalds/linux.git 210 | # /pub/scm/linux/kernel/git/stable/linux.git 211 | # /pub/scm/linux/kernel/git/next/linux-next.git 212 | include = * 213 | # 214 | # This is processed after the include. If you want to exclude some 215 | # specific entries from an all-inclusive globbing above. E.g., to 216 | # exclude all linux-2.4 git sources: 217 | #exclude = */linux-2.4* 218 | exclude = 219 | # 220 | # List repositories that should always reject forced pushes. 221 | #ffonly = */torvalds/linux.git 222 | # 223 | # If you enable the following option and run grok-pull with -o, 224 | # grok-pull will run continuously and will periodically recheck the 225 | # remote manifest for new updates. See contrib for an example systemd 226 | # service you can set up to continuously update your local mirror. The 227 | # value is in seconds. 228 | #refresh = 900 229 | # 230 | # If you enable refresh, you can also enable the socket listener that 231 | # allows for rapid push notifications from your primary mirror. The 232 | # socket expects repository names matching what is in the local 233 | # manifest, followed by a newline. E.g.: 234 | # /pub/scm/linux/kernel/git/torvalds/linux.git\n 235 | # 236 | # Anything not matching a repository in the local manifest will be ignored. 237 | # See contrib for example pubsub listener. 238 | #socket = ${core:toplevel}/.updater.socket 239 | 240 | # Used by grok-fsck 241 | [fsck] 242 | # 243 | # How often should we check each repository, in days. 
Any newly added 244 | # repository will have the first check within a random period of 0 and 245 | # $frequency, and then every $frequency after that, to assure that not 246 | # all repositories are checked on the same day. Don't set to less than 247 | # 7 unless you only mirror a few repositories (or really like to thrash 248 | # your disks). 249 | frequency = 30 250 | # 251 | # Where to keep the status file 252 | statusfile = ${core:toplevel}/fsck.status.js 253 | # 254 | # Some errors are relatively benign and can be safely ignored. Add 255 | # matching substrings to this field to ignore them. 256 | ignore_errors = notice: 257 | warning: disabling bitmap writing 258 | ignoring extra bitmap file 259 | missingTaggerEntry 260 | missingSpaceBeforeDate 261 | # 262 | # If the fsck process finds errors that match any of these strings 263 | # during its run, it will ask grok-pull to reclone this repository when 264 | # it runs next. Only useful for minion mirrors, not for mirror masters. 265 | reclone_on_errors = fatal: bad tree object 266 | fatal: Failed to traverse parents 267 | missing commit 268 | missing blob 269 | missing tree 270 | broken link 271 | # 272 | # Should we repack the repositories? You almost always want this on, 273 | # unless you are doing something really odd. 274 | repack = yes 275 | # 276 | # We set proper flags for repacking depending if the repo is using 277 | # alternates or not, and whether this is a full repack or not. We will 278 | # also always build bitmaps (when it makes sense), to make cloning 279 | # faster. You can add other flags (e.g. 
--threads and --window-memory) 280 | # via the following parameter: 281 | extra_repack_flags = 282 | # 283 | # These flags are added *in addition* to extra_repack_flags 284 | extra_repack_flags_full = --window=250 --depth=50 285 | # 286 | # If git version is new enough to support generating commit graphs, we 287 | # will always generate them, though if your git version is older than 288 | # 2.24.0, the graphs won't be automatically used unless core.commitgraph 289 | # is set to true. You can turn off graph generation by setting the 290 | # commitgraph option to "no". Graph generation will be skipped for 291 | # child repos that use alternates. 292 | commitgraph = yes 293 | # 294 | # Run git-prune to remove obsolete loose objects. Grokmirror will make 295 | # sure this is a safe operation when it comes to objstore repos, so you 296 | # should leave this enabled. 297 | prune = yes 298 | # 299 | # Grokmirror is extremely careful about not pruning the repositories 300 | # that are used by others via git alternates. However, it cannot prevent 301 | # some other git process (not grokmirror-managed) from inadvertently 302 | # running "git prune/gc". For example, this may happen if an admin 303 | # mistypes a command in the wrong directory. Setting precious=yes will 304 | # add extensions.preciousObjects=true to the git configuration file in 305 | # such repositories, which will help prevent repository corruption 306 | # between grok-fsck runs. 307 | # 308 | # When set to "yes", grokmirror will temporarily turn this feature off 309 | # when running scheduled repacks in order to be able to delete redundant 310 | # packs and loose objects that have already been packed. This is usually 311 | # a safe operation when done by grok-fsck itself. However, if you set 312 | # this to "always", grokmirror will leave this enabled even during 313 | # grok-fsck runs, for maximum paranoia. 
Be warned, that this will result 314 | # in ever-growing git repositories, so it only makes sense in very rare 315 | # situations, such as for backup purposes. 316 | precious = yes 317 | # 318 | # If you have a lot of forks using the same objstore repo, you may end 319 | # up with thousands of refs being negotiated during each remote update. 320 | # This tends to result in higher load and bigger negotiation transfers. 321 | # Setting the "baselines" option allows you to designate a set of repos 322 | # that are likely to have most of the relevant objects and ignore the 323 | # rest of the objstore refs. This is done using the 324 | # core.alternateRefsPrefixes feature (see git-config). 325 | baselines = */kernel/git/next/linux-next.git 326 | # 327 | # Objstore repos are repacked with delta island support (see man 328 | # git-config), but if you have one repo that is a lot more likely to be 329 | # cloned than all the other ones, you can designate it as "islandCore", 330 | # which will give it priority when creating packs. 331 | islandcores = */kernel/git/torvalds/linux.git 332 | # 333 | # Generate preload bundles for objstore repos and put them into this 334 | # location. Unless you are running a major mirroring hub site, you 335 | # do not want this enabled. See corresponding preload_bundle_url 336 | # entry in the [remote] section. 337 | #preload_bundle_outdir = /some/http/accessible/path 338 | # 339 | # If there are any critical errors, the report will be sent to root. 
You 340 | # can change the settings below to configure report delivery to suit 341 | # your needs: 342 | #report_to = root 343 | #report_from = root 344 | #report_subject = git fsck errors on my beautiful replica 345 | #report_mailhost = localhost 346 | -------------------------------------------------------------------------------- /grokmirror/bundle.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Copyright (C) 2013-2020 by The Linux Foundation and contributors 3 | # 4 | # This program is free software: you can redistribute it and/or modify 5 | # it under the terms of the GNU General Public License as published by 6 | # the Free Software Foundation, either version 3 of the License, or 7 | # (at your option) any later version. 8 | # 9 | # This program is distributed in the hope that it will be useful, 10 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | # GNU General Public License for more details. 13 | # 14 | # You should have received a copy of the GNU General Public License 15 | # along with this program. If not, see . 16 | 17 | import sys 18 | import os 19 | import logging 20 | import fnmatch 21 | import grokmirror 22 | 23 | from pathlib import Path 24 | 25 | # default basic logger. We override it later. 
logger = logging.getLogger(__name__)


def get_repo_size(fullpath):
    """Return the object storage size of a repository, alternates included.

    Sums the ``size`` and ``size-pack`` values reported by
    ``grokmirror.get_repo_obj_info()``. When that report contains an
    ``alternate`` entry, the size of the repository returned by
    ``grokmirror.get_altrepo()`` is added recursively.

    NOTE(review): generate_bundles() divides this value by 1024 twice and
    compares it with a limit documented in GiB, so the unit is presumably
    KiB -- confirm against get_repo_obj_info().
    """
    obj_info = grokmirror.get_repo_obj_info(fullpath)
    reposize = int(obj_info['size']) + int(obj_info['size-pack'])
    if 'alternate' in obj_info:
        reposize += get_repo_size(grokmirror.get_altrepo(fullpath))

    logger.debug('%s size: %s', fullpath, reposize)
    return reposize


def generate_bundles(config, outdir, gitargs, revlistargs, maxsize, include):
    """Create or refresh ``clone.bundle`` files for repositories in the manifest.

    :param config: loaded configuration; ``config['core']`` must provide
        ``manifest`` and ``toplevel``
    :param outdir: directory under which one bundle directory per repository
        is created
    :param gitargs: whitespace-separated extra arguments placed before
        ``bundle create``; may be empty or None
    :param revlistargs: whitespace-separated rev-list arguments appended to
        ``bundle create``; may be empty or None
    :param maxsize: repositories whose computed size exceeds this value are
        skipped
    :param include: glob pattern, or iterable of glob patterns, selecting the
        manifest entries to bundle
    :return: always 0
    """
    # uses advisory lock, so its safe even if we die unexpectedly
    manifest = grokmirror.read_manifest(config['core'].get('manifest'))
    toplevel = os.path.realpath(config['core'].get('toplevel'))
    # Always end up with lists: concatenating an empty string or None with a
    # list further down would raise TypeError.
    gitargs = gitargs.split() if gitargs else []
    revlistargs = revlistargs.split() if revlistargs else []
    # A bare string would be iterated character by character, so a pattern
    # such as 'a/*' would match everything through its '*' character.
    if isinstance(include, str):
        include = [include]

    for repo in manifest:
        logger.debug('Checking %s', repo)
        # Does it match our globbing pattern (with or without the leading /)?
        matched = any(
            fnmatch.fnmatch(repo, tomatch) or fnmatch.fnmatch(repo, tomatch.lstrip('/'))
            for tomatch in include
        )
        if not matched:
            logger.debug('%s does not match include list, skipping', repo)
            continue

        repo = repo.lstrip('/')
        fullpath = os.path.join(toplevel, repo)

        bundledir = os.path.join(outdir, repo.replace('.git', ''))
        Path(bundledir).mkdir(parents=True, exist_ok=True)

        repofpr = grokmirror.get_repo_fingerprint(toplevel, repo)
        logger.debug('%s fingerprint is %s', repo, repofpr)

        # Do we have a bundle file already?
        bfile = os.path.join(bundledir, 'clone.bundle')
        bfprfile = os.path.join(bundledir, '.fingerprint')
        logger.debug('Looking for %s', bfile)
        if os.path.exists(bfile):
            # Do we have a bundle fingerprint?
            logger.debug('Found existing bundle in %s', bfile)
            if os.path.exists(bfprfile):
                with open(bfprfile) as fh:
                    bfpr = fh.read().strip()
                logger.debug('Read bundle fingerprint from %s: %s', bfprfile, bfpr)
                if bfpr == repofpr:
                    logger.info(' skipped: %s (unchanged)', repo)
                    continue

        logger.debug('checking size of %s', repo)
        total_size = get_repo_size(fullpath)/1024/1024

        if total_size > maxsize:
            logger.info(' skipped: %s (%s > %s)', repo, total_size, maxsize)
            continue

        fullargs = gitargs + ['bundle', 'create', bfile] + revlistargs
        logger.debug('Full git args: %s', fullargs)
        logger.info(' generate: %s', bfile)
        ecode, out, err = grokmirror.run_git_command(fullpath, fullargs)

        if ecode == 0:
            # Record the fingerprint only for a bundle that was really
            # written, so an unchanged repo is skipped on the next run.
            with open(bfprfile, 'w') as fh:
                fh.write(repofpr)
            logger.debug('Wrote %s into %s', repofpr, bfprfile)
        else:
            # Do not fail silently; the repo is retried on the next run
            # because no fingerprint is recorded.
            logger.warning('Failed to generate %s (exit code %s): %s', bfile, ecode, err)

    return 0


def parse_args():
    """Parse the grok-bundle command line and return the argparse namespace."""
    import argparse

    # noinspection PyTypeChecker
    op = argparse.ArgumentParser(prog='grok-bundle',
                                 description='Generate clone.bundle files for use with "repo"',
                                 formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    op.add_argument('-v', '--verbose', action='store_true',
                    default=False,
                    help='Be verbose and tell us what you are doing')
    op.add_argument('-c', '--config',
                    required=True,
                    help='Location of the configuration file')
    op.add_argument('-o', '--outdir',
                    required=True,
                    help='Location where to store bundle files')
    op.add_argument('-g', '--gitargs',
                    default='-c core.compression=9',
                    help='extra args to pass to git')
    op.add_argument('-r', '--revlistargs',
                    default='--branches HEAD',
                    help='Rev-list args to use')
    op.add_argument('-s', '--maxsize', type=int,
                    default=2,
                    help='Maximum size of git repositories to bundle (in GiB)')
    # The default is a bare string while user-supplied values arrive as a
    # list; generate_bundles() accepts both forms.
    op.add_argument('-i', '--include', nargs='*',
                    default='*',
                    help='List repositories to bundle (accepts shell globbing)')
    op.add_argument('--version', action='version', version=grokmirror.VERSION)

    opts = op.parse_args()

    return opts


def grok_bundle(cfgfile, outdir, gitargs, revlistargs, maxsize, include, verbose=False):
    """Load the config file, set up the module logger and generate bundles.

    The log file location is read from the ``log`` key of the ``[core]``
    section and debug logging is enabled when ``loglevel`` is ``debug``.
    Returns the value of generate_bundles().
    """
    global logger

    config = grokmirror.load_config_file(cfgfile)

    logfile = config['core'].get('log', None)
    if config['core'].get('loglevel', 'info') == 'debug':
        loglevel = logging.DEBUG
    else:
        loglevel = logging.INFO

    logger = grokmirror.init_logger('bundle', logfile, loglevel, verbose)

    return generate_bundles(config, outdir, gitargs, revlistargs, maxsize, include)


def command():
    """Console entry point: parse arguments, run, and exit with the result."""
    opts = parse_args()

    retval = grok_bundle(
        opts.config, opts.outdir, opts.gitargs, opts.revlistargs, opts.maxsize, opts.include, verbose=opts.verbose)

    sys.exit(retval)


if __name__ == '__main__':
    command()

# --------------------------------------------------------------------------------
# /grokmirror/dumb_pull.py:
# --------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
# Copyright (C) 2013-2018 by The Linux Foundation and contributors
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

import os

import grokmirror
import logging
import fnmatch
import subprocess

logger = logging.getLogger(__name__)


def git_rev_parse_all(gitdir):
    """Return the output of ``git rev-parse --all`` run in ``gitdir``.

    Anything git printed on stderr is logged as a warning.
    """
    args = ['rev-parse', '--all']
    retcode, output, error = grokmirror.run_git_command(gitdir, args)

    if error:
        # Nothing on stderr is expected here, so all of it is a warning.
        logger.warning('Stderr: %s', error)

    return output


def git_remote_update(args, fullpath):
    """Run a git fetch/update command in ``fullpath`` and log its stderr.

    Lines that start with ``From `` or contain ``-> `` after their first
    character are logged at debug level; every other stderr line is logged
    as a warning.
    """
    retcode, output, error = grokmirror.run_git_command(fullpath, args)

    if not error:
        return

    # Put things we recognize into debug
    debug = list()
    warn = list()
    for line in error.split('\n'):
        if line.startswith('From ') or line.find('-> ') > 0:
            debug.append(line)
        else:
            warn.append(line)
    if debug:
        logger.debug('Stderr: %s', '\n'.join(debug))
    if warn:
        logger.warning('Stderr: %s', '\n'.join(warn))


def dumb_pull_repo(gitdir, remotes, svn=False):
    """Fetch the matching remotes of one repository.

    :param gitdir: path of the repository to update
    :param remotes: shell-style patterns of remote names to fetch; with
        ``svn=True`` each entry is passed to ``git svn fetch`` and ``*``
        is translated to ``--all``
    :param svn: use ``git svn fetch`` instead of ``git remote update``
    :return: True when the ``git rev-parse --all`` output changed, False
        otherwise (also when the lock is busy or no remotes are defined)
    """
    # verify it's a git repo and fetch all remotes
    logger.debug('Will pull %s with following remotes: %s', gitdir, remotes)
    old_revs = git_rev_parse_all(gitdir)

    try:
        grokmirror.lock_repo(gitdir, nonblocking=True)
    except IOError:
        logger.info('Could not obtain exclusive lock on %s', gitdir)
        logger.info('\tAssuming another process is running.')
        return False

    # From here on the lock is held: release it on every exit path,
    # including the early return below and any unexpected exception.
    try:
        if svn:
            logger.debug('Using git-svn for %s', gitdir)

            for remote in remotes:
                # arghie-argh-argh
                if remote == '*':
                    remote = '--all'

                logger.info('Running git-svn fetch %s in %s', remote, gitdir)
                args = ['svn', 'fetch', remote]
                git_remote_update(args, gitdir)

        else:
            # Not an svn remote
            myremotes = grokmirror.list_repo_remotes(gitdir)
            if not len(myremotes):
                logger.info('Repository %s has no defined remotes!', gitdir)
                return False

            logger.debug('existing remotes: %s', myremotes)
            for remote in remotes:
                remotefound = False
                for myremote in myremotes:
                    if fnmatch.fnmatch(myremote, remote):
                        remotefound = True
                        logger.debug('existing remote %s matches %s', myremote, remote)
                        args = ['remote', 'update', myremote, '--prune']
                        logger.info('Updating remote %s in %s', myremote, gitdir)

                        git_remote_update(args, gitdir)

                if not remotefound:
                    logger.info('Could not find any remotes matching %s in %s', remote, gitdir)

        new_revs = git_rev_parse_all(gitdir)
    finally:
        grokmirror.unlock_repo(gitdir)

    if old_revs == new_revs:
        logger.debug('No new revs, no updates')
        return False

    logger.debug('New revs found -- new content pulled')
    return True


def run_post_update_hook(hookscript, gitdir):
    """Run ``hookscript`` with ``gitdir`` as its only argument.

    Does nothing when no hook is configured and only warns when the hook is
    not executable. Hook stderr is logged as a warning, stdout as info.
    """
    if not hookscript:
        return
    if not os.access(hookscript, os.X_OK):
        logger.warning('post_update_hook %s is not executable', hookscript)
        return

    args = [hookscript, gitdir]
    logger.debug('Running: %s', ' '.join(args))
    (output, error) = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE).communicate()

    # Hook output is arbitrary bytes; never let undecodable output abort
    # the pull run.
    error = error.decode(errors='replace').strip()
    output = output.decode(errors='replace').strip()
    if error:
        # Put hook stderror into warning
        logger.warning('Hook Stderr: %s', error)
    if output:
        # Put hook stdout into info
        logger.info('Hook Stdout: %s', output)


def parse_args():
    """Parse the grok-dumb-pull command line and return the argparse namespace."""
    import argparse
    # noinspection PyTypeChecker
    op = argparse.ArgumentParser(prog='grok-dumb-pull',
                                 description='Fetch remotes in repositories not managed by grokmirror',
                                 formatter_class=argparse.ArgumentDefaultsHelpFormatter)

    op.add_argument('-v', '--verbose', dest='verbose', action='store_true',
                    default=False,
                    help='Be verbose and tell us what you are doing')
    op.add_argument('-s', '--svn', dest='svn', action='store_true',
                    default=False,
                    help='The remotes for these repositories are Subversion')
    op.add_argument('-r', '--remote-names', dest='remotes', action='append',
                    default=None,
                    help='Only fetch remotes matching this name (accepts shell globbing)')
    op.add_argument('-u', '--post-update-hook', dest='posthook',
                    default='',
                    help='Run this hook after each repository is updated.')
    op.add_argument('-l', '--logfile', dest='logfile',
                    default=None,
                    help='Put debug logs into this file')
    op.add_argument('--version', action='version', version=grokmirror.VERSION)
    # nargs='+' makes argparse itself reject an empty list of paths, so no
    # separate check is needed afterwards.
    op.add_argument('paths', nargs='+', help='Full path(s) of the repos to pull')

    return op.parse_args()


def dumb_pull(paths, verbose=False, svn=False, remotes=None, posthook='', logfile=None):
    """Pull every repository named by, or found under, ``paths``.

    A path ending in ``.git`` is treated as a single repository; any other
    path is searched with ``grokmirror.find_all_gitdirs()``. The post-update
    hook runs for each repository that received new revisions. When
    ``remotes`` is None, all remotes (``*``) are fetched.
    """
    global logger

    loglevel = logging.INFO
    logger = grokmirror.init_logger('dumb-pull', logfile, loglevel, verbose)

    if remotes is None:
        remotes = ['*']

    # Find all repositories we are to pull
    for entry in paths:
        if entry.endswith('.git'):
            if not os.path.exists(entry):
                logger.critical('%s does not exist', entry)
                continue

            logger.debug('Found %s', entry)
            if dumb_pull_repo(entry, remotes, svn=svn):
                run_post_update_hook(posthook, entry)

        else:
            logger.debug('Finding all git repos in %s', entry)
            for founddir in grokmirror.find_all_gitdirs(entry):
                if dumb_pull_repo(founddir, remotes, svn=svn):
                    run_post_update_hook(posthook, founddir)


def command():
    """Console entry point for grok-dumb-pull."""
    opts = parse_args()

    return dumb_pull(
        opts.paths, verbose=opts.verbose, svn=opts.svn, remotes=opts.remotes,
        posthook=opts.posthook, logfile=opts.logfile)


if __name__ == '__main__':
    command()

# --------------------------------------------------------------------------------
# /grokmirror/manifest.py:
# --------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
# Copyright (C) 2013-2020 by The Linux Foundation and contributors
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

import os
import sys
import logging
import datetime

import grokmirror

logger = logging.getLogger(__name__)

# Set from the config file in parse_args(); passed on when fetching into
# objstore repositories.
objstore_uses_plumbing = False


def update_manifest(manifest, toplevel, fullpath, usenow, ignorerefs):
    """Add or refresh the manifest entry for the repository at ``fullpath``.

    Exits the process with status 1 when ``fullpath`` is not a bare git
    repository. Repositories without a fingerprint are left out of the
    manifest. Entries are keyed by the path relative to ``toplevel`` with a
    leading ``/``; a key stored without the leading slash is migrated.
    A ``reference`` key is always written, even when it is None.
    """
    logger.debug('Examining %s', fullpath)
    if not grokmirror.is_bare_git_repo(fullpath):
        logger.critical('Error opening %s.', fullpath)
        logger.critical('Make sure it is a bare git repository.')
        sys.exit(1)

    gitdir = '/' + os.path.relpath(fullpath, toplevel)
    repoinfo = grokmirror.get_repo_defs(toplevel, gitdir, usenow=usenow, ignorerefs=ignorerefs)
    # Ignore it if it's an empty git repository
    if not repoinfo['fingerprint']:
        logger.info(' manifest: ignored %s (no heads)', gitdir)
        return

    if gitdir in manifest:
        logger.info(' manifest: updated %s', gitdir)
    elif gitdir.lstrip('/') in manifest:
        # In grokmirror-1.x we didn't normalize paths to be always with a leading '/', so
        # check the manifest for both and make sure we only save the path with a leading /
        manifest[gitdir] = manifest.pop(gitdir.lstrip('/'))
        logger.info(' manifest: updated %s', gitdir)
    else:
        logger.info(' manifest: added %s', gitdir)
        manifest[gitdir] = dict()

    entry = manifest[gitdir]
    altrepo = grokmirror.get_altrepo(fullpath)
    reference = None
    if entry.get('forkgroup', None) != repoinfo.get('forkgroup', None):
        # Use the first remote listed in the forkgroup as our reference, just so
        # grokmirror-1.x clients continue to work without doing full clones
        remotes = grokmirror.list_repo_remotes(altrepo, withurl=True)
        if len(remotes):
            # "first" means first in sorted order of the remote urls
            reference = '/' + os.path.relpath(min(x[1] for x in remotes), toplevel)
    else:
        reference = entry.get('reference', None)

    if altrepo and not reference and not repoinfo.get('forkgroup'):
        # Not an objstore repo
        reference = '/' + os.path.relpath(altrepo, toplevel)

    entry.update(repoinfo)
    # Always write a reference entry even if it's None, as grok-1.x clients expect it
    entry['reference'] = reference


def set_symlinks(manifest, toplevel, symlinks):
    """Record symlinked repository paths on the entries they point to.

    For every usable symlink, its toplevel-relative path is added to the
    ``symlinks`` list of the target's manifest entry. A manifest entry stored
    under the symlink's own path is removed, and ``reference`` values that
    point at the symlink are redirected to the real repository. Broken
    symlinks, symlinks leaving ``toplevel`` and symlinks to repositories not
    in the manifest are logged and ignored.
    """
    # A real path-prefix test: a substring search would also accept e.g.
    # /srv/git-other when toplevel is /srv/git.
    topprefix = toplevel.rstrip(os.sep) + os.sep
    for symlink in symlinks:
        target = os.path.realpath(symlink)
        if not os.path.exists(target):
            logger.critical(' manifest: symlink %s is broken, ignored', symlink)
            continue
        relative = '/' + os.path.relpath(symlink, toplevel)
        if target != toplevel and not target.startswith(topprefix):
            logger.critical(' manifest: symlink %s points outside toplevel, ignored', relative)
            continue
        tgtgitdir = '/' + os.path.relpath(target, toplevel)
        if tgtgitdir not in manifest:
            logger.critical(' manifest: symlink %s points to %s, which we do not recognize', relative, tgtgitdir)
            continue
        if 'symlinks' in manifest[tgtgitdir]:
            if relative not in manifest[tgtgitdir]['symlinks']:
                logger.info(' manifest: symlinked %s->%s', relative, tgtgitdir)
                manifest[tgtgitdir]['symlinks'].append(relative)
            else:
                logger.info(' manifest: %s->%s is already in manifest', relative, tgtgitdir)
        else:
            manifest[tgtgitdir]['symlinks'] = [relative]
            logger.info(' manifest: symlinked %s->%s', relative, tgtgitdir)

        # Now go through all repos and fix any references pointing to the
        # symlinked location. We shouldn't need to do anything with forkgroups.
        # Iterate over a snapshot of the keys because entries may be removed.
        for gitdir in list(manifest):
            if gitdir == relative:
                # The entry's path (not its contents) is what became a symlink
                logger.info(' manifest: removing %s (replaced by a symlink)', gitdir)
                manifest.pop(gitdir)
                continue
            if manifest[gitdir].get('reference') == relative:
                logger.info(' manifest: symlinked %s->%s', relative, tgtgitdir)
                manifest[gitdir]['reference'] = tgtgitdir


def purge_manifest(manifest, toplevel, gitdirs):
    """Drop manifest entries whose repository is not listed in ``gitdirs``.

    ``gitdirs`` holds full paths; manifest keys are joined onto ``toplevel``
    (minus their leading ``/``) before the comparison.
    """
    # One set lookup per entry instead of a list scan
    existing = set(gitdirs)
    for oldrepo in list(manifest):
        if os.path.join(toplevel, oldrepo.lstrip('/')) not in existing:
            logger.info(' manifest: purged %s (gone)', oldrepo)
            # manifest is a dict and has no remove() method
            del manifest[oldrepo]


def parse_args():
    """Parse the grok-manifest command line, filling gaps from the config file.

    Values given on the command line win; with ``--cfgfile`` the ``[core]``
    section (and ``[manifest]``, when present) supplies whatever was not
    specified. Also sets the module-level ``objstore_uses_plumbing``.
    """
    global objstore_uses_plumbing

    import argparse
    # noinspection PyTypeChecker
    op = argparse.ArgumentParser(prog='grok-manifest',
                                 description='Create or update a manifest file',
                                 formatter_class=argparse.ArgumentDefaultsHelpFormatter)

    op.add_argument('--cfgfile', dest='cfgfile',
                    default=None,
                    help='Path to grokmirror.conf containing at least a [core] section')
    op.add_argument('-m', '--manifest', dest='manifile',
                    help='Location of manifest.js or manifest.js.gz')
    op.add_argument('-t', '--toplevel', dest='toplevel',
                    help='Top dir where all repositories reside')
    op.add_argument('-l', '--logfile', dest='logfile',
                    default=None,
                    help='When specified, will put debug logs in this location')
    op.add_argument('-n', '--use-now', dest='usenow', action='store_true',
                    default=False,
                    help='Use current timestamp instead of parsing commits')
    op.add_argument('-c', '--check-export-ok', dest='check_export_ok',
                    action='store_true', default=False,
                    help='Export only repositories marked as git-daemon-export-ok')
    op.add_argument('-p', '--purge', dest='purge', action='store_true',
                    default=False,
                    help='Purge deleted git repositories from manifest')
    op.add_argument('-x', '--remove', dest='remove', action='store_true',
                    default=False,
                    help='Remove repositories passed as arguments from manifest')
    op.add_argument('-y', '--pretty', dest='pretty', action='store_true',
                    default=False,
                    help='Pretty-print manifest (sort keys and add indentation)')
    op.add_argument('-i', '--ignore-paths', dest='ignore', action='append',
                    default=None,
                    help='When finding git dirs, ignore these paths (accepts shell-style globbing)')
    op.add_argument('-r', '--ignore-refs', dest='ignore_refs', action='append', default=None,
                    help='Refs to exclude from fingerprint calculation (e.g. refs/meta/*)')
    op.add_argument('-w', '--wait-for-manifest', dest='wait',
                    action='store_true', default=False,
                    help='When running with arguments, wait if manifest is not there '
                         '(can be useful when multiple writers are writing the manifest)')
    op.add_argument('-o', '--fetch-objstore', dest='fetchobst',
                    action='store_true', default=False,
                    help='Fetch updates into objstore repo (if used)')
    op.add_argument('-v', '--verbose', dest='verbose', action='store_true',
                    default=False,
                    help='Be verbose and tell us what you are doing')
    op.add_argument('--version', action='version', version=grokmirror.VERSION)
    op.add_argument('paths', nargs='*', help='Full path(s) to process')

    opts = op.parse_args()

    if opts.cfgfile:
        config = grokmirror.load_config_file(opts.cfgfile)
        core = config['core']
        if not opts.manifile:
            opts.manifile = core.get('manifest')
        if not opts.toplevel:
            opts.toplevel = os.path.realpath(core.get('toplevel'))
        if not opts.logfile:
            # NOTE(review): grok-bundle reads the 'log' key from [core], this
            # reads 'logfile' -- confirm which key the config really uses.
            opts.logfile = core.get('logfile')

        objstore_uses_plumbing = core.getboolean('objstore_uses_plumbing', False)

        if 'manifest' in config:
            section = config['manifest']
            if not opts.ignore:
                opts.ignore = [x.strip() for x in section.get('ignore', '').split('\n')]
            if not opts.check_export_ok:
                opts.check_export_ok = section.getboolean('check_export_ok', False)
            if not opts.pretty:
                opts.pretty = section.getboolean('pretty', False)
            if not opts.fetchobst:
                opts.fetchobst = section.getboolean('fetch_objstore', False)

    if not opts.manifile:
        op.error('You must provide the path to the manifest file')
    if not opts.toplevel:
        op.error('You must provide the toplevel path')
    if opts.ignore is None:
        opts.ignore = list()

    if not len(opts.paths) and opts.wait:
        op.error('--wait option only makes sense when dirs are passed')

    return opts


def grok_manifest(manifile, toplevel, paths=None, logfile=None, usenow=False,
                  check_export_ok=False, purge=False, remove=False,
                  pretty=False, ignore=None, wait=False, verbose=False, fetchobst=False,
                  ignorerefs=None):
    """Create or update the manifest file ``manifile``.

    With ``remove`` and ``paths``, the named repositories (or symlinks) are
    removed from the manifest and 0 is returned. Otherwise the repositories
    in ``paths`` are updated; when ``purge`` is set, no paths are given, or
    the manifest is empty, the whole tree under ``toplevel`` is walked and
    vanished repositories are purged. With ``fetchobst``, objects of the
    processed repositories are fetched into their objstore repository after
    the manifest has been written and unlocked.
    """
    global logger
    loglevel = logging.INFO
    logger = grokmirror.init_logger('manifest', logfile, loglevel, verbose)

    startt = datetime.datetime.now()
    if paths is None:
        paths = list()
    if ignore is None:
        ignore = list()

    grokmirror.manifest_lock(manifile)
    manifest = grokmirror.read_manifest(manifile, wait=wait)

    toplevel = os.path.realpath(toplevel)

    # If manifest is empty, don't use current timestamp
    if not len(manifest):
        usenow = False

    if remove and len(paths):
        # Remove the repos as required, write new manfiest and exit
        for fullpath in paths:
            repo = '/' + os.path.relpath(fullpath, toplevel)
            if repo in manifest:
                manifest.pop(repo)
                logger.info(' manifest: removed %s', repo)
            else:
                # Is it in any of the symlinks?
                found = False
                for gitdir in manifest:
                    if 'symlinks' in manifest[gitdir] and repo in manifest[gitdir]['symlinks']:
                        found = True
                        manifest[gitdir]['symlinks'].remove(repo)
                        if not len(manifest[gitdir]['symlinks']):
                            manifest[gitdir].pop('symlinks')
                        logger.info(' manifest: removed symlink %s->%s', repo, gitdir)
                if not found:
                    logger.info(' manifest: %s not in manifest', repo)

        # XXX: need to add logic to make sure we don't break the world
        # by removing a repository used as a reference for others
        grokmirror.write_manifest(manifile, manifest, pretty=pretty)
        grokmirror.manifest_unlock(manifile)
        return 0

    gitdirs = list()

    if purge or not len(paths) or not len(manifest):
        # We automatically purge when we do a full tree walk
        gitdirs.extend(grokmirror.find_all_gitdirs(toplevel, ignore=ignore, exclude_objstore=True))
        purge_manifest(manifest, toplevel, gitdirs)

    if len(manifest) and len(paths):
        # limit ourselves to passed dirs only when there is something
        # in the manifest. This precaution makes sure we regenerate the
        # whole file when there is nothing in it or it can't be parsed.
        for apath in paths:
            arealpath = os.path.realpath(apath)
            if apath != arealpath and os.path.islink(apath):
                # keep the symlink itself so it is recorded as a symlink
                gitdirs.append(apath)
            else:
                gitdirs.append(arealpath)

    symlinks = list()
    tofetch = set()
    for gitdir in gitdirs:
        # check to make sure this gitdir is ok to export
        if check_export_ok and not os.path.exists(os.path.join(gitdir, 'git-daemon-export-ok')):
            # is it curently in the manifest?
            repo = '/' + os.path.relpath(gitdir, toplevel)
            if repo in manifest:
                logger.info(' manifest: removed %s (no longer exported)', repo)
                manifest.pop(repo)

            # XXX: need to add logic to make sure we don't break the world
            # by removing a repository used as a reference for others
            # also make sure we clean up any dangling symlinks
            continue

        if os.path.islink(gitdir):
            symlinks.append(gitdir)
        else:
            update_manifest(manifest, toplevel, gitdir, usenow, ignorerefs)
            if fetchobst:
                # Do it after we're done with manifest, to avoid keeping it locked
                tofetch.add(gitdir)

    if len(symlinks):
        set_symlinks(manifest, toplevel, symlinks)

    grokmirror.write_manifest(manifile, manifest, pretty=pretty)
    grokmirror.manifest_unlock(manifile)

    fetched = set()
    for gitdir in tofetch:
        altrepo = grokmirror.get_altrepo(gitdir)
        if altrepo in fetched:
            continue
        if altrepo and grokmirror.is_obstrepo(altrepo):
            try:
                grokmirror.lock_repo(altrepo, nonblocking=True)
                try:
                    logger.info(' manifest: objstore %s -> %s', gitdir, os.path.basename(altrepo))
                    grokmirror.fetch_objstore_repo(altrepo, gitdir, use_plumbing=objstore_uses_plumbing)
                finally:
                    # never keep the objstore locked if the fetch blows up
                    grokmirror.unlock_repo(altrepo)
                fetched.add(altrepo)
            except IOError:
                # grok-fsck will fetch this one, then
                pass

    elapsed = datetime.datetime.now() - startt
    if len(gitdirs) > 1:
        logger.info('Updated %s records in %ds', len(gitdirs), elapsed.total_seconds())
    else:
        logger.info('Done in %0.2fs', elapsed.total_seconds())


def command():
    """Console entry point for grok-manifest."""
    opts = parse_args()

    return grok_manifest(
        opts.manifile, opts.toplevel, paths=opts.paths, logfile=opts.logfile,
        usenow=opts.usenow, check_export_ok=opts.check_export_ok,
        purge=opts.purge, remove=opts.remove, pretty=opts.pretty,
        ignore=opts.ignore, wait=opts.wait, verbose=opts.verbose,
        fetchobst=opts.fetchobst, ignorerefs=opts.ignore_refs)
ignore=opts.ignore, wait=opts.wait, verbose=opts.verbose, 337 | fetchobst=opts.fetchobst, ignorerefs=opts.ignore_refs) 338 | 339 | 340 | if __name__ == '__main__': 341 | command() 342 | -------------------------------------------------------------------------------- /grokmirror/pi_indexer.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | # 4 | # A hook to properly initialize and index mirrored public-inbox repositories. 5 | 6 | import logging 7 | import os 8 | import sys 9 | import re 10 | import shutil 11 | import pathlib 12 | 13 | import grokmirror 14 | 15 | from fnmatch import fnmatch 16 | from typing import Tuple 17 | 18 | # default basic logger. We override it later. 19 | logger = logging.getLogger(__name__) 20 | 21 | 22 | def get_pi_repos(inboxdir: str) -> list: 23 | members = list() 24 | at = 0 25 | while True: 26 | repodir = os.path.join(inboxdir, 'git', '%d.git' % at) 27 | if not os.path.isdir(repodir): 28 | break 29 | members.append(repodir) 30 | at += 1 31 | 32 | return members 33 | 34 | 35 | def index_pi_inbox(fullpath: str, opts) -> bool: 36 | gdir, pdir = get_git_pi_dir(opts, fullpath) 37 | logger.info('pi-index %s', gdir) 38 | success = True 39 | # Check that msgmap.sqlite3 is there 40 | msgmapdbf = os.path.join(pdir, 'msgmap.sqlite3') 41 | if not os.path.exists(msgmapdbf): 42 | logger.info('Inboxdir not initialized: %s', pdir) 43 | return False 44 | 45 | piargs = ['public-inbox-index', '--no-update-extindex'] 46 | if opts.jobs: 47 | piargs += ['--jobs', str(opts.jobs)] 48 | if opts.nofsync: 49 | piargs += ['--no-fsync'] 50 | 51 | piargs.append(pdir) 52 | 53 | env = { 54 | 'PI_CONFIG': opts.piconfig, 55 | 'PATH': os.getenv('PATH', '/bin:/usr/bin:/usr/local/bin'), 56 | } 57 | try: 58 | ec, out, err = grokmirror.run_shell_command(piargs, env=env) 59 | if ec > 0: 60 | logger.critical('Unable to index public-inbox repo %s: %s', pdir, err) 61 | success = False 62 
| except Exception as ex: # noqa 63 | logger.critical('Unable to index public-inbox repo %s: %s', pdir, ex) 64 | success = False 65 | 66 | return success 67 | 68 | 69 | def init_pi_inbox(gdir: str, pdir: str, opts) -> bool: 70 | # for boost values, we look at the number of entries 71 | boosts = list() 72 | if opts.listid_priority: 73 | boosts = list(reversed(opts.listid_priority.split(','))) 74 | 75 | logger.info('pi-init %s', gdir) 76 | # Lock all member repos so they don't get updated in the process 77 | pi_repos = get_pi_repos(gdir) 78 | origins = None 79 | gitargs = ['show', 'refs/meta/origins:i'] 80 | # We reverse because we want to give priority to the latest origins info 81 | success = True 82 | for subrepo in reversed(pi_repos): 83 | grokmirror.lock_repo(subrepo) 84 | if not origins: 85 | ec, out, err = grokmirror.run_git_command(subrepo, gitargs) 86 | if out: 87 | origins = out 88 | inboxname = os.path.basename(gdir) 89 | if not origins and opts.origin_host: 90 | # Attempt to grab the config sample from remote 91 | origin_host = opts.origin_host.rstrip('/') 92 | rconfig = f'{origin_host}/{inboxname}/_/text/config/raw' 93 | try: 94 | ses = grokmirror.get_requests_session() 95 | res = ses.get(rconfig) 96 | res.raise_for_status() 97 | origins = res.text 98 | except: # noqa 99 | logger.critical('ERROR: Not able to get origins info for %s, skipping', gdir) 100 | success = False 101 | 102 | if origins: 103 | # Okay, let's process it 104 | # Generate a config entry 105 | if opts.local_toplevel: 106 | local_toplevel = opts.local_toplevel.rstrip('/') 107 | local_url = f'{local_toplevel}/{inboxname}' 108 | else: 109 | local_url = inboxname 110 | extraopts = list() 111 | acceptopts = {'listid'} 112 | if opts.extra_cfgopts: 113 | acceptopts.update(opts.extra_cfgopts.split(',')) 114 | description = None 115 | newsgroup = None 116 | listid = None 117 | addresses = list() 118 | for line in origins.split('\n'): 119 | line = line.strip() 120 | if not line or 
line.startswith(';') or line.startswith('#') or line.startswith('[publicinbox'): 121 | continue 122 | try: 123 | opt, val = line.split('=', maxsplit=1) 124 | opt = opt.strip() 125 | val = val.strip() 126 | if opt == 'address': 127 | addresses.append(val) 128 | continue 129 | if opt == 'description': 130 | description = val 131 | continue 132 | if opt == 'newsgroup': 133 | newsgroup = val 134 | continue 135 | if opt == 'listid' and boosts: 136 | listid = val 137 | # Calculate the boost value 138 | boostval = 1 139 | for patt in boosts: 140 | if fnmatch(val, patt): 141 | boostval = boosts.index(patt) + 10 142 | break 143 | extraopts.append(('boost', str(boostval))) 144 | 145 | if opt in acceptopts: 146 | logger.debug('Accepting extra opt %s=%s', opt, val) 147 | extraopts.append((opt, val)) 148 | 149 | except ValueError: 150 | logger.critical('Invalid config line: %s', line) 151 | success = False 152 | 153 | if not success: 154 | break 155 | 156 | if not addresses: 157 | addresses = [f'{inboxname}@localhost'] 158 | if not description: 159 | if listid: 160 | description = f'{listid} archive mirror' 161 | else: 162 | description = f'{inboxname} archive mirror' 163 | 164 | if success: 165 | if gdir != pdir: 166 | # public-inbox databases are separate from the main git trees 167 | pathlib.Path(pdir).mkdir(parents=True, exist_ok=True) 168 | # Symlink the git subpath 169 | if not os.path.islink(os.path.join(pdir, 'git')): 170 | os.symlink(os.path.join(gdir, 'git'), os.path.join(pdir, 'git')) 171 | 172 | # Now we run public-inbox-init 173 | piargs = ['public-inbox-init', '-V2', '-L', opts.indexlevel] 174 | if newsgroup: 175 | piargs += ['--ng', newsgroup] 176 | for opt, val in extraopts: 177 | piargs += ['-c', f'{opt}={val}'] 178 | piargs += [inboxname, pdir, local_url] 179 | piargs += addresses 180 | logger.debug('piargs=%s', piargs) 181 | 182 | env = { 183 | 'PI_CONFIG': opts.piconfig, 184 | 'PATH': os.getenv('PATH', '/bin:/usr/bin:/usr/local/bin'), 185 | } 186 | try: 187 
| ec, out, err = grokmirror.run_shell_command(piargs, env=env) 188 | if ec > 0: 189 | logger.critical('Unable to init public-inbox repo %s: %s', pdir, err) 190 | success = False 191 | except Exception as ex: # noqa 192 | logger.critical('Unable to init public-inbox repo %s: %s', pdir, ex) 193 | success = False 194 | 195 | if success: 196 | with open(os.path.join(pdir, 'description'), 'w') as fh: 197 | fh.write(description) 198 | 199 | # Unlock all members 200 | for subrepo in pi_repos: 201 | grokmirror.unlock_repo(subrepo) 202 | 203 | return success 204 | 205 | 206 | def get_inboxdirs(repos: list) -> set: 207 | inboxdirs = set() 208 | for repo in repos: 209 | # Check that it's a public-inbox repo -- it should have .../git/N.git at the end 210 | matches = re.search(r'(/.*)/git/\d+\.git', repo) 211 | if matches: 212 | inboxdirs.add(matches.groups()[0]) 213 | 214 | return inboxdirs 215 | 216 | 217 | def process_inboxdirs(inboxdirs: set, opts, init: bool = False): 218 | if not len(inboxdirs): 219 | logger.info('Nothing to do') 220 | sys.exit(0) 221 | 222 | # Init all new repos first, and then index them one by one 223 | toindex = set() 224 | for inboxdir in inboxdirs: 225 | gdir, pdir = get_git_pi_dir(opts, inboxdir) 226 | # Check if msgmap.sqlite3 is there -- it can be a clone of a new epoch, 227 | # so no initialization is necessary 228 | msgmapdbf = os.path.join(pdir, 'msgmap.sqlite3') 229 | if init and not os.path.exists(msgmapdbf): 230 | # Initialize this public-inbox repo 231 | if not init_pi_inbox(gdir, pdir, opts): 232 | logger.critical('Could not init %s', inboxdir) 233 | continue 234 | if os.path.exists(msgmapdbf): 235 | toindex.add(inboxdir) 236 | 237 | for inboxdir in toindex: 238 | if not index_pi_inbox(inboxdir, opts): 239 | logger.critical('Unable to index %s', inboxdir) 240 | 241 | 242 | def get_git_pi_dir(opts, fullpath: str) -> Tuple[str, str]: 243 | fullpath = os.path.realpath(fullpath) 244 | if not opts.pitoplevel: 245 | # Public-inbox is in the 
same dir 246 | return fullpath, fullpath 247 | # Public-inbox is in a separate dir 248 | pitop = os.path.realpath(opts.pitoplevel) 249 | groktop = os.path.realpath(opts.toplevel) 250 | inboxname = os.path.relpath(fullpath, groktop) 251 | return fullpath, os.path.join(pitop, inboxname) 252 | 253 | 254 | def cmd_init(opts): 255 | if opts.inboxdir: 256 | inboxdirs = get_inboxdirs(opts.inboxdir) 257 | if opts.forceinit: 258 | inboxdir = inboxdirs.pop() 259 | gdir, pdir = get_git_pi_dir(opts, inboxdir) 260 | msgmapdbf = os.path.join(pdir, 'msgmap.sqlite3') 261 | # Delete msgmap and xap15 if present and reinitialize 262 | if os.path.exists(msgmapdbf): 263 | logger.critical('Reinitializing %s', opts.inboxdir) 264 | os.unlink(msgmapdbf) 265 | if os.path.exists(os.path.join(pdir, 'xap15')): 266 | shutil.rmtree(os.path.join(pdir, 'xap15')) 267 | elif not sys.stdin.isatty(): 268 | repos = list() 269 | for line in sys.stdin.read().split('\n'): 270 | if not line: 271 | continue 272 | repos.append(line) 273 | inboxdirs = get_inboxdirs(repos) 274 | else: 275 | logger.info('Nothing to do') 276 | sys.exit(0) 277 | 278 | process_inboxdirs(inboxdirs, opts, init=True) 279 | 280 | 281 | def cmd_update(opts): 282 | inboxdirs = get_inboxdirs(opts.repo) 283 | process_inboxdirs(inboxdirs, opts) 284 | 285 | 286 | def cmd_extindex(opts): 287 | env = { 288 | 'PI_CONFIG': opts.piconfig, 289 | 'PATH': os.getenv('PATH', '/bin:/usr/bin:/usr/local/bin'), 290 | } 291 | logger.info('Running extindex --all') 292 | piargs = ['public-inbox-extindex', '-L', opts.indexlevel, '--all'] 293 | if opts.jobs: 294 | piargs += ['--jobs', str(opts.jobs)] 295 | if opts.nofsync: 296 | piargs += ['--no-fsync'] 297 | try: 298 | ec, out, err = grokmirror.run_shell_command(piargs, env=env) 299 | if ec > 0: 300 | logger.critical('Unable to run public-inbox-extindex: %s', err) 301 | sys.exit(1) 302 | except Exception as ex: # noqa 303 | logger.critical('Unable to run public-inbox-extindex: %s', ex) 304 | sys.exit(1) 305 
| 306 | 307 | def command(): 308 | import argparse 309 | global logger 310 | 311 | # noinspection PyTypeChecker 312 | ap = argparse.ArgumentParser(prog='grok-pi-indexer', 313 | description='Properly initialize and update mirrored public-inbox repositories', 314 | formatter_class=argparse.ArgumentDefaultsHelpFormatter) 315 | ap.add_argument('-v', '--verbose', action='store_true', 316 | default=False, 317 | help='Be verbose and tell us what you are doing') 318 | ap.add_argument('-c', '--pi-config', dest='piconfig', required=True, 319 | help='Location of the public-inbox configuration file') 320 | ap.add_argument('-t', '--toplevel', dest='toplevel', required=True, 321 | help='Path to git repository mirror toplevel') 322 | ap.add_argument('-p', '--pi-toplevel', dest='pitoplevel', 323 | help='Path to public-inbox toplevel, if separate') 324 | ap.add_argument('-l', '--logfile', 325 | help='Log activity in this log file') 326 | ap.add_argument('-L', '--indexlevel', default='full', 327 | help='Indexlevel to use with public-inbox (full, medium, basic)') 328 | ap.add_argument('-j', '--jobs', type=int, 329 | help='The --jobs parameter to pass to public-inbox') 330 | ap.add_argument('--no-fsync', dest='nofsync', action='store_true', default=False, 331 | help='Use --no-fsync when invoking public-inbox') 332 | 333 | sp = ap.add_subparsers(help='sub-command help', dest='subcmd') 334 | sp_init = sp.add_parser('init', help='Run public-inbox-init+index on repositories passed via stdin') 335 | 336 | sp_init.add_argument('--local-toplevel', dest='local_toplevel', default='', 337 | help='URL of the local mirror toplevel (omit if serving from /)') 338 | sp_init.add_argument('--origin-hostname', dest='origin_host', 339 | default='https://lore.kernel.org/', 340 | help='URL of the origin toplevel serving config files') 341 | sp_init.add_argument('--listid-priority', dest='listid_priority', 342 | default='*.linux.dev,*.kernel.org', 343 | help='List-Ids priority order (comma-separated, can 
use shell globbing)') 344 | sp_init.add_argument('--extra-cfgopts', dest='extra_cfgopts', 345 | default='indexheader,replyto', 346 | help='Extra config options to accept from remote (comma-separated)') 347 | sp_init.add_argument('--force-reinit', dest='forceinit', action='store_true', default=False, 348 | help='Force a full (re-)init of an inboxdir') 349 | sp_init.add_argument('inboxdir', nargs='?', 350 | help='Path to toplevel inboxdir (non-hook mode)') 351 | sp_init.set_defaults(func=cmd_init) 352 | 353 | sp_update = sp.add_parser('update', help='Run public-inbox-index on passed repository path') 354 | sp_update.add_argument('repo', nargs=1, 355 | help='Full path to foo/git/N.git public-inbox repository') 356 | sp_update.set_defaults(func=cmd_update) 357 | 358 | sp_extindex = sp.add_parser('extindex', help='Run extindex on all inboxes') 359 | sp_extindex.set_defaults(func=cmd_extindex) 360 | 361 | opts = ap.parse_args() 362 | if 'func' not in opts: 363 | ap.print_help() 364 | sys.exit(1) 365 | 366 | logfile = opts.logfile 367 | if opts.verbose: 368 | loglevel = logging.DEBUG 369 | else: 370 | loglevel = logging.INFO 371 | 372 | logger = grokmirror.init_logger('pi-indexer', logfile, loglevel, opts.verbose) 373 | opts.func(opts) 374 | 375 | 376 | if __name__ == '__main__': 377 | command() 378 | -------------------------------------------------------------------------------- /grokmirror/pi_piper.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | # 4 | # This is a ready-made post_update_hook script for piping messages from 5 | # mirrored public-inbox repositories to arbitrary commands (e.g. procmail). 6 | # 7 | 8 | __author__ = 'Konstantin Ryabitsev ' 9 | 10 | import os 11 | import sys 12 | import grokmirror 13 | import fnmatch 14 | import logging 15 | import shlex 16 | 17 | from typing import Optional 18 | 19 | # default basic logger. We override it later. 
20 | logger = logging.getLogger(__name__) 21 | 22 | 23 | def git_get_message_from_pi(fullpath: str, commit_id: str) -> bytes: 24 | logger.debug('Getting %s:m from %s', commit_id, fullpath) 25 | args = ['show', f'{commit_id}:m'] 26 | ecode, out, err = grokmirror.run_git_command(fullpath, args, decode=False) 27 | if ecode > 0: 28 | logger.debug('Could not get the message, error below') 29 | logger.debug(err.decode()) 30 | raise KeyError('Could not find %s in %s' % (commit_id, fullpath)) 31 | return out 32 | 33 | 34 | def git_get_new_revs(fullpath: str, pipelast: Optional[int] = None) -> list: 35 | statf = os.path.join(fullpath, 'pi-piper.latest') 36 | if pipelast: 37 | rev_range = '-n %d' % pipelast 38 | else: 39 | with open(statf, 'r') as fh: 40 | latest = fh.read().strip() 41 | rev_range = f'{latest}..' 42 | 43 | args = ['rev-list', '--pretty=oneline', '--reverse', rev_range, 'master'] 44 | ecode, out, err = grokmirror.run_git_command(fullpath, args) 45 | if ecode > 0: 46 | raise KeyError('Could not iterate %s in %s' % (rev_range, fullpath)) 47 | 48 | newrevs = list() 49 | if out: 50 | for line in out.split('\n'): 51 | (commit_id, logmsg) = line.split(' ', 1) 52 | logger.debug('commit_id=%s, subject=%s', commit_id, logmsg) 53 | newrevs.append((commit_id, logmsg)) 54 | 55 | return newrevs 56 | 57 | 58 | def reshallow(repo: str, commit_id: str) -> int: 59 | with open(os.path.join(repo, 'shallow'), 'w') as fh: 60 | fh.write(commit_id) 61 | fh.write('\n') 62 | logger.info(' prune: %s ', repo) 63 | ecode, out, err = grokmirror.run_git_command(repo, ['gc', '--prune=now']) 64 | return ecode 65 | 66 | 67 | def init_piper_tracking(repo: str, shallow: bool) -> bool: 68 | logger.info('Initial setup for %s', repo) 69 | args = ['rev-list', '-n', '1', 'master'] 70 | ecode, out, err = grokmirror.run_git_command(repo, args) 71 | if ecode > 0 or not out: 72 | logger.info('Could not list revs in %s', repo) 73 | return False 74 | # Just write latest into the tracking file and return 
75 | latest = out.strip() 76 | statf = os.path.join(repo, 'pi-piper.latest') 77 | with open(statf, 'w') as fh: 78 | fh.write(latest) 79 | if shallow: 80 | reshallow(repo, latest) 81 | return True 82 | 83 | 84 | def run_pi_repo(repo: str, pipedef: str, dryrun: bool = False, shallow: bool = False, 85 | pipelast: Optional[int] = None) -> None: 86 | logger.info('Checking %s', repo) 87 | sp = shlex.shlex(pipedef, posix=True) 88 | sp.whitespace_split = True 89 | args = list(sp) 90 | if not os.access(args[0], os.EX_OK): 91 | logger.critical('Cannot execute %s', pipedef) 92 | sys.exit(1) 93 | 94 | statf = os.path.join(repo, 'pi-piper.latest') 95 | if not os.path.exists(statf): 96 | if dryrun: 97 | logger.info('Would have set up piper for %s [DRYRUN]', repo) 98 | return 99 | if not init_piper_tracking(repo, shallow): 100 | logger.critical('Unable to set up piper for %s', repo) 101 | return 102 | 103 | try: 104 | revlist = git_get_new_revs(repo, pipelast=pipelast) 105 | except KeyError: 106 | # this could have happened if the public-inbox repository 107 | # got rebased, e.g. due to GDPR-induced history editing. 108 | # For now, bluntly handle this by getting rid of our 109 | # status file and pretending we just started new. 110 | # XXX: in reality, we could handle this better by keeping track 111 | # of the subject line of the latest message we processed, and 112 | # then going through history to find the new commit-id of that 113 | # message. Unless, of course, that's the exact message that got 114 | # deleted in the first place. :/ 115 | # This also makes it hard with shallow repos, since we'd have 116 | # to unshallow them first in order to find that message. 
117 | logger.critical('Assuming the repository got rebased, dropping all history.') 118 | os.unlink(statf) 119 | if not dryrun: 120 | init_piper_tracking(repo, shallow) 121 | revlist = git_get_new_revs(repo) 122 | 123 | if not revlist: 124 | return 125 | 126 | logger.info('Processing %s commits', len(revlist)) 127 | 128 | latest_good = None 129 | ecode = 0 130 | for commit_id, subject in revlist: 131 | try: 132 | msgbytes = git_get_message_from_pi(repo, commit_id) 133 | if dryrun: 134 | logger.info(' piping: %s (%s b) [DRYRUN]', commit_id, len(msgbytes)) 135 | logger.debug(' subject: %s', subject) 136 | else: 137 | logger.info(' piping: %s (%s b)', commit_id, len(msgbytes)) 138 | logger.debug(' subject: %s', subject) 139 | ecode, out, err = grokmirror.run_shell_command(args, stdin=msgbytes) 140 | if ecode > 0: 141 | logger.info('Error running %s', pipedef) 142 | logger.info(err) 143 | break 144 | latest_good = commit_id 145 | except KeyError: 146 | logger.info('Skipping %s', commit_id) 147 | 148 | if latest_good and not dryrun: 149 | with open(statf, 'w') as fh: 150 | fh.write(latest_good) 151 | logger.info('Wrote %s', statf) 152 | if ecode == 0 and shallow: 153 | reshallow(repo, latest_good) 154 | 155 | sys.exit(ecode) 156 | 157 | 158 | def command(): 159 | import argparse 160 | from configparser import ConfigParser, ExtendedInterpolation 161 | 162 | global logger 163 | 164 | # noinspection PyTypeChecker 165 | op = argparse.ArgumentParser(prog='grok-pi-piper', 166 | description='Pipe new messages from public-inbox repositories to arbitrary commands', 167 | formatter_class=argparse.ArgumentDefaultsHelpFormatter) 168 | op.add_argument('-v', '--verbose', action='store_true', 169 | default=False, 170 | help='Be verbose and tell us what you are doing') 171 | op.add_argument('-d', '--dry-run', dest='dryrun', action='store_true', 172 | default=False, 173 | help='Do a dry-run and just show what would be done') 174 | op.add_argument('-c', '--config', required=True, 175 | 
help='Location of the configuration file') 176 | op.add_argument('-l', '--pipe-last', dest='pipelast', type=int, default=None, 177 | help='Force pipe last NN messages in the list, regardless of tracking') 178 | op.add_argument('repo', 179 | help='Full path to foo/git/N.git public-inbox repository') 180 | op.add_argument('--version', action='version', version=grokmirror.VERSION) 181 | 182 | opts = op.parse_args() 183 | 184 | cfgfile = os.path.expanduser(opts.config) 185 | if not cfgfile: 186 | sys.stderr.write('ERORR: File does not exist: %s\n' % cfgfile) 187 | sys.exit(1) 188 | config = ConfigParser(interpolation=ExtendedInterpolation()) 189 | config.read(os.path.expanduser(cfgfile)) 190 | 191 | # Find out the section that we want from the config file 192 | section = 'DEFAULT' 193 | for sectname in config.sections(): 194 | if fnmatch.fnmatch(opts.repo, f'*/{sectname}/git/*.git'): 195 | section = sectname 196 | 197 | pipe = config[section].get('pipe') 198 | if pipe == 'None': 199 | # Quick exit 200 | sys.exit(0) 201 | 202 | logfile = config[section].get('log') 203 | if config[section].get('loglevel') == 'debug': 204 | loglevel = logging.DEBUG 205 | else: 206 | loglevel = logging.INFO 207 | 208 | shallow = config[section].getboolean('shallow', False) # noqa 209 | 210 | logger = grokmirror.init_logger('pull', logfile, loglevel, opts.verbose) 211 | 212 | run_pi_repo(opts.repo, pipe, dryrun=opts.dryrun, shallow=shallow, pipelast=opts.pipelast) 213 | 214 | 215 | if __name__ == '__main__': 216 | command() 217 | -------------------------------------------------------------------------------- /man/grok-bundle.1: -------------------------------------------------------------------------------- 1 | .\" Man page generated from reStructuredText. 2 | . 3 | .TH GROK-BUNDLE 1 "2020-09-04" "2.0.0" "" 4 | .SH NAME 5 | GROK-BUNDLE \- Create clone.bundle files for use with "repo" 6 | . 7 | .nr rst2man-indent-level 0 8 | . 
9 | .de1 rstReportMargin 10 | \\$1 \\n[an-margin] 11 | level \\n[rst2man-indent-level] 12 | level margin: \\n[rst2man-indent\\n[rst2man-indent-level]] 13 | - 14 | \\n[rst2man-indent0] 15 | \\n[rst2man-indent1] 16 | \\n[rst2man-indent2] 17 | .. 18 | .de1 INDENT 19 | .\" .rstReportMargin pre: 20 | . RS \\$1 21 | . nr rst2man-indent\\n[rst2man-indent-level] \\n[an-margin] 22 | . nr rst2man-indent-level +1 23 | .\" .rstReportMargin post: 24 | .. 25 | .de UNINDENT 26 | . RE 27 | .\" indent \\n[an-margin] 28 | .\" old: \\n[rst2man-indent\\n[rst2man-indent-level]] 29 | .nr rst2man-indent-level -1 30 | .\" new: \\n[rst2man-indent\\n[rst2man-indent-level]] 31 | .in \\n[rst2man-indent\\n[rst2man-indent-level]]u 32 | .. 33 | .SH SYNOPSIS 34 | .INDENT 0.0 35 | .INDENT 3.5 36 | grok\-bundle [options] \-c grokmirror.conf \-o path 37 | .UNINDENT 38 | .UNINDENT 39 | .SH DESCRIPTION 40 | .sp 41 | Android\(aqs "repo" tool will check for the presence of clone.bundle files 42 | before performing a fresh git clone. This is done in order to offload 43 | most of the git traffic to a CDN and reduce the load on git servers 44 | themselves. 45 | .sp 46 | This command will generate clone.bundle files in a hierarchy expected by 47 | repo. You can then sync the output directory to a CDN service. 
48 | .SH OPTIONS 49 | .INDENT 0.0 50 | .INDENT 3.5 51 | .INDENT 0.0 52 | .TP 53 | .B \-h\fP,\fB \-\-help 54 | show this help message and exit 55 | .TP 56 | .B \-v\fP,\fB \-\-verbose 57 | Be verbose and tell us what you are doing (default: False) 58 | .TP 59 | .BI \-c \ CONFIG\fP,\fB \ \-\-config \ CONFIG 60 | Location of the configuration file 61 | .TP 62 | .BI \-o \ OUTDIR\fP,\fB \ \-\-outdir \ OUTDIR 63 | Location where to store bundle files 64 | .TP 65 | .BI \-g \ GITARGS\fP,\fB \ \-\-gitargs \ GITARGS 66 | extra args to pass to git (default: \-c core.compression=9) 67 | .TP 68 | .BI \-r \ REVLISTARGS\fP,\fB \ \-\-revlistargs \ REVLISTARGS 69 | Rev\-list args to use (default: \-\-branches HEAD) 70 | .TP 71 | .BI \-s \ MAXSIZE\fP,\fB \ \-\-maxsize \ MAXSIZE 72 | Maximum size of git repositories to bundle (in GiB) (default: 2) 73 | .TP 74 | .BI \-i\fP,\fB \-\-include \ INCLUDE 75 | List repositories to bundle (accepts shell globbing) (default: *) 76 | .UNINDENT 77 | .UNINDENT 78 | .UNINDENT 79 | .SH EXAMPLES 80 | .INDENT 0.0 81 | .INDENT 3.5 82 | grok\-bundle \-c grokmirror.conf \-o /var/www/bundles \-i /pub/scm/linux/kernel/git/torvalds/linux.git /pub/scm/linux/kernel/git/stable/linux.git /pub/scm/linux/kernel/git/next/linux\-next.git 83 | .UNINDENT 84 | .UNINDENT 85 | .SH SEE ALSO 86 | .INDENT 0.0 87 | .IP \(bu 2 88 | grok\-pull(1) 89 | .IP \(bu 2 90 | grok\-manifest(1) 91 | .IP \(bu 2 92 | grok\-fsck(1) 93 | .IP \(bu 2 94 | git(1) 95 | .UNINDENT 96 | .SH SUPPORT 97 | .sp 98 | Email \fI\%tools@linux.kernel.org\fP\&. 99 | .SH AUTHOR 100 | mricon@kernel.org 101 | 102 | License: GPLv3+ 103 | .SH COPYRIGHT 104 | The Linux Foundation and contributors 105 | .\" Generated by docutils manpage writer. 106 | . 
107 | -------------------------------------------------------------------------------- /man/grok-bundle.1.rst: -------------------------------------------------------------------------------- 1 | GROK-BUNDLE 2 | =========== 3 | ------------------------------------------------- 4 | Create clone.bundle files for use with "repo" 5 | ------------------------------------------------- 6 | 7 | :Author: mricon@kernel.org 8 | :Date: 2020-09-04 9 | :Copyright: The Linux Foundation and contributors 10 | :License: GPLv3+ 11 | :Version: 2.0.0 12 | :Manual section: 1 13 | 14 | SYNOPSIS 15 | -------- 16 | grok-bundle [options] -c grokmirror.conf -o path 17 | 18 | DESCRIPTION 19 | ----------- 20 | Android's "repo" tool will check for the presence of clone.bundle files 21 | before performing a fresh git clone. This is done in order to offload 22 | most of the git traffic to a CDN and reduce the load on git servers 23 | themselves. 24 | 25 | This command will generate clone.bundle files in a hierarchy expected by 26 | repo. You can then sync the output directory to a CDN service. 
27 | 28 | OPTIONS 29 | ------- 30 | 31 | -h, --help show this help message and exit 32 | -v, --verbose Be verbose and tell us what you are doing (default: False) 33 | -c CONFIG, --config CONFIG 34 | Location of the configuration file 35 | -o OUTDIR, --outdir OUTDIR 36 | Location where to store bundle files 37 | -g GITARGS, --gitargs GITARGS 38 | extra args to pass to git (default: -c core.compression=9) 39 | -r REVLISTARGS, --revlistargs REVLISTARGS 40 | Rev-list args to use (default: --branches HEAD) 41 | -s MAXSIZE, --maxsize MAXSIZE 42 | Maximum size of git repositories to bundle (in GiB) (default: 2) 43 | -i, --include INCLUDE 44 | List repositories to bundle (accepts shell globbing) (default: \*) 45 | 46 | EXAMPLES 47 | -------- 48 | 49 | grok-bundle -c grokmirror.conf -o /var/www/bundles -i /pub/scm/linux/kernel/git/torvalds/linux.git /pub/scm/linux/kernel/git/stable/linux.git /pub/scm/linux/kernel/git/next/linux-next.git 50 | 51 | SEE ALSO 52 | -------- 53 | * grok-pull(1) 54 | * grok-manifest(1) 55 | * grok-fsck(1) 56 | * git(1) 57 | 58 | SUPPORT 59 | ------- 60 | Email tools@linux.kernel.org. 61 | -------------------------------------------------------------------------------- /man/grok-dumb-pull.1: -------------------------------------------------------------------------------- 1 | .\" Man page generated from reStructuredText. 2 | . 3 | .TH GROK-DUMB-PULL 1 "2020-08-14" "2.0.0" "" 4 | .SH NAME 5 | GROK-DUMB-PULL \- Update git repositories not managed by grokmirror 6 | . 7 | .nr rst2man-indent-level 0 8 | . 9 | .de1 rstReportMargin 10 | \\$1 \\n[an-margin] 11 | level \\n[rst2man-indent-level] 12 | level margin: \\n[rst2man-indent\\n[rst2man-indent-level]] 13 | - 14 | \\n[rst2man-indent0] 15 | \\n[rst2man-indent1] 16 | \\n[rst2man-indent2] 17 | .. 18 | .de1 INDENT 19 | .\" .rstReportMargin pre: 20 | . RS \\$1 21 | . nr rst2man-indent\\n[rst2man-indent-level] \\n[an-margin] 22 | . nr rst2man-indent-level +1 23 | .\" .rstReportMargin post: 24 | .. 
25 | .de UNINDENT 26 | . RE 27 | .\" indent \\n[an-margin] 28 | .\" old: \\n[rst2man-indent\\n[rst2man-indent-level]] 29 | .nr rst2man-indent-level -1 30 | .\" new: \\n[rst2man-indent\\n[rst2man-indent-level]] 31 | .in \\n[rst2man-indent\\n[rst2man-indent-level]]u 32 | .. 33 | .SH SYNOPSIS 34 | .INDENT 0.0 35 | .INDENT 3.5 36 | grok\-dumb\-pull [options] /path/to/repos 37 | .UNINDENT 38 | .UNINDENT 39 | .SH DESCRIPTION 40 | .sp 41 | This is a satellite utility that updates repositories not exported via 42 | grokmirror manifest. You will need to manually clone these repositories 43 | using "git clone \-\-mirror" and then define a cronjob to update them as 44 | frequently as you require. Grok\-dumb\-pull will bluntly execute "git 45 | remote update" in each of them. 46 | .SH OPTIONS 47 | .INDENT 0.0 48 | .INDENT 3.5 49 | .INDENT 0.0 50 | .TP 51 | .B \-\-version 52 | show program\(aqs version number and exit 53 | .TP 54 | .B \-h\fP,\fB \-\-help 55 | show this help message and exit 56 | .TP 57 | .B \-v\fP,\fB \-\-verbose 58 | Be verbose and tell us what you are doing 59 | .TP 60 | .B \-s\fP,\fB \-\-svn 61 | The remotes for these repositories are Subversion 62 | .TP 63 | .BI \-r \ REMOTES\fP,\fB \ \-\-remote\-names\fB= REMOTES 64 | Only fetch remotes matching this name (accepts globbing, 65 | can be passed multiple times) 66 | .TP 67 | .BI \-u \ POSTHOOK\fP,\fB \ \-\-post\-update\-hook\fB= POSTHOOK 68 | Run this hook after each repository is updated. Passes 69 | full path to the repository as the sole argument. 
70 | .TP 71 | .BI \-l \ LOGFILE\fP,\fB \ \-\-logfile\fB= LOGFILE 72 | Put debug logs into this file 73 | .UNINDENT 74 | .UNINDENT 75 | .UNINDENT 76 | .SH EXAMPLES 77 | .sp 78 | The following will update all bare git repositories found in 79 | /path/to/repos hourly, and /path/to/special/repo.git daily, fetching 80 | only the "github" remote: 81 | .INDENT 0.0 82 | .INDENT 3.5 83 | .sp 84 | .nf 85 | .ft C 86 | MAILTO=root 87 | # Update all repositories found in /path/to/repos hourly 88 | 0 * * * * mirror /usr/bin/grok\-dumb\-pull /path/to/repos 89 | # Update /path/to/special/repo.git daily, fetching "github" remote 90 | 0 0 * * * mirror /usr/bin/grok\-dumb\-pull \-r github /path/to/special/repo.git 91 | .ft P 92 | .fi 93 | .UNINDENT 94 | .UNINDENT 95 | .sp 96 | Make sure the user "mirror" (or whichever user you specified) is able to 97 | write to the repos specified. 98 | .SH SEE ALSO 99 | .INDENT 0.0 100 | .IP \(bu 2 101 | grok\-pull(1) 102 | .IP \(bu 2 103 | grok\-manifest(1) 104 | .IP \(bu 2 105 | grok\-fsck(1) 106 | .IP \(bu 2 107 | git(1) 108 | .UNINDENT 109 | .SH SUPPORT 110 | .sp 111 | Email \fI\%tools@linux.kernel.org\fP\&. 112 | .SH AUTHOR 113 | mricon@kernel.org 114 | 115 | License: GPLv3+ 116 | .SH COPYRIGHT 117 | The Linux Foundation and contributors 118 | .\" Generated by docutils manpage writer. 119 | . 
120 | -------------------------------------------------------------------------------- /man/grok-dumb-pull.1.rst: -------------------------------------------------------------------------------- 1 | GROK-DUMB-PULL 2 | ============== 3 | ------------------------------------------------- 4 | Update git repositories not managed by grokmirror 5 | ------------------------------------------------- 6 | 7 | :Author: mricon@kernel.org 8 | :Date: 2020-08-14 9 | :Copyright: The Linux Foundation and contributors 10 | :License: GPLv3+ 11 | :Version: 2.0.0 12 | :Manual section: 1 13 | 14 | SYNOPSIS 15 | -------- 16 | grok-dumb-pull [options] /path/to/repos 17 | 18 | DESCRIPTION 19 | ----------- 20 | This is a satellite utility that updates repositories not exported via 21 | grokmirror manifest. You will need to manually clone these repositories 22 | using "git clone --mirror" and then define a cronjob to update them as 23 | frequently as you require. Grok-dumb-pull will bluntly execute "git 24 | remote update" in each of them. 25 | 26 | 27 | OPTIONS 28 | ------- 29 | --version show program's version number and exit 30 | -h, --help show this help message and exit 31 | -v, --verbose Be verbose and tell us what you are doing 32 | -s, --svn The remotes for these repositories are Subversion 33 | -r REMOTES, --remote-names=REMOTES 34 | Only fetch remotes matching this name (accepts globbing, 35 | can be passed multiple times) 36 | -u POSTHOOK, --post-update-hook=POSTHOOK 37 | Run this hook after each repository is updated. Passes 38 | full path to the repository as the sole argument. 
39 | -l LOGFILE, --logfile=LOGFILE 40 | Put debug logs into this file 41 | 42 | EXAMPLES 43 | -------- 44 | The following will update all bare git repositories found in 45 | /path/to/repos hourly, and /path/to/special/repo.git daily, fetching 46 | only the "github" remote:: 47 | 48 | MAILTO=root 49 | # Update all repositories found in /path/to/repos hourly 50 | 0 * * * * mirror /usr/bin/grok-dumb-pull /path/to/repos 51 | # Update /path/to/special/repo.git daily, fetching "github" remote 52 | 0 0 * * * mirror /usr/bin/grok-dumb-pull -r github /path/to/special/repo.git 53 | 54 | Make sure the user "mirror" (or whichever user you specified) is able to 55 | write to the repos specified. 56 | 57 | SEE ALSO 58 | -------- 59 | * grok-pull(1) 60 | * grok-manifest(1) 61 | * grok-fsck(1) 62 | * git(1) 63 | 64 | SUPPORT 65 | ------- 66 | Email tools@linux.kernel.org. 67 | -------------------------------------------------------------------------------- /man/grok-fsck.1: -------------------------------------------------------------------------------- 1 | .\" Man page generated from reStructuredText. 2 | . 3 | .TH GROK-FSCK 1 "2020-08-14" "2.0.0" "" 4 | .SH NAME 5 | GROK-FSCK \- Optimize mirrored repositories and check for corruption 6 | . 7 | .nr rst2man-indent-level 0 8 | . 9 | .de1 rstReportMargin 10 | \\$1 \\n[an-margin] 11 | level \\n[rst2man-indent-level] 12 | level margin: \\n[rst2man-indent\\n[rst2man-indent-level]] 13 | - 14 | \\n[rst2man-indent0] 15 | \\n[rst2man-indent1] 16 | \\n[rst2man-indent2] 17 | .. 18 | .de1 INDENT 19 | .\" .rstReportMargin pre: 20 | . RS \\$1 21 | . nr rst2man-indent\\n[rst2man-indent-level] \\n[an-margin] 22 | . nr rst2man-indent-level +1 23 | .\" .rstReportMargin post: 24 | .. 25 | .de UNINDENT 26 | . 
RE 27 | .\" indent \\n[an-margin] 28 | .\" old: \\n[rst2man-indent\\n[rst2man-indent-level]] 29 | .nr rst2man-indent-level -1 30 | .\" new: \\n[rst2man-indent\\n[rst2man-indent-level]] 31 | .in \\n[rst2man-indent\\n[rst2man-indent-level]]u 32 | .. 33 | .SH SYNOPSIS 34 | .INDENT 0.0 35 | .INDENT 3.5 36 | grok\-fsck \-c /path/to/grokmirror.conf 37 | .UNINDENT 38 | .UNINDENT 39 | .SH DESCRIPTION 40 | .sp 41 | Git repositories should be routinely repacked and checked for 42 | corruption. This utility will perform the necessary optimizations and 43 | report any problems to the email defined via fsck.report_to (\(aqroot\(aq by 44 | default). It should run weekly from cron or from the systemd timer (see 45 | contrib). 46 | .sp 47 | Please examine the example grokmirror.conf file for various things you 48 | can tweak. 49 | .SH OPTIONS 50 | .INDENT 0.0 51 | .INDENT 3.5 52 | .INDENT 0.0 53 | .TP 54 | .B \-\-version 55 | show program\(aqs version number and exit 56 | .TP 57 | .B \-h\fP,\fB \-\-help 58 | show this help message and exit 59 | .TP 60 | .B \-v\fP,\fB \-\-verbose 61 | Be verbose and tell us what you are doing 62 | .TP 63 | .B \-f\fP,\fB \-\-force 64 | Force immediate run on all repositories. 65 | .TP 66 | .BI \-c \ CONFIG\fP,\fB \ \-\-config\fB= CONFIG 67 | Location of grokmirror.conf 68 | .TP 69 | .B \-\-repack\-only 70 | Only find and repack repositories that need 71 | optimizing (nightly run mode) 72 | .TP 73 | .B \-\-connectivity 74 | (Assumes \-\-force): Run git fsck on all repos, 75 | but only check connectivity 76 | .TP 77 | .B \-\-repack\-all\-quick 78 | (Assumes \-\-force): Do a quick repack of all repos 79 | .TP 80 | .B \-\-repack\-all\-full 81 | (Assumes \-\-force): Do a full repack of all repos 82 | .UNINDENT 83 | .UNINDENT 84 | .UNINDENT 85 | .SH SEE ALSO 86 | .INDENT 0.0 87 | .IP \(bu 2 88 | grok\-manifest(1) 89 | .IP \(bu 2 90 | grok\-pull(1) 91 | .IP \(bu 2 92 | git(1) 93 | .UNINDENT 94 | .SH SUPPORT 95 | .sp 96 | Email \fI\%tools@linux.kernel.org\fP\&.
97 | .SH AUTHOR 98 | mricon@kernel.org 99 | 100 | License: GPLv3+ 101 | .SH COPYRIGHT 102 | The Linux Foundation and contributors 103 | .\" Generated by docutils manpage writer. 104 | . 105 | -------------------------------------------------------------------------------- /man/grok-fsck.1.rst: -------------------------------------------------------------------------------- 1 | GROK-FSCK 2 | ========= 3 | ------------------------------------------------------- 4 | Optimize mirrored repositories and check for corruption 5 | ------------------------------------------------------- 6 | 7 | :Author: mricon@kernel.org 8 | :Date: 2020-08-14 9 | :Copyright: The Linux Foundation and contributors 10 | :License: GPLv3+ 11 | :Version: 2.0.0 12 | :Manual section: 1 13 | 14 | SYNOPSIS 15 | -------- 16 | grok-fsck -c /path/to/grokmirror.conf 17 | 18 | DESCRIPTION 19 | ----------- 20 | Git repositories should be routinely repacked and checked for 21 | corruption. This utility will perform the necessary optimizations and 22 | report any problems to the email defined via fsck.report_to ('root' by 23 | default). It should run weekly from cron or from the systemd timer (see 24 | contrib). 25 | 26 | Please examine the example grokmirror.conf file for various things you 27 | can tweak. 28 | 29 | OPTIONS 30 | ------- 31 | --version show program's version number and exit 32 | -h, --help show this help message and exit 33 | -v, --verbose Be verbose and tell us what you are doing 34 | -f, --force Force immediate run on all repositories. 
35 | -c CONFIG, --config=CONFIG 36 | Location of the configuration file 37 | --repack-only Only find and repack repositories that need 38 | optimizing (nightly run mode) 39 | --connectivity (Assumes --force): Run git fsck on all repos, 40 | but only check connectivity 41 | --repack-all-quick (Assumes --force): Do a quick repack of all repos 42 | --repack-all-full (Assumes --force): Do a full repack of all repos 43 | 44 | SEE ALSO 45 | -------- 46 | * grok-manifest(1) 47 | * grok-pull(1) 48 | * git(1) 49 | 50 | SUPPORT 51 | ------- 52 | Email tools@linux.kernel.org. 53 | -------------------------------------------------------------------------------- /man/grok-manifest.1: -------------------------------------------------------------------------------- 1 | .\" Man page generated from reStructuredText. 2 | . 3 | .TH GROK-MANIFEST 1 "2020-08-14" "2.0.0" "" 4 | .SH NAME 5 | GROK-MANIFEST \- Create manifest for use with grokmirror 6 | . 7 | .nr rst2man-indent-level 0 8 | . 9 | .de1 rstReportMargin 10 | \\$1 \\n[an-margin] 11 | level \\n[rst2man-indent-level] 12 | level margin: \\n[rst2man-indent\\n[rst2man-indent-level]] 13 | - 14 | \\n[rst2man-indent0] 15 | \\n[rst2man-indent1] 16 | \\n[rst2man-indent2] 17 | .. 18 | .de1 INDENT 19 | .\" .rstReportMargin pre: 20 | . RS \\$1 21 | . nr rst2man-indent\\n[rst2man-indent-level] \\n[an-margin] 22 | . nr rst2man-indent-level +1 23 | .\" .rstReportMargin post: 24 | .. 25 | .de UNINDENT 26 | . RE 27 | .\" indent \\n[an-margin] 28 | .\" old: \\n[rst2man-indent\\n[rst2man-indent-level]] 29 | .nr rst2man-indent-level -1 30 | .\" new: \\n[rst2man-indent\\n[rst2man-indent-level]] 31 | .in \\n[rst2man-indent\\n[rst2man-indent-level]]u 32 | .. 33 | .SH SYNOPSIS 34 | .INDENT 0.0 35 | .INDENT 3.5 36 | grok\-manifest [opts] \-m manifest.js[.gz] \-t /path [/path/to/bare.git] 37 | .UNINDENT 38 | .UNINDENT 39 | .SH DESCRIPTION 40 | .sp 41 | Call grok\-manifest from a git post\-update or post\-receive hook to create 42 | the latest repository manifest. 
This manifest file is downloaded by 43 | mirroring systems (if manifest is newer than what they already have) and 44 | used to only clone/pull the repositories that have changed since the 45 | grok\-pull\(aqs last run. 46 | .SH OPTIONS 47 | .INDENT 0.0 48 | .INDENT 3.5 49 | .INDENT 0.0 50 | .TP 51 | .B \-\-version 52 | show program\(aqs version number and exit 53 | .TP 54 | .B \-h\fP,\fB \-\-help 55 | show this help message and exit 56 | .TP 57 | .BI \-\-cfgfile\fB= CFGFILE 58 | Path to grokmirror.conf containing a [manifest] section 59 | .TP 60 | .BI \-m \ MANIFILE\fP,\fB \ \-\-manifest\fB= MANIFILE 61 | Location of manifest.js or manifest.js.gz 62 | .TP 63 | .BI \-t \ TOPLEVEL\fP,\fB \ \-\-toplevel\fB= TOPLEVEL 64 | Top dir where all repositories reside 65 | .TP 66 | .BI \-l \ LOGFILE\fP,\fB \ \-\-logfile\fB= LOGFILE 67 | When specified, will put debug logs in this location 68 | .TP 69 | .B \-c\fP,\fB \-\-check\-export\-ok 70 | Honor the git\-daemon\-export\-ok magic file and 71 | do not export repositories not marked as such 72 | .TP 73 | .B \-n\fP,\fB \-\-use\-now 74 | Use current timestamp instead of parsing commits 75 | .TP 76 | .B \-p\fP,\fB \-\-purge 77 | Purge deleted git repositories from manifest 78 | .TP 79 | .B \-x\fP,\fB \-\-remove 80 | Remove repositories passed as arguments from 81 | the manifest file 82 | .TP 83 | .B \-y\fP,\fB \-\-pretty 84 | Pretty\-print the generated manifest (sort repos 85 | and add indentation). This is much slower, so 86 | should be used with caution on large 87 | collections. 
88 | .TP 89 | .B \-w\fP,\fB \-\-wait\-for\-manifest 90 | When running with arguments, wait if manifest is not 91 | there (can be useful when multiple writers are writing 92 | to the manifest file via NFS) 93 | .TP 94 | .BI \-i \ IGNORE\fP,\fB \ \-\-ignore\-paths\fB= IGNORE 95 | When finding git dirs, ignore these paths (can be used 96 | multiple times, accepts shell\-style globbing) 97 | .TP 98 | .B \-o\fP,\fB \-\-fetch\-objstore 99 | Fetch updates into objstore repo (if used) 100 | .TP 101 | .B \-v\fP,\fB \-\-verbose 102 | Be verbose and tell us what you are doing 103 | .UNINDENT 104 | .UNINDENT 105 | .UNINDENT 106 | .sp 107 | You can set some of these options in a config file that you can pass via 108 | \fB\-\-cfgfile\fP option. See example grokmirror.conf file for 109 | documentation. Values passed via cmdline flags will override the 110 | corresponding config file values. 111 | .SH EXAMPLES 112 | .sp 113 | The examples assume that the repositories are located in 114 | \fB/var/lib/gitolite3/repositories\fP\&. 
115 | .sp 116 | Initial manifest generation: 117 | .INDENT 0.0 118 | .INDENT 3.5 119 | .sp 120 | .nf 121 | .ft C 122 | /usr/bin/grok\-manifest \-m /var/www/html/manifest.js.gz \e 123 | \-t /var/lib/gitolite3/repositories 124 | .ft P 125 | .fi 126 | .UNINDENT 127 | .UNINDENT 128 | .sp 129 | Inside the git hook: 130 | .INDENT 0.0 131 | .INDENT 3.5 132 | .sp 133 | .nf 134 | .ft C 135 | /usr/bin/grok\-manifest \-m /var/www/html/manifest.js.gz \e 136 | \-t /var/lib/gitolite3/repositories \-n \(gapwd\(ga 137 | .ft P 138 | .fi 139 | .UNINDENT 140 | .UNINDENT 141 | .sp 142 | To purge deleted repositories from the manifest, use the \fB\-p\fP flag 143 | when running from cron: 144 | .INDENT 0.0 145 | .INDENT 3.5 146 | .sp 147 | .nf 148 | .ft C 149 | /usr/bin/grok\-manifest \-m /var/www/html/manifest.js.gz \e 150 | \-t /var/lib/gitolite3/repositories \-p 151 | .ft P 152 | .fi 153 | .UNINDENT 154 | .UNINDENT 155 | .sp 156 | You can also add it to the gitolite\(aqs \fBD\fP command using the \fB\-x\fP flag: 157 | .INDENT 0.0 158 | .INDENT 3.5 159 | .sp 160 | .nf 161 | .ft C 162 | /usr/bin/grok\-manifest \-m /var/www/html/manifest.js.gz \e 163 | \-t /var/lib/gitolite3/repositories \e 164 | \-x $repo.git 165 | .ft P 166 | .fi 167 | .UNINDENT 168 | .UNINDENT 169 | .sp 170 | To troubleshoot potential problems, you can pass \fB\-l\fP parameter to 171 | grok\-manifest, just make sure the user executing the hook command (user 172 | git or gitolite, for example) is able to write to that location: 173 | .INDENT 0.0 174 | .INDENT 3.5 175 | .sp 176 | .nf 177 | .ft C 178 | /usr/bin/grok\-manifest \-m /var/www/html/manifest.js.gz \e 179 | \-t /var/lib/gitolite3/repositories \e 180 | \-l /var/log/grokmirror/grok\-manifest.log \-n \(gapwd\(ga 181 | .ft P 182 | .fi 183 | .UNINDENT 184 | .UNINDENT 185 | .SH SEE ALSO 186 | .INDENT 0.0 187 | .IP \(bu 2 188 | grok\-pull(1) 189 | .IP \(bu 2 190 | git(1) 191 | .UNINDENT 192 | .SH SUPPORT 193 | .sp 194 | Email \fI\%tools@linux.kernel.org\fP\&. 
195 | .SH AUTHOR 196 | mricon@kernel.org 197 | 198 | License: GPLv3+ 199 | .SH COPYRIGHT 200 | The Linux Foundation and contributors 201 | .\" Generated by docutils manpage writer. 202 | . 203 | -------------------------------------------------------------------------------- /man/grok-manifest.1.rst: -------------------------------------------------------------------------------- 1 | GROK-MANIFEST 2 | ============= 3 | --------------------------------------- 4 | Create manifest for use with grokmirror 5 | --------------------------------------- 6 | 7 | :Author: mricon@kernel.org 8 | :Date: 2020-08-14 9 | :Copyright: The Linux Foundation and contributors 10 | :License: GPLv3+ 11 | :Version: 2.0.0 12 | :Manual section: 1 13 | 14 | SYNOPSIS 15 | -------- 16 | grok-manifest [opts] -m manifest.js[.gz] -t /path [/path/to/bare.git] 17 | 18 | DESCRIPTION 19 | ----------- 20 | Call grok-manifest from a git post-update or post-receive hook to create 21 | the latest repository manifest. This manifest file is downloaded by 22 | mirroring systems (if manifest is newer than what they already have) and 23 | used to only clone/pull the repositories that have changed since the 24 | grok-pull's last run. 
25 | 26 | OPTIONS 27 | ------- 28 | --version show program's version number and exit 29 | -h, --help show this help message and exit 30 | --cfgfile=CFGFILE Path to grokmirror.conf containing a [manifest] section 31 | -m MANIFILE, --manifest=MANIFILE 32 | Location of manifest.js or manifest.js.gz 33 | -t TOPLEVEL, --toplevel=TOPLEVEL 34 | Top dir where all repositories reside 35 | -l LOGFILE, --logfile=LOGFILE 36 | When specified, will put debug logs in this location 37 | -c, --check-export-ok 38 | Honor the git-daemon-export-ok magic file and 39 | do not export repositories not marked as such 40 | -n, --use-now Use current timestamp instead of parsing commits 41 | -p, --purge Purge deleted git repositories from manifest 42 | -x, --remove Remove repositories passed as arguments from 43 | the manifest file 44 | -y, --pretty Pretty-print the generated manifest (sort repos 45 | and add indentation). This is much slower, so 46 | should be used with caution on large 47 | collections. 48 | -w, --wait-for-manifest 49 | When running with arguments, wait if manifest is not 50 | there (can be useful when multiple writers are writing 51 | to the manifest file via NFS) 52 | -i IGNORE, --ignore-paths=IGNORE 53 | When finding git dirs, ignore these paths (can be used 54 | multiple times, accepts shell-style globbing) 55 | -o, --fetch-objstore Fetch updates into objstore repo (if used) 56 | -v, --verbose Be verbose and tell us what you are doing 57 | 58 | You can set some of these options in a config file that you can pass via 59 | ``--cfgfile`` option. See example grokmirror.conf file for 60 | documentation. Values passed via cmdline flags will override the 61 | corresponding config file values. 62 | 63 | EXAMPLES 64 | -------- 65 | The examples assume that the repositories are located in 66 | ``/var/lib/gitolite3/repositories``. 
67 | 68 | Initial manifest generation:: 69 | 70 | /usr/bin/grok-manifest -m /var/www/html/manifest.js.gz \ 71 | -t /var/lib/gitolite3/repositories 72 | 73 | Inside the git hook:: 74 | 75 | /usr/bin/grok-manifest -m /var/www/html/manifest.js.gz \ 76 | -t /var/lib/gitolite3/repositories -n `pwd` 77 | 78 | To purge deleted repositories from the manifest, use the ``-p`` flag 79 | when running from cron:: 80 | 81 | /usr/bin/grok-manifest -m /var/www/html/manifest.js.gz \ 82 | -t /var/lib/gitolite3/repositories -p 83 | 84 | You can also add it to the gitolite's ``D`` command using the ``-x`` flag:: 85 | 86 | /usr/bin/grok-manifest -m /var/www/html/manifest.js.gz \ 87 | -t /var/lib/gitolite3/repositories \ 88 | -x $repo.git 89 | 90 | To troubleshoot potential problems, you can pass ``-l`` parameter to 91 | grok-manifest, just make sure the user executing the hook command (user 92 | git or gitolite, for example) is able to write to that location:: 93 | 94 | /usr/bin/grok-manifest -m /var/www/html/manifest.js.gz \ 95 | -t /var/lib/gitolite3/repositories \ 96 | -l /var/log/grokmirror/grok-manifest.log -n `pwd` 97 | 98 | SEE ALSO 99 | -------- 100 | * grok-pull(1) 101 | * git(1) 102 | 103 | SUPPORT 104 | ------- 105 | Email tools@linux.kernel.org. 106 | -------------------------------------------------------------------------------- /man/grok-pi-indexer.1: -------------------------------------------------------------------------------- 1 | .\" Man page generated from reStructuredText. 2 | . 3 | .TH GROK-PI-INDEXER 1 "2021-07-27" "2.1.0" "" 4 | .SH NAME 5 | GROK-PI-INDEXER \- Hook script for indexing mirrored public-inbox repos 6 | . 7 | .nr rst2man-indent-level 0 8 | . 9 | .de1 rstReportMargin 10 | \\$1 \\n[an-margin] 11 | level \\n[rst2man-indent-level] 12 | level margin: \\n[rst2man-indent\\n[rst2man-indent-level]] 13 | - 14 | \\n[rst2man-indent0] 15 | \\n[rst2man-indent1] 16 | \\n[rst2man-indent2] 17 | .. 18 | .de1 INDENT 19 | .\" .rstReportMargin pre: 20 | . 
RS \\$1 21 | . nr rst2man-indent\\n[rst2man-indent-level] \\n[an-margin] 22 | . nr rst2man-indent-level +1 23 | .\" .rstReportMargin post: 24 | .. 25 | .de UNINDENT 26 | . RE 27 | .\" indent \\n[an-margin] 28 | .\" old: \\n[rst2man-indent\\n[rst2man-indent-level]] 29 | .nr rst2man-indent-level -1 30 | .\" new: \\n[rst2man-indent\\n[rst2man-indent-level]] 31 | .in \\n[rst2man-indent\\n[rst2man-indent-level]]u 32 | .. 33 | .SH SYNOPSIS 34 | .INDENT 0.0 35 | .INDENT 3.5 36 | .INDENT 0.0 37 | .TP 38 | .B grok\-pi\-indexer [\-h] [\-v] \-c PICONFIG \-t TOPLEVEL [\-p PITOPLEVEL] 39 | [\-l LOGFILE] [\-L INDEXLEVEL] [\-j JOBS] [\-\-no\-fsync] 40 | {init,update,extindex} ... 41 | .UNINDENT 42 | .UNINDENT 43 | .UNINDENT 44 | .SH DESCRIPTION 45 | .sp 46 | This is a helper hook for correctly initializing and indexing 47 | public\-inbox repositories. NOTE: a working public\-inbox 1.6+ install is 48 | required, and public\-inbox commands must be in the PATH. 49 | .sp 50 | The command should be invoked via grokmirror hooks, for example, use 51 | the following grokmirror configuration file to mirror lore.kernel.org: 52 | .INDENT 0.0 53 | .INDENT 3.5 54 | .sp 55 | .nf 56 | .ft C 57 | [core] 58 | toplevel = /var/lib/git/lore.kernel.org 59 | manifest = ${toplevel}/manifest.js.gz 60 | log = /var/log/grokmirror/lore.kernel.org.log 61 | loglevel = info 62 | 63 | [remote] 64 | site = https://lore.kernel.org 65 | manifest = ${site}/manifest.js.gz 66 | 67 | [pull] 68 | default_owner = PublicInbox 69 | pull_threads = 2 70 | # Adjust as you see fit, or simply set to * to mirror everything 71 | include = /git/* 72 | /tools/* 73 | refresh = 60 74 | purge = no 75 | # If you have many CPUs and fast disks, you may want to raise \-j to a higher number 76 | # You can also set publicinbox.indexBatchSize to a higher number in PI_CONFIG if 77 | # you have lots of RAM, but probably not higher than 256m 78 | post_clone_complete_hook = /usr/bin/grok\-pi\-indexer \-c /etc/public\-inbox/config \-t
${core:toplevel} init 79 | post_update_hook = /usr/bin/grok\-pi\-indexer \-c /etc/public\-inbox/config \-t ${core:toplevel} update 80 | # Uncomment if you\(aqve defined any [extindex] sections 81 | #post_work_complete_hook = /usr/bin/grok\-pi\-indexer \-c /etc/public\-inbox/config \-t ${core:toplevel} extindex 82 | 83 | [fsck] 84 | frequency = 30 85 | report_to = root 86 | statusfile = ${core:toplevel}/fsck.status.js 87 | repack = yes 88 | commitgraph = yes 89 | prune = yes 90 | .ft P 91 | .fi 92 | .UNINDENT 93 | .UNINDENT 94 | .SH OPTIONS 95 | .INDENT 0.0 96 | .INDENT 3.5 97 | .INDENT 0.0 98 | .TP 99 | .B \-h\fP,\fB \-\-help 100 | show this help message and exit 101 | .TP 102 | .B \-v\fP,\fB \-\-verbose 103 | Be verbose and tell us what you are doing (default: False) 104 | .TP 105 | .BI \-c \ PICONFIG\fR,\fB \ \-\-pi\-config \ PICONFIG 106 | Location of the public\-inbox configuration file (default: None) 107 | .TP 108 | .BI \-t \ TOPLEVEL\fR,\fB \ \-\-toplevel \ TOPLEVEL 109 | Path to git repository mirror toplevel (default: None) 110 | .TP 111 | .BI \-p \ PITOPLEVEL\fR,\fB \ \-\-pi\-toplevel \ PITOPLEVEL 112 | Path to public\-inbox toplevel, if separate (default: None) 113 | .TP 114 | .BI \-l \ LOGFILE\fR,\fB \ \-\-logfile \ LOGFILE 115 | Log activity in this log file (default: None) 116 | .TP 117 | .BI \-L \ INDEXLEVEL\fR,\fB \ \-\-indexlevel \ INDEXLEVEL 118 | Indexlevel to use with public\-inbox (full, medium, basic) (default: full) 119 | .TP 120 | .BI \-j \ JOBS\fR,\fB \ \-\-jobs \ JOBS 121 | The \-\-jobs parameter to pass to public\-inbox (default: None) 122 | .TP 123 | .B \-\-no\-fsync 124 | Use \-\-no\-fsync when invoking public\-inbox (default: False) 125 | .UNINDENT 126 | .UNINDENT 127 | .UNINDENT 128 | .SH SEE ALSO 129 | .INDENT 0.0 130 | .IP \(bu 2 131 | grok\-pull(1) 132 | .IP \(bu 2 133 | public\-inbox\-init(1) 134 | .IP \(bu 2 135 | public\-inbox\-index(1) 136 | .IP \(bu 2 137 | public\-inbox\-extindex(1) 138 | .UNINDENT 139 | .SH SUPPORT 140 | .sp 
141 | Email \fI\%tools@linux.kernel.org\fP\&. 142 | .SH AUTHOR 143 | mricon@kernel.org 144 | 145 | License: GPLv3+ 146 | .SH COPYRIGHT 147 | The Linux Foundation and contributors 148 | .\" Generated by docutils manpage writer. 149 | . 150 | -------------------------------------------------------------------------------- /man/grok-pi-indexer.1.rst: -------------------------------------------------------------------------------- 1 | GROK-PI-INDEXER 2 | =============== 3 | ---------------------------------------------------- 4 | Hook script for indexing mirrored public-inbox repos 5 | ---------------------------------------------------- 6 | 7 | :Author: mricon@kernel.org 8 | :Date: 2021-07-27 9 | :Copyright: The Linux Foundation and contributors 10 | :License: GPLv3+ 11 | :Version: 2.1.0 12 | :Manual section: 1 13 | 14 | SYNOPSIS 15 | -------- 16 | grok-pi-indexer [-h] [-v] -c PICONFIG -t TOPLEVEL [-p PITOPLEVEL] 17 | [-l LOGFILE] [-L INDEXLEVEL] [-j JOBS] [--no-fsync] 18 | {init,update,extindex} ... 19 | 20 | DESCRIPTION 21 | ----------- 22 | This is a helper hook for correctly initializing and indexing 23 | public-inbox repositories. NOTE: a working public-inbox 1.6+ install is 24 | required, and public-inbox commands must be in the PATH. 
25 | 26 | The command should be invoked via grokmirror hooks, for example, use 27 | the following grokmirror configuration file to mirror lore.kernel.org:: 28 | 29 | [core] 30 | toplevel = /var/lib/git/lore.kernel.org 31 | manifest = ${toplevel}/manifest.js.gz 32 | log = /var/log/grokmirror/lore.kernel.org.log 33 | loglevel = info 34 | 35 | [remote] 36 | site = https://lore.kernel.org 37 | manifest = ${site}/manifest.js.gz 38 | 39 | [pull] 40 | default_owner = PublicInbox 41 | pull_threads = 2 42 | # Adjust as you see fit, or simply set to * to mirror everything 43 | include = /git/* 44 | /tools/* 45 | refresh = 60 46 | purge = no 47 | # If you have many CPUs and fast disks, you may want to raise -j to a higher number 48 | # You can also set publicinbox.indexBatchSize to a higher number in PI_CONFIG if 49 | # you have lots of RAM, but probably not higher than 256m 50 | post_clone_complete_hook = /usr/bin/grok-pi-indexer -c /etc/public-inbox/config -t ${core:toplevel} init 51 | post_update_hook = /usr/bin/grok-pi-indexer -c /etc/public-inbox/config -t ${core:toplevel} update 52 | # Uncomment if you've defined any [extindex] sections 53 | #post_work_complete_hook = /usr/bin/grok-pi-indexer -c /etc/public-inbox/config -t ${core:toplevel} extindex 54 | 55 | [fsck] 56 | frequency = 30 57 | report_to = root 58 | statusfile = ${core:toplevel}/fsck.status.js 59 | repack = yes 60 | commitgraph = yes 61 | prune = yes 62 | 63 | 64 | OPTIONS 65 | ------- 66 | 67 | -h, --help show this help message and exit 68 | -v, --verbose Be verbose and tell us what you are doing (default: False) 69 | -c PICONFIG, --pi-config PICONFIG 70 | Location of the public-inbox configuration file (default: None) 71 | -t TOPLEVEL, --toplevel TOPLEVEL 72 | Path to git repository mirror toplevel (default: None) 73 | -p PITOPLEVEL, --pi-toplevel PITOPLEVEL 74 | Path to public-inbox toplevel, if separate (default: None) 75 | -l LOGFILE, --logfile LOGFILE 76 | Log activity in this log file (default: None) 
77 | -L INDEXLEVEL, --indexlevel INDEXLEVEL 78 | Indexlevel to use with public-inbox (full, medium, basic) (default: full) 79 | -j JOBS, --jobs JOBS The --jobs parameter to pass to public-inbox (default: None) 80 | --no-fsync Use --no-fsync when invoking public-inbox (default: False) 81 | 82 | SEE ALSO 83 | -------- 84 | * grok-pull(1) 85 | * public-inbox-init(1) 86 | * public-inbox-index(1) 87 | * public-inbox-extindex(1) 88 | 89 | SUPPORT 90 | ------- 91 | Email tools@linux.kernel.org. 92 | -------------------------------------------------------------------------------- /man/grok-pi-piper.1: -------------------------------------------------------------------------------- 1 | .\" Man page generated from reStructuredText. 2 | . 3 | .TH GROK-PI-PIPER 1 "2020-10-07" "2.0.2" "" 4 | .SH NAME 5 | GROK-PI-PIPER \- Hook script for piping new messages from public-inbox repos 6 | . 7 | .nr rst2man-indent-level 0 8 | . 9 | .de1 rstReportMargin 10 | \\$1 \\n[an-margin] 11 | level \\n[rst2man-indent-level] 12 | level margin: \\n[rst2man-indent\\n[rst2man-indent-level]] 13 | - 14 | \\n[rst2man-indent0] 15 | \\n[rst2man-indent1] 16 | \\n[rst2man-indent2] 17 | .. 18 | .de1 INDENT 19 | .\" .rstReportMargin pre: 20 | . RS \\$1 21 | . nr rst2man-indent\\n[rst2man-indent-level] \\n[an-margin] 22 | . nr rst2man-indent-level +1 23 | .\" .rstReportMargin post: 24 | .. 25 | .de UNINDENT 26 | . RE 27 | .\" indent \\n[an-margin] 28 | .\" old: \\n[rst2man-indent\\n[rst2man-indent-level]] 29 | .nr rst2man-indent-level -1 30 | .\" new: \\n[rst2man-indent\\n[rst2man-indent-level]] 31 | .in \\n[rst2man-indent\\n[rst2man-indent-level]]u 32 | .. 33 | .SH SYNOPSIS 34 | .INDENT 0.0 35 | .INDENT 3.5 36 | grok\-pi\-piper [\-h] [\-v] [\-d] \-c CONFIG [\-l PIPELAST] [\-\-version] repo 37 | .UNINDENT 38 | .UNINDENT 39 | .SH DESCRIPTION 40 | .sp 41 | This is a ready\-made hook script that can be called from 42 | pull.post_update_hook when mirroring public\-inbox repositories. 
It will 43 | pipe all newly received messages to arbitrary commands defined in the 44 | config file. The simplest configuration for lore.kernel.org is: 45 | .INDENT 0.0 46 | .INDENT 3.5 47 | .sp 48 | .nf 49 | .ft C 50 | ~/.config/pi\-piper.conf 51 | \-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\- 52 | [DEFAULT] 53 | pipe = /usr/bin/procmail 54 | # Prune successfully processed messages 55 | shallow = yes 56 | 57 | ~/.procmailrc 58 | \-\-\-\-\-\-\-\-\-\-\-\-\- 59 | DEFAULT=$HOME/Maildir/ 60 | 61 | # Don\(aqt deliver cross\-posted duplicates 62 | :0 Wh: .msgid.lock 63 | | formail \-D 8192 .msgid.cache 64 | 65 | ~/.config/lore.conf 66 | \-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\- 67 | [core] 68 | toplevel = ~/.local/share/grokmirror/lore 69 | log = ${toplevel}/grokmirror.log 70 | 71 | [remote] 72 | site = https://lore.kernel.org 73 | manifest = https://lore.kernel.org/manifest.js.gz 74 | 75 | [pull] 76 | post_update_hook = ~/.local/bin/grok\-pi\-piper \-c ~/.config/pi\-piper.conf 77 | include = /list\-you\-want/* 78 | /another\-list/* 79 | .ft P 80 | .fi 81 | .UNINDENT 82 | .UNINDENT 83 | .sp 84 | It assumes that grokmirror was installed from pip. If you installed it 85 | via some other means, please check the path for the grok\-pi\-piper 86 | script. 87 | .sp 88 | Note, that initial clone may take a long time, even if you set 89 | shallow=yes. 90 | .sp 91 | See pi\-piper.conf for other config options. 
92 | .SH OPTIONS 93 | .INDENT 0.0 94 | .INDENT 3.5 95 | .INDENT 0.0 96 | .TP 97 | .B \-h\fP,\fB \-\-help 98 | show this help message and exit 99 | .TP 100 | .B \-v\fP,\fB \-\-verbose 101 | Be verbose and tell us what you are doing (default: False) 102 | .TP 103 | .B \-d\fP,\fB \-\-dry\-run 104 | Do a dry\-run and just show what would be done (default: False) 105 | .TP 106 | .BI \-c \ CONFIG\fP,\fB \ \-\-config \ CONFIG 107 | Location of the configuration file (default: None) 108 | .TP 109 | .BI \-l \ PIPELAST\fP,\fB \ \-\-pipe\-last \ PIPELAST 110 | Force pipe last NN messages in the list, regardless of tracking (default: None) 111 | .TP 112 | .B \-\-version 113 | show program\(aqs version number and exit 114 | .UNINDENT 115 | .UNINDENT 116 | .UNINDENT 117 | .SH SEE ALSO 118 | .INDENT 0.0 119 | .IP \(bu 2 120 | grok\-pull(1) 121 | .IP \(bu 2 122 | git(1) 123 | .UNINDENT 124 | .SH SUPPORT 125 | .sp 126 | Email \fI\%tools@linux.kernel.org\fP\&. 127 | .SH AUTHOR 128 | mricon@kernel.org 129 | 130 | License: GPLv3+ 131 | .SH COPYRIGHT 132 | The Linux Foundation and contributors 133 | .\" Generated by docutils manpage writer. 134 | . 
135 | -------------------------------------------------------------------------------- /man/grok-pi-piper.1.rst: -------------------------------------------------------------------------------- 1 | GROK-PI-PIPER 2 | ============= 3 | ----------------------------------------------------------- 4 | Hook script for piping new messages from public-inbox repos 5 | ----------------------------------------------------------- 6 | 7 | :Author: mricon@kernel.org 8 | :Date: 2020-10-07 9 | :Copyright: The Linux Foundation and contributors 10 | :License: GPLv3+ 11 | :Version: 2.0.2 12 | :Manual section: 1 13 | 14 | SYNOPSIS 15 | -------- 16 | grok-pi-piper [-h] [-v] [-d] -c CONFIG [-l PIPELAST] [--version] repo 17 | 18 | DESCRIPTION 19 | ----------- 20 | This is a ready-made hook script that can be called from 21 | pull.post_update_hook when mirroring public-inbox repositories. It will 22 | pipe all newly received messages to arbitrary commands defined in the 23 | config file. The simplest configuration for lore.kernel.org is:: 24 | 25 | ~/.config/pi-piper.conf 26 | ----------------------- 27 | [DEFAULT] 28 | pipe = /usr/bin/procmail 29 | # Prune successfully processed messages 30 | shallow = yes 31 | 32 | ~/.procmailrc 33 | ------------- 34 | DEFAULT=$HOME/Maildir/ 35 | 36 | # Don't deliver cross-posted duplicates 37 | :0 Wh: .msgid.lock 38 | | formail -D 8192 .msgid.cache 39 | 40 | ~/.config/lore.conf 41 | ------------------- 42 | [core] 43 | toplevel = ~/.local/share/grokmirror/lore 44 | log = ${toplevel}/grokmirror.log 45 | 46 | [remote] 47 | site = https://lore.kernel.org 48 | manifest = https://lore.kernel.org/manifest.js.gz 49 | 50 | [pull] 51 | post_update_hook = ~/.local/bin/grok-pi-piper -c ~/.config/pi-piper.conf 52 | include = /list-you-want/* 53 | /another-list/* 54 | 55 | It assumes that grokmirror was installed from pip. If you installed it 56 | via some other means, please check the path for the grok-pi-piper 57 | script. 
58 | 59 | Note, that initial clone may take a long time, even if you set 60 | shallow=yes. 61 | 62 | See pi-piper.conf for other config options. 63 | 64 | 65 | OPTIONS 66 | ------- 67 | -h, --help show this help message and exit 68 | -v, --verbose Be verbose and tell us what you are doing (default: False) 69 | -d, --dry-run Do a dry-run and just show what would be done (default: False) 70 | -c CONFIG, --config CONFIG 71 | Location of the configuration file (default: None) 72 | -l PIPELAST, --pipe-last PIPELAST 73 | Force pipe last NN messages in the list, regardless of tracking (default: None) 74 | --version show program's version number and exit 75 | 76 | 77 | SEE ALSO 78 | -------- 79 | * grok-pull(1) 80 | * git(1) 81 | 82 | SUPPORT 83 | ------- 84 | Email tools@linux.kernel.org. 85 | -------------------------------------------------------------------------------- /man/grok-pull.1: -------------------------------------------------------------------------------- 1 | .\" Man page generated from reStructuredText. 2 | . 3 | .TH GROK-PULL 1 "2020-08-14" "2.0.0" "" 4 | .SH NAME 5 | GROK-PULL \- Clone or update local git repositories 6 | . 7 | .nr rst2man-indent-level 0 8 | . 9 | .de1 rstReportMargin 10 | \\$1 \\n[an-margin] 11 | level \\n[rst2man-indent-level] 12 | level margin: \\n[rst2man-indent\\n[rst2man-indent-level]] 13 | - 14 | \\n[rst2man-indent0] 15 | \\n[rst2man-indent1] 16 | \\n[rst2man-indent2] 17 | .. 18 | .de1 INDENT 19 | .\" .rstReportMargin pre: 20 | . RS \\$1 21 | . nr rst2man-indent\\n[rst2man-indent-level] \\n[an-margin] 22 | . nr rst2man-indent-level +1 23 | .\" .rstReportMargin post: 24 | .. 25 | .de UNINDENT 26 | . RE 27 | .\" indent \\n[an-margin] 28 | .\" old: \\n[rst2man-indent\\n[rst2man-indent-level]] 29 | .nr rst2man-indent-level -1 30 | .\" new: \\n[rst2man-indent\\n[rst2man-indent-level]] 31 | .in \\n[rst2man-indent\\n[rst2man-indent-level]]u 32 | .. 
33 | .SH SYNOPSIS 34 | .INDENT 0.0 35 | .INDENT 3.5 36 | grok\-pull \-c /path/to/grokmirror.conf 37 | .UNINDENT 38 | .UNINDENT 39 | .SH DESCRIPTION 40 | .sp 41 | Grok\-pull is the main tool for replicating repository updates from the 42 | grokmirror primary server to the mirrors. 43 | .sp 44 | Grok\-pull has two modes of operation \-\- one\-time and continuous 45 | (daemonized). In one\-time operation mode, it downloads the latest 46 | manifest and applies any outstanding updates. If there are new 47 | repositories or changes in the existing repositories, grok\-pull will 48 | perform the necessary git commands to clone or fetch the required data 49 | from the master. Once all updates are applied, it will write its own 50 | manifest and exit. In this mode, grok\-pull can be run manually or from 51 | cron. 52 | .sp 53 | In continuous operation mode (when run with \-o), grok\-pull will continue 54 | running after all updates have been applied and will periodically 55 | re\-download the manifest from the server to check for new updates. For 56 | this to work, you must set pull.refresh in grokmirror.conf to the number 57 | of seconds you would like it to wait between refreshes. 58 | .sp 59 | If pull.socket is specified, grok\-pull will also listen on a socket for 60 | any push updates (relative repository path as present in the manifest 61 | file, terminated with newlines). This can be used for pubsub 62 | subscriptions (see contrib). 63 | .SH OPTIONS 64 | .INDENT 0.0 65 | .INDENT 3.5 66 | .INDENT 0.0 67 | .TP 68 | .B \-\-version 69 | show program\(aqs version number and exit 70 | .TP 71 | .B \-h\fP,\fB \-\-help 72 | show this help message and exit 73 | .TP 74 | .B \-v\fP,\fB \-\-verbose 75 | Be verbose and tell us what you are doing 76 | .TP 77 | .B \-n\fP,\fB \-\-no\-mtime\-check 78 | Run without checking manifest mtime. 
79 | .TP 80 | .B \-o\fP,\fB \-\-continuous 81 | Run continuously (no effect if refresh is not set) 82 | .TP 83 | .BI \-c \ CONFIG\fP,\fB \ \-\-config\fB= CONFIG 84 | Location of the configuration file 85 | .TP 86 | .B \-p\fP,\fB \-\-purge 87 | Remove any git trees that are no longer in manifest. 88 | .TP 89 | .B \-\-force\-purge 90 | Force purge operation despite significant repo deletions 91 | .UNINDENT 92 | .UNINDENT 93 | .UNINDENT 94 | .SH EXAMPLES 95 | .sp 96 | Use grokmirror.conf and modify it to reflect your needs. The example 97 | configuration file is heavily commented. To invoke, run: 98 | .INDENT 0.0 99 | .INDENT 3.5 100 | .sp 101 | .nf 102 | .ft C 103 | grok\-pull \-v \-c /path/to/grokmirror.conf 104 | .ft P 105 | .fi 106 | .UNINDENT 107 | .UNINDENT 108 | .SH SEE ALSO 109 | .INDENT 0.0 110 | .IP \(bu 2 111 | grok\-manifest(1) 112 | .IP \(bu 2 113 | grok\-fsck(1) 114 | .IP \(bu 2 115 | git(1) 116 | .UNINDENT 117 | .SH SUPPORT 118 | .sp 119 | Please email \fI\%tools@linux.kernel.org\fP\&. 120 | .SH AUTHOR 121 | mricon@kernel.org 122 | 123 | License: GPLv3+ 124 | .SH COPYRIGHT 125 | The Linux Foundation and contributors 126 | .\" Generated by docutils manpage writer. 127 | . 128 | -------------------------------------------------------------------------------- /man/grok-pull.1.rst: -------------------------------------------------------------------------------- 1 | GROK-PULL 2 | ========= 3 | -------------------------------------- 4 | Clone or update local git repositories 5 | -------------------------------------- 6 | 7 | :Author: mricon@kernel.org 8 | :Date: 2020-08-14 9 | :Copyright: The Linux Foundation and contributors 10 | :License: GPLv3+ 11 | :Version: 2.0.0 12 | :Manual section: 1 13 | 14 | SYNOPSIS 15 | -------- 16 | grok-pull -c /path/to/grokmirror.conf 17 | 18 | DESCRIPTION 19 | ----------- 20 | Grok-pull is the main tool for replicating repository updates from the 21 | grokmirror primary server to the mirrors. 
22 | 23 | Grok-pull has two modes of operation -- one-time and continuous 24 | (daemonized). In one-time operation mode, it downloads the latest 25 | manifest and applies any outstanding updates. If there are new 26 | repositories or changes in the existing repositories, grok-pull will 27 | perform the necessary git commands to clone or fetch the required data 28 | from the master. Once all updates are applied, it will write its own 29 | manifest and exit. In this mode, grok-pull can be run manually or from 30 | cron. 31 | 32 | In continuous operation mode (when run with -o), grok-pull will continue 33 | running after all updates have been applied and will periodically 34 | re-download the manifest from the server to check for new updates. For 35 | this to work, you must set pull.refresh in grokmirror.conf to the number 36 | of seconds you would like it to wait between refreshes. 37 | 38 | If pull.socket is specified, grok-pull will also listen on a socket for 39 | any push updates (relative repository path as present in the manifest 40 | file, terminated with newlines). This can be used for pubsub 41 | subscriptions (see contrib). 42 | 43 | OPTIONS 44 | ------- 45 | --version show program's version number and exit 46 | -h, --help show this help message and exit 47 | -v, --verbose Be verbose and tell us what you are doing 48 | -n, --no-mtime-check Run without checking manifest mtime. 49 | -o, --continuous Run continuously (no effect if refresh is not set) 50 | -c CONFIG, --config=CONFIG 51 | Location of the configuration file 52 | -p, --purge Remove any git trees that are no longer in manifest. 53 | --force-purge Force purge operation despite significant repo deletions 54 | 55 | EXAMPLES 56 | -------- 57 | Use grokmirror.conf and modify it to reflect your needs. The example 58 | configuration file is heavily commented. 
To invoke, run:: 59 | 60 | grok-pull -v -c /path/to/grokmirror.conf 61 | 62 | SEE ALSO 63 | -------- 64 | * grok-manifest(1) 65 | * grok-fsck(1) 66 | * git(1) 67 | 68 | SUPPORT 69 | ------- 70 | Please email tools@linux.kernel.org. 71 | -------------------------------------------------------------------------------- /pi-piper.conf: -------------------------------------------------------------------------------- 1 | # These will be overridden by any sections below 2 | [DEFAULT] 3 | # To start piping public-inbox messages into your inbox, simply 4 | # install procmail and add the following line to your ~/.procmailrc: 5 | # DEFAULT=$HOME/Maildir/ 6 | # You can now read your mail with "mutt -f ~/Maildir/" 7 | pipe = /usr/bin/procmail 8 | # Once you've successfully piped the messages, you generally 9 | # don't need them any more. If you set shallow = yes, then 10 | # the repository will be configured as "shallow" and all successfully 11 | # processed messages will be pruned from the repo. 12 | # This will greatly reduce disk space usage, especially on large archives. 13 | # You can always get any number of them back, e.g. by running: 14 | # git fetch _grokmirror master --deepen 100 15 | shallow = yes 16 | # You can use ~/ for paths in your home dir, or omit for no log 17 | #log = ~/pi-piper.log 18 | # Can be "info" or "debug". Note that debug will have message bodies as well. 19 | #loglevel = info 20 | 21 | # Overrides for any defaults. You may not need any if all you want is to pipe all mirrored 22 | # public-inboxes to procmail. 
23 | # Naming: 24 | # We will perform simple shell-style globbing using the following rule: 25 | # /{section}/git/*.git, 26 | # so, for a section that matches /alsa-devel/git/0.git, name it "alsa-devel" 27 | [alsa-devel] 28 | # Use a different config file for this one 29 | pipe = /usr/bin/procmail /path/to/some/other/procmailrc 30 | 31 | [lkml] 32 | # Setting pipe = None allows ignoring this particular list 33 | pipe = None -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | packaging 2 | requests -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # Copyright (C) 2013-2020 by The Linux Foundation and contributors 4 | # 5 | # This program is free software: you can redistribute it and/or modify 6 | # it under the terms of the GNU General Public License as published by 7 | # the Free Software Foundation, either version 3 of the License, or 8 | # (at your option) any later version. 9 | # 10 | # This program is distributed in the hope that it will be useful, 11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | # GNU General Public License for more details. 14 | # 15 | # You should have received a copy of the GNU General Public License 16 | # along with this program. If not, see <https://www.gnu.org/licenses/>. 
import os
import re
from setuptools import setup


def read(fname):
    """Return the text content of *fname*.

    The path is resolved relative to the directory containing this
    setup.py, so the result does not depend on the current working
    directory.
    """
    path = os.path.join(os.path.dirname(__file__), fname)
    # Use a context manager so the handle is closed deterministically, and
    # decode as UTF-8 explicitly so the result does not depend on the build
    # environment's locale (e.g. LANG=C would otherwise default to ASCII).
    with open(path, encoding='utf-8') as fh:
        return fh.read()


def find_version(source):
    """Extract the package version string from the file *source*.

    Looks for a line of the form ``VERSION = '<value>'`` (single or double
    quotes) at the start of a line and returns ``<value>``.

    Raises:
        RuntimeError: if no such line is present in the file.
    """
    version_file = read(source)
    # re.M makes ^ match at the start of every line, not just the file.
    version_match = re.search(r"^VERSION = ['\"]([^'\"]*)['\"]", version_file, re.M)
    if version_match:
        return version_match.group(1)
    raise RuntimeError("Unable to find version string.")


NAME = 'grokmirror'
VERSION = find_version('grokmirror/__init__.py')


setup(
    version=VERSION,
    url='https://git.kernel.org/pub/scm/utils/grokmirror/grokmirror.git',
    download_url='https://www.kernel.org/pub/software/network/grokmirror/%s-%s.tar.xz' % (NAME, VERSION),
    name=NAME,
    description='Smartly mirror git repositories that use grokmirror',
    author='Konstantin Ryabitsev',
    author_email='konstantin@linuxfoundation.org',
    packages=[NAME],
    license='GPLv3+',
    long_description=read('README.rst'),
    long_description_content_type='text/x-rst',
    keywords=['git', 'mirroring', 'repositories'],
    project_urls={
        'Source': 'https://git.kernel.org/pub/scm/utils/grokmirror/grokmirror.git',
        'Tracker': 'https://github.com/mricon/grokmirror/issues',
    },
    # NOTE(review): requirements.txt also lists 'packaging'; confirm whether
    # it should be declared here as well.
    install_requires=[
        'requests',
    ],
    python_requires='>=3.6',
    entry_points={
        'console_scripts': [
            "grok-dumb-pull=grokmirror.dumb_pull:command",
            "grok-pull=grokmirror.pull:command",
            "grok-fsck=grokmirror.fsck:command",
            "grok-manifest=grokmirror.manifest:command",
            "grok-bundle=grokmirror.bundle:command",
            "grok-pi-piper=grokmirror.pi_piper:command",
            "grok-pi-indexer=grokmirror.pi_indexer:command",
        ]
    }
)