├── .gitignore ├── LICENSE ├── README.md ├── example ├── build-tutorial-env.sh ├── example.json └── example_config.ini ├── input_plugins ├── __init__.py ├── input_plugin.py └── json_plugin.py ├── lib ├── __init__.py ├── config.py ├── diff.py ├── exception.py ├── pg_compare.py ├── pg_objects.py ├── pg_transform.py ├── provider.py ├── strategy.py ├── util.py └── writer.py ├── pg-compare ├── pg-pickle ├── pg-transform └── strategies ├── __init__.py ├── column_default.py ├── datatype.py ├── drop_table.py └── nullable.py /.gitignore: -------------------------------------------------------------------------------- 1 | .idea 2 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | GNU GENERAL PUBLIC LICENSE 2 | Version 2, June 1991 3 | 4 | Copyright (C) 1989, 1991 Free Software Foundation, Inc., 5 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 6 | Everyone is permitted to copy and distribute verbatim copies 7 | of this license document, but changing it is not allowed. 8 | 9 | Preamble 10 | 11 | The licenses for most software are designed to take away your 12 | freedom to share and change it. By contrast, the GNU General Public 13 | License is intended to guarantee your freedom to share and change free 14 | software--to make sure the software is free for all its users. This 15 | General Public License applies to most of the Free Software 16 | Foundation's software and to any other program whose authors commit to 17 | using it. (Some other Free Software Foundation software is covered by 18 | the GNU Lesser General Public License instead.) You can apply it to 19 | your programs, too. 20 | 21 | When we speak of free software, we are referring to freedom, not 22 | price. 
Our General Public Licenses are designed to make sure that you 23 | have the freedom to distribute copies of free software (and charge for 24 | this service if you wish), that you receive source code or can get it 25 | if you want it, that you can change the software or use pieces of it 26 | in new free programs; and that you know you can do these things. 27 | 28 | To protect your rights, we need to make restrictions that forbid 29 | anyone to deny you these rights or to ask you to surrender the rights. 30 | These restrictions translate to certain responsibilities for you if you 31 | distribute copies of the software, or if you modify it. 32 | 33 | For example, if you distribute copies of such a program, whether 34 | gratis or for a fee, you must give the recipients all the rights that 35 | you have. You must make sure that they, too, receive or can get the 36 | source code. And you must show them these terms so they know their 37 | rights. 38 | 39 | We protect your rights with two steps: (1) copyright the software, and 40 | (2) offer you this license which gives you legal permission to copy, 41 | distribute and/or modify the software. 42 | 43 | Also, for each author's protection and ours, we want to make certain 44 | that everyone understands that there is no warranty for this free 45 | software. If the software is modified by someone else and passed on, we 46 | want its recipients to know that what they have is not the original, so 47 | that any problems introduced by others will not reflect on the original 48 | authors' reputations. 49 | 50 | Finally, any free program is threatened constantly by software 51 | patents. We wish to avoid the danger that redistributors of a free 52 | program will individually obtain patent licenses, in effect making the 53 | program proprietary. To prevent this, we have made it clear that any 54 | patent must be licensed for everyone's free use or not licensed at all. 
55 | 56 | The precise terms and conditions for copying, distribution and 57 | modification follow. 58 | 59 | GNU GENERAL PUBLIC LICENSE 60 | TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION 61 | 62 | 0. This License applies to any program or other work which contains 63 | a notice placed by the copyright holder saying it may be distributed 64 | under the terms of this General Public License. The "Program", below, 65 | refers to any such program or work, and a "work based on the Program" 66 | means either the Program or any derivative work under copyright law: 67 | that is to say, a work containing the Program or a portion of it, 68 | either verbatim or with modifications and/or translated into another 69 | language. (Hereinafter, translation is included without limitation in 70 | the term "modification".) Each licensee is addressed as "you". 71 | 72 | Activities other than copying, distribution and modification are not 73 | covered by this License; they are outside its scope. The act of 74 | running the Program is not restricted, and the output from the Program 75 | is covered only if its contents constitute a work based on the 76 | Program (independent of having been made by running the Program). 77 | Whether that is true depends on what the Program does. 78 | 79 | 1. You may copy and distribute verbatim copies of the Program's 80 | source code as you receive it, in any medium, provided that you 81 | conspicuously and appropriately publish on each copy an appropriate 82 | copyright notice and disclaimer of warranty; keep intact all the 83 | notices that refer to this License and to the absence of any warranty; 84 | and give any other recipients of the Program a copy of this License 85 | along with the Program. 86 | 87 | You may charge a fee for the physical act of transferring a copy, and 88 | you may at your option offer warranty protection in exchange for a fee. 89 | 90 | 2. 
You may modify your copy or copies of the Program or any portion 91 | of it, thus forming a work based on the Program, and copy and 92 | distribute such modifications or work under the terms of Section 1 93 | above, provided that you also meet all of these conditions: 94 | 95 | a) You must cause the modified files to carry prominent notices 96 | stating that you changed the files and the date of any change. 97 | 98 | b) You must cause any work that you distribute or publish, that in 99 | whole or in part contains or is derived from the Program or any 100 | part thereof, to be licensed as a whole at no charge to all third 101 | parties under the terms of this License. 102 | 103 | c) If the modified program normally reads commands interactively 104 | when run, you must cause it, when started running for such 105 | interactive use in the most ordinary way, to print or display an 106 | announcement including an appropriate copyright notice and a 107 | notice that there is no warranty (or else, saying that you provide 108 | a warranty) and that users may redistribute the program under 109 | these conditions, and telling the user how to view a copy of this 110 | License. (Exception: if the Program itself is interactive but 111 | does not normally print such an announcement, your work based on 112 | the Program is not required to print an announcement.) 113 | 114 | These requirements apply to the modified work as a whole. If 115 | identifiable sections of that work are not derived from the Program, 116 | and can be reasonably considered independent and separate works in 117 | themselves, then this License, and its terms, do not apply to those 118 | sections when you distribute them as separate works. 
But when you 119 | distribute the same sections as part of a whole which is a work based 120 | on the Program, the distribution of the whole must be on the terms of 121 | this License, whose permissions for other licensees extend to the 122 | entire whole, and thus to each and every part regardless of who wrote it. 123 | 124 | Thus, it is not the intent of this section to claim rights or contest 125 | your rights to work written entirely by you; rather, the intent is to 126 | exercise the right to control the distribution of derivative or 127 | collective works based on the Program. 128 | 129 | In addition, mere aggregation of another work not based on the Program 130 | with the Program (or with a work based on the Program) on a volume of 131 | a storage or distribution medium does not bring the other work under 132 | the scope of this License. 133 | 134 | 3. You may copy and distribute the Program (or a work based on it, 135 | under Section 2) in object code or executable form under the terms of 136 | Sections 1 and 2 above provided that you also do one of the following: 137 | 138 | a) Accompany it with the complete corresponding machine-readable 139 | source code, which must be distributed under the terms of Sections 140 | 1 and 2 above on a medium customarily used for software interchange; or, 141 | 142 | b) Accompany it with a written offer, valid for at least three 143 | years, to give any third party, for a charge no more than your 144 | cost of physically performing source distribution, a complete 145 | machine-readable copy of the corresponding source code, to be 146 | distributed under the terms of Sections 1 and 2 above on a medium 147 | customarily used for software interchange; or, 148 | 149 | c) Accompany it with the information you received as to the offer 150 | to distribute corresponding source code. 
(This alternative is 151 | allowed only for noncommercial distribution and only if you 152 | received the program in object code or executable form with such 153 | an offer, in accord with Subsection b above.) 154 | 155 | The source code for a work means the preferred form of the work for 156 | making modifications to it. For an executable work, complete source 157 | code means all the source code for all modules it contains, plus any 158 | associated interface definition files, plus the scripts used to 159 | control compilation and installation of the executable. However, as a 160 | special exception, the source code distributed need not include 161 | anything that is normally distributed (in either source or binary 162 | form) with the major components (compiler, kernel, and so on) of the 163 | operating system on which the executable runs, unless that component 164 | itself accompanies the executable. 165 | 166 | If distribution of executable or object code is made by offering 167 | access to copy from a designated place, then offering equivalent 168 | access to copy the source code from the same place counts as 169 | distribution of the source code, even though third parties are not 170 | compelled to copy the source along with the object code. 171 | 172 | 4. You may not copy, modify, sublicense, or distribute the Program 173 | except as expressly provided under this License. Any attempt 174 | otherwise to copy, modify, sublicense or distribute the Program is 175 | void, and will automatically terminate your rights under this License. 176 | However, parties who have received copies, or rights, from you under 177 | this License will not have their licenses terminated so long as such 178 | parties remain in full compliance. 179 | 180 | 5. You are not required to accept this License, since you have not 181 | signed it. However, nothing else grants you permission to modify or 182 | distribute the Program or its derivative works. 
These actions are 183 | prohibited by law if you do not accept this License. Therefore, by 184 | modifying or distributing the Program (or any work based on the 185 | Program), you indicate your acceptance of this License to do so, and 186 | all its terms and conditions for copying, distributing or modifying 187 | the Program or works based on it. 188 | 189 | 6. Each time you redistribute the Program (or any work based on the 190 | Program), the recipient automatically receives a license from the 191 | original licensor to copy, distribute or modify the Program subject to 192 | these terms and conditions. You may not impose any further 193 | restrictions on the recipients' exercise of the rights granted herein. 194 | You are not responsible for enforcing compliance by third parties to 195 | this License. 196 | 197 | 7. If, as a consequence of a court judgment or allegation of patent 198 | infringement or for any other reason (not limited to patent issues), 199 | conditions are imposed on you (whether by court order, agreement or 200 | otherwise) that contradict the conditions of this License, they do not 201 | excuse you from the conditions of this License. If you cannot 202 | distribute so as to satisfy simultaneously your obligations under this 203 | License and any other pertinent obligations, then as a consequence you 204 | may not distribute the Program at all. For example, if a patent 205 | license would not permit royalty-free redistribution of the Program by 206 | all those who receive copies directly or indirectly through you, then 207 | the only way you could satisfy both it and this License would be to 208 | refrain entirely from distribution of the Program. 209 | 210 | If any portion of this section is held invalid or unenforceable under 211 | any particular circumstance, the balance of the section is intended to 212 | apply and the section as a whole is intended to apply in other 213 | circumstances. 
214 | 215 | It is not the purpose of this section to induce you to infringe any 216 | patents or other property right claims or to contest validity of any 217 | such claims; this section has the sole purpose of protecting the 218 | integrity of the free software distribution system, which is 219 | implemented by public license practices. Many people have made 220 | generous contributions to the wide range of software distributed 221 | through that system in reliance on consistent application of that 222 | system; it is up to the author/donor to decide if he or she is willing 223 | to distribute software through any other system and a licensee cannot 224 | impose that choice. 225 | 226 | This section is intended to make thoroughly clear what is believed to 227 | be a consequence of the rest of this License. 228 | 229 | 8. If the distribution and/or use of the Program is restricted in 230 | certain countries either by patents or by copyrighted interfaces, the 231 | original copyright holder who places the Program under this License 232 | may add an explicit geographical distribution limitation excluding 233 | those countries, so that distribution is permitted only in or among 234 | countries not thus excluded. In such case, this License incorporates 235 | the limitation as if written in the body of this License. 236 | 237 | 9. The Free Software Foundation may publish revised and/or new versions 238 | of the General Public License from time to time. Such new versions will 239 | be similar in spirit to the present version, but may differ in detail to 240 | address new problems or concerns. 241 | 242 | Each version is given a distinguishing version number. If the Program 243 | specifies a version number of this License which applies to it and "any 244 | later version", you have the option of following the terms and conditions 245 | either of that version or of any later version published by the Free 246 | Software Foundation. 
If the Program does not specify a version number of 247 | this License, you may choose any version ever published by the Free Software 248 | Foundation. 249 | 250 | 10. If you wish to incorporate parts of the Program into other free 251 | programs whose distribution conditions are different, write to the author 252 | to ask for permission. For software which is copyrighted by the Free 253 | Software Foundation, write to the Free Software Foundation; we sometimes 254 | make exceptions for this. Our decision will be guided by the two goals 255 | of preserving the free status of all derivatives of our free software and 256 | of promoting the sharing and reuse of software generally. 257 | 258 | NO WARRANTY 259 | 260 | 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY 261 | FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN 262 | OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES 263 | PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED 264 | OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 265 | MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS 266 | TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE 267 | PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, 268 | REPAIR OR CORRECTION. 269 | 270 | 12. 
IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING 271 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR 272 | REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, 273 | INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING 274 | OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED 275 | TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY 276 | YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER 277 | PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE 278 | POSSIBILITY OF SUCH DAMAGES. 279 | 280 | END OF TERMS AND CONDITIONS 281 | 282 | How to Apply These Terms to Your New Programs 283 | 284 | If you develop a new program, and you want it to be of the greatest 285 | possible use to the public, the best way to achieve this is to make it 286 | free software which everyone can redistribute and change under these terms. 287 | 288 | To do so, attach the following notices to the program. It is safest 289 | to attach them to the start of each source file to most effectively 290 | convey the exclusion of warranty; and each file should have at least 291 | the "copyright" line and a pointer to where the full notice is found. 292 | 293 | {description} 294 | Copyright (C) {year} {fullname} 295 | 296 | This program is free software; you can redistribute it and/or modify 297 | it under the terms of the GNU General Public License as published by 298 | the Free Software Foundation; either version 2 of the License, or 299 | (at your option) any later version. 300 | 301 | This program is distributed in the hope that it will be useful, 302 | but WITHOUT ANY WARRANTY; without even the implied warranty of 303 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 304 | GNU General Public License for more details. 
305 | 306 | You should have received a copy of the GNU General Public License along 307 | with this program; if not, write to the Free Software Foundation, Inc., 308 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 309 | 310 | Also add information on how to contact you by electronic and paper mail. 311 | 312 | If the program is interactive, make it output a short notice like this 313 | when it starts in an interactive mode: 314 | 315 | Gnomovision version 69, Copyright (C) year name of author 316 | Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. 317 | This is free software, and you are welcome to redistribute it 318 | under certain conditions; type `show c' for details. 319 | 320 | The hypothetical commands `show w' and `show c' should show the appropriate 321 | parts of the General Public License. Of course, the commands you use may 322 | be called something other than `show w' and `show c'; they could even be 323 | mouse-clicks or menu items--whatever suits your program. 324 | 325 | You should also get your employer (if you work as a programmer) or your 326 | school, if any, to sign a "copyright disclaimer" for the program, if 327 | necessary. Here is a sample; alter the names: 328 | 329 | Yoyodyne, Inc., hereby disclaims all copyright interest in the program 330 | `Gnomovision' (which makes passes at compilers) written by James Hacker. 331 | 332 | {signature of Ty Coon}, 1 April 1989 333 | Ty Coon, President of Vice 334 | 335 | This General Public License does not permit incorporating your program into 336 | proprietary programs. If your program is a subroutine library, you may 337 | consider it more useful to permit linking proprietary applications with the 338 | library. If this is what you want to do, use the GNU Lesser General 339 | Public License instead of this License. 
340 | 341 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # PG Transform 2 | 3 | PG Transform is a set of tools I created for solving a particular problem: discovering and fixing schema differences between many hundreds of PostgreSQL databases. 4 | 5 | The tools work by taking a database with a "golden schema" and comparing it against the other databases. The output of the comparison step is a tree of DiffNodes with each individual node representing a single difference. This tree is then passed as input to the transformation program, which processes each node, applying any 'strategies' that match. Once this is complete, and provided there is a strategy for each and every difference, the schemas will again be identical. 6 | 7 | All this is achieved with the following three tools. 8 | 9 | ### pg-pickle 10 | 11 | The pg-pickle program processes the database with the "golden schema" and outputs a Python pickle file. This is the template that will be compared to the other databases. 12 | 13 | ``` 14 | ⇒ ./pg-pickle -h 15 | Usage: pg-pickle [options] 16 | 17 | Options: 18 | -h, --help show this help message and exit 19 | --ignore-columns=IGNORE_COLUMNS 20 | Columns to be ignored, specified as a comma seperated 21 | list. Wildcards can be used, eg, *ignore* 22 | --ignore-tables=IGNORE_TABLES 23 | Tables to be ignored, specified as a comma seperated 24 | list. Wildcards can be used, eg, *ignore* 25 | -o OUT_PATH, --out=OUT_PATH 26 | Path to output file 27 | --dbname=DBNAME Database name 28 | --dbuser=DBUSER Database user 29 | --dbpass=DBPASS Database password 30 | --dbhost=DBHOST Database host 31 | --dbport=DBPORT Database port 32 | 33 | ``` 34 | 35 | ### pg-compare 36 | 37 | The pg-compare program handles comparing databases against the template. This can be done in parallel, and the output can be printed or exported to a SQLite database. 
38 | 39 | 40 | ``` 41 | Usage: pg-compare [options] 42 | 43 | Options: 44 | -h, --help show this help message and exit 45 | -c CONFIG, --config=CONFIG 46 | Config file 47 | --ignore-columns=IGNORE_COLUMNS 48 | Columns to be ignored, specified as a comma seperated 49 | list. Wildcards can be used, eg, *ignore* 50 | --ignore-tables=IGNORE_TABLES 51 | Tables to be ignored, specified as a comma seperated 52 | list. Wildcards can be used, eg, *ignore* 53 | --pickle-path=PICKLE_PATH 54 | Path to pickled database 55 | --max-threads=MAX_THREADS 56 | Maximum number of databases to process in parallel 57 | -o OUT_PATH, --out=OUT_PATH 58 | Path to output file 59 | --output-type=OUTPUT_TYPE 60 | Allowed values: ('stdout', 'sqlite') 61 | 62 | ``` 63 | 64 | ### pg-transform 65 | 66 | The pg-transform program does the work of modifying the target schemas to bring them back in line with the template. Like pg-compare, it can be run in parallel against many databases. 67 | 68 | ``` 69 | Usage: pg-transform [options] 70 | 71 | Options: 72 | -h, --help show this help message and exit 73 | -c CONFIG, --config=CONFIG 74 | Config file 75 | --max-threads=MAX_THREADS 76 | Maximum number of databases to process in parallel 77 | --commit Whether or not to commit changes 78 | --pickle-path=PICKLE_PATH 79 | Path to pickled database 80 | 81 | ``` 82 | 83 | ## Strategies 84 | 85 | The project contains several example strategies; however, they are intended only as examples and not for production use. It is recommended that you write your own strategies so that you can be confident the changes you are applying are correct for your specific situation. 86 | 87 | ## Tutorial 88 | 89 | In this tutorial I will demonstrate how we can use these tools to bring a group of database schemas back in line. We will use the dvdrental example database from http://www.postgresqltutorial.com/load-postgresql-sample-database/. 
90 | 91 | First things first, let's build the test databases: 92 | 93 | ```bash 94 | example/build-tutorial-env.sh 95 | ``` 96 | 97 | After executing the script you should have the following databases: 98 | 99 | ``` 100 | vagrant@pgtest:~/tutorial$ psql -l | grep dvdrental 101 | dvdrental | postgres | UTF8 | en_US.UTF-8 | en_US.UTF-8 | 102 | dvdrental_modified | postgres | UTF8 | en_US.UTF-8 | en_US.UTF-8 | 103 | ``` 104 | 105 | The dvdrental tutorial database is our known good 'golden schema'. Let's build a template pickle of this database with pg-pickle: 106 | 107 | ```bash 108 | ⇒ ./pg-pickle --dbname dvdrental --dbuser postgres --dbpass postgres --dbhost tutorial --out /tmp/ 109 | Pickling: dvdrental to /tmp/dvdrental.pickle 110 | ``` 111 | 112 | As a sanity check, let's confirm that pg-compare returns no differences when executed against the newly created "dvdrental_modified" database: 113 | 114 | ```bash 115 | ⇒ ./pg-compare --config example/example_config.ini --pickle-path /tmp/dvdrental.pickle 116 | Comparing: dvdrental -> dvdrental_modified 117 | Writing results for: dvdrental_modified 118 | ``` 119 | 120 | As expected, no differences are returned. Now, let's modify the schema: 121 | 122 | ```bash 123 | vagrant@pgtest:~/tutorial$ psql dvdrental_modified -c 'ALTER TABLE actor ALTER COLUMN first_name DROP NOT NULL' 124 | ``` 125 | 126 | And run pg-compare again: 127 | 128 | ```bash 129 | ⇒ ./pg-compare --config example/example_config.ini --pickle-path /tmp/dvdrental.pickle 130 | Comparing: dvdrental -> dvdrental_modified 131 | Writing results for: dvdrental_modified 132 | dvdrental: 133 | actor: 134 | first_name: 135 | -> {is_nullable}: expected: False, found: True 136 | ``` 137 | 138 | Here we can see the `is_nullable` attribute of the `actor.first_name` field is different to the template schema, as expected. 
Let's apply another schema change: 139 | 140 | ```bash 141 | psql dvdrental_modified -c 'ALTER TABLE actor ALTER COLUMN last_update DROP DEFAULT' 142 | ``` 143 | 144 | And run pg-compare again: 145 | 146 | ```bash 147 | ⇒ ./pg-compare --config example/example_config.ini --pickle-path /tmp/dvdrental.pickle 148 | Comparing: dvdrental -> dvdrental_modified 149 | Writing results for: dvdrental_modified 150 | dvdrental: 151 | actor: 152 | first_name: 153 | -> {is_nullable}: expected: False, found: True 154 | last_update: 155 | -> {column_default}: expected: 'now()', found: None 156 | ``` 157 | 158 | Again, the difference is picked up. Let's execute pg-transform and bring the schema back into line: 159 | 160 | ```bash 161 | ⇒ ./pg-transform --config example/example_config.ini --pickle-path /tmp/dvdrental.pickle --commit 162 | Processing: dvdrental_modified 163 | Comparing: dvdrental_modified 164 | Transforming: dvdrental_modified 165 | Applying strategy: ColumnDefaultStrategy 166 | --> SETTING actor.last_update DEFAULT to now() 167 | 168 | Applying strategy: DatatypeStrategy 169 | 170 | Applying strategy: NotNullableStrategy 171 | --> Setting actor.first_name to NOT NULL 172 | 173 | Changes committed! 174 | ``` 175 | 176 | And run pg-compare one last time: 177 | 178 | ```bash 179 | ⇒ ./pg-compare --config example/example_config.ini --pickle-path /tmp/dvdrental.pickle 180 | Comparing: dvdrental -> dvdrental_modified 181 | Writing results for: dvdrental_modified 182 | ``` 183 | 184 | No differences are registered. -------------------------------------------------------------------------------- /example/build-tutorial-env.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # Simple script for building tutorial environment. 4 | # WARNING: drops databases before creating them! 5 | # Requires that the postgres user is able to connect without a password. 
6 | 7 | 8 | EXAMPLEDB_ZIP_PATH=dvdrental.zip 9 | EXAMPLEDB_TAR_PATH=dvdrental.tar 10 | 11 | TEMPLATEDB_NAME=dvdrental 12 | TESTDB_NAME=dvdrental_modified 13 | 14 | if [ ! -f ${EXAMPLEDB_ZIP_PATH} ]; then 15 | wget http://www.postgresqltutorial.com/download/dvd-rental-sample-database/?wpdmdl=969 -O dvdrental.zip 16 | fi 17 | 18 | if [ ! -f ${EXAMPLEDB_TAR_PATH} ]; then 19 | echo "unzipping: $EXAMPLEDB_ZIP_PATH" 20 | unzip ${EXAMPLEDB_ZIP_PATH} 21 | fi 22 | 23 | # Create our template database 24 | echo "creating template database: $TEMPLATEDB_NAME" 25 | dropdb ${TEMPLATEDB_NAME} 2> /dev/null || true 26 | createdb ${TEMPLATEDB_NAME} -O postgres 27 | pg_restore -d ${TEMPLATEDB_NAME} -U postgres ${EXAMPLEDB_TAR_PATH} 28 | 29 | # Create our test databases 30 | echo "creating test database: $TESTDB_NAME" 31 | dropdb ${TESTDB_NAME} 2> /dev/null || true 32 | createdb -O postgres -T ${TEMPLATEDB_NAME} ${TESTDB_NAME} 33 | -------------------------------------------------------------------------------- /example/example.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "host": "tutorial", 4 | "port": 5432, 5 | "name": "dvdrental_modified", 6 | "password": "postgres", 7 | "user": "postgres" 8 | } 9 | ] 10 | 11 | -------------------------------------------------------------------------------- /example/example_config.ini: -------------------------------------------------------------------------------- 1 | ; Input plugins are specified here. The only required option is the class_name. All other 2 | ; options will be passed to the plugin's __init__() method. 
3 | [InputPlugin] 4 | class_name = JSONPlugin 5 | path = example/example.json 6 | 7 | ; Strategies found here will be executed in transform mode 8 | ; Strategy blocks can accept a number of options that will control how they are executed: 9 | ; 10 | ; applicable_tables - Will only target the named tables, separated by a comma 11 | ; applicable_columns - Will only target the named columns, separated by a comma 12 | 13 | ; A strategy will only be executed if enabled is set to yes or true 14 | 15 | [NotNullableStrategy] 16 | enabled : true 17 | 18 | [CharacterLengthStrategy] 19 | enabled : true 20 | 21 | [DropTableStrategy] 22 | enabled : false 23 | 24 | [DatatypeStrategy] 25 | enabled : true 26 | 27 | [ColumnDefaultStrategy] 28 | enabled : true 29 | -------------------------------------------------------------------------------- /input_plugins/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | from input_plugins.input_plugin import InputPlugin 3 | from lib.util import get_subclasses 4 | 5 | 6 | def get_input_plugin(class_name): 7 | for plugin in get_subclasses('input_plugins', InputPlugin): 8 | if plugin.__name__ == class_name: 9 | return plugin 10 | -------------------------------------------------------------------------------- /input_plugins/input_plugin.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | from collections import namedtuple 3 | 4 | ConnectionConfig = namedtuple('ConnectionConfig', ['database', 'user', 'host', 'password', 'port']) 5 | 6 | 7 | class InputPlugin(object): 8 | def get_connection_configs(self): 9 | raise NotImplementedError("You must implement get_connection_configs()!") 10 | -------------------------------------------------------------------------------- /input_plugins/json_plugin.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | from 
input_plugins.input_plugin import InputPlugin, ConnectionConfig 4 | 5 | 6 | class JSONPlugin(InputPlugin): 7 | def __init__(self, path): 8 | self._json = json.load(open(path)) 9 | 10 | def get_connection_configs(self): 11 | configs = [] 12 | for database in self._json: 13 | configs.append( 14 | ConnectionConfig(database['name'], database['user'], database['host'], database['password'], 15 | database['port'])) 16 | return configs 17 | -------------------------------------------------------------------------------- /lib/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facetoe/pgtransform/8631959a19fd8e38c6a642000a47c85f5f1ba123/lib/__init__.py -------------------------------------------------------------------------------- /lib/config.py: -------------------------------------------------------------------------------- 1 | import ConfigParser 2 | import inspect 3 | from ConfigParser import NoSectionError 4 | 5 | from input_plugins import get_input_plugin 6 | from lib.exception import ConfigException 7 | 8 | 9 | class Config(object): 10 | """ 11 | Simple config object that knows how to locate and load input plugins and strategies. 12 | """ 13 | _strategy_vars = ['applicable_tables', 'applicable_columns'] 14 | 15 | _INPUT_PLUGIN = 'InputPlugin' 16 | required_sections = [_INPUT_PLUGIN] 17 | 18 | def __init__(self, path): 19 | self.config = ConfigParser.ConfigParser() 20 | self.config.read(path) 21 | self.check_config() 22 | 23 | def check_config(self): 24 | if not all([section in self.config.sections() for section in self.required_sections]): 25 | raise NoSectionError("%s is required!" 
def split_lists(self, setting):
    """
    Turn a config value into a list of items.

    A value containing commas is split on them and every item is stripped of
    surrounding whitespace; any other value is returned unchanged as a
    single-element list.

    :param setting: raw option value read from the config file
    :return: list of strings
    """
    if ',' not in setting:
        return [setting]
    return [item.strip() for item in setting.split(',')]
class DiffNode(object):
    """
    Node representing a database difference.

    Nodes form a tree. Leaf nodes carry the difference itself in ``data``
    (an object exposing ``expected`` and ``found``); branch nodes group the
    nodes appended below them.
    """

    def __init__(self, name, object_type=None, data=None, parent=None):
        """
        :param name: name of the object or attribute this node stands for
        :param object_type: class describing what kind of object differs
        :param data: difference payload; to_tree() reads its expected/found attributes
        :param parent: parent node, None for a root
        """
        self.parent = parent
        self.name = name
        self.data = data
        self.object_type = object_type
        self.children = []

    def append(self, obj):
        """
        Add obj to this node's children; a DiffNode child gets this node as parent.
        """
        if isinstance(obj, DiffNode):
            obj.parent = self
        self.children.append(obj)

    def isleaf(self):
        """Return True when this node has no children."""
        return len(self.children) == 0

    def isbranch(self):
        """Return True when this node has at least one child."""
        return len(self.children) > 0

    def find(self, target, attribute='name'):
        """
        Return the first node whose attribute equals target, or None.

        The node itself is checked first, then its descendants depth-first.

        :param target: value to look for
        :param attribute: name of the DiffNode attribute compared with target
        """
        if getattr(self, attribute) == target:
            return self
        return self._find(target, attribute, node=self)

    def _find(self, target, attribute, node):
        """Depth-first search below node; helper for find()."""
        for child in node.children:
            if getattr(child, attribute) == target:
                return child
            # Recurse through the helper: find() takes no "node" argument.
            found = self._find(target, attribute, node=child)
            # Compare against None explicitly: a leaf node has len() == 0 and is
            # therefore falsy, so a plain truth test would discard a found leaf.
            if found is not None:
                return found
        return None

    def findall(self, target, attribute='name'):
        """
        Return every node (this one included) whose attribute equals target.

        :param target: value to look for
        :param attribute: name of the DiffNode attribute compared with target
        :return: list of matching nodes in depth-first order
        """
        values = []
        self._findall(self, attribute, target, values)
        return values

    def _findall(self, node, attribute, target, values):
        """Collect the matches below and including node into values."""
        if node is not None:
            if getattr(node, attribute) == target:
                values.append(node)
            for child in node.children:
                self._findall(child, attribute, target, values)

    # This only merges top level nodes. Need to fix it.
    def merge(self, node):
        """
        Adopt the children of node when it has the same name as this node.

        Nodes with a different name are ignored.
        """
        if self.name == node.name:
            for child in node.children:
                self.append(child)

    def to_tree(self, level=0):
        """
        Render this node and everything below it as indented text.

        :param level: nesting depth of this node, used for the indentation
        :return: the rendered tree as a string
        """
        indent_width = ' '
        indent = indent_width * level
        parts = ["%s%s:\n" % (indent, self.name)]
        for child in self.children:
            if child.isleaf():
                parts.append("%s -> %s: "
                             "expected: %s, "
                             "found: %s\n" % ((indent + indent_width),
                                              repr(child.data),
                                              repr(child.data.expected),
                                              repr(child.data.found)))
            else:
                parts.append(child.to_tree(level + 1))
        return "".join(parts)

    def __len__(self):
        return len(self.children)

    def __repr__(self):
        return "{%s}" % self.name
class DBConnection(object):
    """
    Connection settings for one PostgreSQL database plus a lazily opened connection.
    """

    def __init__(self, host, database, user, password, port=5432, connect_timeout=30):
        """
        Store the settings; no connection is opened here.

        :param host: database server host
        :param database: name of the database
        :param user: user to connect as
        :param password: password of that user
        :param port: server port
        :param connect_timeout: value passed to psycopg2.connect as connect_timeout
        """
        self.host = host
        self.database = database
        self.user = user
        self.password = password
        self.port = port
        self.connect_timeout = connect_timeout
        # Filled in by the connection property on first use
        self.db_connection = None

    def connect(self):
        """
        Open a new psycopg2 connection from the stored settings and return it.
        """
        settings = dict(database=self.database,
                        user=self.user,
                        password=self.password,
                        host=self.host,
                        port=self.port,
                        connect_timeout=self.connect_timeout)
        return psycopg2.connect(**settings)

    @property
    def connection(self):
        """
        The cached connection; it is opened on first access and reused afterwards.
        """
        if self.db_connection is not None:
            return self.db_connection
        self.db_connection = self.connect()
        return self.db_connection
def select_as_objects(self, cursor, sql_select, object_type, remap_attr_names, **kwargs):
    """
    Run sql_select and build one object_type instance per returned row.

    The query is executed with this object's attributes as parameters. Each row is
    handed to object_type as keyword arguments, together with object_type itself,
    remap_attr_names and any extra kwargs (which win over row values of the same name).

    :param cursor: database cursor used to run the query
    :param sql_select: SQL statement to execute
    :param object_type: class instantiated for every row
    :param remap_attr_names: mapping handed to every created object
    :return: list of created objects, in row order
    """
    cursor.execute(sql_select, self.__dict__)
    created = []
    for row in cursor.fetchall():
        init_args = dict(row)
        init_args['object_type'] = object_type
        init_args['remap_attr_names'] = remap_attr_names
        init_args.update(kwargs)
        created.append(object_type(**init_args))
    return created
def get_missing(self, obj, target_attr):
    """
    Return the items of this object's target_attr collection that are absent
    from the same collection on obj.

    :param obj: object to compare against
    :param target_attr: name of the attribute holding the collection
    :return: list of items present on self but not on obj, in self's order
    """
    missing = []
    for candidate in getattr(self, target_attr):
        if candidate in getattr(obj, target_attr):
            continue
        missing.append(candidate)
    return missing
def compare_to(self, other_database):
    """
    Compare this database with other_database.

    :param other_database: the Database to compare against
    :return: a DiffNode named after this database that holds the procedure
             differences followed by the table differences
    """
    table_diffs = self.compare_object('tables', Table, other_database)
    procedure_diffs = self.compare_object('procedures', Procedure, other_database)

    db_diffs = DiffNode(name=self.name)
    # Procedures are merged before tables, so they come first in the result
    for partial_diffs in (procedure_diffs, table_diffs):
        db_diffs.merge(partial_diffs)
    return db_diffs
(%(ignore_columns)s) 236 | """ 237 | 238 | SQL_SELECT_CONSTRAINTS = """ 239 | SELECT c.conname AS constraint_name, 240 | CASE c.contype 241 | WHEN 'c' THEN 'CHECK' 242 | WHEN 'f' THEN 'FOREIGN KEY' 243 | WHEN 'p' THEN 'PRIMARY KEY' 244 | WHEN 'u' THEN 'UNIQUE' 245 | END AS "constraint_type", 246 | CASE WHEN c.condeferrable = 'f' THEN 'NO' ELSE 'YES' END AS is_deferrable, 247 | CASE WHEN c.condeferred = 'f' THEN 'NO' ELSE 'YES' END AS is_deferred, 248 | t.relname AS table_name, 249 | -- Not sure what this does but there are a lot of results -- array_to_string(c.conkey, ' ') AS constraint_key, 250 | CASE confupdtype 251 | WHEN 'a' THEN 'NO ACTION' 252 | WHEN 'r' THEN 'RESTRICT' 253 | WHEN 'c' THEN 'CASCADE' 254 | WHEN 'n' THEN 'SET NULL' 255 | WHEN 'd' THEN 'SET DEFAULT' 256 | END AS on_update, 257 | CASE confdeltype 258 | WHEN 'a' THEN 'NO ACTION' 259 | WHEN 'r' THEN 'RESTRICT' 260 | WHEN 'c' THEN 'CASCADE' 261 | WHEN 'n' THEN 'SET NULL' 262 | WHEN 'd' THEN 'SET DEFAULT' 263 | END AS on_delete, 264 | CASE confmatchtype 265 | WHEN 'u' THEN 'UNSPECIFIED' 266 | WHEN 'f' THEN 'FULL' 267 | WHEN 'p' THEN 'PARTIAL' 268 | END AS match_type, 269 | t2.relname AS references_table, 270 | array_to_string(c.confkey, ' ') AS fk_constraint_key 271 | FROM pg_constraint c 272 | LEFT JOIN pg_class t ON c.conrelid = t.oid 273 | LEFT JOIN pg_class t2 ON c.confrelid = t2.oid 274 | WHERE t.relname = %(name)s 275 | """ 276 | 277 | SQL_SELECT_FOREIGN_KEYS = """ 278 | SELECT tc.table_schema, 279 | tc.constraint_name, 280 | tc.table_name, 281 | kcu.column_name, 282 | ccu.table_name AS foreign_table_name, 283 | ccu.column_name AS foreign_column_name 284 | FROM information_schema.table_constraints tc 285 | JOIN information_schema.key_column_usage kcu 286 | ON tc.constraint_name = kcu.constraint_name 287 | JOIN information_schema.constraint_column_usage ccu 288 | ON ccu.constraint_name = tc.constraint_name 289 | WHERE constraint_type = 'FOREIGN KEY' 290 | AND 291 | ccu.table_name= %(name)s 292 | 
""" 293 | 294 | SQL_SELECT_TRIGGERS = """ 295 | SELECT 296 | trigger_name, 297 | -- This causes false positives - event_manipulation, 298 | event_object_table, 299 | action_order, 300 | action_condition, 301 | action_statement, 302 | action_orientation, 303 | action_timing, 304 | action_reference_old_table, 305 | action_reference_new_table, 306 | action_reference_new_row, 307 | created 308 | FROM information_schema.triggers 309 | WHERE trigger_schema 310 | NOT IN ('pg_catalog', 'information_schema') 311 | AND 312 | event_object_table = %(name)s 313 | """ 314 | 315 | SQL_SELECT_INDEXES = """ 316 | SELECT 317 | t.relname AS table_name, 318 | a.attname AS column_name 319 | FROM pg_class t, 320 | pg_class i, 321 | pg_index ix, 322 | pg_attribute a 323 | WHERE t.oid = ix.indrelid 324 | AND 325 | i.oid = ix.indexrelid 326 | AND 327 | a.attrelid = t.oid 328 | AND 329 | a.attnum = ANY (ix.indkey) 330 | AND 331 | t.relkind = 'r' 332 | AND 333 | t.relname = %(name)s 334 | AND 335 | NOT a.attname ILIKE ANY (%(ignore_columns)s) 336 | """ 337 | 338 | def __init__(self, cursor, table_name, database_name, ignore_columns=None): 339 | self.name = table_name 340 | self.database_name = database_name 341 | self.ignore_columns = ignore_columns if ignore_columns else [] 342 | self.columns = list() 343 | self.triggers = list() 344 | self.primary_keys = list() 345 | self.foreign_keys = list() 346 | self.check_constraints = list() 347 | self.unique_constraints = list() 348 | self.indexes = list() 349 | 350 | self.construct(cursor=cursor) 351 | 352 | def construct(self, **kwargs): 353 | cursor = kwargs['cursor'] 354 | cursor.execute(self.SQL_CONSTRUCT, self.__dict__) 355 | self.set_attributes(cursor.fetchone()) 356 | self.set_constraints(cursor) 357 | self.columns.extend(self.select_as_objects(cursor, 358 | sql_select=self.SQL_SELECT_COLUMNS, 359 | object_type=Column, 360 | remap_attr_names=dict(column_name='name'))) 361 | self.triggers.extend(self.select_as_objects(cursor, 362 | 
sql_select=self.SQL_SELECT_TRIGGERS, 363 | object_type=Trigger, 364 | remap_attr_names=dict(trigger_name='name'))) 365 | self.indexes.extend(self.select_as_objects(cursor, 366 | sql_select=self.SQL_SELECT_INDEXES, 367 | object_type=Index, 368 | remap_attr_names=dict( 369 | column_name='name'))) ## Indexes are compared based on the column they target as the names often differ 370 | self.foreign_keys.extend(self.select_as_objects(cursor, 371 | sql_select=self.SQL_SELECT_FOREIGN_KEYS, 372 | object_type=ForeignKey, 373 | remap_attr_names=dict(constraint_name='name'))) 374 | 375 | def set_constraints(self, cursor): 376 | cursor.execute(self.SQL_SELECT_CONSTRAINTS, self.__dict__) 377 | for const in cursor.fetchall(): 378 | constraint = dict(const) 379 | constraint_type = constraint['constraint_type'] 380 | constraint.update(dict(remap_attr_names=dict(constraint_name='name'))) 381 | if constraint_type == 'CHECK': 382 | constraint.update(dict(object_type=CheckConstraint)) 383 | self.check_constraints.append(CheckConstraint(**constraint)) 384 | elif constraint_type == 'PRIMARY KEY': 385 | constraint.update(dict(object_type=PrimaryKey)) 386 | self.primary_keys.append(PrimaryKey(**constraint)) 387 | elif constraint_type == 'UNIQUE': 388 | constraint.update(dict(object_type=UniqueConstraint)) 389 | self.unique_constraints.append(UniqueConstraint(**constraint)) 390 | elif constraint_type == 'FOREIGN KEY': 391 | # We select more information about foreign keys in a separate query. 
def compare_to(self, other_table):
    """
    Compare this table with other_table.

    Every kind of child object (columns, constraints, keys, triggers and indexes)
    is compared in turn and the differences are merged into a single node.

    :param other_table: the Table to compare against
    :return: a DiffNode named after this table holding all differences found
    """
    # Build the OrderedDict from a sequence of pairs: keyword arguments reach the
    # constructor through a plain dict, which does not keep their order on
    # Python 2, so the comparison order would otherwise be arbitrary.
    db_objects = OrderedDict([('columns', Column),
                              ('check_constraints', CheckConstraint),
                              ('primary_keys', PrimaryKey),
                              ('foreign_keys', ForeignKey),
                              ('unique_constraints', UniqueConstraint),
                              ('triggers', Trigger),
                              ('indexes', Index)])
    table_diffs = DiffNode(self.name)
    for name, object_type in db_objects.items():
        table_diffs.merge(self.compare_object(name, object_type, other_table))
    return table_diffs
class ColumnAttribute(DatabaseObject):
    """
    Marker type for a single attribute of a database object.

    In this module it is only used as the object_type of the DiffNodes that
    describe attribute differences; it does not support construction from a
    query or comparison.
    """

    def construct(self, **kwargs):
        # NotImplemented is a comparison sentinel, not an exception: calling it
        # fails with a TypeError. Raise NotImplementedError like the base class.
        raise NotImplementedError("ColumnAttribute does not implement construct()!")

    def compare_to(self, obj):
        raise NotImplementedError("ColumnAttribute does not implement compare_to()!")
def apply_strategy(self, node, strategy):
    """
    Apply a strategy to a single diff node using a fresh cursor.

    :param node: the diff node to pass to execute()
    :param strategy: the strategy to call execute() on
    :return: None
    """
    cursor = self.db_connection.cursor()
    try:
        strategy.execute(cursor, node)
    finally:
        # Release the cursor even when the strategy raises
        cursor.close()
def get_targets_from_columns(self, strategy, target, attribute, table_node=None):
    """
    Only return nodes that belong to the columns specified in applicable_columns.

    :param strategy: strategy whose applicable_columns restrict the result
    :param target: value the node attribute must be equal to
    :param attribute: name of the DiffNode attribute compared with target
    :param table_node: when given, only this table's subtree is searched;
                       otherwise the whole difference tree is searched
    :return: list of matching nodes, each node listed once
    """
    if not strategy.applicable_columns:
        return []

    if table_node is None:
        # Scan the tree once. Doing this inside the per-column loop returned every
        # match once per applicable column, so strategies ran repeatedly on a node.
        return [diff_node
                for diff_node in self.database_diffs.findall(target, attribute=attribute)
                # parent.name is column_name
                if diff_node.parent is not None
                and diff_node.parent.name in strategy.applicable_columns]

    nodes = list()
    for column_name in strategy.applicable_columns:
        column_node = table_node.find(column_name)
        # DiffNode defines __len__, so this skips columns that were not found
        # as well as column nodes without children.
        if column_node:
            nodes.extend(column_node.findall(target, attribute=attribute))
    return nodes
class Strategy(object):
    """
    Common base of all strategies; offers a few helpers to subclasses.
    """

    def __init__(self, applicable_tables=None, applicable_columns=None, target_attr=None, target_type=None):
        """
        :param applicable_tables: names of the tables to restrict the strategy to
        :param applicable_columns: names of the columns to restrict the strategy to
        :param target_attr: name of the attribute targeted by the strategy
        :param target_type: name of the DatabaseObject subclass targeted by the strategy
        :raises ConfigException: if target_type names no known DatabaseObject subclass
        """
        self.applicable_tables = applicable_tables or []
        self.applicable_columns = applicable_columns or []
        self.target_attr = target_attr
        self.target_type = self._get_type(target_type)

    def execute(self, cursor, diff_node):
        """
        Subclasses implement the actual work here. The method is called once for
        every diff node the strategy applies to (the selection rules live in
        lib/pg_transform.py).

        :param cursor: target database cursor
        :param diff_node: the diff node to execute upon
        :return:
        """
        raise NotImplementedError("You must implement execute()!")

    @property
    def target(self):
        """
        Subclasses override this property to return what they target: a type such
        as Index for strategies aimed at types, otherwise the name of the targeted
        attribute, eg, udt_name
        """
        raise NotImplementedError("You must implement target!")

    @property
    def name(self):
        """Name of the concrete strategy class."""
        return self.__class__.__name__

    def get_column_info(self, diff_node):
        """
        Describe an attribute diff node as a ColumnInfo tuple.

        The node's parent is taken to be the column and its grandparent the table.
        """
        column_node = diff_node.parent
        return ColumnInfo(column_node.parent.name,
                          column_node.name,
                          diff_node.data.expected,
                          diff_node.data.found)

    def _get_type(self, target_type):
        """
        Resolve a type name to the DatabaseObject subclass of that name.

        :return: the matching class, or None when target_type is empty
        :raises ConfigException: when no subclass carries that name
        """
        if not target_type:
            return None
        for candidate in get_subclasses('lib', DatabaseObject):
            if candidate.__name__ == target_type:
                return candidate
        raise ConfigException("No such type to target: %s" % target_type)
def synchronized(lock=None):
    """
    Decorator for performing synchronized method calls.

    :param lock: context manager held for the duration of every call, typically a
                 threading lock shared by the functions that must not run
                 concurrently. When omitted, a new re-entrant lock is created for
                 this decorator.
    :return: a decorator wrapping a function so that it runs while holding the lock
    """
    if lock is None:
        # The default used to reach "with None" and fail on every decorated call.
        import threading
        lock = threading.RLock()

    def _decorator(wrapped):
        @functools.wraps(wrapped)
        def _wrapper(*args, **kwargs):
            with lock:
                return wrapped(*args, **kwargs)

        return _wrapper

    return _decorator
class OutputWriter(object):
    """
    Base class for objects that persist or display a difference tree.
    """
    def write(self, database_diffs):
        """
        Output ``database_diffs``; must be provided by subclasses.
        """
        raise NotImplementedError("You must implement write()!")

    def visit(self, node, func, **func_kwargs):
        """
        Call ``func(leaf, **func_kwargs)`` for every leaf below ``node``, depth first.
        """
        for child in node.children:
            if child.isleaf():
                func(child, **func_kwargs)
            else:
                self.visit(child, func, **func_kwargs)


class STDOUTWriter(OutputWriter):
    """
    Prints the difference tree to STDOUT
    """
    def write(self, database_diffs):
        if len(database_diffs) > 0:
            # Single-argument print() is valid on Python 2 and Python 3.
            print(database_diffs.to_tree())


class SQLightWriter(OutputWriter):
    """
    Writes the difference tree to a SQLite database.
    """
    SQL_CREATE_TABLE_DATABASE = """
        CREATE TABLE if not exists database(
            id INTEGER PRIMARY KEY NOT NULL,
            name TEXT NOT NULL
        )
    """

    SQL_SELECT_DATABASE = """
        SELECT id FROM database WHERE name == :name
    """

    SQL_INSERT_DATABASE = """
        INSERT INTO database (name) values (:name)
    """

    SQL_CREATE_TABLE_DIFFERENCES = """
        CREATE TABLE IF NOT EXISTS differences (
            id INTEGER PRIMARY KEY NOT NULL,
            table_name TEXT NOT NULL,
            path TEXT NOT NULL,
            type TEXT NOT NULL,
            expected TEXT,
            found TEXT,
            database INT NOT NULL,
            FOREIGN KEY(database) REFERENCES database(id)
        )
    """

    SQL_INSERT_DIFFERENCES = """
        INSERT INTO differences
            (table_name, path, type, expected, found, database)
        VALUES
            (:table_name, :path, :type, :expected, :found, :database)
    """

    SQL_DELETE_DIFFERENCES = """
        DELETE FROM differences WHERE database = :database_id
    """

    def __init__(self, db_path, db_name):
        """
        :param db_path: path of the SQLite file that receives the results
        :param db_name: name of the compared database; used as the key of the
                        ``database`` table
        """
        import sqlite3

        self.db_name = db_name
        self.connection = sqlite3.connect(db_path)
        self.connection.row_factory = sqlite3.Row

    def write(self, database_diffs):
        """
        Replace the stored differences of ``self.db_name`` with ``database_diffs``.

        All statements run in one transaction: it is committed when every
        leaf was inserted and rolled back if anything raises.
        """
        with self.connection:
            cursor = self.connection.cursor()
            cursor.execute(self.SQL_CREATE_TABLE_DATABASE)
            cursor.execute(self.SQL_CREATE_TABLE_DIFFERENCES)
            database_id = self.upsert_database(cursor, self.db_name)
            cursor.execute(self.SQL_DELETE_DIFFERENCES, {'database_id': database_id})
            self.visit(database_diffs, self._insert_difference,
                       cursor=cursor, database_id=database_id)

    def _insert_difference(self, node, cursor=None, database_id=None):
        """
        Insert one row into ``differences`` for the leaf ``node``.
        """
        path, table_name = self.get_node_path_name(node)
        # Only a missing value (None) becomes NULL; falsy values such as
        # False or 0 are real differences and must be stored.
        expected = repr(node.data.expected) if node.data.expected is not None else None
        found = repr(node.data.found) if node.data.found is not None else None
        cursor.execute(self.SQL_INSERT_DIFFERENCES, {'table_name': table_name,
                                                     'path': path,
                                                     'type': node.object_type.__name__,
                                                     'expected': expected,
                                                     'found': found,
                                                     'database': database_id})

    def upsert_database(self, cursor, database_name):
        """
        Return the id of ``database_name`` in the ``database`` table, inserting it if absent.
        """
        cursor.execute(self.SQL_SELECT_DATABASE, {'name': database_name})
        result = cursor.fetchone()
        if result is not None:
            return result['id']
        cursor.execute(self.SQL_INSERT_DATABASE, {'name': database_name})
        # ``id`` is an INTEGER PRIMARY KEY, i.e. the rowid of the new row.
        return cursor.lastrowid

    def get_node_path_name(self, node):
        """
        Return ``(path, table_name)`` for ``node``.

        ``path`` is the dot-joined names from the node directly below the
        root down to ``node``; ``table_name`` is the name of that topmost
        node. The root itself is not part of the path.
        """
        segments = [node.name]
        # Compare with None explicitly: nodes support len(), so truthiness
        # of a parent is not a reliable "has a parent" test.
        while node.parent is not None and node.parent.parent is not None:
            node = node.parent
            segments.append(node.name)
        return ".".join(reversed(segments)), node.name
def get_compare_arg_tuples(input_plugin, reference_db, ignore_items):
    """
    Build one ``(reference_db, connection_config, ignore_items)`` tuple per
    connection config returned by ``input_plugin.get_connection_configs()``.
    """
    return [(reference_db, connection_config, ignore_items)
            for connection_config in input_plugin.get_connection_configs()]


def build_connection_kwargs(database, ignore_items):
    """
    Return the attributes of ``database`` merged with ``ignore_items``.

    A copy is returned so the connection config object itself is not
    modified by the merge.
    """
    kwargs = dict(database.__dict__)
    kwargs.update(ignore_items)
    return kwargs


def compare(arg_tuple):
    """
    Compare one target database with the reference and write the result.

    :param arg_tuple: ``(reference_db, database, ignore_items)`` as produced
                      by get_compare_arg_tuples()

    Any exception is reported (with traceback) and swallowed so that one
    failing database does not stop the remaining comparisons.
    """
    try:
        reference_db, database, ignore_items = arg_tuple
        db = DBConnectionProvider(**build_connection_kwargs(database, ignore_items))
        comparator = PGCompare(reference_provider=reference_db, test_provider=db)
        print_info("Comparing: ", "{} -> {}".format(reference_db.database.name, database.database))
        database_diffs = comparator.compare()
        print_info("Writing results for: ", database.database)
        write_output(database, database_diffs)
    except Exception as e:
        traceback.print_exc()
        # ``message`` only exists on Python 2 exceptions; fall back to str().
        print("Failed: %s" % (getattr(e, 'message', None) or str(e)))


def get_writer(output_path, db_name):
    """
    Return the writer selected by ``options.output_type``.
    """
    if options.output_type == 'stdout':
        return STDOUTWriter()
    elif options.output_type == 'sqlite':
        return SQLightWriter(output_path, db_name)
    else:
        fail("output-type is required")


# Serializes result output across the worker threads.
lock = threading.RLock()


def write_output(database, database_diffs):
    """
    Write ``database_diffs`` for ``database`` while holding the global output lock.
    """
    with lock:
        writer = get_writer(os.path.join(options.out_path, 'db_diffs.sqlite'), database.database)
        writer.write(database_diffs)


VALID_OUTPUT_TYPES = ('stdout', 'sqlite')


def build_option_parser():
    """
    Return the OptionParser describing the pg-compare command line.
    """
    parser = OptionParser()
    parser.add_option('-c', "--config", dest="config_path",
                      help="Config file", metavar="CONFIG")
    parser.add_option("--ignore-columns",
                      dest="ignore_columns",
                      help="Columns to be ignored, specified as a comma seperated list. Wildcards can be used, eg, *ignore*",
                      action='callback',
                      type='string',
                      default=[],
                      callback=format_ignore)
    parser.add_option("--ignore-tables",
                      dest="ignore_tables",
                      help="Tables to be ignored, specified as a comma seperated list. Wildcards can be used, eg, *ignore*",
                      action='callback',
                      type='string',
                      default=[],
                      callback=format_ignore)
    parser.add_option('--pickle-path',
                      dest='pickle_path',
                      help="Path to pickled database",
                      metavar="PICKLE_PATH")
    parser.add_option('--max-threads',
                      dest='max_threads',
                      default=60,
                      type='int',
                      help="Maximum number of databases to process in parallel",
                      metavar="MAX_THREADS")
    parser.add_option('-o', "--out",
                      dest="out_path",
                      help="Path to output file",
                      default=os.getcwd(),
                      metavar="OUT_PATH")
    # ``choices`` rejects an unknown output type while parsing, instead of
    # every worker thread calling fail() later on.
    parser.add_option("--output-type",
                      dest="output_type",
                      type='choice',
                      choices=VALID_OUTPUT_TYPES,
                      help="Allowed values: %s" % str(VALID_OUTPUT_TYPES),
                      default='stdout')
    return parser


if __name__ == '__main__':
    parser = build_option_parser()
    (options, args) = parser.parse_args()

    if not options.config_path:
        parser.print_help()
        fail("config path is required!")
    elif not options.pickle_path:
        parser.print_help()
        fail("--pickle-path is required!")

    config = Config(options.config_path)
    ref_db = PickleProvider(options.pickle_path)
    database_configs = get_compare_arg_tuples(config.input_plugin, ref_db,
                                              {'ignore_tables': options.ignore_tables,
                                               'ignore_columns': options.ignore_columns})
    pool = ThreadPool(processes=options.max_threads)
    pool.map_async(compare, database_configs).get(99999)
# pg-pickle: connect to one database and pickle its schema description so it
# can later serve as the reference for pg-compare / pg-transform.

parser = OptionParser()
parser.add_option("--ignore-columns",
                  dest="ignore_columns",
                  help="Columns to be ignored, specified as a comma seperated list. Wildcards can be used, eg, *ignore*",
                  action='callback',
                  type='string',
                  default=[],
                  callback=format_ignore)
parser.add_option("--ignore-tables",
                  dest="ignore_tables",
                  help="Tables to be ignored, specified as a comma seperated list. Wildcards can be used, eg, *ignore*",
                  action='callback',
                  type='string',
                  default=[],
                  callback=format_ignore)
parser.add_option('-o', "--out",
                  dest="out_path",
                  help="Path to output file",
                  default=os.getcwd(),
                  metavar="OUT_PATH")
parser.add_option('--dbname',
                  dest='dbname',
                  help="Database name",
                  metavar="DBNAME")
parser.add_option('--dbuser',
                  dest='dbuser',
                  help="Database user",
                  metavar="DBUSER")
parser.add_option('--dbpass',
                  dest='dbpass',
                  help="Database password",
                  metavar="DBPASS")
parser.add_option('--dbhost',
                  dest='dbhost',
                  help="Database host",
                  metavar="DBHOST")
# type='int' keeps a port given on the command line the same type as the
# integer default and rejects non-numeric input while parsing.
parser.add_option('--dbport',
                  dest='dbport',
                  help="Database port",
                  default=5432,
                  type='int',
                  metavar="DBPORT")

(options, args) = parser.parse_args()

# Options without a default are None when they were not supplied.
for setting_name in ('dbname', 'dbuser', 'dbpass', 'dbhost', 'dbport'):
    if getattr(options, setting_name) is None:
        parser.print_help()
        fail("--%s is required in pickle mode!" % setting_name)

# The pickle is named after the database and placed in the output directory.
out_file = os.path.join(options.out_path, options.dbname + '.pickle')
print_info("Pickling: ", "%s to %s" % (options.dbname, out_file))
referencedb = DBConnectionProvider(host=options.dbhost,
                                   database=options.dbname,
                                   user=options.dbuser,
                                   password=options.dbpass,
                                   port=options.dbport,
                                   ignore_columns=options.ignore_columns,
                                   ignore_tables=options.ignore_tables)
pickle_database(referencedb.database, out_file)
def error_message(error):
    """
    Return the text of ``error``; ``message`` only exists on Python 2 exceptions.
    """
    return getattr(error, 'message', None) or str(error)


def transform(arg_tuple):
    """
    Compare one target database with the reference and apply the configured strategies.

    :param arg_tuple: ``(reference_db, database, commit)`` as produced by
                      get_transform_arg_tuples()

    A StrategyException is reported as a warning and the database is
    skipped; any other exception is reported with its traceback. Neither is
    re-raised, so the remaining databases are still processed.
    """
    try:
        reference_db, database, commit = arg_tuple
        print_info("Processing: ", database.database)
        db_connection = DBConnectionProvider(**database.__dict__)
        print_info("Comparing: ", database.database)
        db_diffs = PGCompare(reference_db, db_connection).compare()
        transformer = PGTransform(db_connection, db_diffs, config, target_name=database.database)
        transformer.transform(commit)
    except StrategyException as e:
        print_warn("WARN: ", "%s - skipping\n" % error_message(e).strip())
    except Exception as e:
        traceback.print_exc()
        sys.stderr.write("Failed: %s" % error_message(e))


def get_transform_arg_tuples(input_plugin, reference_db, commit=False):
    """
    Build one ``(reference_db, connection_config, commit)`` tuple per
    connection config returned by ``input_plugin.get_connection_configs()``.
    """
    return [(reference_db, connection_config, commit)
            for connection_config in input_plugin.get_connection_configs()]


def build_option_parser():
    """
    Return the OptionParser describing the pg-transform command line.
    """
    parser = OptionParser()

    parser.add_option('-c', "--config", dest="config_path",
                      help="Config file", metavar="CONFIG")
    # type='int': without it a value given on the command line stays a
    # string and cannot be used as the thread pool size.
    parser.add_option('--max-threads',
                      dest='max_threads',
                      default=60,
                      type='int',
                      help="Maximum number of databases to process in parallel",
                      metavar="MAX_THREADS")
    parser.add_option("--commit",
                      dest="commit",
                      action="store_true",
                      default=False,
                      help="Whether or not to commit changes")
    parser.add_option('--pickle-path',
                      dest='pickle_path',
                      help="Path to pickled database",
                      metavar="PICKLE_PATH")
    return parser


if __name__ == '__main__':
    parser = build_option_parser()

    (options, args) = parser.parse_args()

    if not options.config_path:
        parser.print_help()
        fail("config path is required!")
    elif not options.pickle_path:
        parser.print_help()
        fail("--pickle-path is required!")

    # ``config`` stays a module global: transform() reads it.
    config = Config(options.config_path)

    reference_db = PickleProvider(options.pickle_path)
    database_configs = get_transform_arg_tuples(config.input_plugin, reference_db, options.commit)
    pool = ThreadPool(processes=options.max_threads)
    pool.map_async(transform, database_configs).get(99999)
# strategies/column_default.py
class ColumnDefaultStrategy(AttributeStrategy):
    """
    Sets the default of a column to the expected value; columns named ``id``
    are skipped.
    """
    SQL_DATATYPE = """
        ALTER TABLE ONLY "%(table_name)s" ALTER COLUMN "%(column_name)s" SET DEFAULT %(expected)s
    """

    def __init__(self, **kwargs):
        Strategy.__init__(self, **kwargs)

    def execute(self, cursor, diff_node):
        info = self.get_column_info(diff_node)
        params = vars(info)
        if info.column_name == 'id':
            strat_print_warn("SKIPPING %(table_name)s.%(column_name)s" % params)
            return
        strat_print_success("SETTING %(table_name)s.%(column_name)s DEFAULT to %(expected)s" % params)
        cursor.execute(self.SQL_DATATYPE % params)

    @property
    def target(self):
        return 'column_default'


# strategies/datatype.py
class DatatypeStrategy(AttributeStrategy):
    """
    Alters the type of a column to the expected type.
    """
    SQL_DATATYPE = """
        ALTER TABLE "%(table_name)s" alter column "%(column_name)s" type %(expected)s
    """

    def __init__(self, **kwargs):
        Strategy.__init__(self, **kwargs)

    def execute(self, cursor, diff_node):
        params = vars(self.get_column_info(diff_node))
        strat_print_success("SETTING %(table_name)s.%(column_name)s type to %(expected)s" % params)
        cursor.execute(self.SQL_DATATYPE % params)

    @property
    def target(self):
        return 'udt_name'


# strategies/drop_table.py
class DropTableStrategy(TypeStrategy):
    """
    Drops the table described by the diff node, cascading to dependents.
    """
    SQL_DROP_TABLE = """
        DROP TABLE "%(name)s" CASCADE
    """

    def __init__(self, **kwargs):
        Strategy.__init__(self, **kwargs)

    def execute(self, cursor, diff_node):
        node_attrs = vars(diff_node)
        strat_print_success("Dropping table %(name)s" % node_attrs)
        cursor.execute(self.SQL_DROP_TABLE % node_attrs)

    @property
    def target(self):
        return Table


# strategies/nullable.py
class NotNullableStrategy(AttributeStrategy):
    """
    Sets or drops the NOT NULL constraint of a column: an expected value of
    ``False`` sets NOT NULL, anything else drops it.
    """
    SQL_SET_NOT_NULL = """
        ALTER TABLE "%(table_name)s" ALTER COLUMN "%(column_name)s" SET NOT NULL
    """

    SQL_SET_NULL = """
        ALTER TABLE "%(table_name)s" ALTER COLUMN "%(column_name)s" DROP NOT NULL
    """

    def __init__(self, **kwargs):
        Strategy.__init__(self, **kwargs)

    def execute(self, cursor, diff_node):
        info = self.get_column_info(diff_node)
        params = vars(info)
        if info.expected is False:
            notice = "Setting %(table_name)s.%(column_name)s to NOT NULL"
            statement = self.SQL_SET_NOT_NULL
        else:
            notice = "Setting %(table_name)s.%(column_name)s to NULLABLE"
            statement = self.SQL_SET_NULL
        strat_print_success(notice % params)
        cursor.execute(statement % params)

    @property
    def target(self):
        return 'is_nullable'