├── src ├── main │ ├── resources │ │ ├── ext_xml.txt │ │ ├── ext_text.txt │ │ ├── ext_archives.txt │ │ ├── pmd.xml │ │ ├── java_header_regex_template.txt │ │ └── checkstyle.xml │ └── java │ │ └── io │ │ └── github │ │ └── hoijui │ │ └── rezipdoc │ │ ├── ReroutableConsoleHandler.java │ │ ├── BasicLogFormatter.java │ │ ├── BufferedOutputStream.java │ │ ├── ZipDoc.java │ │ ├── BinaryUtil.java │ │ ├── ReZip.java │ │ ├── XmlFormatter.java │ │ └── Utils.java └── test │ └── java │ └── io │ └── github │ └── hoijui │ └── rezipdoc │ ├── BasicLogFormatterTest.java │ ├── UtilsTest.java │ ├── ZipDocTest.java │ ├── BufferedOutputStreamTest.java │ ├── AbstractReZipDocTest.java │ ├── ReZipTest.java │ ├── BinaryUtilTest.java │ └── XmlFormatterTest.java ├── .gitignore ├── scripts ├── rezipdoc-filter-ZACplus.sh ├── rezipdoc-filter-UniProKit.sh ├── rezipdoc-sample-filter-session.sh ├── rezipdoc-complete-test.sh ├── rezipdoc-create-archives-repo.sh ├── rezipdoc-scripts-tool.sh ├── rezipdoc-history-filter.sh └── rezipdoc-repo-tool.sh ├── AUTHORS ├── .travis.yml ├── DEVELOPMENT.md ├── README.md └── pom.xml /src/main/resources/ext_xml.txt: -------------------------------------------------------------------------------- 1 | xml 2 | svg 3 | -------------------------------------------------------------------------------- /src/main/resources/ext_text.txt: -------------------------------------------------------------------------------- 1 | txt 2 | md 3 | markdown 4 | properties 5 | java 6 | kt 7 | c 8 | cxx 9 | cpp 10 | h 11 | hxx 12 | hpp 13 | js 14 | html 15 | -------------------------------------------------------------------------------- /src/main/resources/ext_archives.txt: -------------------------------------------------------------------------------- 1 | docx 2 | xlsx 3 | pptx 4 | odt 5 | ods 6 | odp 7 | mcdx 8 | slx 9 | zip 10 | jar 11 | fcstd 12 | Fcstd 13 | FCStd 14 | FCSTD 15 | -------------------------------------------------------------------------------- /.gitignore: 
-------------------------------------------------------------------------------- 1 | # Maven generated 2 | /target 3 | /pom.xml.* 4 | /release.properties 5 | 6 | # Markdown generated 7 | /DEVELOPMENT.html 8 | /LICENSE.html 9 | /README.html 10 | 11 | -------------------------------------------------------------------------------- /scripts/rezipdoc-filter-ZACplus.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | script_dir=$(dirname "$(readlink -f "${BASH_SOURCE[0]}")") 4 | 5 | "${script_dir}/rezipdoc-sample-filter-session.sh" \ 6 | "https://github.com/case06/ZACplus.git" 7 | -------------------------------------------------------------------------------- /scripts/rezipdoc-filter-UniProKit.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | script_dir=$(dirname "$(readlink -f "${BASH_SOURCE[0]}")") 4 | 5 | "${script_dir}/rezipdoc-sample-filter-session.sh" \ 6 | "https://github.com/case06/upklib_v2.git" 7 | -------------------------------------------------------------------------------- /AUTHORS: -------------------------------------------------------------------------------- 1 | # This is the list of ReZipDoc authors for copyright purposes. 2 | # 3 | # This does not necessarily list everyone who has contributed code, since in 4 | # some cases, their employer may be the copyright holder. To see the full list 5 | # of contributors, see the revision history in source control. 
6 | Carl Osterwisch 7 | Robin Vobruba 8 | 9 | -------------------------------------------------------------------------------- /src/main/resources/pmd.xml: -------------------------------------------------------------------------------- 1 | 2 | 7 | 8 | PMD rules selection suitable for JavaOSC 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | -------------------------------------------------------------------------------- /src/main/resources/java_header_regex_template.txt: -------------------------------------------------------------------------------- 1 | ^/\*$ 2 | ^ \* Copyright \(C\) \d\d\d\d(-\d\d\d\d)?, The authors of the ReZipDoc project\.$ 3 | ^ \*$ 4 | ^ \* This program is free software\: you can redistribute it and/or modify$ 5 | ^ \* it under the terms of the GNU General Public License as published by$ 6 | ^ \* the Free Software Foundation, either version 3 of the License, or$ 7 | ^ \* \(at your option\) any later version\.$ 8 | ^ \*$ 9 | ^ \* This program is distributed in the hope that it will be useful,$ 10 | ^ \* but WITHOUT ANY WARRANTY\; without even the implied warranty of$ 11 | ^ \* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE\. See the$ 12 | ^ \* GNU General Public License for more details\.$ 13 | ^ \*$ 14 | ^ \* You should have received a copy of the GNU General Public License$ 15 | ^ \* along with this program\. 
If not, see \<http\://www\.gnu\.org/licenses/\>\.$ 16 | ^ \*/$ 17 | ^$ -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: java 2 | sudo: false # faster builds 3 | 4 | addons: 5 | sonarcloud: 6 | organization: "hoijui-github" 7 | token: ${SONAR_TOKEN} 8 | 9 | jdk: 10 | # Only test one JDK to save energy 11 | # - oraclejdk8 12 | # - oraclejdk11 13 | - openjdk8 14 | # - openjdk11 15 | 16 | script: 17 | # NOTE JaCoCo is used to have code coverage, the agent has to be activated 18 | # NOTE The SonarQube Community Edition (free plan) only allows branch "master". 19 | # NOTE We only run the SonarQube stuff for the root repo, 20 | # as others (forks) will likely not have a SONAR_TOKEN specified 21 | # in the travis settings. 22 | - | 23 | mvnExtra="" 24 | if [ $TRAVIS_BRANCH = "master" ] && [ "$TRAVIS_REPO_SLUG" = "hoijui/ReZipDoc" ] 25 | then 26 | mvnExtra="sonar:sonar -Psonar -Dsonar.projectKey=hoijui_ReZipDoc" 27 | fi 28 | mvn -B clean org.jacoco:jacoco-maven-plugin:prepare-agent package $mvnExtra 29 | 30 | cache: 31 | directories: 32 | - '$HOME/.m2/repository' 33 | - '$HOME/.sonar/cache' 34 | 35 | -------------------------------------------------------------------------------- /src/main/java/io/github/hoijui/rezipdoc/ReroutableConsoleHandler.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2019, The authors of the ReZipDoc project. 3 | * 4 | * This program is free software: you can redistribute it and/or modify 5 | * it under the terms of the GNU General Public License as published by 6 | * the Free Software Foundation, either version 3 of the License, or 7 | * (at your option) any later version. 8 | * 9 | * This program is distributed in the hope that it will be useful, 10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
/**
 * A {@link ConsoleHandler} whose output stream can be replaced by callers.
 * The only thing this class adds is a {@code public} override of
 * {@link #setOutputStream(OutputStream)} that delegates to the super-class.
 */
public class ReroutableConsoleHandler extends ConsoleHandler {

    /**
     * Re-declared solely to widen the visibility of this method to
     * {@code public}; the call is forwarded unchanged to the super-class.
     *
     * @param out the stream that log output should be written to from now on
     */
    @Override
    public synchronized void setOutputStream(final OutputStream out) {
        super.setOutputStream(out);
    }
}
/**
 * A minimal log formatter that emits only the (localized and parameter
 * substituted) log message on a line of its own, followed by the
 * stack trace of the attached {@link Throwable}, if there is one.
 * No time-stamp, level or logger name is added.
 */
public class BasicLogFormatter extends Formatter {

    /**
     * Formats a single log record.
     *
     * @param record the log record to format
     * @return the message terminated by the platform line separator,
     *   followed by the stack trace of {@code record.getThrown()}
     *   if that is not {@code null}
     */
    @Override
    public synchronized String format(final LogRecord record) {

        final StringWriter stringW = new StringWriter();
        // try-with-resources makes sure the writer is flushed and closed
        // on every path, not only when there is a stack trace to print
        try (PrintWriter printW = new PrintWriter(stringW)) {
            printW.println(formatMessage(record));
            final Throwable thrown = record.getThrown();
            if (thrown != null) {
                thrown.printStackTrace(printW);
            }
        }
        return stringW.toString();
    }
}
16 | */ 17 | 18 | package io.github.hoijui.rezipdoc; 19 | 20 | import org.hamcrest.CoreMatchers; 21 | import org.hamcrest.MatcherAssert; 22 | import org.junit.Assert; 23 | import org.junit.Test; 24 | 25 | import java.util.logging.Level; 26 | import java.util.logging.LogRecord; 27 | 28 | /** 29 | * @see BasicLogFormatter 30 | */ 31 | public class BasicLogFormatterTest { 32 | 33 | private void testMessageOnly(final Level logLevel) { 34 | 35 | final BasicLogFormatter formatter = new BasicLogFormatter(); 36 | final LogRecord record = new LogRecord(logLevel, "Hello World!"); 37 | final String actual = formatter.format(record); 38 | Assert.assertEquals("Hello World!\n", actual); 39 | } 40 | 41 | private void testWithException(final Level logLevel) { 42 | 43 | final BasicLogFormatter formatter = new BasicLogFormatter(); 44 | final LogRecord record = new LogRecord(logLevel, "Hello World!"); 45 | record.setThrown(new NullPointerException()); 46 | final String actual = formatter.format(record); 47 | MatcherAssert.assertThat(actual, CoreMatchers.startsWith("Hello World!\n" 48 | + "java.lang.NullPointerException\n" 49 | + " at io.github.hoijui.rezipdoc.BasicLogFormatterTest.testWithException(BasicLogFormatterTest.java:")); 50 | } 51 | 52 | @Test 53 | public void testFinest() { 54 | testMessageOnly(Level.FINEST); 55 | } 56 | 57 | @Test 58 | public void testInfo() { 59 | testMessageOnly(Level.INFO); 60 | } 61 | 62 | @Test 63 | public void testWarning() { 64 | testMessageOnly(Level.WARNING); 65 | } 66 | 67 | @Test 68 | public void testSevere() { 69 | testMessageOnly(Level.SEVERE); 70 | } 71 | 72 | @Test 73 | public void testFinestWithException() { 74 | testWithException(Level.FINEST); 75 | } 76 | 77 | @Test 78 | public void testInfoWithException() { 79 | testWithException(Level.INFO); 80 | } 81 | 82 | @Test 83 | public void testWarningWithException() { 84 | testWithException(Level.WARNING); 85 | } 86 | 87 | @Test 88 | public void testSevereWithException() { 89 | 
testWithException(Level.SEVERE); 90 | } 91 | } 92 | -------------------------------------------------------------------------------- /src/test/java/io/github/hoijui/rezipdoc/UtilsTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2019, The authors of the ReZipDoc project. 3 | * 4 | * This program is free software: you can redistribute it and/or modify 5 | * it under the terms of the GNU General Public License as published by 6 | * the Free Software Foundation, either version 3 of the License, or 7 | * (at your option) any later version. 8 | * 9 | * This program is distributed in the hope that it will be useful, 10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | * GNU General Public License for more details. 13 | * 14 | * You should have received a copy of the GNU General Public License 15 | * along with this program. If not, see . 
/**
 * Round-trip tests for the line and suffix-file helpers in {@link Utils}.
 *
 * NOTE(review): the raw {@code List} / {@code Set} declarations below look like
 * generic type arguments were lost (probably {@code <String>}) - confirm
 * against the original source.
 *
 * @see Utils
 */
public class UtilsTest extends AbstractReZipDocTest {

    /**
     * Writes four lines to a temporary file and reads them back twice:
     * once with the second argument of {@code readLines} set to {@code false},
     * expecting all lines, and once with {@code true}, expecting the
     * {@code "# line2"} and the empty line to be missing.
     */
    @Test
    public void fileWriteAndReadLines() throws IOException {

        final List linesExpectedUnfiltered = Arrays.asList("line 1", "# line2", "", "4th line");
        final List linesExpectedFiltered = Arrays.asList("line 1", "4th line");

        // the file is removed again when the JVM exits
        final Path tmpFile = Files.createTempFile(getClass().getName() + "_filtered_", ".zip");
        tmpFile.toFile().deleteOnExit();

        Utils.writeLines(tmpFile, linesExpectedUnfiltered);

        final List linesActualUnfiltered = Utils.readLines(tmpFile, false);
        Assert.assertArrayEquals(linesExpectedUnfiltered.toArray(), linesActualUnfiltered.toArray());

        final List linesActualFiltered = Utils.readLines(tmpFile, true);
        Assert.assertArrayEquals(linesExpectedFiltered.toArray(), linesActualFiltered.toArray());
    }

    /**
     * Checks that the collected suffix sets equal the built-in defaults,
     * both after {@code writeSuffixesFiles()} and after
     * {@code deleteSuffixesFiles()} have been called.
     * The order of the two phases matters: write first, then delete.
     */
    @Test
    public void writeAndDeleteSuffixFiles() throws IOException, URISyntaxException {

        // phase 1: with the suffix files written
        Utils.writeSuffixesFiles();
        Set loadedSuffixesXml = Utils.collectFileOrDefaults(
                Utils.RESOURCE_FILE_SUFFIXES_XML, Utils.DEFAULT_SUFFIXES_XML);
        Set loadedSuffixesText = Utils.collectFileOrDefaults(
                Utils.RESOURCE_FILE_SUFFIXES_TEXT, Utils.DEFAULT_SUFFIXES_TEXT);
        Set loadedSuffixesArchive = Utils.collectFileOrDefaults(
                Utils.RESOURCE_FILE_SUFFIXES_ARCHIVE, Utils.DEFAULT_SUFFIXES_ARCHIVE);
        Assert.assertEquals(Utils.DEFAULT_SUFFIXES_XML, loadedSuffixesXml);
        Assert.assertEquals(Utils.DEFAULT_SUFFIXES_TEXT, loadedSuffixesText);
        Assert.assertEquals(Utils.DEFAULT_SUFFIXES_ARCHIVE, loadedSuffixesArchive);

        // phase 2: with the suffix files deleted again
        Utils.deleteSuffixesFiles();
        loadedSuffixesXml = Utils.collectFileOrDefaults(
                Utils.RESOURCE_FILE_SUFFIXES_XML, Utils.DEFAULT_SUFFIXES_XML);
        loadedSuffixesText = Utils.collectFileOrDefaults(
                Utils.RESOURCE_FILE_SUFFIXES_TEXT, Utils.DEFAULT_SUFFIXES_TEXT);
        loadedSuffixesArchive = Utils.collectFileOrDefaults(
                Utils.RESOURCE_FILE_SUFFIXES_ARCHIVE, Utils.DEFAULT_SUFFIXES_ARCHIVE);
        Assert.assertEquals(Utils.DEFAULT_SUFFIXES_XML, loadedSuffixesXml);
        Assert.assertEquals(Utils.DEFAULT_SUFFIXES_TEXT, loadedSuffixesText);
        Assert.assertEquals(Utils.DEFAULT_SUFFIXES_ARCHIVE, loadedSuffixesArchive);
    }
}
/**
 * A thin extension of {@link ByteArrayOutputStream}
 * that offers a few additional (partly unsafe) operations
 * working directly on the internal buffer,
 * mainly to avoid excessive copying of memory.
 */
@SuppressWarnings("WeakerAccess")
public class BufferedOutputStream extends ByteArrayOutputStream {

    /**
     * Creates a new stream with an initial buffer capacity of 256 bytes;
     * the buffer grows if necessary.
     */
    public BufferedOutputStream() {
        this(256);
    }

    /**
     * Creates a new stream with the given initial buffer capacity.
     *
     * @param size the initial capacity, in bytes.
     * @exception IllegalArgumentException if size is negative.
     */
    public BufferedOutputStream(final int size) {
        super(size);
    }

    /**
     * Checks whether the bytes written so far begin with the given bytes.
     *
     * @param prefix the byte sequence to look for at the start of this buffer.
     * @return {@code true} if every byte of {@code prefix} equals the byte
     *   at the same position in this buffer (which is trivially the case
     *   for an empty {@code prefix}); {@code false} if {@code prefix} is
     *   longer than the content of this buffer, or any byte differs.
     */
    public boolean startsWith(final byte[] prefix) {

        if (prefix.length > count) {
            return false;
        }
        for (int pos = 0; pos < prefix.length; pos++) {
            if (prefix[pos] != buf[pos]) {
                return false;
            }
        }
        return true;
    }

    /**
     * Creates an {@code InputStream} over the current content of this buffer.
     * CAUTION If {@code copyBytes} is false, do not add data to this buffer
     * while the stream is still in use!
     *
     * @param copyBytes {@code true} to hand a private copy of the data
     *   to the stream, {@code false} to let it read the internal array directly.
     *   In the latter case, the behavior of the returned stream is undefined
     *   if this buffer is modified after the stream was created.
     * @return an {@code InputStream} delivering the bytes
     *   that were in this buffer at the time of the call.
     */
    public ByteArrayInputStream createInputStream(final boolean copyBytes) {

        return copyBytes
                ? new ByteArrayInputStream(toByteArray())
                : new ByteArrayInputStream(buf, 0, count);
    }
}
61 | 62 | ```bash 63 | mvn \ 64 | clean \ 65 | package \ 66 | verify \ 67 | gpg:sign \ 68 | site 69 | ``` 70 | 71 | If the above command finished without errors, 72 | we are ready to do a test release run ("dry"): 73 | 74 | ```bash 75 | mvn release:clean 76 | mvn \ 77 | -DdryRun=true \ 78 | release:prepare 79 | ``` 80 | 81 | If that also went fine, we prepare the release locally, for real now: 82 | 83 | --- 84 | 85 | __NOTE__ 86 | 87 | This is where using the right JDK version is important! 88 | 89 | You might want to do that in this way: 90 | 91 | ```bash 92 | # open a temporary shell, to not spill the changes in env vars 93 | bash 94 | # set env vars 95 | export JAVA_HOME="/usr/lib/jvm/java-8-openjdk-amd64" 96 | export PATH="${JAVA_HOME}/bin/:${PATH}" 97 | ``` 98 | 99 | Then perform the steps below, 100 | and afterwards `exit` from the temporary shell again. 101 | 102 | --- 103 | 104 | ```bash 105 | # run the prepare phase for real 106 | mvn release:clean 107 | mvn \ 108 | -DdryRun=false \ 109 | release:prepare 110 | ``` 111 | 112 | This does the following: 113 | 114 | * asks for the release and new snapshot versions to use (for all modules) 115 | * packages the release JARs 116 | * signs with GPG 117 | * creates the release and post-release commits 118 | * tags the release 119 | 120 | Now we publish the release to the public, to Maven Central. 121 | This will take quite some time, 122 | and the second line might end with an error 123 | while waiting for the promotion to finish, 124 | which you can ignore. 125 | 126 | Make sure to set `release_tag` to the actual tag. 
127 | 128 | ```bash 129 | release_tag="rezipdoc-0.2" 130 | git push origin master "$release_tag" 131 | mvn release:perform 132 | mvn deploy 133 | ``` 134 | 135 | This does the following: 136 | 137 | * pushes to origin 138 | * checks-out the release tag 139 | * builds 140 | * deploys into the Sonatype staging repository 141 | * promotes it to the Maven Central repository 142 | * releases a SNAPSHOT with the new version 143 | 144 | This last step should take no more than 4h. 145 | 146 | ... done! :-) 147 | 148 | If all has gone well, you should be able to 149 | [find the release on Maven Central](https://search.maven.org/search?q=g:io.github.hoijui.rezipdoc). 150 | -------------------------------------------------------------------------------- /src/test/java/io/github/hoijui/rezipdoc/ZipDocTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2015-2019, The authors of the ReZipDoc project. 3 | * 4 | * This program is free software: you can redistribute it and/or modify 5 | * it under the terms of the GNU General Public License as published by 6 | * the Free Software Foundation, either version 3 of the License, or 7 | * (at your option) any later version. 8 | * 9 | * This program is distributed in the hope that it will be useful, 10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | * GNU General Public License for more details. 13 | * 14 | * You should have received a copy of the GNU General Public License 15 | * along with this program. If not, see . 
16 | */ 17 | 18 | package io.github.hoijui.rezipdoc; 19 | 20 | import org.hamcrest.CoreMatchers; 21 | import org.hamcrest.Matcher; 22 | import org.hamcrest.MatcherAssert; 23 | import org.junit.Test; 24 | 25 | import java.io.IOException; 26 | import java.io.PrintStream; 27 | import java.util.LinkedList; 28 | import java.util.List; 29 | import java.util.zip.ZipEntry; 30 | 31 | /** 32 | * @see ZipDoc 33 | */ 34 | public class ZipDocTest extends AbstractReZipDocTest { 35 | 36 | private void testRecursive(final boolean recursive) throws IOException { 37 | 38 | // This is the original, compressed file 39 | createRecursiveZip(zipFile, projectRoot, archiveContents, ZipEntry.DEFLATED); 40 | 41 | // This creates the uncompressed file 42 | final BufferedOutputStream bufferedOutputStream = new BufferedOutputStream(); 43 | // new ZipDoc(recursive, false).transform( 44 | // new ZipInputStream(Files.newInputStream(zipFile)), 45 | // new PrintStream(bufferedOutputStream)); 46 | final List mainArgs = new LinkedList<>(); 47 | if (!recursive) { 48 | mainArgs.add("--non-recursive"); 49 | } 50 | mainArgs.add(zipFile.toFile().getAbsolutePath()); 51 | final PrintStream outBefore = System.out; 52 | try (PrintStream tempOut = new PrintStream(bufferedOutputStream)) { 53 | System.setOut(tempOut); 54 | ZipDoc.main(mainArgs.toArray(new String[0])); 55 | System.setOut(outBefore); 56 | } 57 | 58 | // Test whether the filtered ZIP file does (not) contain the original content 59 | // placed in a sub-ZIP file in plain text 60 | checkContains(recursive, bufferedOutputStream, archiveContents.subList(0, 2)); 61 | // Test whether the filtered ZIP file contains the directly embedded original content 62 | // in plain text 63 | checkContains(true, bufferedOutputStream, archiveContents.subList(2, archiveContents.size())); 64 | } 65 | 66 | @Test 67 | public void testNonRecursive() throws IOException { 68 | testRecursive(false); 69 | } 70 | 71 | @Test 72 | public void testRecursive() throws IOException { 73 
| testRecursive(true); 74 | } 75 | 76 | @Test 77 | public void testHelp() throws IOException { 78 | 79 | final Matcher helpMatchers = CoreMatchers.allOf( 80 | CoreMatchers.startsWith(ZipDoc.class.getSimpleName()), 81 | CoreMatchers.containsString("License:"), 82 | CoreMatchers.containsString("Usage:"), 83 | CoreMatchers.containsString("Examples:")); 84 | 85 | try (BufferedOutputStream outBuffer = new BufferedOutputStream()) { 86 | Utils.getLogHandler().setOutputStream(outBuffer); 87 | ZipDoc.main(new String[] { "-h" }); 88 | MatcherAssert.assertThat(new String(outBuffer.toByteArray()), helpMatchers); 89 | 90 | outBuffer.reset(); 91 | ZipDoc.main(new String[] { "--help" }); 92 | MatcherAssert.assertThat(new String(outBuffer.toByteArray()), helpMatchers); 93 | } finally { 94 | Utils.getLogHandler().setOutputStream(System.err); 95 | } 96 | } 97 | 98 | @Test 99 | public void testNoArgs() throws IOException { 100 | 101 | exit.expectSystemExitWithStatus(1); 102 | try (BufferedOutputStream outBuffer = new BufferedOutputStream()) { 103 | Utils.getLogHandler().setOutputStream(outBuffer); 104 | ZipDoc.main(new String[] {}); 105 | } finally { 106 | Utils.getLogHandler().setOutputStream(System.err); 107 | } 108 | } 109 | 110 | @Test 111 | public void testInvalidArgument() throws IOException { 112 | 113 | exit.expectSystemExitWithStatus(1); 114 | try (BufferedOutputStream outBuffer = new BufferedOutputStream()) { 115 | Utils.getLogHandler().setOutputStream(outBuffer); 116 | ReZip.main(new String[] { "-invalid-argument", "theZipFile" }); 117 | } finally { 118 | Utils.getLogHandler().setOutputStream(System.err); 119 | } 120 | } 121 | } 122 | -------------------------------------------------------------------------------- /src/test/java/io/github/hoijui/rezipdoc/BufferedOutputStreamTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2019, The authors of the ReZipDoc project. 
/**
 * Tests the additions {@link BufferedOutputStream} makes on top of
 * {@code ByteArrayOutputStream}: {@code startsWith} and {@code createInputStream}.
 * In the {@code createInputStream} tests, the order of the write / create /
 * reset calls is what is being tested, so it must not be changed.
 *
 * @see BufferedOutputStream
 */
public class BufferedOutputStreamTest {

    /** Full content, a real prefix and the empty prefix match; a longer sequence does not. */
    @Test
    public void testStartsWith() throws IOException {

        final BufferedOutputStream outStream = new BufferedOutputStream();
        outStream.write("hello".getBytes());

        Assert.assertTrue(outStream.startsWith("hello".getBytes()));
        Assert.assertTrue(outStream.startsWith("hel".getBytes()));
        Assert.assertTrue(outStream.startsWith("".getBytes()));
        Assert.assertFalse(outStream.startsWith("hello world".getBytes()));
    }

    /**
     * Exercises {@code createInputStream(false)}, where the created stream
     * references the data of the buffer instead of copying it.
     */
    @Test
    public void testCreateInputStreamWithReference() throws IOException {

        // plain case: no modification after the stream was created
        try (BufferedOutputStream outStream = new BufferedOutputStream()) {
            outStream.write("hello".getBytes());
            try (ByteArrayInputStream inStream = outStream.createInputStream(false)) {
                Assert.assertEquals("hello", Utils.readStreamToString(inStream));
            }
        }

        // The length of the buffer is always separate for the created InputStream,
        // thus we see the same output in this scenario
        try (BufferedOutputStream outStream = new BufferedOutputStream()) {
            outStream.write("hello".getBytes());
            try (ByteArrayInputStream inStream = outStream.createInputStream(false)) {
                outStream.write(" world".getBytes());
                Assert.assertEquals("hello", Utils.readStreamToString(inStream));
            }
        }

        // a reset alone leaves the bytes the stream reads untouched
        try (BufferedOutputStream outStream = new BufferedOutputStream()) {
            outStream.write("hello".getBytes());
            try (ByteArrayInputStream inStream = outStream.createInputStream(false)) {
                outStream.reset();
                Assert.assertEquals("hello", Utils.readStreamToString(inStream));
            }
        }

        // reset followed by a write of the same length: the stream now
        // delivers the new bytes, which shows that the data is shared
        try (BufferedOutputStream outStream = new BufferedOutputStream()) {
            outStream.write("hello".getBytes());
            try (ByteArrayInputStream inStream = outStream.createInputStream(false)) {
                outStream.reset();
                outStream.write("world".getBytes());
                Assert.assertEquals("world", Utils.readStreamToString(inStream));
            }
        }
    }

    /**
     * Exercises {@code createInputStream(true)}: the same four scenarios as
     * in the reference test, but here the stream is expected to deliver
     * {@code "hello"} in every one of them, including after reset + write.
     */
    @Test
    public void testCreateInputStreamWithCopy() throws IOException {

        try (BufferedOutputStream outStream = new BufferedOutputStream()) {
            outStream.write("hello".getBytes());
            try (ByteArrayInputStream inStream = outStream.createInputStream(true)) {
                Assert.assertEquals("hello", Utils.readStreamToString(inStream));
            }
        }

        try (BufferedOutputStream outStream = new BufferedOutputStream()) {
            outStream.write("hello".getBytes());
            try (ByteArrayInputStream inStream = outStream.createInputStream(true)) {
                outStream.write(" world".getBytes());
                Assert.assertEquals("hello", Utils.readStreamToString(inStream));
            }
        }

        try (BufferedOutputStream outStream = new BufferedOutputStream()) {
            outStream.write("hello".getBytes());
            try (ByteArrayInputStream inStream = outStream.createInputStream(true)) {
                outStream.reset();
                Assert.assertEquals("hello", Utils.readStreamToString(inStream));
            }
        }

        try (BufferedOutputStream outStream = new BufferedOutputStream()) {
            outStream.write("hello".getBytes());
            try (ByteArrayInputStream inStream = outStream.createInputStream(true)) {
                outStream.reset();
                outStream.write("world".getBytes());
                Assert.assertEquals("hello", Utils.readStreamToString(inStream));
            }
        }
    }
}
#    print the bare size of these two repos for comparison
# 5. opens a GUI history browser for each of these repos,
#    as to compare the difference in changes in FreeCAD files (.fcstd),
#    which show as a simple "binary files differ" in the original,
#    vs a textual diff of the contained plain-text files in the filtered version.

# Exit immediately on each error and unset variable;
# see: https://vaneyckt.io/posts/safer_bash_scripts_with_set_euxo_pipefail/
set -Eeuo pipefail
#set -Eeu

# The repo to filter is supplied as first parameter.
# Good candidates are repos with a lot of FreeCAD files,
# which are essentially ZIP files.
git_url="${1:-}"
if [ -z "$git_url" ]
then
	>&2 echo "ERROR: Please supply a git URL as first parameter!"
	exit 1
fi
# Derive the project name from the last path element of the URL:
# * strip trailing slashes first, so '.../repo.git/' does not yield an empty name
# * strip everything up to the last slash
# * strip a literal '.git' suffix (the dot is escaped,
#   so a name like 'widgit' is left untouched)
project_name="$(echo "$git_url" | sed -e 's|/*$||' -e 's|.*/||' -e 's|\.git$||')"
bfg_version="1.13.0"

echo "Filtering project '$project_name' from '$git_url' ..."
echo "(press ^C to abort)"
sleep 5
echo

# Create a random number between 0 and 255
rnd=$(od -A n -t d -N 1 /dev/urandom | tr -d ' ')

# Removes left-over filter refs, expires the whole reflog
# and aggressively garbage-collects the repo in the current directory.
# NOTE(review): a bare repo has no '.git/' sub-directory,
#   so the 'rm' below only ever matches in a non-bare repo.
_git_compact() {

	rm -rf .git/refs/original/
	git reflog expire --expire=now --all
	git gc --prune=now --aggressive
}

# Helper function that creates a bare git repo clone.
# This is useful to evaluate the real size of a git repo,
# as this is the size transferred over the network when cloning,
# or which is used on a server like GitHub.
# NOTE This changes the current directory to the bare repo!
# $1: the repo to clone
# $2: where to create the bare clone
create_bare_repo() {

	repo_orig="$1"
	bare_repo="$2"

	git clone --bare "${repo_orig}" "${bare_repo}"
	cd "${bare_repo}"
	_git_compact
}

# Helper function that evaluates the size of a git repo,
# using different ways of measuring.
79 | check_git_repo_size() { 80 | 81 | repo_orig="$1" 82 | bare_repo="/tmp/rezipdoc-test-$(basename "$repo_orig")-bare-$rnd" 83 | 84 | create_bare_repo "${repo_orig}" "${bare_repo}" 85 | 86 | du=/usr/bin/du 87 | repo_size_human=$(${du} -sh "${bare_repo}" | sed 's/[ \t].*//') 88 | repo_size_apparent=$(${du} -sb "${bare_repo}" | sed 's/[ \t].*//') 89 | repo_size_raw=$(${du} -s "${bare_repo}" | sed 's/[ \t].*//') 90 | 91 | rm -Rf "${bare_repo}" 92 | 93 | printf "%s\t%s\t%s\n" "$repo_size_human" "$repo_size_raw" "$repo_size_apparent" 94 | } 95 | 96 | repo_orig="/tmp/${project_name}-orig-$rnd" 97 | repo_filtered="/tmp/${project_name}-filtered-$rnd" 98 | 99 | # Install helper scripts 100 | # NOTE Potential security risk! 101 | curl -s -L https://raw.githubusercontent.com/hoijui/ReZipDoc/master/scripts/rezipdoc-scripts-tool.sh \ 102 | | sh -s install --path --dev || true 103 | 104 | # Create local clone of the project. 105 | git clone "$git_url" "$repo_orig" 106 | size_orig=$(check_git_repo_size "$repo_orig") 107 | 108 | # Remove from history: 109 | # * FreeCAD backup files (*.fcstd1) 110 | # * 3D-printing instructions (*.gcode) 111 | prev_dir=$(pwd) 112 | cd "$repo_orig" 113 | curl https://repo1.maven.org/maven2/com/madgag/bfg/${bfg_version}/bfg-1${bfg_version}.jar -o bfg-${bfg_version}.jar 114 | java -jar ./bfg-${bfg_version}.jar --no-blob-protection --delete-files '*.{fcstd1,FCStd1,gcode}' ./ 115 | cd "$prev_dir" 116 | 117 | # Create a ReZip filtered clone of the above project 118 | rezipdoc-history-filter.sh \ 119 | --source "$repo_orig" \ 120 | --branch master \ 121 | --orig \ 122 | --target "$repo_filtered" 123 | 124 | (cd "$repo_filtered"; git remote rm source) 125 | size_filtered=$(check_git_repo_size "$repo_filtered") 126 | 127 | # Print bare repo sizes 128 | echo -e "Bare repo size '$repo_orig': $size_orig" 129 | echo -e "Bare repo size '$repo_filtered': $size_filtered" 130 | 131 | # Open git history browser sessions, 132 | # so one can see the binary vs textual 
diff representation 133 | # of the ZIP based files 134 | (cd "$repo_orig"; gitk &) 135 | (cd "$repo_filtered"; gitk &) 136 | -------------------------------------------------------------------------------- /src/test/java/io/github/hoijui/rezipdoc/AbstractReZipDocTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2015-2019, The authors of the ReZipDoc project. 3 | * 4 | * This program is free software: you can redistribute it and/or modify 5 | * it under the terms of the GNU General Public License as published by 6 | * the Free Software Foundation, either version 3 of the License, or 7 | * (at your option) any later version. 8 | * 9 | * This program is distributed in the hope that it will be useful, 10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | * GNU General Public License for more details. 13 | * 14 | * You should have received a copy of the GNU General Public License 15 | * along with this program. If not, see . 
16 | */ 17 | 18 | package io.github.hoijui.rezipdoc; 19 | 20 | import org.junit.After; 21 | import org.junit.Assert; 22 | import org.junit.Before; 23 | import org.junit.Rule; 24 | import org.junit.contrib.java.lang.system.ExpectedSystemExit; 25 | 26 | import java.io.File; 27 | import java.io.IOException; 28 | import java.lang.invoke.MethodHandles; 29 | import java.nio.file.Files; 30 | import java.nio.file.Path; 31 | import java.util.Collections; 32 | import java.util.HashMap; 33 | import java.util.List; 34 | import java.util.Map; 35 | import java.util.stream.Collectors; 36 | import java.util.zip.ZipEntry; 37 | import java.util.zip.ZipOutputStream; 38 | 39 | @SuppressWarnings("WeakerAccess") 40 | public abstract class AbstractReZipDocTest { 41 | 42 | protected Path projectRoot; 43 | protected List archiveContents; 44 | protected Path zipFile; 45 | @Rule 46 | public final ExpectedSystemExit exit = ExpectedSystemExit.none(); 47 | 48 | protected static List createArchiveContentsList(final Path scanRoot) throws IOException { 49 | 50 | final List collect = Files.find(scanRoot, 15, 51 | (p, a) -> p.getFileName().toString().matches(".*[.](java|txt|xml|properties)")) 52 | .map(Path::toFile) 53 | .collect(Collectors.toList()); 54 | if (collect.isEmpty()) { 55 | throw new RuntimeException("No sample files found to use in ZIP testing"); 56 | } 57 | 58 | return collect; 59 | } 60 | 61 | protected static void createZip(final Path zipFile, final Map> rootContents, final int compressionMethod) throws IOException { 62 | 63 | try (ZipOutputStream zipOut = new ZipOutputStream(Files.newOutputStream(zipFile))) { 64 | zipOut.setMethod(compressionMethod); 65 | for (final Path rootDir : rootContents.keySet()) { 66 | for (final File file : rootContents.get(rootDir)) { 67 | final Path relPath = rootDir.relativize(file.toPath()); 68 | final ZipEntry entry = new ZipEntry(relPath.toString()); 69 | entry.setMethod(compressionMethod); 70 | zipOut.putNextEntry(entry); 71 | 
Files.copy(file.toPath(), zipOut); 72 | zipOut.closeEntry(); 73 | } 74 | } 75 | } 76 | } 77 | 78 | protected static void createZip( 79 | final Path zipFile, 80 | final Path rootDir, 81 | final List contents, 82 | final int compressionMethod) 83 | throws IOException 84 | { 85 | final Map> rootContents = new HashMap<>(); 86 | rootContents.put(rootDir, contents); 87 | createZip(zipFile, rootContents, compressionMethod); 88 | } 89 | 90 | protected static void createRecursiveZip( 91 | final Path zipFile, 92 | final Path rootDir, 93 | final List contents, 94 | final int compressionMethod) 95 | throws IOException 96 | { 97 | if (contents.size() < 4) { 98 | throw new IllegalStateException("We need at least 4 content elements for a recursive ZIP file"); 99 | } 100 | 101 | final Map> subRootContents = new HashMap<>(); 102 | subRootContents.put(rootDir, contents.subList(0, 2)); 103 | final Path subZipFile = Files.createTempFile(MethodHandles.lookup().lookupClass().getName() 104 | + "_original_sub_", ".zip"); 105 | createZip(subZipFile, subRootContents, compressionMethod); 106 | 107 | final Map> mainRootContents = new HashMap<>(); 108 | mainRootContents.put(subZipFile.getParent(), Collections.singletonList(subZipFile.toFile())); 109 | mainRootContents.put(rootDir, contents.subList(2, contents.size())); 110 | createZip(zipFile, mainRootContents, compressionMethod); 111 | 112 | Files.deleteIfExists(subZipFile); 113 | } 114 | 115 | @Before 116 | public void setUp() throws IOException { 117 | 118 | projectRoot = new File("").toPath(); 119 | archiveContents = createArchiveContentsList(projectRoot.resolve("src")); 120 | // We do not want too much content 121 | final int maxContentFiles = 10; 122 | if (archiveContents.size() > maxContentFiles) { 123 | archiveContents = archiveContents.subList(0, maxContentFiles); 124 | } 125 | 126 | zipFile = Files.createTempFile(getClass().getName() + "_original_", ".zip"); 127 | } 128 | 129 | @After 130 | public void tearDown() { 131 | 132 | try { 
133 | Files.deleteIfExists(zipFile); 134 | } catch (IOException exc) { 135 | // ignore 136 | } 137 | } 138 | 139 | protected static void checkContains( 140 | final boolean contains, 141 | final String contentsActual, 142 | final List contentsExpected) 143 | throws IOException 144 | { 145 | for (final File file : contentsExpected) { 146 | final String fileContent = new String(Files.readAllBytes(file.toPath())); 147 | Assert.assertEquals( 148 | "Content of file '" + file.toString() + "' does " + (contains ? "not " : "") 149 | + "appear in Re(Un)Zip filtered ZIP file", 150 | contains, 151 | contentsActual.contains(fileContent)); 152 | } 153 | } 154 | 155 | protected static void checkContains(final boolean contains, final Path zipFile, final List contentsExpected) throws IOException { 156 | 157 | final String wholeZipContent = new String(Files.readAllBytes(zipFile)); 158 | checkContains(contains, wholeZipContent, contentsExpected); 159 | } 160 | 161 | protected static void checkContains(final boolean contains, final BufferedOutputStream contentsActual, final List contentsExpected) throws IOException { 162 | checkContains(contains, contentsActual.toString(), contentsExpected); 163 | } 164 | } 165 | -------------------------------------------------------------------------------- /src/test/java/io/github/hoijui/rezipdoc/ReZipTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2015-2019, The authors of the ReZipDoc project. 3 | * 4 | * This program is free software: you can redistribute it and/or modify 5 | * it under the terms of the GNU General Public License as published by 6 | * the Free Software Foundation, either version 3 of the License, or 7 | * (at your option) any later version. 8 | * 9 | * This program is distributed in the hope that it will be useful, 10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the 12 | * GNU General Public License for more details. 13 | * 14 | * You should have received a copy of the GNU General Public License 15 | * along with this program. If not, see . 16 | */ 17 | 18 | package io.github.hoijui.rezipdoc; 19 | 20 | import org.hamcrest.CoreMatchers; 21 | import org.hamcrest.Matcher; 22 | import org.hamcrest.MatcherAssert; 23 | import org.junit.After; 24 | import org.junit.Before; 25 | import org.junit.Test; 26 | 27 | import java.io.IOException; 28 | import java.io.InputStream; 29 | import java.io.PrintStream; 30 | import java.nio.file.Files; 31 | import java.nio.file.Path; 32 | import java.util.LinkedList; 33 | import java.util.List; 34 | import java.util.zip.ZipEntry; 35 | 36 | /** 37 | * @see ReZip 38 | */ 39 | @SuppressWarnings("WeakerAccess") 40 | public class ReZipTest extends AbstractReZipDocTest { 41 | 42 | protected Path reZipFile; 43 | 44 | @Before 45 | public void setUp() throws IOException { 46 | 47 | super.setUp(); 48 | reZipFile = Files.createTempFile(getClass().getName() + "_filtered_", ".zip"); 49 | } 50 | 51 | @After 52 | public void tearDown() { 53 | 54 | super.tearDown(); 55 | try { 56 | Files.deleteIfExists(reZipFile); 57 | } catch (IOException exc) { 58 | // ignore 59 | } 60 | } 61 | 62 | private void runReZip(final boolean compression, final boolean nullifyTimes, 63 | final boolean recursive, final boolean formatXml, 64 | final Path zipFile, final Path reZipFile) 65 | throws IOException 66 | { 67 | // call the internal function directly 68 | // new ReZip(compression, nullifyTimes, recursive, formatXml) 69 | // .reZip(zipFile, reZipFile); 70 | 71 | // call main method (this tests more code, and uses the class 72 | // nearly as it wil be used as a git filter 73 | final List mainArgs = new LinkedList<>(); 74 | if (compression) { 75 | mainArgs.add("--compressed"); 76 | } 77 | if (nullifyTimes) { 78 | mainArgs.add("--nullify-times"); 79 | } 80 | if (!recursive) { 81 | mainArgs.add("--non-recursive"); 82 | } 83 | if 
(formatXml) { 84 | mainArgs.add("--format-xml"); 85 | } 86 | final InputStream inBefore = System.in; 87 | final PrintStream outBefore = System.out; 88 | try (InputStream tempIn = Files.newInputStream(zipFile); 89 | PrintStream tempOut = new PrintStream(Files.newOutputStream(reZipFile))) 90 | { 91 | System.setIn(tempIn); 92 | System.setOut(tempOut); 93 | ReZip.main(mainArgs.toArray(new String[0])); 94 | } finally { 95 | System.setIn(inBefore); 96 | System.setOut(outBefore); 97 | } 98 | } 99 | 100 | private void testRecursive(final boolean recursive) throws IOException { 101 | 102 | // This is the original, compressed file 103 | createRecursiveZip(zipFile, projectRoot, archiveContents, ZipEntry.DEFLATED); 104 | 105 | // This creates the uncompressed file 106 | runReZip(false, false, recursive, false, zipFile, reZipFile); 107 | 108 | // Test whether the filtered ZIP file does (not) contain the original content 109 | // placed in a sub-ZIP file in plain text 110 | checkContains(recursive, reZipFile, archiveContents.subList(0, 2)); 111 | // Test whether the filtered ZIP file contains the directly embedded original content 112 | // in plain text 113 | checkContains(true, reZipFile, archiveContents.subList(2, archiveContents.size())); 114 | } 115 | 116 | private void testPlainText(final boolean plainText) throws IOException { 117 | 118 | // This is the original, compressed file 119 | createZip(zipFile, projectRoot, archiveContents, ZipEntry.DEFLATED); 120 | 121 | // This creates the *still compressed* file 122 | runReZip(!plainText, false, true, false, zipFile, reZipFile); 123 | 124 | // Test whether the filtered ZIP file does (not) contain the original content in plain text 125 | checkContains(plainText, reZipFile, archiveContents); 126 | } 127 | 128 | @Test 129 | public void testNonRecursive() throws IOException { 130 | testRecursive(false); 131 | } 132 | 133 | @Test 134 | public void testRecursive() throws IOException { 135 | testRecursive(true); 136 | } 137 | 138 | 
@Test 139 | public void testContentsNotVisibleInFullInPlainText() throws IOException { 140 | testPlainText(false); 141 | } 142 | 143 | @Test 144 | public void testContentsVisibleInFullInPlainText() throws IOException { 145 | testPlainText(true); 146 | } 147 | 148 | @Test 149 | public void testHelp() throws IOException { 150 | 151 | final Matcher helpMatchers = CoreMatchers.allOf( 152 | CoreMatchers.startsWith(ReZip.class.getSimpleName()), 153 | CoreMatchers.containsString("License:"), 154 | CoreMatchers.containsString("Usage:"), 155 | CoreMatchers.containsString("Options:")); 156 | 157 | try (BufferedOutputStream outBuffer = new BufferedOutputStream()) { 158 | Utils.getLogHandler().setOutputStream(outBuffer); 159 | ReZip.main(new String[] { "-h" }); 160 | MatcherAssert.assertThat(new String(outBuffer.toByteArray()), helpMatchers); 161 | 162 | outBuffer.reset(); 163 | ReZip.main(new String[] { "--help" }); 164 | MatcherAssert.assertThat(new String(outBuffer.toByteArray()), helpMatchers); 165 | } finally { 166 | Utils.getLogHandler().setOutputStream(System.err); 167 | } 168 | } 169 | 170 | @Test 171 | public void testInvalidArgument() throws IOException { 172 | 173 | exit.expectSystemExitWithStatus(1); 174 | try (BufferedOutputStream outBuffer = new BufferedOutputStream()) { 175 | Utils.getLogHandler().setOutputStream(outBuffer); 176 | ReZip.main(new String[] { "-invalid-argument" }); 177 | } finally { 178 | Utils.getLogHandler().setOutputStream(System.err); 179 | } 180 | } 181 | } 182 | -------------------------------------------------------------------------------- /src/main/java/io/github/hoijui/rezipdoc/ZipDoc.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2015-2019, The authors of the ReZipDoc project. 
3 | * 4 | * This program is free software: you can redistribute it and/or modify 5 | * it under the terms of the GNU General Public License as published by 6 | * the Free Software Foundation, either version 3 of the License, or 7 | * (at your option) any later version. 8 | * 9 | * This program is distributed in the hope that it will be useful, 10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | * GNU General Public License for more details. 13 | * 14 | * You should have received a copy of the GNU General Public License 15 | * along with this program. If not, see . 16 | */ 17 | 18 | package io.github.hoijui.rezipdoc; 19 | 20 | import java.io.IOException; 21 | import java.io.PrintStream; 22 | import java.nio.file.Files; 23 | import java.nio.file.Path; 24 | import java.nio.file.Paths; 25 | import java.util.logging.Level; 26 | import java.util.logging.Logger; 27 | import java.util.zip.CRC32; 28 | import java.util.zip.CheckedOutputStream; 29 | import java.util.zip.ZipEntry; 30 | import java.util.zip.ZipInputStream; 31 | 32 | /** 33 | * The program takes a single argument, 34 | * the name of the ZIP file to convert, 35 | * and produces a more human readable, 36 | * textual representation of its content on stdout. 37 | * It is meant to be used as a {@code git textconv} filter; 38 | * see the README for details. 39 | */ 40 | @SuppressWarnings("WeakerAccess") 41 | public class ZipDoc { 42 | 43 | private static final Logger LOGGER = Utils.getLogger(ZipDoc.class.getName()); 44 | 45 | private final boolean recursive; 46 | private final boolean formatXml; 47 | 48 | /** 49 | * Creates an instance with specific values. 50 | * 51 | * @param recursive whether to also text-ify ZIPs within the main ZIP 52 | * (and therein, and therein, ...) 
(default: {@code true}) 53 | * @param formatXml whether to pretty-print XML content 54 | * (default: {@code true}) 55 | */ 56 | public ZipDoc(final boolean recursive, final boolean formatXml) { 57 | 58 | this.recursive = recursive; 59 | this.formatXml = formatXml; 60 | } 61 | 62 | /** 63 | * Creates an instance with default values. 64 | */ 65 | public ZipDoc() { 66 | this(true, true); 67 | } 68 | 69 | private static void printUsage(final Level logLevel) { 70 | 71 | final String name = ZipDoc.class.getSimpleName(); 72 | if (LOGGER.isLoggable(logLevel)) { 73 | Utils.printUsageHeader(LOGGER, logLevel, name); 74 | LOGGER.log(logLevel, "Usage:"); 75 | LOGGER.log(logLevel, String.format( 76 | "\t%s [Archive-input-file] # writes textual version to stdout", 77 | name)); 78 | LOGGER.log(logLevel, String.format( 79 | "Examples:%n\t%s in-file.zip > text-representation.txt", 80 | name)); 81 | } 82 | } 83 | 84 | public static void main(final String[] argv) throws IOException { 85 | 86 | if (argv.length == 0) { 87 | printUsage(Level.WARNING); 88 | System.exit(1); 89 | } 90 | if ("--help".equals(argv[0]) || "-h".equals(argv[0])) { 91 | printUsage(Level.INFO); 92 | return; 93 | } 94 | 95 | boolean recursive = true; 96 | boolean formatXml = false; 97 | for (int i = 0; i < argv.length - 1; i++) { 98 | final String arg = argv[i]; 99 | if ("--non-recursive".equals(arg)) { 100 | recursive = false; 101 | } else if ("--format-xml".equals(arg)) { 102 | formatXml = true; 103 | } else { 104 | if (LOGGER.isLoggable(Level.WARNING)) { 105 | LOGGER.log(Level.WARNING, String.format("Invalid argument '%s'%n", arg)); 106 | } 107 | printUsage(Level.WARNING); 108 | System.exit(1); 109 | } 110 | } 111 | 112 | new ZipDoc(recursive, formatXml).transform(Paths.get(argv[argv.length - 1])); 113 | } 114 | 115 | /** 116 | * Reads the specified ZIP file and outputs 117 | * a textual representation of it to stdout. 
118 | * 119 | * @param zipFile the ZIP file to convert to a text 120 | * @throws IOException if any input or output fails 121 | */ 122 | public void transform(final Path zipFile) throws IOException { 123 | 124 | try (ZipInputStream zipIn = new ZipInputStream(Files.newInputStream(zipFile))) { 125 | transform(zipIn, System.out); 126 | } 127 | } 128 | 129 | /** 130 | * Reads the specified ZIP document and outputs a textual representation 131 | * of its to the specified output stream. 132 | * 133 | * @param zipIn the ZIP document to convert to a text 134 | * @param output where the text gets written to 135 | * @throws IOException if any input or output fails 136 | */ 137 | public void transform(final ZipInputStream zipIn, final PrintStream output) 138 | throws IOException 139 | { 140 | final XmlFormatter xmlFormatter = new XmlFormatter(2, " ", true); 141 | final byte[] buffer = new byte[8192]; 142 | ZipEntry entry; 143 | final BufferedOutputStream uncompressedOutRaw = new BufferedOutputStream(); 144 | final CRC32 checkSum = new CRC32(); 145 | final CheckedOutputStream uncompressedOutChecked = new CheckedOutputStream(uncompressedOutRaw, checkSum); 146 | while ((entry = zipIn.getNextEntry()) != null) { 147 | uncompressedOutRaw.reset(); 148 | checkSum.reset(); 149 | 150 | output.println("Sub-file:\t" + entry); 151 | 152 | // Copy the file from zipIn into the uncompressed, check-summed output stream 153 | Utils.transferTo(zipIn, uncompressedOutChecked, buffer); 154 | zipIn.closeEntry(); 155 | 156 | final boolean isXml = Utils.isXml(entry.getName(), entry.getSize(), uncompressedOutRaw); 157 | if (formatXml && isXml) { 158 | // XML file: pretty-print the data to stdout 159 | xmlFormatter.prettify(uncompressedOutRaw.createInputStream(false), output, buffer); 160 | } else if (Utils.isPlainText(entry.getName(), entry.getSize(), uncompressedOutRaw) || isXml) { 161 | // Text file: dump directly to output 162 | uncompressedOutRaw.writeTo(output); 163 | } else if 
(Utils.isZip(entry.getName(), entry.getSize(), uncompressedOutRaw) 164 | && recursive) 165 | { 166 | // Zip: recursively uncompress to output 167 | output.println("Sub-ZIP start:\t" + entry.getName()); 168 | try (ZipInputStream zipInRec = new ZipInputStream( 169 | uncompressedOutRaw.createInputStream(false))) 170 | { 171 | transform(zipInRec, output); 172 | } 173 | output.println("Sub-ZIP end: \t" + entry.getName()); 174 | } else { 175 | // Unknown file type: report uncompressed size and CRC32 176 | output.println("File size:\t" + uncompressedOutRaw.size()); 177 | output.println("Checksum:\t" + Long.toHexString(checkSum.getValue())); 178 | } 179 | output.println(); 180 | } 181 | } 182 | } 183 | -------------------------------------------------------------------------------- /scripts/rezipdoc-complete-test.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # Copyright (c) 2019 Robin Vobruba 3 | # 4 | # Permission is hereby granted, free of charge, to any person obtaining a copy 5 | # of this software and associated documentation files (the "Software"), to deal 6 | # in the Software without restriction, including without limitation the rights 7 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 8 | # copies of the Software, and to permit persons to whom the Software is 9 | # furnished to do so, subject to the following conditions: 10 | # 11 | # The above copyright notice and this permission notice shall be included in all 12 | # copies or substantial portions of the Software. 13 | # 14 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 17 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 19 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 20 | # SOFTWARE. 21 | 22 | # For info about this script, please refer to the `printUsage()` function below. 23 | 24 | # Exit immediately on each error and unset variable; 25 | # see: https://vaneyckt.io/posts/safer_bash_scripts_with_set_euxo_pipefail/ 26 | set -Eeuo pipefail 27 | #set -Eeu 28 | 29 | pwd_before="$(pwd)" 30 | this_script_file=$(basename "$0") 31 | script_name="$this_script_file" 32 | this_script_dir=$(cd "$(dirname "$0")"; pwd) 33 | 34 | # Settings and default values 35 | source_repo="." 36 | add_archive_bin="false" 37 | add_archive_src="true" 38 | num_commits_max=1000 39 | 40 | printUsage() { 41 | echo "$script_name - This script does:" 42 | echo "* create one or multiple repositories including archives" 43 | echo "* filters all of them with ReZip(Doc)" 44 | echo "* creates a report about the original and filtered repository sizes" 45 | echo "Think of it as a performance demo." 
46 | echo 47 | echo "Usage:" 48 | echo " $script_name [OPTIONS]" 49 | echo 50 | echo "Options:" 51 | echo " -h, --help show this help message" 52 | echo " --no-src-archive do not add the sources ZIP to each commit" 53 | echo " --bin-archive add the binary archive (JAR) to each commit (NOTE this will take a long time)" 54 | echo " -m, --max-commits maximum number of commits to consider in each pass" 55 | echo " -s, --source [path|URL] the repo to read commits from" 56 | } 57 | 58 | # Handle command line arguments 59 | while [ $# -gt 0 ] 60 | do 61 | opName="$1" 62 | shift # skip argument 63 | case ${opName} in 64 | -h|--help) 65 | printUsage 66 | exit 0 67 | ;; 68 | --no-src-archive) 69 | add_archive_src="false" 70 | ;; 71 | --bin-archive) 72 | add_archive_bin="true" 73 | ;; 74 | -m|--max-commits) 75 | num_commits_max="$1" 76 | shift # past argument 77 | ;; 78 | -s|--source) 79 | source_repo="$1" 80 | shift # past argument 81 | ;; 82 | *) 83 | # unknown option / not an option 84 | >&2 echo "Unknown option '${opName}'!" 85 | printUsage 86 | exit 1 87 | ;; 88 | esac 89 | done 90 | 91 | if ! git ls-remote "$source_repo" > /dev/null 2> /dev/null 92 | then 93 | >&2 echo "Source repo is not a valid git repository: '$source_repo'!" 94 | exit 1 95 | fi 96 | 97 | rnd=$(od -A n -t d -N 1 /dev/urandom | tr -d ' ') 98 | tmp_repo="/tmp/rezipdoc-tmp-repo-${rnd}" 99 | archive_repo="/tmp/rezipdoc-archives-repo-${rnd}" 100 | filtered_repo="/tmp/rezipdoc-filtered-repo-${rnd}" 101 | 102 | echo "Source repo: '${source_repo}'" 103 | echo "Source repo (copy): '${tmp_repo}'" 104 | echo "Max commits: ${num_commits_max}" 105 | echo "Archives repo: '${archive_repo}'" 106 | echo "Filtered repo: '${filtered_repo}'" 107 | 108 | echo 109 | echo "Starting in 3 seconds ..." 110 | sleep 3 111 | echo 112 | 113 | echo 114 | echo "##############################" 115 | echo "# Creating archives repo ... 
#" 116 | echo "##############################" 117 | echo 118 | archives_extra_args="" 119 | if [ "$add_archive_src" = "false" ] 120 | then 121 | archives_extra_args="$archives_extra_args --no-src-archive" 122 | fi 123 | if [ "$add_archive_bin" = "true" ] 124 | then 125 | archives_extra_args="$archives_extra_args --bin-archive" 126 | fi 127 | if ! "$this_script_dir/rezipdoc-create-archives-repo.sh" \ 128 | --max-commits "${num_commits_max}" \ 129 | --source "${source_repo}" \ 130 | --target "${archive_repo}" \ 131 | --tmp "${tmp_repo}" \ 132 | ${archives_extra_args} 133 | then 134 | >&2 echo "Failed creating archives repo!" 135 | exit 1 136 | fi 137 | 138 | echo 139 | echo "##############################" 140 | echo "# Creating filtered repo ... #" 141 | echo "##############################" 142 | echo 143 | if ! "$this_script_dir/rezipdoc-history-filter.sh" \ 144 | --max-commits "${num_commits_max}" \ 145 | --source "${archive_repo}" \ 146 | --target "${filtered_repo}" 147 | then 148 | >&2 echo "Failed creating filtered repo!" 149 | exit 1 150 | fi 151 | 152 | echo 153 | echo "################################" 154 | echo "# Checking bare repo sizes ... 
#" 155 | echo "################################" 156 | echo 157 | 158 | _git_compact() { 159 | 160 | rm -rf .git/refs/original/ 161 | git reflog expire --expire=now --all 162 | git gc --prune=now --aggressive 163 | } 164 | 165 | create_bare_repo() { 166 | 167 | orig_repo="$1" 168 | bare_repo="$2" 169 | 170 | git clone --bare "${orig_repo}" "${bare_repo}" 171 | cd "${bare_repo}" 172 | _git_compact 173 | } 174 | 175 | check_git_repo_size() { 176 | 177 | orig_repo="$1" 178 | bare_repo="/tmp/rezipdoc-test-$(basename "$orig_repo")-bare-$rnd" 179 | 180 | create_bare_repo "${orig_repo}" "${bare_repo}" 181 | 182 | du=/usr/bin/du 183 | repo_size_human=$(${du} -sh "${bare_repo}" | sed 's/[ \t].*//') 184 | repo_size_apparent=$(${du} -sb "${bare_repo}" | sed 's/[ \t].*//') 185 | repo_size_raw=$(${du} -s "${bare_repo}" | sed 's/[ \t].*//') 186 | 187 | rm -Rf "${bare_repo}" 188 | 189 | printf "%s\t%s\t%s\n" "$repo_size_human" "$repo_size_raw" "$repo_size_apparent" 190 | } 191 | 192 | size_archive=$(check_git_repo_size "${archive_repo}") 193 | size_filtered=$(check_git_repo_size "${filtered_repo}") 194 | 195 | cd "$pwd_before" 196 | 197 | echo 198 | echo "###########" 199 | echo "# Summary #" 200 | echo "###########" 201 | echo 202 | echo "Source repo: '${source_repo}'" 203 | echo "Source repo (copy): '${tmp_repo}'" 204 | echo "Max commits: ${num_commits_max}" 205 | echo "Archives repo: '${archive_repo}'" 206 | echo "Filtered repo: '${filtered_repo}'" 207 | echo "Archives repo size: ${size_archive}" 208 | echo "Filtered repo size: ${size_filtered}" 209 | -------------------------------------------------------------------------------- /src/test/java/io/github/hoijui/rezipdoc/BinaryUtilTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2015-2020, The authors of the ReZipDoc project. 
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see .
 */

package io.github.hoijui.rezipdoc;

import org.junit.Test;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;

import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.util.logging.Handler;
import java.util.logging.Level;
import java.util.logging.Logger;

/**
 * Unit tests for the manifest parsing and the summary/property string
 * generation of {@link BinaryUtil}.
 *
 * @see BinaryUtil
 */
public class BinaryUtilTest {

	/**
	 * A sample manifest, fed to {@link BinaryUtil} through an in-memory stream.
	 * It contains values that are wrapped onto continuation lines
	 * (the lines starting with a single space),
	 * which the parser is expected to join back into one value.
	 */
	private static final String TEST_MANIFEST
			= "-By: hoijui\n"
			+ "Bnd-LastModified: 1584876314307\n"
			+ "Build-Jdk: 1.8.0_151\n"
			+ "Bundle-Description: A Git filter and textconv for converting ZIP based b\n"
			+ " inary files to an uncompressed version of themselves, which works bet\n"
			+ " ter with gits delta-compression and diffs\n"
			+ "Bundle-License: http://www.gnu.org/licenses/gpl-3.0.html\n"
			+ "Bundle-ManifestVersion: 2\n"
			+ "Bundle-Name: ReZipDoc\n"
			+ "Bundle-SymbolicName: io.github.hoijui.rezipdoc\n"
			+ "Bundle-Version: 0.5.0.SNAPSHOT\n"
			+ "Created-By: Apache Maven Bundle Plugin\n"
			+ "Export-Package: io.github.hoijui.rezipdoc;uses:=\"javax.xml.parsers,javax\n"
			+ " .xml.transform,javax.xml.xpath,org.xml.sax\";version=\"0.5.0\"\n"
			+ "Import-Package: javax.xml.namespace,javax.xml.parsers,javax.xml.transfor\n"
			+ " m,javax.xml.transform.dom,javax.xml.transform.stream,javax.xml.xpath,or\n"
			+ " g.w3c.dom,org.xml.sax\n"
			+ "Require-Capability: osgi.ee;filter:=\"(&(osgi.ee=JavaSE)(version=1.8))\"\n"
			+ "Manifest-Version: 1.0\n"
			+ "Tool: Bnd-2.4.1.201501161923\n";

	/**
	 * Returns the logger for this test class with all of its handlers removed
	 * and parent handlers disabled, so that nothing logged through it
	 * is actually output.
	 */
	private Logger nullLogger() {

		final Logger logger = Utils.getLogger(BinaryUtilTest.class.getName());
		logger.setUseParentHandlers(false);
		for (final Handler handler : logger.getHandlers()) {
			logger.removeHandler(handler);
		}

		return logger;
	}

	/**
	 * Creates the library summary and the sorted manifest properties string
	 * and logs them to a silent logger;
	 * the test fails if anything is thrown while doing so.
	 *
	 * @param binaryUtil the instance to generate the strings from
	 */
	private void testDefaultNoError(final BinaryUtil binaryUtil) throws IOException {

		try {
			final Logger logger = nullLogger();
			// FINE makes both the INFO and the FINE branch below execute
			logger.setLevel(Level.FINE);
			if (logger.isLoggable(Level.INFO)) {
				logger.info(binaryUtil.createLibrarySummary());
				if (logger.isLoggable(Level.FINE)) {
					logger.fine(binaryUtil.createManifestPropertiesString(true));
				}
			}
		} catch (final Throwable thr) {
			// any failure, including errors, is reported as a test failure
			assertTrue("Default manifest file parsing and logging failed: " + thr.getMessage(), false);
		}
	}

	/** Runs the no-error check against the in-memory sample manifest. */
	@Test
	public void testDefaultNoError() throws IOException {

		final BinaryUtil binaryUtil = new BinaryUtil(new ByteArrayInputStream(TEST_MANIFEST.getBytes()));

		testDefaultNoError(binaryUtil);
	}

	/**
	 * Runs the no-error check against the manifest that the no-argument
	 * constructor reads from the class-path.
	 */
	@Test
	public void testDefaultNoErrorJar() throws IOException {

		final BinaryUtil binaryUtil = new BinaryUtil();

		testDefaultNoError(binaryUtil);
	}

	/** Checks the summary generated from the sample manifest, including the re-joined description. */
	@Test
	public void testLibrarySummary() throws IOException {

		final BinaryUtil binaryUtil = new BinaryUtil(new ByteArrayInputStream(TEST_MANIFEST.getBytes()));

		final String expectedLibrarySummary = "\nName: ReZipDoc\nDescription: A Git filter and textconv for converting ZIP based binary files to an uncompressed version of themselves, which works better with gits delta-compression and diffs\nVersion: 0.5.0.SNAPSHOT\nLicense: http://www.gnu.org/licenses/gpl-3.0.html";
		final String actualLibrarySummary = binaryUtil.createLibrarySummary();
		assertEquals(expectedLibrarySummary, actualLibrarySummary);
	}

	/** Checks that an empty manifest yields a summary where every field has the "unknown" value. */
	@Test
	public void testLibrarySummaryEmpty() throws IOException {

		final BinaryUtil binaryUtil = new BinaryUtil(new ByteArrayInputStream("".getBytes()));

		final String expectedLibrarySummary = "\nName: \nDescription: \nVersion: \nLicense: ";
		final String actualLibrarySummary = binaryUtil.createLibrarySummary();
		assertEquals(expectedLibrarySummary, actualLibrarySummary);
	}

	/**
	 * Checks the sorted "key -> value" listing generated from the sample manifest.
	 */
	@Test
	public void testManifestProperties() throws IOException {

		final BinaryUtil binaryUtil = new BinaryUtil(new ByteArrayInputStream(TEST_MANIFEST.getBytes()));

		// NOTE(review): BinaryUtil formats each key with "%32s" (left-padded to 32 chars),
		//   while the keys below carry a single leading space only;
		//   the padding may have been lost in this copy of the file - confirm.
		final String expectedManifestProperties
				= " -By -> \"hoijui\"\n"
				+ " Bnd-LastModified -> \"1584876314307\"\n"
				+ " Build-Jdk -> \"1.8.0_151\"\n"
				+ " Bundle-Description -> \"A Git filter and textconv for converting ZIP based binary files to an uncompressed version of themselves, which works better with gits delta-compression and diffs\"\n"
				+ " Bundle-License -> \"http://www.gnu.org/licenses/gpl-3.0.html\"\n"
				+ " Bundle-ManifestVersion -> \"2\"\n"
				+ " Bundle-Name -> \"ReZipDoc\"\n"
				+ " Bundle-SymbolicName -> \"io.github.hoijui.rezipdoc\"\n"
				+ " Bundle-Version -> \"0.5.0.SNAPSHOT\"\n"
				+ " Created-By -> \"Apache Maven Bundle Plugin\"\n"
				+ " Export-Package -> \"io.github.hoijui.rezipdoc;uses:=\"javax.xml.parsers,javax.xml.transform,javax.xml.xpath,org.xml.sax\";version=\"0.5.0\"\"\n"
				+ " Import-Package -> \"javax.xml.namespace,javax.xml.parsers,javax.xml.transform,javax.xml.transform.dom,javax.xml.transform.stream,javax.xml.xpath,org.w3c.dom,org.xml.sax\"\n"
				+ " Manifest-Version -> \"1.0\"\n"
				+ " Require-Capability -> \"osgi.ee;filter:=\"(&(osgi.ee=JavaSE)(version=1.8))\"\"\n"
				+ " Tool -> \"Bnd-2.4.1.201501161923\"\n";
		final String actualManifestProperties = binaryUtil.createManifestPropertiesString(true);
		assertEquals(expectedManifestProperties, actualManifestProperties);
	}

	/** Checks that an empty manifest yields an empty (unsorted) properties listing. */
	@Test
	public void testManifestPropertiesEmpty() throws IOException {

		final BinaryUtil binaryUtil = new BinaryUtil(new ByteArrayInputStream("".getBytes()));

		final String expectedManifestProperties = "";
		final String actualManifestProperties = binaryUtil.createManifestPropertiesString(false);
		assertEquals(expectedManifestProperties, actualManifestProperties);
	}
}

--------------------------------------------------------------------------------
/scripts/rezipdoc-create-archives-repo.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash
# Copyright (c) 2019 Robin Vobruba
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE 17 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 19 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 20 | # SOFTWARE. 21 | 22 | # For info about this script, please refer to the `printUsage()` function below. 23 | 24 | # Exit immediately on each error and unset variable; 25 | # see: https://vaneyckt.io/posts/safer_bash_scripts_with_set_euxo_pipefail/ 26 | set -Eeuo pipefail 27 | #set -Eeu 28 | 29 | pwd_before=$(pwd) 30 | this_script_file=$(basename "$0") 31 | script_name="$this_script_file" 32 | this_script_dir=$(cd "$(dirname "$0")"; pwd) 33 | 34 | # Settings and default values 35 | add_archive_bin="false" 36 | add_archive_src="true" 37 | num_commits_max=1000 38 | branch=master 39 | source_repo=$(cd "${this_script_dir}"; cd ..; pwd) 40 | rnd=$(od -A n -t d -N 1 /dev/urandom | tr -d ' ') 41 | target_repo="/tmp/rezipdoc-archives-repo-${rnd}" 42 | # We use this one for building the binaries 43 | tmp_repo="/tmp/rezipdoc-tmp-repo-${rnd}" 44 | 45 | printUsage() { 46 | echo "$script_name - This creates a git repository with a lot of archives as content," 47 | echo "focusing on archives that contain mostly plain-text content that changes" 48 | echo "non-radically over time." 49 | echo 50 | echo "Practically, it will build the main JAR of this project for every commit" 51 | echo "of this repo, plus some text files, and commit each change, if there was one." 
52 | echo 53 | echo "Usage:" 54 | echo " $script_name [OPTIONS]" 55 | echo 56 | echo "Options:" 57 | echo " -h, --help show this help message" 58 | echo " --no-src-archive do not add the sources ZIP to each commit" 59 | echo " --bin-archive add the binary archive (JAR) to each commit (NOTE this will take a long time)" 60 | echo " -m, --max-commits maximum number of commits to transcribe into the new repo" 61 | echo " -s, --source [path|URL] the repo to transcribe from" 62 | echo " -t, --target [path] the repo to transcribe to" 63 | echo " --tmp [path] the repo to use for temporary checkout and binary building" 64 | } 65 | 66 | # Handle command line arguments 67 | while [ $# -gt 0 ] 68 | do 69 | opName="$1" 70 | case ${opName} in 71 | -h|--help) 72 | printUsage 73 | exit 0 74 | ;; 75 | --no-src-archive) 76 | add_archive_src="false" 77 | ;; 78 | --bin-archive) 79 | add_archive_bin="true" 80 | ;; 81 | -m|--max-commits) 82 | num_commits_max=$2 83 | shift # past argument 84 | ;; 85 | -s|--source) 86 | source_repo="$2" 87 | shift # past argument 88 | ;; 89 | -t|--target) 90 | target_repo="$2" 91 | shift # past argument 92 | ;; 93 | --tmp) 94 | tmp_repo="$2" 95 | shift # past argument 96 | ;; 97 | *) 98 | # unknown option / not an option 99 | >&2 echo "Unknown option '${opName}'!" 100 | printUsage 101 | exit 1 102 | ;; 103 | esac 104 | shift # next argument or value 105 | done 106 | 107 | if [ "$add_archive_bin" != "true" ] && [ "$add_archive_src" != "true" ] 108 | then 109 | >&2 echo "Please include at least one of binary and source archive!" 110 | exit 1 111 | fi 112 | 113 | if ! git ls-remote "$source_repo" > /dev/null 2> /dev/null 114 | then 115 | >&2 echo "Source repo is not a valid git repository: '$source_repo'!" 116 | exit 1 117 | fi 118 | 119 | if [ -e "$tmp_repo" ] 120 | then 121 | >&2 echo "Temporary repo can not be an existing path: '$tmp_repo'!" 
122 | exit 1 123 | fi 124 | 125 | if [ "$source_repo" = "$target_repo" ] 126 | then 127 | >&2 echo "Source and target repos can not be equal!" 128 | exit 1 129 | fi 130 | 131 | if [ -e "$target_repo" ] 132 | then 133 | >&2 echo "Target repo can not be an existing path: '$target_repo'!" 134 | exit 1 135 | fi 136 | 137 | echo "Source repo: '${source_repo}'" 138 | echo "Tmp-checkout repo: '${tmp_repo}'" 139 | echo "Max commits: ${num_commits_max}" 140 | echo "Target repo: '${target_repo}'" 141 | 142 | git clone "$source_repo" "$tmp_repo" 143 | 144 | mkdir "$target_repo" 145 | cd "$target_repo" 146 | git init 147 | # This disables the global git-ignore file, which might otherwise prevent us 148 | # from adding binaries (like archives). 149 | git config core.excludesfile 'some-file-that-does-not-exist' 150 | 151 | cd "$tmp_repo" 152 | 153 | num_commits=$(git log -${num_commits_max} --format="%H" --reverse origin/${branch} | wc -l) 154 | i=0 155 | for commit_hash in $(git log -${num_commits_max} --format="%H" --reverse origin/${branch}) 156 | do 157 | i=$((i + 1)) 158 | echo 159 | echo "############################################################" 160 | echo "Building commit ${i}/${num_commits} - ${commit_hash} ..." 161 | echo 162 | 163 | cd "$tmp_repo" 164 | git checkout "$commit_hash" 165 | if [ "${add_archive_bin}" = "true" ] 166 | then 167 | rm -f target/*.jar 168 | mvn package -DskipTests 169 | fi 170 | commit_msg=$(git log -1 --format="ARCH - %s%n%n orig=%h%n%n%b" "$commit_hash") 171 | 172 | cd "$target_repo" 173 | find . 
-type f | grep -v "\.git" | xargs rm -Rf 174 | 175 | # Add some Project-global text files/sources 176 | cp "$tmp_repo/README"* ./ 2> /dev/null 177 | cp "$tmp_repo/LICENSE"* ./ 2> /dev/null 178 | cp "$tmp_repo/pom.xml" ./ 2> /dev/null 179 | cp -r "$tmp_repo/src"* ./ 180 | 181 | # Add archive(s) 182 | if [ "$add_archive_bin" = "true" ] 183 | then 184 | # uncompressing this is probably less interesting/useful, 185 | # because it contains mainly class files, which are binary, 186 | # and thus might not play much nicer with git 187 | # then the compressed archive 188 | cp "$tmp_repo/target/"*".jar" ./ 189 | fi 190 | if [ "$add_archive_src" = "true" ] 191 | then 192 | # As this probably contains mostly text files, 193 | # it should play much nicer with git when uncompressed. 194 | (cd "$tmp_repo"; zip --quiet -r "$target_repo/src.zip" src) 195 | fi 196 | 197 | git add --all --force 198 | git commit -m "${commit_msg}" 199 | 200 | cd "$tmp_repo" 201 | echo "############################################################" 202 | echo 203 | done 204 | 205 | # Make sure a potential global value might be used again 206 | git config --unset core.excludesfile 207 | 208 | echo "Source repo: '${source_repo}'" 209 | echo "Tmp-checkout repo: '${tmp_repo}'" 210 | echo "Max commits: ${num_commits_max}" 211 | echo "Target repo: '${target_repo}'" 212 | 213 | cd "$pwd_before" 214 | -------------------------------------------------------------------------------- /src/test/java/io/github/hoijui/rezipdoc/XmlFormatterTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2019, The authors of the ReZipDoc project. 3 | * 4 | * This program is free software: you can redistribute it and/or modify 5 | * it under the terms of the GNU General Public License as published by 6 | * the Free Software Foundation, either version 3 of the License, or 7 | * (at your option) any later version. 
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see .
 */

package io.github.hoijui.rezipdoc;

import org.hamcrest.CoreMatchers;
import org.hamcrest.Matcher;
import org.hamcrest.MatcherAssert;
import org.junit.Assert;
import org.junit.Rule;
import org.junit.Test;
import org.junit.contrib.java.lang.system.ExpectedSystemExit;
import org.junit.contrib.java.lang.system.SystemOutRule;
import org.junit.contrib.java.lang.system.TextFromStandardInputStream;

import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.PrintStream;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;

/**
 * Tests {@link XmlFormatter}, both through its {@code prettify} method
 * and through its {@code main} entry point,
 * using files and the standard streams for input and output.
 *
 * NOTE(review): the XML input and expected-output literals in the test methods
 *   below are empty or white-space only in this copy of the file;
 *   the markup may have been lost - confirm against the repository.
 *
 * @see XmlFormatter
 */
public class XmlFormatterTest {

	/** Allows to expect calls to {@code System.exit} (see {@link #testBadArguments()}). */
	@Rule
	public final ExpectedSystemExit exit = ExpectedSystemExit.none();
	/** Replaces stdin, so tests can provide the input read by {@code main}. */
	@Rule
	public final TextFromStandardInputStream systemInMock
			= TextFromStandardInputStream.emptyStandardInputStream();
	/** Captures (and mutes) stdout, so tests can inspect what {@code main} printed. */
	@Rule
	public final SystemOutRule systemOutRule
			= new SystemOutRule().mute().enableLog();


	// TODO Use System.lineSeparator();

	/**
	 * Prettifies {@code input} with a default-constructed formatter
	 * and compares the result to {@code expected}.
	 */
	private void testStringPrettyPrint(final String input, final String expected) throws IOException {

		final String actual = new XmlFormatter().prettify(input);
		Assert.assertEquals(expected, actual);
	}

	/**
	 * Prettifies {@code input} with a formatter created with explicit
	 * constructor arguments and compares the result to {@code expected}.
	 */
	private void testRoughStringPrettyPrint(final String input, final String expected) throws IOException {

		final String actual = new XmlFormatter(2, " ", false).prettify(input);
		Assert.assertEquals(expected, actual);
	}

	/**
	 * Creates a temporary ".xml" file containing {@code content},
	 * which gets deleted when the JVM exits.
	 *
	 * @param nameBase the prefix of the temporary files name
	 * @param content what to write into the file
	 * @return the freshly written file
	 */
	private File createTempFile(final String nameBase, final String content) throws IOException {

		final File file = File.createTempFile(nameBase, ".xml");
		file.deleteOnExit();
		try (PrintStream out = new PrintStream(Files.newOutputStream(file.toPath()))) {
			out.print(content);
		}
		return file;
	}

	/**
	 * Runs the same input/expected pair through all three ways of invoking
	 * {@code main}: file to file, file to stdout and stdin to stdout.
	 */
	private void testPrettyPrint(final String input, final String expected) throws IOException {

		testPrettyPrintFiles(input, expected);
		testPrettyPrintFileAndStream(input, expected);
		testPrettyPrintStreams(input, expected);
	}

	/** Invokes {@code main} with "--input" and "--output" files and checks the output files content. */
	private void testPrettyPrintFiles(final String input, final String expected) throws IOException {

		final File xmlInFile = createTempFile("rezipdoc-unformatted-in", input);
		final File xmlOutFile = createTempFile("rezipdoc-unformatted-out", "");

		XmlFormatter.main(new String[] {
				"--input", xmlInFile.getAbsolutePath(),
				"--output", xmlOutFile.getAbsolutePath() });

		try (InputStream resultIn = Files.newInputStream(xmlOutFile.toPath())) {
			final String actual = Utils.readStreamToString(resultIn);

			Assert.assertEquals(expected, actual);
		}
	}

	/** Invokes {@code main} with an "--input" file only and checks what was written to stdout. */
	private void testPrettyPrintFileAndStream(final String input, final String expected) throws IOException {

		final File xmlInFile = createTempFile("rezipdoc-unformatted-in", input);

		// drop whatever earlier invocations wrote to stdout
		systemOutRule.clearLog();
		XmlFormatter.main(new String[] { "--input", xmlInFile.getAbsolutePath() });
		final String actual = systemOutRule.getLog();

		Assert.assertEquals(expected, actual);
	}

	/** Invokes {@code main} without arguments, providing the input on stdin, and checks stdout. */
	private void testPrettyPrintStreams(final String input, final String expected) {

		systemInMock.provideLines(input);
		systemOutRule.clearLog();
		XmlFormatter.main(new String[] {});
		final String actual = systemOutRule.getLog();

		Assert.assertEquals(expected, actual);
	}

	/** Decodes the buffers current content as UTF-8 text. */
	private static String toString(final ByteArrayOutputStream buffer) {
		return new String(buffer.toByteArray(), StandardCharsets.UTF_8);
	}

	/**
	 * Checks that both "-h" and "--help" make {@code main} log a help text
	 * that starts with the class name and contains the main sections.
	 */
	@Test
	public void testHelp() throws IOException {

		final Matcher helpMatchers = CoreMatchers.allOf(
				CoreMatchers.startsWith(XmlFormatter.class.getSimpleName()),
				CoreMatchers.containsString("License:"),
				CoreMatchers.containsString("Usage:"),
				CoreMatchers.containsString("Examples:"));

		try (ByteArrayOutputStream outBuffer = new ByteArrayOutputStream()) {
			// redirect the log output into our buffer, so we can inspect it
			Utils.getLogHandler().setOutputStream(outBuffer);
			XmlFormatter.main(new String[] { "-h" });
			MatcherAssert.assertThat(toString(outBuffer), helpMatchers);

			outBuffer.reset();
			XmlFormatter.main(new String[] { "--help" });
			MatcherAssert.assertThat(toString(outBuffer), helpMatchers);
		} finally {
			// restore the log output for the tests that follow
			Utils.getLogHandler().setOutputStream(System.err);
		}
	}

	/** Checks that invalid arguments make {@code main} exit with status 1. */
	@Test
	public void testBadArguments() throws IOException {

		exit.expectSystemExitWithStatus(1);
		// NOTE This is the projects own BufferedOutputStream (same package), not java.io's
		try (BufferedOutputStream outBuffer = new BufferedOutputStream()) {
			Utils.getLogHandler().setOutputStream(outBuffer);
			XmlFormatter.main(new String[] { "file1", "file2", "file3" });
		} finally {
			Utils.getLogHandler().setOutputStream(System.err);
		}
	}

	@Test
	public void testSingleTag() throws IOException {

		testStringPrettyPrint("",
				"\n\n");
	}

	@Test
	public void testBeginAndEndTag() throws IOException {

		testStringPrettyPrint("",
				"\n\n");
	}

	@Test
	public void testBeginMiddleAndEndTag() throws IOException {

		testStringPrettyPrint("",
				"\n\n \n\n");
	}

	@Test
	public void testFileSingleTag() throws IOException {

		testPrettyPrint("",
				"\n\n");
	}

	@Test
	public void testFileBeginAndEndTag() throws IOException {

		testPrettyPrint("",
				"\n\n");
	}

	@Test
	public void testFileBeginMiddleAndEndTag() throws IOException {

		testPrettyPrint("",
				"\n\n \n\n");
	}

	@Test
	public void testRoughSingleTag() throws IOException {

		testRoughStringPrettyPrint("",
				"\n");
	}

	@Test
	public void testRoughBeginAndEndTag() throws IOException {

		testRoughStringPrettyPrint("",
				"\n\n");
	}
	@Test
	public void testRoughBeginMiddleAndEndTag() throws IOException {

		testRoughStringPrettyPrint("",
				"\n \n\n");
	}
}

--------------------------------------------------------------------------------
/src/main/java/io/github/hoijui/rezipdoc/BinaryUtil.java:
--------------------------------------------------------------------------------
/*
 * Copyright (C) 2019, The authors of the ReZipDoc project.
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see .
16 | */ 17 | 18 | package io.github.hoijui.rezipdoc; 19 | 20 | import java.io.BufferedReader; 21 | import java.io.IOException; 22 | import java.io.InputStream; 23 | import java.io.InputStreamReader; 24 | import java.nio.charset.StandardCharsets; 25 | import java.util.LinkedList; 26 | import java.util.List; 27 | import java.util.Map; 28 | import java.util.Properties; 29 | import java.util.Set; 30 | import java.util.TreeMap; 31 | import java.util.Map.Entry; 32 | import java.util.stream.Stream; 33 | 34 | /** 35 | * A global hub, providing general information about this software. 36 | * The information available here comes either from the libraries manifest file at 37 | * "{JAR_ROOT}/META-INF/MANIFEST.MF", or is fetched directly from code inside this software. 38 | */ 39 | public final class BinaryUtil { 40 | 41 | private static final String MANIFEST_FILE = "/META-INF/MANIFEST.MF"; 42 | @SuppressWarnings("WeakerAccess") 43 | public static final String UNKNOWN_VALUE = ""; 44 | private static final char MANIFEST_CONTINUATION_LINE_INDICATOR = ' '; 45 | /** 1 key + 1 value = 2 parts of a key-value pair */ 46 | private static final int KEY_PLUS_VALUE_COUNT = 2; 47 | private final Properties manifestProperties; 48 | 49 | @SuppressWarnings("WeakerAccess") 50 | public BinaryUtil() throws IOException { 51 | 52 | this.manifestProperties = readJarManifest(); 53 | } 54 | 55 | @SuppressWarnings("WeakerAccess") 56 | public BinaryUtil(final InputStream manifestIn) throws IOException { 57 | 58 | this.manifestProperties = parseManifestFile(manifestIn); 59 | } 60 | 61 | private static Properties parseManifestFile(final InputStream manifestIn) throws IOException { 62 | 63 | try (BufferedReader manifestBufferedIn = new BufferedReader( 64 | new InputStreamReader(manifestIn, StandardCharsets.UTF_8))) 65 | { 66 | final Stream manifestLines = manifestBufferedIn.lines(); 67 | return parseManifestLines(manifestLines); 68 | } 69 | } 70 | 71 | /** 72 | * Filters out empty lines and comments. 
73 | * NOTE Is there really a comment syntax defined for manifest files? 74 | */ 75 | private static boolean isContentManifestLine(final String manifestLine) { 76 | return !Utils.isBlank(manifestLine) && !manifestLine.startsWith("[#%]"); 77 | } 78 | 79 | /** 80 | * Figures out whether a manifest line is continuation of a previous one. 81 | * @param manifestLine to be checked 82 | * @return true if the suppleid line is a continuation line, false otherwise 83 | */ 84 | private static boolean isContinuationManifestLine(final String manifestLine) { 85 | return manifestLine.charAt(0) == MANIFEST_CONTINUATION_LINE_INDICATOR; 86 | } 87 | 88 | /** 89 | * Collects a list of Strings, each a single, complete manifest key+value pair. 90 | * @param manifestLines the raw manifest lines, including comments, empty lines and continuation lines 91 | * @return a list of Strings, each one (supposedly) containing a single, complete key+value pair. 92 | */ 93 | private static List collectKeyValueStrings(final Stream manifestLines) { 94 | 95 | final List manifestProps = new LinkedList<>(); 96 | 97 | final StringBuilder currentProp = new StringBuilder(80); 98 | // NOTE one property can be specified on multiple lines. 99 | // This is done by prepending all but the first line with white-space, for example: 100 | // "My-Key: hello, this is my very long property value, which is sp" 101 | // " lit over multiple lines, and because we also want to show the " 102 | // " third line, we write a little more." 
103 | manifestLines.forEach(manifestLine -> { 104 | if (isContentManifestLine(manifestLine)) { 105 | if (isContinuationManifestLine(manifestLine)) { 106 | // remove the initial MANIFEST_CONTINUATION_LINE_INDICATOR 107 | // and add the remainder to the already read value 108 | currentProp.append(manifestLine.substring(1)); 109 | } else { 110 | // store the previous key+value, if there was one 111 | if (currentProp.length() > 0) { 112 | manifestProps.add(currentProp.toString()); 113 | } 114 | currentProp.setLength(0); 115 | currentProp.append(manifestLine); 116 | } 117 | } 118 | }); 119 | if (currentProp.length() > 0) { 120 | manifestProps.add(currentProp.toString()); 121 | } 122 | 123 | return manifestProps; 124 | } 125 | 126 | /** 127 | * Parses a list of manifest files into a set of properties. 128 | * The manifest lines might be directly read from a file, 129 | * like MANIFEST_FILE. 130 | * @param manifestLines to be parsed into properties 131 | * @return the parsed properties 132 | */ 133 | private static Properties parseManifestLines(final Stream manifestLines) { 134 | 135 | final Properties manifestProps = new Properties(); 136 | 137 | for (final String manifestProp : collectKeyValueStrings(manifestLines)) { 138 | // then (start to) parse the next one 139 | final String[] keyAndValue = manifestProp.split(": ", KEY_PLUS_VALUE_COUNT); 140 | if (keyAndValue.length < KEY_PLUS_VALUE_COUNT) { 141 | throw new IllegalArgumentException("Invalid manifest entry: \"" + manifestProp + '"'); 142 | } 143 | manifestProps.setProperty(keyAndValue[0], keyAndValue[1]); 144 | } 145 | 146 | return manifestProps; 147 | } 148 | 149 | private static Properties readJarManifest() throws IOException { 150 | 151 | Properties mavenProps; 152 | 153 | try (InputStream manifestFileIn = BinaryUtil.class.getResourceAsStream(MANIFEST_FILE)) { 154 | if (manifestFileIn == null) { 155 | throw new IOException("Failed locating resource in the classpath: " + MANIFEST_FILE); 156 | } 157 | mavenProps = 
parseManifestFile(manifestFileIn); 158 | } 159 | 160 | return mavenProps; 161 | } 162 | 163 | /** 164 | * Returns this application JARs {@link #MANIFEST_FILE} properties. 165 | * @return the contents of the manifest file as {@code String} to {@code String} mapping 166 | */ 167 | @SuppressWarnings("WeakerAccess") 168 | public Properties getManifestProperties() { 169 | return manifestProperties; 170 | } 171 | 172 | public String getVersion() { 173 | return getManifestProperties().getProperty("Bundle-Version", UNKNOWN_VALUE); 174 | } 175 | 176 | public String getLicense() { 177 | return getManifestProperties().getProperty("Bundle-License", UNKNOWN_VALUE); 178 | } 179 | 180 | @SuppressWarnings("unused") 181 | public String createManifestPropertiesString(final boolean sorted) { 182 | 183 | final StringBuilder info = new StringBuilder(1024); 184 | 185 | final Set> propertiesSet; 186 | if (sorted) { 187 | propertiesSet = new TreeMap<>(getManifestProperties()).entrySet(); 188 | } else { 189 | propertiesSet = getManifestProperties().entrySet(); 190 | } 191 | for (final Map.Entry manifestEntry : propertiesSet) { 192 | final String key = (String) manifestEntry.getKey(); 193 | final String value = (String) manifestEntry.getValue(); 194 | info 195 | .append(String.format("%32s", key)) 196 | .append(" -> \"") 197 | .append(value) 198 | .append("\"\n"); 199 | } 200 | 201 | return info.toString(); 202 | } 203 | 204 | @SuppressWarnings("WeakerAccess") 205 | public String createLibrarySummary() { 206 | 207 | final StringBuilder summary = new StringBuilder(1024); 208 | 209 | summary 210 | .append("\nName: ").append(getManifestProperties().getProperty("Bundle-Name", UNKNOWN_VALUE)) 211 | .append("\nDescription: ").append(getManifestProperties().getProperty("Bundle-Description", UNKNOWN_VALUE)) 212 | .append("\nVersion: ").append(getVersion()) 213 | .append("\nLicense: ").append(getLicense()); 214 | 215 | return summary.toString(); 216 | } 217 | } 218 | 
-------------------------------------------------------------------------------- /src/main/resources/checkstyle.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | 33 | 34 | 35 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | 127 | 128 | 129 | 130 | 131 | 132 | 133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 | 141 | 142 | 143 | 144 | 145 | 146 | 147 | 148 | 149 | 150 | 151 | 152 | 153 | 154 | 155 | 156 | 157 | 158 | 159 | 160 | 161 | 162 | 163 | 164 | 165 | 166 | 167 | 168 | 169 | 170 | 171 | 172 | 175 | 176 | 177 | 178 | 179 | 180 | 181 | 182 | 183 | 184 | 185 | 186 | 187 | 188 | 189 | 190 | 191 | 192 | 193 | 194 | 195 | 196 | 197 | 198 | 199 | 200 | 201 | 202 | 203 | 204 | 205 | 206 | 207 | 208 | 209 | 210 | 211 | 212 | 213 | 214 | 215 | 216 | 217 | 218 | 219 | 220 | 221 | 222 | 223 | 224 | 225 | 226 | 227 | -------------------------------------------------------------------------------- /scripts/rezipdoc-scripts-tool.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # Copyright (c) 2019 Robin Vobruba 3 | # 4 | # Permission is hereby granted, free of charge, to any person obtaining a copy 5 | # of this software and associated documentation files (the "Software"), to deal 6 | # in the Software without restriction, including without limitation the rights 7 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 8 | # copies of the Software, and to permit persons to whom the Software is 9 | # furnished to do so, subject to the following conditions: 10 | # 11 | # The above copyright notice and this 
# permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

# For info about this script, please refer to the `printUsage()` function below.

# Exit immediately on each error and unset variable;
# see: https://vaneyckt.io/posts/safer_bash_scripts_with_set_euxo_pipefail/
set -Eeuo pipefail
#set -Eeu

pwd_before="$(pwd)"
this_script_file=$(basename "$0")
script_name="$this_script_file"
this_script_dir=$(cd "$(dirname "$0")"; pwd)

# Settings and default values
action=""
# When non-empty (set by '--dry'), this is prepended to every mutating command,
# so the command gets printed instead of executed.
dry_prefix=""
scripts_base_url="https://raw.githubusercontent.com/hoijui/ReZipDoc"
# We can parse the latest released version from this
metadata_url="https://repo1.maven.org/maven2/io/github/hoijui/rezipdoc/rezipdoc/maven-metadata.xml"
scripts_install_dir="$HOME/bin"
script_names='rezipdoc-repo-tool.sh rezipdoc-history-filter.sh'
# Whether to use latest development scripts, or the stable (last release) versions
enable_development="false"
enable_path="false"
java_pkg="io.github.hoijui.rezipdoc"
maven_group="$java_pkg"
maven_artifact="rezipdoc"
fetch_url="https://repository.sonatype.org/service/local/artifact/maven/redirect?r=central-proxy&g=${maven_group}&a=${maven_artifact}&v=LATEST"

# Prints the help text of this script to stdout.
printUsage() {
	echo "$script_name - This installs (or removes) the ReZipDoc helper shell scripts"
	echo "to the local directory ~/bin/."
	echo
	echo "See the ReZipDoc README for further info."
	echo
	echo "NOTE This is really only required for developers, hacking on this code-base."
	echo
	echo "Usage:"
	echo " $script_name ACTION [OPTIONS]"
	echo
	echo "Actions:"
	echo " -h, --help show this help message"
	echo " install install the latest (stable or development) versions of the scripts,"
	echo " if none are installed yet"
	echo " remove remove the local scripts"
	echo " update install the latest (stable or development) versions of the scripts"
	echo " check check whether the scripts are installed"
	echo
	echo "Options:"
	echo " --dev (install|update) when installing or updating, install the latest dev scripts,"
	echo " instead of stable"
	echo " --dry (install|remove|update) show what would be done, instead of actually doing anything"
	echo " --path (install) add the install directory to PATH in the current shell and after reboot"
}

# Stores the requested action in 'action';
# aborts with exit code 1 if an action was already chosen.
#   $1 - the action name (install|remove|update|check)
set_action() {
	new_action="$1"
	if [ "$action" != "" ]
	then
		>&2 echo "You may only specify one action!"
		printUsage
		exit 1
	fi
	action="$new_action"
}

# Handle command line arguments
# NOTE The option/action compatibility checks below only trigger
#      if the action was given *before* the option on the command line.
while [ $# -gt 0 ]
do
	option="$1"
	case ${option} in
		-h|--help)
			printUsage
			exit 0
			;;
		install)
			set_action "install"
			;;
		remove)
			set_action "remove"
			;;
		update)
			set_action "update"
			;;
		check)
			set_action "check"
			;;
		--dev)
			if [ "$action" = "remove" ] || [ "$action" = "check" ]
			then
				>&2 echo "Action '$action' does not support '--dev'."
				exit 2
			fi
			enable_development="true"
			;;
		--dry)
			if [ "$action" = "check" ]
			then
				>&2 echo "Action '$action' does not support '--dry'"
				exit 2
			fi
			dry_prefix="echo"
			;;
		--path)
			if [ "$action" = "remove" ] || [ "$action" = "update" ]
			then
				>&2 echo "Action '$action' does not support '--path'"
				exit 2
			fi
			enable_path="true"
			;;
		*)
			# unknown option / not an option
			>&2 echo "Unknown option '${option}'!"
			printUsage
			exit 1
			;;
	esac
	shift # next argument or value
done


if [ "$action" = "" ]
then
	>&2 echo "No action defined!"
	printUsage
	exit 1
fi
if [ "$action" = "update" ]
then
	update_args=""
	if [ "$dry_prefix" != "" ]
	then
		update_args="$update_args --dry"
	fi

	install_args=""
	if [ "$enable_development" = "true" ]
	then
		install_args="$install_args --dev"
	fi

	# Call ourselves recursively
	# ("$0" is quoted, so a script path containing spaces still works;
	# the *_args variables are left unquoted on purpose, to allow word-splitting)
	"$0" remove ${update_args} \
		&& "$0" install ${update_args} ${install_args}
	exit $?
fi

# If we got this far, it means that 'action' is set to 'check|install|remove'

extra_info=""
if [ "$action" = "install" ]
then
	if [ "$enable_development" = "true" ]
	then
		version="master"
		revision="$version"
	else
		# Extract the content of the <latest> element from the Maven meta-data
		version=$(curl -s "$metadata_url" | grep '<latest>' | sed 's/.*<latest>//' | sed 's/<\/latest>.*//')
		revision="rezipdoc-$version"
	fi
	extra_info="$extra_info (version: $version)"
fi

echo "$script_name action: ${action}ing$extra_info ..."

exit_state=0

# Checks whether a directory is listed in PATH.
#   $1 - the directory to look for
# Returns 0 if it is part of PATH, 1 otherwise.
dir_in_path() {
	dir="$1"
	ret=1
	# Surrounding both sides with ':' lets us match whole PATH entries only;
	# quoting the directory prevents it from being interpreted as a glob pattern.
	case ":$PATH:" in
		*":${dir}:"*) ret=0 ;;
	esac
	return ${ret}
}

# global checks
if [ "$action" = "check" ] && [ "$enable_path" = "true" ]
then
	if [ -e "$scripts_install_dir" ]
	then
		echo "install directory exists: '$scripts_install_dir'"
	else
		echo "install directory does not exist: '$scripts_install_dir'"
		exit_state=1
	fi
	if dir_in_path "$scripts_install_dir"
	then
		echo "install directory is in PATH: '$scripts_install_dir'"
	else
		echo "install directory is not in PATH: '$scripts_install_dir'"
		exit_state=1
	fi
fi
if [ "$action" = "install" ]
then
	# The install directory is required for downloading the scripts into it,
	# whether or not '--path' was given.
	if [ ! -e "$scripts_install_dir" ]
	then
		echo "creating scripts install dir: '$scripts_install_dir' ..."
		# '|| ...' keeps 'set -e' from aborting before we can report the failure
		install_state=0
		${dry_prefix} mkdir -p "$scripts_install_dir" || install_state=$?
		exit_state=$((exit_state + install_state))
		[ ${install_state} -eq 0 ] \
			&& echo "done" || echo "failed!"
	fi
	if [ "$enable_path" = "true" ] && [ -e "$scripts_install_dir" ] && ! dir_in_path "$scripts_install_dir"
	then
		echo "adding scripts install dir to PATH ..."
		# NOTE This only changes PATH of this script's own process;
		#      the line appended to the profile file is what makes it permanent.
		${dry_prefix} export PATH="$PATH:$scripts_install_dir"
		profile_file="$HOME/.profile"
		# In a dry run, show the line instead of writing it to the profile
		[ "${dry_prefix}" != "" ] && profile_file="/dev/stdout"
		install_state=0
		echo "export PATH=\"\$PATH:${scripts_install_dir}\"" >> "${profile_file}" || install_state=$?
		exit_state=$((exit_state + install_state))
		[ ${install_state} -eq 0 ] \
			&& echo "done" || echo "failed!"
	fi
fi

# per script-file checks
# NOTE This loop re-uses (and thus overwrites) the global 'script_name'.
for script_name in ${script_names}
do
	if [ "$action" = "check" ]
	then
		if [ -f "$scripts_install_dir/$script_name" ]
		then
			echo "script is installed: $script_name"
		else
			echo "script is NOT installed: $script_name"
			exit_state=1
		fi
	elif [ "$action" = "install" ]
	then
		if [ -f "$scripts_install_dir/$script_name" ]
		then
			echo "was already installed: $script_name"
			exit_state=1
		else
			echo -n "installing (version: $version): $script_name ... "
			${dry_prefix} curl -s "$scripts_base_url/$revision/scripts/$script_name" -o "$scripts_install_dir/$script_name" \
				&& ${dry_prefix} chmod +x "$scripts_install_dir/$script_name"
			install_state=$?
			exit_state=$((exit_state + install_state))
			[ ${install_state} -eq 0 ] \
				&& echo "done" || echo "failed!"
		fi
	elif [ "$action" = "remove" ]
	then
		if [ -f "$scripts_install_dir/$script_name" ]
		then
			echo -n "removing: $script_name ... "
			# '|| ...' keeps 'set -e' from aborting before we can report the failure
			remove_state=0
			${dry_prefix} rm -f "$scripts_install_dir/$script_name" || remove_state=$?
			exit_state=$((exit_state + remove_state))
			[ ${remove_state} -eq 0 ] \
				&& echo "done" || echo "failed!"
		else
			echo "was not present: $script_name"
			exit_state=1
		fi
	fi
done

exit ${exit_state}
--------------------------------------------------------------------------------
/scripts/rezipdoc-history-filter.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash
# Copyright (c) 2019 Robin Vobruba
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

# For info about this script, please refer to the `printUsage()` function below.
23 | 24 | # Exit immediately on each error and unset variable; 25 | # see: https://vaneyckt.io/posts/safer_bash_scripts_with_set_euxo_pipefail/ 26 | set -Eeuo pipefail 27 | #set -Eeu 28 | 29 | pwd_before=$(pwd) 30 | this_script_file=$(basename "$0") 31 | script_name="$this_script_file" 32 | this_script_dir=$(cd "$(dirname "$0")"; pwd) 33 | 34 | # Settings and default values 35 | source_repo="" 36 | target_repo="" 37 | num_commits_max=1000 38 | use_orig_commit="false" 39 | branch="master" 40 | repo_tool_url="https://raw.githubusercontent.com/hoijui/ReZipDoc/master/scripts/rezipdoc-repo-tool.sh" 41 | repo_tool_file_name=$(basename "$repo_tool_url") 42 | 43 | printUsage() { 44 | echo "$script_name - Creates a local clone of a repo, and filters" 45 | echo "the main branch with ReZip(Doc)." 46 | echo 47 | echo "Usage:" 48 | echo " $script_name [OPTIONS]" 49 | echo 50 | echo "Options:" 51 | echo " -h, --help show this help message" 52 | echo " -b, --branch git branch to filter" 53 | echo " -m, --max-commits maximum number of commits to filter into the new repo" 54 | echo " -o, --orig use the original commit message (default: prefix with \"FILTERED - \")," 55 | echo " author, email and time" 56 | echo " -s, --source [path|URL] the repo to read commits from" 57 | echo " -t, --target [path] the repo to write commits to" 58 | } 59 | 60 | # Handle command line arguments 61 | while [ $# -gt 0 ] 62 | do 63 | opName="$1" 64 | shift # skip argument 65 | case ${opName} in 66 | -h|--help) 67 | printUsage 68 | exit 0 69 | ;; 70 | -b|--branch) 71 | branch="$1" 72 | shift # past argument 73 | ;; 74 | -m|--max-commits) 75 | num_commits_max=$1 76 | shift # past argument 77 | ;; 78 | -o|--orig) 79 | use_orig_commit="true" 80 | ;; 81 | -s|--source) 82 | source_repo="$1" 83 | shift # past argument 84 | ;; 85 | -t|--target) 86 | target_repo="$1" 87 | shift # past argument 88 | ;; 89 | *) 90 | # unknown option / not an option 91 | >&2 echo "Unknown option '${opName}'!" 
92 | printUsage 93 | exit 1 94 | ;; 95 | esac 96 | done 97 | 98 | if ! git ls-remote "$source_repo" > /dev/null 2> /dev/null 99 | then 100 | >&2 echo "Source repo is not a valid git repository: '$source_repo'!" 101 | exit 1 102 | fi 103 | 104 | if [ "$source_repo" = "$target_repo" ] 105 | then 106 | >&2 echo "Source and target repos can not be equal!" 107 | exit 1 108 | fi 109 | 110 | if [ -e "$target_repo" ] 111 | then 112 | >&2 echo "Target repo can not be an existing path: '$target_repo'!" 113 | exit 1 114 | fi 115 | 116 | # Check whether the source repo is a local directory or a URL 117 | source_is_url="true" 118 | [ -d "$source_repo" ] && source_is_url="false" 119 | [ "$source_is_url" = "true" ] && source_type="URL" || source_type="local repo" 120 | 121 | # If the source repo is a local directory, make the path to it absolute 122 | [ "$source_is_url" != "true" ] && source_repo="$(cd "$source_repo"; pwd)" 123 | 124 | echo "Source repo: '${source_repo}' ($source_type)" 125 | echo "Branch: ${branch}" 126 | echo "Max commits: ${num_commits_max}" 127 | echo "Target repo: '${target_repo}'" 128 | 129 | mkdir "$target_repo" 130 | cd "$target_repo" 131 | git init 132 | git remote add source "$source_repo" 133 | git fetch source 134 | 135 | # Ensure we have a local filter installer script 136 | if [ -e "$this_script_dir/$repo_tool_file_name" ] 137 | then 138 | repo_tool="$this_script_dir/$repo_tool_file_name" 139 | else 140 | rnd=$(od -A n -t d -N 1 /dev/urandom | tr -d ' ') 141 | repo_tool="/tmp/$(basename --suffix='.sh' \"$repo_tool_file_name\")-${rnd}.sh" 142 | curl -s "$repo_tool_url" -o "$repo_tool" 143 | fi 144 | 145 | # Install our filter if not yet installed 146 | if ! 
${repo_tool} check --commit --diff --renormalize > /dev/null 2> /dev/null 147 | then 148 | ${repo_tool} install --commit --diff --renormalize \ 149 | || ( >&2 echo "Failed installing filter!"; exit 2 ) 150 | fi 151 | 152 | git checkout --orphan ${branch}_filtered 153 | git commit --allow-empty --allow-empty-message -m "" 154 | 155 | num_commits=$(git log -${num_commits_max} --format="%H" --reverse source/${branch} | wc -l) 156 | i=0 157 | for commit_hash in $(git log -${num_commits_max} --topo-order --format="%H" --reverse source/${branch}) 158 | do 159 | i=$(expr ${i} + 1) 160 | echo 161 | echo "############################################################" 162 | echo "Copying & filtering commit ${i}/${num_commits} - ${commit_hash} ..." 163 | echo 164 | 165 | commit_args="" 166 | if [ "$use_orig_commit" = "true" ] 167 | then 168 | #commit_msg=$(git log -1 --format="%s%n%n%b" ${commit_hash}) 169 | commit_msg="" 170 | commit_args="$commit_args --reuse-message=${commit_hash}" 171 | else 172 | commit_msg=$(git log -1 --format="FILTERED - %s%n%n orig=%h%n%n%b" ${commit_hash}) 173 | # We have to give the message through stdin, 174 | # because otherwise the quoting somehow gets fucked up (by sh) 175 | commit_args="$commit_args --file=-" 176 | fi 177 | 178 | set +e 179 | 180 | echo "Cherry-picking ..." 181 | git cherry-pick --strategy=recursive --strategy-option=theirs --allow-empty --mainline 1 --no-commit ${commit_hash} 182 | last_status=$? 183 | if [ ${last_status} -ne 0 ] 184 | then 185 | >&2 echo -e "\tfailed! (Cherry-picking for ${commit_hash})" 186 | fi 187 | 188 | echo "Removing ..." 189 | git status | grep 'deleted by them:' | cut -d':' -f2 | xargs -t -I {} git rm "{}" 190 | last_status=$? 191 | if [ ${last_status} -ne 0 ] 192 | then 193 | >&2 echo -e "\tfailed! (Removing for ${commit_hash})" 194 | fi 195 | 196 | echo "Adding the 1st ..." 197 | git add --all --force 198 | last_status=$? 199 | if [ ${last_status} -ne 0 ] 200 | then 201 | >&2 echo -e "\tfailed! 
(Adding the 1st for ${commit_hash})" 202 | fi 203 | 204 | if [ ${last_status} -eq 0 ] 205 | then 206 | echo "Adding the 2nd ..." 207 | git add --all --force --renormalize 208 | last_status=$? 209 | if [ ${last_status} -ne 0 ] 210 | then 211 | >&2 echo -e "\tfailed! (Adding the 2nd for ${commit_hash})" 212 | fi 213 | fi 214 | if [ ${last_status} -eq 0 ] 215 | then 216 | if output=$(git status --porcelain) && [ -z "${output}" ] 217 | then 218 | # Working directory clean (completely) 219 | if [ ${last_status} -ne 0 ] 220 | then 221 | >&2 echo -e "WARNING: Nothing to commit for ${commit_hash} -> skipping" 222 | fi 223 | last_status=0 224 | else 225 | echo "Committing ..." 226 | echo "$commit_msg" | git commit -v ${commit_args} 227 | last_status=$? 228 | if [ ${last_status} -ne 0 ] 229 | then 230 | >&2 echo -e "\tfailed! (Committing for ${commit_hash})" 231 | fi 232 | fi 233 | fi 234 | 235 | set -e 236 | 237 | if [ ${last_status} -ne 0 ] 238 | then 239 | git status 240 | echo "Failed!" 241 | exit ${last_status} 242 | fi 243 | 244 | echo "############################################################" 245 | echo 246 | done 247 | 248 | echo 249 | echo "############################################################" 250 | echo "############################################################" 251 | echo 252 | 253 | # Merge the first (empty) commit with the second one 254 | echo "removing first (empty) commit ..." 
255 | git rebase --root 256 | 257 | echo "Source repo: '${source_repo}' ($source_type)" 258 | echo "Branch: ${branch}" 259 | echo "Max commits: ${num_commits_max}" 260 | echo "Target repo: '${target_repo}'" 261 | 262 | # Check if the original and the filtered versions have the same final content 263 | cd "${target_repo}" 264 | # We add a new commit on top of the orig branch, 265 | # that just clean-filters all the final content (with re-zip), 266 | # and we use that one only for comparing final contents for equality, 267 | # to be sure we messed nothing (serious) up, during filtering. 268 | git checkout -b "filter-commit-on-orig" "source/${branch}" 269 | # This applies the clean filter(s) on all the files in the repo, 270 | # and thus re-zips everything, in our case. 271 | git commit --all -m "Re-zipped binaries" 272 | set +e 273 | git diff --exit-code --stat --color --color-moved "${branch}_filtered" "filter-commit-on-orig" 274 | last_status=$? 275 | set -e 276 | git checkout "${branch}_filtered" 277 | git branch -D "filter-commit-on-orig" 278 | if [ ${last_status} -ne 0 ] 279 | then 280 | >&2 echo "ERROR: Original and filtered repos final content differ!" 281 | exit ${last_status} 282 | fi 283 | 284 | cd "$pwd_before" 285 | -------------------------------------------------------------------------------- /src/main/java/io/github/hoijui/rezipdoc/ReZip.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2015-2019, The authors of the ReZipDoc project. 3 | * 4 | * This program is free software: you can redistribute it and/or modify 5 | * it under the terms of the GNU General Public License as published by 6 | * the Free Software Foundation, either version 3 of the License, or 7 | * (at your option) any later version. 
8 | * 9 | * This program is distributed in the hope that it will be useful, 10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | * GNU General Public License for more details. 13 | * 14 | * You should have received a copy of the GNU General Public License 15 | * along with this program. If not, see . 16 | */ 17 | 18 | package io.github.hoijui.rezipdoc; 19 | 20 | import java.io.BufferedInputStream; 21 | import java.io.IOException; 22 | import java.io.InputStream; 23 | import java.net.URISyntaxException; 24 | import java.nio.file.Files; 25 | import java.nio.file.Path; 26 | import java.nio.file.attribute.FileTime; 27 | import java.util.logging.Level; 28 | import java.util.logging.Logger; 29 | import java.util.zip.CRC32; 30 | import java.util.zip.CheckedOutputStream; 31 | import java.util.zip.ZipEntry; 32 | import java.util.zip.ZipInputStream; 33 | import java.util.zip.ZipOutputStream; 34 | 35 | /** 36 | * Read ZIP content and write uncompressed ZIP content out. 37 | * Uncompressed files are stored more efficiently in Git. 38 | * 39 | * @see "https://github.com/costerwi/rezip" 40 | */ 41 | @SuppressWarnings({"WeakerAccess", "unused"}) 42 | public class ReZip { 43 | 44 | private static final Logger LOGGER = Utils.getLogger(ReZip.class.getName()); 45 | 46 | /** 47 | * Whether to re-pack the output ZIP with compression 48 | * (default: {@code false}). 49 | */ 50 | private final boolean compression; 51 | /** 52 | * Whether to re-pack the output ZIP with all entries time-stamps 53 | * (creation-, last-access- and last-modified-times) set to zero 54 | * (default: {@code false}). 55 | */ 56 | private final boolean nullifyTimes; 57 | /** 58 | * Whether to also re-pack ZIP files contained within the supplied ZIP 59 | * (and therein, and therein, ...) 60 | * (default: {@code true}). 
61 | */ 62 | private final boolean recursive; 63 | /** 64 | * Whether to pretty-print XML content 65 | * (default: {@code false}). 66 | */ 67 | private final boolean formatXml; 68 | 69 | /** 70 | * Stores settings about how to re-zip. 71 | * 72 | * @param compression whether the output ZIP is to use compression 73 | * @param nullifyTimes whether the creation-, last-access- and last-modified-times 74 | * of the re-packed archive entries should be set to {@code 0} 75 | * @param recursive whether to re-pack the ZIP recursively 76 | * (repacking the ZIPs within ZIPs ... within the supplied ZIP) 77 | * @param formatXml whether to pretty-print XML content 78 | * (default: {@code true}) 79 | */ 80 | public ReZip(final boolean compression, final boolean nullifyTimes, final boolean recursive, final boolean formatXml) { 81 | 82 | this.compression = compression; 83 | this.nullifyTimes = nullifyTimes; 84 | this.recursive = recursive; 85 | this.formatXml = formatXml; 86 | } 87 | 88 | public ReZip() { 89 | this(false, false, true, false); 90 | } 91 | 92 | /** 93 | * Whether to re-pack the output ZIP with compression. 94 | * @return default: {@code false} 95 | */ 96 | public boolean isCompression() { 97 | return compression; 98 | } 99 | 100 | /** 101 | * Whether to re-pack the output ZIP with all entries time-stamps 102 | * (creation-, last-access- and last-modified-times) set to zero. 103 | * @return default: {@code false} 104 | */ 105 | public boolean isNullifyTimes() { 106 | return nullifyTimes; 107 | } 108 | 109 | /** 110 | * Whether to also re-pack ZIP files contained within the supplied ZIP 111 | * (and therein, and therein, ...). 
112 | * @return default: {@code true} 113 | */ 114 | public boolean isRecursive() { 115 | return recursive; 116 | } 117 | 118 | /** 119 | * Whether to pretty-print XML content 120 | * @return default: {@code false} 121 | */ 122 | public boolean isFormatXml() { 123 | return formatXml; 124 | } 125 | 126 | private static void printUsage(final Level logLevel) { 127 | 128 | final String name = ReZip.class.getSimpleName(); 129 | if (LOGGER.isLoggable(logLevel)) { 130 | Utils.printUsageHeader(LOGGER, logLevel, name); 131 | LOGGER.log(logLevel, "Usage:"); 132 | LOGGER.log(logLevel, String.format("\t%s [--compressed|--uncompressed] [--nullify-times] [--non-recursive] out.zip", 133 | name)); 134 | LOGGER.log(logLevel, String.format("\t%s --write-suffixes", name)); 135 | LOGGER.log(logLevel, "Options:"); 136 | LOGGER.log(logLevel, "\t--compressed re-zip compressed"); 137 | LOGGER.log(logLevel, "\t--uncompressed (noop) re-zip uncompressed (this is the default, but we may want to explicitly state it on hte command line"); 138 | LOGGER.log(logLevel, "\t--nullify-times set creation-, last-access- and last-modified-times of the re-zipped archives entries to 0"); 139 | LOGGER.log(logLevel, "\t--non-recursive do not re-zip archives within archives"); 140 | LOGGER.log(logLevel, "\t--format-xml pretty-print (reformat) XML content"); 141 | LOGGER.log(logLevel, "\t--write-suffixes writes suffix files next to the JAR, populated with defaults, and exits"); 142 | } 143 | } 144 | 145 | /** 146 | * Reads a ZIP file from stdin and writes new ZIP content to stdout. 147 | * With the --compressed command line argument, 148 | * the output will be a compressed ZIP as well. 
149 | * 150 | * @param argv the command line arguments 151 | * @throws IOException if any input or output fails 152 | */ 153 | public static void main(final String[] argv) throws IOException { 154 | 155 | boolean compressed = false; 156 | boolean nullifyTimes = false; 157 | boolean recursive = true; 158 | boolean formatXml = false; 159 | for (final String arg : argv) { 160 | if ("--help".equals(arg) || "-h".equals(arg)) { 161 | printUsage(Level.INFO); 162 | return; 163 | } else if ("--compressed".equals(arg)) { 164 | compressed = true; 165 | } else if ("--uncompressed".equals(arg)) { 166 | // this is basically a noop, but it allows the command-line to look more expressive 167 | compressed = false; 168 | } else if ("--nullify-times".equals(arg)) { 169 | nullifyTimes = true; 170 | } else if ("--non-recursive".equals(arg)) { 171 | recursive = false; 172 | } else if ("--format-xml".equals(arg)) { 173 | formatXml = true; 174 | } else if ("--write-suffixes".equals(arg)) { 175 | try { 176 | Utils.writeSuffixesFiles(); 177 | return; 178 | } catch (URISyntaxException exc) { 179 | LOGGER.log(Level.SEVERE, "Failed writing suffixes files", exc); 180 | System.exit(1); 181 | } 182 | } else { 183 | printUsage(Level.WARNING); 184 | System.exit(1); 185 | } 186 | } 187 | 188 | new ReZip(compressed, nullifyTimes, recursive, formatXml).reZip(); 189 | } 190 | 191 | /** 192 | * Reads a ZIP file from stdin and writes new ZIP content to stdout. 
193 | * 194 | * @throws IOException if any input or output fails 195 | */ 196 | public void reZip() throws IOException { 197 | 198 | try (ZipInputStream zipIn = new ZipInputStream(System.in); 199 | ZipOutputStream zipOut = new ZipOutputStream(System.out)) 200 | { 201 | reZip(zipIn, zipOut); 202 | } 203 | } 204 | 205 | public void reZip(final Path zipInFile, final Path zipOutFile) throws IOException { 206 | 207 | try (ZipInputStream zipIn = new ZipInputStream(new BufferedInputStream(Files.newInputStream(zipInFile))); 208 | ZipOutputStream zipOut = new ZipOutputStream(Files.newOutputStream(zipOutFile))) 209 | { 210 | reZip(zipIn, zipOut); 211 | } 212 | } 213 | 214 | /** 215 | * Reads a ZIP and writes to an other ZIP. 216 | * 217 | * @param zipIn the source ZIP 218 | * @param zipOut the destination ZIP 219 | * @throws IOException if any input or output fails 220 | */ 221 | public void reZip(final ZipInputStream zipIn, final ZipOutputStream zipOut) 222 | throws IOException 223 | { 224 | final int compressionMethod = isCompression() ? 
ZipEntry.DEFLATED : ZipEntry.STORED; 225 | final byte[] buffer = new byte[8192]; 226 | final BufferedOutputStream uncompressedOutRaw = new BufferedOutputStream(); 227 | final CRC32 checksum = new CRC32(); 228 | final CheckedOutputStream uncompressedOutChecked = new CheckedOutputStream(uncompressedOutRaw, checksum); 229 | reZip(zipIn, zipOut, compressionMethod, buffer, uncompressedOutRaw, checksum, uncompressedOutChecked); 230 | } 231 | 232 | private void reZip( 233 | final ZipInputStream zipIn, 234 | final ZipOutputStream zipOut, 235 | final int compressionMethod, 236 | final byte[] buffer, 237 | final BufferedOutputStream uncompressedOutRaw, 238 | final CRC32 checksum, 239 | final CheckedOutputStream uncompressedOutChecked) 240 | throws IOException 241 | { 242 | final XmlFormatter xmlFormatter = new XmlFormatter(); 243 | for (ZipEntry entry = zipIn.getNextEntry(); entry != null; entry = zipIn.getNextEntry()) { 244 | uncompressedOutRaw.reset(); 245 | checksum.reset(); 246 | 247 | // Copy file from zipIn into uncompressed, check-summed output stream 248 | Utils.transferTo(zipIn, uncompressedOutChecked, buffer); 249 | zipIn.closeEntry(); 250 | 251 | // If we found a ZIP in this ZIP, and we want to recursively filter, then do so 252 | if (isFormatXml() && Utils.isXml(entry.getName(), entry.getSize(), uncompressedOutRaw)) { 253 | // XML file: pretty-print the data to stdout 254 | final InputStream source = uncompressedOutRaw.createInputStream(true); 255 | uncompressedOutRaw.reset(); 256 | xmlFormatter.prettify(source, uncompressedOutRaw, buffer); 257 | } else if (isRecursive() && Utils.isZip(entry.getName(), entry.getSize(), uncompressedOutRaw)) { 258 | final BufferedOutputStream subUncompressedOutRaw = new BufferedOutputStream(); 259 | final CRC32 subChecksum = new CRC32(); 260 | final CheckedOutputStream subUncompressedOutChecked = new CheckedOutputStream(subUncompressedOutRaw, subChecksum); 261 | try (ZipInputStream zipInRec = new 
ZipInputStream(uncompressedOutRaw.createInputStream(true)); 262 | ZipOutputStream zipOutRec = new ZipOutputStream(uncompressedOutChecked)) 263 | { 264 | uncompressedOutRaw.reset(); 265 | checksum.reset(); 266 | reZip(zipInRec, zipOutRec, compressionMethod, buffer, subUncompressedOutRaw, subChecksum, subUncompressedOutChecked); 267 | } 268 | } 269 | 270 | // Create the ZIP entry for destination ZIP 271 | entry.setSize(uncompressedOutRaw.size()); 272 | entry.setCrc(checksum.getValue()); 273 | entry.setMethod(compressionMethod); 274 | // Unknown compressed size 275 | entry.setCompressedSize(-1); 276 | if (isNullifyTimes()) { 277 | entry.setTime(0); 278 | entry.setCreationTime(FileTime.fromMillis(0)); 279 | entry.setLastAccessTime(FileTime.fromMillis(0)); 280 | entry.setLastModifiedTime(FileTime.fromMillis(0)); 281 | } 282 | 283 | zipOut.putNextEntry(entry); 284 | uncompressedOutRaw.writeTo(zipOut); 285 | zipOut.closeEntry(); 286 | } 287 | } 288 | } 289 | -------------------------------------------------------------------------------- /src/main/java/io/github/hoijui/rezipdoc/XmlFormatter.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2015-2019, The authors of the ReZipDoc project. 3 | * 4 | * This program is free software: you can redistribute it and/or modify 5 | * it under the terms of the GNU General Public License as published by 6 | * the Free Software Foundation, either version 3 of the License, or 7 | * (at your option) any later version. 8 | * 9 | * This program is distributed in the hope that it will be useful, 10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | * GNU General Public License for more details. 13 | * 14 | * You should have received a copy of the GNU General Public License 15 | * along with this program. If not, see . 
16 | */ 17 | 18 | package io.github.hoijui.rezipdoc; 19 | 20 | import org.w3c.dom.Document; 21 | import org.w3c.dom.Node; 22 | import org.w3c.dom.NodeList; 23 | import org.xml.sax.InputSource; 24 | import org.xml.sax.SAXException; 25 | 26 | import javax.xml.XMLConstants; 27 | import javax.xml.parsers.DocumentBuilderFactory; 28 | import javax.xml.parsers.ParserConfigurationException; 29 | import javax.xml.transform.OutputKeys; 30 | import javax.xml.transform.Transformer; 31 | import javax.xml.transform.TransformerException; 32 | import javax.xml.transform.TransformerFactory; 33 | import javax.xml.transform.dom.DOMSource; 34 | import javax.xml.transform.stream.StreamResult; 35 | import javax.xml.xpath.XPath; 36 | import javax.xml.xpath.XPathConstants; 37 | import javax.xml.xpath.XPathExpressionException; 38 | import javax.xml.xpath.XPathFactory; 39 | import java.io.BufferedInputStream; 40 | import java.io.BufferedOutputStream; 41 | import java.io.ByteArrayInputStream; 42 | import java.io.ByteArrayOutputStream; 43 | import java.io.IOException; 44 | import java.io.InputStream; 45 | import java.io.OutputStream; 46 | import java.io.PrintStream; 47 | import java.nio.charset.StandardCharsets; 48 | import java.nio.file.Files; 49 | import java.nio.file.Path; 50 | import java.nio.file.Paths; 51 | import java.util.Arrays; 52 | import java.util.Iterator; 53 | import java.util.List; 54 | import java.util.logging.Level; 55 | import java.util.logging.Logger; 56 | 57 | /** 58 | * Takes XML content as input, 59 | * and reproduces the same content as output, 60 | * but more pleasing on the human eye, 61 | * by adding proper line-endings and indents. 
62 | */ 63 | @SuppressWarnings("WeakerAccess") 64 | public class XmlFormatter { 65 | 66 | private static final Logger LOGGER = Utils.getLogger(XmlFormatter.class.getName()); 67 | 68 | private static final int DEFAULT_BUFFER_SIZE = 2048; 69 | private static final int DEFAULT_ARG_INDENT_SPACES = 2; 70 | private static final String DEFAULT_ARG_INDENT = " "; 71 | private static final boolean DEFAULT_ARG_CORRECT = true; 72 | 73 | private final int indentSpaces; 74 | private final String indent; 75 | private final boolean correct; 76 | 77 | /** 78 | * Creates an instance with specific values. 79 | * Usually {@code indentSpaces} is used for correct, 80 | * while {@code indent} is used for rough and fast pretty'fication. 81 | * 82 | * @param indentSpaces how many spaces to use per indent 83 | * @param indent what string to use for oen indent 84 | * (this might be two spaces or one TAB, for example) 85 | * @param correct whether to use correct or rough and fast pretty'fication. 86 | * correct is slower, and works for valid XML only. 87 | * rough and fast might produce weird results if there are 88 | * '{@literal <}' or '{@literal >}' characters which are not part of tags. 89 | */ 90 | public XmlFormatter(final int indentSpaces, final String indent, final boolean correct) { 91 | 92 | this.indentSpaces = indentSpaces; 93 | this.indent = indent; 94 | this.correct = correct; 95 | } 96 | 97 | /** 98 | * Creates an instance with default values. 
99 | */ 100 | public XmlFormatter() { 101 | this(DEFAULT_ARG_INDENT_SPACES, DEFAULT_ARG_INDENT, DEFAULT_ARG_CORRECT); 102 | } 103 | 104 | private static void printUsage(final Level logLevel) { 105 | 106 | final String name = XmlFormatter.class.getSimpleName(); 107 | if (LOGGER.isLoggable(logLevel)) { 108 | Utils.printUsageHeader(LOGGER, logLevel, name); 109 | LOGGER.log(logLevel, "Usage:"); 110 | LOGGER.log(logLevel, String.format( 111 | "\t%s [XML-input-file] [XML-output-file] # both input and output from files", 112 | name)); 113 | LOGGER.log(logLevel, String.format( 114 | "\t%s [XML-input-file] # input from file, output to stdout", 115 | name)); 116 | LOGGER.log(logLevel, String.format( 117 | "\t%s # input from stdin, output to stdout", 118 | name)); 119 | LOGGER.log(logLevel, "Examples:"); 120 | LOGGER.log(logLevel, String.format("\t%s in-file.xml out-file.xml", 121 | name)); 122 | LOGGER.log(logLevel, String.format("\t%s in-file.xml > out-file.xml", 123 | name)); 124 | LOGGER.log(logLevel, String.format("\t%s < in-file.xml > out-file.xml", 125 | name)); 126 | } 127 | } 128 | 129 | private static InputStream createInput(final Path inFile) throws IOException { 130 | 131 | InputStream in; 132 | if (inFile == null) { 133 | in = new BufferedInputStream(System.in, 64) { 134 | @Override 135 | public void close() { 136 | // NOTE We do explicitly NOT close the underlying stream 137 | } 138 | }; 139 | } else { 140 | in = Files.newInputStream(inFile); 141 | } 142 | return in; 143 | } 144 | 145 | private static OutputStream createOutput(final Path outFile) throws IOException { 146 | 147 | OutputStream out; 148 | if (outFile == null) { 149 | out = new BufferedOutputStream(System.out, 64) { 150 | @Override 151 | public void close() { 152 | // NOTE We do explicitly NOT close the underlying stream 153 | } 154 | }; 155 | } else { 156 | out = Files.newOutputStream(outFile); 157 | } 158 | return out; 159 | } 160 | 161 | public static void main(final String[] args) { 162 | 163 | 
final List argsL = Arrays.asList(args); 164 | if (argsL.contains("-h") || argsL.contains("--help")) { 165 | printUsage(Level.INFO); 166 | } else { 167 | // normal usage: prettify input to output 168 | int indentSpaces = DEFAULT_ARG_INDENT_SPACES; 169 | String indent = DEFAULT_ARG_INDENT; 170 | boolean correct = DEFAULT_ARG_CORRECT; 171 | Path inFile = null; 172 | Path outFile = null; 173 | int bufferSize = DEFAULT_BUFFER_SIZE; 174 | final Iterator argsIt = argsL.iterator(); 175 | while (argsIt.hasNext()) { 176 | final String arg = argsIt.next(); 177 | if ("-r".equals(arg) || "--rough".equals(arg)) { 178 | correct = false; 179 | } else if ("--indent-spaces".equals(arg)) { 180 | indentSpaces = Integer.parseInt(argsIt.next()); 181 | } else if ("--indent".equals(arg)) { 182 | indent = argsIt.next(); 183 | } else if ("-i".equals(arg) || "--input".equals(arg)) { 184 | inFile = Paths.get(argsIt.next()); 185 | } else if ("-o".equals(arg) || "--output".equals(arg)) { 186 | outFile = Paths.get(argsIt.next()); 187 | } else if ("-b".equals(arg) || "--buffer-size".equals(arg)) { 188 | bufferSize = Integer.parseInt(argsIt.next()); 189 | } else { 190 | if (LOGGER.isLoggable(Level.SEVERE)) { 191 | LOGGER.log(Level.SEVERE, "Unknown argument: {0}", arg); 192 | printUsage(Level.SEVERE); 193 | } 194 | System.exit(1); 195 | } 196 | } 197 | 198 | final XmlFormatter xmlFormatter = new XmlFormatter(indentSpaces, indent, correct); 199 | 200 | try (InputStream source = createInput(inFile); 201 | OutputStream target = createOutput(outFile)) 202 | { 203 | xmlFormatter.prettify(source, target, createBuffer(bufferSize)); 204 | } catch (final Exception exc) { 205 | if (LOGGER.isLoggable(Level.SEVERE)) { 206 | LOGGER.log(Level.SEVERE, "Failed to XML pretty-print", exc); 207 | printUsage(Level.SEVERE); 208 | } 209 | System.exit(1); 210 | } 211 | } 212 | } 213 | 214 | private static byte[] createBuffer(final int size) { 215 | return new byte[size]; 216 | } 217 | 218 | /** 219 | * Reformats XML 
content to be easy on the human eye. 220 | * 221 | * @param xmlIn the supplier of XML content to pretty-print 222 | * @param xmlOut where the pretty XML content shall be written to 223 | * @param buffer may be used internally for whatever in- or out-buffering there might be 224 | * @throws IOException if any input or output fails 225 | */ 226 | public void prettify(final InputStream xmlIn, final OutputStream xmlOut, final byte[] buffer) 227 | throws IOException 228 | { 229 | try { 230 | if (correct) { 231 | prettifyCorrect(xmlIn, xmlOut); 232 | } else { 233 | prettifyRoughAndFast(xmlIn, xmlOut, buffer); 234 | } 235 | } catch (final Exception exc) { 236 | LOGGER.log(Level.WARNING, "Failed to pretty print; fallback to carbon-copy", exc); 237 | // In case of failure of pretty-printing, use the XML as-is 238 | Utils.transferTo(xmlIn, xmlOut, buffer); 239 | } 240 | } 241 | 242 | /** 243 | * Reformats XML content to be easy on the human eye. 244 | * NOTE Rather use the 245 | * {@link #prettify(InputStream, OutputStream, byte[]) streamed version}, 246 | * as it uses less memory. 
247 | * 248 | * @param xml the XML content to be pretty-printed 249 | * @return pretty XML content 250 | * @throws IOException if any input or output fails 251 | */ 252 | public String prettify(final String xml) throws IOException { 253 | 254 | final byte[] xmlBytes = xml.getBytes(StandardCharsets.UTF_8); 255 | try (InputStream xmlIn = new ByteArrayInputStream(xmlBytes); 256 | ByteArrayOutputStream xmlOut = new ByteArrayOutputStream()) 257 | { 258 | // NOTE It is a bit hacky to use the input buffer as working buffer, 259 | // but should work without problems in this case 260 | prettify(xmlIn, xmlOut, xmlBytes); 261 | return new String(xmlOut.toByteArray(), StandardCharsets.UTF_8); 262 | } 263 | } 264 | 265 | public void prettifyCorrect(final InputStream xmlIn, final OutputStream xmlOut) 266 | throws IOException, 267 | ParserConfigurationException, 268 | SAXException, 269 | XPathExpressionException, 270 | TransformerException 271 | { 272 | final DocumentBuilderFactory documentBuilderFactory = DocumentBuilderFactory.newInstance(); 273 | documentBuilderFactory.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true); 274 | final Document document = documentBuilderFactory 275 | .newDocumentBuilder() 276 | .parse(new InputSource(xmlIn)); 277 | 278 | final XPath xPath = XPathFactory.newInstance().newXPath(); 279 | final NodeList nodeList = (NodeList) xPath.evaluate("//text()[normalize-space()='']", 280 | document, 281 | XPathConstants.NODESET); 282 | 283 | for (int i = 0; i < nodeList.getLength(); ++i) { 284 | Node node = nodeList.item(i); 285 | node.getParentNode().removeChild(node); 286 | } 287 | 288 | final TransformerFactory transformerFactory = TransformerFactory.newInstance(); 289 | transformerFactory.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true); 290 | final Transformer transformer = transformerFactory.newTransformer(); 291 | transformer.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "no"); 292 | transformer.setOutputProperty(OutputKeys.INDENT, 
"yes"); 293 | transformer.setOutputProperty("{http://xml.apache.org/xslt}indent-amount", String.valueOf(indentSpaces)); 294 | 295 | final StreamResult streamResult = new StreamResult(xmlOut); 296 | 297 | transformer.transform(new DOMSource(document), streamResult); 298 | } 299 | 300 | public void prettifyRoughAndFast(final InputStream xmlIn, final OutputStream xmlOut, final byte[] buffer) 301 | throws IOException 302 | { 303 | // this is a kind of stack, denoting the number of indents 304 | int numIndents = 0; 305 | 306 | // prepare the in-buffer 307 | final StringBuilder inBuffer = new StringBuilder(); 308 | // prepare the out stream wrapper, 309 | // which allows to write string data more comfortably 310 | final PrintStream xmlOutPrinter = new PrintStream(xmlOut); 311 | 312 | for (int readBytes = xmlIn.read(buffer); readBytes > 0; readBytes = xmlIn.read(buffer)) { 313 | // convert the newly read part to a string 314 | // and append it to the leftover, which was already read 315 | inBuffer.append(new String(buffer, 0, readBytes)); 316 | 317 | // split all the content we have at the moment into rows (think: lines) 318 | final String[] rows = inBuffer.toString() 319 | .replace(">", ">\n") 320 | .replace("<", "\n<") 321 | .split("\n"); 322 | 323 | // handle all except the last row, 324 | // because it is potentially incomplete 325 | for (int ir = 0; ir < rows.length - 1; ir++) { 326 | numIndents = handleRow(xmlOutPrinter, rows[ir].trim(), numIndents); 327 | } 328 | 329 | // fill the buffer with only the last row, 330 | // which is potentially incomplete 331 | inBuffer.setLength(0); 332 | inBuffer.append(rows[rows.length - 1]); 333 | } 334 | // handle the last row 335 | handleRow(xmlOutPrinter, inBuffer.toString().trim(), numIndents); 336 | } 337 | 338 | private static void appendIndents(final PrintStream output, final int numIndents, final String indent) { 339 | 340 | for (int ii = 0; ii < numIndents; ii++) { 341 | output.append(indent); 342 | } 343 | } 344 | 345 | 
private int handleRow(final PrintStream xmlOut, final String row, final int numIndents) { 346 | 347 | int curIndents = numIndents; 348 | if (!row.isEmpty()) { 349 | if (row.startsWith("")) { 356 | appendIndents(xmlOut, curIndents, indent); 357 | xmlOut.append(row).append("\n"); 358 | curIndents++; 359 | if (row.endsWith("]]>")) { 360 | curIndents--; 361 | } 362 | } else { 363 | appendIndents(xmlOut, curIndents, indent); 364 | xmlOut.append(row).append("\n"); 365 | } 366 | } 367 | 368 | return curIndents; 369 | } 370 | } 371 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # ReZipDoc 2 | 3 | A _repack uncompressed_ & _diff visualizer_ for ZIP based files stored in git repos. 4 | 5 | Most 6 | [git]( 7 | https://git-scm.com/) 8 | repos hosting 9 | [Open Source Hardware]( 10 | https://en.wikipedia.org/wiki/Open-source_hardware) 11 | should use [__ReZipDoc__](https://github.com/hoijui/ReZipDoc). 12 | 13 | ## What is this? 14 | 15 | [git](https://git-scm.com/) does not like binary files. 16 | They make the repo grow fast in size in MB (see [delta compression](https://en.wikipedia.org/wiki/Delta_encoding)), 17 | and when you try to see what changed in a commit, you only get this: 18 | 19 | > Binary files _A_ and _B_ differ! 20 | 21 | ... not very useful! 22 | 23 | __ReZipDoc__ solves both of these issues, though only for ZIP based files, 24 | which includes for example FreeCAD and LibreOffice files. 25 | 26 | > **NOTE** It does not work for all binary files! 27 | 28 | > **HINT** If you are unsure whether a file format is ZIP based, 29 | > just try to look at it with a software that can peak into ZIP files.\ 30 | > On Linux or OSX: `unzip -l someFile.xyz` 31 | 32 | So if you are storing ZIP based files in your `git` repo, 33 | you probably want to use __ReZipDoc__. 
34 | 35 | ## Index 36 | 37 | * [Project state](#project-state) 38 | * [How to use](#how-to-use) 39 | * [Installation](#installation) 40 | * [Install helper scripts](#install-helper-scripts) 41 | * [Install diff viewer or filter](#install-diff-viewer-or-filter) 42 | * [Install filter manually](#install-filter-manually) 43 | * [Filter repo history](#filter-repo-history) 44 | * [Filtering example](#filtering-example) 45 | * [Culprits](#culprits) 46 | * [Motivation](#motivation) 47 | * [How it works](#how-it-works) 48 | * [Benefits](#benefits) 49 | * [Observations](#observations) 50 | * [Based on](#based-on) 51 | 52 | ## Project state 53 | 54 | This repo contains a heavily revised, refined version of ReZip (and ZipDoc), 55 | plus [unit tests](src/test/java/io/github/hoijui/rezipdoc) 56 | and [helper scripts](scripts), 57 | which were not available in the original. 58 | 59 | [![License](https://img.shields.io/badge/license-GPL%203-orange.svg)](https://www.gnu.org/licenses/gpl-3.0.en.html) 60 | [![GitHub last commit](https://img.shields.io/github/last-commit/hoijui/ReZipDoc.svg)](https://github.com/hoijui/ReZipDoc) 61 | [![Issues](https://img.shields.io/badge/issues-GitHub-57f.svg)](https://github.com/hoijui/ReZipDoc/issues) 62 | 63 | `master`: 64 | [![Build Status](https://travis-ci.org/hoijui/ReZipDoc.svg?branch=master)](https://travis-ci.org/hoijui/ReZipDoc) 65 | [![Open Hub project report](https://www.openhub.net/p/ReZipDoc/widgets/project_thin_badge.gif)](https://www.openhub.net/p/ReZipDoc?ref=sample) 66 | 67 | [![SonarCloud Status](https://sonarcloud.io/api/project_badges/measure?project=hoijui_ReZipDoc&metric=alert_status)](https://sonarcloud.io/dashboard?id=hoijui_ReZipDoc) 68 | [![SonarCloud Coverage](https://sonarcloud.io/api/project_badges/measure?project=hoijui_ReZipDoc&metric=coverage)](https://sonarcloud.io/component_measures/metric/coverage/list?id=hoijui_ReZipDoc) 69 | [![SonarCloud 
Bugs](https://sonarcloud.io/api/project_badges/measure?project=hoijui_ReZipDoc&metric=bugs)](https://sonarcloud.io/component_measures/metric/reliability_rating/list?id=hoijui_ReZipDoc) 70 | [![SonarCloud Vulnerabilities](https://sonarcloud.io/api/project_badges/measure?project=hoijui_ReZipDoc&metric=vulnerabilities)](https://sonarcloud.io/component_measures/metric/security_rating/list?id=hoijui_ReZipDoc) 71 | 72 | ## How to use 73 | 74 | If your git repo makes heavy use of ZIP based files, 75 | then you probably want to use ReZipDoc in one of these three ways: 76 | 77 | * install __ZipDoc diff viewer__ - 78 | This allows you to see changes within your ZIP based files 79 | when looking at git history in a human-readable way. 80 | It does not change your past nor future git history. 81 | 82 | To use this, [install](#install-diff-viewer-or-filter) with `--diff` only. 83 | * install __ReZip filter__ - 84 | This will change your future git repo's history, 85 | storing ZIP based files without compression. 86 | 87 | To use this, [install](#install-diff-viewer-or-filter) with `--commit --diff --renormalize`. 88 | * install __ReZip filter & filter repo__ - 89 | This changes both the past (<- ___Caution!___) 90 | and future history of your repo. 91 | 92 | To use this, [create a copy of the repo with filtered history](#filter-repo-history). 93 | 94 | ## Installation 95 | 96 | The filter and diff tool require Java 8 or newer. 97 | 98 | The helper scripts - which are mostly used for installing the filter - 99 | require a POSIX (~= Unix) environment. 100 | This is the case on OSX, Linux, BSD, Unix and even Windows, if git is installed. 101 | 102 | The recommended procedure is to 103 | [install the helper scripts](#install-helper-scripts) once, 104 | and then use them to comfortably install the filter into local git repos. 105 | 106 | > __NOTE__\ 107 | This downloads and executes an online script onto your machine, 108 | which is a potential security risk. 
109 | You may want to check-out the script before running it. 110 | 111 | ### Install helper scripts 112 | 113 | > __NOTE__\ 114 | This has to be done once per developer machine. 115 | 116 | They get installed into `~/bin/`, 117 | and if the directory did not exist before, 118 | it will get added to `PATH`. 119 | 120 | To install: 121 | ```bash 122 | curl --silent --location \ 123 | https://raw.githubusercontent.com/hoijui/ReZipDoc/master/scripts/rezipdoc-scripts-tool.sh \ 124 | | sh -s install --path 125 | ``` 126 | 127 | To update (to latest development version): 128 | ```bash 129 | curl --silent --location \ 130 | https://raw.githubusercontent.com/hoijui/ReZipDoc/master/scripts/rezipdoc-scripts-tool.sh \ 131 | | sh -s update --dev 132 | ``` 133 | 134 | To remove: 135 | ```bash 136 | curl --silent --location \ 137 | https://raw.githubusercontent.com/hoijui/ReZipDoc/master/scripts/rezipdoc-scripts-tool.sh \ 138 | | sh -s remove 139 | ``` 140 | 141 | ### Install diff viewer or filter 142 | 143 | > __NOTE__\ 144 | This has to be done once per repo. 145 | 146 | This installs the latest release of ReZipDoc into your local git repo. 147 | 148 | Make sure you already have [installed the helper scripts](#install-helper-scripts) 149 | on your machine. 150 | 151 | Switch to the local git repo you want to install this filter to, 152 | for example: 153 | 154 | ```bash 155 | cd ~/src/myRepo/ 156 | ``` 157 | 158 | As explained in [How to use](#how-to-use), 159 | you now want to use one of the following: 160 | 161 | 1. Install the diff viewer 162 | 163 | ```bash 164 | rezipdoc-repo-tool.sh install --diff 165 | ``` 166 | 2. Install the filter 167 | 168 | ```bash 169 | rezipdoc-repo-tool.sh install --commit --renormalize 170 | ``` 171 | 3. Filter the history & install the filter 172 | 173 | If you [filter the repo history](#filter-repo-history), 174 | the freshly created, filtered repo will already have the filter installed as above. 
175 | 176 | To uninstall the diff viewer and/or filter, run: 177 | 178 | ```bash 179 | rezipdoc-repo-tool.sh remove 180 | ``` 181 | 182 | #### Install filter manually 183 | 184 | Only use this if you can not use [the above](#install-diff-viewer-or-filter), for some reason. 185 | 186 | 1. Build the JAR 187 | 188 | Run this in bash: 189 | 190 | ```bash 191 | cd 192 | mkdir -p src 193 | cd src 194 | git clone git@github.com:hoijui/ReZipDoc.git 195 | cd ReZipDoc 196 | mvn package 197 | echo "Created ReZipDoc binary:" 198 | ls -1 $PWD/target/rezipdoc-*.jar 199 | ``` 200 | 201 | 2. Install the JAR 202 | 203 | Store _rezipdoc-\*.jar_ somewhere locally, either: 204 | 205 | * (global) in your home directory, for example under _~/bin/_ 206 | * (repo - tracked) in your repository, tracked, for example under _/tools/_ 207 | * (repo - local) __recommended__ in your repository, locally only, under _/.git/_ 208 | 209 | 3. Install the Filter(s) 210 | 211 | execute these lines: 212 | 213 | ```bash 214 | # Install the add/commit filter 215 | git config --replace-all filter.reZip.clean "java -cp .git/rezipdoc-*.jar io.github.hoijui.rezipdoc.ReZip --uncompressed" 216 | 217 | # (optionally) Install the checkout filter 218 | git config --replace-all filter.reZip.smudge "java -cp .git/rezipdoc-*.jar io.github.hoijui.rezipdoc.ReZip --compressed" 219 | 220 | # (optionally) Install the diff filter 221 | git config --replace-all diff.zipDoc.textconv "java -cp .git/rezipdoc-*.jar io.github.hoijui.rezipdoc.ZipDoc" 222 | ``` 223 | 224 | 4. 
Enable the filters 225 | 226 | In one of these files: 227 | 228 | * (global) _${HOME}/.gitattributes_ 229 | * (repo - tracked) _/.gitattributes_ 230 | * (repo - local) __recommended__ _/.git/info/attributes_ 231 | 232 | Assign attributes to paths: 233 | 234 | ```bash 235 | # This forces git to treat files as if they were text-based (for example in diffs) 236 | [attr]textual diff merge text 237 | # This makes git re-zip ZIP files uncompressed on commit 238 | # NOTE See the ReZipDoc README for how to install the required git filter 239 | [attr]reZip textual filter=reZip 240 | # This makes git visualize ZIP files as uncompressed text with some meta info 241 | # NOTE See the ReZipDoc README for how to install the required git filter 242 | [attr]zipDoc textual diff=zipDoc 243 | # This combines in-history decompression and uncompressed view of ZIP files 244 | [attr]reZipDoc reZip zipDoc 245 | 246 | # MS Office 247 | *.docx reZipDoc 248 | *.xlsx reZipDoc 249 | *.pptx reZipDoc 250 | # OpenOffice 251 | *.odt reZipDoc 252 | *.ods reZipDoc 253 | *.odp reZipDoc 254 | # Misc 255 | *.mcdx reZipDoc 256 | *.slx reZipDoc 257 | # Archives 258 | *.zip reZipDoc 259 | # Java archives 260 | *.jar reZipDoc 261 | # FreeCAD files 262 | *.fcstd reZipDoc 263 | ``` 264 | 265 | ## Filter repo history 266 | 267 | This always creates a new copy of the repository. 268 | 269 | >__NOTE__\ 270 | This only filters a single branch. 271 | 272 | Make sure you have the [helper scripts installed](#install-helper-scripts) and in your `PATH`. 
273 | 274 | This filters the `master` branch of the repo at `~/src/myRepo` 275 | into a new local repo `~/src/myRepo_filtered`, 276 | using the original commit messages, authors and dates: 277 | 278 | ```bash 279 | rezipdoc-history-filter.sh \ 280 | --source ~/src/myRepo \ 281 | --branch master \ 282 | --orig \ 283 | --target ~/src/myRepo_filtered 284 | ``` 285 | 286 | It also works with an online source: 287 | 288 | ```bash 289 | rezipdoc-history-filter.sh \ 290 | --source "https://github.com/case06/ZACplus.git" \ 291 | --branch master \ 292 | --orig \ 293 | --target /tmp/ZACplus_filtered 294 | ``` 295 | 296 | After doing this, the new, filtered repo will already have the filter installed, 297 | so future commits will be filtered. 298 | 299 | ### Filtering example 300 | 301 | We are going to run 302 | [a script that filters the Zinc-Oxide Open Hardware battery (ZAC+) project repo]( 303 | https://github.com/hoijui/ReZipDoc/blob/master/scripts/rezipdoc-filter-ZACplus.sh), 304 | which has a header comment explaining what it does in detail. 305 | 306 | In short, it downloads ReZipDoc helper scripts to `~/bin`, 307 | adds that dir to `PATH` if it is not there yet, 308 | creates temporary git repos in `/tmp/`, 309 | and generates some command-line output. 310 | 311 | Run it like this: 312 | 313 | ```bash 314 | curl --silent --location \ 315 | https://raw.githubusercontent.com/hoijui/ReZipDoc/master/scripts/rezipdoc-sample-filter-session.sh \ 316 | | sh 317 | ``` 318 | 319 | ## Culprits 320 | 321 | As described in [gitattributes](http://git-scm.com/docs/gitattributes), 322 | you may see unnecessary merge conflicts when you add attributes to a file that 323 | causes the repository format for that file to change. 
324 | To prevent this, Git can be told to run a virtual check-out and check-in of all 325 | three stages of a file when resolving a three-way merge: 326 | 327 | ```bash 328 | git config --add --bool merge.renormalize true 329 | ``` 330 | 331 | ## Motivation 332 | 333 | Many popular applications, such as 334 | [Microsoft Office](http://en.wikipedia.org/wiki/Office_Open_XML) and 335 | [Libre/Open Office](http://en.wikipedia.org/wiki/OpenDocument), 336 | save their documents as XML in compressed zip containers. 337 | Small changes to these documents' contents may result in big changes to their 338 | compressed binary container file. 339 | When compressed files are stored in a Git repository 340 | these big differences make delta compression inefficient or impossible 341 | and the repository size is roughly the sum of its revisions. 342 | 343 | This small program acts as a Git clean filter driver. 344 | It reads a ZIP file from stdin and outputs the same ZIP content to stdout, 345 | but without compression. 346 | 347 | ##### pros 348 | 349 | + human readable/plain-text diffs of (ZIP based) archives, 350 | (if they contain plain-text files) 351 | + smaller overall repository size if the archive contents change frequently 352 | 353 | ##### cons 354 | 355 | - slower `git add`/`git commit` process 356 | - slower checkout process, if the smudge filter is used 357 | 358 | ## How it works 359 | 360 | When adding/committing a ZIP based file, 361 | ReZip unpacks it and repacks it without compression, 362 | before adding it to the index/commit. 363 | In an uncompressed ZIP file, 364 | the archived files appear _as-is_ in its content 365 | (together with some binary meta-info before each file). 366 | If those archived files are plain-text files, 367 | this method will play nicely with git. 368 | 369 | ## Benefits 370 | 371 | The main benefit of ReZip over Zippey, 372 | is that the actual file stored in the repository is still a ZIP file. 
373 | Thus, in many cases, it will still work _as-is_ 374 | with the respective application (for example Open Office), 375 | even if it is obtained without going through 376 | the re-packing-with-compression smudge filter, 377 | so for example when downloading the file through a web-interface, 378 | instead of checking it out with git. 379 | 380 | ## Observations 381 | 382 | The following are based on my experience in real-world cases. 383 | Use at your own risk. 384 | Your mileage may vary. 385 | 386 | ### SimuLink 387 | 388 | * One packed repository with ReZip was 54% of the size of the packed repository 389 | storing compressed ZIPs. 390 | * Another repository with 280 _\*.slx_ files and over 3000 commits was originally 281 MB 391 | and was reduced to 156 MB using this technique (55% of baseline). 392 | 393 | ### MS Power-Point 394 | 395 | I found that the loose objects stored without this filter were about 5% smaller 396 | than the original file size (zLib on top of zip compression). 397 | When using the ReZip filter, the loose objects were about 10% smaller than the 398 | original files, since zLib could work more efficiently on uncompressed data. 399 | The packed repository with ReZip was only 10% smaller than the packed repository 400 | storing compressed zips. 401 | I think this unremarkable efficiency improvement is due to a large number of 402 | _\*.png_ files in the presentation which were already stored without compression 403 | in the original _\*.pptx_. 
404 | 405 | ## Based on 406 | 407 | * [__ReZip__](https://github.com/costerwi/rezip) 408 | For more efficient Git packing of ZIP based files 409 | * [__ZipDoc__](https://github.com/costerwi/zipdoc) 410 | A Git `textconv` program to show text-based diffs of ZIP files 411 | 412 | 413 | ## Similar Projects 414 | 415 | * [__png-inflate__](https://github.com/rayrobdod/png-inflate) 416 | Does the same uncompressed repack for PNG image files 417 | -------------------------------------------------------------------------------- /scripts/rezipdoc-repo-tool.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # Copyright (c) 2019 Robin Vobruba 3 | # 4 | # Permission is hereby granted, free of charge, to any person obtaining a copy 5 | # of this software and associated documentation files (the "Software"), to deal 6 | # in the Software without restriction, including without limitation the rights 7 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 8 | # copies of the Software, and to permit persons to whom the Software is 9 | # furnished to do so, subject to the following conditions: 10 | # 11 | # The above copyright notice and this permission notice shall be included in all 12 | # copies or substantial portions of the Software. 13 | # 14 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 17 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 19 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 20 | # SOFTWARE. 21 | 22 | # For info about this script, please refer to the `printUsage()` function below. 

# Exit immediately on each error and unset variable;
# see: https://vaneyckt.io/posts/safer_bash_scripts_with_set_euxo_pipefail/
# NOTE Because of 'pipefail', a pipeline fails if *any* of its commands fails,
#      so every pipeline further down has to cope with that.
set -Eeuo pipefail
#set -Eeu

# The directory the script was called from
pwd_before="$(pwd)"
# File name of this script, without the directory part
this_script_file=$(basename "$0")
script_name="$this_script_file"
# Absolute path of the directory this script is located in
this_script_dir=$(cd "$(dirname "$0")"; pwd)

# Settings and default values
# The action to perform; set from the command-line arguments
action=""
# Space separated list of glob patterns ("*.<ext>"),
# one for each extension listed in the archive extensions resource file
# NOTE(review): that file is looked up relative to this script, inside the sources tree;
#               presumably it is not there if only the scripts are installed - TODO confirm
target_path_specs=''
for ext in $(cat "$this_script_dir/../src/main/resources/ext_archives.txt")
do
	target_path_specs="$target_path_specs *.$ext"
done
# As described in [gitattributes](http://git-scm.com/docs/gitattributes),
# you may see unnecessary merge conflicts when you add attributes to a file that
# causes the repository format for that file to change.
# To prevent this, Git can be told to run a virtual check-out and check-in of all
# three stages of a file when resolving a three-way merge.
# This might slow down merges
enable_renormalize="false"
# The parts of the filter to handle; set to "true" by the respective command-line option
enable_commit="false"
enable_checkout="false"
enable_diff="false"
# Java package containing the filter and diff tool classes
java_pkg="io.github.hoijui.rezipdoc"
# Maven coordinates, used to build the download URL
maven_group="$java_pkg"
maven_artifact="rezipdoc"
# Whether a JAR built locally (found in '../target', relative to this script)
# is preferred over downloading one
use_local_binary_if_available=true
# Where to download the JAR from
fetch_url="https://repository.sonatype.org/service/local/artifact/maven/redirect?r=central-proxy&g=${maven_group}&a=${maven_artifact}&v=LATEST"
# Where the JAR is installed to; a glob, because the file name contains the version
binary_file_glob=".git/rezipdoc-*.jar"
# All three paths below are relative to the current working directory
conf_file=".git/config"
#attributes_file="${HOME}/.gitattributes"
#attributes_file=".gitattributes"
attributes_file=".git/info/attributes"
# This serves as a magic marker, marking our generated text parts
# It could be any string that is unique enough.
63 | gen_token="go-generate-token" 64 | 65 | marker_begin="# BEGIN $gen_token" 66 | marker_end="# END $gen_token" 67 | header_note="# NOTE Do not manually edit this section; it was generated with $this_script_file" 68 | 69 | printUsage() { 70 | echo "$script_name - This installs (or removes) a custom git filter" 71 | echo "and a diff tool to the local repo, which make using" 72 | echo "ZIP based archives more git-workflow friendly." 73 | echo 74 | echo "See the ReZipDoc README for further info." 75 | echo 76 | echo "NOTE This is really only required for developers, hacking on this code-base." 77 | echo "NOTE All of this gets installed into the local repo only, under '.git/'," 78 | echo " meaning it is not versioned." 79 | echo 80 | echo "Usage:" 81 | echo " $script_name ACTION [OPTIONS]" 82 | echo 83 | echo "Actions:" 84 | echo " -h, --help show this help message" 85 | echo " install install the specified parts of the filter into the local repo" 86 | echo " remove remove *everything* regarding the filter from the local repo" 87 | echo " update first remove, then install the previously installed parts of the filter again" 88 | echo " check check whether the specified parts of the filter are installed (-> return value 0)" 89 | echo 90 | echo "Options:" 91 | echo " --commit (filter part) re-archives ZIP files without compression on commit" 92 | echo " --checkout (filter part) re-archives ZIP files with compression on checkout" 93 | echo " --diff (filter part) represents ZIP based files uncompressed in diff views" 94 | echo " --renormalize (filter part) check-out and -in files on merge conflicts" 95 | } 96 | 97 | set_action() { 98 | new_action="$1" 99 | if [ "$action" != "" ] 100 | then 101 | >&2 echo "You may only specify one action!" 
102 | printUsage 103 | exit 1 104 | fi 105 | action="$new_action" 106 | } 107 | 108 | # Handle command line arguments 109 | while [ $# -gt 0 ] 110 | do 111 | option="$1" 112 | case ${option} in 113 | -h|--help) 114 | printUsage 115 | exit 0 116 | ;; 117 | install) 118 | set_action "install" 119 | ;; 120 | remove) 121 | set_action "remove" 122 | ;; 123 | update) 124 | set_action "update" 125 | ;; 126 | check) 127 | set_action "check" 128 | ;; 129 | --renormalize) 130 | enable_renormalize="true" 131 | ;; 132 | --commit) 133 | enable_commit="true" 134 | ;; 135 | --checkout) 136 | enable_checkout="true" 137 | ;; 138 | --diff) 139 | enable_diff="true" 140 | ;; 141 | *) 142 | # unknown option / not an option 143 | >&2 echo "Unknown option '${option}'!" 144 | printUsage 145 | exit 1 146 | ;; 147 | esac 148 | shift # next argument or value 149 | done 150 | 151 | 152 | if [ "$action" = "" ] 153 | then 154 | >&2 echo "No action defined!" 155 | printUsage 156 | exit 1 157 | fi 158 | if [ "$action" = "update" ] 159 | then 160 | parts="" 161 | for chk_part in --commit --checkout --diff --renormalize 162 | do 163 | if $0 check ${chk_part} > /dev/null 2>&1 164 | then 165 | parts="$parts $chk_part" 166 | fi 167 | done 168 | # Call ourselves recursively, re-installing the same parts that already were installed 169 | $0 remove \ 170 | && $0 install ${parts} 171 | fi 172 | 173 | # If we got so far, it means that 'action' is set to 'check|install|remove' 174 | 175 | if ! git ls-remote ./ > /dev/null 2>&1 176 | then 177 | >&2 echo "The current working directory is not a valid git repo!" 
exit 1
fi

# Make sure the filter parts given fit the action
if [ "$action" = "check" ]
then
	if [ "$enable_renormalize" != "true" ] && [ "$enable_commit" != "true" ] && [ "$enable_checkout" != "true" ] && [ "$enable_diff" != "true" ]
	then
		>&2 echo "Please check for at least one of --commit, --checkout, --diff, --renormalize"
		exit 2
	fi
elif [ "$action" = "install" ]
then
	if [ "$enable_commit" != "true" ] && [ "$enable_checkout" != "true" ] && [ "$enable_diff" != "true" ]
	then
		>&2 echo "Please install at least one of --commit, --checkout, --diff"
		exit 2
	fi
else
	if [ "$enable_renormalize" = "true" ] || [ "$enable_commit" = "true" ] || [ "$enable_checkout" = "true" ] || [ "$enable_diff" = "true" ]
	then
		>&2 echo "Remove always removes the whole filter installation;"
		>&2 echo "no need to specify parts with any of --commit, --checkout, --diff, --renormalize"
		exit 2
	fi
fi

echo "$script_name action: ${action}ing ..."

# Install our binary (the JAR)
pre_text="git filter and diff binary -"
# Resolve the glob ourselves, as '[ -e <glob> ]' fails if more than one file matches.
# If nothing matches, the pattern stays as it is, and thus does not exist as a file.
installed_binary=""
for candidate_binary in ${binary_file_glob}
do
	if [ -e "$candidate_binary" ]
	then
		installed_binary="$candidate_binary"
	fi
done
if [ "$installed_binary" != "" ]
then
	if [ "$action" = "check" ]
	then
		echo "$pre_text installed!"
	elif [ "$action" = "install" ]
	then
		echo "$pre_text installing skipped (file already exists)"
		# The config entries written further down refer to the binary
		binary_file="$installed_binary"
	else
		echo -n "$pre_text removing ... "
		rm ${binary_file_glob} \
			&& echo "done" || echo "failed!"
	fi
else
	if [ "$action" = "check" ]
	then
		>&2 echo "$pre_text not installed!"
		exit 1
	elif [ "$action" = "install" ]
	then
		echo -n "$pre_text installing ... "

		# Delete previously downloaded versions
		# NOTE The glob has to stay unquoted, or it will not get expanded
		rm -f ${binary_file_glob}

		mvn_target_dir="${this_script_dir}/../target"
		# Try to get the latest version, in case there are multiple ones
		# NOTE Because of 'pipefail', this pipeline fails if there is no (matching) file
		#      - 'grep' exits with 1 if it selects no line -
		#      which would abort the whole script instead of falling back to the download.
		local_binary=""
		if [ -d "$mvn_target_dir" ]
		then
			local_binary=$(find "$mvn_target_dir" -maxdepth 1 -type f -name "rezipdoc-*.jar" | grep -v "\-sources" | grep -v "\-javadoc" | sort --version-sort | tail -1 || true)
		fi
		if [ "$use_local_binary_if_available" = "true" ] && [ "$local_binary" != "" ]
		then
			cp "$local_binary" ./
		else
			# Download the latest ReZipDoc release JAR from the Maven Central repository
			wget --content-disposition "$fetch_url"
			# this results in a file like "rezipdoc-0.1.jar" in the CWD
		fi

		# In case there are multiple ones, use the latest version
		source_binary_file=$(find . -maxdepth 1 -name "rezipdoc-*.jar" | sort --version-sort | tail -1)
		if [ "$source_binary_file" = "" ]
		then
			echo "failed!"
			>&2 echo "$pre_text no 'rezipdoc-*.jar' found in the current directory"
			exit 1
		fi

		echo -n " using binary '$source_binary_file' ... "

		# Extract the version from the release JAR name
		version=$(echo "$source_binary_file" | xargs basename --suffix='.jar' | sed -e 's/.*rezipdoc-//')

		binary_file=".git/$(basename "$source_binary_file")"

		mv "$source_binary_file" "$binary_file" \
			&& echo "done" || echo "failed!"
	else
		echo "$pre_text removing skipped (file does not exist)"
	fi
fi

# Configure the filter and diff
pre_text="git filter and diff config entry in $conf_file -"
if [ "$action" = "check" ]
then
	# Reports whether a git config entry exists; exits with 1 if it does not.
	#   $1 - the name of the config entry
	is_config_present() {
		filter_name="$1"
		if git config ${filter_name} > /dev/null
		then
			echo "$pre_text ${filter_name} is present!"
		else
			>&2 echo "$pre_text ${filter_name} is not present!"
			exit 1
		fi
	}

	if [ "$enable_commit" = "true" ]
	then
		is_config_present filter.reZip.clean
	fi

	if [ "$enable_checkout" = "true" ]
	then
		is_config_present filter.reZip.smudge
	fi

	if [ "$enable_diff" = "true" ]
	then
		is_config_present diff.zipDoc.textconv
	fi

	echo "$pre_text present!"
elif [ "$action" = "install" ]
then
	echo -n "$pre_text writing ... "

	extra_args=""
	#extra_args="--replace-all"
	# Remember the first failure, instead of only looking at the last command
	config_status=0
	set +e

	# Install the add/commit filter
	if [ "$enable_commit" = "true" ]
	then
		git config ${extra_args} filter.reZip.clean "java -cp '$binary_file' ${java_pkg}.ReZip --uncompressed" \
			|| config_status=1
	fi

	# Install the checkout filter
	if [ "$enable_checkout" = "true" ]
	then
		git config ${extra_args} filter.reZip.smudge "java -cp '$binary_file' ${java_pkg}.ReZip --compressed" \
			|| config_status=1
	fi

	# Install the diff filter
	if [ "$enable_diff" = "true" ]
	then
		git config ${extra_args} diff.zipDoc.textconv "java -cp '$binary_file' ${java_pkg}.ZipDoc" \
			|| config_status=1
	fi

	[ ${config_status} -eq 0 ] && echo "done" || echo "failed!"
	set -e
else
	if git config --local --get-regexp "filter\.reZip\..*" > /dev/null 2>&1
	then
		echo -n "$pre_text filter - removing ... "
		git config --remove-section filter.reZip \
			&& echo "done" || echo "failed!"
	else
		echo "$pre_text filter - removing skipped (not present)"
	fi

	if git config --local --get-regexp "diff\.zipDoc\..*" > /dev/null 2>&1
	then
		echo -n "$pre_text diff - removing ... "
		git config --remove-section diff.zipDoc \
			&& echo "done" || echo "failed!"
	else
		echo "$pre_text diff - removing skipped (not present)"
	fi
fi

# Apply the filter and diff-view to matching file(s)
pre_text="git attributes entries to $attributes_file -"
if grep -q "$marker_begin" "$attributes_file" > /dev/null 2>&1
then
	# Our section does exist in the attributes_file
	if [ "$action" = "check" ]
	then
		echo "$pre_text exist!"
		# NOTE '||' and '&&' have the same precedence in the shell (evaluated left to right),
		#      so this reads as: (commit OR checkout) AND the attribute is missing
		if { [ "$enable_commit" = "true" ] || [ "$enable_checkout" = "true" ]; } && ! grep -q -r "^\[attr\]reZip" "$attributes_file"
		then
			# only report failure if checking was explicitly requested
			>&2 echo "$pre_text - '[attr]reZip' does not exist!"
			exit 1
		fi
		if [ "$enable_diff" = "true" ] && ! grep -q -r "^\[attr\]zipDoc" "$attributes_file"
		then
			# only report failure if checking was explicitly requested
			>&2 echo "$pre_text - '[attr]zipDoc' does not exist!"
			exit 1
		fi
	elif [ "$action" = "install" ]
	then
		echo "$pre_text writing skipped (section already exists)"
	else
		echo -n "$pre_text removing ... "
		sed -e "/$marker_begin/,/$marker_end/d" --in-place "$attributes_file" \
			&& echo "done" || echo "failed!"
	fi
else
	# Our section does NOT exist in the attributes_file
	if [ "$action" = "check" ]
	then
		>&2 echo "$pre_text do not exist!"
		if [ "$enable_commit" = "true" ] || [ "$enable_checkout" = "true" ] || [ "$enable_diff" = "true" ]
		then
			# only report failure if checking was explicitly requested
			exit 1
		fi
	elif [ "$action" = "install" ]
	then
		echo "$pre_text writing ..."
381 | { 382 | echo "$marker_begin" 383 | echo "$header_note" 384 | cat << EOF 385 | # This forces git to treat files as if they were text-based (for example in diffs) 386 | [attr]textual diff merge 387 | EOF 388 | } >> "$attributes_file" 389 | 390 | parts="" 391 | if [ "$enable_commit" = "true" ] || [ "$enable_checkout" = "true" ] 392 | then 393 | parts="$parts reZip" 394 | cat >> "$attributes_file" << EOF 395 | # This makes git re-zip ZIP files uncompressed on commit 396 | [attr]reZip textual filter=reZip 397 | EOF 398 | fi 399 | 400 | if [ "$enable_diff" = "true" ] 401 | then 402 | parts="$parts zipDoc" 403 | cat >> "$attributes_file" << EOF 404 | # This makes git visualize ZIP files as uncompressed text with some meta info 405 | [attr]zipDoc textual diff=zipDoc 406 | EOF 407 | fi 408 | 409 | cat >> "$attributes_file" << EOF 410 | [attr]reZipDoc ${parts} 411 | 412 | EOF 413 | 414 | # Disable globbing 415 | set -o noglob 416 | for path_spec in ${target_path_specs} 417 | do 418 | echo " writing $path_spec ... " 419 | echo "$path_spec reZipDoc" >> "$attributes_file" 420 | done 421 | # Re-enable globbing 422 | set +o noglob 423 | 424 | echo "$marker_end" >> "$attributes_file" \ 425 | && echo " done" || echo " failed!" 426 | else 427 | echo "$pre_text removing skipped (section not present)" 428 | fi 429 | fi 430 | 431 | 432 | # As described in [gitattributes](http://git-scm.com/docs/gitattributes), 433 | # you may see unnecessary merge conflicts when you add attributes to a file that 434 | # causes the repository format for that file to change. 435 | # To prevent this, Git can be told to run a virtual check-out and check-in of all 436 | # three stages of a file when resolving a three-way merge. 
437 | # This might slow down merges 438 | 439 | # Set git merge renormalization 440 | pre_text="git merge renormalization -" 441 | renormalize_enabled=$(git config --get merge.renormalize || true) 442 | if [ "$renormalize_enabled" = "true" ] 443 | then 444 | # Renormalization is enabled 445 | if [ "$action" = "check" ] 446 | then 447 | echo "$pre_text enabled!" 448 | elif [ "$action" = "install" ] 449 | then 450 | echo "$pre_text enabling skipped (is already enabled)" 451 | else 452 | echo -n "$pre_text disabling ... " 453 | git config --unset-all merge.renormalize \ 454 | && echo "done" || echo "failed!" 455 | fi 456 | else 457 | # Renormalization is disabled 458 | if [ "$action" = "check" ] 459 | then 460 | >&2 echo "$pre_text not enabled!" 461 | if [ "$enable_renormalize" = "true" ] 462 | then 463 | # only report failure if checking was explicitly requested 464 | exit 1 465 | fi 466 | elif [ "$action" = "install" ] 467 | then 468 | echo -n "$pre_text enabling ... " 469 | git config --add --bool merge.renormalize true \ 470 | && echo "done" || echo "failed!" 471 | else 472 | echo "$pre_text disabling skipped (is already disabled)" 473 | fi 474 | fi 475 | -------------------------------------------------------------------------------- /src/main/java/io/github/hoijui/rezipdoc/Utils.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2015-2019, The authors of the ReZipDoc project. 3 | * 4 | * This program is free software: you can redistribute it and/or modify 5 | * it under the terms of the GNU General Public License as published by 6 | * the Free Software Foundation, either version 3 of the License, or 7 | * (at your option) any later version. 8 | * 9 | * This program is distributed in the hope that it will be useful, 10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | * GNU General Public License for more details.
13 | * 14 | * You should have received a copy of the GNU General Public License 15 | * along with this program. If not, see <http://www.gnu.org/licenses/>. 16 | */ 17 | 18 | package io.github.hoijui.rezipdoc; 19 | 20 | import java.io.BufferedReader; 21 | import java.io.File; 22 | import java.io.IOException; 23 | import java.io.InputStream; 24 | import java.io.InputStreamReader; 25 | import java.io.OutputStream; 26 | import java.io.PrintStream; 27 | import java.net.URISyntaxException; 28 | import java.net.URLConnection; 29 | import java.nio.charset.Charset; 30 | import java.nio.charset.StandardCharsets; 31 | import java.nio.file.Files; 32 | import java.nio.file.Path; 33 | import java.util.Collection; 34 | import java.util.Collections; 35 | import java.util.HashSet; 36 | import java.util.List; 37 | import java.util.Scanner; 38 | import java.util.Set; 39 | import java.util.logging.Formatter; 40 | import java.util.logging.Level; 41 | import java.util.logging.Logger; 42 | import java.util.stream.Collectors; 43 | import java.util.stream.Stream; 44 | 45 | /** 46 | * Various helper functions.
47 | */ 48 | @SuppressWarnings({"WeakerAccess", "unused"}) 49 | public final class Utils { 50 | 51 | private static final Logger LOGGER = Utils.getLogger(Utils.class.getName()); 52 | 53 | public static final String RESOURCE_FILE_SUFFIXES_PREFIX = "reZipDoc-"; 54 | public static final String RESOURCE_FILE_SUFFIXES_TEXT 55 | = RESOURCE_FILE_SUFFIXES_PREFIX + "suffixes-text.csv"; 56 | public static final String RESOURCE_FILE_SUFFIXES_XML 57 | = RESOURCE_FILE_SUFFIXES_PREFIX + "suffixes-xml.csv"; 58 | public static final String RESOURCE_FILE_SUFFIXES_ARCHIVE 59 | = RESOURCE_FILE_SUFFIXES_PREFIX + "suffixes-archive.csv"; 60 | public static final Set DEFAULT_SUFFIXES_XML = immutableSetFromResource("/ext_xml.txt"); 61 | public static final Set DEFAULT_SUFFIXES_TEXT = immutableSetFromResource("/ext_text.txt"); 62 | public static final Set DEFAULT_SUFFIXES_ARCHIVE = immutableSetFromResource("/ext_archives.txt"); 63 | private static final Set SUFFIXES_XML; 64 | private static final Set SUFFIXES_TEXT; 65 | private static final Set SUFFIXES_ARCHIVE; 66 | 67 | static { 68 | SUFFIXES_XML = collectFileOrDefaults(RESOURCE_FILE_SUFFIXES_XML, DEFAULT_SUFFIXES_XML); 69 | SUFFIXES_TEXT = collectFileOrDefaults(RESOURCE_FILE_SUFFIXES_TEXT, DEFAULT_SUFFIXES_TEXT); 70 | SUFFIXES_ARCHIVE = collectFileOrDefaults(RESOURCE_FILE_SUFFIXES_ARCHIVE, DEFAULT_SUFFIXES_ARCHIVE); 71 | } 72 | 73 | private Utils() { 74 | } 75 | 76 | private static ReroutableConsoleHandler stdErr; 77 | 78 | public static Logger getLogger(final String name) { 79 | 80 | final Logger logger = Logger.getLogger(name); 81 | logger.setUseParentHandlers(false); 82 | 83 | final Formatter basicFmt = new BasicLogFormatter(); 84 | 85 | if (stdErr == null) { 86 | final ReroutableConsoleHandler tmpStdErr = new ReroutableConsoleHandler(); 87 | tmpStdErr.setLevel(Level.FINEST); 88 | tmpStdErr.setFormatter(basicFmt); 89 | stdErr = tmpStdErr; 90 | } 91 | logger.addHandler(stdErr); 92 | 93 | logger.setLevel(Level.INFO); 94 | 95 | return 
logger; 96 | } 97 | 98 | public static void printUsageHeader(final Logger logger, final Level logLevel, final String name) { 99 | 100 | if (logger.isLoggable(logLevel)) { 101 | try { 102 | final BinaryUtil binaryUtil = new BinaryUtil(); 103 | logger.log(logLevel, String.format("%s %s", 104 | name, binaryUtil.getVersion())); 105 | logger.log(logLevel, String.format("License: %s%n", 106 | binaryUtil.getLicense())); 107 | } catch (final IOException exc) { 108 | logger.log(logLevel, "Failed to get version and license info", exc); 109 | } 110 | } 111 | } 112 | 113 | public static ReroutableConsoleHandler getLogHandler() { 114 | return stdErr; 115 | } 116 | 117 | /** 118 | * Returns the directory in which our sources are located. 119 | * 120 | * @return the dir where this ".class" or ".jar" file is located 121 | * @throws URISyntaxException should never happen 122 | * @see "https://stackoverflow.com/a/320595/586229" 123 | */ 124 | public static Path sourceDir() throws URISyntaxException { 125 | 126 | File sourceLocation = new File(Utils.class.getProtectionDomain() 127 | .getCodeSource().getLocation().toURI()); 128 | // If our source location is a JAR file, get its parent dir 129 | if (sourceLocation.isFile()) { 130 | sourceLocation = sourceLocation.getParentFile(); 131 | } 132 | return sourceLocation.toPath(); 133 | } 134 | 135 | /** 136 | * Reads all lines of a file, and streams them. 
137 | * @param textFile the file to be used as a data source 138 | * @param filter whether to filter out empty lines and lines starting with '#' 139 | * @return the lines of the file, each trimmed of surrounding white-space, as a list 140 | * @throws IOException if there is a problem while reading the file 141 | */ 142 | public static List readLines(final Path textFile, final boolean filter) throws IOException { 143 | 144 | final Charset encoding = StandardCharsets.UTF_8; 145 | try (Stream fileIn = Files.lines(textFile, encoding)) { 146 | return fileIn 147 | .map(String::trim) 148 | // filter-out empty lines and comments 149 | .filter(s -> !filter || (!s.isEmpty() && (s.charAt(0) != '#'))) 150 | .collect(Collectors.toList()); 151 | } 152 | } 153 | 154 | public static String readStreamToString(final InputStream inputStream) { 155 | 156 | try (Scanner inScr = new Scanner(inputStream)) { 157 | inScr.useDelimiter("\\A"); 158 | return inScr.hasNext() ? inScr.next() : ""; 159 | } 160 | } 161 | 162 | public static void writeLines(final Path textFile, final Collection lines) throws IOException { 163 | 164 | try (PrintStream out = new PrintStream(Files.newOutputStream(textFile))) { 165 | lines.forEach(out::println); 166 | } 167 | } 168 | 169 | /** 170 | * Writes all suffix files next to our binary (JAR). 171 | * @throws IOException if there is a problem while writing the file 172 | * @throws URISyntaxException if there is a problem while looking up the file path 173 | */ 174 | public static void writeSuffixesFiles() throws IOException, URISyntaxException { 175 | 176 | Path suffixesFile = sourceDir().resolve(RESOURCE_FILE_SUFFIXES_TEXT); 177 | writeLines(suffixesFile, SUFFIXES_TEXT); 178 | 179 | suffixesFile = sourceDir().resolve(RESOURCE_FILE_SUFFIXES_XML); 180 | writeLines(suffixesFile, SUFFIXES_XML); 181 | 182 | suffixesFile = sourceDir().resolve(RESOURCE_FILE_SUFFIXES_ARCHIVE); 183 | writeLines(suffixesFile, SUFFIXES_ARCHIVE); 184 | } 185 | 186 | /** 187 | * Deletes all suffix files next to our binary (JAR).
188 | * @throws URISyntaxException if there is a problem while looking up the file path 189 | * @throws IOException if deleting any of the files failed 190 | */ 191 | public static void deleteSuffixesFiles() throws URISyntaxException, IOException { 192 | 193 | Path suffixesFile = sourceDir().resolve(RESOURCE_FILE_SUFFIXES_TEXT); 194 | Files.deleteIfExists(suffixesFile); 195 | 196 | suffixesFile = sourceDir().resolve(RESOURCE_FILE_SUFFIXES_XML); 197 | Files.deleteIfExists(suffixesFile); 198 | 199 | suffixesFile = sourceDir().resolve(RESOURCE_FILE_SUFFIXES_ARCHIVE); 200 | Files.deleteIfExists(suffixesFile); 201 | } 202 | 203 | /** 204 | * Reads a set of strings from a resource text file, and makes it immutable. 205 | * Each line ends up being one string. 206 | * If any string appears two times, an {@link IllegalArgumentException} is thrown. 207 | */ 208 | public static Set immutableSetFromResource(final String resourceFile) { 209 | 210 | final List list = listFromResource(resourceFile); 211 | final HashSet set = new HashSet<>(list); 212 | if (set.size() != list.size()) { 213 | throw new IllegalArgumentException("Resource-file contains duplicate lines: " + resourceFile); 214 | } 215 | return Collections.unmodifiableSet(set); 216 | } 217 | 218 | /** 219 | * Reads a list of strings from a resource text file. 220 | * Each line ends up being one string.
221 | */ 222 | public static List listFromResource(final String resourceFile) { 223 | 224 | try (final BufferedReader reader = new BufferedReader(new InputStreamReader( 225 | Utils.class.getResourceAsStream(resourceFile)))) 226 | { 227 | return reader.lines().collect(Collectors.toList()); 228 | } catch (IOException exc) { 229 | throw new IllegalArgumentException("Failed to read list from resource file: " + resourceFile, exc); 230 | } 231 | } 232 | 233 | public static Set collectFileOrDefaults(final String localResourceFilePath, final Set defaults) { 234 | 235 | Set suffixes; 236 | Path suffixesFile = null; 237 | try { 238 | suffixesFile = sourceDir().resolve(localResourceFilePath); 239 | suffixes = collectFileNameMatchers(suffixesFile); 240 | if (LOGGER.isLoggable(Level.INFO)) { 241 | LOGGER.info(String.format("Read suffixes from file \"%s\".", suffixesFile)); 242 | } 243 | } catch (IOException exc) { 244 | if (LOGGER.isLoggable(Level.FINER)) { 245 | LOGGER.finer(String.format("Did not read suffixes from file \"%s\".", suffixesFile)); 246 | } 247 | suffixes = defaults; 248 | } catch (URISyntaxException exc) { 249 | if (LOGGER.isLoggable(Level.SEVERE)) { 250 | LOGGER.log(Level.SEVERE, String.format("Failed collecting suffixes for \"%s\"", 251 | localResourceFilePath), exc); 252 | } 253 | suffixes = null; 254 | System.exit(1); 255 | } 256 | 257 | return suffixes; 258 | } 259 | 260 | public static Set collectFileNameMatchers(final Path resourceFile) throws IOException { 261 | return new HashSet<>(readLines(resourceFile, true)); 262 | } 263 | 264 | /** 265 | * Checks whether a file is of a given type, judged by its name suffix, magic file header or MIME type.
266 | * 267 | * @param fileName to be checked for one of the supplied file suffixes 268 | * (for example ".xml" and ".svg") 269 | * @param contentBytes length of the content in bytes 270 | * @param contentIn to be checked for magic file header 271 | * @param magicHeader the magic file header to look for 272 | * @param suffixes the file suffixes to look for 273 | * @param mimeType which MIME-type to accept as matching 274 | * @return whether the file matches by suffix, magic file header or MIME-type 275 | * @throws IOException If something went wrong while trying to read the magic file header 276 | */ 277 | public static boolean isType(final String fileName, final long contentBytes, final BufferedOutputStream contentIn, 278 | final String magicHeader, final Set suffixes, final String mimeType) throws IOException 279 | { 280 | boolean matches = false; 281 | 282 | final String fileNameLower = fileName.toLowerCase(); 283 | if (fileNameLower.contains(".")) { 284 | final String suffix = fileNameLower.substring(fileNameLower.lastIndexOf('.') + 1); 285 | matches = suffixes.contains(suffix); 286 | } 287 | if (!matches && (magicHeader != null) 288 | && (contentBytes >= magicHeader.length()) 289 | && contentIn.startsWith(magicHeader.getBytes())) 290 | { 291 | matches = true; 292 | } 293 | if (!matches && mimeType != null && !mimeType.isEmpty()) { 294 | final String foundMimeType = guessContentTypeFromStream(contentIn.createInputStream(false)); 295 | if (mimeType.equals(foundMimeType)) { 296 | matches = true; 297 | } 298 | } 299 | 300 | return matches; 301 | } 302 | 303 | /** 304 | * Checks whether a file denotes an XML based file format.
305 | * 306 | * @param fileName to be checked for known XML file extensions 307 | * (for example ".xml" and ".svg") 308 | * @param contentBytes length of the content in bytes 309 | * @param contentIn to be checked for magic file header for XML: 310 | * {@literal "= 0; n = source.read(buffer)) { 349 | target.write(buffer, 0, n); 350 | } 351 | } 352 | 353 | /** 354 | * Tries to determine the type of an input stream based on the 355 | * characters at the beginning of the stream. 356 | * 357 | * Additional values herein over what is already supported by the JDK 358 | * are taken from the Wikipedia list of file signatures (https://en.wikipedia.org/wiki/List_of_file_signatures). 359 | * 360 | * @param is an input stream that supports marks. 361 | * @return a guess at the content type, or {@code null} if none 362 | * can be determined. 363 | * 364 | * @throws IOException if an I/O error occurs while reading the 365 | * input stream. 366 | * @see InputStream#mark(int) 367 | * @see InputStream#markSupported() 368 | * @see URLConnection#guessContentTypeFromStream(InputStream) 369 | */ 370 | public static String guessContentTypeFromStream(final InputStream is) throws IOException { 371 | 372 | String contentMimeType = URLConnection.guessContentTypeFromStream(is); 373 | if (contentMimeType == null && is.markSupported()) { 374 | // JDK does not know the mime type, but we might be able to figure it out ... 375 | 376 | // read the first 4 bytes of the file (it might be a magic header); the mark allows for up to 16 377 | is.mark(16); 378 | final int c1 = is.read(); 379 | final int c2 = is.read(); 380 | final int c3 = is.read(); 381 | final int c4 = is.read(); 382 | is.reset(); 383 | 384 | if ((c1 == 0x50 && c2 == 0x4B) 385 | &&((c3 == 0x03 && c4 == 0x04) 386 | || (c3 == 0x05 && c4 == 0x06) 387 | || (c3 == 0x07 && c4 == 0x08))) 388 | { 389 | contentMimeType = "application/zip"; 390 | } 391 | } 392 | 393 | return contentMimeType; 394 | } 395 | 396 | /** 397 | * Checks whether a String consists of only white-space (or is empty).
398 | * 399 | * NOTE Starting from Java 11, one might use 400 | * isBlank() 401 | * instead. 402 | * 403 | * @param str to be checked for blankness 404 | * @return whether the supplied string is empty or consists of only white-space 405 | * @see the solution on StackOverflow 406 | */ 407 | public static boolean isBlank(final String str) { 408 | return str.chars().allMatch(Character::isWhitespace); 409 | } 410 | } 411 | -------------------------------------------------------------------------------- /pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4.0.0 4 | 5 | 6 | ${project.groupId} 7 | ${project.packageName}.ReZip 8 | 12 | UTF-8 13 | ${project.build.encoding} 14 | ${project.build.encoding} 15 | 8 16 | 17 | 18 | io.github.hoijui.rezipdoc 19 | rezipdoc 20 | 0.5-SNAPSHOT 21 | 22 | bundle 23 | 24 | ReZipDoc 25 | 26 | A Git filter and textconv for converting ZIP based binary files 27 | to an uncompressed version of themselves, 28 | which works better with gits delta-compression and diffs 29 | 30 | https://github.com/hoijui/ReZipDoc/ 31 | 2019 32 | 33 | 34 | scm:git:git@github.com:hoijui/ReZipDoc.git 35 | scm:git:git@github.com:hoijui/ReZipDoc.git 36 | https://github.com/hoijui/ReZipDoc 37 | HEAD 38 | 39 | 40 | 41 | 42 | GNU General Public License (GPL), version 3 or later 43 | http://www.gnu.org/licenses/gpl-3.0.html 44 | repo 45 | 46 | 47 | 48 | 49 | GitHub 50 | https://github.com/hoijui/ReZipDoc/issues 51 | 52 | 53 | 54 | 55 | Robin Vobruba 56 | hoijui.quaero@gmail.com 57 | 58 | developer 59 | 60 | 61 | 62 | 63 | 64 | 65 | junit 66 | junit 67 | 4.13.1 68 | test 69 | 70 | 71 | com.github.stefanbirkner 72 | system-rules 73 | 1.19.0 74 | test 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | org.apache.maven.plugins 84 | maven-checkstyle-plugin 85 | 3.0.0 86 | 87 | ${project.build.sourceEncoding} 88 | true 89 | src/main/resources/checkstyle.xml 90 | 91 | 92 | 93 | 94 | org.apache.maven.plugins 95 | maven-pmd-plugin 96 | 
3.11.0 97 | 98 | true 99 | true 100 | ${project.build.sourceEncoding} 101 | 1.${java.version} 102 | 103 | 50 104 | 105 | src/main/resources/pmd.xml 106 | 107 | 108 | 109 | 110 | 111 | org.codehaus.mojo 112 | findbugs-maven-plugin 113 | 3.0.5 114 | 115 | 116 | 117 | org.apache.maven.plugins 118 | maven-surefire-report-plugin 119 | 3.0.0-M3 120 | 121 | 122 | 123 | org.codehaus.mojo 124 | jdepend-maven-plugin 125 | 2.0 126 | 127 | 128 | 129 | org.apache.maven.plugins 130 | maven-project-info-reports-plugin 131 | 3.0.0 132 | 133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 | org.apache.maven.plugins 141 | maven-assembly-plugin 142 | 3.1.1 143 | 144 | false 145 | 146 | 147 | 148 | 149 | org.apache.maven.plugins 150 | maven-checkstyle-plugin 151 | 3.0.0 152 | 153 | ${project.build.sourceEncoding} 154 | true 155 | src/main/resources/checkstyle.xml 156 | java.header.regex.template.file=src/main/resources/java_header_regex_template.txt 157 | 158 | 159 | 160 | 161 | org.apache.maven.plugins 162 | maven-compiler-plugin 163 | 3.8.0 164 | 165 | 1.${java.version} 166 | 1.${java.version} 167 | ${project.build.sourceEncoding} 168 | true 169 | -Xlint:unchecked 170 | 171 | 172 | 173 | 174 | org.apache.maven.plugins 175 | maven-resources-plugin 176 | 3.1.0 177 | 178 | ${project.build.resourceEncoding} 179 | 180 | 181 | 182 | 183 | org.apache.maven.plugins 184 | maven-javadoc-plugin 185 | 3.0.1 186 | 187 | ${project.build.sourceEncoding} 188 | ${project.build.sourceEncoding} 189 | 190 | 191 | 192 | 193 | org.apache.maven.plugins 194 | maven-deploy-plugin 195 | 3.0.0-M1 196 | 197 | 198 | 199 | org.apache.maven.plugins 200 | maven-source-plugin 201 | 3.0.1 202 | 203 | 204 | 205 | org.apache.maven.plugins 206 | maven-clean-plugin 207 | 3.1.0 208 | 209 | 210 | 211 | org.apache.maven.plugins 212 | maven-install-plugin 213 | 3.0.0-M1 214 | 215 | 216 | 217 | org.apache.maven.plugins 218 | maven-site-plugin 219 | 3.7.1 220 | 221 | 222 | 223 | org.apache.maven.plugins 224 | maven-release-plugin 
225 | 2.5.3 226 | 227 | true 228 | false 229 | release 230 | deploy 231 | 235 | forked-path 236 | 237 | 238 | 239 | 240 | org.apache.maven.plugins 241 | maven-jar-plugin 242 | 3.1.1 243 | 244 | 245 | 246 | ${project.mainClass} 247 | 248 | 249 | 250 | 251 | 252 | 256 | false 257 | 258 | 259 | 260 | 261 | 262 | org.codehaus.mojo 263 | exec-maven-plugin 264 | 1.6.0 265 | 266 | 267 | 268 | java 269 | 270 | 271 | 272 | 273 | ${project.mainClass} 274 | 275 | 276 | 277 | 278 | org.apache.maven.plugins 279 | maven-surefire-plugin 280 | 3.0.0-M3 281 | 282 | 283 | 284 | org.apache.maven.plugins 285 | maven-enforcer-plugin 286 | 3.0.0-M2 287 | 288 | 289 | enforce-versions 290 | 291 | enforce 292 | 293 | 294 | 295 | 296 | 297 | WARN 298 | 299 | org.apache.maven.plugins:maven-verifier-plugin 300 | 301 | Please consider using the maven-invoker-plugin 302 | (http://maven.apache.org/plugins/maven-invoker-plugin/)! 303 | 304 | 305 | 306 | 3.3.9 307 | 308 | 309 | 1.${java.version} 310 | 311 | 312 | 313 | 314 | ${project.groupId} 315 | 316 | 317 | 318 | 319 | 320 | 321 | 322 | 323 | 324 | org.apache.felix 325 | maven-bundle-plugin 326 | 2.5.4 327 | true 328 | 329 | 330 | 331 | ${project.packageName} 332 | 333 | 334 | 335 | 336 | 337 | 338 | org.apache.maven.plugins 339 | maven-pmd-plugin 340 | 3.11.0 341 | 342 | true 343 | true 344 | ${project.build.sourceEncoding} 345 | 1.${java.version} 346 | 347 | src/main/resources/pmd.xml 348 | 349 | 350 | 351 | 352 | 353 | org.codehaus.mojo 354 | findbugs-maven-plugin 355 | 3.0.5 356 | 357 | 358 | 359 | org.jacoco 360 | jacoco-maven-plugin 361 | 0.8.3 362 | 363 | true 364 | 365 | 366 | 367 | prepare-agent 368 | 369 | prepare-agent 370 | 371 | 372 | 373 | prepare-agent-integration 374 | 375 | prepare-agent-integration 376 | 377 | 378 | 379 | jacoco-site 380 | test 381 | 382 | report 383 | 384 | 385 | 386 | 387 | 388 | 389 | org.sonarsource.scanner.maven 390 | sonar-maven-plugin 391 | 3.6.0.1398 392 | 393 | 394 | verify 395 | 396 | sonar 397 | 
398 | 399 | 400 | 401 | 402 | 403 | 404 | 405 | 406 | org.apache.maven.plugins 407 | maven-enforcer-plugin 408 | 409 | 410 | org.apache.felix 411 | maven-bundle-plugin 412 | 413 | 414 | 415 | 416 | 417 | 418 | sonar 419 | 420 | https://sonarcloud.io 421 | hoijui-github 422 | 423 | 424 | 425 | 426 | org.jacoco 427 | jacoco-maven-plugin 428 | 429 | 430 | org.sonarsource.scanner.maven 431 | sonar-maven-plugin 432 | 433 | 434 | 435 | 436 | 437 | 438 | Extensive-Reports 439 | 445 | 446 | 447 | 448 | org.apache.maven.plugins 449 | maven-jxr-plugin 450 | 3.0.0 451 | 452 | ${project.build.sourceEncoding} 453 | 454 | 455 | 456 | 457 | org.apache.maven.plugins 458 | maven-javadoc-plugin 459 | 3.0.1 460 | 461 | ${project.build.sourceEncoding} 462 | ${project.build.sourceEncoding} 463 | 464 | 465 | 466 | 467 | org.apache.maven.plugins 468 | maven-changelog-plugin 469 | 2.3 470 | 471 | 472 | 473 | org.sonarsource.scanner.maven 474 | sonar-maven-plugin 475 | 3.6.0.1398 476 | 477 | 478 | 479 | 480 | 481 | 482 | release 483 | 487 | 488 | 489 | 490 | org.apache.maven.plugins 491 | maven-gpg-plugin 492 | 1.6 493 | 494 | 495 | sign-artifacts 496 | verify 497 | 498 | sign 499 | 500 | 501 | 507 | ${gpg.keyname} 508 | ${gpg.keyname} 509 | 510 | 511 | 512 | 513 | 514 | 515 | org.sonatype.plugins 516 | nexus-staging-maven-plugin 517 | 1.6.8 518 | true 519 | 520 | ossrh 521 | https://oss.sonatype.org/ 522 | true 523 | 524 | 525 | 526 | 527 | org.apache.maven.plugins 528 | maven-javadoc-plugin 529 | 3.0.1 530 | 531 | false 532 | ${project.build.sourceEncoding} 533 | ${project.build.sourceEncoding} 534 | 535 | 536 | 537 | attach-javadocs 538 | 539 | jar 540 | 541 | 542 | 543 | 544 | 545 | 546 | org.apache.maven.plugins 547 | maven-source-plugin 548 | 3.0.1 549 | 550 | 551 | attach-sources 552 | 553 | jar-no-fork 554 | 555 | 556 | 557 | 558 | 559 | 560 | 561 | 562 | 563 | 564 | 565 | ossrh 566 | https://oss.sonatype.org/content/repositories/snapshots 567 | 568 | 569 | ossrh 570 | 
https://oss.sonatype.org/service/local/staging/deploy/maven2/ 571 | 572 | 573 | 574 | 575 | --------------------------------------------------------------------------------