├── .github └── workflows │ └── ruby.yml ├── .gitignore ├── Ascii85.gemspec ├── CHANGELOG.md ├── Gemfile ├── LICENSE ├── README.md ├── Rakefile ├── bin └── ascii85 ├── lib ├── Ascii85 │ └── version.rb └── ascii85.rb └── spec ├── bin └── cli_spec.rb └── lib └── ascii85_spec.rb /.github/workflows/ruby.yml: -------------------------------------------------------------------------------- 1 | name: Ruby 2 | 3 | on: 4 | push: 5 | branches: [ master ] 6 | pull_request: 7 | branches: [ master ] 8 | 9 | jobs: 10 | test: 11 | runs-on: ubuntu-latest 12 | 13 | strategy: 14 | fail-fast: false 15 | matrix: 16 | ruby-version: 17 | - '2.7' 18 | - '3.0' 19 | - '3.1' 20 | - '3.2' 21 | - '3.3' 22 | - '3.4' 23 | - 'head' 24 | - 'jruby' 25 | - 'truffleruby' 26 | 27 | steps: 28 | - uses: actions/checkout@v4 29 | - name: Set up Ruby 30 | uses: ruby/setup-ruby@v1 31 | with: 32 | ruby-version: ${{ matrix.ruby-version }} 33 | bundler-cache: true 34 | 35 | - name: Run tests 36 | run: bundle exec rake 37 | 38 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.gem 2 | .bundle 3 | .yardoc 4 | Gemfile.lock 5 | doc/ 6 | pkg/* 7 | -------------------------------------------------------------------------------- /Ascii85.gemspec: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | require_relative 'lib/Ascii85/version' 4 | 5 | Gem::Specification.new do |s| 6 | s.name = 'Ascii85' 7 | s.version = Ascii85::VERSION 8 | s.platform = Gem::Platform::RUBY 9 | s.author = 'Johannes Holzfuß' 10 | s.email = 'johannes@holzfuss.name' 11 | s.license = 'MIT' 12 | s.homepage = 'https://github.com/DataWraith/ascii85gem/' 13 | s.summary = 'Ascii85 encoder/decoder' 14 | s.description = "Ascii85 provides methods to encode/decode Adobe's binary-to-text encoding of the same name." 15 | 16 | s.required_ruby_version = '>= 2.7.0' 17 | 18 | s.add_development_dependency 'minitest', '~> 5', '>= 5.12.0' 19 | s.add_development_dependency 'rake', '~> 13' 20 | 21 | s.files = `git ls-files`.split("\n") - ['.gitignore', '.github/workflows/ruby.yml'] 22 | s.test_files = `git ls-files -- spec/*`.split("\n") 23 | s.executables = `git ls-files -- bin/*`.split("\n").map { |f| File.basename(f) } 24 | s.require_paths = ['lib'] 25 | s.extra_rdoc_files = ['README.md', 'LICENSE'] 26 | end 27 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Ascii85 Changelog 2 | 3 | ## [2.0.1] - 2024-09-15 4 | 5 | ### Fixed 6 | 7 | - Decoding binary data could lead to Encoding errors (Issue #8) 8 | 9 | ## [2.0.0] - 2024-08-20 10 | 11 | ### BREAKING CHANGES 12 | 13 | - The minimum required Ruby version has been raised to 2.7.0. 14 | 15 | ### Added 16 | 17 | - `Ascii85.decode_raw` method that doesn't expect the input to be wrapped in `<~` and `~>` delimiters. 18 | - `Ascii85.extract` method to extract encoded text from between `<~` and `~>` for feeding to `#decode_raw`. 19 | - Option to pass an IO-object as input to `#encode` and `#decode_raw` instead of a String. 20 | - Option to pass an IO-object to `#encode` and `#decode_raw` for output. Output is written to the object instead of being returned as a String. 21 | - Streaming capability for `#encode` and `#decode_raw` when both input and output are IO objects, using constant memory. 22 | 23 | ## [1.1.1] - 2024-05-09 24 | 25 | ### Fixed 26 | 27 | - Make `bin/ascii85` Ruby 3.2-compatible (thanks @tylerwillingham) 28 | - Slightly improved error handling of `bin/ascii85` 29 | 30 | ## [1.1.0] - 2020-11-11 31 | 32 | ### Added 33 | 34 | - Make use of frozen_string_literal (thanks @aliismayilov) 35 | 36 | ### Changed 37 | 38 | - Updated tests to use newer minitest syntax 39 | 40 | ## [1.0.3] - 2018-01-25 41 | 42 | ### Changed 43 | 44 | - Updated the gem's metadata 45 | 46 | ## [1.0.2] - 2012-09-16 47 | 48 | ### Changed 49 | 50 | - Changed test runner from RSpec to MiniSpec 51 | - Added support for rubygems-test 52 | - Minor changes to make packaging easier 53 | 54 | ## [1.0.1] - 2011-05-05 55 | 56 | ### Changed 57 | 58 | - Removed `hoe` dependency in favor of `bundler` 59 | - Minor corrections in the documentation 60 | 61 | ## [1.0.0] - 2009-12-25 62 | 63 | ### Added 64 | 65 | - Ruby 1.9 compatibility 66 | - Command-line en- and decoder 67 | 68 | ## [0.9.0] - 2009-02-17 69 | 70 | - Initial release 71 | -------------------------------------------------------------------------------- /Gemfile: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | source 'http://rubygems.org' 4 | 5 | # Specify your gem's dependencies in Ascii85.gemspec 6 | gemspec 7 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2009 Johannes Holzfuß 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of 4 | this software and associated documentation files (the "Software"), to deal in 5 | the Software without restriction, including without limitation the rights to 6 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 7 | of the Software, and to permit persons to whom the Software is furnished to do 8 | so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in all 11 | copies or substantial portions of the Software. 12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 16 | THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 19 | SOFTWARE. 20 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | **Status**: This project is feature-complete. With the exception of fixes to reported bugs, no further development will take place. 2 | 3 | # Ascii85 4 | 5 | ## Description 6 | 7 | Ascii85 is a Ruby gem that provides methods for encoding/decoding Adobe's 8 | binary-to-text encoding of the same name. 9 | 10 | See the Adobe PostScript Language Reference ([archived version][PLRM]) page 131 11 | and [Wikipedia](https://en.wikipedia.org/wiki/Ascii85) for more information 12 | about the format. 13 | 14 | [PLRM]: https://web.archive.org/web/20161222092741/https://www.adobe.com/products/postscript/pdfs/PLRM.pdf 15 | 16 | 17 | ## Installation 18 | 19 | `$ gem install Ascii85` 20 | 21 | > [!IMPORTANT] 22 | > Note that the gem name is capitalized. 23 | 24 | 25 | ## Usage 26 | 27 | ```ruby 28 | require 'ascii85' 29 | 30 | Ascii85.encode("Ruby") 31 | => "<~;KZGo~>" 32 | 33 | Ascii85.decode("<~;KZGo~>") 34 | => "Ruby" 35 | 36 | Ascii85.extract("Foo<~;KZGo~>Bar") 37 | => ";KZGo" 38 | 39 | Ascii85.decode_raw(";KZGo") 40 | => "Ruby" 41 | ``` 42 | 43 | In addition, `Ascii85.encode` can take a second parameter that specifies the 44 | length of the returned lines. The default is 80; use `false` for unlimited. 45 | 46 | `Ascii85.decode` expects the input to be enclosed in `<~` and `~>` — it 47 | ignores everything outside of these, while `Ascii85.decode_raw` assumes that 48 | the entire String passed in is encoded in Ascii85. If you need to, you can use 49 | `Ascii85.extract` to find and extract the first substring of the input that is 50 | enclosed by the `<~` and `~>` delimiters. 51 | 52 | The output of `Ascii85.decode` and `Ascii85.decode_raw` will be a String that 53 | has the `ASCII-8BIT` encoding, so you may have to use `String#force_encoding` to 54 | convert it to the desired encoding. 55 | 56 | For further options, see the [Documentation](https://www.rubydoc.info/gems/Ascii85/). 57 | 58 | 59 | ## Command-line utility 60 | 61 | This gem includes `ascii85`, a command-line utility modeled after `base64` from 62 | the GNU coreutils. It can be used to encode/decode Ascii85 directly from the 63 | command-line: 64 | 65 | ``` 66 | Usage: ascii85 [OPTIONS] [FILE] 67 | Encodes or decodes FILE or STDIN using Ascii85 and writes to STDOUT. 68 | -w, --wrap COLUMN Wrap lines at COLUMN. Default is 80, use 0 for no wrapping 69 | -d, --decode Decode the input 70 | -h, --help Display this help and exit 71 | -V, --version Output version information 72 | ``` 73 | 74 | 75 | ## License 76 | 77 | Ascii85 is distributed under the MIT License. See the accompanying LICENSE file 78 | for details. 79 | -------------------------------------------------------------------------------- /Rakefile: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | require 'bundler' 4 | Bundler::GemHelper.install_tasks 5 | 6 | require 'rake/testtask' 7 | 8 | Rake::TestTask.new do |t| 9 | t.test_files = FileList['spec/**/*_spec.rb'] 10 | end 11 | 12 | task specs: :test 13 | task tests: :test 14 | task default: :test 15 | -------------------------------------------------------------------------------- /bin/ascii85: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env ruby 2 | # frozen_string_literal: true 3 | 4 | # 5 | # A simple command-line tool to de- and encode Ascii85, modeled after `base64` 6 | # from the GNU Coreutils. 7 | # 8 | 9 | require 'optparse' 10 | require File.join(File.dirname(__FILE__), '..', 'lib', 'ascii85') 11 | require File.join(File.dirname(__FILE__), '..', 'lib', 'Ascii85', 'version') 12 | 13 | class CLI 14 | attr_reader :options 15 | 16 | def initialize(argv, stdin: $stdin, stdout: $stdout) 17 | @in = stdin 18 | @out = stdout 19 | 20 | @options = { 21 | wrap: 80, 22 | action: :encode 23 | } 24 | 25 | parse_options(argv) 26 | end 27 | 28 | def parse_options(argv) 29 | @parser = OptionParser.new do |opts| 30 | opts.banner = "Usage: #{File.basename($PROGRAM_NAME)} [OPTIONS] [FILE]\n" \ 31 | 'Encodes or decodes FILE or STDIN using Ascii85 and writes to STDOUT.' 32 | 33 | opts.on('-w', '--wrap COLUMN', Integer, 34 | 'Wrap lines at COLUMN. Default is 80, use 0 for no wrapping') do |opt| 35 | @options[:wrap] = opt.abs 36 | @options[:wrap] = false if opt.zero? 37 | end 38 | 39 | opts.on('-d', '--decode', 'Decode the input') do 40 | @options[:action] = :decode 41 | end 42 | 43 | opts.on('-h', '--help', 'Display this help and exit') do 44 | @options[:action] = :help 45 | end 46 | 47 | opts.on('-V', '--version', 'Output version information') do |_opt| 48 | @options[:action] = :version 49 | end 50 | 51 | end 52 | 53 | remaining_args = @parser.parse!(argv) 54 | 55 | case remaining_args.size 56 | when 0 57 | @options[:file] = '-' 58 | when 1 59 | @options[:file] = remaining_args.first 60 | else 61 | raise(OptionParser::ParseError, "Superfluous operand(s): \"#{remaining_args[1..].join('", "')}\"") 62 | end 63 | end 64 | 65 | def input 66 | fn = @options[:file] 67 | 68 | return @in.binmode if fn == '-' 69 | 70 | raise(StandardError, "File not found: \"#{fn}\"") unless File.exist?(fn) 71 | raise(StandardError, "File is not readable: \"#{fn}\"") unless File.readable_real?(fn) 72 | 73 | File.new(fn, 'rb') 74 | end 75 | 76 | def decode 77 | Ascii85.decode(input.read, out: @out) 78 | end 79 | 80 | def encode 81 | Ascii85.encode(input, @options[:wrap], out: @out) 82 | end 83 | 84 | def version 85 | "Ascii85 v#{Ascii85::VERSION},\nwritten by Johannes Holzfuß" 86 | end 87 | 88 | def help 89 | @parser 90 | end 91 | 92 | def call 93 | case @options[:action] 94 | when :help then @out.puts help 95 | when :version then @out.puts version 96 | when :encode then encode 97 | when :decode then decode 98 | end 99 | end 100 | end 101 | 102 | if File.basename($PROGRAM_NAME) == "ascii85" 103 | begin 104 | CLI.new(ARGV).call 105 | rescue OptionParser::ParseError => e 106 | abort e.message 107 | rescue Ascii85::DecodingError => e 108 | abort "Decoding Error: #{e.message}" 109 | rescue StandardError => e 110 | abort "Error: #{e.message}" 111 | end 112 | end 113 | -------------------------------------------------------------------------------- /lib/Ascii85/version.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | module Ascii85 4 | VERSION = '2.0.1' 5 | end 6 | -------------------------------------------------------------------------------- /lib/ascii85.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | require 'stringio' 4 | 5 | # 6 | # Ascii85 is an implementation of Adobe's binary-to-text encoding of the 7 | # same name in pure Ruby. 8 | # 9 | # See http://en.wikipedia.org/wiki/Ascii85 for more information about the 10 | # format. 11 | # 12 | # Author:: Johannes Holzfuß (johannes@holzfuss.name) 13 | # License:: Distributed under the MIT License (see LICENSE file) 14 | # 15 | module Ascii85 16 | class << self 17 | EMPTY_STRING = ''.dup.force_encoding(Encoding::ASCII_8BIT) 18 | START_MARKER = '<~'.dup.force_encoding(Encoding::ASCII_8BIT) 19 | ENDING_MARKER = '~>'.dup.force_encoding(Encoding::ASCII_8BIT) 20 | LINE_BREAK = "\n".dup.force_encoding(Encoding::ASCII_8BIT) 21 | 22 | # 23 | # Encodes the bytes of the given String or IO-like object as Ascii85. 24 | # 25 | # @param str_or_io [String, IO] The input to encode 26 | # @param wrap_lines [Integer, false] The line length for wrapping, or +false+ for no wrapping 27 | # @param out [IO, nil] An optional IO-like object to write the output to 28 | # 29 | # @return [String, IO] The encoded String or the output IO object that was passed in 30 | # 31 | # @example Encoding a simple String 32 | # Ascii85.encode("Ruby") 33 | # # => <~;KZGo~> 34 | # 35 | # @example Encoding with line wrapping 36 | # Ascii85.encode("Supercalifragilisticexpialidocious", 15) 37 | # # => <~;g!%jEarNoBkD 38 | # # BoB5)0rF*),+AU& 39 | # # 0.@;KXgDe!L"F`R 40 | # # ~> 41 | # 42 | # @example Encoding without line wrapping 43 | # Ascii85.encode("Supercalifragilisticexpialidocious", false) 44 | # # => <~;g!%jEarNoBkDBoB5)0rF*),+AU&0.@;KXgDe!L"F`R~> 45 | # 46 | # @example Encoding from an IO-like object 47 | # input = StringIO.new("Ruby") 48 | # Ascii85.encode(input) 49 | # # => "<~;KZGo~>" 50 | # 51 | # @example Encoding to an IO object 52 | # output = StringIO.new 53 | # Ascii85.encode("Ruby", out: output) 54 | # # => output (with "<~;KZGo~>" written to it) 55 | # 56 | def encode(str_or_io, wrap_lines = 80, out: nil) 57 | reader = if io_like?(str_or_io) 58 | str_or_io 59 | else 60 | StringIO.new(str_or_io.to_s, 'rb') 61 | end 62 | 63 | return EMPTY_STRING.dup if reader.eof? 64 | 65 | # Setup buffered Reader and Writers 66 | bufreader = BufferedReader.new(reader, unencoded_chunk_size) 67 | bufwriter = BufferedWriter.new(out || StringIO.new(String.new, 'wb'), encoded_chunk_size) 68 | writer = wrap_lines ? Wrapper.new(bufwriter, wrap_lines) : DummyWrapper.new(bufwriter) 69 | 70 | padding = unfrozen_binary_copy("\0\0\0\0") 71 | tuplebuf = unfrozen_binary_copy('!!!!!') 72 | exclamations = unfrozen_binary_copy('!!!!!') 73 | z = unfrozen_binary_copy('z') 74 | 75 | bufreader.each_chunk do |chunk| 76 | chunk.unpack('N*').each do |word| 77 | # Encode each big-endian 32-bit word into a 5-character tuple (except 78 | # for 0, which encodes to 'z') 79 | if word.zero? 80 | writer.write(z) 81 | else 82 | word, b0 = word.divmod(85) 83 | word, b1 = word.divmod(85) 84 | word, b2 = word.divmod(85) 85 | word, b3 = word.divmod(85) 86 | b4 = word 87 | 88 | tuplebuf.setbyte(0, b4 + 33) 89 | tuplebuf.setbyte(1, b3 + 33) 90 | tuplebuf.setbyte(2, b2 + 33) 91 | tuplebuf.setbyte(3, b1 + 33) 92 | tuplebuf.setbyte(4, b0 + 33) 93 | 94 | writer.write(tuplebuf) 95 | end 96 | end 97 | 98 | next if (chunk.bytesize & 0b11).zero? 99 | 100 | # If we have leftover bytes, we need to zero-pad to a multiple of four 101 | # before converting to a 32-bit word. 102 | padding_length = (-chunk.bytesize) % 4 103 | trailing = chunk[-(4 - padding_length)..] 104 | word = (trailing + padding[0...padding_length]).unpack1('N') 105 | 106 | # Encode the last word and cut off any padding 107 | if word.zero? 108 | writer.write(exclamations[0..(4 - padding_length)]) 109 | else 110 | word, b0 = word.divmod(85) 111 | word, b1 = word.divmod(85) 112 | word, b2 = word.divmod(85) 113 | word, b3 = word.divmod(85) 114 | b4 = word 115 | 116 | tuplebuf.setbyte(0, b4 + 33) 117 | tuplebuf.setbyte(1, b3 + 33) 118 | tuplebuf.setbyte(2, b2 + 33) 119 | tuplebuf.setbyte(3, b1 + 33) 120 | tuplebuf.setbyte(4, b0 + 33) 121 | 122 | writer.write(tuplebuf[0..(4 - padding_length)]) 123 | end 124 | end 125 | 126 | # If no output IO-object was provided, extract the encoded String from the 127 | # default StringIO writer. We force the encoding to 'ASCII-8BIT' to work 128 | # around a TruffleRuby bug. 129 | return writer.finish.io.string.force_encoding(Encoding::ASCII_8BIT) if out.nil? 130 | 131 | # Otherwise we make sure to flush the output writer, and then return it. 132 | writer.finish.io 133 | end 134 | 135 | # Searches through a String and extracts the first substring enclosed by '<~' and '~>'. 136 | # 137 | # @param str [String] The String to search through 138 | # 139 | # @return [String] The extracted substring, or an empty String if no valid delimiters are found 140 | # 141 | # @example Extracting Ascii85 content 142 | # Ascii85.extract("Foo<~;KZGo~>Bar<~z~>Baz") 143 | # # => ";KZGo" 144 | # 145 | # @example When no delimiters are found 146 | # Ascii85.extract("No delimiters") 147 | # # => "" 148 | # 149 | # @note This method only accepts a String, not an IO-like object, as the entire input 150 | # needs to be available to ensure validity. 151 | # 152 | def extract(str) 153 | input = str.to_s 154 | 155 | # Make sure the delimiter Strings have the correct encoding. 156 | opening_delim = '<~'.encode(input.encoding) 157 | closing_delim = '~>'.encode(input.encoding) 158 | 159 | # Get the positions of the opening/closing delimiters. If there is no pair 160 | # of opening/closing delimiters, return an unfrozen empty String. 161 | (start_pos = input.index(opening_delim)) or return EMPTY_STRING.dup 162 | (end_pos = input.index(closing_delim, start_pos + 2)) or return EMPTY_STRING.dup 163 | 164 | # Get the String inside the delimiter-pair 165 | input[(start_pos + 2)...end_pos] 166 | end 167 | 168 | # 169 | # Searches through a String and decodes the first substring enclosed by '<~' and '~>'. 170 | # 171 | # @param str [String] The String containing Ascii85-encoded content 172 | # @param out [IO, nil] An optional IO-like object to write the output to 173 | # 174 | # @return [String, IO] The decoded String (in ASCII-8BIT encoding) or the output IO object (if it was provided) 175 | # 176 | # @raise [Ascii85::DecodingError] When malformed input is encountered 177 | # 178 | # @example Decoding Ascii85 content 179 | # Ascii85.decode("<~;KZGo~>") 180 | # # => "Ruby" 181 | # 182 | # @example Decoding with multiple Ascii85 blocks present (ignores all but the first) 183 | # Ascii85.decode("Foo<~;KZGo~>Bar<~87cURDZ~>Baz") 184 | # # => "Ruby" 185 | # 186 | # @example When no delimiters are found 187 | # Ascii85.decode("No delimiters") 188 | # # => "" 189 | # 190 | # @example Decoding to an IO object 191 | # output = StringIO.new 192 | # Ascii85.decode("<~;KZGo~>", out: output) 193 | # # => output (with "Ruby" written to it) 194 | # 195 | # @note This method only accepts a String, not an IO-like object, as the entire input 196 | # needs to be available to ensure validity. 197 | # 198 | def decode(str, out: nil) 199 | decode_raw(extract(str), out: out) 200 | end 201 | 202 | # 203 | # Decodes the given raw Ascii85-encoded String or IO-like object. 204 | # 205 | # @param str_or_io [String, IO] The Ascii85-encoded input to decode 206 | # @param out [IO, nil] An optional IO-like object to write the output to 207 | # 208 | # @return [String, IO] The decoded String (in ASCII-8BIT encoding) or the output IO object (if it was provided) 209 | # 210 | # @raise [Ascii85::DecodingError] When malformed input is encountered 211 | # 212 | # @example Decoding a raw Ascii85 String 213 | # Ascii85.decode_raw(";KZGo") 214 | # # => "Ruby" 215 | # 216 | # @example Decoding from an IO-like object 217 | # input = StringIO.new(";KZGo") 218 | # Ascii85.decode_raw(input) 219 | # # => "Ruby" 220 | # 221 | # @example Decoding to an IO object 222 | # output = StringIO.new 223 | # Ascii85.decode_raw(";KZGo", out: output) 224 | # # => output (with "Ruby" written to it) 225 | # 226 | # @note The input must not be enclosed in '<~' and '~>' delimiters. 227 | # 228 | def decode_raw(str_or_io, out: nil) 229 | reader = if io_like?(str_or_io) 230 | str_or_io 231 | else 232 | StringIO.new(str_or_io.to_s, 'rb') 233 | end 234 | 235 | # Return an unfrozen String on empty input 236 | return EMPTY_STRING.dup if reader.eof? 237 | 238 | # Setup buffered Reader and Writers 239 | bufreader = BufferedReader.new(reader, encoded_chunk_size) 240 | bufwriter = BufferedWriter.new(out || StringIO.new(String.new, 'wb'), unencoded_chunk_size) 241 | 242 | # Populate the lookup table (caches the exponentiation) 243 | lut = (0..4).map { |count| 85**(4 - count) } 244 | 245 | # Decode 246 | word = 0 247 | count = 0 248 | zeroes = unfrozen_binary_copy("\0\0\0\0") 249 | wordbuf = zeroes.dup 250 | 251 | bufreader.each_chunk do |chunk| 252 | chunk.each_byte do |c| 253 | case c.chr 254 | when ' ', "\t", "\r", "\n", "\f", "\0" 255 | # Ignore whitespace 256 | next 257 | 258 | when 'z' 259 | raise(Ascii85::DecodingError, "Found 'z' inside Ascii85 5-tuple") unless count.zero? 260 | 261 | # Expand z to 0-word 262 | bufwriter.write(zeroes) 263 | 264 | when '!'..'u' 265 | # Decode 5 characters into a 4-byte word 266 | word += (c - 33) * lut[count] 267 | count += 1 268 | 269 | if count == 5 && word > 0xffffffff 270 | raise(Ascii85::DecodingError, "Invalid Ascii85 5-tuple (#{word} >= 2**32)") 271 | elsif count == 5 272 | b3 = word & 0xff; word >>= 8 273 | b2 = word & 0xff; word >>= 8 274 | b1 = word & 0xff; word >>= 8 275 | b0 = word 276 | 277 | wordbuf.setbyte(0, b0) 278 | wordbuf.setbyte(1, b1) 279 | wordbuf.setbyte(2, b2) 280 | wordbuf.setbyte(3, b3) 281 | 282 | bufwriter.write(wordbuf) 283 | 284 | word = 0 285 | count = 0 286 | end 287 | 288 | else 289 | raise(Ascii85::DecodingError, "Illegal character inside Ascii85: #{c.chr.dump}") 290 | end 291 | end 292 | end 293 | 294 | # We're done if all 5-tuples have been consumed 295 | if count.zero? 296 | bufwriter.flush 297 | return out || bufwriter.io.string.force_encoding(Encoding::ASCII_8BIT) 298 | end 299 | 300 | raise(Ascii85::DecodingError, 'Last 5-tuple consists of single character') if count == 1 301 | 302 | # Finish last, partially decoded 32-bit word 303 | count -= 1 304 | word += lut[count] 305 | 306 | bufwriter.write((word >> 24).chr) if count >= 1 307 | bufwriter.write(((word >> 16) & 0xff).chr) if count >= 2 308 | bufwriter.write(((word >> 8) & 0xff).chr) if count == 3 309 | bufwriter.flush 310 | 311 | out || bufwriter.io.string.force_encoding(Encoding::ASCII_8BIT) 312 | end 313 | 314 | private 315 | 316 | # Copies the given String and forces the encoding of the returned copy to 317 | # be Encoding::ASCII_8BIT. 318 | def unfrozen_binary_copy(str) 319 | str.dup.force_encoding(Encoding::ASCII_8BIT) 320 | end 321 | 322 | # Buffers an underlying IO object to increase efficiency. You do not need 323 | # to use this directly. 324 | # 325 | # @private 326 | # 327 | class BufferedReader 328 | def initialize(io, buffer_size) 329 | @io = io 330 | @buffer_size = buffer_size 331 | end 332 | 333 | def each_chunk 334 | return enum_for(:each_chunk) unless block_given? 335 | 336 | until @io.eof? 337 | chunk = @io.read(@buffer_size) 338 | yield chunk if chunk 339 | end 340 | end 341 | end 342 | 343 | # Buffers an underlying IO object to increase efficiency. You do not need 344 | # to use this directly. 345 | # 346 | # @private 347 | # 348 | class BufferedWriter 349 | attr_accessor :io 350 | 351 | def initialize(io, buffer_size) 352 | @io = io 353 | @buffer_size = buffer_size 354 | @buffer = String.new(capacity: buffer_size, encoding: Encoding::ASCII_8BIT) 355 | end 356 | 357 | def write(tuple) 358 | flush if @buffer.bytesize + tuple.bytesize > @buffer_size 359 | @buffer << tuple 360 | end 361 | 362 | def flush 363 | @io.write(@buffer) 364 | @buffer.clear 365 | end 366 | end 367 | 368 | # Wraps the input in '<~' and '~>' delimiters and passes it through 369 | # unmodified to the underlying IO object otherwise. You do not need to 370 | # use this directly. 371 | # 372 | # @private 373 | # 374 | class DummyWrapper 375 | def initialize(out) 376 | @out = out 377 | @out.write(START_MARKER) 378 | end 379 | 380 | def write(buffer) 381 | @out.write(buffer) 382 | end 383 | 384 | def finish 385 | @out.write(ENDING_MARKER) 386 | @out.flush 387 | 388 | @out 389 | end 390 | end 391 | 392 | # Wraps the input in '<~' and '~>' delimiters and ensures that no line is 393 | # longer than the specified length. You do not need to use this directly. 394 | # 395 | # @private 396 | # 397 | class Wrapper 398 | def initialize(out, wrap_lines) 399 | @line_length = [2, wrap_lines.to_i].max 400 | 401 | @out = out 402 | @out.write(START_MARKER) 403 | 404 | @cur_len = 2 405 | end 406 | 407 | def write(buffer) 408 | loop do 409 | s = buffer.bytesize 410 | 411 | if @cur_len + s < @line_length 412 | @out.write(buffer) 413 | @cur_len += s 414 | return 415 | end 416 | 417 | remaining = @line_length - @cur_len 418 | @out.write(buffer[0...remaining]) 419 | @out.write(LINE_BREAK) 420 | @cur_len = 0 421 | buffer = buffer[remaining..] 422 | return if buffer.empty? 423 | end 424 | end 425 | 426 | def finish 427 | # Add the closing delimiter (may need to be pushed to the next line) 428 | @out.write(LINE_BREAK) if @cur_len + 2 > @line_length 429 | @out.write(ENDING_MARKER) 430 | 431 | @out.flush 432 | @out 433 | end 434 | end 435 | 436 | # Check if an object is IO-like 437 | # 438 | # @private 439 | # 440 | def io_like?(obj) 441 | obj.respond_to?(:read) && 442 | obj.respond_to?(:eof?) 443 | end 444 | 445 | # @return [Integer] Buffer size for to-be-encoded input 446 | # 447 | def unencoded_chunk_size 448 | 4 * 2048 449 | end 450 | 451 | # @return [Integer] Buffer size for encoded output 452 | # 453 | def encoded_chunk_size 454 | 5 * 2048 455 | end 456 | end 457 | 458 | # 459 | # Error raised when Ascii85 encounters problems while decoding the input. 460 | # 461 | # This error is raised for the following issues: 462 | # * An invalid character (valid characters are '!'..'u' and 'z') 463 | # * A 'z' character inside a 5-tuple ('z' is only valid on its own) 464 | # * An invalid 5-tuple that decodes to >= 2**32 465 | # * The last tuple consisting of a single character. Valid tuples always have 466 | # at least two characters. 467 | # 468 | class DecodingError < StandardError; end 469 | end 470 | -------------------------------------------------------------------------------- /spec/bin/cli_spec.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | require 'stringio' 4 | require 'tempfile' 5 | 6 | require 'minitest/autorun' 7 | 8 | # We can't require the executable file because it doesn't 9 | # have the '.rb' extension, so we have to load it. 10 | unless defined?(CLI) 11 | load File.join(__dir__, '..','..', 'bin', 'ascii85') 12 | end 13 | 14 | describe 'CLI' do 15 | it 'should recognize the -h and --help options' do 16 | [%w[-h], %w[--help]].each do |args| 17 | cli = CLI.new(args) 18 | assert_equal :help, cli.options[:action] 19 | end 20 | end 21 | 22 | it 'should recognize the -V and --version options' do 23 | [%w[-V], %w[--version]].each do |args| 24 | cli = CLI.new(args) 25 | assert_equal :version, cli.options[:action] 26 | end 27 | end 28 | 29 | it 'should complain about superfluous arguments' do 30 | assert_raises(OptionParser::ParseError) do 31 | CLI.new(%w[foo bar]) 32 | end 33 | end 34 | 35 | describe 'wrap' do 36 | it 'should default to wrapping at 80 characters' do 37 | cli = CLI.new([]) 38 | assert_equal 80, cli.options[:wrap] 39 | end 40 | 41 | it 'should recognize the -w and --wrap options' do 42 | [%w[-w 17], %w[--wrap 17]].each do |args| 43 | cli = CLI.new(args) 44 | assert_equal 17, cli.options[:wrap] 45 | end 46 | end 47 | 48 | it 'should recognize the no-wrapping setting' do 49 | cli = CLI.new(%w[-w 0]) 50 | assert_equal false, cli.options[:wrap] 51 | end 52 | 53 | it 'should raise an error if the wrap option is not an integer' do 54 | assert_raises(OptionParser::ParseError) do 55 | CLI.new(%w[-w foo]) 56 | end 57 | end 58 | end 59 | 60 | describe 'encoding' do 61 | it 'should encode from STDIN' do 62 | stdin = StringIO.new('Ruby') 63 | stdout = StringIO.new 64 | 65 | CLI.new([], stdin: stdin, stdout: stdout).call 66 | 67 | assert_equal '<~;KZGo~>', stdout.string 68 | end 69 | 70 | it 'should accept "-" as a file name' do 71 | stdin = StringIO.new('Ruby') 72 | stdout = StringIO.new 73 | 74 | CLI.new(['-'], stdin: stdin, stdout: stdout).call 75 | 76 | assert_equal '<~;KZGo~>', stdout.string 77 | end 78 | 79 | it 'should encode a file' do 80 | begin 81 | f = Tempfile.create('ascii85_encode') 82 | f.write('Ruby') 83 | f.close 84 | 85 | stdout = StringIO.new 86 | CLI.new([f.path], stdout: stdout).call 87 | 88 | assert_equal '<~;KZGo~>', stdout.string 89 | ensure 90 | File.unlink(f.path) 91 | end 92 | end 93 | 94 | it 'should wrap lines' do 95 | begin 96 | f = Tempfile.create('ascii85_wrap') 97 | f.write('a' * 20) 98 | f.close 99 | 100 | stdout = StringIO.new 101 | CLI.new([f.path, '-w2'], stdout: stdout).call 102 | 103 | assert stdout.string.lines.all? { |l| l.chomp.length <= 2 } 104 | ensure 105 | File.unlink(f.path) 106 | end 107 | end 108 | 109 | it 'should fail when the input file is not found' do 110 | assert_raises(StandardError) do 111 | CLI.new(['./foo/bar/baz']).call 112 | end 113 | end 114 | 115 | it 'should fail when the input file is not readable' do 116 | begin 117 | f = Tempfile.create('ascii85_encode') 118 | f.chmod(0o000) 119 | 120 | assert_raises(StandardError) do 121 | CLI.new([f.path]).call 122 | end 123 | ensure 124 | File.unlink(f.path) 125 | end 126 | end 127 | end 128 | 129 | describe 'decoding' do 130 | it 'should decode from STDIN' do 131 | stdin = StringIO.new('<~;KZGo~>') 132 | stdout = StringIO.new 133 | 134 | CLI.new(['-d'], stdin: stdin, stdout: stdout).call 135 | 136 | assert_equal 'Ruby', stdout.string 137 | end 138 | 139 | it 'should accept "-" as a file name' do 140 | stdin = StringIO.new('<~;KZGo~>') 141 | stdout = StringIO.new 142 | 143 | CLI.new(['-d','-'], stdin: stdin, stdout: stdout).call 144 | 145 | assert_equal 'Ruby', stdout.string 146 | end 147 | 148 | it 'should decode a file' do 149 | begin 150 | f = Tempfile.create('ascii85_decode') 151 | f.write('<~;KZGo~>') 152 | f.close 153 | 154 | stdout = StringIO.new 155 | CLI.new(['-d', f.path], stdout: stdout).call 156 | 157 | assert_equal 'Ruby', stdout.string 158 | ensure 159 | File.unlink(f.path) 160 | end 161 | end 162 | 163 | it 'should fail when the input file is not found' do 164 | assert_raises(StandardError) do 165 | CLI.new(['-d', './foo/bar/baz']).call 166 | end 167 | end 168 | 169 | it 'should fail when the input file is not readable' do 170 | begin 171 | f = Tempfile.create('ascii85_decode') 172 | f.chmod(0o000) 173 | 174 | assert_raises(StandardError) do 175 | CLI.new(['-d', f.path]).call 176 | end 177 | ensure 178 | File.unlink(f.path) 179 | end 180 | end 181 | 182 | describe 'invalid input' do 183 | it 'should return the empty string when the input does not have delimiters' do 184 | stdin = StringIO.new('No delimiters') 185 | stdout = StringIO.new 186 | 187 | CLI.new(['-d'], stdin: stdin, stdout: stdout).call 188 | 189 | assert_equal '', stdout.string 190 | end 191 | 192 | ERROR_CASES = [ 193 | '<~!!y!!~>', 194 | '<~!!z!!~>', 195 | '<~s8W-#~>', 196 | '<~!~>', 197 | ] 198 | 199 | it 'should raise an error when invalid input is encountered' do 200 | ERROR_CASES.each do |input| 201 | stdin = StringIO.new(input) 202 | stdout = StringIO.new 203 | 204 | assert_raises(Ascii85::DecodingError) do 205 | CLI.new(['-d'], stdin: stdin, stdout: stdout).call 206 | end 207 | end 208 | end 209 | end 210 | end 211 | end -------------------------------------------------------------------------------- /spec/lib/ascii85_spec.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | require 'minitest/autorun' 4 | require 'stringio' 5 | 6 | # Require implementation 7 | require File.expand_path('../../lib/ascii85', __dir__) 8 | 9 | TEST_CASES = { 10 | '' => '', 11 | ' ' => '<~+9~>', 12 | 13 | "\0" * 1 => '<~!!~>', 14 | "\0" * 2 => '<~!!!~>', 15 | "\0" * 3 => '<~!!!!~>', 16 | "\0" * 4 => '<~z~>', 17 | "\0" * 5 => '<~z!!~>', 18 | "A\0\0\0\0" => '<~5l^lb!!~>', # No z-abbreviation! 19 | 20 | 'A' => '<~5l~>', 21 | 'AB' => '<~5sb~>', 22 | 'ABC' => '<~5sdp~>', 23 | 'ABCD' => '<~5sdq,~>', 24 | 'ABCDE' => '<~5sdq,70~>', 25 | 'ABCDEF' => '<~5sdq,77I~>', 26 | 'ABCDEFG' => '<~5sdq,77Kc~>', 27 | 'ABCDEFGH' => '<~5sdq,77Kd<~>', 28 | 'ABCDEFGHI' => '<~5sdq,77Kd<8H~>', 29 | 'Ascii85' => '<~6$$OMBfIs~>', 30 | 31 | 'Antidisestablishmentarianism' => '<~6#LdYA8-*rF*(i"Ch[s(D.RU,@<-\'jDJ=0/~>', 32 | 33 | # Dōmo arigatō, Mr. Roboto (according to Wikipedia) 34 | 'どうもありがとうミスターロボット' => 35 | '<~j+42iJVN3:K&_E6j+<0KJW/W?W8iG`j+EuaK"9on^Z0sZj+FJoK:LtSKB%T?~>', 36 | 37 | [Math::PI].pack('G') => '<~5RAV2<(&;T~>', 38 | [Math::E].pack('G') => '<~5R"n0M\\K6,~>', 39 | 40 | # Minified example from Github issue 8. 41 | # Note that OT and OU as the trailing characters are equivalent. 42 | "\x9B\xB6\xB9+\x91" => '<~S$ojXOT~>' 43 | }.freeze 44 | 45 | describe Ascii85 do 46 | it '#decode should be the inverse of #encode' do 47 | # Generate a test string that contains all possible bytes 48 | test_str = String.new 49 | (0..255).each do |c| 50 | test_str << c.chr 51 | end 52 | 53 | encoded = Ascii85.encode(test_str) 54 | decoded = Ascii85.decode(encoded) 55 | 56 | assert_equal test_str, decoded 57 | end 58 | 59 | describe '#encode' do 60 | it 'should encode all specified test-cases correctly' do 61 | TEST_CASES.each_pair do |input, encoded| 62 | assert_equal encoded, Ascii85.encode(input) 63 | end 64 | end 65 | 66 | it 'should always return unfrozen Strings' do 67 | TEST_CASES.each_pair do |input, encoded| 68 | assert_equal false, Ascii85.encode(input).frozen? 69 | end 70 | end 71 | 72 | it 'should encode Strings in different encodings correctly' do 73 | input_euc_jp = 'どうもありがとうミスターロボット'.encode('EUC-JP') 74 | input_binary = input_euc_jp.force_encoding('ASCII-8BIT') 75 | 76 | assert_equal Ascii85.encode(input_binary), Ascii85.encode(input_euc_jp) 77 | end 78 | 79 | it 'should produce output lines no longer than specified' do 80 | test_str = '0123456789' * 30 81 | 82 | # 83 | # No wrap 84 | # 85 | assert_equal 0, Ascii85.encode(test_str, false).count("\n") 86 | 87 | # 88 | # x characters per line, except for the last one 89 | # 90 | (2..12).each do |x| 91 | encoded = Ascii85.encode(test_str, x) 92 | 93 | # Determine the length of all lines 94 | count_arr = [] 95 | encoded.each_line do |line| 96 | count_arr << line.chomp.length 97 | end 98 | 99 | # The last line is allowed to be shorter than x, so remove it 100 | count_arr.pop if count_arr.last <= x 101 | 102 | # If the end-marker is on a line of its own, the next-to-last line is 103 | # allowed to be shorter than specified by exactly one character 104 | count_arr.pop if (encoded[-3].chr =~ /[\r\n]/) && (count_arr.last == x - 1) 105 | 106 | # Remove all line-lengths that are of length x from count_arr 107 | count_arr.delete_if { |len| len == x } 108 | 109 | # Now count_arr should be empty 110 | assert_empty count_arr 111 | end 112 | end 113 | 114 | it 'should not split the end-marker to achieve correct line length' do 115 | assert_equal "<~z\n~>", Ascii85.encode("\0" * 4, 4) 116 | end 117 | 118 | it 'should encode to an IO object when provided' do 119 | output = StringIO.new 120 | result = Ascii85.encode('Ruby', out: output) 121 | assert_equal output, result 122 | assert_equal '<~;KZGo~>', output.string 123 | end 124 | 125 | it 'should encode from an IO object' do 126 | input = StringIO.new('Ruby') 127 | result = Ascii85.encode(input) 128 | assert_equal '<~;KZGo~>', result 129 | end 130 | end 131 | 132 | describe '#extract' do 133 | it 'should extract data within delimiters only' do 134 | assert_empty Ascii85.extract('<~~>') 135 | assert_empty Ascii85.extract("Doesn't contain delimiters") 136 | assert_empty Ascii85.extract('Mismatched ~> delimiters 1') 137 | assert_empty Ascii85.extract('Mismatched <~ delimiters 2') 138 | assert_empty Ascii85.extract('Mismatched ~><~ delimiters 3') 139 | 140 | assert_equal ';KZGo', Ascii85.extract('<~;KZGo~><~z~>') 141 | assert_equal 'z', Ascii85.extract('FooBar<~z~>BazQux') 142 | end 143 | end 144 | 145 | describe '#decode' do 146 | it 'should decode all specified test-cases correctly' do 147 | TEST_CASES.each_pair do |decoded, input| 148 | assert_equal decoded.dup.force_encoding('ASCII-8BIT'), Ascii85.decode(input) 149 | end 150 | end 151 | 152 | it 'should always return unfrozen Strings' do 153 | TEST_CASES.each_pair do |input, encoded| 154 | assert_equal false, Ascii85.decode(encoded).frozen? 155 | end 156 | end 157 | 158 | it 'should accept valid input in encodings other than the default' do 159 | input = 'Ragnarök τέχνη русский язык I ♥ Ruby' 160 | input_ascii85 = Ascii85.encode(input) 161 | 162 | # Try to encode input_ascii85 in all possible encodings and see if we 163 | # do the right thing in #decode. 164 | Encoding.list.each do |encoding| 165 | next if encoding.dummy? 166 | next unless encoding.ascii_compatible? 167 | 168 | # CP949 is a Microsoft Codepage for Korean, which apparently does not 169 | # include a backslash, even though #ascii_compatible? returns true. This 170 | # leads to an Ascii85::DecodingError, so we simply skip the encoding. 171 | next if encoding.name == 'CP949' 172 | 173 | begin 174 | to_test = input_ascii85.encode(encoding) 175 | assert_equal input, Ascii85.decode(to_test).force_encoding('UTF-8') 176 | rescue Encoding::ConverterNotFoundError 177 | # Ignore this encoding 178 | end 179 | end 180 | end 181 | 182 | it 'should only process data within delimiters' do 183 | assert_empty Ascii85.decode('<~~>') 184 | assert_empty Ascii85.decode("Doesn't contain delimiters") 185 | assert_empty Ascii85.decode('Mismatched ~> delimiters 1') 186 | assert_empty Ascii85.decode('Mismatched <~ delimiters 2') 187 | assert_empty Ascii85.decode('Mismatched ~><~ delimiters 3') 188 | 189 | assert_equal 'Ruby', Ascii85.decode('<~;KZGo~><~z~>') 190 | assert_equal "\0\0\0\0", Ascii85.decode('FooBar<~z~>BazQux') 191 | end 192 | 193 | it 'should ignore whitespace' do 194 | decoded = Ascii85.decode("<~6 #LdYA\r\08\n \n\n- *rF*(i\"Ch[s \t(D.RU,@ <-\'jDJ=0\f/~>") 195 | assert_equal 'Antidisestablishmentarianism', decoded 196 | end 197 | 198 | it 'should return ASCII-8BIT encoded strings' do 199 | assert_equal 'ASCII-8BIT', Ascii85.decode('<~;KZGo~>').encoding.name 200 | end 201 | 202 | it 'should decode to an IO object when provided' do 203 | output = StringIO.new 204 | result = Ascii85.decode('<~;KZGo~>', out: output) 205 | assert_equal output, result 206 | assert_equal 'Ruby', output.string 207 | end 208 | 209 | describe 'Error conditions' do 210 | it 'should raise DecodingError if it encounters a word >= 2**32' do 211 | assert_raises(Ascii85::DecodingError) { Ascii85.decode('<~s8W-#~>') } 212 | end 213 | 214 | it 'should raise DecodingError if it encounters an invalid character' do 215 | assert_raises(Ascii85::DecodingError) { Ascii85.decode('<~!!y!!~>') } 216 | end 217 | 218 | it 'should raise DecodingError if the last tuple consists of a single character' do 219 | assert_raises(Ascii85::DecodingError) { Ascii85.decode('<~!~>') } 220 | end 221 | 222 | it 'should raise DecodingError if a z is found inside a 5-tuple' do 223 | assert_raises(Ascii85::DecodingError) { Ascii85.decode('<~!!z!!~>') } 224 | end 225 | end 226 | end 227 | 228 | describe '#decode_raw' do 229 | it 'should decode raw Ascii85 without delimiters' do 230 | TEST_CASES.each_pair do |decoded, input| 231 | raw_input = input[2...-2] # Remove '<~' and '~>' 232 | assert_equal decoded.dup.force_encoding('ASCII-8BIT'), Ascii85.decode_raw(raw_input) 233 | end 234 | end 235 | 236 | it 'should always return unfrozen Strings' do 237 | TEST_CASES.each_pair do |decoded, input| 238 | raw_input = input[2...-2] # Remove '<~' and '~>' 239 | assert_equal false, Ascii85.decode_raw(raw_input).frozen? 240 | end 241 | end 242 | 243 | it 'should decode from an IO object' do 244 | input = StringIO.new(';KZGo') 245 | result = Ascii85.decode_raw(input) 246 | assert_equal 'Ruby', result 247 | end 248 | 249 | it 'should decode to an IO object when provided' do 250 | output = StringIO.new 251 | result = Ascii85.decode_raw(';KZGo', out: output) 252 | assert_equal output, result 253 | assert_equal 'Ruby', output.string 254 | end 255 | 256 | it 'should raise DecodingError for invalid input' do 257 | assert_raises(Ascii85::DecodingError) { Ascii85.decode_raw('s8W-#') } 258 | assert_raises(Ascii85::DecodingError) { Ascii85.decode_raw('!!y!!') } 259 | assert_raises(Ascii85::DecodingError) { Ascii85.decode_raw('!') } 260 | assert_raises(Ascii85::DecodingError) { Ascii85.decode_raw('!!z!!') } 261 | end 262 | end 263 | end 264 | --------------------------------------------------------------------------------