# Repository contents:
#   ├── c64_datasette_decoder.rb
#   └── sampledata.wav
#
# /c64_datasette_decoder.rb:

# This script is a proof of concept for decoding data recorded on cassette tapes
# with / for a Commodore 64 (using the default “Kernal” routines). It reads
# audio data from a file named `sampledata.wav` and prints a hexdump of the
# decoded bytes to stdout.
#
# More info about the data encoding used on these tapes can be found here:
# http://c64tapes.org/dokuwiki/doku.php?id=loaders:rom_loader

require 'bundler/inline'
require 'logger'

gemfile do
  source 'https://rubygems.org'
  gem 'wavefile', '~> 0.7'
end

LOG_LEVEL = Logger::DEBUG # Use `Logger::ERROR` to suppress debugging output
$logger = Logger.new($stderr).tap do |l|
  l.level = LOG_LEVEL
  # Compact output: first letter of the severity followed by the message.
  l.formatter = ->(severity, _datetime, _progname, msg) do
    "#{severity[0]}: #{msg}\n"
  end
end

# Logs `msg` at error level and terminates the script.
#
# The process exits with status 1 so that a calling shell or script can tell a
# failed decode apart from a successful one (a bare `exit` reports success).
def error(msg)
  $logger.error(msg)
  exit 1
end

# This class behaves like `Enumerator`, but it also has all the methods from
# `Enumerable` mixed in, and there is an additional `position` field (useful for
# debugging output).
class DataStream
  include Enumerable

  # Number of elements handed out by `next` so far.
  attr_reader :position

  # The block receives a yielder, exactly like the block of `Enumerator.new`.
  def initialize(&block)
    @enum = Enumerator.new(&block)
    @position = 0
  end

  # Yields the remaining elements one by one until the stream is exhausted
  # (`loop` swallows the `StopIteration` raised by `next`). Returns an
  # enumerator when called without a block.
  def each
    return enum_for(:each) unless block_given?

    loop do
      yield self.next
    end
  end

  # Returns the next element and advances `position`. Raises `StopIteration`
  # when the stream is exhausted; `position` is left untouched in that case so
  # it never counts elements that were not actually delivered.
  def next
    value = @enum.next
    @position += 1
    value
  end
end

# First step: Create a data stream of raw audio sample data. Note that we read
# the audio file in chunks of 64K so that we can efficiently process arbitrarily
# large files. (This works because we use enumerators all the way down meaning
# the sample data is only loaded when it’s actually needed. Look for the
# “Reading 64k Block” messages in the debugging output to see the effect).
# Lazily yields every sample value of `sampledata.wav`, reading the file in
# buffers of 65,536 sample frames.
samples = DataStream.new do |yielder|
  WaveFile::Reader.new('sampledata.wav').each_buffer(65_536) do |buffer|
    $logger.debug "Reading 64k Block"
    buffer.samples.each { |sample| yielder << sample }
  end
end


# Calculate the pulse widths by looking at the distance between two falling
# edges crossing the center line, i.e. pairs of sample values where the first
# one is positive and the second is negative. Additionally, we require the
# difference of these two values to be above a certain threshold to make sure we
# only detect steep edges belonging to pulses with a certain amplitude.
#
# The threshold value was determined by trial and error and might need to be
# adjusted for recordings with different amplitudes.
EDGE_THRESHOLD = 2_000
pulse_widths = DataStream.new do |yielder|
  pulse_width = 0
  samples.each_cons(2) do |a, b|
    pulse_width += 1
    falling_edge = a >= 0 && b < 0 && a - b > EDGE_THRESHOLD
    next unless falling_edge

    yielder << pulse_width
    pulse_width = 0
  end
end

# Determine the width of a short pulse by taking the first 100 sync pulses and
# calculating their median (a few pulses at the start may be skewed because of
# tape motor speed issues).
#
# If the recording contains fewer than 100 pulses, the median of the pulses that
# were found is used; if no pulse was detected at all we stop with a clear error
# message instead of failing later on a `nil` width.
#
# Note that this should ideally be done again at the start of each block instead
# of doing it only once at the very start of the first block. Feel free to
# create a pull request! :-)
sync_pulse_widths = pulse_widths.first(100).sort
error "No pulses detected in the recording (EDGE_THRESHOLD may need adjusting)" if sync_pulse_widths.empty?
short_pulse_width = sync_pulse_widths[sync_pulse_widths.size / 2]
$logger.info "Determined short pulse width: #{short_pulse_width}"

# Calculate pulse width thresholds based on the width of a short pulse. We need
# to allow some overshoot because of wow and flutter.
PULSE_WIDTH_OVERSHOOT_FACTOR = 1.1
pulse_width_thresholds = {
  'S' => short_pulse_width * PULSE_WIDTH_OVERSHOOT_FACTOR,
  'M' => short_pulse_width * PULSE_WIDTH_OVERSHOOT_FACTOR * 1.4,
  'L' => short_pulse_width * PULSE_WIDTH_OVERSHOOT_FACTOR * 1.9,
}
$logger.info "Pulse width thresholds: #{pulse_width_thresholds}"

# Classify pulses by comparing them to the thresholds: the result is the first
# type (in the order S, M, L) whose threshold is above the given width, or
# "?(<width>)" if the pulse is longer than every threshold.
pulse_classifier = ->(width) do
  type, _limit = pulse_width_thresholds.find { |_type, limit| width < limit }
  type || "?(#{width})"
end

decoded_pulses = DataStream.new do |yielder|
  pulse_widths.each do |width|
    yielder << pulse_classifier.call(width)
  end
end

# Turns the 9 bits collected for one byte (8 data bits, LSB first, followed by
# 1 parity bit) into an integer. Aborts the script via `error` if the number of
# bits is wrong or the parity check fails.
decode_byte = ->(bits) do
  error "\nRead error: Found only #{bits.size} at #{samples.position}" unless bits.size == 9
  *data_bits, parity = bits
  # Check the parity: the parity bit must be 1 if the number of 1s in the data
  # bits is even and 0 if it is odd, i.e. the total number of 1s must be odd.
  parity_ok = (data_bits.count(1) + parity).odd?
  error "Read error: Incorrect parity #{parity} for #{data_bits} at #{samples.position}" unless parity_ok
  # Convert the bits to a byte (note that the bytes are stored on tape with the
  # LSB coming first).
  data_bits.each_with_index.sum { |bit, i| bit << i }
end

# Read and decode whole blocks of data. Each element of this stream is itself a
# (lazy) stream of the bytes of one block; it has to be consumed completely
# before the next block is requested.
blocks = DataStream.new do |yielder|
  loop do
    $logger.info "Start parsing block at #{samples.position}"

    # Each block starts with a sync leader containing short pulses only. We skip
    # over it until we find the first “start of byte” marker (long pulse
    # followed by a medium pulse).
    decoded_pulses.take_while { |type| type != 'L' }
    error "Expected M at #{samples.position}" unless decoded_pulses.next == 'M'
    $logger.info "End of leader found at #{samples.position}"

    # Now we can finally decode the real data.
    bytes = DataStream.new do |byte_yielder|
      bits = []
      decoded_pulses.each_slice(2) do |a, b|
        case [a, b]
        when %w[S M]
          bits << 0
        when %w[M S]
          bits << 1
        when %w[L M]
          # We found the next “start of byte” marker. This means we should
          # now have read a complete byte (9 bits, because there is 1 parity
          # bit).
          byte_yielder << decode_byte.call(bits)
          bits = []
        when %w[L S]
          # The “end-of-data marker” terminates the block. No further “start
          # of byte” marker follows it, so the bits collected since the last
          # marker form the final byte of the block and must be emitted here
          # (otherwise the last byte would silently be dropped). See
          # http://c64tapes.org/dokuwiki/doku.php?id=loaders:rom_loader
          # for details.
          $logger.info "End-of-data marker at #{samples.position}"
          byte_yielder << decode_byte.call(bits) unless bits.empty?
          bits = []
          break
        else
          # `error` terminates the script, so nothing needs to follow it.
          error "Read error: #{a} #{b} at #{samples.position}"
        end
      end
    end

    yielder.yield bytes
  end
end

blocks.each do |block|
  # Decode the whole block first so that success is only reported once all of
  # its bytes have actually been read without errors.
  decoded = block.to_a
  $logger.info "Successfully decoded a data block!"
  puts decoded.map { |byte| "%02x" % byte }.join(" ")
end

# /sampledata.wav:
# https://raw.githubusercontent.com/noniq/c64_datassette_decoder/fd5a8e208c518ad5ca19e6d9ec5f911a61aa9b50/sampledata.wav