├── c64_datasette_decoder.rb
└── sampledata.wav


/c64_datasette_decoder.rb:
--------------------------------------------------------------------------------
  1 | # This script is a proof of concept for decoding data recorded on cassette tapes
  2 | # with / for a Commodore 64 (using the default “Kernal” routines). It reads
  3 | # audio data from a file named `sampledata.wav` and prints a hexdump of the
  4 | # decoded bytes to stdout.
  5 | #
  6 | # More info about the data encoding used on these tapes can be found here:
  7 | # http://c64tapes.org/dokuwiki/doku.php?id=loaders:rom_loader
  8 | 
  9 | require 'bundler/inline'
 10 | require 'logger'
 11 | 
 12 | gemfile do
 13 |   source 'https://rubygems.org'
 14 |   gem 'wavefile', '~> 0.7'
 15 | end
 16 | 
 17 | LOG_LEVEL = Logger::DEBUG # Use `Logger::ERROR` to suppress debugging output
 18 | $logger = Logger.new($stderr).tap do |l|
 19 |   l.level = LOG_LEVEL
 20 |   l.formatter = ->(severity, _datetime, _progname, msg) do
 21 |     "#{severity[0]}: #{msg}\n"
 22 |   end
 23 | end
 24 | 
 25 | def error(msg)
 26 |   $logger.error(msg)
 27 |   exit
 28 | end
 29 | 
 30 | # This class behaves like `Enumerator`, but it also has all the methods from
 31 | # `Enumerable` mixed in, and there is an additional `position` field (useful for
 32 | # debugging output).
 33 | class DataStream
 34 |   include Enumerable
 35 |   attr_reader :position
 36 | 
 37 |   def initialize(&block)
 38 |     @enum = Enumerator.new(&block)
 39 |     @position = 0
 40 |   end
 41 | 
 42 |   def each
 43 |     loop do
 44 |       yield self.next
 45 |     end
 46 |   end
 47 | 
 48 |   def next
 49 |     @position += 1
 50 |     @enum.next
 51 |   end
 52 | end
 53 | 
 54 | # First step: Create a data stream of raw audio sample data. Note that we read
 55 | # the audio file in chunks of 64K so that we can efficently process arbitrarily
 56 | # large files. (This works because we use enumerators all the way down meaning
 57 | # the sample data is only loaded when it’s actually needed. Look for the
 58 | # “Reading 64k Block” messages in the debugging output to see the effect).
 59 | samples = DataStream.new do |yielder|
 60 |   WaveFile::Reader.new('sampledata.wav').each_buffer(65_536) do |buffer|
 61 |     $logger.debug "Reading 64k Block"
 62 |     buffer.samples.each do |sample|
 63 |       yielder << sample
 64 |     end
 65 |   end
 66 | end
 67 | 
 68 | 
 69 | # Calculate the pulse widths by looking at the distance between two falling
 70 | # edges crossing the center line, i.e. pairs of sample values where the first
 71 | # one is positive and the second is negative. Additionally, we require the
 72 | # difference of these two values to be above a certain threshold to make sure we
 73 | # only detect steep edges belonging to pulses with a certain amplitude.
 74 | #
 75 | # The threshold value was determined by trial and error and might need to be
 76 | # adjusted for recordings with different amplitudes.
 77 | EDGE_THRESHOLD = 2_000
 78 | pulse_widths = DataStream.new do |yielder|
 79 |   pulse_width = 0
 80 |   samples.each_cons(2) do |a, b|
 81 |     pulse_width += 1
 82 |     if a - b > EDGE_THRESHOLD && a >= 0 && b < 0
 83 |       yielder << pulse_width
 84 |       pulse_width = 0
 85 |     end
 86 |   end
 87 | end
 88 | 
 89 | # Determine the width of a short pulse by taking the first 100 sync pulses and
 90 | # calculating their median (a few pulses at the start may be skewed because of
 91 | # tape motor speed issues).
 92 | #
 93 | # Note that this should ideally be done again at the start of each block instead
 94 | # of doing it only once at the very start of the first block. Feel free to
 95 | # create a pull request! :-)
 96 | short_pulse_width = pulse_widths.first(100).sort[50]
 97 | $logger.info "Determined short pulse width: #{short_pulse_width}"
 98 | 
 99 | # Calculate pulse width thresholds based on the width of a short pulse. We need
100 | # to allow some overshoot because of wow and flutter.
101 | PULSE_WIDTH_OVERSHOOT_FACTOR = 1.1
102 | pulse_width_thresholds = {
103 |   'S' => short_pulse_width * PULSE_WIDTH_OVERSHOOT_FACTOR,
104 |   'M' => short_pulse_width * PULSE_WIDTH_OVERSHOOT_FACTOR * 1.4,
105 |   'L' => short_pulse_width * PULSE_WIDTH_OVERSHOOT_FACTOR * 1.9,
106 | }
107 | $logger.info "Pulse width thresholds: #{pulse_width_thresholds}"
108 | 
# Classify a pulse: the result is the first pulse type whose threshold is above
# the given width, or "?(<width>)" when the pulse is wider than all thresholds.
pulse_classifier = lambda do |width|
  type, _limit = pulse_width_thresholds.find { |_type, limit| width < limit }
  type || "?(#{width})"
end
116 | 
# Stream of classified pulses: every pulse width is passed through the
# classifier as soon as it is requested.
decoded_pulses = DataStream.new do |yielder|
  pulse_widths.each { |width| yielder << pulse_classifier.(width) }
end
122 | 
# Read and decode whole blocks of data. Every element produced by `blocks` is a
# nested `DataStream` that lazily yields the decoded bytes of one block.
blocks = DataStream.new do |yielder|
  # Once the pulse stream is exhausted, `decoded_pulses.next` below raises
  # StopIteration, which ends this `loop` and thereby the stream of blocks.
  loop do
    $logger.info "Start parsing block at #{samples.position}"

    # Each block starts with a sync leader containing short pulses only. We skip
    # over it until we find the first “start of byte” marker (long pulse
    # followed by a medium pulse). `take_while` also consumes the long pulse
    # that ends the leader, so only the medium pulse is left to check.
    decoded_pulses.take_while { |type| type != 'L' }
    error "Expected M at #{samples.position}" unless decoded_pulses.next == 'M'
    $logger.info "End of leader found at #{samples.position}"

    # Now we can finally decode the real data.
    bytes = DataStream.new do |byte_yielder|
      bits = []

      # Validates the bits collected since the last marker (8 data bits followed
      # by 1 parity bit) and emits them as one byte.
      emit_byte = lambda do
        unless bits.size == 9
          error "Read error: Found #{bits.size} bits instead of 9 at #{samples.position}"
        end
        # Check the parity: the data bits plus the parity bit must contain an
        # odd number of ones.
        parity = bits.pop
        parity_ok = (bits.count(1) + parity).odd?
        error "Read error: Incorrect parity #{parity} for #{bits} at #{samples.position}" unless parity_ok
        # Convert the bits to a byte (note that the bytes are stored on tape
        # with the LSB coming first).
        byte_yielder << bits.reverse.join.to_i(2)
        bits = []
      end

      decoded_pulses.each_slice(2) do |a, b|
        case [a, b]
        when %w[S M]
          bits << 0
        when %w[M S]
          bits << 1
        when %w[L M]
          # We found the next “start of byte” marker. This means we should
          # now have read a complete byte (9 bits, because there is 1 parity
          # bit).
          emit_byte.call
        when %w[L S]
          # End-of-data marker. No further “start of byte” marker follows, so
          # the bits collected for the last byte have to be emitted here;
          # otherwise the final byte of the block would be dropped. See
          # http://c64tapes.org/dokuwiki/doku.php?id=loaders:rom_loader
          # for details about the marker.
          emit_byte.call unless bits.empty?
          $logger.info "End-of-data marker at #{samples.position}"
          break
        else
          # `error` terminates the script, so nothing needs to follow it.
          error "Read error: #{a} #{b} at #{samples.position}"
        end
      end
    end

    yielder.yield bytes
  end
end
173 | 
# Print a hexdump of every block. The bytes of a block are decoded lazily while
# they are mapped, so the dump is built first and success is only reported once
# the whole block has actually been decoded.
blocks.each do |block|
  hexdump = block.map { |byte| format('%02x', byte) }.join(' ')
  $logger.info "Successfully decoded a data block!"
  puts hexdump
end
178 | 


--------------------------------------------------------------------------------
/sampledata.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/noniq/c64_datassette_decoder/fd5a8e208c518ad5ca19e6d9ec5f911a61aa9b50/sampledata.wav


--------------------------------------------------------------------------------