├── 1B22.png ├── 22A0.png ├── 4770.png ├── 6000.png ├── 92DC.png ├── 955A.png ├── XXXX.png ├── mnist.png ├── title.png ├── vt241.png ├── 008B_B800.png ├── labelthon.png ├── rigolfail.png ├── success.png ├── digit_types.png ├── video_still.jpg ├── wacky_rigol.jpg ├── brouhaha_tweet.png ├── masking_graph.png ├── progress_all.png ├── progress_manual.png ├── show_digit_mask.png ├── BCom_ii_8000_0123.png ├── empty_database.csv.lz ├── labels_from_traces.png ├── masking_graph_fail.png ├── non_executable_ros.png ├── show_digit_mask_fail.png ├── progress_classification.png ├── labels_exploit_disagreement.png ├── labels_resolve_disagreement.png ├── crop_list.csv ├── README.md ├── crop_all_words.sh ├── rigol ├── 01_invert.py └── 00_dedup.py ├── LICENSE ├── finalise.sh ├── show_digit_mask.py ├── assemble_all_labels.sh ├── assembly_to_binary.py ├── labels_diff.py ├── crop_words.py ├── binary_check.py ├── crop_word.py ├── label_database.py ├── labelthon.py ├── assemble_labels.py ├── DATA.md ├── labels_resolve_disagreement.py ├── labels_classification.py ├── labels_from_traces.py ├── labels_classification_keras.py └── WRITEUP.md /1B22.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stepleton/5100NonExecutableROSDecode/HEAD/1B22.png -------------------------------------------------------------------------------- /22A0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stepleton/5100NonExecutableROSDecode/HEAD/22A0.png -------------------------------------------------------------------------------- /4770.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stepleton/5100NonExecutableROSDecode/HEAD/4770.png -------------------------------------------------------------------------------- /6000.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/stepleton/5100NonExecutableROSDecode/HEAD/6000.png -------------------------------------------------------------------------------- /92DC.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stepleton/5100NonExecutableROSDecode/HEAD/92DC.png -------------------------------------------------------------------------------- /955A.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stepleton/5100NonExecutableROSDecode/HEAD/955A.png -------------------------------------------------------------------------------- /XXXX.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stepleton/5100NonExecutableROSDecode/HEAD/XXXX.png -------------------------------------------------------------------------------- /mnist.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stepleton/5100NonExecutableROSDecode/HEAD/mnist.png -------------------------------------------------------------------------------- /title.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stepleton/5100NonExecutableROSDecode/HEAD/title.png -------------------------------------------------------------------------------- /vt241.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stepleton/5100NonExecutableROSDecode/HEAD/vt241.png -------------------------------------------------------------------------------- /008B_B800.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stepleton/5100NonExecutableROSDecode/HEAD/008B_B800.png -------------------------------------------------------------------------------- 
/labelthon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stepleton/5100NonExecutableROSDecode/HEAD/labelthon.png -------------------------------------------------------------------------------- /rigolfail.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stepleton/5100NonExecutableROSDecode/HEAD/rigolfail.png -------------------------------------------------------------------------------- /success.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stepleton/5100NonExecutableROSDecode/HEAD/success.png -------------------------------------------------------------------------------- /digit_types.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stepleton/5100NonExecutableROSDecode/HEAD/digit_types.png -------------------------------------------------------------------------------- /video_still.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stepleton/5100NonExecutableROSDecode/HEAD/video_still.jpg -------------------------------------------------------------------------------- /wacky_rigol.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stepleton/5100NonExecutableROSDecode/HEAD/wacky_rigol.jpg -------------------------------------------------------------------------------- /brouhaha_tweet.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stepleton/5100NonExecutableROSDecode/HEAD/brouhaha_tweet.png -------------------------------------------------------------------------------- /masking_graph.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/stepleton/5100NonExecutableROSDecode/HEAD/masking_graph.png -------------------------------------------------------------------------------- /progress_all.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stepleton/5100NonExecutableROSDecode/HEAD/progress_all.png -------------------------------------------------------------------------------- /progress_manual.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stepleton/5100NonExecutableROSDecode/HEAD/progress_manual.png -------------------------------------------------------------------------------- /show_digit_mask.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stepleton/5100NonExecutableROSDecode/HEAD/show_digit_mask.png -------------------------------------------------------------------------------- /BCom_ii_8000_0123.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stepleton/5100NonExecutableROSDecode/HEAD/BCom_ii_8000_0123.png -------------------------------------------------------------------------------- /empty_database.csv.lz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stepleton/5100NonExecutableROSDecode/HEAD/empty_database.csv.lz -------------------------------------------------------------------------------- /labels_from_traces.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stepleton/5100NonExecutableROSDecode/HEAD/labels_from_traces.png -------------------------------------------------------------------------------- /masking_graph_fail.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/stepleton/5100NonExecutableROSDecode/HEAD/masking_graph_fail.png -------------------------------------------------------------------------------- /non_executable_ros.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stepleton/5100NonExecutableROSDecode/HEAD/non_executable_ros.png -------------------------------------------------------------------------------- /show_digit_mask_fail.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stepleton/5100NonExecutableROSDecode/HEAD/show_digit_mask_fail.png -------------------------------------------------------------------------------- /progress_classification.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stepleton/5100NonExecutableROSDecode/HEAD/progress_classification.png -------------------------------------------------------------------------------- /labels_exploit_disagreement.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stepleton/5100NonExecutableROSDecode/HEAD/labels_exploit_disagreement.png -------------------------------------------------------------------------------- /labels_resolve_disagreement.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stepleton/5100NonExecutableROSDecode/HEAD/labels_resolve_disagreement.png -------------------------------------------------------------------------------- /crop_list.csv: -------------------------------------------------------------------------------- 1 | Name,tlx,tly 2 | 0_0,9.0,12.5 3 | 0_1,62.0,12.0 4 | 0_2,100.0,11.0 5 | 0_3,137.0,11.0 6 | 0_4,172.5,10.5 7 | 0_5,207.5,10.0 8 | 0_6,243.0,9.5 9 | 0_7,277.5,9.5 10 | 0_8,313.5,9.5 11 | 1_0,8.5,49.0 12 | 1_1,61.5,48.5 13 | 1_2,99.5,48.0 14 
| 1_3,136.5,47.5 15 | 1_4,172.0,47.0 16 | 1_5,207.5,47.0 17 | 1_6,243.0,46.5 18 | 1_7,278.0,46.5 19 | 1_8,314.0,46.0 20 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Decoding videos of the IBM 5100's Non-executable ROS 2 | 3 | ![An artistic rendering of the text "IBM 5100" using the hexadecimal word 4 | images considered in this project](title.png) 5 | 6 | This repository includes [a detailed write-up of the convoluted way in which 7 | the IBM 5100's non-executable ROS was recovered](WRITEUP.md), as well as [a 8 | separate document detailing the recovered data](DATA.md). 9 | 10 | See the writeup for descriptions of the source code available here. 11 | -------------------------------------------------------------------------------- /crop_all_words.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | # Licensing: 4 | # 5 | # This program and any supporting programs, software libraries, and 6 | # documentation distributed alongside it are released into the public domain 7 | # without any warranty. See the LICENSE file for details. 8 | 9 | for i in `find . -name 01_cropped`; do 10 | for input in $i/*.png; do 11 | out_prefix=`echo $input | sed 's/01_cropped/02_words/' | sed 's/\.png$/_/'` 12 | echo -n "Processing $input..." 13 | if ./crop_words.py -r 16 -c 29 --brighten "88;73;119" $input crop_list.csv $out_prefix; then 14 | echo " done." 15 | else 16 | echo " error!" 
#!/usr/bin/python3
"""Bitwise-invert the hex digits in our CSV files."""

import argparse


def _define_flags():
  """Builds the `ArgumentParser` describing this program's command line."""
  parser = argparse.ArgumentParser(
      description='Invert hex digits in CSV files. Output goes to stdout.')
  parser.add_argument(
      'csv_file', help='CSV file to process', type=argparse.FileType('r'))
  return parser


def main(FLAGS):
  """Prints every "time,byte" row with the byte's bits flipped.

  Each row of `FLAGS.csv_file` must hold exactly two comma-separated fields.
  The second field is parsed as hexadecimal, complemented, truncated to eight
  bits and printed as two upper-case hex digits after the untouched first
  field.

  Args:
    FLAGS: parsed command-line flags; only `csv_file` (an iterable of text
        lines) is used.
  """
  for row in FLAGS.csv_file:
    stamp, hex_byte = row.rstrip().split(',')
    flipped = 0xff & ~int(hex_byte, 16)
    print('%s,%02X' % (stamp, flipped))


if __name__ == '__main__':
  flags = _define_flags()
  FLAGS = flags.parse_args()
  main(FLAGS)
15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 17 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 18 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 19 | IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR 20 | OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 21 | ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 22 | OTHER DEALINGS IN THE SOFTWARE. 23 | 24 | For more information, please refer to 25 | -------------------------------------------------------------------------------- /finalise.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | # Licensing: 4 | # 5 | # This program and any supporting programs, software libraries, and 6 | # documentation distributed alongside it are released into the public domain 7 | # without any warranty. See the LICENSE file for details. 8 | 9 | dd if=binary_APL_LROS_raw_undump.bin bs=6144 count=16 of=binary_APL_LROS.bin 10 | 11 | dd if=binary_BCom_raw_undump.bin bs=1 count=6144 of=binary_BCom.bin # 10 12 | dd if=binary_BCom_raw_undump.bin bs=1 count=6144 skip=6144 oflag=append conv=notrunc of=binary_BCom.bin # 11 13 | dd if=binary_BCom_raw_undump.bin bs=1 count=6144 skip=20480 oflag=append conv=notrunc of=binary_BCom.bin # 12 14 | dd if=binary_BCom_raw_undump.bin bs=1 count=6144 skip=26624 oflag=append conv=notrunc of=binary_BCom.bin # 13 15 | dd if=binary_BCom_raw_undump.bin bs=1 count=6144 skip=40960 oflag=append conv=notrunc of=binary_BCom.bin # 14 16 | dd if=binary_BCom_raw_undump.bin bs=1 count=6144 skip=55296 oflag=append conv=notrunc of=binary_BCom.bin # 15 17 | dd if=binary_BCom_raw_undump.bin bs=1 count=6144 skip=69632 oflag=append conv=notrunc of=binary_BCom.bin # 16 18 | dd if=binary_BCom_raw_undump.bin bs=1 count=6144 skip=75776 oflag=append conv=notrunc of=binary_BCom.bin # 17 19 | dd if=binary_BCom_raw_undump.bin bs=1 count=6144 
#!/usr/bin/python3
"""Let's slim down our CSV files.

We'll do it by getting rid of lines where the byte on the lines is the same
as the byte at the preceding timestep.

We'll also strip out the weird non-ascii characters from the file. Oh Rigol...
"""

import argparse
import string


def _define_flags():
  """Defines an `ArgumentParser` for command-line flags used by this program."""
  flags = argparse.ArgumentParser(
      description='Slim down CSV files. Output goes to stdout.')

  flags.add_argument('csv_file', type=argparse.FileType('r'),
                     help='CSV file to process')

  return flags


def main(FLAGS):
  """Prints only the rows whose byte differs from the previously kept row.

  The first two lines of `FLAGS.csv_file` are discarded as headers. Every
  remaining line is reduced to its printable characters, and its last two
  comma-separated fields are taken as `timestep,byte`. A row is printed when
  its byte differs from the byte of the last row that was printed.

  Lines that do not contain at least two comma-separated fields (blank lines,
  truncated rows) are skipped.

  Args:
    FLAGS: parsed command-line flags; only `csv_file` (an iterator over text
        lines) is used.
  """
  # Strip first two lines (headers) from the file. The default keeps a file
  # with fewer than two lines from raising StopIteration.
  next(FLAGS.csv_file, None)
  next(FLAGS.csv_file, None)

  # Built once: set membership is cheaper than scanning the string for every
  # character of every line.
  printable = frozenset(string.printable)

  prev_byte = ''
  for line in FLAGS.csv_file:
    # Strip out non-printable characters, since some lines have plenty of $00
    # bytes.
    line = ''.join(c for c in line if c in printable).rstrip()
    # Robustly parse the line so that partial lines prepended to the current
    # line don't wreck things.
    try:
      timestep, byte = line.split(',')[-2:]
    except ValueError:
      # Not enough values to unpack. Skip the row entirely: falling through
      # would read `timestep`/`byte` before they were ever assigned when the
      # very first data row is malformed.
      continue

    if byte != prev_byte:
      print('{},{}'.format(timestep, byte))
      prev_byte = byte


if __name__ == '__main__':
  flags = _define_flags()
  FLAGS = flags.parse_args()
  main(FLAGS)
def _imshow(filename):
  """Show an image file using Sixel graphics.

  The image is loaded with Wand, resized to twice its width and height,
  encoded as Sixel data and written as raw bytes to standard output.

  Args:
    filename: path of the image file to display.
  """
  # Use the image as a context manager so its resources are released as soon
  # as the Sixel data has been produced, rather than whenever the object
  # happens to be garbage-collected.
  with wand.image.Image(filename=filename) as image:
    image.resize(width=(image.width * 2), height=(image.height * 2))
    sixel_data = image.make_blob('sixel')
  # Sixel data is binary, so bypass the text layer of stdout.
  sys.stdout.buffer.write(sixel_data)
# Label database handed to assemble_labels.py as its first argument
# (presumably the manually-labelled reference data -- TODO confirm).
TRUTH=database.csv

# Additional label databases handed to every assemble_labels.py invocation
# after the part specification. `read -d ''` collects the whole
# here-document into a single variable.
read -d '' ALL_FILES << EOF
database_classified_44.csv
database_classified_45.csv
database_classified_46.csv
database_classified_47.csv
database_classified_48.csv
database_classified_49.csv
database_classified_50.csv
database_classified_51.csv
database_classified_52.csv
database_classified_53.csv
database_classified_54.csv
database_classified_55.csv
database_classified_56.csv
database_classified_57.csv
database_classified_58.csv
database_classified_59.csv
database_classified_60.csv
database_classified_61.csv
database_classified_62.csv
database_classified_63.csv
database_classified_64.csv
database_classified_65.csv
database_classified_66.csv
database_classified_67.csv
database_classified_68.csv
database_classified_69.csv
database_classified_70.csv
database_classified_71.csv
EOF

# One entry per output file. Each entry is a comma-separated pair of path
# prefixes that is passed to assemble_labels.py as a single argument.
read -d '' ALL_PARTS << EOF
./APL/APL_LROS_0000,./APL_ii/APL_LROS_ii_0000
./APL/APL_LROS_2000,./APL_ii/APL_LROS_ii_2000
./APL/APL_LROS_4000,./APL_ii/APL_LROS_ii_4000
./APL/APL_LROS_6000,./APL_ii/APL_LROS_ii_6000
./APL/APL_LROS_8000,./APL_ii/APL_LROS_ii_8000
./APL/APL_LROS_A000,./APL_ii/APL_LROS_ii_A000
./APL/APL_LROS_C000,./APL_ii/APL_LROS_ii_C000
./APL/APL_LROS_E000,./APL_ii/APL_LROS_ii_E000
./BCom/BCom_0000,./BCom_ii/BCom_ii_0000
./BCom/BCom_2000,./BCom_ii/BCom_ii_2000
./BCom/BCom_4000,./BCom_ii/BCom_ii_4000
./BCom/BCom_6000,./BCom_ii/BCom_ii_6000
./BCom/BCom_8000,./BCom_ii/BCom_ii_8000
./BCom/BCom_A000,./BCom_ii/BCom_ii_A000
./BCom/BCom_C000,./BCom_ii/BCom_ii_C000
./BCom/BCom_E000,./BCom_ii/BCom_ii_E000
EOF

# $ALL_PARTS and $ALL_FILES are deliberately left unquoted: the loop and the
# command line below rely on the shell splitting them into separate words.
for part in $ALL_PARTS; do
  # The third '/'-separated field of e.g. "./APL/APL_LROS_0000,./APL_ii/..."
  # is "APL_LROS_0000,."; sed then drops the trailing comma and the one
  # character after it, leaving "APL_LROS_0000".
  title=$(echo $part | cut -d '/' -f 3 | sed s/,.$//)
  output="assembly_$title.txt"
  echo "Working on $output..."
  # The standard output of assemble_labels.py becomes the assembly file.
  ./assemble_labels.py $TRUTH $part $ALL_FILES > $output
done
def main(FLAGS):
  """Writes the winning two-byte word of every label line to stdout.

  Each file in `FLAGS.assembly_files` is read in order. Lines containing an
  `@` are treated as label lines: the first space-separated token is ignored
  and every remaining token is a candidate whose vote count is its number of
  commas plus one. The first candidate with the strictly highest vote count
  wins; its first four characters are decoded as hex digits and written as
  binary to stdout. A label line without candidates yields two zero bytes.

  Args:
    FLAGS: parsed command-line flags; only `assembly_files` is used.
  """
  for path in FLAGS.assembly_files:
    sys.stderr.write('Processing {}...\n'.format(path))
    with open(path, 'r') as asm:
      for row in asm:
        if '@' not in row:
          continue  # Not a label line.

        winner, winner_votes = b'\x00\x00', 0
        _, *candidates = row.split(' ')
        for candidate in candidates:
          votes = 1 + candidate.count(',')
          if votes <= winner_votes:
            continue  # Ties keep the earlier candidate.
          winner_votes = votes
          winner = binascii.a2b_hex(candidate[:4])

        sys.stdout.buffer.write(winner)
def _parse_bool(text):
  """Converts a command-line word such as "False" or "1" into a `bool`.

  Args:
    text: the raw string supplied on the command line.

  Returns:
    True for "1", "true", "t", "yes", "y" and "on"; False for the empty
    string, "0", "false", "f", "no", "n" and "off". Case and surrounding
    whitespace are ignored.

  Raises:
    argparse.ArgumentTypeError: if `text` is none of the words above.
  """
  word = text.strip().lower()
  if word in ('1', 'true', 't', 'yes', 'y', 'on'):
    return True
  if word in ('', '0', 'false', 'f', 'no', 'n', 'off'):
    return False
  raise argparse.ArgumentTypeError(
      'expected a boolean value, got {!r}'.format(text))


def _define_flags():
  """Defines an `ArgumentParser` for command-line flags used by this program."""
  flags = argparse.ArgumentParser(
      description='Identify label differences in two image label databases.')

  flags.add_argument('label_database_1', type=str,
                     help=('CSV file containing image paths, labels, and '
                           'the number of times a particular label was '
                           'supplied for an image. The CSV header should be '
                           '"Filename,Label,Count". '))

  flags.add_argument('label_database_2', type=str,
                     help=('CSV file containing image paths, labels, and '
                           'the number of times a particular label was '
                           'supplied for an image. The CSV header should be '
                           '"Filename,Label,Count". '))

  flags.add_argument('--minimum-label-count', default=2, type=int,
                     help=('Only compare image labels with at least this many '
                           'counts.'))

  # `type=bool` would turn every non-empty string -- including "False" -- into
  # True, making it impossible to switch this flag off by name.
  flags.add_argument('--skip-XXXX', default=True, type=_parse_bool,
                     help='Ignore "XXXX" entries in either database.')

  flags.add_argument('--skip-XXXX-from', type=str,
                     help=('CSV label database. Files with "XXXX" entries in '
                           'this database will be ignored when comparing the '
                           'other two databases.'))

  return flags


def main(FLAGS):
  """Prints one line per file whose label differs between the two databases.

  Both databases are opened read-only. Every (filename, label) pair that the
  first database reports with at least `FLAGS.minimum_label_count` counts is
  looked up in the second database, and `filename label1 <> label2` is printed
  unless the second database's count is below the minimum, either label is
  "XXXX" while `FLAGS.skip_XXXX` is set, the labels agree, or the file is
  labelled "XXXX" in the optional `FLAGS.skip_XXXX_from` database.

  Args:
    FLAGS: parsed command-line flags as produced by `_define_flags`.
  """
  # Load the collection of filenames whose labels we ignore.
  ignorables = set()
  if FLAGS.skip_XXXX_from:
    sys.stderr.write('Opening {}...\n'.format(FLAGS.skip_XXXX_from))
    with label_database.Database(FLAGS.skip_XXXX_from, readonly=True) as db:
      db_all_labels = db.all_labels_with_counts_of_at_least(0)
      ignorables.update(fn for fn, label in db_all_labels if label == 'XXXX')

  # Open label databases.
  sys.stderr.write('Opening {}...\n'.format(FLAGS.label_database_1))
  with label_database.Database(FLAGS.label_database_1, readonly=True) as db1:
    sys.stderr.write('Opening {}...\n'.format(FLAGS.label_database_2))
    with label_database.Database(FLAGS.label_database_2, readonly=True) as db2:
      # Print differences.
      db1_all_labels = db1.all_labels_with_counts_of_at_least(
          FLAGS.minimum_label_count)
      for fn, label1 in db1_all_labels:
        # Indexing the second database yields a (label, count) pair.
        label2, count2 = db2[fn]
        if count2 < FLAGS.minimum_label_count:
          continue
        if FLAGS.skip_XXXX and 'XXXX' in [label1, label2]:
          continue
        if label1 == label2:
          continue
        if fn in ignorables:
          continue
        print('{} {} <> {}'.format(fn, label1, label2))


if __name__ == '__main__':
  flags = _define_flags()
  FLAGS = flags.parse_args()
  main(FLAGS)
def _define_flags():
  """Defines an `ArgumentParser` for command-line flags used by this program."""
  flags = argparse.ArgumentParser(
      description='Crop multiple word images from a grayscale .png')

  flags.add_argument('input_image', type=str,
                     help='Image to crop: filename or URI')
  flags.add_argument('crop_list', type=str,
                     help=('CSV file listing crop image name and initial '
                           'x,y locations of crop box top-left corners. The '
                           'CSV header should be "Name,tlx,tly"'))
  flags.add_argument('output_prefix', type=str,
                     help='Prefix string for crop image filenames')

  flags.add_argument('-r', '--rows', required=True, type=int,
                     help='Cropped word image size: rows')
  flags.add_argument('-c', '--cols', required=True, type=int,
                     help='Cropped word image size: columns')

  flags.add_argument('-i', '--iters', default=200, type=int,
                     help='Iterations of box position refinement.')

  flags.add_argument('--brighten', type=str,
                     help=('Do conditional brightening: the code "87;73;119" '
                           'means "if the maximum pixel value is less than '
                           '87, multiply pixel values by 119 / 73"'))

  flags.add_argument('-v', '--verbose', action='store_true',
                     help='Log debug information.')

  return flags


def main(FLAGS):
  """Crops every word listed in the crop list out of the input image.

  For each `Name,tlx,tly` row of `FLAGS.crop_list`, the crop box is refined by
  `crop_word.centre_and_crop` and the result is written to
  `<FLAGS.output_prefix><Name>.png`.

  Args:
    FLAGS: parsed command-line flags as produced by `_define_flags`.

  Raises:
    AssertionError: if the crop list header is not "Name,tlx,tly" (an empty
        crop list counts as a bad header), or if a crop box ended up three or
        more pixels away from its initial location. The crop that triggered
        the latter has already been written when the error is raised.
  """
  # Verbose logging if desired.
  if FLAGS.verbose:
    logging.getLogger().setLevel(logging.INFO)

  # Set up conditional brightening if desired.
  if FLAGS.brighten:
    thresh, denom, num = (float(x) for x in FLAGS.brighten.split(';'))
    postcrop = lambda x: np.uint8(x * num / denom) if np.max(x) < thresh else x
  else:
    postcrop = None

  # Load the .csv file listing initial crop locations.
  with open(FLAGS.crop_list, newline='') as csvfile:
    reader = csv.reader(csvfile)
    # The default turns an empty file into a header mismatch below instead of
    # a bare StopIteration.
    fieldnames = next(reader, None)
    crop_locs = list(reader)
  # Raised explicitly rather than with `assert` so that the check survives
  # `python -O`; the exception type is unchanged.
  if fieldnames != ['Name', 'tlx', 'tly']:
    raise AssertionError('Crop list file column names must be "Name,tlx,tly"')

  # Load the image and perform the crops. We raise an error if one of the
  # crops is adjusted in a way that moves it more than three pixels from its
  # initial location.
  image = imageio.imread(FLAGS.input_image, ignoregamma=True)
  for name, tlx, tly in crop_locs:
    tlx, tly = float(tlx), float(tly)
    logging.info('Cropping {} from {}, starting at tlx={}, tly={}'.format(
        name, FLAGS.input_image, tlx, tly))
    cropped_image, dx, dy = crop_word.centre_and_crop(
        image, FLAGS.rows, FLAGS.cols, tlx, tly, FLAGS.iters, postcrop)
    imageio.imwrite('{}{}.png'.format(FLAGS.output_prefix, name), cropped_image)
    logging.info('Total adjust: dtlx={:06.2f}, dtly={:06.2f}'.format(dx, dy))

    total_nudge = math.sqrt(dx*dx + dy*dy)
    # `not (a < b)` rather than `a >= b` so that a NaN nudge is rejected too.
    if not total_nudge < 3.0:
      raise AssertionError(
          'Excessive crop adjustment of {}; giving up.'.format(total_nudge))


if __name__ == '__main__':
  flags = _define_flags()
  FLAGS = flags.parse_args()
  main(FLAGS)
def update_crc(byte: int, crc: Tuple[int, int]) -> Tuple[int, int]:
  """Fold one new byte into a 16-bit CRC and return the updated CRC.

  The CRC is a pair of byte values; element 0 is the half that the routine
  reads as the high byte of R9 and element 1 the low byte. The statements
  below replay the 5100 instruction sequence one mnemonic at a time, tracking
  only the register bytes that the sequence actually reads or writes.

  Args:
    byte: the new data byte (only its low 8 bits are used).
    crc: current CRC as a 2-tuple of byte values (each masked to 8 bits).

  Returns:
    The updated CRC as a 2-tuple of byte values.
  """
  crc_hi = crc[0] & 0xff
  crc_lo = crc[1] & 0xff

  def swap_nibbles(value):
    """SWAP: exchange the upper and lower four bits of a byte."""
    return (value << 4 & 0xff) | (value >> 4)

  # MHL R14, R9
  # XOR R14, R10
  r14 = crc_hi ^ (byte & 0xff)

  # MOVE R1, R14
  # MOVE R15, R14
  r1 = r15 = r14

  # SWAP R15
  r15 = swap_nibbles(r15)

  # XOR R14, R15
  # CLR R14, #$0F
  # XOR R14, R9
  r14 = ((r14 ^ r15) & 0xf0) ^ crc_lo

  # MOVE R9, R1
  r9 = r1

  # CLR R15, #$F0
  # XOR R9, R15
  r15 &= 0x0f
  r9 ^= r15

  # ROR3 R1
  r1 = (r1 << 5 & 0xff) | (r1 >> 3)

  # MOVE R15, R1
  # CLR R15, #$E0
  # XOR R14, R15
  r15 = r1 & 0x1f
  r14 ^= r15

  # CLR R1, #$1F
  # XOR R9, R1
  r1 &= 0xe0
  r9 ^= r1

  # SWAP R15
  r15 = swap_nibbles(r15)

  # MOVE R1, R15
  r1 = r15

  # CLR R15, #$1F
  # XOR R9, R15
  r15 &= 0xe0
  r9 ^= r15

  # CLR R1, #$FE
  # XOR R14, R1
  r1 &= 0x01
  r14 ^= r1

  # MLH R9, R14
  # RET R8
  return (r14, r9)
starts: 131 | end = start + 0x1800 132 | data = all_data[start:end] 133 | 134 | crc = (0xff, 0xff) 135 | for byte in data: 136 | crc = update_crc(byte, crc) 137 | ok = 'OK' if crc == (0x0, 0x0) else '{:X}{:X}'.format(*crc) 138 | 139 | # data[-3] is the section identifier. 140 | print('{:X} to {:X}: {:X}; {}'.format(start, end-1, data[-3], ok)) 141 | 142 | 143 | #### MISCELLANEOUS #### 144 | 145 | 146 | if __name__ == '__main__': 147 | flags = _define_flags() 148 | FLAGS = flags.parse_args() 149 | main(FLAGS) 150 | -------------------------------------------------------------------------------- /crop_word.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | """Crop individual word images in grayscale images. 3 | 4 | A library and program for cropping individual 4-character word images from the 5 | cropped .png files created by steps 2-3. 6 | 7 | Licensing: 8 | 9 | This program and any supporting programs, software libraries, and documentation 10 | distributed alongside it are released into the public domain without any 11 | warranty. See the LICENSE file for details. 
def main(FLAGS):
  """Crop a single word image out of the input image and write it out.

  Args:
    FLAGS: parsed command-line flags. This function reads `verbose`,
        `brighten`, `input_image`, `output_image`, `rows`, `cols`,
        `top_left_x`, `top_left_y` and `iters`.
  """
  # Verbose logging if desired.
  if FLAGS.verbose: logging.getLogger().setLevel(logging.INFO)

  # Set up conditional brightening if desired.
  if FLAGS.brighten:
    thresh, denom, num = (float(x) for x in FLAGS.brighten.split(';'))
    postcrop = lambda x: np.uint8(x * num / denom) if np.max(x) < thresh else x
  else:
    # `centre_and_crop` calls `postcrop` on every extracted subimage, so "no
    # brightening" has to be an identity function. Passing None here used to
    # crash every run that did not supply --brighten.
    postcrop = lambda x: x

  # Perform the crop.
  image = imageio.imread(FLAGS.input_image, ignoregamma=True)
  cropped_image, dtlx, dtly = centre_and_crop(
      image,
      FLAGS.rows, FLAGS.cols, FLAGS.top_left_x, FLAGS.top_left_y, FLAGS.iters,
      postcrop)
  imageio.imwrite(FLAGS.output_image, cropped_image)
  logging.info('Total adjust: dtlx={:06.2f}, dtly={:06.2f}'.format(dtlx, dtly))
class Database(object):
  """Our database of image labels. Use as a context manager to ensure saving.

  The database maps image filenames to (label, count) pairs, where count is
  the number of times the label has been supplied for the image. It is backed
  by a CSV file whose header row is "Filename,Label,Count".
  """

  def __init__(self, filename, readonly=False, save_backups=True):
    """Open and load the image label database.

    Args:
      filename: the CSV file containing the database.
      readonly: whether to open the database in read-only mode.
      save_backups: whether to move old saved databases to backup locations
          (filename~<timestamp>~) before saving new data.

    Raises:
      ValueError: the CSV file's header row is not "Filename,Label,Count".
    """
    self._filename = filename
    self._readonly = readonly
    self._save_backups = save_backups

    if readonly:
      # `suppress()` with no exception types does nothing on entry or exit and
      # can be entered repeatedly, so it serves as a null context here.
      self._lock_db = contextlib.suppress()
      self._lock_io = contextlib.suppress()
    else:
      self._lock_db = threading.RLock()  # For data in memory.
      self._lock_io = threading.RLock()  # For data on disk.
    # Maps filename to (label, count).
    self._database = collections.OrderedDict()
    # Maps counts to sets of filenames.
    self._by_count = collections.defaultdict(set)
    self.reload()

  def __enter__(self):
    return self

  def __exit__(self, *args):
    # Saving happens even when the `with` body raised, so that labels entered
    # before the failure are not lost.
    if not self._readonly: self.save()

  def __len__(self):
    with self._lock_db:
      return len(self._database)

  def __contains__(self, filename):
    with self._lock_db:
      return filename in self._database

  def __getitem__(self, filename):
    with self._lock_db:
      return self._database[filename]

  def num_labels_with_counts_of_at_least(self, n):
    """How many labels have been supplied at least `n` times?"""
    with self._lock_db:
      return sum(len(s) for count, s in self._by_count.items() if count >= n)

  def num_labels_with_counts_of(self, n):
    """How many labels have been supplied exactly `n` times?"""
    with self._lock_db:
      return sum(len(s) for count, s in self._by_count.items() if count == n)

  def random(self):
    """Retrieve an image filename, any filename.

    Raises:
      ValueError: the database is empty.
    """
    with self._lock_db:
      # `random.sample` requires a sequence: handing it a dict view raises
      # TypeError on Python 3.11+, so materialise the keys first.
      return random.sample(list(self._database), 1)[0]

  def example_filename(self):
    """But if you just want an example filename, this is faster."""
    with self._lock_db:
      return next(iter(self._database))

  def random_label_with_count_of(self, n):
    """Retrieve a filename receiving the same label `n` times (or None)."""
    with self._lock_db:
      candidates = self._by_count[n]
      # As in `random()`, the set must become a sequence before sampling.
      return random.sample(list(candidates), 1)[0] if candidates else None

  def all_labels_with_counts_of(self, n):
    """Retrieve all labels with a count of `n` as (filename, label) tuples."""
    with self._lock_db:
      return [(fn, label) for fn, (label, count) in self._database.items()
              if count == n]

  def all_labels_with_counts_of_at_least(self, n):
    """Retrieve all labels with a count >= `n` as (filename, label) tuples."""
    with self._lock_db:
      return [(fn, label) for fn, (label, count) in self._database.items()
              if count >= n]

  def label(self, filename, label):
    """Add or confirm/disavow a label in the image label database.

    If the image has no label, then the image is given the label and a label
    count of 1. If the image has a label equal to label, then the image's
    label count is incremented. If the image has a label different to label,
    then the image's label count is decremented, but nothing is done with the
    new label.

    Args:
      filename: filename of image to (re)(un)label.
      label: proposed label for this image.

    Raises:
      RuntimeError: the database is open in read-only mode.
      KeyError: `filename` is not listed in the database.
    """
    self._check_writable()

    with self._lock_db:
      if filename not in self._database: raise KeyError(
          '{} is not an image file known to the database stored in {}.'.format(
              filename, self._filename))

      old_label, count = self._database[filename]
      if count == 0 or label == old_label:
        self._database[filename] = (label, count + 1)
        self._by_count[count].remove(filename)
        self._by_count[count + 1].add(filename)
      else:
        if count == 1: old_label = '0000'  # Default label for count == 0.
        self._database[filename] = (old_label, count - 1)
        self._by_count[count].remove(filename)
        self._by_count[count - 1].add(filename)

  def force(self, filename, label, count):
    """Force a particular label and count in the image label database.

    List an image in the database as having a particular label and count.
    This method can also add new image filenames to the database.

    Args:
      filename: filename of image to force-label.
      label: label for this image.
      count: label count for the label.

    Raises:
      RuntimeError: the database is open in read-only mode.
    """
    self._check_writable()

    with self._lock_db:
      if filename in self._database:
        self._by_count[self._database[filename][1]].remove(filename)
      self._database[filename] = (label, count)
      self._by_count[count].add(filename)

  def reload(self):
    """Reload the image label database from the CSV file.

    The in-memory database is only replaced once the whole file has been read
    successfully, so a malformed file leaves the previous contents intact.

    Raises:
      ValueError: the header row is missing or is not "Filename,Label,Count".
    """
    with self._lock_db, self._lock_io:
      database = collections.OrderedDict()
      by_count = collections.defaultdict(set)

      with open(self._filename, newline='') as csvfile:
        reader = csv.reader(csvfile)
        # A real exception rather than `assert`, which is stripped by
        # `python -O`; `None` covers a completely empty file.
        fieldnames = next(reader, None)
        if fieldnames != ['Filename', 'Label', 'Count']:
          raise ValueError(
              'Label database column names must be "Filename,Label,Count"')

        for imgfile, label, count in reader:
          count = int(count)
          database[imgfile] = (label, count)
          by_count[count].add(imgfile)

      self._database = database
      self._by_count = by_count

  def save(self):
    """Save the image label database to the CSV file, making backups.

    Raises:
      RuntimeError: the database is open in read-only mode.
    """
    self._check_writable()

    with self._lock_db:  # Make local copy.
      db_copy = copy.deepcopy(self._database)

    with self._lock_io:
      # Move the current database file to a backup location.
      if self._save_backups:
        shutil.move(self._filename,
                    '{}~{}~'.format(self._filename, int(time.time())))

      # Write a new database file from the snapshot taken above. The live
      # dict must not be iterated here: the in-memory lock is no longer held,
      # so another thread could be mutating it while we write.
      with open(self._filename, 'w', newline='') as csvfile:
        writer = csv.writer(csvfile, dialect='unix')
        writer.writerow(['Filename', 'Label', 'Count'])
        for imgfile, (label, count) in db_copy.items():
          writer.writerow([imgfile, label, count])

  def _check_writable(self):
    """Raise `RuntimeError` if the database is in read-only mode."""
    if self._readonly: raise RuntimeError(
        'The label database "{}" has been opened in read-only mode and will '
        'not be mutated or overwritten.'.format(self._filename))
def main(FLAGS):
  """Run the interactive labeling session until finished or the user quits.

  The label database is opened as a context manager for the whole session.

  Args:
    FLAGS: parsed command-line flags. This function reads `label_database`,
        `mark_apl_ros_c000_zeros`, `num_labels`, `label_bias` and `scale`.
  """
  print('Loading...')
  with label_database.Database(FLAGS.label_database) as db:
    if FLAGS.mark_apl_ros_c000_zeros:
      print('Marking APL ROS known-zeros at C000...')
      mark_apl_ros_c000_zeros(db)

    actions_so_far = 0  # Labeling actions undertaken before the current one.
    while True:
      filename, image = next_image_and_housekeeping(
          db, FLAGS.num_labels, FLAGS.label_bias, FLAGS.scale, actions_so_far)
      actions_so_far += 1

      # No filename means there is nothing left to do.
      if filename is None:
        print('You are finished! Thank you for your hard work!')
        break

      label = quiz_user_for_label(image)
      if label == 'Q':
        print('Quitting...')
        break

      if label:
        db.label(filename, label)
      else:
        print('Skipping this image.')
def next_image_and_housekeeping(db, num_labels, label_bias, scale, act_count):
  """Retrieve the next image to label, and do some housekeeping.

  Args:
    db: a label_database.Database object.
    num_labels: Number of verified labels desired by the user.
    label_bias: A bias that controls the degree to which we ought to load a new
        image to label rather than an already-labeled image for verification.
    scale: amount of scaling to apply to loaded images.
    act_count: how many labeling actions the user has undertaken in this
        session prior to now. This function will save the database to disk after
        every 100 labeling actions.

  Returns:
    (None, None) if there are already `num_labels` verified labels in the
    database, or if the database holds no image with a label count of 0 or 1
    (so nothing remains to label or verify). Otherwise, a 2-tuple whose
    elements are:
    [0]: filename of an image to label.
    [1]: wand.image.Image object of the (scaled) image to label.
  """
  # Save the database occasionally, and find out if we have work to do.
  if (act_count + 1) % 100 == 0: db.save()
  num_done = db.num_labels_with_counts_of_at_least(2)
  if num_done >= num_labels: return None, None

  # Choose an image to label: either a novel one or an unverified one.
  num_unverified = db.num_labels_with_counts_of(1)
  fraction_unlabeled = (num_labels - num_done - num_unverified) / num_labels
  novel_image_probability = fraction_unlabeled * (1 + label_bias)
  verify = num_unverified > 0 and random.random() > novel_image_probability
  # Label counts to draw from: 1 is an unverified label, 0 is a novel image.
  preferred, fallback = (1, 0) if verify else (0, 1)

  # The preferred pool can be empty (e.g. every image already has a label but
  # some still await verification), in which case the database hands back
  # None. Try the other pool before giving up, rather than passing None on to
  # the image loader.
  filename = db.random_label_with_count_of(preferred)
  if filename is None:
    filename = db.random_label_with_count_of(fallback)
  if filename is None: return None, None

  # Attempt to load the image, and scale it.
  image = wand.image.Image(filename=filename)
  image.resize(width=round(image.width * scale),
               height=round(image.height * scale))

  return filename, image
7 | A very short version of this file might look like this: 8 | 9 | ======== Label databases: 10 | 0: path/to/a/label_file.csv 11 | 1: path/to/another/label_file.csv 12 | ======== Images: 13 | 0: ./images/set1/0000_0_0.png 14 | 1: ./images/set1/0000_0_1.png 15 | 2: ./images/set1/0000_0_2.png 16 | 3: ./images/set1/0000_0_3.png 17 | 4: ./images/set1/0000_0_4.png 18 | ... 19 | ======== Labels: 20 | 1000: 12AB@0,0/0,16/0,32 12A8@1,0/1,16/1,32 21 | 1001: 34CD@0,1/0,17 34C0@0,33/1,1/1,17 84CD@1,33 22 | ... 23 | 24 | The format of the labels line is as follows: 25 | 26 | :