├── letter.jpeg ├── letter.js ├── Rakefile ├── spec └── javascripts │ ├── ManuscriptSpec.js │ └── support │ ├── jasmine_helper.rb │ └── jasmine.yml ├── README.md ├── public └── javascripts │ └── Manuscript.js └── letter.html /letter.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ultrasaurus/hocr-javascript/HEAD/letter.jpeg -------------------------------------------------------------------------------- /letter.js: -------------------------------------------------------------------------------- 1 | 2 | $(document).ready(function() { 3 | $(".ocrx_word").attr('style', function() { 4 | return Manuscript.bboxToStyle(this.title); 5 | }); 6 | 7 | }); 8 | -------------------------------------------------------------------------------- /Rakefile: -------------------------------------------------------------------------------- 1 | 2 | begin 3 | require 'jasmine' 4 | load 'jasmine/tasks/jasmine.rake' 5 | rescue LoadError 6 | task :jasmine do 7 | abort "Jasmine is not available. In order to run jasmine, you must: (sudo) gem install jasmine" 8 | end 9 | end 10 | -------------------------------------------------------------------------------- /spec/javascripts/ManuscriptSpec.js: -------------------------------------------------------------------------------- 1 | describe("Manuscript", function() { 2 | 3 | describe ("class methods", function() { 4 | it("#bboxToStyle", function() { 5 | result = Manuscript.bboxToStyle("bbox 197 272 249 281"); 6 | expect(result).toEqual("left:197px; top:272px; right:249px; bottom:281px; "); 7 | 8 | }); 9 | 10 | }); 11 | }); -------------------------------------------------------------------------------- /spec/javascripts/support/jasmine_helper.rb: -------------------------------------------------------------------------------- 1 | #Use this file to set/override Jasmine configuration options 2 | #You can remove it if you don't need it. 
3 | #This file is loaded *after* jasmine.yml is interpreted. 4 | # 5 | #Example: using a different boot file. 6 | #Jasmine.configure do |config| 7 | # config.boot_dir = '/absolute/path/to/boot_dir' 8 | # config.boot_files = lambda { ['/absolute/path/to/boot_dir/file.js'] } 9 | #end 10 | # 11 | 12 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Experimenting with OCR interface 2 | 3 | sample data source: 4 | [Baekeland citizenship letter](http://siris-archives.si.edu/ipac20/ipac.jsp?&profile=all&source=~!siarchives&uri=full=3100001~!311219~!0#focus) from National Museum of American History Archives 5 | 6 | ``` 7 | brew install tesseract 8 | tesseract letter.jpeg letter hocr 9 | 10 | ``` 11 | 12 | 13 | ## testing with Jasmine 14 | 15 | ``` 16 | gem install jasmine 17 | rake jasmine 18 | ``` 19 | 20 | see tests run at: http://localhost:8888 21 | 22 | 23 | 24 | -------------------------------------------------------------------------------- /public/javascripts/Manuscript.js: -------------------------------------------------------------------------------- 1 | function Manuscript() { 2 | } 3 | 4 | // "bbox 197 272 249 281" 5 | // hOCR format: https://docs.google.com/document/d/1QQnIQtvdAC_8n92-LhwPcjtAUFwBlzE8EWnKAxlgVf0/preview 6 | // bbox x0 y0 x1 y1 7 | Manuscript.bboxToStyle = function(bbox_str) { 8 | arr = bbox_str.split(" "); 9 | left_pos = "left:"+arr[1]+"px; "; 10 | top_pos = "top:"+arr[2]+"px; "; 11 | right_pos = "right:"+arr[3]+"px; "; 12 | bottom_pos = "bottom:"+arr[4]+"px; "; 13 | return left_pos + top_pos + right_pos + bottom_pos; 14 | }; -------------------------------------------------------------------------------- /spec/javascripts/support/jasmine.yml: -------------------------------------------------------------------------------- 1 | # src_files 2 | # 3 | # Return an array of filepaths relative to src_dir to include 
before jasmine specs. 4 | # Default: [] 5 | # 6 | # EXAMPLE: 7 | # 8 | # src_files: 9 | # - lib/source1.js 10 | # - lib/source2.js 11 | # - dist/**/*.js 12 | # 13 | src_files: 14 | - public/javascripts/**/*.js 15 | 16 | # stylesheets 17 | # 18 | # Return an array of stylesheet filepaths relative to src_dir to include before jasmine specs. 19 | # Default: [] 20 | # 21 | # EXAMPLE: 22 | # 23 | # stylesheets: 24 | # - css/style.css 25 | # - stylesheets/*.css 26 | # 27 | stylesheets: 28 | 29 | # helpers 30 | # 31 | # Return an array of filepaths relative to spec_dir to include before jasmine specs. 32 | # Default: ["helpers/**/*.js"] 33 | # 34 | # EXAMPLE: 35 | # 36 | # helpers: 37 | # - helpers/**/*.js 38 | # 39 | helpers: 40 | - 'helpers/**/*.js' 41 | # spec_files 42 | # 43 | # Return an array of filepaths relative to spec_dir to include. 44 | # Default: ["**/*[sS]pec.js"] 45 | # 46 | # EXAMPLE: 47 | # 48 | # spec_files: 49 | # - **/*[sS]pec.js 50 | # 51 | spec_files: 52 | - '**/*[sS]pec.js' 53 | 54 | # src_dir 55 | # 56 | # Source directory path. Your src_files must be returned relative to this path. Will use root if left blank. 57 | # Default: project root 58 | # 59 | # EXAMPLE: 60 | # 61 | # src_dir: public 62 | # 63 | src_dir: 64 | 65 | # spec_dir 66 | # 67 | # Spec directory path. Your spec_files must be returned relative to this path. 68 | # Default: spec/javascripts 69 | # 70 | # EXAMPLE: 71 | # 72 | # spec_dir: spec/javascripts 73 | # 74 | spec_dir: 75 | 76 | # spec_helper 77 | # 78 | # Ruby file that Jasmine server will require before starting. 
79 | # Returned relative to your root path 80 | # Default spec/support/jasmine_helper.rb 81 | # 82 | # EXAMPLE: 83 | # 84 | # spec_helper: spec/support/jasmine_helper.rb 85 | # 86 | spec_helper: spec/support/jasmine_helper.rb 87 | 88 | 89 | -------------------------------------------------------------------------------- /letter.html: -------------------------------------------------------------------------------- 1 | 2 | 4 | 5 |
6 |
34 | 37 | atrial or 38 | 39 |
40 |43 | LOUIS N. ELLRODT 44 | 45 |
46 |49 | COUNTY CLERK 50 | 51 |
52 |55 | 56 | 57 |
58 |61 | Goxmttj ofwestcflester 62 | 63 | ¢o1ttt?£ouse 64 | 65 | ?53hite?°lai1zs,?’(.?3. 66 | 67 |
68 |71 | TO WHOM IT MAY CONCERN:: 72 | 73 |
74 |77 | This is to certify that the records of 78 | 79 | the Supreme Court for the State of New York, County 80 | 81 | of Westcheeter, at White Plains, New York, show that 82 | 83 | one LEO BAEKELAND was admitted to citizenship in 84 | 85 | said court on December 16, 1919 86 | 87 |
88 |91 | I DO FURTER CERTIFY that the record 92 | 93 | of the above naturalization was completed as per 94 | 95 | order filed Dec. 16, 1919, file #2774—1919. 96 | 97 |
98 |101 | In testimony whereof I have 102 | 103 | hereunto subscribed my name 104 | 105 | and affixed the seal of the 106 | 107 | said court this first“7<1ay’.. of‘ 108 | 109 | Decersfferrfin the year of our 110 | 111 | Lord one thousand nine hun- 112 | 113 | dred and twenty—two and the 114 | 115 | one hundred and forty-seventh 116 | 117 | year of the independence of 118 | 119 | the United States. 120 | 121 |
122 |125 | Clerk 126 | 127 |
128 |131 | THIS IS NOT A CERTIFICATE OF NATURALIZATION. 132 | 133 |
134 |