├── .gitignore
├── .npmrc
├── README.md
├── binarization-bak.js
├── binarization.html
├── binarization.js
├── bit.js
├── cnn.html
├── cnn.md
├── data
│   ├── data.js
│   ├── index.js
│   ├── model.js
│   ├── package.json
│   ├── read.js
│   ├── write.js
│   └── yarn.lock
├── demo.gif
├── docs
│   ├── assets
│   │   ├── cnn.d418443f.js
│   │   ├── index.070df02d.js
│   │   ├── index.24c9cc44.js
│   │   ├── index.96676500.css
│   │   ├── map.efa40bbb.png
│   │   ├── model.368228f4.json
│   │   ├── test.137e7c6f.buffer
│   │   └── train.9ab62db8.buffer
│   ├── cnn.html
│   ├── favicon.ico
│   ├── index.html
│   ├── model.json
│   └── model.weights.bin
├── index.html
├── lsb.js
├── map.jpeg
├── ocr.md
├── ocr
│   ├── ocr-0.jpg
│   ├── ocr-1.jpeg
│   ├── ocr-10.png
│   ├── ocr-11.png
│   ├── ocr-12.png
│   ├── ocr-13.jpeg
│   ├── ocr-14.jpeg
│   ├── ocr-15.jpeg
│   ├── ocr-16.jpeg
│   ├── ocr-17.png
│   ├── ocr-18.png
│   ├── ocr-19.png
│   ├── ocr-2.jpeg
│   ├── ocr-3.jpeg
│   ├── ocr-4.jpeg
│   ├── ocr-5.png
│   ├── ocr-6.png
│   ├── ocr-7.jpeg
│   ├── ocr-8.png
│   ├── ocr-9.png
│   └── ocr-map.jpeg
├── package.json
├── pnpm-lock.yaml
├── public
│   ├── favicon.ico
│   ├── model.json
│   └── model.weights.bin
├── src
│   ├── App.vue
│   ├── assets
│   │   ├── map.jpeg
│   │   ├── map.png
│   │   └── sheikah-icon
│   │       ├── 0.svg
│   │       ├── 1.svg
│   │       ├── 2.svg
│   │       ├── 3.svg
│   │       ├── 4.svg
│   │       ├── 5.svg
│   │       ├── 6.svg
│   │       ├── 7.svg
│   │       ├── 8.svg
│   │       ├── 9.svg
│   │       ├── a.svg
│   │       ├── b.svg
│   │       ├── c.svg
│   │       ├── d.svg
│   │       ├── e.svg
│   │       ├── exclam.svg
│   │       ├── f.svg
│   │       ├── g.svg
│   │       ├── h.svg
│   │       ├── hyphen.svg
│   │       ├── i.svg
│   │       ├── j.svg
│   │       ├── k.svg
│   │       ├── l.svg
│   │       ├── m.svg
│   │       ├── n.svg
│   │       ├── o.svg
│   │       ├── p.svg
│   │       ├── period.svg
│   │       ├── q.svg
│   │       ├── question.svg
│   │       ├── r.svg
│   │       ├── s.svg
│   │       ├── t.svg
│   │       ├── u.svg
│   │       ├── v.svg
│   │       ├── w.svg
│   │       ├── x.svg
│   │       ├── y.svg
│   │       └── z.svg
│   ├── cnn
│   │   ├── data.js
│   │   └── index.js
│   ├── components
│   │   ├── Download.vue
│   │   ├── ParsePanel.vue
│   │   ├── WordIcon
│   │   │   ├── Main.vue
│   │   │   ├── icon-map.ts
│   │   │   └── regist-script.ts
│   │   └── WordsPanel.vue
│   ├── data
│   │   ├── model.json
│   │   ├── model.weights.bin
│   │   ├── test.buffer
│   │   ├── test.json
│   │   ├── train.buffer
│   │   ├── train.json
│   │   └── words.json
│   ├── main.ts
│   ├── shims-vue.d.ts
│   └── utils
│       ├── export-image.ts
│       ├── image-info.ts
│       └── image-ocr.ts
├── tsconfig.json
└── vite.config.ts
/.gitignore: -------------------------------------------------------------------------------- 1 | node_modules 2 | .DS_Store 3 | dist-ssr 4 | *.local 5 | **/dataset/** --------------------------------------------------------------------------------
/.npmrc: -------------------------------------------------------------------------------- 1 | registry = https://registry.npmmirror.com --------------------------------------------------------------------------------
/README.md: -------------------------------------------------------------------------------- 1 | # Sheikah Script Generator & Translator 2 | 3 | Tool: https://kinglisky.github.io/zelda-words/index.html 4 | 5 | ![demo](./demo.gif) 6 | 7 | Development: 8 | 9 | ``` 10 | yarn && yarn dev 11 | ``` 12 | 13 | For the implementation details, see: 14 | 15 | - [A simple take on OCR, via Sheikah script translation](https://juejin.cn/post/6941003131891220517) 16 | - [CNN handwritten-character classification and Sheikah script translation](https://juejin.cn/post/7015918983723352077) --------------------------------------------------------------------------------
/binarization-bak.js: -------------------------------------------------------------------------------- 1 | (async function () { 2 | // canvas drawImage is subject to cross-origin restrictions, so fetch the image into a blob URL first 3 | const loadImage = (url) => { 4 | return fetch(url) 5 | .then(res => res.blob()) 6 | .then(blob => URL.createObjectURL(blob)) 7 | .then(blobUrl => { 8 | 9 | return new Promise((resolve, reject) => { 10 | const img = new Image(); 11 |
img.onload = () => resolve(img); 12 | img.onerror = (e) => reject(e); 13 | img.src = blobUrl; 14 | }); 15 | }); 16 | }; 17 | 18 | const drawToCanvas = (image) => { 19 | const { naturalWidth: width, naturalHeight: height } = image; 20 | const canvas = document.createElement('canvas'); 21 | canvas.width = width; 22 | canvas.height = height; 23 | const ctx = canvas.getContext('2d'); 24 | ctx.drawImage(image, 0, 0); 25 | return canvas; 26 | } 27 | 28 | const canvasToGray = (canvas) => { 29 | const ctx = canvas.getContext('2d'); 30 | const data = ctx.getImageData(0, 0, canvas.width, canvas.height); 31 | const calculateGray = (r, g, b) => parseInt(r * 0.299 + g * 0.587 + b * 0.114); 32 | const grayData = []; 33 | for (let x = 0; x < data.width; x++) { 34 | for (let y = 0; y < data.height; y++) { 35 | const idx = (x + y * data.width) * 4; 36 | const r = data.data[idx + 0]; 37 | const g = data.data[idx + 1]; 38 | const b = data.data[idx + 2]; 39 | const gray = calculateGray(r, g, b); 40 | grayData.push(gray); 41 | } 42 | } 43 | return grayData; 44 | }; 45 | 46 | // image threshold from the average pixel value 47 | const average = (grayData) => { 48 | let sum = 0; 49 | for (let i = 0; i < grayData.length; i += 1) { 50 | sum += grayData[i]; 51 | } 52 | return sum / grayData.length; 53 | }; 54 | 55 | const otsu = (grayData) => { 56 | let ptr = 0; 57 | // histogram of gray levels 0-255, all counts initialized to 0 58 | let histData = Array(256).fill(0); 59 | let total = grayData.length; 60 | 61 | while (ptr < total) { 62 | let h = grayData[ptr++]; 63 | histData[h]++; 64 | } 65 | // weighted total (gray level x count) 66 | let sum = 0; 67 | for (let i = 0; i < 256; i++) { 68 | sum += i * histData[i]; 69 | } 70 | // number of background pixels (below the threshold) 71 | let wB = 0; 72 | // number of foreground pixels (above the threshold) 73 | let wF = 0; 74 | // background weighted sum (gray level x count) 75 | let sumB = 0; 76 | // maximum between-class variance seen so far 77 | let varMax = 0; 78 | // threshold 79 | let threshold = 0; 80 | 81 | for (let t = 0; t < 256; t++) { 82 | // accumulate the background (below threshold) count 83 | wB += histData[t]; 84 | if (wB === 0) continue; 85 | // the foreground (above threshold) count is the remainder 86 | wF = total - wB; 87 | if (wF === 0) break; 88 | // accumulate the background weighted sum (gray level x count) 89 | sumB += t * histData[t]; 90 | 91 | // mean gray level of the background (below threshold) 92 | let mB = sumB / wB; 93 | // mean gray level of the foreground (above threshold) 94 | let mF = (sum - sumB) / wF; 95 | // between-class variance 96 | let varBetween = wB * wF * (mB - mF) ** 2; 97 | 98 | if (varBetween > varMax) { 99 | varMax = varBetween; 100 | threshold = t; 101 | } 102 | } 103 | 104 | return threshold; 105 | }; 106 | 107 | const binaryzationOutput = (originCanvas, threshold) => { 108 | const ctx = originCanvas.getContext('2d'); 109 | const imageData = ctx.getImageData(0, 0, originCanvas.width, originCanvas.height); 110 | const { width, height, data } = imageData; 111 | // the first pixel's value is the background color 112 | const head = (data[0] + data[1] + data[2]) / 3 | 0; 113 | // if the background is above the threshold, swap the foreground/background output values 114 | const color = head > threshold 115 | ? { foreground: 0, background: 255} 116 | : { foreground: 255, background: 0 }; 117 | for (let x = 0; x < width; x++) { 118 | for (let y = 0; y < height; y++) { 119 | const idx = (x + y * width) * 4; 120 | const avg = (data[idx] + data[idx + 1] + data[idx + 2]) / 3 | 0; 121 | const v = avg > threshold ?
color.foreground : color.background; 122 | data[idx] = v; 123 | data[idx + 1] = v; 124 | data[idx + 2] = v; 125 | data[idx + 3] = 255; 126 | } 127 | } 128 | ctx.putImageData(imageData, 0, 0); 129 | return originCanvas.toDataURL(); 130 | } 131 | 132 | const binaryzationHash = (originCanvas, threshold) => { 133 | const ctx = originCanvas.getContext('2d'); 134 | const imageData = ctx.getImageData(0, 0, originCanvas.width, originCanvas.height); 135 | const { width, height, data } = imageData; 136 | // the first pixel's value is the background color 137 | const head = (data[0] + data[1] + data[2]) / 3 | 0; 138 | // if the background is above the threshold, swap the foreground/background output values 139 | const color = head > threshold 140 | ? { foreground: 0, background: 255} 141 | : { foreground: 255, background: 0 }; 142 | const hash = []; 143 | for (let x = 0; x < width; x++) { 144 | for (let y = 0; y < height; y++) { 145 | const idx = (x + y * width) * 4; 146 | const avg = (data[idx] + data[idx + 1] + data[idx + 2]) / 3 | 0; 147 | const v = avg > threshold ? color.foreground : color.background; 148 | hash.push(v ? 1 : 0); 149 | } 150 | } 151 | return hash; 152 | } 153 | 154 | const url = 'https://markdown-write.oss-cn-hangzhou.aliyuncs.com/page.png'; 155 | const image = await loadImage(url); 156 | const canvas = drawToCanvas(image); 157 | const grayData = canvasToGray(canvas); 158 | // const threshold = average(grayData); 159 | const threshold = otsu(grayData); 160 | const result = binaryzationOutput(canvas, threshold); 161 | console.log('res', result); 162 | })(); -------------------------------------------------------------------------------- /binarization.html: -------------------------------------------------------------------------------- [markup lost in extraction: a demo page titled "BINARYZATION OUTPUT" with three captioned blocks, "原图" (original image, an img.input-image demo image), "灰度" (grayscale, a canvas.output-gray), and "二值化" (binarization, a canvas.output-binaryzation), driven by binarization.js]
-------------------------------------------------------------------------------- /binarization.js: -------------------------------------------------------------------------------- 1 | (async function () { 2 | const drawToCanvas = (canvas, image) => { 3 | const ctx = canvas.getContext('2d'); 4 | ctx.drawImage(image, 0, 0); 5 | return canvas; 6 | } 7 | 8 | const canvasToGray = (canvas) => { 9 | const ctx = canvas.getContext('2d'); 10 | const data = ctx.getImageData(0, 0, canvas.width, canvas.height); 11 | const calculateGray = (r, g, b) => parseInt(r * 0.299 + g * 0.587 + b * 0.114); 12 | const grayData = []; 13 | for (let x = 0; x < data.width; x++) { 14 | for (let y = 0; y < data.height; y++) { 15 | const idx = (x + y * data.width) * 4; 16 | const r = data.data[idx + 0]; 17 | const g = data.data[idx + 1]; 18 | const b = data.data[idx + 2]; 19 | const gray = calculateGray(r, g, b); 20 | data.data[idx + 0] = gray; 21 | data.data[idx + 1] = gray; 22 | data.data[idx + 2] = gray; 23 | data.data[idx + 3] = 255; 24 | grayData.push(gray); 25 | } 26 | } 27 | ctx.putImageData(data, 0, 0); 28 | return grayData; 29 | }; 30 | 31 | // image threshold from the average pixel value 32 | const average = (grayData) => { 33 | let sum = 0; 34 | for (let i = 0; i < grayData.length; i += 1) { 35 | sum += grayData[i]; 36 | } 37 | return sum / grayData.length; 38 | }; 39 | 40 | const otsu = (grayData) => { 41 | let ptr = 0; 42 | // histogram of gray levels 0-255, all counts initialized to 0 43 | let histData = Array(256).fill(0); 44 | let total = grayData.length; 45 | 46 | while (ptr < total) { 47 | let h = grayData[ptr++]; 48 | histData[h]++; 49 | } 50 | // weighted total (gray level x count) 51 | let sum = 0; 52 | for (let i = 0; i < 256; i++) { 53 | sum += i * histData[i]; 54 | } 55 | // number of background pixels (below the threshold) 56 | let wB = 0; 57 | // number of foreground pixels (above the threshold) 58 | let wF = 0; 59 | // background weighted sum (gray level x count) 60 | let sumB = 0; 61 | // maximum between-class variance seen so far 62 | let varMax = 0; 63 | // threshold 64 | let threshold = 0; 65 | 66 | for (let t = 0; t < 256; t++) { 67 | // accumulate the background (below threshold) count 68 | wB += histData[t]; 69 | if (wB === 0) continue; 70 | // the foreground (above threshold) count is the remainder 71 | wF = total - wB; 72 | if (wF === 0) break; 73 | // accumulate the background weighted sum (gray level x count) 74 | sumB += t * histData[t]; 75 | 76 | // mean gray level of the background (below threshold) 77 | let mB = sumB / wB; 78 | // mean gray level of the foreground (above threshold) 79 | let mF = (sum - sumB) / wF; 80 | // between-class variance 81 | let varBetween = wB * wF * (mB - mF) ** 2; 82 | 83 | if (varBetween > varMax) { 84 | varMax = varBetween; 85 | threshold = t; 86 | } 87 | } 88 | 89 | return threshold; 90 | }; 91 | 92 | const canvasToBinaryzation = (canvas, threshold) => { 93 | const ctx = canvas.getContext('2d'); 94 | const imageData = ctx.getImageData(0, 0, canvas.width, canvas.height); 95 | const { width, height, data } = imageData; 96 | // the first pixel's value is the background color 97 | const head = data[0]; 98 | // if the background is above the threshold, swap the foreground/background output values 99 | const color = head > threshold 100 | ? { foreground: 0, background: 255} 101 | : { foreground: 255, background: 0 }; 102 | const bits = []; 103 | for (let x = 0; x < width; x++) { 104 | for (let y = 0; y < height; y++) { 105 | const idx = (x + y * width) * 4; 106 | const avg = (data[idx] + data[idx + 1] + data[idx + 2]) / 3 | 0; 107 | const v = avg > threshold ? color.foreground : color.background; 108 | data[idx] = v; 109 | data[idx + 1] = v; 110 | data[idx + 2] = v; 111 | data[idx + 3] = 255; 112 | bits.push(v > 0 ?
1 : 0); 113 | } 114 | } 115 | ctx.putImageData(imageData, 0, 0); 116 | return bits; 117 | } 118 | 119 | const image = document.querySelector('.input-image'); 120 | const grayCanvas = document.querySelector('.output-gray'); 121 | const binaryzationCanvas = document.querySelector('.output-binaryzation'); 122 | drawToCanvas(grayCanvas, image); 123 | const grayData = canvasToGray(grayCanvas); 124 | const threshold = average(grayData); 125 | // const threshold = otsu(grayData); 126 | drawToCanvas(binaryzationCanvas, grayCanvas); 127 | const bits = canvasToBinaryzation(binaryzationCanvas, threshold); 128 | console.log(bits); 129 | })(); -------------------------------------------------------------------------------- /bit.js: -------------------------------------------------------------------------------- 1 | (function () { 2 | function paddingLeft(bits) { 3 | return ('00000000' + bits).slice(-8); 4 | } 5 | 6 | function loadImage (url) { 7 | return fetch(url) 8 | .then(res => res.blob()) 9 | .then(blob => URL.createObjectURL(blob)) 10 | .then(blobUrl => { 11 | 12 | return new Promise((resolve, reject) => { 13 | const img = new Image(); 14 | img.onload = () => resolve(img); 15 | img.onerror = (e) => reject(e); 16 | img.src = blobUrl; 17 | }); 18 | }); 19 | }; 20 | 21 | function write(data) { 22 | const bits = data.reduce((s, it) => s + paddingLeft(it.toString(2)), ''); 23 | const size = 100; 24 | const width = size * bits.length; 25 | const canvas = document.createElement('canvas'); 26 | canvas.width = width; 27 | canvas.height = size; 28 | const ctx = canvas.getContext('2d'); 29 | ctx.fillStyle = '#000000'; 30 | ctx.fillRect(0, 0, width, size); 31 | for (let i = 0; i < bits.length; i++) { 32 | if (Number(bits[i])) { 33 | ctx.fillStyle = '#020202'; 34 | ctx.fillRect(i * size, 0, size, size); 35 | } 36 | } 37 | return canvas.toDataURL(); 38 | } 39 | 40 | async function read(url) { 41 | const image = await loadImage(url); 42 | const canvas = document.createElement('canvas'); 43 | canvas.width = image.naturalWidth; 44 | canvas.height = image.naturalHeight; 45 | const ctx = canvas.getContext('2d'); 46 | ctx.drawImage(image, 0, 0); 47 | const size = 100; 48 | const bits = []; 49 | for (let i = 0; i < 16; i++) { 50 | const imageData = ctx.getImageData(i * size, 0, size, size); 51 | const r = imageData.data[0]; 52 | const g = imageData.data[1]; 53 | const b = imageData.data[2]; 54 | bits.push(r + g + b === 0 ?
0 : 1); 55 | } 56 | return bits; 57 | } 58 | 59 | const url = write([100, 200]); 60 | console.log(url); 61 | read(url).then(bits => console.log(bits)); 62 | })(); -------------------------------------------------------------------------------- /cnn.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 10 | ZELDA WORDS(CNN) 11 | 12 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | -------------------------------------------------------------------------------- /data/data.js: -------------------------------------------------------------------------------- 1 | const fs = require('fs'); 2 | const path = require('path'); 3 | const util = require('util'); 4 | const tf = require('@tensorflow/tfjs'); 5 | const readFile = util.promisify(fs.readFile); 6 | const WORDS = require('./words.json'); 7 | const TRAIN = require('../src/cnn/train.json'); 8 | const TEST = require('../src/cnn/test.json'); 9 | const loadBuffer = async (data) => { 10 | const { 11 | count, 12 | width, 13 | height, 14 | buffer: bufferName, 15 | indexs, 16 | } = data; 17 | const buffers = await readFile(path.join(__dirname, '../src/cnn', bufferName)); 18 | const images = new Float32Array(buffers); 19 | for (let i = 0; i < images.length; i++) { 20 | images[i] = images[i] / 255; 21 | } 22 | const labels = new Int32Array(indexs); 23 | return { 24 | count, 25 | width, 26 | height, 27 | images, 28 | labels, 29 | }; 30 | } 31 | 32 | class Dataset { 33 | constructor() { 34 | this.dataset = {}; 35 | } 36 | 37 | async loadData() { 38 | const train = await loadBuffer(TRAIN); 39 | const test = await loadBuffer(TEST); 40 | this.dataset = { 41 | train, 42 | test, 43 | }; 44 | } 45 | 46 | getData(key) { 47 | const target = this.dataset[key]; 48 | const imagesShape = [target.count, target.height, target.height, 1 ]; 49 | return { 50 | images: tf.tensor4d(target.images, imagesShape), 51 | labels: tf.oneHot(tf.tensor1d(target.labels, 'int32'), WORDS.length).toFloat(), 52 | }; 53 | } 54 | 55 | getTrainData() { 56 | return this.getData('train'); 57 | } 58 | 59 | getTestData() { 60 | return this.getData('test'); 61 | } 62 | } 63 | 64 | module.exports = new Dataset(); 65 | -------------------------------------------------------------------------------- /data/index.js: -------------------------------------------------------------------------------- 1 | const data = require('./data'); 2 | const model = require('./model'); 3 | const yargs = require('yargs/yargs'); 4 | const { hideBin } = require('yargs/helpers'); 5 | const argv = yargs(hideBin(process.argv)).argv; 6 | 7 | async function run(epochs, batchSize, modelSavePath) { 8 | await data.loadData(); 9 | 10 | model.summary(); 11 | 12 | const { 13 | images: trainImages, 14 | labels: trainLabels, 15 | } = data.getTrainData(); 16 | // console.log({ trainImages, trainLabels }); 17 | const validationSplit = 0.15; 18 | await model.fit(trainImages, trainLabels, { 19 | verbose: 1, 20 | epochs, 21 | batchSize, 22 | validationSplit, 23 | callbacks: { 24 | onBatchEnd: async (batch, logs) => { 25 | console.log(`onBatchEnd: batch ${batch} ---> loss: ${logs.loss} acc: ${logs.acc}`); 26 | }, 27 | onEpochEnd: async (epoch, logs) => { 28 | console.log(`onEpochEnd: epoch ${epoch} ---> val_loss: ${logs.val_loss} val_acc: ${logs.val_acc}`); 29 | } 30 | } 31 | }); 32 | 33 | const { 34 | images: testImages, 35 | labels: testLabels, 36 | } = data.getTestData(); 37 | 38 | const evalOutput = model.evaluate(testImages, testLabels); 39 | 40 | console.log( 41 | `\nEvaluation 
result:\n` + 42 | ` Loss = ${evalOutput[0].dataSync()[0].toFixed(3)}; ` + 43 | `Accuracy = ${evalOutput[1].dataSync()[0].toFixed(3)}`); 44 | 45 | if (modelSavePath) { 46 | await model.save(`file://${modelSavePath}`); 47 | console.log(`Saved model to path: ${modelSavePath}`); 48 | } 49 | } 50 | 51 | const epochs = Number(argv.epochs || 1); 52 | const batchSize = Number(argv.batch_size || 10); 53 | const modelSavePath = argv.model_save_path || ''; 54 | console.log({ 55 | epochs, 56 | batchSize, 57 | modelSavePath, 58 | }); 59 | run(epochs, batchSize, modelSavePath); 60 | -------------------------------------------------------------------------------- /data/model.js: -------------------------------------------------------------------------------- 1 | const tf = require('@tensorflow/tfjs'); 2 | const WORDS = require('./words.json'); 3 | 4 | const model = tf.sequential(); 5 | model.add(tf.layers.conv2d({ 6 | inputShape: [28, 28, 4], 7 | filters: 32, 8 | kernelSize: 3, 9 | activation: 'relu', 10 | })); 11 | model.add(tf.layers.conv2d({ 12 | filters: 32, 13 | kernelSize: 3, 14 | activation: 'relu', 15 | })); 16 | model.add(tf.layers.maxPooling2d({ poolSize: [2, 2] })); 17 | model.add(tf.layers.conv2d({ 18 | filters: 64, 19 | kernelSize: 3, 20 | activation: 'relu', 21 | })); 22 | model.add(tf.layers.conv2d({ 23 | filters: 64, 24 | kernelSize: 3, 25 | activation: 'relu', 26 | })); 27 | model.add(tf.layers.maxPooling2d({ poolSize: [2, 2] })); 28 | model.add(tf.layers.flatten()); 29 | model.add(tf.layers.dropout({ rate: 0.25 })); 30 | model.add(tf.layers.dense({ units: 512, activation: 'relu' })); 31 | model.add(tf.layers.dropout({ rate: 0.5 })); 32 | model.add(tf.layers.dense({ units: WORDS.length, activation: 'softmax' })); 33 | 34 | const optimizer = 'rmsprop'; 35 | model.compile({ 36 | optimizer: optimizer, 37 | loss: 'categoricalCrossentropy', 38 | metrics: ['accuracy'], 39 | }); 40 | 41 | module.exports = model; 42 | -------------------------------------------------------------------------------- /data/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "cnn", 3 | "version": "1.0.0", 4 | "description": "", 5 | "main": "index.js", 6 | "scripts": { 7 | "test": "echo \"Error: no test specified\" && exit 1" 8 | }, 9 | "author": "", 10 | "license": "ISC", 11 | "dependencies": { 12 | "@tensorflow/tfjs": "^3.9.0", 13 | "cheerio": "^1.0.0-rc.10", 14 | "color": "^4.0.1", 15 | "sharp": "^0.28.3", 16 | "shelljs": "^0.8.4" 17 | } 18 | } 19 | -------------------------------------------------------------------------------- /data/read.js: -------------------------------------------------------------------------------- 1 | const util = require('util'); 2 | const path = require('path'); 3 | const fs = require('fs'); 4 | const sharp = require('sharp'); 5 | const shell = require('shelljs'); 6 | const readFile = util.promisify(fs.readFile); 7 | const data = require('../src/cnn/test.json'); 8 | 9 | (async function main() { 10 | const { 11 | count, 12 | width, 13 | height, 14 | buffer: bufferName, 15 | indexs, 16 | } = data; 17 | const buffer = await readFile(path.join(__dirname, '../src/cnn', bufferName)); 18 | const chunkSize = width * height; 19 | const options = { 20 | raw: { 21 | width, 22 | height, 23 | channels: 1 24 | } 25 | }; 26 | let i = 0; 27 | while (i < count) { 28 | const start = i * chunkSize; 29 | const end = start + chunkSize; 30 | const data = buffer.slice(start , end); 31 | const targetIndex = indexs[i]; 32 | const fileName = 
`${targetIndex}.png`; 33 | await sharp(data, options).png().toFile(fileName); 34 | await shell.exec(`open ${fileName}`); 35 | i+=1; 36 | } 37 | })(); 38 | -------------------------------------------------------------------------------- /data/write.js: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env node 2 | const util = require('util'); 3 | const path = require('path'); 4 | const fs = require('fs'); 5 | const sharp = require('sharp'); 6 | const cheerio = require('cheerio'); 7 | const Color = require('color'); 8 | const readFile = util.promisify(fs.readFile); 9 | const writeFile = util.promisify(fs.writeFile); 10 | const tf = require('@tensorflow/tfjs'); 11 | const WORDS = require('../src/data/words.json'); 12 | const yargs = require('yargs/yargs'); 13 | const { hideBin } = require('yargs/helpers'); 14 | const argv = yargs(hideBin(process.argv)).argv; 15 | 16 | const WORDS_INDEXS = Array.from({ length: WORDS.length }).map((_, i) => i); 17 | const IMAGE_WIDTH = Number(argv.size || 28); 18 | const IMAGE_HEIGHT = Number(argv.size || 28); 19 | const COUNT = Number(argv.count || 1); 20 | const NAME = argv.name || 'temp'; 21 | 22 | console.log({ 23 | COUNT, 24 | NAME, 25 | IMAGE_HEIGHT, 26 | IMAGE_WIDTH, 27 | }); 28 | 29 | function randomValue(value, base = 0) { 30 | return Math.floor(Math.random() * value + base); 31 | } 32 | 33 | function fillSvg(svg, color) { 34 | const $ = cheerio.load(svg, { xmlMode: true }); 35 | const fill = Color(color).hex(); 36 | $('svg').attr('fill', fill); 37 | return $.xml(); 38 | } 39 | 40 | async function loadSvg(word) { 41 | const svgPath = path.join(__dirname, '../src/assets', word.path); 42 | const data = await readFile(svgPath, 'utf8'); 43 | const svgContent = fillSvg(data, { 44 | r: 255, 45 | g: 255, 46 | b: 255, 47 | }); 48 | return Buffer.from(svgContent); 49 | } 50 | 51 | async function createWordImage(word) { 52 | const size = randomValue(200, 24); 53 | const svg = await loadSvg(word); 54 | // 生成大小不同的图片 55 | const resizeImageBuffer = await sharp(svg) 56 | .resize(size, size) 57 | .trim() 58 | .png().toBuffer(); 59 | // 统一缩放成 28 x 28 60 | const wordImageBuffer = await sharp(resizeImageBuffer) 61 | .resize(IMAGE_WIDTH, IMAGE_HEIGHT) 62 | .png().toBuffer(); 63 | // 字符背景色 64 | const baseImageBuffer = await sharp({ 65 | create: { 66 | width: IMAGE_WIDTH, 67 | height: IMAGE_HEIGHT, 68 | channels: 4, 69 | background: { 70 | r: 0, 71 | g: 0, 72 | b: 0, 73 | alpha: 0, 74 | }, 75 | } 76 | }).png().toBuffer(); 77 | // 将文字图片绘制到背景上 78 | const image = await sharp(baseImageBuffer).composite([{ 79 | input: wordImageBuffer, 80 | top: 0, 81 | left: 0, 82 | }]).sharpen().raw().toBuffer(); 83 | return image; 84 | } 85 | 86 | (async function main() { 87 | let data = null; 88 | const indexs = []; 89 | for (let i = 0; i < COUNT; i++) { 90 | console.log('batch create images --------------------------------------> ', i); 91 | // 打乱字符顺序 92 | tf.util.shuffle(WORDS_INDEXS); 93 | const createWords = WORDS_INDEXS.map(async (index) => { 94 | const word = WORDS[index]; 95 | const buffer = await createWordImage(word); 96 | return { 97 | index, 98 | buffer, 99 | }; 100 | }); 101 | const res = await Promise.all(createWords); 102 | res.forEach(({ index, buffer }) => { 103 | const pixs = []; 104 | for (let i = 0; i < buffer.length; i += 4) { 105 | const a = buffer[i + 3] / 255; 106 | const r = buffer[i] * a; 107 | const g = buffer[i + 1] * a; 108 | const b = buffer[i + 2] * a; 109 | pixs.push(Math.floor(r * 0.299 + g * 0.587 + b * 0.114)); 
110 | } 111 | indexs.push(index); 112 | const pixsBuffer = Buffer.from(pixs); 113 | data = data ? Buffer.concat([data, pixsBuffer]) : pixsBuffer; 114 | }); 115 | const meta = { 116 | indexs, 117 | count: (i + 1) * WORDS_INDEXS.length, 118 | width: IMAGE_WIDTH, 119 | height: IMAGE_HEIGHT, 120 | buffer: `${NAME}.buffer`, 121 | }; 122 | await writeFile(path.join(__dirname, `../src/data/${NAME}.buffer`), data); 123 | await writeFile(path.join(__dirname, `../src/data/${NAME}.json`), JSON.stringify(meta)); 124 | console.log(`batch save images --------------------------------------> ${i}, count ${meta.count}`); 125 | } 126 | console.log('done!'); 127 | })(); 128 | -------------------------------------------------------------------------------- /demo.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kinglisky/zelda-words/2adc6e49eaf3446e19d6f5d5923155d323f77eaf/demo.gif -------------------------------------------------------------------------------- /docs/assets/index.96676500.css: -------------------------------------------------------------------------------- 1 | .word-icon{overflow:hidden;width:1em;height:1em;padding:0;margin:0;fill:currentColor}.words-panel__groups,.words-panel--vertical{display:flex}.words-panel--vertical .words-panel__groups{flex-direction:column}.parse-panel{position:fixed;top:50%;left:50%;overflow:hidden;width:80%;max-width:600px;height:50%;background:#fff;border-radius:4px;transform:translate(-50%,-50%)}.parse-panel__close{position:absolute;top:0;right:0;display:block;width:36px;height:36px;color:#000;font-size:36px;line-height:36px;text-align:center;cursor:pointer}.parse-panel__result{display:block;width:100%;height:100%;object-fit:contain}.parse-panel__message{display:flex;width:100%;height:100%;align-items:center;justify-content:center}.download{position:fixed;top:50%;left:50%;overflow:hidden;width:80%;max-width:600px;height:60%;background:#fff;border-radius:4px;transform:translate(-50%,-50%)}.download img{display:block;width:100%;height:100%;object-fit:contain}*{box-sizing:border-box;padding:0;margin:0}body{width:100vw;background:#000200}#app{display:flex;justify-content:center;overflow:hidden;width:100vw;height:100vh;font-family:Avenir,Helvetica,Arial,sans-serif;-webkit-font-smoothing:antialiased;-moz-osx-font-smoothing:grayscale}.container{display:flex;flex-direction:column;width:100%;max-width:1920px;height:100%}.header{display:flex;width:100%;height:60px;border-bottom:1px solid #fff}.header__group{display:flex;align-items:center;flex:1;height:100%}.header__item{position:relative;display:flex;justify-content:center;align-items:center;flex:1;height:100%;color:#fff;border-top:1px solid #fff;border-right:1px solid #fff}.header__item span{margin-right:8px}.header__color{display:block;width:100%;height:100%;border:none}.header__button{cursor:pointer}.header__upload{position:absolute;top:0;left:0;width:100%;height:100%;outline:none;cursor:pointer;opacity:0}.content{display:flex;align-items:center;flex:1;overflow:hidden;width:100%}.words{flex:1;box-sizing:border-box;overflow:hidden;height:100%}.words textarea{display:block;width:100%;height:100%;padding:32px;color:#fff;font-size:14px;line-height:2;background-color:transparent;border:none;border-right:1px solid #fff;outline:none;resize:none}.results{display:flex;flex:2;overflow-x:auto;overflow-y:auto;height:100%}@media (max-width: 
768px){.container{flex-direction:column-reverse}.header{display:block;height:auto}.header__group{display:flex;align-items:center;height:60px}.header__item{display:flex;justify-content:center;align-items:center;height:100%;border-bottom:1px solid #fff}.content{flex-direction:column-reverse;flex:1}.words{width:100%;height:200px}.results{justify-content:center;align-items:center;width:100%}} 2 | -------------------------------------------------------------------------------- /docs/assets/map.efa40bbb.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kinglisky/zelda-words/2adc6e49eaf3446e19d6f5d5923155d323f77eaf/docs/assets/map.efa40bbb.png -------------------------------------------------------------------------------- /docs/assets/model.368228f4.json: -------------------------------------------------------------------------------- 1 | { 2 | "modelTopology": { 3 | "class_name": "Sequential", 4 | "config": { 5 | "name": "sequential_1", 6 | "layers": [ 7 | { 8 | "class_name": "Conv2D", 9 | "config": { 10 | "filters": 16, 11 | "kernel_initializer": { 12 | "class_name": "VarianceScaling", 13 | "config": { 14 | "scale": 1, 15 | "mode": "fan_avg", 16 | "distribution": "normal", 17 | "seed": null 18 | } 19 | }, 20 | "kernel_regularizer": null, 21 | "kernel_constraint": null, 22 | "kernel_size": [ 23 | 3, 24 | 3 25 | ], 26 | "strides": [ 27 | 1, 28 | 1 29 | ], 30 | "padding": "valid", 31 | "data_format": "channels_last", 32 | "dilation_rate": [ 33 | 1, 34 | 1 35 | ], 36 | "activation": "relu", 37 | "use_bias": true, 38 | "bias_initializer": { 39 | "class_name": "Zeros", 40 | "config": {} 41 | }, 42 | "bias_regularizer": null, 43 | "activity_regularizer": null, 44 | "bias_constraint": null, 45 | "name": "conv2d_Conv2D1", 46 | "trainable": true, 47 | "batch_input_shape": [ 48 | null, 49 | 28, 50 | 28, 51 | 1 52 | ], 53 | "dtype": "float32" 54 | } 55 | }, 56 | { 57 | "class_name": "MaxPooling2D", 58 | "config": { 59 | "pool_size": [ 60 | 2, 61 | 2 62 | ], 63 | "padding": "valid", 64 | "strides": [ 65 | 2, 66 | 2 67 | ], 68 | "data_format": "channels_last", 69 | "name": "max_pooling2d_MaxPooling2D1", 70 | "trainable": true 71 | } 72 | }, 73 | { 74 | "class_name": "Conv2D", 75 | "config": { 76 | "filters": 32, 77 | "kernel_initializer": { 78 | "class_name": "VarianceScaling", 79 | "config": { 80 | "scale": 1, 81 | "mode": "fan_avg", 82 | "distribution": "normal", 83 | "seed": null 84 | } 85 | }, 86 | "kernel_regularizer": null, 87 | "kernel_constraint": null, 88 | "kernel_size": [ 89 | 3, 90 | 3 91 | ], 92 | "strides": [ 93 | 1, 94 | 1 95 | ], 96 | "padding": "valid", 97 | "data_format": "channels_last", 98 | "dilation_rate": [ 99 | 1, 100 | 1 101 | ], 102 | "activation": "relu", 103 | "use_bias": true, 104 | "bias_initializer": { 105 | "class_name": "Zeros", 106 | "config": {} 107 | }, 108 | "bias_regularizer": null, 109 | "activity_regularizer": null, 110 | "bias_constraint": null, 111 | "name": "conv2d_Conv2D2", 112 | "trainable": true 113 | } 114 | }, 115 | { 116 | "class_name": "MaxPooling2D", 117 | "config": { 118 | "pool_size": [ 119 | 2, 120 | 2 121 | ], 122 | "padding": "valid", 123 | "strides": [ 124 | 2, 125 | 2 126 | ], 127 | "data_format": "channels_last", 128 | "name": "max_pooling2d_MaxPooling2D2", 129 | "trainable": true 130 | } 131 | }, 132 | { 133 | "class_name": "Conv2D", 134 | "config": { 135 | "filters": 32, 136 | "kernel_initializer": { 137 | "class_name": "VarianceScaling", 138 | "config": { 139 | "scale": 1, 140 
| "mode": "fan_avg", 141 | "distribution": "normal", 142 | "seed": null 143 | } 144 | }, 145 | "kernel_regularizer": null, 146 | "kernel_constraint": null, 147 | "kernel_size": [ 148 | 3, 149 | 3 150 | ], 151 | "strides": [ 152 | 1, 153 | 1 154 | ], 155 | "padding": "valid", 156 | "data_format": "channels_last", 157 | "dilation_rate": [ 158 | 1, 159 | 1 160 | ], 161 | "activation": "relu", 162 | "use_bias": true, 163 | "bias_initializer": { 164 | "class_name": "Zeros", 165 | "config": {} 166 | }, 167 | "bias_regularizer": null, 168 | "activity_regularizer": null, 169 | "bias_constraint": null, 170 | "name": "conv2d_Conv2D3", 171 | "trainable": true 172 | } 173 | }, 174 | { 175 | "class_name": "Flatten", 176 | "config": { 177 | "name": "flatten_Flatten1", 178 | "trainable": true 179 | } 180 | }, 181 | { 182 | "class_name": "Dense", 183 | "config": { 184 | "units": 64, 185 | "activation": "relu", 186 | "use_bias": true, 187 | "kernel_initializer": { 188 | "class_name": "VarianceScaling", 189 | "config": { 190 | "scale": 1, 191 | "mode": "fan_avg", 192 | "distribution": "normal", 193 | "seed": null 194 | } 195 | }, 196 | "bias_initializer": { 197 | "class_name": "Zeros", 198 | "config": {} 199 | }, 200 | "kernel_regularizer": null, 201 | "bias_regularizer": null, 202 | "activity_regularizer": null, 203 | "kernel_constraint": null, 204 | "bias_constraint": null, 205 | "name": "dense_Dense1", 206 | "trainable": true 207 | } 208 | }, 209 | { 210 | "class_name": "Dense", 211 | "config": { 212 | "units": 40, 213 | "activation": "softmax", 214 | "use_bias": true, 215 | "kernel_initializer": { 216 | "class_name": "VarianceScaling", 217 | "config": { 218 | "scale": 1, 219 | "mode": "fan_avg", 220 | "distribution": "normal", 221 | "seed": null 222 | } 223 | }, 224 | "bias_initializer": { 225 | "class_name": "Zeros", 226 | "config": {} 227 | }, 228 | "kernel_regularizer": null, 229 | "bias_regularizer": null, 230 | "activity_regularizer": null, 231 | "kernel_constraint": null, 232 | "bias_constraint": null, 233 | "name": "dense_Dense2", 234 | "trainable": true 235 | } 236 | } 237 | ] 238 | }, 239 | "keras_version": "tfjs-layers 3.9.0", 240 | "backend": "tensor_flow.js" 241 | }, 242 | "format": "layers-model", 243 | "generatedBy": "TensorFlow.js tfjs-layers v3.9.0", 244 | "convertedBy": null, 245 | "weightsManifest": [ 246 | { 247 | "paths": [ 248 | "./model.weights.bin" 249 | ], 250 | "weights": [ 251 | { 252 | "name": "conv2d_Conv2D1/kernel", 253 | "shape": [ 254 | 3, 255 | 3, 256 | 1, 257 | 16 258 | ], 259 | "dtype": "float32" 260 | }, 261 | { 262 | "name": "conv2d_Conv2D1/bias", 263 | "shape": [ 264 | 16 265 | ], 266 | "dtype": "float32" 267 | }, 268 | { 269 | "name": "conv2d_Conv2D2/kernel", 270 | "shape": [ 271 | 3, 272 | 3, 273 | 16, 274 | 32 275 | ], 276 | "dtype": "float32" 277 | }, 278 | { 279 | "name": "conv2d_Conv2D2/bias", 280 | "shape": [ 281 | 32 282 | ], 283 | "dtype": "float32" 284 | }, 285 | { 286 | "name": "conv2d_Conv2D3/kernel", 287 | "shape": [ 288 | 3, 289 | 3, 290 | 32, 291 | 32 292 | ], 293 | "dtype": "float32" 294 | }, 295 | { 296 | "name": "conv2d_Conv2D3/bias", 297 | "shape": [ 298 | 32 299 | ], 300 | "dtype": "float32" 301 | }, 302 | { 303 | "name": "dense_Dense1/kernel", 304 | "shape": [ 305 | 288, 306 | 64 307 | ], 308 | "dtype": "float32" 309 | }, 310 | { 311 | "name": "dense_Dense1/bias", 312 | "shape": [ 313 | 64 314 | ], 315 | "dtype": "float32" 316 | }, 317 | { 318 | "name": "dense_Dense2/kernel", 319 | "shape": [ 320 | 64, 321 | 40 322 | ], 323 | "dtype": "float32" 
324 | }, 325 | { 326 | "name": "dense_Dense2/bias", 327 | "shape": [ 328 | 40 329 | ], 330 | "dtype": "float32" 331 | } 332 | ] 333 | } 334 | ] 335 | } -------------------------------------------------------------------------------- /docs/assets/test.137e7c6f.buffer: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kinglisky/zelda-words/2adc6e49eaf3446e19d6f5d5923155d323f77eaf/docs/assets/test.137e7c6f.buffer -------------------------------------------------------------------------------- /docs/assets/train.9ab62db8.buffer: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kinglisky/zelda-words/2adc6e49eaf3446e19d6f5d5923155d323f77eaf/docs/assets/train.9ab62db8.buffer -------------------------------------------------------------------------------- /docs/cnn.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 10 | ZELDA WORDS(CNN) 11 | 12 | 13 | 14 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | -------------------------------------------------------------------------------- /docs/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kinglisky/zelda-words/2adc6e49eaf3446e19d6f5d5923155d323f77eaf/docs/favicon.ico -------------------------------------------------------------------------------- /docs/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | ZELDA WORDS 9 | 10 | 11 | 12 | 13 | 14 |
15 | 16 | 17 | 18 | -------------------------------------------------------------------------------- /docs/model.json: -------------------------------------------------------------------------------- 1 | { 2 | "modelTopology": { 3 | "class_name": "Sequential", 4 | "config": { 5 | "name": "sequential_1", 6 | "layers": [ 7 | { 8 | "class_name": "Conv2D", 9 | "config": { 10 | "filters": 16, 11 | "kernel_initializer": { 12 | "class_name": "VarianceScaling", 13 | "config": { 14 | "scale": 1, 15 | "mode": "fan_avg", 16 | "distribution": "normal", 17 | "seed": null 18 | } 19 | }, 20 | "kernel_regularizer": null, 21 | "kernel_constraint": null, 22 | "kernel_size": [ 23 | 3, 24 | 3 25 | ], 26 | "strides": [ 27 | 1, 28 | 1 29 | ], 30 | "padding": "valid", 31 | "data_format": "channels_last", 32 | "dilation_rate": [ 33 | 1, 34 | 1 35 | ], 36 | "activation": "relu", 37 | "use_bias": true, 38 | "bias_initializer": { 39 | "class_name": "Zeros", 40 | "config": {} 41 | }, 42 | "bias_regularizer": null, 43 | "activity_regularizer": null, 44 | "bias_constraint": null, 45 | "name": "conv2d_Conv2D1", 46 | "trainable": true, 47 | "batch_input_shape": [ 48 | null, 49 | 28, 50 | 28, 51 | 1 52 | ], 53 | "dtype": "float32" 54 | } 55 | }, 56 | { 57 | "class_name": "MaxPooling2D", 58 | "config": { 59 | "pool_size": [ 60 | 2, 61 | 2 62 | ], 63 | "padding": "valid", 64 | "strides": [ 65 | 2, 66 | 2 67 | ], 68 | "data_format": "channels_last", 69 | "name": "max_pooling2d_MaxPooling2D1", 70 | "trainable": true 71 | } 72 | }, 73 | { 74 | "class_name": "Conv2D", 75 | "config": { 76 | "filters": 32, 77 | "kernel_initializer": { 78 | "class_name": "VarianceScaling", 79 | "config": { 80 | "scale": 1, 81 | "mode": "fan_avg", 82 | "distribution": "normal", 83 | "seed": null 84 | } 85 | }, 86 | "kernel_regularizer": null, 87 | "kernel_constraint": null, 88 | "kernel_size": [ 89 | 3, 90 | 3 91 | ], 92 | "strides": [ 93 | 1, 94 | 1 95 | ], 96 | "padding": "valid", 97 | "data_format": "channels_last", 98 | "dilation_rate": [ 99 | 1, 100 | 1 101 | ], 102 | "activation": "relu", 103 | "use_bias": true, 104 | "bias_initializer": { 105 | "class_name": "Zeros", 106 | "config": {} 107 | }, 108 | "bias_regularizer": null, 109 | "activity_regularizer": null, 110 | "bias_constraint": null, 111 | "name": "conv2d_Conv2D2", 112 | "trainable": true 113 | } 114 | }, 115 | { 116 | "class_name": "MaxPooling2D", 117 | "config": { 118 | "pool_size": [ 119 | 2, 120 | 2 121 | ], 122 | "padding": "valid", 123 | "strides": [ 124 | 2, 125 | 2 126 | ], 127 | "data_format": "channels_last", 128 | "name": "max_pooling2d_MaxPooling2D2", 129 | "trainable": true 130 | } 131 | }, 132 | { 133 | "class_name": "Conv2D", 134 | "config": { 135 | "filters": 32, 136 | "kernel_initializer": { 137 | "class_name": "VarianceScaling", 138 | "config": { 139 | "scale": 1, 140 | "mode": "fan_avg", 141 | "distribution": "normal", 142 | "seed": null 143 | } 144 | }, 145 | "kernel_regularizer": null, 146 | "kernel_constraint": null, 147 | "kernel_size": [ 148 | 3, 149 | 3 150 | ], 151 | "strides": [ 152 | 1, 153 | 1 154 | ], 155 | "padding": "valid", 156 | "data_format": "channels_last", 157 | "dilation_rate": [ 158 | 1, 159 | 1 160 | ], 161 | "activation": "relu", 162 | "use_bias": true, 163 | "bias_initializer": { 164 | "class_name": "Zeros", 165 | "config": {} 166 | }, 167 | "bias_regularizer": null, 168 | "activity_regularizer": null, 169 | "bias_constraint": null, 170 | "name": "conv2d_Conv2D3", 171 | "trainable": true 172 | } 173 | }, 174 | { 175 | "class_name": 
"Flatten", 176 | "config": { 177 | "name": "flatten_Flatten1", 178 | "trainable": true 179 | } 180 | }, 181 | { 182 | "class_name": "Dense", 183 | "config": { 184 | "units": 64, 185 | "activation": "relu", 186 | "use_bias": true, 187 | "kernel_initializer": { 188 | "class_name": "VarianceScaling", 189 | "config": { 190 | "scale": 1, 191 | "mode": "fan_avg", 192 | "distribution": "normal", 193 | "seed": null 194 | } 195 | }, 196 | "bias_initializer": { 197 | "class_name": "Zeros", 198 | "config": {} 199 | }, 200 | "kernel_regularizer": null, 201 | "bias_regularizer": null, 202 | "activity_regularizer": null, 203 | "kernel_constraint": null, 204 | "bias_constraint": null, 205 | "name": "dense_Dense1", 206 | "trainable": true 207 | } 208 | }, 209 | { 210 | "class_name": "Dense", 211 | "config": { 212 | "units": 40, 213 | "activation": "softmax", 214 | "use_bias": true, 215 | "kernel_initializer": { 216 | "class_name": "VarianceScaling", 217 | "config": { 218 | "scale": 1, 219 | "mode": "fan_avg", 220 | "distribution": "normal", 221 | "seed": null 222 | } 223 | }, 224 | "bias_initializer": { 225 | "class_name": "Zeros", 226 | "config": {} 227 | }, 228 | "kernel_regularizer": null, 229 | "bias_regularizer": null, 230 | "activity_regularizer": null, 231 | "kernel_constraint": null, 232 | "bias_constraint": null, 233 | "name": "dense_Dense2", 234 | "trainable": true 235 | } 236 | } 237 | ] 238 | }, 239 | "keras_version": "tfjs-layers 3.9.0", 240 | "backend": "tensor_flow.js" 241 | }, 242 | "format": "layers-model", 243 | "generatedBy": "TensorFlow.js tfjs-layers v3.9.0", 244 | "convertedBy": null, 245 | "weightsManifest": [ 246 | { 247 | "paths": [ 248 | "./model.weights.bin" 249 | ], 250 | "weights": [ 251 | { 252 | "name": "conv2d_Conv2D1/kernel", 253 | "shape": [ 254 | 3, 255 | 3, 256 | 1, 257 | 16 258 | ], 259 | "dtype": "float32" 260 | }, 261 | { 262 | "name": "conv2d_Conv2D1/bias", 263 | "shape": [ 264 | 16 265 | ], 266 | "dtype": "float32" 267 | }, 268 | { 269 | "name": "conv2d_Conv2D2/kernel", 270 | "shape": [ 271 | 3, 272 | 3, 273 | 16, 274 | 32 275 | ], 276 | "dtype": "float32" 277 | }, 278 | { 279 | "name": "conv2d_Conv2D2/bias", 280 | "shape": [ 281 | 32 282 | ], 283 | "dtype": "float32" 284 | }, 285 | { 286 | "name": "conv2d_Conv2D3/kernel", 287 | "shape": [ 288 | 3, 289 | 3, 290 | 32, 291 | 32 292 | ], 293 | "dtype": "float32" 294 | }, 295 | { 296 | "name": "conv2d_Conv2D3/bias", 297 | "shape": [ 298 | 32 299 | ], 300 | "dtype": "float32" 301 | }, 302 | { 303 | "name": "dense_Dense1/kernel", 304 | "shape": [ 305 | 288, 306 | 64 307 | ], 308 | "dtype": "float32" 309 | }, 310 | { 311 | "name": "dense_Dense1/bias", 312 | "shape": [ 313 | 64 314 | ], 315 | "dtype": "float32" 316 | }, 317 | { 318 | "name": "dense_Dense2/kernel", 319 | "shape": [ 320 | 64, 321 | 40 322 | ], 323 | "dtype": "float32" 324 | }, 325 | { 326 | "name": "dense_Dense2/bias", 327 | "shape": [ 328 | 40 329 | ], 330 | "dtype": "float32" 331 | } 332 | ] 333 | } 334 | ] 335 | } -------------------------------------------------------------------------------- /docs/model.weights.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kinglisky/zelda-words/2adc6e49eaf3446e19d6f5d5923155d323f77eaf/docs/model.weights.bin -------------------------------------------------------------------------------- /index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | ZELDA WORDS 9 | 10 | 11 |
12 | 13 | 14 | 15 | -------------------------------------------------------------------------------- /lsb.js: -------------------------------------------------------------------------------- 1 | (async function () { 2 | function loadImage (url) { 3 | return fetch(url) 4 | .then(res => res.blob()) 5 | .then(blob => URL.createObjectURL(blob)) 6 | .then(blobUrl => { 7 | 8 | return new Promise((resolve, reject) => { 9 | const img = new Image(); 10 | img.onload = () => resolve(img); 11 | img.onerror = (e) => reject(e); 12 | img.src = blobUrl; 13 | }); 14 | }); 15 | }; 16 | 17 | function createCanvas(width, height) { 18 | const canvas = document.createElement('canvas'); 19 | canvas.width = width; 20 | canvas.height = height; 21 | return canvas; 22 | } 23 | 24 | function getImageData(image) { 25 | const { naturalWidth, naturalHeight } = image; 26 | const canvas = createCanvas(naturalWidth, naturalHeight); 27 | const ctx = canvas.getContext('2d'); 28 | ctx.drawImage(image, 0, 0); 29 | return ctx.getImageData(0, 0, naturalWidth, naturalHeight); 30 | } 31 | 32 | function putImageData(imageData) { 33 | const { width, height } = imageData; 34 | const canvas = createCanvas(width, height); 35 | const ctx = canvas.getContext('2d'); 36 | ctx.putImageData(imageData, 0, 0); 37 | return canvas; 38 | } 39 | 40 | function writeMetaInfo(baseImageData, qrcodeImageData) { 41 | const { width, height, data } = qrcodeImageData; 42 | for (let x = 0; x < width; x++) { 43 | for (let y = 0; y < height; y++) { 44 | // use the R channel to hide the information 45 | const r = (x + y * width) * 4; 46 | const v = data[r]; 47 | // white QR pixels (the background) map to 1, black pixels (the content) map to 0 48 | const bit = v === 255 ? 1 : 0; 49 | // if the parity of the current R value doesn't match the QR bit, add or subtract 1 to make it match 50 | if (baseImageData.data[r] % 2 !== bit) { 51 | baseImageData.data[r] += bit ? 1 : -1; 52 | } 53 | } 54 | } 55 | return baseImageData; 56 | } 57 | 58 | function readMetaInfo(imageData) { 59 | const { width, height, data } = imageData; 60 | const qrcodeImageData = new ImageData(width, height); 61 | for (let x = 0; x < width; x++) { 62 | for (let y = 0; y < height; y++) { 63 | // read the R channel information back out 64 | const r = (x + y * width) * 4; 65 | const v = data[r] % 2 === 0 ?
0 : 255; 66 | qrcodeImageData.data[r] = v; 67 | qrcodeImageData.data[r + 1] = v; 68 | qrcodeImageData.data[r + 2] = v; 69 | qrcodeImageData.data[r + 3] = 255; 70 | } 71 | } 72 | return qrcodeImageData; 73 | } 74 | 75 | const baseImage = await loadImage('https://gd-filems.dancf.com/mcm79j/mcm79j/05654/cd68f955-0f4d-4e42-af93-fe8ae82599e3555415.png'); 76 | const qrcodeImage = await loadImage('https://gd-filems.dancf.com/mcm79j/mcm79j/05654/f3ffa72f-2377-4c8c-b30f-6d261f5b6905555476.jpg'); 77 | const resultImageData = writeMetaInfo(getImageData(baseImage), getImageData(qrcodeImage)); 78 | const resultCanvas = putImageData(resultImageData); 79 | 80 | // const resultDataUrl = resultCanvas.toDataURL('image/png'); 81 | const resultDataUrl = resultCanvas.toDataURL('image/jpeg', 1); 82 | console.log(resultDataUrl); 83 | const hideMetaImage = await loadImage(resultDataUrl); 84 | const readData = readMetaInfo(getImageData(hideMetaImage)); 85 | const readCanvas = putImageData(readData); 86 | console.log(readCanvas.toDataURL()); 87 | })(); -------------------------------------------------------------------------------- /map.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kinglisky/zelda-words/2adc6e49eaf3446e19d6f5d5923155d323f77eaf/map.jpeg -------------------------------------------------------------------------------- /ocr.md: --------------------------------------------------------------------------------

## Convolutional neural network implementation

A convolutional-neural-network-based implementation has been added~

## Sheikah script

![](./ocr/ocr-0.jpg)

Zelda players will know it at once: Sheikah script is a fictional writing system from The Legend of Zelda: Breath of the Wild, and you can find it all over Sheikah architecture. I previously built a simple [Sheikah script generation and translation tool](http://nlush.com/zelda-words), but the key text-parsing step wasn't implemented elegantly: it hid metadata in the exported image as an invisible watermark, and that hidden information is easily lost when the image is compressed, which breaks parsing. If you're curious, see the earlier article: [Building a Zelda Sheikah script converter](https://juejin.cn/post/6935836863844319239).

I later dug into how [OCR](https://zh.wikipedia.org/wiki/%E5%85%89%E5%AD%A6%E5%AD%97%E7%AC%A6%E8%AF%86%E5%88%AB) works and hand-rolled a simple OCR parser for Sheikah script. Here's a rundown of the implementation; my skill is limited, so corrections are welcome~

> Optical Character Recognition (OCR) is the process of analyzing and recognizing images of text to extract the characters and layout information they contain.

The tool lives here:
- Demo: [https://kinglisky.github.io/zelda-words](https://kinglisky.github.io/zelda-words/index.html)
- Repository: [https://github.com/kinglisky/zelda-words](https://github.com/kinglisky/zelda-words)

Fictional scripts are usually modeled on real ones. Sheikah script maps onto the English letters, the digits, and four special symbols (40 characters in total). The rules are simple, and the whole mapping is in this image:

![](./ocr/ocr-map.jpeg)

An exported Sheikah script image looks like this:

![](./ocr/ocr-1.jpeg)
![](./ocr/ocr-2.jpeg)

Let's get started~

## Image binarization

Both the colors and the text layout of exported Sheikah images are unpredictable, so the first step is normalization. Since only the text content matters, we start by removing color as a source of interference and convert every image to plain black and white.

This process is called [binarization](https://zh.wikipedia.org/zh-hans/%E4%BA%8C%E5%80%BC%E5%8C%96). A binarized image sheds distractions and **emphasizes the content features** of the image, and it can easily be serialized into an image fingerprint.

> Binarization is one of the simplest image-segmentation methods. It converts a grayscale image into a binary image: pixels brighter than a chosen threshold are set to the maximum gray value, and pixels darker than it are set to the minimum.

![](./ocr/ocr-3.jpeg)
![](./ocr/ocr-4.jpeg)

The main steps of binarization are:
- convert the image to grayscale
- compute a binarization threshold from the grayscale data
- binarize the image against that threshold

### Grayscale conversion

![](./ocr/ocr-2.jpeg)

Taking the image above as an example, grayscale conversion is simple: blend the RGB channels with the weights `r * 0.299 + g * 0.587 + b * 0.114` to get the gray value. Since all three channels of a gray pixel share the same value, we keep just one value per pixel for the next step.

```JavaScript
const canvasToGray = (canvas) => {
    const ctx = canvas.getContext('2d');
    const data = ctx.getImageData(0, 0, canvas.width, canvas.height);
    const calculateGray = (r, g, b) => parseInt(r * 0.299 + g * 0.587 + b * 0.114);
    const grayData = [];
    for (let x = 0; x < data.width; x++) {
        for (let y = 0; y < data.height; y++) {
            const idx = (x + y * data.width) * 4;
            const r = data.data[idx + 0];
            const g = data.data[idx + 1];
            const b = data.data[idx + 2];
            const gray = calculateGray(r, g, b);
            grayData.push(gray);
        }
    }
    return grayData;
};
```

The grayscaled image looks like this:

![](./ocr/ocr-5.png)
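To try the helper on a real image you just need the pixels on a canvas first. A minimal usage sketch, assuming a same-origin (or CORS-enabled) image; for cross-origin images, fetching into a blob URL as done elsewhere in this repo works too:

```JavaScript
// Draw an image onto an offscreen canvas, then grayscale it.
const url = './ocr/ocr-2.jpeg';
const image = new Image();
image.onload = () => {
    const canvas = document.createElement('canvas');
    canvas.width = image.naturalWidth;
    canvas.height = image.naturalHeight;
    canvas.getContext('2d').drawImage(image, 0, 0);
    const grayData = canvasToGray(canvas);
    console.log(grayData.length); // one gray value per pixel
};
image.src = url;
```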
### The binarization threshold

Computing the threshold is the critical step of binarization, and there are many algorithms for it. Let's first try the simplest one, [average hashing (aHash)](https://baike.baidu.com/item/%E5%9D%87%E5%80%BC%E5%93%88%E5%B8%8C%E7%AE%97%E6%B3%95): sum the gray values of all pixels and divide by the pixel count; the mean becomes the threshold. Straight to the code:

```JavaScript
const average = (grayData) => {
    let sum = 0;
    for (let i = 0; i < grayData.length; i += 1) {
        sum += grayData[i];
    }
    return sum / grayData.length;
};
```

Other thresholding algorithms include:
- [perceptual hashing, pHash](https://baike.baidu.com/item/%E6%84%9F%E7%9F%A5%E5%93%88%E5%B8%8C%E7%AE%97%E6%B3%95)
- [Otsu's method](https://zh.wikipedia.org/wiki/%E5%A4%A7%E6%B4%A5%E7%AE%97%E6%B3%95)

They're worth a look if you're interested. Otsu generally produces better binarization results, and we'll use it to compute image thresholds later on, so here's an Otsu implementation as well:

```JavaScript
const otsu = (grayData) => {
    let ptr = 0;
    // histogram of gray levels 0-255, all counts initialized to 0
    let histData = Array(256).fill(0);
    let total = grayData.length;

    while (ptr < total) {
        let h = grayData[ptr++];
        histData[h]++;
    }
    // weighted total (gray level x count)
    let sum = 0;
    for (let i = 0; i < 256; i++) {
        sum += i * histData[i];
    }
    // number of background pixels (below the threshold)
    let wB = 0;
    // number of foreground pixels (above the threshold)
    let wF = 0;
    // background weighted sum (gray level x count)
    let sumB = 0;
    // maximum between-class variance seen so far
    let varMax = 0;
    // threshold
    let threshold = 0;

    for (let t = 0; t < 256; t++) {
        // accumulate the background (below threshold) count
        wB += histData[t];
        if (wB === 0) continue;
        // the foreground (above threshold) count is the remainder
        wF = total - wB;
        if (wF === 0) break;
        // accumulate the background weighted sum (gray level x count)
        sumB += t * histData[t];

        // mean gray level of the background (below threshold)
        let mB = sumB / wB;
        // mean gray level of the foreground (above threshold)
        let mF = (sum - sumB) / wF;
        // between-class variance
        let varBetween = wB * wF * (mB - mF) ** 2;

        if (varBetween > varMax) {
            varMax = varBetween;
            threshold = t;
        }
    }

    return threshold;
};
```
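As a quick sanity check, the two threshold functions can be compared on a synthetic bimodal distribution; a toy example with arbitrarily chosen values:

```JavaScript
// 100 dark pixels (gray 20) and 50 bright pixels (gray 220).
const toyGrayData = [...Array(100).fill(20), ...Array(50).fill(220)];
console.log(average(toyGrayData)); // ≈ 86.7, pulled toward the larger class
console.log(otsu(toyGrayData)); // 20, the exact boundary of the dark class
```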
### Binarizing the image

Once we have the threshold, binarization itself is easy. There is one thing to watch out for: since both the **text color and the background color** of our generated images are arbitrary, the background may fall on either side of the threshold, so the output wouldn't be consistent across images. We therefore fix an output convention: the background is set to black (value 0) and the text to white (value 255).

Because our generated images are simple, the background color can be determined from the RGB values of the very first pixel. The implementation is also simple:

```JavaScript
const binaryzationOutput = (originCanvas, threshold) => {
    const ctx = originCanvas.getContext('2d');
    const imageData = ctx.getImageData(0, 0, originCanvas.width, originCanvas.height);
    const { width, height, data } = imageData;
    // the first pixel's value is the background color
    const head = (data[0] + data[1] + data[2]) / 3;
    // if the background is above the threshold, swap the foreground/background output values
    const color = head > threshold
        ? { foreground: 0, background: 255 }
        : { foreground: 255, background: 0 };
    for (let x = 0; x < width; x++) {
        for (let y = 0; y < height; y++) {
            const idx = (x + y * width) * 4;
            const avg = (data[idx] + data[idx + 1] + data[idx + 2]) / 3;
            const v = avg > threshold ? color.foreground : color.background;
            data[idx] = v;
            data[idx + 1] = v;
            data[idx + 2] = v;
            data[idx + 3] = 255;
        }
    }
    ctx.putImageData(imageData, 0, 0);
    return originCanvas.toDataURL();
}
```

One more note: this step binarizes the **original image**, not the grayscaled one.

[The complete code is here](https://github.com/kinglisky/zelda-words/blob/master/binarization.js); the binarized image looks like this:

![](./ocr/ocr-6.png)
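Putting the three steps together, the whole binarization pass reads as below; a sketch assuming a canvas that already contains the source image and the helpers defined above:

```JavaScript
// grayscale -> threshold -> binarize, returning a black-and-white data URL
const binarize = (canvas) => {
    const grayData = canvasToGray(canvas);
    const threshold = otsu(grayData); // or average(grayData)
    return binaryzationOutput(canvas, threshold);
};
```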
## Character segmentation

With the binarization above we've normalized every image to white text on a black background, and the operations were quite simple. Production-grade OCR usually involves far more preprocessing, such as perspective correction, rotation correction, cropping, denoising, and sharpening, all aimed at producing a clean image containing nothing but text, because preprocessing quality largely determines how well the next step, character segmentation, works.

As the name suggests, we now have to extract the individual Sheikah glyphs. Here is a simple approach: **projection-based segmentation**.

The basic idea:
- scan the image's pixel rows top to bottom and cut out the lines of text
- scan each text line's pixel columns left to right and cut out individual glyphs

### Cutting rows

It's easier to follow with a picture, so let's cut rows first. Our image is 700 x 600. Scanning each pixel row from top to bottom, **counting black pixels as 0 and white pixels as 1**, and summing the 1s per row gives the following curve:

![](./ocr/ocr-6.png)

![](./ocr/ocr-8.png)

The x-axis corresponds to the image height and the y-axis to the white-pixel count per row. Stretches where the count is 0 are blank gaps; stretches with values are the rows containing text, and the extent of each stretch is the row height.

### Cutting glyphs (cutting columns)

Having cut out the rows of text, the next step is to scan each row's columns from left to right, again counting black as 0 and white as 1. For the first row of text, the curve looks like this:

![](./ocr/ocr-9.png)

![](./ocr/ocr-10.png)

Right, same as cutting rows: just cut out the stretches where the y-value is non-zero!

But there's a catch. If we naively split on the non-zero intervals, the last glyph gets split into a left half and a right half:

![](./ocr/ocr-13.jpeg)

The reason is easy to see: the last glyph has a left-right structure with a gap in the middle, so it gets cut apart.

![](./ocr/ocr-14.jpeg)

Have a look at the special characters below; when splitting, we generally need to account for glyphs with left-right or top-bottom structure.

**Top-bottom structured glyphs:**

![](./ocr/ocr-15.jpeg)

![](./ocr/ocr-17.png)

**Left-right structured glyphs:**

![](./ocr/ocr-16.jpeg)

![](./ocr/ocr-18.png)

How should we handle these? It's easy to observe that Sheikah glyphs are all square, so each glyph's width-to-height ratio should be 1:1. If we knew a glyph's width or height, we'd know how to stitch split glyph regions back together. So how do we compute it?

The trick is simple: scan the whole image **once horizontally and once vertically** to get the text content's projection extents in both directions; the **largest interval** across both projections is the standard glyph size. While splitting, any text chunk smaller than the standard size keeps merging with the text chunk of the next projection interval until it reaches the standard glyph size.

First, the scanning and max-interval helpers:

```JavaScript
// horizontal / vertical scans
function countPixel(imageData, isRow = false) {
    const { width, height, data } = imageData;
    const offsets = [0, 1, 2];
    // background color
    const head = offsets.map((i) => data[i]);
    const pixel = [];
    if (isRow) {
        // top to bottom, scan one row at a time
        for (let i = 0; i < height; i++) {
            let count = 0;
            for (let j = 0; j < width; j++) {
                const index = (i * width + j) * 4;
                const isEqual = offsets.every(
                    (offset) => head[offset] === data[index + offset]
                );
                count += isEqual ? 0 : 1;
            }
            pixel.push(count);
        }
    } else {
        // left to right, scan one column at a time
        for (let i = 0; i < width; i++) {
            let count = 0;
            for (let j = 0; j < height; j++) {
                const index = (j * width + i) * 4;
                const isEqual = offsets.every(
                    (offset) => head[offset] === data[index + offset]
                );
                count += isEqual ? 0 : 1;
            }
            pixel.push(count);
        }
    }
    return pixel;
}

// split the counts into text / background runs
function countRanges(counts) {
    const groups = [];
    let foreground = 0;
    let background = 0;
    counts.forEach((count) => {
        if (count) {
            foreground += 1;
            if (background) {
                groups.push({ background: true, value: background });
                background = 0;
            }
        } else {
            background += 1;
            if (foreground) {
                groups.push({ foreground: true, value: foreground });
                foreground = 0;
            }
        }
    });
    if (foreground) {
        groups.push({ foreground: true, value: foreground });
    }
    if (background) {
        groups.push({ background: true, value: background });
    }
    return groups;
}

// get the largest text interval
function getMaxRange(data) {
    return data.reduce((max, it) => {
        if (it.foreground) {
            return Math.max(max, it.value);
        }
        return max;
    }, 0);
}
```

Computing the glyph size of an image:

```JavaScript
const imageData = {};
// scan row by row
const rowsRanges = countRanges(countPixel(imageData, true));
// scan column by column
const colsRanges = countRanges(countPixel(imageData, false));

// derive the glyph size (glyphs occupy a square region) from the pixel distribution in both directions
const fontRange = Math.max(
    getMaxRange(rowsRanges),
    getMaxRange(colsRanges)
);
```

Merging the intervals of left-right and top-bottom structured glyphs:

```JavaScript
// merge the runs of structurally split glyphs
function mergeRanges(data, size) {
    const merge = [];
    // chunks holds regions smaller than the standard glyph size
    let chunks = [];
    data.forEach((item) => {
        if (chunks.length) {
            chunks.push(item);
            const value = chunks.reduce((sum, chunk) => sum + chunk.value, 0);
            // once the accumulated region reaches (or is close to) the standard glyph size, merge it into one block
            if (value >= size || Math.pow(value - size, 2) < 4) {
                merge.push({
                    foreground: true,
                    value,
                });
                chunks = [];
            }
            return;
        }
        // regions smaller than the standard glyph size are pushed into chunks
        if (item.foreground && item.value < size) {
            chunks = [item];
            return;
        }
        merge.push(item);
    });
    return merge;
}
```

After this normalization, the run list looks like the following; we just crop out each `foreground` run with its corresponding size `value`, in order.

```JavaScript
[
    {
        "background": true,
        "value": 115
    },
    {
        "foreground": true,
        "value": 70
    },
    {
        "background": true,
        "value": 30
    },
    {
        "foreground": true,
        "value": 70
    },
    {
        "background": true,
        "value": 30
    },
    {
        "foreground": true,
        "value": 70
    },
    {
        "background": true,
        "value": 30
    },
    {
        "foreground": true,
        "value": 70
    },
    {
        "background": true,
        "value": 115
    }
]
```

What's left is computing the various offsets and cropping each glyph block out of the canvas while recording its position. [The full implementation is here](https://github.com/kinglisky/zelda-words/blob/master/src/utils/image-ocr.ts#L221), so I won't go through it in detail; a simplified cropping sketch follows below. The cropped glyphs look like this:

![](./ocr/ocr-19.png)
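The cropping itself, which the linked code implements in full, boils down to walking the merged runs and copying sub-rectangles onto small canvases. A simplified, hypothetical sketch for a single text row; `rowOffset`/`rowHeight` are assumed to come from the row ranges, and `colRanges` are the merged column runs for that row:

```JavaScript
function cropRow(canvas, rowOffset, rowHeight, colRanges) {
    const blocks = [];
    let x = 0;
    colRanges.forEach((range) => {
        if (range.foreground) {
            const block = document.createElement('canvas');
            block.width = range.value;
            block.height = rowHeight;
            // copy the glyph's sub-rectangle out of the source canvas
            block.getContext('2d').drawImage(
                canvas,
                x, rowOffset, range.value, rowHeight,
                0, 0, range.value, rowHeight
            );
            blocks.push({ canvas: block, x, y: rowOffset });
        }
        // background and foreground runs both advance the x offset
        x += range.value;
    });
    return blocks;
}
```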
## Similar Image Detection

With the glyphs extracted, what's left is the actual translation. For Sheikah script we know the mapping to English: every Sheikah symbol corresponds to one English character. So we can render the Sheikah glyphs for the 40 characters as reference images, and translating a Sheikah image reduces to a simple procedure: compare each cropped glyph against the 40 known reference images for **similarity**; the most similar reference is the target character.

![Sheikah symbols for abcdefghijklmnopqrstuvwxyz0123456789.-!?](./ocr/ocr-7.jpeg)

The image above shows the Sheikah symbols for `abcdefghijklmnopqrstuvwxyz0123456789.-!?`.

So how do we compare two images for similarity? Most of the work is actually already done; we're one step from the finish line. Since an image has already been binarized into black and white, outputting **0 for each black pixel and 1 for each white pixel** gives us a binary hash of the image. The similarity of two images is then just the **number of positions** where their hashes differ, which is exactly the [Hamming distance](https://zh.wikipedia.org/zh-hans/%E6%B1%89%E6%98%8E%E8%B7%9D%E7%A6%BB) between the two hashes: the smaller the distance, the more similar the images. A small tweak to the binarization output code produces the hash:

```JavaScript
const binaryzationHash = (originCanvas, threshold) => {
    const ctx = originCanvas.getContext('2d');
    const imageData = ctx.getImageData(0, 0, originCanvas.width, originCanvas.height);
    const { width, height, data } = imageData;
    // the first pixel gives the background color
    const head = (data[0] + data[1] + data[2]) / 3;
    // if the background is brighter than the threshold,
    // swap the foreground/background output values
    const color = head > threshold
        ? { foreground: 0, background: 255 }
        : { foreground: 255, background: 0 };
    const hash = [];
    for (let x = 0; x < width; x++) {
        for (let y = 0; y < height; y++) {
            const idx = (x + y * width) * 4;
            const avg = (data[idx] + data[idx + 1] + data[idx + 2]) / 3;
            const v = avg > threshold ? color.foreground : color.background;
            hash.push(v ? 1 : 0);
        }
    }
    return hash;
}
```

Computing the Hamming distance is just as simple:

```JavaScript
const hammingDistance = (hash1, hash2) => {
    let count = 0;
    hash1.forEach((it, index) => {
        count += it ^ hash2[index];
    });
    return count;
};
```

That's the core of the similarity comparison. Because we can't guarantee that a cropped glyph has exactly the same dimensions as the reference images, both the cropped and the reference images are shrunk to 8 × 8 before comparison. The overall flow for comparing two images:
- shrink both images to 8 × 8
- convert them to grayscale
- compute the binarization threshold
- binarize and compute the image hash
- compare the Hamming distance between the two hashes

I've previously written a detailed article on similar-image recognition; if you're interested, see [相似图片识别的朴素实现](https://juejin.cn/post/6926181310868226061).

Back to our Sheikah translator, only three steps remain (a sketch of the resize helper used below follows the list):
1. shrink the 40 reference images to 8 × 8 and generate their hashes
2. shrink each cropped glyph image to 8 × 8 and generate its hash
3. compare each glyph hash against the 40 reference hashes; the one with the smallest difference (the highest similarity) is the target character
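
One helper the article uses but never shows is `resizeCanvas`. A minimal sketch, assuming it simply downscales a canvas and returns the resulting `ImageData` (the real helper in the repo may differ):

```JavaScript
// Shrink a canvas to size × size and return the resulting ImageData.
function resizeCanvas(canvas, size) {
    const small = document.createElement('canvas');
    small.width = size;
    small.height = size;
    const ctx = small.getContext('2d');
    // let the browser handle the downsampling
    ctx.drawImage(canvas, 0, 0, canvas.width, canvas.height, 0, 0, size, size);
    return ctx.getImageData(0, 0, size, size);
}
```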

The implementation is fairly straightforward:

```JavaScript
async function createImageFingerprints(image) {
    const contents = splitImage(image);
    return contents.map(({ canvas, ...args }) => {
        // shrink every glyph to 8 × 8
        const imageData = resizeCanvas(canvas, 8);
        // binaryzationOutput here is the hash variant shown above,
        // computing its threshold internally (e.g. via otsu)
        const hash = binaryzationOutput(imageData);
        return {
            ...args,
            hash,
        };
    });
}

// build the reference-character fingerprints
function createSymbols(fingerprints) {
    const WORDS = 'abcdefghijklmnopqrstuvwxyz0123456789.-!?';
    return fingerprints.map((it, index) => {
        return {
            name: WORDS[index],
            value: it.hash,
        };
    });
}

// match each glyph to the most similar reference character
function mapSymbols(fingerprints, symbols) {
    return fingerprints.map(({ hash, ...position }) => {
        // an all-identical hash means a blank block
        const isEmpty = hash.every((v) => v === hash[0]);
        if (isEmpty) {
            return ' ';
        }
        let diff = Number.MAX_SAFE_INTEGER;
        let word = '*';
        symbols.forEach((symbol) => {
            const distance = hammingDistance(hash, symbol.value);
            // keep the reference with the smallest Hamming distance
            // (i.e. the highest similarity)
            if (distance < diff) {
                diff = distance;
                word = symbol.name;
            }
        });
        return {
            ...position,
            word,
            diff,
        };
    });
}
```

Usage looks roughly like this:

```JavaScript
/**
 * @param imageUrl the image to parse
 * @param mapUrl the reference character image
 */
export async function readMetaInfo(imageUrl, mapUrl) {
    const mapImage = await loadImage(mapUrl);
    const mapImageFingerprints = await createImageFingerprints(mapImage);
    const symbols = createSymbols(mapImageFingerprints);
    const readImage = await loadImage(imageUrl);
    const readImageFingerprints = await createImageFingerprints(readImage);
    const results = mapSymbols(readImageFingerprints, symbols);
    console.log(results);
}
```

[The complete implementation is here](https://github.com/kinglisky/zelda-words/blob/master/src/utils/image-ocr.ts#L390). And with that, a very simple Sheikah OCR translator is done~


## Other

- [摸一个塞尔达希卡文字转换器](https://juejin.cn/post/6935836863844319239/)
- [相似图片识别的朴素实现](https://juejin.cn/post/6926181310868226061)
- [利用 JS 实现多种图片相似度算法](https://segmentfault.com/a/1190000021236326)
- [文字切割算法-基于投影的切割](https://blog.csdn.net/Print_lin/article/details/80143002?spm=1001.2014.3001.5501)
- [文字切割算法-投影切割优化](https://blog.csdn.net/Print_lin/article/details/80335236)

A little thing hacked together in spare moments; getting a rough sense of how OCR works was a lot of fun. This last image is for the special you, yay~

![](./ocr/ocr-2.jpeg)

--------------------------------------------------------------------------------
/ocr/ocr-0.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kinglisky/zelda-words/2adc6e49eaf3446e19d6f5d5923155d323f77eaf/ocr/ocr-0.jpg


--------------------------------------------------------------------------------
/ocr/ocr-1.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kinglisky/zelda-words/2adc6e49eaf3446e19d6f5d5923155d323f77eaf/ocr/ocr-1.jpeg


--------------------------------------------------------------------------------
/ocr/ocr-10.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kinglisky/zelda-words/2adc6e49eaf3446e19d6f5d5923155d323f77eaf/ocr/ocr-10.png
-------------------------------------------------------------------------------- /ocr/ocr-11.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kinglisky/zelda-words/2adc6e49eaf3446e19d6f5d5923155d323f77eaf/ocr/ocr-11.png -------------------------------------------------------------------------------- /ocr/ocr-12.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kinglisky/zelda-words/2adc6e49eaf3446e19d6f5d5923155d323f77eaf/ocr/ocr-12.png -------------------------------------------------------------------------------- /ocr/ocr-13.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kinglisky/zelda-words/2adc6e49eaf3446e19d6f5d5923155d323f77eaf/ocr/ocr-13.jpeg -------------------------------------------------------------------------------- /ocr/ocr-14.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kinglisky/zelda-words/2adc6e49eaf3446e19d6f5d5923155d323f77eaf/ocr/ocr-14.jpeg -------------------------------------------------------------------------------- /ocr/ocr-15.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kinglisky/zelda-words/2adc6e49eaf3446e19d6f5d5923155d323f77eaf/ocr/ocr-15.jpeg -------------------------------------------------------------------------------- /ocr/ocr-16.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kinglisky/zelda-words/2adc6e49eaf3446e19d6f5d5923155d323f77eaf/ocr/ocr-16.jpeg -------------------------------------------------------------------------------- /ocr/ocr-17.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kinglisky/zelda-words/2adc6e49eaf3446e19d6f5d5923155d323f77eaf/ocr/ocr-17.png -------------------------------------------------------------------------------- /ocr/ocr-18.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kinglisky/zelda-words/2adc6e49eaf3446e19d6f5d5923155d323f77eaf/ocr/ocr-18.png -------------------------------------------------------------------------------- /ocr/ocr-19.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kinglisky/zelda-words/2adc6e49eaf3446e19d6f5d5923155d323f77eaf/ocr/ocr-19.png -------------------------------------------------------------------------------- /ocr/ocr-2.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kinglisky/zelda-words/2adc6e49eaf3446e19d6f5d5923155d323f77eaf/ocr/ocr-2.jpeg -------------------------------------------------------------------------------- /ocr/ocr-3.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kinglisky/zelda-words/2adc6e49eaf3446e19d6f5d5923155d323f77eaf/ocr/ocr-3.jpeg -------------------------------------------------------------------------------- /ocr/ocr-4.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kinglisky/zelda-words/2adc6e49eaf3446e19d6f5d5923155d323f77eaf/ocr/ocr-4.jpeg 
-------------------------------------------------------------------------------- /ocr/ocr-5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kinglisky/zelda-words/2adc6e49eaf3446e19d6f5d5923155d323f77eaf/ocr/ocr-5.png -------------------------------------------------------------------------------- /ocr/ocr-6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kinglisky/zelda-words/2adc6e49eaf3446e19d6f5d5923155d323f77eaf/ocr/ocr-6.png -------------------------------------------------------------------------------- /ocr/ocr-7.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kinglisky/zelda-words/2adc6e49eaf3446e19d6f5d5923155d323f77eaf/ocr/ocr-7.jpeg -------------------------------------------------------------------------------- /ocr/ocr-8.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kinglisky/zelda-words/2adc6e49eaf3446e19d6f5d5923155d323f77eaf/ocr/ocr-8.png -------------------------------------------------------------------------------- /ocr/ocr-9.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kinglisky/zelda-words/2adc6e49eaf3446e19d6f5d5923155d323f77eaf/ocr/ocr-9.png -------------------------------------------------------------------------------- /ocr/ocr-map.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kinglisky/zelda-words/2adc6e49eaf3446e19d6f5d5923155d323f77eaf/ocr/ocr-map.jpeg -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "zelda-words", 3 | "version": "0.0.0", 4 | "scripts": { 5 | "dev": "vite", 6 | "build": "vuedx-typecheck . 
&& vite build", 7 | "serve": "vite preview" 8 | }, 9 | "dependencies": { 10 | "@nuintun/qrcode": "^3.0.1", 11 | "@tensorflow/tfjs": "^3.9.0", 12 | "vue": "^3.0.5", 13 | "yargs": "^17.1.1" 14 | }, 15 | "devDependencies": { 16 | "@types/dom-to-image": "^2.6.2", 17 | "@types/node": "^16.9.3", 18 | "@vitejs/plugin-vue": "^1.1.4", 19 | "@vue/compiler-sfc": "^3.0.5", 20 | "@vuedx/typecheck": "^0.6.0", 21 | "@vuedx/typescript-plugin-vue": "^0.6.0", 22 | "dom-to-image": "^2.6.0", 23 | "sass": "^1.32.7", 24 | "typescript": "^4.1.3", 25 | "vite": "^2.0.0" 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /public/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kinglisky/zelda-words/2adc6e49eaf3446e19d6f5d5923155d323f77eaf/public/favicon.ico -------------------------------------------------------------------------------- /public/model.json: -------------------------------------------------------------------------------- 1 | { 2 | "modelTopology": { 3 | "class_name": "Sequential", 4 | "config": { 5 | "name": "sequential_1", 6 | "layers": [ 7 | { 8 | "class_name": "Conv2D", 9 | "config": { 10 | "filters": 16, 11 | "kernel_initializer": { 12 | "class_name": "VarianceScaling", 13 | "config": { 14 | "scale": 1, 15 | "mode": "fan_avg", 16 | "distribution": "normal", 17 | "seed": null 18 | } 19 | }, 20 | "kernel_regularizer": null, 21 | "kernel_constraint": null, 22 | "kernel_size": [ 23 | 3, 24 | 3 25 | ], 26 | "strides": [ 27 | 1, 28 | 1 29 | ], 30 | "padding": "valid", 31 | "data_format": "channels_last", 32 | "dilation_rate": [ 33 | 1, 34 | 1 35 | ], 36 | "activation": "relu", 37 | "use_bias": true, 38 | "bias_initializer": { 39 | "class_name": "Zeros", 40 | "config": {} 41 | }, 42 | "bias_regularizer": null, 43 | "activity_regularizer": null, 44 | "bias_constraint": null, 45 | "name": "conv2d_Conv2D1", 46 | "trainable": true, 47 | "batch_input_shape": [ 48 | null, 49 | 28, 50 | 28, 51 | 1 52 | ], 53 | "dtype": "float32" 54 | } 55 | }, 56 | { 57 | "class_name": "MaxPooling2D", 58 | "config": { 59 | "pool_size": [ 60 | 2, 61 | 2 62 | ], 63 | "padding": "valid", 64 | "strides": [ 65 | 2, 66 | 2 67 | ], 68 | "data_format": "channels_last", 69 | "name": "max_pooling2d_MaxPooling2D1", 70 | "trainable": true 71 | } 72 | }, 73 | { 74 | "class_name": "Conv2D", 75 | "config": { 76 | "filters": 32, 77 | "kernel_initializer": { 78 | "class_name": "VarianceScaling", 79 | "config": { 80 | "scale": 1, 81 | "mode": "fan_avg", 82 | "distribution": "normal", 83 | "seed": null 84 | } 85 | }, 86 | "kernel_regularizer": null, 87 | "kernel_constraint": null, 88 | "kernel_size": [ 89 | 3, 90 | 3 91 | ], 92 | "strides": [ 93 | 1, 94 | 1 95 | ], 96 | "padding": "valid", 97 | "data_format": "channels_last", 98 | "dilation_rate": [ 99 | 1, 100 | 1 101 | ], 102 | "activation": "relu", 103 | "use_bias": true, 104 | "bias_initializer": { 105 | "class_name": "Zeros", 106 | "config": {} 107 | }, 108 | "bias_regularizer": null, 109 | "activity_regularizer": null, 110 | "bias_constraint": null, 111 | "name": "conv2d_Conv2D2", 112 | "trainable": true 113 | } 114 | }, 115 | { 116 | "class_name": "MaxPooling2D", 117 | "config": { 118 | "pool_size": [ 119 | 2, 120 | 2 121 | ], 122 | "padding": "valid", 123 | "strides": [ 124 | 2, 125 | 2 126 | ], 127 | "data_format": "channels_last", 128 | "name": "max_pooling2d_MaxPooling2D2", 129 | "trainable": true 130 | } 131 | }, 132 | { 133 | "class_name": "Conv2D", 134 | 
"config": { 135 | "filters": 32, 136 | "kernel_initializer": { 137 | "class_name": "VarianceScaling", 138 | "config": { 139 | "scale": 1, 140 | "mode": "fan_avg", 141 | "distribution": "normal", 142 | "seed": null 143 | } 144 | }, 145 | "kernel_regularizer": null, 146 | "kernel_constraint": null, 147 | "kernel_size": [ 148 | 3, 149 | 3 150 | ], 151 | "strides": [ 152 | 1, 153 | 1 154 | ], 155 | "padding": "valid", 156 | "data_format": "channels_last", 157 | "dilation_rate": [ 158 | 1, 159 | 1 160 | ], 161 | "activation": "relu", 162 | "use_bias": true, 163 | "bias_initializer": { 164 | "class_name": "Zeros", 165 | "config": {} 166 | }, 167 | "bias_regularizer": null, 168 | "activity_regularizer": null, 169 | "bias_constraint": null, 170 | "name": "conv2d_Conv2D3", 171 | "trainable": true 172 | } 173 | }, 174 | { 175 | "class_name": "Flatten", 176 | "config": { 177 | "name": "flatten_Flatten1", 178 | "trainable": true 179 | } 180 | }, 181 | { 182 | "class_name": "Dense", 183 | "config": { 184 | "units": 64, 185 | "activation": "relu", 186 | "use_bias": true, 187 | "kernel_initializer": { 188 | "class_name": "VarianceScaling", 189 | "config": { 190 | "scale": 1, 191 | "mode": "fan_avg", 192 | "distribution": "normal", 193 | "seed": null 194 | } 195 | }, 196 | "bias_initializer": { 197 | "class_name": "Zeros", 198 | "config": {} 199 | }, 200 | "kernel_regularizer": null, 201 | "bias_regularizer": null, 202 | "activity_regularizer": null, 203 | "kernel_constraint": null, 204 | "bias_constraint": null, 205 | "name": "dense_Dense1", 206 | "trainable": true 207 | } 208 | }, 209 | { 210 | "class_name": "Dense", 211 | "config": { 212 | "units": 40, 213 | "activation": "softmax", 214 | "use_bias": true, 215 | "kernel_initializer": { 216 | "class_name": "VarianceScaling", 217 | "config": { 218 | "scale": 1, 219 | "mode": "fan_avg", 220 | "distribution": "normal", 221 | "seed": null 222 | } 223 | }, 224 | "bias_initializer": { 225 | "class_name": "Zeros", 226 | "config": {} 227 | }, 228 | "kernel_regularizer": null, 229 | "bias_regularizer": null, 230 | "activity_regularizer": null, 231 | "kernel_constraint": null, 232 | "bias_constraint": null, 233 | "name": "dense_Dense2", 234 | "trainable": true 235 | } 236 | } 237 | ] 238 | }, 239 | "keras_version": "tfjs-layers 3.9.0", 240 | "backend": "tensor_flow.js" 241 | }, 242 | "format": "layers-model", 243 | "generatedBy": "TensorFlow.js tfjs-layers v3.9.0", 244 | "convertedBy": null, 245 | "weightsManifest": [ 246 | { 247 | "paths": [ 248 | "./model.weights.bin" 249 | ], 250 | "weights": [ 251 | { 252 | "name": "conv2d_Conv2D1/kernel", 253 | "shape": [ 254 | 3, 255 | 3, 256 | 1, 257 | 16 258 | ], 259 | "dtype": "float32" 260 | }, 261 | { 262 | "name": "conv2d_Conv2D1/bias", 263 | "shape": [ 264 | 16 265 | ], 266 | "dtype": "float32" 267 | }, 268 | { 269 | "name": "conv2d_Conv2D2/kernel", 270 | "shape": [ 271 | 3, 272 | 3, 273 | 16, 274 | 32 275 | ], 276 | "dtype": "float32" 277 | }, 278 | { 279 | "name": "conv2d_Conv2D2/bias", 280 | "shape": [ 281 | 32 282 | ], 283 | "dtype": "float32" 284 | }, 285 | { 286 | "name": "conv2d_Conv2D3/kernel", 287 | "shape": [ 288 | 3, 289 | 3, 290 | 32, 291 | 32 292 | ], 293 | "dtype": "float32" 294 | }, 295 | { 296 | "name": "conv2d_Conv2D3/bias", 297 | "shape": [ 298 | 32 299 | ], 300 | "dtype": "float32" 301 | }, 302 | { 303 | "name": "dense_Dense1/kernel", 304 | "shape": [ 305 | 288, 306 | 64 307 | ], 308 | "dtype": "float32" 309 | }, 310 | { 311 | "name": "dense_Dense1/bias", 312 | "shape": [ 313 | 64 314 | ], 315 | 
"dtype": "float32" 316 | }, 317 | { 318 | "name": "dense_Dense2/kernel", 319 | "shape": [ 320 | 64, 321 | 40 322 | ], 323 | "dtype": "float32" 324 | }, 325 | { 326 | "name": "dense_Dense2/bias", 327 | "shape": [ 328 | 40 329 | ], 330 | "dtype": "float32" 331 | } 332 | ] 333 | } 334 | ] 335 | } -------------------------------------------------------------------------------- /public/model.weights.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kinglisky/zelda-words/2adc6e49eaf3446e19d6f5d5923155d323f77eaf/public/model.weights.bin -------------------------------------------------------------------------------- /src/App.vue: -------------------------------------------------------------------------------- 1 |