├── .gitignore ├── README.md ├── index.js └── package.json /.gitignore: -------------------------------------------------------------------------------- 1 | node_modules/ 2 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # classify-text 2 | 3 | Example of text classification using neural networks and bow with [mimir](https://github.com/techfort/mimir) and [brain](https://github.com/harthur/brain) 4 | -------------------------------------------------------------------------------- /index.js: -------------------------------------------------------------------------------- 1 | var mimir = require('mimir'), 2 | brain = require('brain'); 3 | 4 | /* few utils for the example */ 5 | function vec_result(res, num_classes) { 6 | var i = 0, 7 | vec = []; 8 | for (i; i < num_classes; i += 1) { 9 | vec.push(0); 10 | } 11 | vec[res] = 1; 12 | return vec; 13 | } 14 | 15 | function maxarg(array) { 16 | return array.indexOf(Math.max.apply(Math, array)); 17 | } 18 | 19 | // train data 20 | var ANN_Classes = { 21 | HISTORY: 0, 22 | PROGRAMMING: 1, 23 | MUSIC: 2 24 | }, 25 | classes_array = Object.keys(ANN_Classes), //['HISTORY', 'PROGRAMMING', 'MUSIC'], 26 | texts = [ 27 | // history 28 | "The end of the Viking-era in Norway is marked by the Battle of Stiklestad in 1030", 29 | "The end of the Viking Age is traditionally marked in England by the failed invasion attempted by the Norwegian king Harald III ", 30 | "The earliest date given for a Viking raid is 787 AD when, according to the Anglo-Saxon Chronicle, a group of men from Norway sailed to the Isle of Portland in Dorset", 31 | // programming 32 | "A programming language is a formal constructed language designed to communicate instructions to a machine, particularly a computer. Programming languages can be used to create programs to control the behavior of a machine or to express algorithms.", 33 | "Thousands of different programming languages have been created, mainly in the computer field, and many more still are being created every year.", 34 | "The description of a programming language is usually split into the two components of syntax (form) and semantics (meaning). Some languages are defined by a specification document (for example, the C programming language is specified by an ISO Standard), while other languages (such as Perl) have a dominant implementation that is treated as a reference", 35 | // music 36 | "Classical music is art music produced or rooted in the traditions of Western music (both liturgical and secular)", 37 | "European music is largely distinguished from many other non-European and popular musical forms by its system of staff notation, in use since about the 16th century", 38 | "classical music has been noted for its development of highly sophisticated forms of instrumental music." 39 | ], 40 | dict = mimir.dict(texts), 41 | traindata = [ 42 | [mimir.bow(texts[0], dict), ANN_Classes.HISTORY], 43 | [mimir.bow(texts[1], dict), ANN_Classes.HISTORY], 44 | [mimir.bow(texts[2], dict), ANN_Classes.HISTORY], 45 | [mimir.bow(texts[3], dict), ANN_Classes.PROGRAMMING], 46 | [mimir.bow(texts[4], dict), ANN_Classes.PROGRAMMING], 47 | [mimir.bow(texts[5], dict), ANN_Classes.PROGRAMMING], 48 | [mimir.bow(texts[6], dict), ANN_Classes.MUSIC], 49 | [mimir.bow(texts[7], dict), ANN_Classes.MUSIC], 50 | [mimir.bow(texts[8], dict), ANN_Classes.MUSIC] 51 | ], 52 | test_history = "The beginning of the Viking Age in the British Isles is, however, often given as 793.", 53 | test_music = "Baroque music is a style of Western art music composed from approximately 1600 to 1750", 54 | test_bow_history = mimir.bow(test_history, dict), 55 | test_bow_music = mimir.bow(test_music, dict); 56 | 57 | var net = new brain.NeuralNetwork(), 58 | ann_train = traindata.map(function (pair) { 59 | return { 60 | input: pair[0], 61 | output: vec_result(pair[1], 3) 62 | }; 63 | }); 64 | 65 | net.train(ann_train); 66 | console.log('------------------- ANN (brain) ----------------------'); 67 | var predict = net.run(test_bow_history); 68 | console.log(predict); 69 | console.log(classes_array[maxarg(predict)]); // prints HISTORY 70 | console.log(classes_array[maxarg(net.run(test_bow_music))]); // prints MUSIC 71 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "classify-text", 3 | "version": "1.0.0", 4 | "description": "", 5 | "main": "index.js", 6 | "scripts": { 7 | "test": "echo \"Error: no test specified\" && exit 1" 8 | }, 9 | "author": "", 10 | "license": "ISC", 11 | "devDependencies": { 12 | "brain": "^0.7.0", 13 | "mimir": "0.0.1", 14 | "node-svm": "^2.1.5" 15 | } 16 | } 17 | --------------------------------------------------------------------------------