diff --git a/config.json b/config.json
index c11de6c..1804e72 100644
--- a/config.json
+++ b/config.json
@@ -5,8 +5,8 @@
     "sans-serif",
     "serif"
   ],
-  "training_set": 20000,
-  "testing_set": 500,
+  "training_set": 2000,
+  "testing_set": 1000,
   "image_size": 16,
   "threshold": 400,
   "network": {
diff --git a/examples/0123456789.png b/examples/0123456789.png
index 3752051..7bba065 100644
Binary files a/examples/0123456789.png and b/examples/0123456789.png differ
diff --git a/main.js b/main.js
index 29a16be..887c393 100644
--- a/main.js
+++ b/main.js
@@ -1,14 +1,7 @@
-<<<<<<< HEAD
-(function (log) {
-  'use strict';
-
-  log('reading config file ...');
-=======
 (function () {
   'use strict';

   console.log('reading config file ...');
->>>>>>> softmax

   var synaptic = require('synaptic'),
     network = require('./network.js'),
@@ -29,131 +22,6 @@
   config.fonts = config.fonts || ['serif', 'sans-serif'];
   config.distortion = config.distortion === undefined ? true : config.distortion;
   config.network.hidden = config.network.hidden || 40;
-<<<<<<< HEAD
-  config.network.output = config.network.output || 8;
-  config.network.learning_rate = config.network.learning_rate || 0.1;
-
-  log('... done');
-  log();
-
-  var perceptron;
-
-  if(config.mnist === true) {
-    perceptron = new synaptic.Architect.Perceptron(
-      (config.image_size * config.image_size), // input
-      config.network.hidden, // hidden
-      4 // output
-    );
-
-    mnist();
-  } else {
-    perceptron = new synaptic.Architect.Perceptron(
-      (config.image_size * config.image_size), // input
-      config.network.hidden, // hidden
-      config.network.output // output
-    );
-
-    var index,
-      samples = config.training_set + config.testing_set,
-      training = [],
-      testing = [],
-      settings = {
-        size: config.text.length,
-        height: config.image_size,
-        text: config.text,
-        fonts: config.fonts,
-        distortion: config.distortion
-      };
-
-    log('generating images ...');
-
-    for(index = 0; index < samples; index++)
-      captcha.generate(settings, generate(index));
-  }
-
-  // captcha callback
-  function generate(index) {
-    return function(text, data) {
-      var png = new PNG({ filterType: 4 });
-      png.parse(data, parse(text, index));
-
-      if(index === 0)
-        fs.writeFileSync('./examples/' + text + '.png', data, 'base64');
-    };
-  }
-
-  // parse MNIST data
-  function mnist() {
-    log('parsing MNIST data ...');
-
-    var data = fs.readFileSync('./mnist/train-images.idx3-ubyte'),
-      labels = fs.readFileSync('./mnist/train-labels.idx1-ubyte'),
-      training = [],
-      testing = [],
-      pixels = [],
-      image,
-      x, y;
-
-    config.training_set = 60000;
-    config.testing_set = 10000;
-    config.threshold = 50;
-    config.image_size = 20;
-
-    for(image = 0; image < config.training_set; image++) {
-      for(y = 4; y < config.image_size + 4; y++)
-        for(x = 4; x < config.image_size + 4; x++)
-          pixels.push(data[(image * 28 * 28) + (x + (y * 28)) + 15]);
-
-      pixels = tools.center(
-        pixels.map(function(pixel) {
-          return pixel > config.threshold ? 1 : 0;
-        })
-      );
-
-      training.push({
-        input: pixels,
-        output: ('0000' + parseInt(labels[image + 8]).toString(2)).substr(-4).split('').map(Number)
-      });
-
-      pixels = [];
-    }
-
-    data = fs.readFileSync('./mnist/t10k-images-2.idx3-ubyte');
-    labels = fs.readFileSync('./mnist/t10k-labels.idx1-ubyte');
-
-    for(image = 0; image < config.testing_set; image++) {
-      for(y = 4; y < config.image_size + 4; y++)
-        for(x = 4; x < config.image_size + 4; x++)
-          pixels.push(data[(image * 28 * 28) + (x + (y * 28)) + 15]);
-
-      pixels = tools.center(
-        pixels.map(function(pixel) {
-          return pixel > config.threshold ? 1 : 0;
-        })
-      );
-
-      testing.push({
-        input: pixels,
-        output: ('0000' + parseInt(labels[image + 8]).toString(2)).substr(-4).split('').map(Number)
-      });
-
-      pixels = [];
-    }
-
-    log('... done', '\n');
-
-    tools.validate(perceptron, training);
-    tools.validate(perceptron, testing);
-
-    network.train(perceptron, training, config.network.learning_rate);
-    fs.writeFileSync('./ocr.js', perceptron.standalone().toString());
-    network.test(perceptron, testing);
-  }
-
-  // 'parsed' event callback
-  function parse(text, index) {
-    return function(error, data) {
-=======
   config.network.output = config.network.output || 10;
   config.network.learning_rate = config.network.learning_rate || 0.1;

@@ -187,18 +55,13 @@
     var png = new PNG({ filterType: 4 });

     png.parse(data, function(error, data) {
->>>>>>> softmax
       if(error)
         throw error;

       var position,
         chunk = [],
         pixel = [],
-<<<<<<< HEAD
-        i, j, k, x, y;
-=======
         i, j, x, y;
->>>>>>> softmax

       for(i = 0; i < config.text.length; i++) {
         for(y = 0; y < data.height; y++) {
@@ -222,11 +85,7 @@
           var output = Array.apply(null, new Array(config.network.output)).map(Number.prototype.valueOf, 0);
           output[i] = 1;

-<<<<<<< HEAD
-          if(index < config.training_set) {
-=======
           if(k < config.training_set) {
->>>>>>> softmax
             training.push({
               input: chunk,
               output: output
@@ -241,15 +100,9 @@
         chunk = [];
       }

-<<<<<<< HEAD
-      if(index === samples - 1) {
-        log('... done');
-        log();
-=======
       if(k++ === samples - 1) {
         console.log('... done');
         console.log();
->>>>>>> softmax

         network.train(perceptron, training, config.network.learning_rate);
         fs.writeFileSync('./ocr.js', 'module.exports = ' + perceptron.standalone().toString());
@@ -257,13 +110,9 @@
       }
     });
   }
-<<<<<<< HEAD
-})(console.log);
-=======

  console.log('generating images ...');

  for(index = 0; index < samples; index++)
    captcha.generate(settings, generate);
})();
->>>>>>> softmax