Skip to content

Commit

Permalink
fix: serving binary files
Browse files Browse the repository at this point in the history
For binary files, we can't cache the content as a string (utf8), we need to use a binary buffer.

All the preprocessors are currently implemented to deal with strings, as that makes sense for text files. If we changed preprocessors to deal with buffers then each preprocessor would have to convert the buffer to a string, do the string manipulation and then convert it back to a buffer. I don't think that's worthy as there are no binary preprocessors.

This change disables preprocessing of binary files to avoid weird errors.
It shows warning. If there is a reasonable use case for preprocessing binary files we can figure out something.

Closes #864
Closes #885
  • Loading branch information
vojtajina committed Feb 5, 2014
1 parent aca84dc commit 8a30cf5
Show file tree
Hide file tree
Showing 2 changed files with 45 additions and 8 deletions.
34 changes: 26 additions & 8 deletions lib/preprocessor.js
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
var path = require('path');
var fs = require('graceful-fs');
var crypto = require('crypto');
var mm = require('minimatch');
Expand All @@ -10,12 +11,29 @@ var sha1 = function(data) {
return hash.digest('hex');
};

var isBinary = Object.create(null);

This comment has been minimized.

Copy link
@thom4parisot

thom4parisot Mar 12, 2014

Contributor

So in our case, just adding dat in that list would suffice. I'll create a PR for that.

This comment has been minimized.

Copy link
@vojtajina

vojtajina Mar 14, 2014

Author Contributor

yep, saw your PR, makes sense

[
'adp', 'au', 'mid', 'mp4a', 'mpga', 'oga', 's3m', 'sil', 'eol', 'dra', 'dts', 'dtshd', 'lvp',
'pya', 'ecelp4800', 'ecelp7470', 'ecelp9600', 'rip', 'weba', 'aac', 'aif', 'caf', 'flac', 'mka',
'm3u', 'wax', 'wma', 'wav', 'xm', 'flac', '3gp', '3g2', 'h261', 'h263', 'h264', 'jpgv', 'jpm',
'mj2', 'mp4', 'mpeg', 'ogv', 'qt', 'uvh', 'uvm', 'uvp', 'uvs', 'dvb', 'fvt', 'mxu', 'pyv', 'uvu',
'viv', 'webm', 'f4v', 'fli', 'flv', 'm4v', 'mkv', 'mng', 'asf', 'vob', 'wm', 'wmv', 'wmx', 'wvx',
'movie', 'smv', 'ts', 'bmp', 'cgm', 'g3', 'gif', 'ief', 'jpg', 'jpeg', 'ktx', 'png', 'btif',
'sgi', 'svg', 'tiff', 'psd', 'uvi', 'sub', 'djvu', 'dwg', 'dxf', 'fbs', 'fpx', 'fst', 'mmr',
'rlc', 'mdi', 'wdp', 'npx', 'wbmp', 'xif', 'webp', '3ds', 'ras', 'cmx', 'fh', 'ico', 'pcx', 'pic',
'pnm', 'pbm', 'pgm', 'ppm', 'rgb', 'tga', 'xbm', 'xpm', 'xwd', 'zip', 'rar', 'tar', 'bz2', 'eot',
'ttf', 'woff'
].forEach(function(extension) {
isBinary['.' + extension] = true;

This comment has been minimized.

This comment has been minimized.

Copy link
@vojtajina

vojtajina Mar 14, 2014

Author Contributor

Sure, we can use it.

These extensions are just copy/pasted from chokidar.

});

// TODO(vojta): instantiate preprocessors at the start to show warnings immediately
var createPreprocessor = function(config, basePath, injector) {
var patterns = Object.keys(config);
var alreadyDisplayedWarnings = Object.create(null);

return function(file, done) {
var thisFileIsBinary = isBinary[path.extname(file.originalPath)];
var preprocessors = [];
var nextPreprocessor = function(error, content) {
// normalize B-C
Expand Down Expand Up @@ -61,18 +79,18 @@ var createPreprocessor = function(config, basePath, injector) {
// TODO(vojta): should we cache this ?
for (var i = 0; i < patterns.length; i++) {
if (mm(file.originalPath, patterns[i])) {
config[patterns[i]].forEach(instantiatePreprocessor);
if (thisFileIsBinary) {
log.warn('Ignoring preprocessing (%s) %s because it is a binary file.',
config[patterns[i]].join(', '), file.originalPath);
} else {
config[patterns[i]].forEach(instantiatePreprocessor);
}
}
}

// compute SHA of the content
preprocessors.push(function(content, file, done) {
file.sha = sha1(content);
done(content);
});

return fs.readFile(file.originalPath, function(err, buffer) {
nextPreprocessor(buffer.toString());
file.sha = sha1(buffer);
nextPreprocessor(null, thisFileIsBinary ? buffer : buffer.toString());
});
};
};
Expand Down
19 changes: 19 additions & 0 deletions test/unit/preprocessor.spec.coffee
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ describe 'preprocessor', ->
some:
'a.js': mocks.fs.file 0, 'content'
'a.txt': mocks.fs.file 0, 'some-text'
'photo.png': mocks.fs.file 0, 'binary'

mocks_ =
'graceful-fs': mockFs
Expand Down Expand Up @@ -133,3 +134,21 @@ describe 'preprocessor', ->
pp file, (err) ->
expect(fakePreprocessor).not.to.have.been.called
done()


it 'should not preprocess binary files', (done) ->
fakePreprocessor = sinon.spy (content, file, done) ->
done null, content

injector = new di.Injector [{
'preprocessor:fake': ['factory', -> fakePreprocessor]
}]

pp = m.createPreprocessor {'**/*': ['fake']}, null, injector

file = {originalPath: '/some/photo.png', path: 'path'}

pp file, (err) ->
expect(fakePreprocessor).not.to.have.been.called
expect(file.content).to.be.an.instanceof Buffer
done()

1 comment on commit 8a30cf5

@thom4parisot
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I have a use case of processing binary files :-p
Temporarily worked around in bbc/peaks.js@e54759e
We basically serve binary data to describe audio waveforms, because they are smaller in size and memory footprint.

Hopefully we have a JSON fallback but now we cannot tests the binary cases.

Please sign in to comment.