Skip to content

Commit

Permalink
Allow unicode punctuation, move rule after linkifier
Browse files Browse the repository at this point in the history
  • Loading branch information
rlidwka committed Dec 30, 2015
1 parent cb2fc23 commit 94396e6
Show file tree
Hide file tree
Showing 4 changed files with 38 additions and 6 deletions.
31 changes: 26 additions & 5 deletions index.js
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,13 @@
'use strict';


var PUNCT_CHARS = ' \n()[]\'".,!?-';
// ASCII characters in Cc, Sc, Sm, Sk categories we should terminate on;
// you can check character classes here:
// http://www.unicode.org/Public/UNIDATA/UnicodeData.txt
var OTHER_CHARS = ' \r\n$+<=>^`|~';

var UNICODE_PUNCT_RE = require('uc.micro/categories/P/regex').source;
var UNICODE_SPACE_RE = require('uc.micro/categories/Z/regex').source;


module.exports = function sub_plugin(md) {
Expand Down Expand Up @@ -56,21 +62,31 @@ module.exports = function sub_plugin(md) {


function abbr_replace(state) {
var i, j, l, tokens, token, text, nodes, pos, reg, m, regText,
var i, j, l, tokens, token, text, nodes, pos, reg, m, regText, regSimple,
currentToken,
blockTokens = state.tokens;

if (!state.env.abbreviations) { return; }
if (!state.env.abbrRegExp) {
regText = '(^|[' + PUNCT_CHARS.split('').map(escapeRE).join('') + '])'
state.env.abbrRegExpSimple = new RegExp('(?:' +
Object.keys(state.env.abbreviations).map(function (x) {
return x.substr(1);
}).sort(function (a, b) {
return b.length - a.length;
}).map(escapeRE).join('|') + ')');

regText = '(^|' + UNICODE_PUNCT_RE + '|' + UNICODE_SPACE_RE +
'|[' + OTHER_CHARS.split('').map(escapeRE).join('') + '])'
+ '(' + Object.keys(state.env.abbreviations).map(function (x) {
return x.substr(1);
}).sort(function (a, b) {
return b.length - a.length;
}).map(escapeRE).join('|') + ')'
+ '($|[' + PUNCT_CHARS.split('').map(escapeRE).join('') + '])';
+ '($|' + UNICODE_PUNCT_RE + '|' + UNICODE_SPACE_RE +
'|[' + OTHER_CHARS.split('').map(escapeRE).join('') + '])';
state.env.abbrRegExp = new RegExp(regText, 'g');
}
regSimple = state.env.abbrRegExpSimple;
reg = state.env.abbrRegExp;

for (j = 0, l = blockTokens.length; j < l; j++) {
Expand All @@ -87,6 +103,10 @@ module.exports = function sub_plugin(md) {
reg.lastIndex = 0;
nodes = [];

// fast regexp run to determine whether there are any abbreviated words
// in the current token
if (!regSimple.test(text)) { continue; }

while ((m = reg.exec(text))) {
if (reg.lastIndex > pos) {
token = new state.Token('text', '', 0);
Expand Down Expand Up @@ -123,5 +143,6 @@ module.exports = function sub_plugin(md) {
}

md.block.ruler.before('reference', 'abbr_def', abbr_def, { alt: [ 'paragraph', 'reference' ] });
md.core.ruler.after('inline', 'abbr_replace', abbr_replace);

md.core.ruler.after('linkify', 'abbr_replace', abbr_replace);
};
1 change: 1 addition & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
"markdown-it-testgen": "~0.1.0",
"mocha": "*",
"request": "*",
"uc.micro": "*",
"uglify-js": "*"
}
}
10 changes: 10 additions & 0 deletions test/fixtures/abbr.txt
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,16 @@ foobar
.


Interaction with linkifier:
.
http://example.com/foo/

*[foo]: something
.
<p><a href="http://example.com/foo/">http://example.com/<abbr title="something">foo</abbr>/</a></p>
.


Security 1
.
*[__proto__]: blah
Expand Down
2 changes: 1 addition & 1 deletion test/test.js
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ var generate = require('markdown-it-testgen');
/*eslint-env mocha*/

describe('markdown-it-abbr', function () {
var md = require('markdown-it')()
var md = require('markdown-it')({ linkify: true })
.use(require('../'));

generate(path.join(__dirname, 'fixtures/abbr.txt'), md);
Expand Down

0 comments on commit 94396e6

Please sign in to comment.