Allow unicode punctuation, move rule after linkifier

markdown-it · Dec 30, 2015 · 94396e6 · 94396e6
1 parent cb2fc23
commit 94396e6
Show file tree

Hide file tree

Showing 4 changed files with 38 additions and 6 deletions.
diff --git a/index.js b/index.js
@@ -3,7 +3,13 @@
 'use strict';
 
 
-var PUNCT_CHARS = ' \n()[]\'".,!?-';
+// ASCII characters in Cc, Sc, Sm, Sk categories we should terminate on;
+// you can check character classes here:
+// http://www.unicode.org/Public/UNIDATA/UnicodeData.txt
+var OTHER_CHARS = ' \r\n$+<=>^`|~';
+
+var UNICODE_PUNCT_RE = require('uc.micro/categories/P/regex').source;
+var UNICODE_SPACE_RE = require('uc.micro/categories/Z/regex').source;
 
 
 module.exports = function sub_plugin(md) {
@@ -56,21 +62,31 @@ module.exports = function sub_plugin(md) {
 
 
  function abbr_replace(state) {
- var i, j, l, tokens, token, text, nodes, pos, reg, m, regText,
+ var i, j, l, tokens, token, text, nodes, pos, reg, m, regText, regSimple,
  currentToken,
  blockTokens = state.tokens;
 
  if (!state.env.abbreviations) { return; }
  if (!state.env.abbrRegExp) {
- regText = '(^|[' + PUNCT_CHARS.split('').map(escapeRE).join('') + '])'
+ state.env.abbrRegExpSimple = new RegExp('(?:' +
+ Object.keys(state.env.abbreviations).map(function (x) {
+ return x.substr(1);
+ }).sort(function (a, b) {
+ return b.length - a.length;
+ }).map(escapeRE).join('|') + ')');
+
+ regText = '(^|' + UNICODE_PUNCT_RE + '|' + UNICODE_SPACE_RE +
+ '|[' + OTHER_CHARS.split('').map(escapeRE).join('') + '])'
  + '(' + Object.keys(state.env.abbreviations).map(function (x) {
  return x.substr(1);
  }).sort(function (a, b) {
  return b.length - a.length;
  }).map(escapeRE).join('|') + ')'
- + '($|[' + PUNCT_CHARS.split('').map(escapeRE).join('') + '])';
+ + '($|' + UNICODE_PUNCT_RE + '|' + UNICODE_SPACE_RE +
+ '|[' + OTHER_CHARS.split('').map(escapeRE).join('') + '])';
  state.env.abbrRegExp = new RegExp(regText, 'g');
  }
+ regSimple = state.env.abbrRegExpSimple;
  reg = state.env.abbrRegExp;
 
  for (j = 0, l = blockTokens.length; j < l; j++) {
@@ -87,6 +103,10 @@ module.exports = function sub_plugin(md) {
  reg.lastIndex = 0;
  nodes = [];
 
+ // fast regexp run to determine whether there are any abbreviated words
+ // in the current token
+ if (!regSimple.test(text)) { continue; }
+
  while ((m = reg.exec(text))) {
  if (reg.lastIndex > pos) {
  token = new state.Token('text', '', 0);
@@ -123,5 +143,6 @@ module.exports = function sub_plugin(md) {
  }
 
  md.block.ruler.before('reference', 'abbr_def', abbr_def, { alt: [ 'paragraph', 'reference' ] });
- md.core.ruler.after('inline', 'abbr_replace', abbr_replace);
+
+ md.core.ruler.after('linkify', 'abbr_replace', abbr_replace);
 };
diff --git a/package.json b/package.json
@@ -33,6 +33,7 @@
  "markdown-it-testgen": "~0.1.0",
  "mocha": "*",
  "request": "*",
+ "uc.micro": "*",
  "uglify-js": "*"
  }
 }
diff --git a/test/fixtures/abbr.txt b/test/fixtures/abbr.txt
@@ -79,6 +79,16 @@ foobar
 .
 
 
+Interaction with linkifier:
+.
+http://example.com/foo/
+
+*[foo]: something
+.
+<p><a href="http://example.com/foo/">http://example.com/<abbr title="something">foo</abbr>/</a></p>
+.
+
+
 Security 1
 .
 *[__proto__]: blah

diff --git a/test/test.js b/test/test.js
@@ -7,7 +7,7 @@ var generate = require('markdown-it-testgen');
 /*eslint-env mocha*/
 
 describe('markdown-it-abbr', function () {
- var md = require('markdown-it')()
+ var md = require('markdown-it')({ linkify: true })
  .use(require('../'));
 
  generate(path.join(__dirname, 'fixtures/abbr.txt'), md);