// Copyright (c) 2015-present Mattermost, Inc. All Rights Reserved.
// See LICENSE.txt for license information.

import twemoji from 'twemoji';
import XRegExp from 'xregexp';

import {getEmojiImageUrl} from 'mattermost-redux/utils/emoji_utils';

import {formatWithRenderer} from 'utils/markdown';
import RemoveMarkdown from 'utils/markdown/remove_markdown';
import {getEmojiMap} from 'selectors/emojis';
import store from 'stores/redux_store.jsx';

import Constants from './constants.jsx';
import * as Emoticons from './emoticons.jsx';
import * as Markdown from './markdown';

// Renderer used when the removeMarkdown option is set.
const removeMarkdown = new RemoveMarkdown();

// Any character that is neither a letter nor a digit (Unicode-aware).
const punctuation = XRegExp.cache('[^\\pL\\d]');

const AT_MENTION_PATTERN = /\B@([a-z0-9.\-_]*)/gi;

// Matches rendered output that is a single paragraph consisting solely of
// emoticon images/spans, used to detect "all emoji" messages.
// NOTE(review): reconstructed from a corrupted literal (the tag text had been
// stripped) — confirm the exact attributes against the markdown renderer's
// emoticon output.
const htmlEmojiPattern = /^<p>\s*(?:<img class="emoticon"[^>]*>|<span data-emoticon[^>]*>[^<]*<\/span>\s*)+<\/p>$/;
// pattern to detect the existence of a Chinese, Japanese, or Korean character in a string
// http://stackoverflow.com/questions/15033196/using-javascript-to-check-whether-a-string-contains-japanese-characters-includi
// Ranges cover (in order): CJK punctuation, hiragana, katakana, full/half-width
// forms, CJK unified ideographs, CJK extension A, and hangul syllables (BMP only).
const cjkPattern = /[\u3000-\u303f\u3040-\u309f\u30a0-\u30ff\uff00-\uff9f\u4e00-\u9faf\u3400-\u4dbf\uac00-\ud7a3]/;
// Performs formatting of user posts including highlighting mentions and search terms and converting urls, hashtags,
// @mentions and ~channels to links by taking a user's message and returning a string of formatted html. Also takes
// a number of options as part of the second parameter:
// - searchTerm - If specified, this word is highlighted in the resulting html. Defaults to nothing.
// - searchMatches - If specified, an array of words that will be highlighted. Defaults to nothing. If both
// this and searchTerm are specified, this takes precedence.
// - mentionHighlight - Specifies whether or not to highlight mentions of the current user. Defaults to true.
// - mentionKeys - A list of mention keys for the current user to highlight.
// - singleline - Specifies whether or not to remove newlines. Defaults to false.
// - emoticons - Enables emoticon parsing with a data-emoticon attribute. Defaults to true.
// - markdown - Enables markdown parsing. Defaults to true.
// - siteURL - The origin of this Mattermost instance. If provided, links to channels and posts will be replaced with internal
// links that can be handled by a special click handler.
// - atMentions - Whether or not to render at mentions into spans with a data-mention attribute. Defaults to false.
// - channelNamesMap - An object mapping channel display names to channels. If provided, ~channel mentions will be replaced with
// links to the relevant channel.
// - team - The current team.
// - proxyImages - If specified, images are proxied. Defaults to false.
// - autolinkedUrlSchemes - An array of url schemes that will be allowed for autolinking. Defaults to autolinking with any url scheme.
export function formatText(text, inputOptions) {
    if (!text || typeof text !== 'string') {
        return '';
    }

    let output = text;

    // Copy so we never mutate the caller's options object.
    const options = Object.assign({}, inputOptions);

    if (options.searchMatches) {
        options.searchPatterns = options.searchMatches.map(convertSearchTermToRegex);
    } else {
        options.searchPatterns = parseSearchTerms(options.searchTerm).map(convertSearchTermToRegex);
    }

    if (options.removeMarkdown) {
        output = formatWithRenderer(output, removeMarkdown);
        output = sanitizeHtml(output);
        output = doFormatText(output, options);
    } else if (!('markdown' in options) || options.markdown) {
        // the markdown renderer will call doFormatText as necessary
        output = Markdown.format(output, options);
    } else {
        output = sanitizeHtml(output);
        output = doFormatText(output, options);
    }

    // replace newlines with spaces if necessary
    if (options.singleline) {
        output = replaceNewlines(output);
    }

    if (htmlEmojiPattern.test(output.trim())) {
        // The message is made up entirely of emojis, so wrap it so the emojis
        // can be styled larger. (The previous code concatenated empty strings
        // here — the wrapper markup had been stripped.)
        output = '<span class="all-emoticons">' + output + '</span>';
    }

    return output;
}
// Performs most of the actual formatting work for formatText. Not intended to be called normally.
//
// Tokenizes mentions/links/emoticons out of the text, applies highlighting,
// parses unicode emojis via twemoji, and then re-inserts the formatted tokens.
export function doFormatText(text, options) {
    let output = text;

    const tokens = new Map();

    // replace important words and phrases with tokens
    if (options.atMentions) {
        output = autolinkAtMentions(output, tokens);
    }

    if (options.channelNamesMap) {
        output = autolinkChannelMentions(output, tokens, options.channelNamesMap, options.team);
    }

    output = autolinkEmails(output, tokens);
    output = autolinkHashtags(output, tokens);

    // Emoticon rendering defaults to true. The option key is "emoticons";
    // the previous code read the non-existent "emoticon" key, so explicitly
    // passing emoticons: true actually disabled emoticon/emoji parsing.
    const renderEmoticons = !('emoticons' in options) || options.emoticons;

    if (renderEmoticons) {
        output = Emoticons.handleEmoticons(output, tokens);
    }

    if (options.searchPatterns) {
        output = highlightSearchTerms(output, tokens, options.searchPatterns);
    }

    if (!('mentionHighlight' in options) || options.mentionHighlight) {
        output = highlightCurrentMentions(output, tokens, options.mentionKeys);
    }

    if (renderEmoticons) {
        const emojiMap = getEmojiMap(store.getState());

        output = twemoji.parse(output, {
            className: 'emoticon',
            callback: (icon) => {
                if (!emojiMap.hasUnicode(icon)) {
                    // just leave the unicode characters and hope the browser can handle it
                    return null;
                }

                return getEmojiImageUrl(emojiMap.getUnicode(icon));
            },
        });
    }

    // reinsert tokens with formatted versions of the important words and phrases
    output = replaceTokens(output, tokens);

    return output;
}
// Escapes HTML special characters so user-provided text renders as literal
// text rather than markup. (The previous replacements had been corrupted into
// identity replacements — and a syntax error — by an entity-stripping pass.)
export function sanitizeHtml(text) {
    let output = text;

    // normal string.replace only does a single occurrence so use a regex instead.
    // '&' must be escaped first so the ampersands introduced by the other
    // entities are not themselves re-escaped.
    output = output.replace(/&/g, '&amp;');
    output = output.replace(/</g, '&lt;');
    output = output.replace(/>/g, '&gt;');
    output = output.replace(/'/g, '&#39;');
    output = output.replace(/"/g, '&quot;');

    return output;
}
// Copied from our fork of commonmark.js
// Character classes used to build the email autolink pattern below.
const emailAlphaNumericChars = '\\p{L}\\p{Nd}';
const emailSpecialCharacters = '!#$%&\'*+\\-\\/=?^_`{|}~';
const emailRestrictedSpecialCharacters = '\\s(),:;<>@\\[\\]';
const emailValidCharacters = emailAlphaNumericChars + emailSpecialCharacters;
const emailValidRestrictedCharacters = emailValidCharacters + emailRestrictedSpecialCharacters;

// Local part of an address (bare or quoted) followed by the '@'.
const emailStartPattern = '(?:[' + emailValidCharacters + '](?:[' + emailValidCharacters + ']|\\.(?!\\.|@))*|\\"[' + emailValidRestrictedCharacters + '.]+\\")@';

// Full matcher: a non-letter/digit (or start of string), then the local part,
// '@', and a dotted domain ending in a 2-4 letter TLD.
const reEmail = XRegExp.cache('(^|[^\\pL\\d])(' + emailStartPattern + '[\\pL\\d.\\-]+[.]\\pL{2,4}(?=$|[^\\p{L}]))', 'g');
// Convert emails into tokens whose formatted value is a mailto link.
function autolinkEmails(text, tokens) {
    function replaceEmailWithToken(fullMatch, prefix, email) {
        const index = tokens.size;
        const alias = `$MM_EMAIL${index}$`;

        tokens.set(alias, {
            // Render the address as a clickable mailto link (the anchor markup
            // had been stripped from this literal, leaving plain text).
            value: `<a class="theme" href="mailto:${email}">${email}</a>`,
            originalText: email,
        });

        return prefix + alias;
    }

    let output = text;

    // Operate on output (not text) for consistency with the other helpers.
    output = XRegExp.replace(output, reEmail, replaceEmailWithToken);

    return output;
}
// Converts @mentions into tokens whose formatted value is a span carrying a
// data-mention attribute (see the atMentions option of formatText).
export function autolinkAtMentions(text, tokens) {
    function replaceAtMentionWithToken(fullMatch, username) {
        const index = tokens.size;
        const alias = `$MM_ATMENTION${index}$`;

        tokens.set(alias, {
            // Span markup restored — it had been stripped from this literal.
            value: `<span data-mention="${username}">@${username}</span>`,
            originalText: fullMatch,
        });

        return alias;
    }

    let output = text;

    // Defensive loop: a global replace should consume every mention in one
    // pass, but re-check until no matches remain.
    let match = output.match(AT_MENTION_PATTERN);
    while (match && match.length > 0) {
        output = output.replace(AT_MENTION_PATTERN, replaceAtMentionWithToken);
        match = output.match(AT_MENTION_PATTERN);
    }

    return output;
}
// Converts ~channel mentions into tokens whose formatted value links to the
// channel (see the channelNamesMap/team options of formatText).
function autolinkChannelMentions(text, tokens, channelNamesMap, team) {
    function channelMentionExists(c) {
        return Boolean(channelNamesMap[c]);
    }

    function addToken(channelName, mention, displayName) {
        const index = tokens.size;
        const alias = `$MM_CHANNELMENTION${index}$`;

        // Link into the channel when we know which team it belongs to;
        // otherwise fall back to a no-op href.
        let href = '#';
        if (team) {
            href = (window.basename || '') + '/' + team.name + '/channels/' + channelName;
        }

        tokens.set(alias, {
            // href was previously computed but never used because the anchor
            // markup had been stripped, leaving the mention as plain text.
            value: `<a class="mention-link" href="${href}" data-channel-mention="${channelName}">~${displayName}</a>`,
            originalText: mention,
        });
        return alias;
    }

    function replaceChannelMentionWithToken(fullMatch, spacer, mention, channelName) {
        let channelNameLower = channelName.toLowerCase();

        if (channelMentionExists(channelNameLower)) {
            // Exact match
            const alias = addToken(channelNameLower, mention, escapeHtml(channelNamesMap[channelNameLower].display_name));
            return spacer + alias;
        }

        // Not an exact match, attempt to truncate any punctuation to see if we can find a channel
        const originalChannelName = channelNameLower;

        for (let c = channelNameLower.length; c > 0; c--) {
            if (punctuation.test(channelNameLower[c - 1])) {
                channelNameLower = channelNameLower.substring(0, c - 1);

                if (channelMentionExists(channelNameLower)) {
                    const suffix = originalChannelName.substr(c - 1);
                    const alias = addToken(channelNameLower, '~' + channelNameLower,
                        escapeHtml(channelNamesMap[channelNameLower].display_name));
                    return spacer + alias + suffix;
                }
            } else {
                // If the last character is not punctuation, no point in going any further
                break;
            }
        }

        return fullMatch;
    }

    let output = text;
    output = output.replace(/(^|\s)(~([a-z0-9.\-_]*))/gi, replaceChannelMentionWithToken);

    return output;
}
// Characters with special meaning inside a regular expression.
const regexSpecialCharPattern = /[-/\\^$*+?.()|[\]{}]/g;

// Escapes regex metacharacters so the result can be embedded in a pattern as
// a literal string.
export function escapeRegex(text) {
    return text.replace(regexSpecialCharPattern, '\\$&');
}
// Maps HTML-significant characters to their entity encodings. (The previous
// map had been corrupted into an identity map by an entity-stripping pass.)
const htmlEntities = {
    '&': '&amp;',
    '<': '&lt;',
    '>': '&gt;',
    '"': '&quot;',
    "'": '&#39;',
};

// Escapes HTML special characters in a single replace pass.
export function escapeHtml(text) {
    return text.replace(/[&<>"']/g, (match) => htmlEntities[match]);
}
// Converts the basic HTML entities back into their literal characters — the
// inverse of escapeHtml. '&amp;' is decoded last so that the ampersands which
// are part of other entities aren't collapsed prematurely. (The previous
// replacements had been corrupted into identity replacements.)
export function convertEntityToCharacter(text) {
    return text.
        replace(/&lt;/g, '<').
        replace(/&gt;/g, '>').
        replace(/&#39;/g, '\'').
        replace(/&quot;/g, '"').
        replace(/&amp;/g, '&');
}
// Wraps mentions of the current user (given by mentionKeys) in highlight
// spans, both for already-tokenized mentions and for raw text.
function highlightCurrentMentions(text, tokens, mentionKeys = []) {
    let output = text;

    // look for any existing tokens which are self mentions and should be highlighted
    const newTokens = new Map();
    for (const [alias, token] of tokens) {
        const tokenTextLower = token.originalText.toLowerCase();

        if (mentionKeys.findIndex((key) => key.key.toLowerCase() === tokenTextLower) !== -1) {
            const index = tokens.size + newTokens.size;
            const newAlias = `$MM_SELFMENTION${index}$`;

            newTokens.set(newAlias, {
                // Wrap the existing token's output in the highlight span (the
                // span markup had been stripped from this literal).
                value: `<span class="mention--highlight">${alias}</span>`,
                originalText: token.originalText,
            });
            output = output.replace(alias, newAlias);
        }
    }

    // the new tokens are stashed in a separate map since we can't add objects to a map during iteration
    for (const newToken of newTokens) {
        tokens.set(newToken[0], newToken[1]);
    }

    // look for self mentions in the text
    function replaceCurrentMentionWithToken(fullMatch, prefix, mention, suffix = '') {
        const index = tokens.size;
        const alias = `$MM_SELFMENTION${index}$`;

        tokens.set(alias, {
            value: `<span class="mention--highlight">${mention}</span>`,
            originalText: mention,
        });

        return prefix + alias + suffix;
    }

    for (const mention of mentionKeys) {
        if (!mention || !mention.key) {
            continue;
        }

        let flags = 'g';
        if (!mention.caseSensitive) {
            flags += 'i';
        }

        const pattern = new RegExp(`(^|\\W)(${escapeRegex(mention.key)})(\\b|_+\\b)`, flags);
        output = output.replace(pattern, replaceCurrentMentionWithToken);
    }

    return output;
}
// Converts #hashtags into tokens whose formatted value is a hashtag link,
// both for hashtags already captured inside existing tokens and in raw text.
function autolinkHashtags(text, tokens) {
    let output = text;

    const newTokens = new Map();
    for (const [alias, token] of tokens) {
        if (token.originalText.lastIndexOf('#', 0) === 0) {
            const index = tokens.size + newTokens.size;
            const newAlias = `$MM_HASHTAG${index}$`;

            newTokens.set(newAlias, {
                // Anchor markup restored — it had been stripped from this literal.
                value: `<a class='mention-link' href='#' data-hashtag='${token.originalText}'>${token.originalText}</a>`,
                originalText: token.originalText,
                hashtag: token.originalText.substring(1),
            });

            output = output.replace(alias, newAlias);
        }
    }

    // the new tokens are stashed in a separate map since we can't add objects to a map during iteration
    for (const newToken of newTokens) {
        tokens.set(newToken[0], newToken[1]);
    }

    // look for hashtags in the text
    function replaceHashtagWithToken(fullMatch, prefix, originalText) {
        const index = tokens.size;
        const alias = `$MM_HASHTAG${index}$`;

        // Compare the hashtag itself against the minimum length — the old
        // check measured text.length, i.e. the entire message.
        if (originalText.length < Constants.MIN_HASHTAG_LINK_LENGTH + 1) {
            // too short to be a hashtag
            return fullMatch;
        }

        tokens.set(alias, {
            value: `<a class='mention-link' href='#' data-hashtag='${originalText}'>${originalText}</a>`,
            originalText,
            hashtag: originalText.substring(1),
        });

        return prefix + alias;
    }

    return output.replace(XRegExp.cache('(^|\\W)(#\\pL[\\pL\\d\\-_.]*[\\pL\\d])', 'g'), replaceHashtagWithToken);
}
// Leading punctuation (but not '#', so hashtags keep their prefix) and
// trailing punctuation to strip from parsed search terms.
const puncStart = XRegExp.cache('^[^\\pL\\d\\s#]+');
const puncEnd = XRegExp.cache('[^\\pL\\d\\s]+$');

// Splits a search query into individual terms, dropping search flags
// (in:/from:/channel:) and trimming surrounding punctuation.
function parseSearchTerms(searchTerm) {
    let terms = [];

    let termString = searchTerm;

    while (termString) {
        let captured;

        // check for a quoted string
        captured = (/^"([^"]*)"/).exec(termString);
        if (captured) {
            termString = termString.substring(captured[0].length);
            if (captured[1].length > 0) {
                terms.push(captured[1]);
            }
            continue;
        }

        // check for a search flag (and don't add it to terms)
        captured = (/^(?:in|from|channel): ?\S+/).exec(termString);
        if (captured) {
            termString = termString.substring(captured[0].length);
            continue;
        }

        // capture at mentions differently from the server so we can highlight them with the preceding at sign.
        // The hyphen is escaped to match usernames like AT_MENTION_PATTERN does;
        // unescaped, '.-_' formed a character range covering many unintended symbols.
        captured = (/^@[a-z0-9.\-_]+\b/).exec(termString);
        if (captured) {
            termString = termString.substring(captured[0].length);
            terms.push(captured[0]);
            continue;
        }

        // capture any plain text up until the next quote or search flag
        captured = (/^.+?(?=\bin:|\bfrom:|\bchannel:|"|$)/).exec(termString);
        if (captured) {
            termString = termString.substring(captured[0].length);

            // break the text up into words based on how the server splits them in SqlPostStore.SearchPosts and then discard empty terms
            terms.push(...captured[0].split(/[ <>+()~@]/).filter((term) => Boolean(term)));
            continue;
        }

        // we should never reach this point since at least one of the regexes should match something in the remaining text
        throw new Error('Infinite loop in search term parsing: "' + termString + '"');
    }

    // remove punctuation from each term. String.replace returns a new string,
    // so the result must be reassigned — the old code discarded it, leaving
    // punctuation attached to every term.
    terms = terms.map((term) => {
        let trimmed = term.replace(puncStart, '');
        if (trimmed.charAt(trimmed.length - 1) !== '*') {
            trimmed = trimmed.replace(puncEnd, '');
        }
        return trimmed;
    });

    return terms;
}
// Builds a case-insensitive highlight regex for a single search term. The
// returned pattern always has two groups: a (possibly empty) prefix and the
// matched word, as expected by highlightSearchTerms.
function convertSearchTermToRegex(term) {
    const containsCjk = cjkPattern.test(term);
    const isWildcard = (/[^\s][*]$/).test(term);

    let pattern;
    if (containsCjk) {
        // term contains Chinese, Japanese, or Korean characters so don't mark word boundaries
        pattern = '()(' + escapeRegex(term.replace(/\*/g, '')) + ')';
    } else if (isWildcard) {
        // wildcard term: match the prefix with no trailing word boundary
        pattern = '\\b()(' + escapeRegex(term.substring(0, term.length - 1)) + ')';
    } else if (term.startsWith('@') || term.startsWith('#')) {
        // needs special handling of the first boundary because a word boundary doesn't work before a symbol
        pattern = '(\\W|^)(' + escapeRegex(term) + ')\\b';
    } else {
        pattern = '\\b()(' + escapeRegex(term) + ')\\b';
    }

    return {
        pattern: new RegExp(pattern, 'gi'),
        term,
    };
}
// Wraps matches of the given search patterns in highlight spans, both inside
// already-tokenized values and in the raw text.
export function highlightSearchTerms(text, tokens, searchPatterns) {
    if (!searchPatterns || searchPatterns.length === 0) {
        return text;
    }

    let output = text;

    function replaceSearchTermWithToken(match, prefix, word) {
        const index = tokens.size;
        const alias = `$MM_SEARCHTERM${index}$`;

        tokens.set(alias, {
            // Highlight span restored — it had been stripped from this literal.
            value: `<span class="search-highlight">${word}</span>`,
            originalText: word,
        });

        return prefix + alias;
    }

    for (const pattern of searchPatterns) {
        // highlight existing tokens matching search terms
        const newTokens = new Map();
        for (const [alias, token] of tokens) {
            if (pattern.pattern.test(token.originalText)) {
                // If it's a Hashtag, skip it unless the search term is an exact match.
                let originalText = token.originalText;
                if (originalText.startsWith('#')) {
                    originalText = originalText.substr(1);
                }
                let term = pattern.term;
                if (term.startsWith('#')) {
                    term = term.substr(1);
                }

                // (Every alias ends with '$' by construction, so only the
                // prefix needs checking.)
                if (alias.startsWith('$MM_HASHTAG') && originalText.toLowerCase() !== term.toLowerCase()) {
                    continue;
                }

                const index = tokens.size + newTokens.size;
                const newAlias = `$MM_SEARCHTERM${index}$`;

                newTokens.set(newAlias, {
                    value: `<span class="search-highlight">${alias}</span>`,
                    originalText: token.originalText,
                });

                output = output.replace(alias, newAlias);
            }

            // The pattern regexes are global, so calling pattern.pattern.test() above alters their
            // state. Reset lastIndex to 0 between calls to test() to ensure it returns the
            // same result every time it is called with the same value of token.originalText.
            pattern.pattern.lastIndex = 0;
        }

        // the new tokens are stashed in a separate map since we can't add objects to a map during iteration
        for (const newToken of newTokens) {
            tokens.set(newToken[0], newToken[1]);
        }

        output = output.replace(pattern.pattern, replaceSearchTermWithToken);
    }

    return output;
}
// Substitutes every token alias in the text with its formatted value.
// Most-recently-added tokens are replaced first, so aliases nested inside an
// earlier token's value are expanded before their wrappers are inserted.
export function replaceTokens(text, tokens) {
    return [...tokens.entries()].reduceRight(
        (acc, [alias, token]) => acc.replace(alias, token.value),
        text,
    );
}
// Flattens a message onto one line by turning each newline into a space
// (used by the singleline option of formatText).
function replaceNewlines(text) {
    return text.split('\n').join(' ');
}