Skip to content

Commit

Permalink
Merge pull request #7 from TysonAndre/improve-newline-count
Browse files Browse the repository at this point in the history
Properly count newlines in escaped text
  • Loading branch information
TysonAndre committed Mar 29, 2019
2 parents 965a5d9 + 524e289 commit e5a20fd
Show file tree
Hide file tree
Showing 4 changed files with 114 additions and 5 deletions.
94 changes: 94 additions & 0 deletions src/StringUtil.php
Original file line number Diff line number Diff line change
Expand Up @@ -172,6 +172,100 @@ static function ($matches) {
);
}

/**
 * Parses a PHP string literal into its value, but with newlines that were written as
 * escape sequences replaced by a placeholder, so that only literal newlines remain countable.
 *
 * Dispatches on the delimiter that follows an optional `b`/`B` binary prefix:
 * `<` is treated as a heredoc/nowdoc, `'` as a single-quoted string,
 * and anything else as a double-quoted string.
 *
 * NOTE(review): this assumes $str still carries its delimiters; confirm that callers
 * never pass a raw, undelimited fragment (its first and last bytes would be dropped).
 *
 * @param string $str the literal, including its delimiters
 * @return string the parsed text ('' when $str is too short to contain a delimiter)
 */
public static function parseWithNewlinePlaceholder($str)
{
    if ('' === $str) {
        // Nothing to inspect; avoids reading a string offset that doesn't exist.
        return '';
    }

    // Skip over the optional binary prefix (b"...", B'...', b<<<EOT).
    $binary_length = ('b' === $str[0] || 'B' === $str[0]) ? 1 : 0;
    if (!isset($str[$binary_length])) {
        // Only the prefix is present, so there is no delimiter and no contents.
        return '';
    }

    $delimiter = $str[$binary_length];
    if ('<' === $delimiter) {
        // The heredoc parser locates the body by newlines, so the prefix doesn't need to be stripped.
        return self::parseHeredocWithNewlinePlaceholder($str);
    }
    if ('\'' === $delimiter) {
        // Can't have escaped newlines
        return self::parse($str);
    }

    return self::parseEscapeSequencesWithNewlinePlaceholder(
        // @phan-suppress-next-line PhanPossiblyFalseTypeArgument
        substr($str, $binary_length + 1, -1),
        '"'
    );
}

/**
 * Converts a fragment of raw (possibly indented) heredoc or nowdoc
 * to the string that the PHP interpreter would treat it as.
 * (but convert escaped newlines to a character that isn't a literal newline)
 *
 * $str is expected to span from the `<<<` opener line through the closing label line:
 * everything up to the first newline and everything from the last newline on is discarded,
 * and the indentation of the closing line is removed from the start of every body line.
 *
 * @param string $str the raw literal
 * @return string the body; escape sequences are parsed unless the opener line contains a "'" (nowdoc)
 */
public static function parseHeredocWithNewlinePlaceholder(string $str) : string
{
    // TODO: handle dos newlines
    $first_line_index = strpos($str, "\n");
    if ($first_line_index === false) {
        // Without a newline there is no opener or closing line to strip.
        // Unescape the text as-is instead of slicing bytes off both ends.
        return self::parseEscapeSequencesWithNewlinePlaceholder($str, null);
    }
    $last_line_index = (int)strrpos($str, "\n");
    // Width of the whitespace in front of the closing label.
    $spaces = strspn($str, " \t", $last_line_index + 1);

    $body_start = $first_line_index + 1;
    // On Windows, the "\r" must also be removed from the last line of the heredoc
    $ends_with_cr = $last_line_index > 0 && $str[$last_line_index - 1] === "\r";
    $body_end = $ends_with_cr ? $last_line_index - 1 : $last_line_index;

    // When the opener is immediately followed by the closing line, the length is not positive.
    // Clamp it: substr() would read a negative length as "stop that many bytes before the end"
    // and return part of the closing label instead of an empty body.
    $inner = (string)substr($str, $body_start, max(0, $body_end - $body_start));

    if ($spaces > 0) {
        // The pattern consists solely of spaces and tabs, so it needs no quoting.
        $indent_pattern = "/^" . substr($str, $last_line_index + 1, $spaces) . "/m";
        // preg_replace() returns null on failure; keep the unmodified body in that case.
        $inner = preg_replace($indent_pattern, '', $inner) ?? $inner;
    }
    if (strpos(substr($str, 0, $first_line_index), "'") === false) {
        // If the start of the here/nowdoc doesn't contain a "'", it's heredoc.
        // The contents have to be unescaped.
        return self::parseEscapeSequencesWithNewlinePlaceholder($inner, null);
    }
    return $inner;
}

/**
 * Parses escape sequences in strings (all string types apart from single quoted),
 * but replaces escaped newlines with a different single-byte character, to use for line counting.
 *
 * Because every escape sequence that evaluates to "\n" becomes a single space, the result
 * has the same length as the fully parsed string, and any "\n" left in it was a literal newline.
 *
 * @param string $str the raw contents, without the surrounding delimiters
 * @param ?string $quote the quote character whose backslash-escaped form is unescaped first (null for none)
 * @return ?string the parsed text; '' if $str is not a string, null if the regex engine fails
 */
private static function parseEscapeSequencesWithNewlinePlaceholder($str, $quote)
{
    if (!is_string($str)) {
        // Invalid AST input; give up
        return '';
    }
    if (null !== $quote) {
        $str = str_replace('\\' . $quote, $quote, $str);
    }

    return \preg_replace_callback(
        '~\\\\([\\\\$nrtfve]|[xX][0-9a-fA-F]{1,2}|[0-7]{1,3}|u\{([0-9a-fA-F]+)\})~',
        /**
         * @param array<int,string> $matches
         * @return string
         */
        static function ($matches) {
            $str = $matches[1];

            if (isset(self::REPLACEMENTS[$str])) {
                $result = self::REPLACEMENTS[$str];
            } elseif ('x' === $str[0] || 'X' === $str[0]) {
                // Drop the leading "x": hexdec() reports a deprecation (PHP 7.4+)
                // when given characters that aren't hex digits.
                // @phan-suppress-next-line PhanPartialTypeMismatchArgumentInternal
                $result = chr(hexdec(substr($str, 1)));
            } elseif ('u' === $str[0]) {
                // @phan-suppress-next-line PhanPartialTypeMismatchArgument
                $result = self::codePointToUtf8(hexdec($matches[2]));
            } else {
                // Three octal digits can exceed one byte (e.g. \777); only the low byte is used.
                // chr() wraps such values the same way; the mask makes that explicit.
                $result = chr(octdec($str) & 0xFF);
            }
            // Keep the length intact, but stop escaped newlines from being counted as line breaks.
            return $result !== "\n" ? $result : " ";
        },
        $str
    );
}

/**
* Converts a Unicode code point to its UTF-8 encoded representation.
*
Expand Down
11 changes: 10 additions & 1 deletion src/TypoCheckUtils.php
Original file line number Diff line number Diff line change
Expand Up @@ -92,8 +92,17 @@ public static function getTyposForText(string $contents) : array
if ($suggestions === null) {
continue;
}
if (!isset($line_counting_text)) {
if (\in_array($token[0], [\T_CONSTANT_ENCAPSED_STRING, \T_ENCAPSED_AND_WHITESPACE])) {
// Parse this, but replace `"\n"`, `"\x0a"`, etc. with a single byte character that isn't a literal newline.
$line_counting_text = StringUtil::parseWithNewlinePlaceholder($token[1]);
} else {
// There are no escape sequences
$line_counting_text = $text;
}
}
// Edge case in php 7.0: warns if length is 0
$lineno = (int)($token[2]) + ($offset > 0 ? substr_count($text, "\n", 0, $offset) : 0);
$lineno = (int)($token[2]) + ($offset > 0 ? substr_count($line_counting_text, "\n", 0, $offset) : 0);
$results[] = new TypoDetails($word, $token, $lineno, $suggestions);
}
};
Expand Down
10 changes: 7 additions & 3 deletions tests/expected/all_output.expected
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,13 @@ src/ignored.php:6 PhanPluginPossibleTypoStringLiteral Saw an invalid word "optin
src/invalid_constant_suggestion.php:6 PhanPluginPossibleTypoStringLiteral Saw an invalid word "DONT" (Did you mean "DON'T"?)
src/invalid_constant_suggestion.php:8 PhanPluginPossibleTypoStringLiteral Saw an invalid word "Reenabled" (Did you mean "Re-enabled"?)
src/multiline_typo.php:3 PhanPluginPossibleTypoGettext Call to gettext() was passed an invalid word "invlaid" in "\n\n\ninvlaid text" (Did you mean "invalid"?)
src/multiline_typo.php:6 PhanPluginPossibleTypoStringLiteral Saw an invalid word "invlaid" (Did you mean "invalid"?)
src/multiline_typo.php:11 PhanPluginPossibleTypoGettext Call to gettext() was passed an invalid word "wasn" in "\n\n'wasn' is a typo" (Did you mean "wasn't" or "was"?)
src/multiline_typo.php:13 PhanPluginPossibleTypoStringLiteral Saw an invalid word "wasn" (Did you mean "wasn't" or "was"?)
src/multiline_typo.php:3 PhanPluginPossibleTypoStringLiteral Saw an invalid word "invlaid" (Did you mean "invalid"?)
src/multiline_typo.php:5 PhanPluginPossibleTypoGettext Call to gettext() was passed an invalid word "INVLAID" in "\n\n\nINVLAID text\n Invlaid" (Did you mean "INVALID"?)
src/multiline_typo.php:5 PhanPluginPossibleTypoGettext Call to gettext() was passed an invalid word "Invlaid" in "\n\n\nINVLAID text\n Invlaid" (Did you mean "Invalid"?)
src/multiline_typo.php:5 PhanPluginPossibleTypoStringLiteral Saw an invalid word "INVLAID" (Did you mean "INVALID"?)
src/multiline_typo.php:6 PhanPluginPossibleTypoStringLiteral Saw an invalid word "Invlaid" (Did you mean "Invalid"?)
src/multiline_typo.php:13 PhanPluginPossibleTypoGettext Call to gettext() was passed an invalid word "wasn" in "\n\n'wasn' is a typo" (Did you mean "wasn't" or "was"?)
src/multiline_typo.php:14 PhanPluginPossibleTypoStringLiteral Saw an invalid word "wasn" (Did you mean "wasn't" or "was"?)
src/string_check.php:2 PhanPluginPossibleTypoVariable Saw an invalid word "Inalid" (Did you mean "Invalid"?)
src/string_check.php:3 PhanPluginPossibleTypoVariable Saw an invalid word "Inalid" (Did you mean "Invalid"?)
src/string_check.php:4 PhanPluginPossibleTypoVariable Saw an invalid word "INALID" (Did you mean "INVALID"?)
Expand Down
4 changes: 3 additions & 1 deletion tests/src/multiline_typo.php
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
<?php
// TODO: Do a better job inferring the line numbers here
// PhanTypoCheck does not count newlines when they're escaped
echo gettext("\n\n\ninvlaid text");

echo gettext("\n\x0a\12INVLAID text
Invlaid");


echo gettext("
Expand Down

0 comments on commit e5a20fd

Please sign in to comment.