Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[FLINK-16464][sql-client]result-mode tableau may shift when content contains Chinese String in SQL CLI #11334

Merged
merged 5 commits into from
Mar 13, 2020
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions flink-table/flink-sql-client/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,13 @@ under the License.
<version>3.9.0</version>
</dependency>

<!-- ICU4J: Unicode character properties, used to compute display width consistently across languages -->
<dependency>
<groupId>com.ibm.icu</groupId>
leonardBang marked this conversation as resolved.
Show resolved Hide resolved
<artifactId>icu4j</artifactId>
<version>65.1</version>
</dependency>

<!-- configuration -->
<dependency>
<groupId>org.apache.flink</groupId>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,8 @@
import java.util.concurrent.atomic.AtomicInteger;
import java.util.stream.Stream;

import static org.apache.flink.table.client.cli.CliUtils.getStringWidth;
import static org.apache.flink.table.client.cli.CliUtils.isFullWidth;
import static org.apache.flink.table.client.cli.CliUtils.rowToString;

/**
Expand All @@ -59,6 +61,7 @@ public class CliTableauResultView implements AutoCloseable {
private static final int NULL_COLUMN_WIDTH = CliStrings.NULL_COLUMN.length();
private static final int MAX_COLUMN_WIDTH = 30;
private static final int DEFAULT_COLUMN_WIDTH = 20;
private static final int COLUMN_TRUNCATED_FLAG_WIDTH = 3;
leonardBang marked this conversation as resolved.
Show resolved Hide resolved
private static final String COLUMN_TRUNCATED_FLAG = "...";
private static final String CHANGEFLAG_COLUMN_NAME = "+/-";

Expand Down Expand Up @@ -275,11 +278,12 @@ private void printSingleRow(int[] colWidths, String[] cols) {
int idx = 0;
for (String col : cols) {
sb.append(" ");
if (col.length() <= colWidths[idx]) {
sb.append(StringUtils.repeat(' ', colWidths[idx] - col.length()));
int colWidth = getStringWidth(col);
leonardBang marked this conversation as resolved.
Show resolved Hide resolved
if (colWidth <= colWidths[idx]) {
sb.append(StringUtils.repeat(' ', colWidths[idx] - colWidth));
sb.append(col);
} else {
sb.append(col, 0, colWidths[idx] - COLUMN_TRUNCATED_FLAG.length());
sb.append(getFixedString(col, colWidths[idx]));
leonardBang marked this conversation as resolved.
Show resolved Hide resolved
sb.append(COLUMN_TRUNCATED_FLAG);
}
sb.append(" |");
Expand Down Expand Up @@ -389,7 +393,7 @@ private int[] columnWidthsByContent(List<TableColumn> columns, List<String[]> ro
// fill column width with real data
for (String[] row : rows) {
for (int i = 0; i < row.length; ++i) {
colWidths[i] = Math.max(colWidths[i], row[i].length());
colWidths[i] = Math.max(colWidths[i], getStringWidth(row[i]));
}
}

Expand All @@ -401,4 +405,27 @@ private int[] columnWidthsByContent(List<TableColumn> columns, List<String[]> ro
return colWidths;
}

/**
 * Returns the leading part of {@code col} that fits into a column of display width
 * {@code colWidth} once room for the truncation marker has been reserved.
 *
 * <p>The result always has a display width of exactly
 * {@code colWidth - COLUMN_TRUNCATED_FLAG_WIDTH} (when that value is non-negative): if the last
 * character that fits leaves a gap, because the next one is full-width and needs two columns,
 * the result is left-padded with spaces. The caller is expected to append the truncation
 * marker itself.
 *
 * @param col the cell content that is too wide for its column
 * @param colWidth the display width of the column
 * @return the truncated, left-padded content without the truncation marker
 */
private String getFixedString(String col, int colWidth) {
    // Width budget for real content; the rest of the column is taken by the "..." marker.
    final int targetWidth = colWidth - COLUMN_TRUNCATED_FLAG_WIDTH;

    int passedWidth = 0;
    int end = 0;
    while (end < col.length()) {
        final int codePoint = col.codePointAt(end);
        final int charWidth = isFullWidth(codePoint) ? 2 : 1;
        // Stop BEFORE a character that would overflow the budget; a character that fills
        // the budget exactly is kept.
        if (passedWidth + charWidth > targetWidth) {
            break;
        }
        passedWidth += charWidth;
        // Advance by whole code points so a surrogate pair is never measured twice or split.
        end += Character.charCount(codePoint);
    }
    String substring = col.substring(0, end);

    // pad with ' ' before the column
    final int lackedWidth = targetWidth - passedWidth;
    if (lackedWidth > 0) {
        substring = StringUtils.repeat(' ', lackedWidth) + substring;
    }
    return substring;
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@
import org.apache.flink.table.utils.EncodingUtils;
import org.apache.flink.types.Row;

import com.ibm.icu.lang.UCharacter;
import com.ibm.icu.lang.UProperty;
import org.jline.utils.AttributedString;
import org.jline.utils.AttributedStringBuilder;
import org.jline.utils.AttributedStyle;
Expand Down Expand Up @@ -111,4 +113,33 @@ public static String[] typesToString(DataType[] types) {
}
return typesAsString;
}

public static int getStringWidth(String str) {
leonardBang marked this conversation as resolved.
Show resolved Hide resolved
int numOfFullWidthCh = (int) str.codePoints().filter(codePoint -> isFullWidth(codePoint)).count();
return str.length() + numOfFullWidthCh;
}

/**
 * Tells whether a code point is treated as full-width, based on the East Asian Width
 * property that ICU reports for it.
 * See https://unicode.org/reports/tr11/
 *
 * @param codePoint the Unicode code point to classify
 * @return {@code true} for FULLWIDTH and WIDE; {@code false} for NEUTRAL, AMBIGUOUS,
 *     HALFWIDTH and NARROW
 * @throws RuntimeException if the reported property value is none of the six above
 */
public static boolean isFullWidth(int codePoint) {
    final int eastAsianWidth =
        UCharacter.getIntPropertyValue(codePoint, UProperty.EAST_ASIAN_WIDTH);
    switch (eastAsianWidth) {
        case UCharacter.EastAsianWidth.FULLWIDTH:
        case UCharacter.EastAsianWidth.WIDE:
            return true;
        case UCharacter.EastAsianWidth.NEUTRAL:
        case UCharacter.EastAsianWidth.AMBIGUOUS:
        case UCharacter.EastAsianWidth.HALFWIDTH:
        case UCharacter.EastAsianWidth.NARROW:
            return false;
        default:
            throw new RuntimeException("unknown UProperty.EAST_ASIAN_WIDTH: " + eastAsianWidth);
    }
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,26 @@ public void setUp() {
null)
);

data.add(
Row.of(
null,
-1,
-1,
"这是一段中文",
BigDecimal.valueOf(-12345.06789),
Timestamp.valueOf("2020-03-04 18:39:14"))
);

data.add(
Row.of(
null,
-1,
-1,
"これは日本語をテストするための文です",
BigDecimal.valueOf(-12345.06789),
Timestamp.valueOf("2020-03-04 18:39:14"))
);

streamingData = new ArrayList<>();
for (int i = 0; i < data.size(); ++i) {
streamingData.add(new Tuple2<>(i % 2 == 0, data.get(i)));
Expand All @@ -156,19 +176,21 @@ public void testBatchResult() {

view.displayBatchResults();
view.close();

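// note: the expected result below may look misaligned in an IDE, because CJK characters are
// often rendered narrower than two columns there; view this file in a terminal to check alignment.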
leonardBang marked this conversation as resolved.
Show resolved Hide resolved
Assert.assertEquals(
"+---------+-------------+----------------------+----------------------------+----------------+----------------------------+\n" +
"| boolean | int | bigint | varchar | decimal(10, 5) | timestamp |\n" +
"+---------+-------------+----------------------+----------------------------+----------------+----------------------------+\n" +
"| (NULL) | 1 | 2 | abc | 1.23 | 2020-03-01 18:39:14.0 |\n" +
"| false | (NULL) | 0 | | 1 | 2020-03-01 18:39:14.1 |\n" +
"| true | 2147483647 | (NULL) | abcdefg | 1234567890 | 2020-03-01 18:39:14.12 |\n" +
"| false | -2147483648 | 9223372036854775807 | (NULL) | 12345.06789 | 2020-03-01 18:39:14.123 |\n" +
"| true | 100 | -9223372036854775808 | abcdefg111 | (NULL) | 2020-03-01 18:39:14.123456 |\n" +
"| (NULL) | -1 | -1 | abcdefghijklmnopqrstuvwxyz | -12345.06789 | (NULL) |\n" +
"+---------+-------------+----------------------+----------------------------+----------------+----------------------------+\n" +
"6 row in set\n",
"+---------+-------------+----------------------+--------------------------------+----------------+----------------------------+\n" +
"| boolean | int | bigint | varchar | decimal(10, 5) | timestamp |\n" +
"+---------+-------------+----------------------+--------------------------------+----------------+----------------------------+\n" +
"| (NULL) | 1 | 2 | abc | 1.23 | 2020-03-01 18:39:14.0 |\n" +
"| false | (NULL) | 0 | | 1 | 2020-03-01 18:39:14.1 |\n" +
"| true | 2147483647 | (NULL) | abcdefg | 1234567890 | 2020-03-01 18:39:14.12 |\n" +
"| false | -2147483648 | 9223372036854775807 | (NULL) | 12345.06789 | 2020-03-01 18:39:14.123 |\n" +
"| true | 100 | -9223372036854775808 | abcdefg111 | (NULL) | 2020-03-01 18:39:14.123456 |\n" +
"| (NULL) | -1 | -1 | abcdefghijklmnopqrstuvwxyz | -12345.06789 | (NULL) |\n" +
"| (NULL) | -1 | -1 | 这是一段中文 | -12345.06789 | 2020-03-04 18:39:14.0 |\n" +
"| (NULL) | -1 | -1 | これは日本語をテストするた... | -12345.06789 | 2020-03-04 18:39:14.0 |\n" +
"+---------+-------------+----------------------+--------------------------------+----------------+----------------------------+\n" +
"8 row in set\n",
terminalOutput.toString());
verify(mockExecutor, times(0)).cancelQuery(anyString(), anyString());
}
Expand Down Expand Up @@ -266,7 +288,9 @@ public void testStreamingResult() {

view.displayStreamResults();
view.close();

// note: the expected result may look misaligned in an IDE, because by default most IDEs render a
// CJK (Chinese/Japanese/Korean) character narrower than two columns, whereas a terminal usually
// renders it exactly two columns wide. Open this source file with vim, or just cat it, to see the
// properly aligned result.
Assert.assertEquals(
"+-----+---------+-------------+----------------------+----------------------+----------------+----------------------------+\n" +
"| +/- | boolean | int | bigint | varchar | decimal(10, 5) | timestamp |\n" +
Expand All @@ -276,9 +300,11 @@ public void testStreamingResult() {
"| + | true | 2147483647 | (NULL) | abcdefg | 1234567890 | 2020-03-01 18:39:14.12 |\n" +
"| - | false | -2147483648 | 9223372036854775807 | (NULL) | 12345.06789 | 2020-03-01 18:39:14.123 |\n" +
"| + | true | 100 | -9223372036854775808 | abcdefg111 | (NULL) | 2020-03-01 18:39:14.123456 |\n" +
"| - | (NULL) | -1 | -1 | abcdefghijklmnopq... | -12345.06789 | (NULL) |\n" +
"| - | (NULL) | -1 | -1 | abcdefghijklmnop... | -12345.06789 | (NULL) |\n" +
"| + | (NULL) | -1 | -1 | 这是一段中文 | -12345.06789 | 2020-03-04 18:39:14.0 |\n" +
"| - | (NULL) | -1 | -1 | これは日本語をテ... | -12345.06789 | 2020-03-04 18:39:14.0 |\n" +
"+-----+---------+-------------+----------------------+----------------------+----------------+----------------------------+\n" +
"Received a total of 6 rows\n",
"Received a total of 8 rows\n",
terminalOutput.toString());
verify(mockExecutor, times(0)).cancelQuery(anyString(), anyString());
}
Expand Down Expand Up @@ -335,7 +361,8 @@ public void testCancelStreamingResult() throws InterruptedException, ExecutionEx
"| + | (NULL) | 1 | 2 | abc | 1.23 | 2020-03-01 18:39:14.0 |\n" +
"| - | false | (NULL) | 0 | | 1 | 2020-03-01 18:39:14.1 |\n" +
"| + | true | 2147483647 | (NULL) | abcdefg | 1234567890 | 2020-03-01 18:39:14.12 |\n" +
"Query terminated, received a total of 3 rows\n",
"| - | false | -2147483648 | 9223372036854775807 | (NULL) | 12345.06789 | 2020-03-01 18:39:14.123 |\n" +
"Query terminated, received a total of 4 rows\n",
terminalOutput.toString());

verify(mockExecutor, times(1)).cancelQuery(anyString(), anyString());
Expand Down Expand Up @@ -366,7 +393,8 @@ public void testFailedStreamingResult() {
"+-----+---------+-------------+----------------------+----------------------+----------------+----------------------------+\n" +
"| + | (NULL) | 1 | 2 | abc | 1.23 | 2020-03-01 18:39:14.0 |\n" +
"| - | false | (NULL) | 0 | | 1 | 2020-03-01 18:39:14.1 |\n" +
"| + | true | 2147483647 | (NULL) | abcdefg | 1234567890 | 2020-03-01 18:39:14.12 |\n",
"| + | true | 2147483647 | (NULL) | abcdefg | 1234567890 | 2020-03-01 18:39:14.12 |\n" +
"| - | false | -2147483648 | 9223372036854775807 | (NULL) | 12345.06789 | 2020-03-01 18:39:14.123 |\n",
terminalOutput.toString());
verify(mockExecutor, times(1)).cancelQuery(anyString(), anyString());
}
Expand Down