Skip to content

Commit

Permalink
[FLINK-22556][ci] Extend JarFileChecker to search for traces of incom…
Browse files Browse the repository at this point in the history
…patible licenses
  • Loading branch information
zentol committed May 11, 2021
1 parent 0fcf44e commit 214f936
Show file tree
Hide file tree
Showing 2 changed files with 176 additions and 8 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -25,13 +25,17 @@

import java.io.IOException;
import java.net.URI;
import java.nio.charset.MalformedInputException;
import java.nio.charset.StandardCharsets;
import java.nio.file.FileSystem;
import java.nio.file.FileSystems;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
import java.util.Locale;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import java.util.stream.Stream;

Expand Down Expand Up @@ -78,6 +82,8 @@ static int checkJar(Path file) throws Exception {

numSevereIssues +=
getNumLicenseFilesOutsideMetaInfDirectory(file, fileSystem.getPath("/"));

numSevereIssues += getFilesWithIncompatibleLicenses(file, fileSystem.getPath("/"));
}
return numSevereIssues;
}
Expand Down Expand Up @@ -105,8 +111,7 @@ private static boolean noticeFileExistsAndIsValid(Path noticeFile, Path jar)
return false;
}

final String noticeFileContents =
new String(Files.readAllBytes(noticeFile), StandardCharsets.UTF_8);
final String noticeFileContents = readFile(noticeFile);
if (!noticeFileContents.toLowerCase().contains("flink")
|| !noticeFileContents.contains("The Apache Software Foundation")) {
LOG.error("The notice file in {} does not contain the expected entries.", jar);
Expand All @@ -123,8 +128,7 @@ private static boolean licenseFileExistsAndIsValid(Path licenseFile, Path jar)
return false;
}

final String licenseFileContents =
new String(Files.readAllBytes(licenseFile), StandardCharsets.UTF_8);
final String licenseFileContents = readFile(licenseFile);
if (!licenseFileContents.contains("Apache License")
|| !licenseFileContents.contains("Version 2.0, January 2004")) {
LOG.error("The license file in {} does not contain the expected entries.", jar);
Expand All @@ -134,6 +138,117 @@ private static boolean licenseFileExistsAndIsValid(Path licenseFile, Path jar)
return true;
}

/**
 * Patterns for texts that hint at a license that must not be bundled; the texts are based on
 * https://www.apache.org/legal/resolved.html#category-x
 *
 * <p>The texts never change, so the patterns are compiled once and shared by all jar checks
 * instead of being recompiled for every jar.
 */
private static final Collection<Pattern> INCOMPATIBLE_LICENSE_PATTERNS =
        Collections.unmodifiableCollection(
                asPatterns(
                        "Binary Code License",
                        "Intel Simplified Software License",
                        "JSR 275",
                        "Microsoft Limited Public License",
                        "Amazon Software License",
                        // Java SDK for Satori RTM license
                        "as necessary for your use of Satori services",
                        "REDIS SOURCE AVAILABLE LICENSE",
                        "Booz Allen Public License",
                        "Confluent Community License Agreement Version 1.0",
                        // “Commons Clause” License Condition v1.0
                        "the License does not grant to you, the right to Sell the Software.",
                        "Sun Community Source License Version 3.0",
                        "GNU General Public License",
                        "GNU Affero General Public License",
                        "GNU Lesser General Public License",
                        "Q Public License",
                        "Sleepycat License",
                        "Server Side Public License",
                        "Code Project Open License",
                        // BSD 4-Clause
                        " All advertising materials mentioning features or use of this software must display the following acknowledgement",
                        // Facebook Patent clause v1
                        "The license granted hereunder will terminate, automatically and without notice, for anyone that makes any claim",
                        // Facebook Patent clause v2
                        "The license granted hereunder will terminate, automatically and without notice, if you (or any of your subsidiaries, corporate affiliates or agents) initiate directly or indirectly, or take a direct financial interest in, any Patent Assertion: (i) against Facebook",
                        "Netscape Public License",
                        "SOLIPSISTIC ECLIPSE PUBLIC LICENSE",
                        // DON'T BE A DICK PUBLIC LICENSE
                        "Do whatever you like with the original work, just don't be a dick.",
                        // JSON License
                        "The Software shall be used for Good, not Evil.",
                        // can sometimes be found in "funny" licenses
                        "Don’t be evil"));

/**
 * Counts the files in the jar that contain traces of an incompatible license.
 *
 * @param jar path of the jar, passed on for log and error messages
 * @param jarRoot root path inside the jar from which files are searched
 * @return number of files that match at least one incompatible-license pattern
 * @throws IOException if the files of the jar cannot be listed
 */
private static int getFilesWithIncompatibleLicenses(Path jar, Path jarRoot) throws IOException {
    return findNonBinaryFilesContainingText(jar, jarRoot, INCOMPATIBLE_LICENSE_PATTERNS);
}

/**
 * Converts each given text into a pattern that tolerates line breaks and comment markers
 * between its words.
 *
 * @param texts the texts to convert
 * @return one pattern per text, in the order of the given texts
 */
private static Collection<Pattern> asPatterns(String... texts) {
    final Stream<Pattern> compiledTexts =
            Stream.of(texts).map(JarFileChecker::asPatternWithPotentialLineBreaks);
    return compiledTexts.collect(Collectors.toList());
}

/**
 * Compiles the given text into a pattern that finds the lower-cased text even if its words are
 * spread over several lines or interleaved with comment markers.
 *
 * <p>The text is lower-cased, so the pattern must be applied to lower-cased input. Every word
 * is quoted, so that characters with a special meaning in regular expressions, such as the
 * parentheses and dots contained in some license texts, are matched literally.
 *
 * @param text the text to search for; words are expected to be separated by single spaces
 * @return pattern matching the lower-cased text
 */
private static Pattern asPatternWithPotentialLineBreaks(String text) {
    // allows word sequences to be separated by whitespace, line-breaks and comments(//, #)
    final String wordSeparator = " ?\\R?[\\s/#]*";
    return Pattern.compile(
            // keep empty leading/trailing words so that every space yields a separator
            Stream.of(text.toLowerCase(Locale.ROOT).split(" ", -1))
                    .map(word -> word.isEmpty() ? word : Pattern.quote(word))
                    .collect(Collectors.joining(wordSeparator)));
}

/**
 * Counts the files in the jar whose contents match at least one of the given patterns.
 *
 * <p>All files below {@code jarRoot} are searched, except for directories, class files, files
 * named "dependencies", files whose name starts with "license" or "notice", and a few
 * locations that are known to be dual-licensed. Files that are not valid UTF-8 are considered
 * binary files and are skipped.
 *
 * @param jar path of the jar, only used for log and error messages
 * @param jarRoot root path inside the jar from which files are searched
 * @param forbidden patterns to search for; they are applied to the lower-cased file contents
 * @return number of files that match at least one pattern
 * @throws IOException if the files of the jar cannot be listed
 * @throws RuntimeException if the contents of a file cannot be read
 */
private static int findNonBinaryFilesContainingText(
        Path jar, Path jarRoot, Collection<Pattern> forbidden) throws IOException {
    try (Stream<Path> files = Files.walk(jarRoot)) {
        return files.filter(path -> !path.equals(jarRoot))
                .filter(path -> !Files.isDirectory(path))
                .filter(JarFileChecker::isNoClassFile)
                // frequent false-positives due to dual-licensing; generated by maven
                .filter(path -> !getFileName(path).equals("dependencies"))
                // false-positives due to dual-licensing; use startsWith to cover .txt/.md files
                .filter(path -> !getFileName(path).startsWith("license"))
                // false-positives due to optional components; startsWith covers .txt/.md files
                .filter(path -> !getFileName(path).startsWith("notice"))
                // dual-licensed under GPL 2 and CDDL 1.1
                // contained in hadoop/presto S3 FS and flink-dist
                .filter(path -> !pathStartsWith(path, "/META-INF/versions/11/javax/xml/bind"))
                .filter(path -> !isJavaxManifest(jar, path))
                // dual-licensed under GPL 2 and EPL 2.0
                // contained in sql-avro-confluent-registry
                .filter(path -> !pathStartsWith(path, "/org/glassfish/jersey/internal"))
                .mapToInt(path -> countFileIfContainingForbiddenText(jar, path, forbidden))
                .sum();
    }
}

/**
 * Returns 1 if the given file matches at least one forbidden pattern, otherwise 0. Every
 * matching pattern is logged. Files that are not valid UTF-8 yield 0.
 */
private static int countFileIfContainingForbiddenText(
        Path jar, Path path, Collection<Pattern> forbidden) {
    final String fileContents;
    try {
        fileContents = readUtf8TextStrictly(path).toLowerCase(Locale.ROOT);
    } catch (MalformedInputException mie) {
        // binary file
        return 0;
    } catch (IOException e) {
        throw new RuntimeException(
                String.format("Could not read contents of file '%s' in jar '%s'.", path, jar),
                e);
    }

    int violations = 0;
    for (Pattern text : forbidden) {
        if (text.matcher(fileContents).find()) {
            // do not count individual violations because it can be
            // confusing when checking with aliases for the same
            // license
            violations = 1;
            LOG.error(
                    "File '{}' in jar '{}' contains match with forbidden regex '{}'.",
                    path,
                    jar,
                    text);
        }
    }
    return violations;
}

/**
 * Reads the file as UTF-8 and fails with a {@link MalformedInputException} on byte sequences
 * that are not valid UTF-8. A strict decoder is required for that, because the String
 * constructor silently replaces such sequences instead of reporting them.
 */
private static String readUtf8TextStrictly(Path file) throws IOException {
    return StandardCharsets.UTF_8
            .newDecoder()
            .decode(java.nio.ByteBuffer.wrap(Files.readAllBytes(file)))
            .toString();
}

private static int getNumLicenseFilesOutsideMetaInfDirectory(Path jar, Path jarRoot)
throws IOException {
try (Stream<Path> files = Files.walk(jarRoot)) {
Expand All @@ -152,10 +267,7 @@ private static int getNumLicenseFilesOutsideMetaInfDirectory(Path jar, Path jarR
path ->
!Files.isDirectory(
path)) // ignore directories, e.g. "license/"
.filter(
path ->
!getFileName(path)
.endsWith(".class")) // some class files contain
.filter(JarFileChecker::isNoClassFile) // some class files contain
// LICENSE in their name
.filter(
path ->
Expand Down Expand Up @@ -188,4 +300,32 @@ private static int getNumLicenseFilesOutsideMetaInfDirectory(Path jar, Path jarR
/**
 * Returns the lower-cased name of the last element of the given path.
 *
 * <p>The name is lower-cased with {@link Locale#ROOT}, so that comparisons against fixed names
 * such as "license" do not depend on the default locale of the JVM.
 *
 * @param path path with at least one name element
 * @return lower-cased file name
 */
private static String getFileName(Path path) {
    return path.getFileName().toString().toLowerCase(Locale.ROOT);
}

/**
 * Checks whether the given file is located below the given path.
 *
 * @param file the file to check
 * @param path the prefix, resolved against the file system of {@code file}
 * @return true if {@code file} starts with {@code path}
 */
private static boolean pathStartsWith(Path file, String path) {
    final Path prefix = file.getFileSystem().getPath(path);
    return file.startsWith(prefix);
}

/**
 * Checks whether the given file denotes exactly the given path.
 *
 * @param file the file to check
 * @param path the expected path, resolved against the file system of {@code file}
 * @return true if {@code file} equals {@code path}
 */
private static boolean equals(Path file, String path) {
    final Path expected = file.getFileSystem().getPath(path);
    return file.equals(expected);
}

/**
 * Checks that the given file is not a class file, judged by its lower-cased file name.
 *
 * @param file the file to check
 * @return false if the file name ends with ".class", otherwise true
 */
private static boolean isNoClassFile(Path file) {
    final boolean isClassFile = getFileName(file).endsWith(".class");
    return !isClassFile;
}

/**
 * Checks whether the given file is the Java 11 multi-release manifest of the jaxb-api.
 *
 * @param jar path of the jar, only used for the error message
 * @param potentialManifestFile the file to check
 * @return true if the file is located at /META-INF/versions/11/META-INF/MANIFEST.MF and
 *     contains the jaxb-api specification title
 * @throws RuntimeException if the file cannot be read
 */
private static boolean isJavaxManifest(Path jar, Path potentialManifestFile) {
    // only read the file if it is located at the expected path
    if (!equals(potentialManifestFile, "/META-INF/versions/11/META-INF/MANIFEST.MF")) {
        return false;
    }

    final String manifestContents;
    try {
        manifestContents = readFile(potentialManifestFile);
    } catch (IOException e) {
        throw new RuntimeException(
                String.format(
                        "Error while reading file %s from jar %s.", potentialManifestFile, jar),
                e);
    }
    return manifestContents.contains("Specification-Title: jaxb-api");
}

/**
 * Reads the entire file and decodes it as UTF-8.
 *
 * <p>Byte sequences that are not valid UTF-8 do not cause a failure; the String constructor
 * substitutes the replacement character (U+FFFD) for them.
 *
 * @param file the file to read
 * @return contents of the file
 * @throws IOException if the file cannot be read
 */
private static String readFile(Path file) throws IOException {
    final byte[] rawContents = Files.readAllBytes(file);
    return new String(rawContents, StandardCharsets.UTF_8);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
Expand Down Expand Up @@ -200,6 +201,33 @@ public void testIgnoreWebThirdPartyLicenses() throws Exception {
is(0));
}

/**
 * A jar with valid notice and license files plus one text file that contains the long name of
 * the LGPL (in mixed case, on a single line) must be reported with exactly one issue.
 */
@Test
public void testForbiddenLGPLongTextDetected() throws Exception {
    final Path jarWithLgplText =
            createJar(
                    Entry.fileEntry(VALID_NOTICE_CONTENTS, VALID_NOTICE_PATH),
                    Entry.fileEntry(VALID_LICENSE_CONTENTS, VALID_LICENSE_PATH),
                    Entry.fileEntry(
                            "some GNU Lesser General public License text",
                            Collections.singletonList("some_file.txt")));

    final int numSevereIssues = JarFileChecker.checkJar(jarWithLgplText);

    assertThat(numSevereIssues, is(1));
}

/**
 * Same as the single-line case, but the last word of the license name is placed on the next
 * line, behind tabs and the comment markers "//" and "#". The file must still be reported
 * with exactly one issue.
 */
@Test
public void testForbiddenLGPMultiLineLongTextWithCommentAndLeadingWhitespaceDetected()
        throws Exception {
    final Path jarWithWrappedLgplText =
            createJar(
                    Entry.fileEntry(VALID_NOTICE_CONTENTS, VALID_NOTICE_PATH),
                    Entry.fileEntry(VALID_LICENSE_CONTENTS, VALID_LICENSE_PATH),
                    Entry.fileEntry(
                            "some GNU Lesser General public \n\t\t//#License text",
                            Collections.singletonList("some_file.txt")));

    final int numSevereIssues = JarFileChecker.checkJar(jarWithWrappedLgplText);

    assertThat(numSevereIssues, is(1));
}

private static class Entry {
final String contents;
final List<String> path;
Expand Down

0 comments on commit 214f936

Please sign in to comment.