SPARK-1798. Tests should clean up temp files
Three issues related to temp files that tests generate – these should be touched up for hygiene but are not urgent.

Modules have a log4j.properties that directs the unit-tests.log output file to a path like `[module]/target/unit-tests.log`. But because tests already run with the module directory as the working directory, this ends up creating `[module]/[module]/target/unit-tests.log` instead of the intended location; the path in each module's test log4j.properties should just be `target/unit-tests.log`.

The `work/` directory is not deleted by `mvn clean`, either in the parent project or in the modules. Neither is the `checkpoint/` directory that is created under the various external modules.

Many tests create a temp directory that is usually not deleted afterwards. This can be largely resolved by calling `deleteOnExit()` when the directory is created, and by consistently calling `Utils.deleteRecursively` to clean up, sometimes in an `@After` method.
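
For illustration, the pattern might look roughly like the following in a ScalaTest suite. This is only a sketch: the suite name and test body are made up, and only `Files.createTempDir()`, `deleteOnExit()` and `Utils.deleteRecursively` come from the change itself.

```scala
package org.apache.spark

import java.io.File

import com.google.common.io.Files
import org.scalatest.{BeforeAndAfterEach, FunSuite}

// Utils is private[spark], so this sketch has to live under the org.apache.spark package.
import org.apache.spark.util.Utils

// Hypothetical suite, for illustration only.
class TempDirCleanupExampleSuite extends FunSuite with BeforeAndAfterEach {

  private var tempDir: File = _

  override def beforeEach() {
    super.beforeEach()
    // Create the scratch directory up front; deleteOnExit() is only a JVM-exit backstop
    // (it removes the directory itself, and only if it is empty by then).
    tempDir = Files.createTempDir()
    tempDir.deleteOnExit()
  }

  override def afterEach() {
    // The real cleanup: recursively remove whatever the test wrote under tempDir.
    Utils.deleteRecursively(tempDir)
    super.afterEach()
  }

  test("writes scratch output under the temp dir") {
    val out = new File(tempDir, "output.txt")
    assert(out.createNewFile())
  }
}
```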

_If anyone seconds the motion, I can follow up with a more significant change that introduces a new test trait along the lines of `LocalSparkContext`, providing temp-directory management that subclasses can take advantage of._
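
For concreteness, such a trait might look roughly like the following. It is purely a sketch of the idea and not part of this change; the trait name `TempDirectory` and its members are hypothetical.

```scala
package org.apache.spark

import java.io.File

import com.google.common.io.Files
import org.scalatest.{BeforeAndAfterEach, Suite}

import org.apache.spark.util.Utils

/**
 * Hypothetical mix-in in the spirit of LocalSparkContext: suites that extend it get a
 * fresh temp directory before each test and automatic recursive cleanup afterwards.
 */
trait TempDirectory extends BeforeAndAfterEach { self: Suite =>

  private var _tempDir: File = _

  /** The temp directory available to the currently running test. */
  protected def tempDir: File = _tempDir

  override def beforeEach() {
    super.beforeEach()
    _tempDir = Files.createTempDir()
    _tempDir.deleteOnExit()
  }

  override def afterEach() {
    try {
      Utils.deleteRecursively(_tempDir)
    } finally {
      super.afterEach()
    }
  }
}
```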

Author: Sean Owen <[email protected]>

Closes apache#732 from srowen/SPARK-1798 and squashes the following commits:

5af578e [Sean Owen] Try to consistently delete test temp dirs and files, and set deleteOnExit() for each
b21b356 [Sean Owen] Remove work/ and checkpoint/ dirs with mvn clean
bdd0f41 [Sean Owen] Remove duplicate module dir in log4j.properties output path for tests
srowen authored and pwendell committed May 12, 2014
1 parent 1e4a65e commit 7120a29
Showing 35 changed files with 193 additions and 114 deletions.
2 changes: 1 addition & 1 deletion bagel/src/test/resources/log4j.properties
@@ -19,7 +19,7 @@
 log4j.rootCategory=INFO, file
 log4j.appender.file=org.apache.log4j.FileAppender
 log4j.appender.file.append=false
-log4j.appender.file.file=bagel/target/unit-tests.log
+log4j.appender.file.file=target/unit-tests.log
 log4j.appender.file.layout=org.apache.log4j.PatternLayout
 log4j.appender.file.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss.SSS} %p %c{1}: %m%n

1 change: 1 addition & 0 deletions core/src/main/scala/org/apache/spark/TestUtils.scala
@@ -43,6 +43,7 @@ private[spark] object TestUtils {
    */
   def createJarWithClasses(classNames: Seq[String], value: String = ""): URL = {
     val tempDir = Files.createTempDir()
+    tempDir.deleteOnExit()
     val files = for (name <- classNames) yield createCompiledClass(name, tempDir, value)
     val jarFile = new File(tempDir, "testJar-%s.jar".format(System.currentTimeMillis()))
     createJar(files, jarFile)

18 changes: 10 additions & 8 deletions core/src/main/scala/org/apache/spark/util/Utils.scala
@@ -586,15 +586,17 @@ private[spark] object Utils extends Logging {
    * Don't follow directories if they are symlinks.
    */
   def deleteRecursively(file: File) {
-    if ((file.isDirectory) && !isSymlink(file)) {
-      for (child <- listFilesSafely(file)) {
-        deleteRecursively(child)
+    if (file != null) {
+      if ((file.isDirectory) && !isSymlink(file)) {
+        for (child <- listFilesSafely(file)) {
+          deleteRecursively(child)
+        }
       }
-    }
-    if (!file.delete()) {
-      // Delete can also fail if the file simply did not exist
-      if (file.exists()) {
-        throw new IOException("Failed to delete: " + file.getAbsolutePath)
+      if (!file.delete()) {
+        // Delete can also fail if the file simply did not exist
+        if (file.exists()) {
+          throw new IOException("Failed to delete: " + file.getAbsolutePath)
+        }
       }
     }
   }

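One consequence of the null guard added to `deleteRecursively` above is that teardown code can now call it unconditionally, even when setup failed before the directory was ever created; that is what lets `CheckpointSuite.afterEach` (further down) drop its own null check. The snippet below is illustrative only and not part of the commit; it assumes it lives in the Spark source tree, since `Utils` is `private[spark]`.

```scala
package org.apache.spark

import java.io.File

import org.apache.spark.util.Utils

// Illustrative only: scratchDir stands in for a field that setUp never initialized.
object DeleteRecursivelyNullExample {
  def main(args: Array[String]): Unit = {
    val scratchDir: File = null
    // With the null check inside deleteRecursively this is a harmless no-op;
    // before this change it would have thrown a NullPointerException.
    Utils.deleteRecursively(scratchDir)
  }
}
```
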
18 changes: 5 additions & 13 deletions core/src/test/java/org/apache/spark/JavaAPISuite.java
@@ -18,7 +18,6 @@
 package org.apache.spark;
 
 import java.io.*;
-import java.lang.StringBuilder;
 import java.util.*;
 
 import scala.Tuple2;
@@ -49,16 +48,20 @@
 import org.apache.spark.partial.PartialResult;
 import org.apache.spark.storage.StorageLevel;
 import org.apache.spark.util.StatCounter;
+import org.apache.spark.util.Utils;
 
 // The test suite itself is Serializable so that anonymous Function implementations can be
 // serialized, as an alternative to converting these anonymous classes to static inner classes;
 // see http://stackoverflow.com/questions/758570/.
 public class JavaAPISuite implements Serializable {
   private transient JavaSparkContext sc;
+  private transient File tempDir;
 
   @Before
   public void setUp() {
     sc = new JavaSparkContext("local", "JavaAPISuite");
+    tempDir = Files.createTempDir();
+    tempDir.deleteOnExit();
   }
 
   @After
@@ -67,6 +70,7 @@ public void tearDown() {
     sc = null;
     // To avoid Akka rebinding to the same port, since it doesn't unbind immediately on shutdown
     System.clearProperty("spark.driver.port");
+    Utils.deleteRecursively(tempDir);
   }
 
   static class ReverseIntComparator implements Comparator<Integer>, Serializable {
@@ -611,7 +615,6 @@ public void glom() {
 
   @Test
   public void textFiles() throws IOException {
-    File tempDir = Files.createTempDir();
     String outputDir = new File(tempDir, "output").getAbsolutePath();
     JavaRDD<Integer> rdd = sc.parallelize(Arrays.asList(1, 2, 3, 4));
     rdd.saveAsTextFile(outputDir);
@@ -630,7 +633,6 @@ public void wholeTextFiles() throws IOException {
     byte[] content1 = "spark is easy to use.\n".getBytes("utf-8");
     byte[] content2 = "spark is also easy to use.\n".getBytes("utf-8");
 
-    File tempDir = Files.createTempDir();
     String tempDirName = tempDir.getAbsolutePath();
     DataOutputStream ds = new DataOutputStream(new FileOutputStream(tempDirName + "/part-00000"));
     ds.write(content1);
@@ -653,7 +655,6 @@
 
   @Test
   public void textFilesCompressed() throws IOException {
-    File tempDir = Files.createTempDir();
     String outputDir = new File(tempDir, "output").getAbsolutePath();
     JavaRDD<Integer> rdd = sc.parallelize(Arrays.asList(1, 2, 3, 4));
     rdd.saveAsTextFile(outputDir, DefaultCodec.class);
@@ -667,7 +668,6 @@
   @SuppressWarnings("unchecked")
   @Test
   public void sequenceFile() {
-    File tempDir = Files.createTempDir();
     String outputDir = new File(tempDir, "output").getAbsolutePath();
     List<Tuple2<Integer, String>> pairs = Arrays.asList(
       new Tuple2<Integer, String>(1, "a"),
@@ -697,7 +697,6 @@ public Tuple2<Integer, String> call(Tuple2<IntWritable, Text> pair) {
   @SuppressWarnings("unchecked")
   @Test
   public void writeWithNewAPIHadoopFile() {
-    File tempDir = Files.createTempDir();
     String outputDir = new File(tempDir, "output").getAbsolutePath();
     List<Tuple2<Integer, String>> pairs = Arrays.asList(
       new Tuple2<Integer, String>(1, "a"),
@@ -728,7 +727,6 @@ public String call(Tuple2<IntWritable, Text> x) {
   @SuppressWarnings("unchecked")
   @Test
   public void readWithNewAPIHadoopFile() throws IOException {
-    File tempDir = Files.createTempDir();
     String outputDir = new File(tempDir, "output").getAbsolutePath();
     List<Tuple2<Integer, String>> pairs = Arrays.asList(
       new Tuple2<Integer, String>(1, "a"),
@@ -758,7 +756,6 @@ public String call(Tuple2<IntWritable, Text> x) {
 
   @Test
   public void objectFilesOfInts() {
-    File tempDir = Files.createTempDir();
     String outputDir = new File(tempDir, "output").getAbsolutePath();
     JavaRDD<Integer> rdd = sc.parallelize(Arrays.asList(1, 2, 3, 4));
     rdd.saveAsObjectFile(outputDir);
@@ -771,7 +768,6 @@ public void objectFilesOfInts() {
   @SuppressWarnings("unchecked")
   @Test
   public void objectFilesOfComplexTypes() {
-    File tempDir = Files.createTempDir();
     String outputDir = new File(tempDir, "output").getAbsolutePath();
     List<Tuple2<Integer, String>> pairs = Arrays.asList(
       new Tuple2<Integer, String>(1, "a"),
@@ -788,7 +784,6 @@
   @SuppressWarnings("unchecked")
   @Test
   public void hadoopFile() {
-    File tempDir = Files.createTempDir();
     String outputDir = new File(tempDir, "output").getAbsolutePath();
     List<Tuple2<Integer, String>> pairs = Arrays.asList(
       new Tuple2<Integer, String>(1, "a"),
@@ -818,7 +813,6 @@ public String call(Tuple2<IntWritable, Text> x) {
   @SuppressWarnings("unchecked")
   @Test
   public void hadoopFileCompressed() {
-    File tempDir = Files.createTempDir();
     String outputDir = new File(tempDir, "output_compressed").getAbsolutePath();
     List<Tuple2<Integer, String>> pairs = Arrays.asList(
       new Tuple2<Integer, String>(1, "a"),
@@ -948,7 +942,6 @@ public String call(Integer t) throws Exception {
 
   @Test
   public void checkpointAndComputation() {
-    File tempDir = Files.createTempDir();
     JavaRDD<Integer> rdd = sc.parallelize(Arrays.asList(1, 2, 3, 4, 5));
     sc.setCheckpointDir(tempDir.getAbsolutePath());
     Assert.assertEquals(false, rdd.isCheckpointed());
@@ -960,7 +953,6 @@
 
   @Test
   public void checkpointAndRestore() {
-    File tempDir = Files.createTempDir();
     JavaRDD<Integer> rdd = sc.parallelize(Arrays.asList(1, 2, 3, 4, 5));
     sc.setCheckpointDir(tempDir.getAbsolutePath());
     Assert.assertEquals(false, rdd.isCheckpointed());

2 changes: 1 addition & 1 deletion core/src/test/resources/log4j.properties
@@ -19,7 +19,7 @@
 log4j.rootCategory=INFO, file
 log4j.appender.file=org.apache.log4j.FileAppender
 log4j.appender.file.append=false
-log4j.appender.file.file=core/target/unit-tests.log
+log4j.appender.file.file=target/unit-tests.log
 log4j.appender.file.layout=org.apache.log4j.PatternLayout
 log4j.appender.file.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss.SSS} %p %c{1}: %m%n

5 changes: 2 additions & 3 deletions core/src/test/scala/org/apache/spark/CheckpointSuite.scala
@@ -35,16 +35,15 @@ class CheckpointSuite extends FunSuite with LocalSparkContext with Logging {
   override def beforeEach() {
     super.beforeEach()
     checkpointDir = File.createTempFile("temp", "")
+    checkpointDir.deleteOnExit()
     checkpointDir.delete()
     sc = new SparkContext("local", "test")
     sc.setCheckpointDir(checkpointDir.toString)
   }
 
   override def afterEach() {
     super.afterEach()
-    if (checkpointDir != null) {
-      checkpointDir.delete()
-    }
+    Utils.deleteRecursively(checkpointDir)
   }
 
   test("basic checkpointing") {

18 changes: 14 additions & 4 deletions core/src/test/scala/org/apache/spark/FileServerSuite.scala
@@ -24,9 +24,11 @@ import com.google.common.io.Files
 import org.scalatest.FunSuite
 
 import org.apache.spark.SparkContext._
+import org.apache.spark.util.Utils
 
 class FileServerSuite extends FunSuite with LocalSparkContext {
 
+  @transient var tmpDir: File = _
   @transient var tmpFile: File = _
   @transient var tmpJarUrl: String = _
 
@@ -38,15 +40,18 @@ class FileServerSuite extends FunSuite with LocalSparkContext {
 
   override def beforeAll() {
     super.beforeAll()
-    val tmpDir = new File(Files.createTempDir(), "test")
-    tmpDir.mkdir()
-
-    val textFile = new File(tmpDir, "FileServerSuite.txt")
+    tmpDir = Files.createTempDir()
+    tmpDir.deleteOnExit()
+    val testTempDir = new File(tmpDir, "test")
+    testTempDir.mkdir()
+
+    val textFile = new File(testTempDir, "FileServerSuite.txt")
     val pw = new PrintWriter(textFile)
     pw.println("100")
     pw.close()
 
-    val jarFile = new File(tmpDir, "test.jar")
+    val jarFile = new File(testTempDir, "test.jar")
     val jarStream = new FileOutputStream(jarFile)
     val jar = new JarOutputStream(jarStream, new java.util.jar.Manifest())
     System.setProperty("spark.authenticate", "false")
@@ -70,6 +75,11 @@ class FileServerSuite extends FunSuite with LocalSparkContext {
     tmpJarUrl = jarFile.toURI.toURL.toString
   }
 
+  override def afterAll() {
+    super.afterAll()
+    Utils.deleteRecursively(tmpDir)
+  }
+
   test("Distributing files locally") {
     sc = new SparkContext("local[4]", "test")
     sc.addFile(tmpFile.toString)

(Diffs for the remaining 28 changed files are not shown.)
