SPARK-1798. Tests should clean up temp files
Three issues related to temp files that tests generate – these should be touched up for hygiene but are not urgent.

Modules have a log4j.properties that directs the unit-tests.log output file to a path like `[module]/target/unit-tests.log`. But because tests already run with the module directory as the working directory, this ends up creating `[module]/[module]/target/unit-tests.log` instead of the intended location; the path in each module's test log4j.properties should just be `target/unit-tests.log`.

The `work/` directory is not deleted by `mvn clean`, either in the parent project or in the modules. Neither is the `checkpoint/` directory that is created under the various external modules.

Many tests create a temp directory that is usually not deleted afterwards. This can be largely resolved by calling `deleteOnExit()` when the directory is created, and by consistently calling `Utils.deleteRecursively` to clean up, sometimes in an `@After` method.
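
For illustration, the pattern might look roughly like the following in a ScalaTest suite. This is only a sketch: the suite name and test body are made up, and only `Files.createTempDir()`, `deleteOnExit()` and `Utils.deleteRecursively` come from the change itself.

```scala
package org.apache.spark

import java.io.File

import com.google.common.io.Files
import org.scalatest.{BeforeAndAfterEach, FunSuite}

// Utils is private[spark], so this sketch has to live under the org.apache.spark package.
import org.apache.spark.util.Utils

// Hypothetical suite, for illustration only.
class TempDirCleanupExampleSuite extends FunSuite with BeforeAndAfterEach {

  private var tempDir: File = _

  override def beforeEach() {
    super.beforeEach()
    // Create the scratch directory up front; deleteOnExit() is only a JVM-exit backstop
    // (it removes the directory itself, and only if it is empty by then).
    tempDir = Files.createTempDir()
    tempDir.deleteOnExit()
  }

  override def afterEach() {
    // The real cleanup: recursively remove whatever the test wrote under tempDir.
    Utils.deleteRecursively(tempDir)
    super.afterEach()
  }

  test("writes scratch output under the temp dir") {
    val out = new File(tempDir, "output.txt")
    assert(out.createNewFile())
  }
}
```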

_If anyone seconds the motion, I can follow up with a more significant change that introduces a new test trait along the lines of `LocalSparkContext`, providing temp-directory management that subclasses can take advantage of._
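
For concreteness, such a trait might look roughly like the following. It is purely a sketch of the idea and not part of this change; the trait name `TempDirectory` and its members are hypothetical.

```scala
package org.apache.spark

import java.io.File

import com.google.common.io.Files
import org.scalatest.{BeforeAndAfterEach, Suite}

import org.apache.spark.util.Utils

/**
 * Hypothetical mix-in in the spirit of LocalSparkContext: suites that extend it get a
 * fresh temp directory before each test and automatic recursive cleanup afterwards.
 */
trait TempDirectory extends BeforeAndAfterEach { self: Suite =>

  private var _tempDir: File = _

  /** The temp directory available to the currently running test. */
  protected def tempDir: File = _tempDir

  override def beforeEach() {
    super.beforeEach()
    _tempDir = Files.createTempDir()
    _tempDir.deleteOnExit()
  }

  override def afterEach() {
    try {
      Utils.deleteRecursively(_tempDir)
    } finally {
      super.afterEach()
    }
  }
}
```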

Author: Sean Owen <[email protected]>

Closes apache#732 from srowen/SPARK-1798 and squashes the following commits:

5af578e [Sean Owen] Try to consistently delete test temp dirs and files, and set deleteOnExit() for each
b21b356 [Sean Owen] Remove work/ and checkpoint/ dirs with mvn clean
bdd0f41 [Sean Owen] Remove duplicate module dir in log4j.properties output path for tests
srowen authored and pwendell committed May 12, 2014
1 parent 1e4a65e commit 7120a29
Showing 35 changed files with 193 additions and 114 deletions.
2 changes: 1 addition & 1 deletion bagel/src/test/resources/log4j.properties
@@ -19,7 +19,7 @@
 log4j.rootCategory=INFO, file
 log4j.appender.file=org.apache.log4j.FileAppender
 log4j.appender.file.append=false
-log4j.appender.file.file=bagel/target/unit-tests.log
+log4j.appender.file.file=target/unit-tests.log
 log4j.appender.file.layout=org.apache.log4j.PatternLayout
 log4j.appender.file.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss.SSS} %p %c{1}: %m%n

1 change: 1 addition & 0 deletions core/src/main/scala/org/apache/spark/TestUtils.scala
@@ -43,6 +43,7 @@ private[spark] object TestUtils {
    */
   def createJarWithClasses(classNames: Seq[String], value: String = ""): URL = {
     val tempDir = Files.createTempDir()
+    tempDir.deleteOnExit()
     val files = for (name <- classNames) yield createCompiledClass(name, tempDir, value)
     val jarFile = new File(tempDir, "testJar-%s.jar".format(System.currentTimeMillis()))
     createJar(files, jarFile)

18 changes: 10 additions & 8 deletions core/src/main/scala/org/apache/spark/util/Utils.scala
@@ -586,15 +586,17 @@ private[spark] object Utils extends Logging {
    * Don't follow directories if they are symlinks.
    */
   def deleteRecursively(file: File) {
-    if ((file.isDirectory) && !isSymlink(file)) {
-      for (child <- listFilesSafely(file)) {
-        deleteRecursively(child)
+    if (file != null) {
+      if ((file.isDirectory) && !isSymlink(file)) {
+        for (child <- listFilesSafely(file)) {
+          deleteRecursively(child)
+        }
       }
-    }
-    if (!file.delete()) {
-      // Delete can also fail if the file simply did not exist
-      if (file.exists()) {
-        throw new IOException("Failed to delete: " + file.getAbsolutePath)
+      if (!file.delete()) {
+        // Delete can also fail if the file simply did not exist
+        if (file.exists()) {
+          throw new IOException("Failed to delete: " + file.getAbsolutePath)
+        }
       }
     }
   }

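One consequence of the null guard added to `deleteRecursively` above is that teardown code can now call it unconditionally, even when setup failed before the directory was ever created; that is what lets `CheckpointSuite.afterEach` (further down) drop its own null check. The snippet below is illustrative only and not part of the commit; it assumes it lives in the Spark source tree, since `Utils` is `private[spark]`.

```scala
package org.apache.spark

import java.io.File

import org.apache.spark.util.Utils

// Illustrative only: scratchDir stands in for a field that setUp never initialized.
object DeleteRecursivelyNullExample {
  def main(args: Array[String]): Unit = {
    val scratchDir: File = null
    // With the null check inside deleteRecursively this is a harmless no-op;
    // before this change it would have thrown a NullPointerException.
    Utils.deleteRecursively(scratchDir)
  }
}
```
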
18 changes: 5 additions & 13 deletions core/src/test/java/org/apache/spark/JavaAPISuite.java
@@ -18,7 +18,6 @@
 package org.apache.spark;
 
 import java.io.*;
-import java.lang.StringBuilder;
 import java.util.*;
 
 import scala.Tuple2;
@@ -49,16 +48,20 @@
 import org.apache.spark.partial.PartialResult;
 import org.apache.spark.storage.StorageLevel;
 import org.apache.spark.util.StatCounter;
+import org.apache.spark.util.Utils;
 
 // The test suite itself is Serializable so that anonymous Function implementations can be
 // serialized, as an alternative to converting these anonymous classes to static inner classes;
 // see http://stackoverflow.com/questions/758570/.
 public class JavaAPISuite implements Serializable {
   private transient JavaSparkContext sc;
+  private transient File tempDir;
 
   @Before
   public void setUp() {
     sc = new JavaSparkContext("local", "JavaAPISuite");
+    tempDir = Files.createTempDir();
+    tempDir.deleteOnExit();
   }
 
   @After
@@ -67,6 +70,7 @@ public void tearDown() {
     sc = null;
     // To avoid Akka rebinding to the same port, since it doesn't unbind immediately on shutdown
     System.clearProperty("spark.driver.port");
+    Utils.deleteRecursively(tempDir);
   }
 
   static class ReverseIntComparator implements Comparator<Integer>, Serializable {
@@ -611,7 +615,6 @@ public void glom() {
 
   @Test
   public void textFiles() throws IOException {
-    File tempDir = Files.createTempDir();
     String outputDir = new File(tempDir, "output").getAbsolutePath();
     JavaRDD<Integer> rdd = sc.parallelize(Arrays.asList(1, 2, 3, 4));
     rdd.saveAsTextFile(outputDir);
@@ -630,7 +633,6 @@ public void wholeTextFiles() throws IOException {
     byte[] content1 = "spark is easy to use.\n".getBytes("utf-8");
     byte[] content2 = "spark is also easy to use.\n".getBytes("utf-8");
 
-    File tempDir = Files.createTempDir();
     String tempDirName = tempDir.getAbsolutePath();
     DataOutputStream ds = new DataOutputStream(new FileOutputStream(tempDirName + "/part-00000"));
     ds.write(content1);
@@ -653,7 +655,6 @@
 
   @Test
   public void textFilesCompressed() throws IOException {
-    File tempDir = Files.createTempDir();
     String outputDir = new File(tempDir, "output").getAbsolutePath();
     JavaRDD<Integer> rdd = sc.parallelize(Arrays.asList(1, 2, 3, 4));
     rdd.saveAsTextFile(outputDir, DefaultCodec.class);
@@ -667,7 +668,6 @@
   @SuppressWarnings("unchecked")
   @Test
   public void sequenceFile() {
-    File tempDir = Files.createTempDir();
     String outputDir = new File(tempDir, "output").getAbsolutePath();
     List<Tuple2<Integer, String>> pairs = Arrays.asList(
       new Tuple2<Integer, String>(1, "a"),
@@ -697,7 +697,6 @@ public Tuple2<Integer, String> call(Tuple2<IntWritable, Text> pair) {
   @SuppressWarnings("unchecked")
   @Test
   public void writeWithNewAPIHadoopFile() {
-    File tempDir = Files.createTempDir();
     String outputDir = new File(tempDir, "output").getAbsolutePath();
     List<Tuple2<Integer, String>> pairs = Arrays.asList(
       new Tuple2<Integer, String>(1, "a"),
@@ -728,7 +727,6 @@ public String call(Tuple2<IntWritable, Text> x) {
   @SuppressWarnings("unchecked")
   @Test
   public void readWithNewAPIHadoopFile() throws IOException {
-    File tempDir = Files.createTempDir();
     String outputDir = new File(tempDir, "output").getAbsolutePath();
     List<Tuple2<Integer, String>> pairs = Arrays.asList(
       new Tuple2<Integer, String>(1, "a"),
@@ -758,7 +756,6 @@ public String call(Tuple2<IntWritable, Text> x) {
 
   @Test
   public void objectFilesOfInts() {
-    File tempDir = Files.createTempDir();
     String outputDir = new File(tempDir, "output").getAbsolutePath();
     JavaRDD<Integer> rdd = sc.parallelize(Arrays.asList(1, 2, 3, 4));
     rdd.saveAsObjectFile(outputDir);
@@ -771,7 +768,6 @@ public void objectFilesOfInts() {
   @SuppressWarnings("unchecked")
   @Test
   public void objectFilesOfComplexTypes() {
-    File tempDir = Files.createTempDir();
     String outputDir = new File(tempDir, "output").getAbsolutePath();
     List<Tuple2<Integer, String>> pairs = Arrays.asList(
       new Tuple2<Integer, String>(1, "a"),
@@ -788,7 +784,6 @@
   @SuppressWarnings("unchecked")
   @Test
   public void hadoopFile() {
-    File tempDir = Files.createTempDir();
     String outputDir = new File(tempDir, "output").getAbsolutePath();
     List<Tuple2<Integer, String>> pairs = Arrays.asList(
       new Tuple2<Integer, String>(1, "a"),
@@ -818,7 +813,6 @@ public String call(Tuple2<IntWritable, Text> x) {
   @SuppressWarnings("unchecked")
   @Test
   public void hadoopFileCompressed() {
-    File tempDir = Files.createTempDir();
     String outputDir = new File(tempDir, "output_compressed").getAbsolutePath();
     List<Tuple2<Integer, String>> pairs = Arrays.asList(
       new Tuple2<Integer, String>(1, "a"),
@@ -948,7 +942,6 @@ public String call(Integer t) throws Exception {
 
   @Test
   public void checkpointAndComputation() {
-    File tempDir = Files.createTempDir();
     JavaRDD<Integer> rdd = sc.parallelize(Arrays.asList(1, 2, 3, 4, 5));
     sc.setCheckpointDir(tempDir.getAbsolutePath());
     Assert.assertEquals(false, rdd.isCheckpointed());
@@ -960,7 +953,6 @@
 
   @Test
   public void checkpointAndRestore() {
-    File tempDir = Files.createTempDir();
     JavaRDD<Integer> rdd = sc.parallelize(Arrays.asList(1, 2, 3, 4, 5));
     sc.setCheckpointDir(tempDir.getAbsolutePath());
     Assert.assertEquals(false, rdd.isCheckpointed());

2 changes: 1 addition & 1 deletion core/src/test/resources/log4j.properties
@@ -19,7 +19,7 @@
 log4j.rootCategory=INFO, file
 log4j.appender.file=org.apache.log4j.FileAppender
 log4j.appender.file.append=false
-log4j.appender.file.file=core/target/unit-tests.log
+log4j.appender.file.file=target/unit-tests.log
 log4j.appender.file.layout=org.apache.log4j.PatternLayout
 log4j.appender.file.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss.SSS} %p %c{1}: %m%n

5 changes: 2 additions & 3 deletions core/src/test/scala/org/apache/spark/CheckpointSuite.scala
@@ -35,16 +35,15 @@ class CheckpointSuite extends FunSuite with LocalSparkContext with Logging {
   override def beforeEach() {
     super.beforeEach()
     checkpointDir = File.createTempFile("temp", "")
+    checkpointDir.deleteOnExit()
     checkpointDir.delete()
     sc = new SparkContext("local", "test")
     sc.setCheckpointDir(checkpointDir.toString)
   }
 
   override def afterEach() {
     super.afterEach()
-    if (checkpointDir != null) {
-      checkpointDir.delete()
-    }
+    Utils.deleteRecursively(checkpointDir)
   }
 
   test("basic checkpointing") {

18 changes: 14 additions & 4 deletions core/src/test/scala/org/apache/spark/FileServerSuite.scala
@@ -24,9 +24,11 @@ import com.google.common.io.Files
 import org.scalatest.FunSuite
 
 import org.apache.spark.SparkContext._
+import org.apache.spark.util.Utils
 
 class FileServerSuite extends FunSuite with LocalSparkContext {
 
+  @transient var tmpDir: File = _
   @transient var tmpFile: File = _
   @transient var tmpJarUrl: String = _
 
@@ -38,15 +40,18 @@ class FileServerSuite extends FunSuite with LocalSparkContext {
 
   override def beforeAll() {
     super.beforeAll()
-    val tmpDir = new File(Files.createTempDir(), "test")
-    tmpDir.mkdir()
-
-    val textFile = new File(tmpDir, "FileServerSuite.txt")
+    tmpDir = Files.createTempDir()
+    tmpDir.deleteOnExit()
+    val testTempDir = new File(tmpDir, "test")
+    testTempDir.mkdir()
+
+    val textFile = new File(testTempDir, "FileServerSuite.txt")
     val pw = new PrintWriter(textFile)
     pw.println("100")
     pw.close()
 
-    val jarFile = new File(tmpDir, "test.jar")
+    val jarFile = new File(testTempDir, "test.jar")
     val jarStream = new FileOutputStream(jarFile)
     val jar = new JarOutputStream(jarStream, new java.util.jar.Manifest())
     System.setProperty("spark.authenticate", "false")
@@ -70,6 +75,11 @@ class FileServerSuite extends FunSuite with LocalSparkContext {
     tmpJarUrl = jarFile.toURI.toURL.toString
   }
 
+  override def afterAll() {
+    super.afterAll()
+    Utils.deleteRecursively(tmpDir)
+  }
+
   test("Distributing files locally") {
     sc = new SparkContext("local[4]", "test")
     sc.addFile(tmpFile.toString)

(Diffs for the remaining 28 changed files are not shown.)
