forked from apache/flink
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[FLINK-5254] [yarn] Implement YARN High-Availability Services
- Loading branch information
1 parent
e2922ad
commit 2a7dbda
Showing
33 changed files
with
2,537 additions
and
78 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
121 changes: 121 additions & 0 deletions
121
...-fs-tests/src/test/java/org/apache/flink/hdfstests/FsNegativeRunningJobsRegistryTest.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,121 @@ | ||
/* | ||
* Licensed to the Apache Software Foundation (ASF) under one | ||
* or more contributor license agreements. See the NOTICE file | ||
* distributed with this work for additional information | ||
* regarding copyright ownership. The ASF licenses this file | ||
* to you under the Apache License, Version 2.0 (the | ||
* "License"); you may not use this file except in compliance | ||
* with the License. You may obtain a copy of the License at | ||
* | ||
* http:https://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
package org.apache.flink.hdfstests; | ||
|
||
import org.apache.flink.api.common.JobID; | ||
import org.apache.flink.core.fs.Path; | ||
import org.apache.flink.runtime.highavailability.FsNegativeRunningJobsRegistry; | ||
|
||
import org.apache.hadoop.conf.Configuration; | ||
import org.apache.hadoop.hdfs.MiniDFSCluster; | ||
|
||
import org.junit.AfterClass; | ||
import org.junit.BeforeClass; | ||
import org.junit.ClassRule; | ||
import org.junit.Test; | ||
import org.junit.rules.TemporaryFolder; | ||
|
||
import java.io.File; | ||
|
||
import static org.junit.Assert.assertFalse; | ||
import static org.junit.Assert.assertTrue; | ||
|
||
public class FsNegativeRunningJobsRegistryTest { | ||
|
||
@ClassRule | ||
public static final TemporaryFolder TEMP_DIR = new TemporaryFolder(); | ||
|
||
private static MiniDFSCluster HDFS_CLUSTER; | ||
|
||
private static Path HDFS_ROOT_PATH; | ||
|
||
// ------------------------------------------------------------------------ | ||
// startup / shutdown | ||
// ------------------------------------------------------------------------ | ||
|
||
@BeforeClass | ||
public static void createHDFS() throws Exception { | ||
final File tempDir = TEMP_DIR.newFolder(); | ||
|
||
Configuration hdConf = new Configuration(); | ||
hdConf.set(MiniDFSCluster.HDFS_MINIDFS_BASEDIR, tempDir.getAbsolutePath()); | ||
|
||
MiniDFSCluster.Builder builder = new MiniDFSCluster.Builder(hdConf); | ||
HDFS_CLUSTER = builder.build(); | ||
|
||
HDFS_ROOT_PATH = new Path("hdfs:https://" + HDFS_CLUSTER.getURI().getHost() + ":" | ||
+ HDFS_CLUSTER.getNameNodePort() + "/"); | ||
} | ||
|
||
@AfterClass | ||
public static void destroyHDFS() { | ||
if (HDFS_CLUSTER != null) { | ||
HDFS_CLUSTER.shutdown(); | ||
} | ||
HDFS_CLUSTER = null; | ||
HDFS_ROOT_PATH = null; | ||
} | ||
|
||
// ------------------------------------------------------------------------ | ||
// Tests | ||
// ------------------------------------------------------------------------ | ||
|
||
@Test | ||
public void testCreateAndSetFinished() throws Exception { | ||
final Path workDir = new Path(HDFS_ROOT_PATH, "test-work-dir"); | ||
final JobID jid = new JobID(); | ||
|
||
FsNegativeRunningJobsRegistry registry = new FsNegativeRunningJobsRegistry(workDir); | ||
|
||
// initially, without any call, the job is considered running | ||
assertTrue(registry.isJobRunning(jid)); | ||
|
||
// repeated setting should not affect the status | ||
registry.setJobRunning(jid); | ||
assertTrue(registry.isJobRunning(jid)); | ||
|
||
// set the job to finished and validate | ||
registry.setJobFinished(jid); | ||
assertFalse(registry.isJobRunning(jid)); | ||
|
||
// another registry should pick this up | ||
FsNegativeRunningJobsRegistry otherRegistry = new FsNegativeRunningJobsRegistry(workDir); | ||
assertFalse(otherRegistry.isJobRunning(jid)); | ||
} | ||
|
||
@Test | ||
public void testSetFinishedAndRunning() throws Exception { | ||
final Path workDir = new Path(HDFS_ROOT_PATH, "änother_wörk_directörü"); | ||
final JobID jid = new JobID(); | ||
|
||
FsNegativeRunningJobsRegistry registry = new FsNegativeRunningJobsRegistry(workDir); | ||
|
||
// set the job to finished and validate | ||
registry.setJobFinished(jid); | ||
assertFalse(registry.isJobRunning(jid)); | ||
|
||
// set the job to back to running and validate | ||
registry.setJobRunning(jid); | ||
assertTrue(registry.isJobRunning(jid)); | ||
|
||
// another registry should pick this up | ||
FsNegativeRunningJobsRegistry otherRegistry = new FsNegativeRunningJobsRegistry(workDir); | ||
assertTrue(otherRegistry.isJobRunning(jid)); | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
153 changes: 153 additions & 0 deletions
153
...rc/main/java/org/apache/flink/runtime/highavailability/FsNegativeRunningJobsRegistry.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,153 @@ | ||
/* | ||
* Licensed to the Apache Software Foundation (ASF) under one | ||
* or more contributor license agreements. See the NOTICE file | ||
* distributed with this work for additional information | ||
* regarding copyright ownership. The ASF licenses this file | ||
* to you under the Apache License, Version 2.0 (the | ||
* "License"); you may not use this file except in compliance | ||
* with the License. You may obtain a copy of the License at | ||
* | ||
* http:https://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
package org.apache.flink.runtime.highavailability; | ||
|
||
import org.apache.flink.api.common.JobID; | ||
import org.apache.flink.core.fs.FSDataOutputStream; | ||
import org.apache.flink.core.fs.FileSystem; | ||
import org.apache.flink.core.fs.Path; | ||
|
||
import java.io.FileNotFoundException; | ||
import java.io.IOException; | ||
|
||
import static org.apache.flink.util.Preconditions.checkNotNull; | ||
|
||
/** | ||
* This {@link RunningJobsRegistry} tracks the status jobs via marker files, | ||
* marking finished jobs via marker files. | ||
* | ||
* <p>The general contract is the following: | ||
* <ul> | ||
* <li>Initially, a marker file does not exist (no one created it, yet), which means | ||
* the specific job is assumed to be running</li> | ||
* <li>The JobManager that finishes calls this service to create the marker file, | ||
* which marks the job as finished.</li> | ||
* <li>If a JobManager gains leadership at some point when shutdown is in progress, | ||
* it will see the marker file and realize that the job is finished.</li> | ||
* <li>The application framework is expected to clean the file once the application | ||
* is completely shut down. At that point, no JobManager will attempt to | ||
* start the job, even if it gains leadership.</li> | ||
* </ul> | ||
* | ||
* <p>It is especially tailored towards deployment modes like for example | ||
* YARN, where HDFS is available as a persistent file system, and the YARN | ||
* application's working directories on HDFS are automatically cleaned | ||
* up after the application completed. | ||
*/ | ||
public class FsNegativeRunningJobsRegistry implements RunningJobsRegistry { | ||
|
||
private static final String PREFIX = ".job_complete_"; | ||
|
||
private final FileSystem fileSystem; | ||
|
||
private final Path basePath; | ||
|
||
/** | ||
* Creates a new registry that writes to the FileSystem and working directory | ||
* denoted by the given path. | ||
* | ||
* <p>The initialization will attempt to write to the given working directory, in | ||
* order to catch setup/configuration errors early. | ||
* | ||
* @param workingDirectory The working directory for files to track the job status. | ||
* | ||
* @throws IOException Thrown, if the specified directory cannot be accessed. | ||
*/ | ||
public FsNegativeRunningJobsRegistry(Path workingDirectory) throws IOException { | ||
this(workingDirectory.getFileSystem(), workingDirectory); | ||
} | ||
|
||
/** | ||
* Creates a new registry that writes its files to the given FileSystem at | ||
* the given working directory path. | ||
* | ||
* <p>The initialization will attempt to write to the given working directory, in | ||
* order to catch setup/configuration errors early. | ||
* | ||
* @param fileSystem The FileSystem to use for the marker files. | ||
* @param workingDirectory The working directory for files to track the job status. | ||
* | ||
* @throws IOException Thrown, if the specified directory cannot be accessed. | ||
*/ | ||
public FsNegativeRunningJobsRegistry(FileSystem fileSystem, Path workingDirectory) throws IOException { | ||
this.fileSystem = checkNotNull(fileSystem, "fileSystem"); | ||
this.basePath = checkNotNull(workingDirectory, "workingDirectory"); | ||
|
||
// to be safe, attempt to write to the working directory, to | ||
// catch problems early | ||
final Path testFile = new Path(workingDirectory, ".registry_test"); | ||
try (FSDataOutputStream out = fileSystem.create(testFile, false)) { | ||
out.write(42); | ||
} | ||
catch (IOException e) { | ||
throw new IOException("Unable to write to working directory: " + workingDirectory, e); | ||
} | ||
finally { | ||
fileSystem.delete(testFile, false); | ||
} | ||
} | ||
|
||
// ------------------------------------------------------------------------ | ||
|
||
@Override | ||
public void setJobRunning(JobID jobID) throws IOException { | ||
checkNotNull(jobID, "jobID"); | ||
final Path filePath = createMarkerFilePath(jobID); | ||
|
||
// delete the marker file, if it exists | ||
try { | ||
fileSystem.delete(filePath, false); | ||
} | ||
catch (FileNotFoundException e) { | ||
// apparently job was already considered running | ||
} | ||
} | ||
|
||
@Override | ||
public void setJobFinished(JobID jobID) throws IOException { | ||
checkNotNull(jobID, "jobID"); | ||
final Path filePath = createMarkerFilePath(jobID); | ||
|
||
// create the file | ||
// to avoid an exception if the job already exists, set overwrite=true | ||
try (FSDataOutputStream out = fileSystem.create(filePath, true)) { | ||
out.write(42); | ||
} | ||
} | ||
|
||
@Override | ||
public boolean isJobRunning(JobID jobID) throws IOException { | ||
checkNotNull(jobID, "jobID"); | ||
|
||
// check for the existence of the file | ||
try { | ||
fileSystem.getFileStatus(createMarkerFilePath(jobID)); | ||
// file was found --> job is terminated | ||
return false; | ||
} | ||
catch (FileNotFoundException e) { | ||
// file does not exist, job is still running | ||
return true; | ||
} | ||
} | ||
|
||
private Path createMarkerFilePath(JobID jobId) { | ||
return new Path(basePath, PREFIX + jobId.toString()); | ||
} | ||
} |
Oops, something went wrong.