Skip to content

Commit

Permalink
[FLINK-1984] Mesos ResourceManager - T1 milestone
Browse files Browse the repository at this point in the history
Implemented Mesos AppMaster including:
- runners for AppMaster and TaskManager
- MesosFlinkResourceManager as a Mesos framework
- ZK persistent storage for Mesos tasks
- reusable scheduler actors for:
  - offer handling using Netflix Fenzo (LaunchCoordinator)
  - reconciliation (ReconciliationCoordinator)
  - task monitoring (TaskMonitor)
  - connection monitoring (ConnectionMonitor)
- lightweight HTTP server to serve artifacts to the Mesos fetcher (ArtifactServer)
- scenario-based logging for:
  - connectivity issues
  - offer handling (receive, process, decline, rescind, accept)
- incorporated FLINK-4152, FLINK-3904, FLINK-4141, FLINK-3675, FLINK-4166
  • Loading branch information
wrighe3 authored and mxm committed Aug 29, 2016
1 parent 578e80e commit d9b2be0
Show file tree
Hide file tree
Showing 51 changed files with 6,446 additions and 3 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -426,6 +426,60 @@ public final class ConfigConstants {
public static final String YARN_APPLICATION_MASTER_PORT = "yarn.application-master.port";


// ------------------------ Mesos Configuration ------------------------

/**
* The maximum number of failed Mesos tasks before entirely stopping
* the Mesos session / job on Mesos.
*
* By default, we take the number of initially requested tasks.
*/
public static final String MESOS_MAX_FAILED_TASKS = "mesos.maximum-failed-tasks";

/**
* The Mesos master URL.
*
* The value should be in one of the following forms:
* <pre>
* {@code
* host:port
* zk://host1:port1,host2:port2,.../path
* zk://username:password@host1:port1,host2:port2,.../path
* file:///path/to/file (where file contains one of the above)
* }
* </pre>
*
*/
public static final String MESOS_MASTER_URL = "mesos.master";

/**
* The failover timeout for the Mesos scheduler, after which running tasks are automatically shut down.
*
* The default value is 600 (seconds).
*/
public static final String MESOS_FAILOVER_TIMEOUT_SECONDS = "mesos.failover-timeout";

/**
* The config parameter defining the Mesos artifact server port to use.
* Setting the port to 0 will let the OS choose an available port.
*/
public static final String MESOS_ARTIFACT_SERVER_PORT_KEY = "mesos.resourcemanager.artifactserver.port";

/**
* The config parameter defining the Mesos framework name for the ResourceManager to use.
*/
public static final String MESOS_RESOURCEMANAGER_FRAMEWORK_NAME = "mesos.resourcemanager.framework.name";

/**
* The config parameter defining the Mesos framework role for the ResourceManager to use.
*/
public static final String MESOS_RESOURCEMANAGER_FRAMEWORK_ROLE = "mesos.resourcemanager.framework.role";

/**
* The config parameter defining the Mesos framework principal.
*
* NOTE(review): presumably used together with the framework secret to authenticate
* the framework with the Mesos master - confirm against the scheduler setup code.
*/
public static final String MESOS_RESOURCEMANAGER_FRAMEWORK_PRINCIPAL = "mesos.resourcemanager.framework.principal";

/**
* The config parameter defining the Mesos framework secret.
*
* NOTE(review): presumably a credential paired with the framework principal; treat the
* configured value as sensitive and avoid logging it - confirm against the scheduler setup code.
*/
public static final String MESOS_RESOURCEMANAGER_FRAMEWORK_SECRET = "mesos.resourcemanager.framework.secret";

/**
* The cpus to acquire from Mesos.
*
* By default, we use the number of requested task slots.
*/
public static final String MESOS_RESOURCEMANAGER_TASKS_CPUS = "mesos.resourcemanager.tasks.cpus";

// ------------------------ Hadoop Configuration ------------------------

/**
Expand Down Expand Up @@ -736,6 +790,9 @@ public final class ConfigConstants {
@Deprecated
public static final String ZOOKEEPER_CHECKPOINT_COUNTER_PATH = "recovery.zookeeper.path.checkpoint-counter";

/** ZooKeeper root path (ZNode) for Mesos workers. */
public static final String ZOOKEEPER_MESOS_WORKERS_PATH = "recovery.zookeeper.path.mesos-workers";

/** Deprecated in favour of {@link #HA_ZOOKEEPER_SESSION_TIMEOUT}. */
@Deprecated
public static final String ZOOKEEPER_SESSION_TIMEOUT = "recovery.zookeeper.client.session-timeout";
Expand Down Expand Up @@ -983,6 +1040,23 @@ public final class ConfigConstants {
*/
public static final String DEFAULT_YARN_JOB_MANAGER_PORT = "0";

// ------ Mesos-Specific Configuration ------

/** The default failover timeout provided to Mesos (10 mins) */
public static final int DEFAULT_MESOS_FAILOVER_TIMEOUT_SECS = 10 * 60;

/**
* The default network port to listen on for the Mesos artifact server.
*/
public static final int DEFAULT_MESOS_ARTIFACT_SERVER_PORT = 0;

/**
* The default Mesos framework name for the ResourceManager to use.
*/
public static final String DEFAULT_MESOS_RESOURCEMANAGER_FRAMEWORK_NAME = "Flink";

/**
* The default Mesos framework role for the ResourceManager to use.
*/
public static final String DEFAULT_MESOS_RESOURCEMANAGER_FRAMEWORK_ROLE = "*";

// ------------------------ File System Behavior ------------------------

/**
Expand Down Expand Up @@ -1131,6 +1205,8 @@ public final class ConfigConstants {

public static final String DEFAULT_ZOOKEEPER_CHECKPOINT_COUNTER_PATH = "/checkpoint-counter";

/** The default ZooKeeper root path (ZNode) for Mesos workers. */
public static final String DEFAULT_ZOOKEEPER_MESOS_WORKERS_PATH = "/mesos-workers";

public static final int DEFAULT_ZOOKEEPER_SESSION_TIMEOUT = 60000;

public static final int DEFAULT_ZOOKEEPER_CONNECTION_TIMEOUT = 15000;
Expand Down
7 changes: 6 additions & 1 deletion flink-dist/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -119,8 +119,13 @@ under the License.
<artifactId>flink-metrics-jmx</artifactId>
<version>${project.version}</version>
</dependency>

<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-mesos_2.10</artifactId>
<version>${project.version}</version>
</dependency>


</dependencies>

<!-- See main pom.xml for explanation of profiles -->
Expand Down
Loading

0 comments on commit d9b2be0

Please sign in to comment.