Skip to content

Commit

Permalink
[FLINK-22144][runtime][runtime-web] Adds more documentation and moves…
Browse files Browse the repository at this point in the history
… option

- `jobmanager.exception-history-size` is moved to `web.exception-history-size`
- the documentation was slightly extended
- some minor mistakes in the docs are fixed
  • Loading branch information
XComp authored and dawidwys committed Apr 14, 2021
1 parent f2f2bef commit a871333
Show file tree
Hide file tree
Showing 11 changed files with 32 additions and 31 deletions.
1 change: 1 addition & 0 deletions docs/content/docs/deployment/config.md
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,7 @@ You can configure checkpointing directly in code within your Flink job or applic

- `web.submit.enable`: Enables uploading and starting jobs through the Flink UI *(true by default)*. Please note that even when this is disabled, session clusters still accept jobs through REST requests (HTTP calls). This flag only guards the feature to upload jobs in the UI.
- `web.upload.dir`: The directory where to store uploaded jobs. Only used when `web.submit.enable` is true.
- `web.exception-history-size`: Sets the maximum number of most recent failures, handled by Flink for a job, that are kept in the exception history.

**Other**

Expand Down
12 changes: 6 additions & 6 deletions docs/layouts/shortcodes/generated/all_jobmanager_section.html
Original file line number Diff line number Diff line change
Expand Up @@ -32,12 +32,6 @@
<td>String</td>
<td>Directory for JobManager to store the archives of completed jobs.</td>
</tr>
<tr>
<td><h5>jobmanager.exception-history-size</h5></td>
<td style="word-wrap: break-word;">16</td>
<td>Integer</td>
<td>The maximum number of failures collected by the exception history per job.</td>
</tr>
<tr>
<td><h5>jobmanager.execution.attempts-history-size</h5></td>
<td style="word-wrap: break-word;">16</td>
Expand Down Expand Up @@ -86,5 +80,11 @@
<td>Integer</td>
<td>The max number of completed jobs that can be kept in the job store.</td>
</tr>
<tr>
<td><h5>web.exception-history-size</h5></td>
<td style="word-wrap: break-word;">16</td>
<td>Integer</td>
<td>The maximum number of failures collected by the exception history per job.</td>
</tr>
</tbody>
</table>
Original file line number Diff line number Diff line change
Expand Up @@ -38,12 +38,6 @@
<td>String</td>
<td>The local address of the network interface that the job manager binds to. If not configured, '0.0.0.0' will be used.</td>
</tr>
<tr>
<td><h5>jobmanager.exception-history-size</h5></td>
<td style="word-wrap: break-word;">16</td>
<td>Integer</td>
<td>The maximum number of failures collected by the exception history per job.</td>
</tr>
<tr>
<td><h5>jobmanager.execution.attempts-history-size</h5></td>
<td style="word-wrap: break-word;">16</td>
Expand Down
2 changes: 1 addition & 1 deletion docs/layouts/shortcodes/generated/rest_v1_dispatcher.html
Original file line number Diff line number Diff line change
Expand Up @@ -2630,7 +2630,7 @@
<td class="text-left">Response code: <code>200 OK</code></td>
</tr>
<tr>
<td colspan="2">Returns the most recent exceptions that have been handled by Flink for this job. The 'exceptionHistory.truncated' flag defines whether exceptions were filtered out through the GET parameter. The backend collects only a specific amount of most recent exceptions per job. This can be configured through jobmanager.exception-history-size in the Flink configuration. The following first-level members are deprecated: 'root-exception', 'timestamp', 'timestamp', 'truncated'. Use the data provided through 'exceptionHistory', instead.</td>
<td colspan="2">Returns the most recent exceptions that have been handled by Flink for this job. The 'exceptionHistory.truncated' flag defines whether exceptions were filtered out through the GET parameter. The backend collects only a specific amount of most recent exceptions per job. This can be configured through web.exception-history-size in the Flink configuration. The following first-level members are deprecated: 'root-exception', 'timestamp', 'all-exceptions', and 'truncated'. Use the data provided through 'exceptionHistory', instead.</td>
</tr>
<tr>
<td colspan="2">Path parameters</td>
Expand Down
6 changes: 6 additions & 0 deletions docs/layouts/shortcodes/generated/web_configuration.html
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,12 @@
<td>Integer</td>
<td>Number of checkpoints to remember for recent history.</td>
</tr>
<tr>
<td><h5>web.exception-history-size</h5></td>
<td style="word-wrap: break-word;">16</td>
<td>Integer</td>
<td>The maximum number of failures collected by the exception history per job.</td>
</tr>
<tr>
<td><h5>web.history</h5></td>
<td style="word-wrap: break-word;">5</td>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -257,15 +257,6 @@ public class JobManagerOptions {
.withDescription(
"The maximum number of prior execution attempts kept in history.");

/** The maximum number of failures kept in the exception history. */
@Documentation.Section(Documentation.Sections.ALL_JOB_MANAGER)
public static final ConfigOption<Integer> MAX_EXCEPTION_HISTORY_SIZE =
key("jobmanager.exception-history-size")
.intType()
.defaultValue(16)
.withDescription(
"The maximum number of failures collected by the exception history per job.");

/**
* This option specifies the failover strategy, i.e. how the job computation recovers from task
* failures.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,16 @@ public class WebOptions {
.withDeprecatedKeys("jobmanager.web.checkpoints.history")
.withDescription("Number of checkpoints to remember for recent history.");

/** The maximum number of failures kept in the exception history. */
// the parameter is referenced in the UI and might need to be updated there as well
@Documentation.Section(Documentation.Sections.ALL_JOB_MANAGER)
public static final ConfigOption<Integer> MAX_EXCEPTION_HISTORY_SIZE =
key("web.exception-history-size")
.intType()
.defaultValue(16)
.withDescription(
"The maximum number of failures collected by the exception history per job.");

/** @deprecated - no longer used. */
@Deprecated
public static final ConfigOption<Integer> BACKPRESSURE_CLEANUP_INTERVAL =
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@
<tr *ngIf="listOfException.length > 0">
<td colspan="6">
<i nz-icon nzType="info-circle" nzTheme="fill"></i>&nbsp;
<i>The exception history is limited to the most recent failures that caused parts of the job or the entire job to restart. The maximum history size can be configured through the Flink configuration.</i>
<i>The exception history is limited to the most recent failures that caused parts of the job or the entire job to restart. The maximum history size can be configured via the Flink configuration property <b>web.exception-history-size</b>.</i>
</td>
</tr>
<tr *ngIf="truncated">
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@

package org.apache.flink.runtime.rest.messages;

import org.apache.flink.configuration.JobManagerOptions;
import org.apache.flink.configuration.WebOptions;
import org.apache.flink.runtime.rest.HttpMethodWrapper;
import org.apache.flink.runtime.rest.handler.job.JobExceptionsHandler;
import org.apache.flink.runtime.rest.messages.job.JobExceptionsMessageParameters;
Expand Down Expand Up @@ -78,8 +78,8 @@ public String getDescription() {
+ "out through the GET parameter. The backend collects only a specific amount "
+ "of most recent exceptions per job. This can be configured through %s in the "
+ "Flink configuration. The following first-level members are deprecated: "
+ "'root-exception', 'timestamp', 'timestamp', 'truncated'. Use the data provided "
+ "'root-exception', 'timestamp', 'all-exceptions', and 'truncated'. Use the data provided "
+ "through 'exceptionHistory', instead.",
JobManagerOptions.MAX_EXCEPTION_HISTORY_SIZE.key());
WebOptions.MAX_EXCEPTION_HISTORY_SIZE.key());
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
import org.apache.flink.api.common.JobStatus;
import org.apache.flink.configuration.CheckpointingOptions;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.configuration.JobManagerOptions;
import org.apache.flink.configuration.WebOptions;
import org.apache.flink.queryablestate.KvStateID;
import org.apache.flink.runtime.accumulators.AccumulatorSnapshot;
import org.apache.flink.runtime.checkpoint.CheckpointCoordinator;
Expand Down Expand Up @@ -218,8 +218,7 @@ public SchedulerBase(
this.exceptionHistoryEntryExtractor = new ExceptionHistoryEntryExtractor();
this.exceptionHistory =
new BoundedFIFOQueue<>(
jobMasterConfiguration.getInteger(
JobManagerOptions.MAX_EXCEPTION_HISTORY_SIZE));
jobMasterConfiguration.getInteger(WebOptions.MAX_EXCEPTION_HISTORY_SIZE));
}

private void registerShutDownCheckpointServicesOnExecutionGraphTermination(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@

import org.apache.flink.api.common.JobStatus;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.configuration.JobManagerOptions;
import org.apache.flink.configuration.WebOptions;
import org.apache.flink.runtime.checkpoint.CheckpointCoordinator;
import org.apache.flink.runtime.checkpoint.hooks.TestMasterHook;
import org.apache.flink.runtime.concurrent.ComponentMainThreadExecutor;
Expand Down Expand Up @@ -1130,7 +1130,7 @@ public void testExceptionHistoryWithRestartableFailure() {
public void testExceptionHistoryTruncation() {
final JobGraph jobGraph = singleNonParallelJobVertexJobGraph();

configuration.set(JobManagerOptions.MAX_EXCEPTION_HISTORY_SIZE, 1);
configuration.set(WebOptions.MAX_EXCEPTION_HISTORY_SIZE, 1);
final DefaultScheduler scheduler = createSchedulerAndStartScheduling(jobGraph);

final ExecutionAttemptID attemptId0 =
Expand Down

0 comments on commit a871333

Please sign in to comment.