diff --git a/flink-core/src/main/java/org/apache/flink/util/ExceptionUtils.java b/flink-core/src/main/java/org/apache/flink/util/ExceptionUtils.java index 32bc1d25ac6de..d1357a84b31e6 100644 --- a/flink-core/src/main/java/org/apache/flink/util/ExceptionUtils.java +++ b/flink-core/src/main/java/org/apache/flink/util/ExceptionUtils.java @@ -26,10 +26,13 @@ import org.apache.flink.annotation.Internal; +import javax.annotation.Nullable; import java.io.IOException; import java.io.PrintWriter; import java.io.StringWriter; +import static org.apache.flink.util.Preconditions.checkNotNull; + @Internal public final class ExceptionUtils { public static final String STRINGIFIED_NULL_EXCEPTION = "(null)"; @@ -59,7 +62,56 @@ public static String stringifyException(final Throwable e) { return e.getClass().getName() + " (error while printing stack trace)"; } } - + + /** + * Adds a new exception as a {@link Throwable#addSuppressed(Throwable) suppressed exception} + * to a prior exception, or returns the new exception, if no prior exception exists. + * + *
<pre>{@code
+	 *
+	 * public void closeAllThings() throws Exception {
+	 *     Exception ex = null;
+	 *     try {
+	 *         component.shutdown();
+	 *     } catch (Exception e) {
+	 *         ex = firstOrSuppressed(e, ex);
+	 *     }
+	 *     try {
+	 *         anotherComponent.stop();
+	 *     } catch (Exception e) {
+	 *         ex = firstOrSuppressed(e, ex);
+	 *     }
+	 *     try {
+	 *         lastComponent.shutdown();
+	 *     } catch (Exception e) {
+	 *         ex = firstOrSuppressed(e, ex);
+	 *     }
+	 *
+	 *     if (ex != null) {
+	 *         throw ex;
+	 *     }
+	 * }
+	 * }</pre>
+ * + * @param newException The newly occurred exception + * @param previous The previously occurred exception, possibly null. + * + * @return The new exception, if no previous exception exists, or the previous exception with the + * new exception in the list of suppressed exceptions. + */ + public static T firstOrSuppressed(T newException, @Nullable T previous) { + checkNotNull(newException, "newException"); + + if (previous == null) { + return newException; + } else { + previous.addSuppressed(newException); + return previous; + } + } + + + /** * Throws the given {@code Throwable} in scenarios where the signatures do not allow you to * throw an arbitrary Throwable. Errors and RuntimeExceptions are thrown directly, other exceptions diff --git a/flink-examples/flink-examples-streaming/src/main/java/org/apache/flink/streaming/examples/async/AsyncIOExample.java b/flink-examples/flink-examples-streaming/src/main/java/org/apache/flink/streaming/examples/async/AsyncIOExample.java index 96c7658aa32ec..6dde537e9dc7b 100644 --- a/flink-examples/flink-examples-streaming/src/main/java/org/apache/flink/streaming/examples/async/AsyncIOExample.java +++ b/flink-examples/flink-examples-streaming/src/main/java/org/apache/flink/streaming/examples/async/AsyncIOExample.java @@ -33,6 +33,8 @@ import org.apache.flink.streaming.api.functions.source.SourceFunction; import org.apache.flink.streaming.api.functions.async.collector.AsyncCollector; import org.apache.flink.util.Collector; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import java.util.ArrayList; import java.util.Collections; @@ -43,10 +45,17 @@ import java.util.concurrent.TimeUnit; /** - * Example to illustrates how to use {@link org.apache.flink.streaming.api.functions.async.AsyncFunction} + * Example to illustrates how to use {@link AsyncFunction} */ public class AsyncIOExample { + private static final Logger LOG = LoggerFactory.getLogger(AsyncIOExample.class); + + private static final String EXACTLY_ONCE_MODE = 
"exactly_once"; + private static final String EVENT_TIME = "EventTime"; + private static final String INGESTION_TIME = "IngestionTime"; + private static final String ORDERED = "ordered"; + /** * A checkpointed source. */ @@ -103,8 +112,10 @@ public void cancel() { * async client. */ private static class SampleAsyncFunction extends RichAsyncFunction { - transient static ExecutorService executorService; - transient static Random random; + private static final long serialVersionUID = 2098635244857937717L; + + private static ExecutorService executorService; + private static Random random; private int counter; @@ -112,17 +123,17 @@ private static class SampleAsyncFunction extends RichAsyncFunction collec @Override public void run() { // wait for while to simulate async operation here - int sleep = (int) (random.nextFloat() * sleepFactor); + long sleep = (long) (random.nextFloat() * sleepFactor); try { Thread.sleep(sleep); - List ret = Collections.singletonList("key-" + (input % 10)); + if (random.nextFloat() < failRatio) { collector.collect(new Exception("wahahahaha...")); } else { - collector.collect(ret); + collector.collect( + Collections.singletonList("key-" + (input % 10))); } } catch (InterruptedException e) { collector.collect(new ArrayList(0)); @@ -200,47 +214,71 @@ public static void main(String[] args) throws Exception { // obtain execution environment StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); - printUsage(); - // parse parameters final ParameterTool params = ParameterTool.fromArgs(args); - // check the configuration for the job - final String statePath = params.getRequired("fsStatePath"); - final String cpMode = params.get("checkpointMode", "exactly_once"); - final int maxCount = params.getInt("maxCount", 100000); - final int sleepFactor = params.getInt("sleepFactor", 100); - final float failRatio = params.getFloat("failRatio", 0.001f); - final String mode = params.get("waitMode", "ordered"); - final int taskNum = 
params.getInt("waitOperatorParallelism", 1); - final String timeType = params.get("eventType", "EventTime"); - final int shutdownWaitTS = params.getInt("shutdownWaitTS", 20000); - - System.out.println("Job configuration\n" - +"\tFS state path="+statePath+"\n" - +"\tCheckpoint mode="+cpMode+"\n" - +"\tMax count of input from source="+maxCount+"\n" - +"\tSleep factor="+sleepFactor+"\n" - +"\tFail ratio="+failRatio+"\n" - +"\tWaiting mode="+mode+"\n" - +"\tParallelism for async wait operator="+taskNum+"\n" - +"\tEvent type="+timeType+"\n" - +"\tShutdown wait timestamp="+shutdownWaitTS); - - // setup state and checkpoint mode - env.setStateBackend(new FsStateBackend(statePath)); - if (cpMode.equals("exactly_once")) { - env.enableCheckpointing(1000, CheckpointingMode.EXACTLY_ONCE); + final String statePath; + final String cpMode; + final int maxCount; + final long sleepFactor; + final float failRatio; + final String mode; + final int taskNum; + final String timeType; + final long shutdownWaitTS; + + try { + // check the configuration for the job + statePath = params.get("fsStatePath", null); + cpMode = params.get("checkpointMode", "exactly_once"); + maxCount = params.getInt("maxCount", 100000); + sleepFactor = params.getLong("sleepFactor", 100); + failRatio = params.getFloat("failRatio", 0.001f); + mode = params.get("waitMode", "ordered"); + taskNum = params.getInt("waitOperatorParallelism", 1); + timeType = params.get("eventType", "EventTime"); + shutdownWaitTS = params.getLong("shutdownWaitTS", 20000); + } catch (Exception e) { + printUsage(); + + throw e; + } + + StringBuilder configStringBuilder = new StringBuilder(); + + final String lineSeparator = System.getProperty("line.separator"); + + configStringBuilder + .append("Job configuration").append(lineSeparator) + .append("FS state path=").append(statePath).append(lineSeparator) + .append("Checkpoint mode=").append(cpMode).append(lineSeparator) + .append("Max count of input from 
source=").append(maxCount).append(lineSeparator) + .append("Sleep factor=").append(sleepFactor).append(lineSeparator) + .append("Fail ratio=").append(failRatio).append(lineSeparator) + .append("Waiting mode=").append(mode).append(lineSeparator) + .append("Parallelism for async wait operator=").append(taskNum).append(lineSeparator) + .append("Event type=").append(timeType).append(lineSeparator) + .append("Shutdown wait timestamp=").append(shutdownWaitTS); + + LOG.info(configStringBuilder.toString()); + + if (statePath != null) { + // setup state and checkpoint mode + env.setStateBackend(new FsStateBackend(statePath)); + } + + if (EXACTLY_ONCE_MODE.equals(cpMode)) { + env.enableCheckpointing(1000L, CheckpointingMode.EXACTLY_ONCE); } else { - env.enableCheckpointing(1000, CheckpointingMode.AT_LEAST_ONCE); + env.enableCheckpointing(1000L, CheckpointingMode.AT_LEAST_ONCE); } // enable watermark or not - if (timeType.equals("EventTime")) { + if (EVENT_TIME.equals(timeType)) { env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime); } - else if (timeType.equals("IngestionTime")) { + else if (INGESTION_TIME.equals(timeType)) { env.setStreamTimeCharacteristic(TimeCharacteristic.IngestionTime); } @@ -253,7 +291,7 @@ else if (timeType.equals("IngestionTime")) { // add async operator to streaming job DataStream result; - if (mode.equals("ordered")) { + if (ORDERED.equals(mode)) { result = AsyncDataStream.orderedWait(inputStream, function, 20).setParallelism(taskNum); } else { @@ -262,6 +300,8 @@ else if (timeType.equals("IngestionTime")) { // add a reduce to get the sum of each keys. 
result.flatMap(new FlatMapFunction>() { + private static final long serialVersionUID = -938116068682344455L; + @Override public void flatMap(String value, Collector> out) throws Exception { out.collect(new Tuple2<>(value, 1)); @@ -269,7 +309,7 @@ public void flatMap(String value, Collector> out) throws }).keyBy(0).sum(1).print(); // execute the program - env.execute("Async I/O Example"); + env.execute("Async IO Example"); } } diff --git a/flink-streaming-java/src/main/java/org/apache/flink/streaming/api/functions/async/AsyncFunction.java b/flink-streaming-java/src/main/java/org/apache/flink/streaming/api/functions/async/AsyncFunction.java index b5b7d6fcbcee3..4de2db1818461 100644 --- a/flink-streaming-java/src/main/java/org/apache/flink/streaming/api/functions/async/AsyncFunction.java +++ b/flink-streaming-java/src/main/java/org/apache/flink/streaming/api/functions/async/AsyncFunction.java @@ -29,14 +29,15 @@ *

* For each #asyncInvoke, an async io operation can be triggered, and once it has been done, * the result can be collected by calling {@link AsyncCollector#collect}. For each async - * operations, their contexts are buffered in the operator immediately after invoking - * #asyncInvoke, leading to no blocking for each stream input as long as internal buffer is not full. + * operation, its context is stored in the operator immediately after invoking + * #asyncInvoke, avoiding blocking for each stream input as long as the internal buffer is not full. *

- * {@link AsyncCollector} can be passed into callbacks or futures provided by async client to - * fetch result data. Any error can also be propagate to the operator by {@link AsyncCollector#collect(Throwable)}. + * {@link AsyncCollector} can be passed into callbacks or futures to collect the result data. + * An error can also be propagated to the async IO operator by + * {@link AsyncCollector#collect(Throwable)}. * *

- * Typical usage for callback: + * Callback example usage: *

{@code
  * public class HBaseAsyncFunc implements AsyncFunction {
  *   @Override
@@ -46,11 +47,10 @@
  *     hbase.asyncGet(get, cb);
  *   }
  * }
- * }
  * 
* *

- * Typical usage for {@link com.google.common.util.concurrent.ListenableFuture} + * Future example usage: *

{@code
  * public class HBaseAsyncFunc implements AsyncFunction {
  *   @Override
@@ -68,7 +68,6 @@
  *     });
  *   }
  * }
- * }
  * 
* * @param The type of the input elements. @@ -80,9 +79,10 @@ public interface AsyncFunction extends Function, Serializable { /** * Trigger async operation for each stream input. * - * @param input Stream Input - * @param collector AsyncCollector - * @exception Exception will make task fail and trigger fail-over process. + * @param input element coming from an upstream task + * @param collector to collect the result data + * @exception Exception in case of a user code error. An exception will make the task fail and + * trigger fail-over process. */ void asyncInvoke(IN input, AsyncCollector collector) throws Exception; } diff --git a/flink-streaming-java/src/main/java/org/apache/flink/streaming/api/functions/async/RichAsyncFunction.java b/flink-streaming-java/src/main/java/org/apache/flink/streaming/api/functions/async/RichAsyncFunction.java index f6d3d319ea3b2..232206c29a36e 100644 --- a/flink-streaming-java/src/main/java/org/apache/flink/streaming/api/functions/async/RichAsyncFunction.java +++ b/flink-streaming-java/src/main/java/org/apache/flink/streaming/api/functions/async/RichAsyncFunction.java @@ -41,6 +41,7 @@ import org.apache.flink.metrics.MetricGroup; import org.apache.flink.streaming.api.functions.async.collector.AsyncCollector; import org.apache.flink.types.Value; +import org.apache.flink.util.Preconditions; import java.io.Serializable; import java.util.List; @@ -48,88 +49,55 @@ /** * Rich variant of the {@link AsyncFunction}. As a {@link RichFunction}, it gives access to the - * {@link org.apache.flink.api.common.functions.RuntimeContext} and provides setup and teardown methods: + * {@link RuntimeContext} and provides setup and teardown methods: * {@link RichFunction#open(org.apache.flink.configuration.Configuration)} and * {@link RichFunction#close()}. * *

- * State related apis in {@link RuntimeContext} are not supported yet because the key may get changed - * while accessing states in the working thread. + * State related apis in {@link RuntimeContext} are not supported yet because the key may get + * changed while accessing states in the working thread. *

- * {@link IterationRuntimeContext#getIterationAggregator(String)} is not supported since the aggregator - * may be modified by multiple threads. + * {@link IterationRuntimeContext#getIterationAggregator(String)} is not supported since the + * aggregator may be modified by multiple threads. * * @param The type of the input elements. * @param The type of the returned elements. */ - @PublicEvolving -public abstract class RichAsyncFunction extends AbstractRichFunction - implements AsyncFunction { +public abstract class RichAsyncFunction extends AbstractRichFunction implements AsyncFunction { - private transient RuntimeContext runtimeContext; + private static final long serialVersionUID = 3858030061138121840L; @Override - public void setRuntimeContext(RuntimeContext t) { - super.setRuntimeContext(t); + public void setRuntimeContext(RuntimeContext runtimeContext) { + Preconditions.checkNotNull(runtimeContext); - if (t != null) { - runtimeContext = new RichAsyncFunctionRuntimeContext(t); + if (runtimeContext instanceof IterationRuntimeContext) { + super.setRuntimeContext( + new RichAsyncFunctionIterationRuntimeContext( + (IterationRuntimeContext) runtimeContext)); + } else { + super.setRuntimeContext(new RichAsyncFunctionRuntimeContext(runtimeContext)); } } @Override public abstract void asyncInvoke(IN input, AsyncCollector collector) throws Exception; - @Override - public RuntimeContext getRuntimeContext() { - if (this.runtimeContext != null) { - return runtimeContext; - } else { - throw new IllegalStateException("The runtime context has not been initialized."); - } - } - - @Override - public IterationRuntimeContext getIterationRuntimeContext() { - if (this.runtimeContext != null) { - return (IterationRuntimeContext) runtimeContext; - } else { - throw new IllegalStateException("The runtime context has not been initialized."); - } - } + // ----------------------------------------------------------------------------------------- + // Wrapper classes + // 
----------------------------------------------------------------------------------------- /** - * A wrapper class to delegate {@link RuntimeContext}. State related apis are disabled. + * A wrapper class for async function's {@link RuntimeContext}. The async function runtime + * context only supports basic operations which are thread safe. Consequently, state access, + * accumulators, broadcast variables and the distributed cache are disabled. */ - private class RichAsyncFunctionRuntimeContext implements IterationRuntimeContext { - private RuntimeContext runtimeContext; - - public RichAsyncFunctionRuntimeContext(RuntimeContext context) { - runtimeContext = context; - } - - private IterationRuntimeContext getIterationRuntineContext() { - if (this.runtimeContext instanceof IterationRuntimeContext) { - return (IterationRuntimeContext) this.runtimeContext; - } else { - throw new IllegalStateException("This stub is not part of an iteration step function."); - } - } - - @Override - public int getSuperstepNumber() { - return getIterationRuntineContext().getSuperstepNumber(); - } - - @Override - public > T getIterationAggregator(String name) { - throw new UnsupportedOperationException("Get iteration aggregator is not supported in rich async function"); - } + private static class RichAsyncFunctionRuntimeContext implements RuntimeContext { + private final RuntimeContext runtimeContext; - @Override - public T getPreviousIterationAggregate(String name) { - return getIterationRuntineContext().getPreviousIterationAggregate(name); + RichAsyncFunctionRuntimeContext(RuntimeContext context) { + runtimeContext = Preconditions.checkNotNull(context); } @Override @@ -172,74 +140,108 @@ public ClassLoader getUserCodeClassLoader() { return runtimeContext.getUserCodeClassLoader(); } + // ----------------------------------------------------------------------------------- + // Unsupported operations + // ----------------------------------------------------------------------------------- + + 
@Override + public DistributedCache getDistributedCache() { + throw new UnsupportedOperationException("Distributed cache is not supported in rich async functions."); + } + + @Override + public ValueState getState(ValueStateDescriptor stateProperties) { + throw new UnsupportedOperationException("State is not supported in rich async functions."); + } + + @Override + public ListState getListState(ListStateDescriptor stateProperties) { + throw new UnsupportedOperationException("State is not supported in rich async functions."); + } + + @Override + public ReducingState getReducingState(ReducingStateDescriptor stateProperties) { + throw new UnsupportedOperationException("State is not supported in rich async functions."); + } + @Override public void addAccumulator(String name, Accumulator accumulator) { - runtimeContext.addAccumulator(name, accumulator); + throw new UnsupportedOperationException("Accumulators are not supported in rich async functions."); } @Override public Accumulator getAccumulator(String name) { - return runtimeContext.getAccumulator(name); + throw new UnsupportedOperationException("Accumulators are not supported in rich async functions."); } @Override public Map> getAllAccumulators() { - return runtimeContext.getAllAccumulators(); + throw new UnsupportedOperationException("Accumulators are not supported in rich async functions."); } @Override public IntCounter getIntCounter(String name) { - return runtimeContext.getIntCounter(name); + throw new UnsupportedOperationException("Int counters are not supported in rich async functions."); } @Override public LongCounter getLongCounter(String name) { - return runtimeContext.getLongCounter(name); + throw new UnsupportedOperationException("Long counters are not supported in rich async functions."); } @Override public DoubleCounter getDoubleCounter(String name) { - return runtimeContext.getDoubleCounter(name); + throw new UnsupportedOperationException("Double counters are not supported in rich async functions."); } 
@Override public Histogram getHistogram(String name) { - return runtimeContext.getHistogram(name); + throw new UnsupportedOperationException("Histograms are not supported in rich async functions."); } @Override public boolean hasBroadcastVariable(String name) { - return runtimeContext.hasBroadcastVariable(name); + throw new UnsupportedOperationException("Broadcast variables are not supported in rich async functions."); } @Override public List getBroadcastVariable(String name) { - return runtimeContext.getBroadcastVariable(name); + throw new UnsupportedOperationException("Broadcast variables are not supported in rich async functions."); } @Override public C getBroadcastVariableWithInitializer(String name, BroadcastVariableInitializer initializer) { - return runtimeContext.getBroadcastVariableWithInitializer(name, initializer); + throw new UnsupportedOperationException("Broadcast variables are not supported in rich async functions."); } + } - @Override - public DistributedCache getDistributedCache() { - return runtimeContext.getDistributedCache(); + private static class RichAsyncFunctionIterationRuntimeContext extends RichAsyncFunctionRuntimeContext implements IterationRuntimeContext { + + private final IterationRuntimeContext iterationRuntimeContext; + + RichAsyncFunctionIterationRuntimeContext(IterationRuntimeContext iterationRuntimeContext) { + super(iterationRuntimeContext); + + this.iterationRuntimeContext = Preconditions.checkNotNull(iterationRuntimeContext); } @Override - public ValueState getState(ValueStateDescriptor stateProperties) { - throw new UnsupportedOperationException("State is not supported in rich async function"); + public int getSuperstepNumber() { + return iterationRuntimeContext.getSuperstepNumber(); } + // ----------------------------------------------------------------------------------- + // Unsupported operations + // ----------------------------------------------------------------------------------- + @Override - public ListState 
getListState(ListStateDescriptor stateProperties) { - throw new UnsupportedOperationException("State is not supported in rich async function"); + public > T getIterationAggregator(String name) { + throw new UnsupportedOperationException("Iteration aggregators are not supported in rich async functions."); } @Override - public ReducingState getReducingState(ReducingStateDescriptor stateProperties) { - throw new UnsupportedOperationException("State is not supported in rich async function"); + public T getPreviousIterationAggregate(String name) { + throw new UnsupportedOperationException("Iteration aggregators are not supported in rich async functions."); } } } diff --git a/flink-streaming-java/src/main/java/org/apache/flink/streaming/api/functions/async/buffer/AbstractBufferEntry.java b/flink-streaming-java/src/main/java/org/apache/flink/streaming/api/functions/async/buffer/AbstractBufferEntry.java deleted file mode 100644 index 29643fd204f20..0000000000000 --- a/flink-streaming-java/src/main/java/org/apache/flink/streaming/api/functions/async/buffer/AbstractBufferEntry.java +++ /dev/null @@ -1,78 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.flink.streaming.api.functions.async.buffer; - -import org.apache.flink.streaming.runtime.streamrecord.StreamElement; -import org.apache.flink.util.Preconditions; - -import java.io.IOException; -import java.util.List; - -/** - * Abstract implementation for {@link StreamElementEntry} - * - * @param Output type. - */ -public abstract class AbstractBufferEntry implements StreamElementEntry { - private final StreamElement streamElement; - - protected AbstractBufferEntry(StreamElement element) { - this.streamElement = Preconditions.checkNotNull(element, "Reference to StreamElement should not be null"); - } - - @Override - public List getResult() throws IOException { - throw new UnsupportedOperationException("It is only available for StreamRecordEntry"); - } - - @Override - public void markDone() { - throw new UnsupportedOperationException("It is only available for StreamRecordEntry"); - } - - @Override - public boolean isDone() { - throw new UnsupportedOperationException("It must be overriden by the concrete entry"); - } - - @Override - public boolean isStreamRecord() { - return streamElement.isRecord(); - } - - @Override - public boolean isWatermark() { - return streamElement.isWatermark(); - } - - @Override - public boolean isLatencyMarker() { - return streamElement.isLatencyMarker(); - } - - @Override - public StreamElement getStreamElement() { - return streamElement; - } - - @Override - public String toString() { - return "StreamElementEntry for @" + streamElement; - } -} diff --git a/flink-streaming-java/src/main/java/org/apache/flink/streaming/api/functions/async/buffer/AsyncCollectorBuffer.java b/flink-streaming-java/src/main/java/org/apache/flink/streaming/api/functions/async/buffer/AsyncCollectorBuffer.java deleted file mode 100644 index ee176d91a855a..0000000000000 --- a/flink-streaming-java/src/main/java/org/apache/flink/streaming/api/functions/async/buffer/AsyncCollectorBuffer.java +++ /dev/null @@ -1,633 +0,0 @@ -/* - * Licensed to 
the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.flink.streaming.api.functions.async.buffer; - -import org.apache.flink.annotation.Internal; -import org.apache.flink.annotation.VisibleForTesting; -import org.apache.flink.streaming.api.datastream.AsyncDataStream; -import org.apache.flink.streaming.api.functions.async.collector.AsyncCollector; -import org.apache.flink.streaming.api.operators.Output; -import org.apache.flink.streaming.api.operators.TimestampedCollector; -import org.apache.flink.streaming.api.operators.async.AsyncWaitOperator; -import org.apache.flink.streaming.api.watermark.Watermark; -import org.apache.flink.streaming.runtime.streamrecord.LatencyMarker; -import org.apache.flink.streaming.runtime.streamrecord.StreamElement; -import org.apache.flink.streaming.runtime.streamrecord.StreamRecord; -import org.apache.flink.util.Preconditions; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.io.IOException; -import java.util.HashMap; -import java.util.HashSet; -import java.util.Iterator; -import java.util.LinkedHashMap; -import java.util.LinkedHashSet; -import java.util.LinkedList; -import java.util.List; -import java.util.Map; -import java.util.NoSuchElementException; -import 
java.util.Set; - -/** - * AsyncCollectorBuffer will hold all {@link AsyncCollector} in its internal buffer, - * and emit results from {@link AsyncCollector} to the next operators following it by - * calling {@link Output#collect(Object)} - */ -@Internal -public class AsyncCollectorBuffer { - - /** - * The logger. - */ - private static final Logger LOG = LoggerFactory.getLogger(AsyncCollectorBuffer.class); - - /** - * Max number of {@link AsyncCollector} in the buffer. - */ - private final int bufferSize; - - private final AsyncDataStream.OutputMode mode; - - private final AsyncWaitOperator operator; - - /** - * Keep all {@link StreamElementEntry} - */ - private final Set> queue = new LinkedHashSet<>(); - - /** - * Keep all {@link StreamElementEntry} to their corresponding {@link Watermark} or {@link LatencyMarker} - * If the inputs are: SR1, SR2, WM1, SR3, SR4. Then SR1 and SR2 belong to WM1, and - * SR3 and SR4 will be kept in {@link #lonelyEntries} - */ - private final Map, StreamElement> entriesToMarkers = new HashMap<>(); - - private final List> lonelyEntries = new LinkedList<>(); - - /** - * Keep finished AsyncCollector belonging to the oldest Watermark or LatencyMarker in UNORDERED mode. - */ - private final Map>> markerToFinishedEntries = new LinkedHashMap<>(); - private Set>lonelyFinishedEntries = new HashSet<>(); - - /** - * For the AsyncWaitOperator chained with StreamSource, the checkpoint thread may get the - * {@link org.apache.flink.streaming.runtime.tasks.StreamTask#lock} while {@link AsyncCollectorBuffer#queue} - * is full since main thread waits on this lock. The StreamElement in - * {@link AsyncWaitOperator#processElement(StreamRecord)} should be treated as a part of all StreamElements - * in its queue. It will be kept in the operator state while snapshotting. - */ - private StreamElement extraStreamElement; - - /** - * {@link TimestampedCollector} and {@link Output} to collect results and watermarks. 
- */ - private final Output> output; - private final TimestampedCollector timestampedCollector; - - /** - * Checkpoint lock from {@link org.apache.flink.streaming.runtime.tasks.StreamTask#lock} - */ - private final Object lock; - - private final Emitter emitter; - private final Thread emitThread; - - /** - * Exception from async operation or internal error - */ - private Exception error; - - /** - * Flag telling Emitter thread to work or not. - */ - private volatile boolean workwork = false; - - public AsyncCollectorBuffer( - int bufferSize, - AsyncDataStream.OutputMode mode, - Output> output, - TimestampedCollector collector, - Object lock, - AsyncWaitOperator operator) { - Preconditions.checkArgument(bufferSize > 0, "Future buffer size should be greater than 0."); - - this.bufferSize = bufferSize; - - this.mode = Preconditions.checkNotNull(mode, "Processing mode should not be NULL."); - this.output = Preconditions.checkNotNull(output, "Output should not be NULL."); - this.timestampedCollector = Preconditions.checkNotNull(collector, "TimestampedCollector should not be NULL."); - this.operator = Preconditions.checkNotNull(operator, "Reference to AsyncWaitOperator should not be NULL."); - this.lock = Preconditions.checkNotNull(lock, "Checkpoint lock should not be NULL."); - - this.emitter = new Emitter(); - this.emitThread = new Thread(emitter); - this.emitThread.setDaemon(true); - } - - /** - * Add an {@link StreamRecord} into the buffer. A new {@link AsyncCollector} will be created and returned - * corresponding to the input StreamRecord. - *

- * If buffer is full, caller will wait until a new space is available. - * - * @param record StreamRecord - * @return An AsyncCollector - * @throws Exception Exception from AsyncCollector. - */ - public AsyncCollector addStreamRecord(StreamRecord record) throws Exception { - assert(Thread.holdsLock(lock)); - - while (queue.size() >= bufferSize) { - // hold the input StreamRecord until it is placed in the buffer - extraStreamElement = record; - - lock.wait(); - } - - if (error != null) { - throw error; - } - - StreamElementEntry entry = new StreamRecordEntry<>(record, this); - - queue.add(entry); - - if (mode == AsyncDataStream.OutputMode.UNORDERED) { - lonelyEntries.add(entry); - } - - extraStreamElement = null; - - return (AsyncCollector)entry; - } - - /** - * Add a {@link Watermark} into buffer. - *

- * If queue is full, caller will wait here. - * - * @param watermark Watermark - * @throws Exception Exception from AsyncCollector. - */ - public void addWatermark(Watermark watermark) throws Exception { - processMark(new WatermarkEntry(watermark)); - } - - /** - * Add a {@link LatencyMarker} into buffer. - *

- * If queue is full, caller will wait here. - * - * @param latencyMarker LatencyMarker - * @throws Exception Exception from AsyncCollector. - */ - public void addLatencyMarker(LatencyMarker latencyMarker) throws Exception { - processMark(new LatencyMarkerEntry(latencyMarker)); - } - - /** - * Notify the emitter thread and main thread that an AsyncCollector has completed. - * - * @param entry Completed AsyncCollector - */ - public void markCollectorCompleted(StreamElementEntry entry) { - synchronized (lock) { - entry.markDone(); - - if (mode == AsyncDataStream.OutputMode.UNORDERED) { - StreamElement marker = entriesToMarkers.get(entry); - - if (marker != null) { - markerToFinishedEntries.get(marker).add(entry); - } - else { - lonelyFinishedEntries.add(entry); - } - } - - // if workwork is true, it is not necessary to check it again - if (!workwork && shouldNotifyEmitterThread(entry)) { - workwork = true; - - lock.notifyAll(); - } - } - } - - /** - * Caller will wait here if buffer is not empty, meaning that not all async i/o tasks have returned yet. - * - * @throws Exception IOException from AsyncCollector. - */ - public void waitEmpty() throws Exception { - assert(Thread.holdsLock(lock)); - - while (queue.size() != 0) { - if (error != null) { - throw error; - } - - lock.wait(); - } - } - - public void startEmitterThread() { - emitThread.start(); - } - - public void stopEmitterThread() { - emitter.stop(); - - emitThread.interrupt(); - - while (emitThread.isAlive()) { - // temporarily release the lock first, since caller of this method may also hold the lock. 
- if (Thread.holdsLock(lock)) { - try { - lock.wait(1000); - } - catch (InterruptedException e) { - // do nothing - } - } - - try { - emitThread.join(10000); - } catch (InterruptedException e) { - // do nothing - } - - // get the stack trace - StringBuilder sb = new StringBuilder(); - StackTraceElement[] stack = emitThread.getStackTrace(); - - for (StackTraceElement e : stack) { - sb.append(e).append('\n'); - } - - LOG.warn("Emitter thread blocks due to {}", sb.toString()); - - emitThread.interrupt(); - } - } - - /** - * Get all StreamElements in the AsyncCollector queue. - *

- * Emitter Thread can not output records and will wait for a while due to checkpoiting procedure - * holding the checkpoint lock. - * - * @return An {@link Iterator} to the StreamElements in the buffer, including the extra one. - */ - public Iterator getStreamElementsInBuffer() { - final Iterator> iterator = queue.iterator(); - final StreamElement extra = extraStreamElement; - - return new Iterator() { - boolean shouldSendExtraElement = (extra != null); - - @Override - public boolean hasNext() { - return iterator.hasNext() || shouldSendExtraElement; - } - - @Override - public StreamElement next() { - if (!hasNext()) { - throw new NoSuchElementException(); - } - - if (iterator.hasNext()) { - return iterator.next().getStreamElement(); - } - else { - shouldSendExtraElement = false; - - return extra; - } - } - - @Override - public void remove() { - throw new UnsupportedOperationException("remove"); - } - }; - } - - private void processMark(StreamElementEntry entry) throws Exception { - assert(Thread.holdsLock(lock)); - - StreamElement mark = entry.getStreamElement(); - - while (queue.size() >= bufferSize) { - // hold the input StreamRecord until it is placed in the buffer - extraStreamElement = mark; - - lock.wait(); - } - - if (error != null) { - throw error; - } - - queue.add(entry); - - if (mode == AsyncDataStream.OutputMode.UNORDERED) { - // update AsyncCollector to Watermark / LatencyMarker map - for (StreamElementEntry e : lonelyEntries) { - entriesToMarkers.put(e, mark); - } - - lonelyEntries.clear(); - - // update Watermark / LatencyMarker to finished AsyncCollector map - markerToFinishedEntries.put(mark, lonelyFinishedEntries); - - lonelyFinishedEntries = new HashSet<>(); - } - - extraStreamElement = null; - - // notify Emitter thread if the head of buffer is Watermark or LatencyMarker - // this is for the case when LatencyMarkers keep coming but there is no StreamRecords. 
- StreamElementEntry element = queue.iterator().next(); - - if (element.isLatencyMarker() || element.isWatermark()) { - workwork = true; - - lock.notifyAll(); - } - } - - private boolean shouldNotifyEmitterThread(StreamElementEntry entry) { - - switch (mode) { - - case ORDERED: - Iterator> queueIterator = queue.iterator(); - - // get to work as long as the first AsyncCollect is done. - return queueIterator.hasNext() && (queueIterator.next().isDone()); - - case UNORDERED: - Iterator>>> iteratorMarker = - markerToFinishedEntries.entrySet().iterator(); - - // get to work only the finished AsyncCollector belongs to the oldest Watermark or LatencyMarker - // or no Watermark / LatencyMarker is in the buffer yet. - return iteratorMarker.hasNext() ? iteratorMarker.next().getValue().contains(entry) - : lonelyFinishedEntries.contains(entry); - - default: - // this case should never happen - return false; - } - } - - @VisibleForTesting - public Set> getQueue() { - return queue; - } - - @VisibleForTesting - public void setExtraStreamElement(StreamElement element) { - extraStreamElement = element; - } - - /** - * A working thread to output results from {@link AsyncCollector} to the next operator. 
- */ - private class Emitter implements Runnable { - private volatile boolean running = true; - - private void output(StreamElementEntry entry) throws Exception { - - StreamElement element = entry.getStreamElement(); - - if (element == null) { - throw new Exception("StreamElement in the buffer entry should not be null"); - } - - if (entry.isStreamRecord()) { - List result = entry.getResult(); - - if (result == null) { - throw new Exception("Result for stream record " + element + " is null"); - } - - // update the timestamp for the collector - timestampedCollector.setTimestamp(element.asRecord()); - - for (OUT val : result) { - timestampedCollector.collect(val); - } - } - else if (entry.isWatermark()) { - output.emitWatermark(element.asWatermark()); - } - else if (entry.isLatencyMarker()) { - operator.sendLatencyMarker(element.asLatencyMarker()); - } - else { - throw new IOException("Unknown input record: " + element); - } - } - - /** - * Emit results from the finished head collector and its following finished ones. - * - *

NOTE: Since {@link #output(StreamElementEntry)} may be blocked if operator chain chained with - * another {@link AsyncWaitOperator} and its buffer is full, we can not use an {@link Iterator} to - * go through {@link #queue} because ConcurrentModificationException may be thrown while we remove - * element in the queue by calling {@link Iterator#remove()}. - * - *

Example: Assume operator chain like this: async-wait-operator1(awo1) -> async-wait-operator2(awo2). - * The buffer for awo1 is full so the main thread is blocked there. - * The {@link Emitter} thread, named emitter1, in awo1 is outputting - * data to awo2. Assume that 2 elements have been emitted and the buffer in awo1 has two vacancies. While - * outputting the third one, the buffer in awo2 is full, so emitter1 will wait for a moment. If we use - * {@link Iterator}, it is just before calling {@link Iterator#remove()}. Once the {@link #lock} is released - * and luckily enough, the main thread get the lock. It will modify {@link #queue}, causing - * ConcurrentModificationException once emitter1 runs to {@link Iterator#remove()}. - * - */ - private void orderedProcess() throws Exception { - StreamElementEntry entry; - - while (queue.size() > 0 && (entry = queue.iterator().next()).isDone()) { - output(entry); - - queue.remove(entry); - } - } - - /** - * Emit results for each finished collector. Try to emit results prior to the oldest watermark - * in the buffer. - *

- * For example, assume the sequence of input StreamElements is: - * Entry(ac1, record1) -> Entry(ac2, record2) -> Entry(ac3, watermark1) -> Entry(ac4, record3). - * and both of ac2 and ac3 have finished. For unordered-mode, ac1 and ac2 are prior to watermark1, - * so ac2 will be emitted. Since ac1 is not ready yet, ac3 have to wait until ac1 is done. - */ - private void unorderedProcess() throws Exception { - // try to emit finished AsyncCollectors in markerToFinishedEntries - if (markerToFinishedEntries.size() != 0) { - while (markerToFinishedEntries.size() != 0) { - Map.Entry>> finishedStreamElementEntry = - markerToFinishedEntries.entrySet().iterator().next(); - - Set> finishedElementSet = finishedStreamElementEntry.getValue(); - - // While outputting results to the next operator, output may release lock if the following operator - // in the chain is another AsyncWaitOperator. During this period, there may be some - // finished StreamElementEntry coming into the finishedElementSet, and we should - // output all finished elements after re-acquiring the lock. - while (finishedElementSet.size() != 0) { - StreamElementEntry finishedEntry = finishedElementSet.iterator().next(); - - output(finishedEntry); - - queue.remove(finishedEntry); - - entriesToMarkers.remove(finishedEntry); - - finishedElementSet.remove(finishedEntry); - } - - finishedStreamElementEntry.getValue().clear(); - - - // if all StreamElements belonging to current Watermark / LatencyMarker have been emitted, - // emit current Watermark / LatencyMarker - - if (queue.size() == 0) { - if (markerToFinishedEntries.size() != 0 || entriesToMarkers.size() != 0 - || lonelyEntries.size() != 0 || lonelyFinishedEntries.size() != 0) { - throw new IOException("Inner data info is not consistent."); - } - } - else { - // check the head AsyncCollector whether it is a Watermark or LatencyMarker. 
- StreamElementEntry queueEntry = queue.iterator().next(); - - if (!queueEntry.isStreamRecord()) { - if (finishedStreamElementEntry.getKey() != queueEntry.getStreamElement()) { - throw new IOException("Watermark / LatencyMarker from finished collector map " - + "and input buffer are not the same."); - } - - output(queueEntry); - - queue.remove(queueEntry); - - // remove useless data in markerToFinishedEntries - markerToFinishedEntries.remove(finishedStreamElementEntry.getKey()); - } - else { - break; - } - } - } - } - - if (markerToFinishedEntries.size() == 0) { - // health check - if (entriesToMarkers.size() != 0) { - throw new IOException("Entries to marker map should be zero"); - } - - // no Watermark or LatencyMarker in the buffer yet, emit results in lonelyFinishedEntries - while (lonelyFinishedEntries.size() != 0) { - StreamElementEntry entry = lonelyFinishedEntries.iterator().next(); - - output(entry); - - queue.remove(entry); - - lonelyEntries.remove(entry); - - lonelyFinishedEntries.remove(entry); - } - } - } - - private void processFinishedAsyncCollector() throws Exception { - if (mode == AsyncDataStream.OutputMode.ORDERED) { - orderedProcess(); - } else { - unorderedProcess(); - } - } - - private void clearAndNotify() { - // clear all data - queue.clear(); - entriesToMarkers.clear(); - markerToFinishedEntries.clear(); - lonelyEntries.clear(); - - running = false; - - lock.notifyAll(); - } - - @Override - public void run() { - while (running) { - synchronized (lock) { - - try { - while (!workwork) { - lock.wait(); - } - - processFinishedAsyncCollector(); - - lock.notifyAll(); - - workwork = false; - } - catch (InterruptedException e) { - // The source of InterruptedException is from: - // 1. lock.wait() statement in Emit - // 2. collector waiting for vacant buffer - // The action for this exception should try to clear all held data and - // exit Emit thread. 
- - clearAndNotify(); - } - catch (Exception e) { - // For exceptions, not InterruptedException, it should be propagated - // to main thread. - error = e; - - clearAndNotify(); - } - } - } - } - - public void stop() { - running = false; - } - } -} diff --git a/flink-streaming-java/src/main/java/org/apache/flink/streaming/api/functions/async/buffer/StreamElementEntry.java b/flink-streaming-java/src/main/java/org/apache/flink/streaming/api/functions/async/buffer/StreamElementEntry.java deleted file mode 100644 index de7f606aab105..0000000000000 --- a/flink-streaming-java/src/main/java/org/apache/flink/streaming/api/functions/async/buffer/StreamElementEntry.java +++ /dev/null @@ -1,82 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.flink.streaming.api.functions.async.buffer; - -import org.apache.flink.annotation.Internal; -import org.apache.flink.streaming.runtime.streamrecord.StreamElement; - -import java.io.IOException; -import java.util.List; - -/** - * The base class for entries in the {@link AsyncCollectorBuffer} - * - * @param Output data type - */ - -@Internal -public interface StreamElementEntry { - /** - * Get result. Throw IOException while encountering an error. 
- * - * @return A List of result. - * @throws IOException IOException wrapping errors from user codes. - */ - List getResult() throws IOException; - - /** - * Set the internal flag, marking the async operator has finished. - */ - void markDone(); - - /** - * Get the flag indicating the async operator has finished or not. - * - * @return True for finished async operator. - */ - boolean isDone(); - - /** - * Check inner element is StreamRecord or not. - * - * @return True if element is StreamRecord. - */ - boolean isStreamRecord(); - - /** - * Check inner element is Watermark or not. - * - * @return True if element is Watermark. - */ - boolean isWatermark(); - - /** - * Check inner element is LatencyMarker or not. - * - * @return True if element is LatencyMarker. - */ - boolean isLatencyMarker(); - - /** - * Get inner stream element. - * - * @return Inner {@link StreamElement}. - */ - StreamElement getStreamElement(); -} diff --git a/flink-streaming-java/src/main/java/org/apache/flink/streaming/api/functions/async/buffer/StreamRecordEntry.java b/flink-streaming-java/src/main/java/org/apache/flink/streaming/api/functions/async/buffer/StreamRecordEntry.java deleted file mode 100644 index fb0dc3b0c0907..0000000000000 --- a/flink-streaming-java/src/main/java/org/apache/flink/streaming/api/functions/async/buffer/StreamRecordEntry.java +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.flink.streaming.api.functions.async.buffer; - -import org.apache.flink.streaming.api.functions.async.collector.AsyncCollector; -import org.apache.flink.streaming.runtime.streamrecord.StreamRecord; -import org.apache.flink.util.Preconditions; - -import java.io.IOException; -import java.util.List; - -/** - * {@link AsyncCollectorBuffer} entry for {@link StreamRecord} - * - * @param Input data type - * @param Output data type - */ -public class StreamRecordEntry extends AbstractBufferEntry implements AsyncCollector { - private List result; - private Throwable error; - - private boolean isDone = false; - - private final AsyncCollectorBuffer buffer; - - public StreamRecordEntry(StreamRecord element, AsyncCollectorBuffer buffer) { - super(element); - this.buffer = Preconditions.checkNotNull(buffer, "Reference to AsyncCollectorBuffer should not be null"); - } - - @Override - public void collect(List result) { - this.result = result; - - this.buffer.markCollectorCompleted(this); - } - - @Override - public void collect(Throwable error) { - this.error = error; - - this.buffer.markCollectorCompleted(this); - } - - public List getResult() throws IOException { - if (error != null) { - throw new IOException(error.getMessage()); - } - return result; - } - - public void markDone() { - isDone = true; - } - - public boolean isDone() { - return isDone; - } -} diff --git a/flink-streaming-java/src/main/java/org/apache/flink/streaming/api/functions/async/collector/AsyncCollector.java 
b/flink-streaming-java/src/main/java/org/apache/flink/streaming/api/functions/async/collector/AsyncCollector.java index b2a58d2fdb393..a072acaa4300d 100644 --- a/flink-streaming-java/src/main/java/org/apache/flink/streaming/api/functions/async/collector/AsyncCollector.java +++ b/flink-streaming-java/src/main/java/org/apache/flink/streaming/api/functions/async/collector/AsyncCollector.java @@ -18,16 +18,16 @@ package org.apache.flink.streaming.api.functions.async.collector; -import org.apache.flink.annotation.Internal; +import org.apache.flink.annotation.PublicEvolving; -import java.util.List; +import java.util.Collection; /** * {@link AsyncCollector} collects data / error in user codes while processing async i/o. * * @param Output type */ -@Internal +@PublicEvolving public interface AsyncCollector { /** * Set result. @@ -35,14 +35,15 @@ public interface AsyncCollector { * Note that it should be called for exactly one time in the user code. * Calling this function for multiple times will cause data lose. *

- * Put all results in a {@link List} and then issue {@link AsyncCollector#collect(List)}. + * Put all results in a {@link Collection} and then issue + * {@link AsyncCollector#collect(Collection)}. *

* If the result is NULL, it will cause task fail. If collecting empty result set is allowable and * should not cause task fail-over, then try to collect an empty list collection. * * @param result A list of results. */ - void collect(List result); + void collect(Collection result); /** * Set error diff --git a/flink-streaming-java/src/main/java/org/apache/flink/streaming/api/operators/TimestampedCollector.java b/flink-streaming-java/src/main/java/org/apache/flink/streaming/api/operators/TimestampedCollector.java index 56fa14d89c0d5..dc80e81108121 100644 --- a/flink-streaming-java/src/main/java/org/apache/flink/streaming/api/operators/TimestampedCollector.java +++ b/flink-streaming-java/src/main/java/org/apache/flink/streaming/api/operators/TimestampedCollector.java @@ -63,6 +63,10 @@ public void setAbsoluteTimestamp(long timestamp) { reuse.setTimestamp(timestamp); } + public void eraseTimestamp() { + reuse.eraseTimestamp(); + } + @Override public void close() { output.close(); diff --git a/flink-streaming-java/src/main/java/org/apache/flink/streaming/api/operators/async/AsyncWaitOperator.java b/flink-streaming-java/src/main/java/org/apache/flink/streaming/api/operators/async/AsyncWaitOperator.java index 9166865e9600d..88fc833a82f9b 100644 --- a/flink-streaming-java/src/main/java/org/apache/flink/streaming/api/operators/async/AsyncWaitOperator.java +++ b/flink-streaming-java/src/main/java/org/apache/flink/streaming/api/operators/async/AsyncWaitOperator.java @@ -19,110 +19,154 @@ package org.apache.flink.streaming.api.operators.async; import org.apache.flink.annotation.Internal; -import org.apache.flink.annotation.VisibleForTesting; import org.apache.flink.api.common.state.ListState; import org.apache.flink.api.common.state.ListStateDescriptor; import org.apache.flink.api.common.typeutils.TypeSerializer; import org.apache.flink.runtime.state.StateInitializationContext; import org.apache.flink.runtime.state.StateSnapshotContext; import 
org.apache.flink.streaming.api.datastream.AsyncDataStream; -import org.apache.flink.streaming.api.functions.async.collector.AsyncCollector; -import org.apache.flink.streaming.api.functions.async.buffer.AsyncCollectorBuffer; import org.apache.flink.streaming.api.functions.async.AsyncFunction; +import org.apache.flink.streaming.api.functions.async.collector.AsyncCollector; import org.apache.flink.streaming.api.graph.StreamConfig; import org.apache.flink.streaming.api.operators.AbstractUdfStreamOperator; import org.apache.flink.streaming.api.operators.ChainingStrategy; import org.apache.flink.streaming.api.operators.OneInputStreamOperator; import org.apache.flink.streaming.api.operators.Output; -import org.apache.flink.streaming.api.operators.TimestampedCollector; +import org.apache.flink.streaming.api.operators.async.queue.StreamElementQueue; +import org.apache.flink.streaming.api.operators.async.queue.StreamElementQueueEntry; +import org.apache.flink.streaming.api.operators.async.queue.StreamRecordQueueEntry; +import org.apache.flink.streaming.api.operators.async.queue.WatermarkQueueEntry; +import org.apache.flink.streaming.api.operators.async.queue.OrderedStreamElementQueue; +import org.apache.flink.streaming.api.operators.async.queue.UnorderedStreamElementQueue; import org.apache.flink.streaming.api.watermark.Watermark; -import org.apache.flink.streaming.runtime.streamrecord.LatencyMarker; import org.apache.flink.streaming.runtime.streamrecord.StreamElement; import org.apache.flink.streaming.runtime.streamrecord.StreamElementSerializer; import org.apache.flink.streaming.runtime.streamrecord.StreamRecord; +import org.apache.flink.streaming.runtime.tasks.ProcessingTimeCallback; import org.apache.flink.streaming.runtime.tasks.StreamTask; +import org.apache.flink.util.ExceptionUtils; import org.apache.flink.util.Preconditions; -import java.util.Iterator; +import java.util.Collection; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; 
+import java.util.concurrent.TimeUnit; +import java.util.concurrent.TimeoutException; /** - * The {@link AsyncWaitOperator} will accept input {@link StreamElement} from previous operators, - * pass them into {@link AsyncFunction}, make a snapshot for the inputs in the {@link AsyncCollectorBuffer} - * while checkpointing, and restore the {@link AsyncCollectorBuffer} from previous state. + * The {@link AsyncWaitOperator} allows to asynchronously process incoming stream records. For that + * the operator creates an {@link AsyncCollector} which is passed to an {@link AsyncFunction}. + * Within the async function, the user can complete the async collector arbitrarily. Once the async + * collector has been completed, the result is emitted by the operator's emitter to downstream + * operators. *

- * Note that due to newly added working thread, named {@link AsyncCollectorBuffer.Emitter}, - * if {@link AsyncWaitOperator} is chained with other operators, {@link StreamTask} has to make sure that - * the the order to open operators in the operator chain should be from the tail operator to the head operator, - * and order to close operators in the operator chain should be from the head operator to the tail operator. + * The operator offers different output modes depending on the chosen + * {@link AsyncDataStream.OutputMode}. In order to give exactly once processing guarantees, the + * operator stores all currently in-flight {@link StreamElement} in its operator state. Upon + * recovery the recorded set of stream elements is replayed. + *

+ * In case of chaining of this operator, it has to be made sure that the operators in the chain are + * opened tail to head. The reason for this is that an opened {@link AsyncWaitOperator} starts + * already emitting recovered {@link StreamElement} to downstream operators. * * @param Input type for the operator. * @param Output type for the operator. */ @Internal public class AsyncWaitOperator - extends AbstractUdfStreamOperator> - implements OneInputStreamOperator -{ + extends AbstractUdfStreamOperator> + implements OneInputStreamOperator, OperatorActions { private static final long serialVersionUID = 1L; - private final static String STATE_NAME = "_async_wait_operator_state_"; + private static final String STATE_NAME = "_async_wait_operator_state_"; - /** - * {@link TypeSerializer} for inputs while making snapshots. - */ + /** Capacity of the stream element queue */ + private final int capacity; + + /** Output mode for this operator */ + private final AsyncDataStream.OutputMode outputMode; + + /** Timeout for the async collectors */ + private final long timeout; + + private transient Object checkpointingLock; + + /** {@link TypeSerializer} for inputs while making snapshots. 
*/ private transient StreamElementSerializer inStreamElementSerializer; - /** - * input stream elements from the state - */ + /** Recovered input stream elements */ private transient ListState recoveredStreamElements; - private transient TimestampedCollector collector; + /** Queue to store the currently in-flight stream elements into */ + private transient StreamElementQueue queue; - private transient AsyncCollectorBuffer buffer; + /** Pending stream element which could not yet added to the queue */ + private transient StreamElementQueueEntry pendingStreamElementQueueEntry; - /** - * Checkpoint lock from {@link StreamTask#lock} - */ - private transient Object checkpointLock; + private transient ExecutorService executor; + + /** Emitter for the completed stream element queue entries */ + private transient Emitter emitter; - private final int bufferSize; - private final AsyncDataStream.OutputMode mode; + /** Thread running the emitter */ + private transient Thread emitterThread; - public AsyncWaitOperator(AsyncFunction asyncFunction, int bufferSize, AsyncDataStream.OutputMode mode) { + public AsyncWaitOperator( + AsyncFunction asyncFunction, + int capacity, + AsyncDataStream.OutputMode outputMode) { super(asyncFunction); chainingStrategy = ChainingStrategy.ALWAYS; - Preconditions.checkArgument(bufferSize > 0, "The number of concurrent async operation should be greater than 0."); - this.bufferSize = bufferSize; + Preconditions.checkArgument(capacity > 0, "The number of concurrent async operation should be greater than 0."); + this.capacity = capacity; - this.mode = mode; + this.outputMode = Preconditions.checkNotNull(outputMode, "outputMode"); + + this.timeout = -1L; } @Override public void setup(StreamTask containingTask, StreamConfig config, Output> output) { super.setup(containingTask, config, output); - this.inStreamElementSerializer = - new StreamElementSerializer(this.getOperatorConfig().getTypeSerializerIn1(getUserCodeClassloader())); - - this.collector = new 
TimestampedCollector<>(output); - - this.checkpointLock = containingTask.getCheckpointLock(); - - this.buffer = new AsyncCollectorBuffer<>(bufferSize, mode, output, collector, this.checkpointLock, this); + this.checkpointingLock = getContainingTask().getCheckpointLock(); + + this.inStreamElementSerializer = new StreamElementSerializer<>( + getOperatorConfig().getTypeSerializerIn1(getUserCodeClassloader())); + + // create the operators executor for the complete operations of the queue entries + this.executor = Executors.newSingleThreadExecutor(); + + switch (outputMode) { + case ORDERED: + queue = new OrderedStreamElementQueue( + capacity, + executor, + this); + break; + case UNORDERED: + queue = new UnorderedStreamElementQueue( + capacity, + executor, + this); + break; + default: + throw new IllegalStateException("Unknown async mode: " + outputMode + '.'); + } } @Override public void open() throws Exception { super.open(); - // process stream elements from state, since the Emit thread will start soon as all elements from - // previous state are in the AsyncCollectorBuffer, we have to make sure that the order to open all - // operators in the operator chain should be from the tail operator to the head operator. - if (this.recoveredStreamElements != null) { - for (StreamElement element : this.recoveredStreamElements.get()) { + // process stream elements from state, since the Emit thread will start as soon as all + // elements from previous state are in the StreamElementQueue, we have to make sure that the + // order to open all operators in the operator chain proceeds from the tail operator to the + // head operator. 
+ if (recoveredStreamElements != null) { + for (StreamElement element : recoveredStreamElements.get()) { if (element.isRecord()) { processElement(element.asRecord()); } @@ -133,30 +177,52 @@ else if (element.isLatencyMarker()) { processLatencyMarker(element.asLatencyMarker()); } else { - throw new Exception("Unknown record type: "+element.getClass()); + throw new IllegalStateException("Unknown record type " + element.getClass() + + " encountered while opening the operator."); } } - this.recoveredStreamElements = null; + recoveredStreamElements = null; } - buffer.startEmitterThread(); + // create the emitter + this.emitter = new Emitter<>(checkpointingLock, output, queue, this); + + // start the emitter thread + this.emitterThread = new Thread(emitter); + emitterThread.setDaemon(true); + emitterThread.start(); + } @Override public void processElement(StreamRecord element) throws Exception { - AsyncCollector collector = buffer.addStreamRecord(element); + final StreamRecordQueueEntry streamRecordBufferEntry = new StreamRecordQueueEntry<>(element); + + if (timeout > 0L) { + // register a timeout for this AsyncStreamRecordBufferEntry + long timeoutTimestamp = timeout + System.currentTimeMillis(); + + getProcessingTimeService().registerTimer( + timeoutTimestamp, + new ProcessingTimeCallback() { + @Override + public void onProcessingTime(long timestamp) throws Exception { + streamRecordBufferEntry.collect( + new TimeoutException("Async function call has timed out.")); + } + }); + } - userFunction.asyncInvoke(element.getValue(), collector); + addAsyncBufferEntry(streamRecordBufferEntry); + + userFunction.asyncInvoke(element.getValue(), streamRecordBufferEntry); } @Override public void processWatermark(Watermark mark) throws Exception { - buffer.addWatermark(mark); - } + WatermarkQueueEntry watermarkBufferEntry = new WatermarkQueueEntry(mark); - @Override - public void processLatencyMarker(LatencyMarker latencyMarker) throws Exception { - 
buffer.addLatencyMarker(latencyMarker); + addAsyncBufferEntry(watermarkBufferEntry); } @Override @@ -167,45 +233,155 @@ public void snapshotState(StateSnapshotContext context) throws Exception { getOperatorStateBackend().getOperatorState(new ListStateDescriptor<>(STATE_NAME, inStreamElementSerializer)); partitionableState.clear(); - Iterator iterator = buffer.getStreamElementsInBuffer(); - while (iterator.hasNext()) { - partitionableState.add(iterator.next()); + Collection> values = queue.values(); + + for (StreamElementQueueEntry value : values) { + partitionableState.add(value.getStreamElement()); + } + + // add the pending stream element queue entry if the stream element queue is currently full + if (pendingStreamElementQueueEntry != null) { + partitionableState.add(pendingStreamElementQueueEntry.getStreamElement()); } } @Override public void initializeState(StateInitializationContext context) throws Exception { - recoveredStreamElements = - context.getOperatorStateStore().getOperatorState(new ListStateDescriptor<>(STATE_NAME, inStreamElementSerializer)); + recoveredStreamElements = context + .getOperatorStateStore() + .getOperatorState(new ListStateDescriptor<>(STATE_NAME, inStreamElementSerializer)); } @Override public void close() throws Exception { try { - buffer.waitEmpty(); + assert(Thread.holdsLock(checkpointingLock)); + + while (!queue.isEmpty()) { + // wait for the emitter thread to output the remaining elements + // for that he needs the checkpointing lock and thus we have to free it + checkpointingLock.wait(); + } } finally { - // make sure Emitter thread exits and close user function - buffer.stopEmitterThread(); + Exception exception = null; + + try { + super.close(); + } catch (InterruptedException interrupted) { + exception = interrupted; + + Thread.currentThread().interrupt(); + } catch (Exception e) { + exception = e; + } + + try { + // terminate the emitter, the emitter thread and the executor + stopResources(true); + } catch 
(InterruptedException interrupted) { + exception = ExceptionUtils.firstOrSuppressed(interrupted, exception); + + Thread.currentThread().interrupt(); + } catch (Exception e) { + exception = ExceptionUtils.firstOrSuppressed(e, exception); + } - super.close(); + if (exception != null) { + LOG.warn("Errors occurred while closing the AsyncWaitOperator.", exception); + } } } @Override public void dispose() throws Exception { - super.dispose(); + Exception exception = null; + + try { + super.dispose(); + } catch (InterruptedException interrupted) { + exception = interrupted; + + Thread.currentThread().interrupt(); + } catch (Exception e) { + exception = e; + } + + try { + stopResources(false); + } catch (InterruptedException interrupted) { + exception = ExceptionUtils.firstOrSuppressed(interrupted, exception); + + Thread.currentThread().interrupt(); + } catch (Exception e) { + exception = ExceptionUtils.firstOrSuppressed(e, exception); + } + + if (exception != null) { + throw exception; + } + } - buffer.stopEmitterThread(); + /** + * Close the operator's resources. They include the emitter thread and the executor to run + * the queue's complete operation. + * + * @param waitForShutdown is true if the method should wait for the resources to be freed; + * otherwise false. 
+ * @throws InterruptedException if current thread has been interrupted + */ + private void stopResources(boolean waitForShutdown) throws InterruptedException { + emitter.stop(); + emitterThread.interrupt(); + + executor.shutdown(); + + if (waitForShutdown) { + try { + if (!executor.awaitTermination(365L, TimeUnit.DAYS)) { + executor.shutdownNow(); + } + } catch (InterruptedException e) { + executor.shutdownNow(); + + Thread.currentThread().interrupt(); + } + + emitterThread.join(); + } else { + executor.shutdownNow(); + } } - public void sendLatencyMarker(LatencyMarker marker) throws Exception { - super.processLatencyMarker(marker); + /** + * Add the given stream element queue entry to the operator's stream element queue. This + * operation blocks until the element has been added. + *

+ * For that it tries to put the element into the queue and if not successful then it waits on + * the checkpointing lock. The checkpointing lock is also used by the {@link Emitter} to output + * elements. The emitter is also responsible for notifying this method if the queue has capacity + * left again, by calling notifyAll on the checkpointing lock. + * + * @param streamElementQueueEntry to add to the operator's queue + * @param Type of the stream element queue entry's result + * @throws InterruptedException if the current thread has been interrupted + */ + private void addAsyncBufferEntry(StreamElementQueueEntry streamElementQueueEntry) throws InterruptedException { + assert(Thread.holdsLock(checkpointingLock)); + + pendingStreamElementQueueEntry = streamElementQueueEntry; + + while (!queue.tryPut(streamElementQueueEntry)) { + // we wait for the emitter to notify us if the queue has space left again + checkpointingLock.wait(); + } + + pendingStreamElementQueueEntry = null; } - @VisibleForTesting - public AsyncCollectorBuffer getBuffer() { - return buffer; + @Override + public void failOperator(Throwable throwable) { + getContainingTask().getEnvironment().failExternally(throwable); } } diff --git a/flink-streaming-java/src/main/java/org/apache/flink/streaming/api/operators/async/Emitter.java b/flink-streaming-java/src/main/java/org/apache/flink/streaming/api/operators/async/Emitter.java new file mode 100644 index 0000000000000..4b22aaa262762 --- /dev/null +++ b/flink-streaming-java/src/main/java/org/apache/flink/streaming/api/operators/async/Emitter.java @@ -0,0 +1,152 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.flink.streaming.api.operators.async; + +import org.apache.flink.streaming.api.operators.Output; +import org.apache.flink.streaming.api.operators.TimestampedCollector; +import org.apache.flink.streaming.api.operators.async.queue.AsyncCollectionResult; +import org.apache.flink.streaming.api.operators.async.queue.StreamElementQueue; +import org.apache.flink.streaming.api.operators.async.queue.AsyncResult; +import org.apache.flink.streaming.api.operators.async.queue.AsyncWatermarkResult; +import org.apache.flink.streaming.runtime.streamrecord.StreamRecord; +import org.apache.flink.util.Preconditions; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.Collection; + +/** + * Runnable responsible for consuming elements from the given queue and outputting them to the + * given output/timestampedCollector. 
+ * + * @param Type of the output elements + */ +public class Emitter implements Runnable { + + private static final Logger LOG = LoggerFactory.getLogger(Emitter.class); + + /** Lock to hold before outputting */ + private final Object checkpointLock; + + /** Output for the watermark elements */ + private final Output> output; + + /** Queue to consume the async results from */ + private final StreamElementQueue streamElementQueue; + + private final OperatorActions operatorActions; + + /** Output for stream records */ + private final TimestampedCollector timestampedCollector; + + private volatile boolean running; + + public Emitter( + final Object checkpointLock, + final Output> output, + final StreamElementQueue streamElementQueue, + final OperatorActions operatorActions) { + + this.checkpointLock = Preconditions.checkNotNull(checkpointLock, "checkpointLock"); + this.output = Preconditions.checkNotNull(output, "output"); + this.streamElementQueue = Preconditions.checkNotNull(streamElementQueue, "asyncCollectorBuffer"); + this.operatorActions = Preconditions.checkNotNull(operatorActions, "operatorActions"); + + this.timestampedCollector = new TimestampedCollector<>(this.output); + this.running = true; + } + + @Override + public void run() { + try { + while (running) { + LOG.debug("Wait for next completed async stream element result."); + AsyncResult streamElementEntry = streamElementQueue.peekBlockingly(); + + output(streamElementEntry); + } + } catch (InterruptedException e) { + if (running) { + operatorActions.failOperator(e); + } else { + // Thread got interrupted which means that it should shut down + LOG.debug("Emitter thread got interrupted. 
This indicates that the emitter should " + + "shut down."); + } + } catch (Throwable t) { + operatorActions.failOperator(new Exception("AsyncWaitOperator's emitter caught an " + + "unexpected throwable.", t)); + } + } + + private void output(AsyncResult asyncResult) throws InterruptedException { + if (asyncResult.isWatermark()) { + synchronized (checkpointLock) { + // remove the peeked element from the async collector buffer so that it is no longer + // checkpointed + streamElementQueue.poll(); + + // notify the main thread that there is again space left in the async collector + // buffer + checkpointLock.notifyAll(); + + AsyncWatermarkResult asyncWatermarkResult = asyncResult.asWatermark(); + + LOG.debug("Output async watermark."); + output.emitWatermark(asyncWatermarkResult.getWatermark()); + } + } else { + AsyncCollectionResult streamRecordResult = asyncResult.asResultCollection(); + + if (streamRecordResult.hasTimestamp()) { + timestampedCollector.setAbsoluteTimestamp(streamRecordResult.getTimestamp()); + } else { + timestampedCollector.eraseTimestamp(); + } + + synchronized (checkpointLock) { + // remove the peeked element from the async collector buffer so that it is no longer + // checkpointed + streamElementQueue.poll(); + + // notify the main thread that there is again space left in the async collector + // buffer + checkpointLock.notifyAll(); + + LOG.debug("Output async stream element collection result."); + + try { + Collection resultCollection = streamRecordResult.get(); + + for (OUT result : resultCollection) { + timestampedCollector.collect(result); + } + } catch (Exception e) { + operatorActions.failOperator( + new Exception("An async function call terminated with an exception. 
" + + "Failing the AsyncWaitOperator.", e)); + } + } + } + } + + public void stop() { + running = false; + } +} diff --git a/flink-streaming-java/src/main/java/org/apache/flink/streaming/api/functions/async/buffer/LatencyMarkerEntry.java b/flink-streaming-java/src/main/java/org/apache/flink/streaming/api/operators/async/OperatorActions.java similarity index 67% rename from flink-streaming-java/src/main/java/org/apache/flink/streaming/api/functions/async/buffer/LatencyMarkerEntry.java rename to flink-streaming-java/src/main/java/org/apache/flink/streaming/api/operators/async/OperatorActions.java index 1705c2d3d5e90..5a2e43c0b3de5 100644 --- a/flink-streaming-java/src/main/java/org/apache/flink/streaming/api/functions/async/buffer/LatencyMarkerEntry.java +++ b/flink-streaming-java/src/main/java/org/apache/flink/streaming/api/operators/async/OperatorActions.java @@ -16,21 +16,19 @@ * limitations under the License. */ -package org.apache.flink.streaming.api.functions.async.buffer; +package org.apache.flink.streaming.api.operators.async; -import org.apache.flink.streaming.runtime.streamrecord.LatencyMarker; +import org.apache.flink.streaming.api.operators.StreamOperator; /** - * {@link AsyncCollectorBuffer} entry for {@link LatencyMarker} - * + * Interface for {@link StreamOperator} actions. */ -public class LatencyMarkerEntry extends AbstractBufferEntry { - public LatencyMarkerEntry(LatencyMarker marker) { - super(marker); - } +public interface OperatorActions { - @Override - public boolean isDone() { - return true; - } + /** + * Fail the respective stream operator with the given throwable. 
+ * + * @param throwable to fail the stream operator with + */ + void failOperator(Throwable throwable); } diff --git a/flink-streaming-java/src/main/java/org/apache/flink/streaming/api/operators/async/queue/AsyncCollectionResult.java b/flink-streaming-java/src/main/java/org/apache/flink/streaming/api/operators/async/queue/AsyncCollectionResult.java new file mode 100644 index 0000000000000..8088bf0e64504 --- /dev/null +++ b/flink-streaming-java/src/main/java/org/apache/flink/streaming/api/operators/async/queue/AsyncCollectionResult.java @@ -0,0 +1,41 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.flink.streaming.api.operators.async.queue; + +import java.util.Collection; + +/** + * {@link AsyncResult} sub class for asynchronous result collections. + * + * @param Type of the collection elements. + */ +public interface AsyncCollectionResult extends AsyncResult { + + boolean hasTimestamp(); + + long getTimestamp(); + + /** + * Return the asynchronous result collection. 
+ * + * @return the asynchronous result collection + * @throws Exception if the asynchronous result collection could not be completed + */ + Collection get() throws Exception; +} diff --git a/flink-streaming-java/src/main/java/org/apache/flink/streaming/api/operators/async/queue/AsyncResult.java b/flink-streaming-java/src/main/java/org/apache/flink/streaming/api/operators/async/queue/AsyncResult.java new file mode 100644 index 0000000000000..1a99928150176 --- /dev/null +++ b/flink-streaming-java/src/main/java/org/apache/flink/streaming/api/operators/async/queue/AsyncResult.java @@ -0,0 +1,59 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.flink.streaming.api.operators.async.queue; + +import org.apache.flink.streaming.api.functions.async.AsyncFunction; +import org.apache.flink.streaming.api.watermark.Watermark; + +/** + * Asynchronous result returned by the {@link StreamElementQueue}. The asynchronous result can + * either be a {@link Watermark} or a collection of new output elements produced by the + * {@link AsyncFunction}. + */ +public interface AsyncResult { + + /** + * True if the async result is a {@link Watermark}; otherwise false. 
+ * + * @return True if the async result is a {@link Watermark}; otherwise false. + */ + boolean isWatermark(); + + /** + * True if the async result is a collection of output elements; otherwise false. + * + * @return True if the async result is a collection of output elements; otherwise false + */ + boolean isResultCollection(); + + /** + * Return this async result as an async watermark result. + * + * @return this result as a {@link AsyncWatermarkResult}. + */ + AsyncWatermarkResult asWatermark(); + + /** + * Return this async result as an async result collection. + * + * @param Type of the result collection's elements + * @return this result as a {@link AsyncCollectionResult}. + */ + AsyncCollectionResult asResultCollection(); +} diff --git a/flink-streaming-java/src/main/java/org/apache/flink/streaming/api/functions/async/buffer/WatermarkEntry.java b/flink-streaming-java/src/main/java/org/apache/flink/streaming/api/operators/async/queue/AsyncWatermarkResult.java similarity index 73% rename from flink-streaming-java/src/main/java/org/apache/flink/streaming/api/functions/async/buffer/WatermarkEntry.java rename to flink-streaming-java/src/main/java/org/apache/flink/streaming/api/operators/async/queue/AsyncWatermarkResult.java index 8883a2d6073eb..c19b520d2cba4 100644 --- a/flink-streaming-java/src/main/java/org/apache/flink/streaming/api/functions/async/buffer/WatermarkEntry.java +++ b/flink-streaming-java/src/main/java/org/apache/flink/streaming/api/operators/async/queue/AsyncWatermarkResult.java @@ -16,21 +16,18 @@ * limitations under the License. */ -package org.apache.flink.streaming.api.functions.async.buffer; +package org.apache.flink.streaming.api.operators.async.queue; import org.apache.flink.streaming.api.watermark.Watermark; /** - * {@link AsyncCollectorBuffer} entry for {@link Watermark} - * + * {@link AsyncResult} subclass for asynchronous result {@link Watermark}. 
*/ -public class WatermarkEntry extends AbstractBufferEntry { - public WatermarkEntry(Watermark watermark) { - super(watermark); - } - - @Override - public boolean isDone() { - return true; - } +public interface AsyncWatermarkResult extends AsyncResult { + /** + * Get the resulting watermark. + * + * @return the asynchronous result watermark + */ + Watermark getWatermark(); } diff --git a/flink-streaming-java/src/main/java/org/apache/flink/streaming/api/operators/async/queue/OrderedStreamElementQueue.java b/flink-streaming-java/src/main/java/org/apache/flink/streaming/api/operators/async/queue/OrderedStreamElementQueue.java new file mode 100644 index 0000000000000..2bbcb6c1b826d --- /dev/null +++ b/flink-streaming-java/src/main/java/org/apache/flink/streaming/api/operators/async/queue/OrderedStreamElementQueue.java @@ -0,0 +1,229 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.flink.streaming.api.operators.async.queue; + +import org.apache.flink.runtime.concurrent.AcceptFunction; +import org.apache.flink.streaming.api.operators.async.OperatorActions; +import org.apache.flink.util.Preconditions; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.ArrayDeque; +import java.util.Arrays; +import java.util.Collection; +import java.util.concurrent.Executor; +import java.util.concurrent.locks.Condition; +import java.util.concurrent.locks.ReentrantLock; + +/** + * Ordered {@link StreamElementQueue} implementation. The ordered stream element queue emits + * asynchronous results in the order in which the {@link StreamElementQueueEntry} have been added + * to the queue. Thus, even if the completion order can be arbitrary, the output order strictly + * follows the insertion order (element cannot overtake each other). + */ +public class OrderedStreamElementQueue implements StreamElementQueue { + + private static final Logger LOG = LoggerFactory.getLogger(OrderedStreamElementQueue.class); + + /** Capacity of this queue */ + private final int capacity; + + /** Executor to run the onCompletion callback */ + private final Executor executor; + + /** Operator actions to signal a failure to the operator */ + private final OperatorActions operatorActions; + + /** Lock and conditions for the blocking queue */ + private final ReentrantLock lock; + private final Condition notFull; + private final Condition headIsCompleted; + + /** Queue for the inserted StreamElementQueueEntries */ + private final ArrayDeque> queue; + + public OrderedStreamElementQueue( + int capacity, + Executor executor, + OperatorActions operatorActions) { + + Preconditions.checkArgument(capacity > 0, "The capacity must be larger than 0."); + this.capacity = capacity; + + this.executor = Preconditions.checkNotNull(executor, "executor"); + + this.operatorActions = Preconditions.checkNotNull(operatorActions, "operatorActions"); + + this.lock 
= new ReentrantLock(false); + this.headIsCompleted = lock.newCondition(); + this.notFull = lock.newCondition(); + + this.queue = new ArrayDeque<>(capacity); + } + + @Override + public AsyncResult peekBlockingly() throws InterruptedException { + lock.lockInterruptibly(); + + try { + while (queue.isEmpty() || !queue.peek().isDone()) { + headIsCompleted.await(); + } + + LOG.debug("Peeked head element from ordered stream element queue with filling degree " + + "({}/{}).", queue.size(), capacity); + + return queue.peek(); + } finally { + lock.unlock(); + } + } + + @Override + public AsyncResult poll() throws InterruptedException { + lock.lockInterruptibly(); + + try { + while (queue.isEmpty() || !queue.peek().isDone()) { + headIsCompleted.await(); + } + + notFull.signalAll(); + + LOG.debug("Polled head element from ordered stream element queue. New filling degree " + + "({}/{}).", queue.size() - 1, capacity); + + return queue.poll(); + } finally { + lock.unlock(); + } + } + + @Override + public Collection> values() throws InterruptedException { + lock.lockInterruptibly(); + + try { + StreamElementQueueEntry[] array = new StreamElementQueueEntry[queue.size()]; + + array = queue.toArray(array); + + return Arrays.asList(array); + } finally { + lock.unlock(); + } + } + + @Override + public boolean isEmpty() { + return queue.isEmpty(); + } + + @Override + public int size() { + return queue.size(); + } + + @Override + public void put(StreamElementQueueEntry streamElementQueueEntry) throws InterruptedException { + lock.lockInterruptibly(); + + try { + while (queue.size() >= capacity) { + notFull.await(); + } + + addEntry(streamElementQueueEntry); + } finally { + lock.unlock(); + } + } + + @Override + public boolean tryPut(StreamElementQueueEntry streamElementQueueEntry) throws InterruptedException { + lock.lockInterruptibly(); + + try { + if (queue.size() < capacity) { + addEntry(streamElementQueueEntry); + + LOG.debug("Put element into ordered stream element queue. 
New filling degree " + + "({}/{}).", queue.size(), capacity); + + return true; + } else { + LOG.debug("Failed to put element into ordered stream element queue because it " + + "was full ({}/{}).", queue.size(), capacity); + + return false; + } + } finally { + lock.unlock(); + } + } + + /** + * Add the given {@link StreamElementQueueEntry} to the queue. Additionally, this method + * registers a onComplete callback which is triggered once the given queue entry is completed. + * + * @param streamElementQueueEntry to be inserted + * @param Type of the stream element queue entry's result + */ + private void addEntry(StreamElementQueueEntry streamElementQueueEntry) { + assert(lock.isHeldByCurrentThread()); + + queue.addLast(streamElementQueueEntry); + + streamElementQueueEntry.onComplete(new AcceptFunction>() { + @Override + public void accept(StreamElementQueueEntry value) { + try { + onCompleteHandler(value); + } catch (InterruptedException e) { + // we got interrupted. This indicates a shutdown of the executor + LOG.debug("AsyncBufferEntry could not be properly completed because the " + + "executor thread has been interrupted.", e); + } catch (Throwable t) { + operatorActions.failOperator(new Exception("Could not complete the " + + "stream element queue entry: " + value + '.', t)); + } + } + }, executor); + } + + /** + * Check if the completed {@link StreamElementQueueEntry} is the current head. If this is the + * case, then notify the consumer thread about a new consumable entry. 
+ * + * @param streamElementQueueEntry which has been completed + * @throws InterruptedException if the current thread is interrupted + */ + private void onCompleteHandler(StreamElementQueueEntry streamElementQueueEntry) throws InterruptedException { + lock.lockInterruptibly(); + + try { + if (!queue.isEmpty() && queue.peek().isDone()) { + LOG.debug("Signal ordered stream element queue has completed head element."); + headIsCompleted.signalAll(); + } + } finally { + lock.unlock(); + } + } +} diff --git a/flink-streaming-java/src/main/java/org/apache/flink/streaming/api/operators/async/queue/StreamElementQueue.java b/flink-streaming-java/src/main/java/org/apache/flink/streaming/api/operators/async/queue/StreamElementQueue.java new file mode 100644 index 0000000000000..1a2c4a859b291 --- /dev/null +++ b/flink-streaming-java/src/main/java/org/apache/flink/streaming/api/operators/async/queue/StreamElementQueue.java @@ -0,0 +1,96 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.flink.streaming.api.operators.async.queue; + +import org.apache.flink.streaming.api.operators.async.AsyncWaitOperator; + +import java.util.Collection; + +/** + * Interface for blocking stream element queues for the {@link AsyncWaitOperator}. + */ +public interface StreamElementQueue { + + /** + * Put the given element in the queue if capacity is left. If not, then block until this is + * the case. + * + * @param streamElementQueueEntry to be put into the queue + * @param Type of the entries future value + * @throws InterruptedException if the calling thread has been interrupted while waiting to + * insert the given element + */ + void put(StreamElementQueueEntry streamElementQueueEntry) throws InterruptedException; + + /** + * Try to put the given element in the queue. This operation succeeds if the queue has capacity + * left and fails if the queue is full. + * + * @param streamElementQueueEntry to be inserted + * @param Type of the entries future value + * @return True if the entry could be inserted; otherwise false + * @throws InterruptedException if the calling thread has been interrupted while waiting to + * insert the given element + */ + boolean tryPut(StreamElementQueueEntry streamElementQueueEntry) throws InterruptedException; + + /** + * Peek at the head of the queue and return the first completed {@link AsyncResult}. This + * operation is a blocking operation and only returns once a completed async result has been + * found. + * + * @return Completed {@link AsyncResult} + * @throws InterruptedException if the current thread has been interrupted while waiting for a + * completed async result. + */ + AsyncResult peekBlockingly() throws InterruptedException; + + /** + * Poll the first completed {@link AsyncResult} from the head of this queue. This operation is + * blocking and only returns once a completed async result has been found. 
+ * + * @return Completed {@link AsyncResult} which has been removed from the queue + * @throws InterruptedException if the current thread has been interrupted while waiting for a + * completed async result. + */ + AsyncResult poll() throws InterruptedException; + + /** + * Return the collection of {@link StreamElementQueueEntry} currently contained in this queue. + * + * @return Collection of currently contained {@link StreamElementQueueEntry}. + * @throws InterruptedException if the current thread has been interrupted while retrieving the + * stream element queue entries of this queue. + */ + Collection> values() throws InterruptedException; + + /** + * True if the queue is empty; otherwise false. + * + * @return True if the queue is empty; otherwise false. + */ + boolean isEmpty(); + + /** + * Return the size of the queue. + * + * @return The number of elements contained in this queue. + */ + int size(); +} diff --git a/flink-streaming-java/src/main/java/org/apache/flink/streaming/api/operators/async/queue/StreamElementQueueEntry.java b/flink-streaming-java/src/main/java/org/apache/flink/streaming/api/operators/async/queue/StreamElementQueueEntry.java new file mode 100644 index 0000000000000..06ebf3c30ce76 --- /dev/null +++ b/flink-streaming-java/src/main/java/org/apache/flink/streaming/api/operators/async/queue/StreamElementQueueEntry.java @@ -0,0 +1,97 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.flink.streaming.api.operators.async.queue; + +import org.apache.flink.runtime.concurrent.AcceptFunction; +import org.apache.flink.runtime.concurrent.Future; +import org.apache.flink.streaming.runtime.streamrecord.StreamElement; +import org.apache.flink.util.Preconditions; + +import java.util.concurrent.Executor; + +/** + * Entry class for the {@link StreamElementQueue}. The stream element queue entry stores the + * {@link StreamElement} for which the stream element queue entry has been instantiated. + * Furthermore, it allows to register callbacks for when the queue entry is completed. + * + * @param Type of the result + */ +public abstract class StreamElementQueueEntry implements AsyncResult { + + /** Stream element */ + private final StreamElement streamElement; + + public StreamElementQueueEntry(StreamElement streamElement) { + this.streamElement = Preconditions.checkNotNull(streamElement); + } + + public StreamElement getStreamElement() { + return streamElement; + } + + /** + * True if the stream element queue entry has been completed; otherwise false. + * + * @return True if the stream element queue entry has been completed; otherwise false. + */ + public boolean isDone() { + return getFuture().isDone(); + } + + /** + * Register the given complete function to be called once this queue entry has been completed. 
+ * + * @param completeFunction to call when the queue entry has been completed + * @param executor to run the complete function + */ + public void onComplete( + final AcceptFunction> completeFunction, + Executor executor) { + final StreamElementQueueEntry thisReference = this; + + getFuture().thenAcceptAsync(new AcceptFunction() { + @Override + public void accept(T value) { + completeFunction.accept(thisReference); + } + }, executor); + } + + protected abstract Future getFuture(); + + @Override + public final boolean isWatermark() { + return AsyncWatermarkResult.class.isAssignableFrom(getClass()); + } + + @Override + public final boolean isResultCollection() { + return AsyncCollectionResult.class.isAssignableFrom(getClass()); + } + + @Override + public final AsyncWatermarkResult asWatermark() { + return (AsyncWatermarkResult) this; + } + + @Override + public final AsyncCollectionResult asResultCollection() { + return (AsyncCollectionResult) this; + } +} diff --git a/flink-streaming-java/src/main/java/org/apache/flink/streaming/api/operators/async/queue/StreamRecordQueueEntry.java b/flink-streaming-java/src/main/java/org/apache/flink/streaming/api/operators/async/queue/StreamRecordQueueEntry.java new file mode 100644 index 0000000000000..f0e707e4bfbda --- /dev/null +++ b/flink-streaming-java/src/main/java/org/apache/flink/streaming/api/operators/async/queue/StreamRecordQueueEntry.java @@ -0,0 +1,85 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.flink.streaming.api.operators.async.queue; + +import org.apache.flink.runtime.concurrent.CompletableFuture; +import org.apache.flink.runtime.concurrent.Future; +import org.apache.flink.runtime.concurrent.impl.FlinkCompletableFuture; +import org.apache.flink.streaming.api.functions.async.AsyncFunction; +import org.apache.flink.streaming.api.functions.async.collector.AsyncCollector; +import org.apache.flink.streaming.runtime.streamrecord.StreamRecord; + +import java.util.Collection; + +/** + * {@link StreamElementQueueEntry} implementation for {@link StreamRecord}. This class also acts + * as the {@link AsyncCollector} implementation which is given to the {@link AsyncFunction}. The + * async function completes this class with a collection of results. 
+ * + * @param Type of the asynchronous collection result + */ +public class StreamRecordQueueEntry extends StreamElementQueueEntry> + implements AsyncCollectionResult, AsyncCollector { + + /** Timestamp information */ + private final boolean hasTimestamp; + private final long timestamp; + + /** Future containing the collection result */ + private final CompletableFuture> resultFuture; + + public StreamRecordQueueEntry(StreamRecord streamRecord) { + super(streamRecord); + + hasTimestamp = streamRecord.hasTimestamp(); + timestamp = streamRecord.getTimestamp(); + + resultFuture = new FlinkCompletableFuture<>(); + } + + @Override + public boolean hasTimestamp() { + return hasTimestamp; + } + + @Override + public long getTimestamp() { + return timestamp; + } + + @Override + public Collection get() throws Exception { + return resultFuture.get(); + } + + @Override + protected Future> getFuture() { + return resultFuture; + } + + @Override + public void collect(Collection result) { + resultFuture.complete(result); + } + + @Override + public void collect(Throwable error) { + resultFuture.completeExceptionally(error); + } +} diff --git a/flink-streaming-java/src/main/java/org/apache/flink/streaming/api/operators/async/queue/UnorderedStreamElementQueue.java b/flink-streaming-java/src/main/java/org/apache/flink/streaming/api/operators/async/queue/UnorderedStreamElementQueue.java new file mode 100644 index 0000000000000..603d8cc60c2a5 --- /dev/null +++ b/flink-streaming-java/src/main/java/org/apache/flink/streaming/api/operators/async/queue/UnorderedStreamElementQueue.java @@ -0,0 +1,304 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.flink.streaming.api.operators.async.queue; + +import org.apache.flink.runtime.concurrent.AcceptFunction; +import org.apache.flink.streaming.api.operators.async.OperatorActions; +import org.apache.flink.util.Preconditions; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.ArrayDeque; +import java.util.Arrays; +import java.util.Collection; +import java.util.HashSet; +import java.util.Iterator; +import java.util.Set; +import java.util.concurrent.Executor; +import java.util.concurrent.locks.Condition; +import java.util.concurrent.locks.ReentrantLock; + +/** + * Unordered implementation of the {@link StreamElementQueue}. The unordered stream element queue + * emits asynchronous results as soon as they are completed. Additionally it maintains the + * watermark-stream record order. This means that no stream record can be overtaken by a watermark + * and no watermark can overtake a stream record. However, stream records falling in the same + * segment between two watermarks can overtake each other (their emission order is not guaranteed). 
+ */ +public class UnorderedStreamElementQueue implements StreamElementQueue { + + private static final Logger LOG = LoggerFactory.getLogger(UnorderedStreamElementQueue.class); + + /** Capacity of this queue */ + private final int capacity; + + /** Executor to run the onComplete callbacks */ + private final Executor executor; + + /** OperatorActions to signal the owning operator a failure */ + private final OperatorActions operatorActions; + + /** Queue of uncompleted stream element queue entries segmented by watermarks */ + private final ArrayDeque>> uncompletedQueue; + + /** Queue of completed stream element queue entries */ + private final ArrayDeque> completedQueue; + + /** First (chronologically oldest) uncompleted set of stream element queue entries */ + private Set> firstSet; + + // Last (chronologically youngest) uncompleted set of stream element queue entries. New + // stream element queue entries are inserted into this set. + private Set> lastSet; + private volatile int numberEntries; + + /** Locks and conditions for the blocking queue */ + private final ReentrantLock lock; + private final Condition notFull; + private final Condition hasCompletedEntries; + + public UnorderedStreamElementQueue( + int capacity, + Executor executor, + OperatorActions operatorActions) { + + Preconditions.checkArgument(capacity > 0, "The capacity must be larger than 0."); + this.capacity = capacity; + + this.executor = Preconditions.checkNotNull(executor, "executor"); + + this.operatorActions = Preconditions.checkNotNull(operatorActions, "operatorActions"); + + this.uncompletedQueue = new ArrayDeque<>(capacity); + this.completedQueue = new ArrayDeque<>(capacity); + + this.firstSet = new HashSet<>(capacity); + this.lastSet = firstSet; + + this.numberEntries = 0; + + this.lock = new ReentrantLock(); + this.notFull = lock.newCondition(); + this.hasCompletedEntries = lock.newCondition(); + } + + @Override + public void put(StreamElementQueueEntry streamElementQueueEntry) throws 
InterruptedException { + lock.lockInterruptibly(); + + try { + while (numberEntries >= capacity) { + notFull.await(); + } + + addEntry(streamElementQueueEntry); + } finally { + lock.unlock(); + } + } + + @Override + public boolean tryPut(StreamElementQueueEntry streamElementQueueEntry) throws InterruptedException { + lock.lockInterruptibly(); + + try { + if (numberEntries < capacity) { + addEntry(streamElementQueueEntry); + + LOG.debug("Put element into ordered stream element queue. New filling degree " + + "({}/{}).", numberEntries, capacity); + + return true; + } else { + LOG.debug("Failed to put element into ordered stream element queue because it " + + "was full ({}/{}).", numberEntries, capacity); + + return false; + } + } finally { + lock.unlock(); + } + } + + @Override + public AsyncResult peekBlockingly() throws InterruptedException { + lock.lockInterruptibly(); + + try { + while (completedQueue.isEmpty()) { + hasCompletedEntries.await(); + } + + LOG.debug("Peeked head element from ordered stream element queue with filling degree " + + "({}/{}).", numberEntries, capacity); + + return completedQueue.peek(); + } finally { + lock.unlock(); + } + } + + @Override + public AsyncResult poll() throws InterruptedException { + lock.lockInterruptibly(); + + try { + while (completedQueue.isEmpty()) { + hasCompletedEntries.await(); + } + + numberEntries--; + notFull.signalAll(); + + LOG.debug("Polled element from unordered stream element queue. 
New filling degree " + "({}/{}).", numberEntries, capacity); + + return completedQueue.poll(); + } finally { + lock.unlock(); + } + } + + @Override + public Collection> values() throws InterruptedException { + lock.lockInterruptibly(); + + try { + StreamElementQueueEntry[] array = new StreamElementQueueEntry[numberEntries]; + + array = completedQueue.toArray(array); + + int counter = completedQueue.size(); + + for (StreamElementQueueEntry entry: firstSet) { + array[counter] = entry; + counter++; + } + + for (Set> asyncBufferEntries : uncompletedQueue) { + + for (StreamElementQueueEntry streamElementQueueEntry : asyncBufferEntries) { + array[counter] = streamElementQueueEntry; + counter++; + } + } + + return Arrays.asList(array); + } finally { + lock.unlock(); + } + } + + @Override + public boolean isEmpty() { + return numberEntries == 0; + } + + @Override + public int size() { + return numberEntries; + } + + /** + * Callback for onComplete events for the given stream element queue entry. Whenever a queue + * entry is completed, it is checked whether this entry belongs to the first set. If this is the + * case, then the element is added to the completed entries queue from where it can be consumed. + * If the first set becomes empty, then the next set is polled from the uncompleted entries + * queue. Completed entries from this new set are then added to the completed entries queue. + * + * @param streamElementQueueEntry which has been completed + * @throws InterruptedException if the current thread has been interrupted while performing the + * on complete callback. 
+ */ + public void onCompleteHandler(StreamElementQueueEntry streamElementQueueEntry) throws InterruptedException { + lock.lockInterruptibly(); + + try { + if (firstSet.remove(streamElementQueueEntry)) { + completedQueue.offer(streamElementQueueEntry); + + while (firstSet.isEmpty() && firstSet != lastSet) { + firstSet = uncompletedQueue.poll(); + + Iterator> it = firstSet.iterator(); + + while (it.hasNext()) { + StreamElementQueueEntry bufferEntry = it.next(); + + if (bufferEntry.isDone()) { + completedQueue.offer(bufferEntry); + it.remove(); + } + } + } + + LOG.debug("Signal unordered stream element queue has completed entries."); + hasCompletedEntries.signalAll(); + } + } finally { + lock.unlock(); + } + } + + /** + * Add the given stream element queue entry to the current last set if it is not a watermark. + * If it is a watermark, then stop adding to the current last set, insert the watermark into its + * own set and add a new last set. + * + * @param streamElementQueueEntry to be inserted + * @param Type of the stream element queue entry's result + */ + private void addEntry(StreamElementQueueEntry streamElementQueueEntry) { + assert(lock.isHeldByCurrentThread()); + + if (streamElementQueueEntry.isWatermark()) { + lastSet = new HashSet<>(capacity); + + if (firstSet.isEmpty()) { + firstSet.add(streamElementQueueEntry); + } else { + Set> watermarkSet = new HashSet<>(1); + watermarkSet.add(streamElementQueueEntry); + uncompletedQueue.offer(watermarkSet); + } + uncompletedQueue.offer(lastSet); + } else { + lastSet.add(streamElementQueueEntry); + } + + streamElementQueueEntry.onComplete(new AcceptFunction>() { + @Override + public void accept(StreamElementQueueEntry value) { + try { + onCompleteHandler(value); + } catch (InterruptedException e) { + // The accept executor thread got interrupted. This is probably caused by + // the shutdown of the executor. 
+ LOG.debug("AsyncBufferEntry could not be properly completed because the " + + "executor thread has been interrupted.", e); + } catch (Throwable t) { + operatorActions.failOperator(new Exception("Could not complete the " + + "stream element queue entry: " + value + '.', t)); + } + } + }, executor); + + numberEntries++; + } +} diff --git a/flink-streaming-java/src/main/java/org/apache/flink/streaming/api/operators/async/queue/WatermarkQueueEntry.java b/flink-streaming-java/src/main/java/org/apache/flink/streaming/api/operators/async/queue/WatermarkQueueEntry.java new file mode 100644 index 0000000000000..6fe4f440e003d --- /dev/null +++ b/flink-streaming-java/src/main/java/org/apache/flink/streaming/api/operators/async/queue/WatermarkQueueEntry.java @@ -0,0 +1,47 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.flink.streaming.api.operators.async.queue; + +import org.apache.flink.runtime.concurrent.Future; +import org.apache.flink.runtime.concurrent.impl.FlinkCompletableFuture; +import org.apache.flink.streaming.api.watermark.Watermark; + +/** + * {@link StreamElementQueueEntry} implementation for the {@link Watermark}. 
+ */ +public class WatermarkQueueEntry extends StreamElementQueueEntry implements AsyncWatermarkResult { + + private final Future future; + + public WatermarkQueueEntry(Watermark watermark) { + super(watermark); + + this.future = FlinkCompletableFuture.completed(watermark); + } + + @Override + public Watermark getWatermark() { + return (Watermark) getStreamElement(); + } + + @Override + protected Future getFuture() { + return future; + } +} diff --git a/flink-streaming-java/src/main/java/org/apache/flink/streaming/runtime/tasks/OperatorChain.java b/flink-streaming-java/src/main/java/org/apache/flink/streaming/runtime/tasks/OperatorChain.java index 680cc291608af..7771064e2e2ad 100644 --- a/flink-streaming-java/src/main/java/org/apache/flink/streaming/runtime/tasks/OperatorChain.java +++ b/flink-streaming-java/src/main/java/org/apache/flink/streaming/runtime/tasks/OperatorChain.java @@ -52,7 +52,6 @@ import java.util.Map; import java.util.Random; - /** * The {@code OperatorChain} contains all operators that are executed as one chain within a single * {@link StreamTask}. diff --git a/flink-streaming-java/src/main/java/org/apache/flink/streaming/runtime/tasks/StreamTask.java b/flink-streaming-java/src/main/java/org/apache/flink/streaming/runtime/tasks/StreamTask.java index 0fb22b887bd12..bd9215ab093ff 100644 --- a/flink-streaming-java/src/main/java/org/apache/flink/streaming/runtime/tasks/StreamTask.java +++ b/flink-streaming-java/src/main/java/org/apache/flink/streaming/runtime/tasks/StreamTask.java @@ -326,11 +326,6 @@ public Long getValue() { LOG.error("Could not shut down async checkpoint threads", t); } - // release the output resources. this method should never fail. - if (operatorChain != null) { - operatorChain.releaseOutputs(); - } - // we must! perform this cleanup try { cleanup(); @@ -344,6 +339,11 @@ public Long getValue() { if (!disposed) { disposeAllOperators(); } + + // release the output resources. this method should never fail. 
+ if (operatorChain != null) { + operatorChain.releaseOutputs(); + } } } diff --git a/flink-streaming-java/src/test/java/org/apache/flink/streaming/api/functions/async/RichAsyncFunctionTest.java b/flink-streaming-java/src/test/java/org/apache/flink/streaming/api/functions/async/RichAsyncFunctionTest.java index b8788c6a94daa..12ac69352382e 100644 --- a/flink-streaming-java/src/test/java/org/apache/flink/streaming/api/functions/async/RichAsyncFunctionTest.java +++ b/flink-streaming-java/src/test/java/org/apache/flink/streaming/api/functions/async/RichAsyncFunctionTest.java @@ -18,147 +18,252 @@ package org.apache.flink.streaming.api.functions.async; -import org.apache.flink.api.common.accumulators.IntCounter; +import org.apache.flink.api.common.ExecutionConfig; +import org.apache.flink.api.common.accumulators.Accumulator; +import org.apache.flink.api.common.functions.BroadcastVariableInitializer; import org.apache.flink.api.common.functions.IterationRuntimeContext; +import org.apache.flink.api.common.functions.ReduceFunction; import org.apache.flink.api.common.functions.RuntimeContext; +import org.apache.flink.api.common.state.ListStateDescriptor; +import org.apache.flink.api.common.state.ReducingStateDescriptor; import org.apache.flink.api.common.state.ValueStateDescriptor; +import org.apache.flink.metrics.MetricGroup; import org.apache.flink.streaming.api.functions.async.collector.AsyncCollector; -import org.junit.Assert; import org.junit.Test; -import static org.mockito.Matchers.anyString; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.fail; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.when; /** - * Test case for {@link RichAsyncFunction} + * Test cases for {@link RichAsyncFunction} */ public class RichAsyncFunctionTest { - private RichAsyncFunction initFunction() { - RichAsyncFunction function = new RichAsyncFunction() { + /** + * Test the set of iteration runtime context methods in the context of a + 
* {@link RichAsyncFunction}. + */ + @Test + public void testIterationRuntimeContext() throws Exception { + RichAsyncFunction function = new RichAsyncFunction() { + private static final long serialVersionUID = -2023923961609455894L; + @Override - public void asyncInvoke(String input, AsyncCollector collector) throws Exception { - getRuntimeContext().getState(mock(ValueStateDescriptor.class)); + public void asyncInvoke(Integer input, AsyncCollector collector) throws Exception { + // no op } }; - return function; + int superstepNumber = 42; + + IterationRuntimeContext mockedIterationRuntimeContext = mock(IterationRuntimeContext.class); + when(mockedIterationRuntimeContext.getSuperstepNumber()).thenReturn(superstepNumber); + function.setRuntimeContext(mockedIterationRuntimeContext); + + IterationRuntimeContext iterationRuntimeContext = function.getIterationRuntimeContext(); + + assertEquals(superstepNumber, iterationRuntimeContext.getSuperstepNumber()); + + try { + iterationRuntimeContext.getIterationAggregator("foobar"); + fail("Expected getIterationAggregator to fail with unsupported operation exception"); + } catch (UnsupportedOperationException e) { + // expected + } + + try { + iterationRuntimeContext.getPreviousIterationAggregate("foobar"); + fail("Expected getPreviousIterationAggregator to fail with unsupported operation exception"); + } catch (UnsupportedOperationException e) { + // expected + } } + /** + * Test the set of runtime context methods in the context of a {@link RichAsyncFunction}. 
+ */ @Test - public void testIterationRuntimeContext() throws Exception { - // test runtime context is not set - RichAsyncFunction function = new RichAsyncFunction() { + public void testRuntimeContext() throws Exception { + RichAsyncFunction function = new RichAsyncFunction() { + private static final long serialVersionUID = 1707630162838967972L; + @Override - public void asyncInvoke(String input, AsyncCollector collector) throws Exception { - getIterationRuntimeContext().getIterationAggregator("test"); + public void asyncInvoke(Integer input, AsyncCollector collector) throws Exception { + // no op } }; + final String taskName = "foobarTask"; + final MetricGroup metricGroup = mock(MetricGroup.class); + final int numberOfParallelSubtasks = 42; + final int indexOfSubtask = 43; + final int attemptNumber = 1337; + final String taskNameWithSubtask = "barfoo"; + final ExecutionConfig executionConfig = mock(ExecutionConfig.class); + final ClassLoader userCodeClassLoader = mock(ClassLoader.class); + + RuntimeContext mockedRuntimeContext = mock(RuntimeContext.class); + + when(mockedRuntimeContext.getTaskName()).thenReturn(taskName); + when(mockedRuntimeContext.getMetricGroup()).thenReturn(metricGroup); + when(mockedRuntimeContext.getNumberOfParallelSubtasks()).thenReturn(numberOfParallelSubtasks); + when(mockedRuntimeContext.getIndexOfThisSubtask()).thenReturn(indexOfSubtask); + when(mockedRuntimeContext.getAttemptNumber()).thenReturn(attemptNumber); + when(mockedRuntimeContext.getTaskNameWithSubtasks()).thenReturn(taskNameWithSubtask); + when(mockedRuntimeContext.getExecutionConfig()).thenReturn(executionConfig); + when(mockedRuntimeContext.getUserCodeClassLoader()).thenReturn(userCodeClassLoader); + + function.setRuntimeContext(mockedRuntimeContext); + + RuntimeContext runtimeContext = function.getRuntimeContext(); + + assertEquals(taskName, runtimeContext.getTaskName()); + assertEquals(metricGroup, runtimeContext.getMetricGroup()); + assertEquals(numberOfParallelSubtasks, 
runtimeContext.getNumberOfParallelSubtasks()); + assertEquals(indexOfSubtask, runtimeContext.getIndexOfThisSubtask()); + assertEquals(attemptNumber, runtimeContext.getAttemptNumber()); + assertEquals(taskNameWithSubtask, runtimeContext.getTaskNameWithSubtasks()); + assertEquals(executionConfig, runtimeContext.getExecutionConfig()); + assertEquals(userCodeClassLoader, runtimeContext.getUserCodeClassLoader()); + try { - function.asyncInvoke("test", mock(AsyncCollector.class)); - } - catch (Exception e) { - Assert.assertEquals("The runtime context has not been initialized.", e.getMessage()); + runtimeContext.getDistributedCache(); + fail("Expected getDistributedCached to fail with unsupported operation exception."); + } catch (UnsupportedOperationException e) { + // expected } - // test get agg from iteration runtime context - function.setRuntimeContext(mock(IterationRuntimeContext.class)); - try { - function.asyncInvoke("test", mock(AsyncCollector.class)); + runtimeContext.getState(new ValueStateDescriptor<>("foobar", Integer.class, 42)); + fail("Expected getState to fail with unsupported operation exception."); + } catch (UnsupportedOperationException e) { + // expected } - catch (Exception e) { - Assert.assertEquals("Get iteration aggregator is not supported in rich async function", e.getMessage()); + + try { + runtimeContext.getListState(new ListStateDescriptor<>("foobar", Integer.class)); + fail("Expected getListState to fail with unsupported operation exception."); + } catch (UnsupportedOperationException e) { + // expected } - // get state from iteration runtime context - function = new RichAsyncFunction() { - @Override - public void asyncInvoke(String input, AsyncCollector collector) throws Exception { - getIterationRuntimeContext().getState(mock(ValueStateDescriptor.class)); - } - }; + try { + runtimeContext.getReducingState(new ReducingStateDescriptor<>("foobar", new ReduceFunction() { + private static final long serialVersionUID = 2136425961884441050L; - 
function.setRuntimeContext(mock(RuntimeContext.class)); + @Override + public Integer reduce(Integer value1, Integer value2) throws Exception { + return value1; + } + }, Integer.class)); + fail("Expected getReducingState to fail with unsupported operation exception."); + } catch (UnsupportedOperationException e) { + // expected + } try { - function.asyncInvoke("test", mock(AsyncCollector.class)); - } - catch (Exception e) { - Assert.assertEquals("State is not supported in rich async function", e.getMessage()); - } + runtimeContext.addAccumulator("foobar", new Accumulator() { + private static final long serialVersionUID = -4673320336846482358L; - // test getting a counter from iteration runtime context - function = new RichAsyncFunction() { - @Override - public void asyncInvoke(String input, AsyncCollector collector) throws Exception { - getIterationRuntimeContext().getIntCounter("test").add(6); - } - }; + @Override + public void add(Integer value) { + // no op + } - IterationRuntimeContext context = mock(IterationRuntimeContext.class); - IntCounter counter = new IntCounter(0); - when(context.getIntCounter(anyString())).thenReturn(counter); + @Override + public Integer getLocalValue() { + return null; + } - function.setRuntimeContext(context); + @Override + public void resetLocal() { - function.asyncInvoke("test", mock(AsyncCollector.class)); + } - Assert.assertTrue(6 == counter.getLocalValue()); - } + @Override + public void merge(Accumulator other) { - @Test - public void testRuntimeContext() throws Exception { - // test run time context is not set - RichAsyncFunction function = new RichAsyncFunction() { - @Override - public void asyncInvoke(String input, AsyncCollector collector) throws Exception { - getRuntimeContext().getState(mock(ValueStateDescriptor.class)); - } - }; + } - try { - function.asyncInvoke("test", mock(AsyncCollector.class)); - } - catch (Exception e) { - Assert.assertEquals("The runtime context has not been initialized.", e.getMessage()); + 
@Override + public Accumulator clone() { + return null; + } + }); + fail("Expected addAccumulator to fail with unsupported operation exception."); + } catch (UnsupportedOperationException e) { + // expected } - // test get state - function = new RichAsyncFunction() { - @Override - public void asyncInvoke(String input, AsyncCollector collector) throws Exception { - getRuntimeContext().getState(mock(ValueStateDescriptor.class)); - } - }; + try { + runtimeContext.getAccumulator("foobar"); + fail("Expected getAccumulator to fail with unsupported operation exception."); + } catch (UnsupportedOperationException e) { + // expected + } - function.setRuntimeContext(mock(RuntimeContext.class)); + try { + runtimeContext.getAllAccumulators(); + fail("Expected getAllAccumulators to fail with unsupported operation exception."); + } catch (UnsupportedOperationException e) { + // expected + } try { - function.asyncInvoke("test", mock(AsyncCollector.class)); + runtimeContext.getIntCounter("foobar"); + fail("Expected getIntCounter to fail with unsupported operation exception."); + } catch (UnsupportedOperationException e) { + // expected } - catch (Exception e) { - Assert.assertEquals("State is not supported in rich async function", e.getMessage()); + + try { + runtimeContext.getLongCounter("foobar"); + fail("Expected getLongCounter to fail with unsupported operation exception."); + } catch (UnsupportedOperationException e) { + // expected } - // test getting a counter from runtime context - function = new RichAsyncFunction() { - @Override - public void asyncInvoke(String input, AsyncCollector collector) throws Exception { - getIterationRuntimeContext().getIntCounter("test").add(6); - } - }; + try { + runtimeContext.getDoubleCounter("foobar"); + fail("Expected getDoubleCounter to fail with unsupported operation exception."); + } catch (UnsupportedOperationException e) { + // expected + } - IterationRuntimeContext context = mock(IterationRuntimeContext.class); - IntCounter counter = 
new IntCounter(0); - when(context.getIntCounter(anyString())).thenReturn(counter); + try { + runtimeContext.getHistogram("foobar"); + fail("Expected getHistogram to fail with unsupported operation exception."); + } catch (UnsupportedOperationException e) { + // expected + } - function.setRuntimeContext(context); + try { + runtimeContext.hasBroadcastVariable("foobar"); + fail("Expected hasBroadcastVariable to fail with unsupported operation exception."); + } catch (UnsupportedOperationException e) { + // expected + } - function.asyncInvoke("test", mock(AsyncCollector.class)); + try { + runtimeContext.getBroadcastVariable("foobar"); + fail("Expected getBroadcastVariable to fail with unsupported operation exception."); + } catch (UnsupportedOperationException e) { + // expected + } - Assert.assertTrue(6 == counter.getLocalValue()); + try { + runtimeContext.getBroadcastVariableWithInitializer("foobar", new BroadcastVariableInitializer() { + @Override + public Object initializeBroadcastVariable(Iterable data) { + return null; + } + }); + fail("Expected getBroadcastVariableWithInitializer to fail with unsupported operation exception."); + } catch (UnsupportedOperationException e) { + // expected + } } } diff --git a/flink-streaming-java/src/test/java/org/apache/flink/streaming/api/operators/async/AsyncCollectorBufferTest.java b/flink-streaming-java/src/test/java/org/apache/flink/streaming/api/operators/async/AsyncCollectorBufferTest.java deleted file mode 100644 index d118d8029c300..0000000000000 --- a/flink-streaming-java/src/test/java/org/apache/flink/streaming/api/operators/async/AsyncCollectorBufferTest.java +++ /dev/null @@ -1,656 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.flink.streaming.api.operators.async; - -import org.apache.flink.api.common.typeutils.base.IntSerializer; -import org.apache.flink.configuration.Configuration; -import org.apache.flink.runtime.execution.Environment; -import org.apache.flink.runtime.operators.testutils.DummyEnvironment; -import org.apache.flink.streaming.api.datastream.AsyncDataStream; -import org.apache.flink.streaming.api.functions.async.buffer.StreamElementEntry; -import org.apache.flink.streaming.api.functions.async.collector.AsyncCollector; -import org.apache.flink.streaming.api.functions.async.buffer.AsyncCollectorBuffer; -import org.apache.flink.streaming.api.functions.async.AsyncFunction; -import org.apache.flink.streaming.api.graph.StreamConfig; -import org.apache.flink.streaming.api.operators.Output; -import org.apache.flink.streaming.api.watermark.Watermark; -import org.apache.flink.streaming.runtime.streamrecord.LatencyMarker; -import org.apache.flink.streaming.runtime.streamrecord.StreamElement; -import org.apache.flink.streaming.runtime.streamrecord.StreamRecord; -import org.apache.flink.streaming.runtime.tasks.StreamTask; -import org.junit.Assert; -import org.junit.Test; - -import java.io.IOException; -import java.util.*; -import java.util.concurrent.CountDownLatch; -import java.util.concurrent.ExecutorService; -import java.util.concurrent.Executors; - -import static 
org.mockito.Mockito.mock; -import static org.mockito.Mockito.when; - -/** - * Tests for {@link AsyncCollectorBuffer}. These test that: - * - *
    - *
  • Add a new item into the buffer
  • - *
  • Ordered mode processing
  • - *
  • Unordered mode processing
  • - *
  • Error handling
  • - *
- */ -public class AsyncCollectorBufferTest { - private final static ExecutorService EXECUTOR_SERVICE = Executors.newFixedThreadPool(10); - - private final Random RANDOM = new Random(); - - private AsyncFunction function; - - private AsyncWaitOperator operator; - - private AsyncCollectorBuffer buffer; - - private Output> output; - - private Object lock = new Object(); - - public AsyncCollectorBuffer getBuffer(int bufferSize, AsyncDataStream.OutputMode mode) throws Exception { - function = new AsyncFunction() { - @Override - public void asyncInvoke(Integer input, AsyncCollector collector) throws Exception { - - } - }; - - operator = new AsyncWaitOperator<>(function, bufferSize, mode); - - StreamConfig cfg = new StreamConfig(new Configuration()); - cfg.setTypeSerializerIn1(IntSerializer.INSTANCE); - - StreamTask mockTask = mock(StreamTask.class); - - when(mockTask.getCheckpointLock()).thenReturn(lock); - - Environment env = new DummyEnvironment("DUMMY;-D", 1, 0); - when(mockTask.getEnvironment()).thenReturn(env); - - output = new FakedOutput(); - - operator.setup(mockTask, cfg, output); - - buffer = operator.getBuffer(); - - return buffer; - } - - @Test - public void testAdd() throws Exception { - buffer = getBuffer(3, AsyncDataStream.OutputMode.ORDERED); - - synchronized (lock) { - buffer.addWatermark(new Watermark(0l)); - buffer.addLatencyMarker(new LatencyMarker(111L, 1, 1)); - } - - Assert.assertEquals(2, buffer.getQueue().size()); - - Iterator> iterator = buffer.getQueue().iterator(); - Watermark watermark = iterator.next().getStreamElement().asWatermark(); - Assert.assertEquals(0l, watermark.getTimestamp()); - - LatencyMarker latencyMarker = iterator.next().getStreamElement().asLatencyMarker(); - Assert.assertEquals(111l, latencyMarker.getMarkedTime()); - - buffer.setExtraStreamElement(new Watermark(222l)); - - Iterator elementIterator = buffer.getStreamElementsInBuffer(); - Assert.assertEquals(0l, elementIterator.next().asWatermark().getTimestamp()); - 
Assert.assertEquals(111l, elementIterator.next().asLatencyMarker().getMarkedTime()); - Assert.assertEquals(222l, elementIterator.next().asWatermark().getTimestamp()); - Assert.assertFalse(elementIterator.hasNext()); - } - - private void work(final boolean throwExcept) throws Exception { - final int ASYNC_COLLECTOR_NUM = 7; - - Iterator iterator = new Iterator() { - private int idx = 0; - - @Override - public boolean hasNext() { - return idx < ASYNC_COLLECTOR_NUM; - } - - @Override - public StreamElement next() { - ++idx; - - if (idx == 4) { - return new Watermark(333l); - } - else if (idx == 7) { - return new LatencyMarker(111L, 0, 0); - } - else { - StreamRecord ret = new StreamRecord<>(idx); - ret.setTimestamp(idx * idx); - - return ret; - } - } - - @Override - public void remove() { - // do nothing - } - }; - - while (iterator.hasNext()) { - final StreamElement record = iterator.next(); - - if (record.isRecord()) { - AsyncCollector tmp; - - synchronized (lock) { - tmp = buffer.addStreamRecord(record.asRecord()); - } - - final AsyncCollector collector = tmp; - - EXECUTOR_SERVICE.submit(new Runnable() { - @Override - public void run() { - try { - Thread.sleep(RANDOM.nextInt(100)); - - if (throwExcept) { - collector.collect(new Exception("wahahahaha...")); - } - else { - collector.collect(Collections.singletonList(record.asRecord().getValue())); - } - } catch (InterruptedException e) { - // do nothing - } - } - }); - } - else if (record.isWatermark()) { - synchronized (lock) { - buffer.addWatermark(record.asWatermark()); - } - } - else { - synchronized (lock) { - buffer.addLatencyMarker(record.asLatencyMarker()); - } - } - } - } - - @Test - public void testOrderedBuffer() throws Exception { - buffer = getBuffer(3, AsyncDataStream.OutputMode.ORDERED); - - buffer.startEmitterThread(); - - work(false); - - synchronized (lock) { - buffer.waitEmpty(); - } - - buffer.stopEmitterThread(); - - Assert.assertEquals("1,2,3,5,6,", ((FakedOutput)output).getValue()); - 
Assert.assertEquals("1,4,9,333,25,36,111,", ((FakedOutput)output).getTimestamp()); - } - - @Test - public void testUnorderedBuffer() throws Exception { - buffer = getBuffer(3, AsyncDataStream.OutputMode.UNORDERED); - - buffer.startEmitterThread(); - - work(false); - - synchronized (lock) { - buffer.waitEmpty(); - } - - buffer.stopEmitterThread(); - - Assert.assertEquals(333L, ((FakedOutput)output).getRawTimestamp().toArray()[3]); - - List result = ((FakedOutput)output).getRawValue(); - Collections.sort(result); - Assert.assertEquals("[1, 2, 3, 5, 6]", result.toString()); - - result = ((FakedOutput)output).getRawTimestamp(); - Collections.sort(result); - Assert.assertEquals("[1, 4, 9, 25, 36, 111, 333]", result.toString()); - } - - @Test - public void testOrderedBufferWithManualTriggering() throws Exception { - // test AsyncCollectorBuffer with different combinations of StreamElements in the buffer. - // by triggering completion of each AsyncCollector one by one manually, we can verify - // the output one by one accurately. - - FakedOutput fakedOutput; - AsyncCollector collector1, collector2; - - // 1. head element is a Watermark or LatencyMarker - buffer = getBuffer(3, AsyncDataStream.OutputMode.ORDERED); - fakedOutput = (FakedOutput)output; - - fakedOutput.expect(1); - - buffer.startEmitterThread(); - - synchronized (lock) { - buffer.addWatermark(new Watermark(1L)); - } - - fakedOutput.waitToFinish(); - - Assert.assertEquals("", fakedOutput.getValue()); - Assert.assertEquals("1,", fakedOutput.getTimestamp()); - - - fakedOutput.expect(1); - - synchronized (lock) { - buffer.addLatencyMarker(new LatencyMarker(2L, 0, 0)); - } - - fakedOutput.waitToFinish(); - - Assert.assertEquals("", fakedOutput.getValue()); - Assert.assertEquals("1,2,", fakedOutput.getTimestamp()); - - synchronized (lock) { - buffer.waitEmpty(); - buffer.stopEmitterThread(); - } - - - // 2. buffer layout: WM -> SR1 -> LM -> SR2, where SR2 finishes first, then SR1. 
- buffer = getBuffer(5, AsyncDataStream.OutputMode.ORDERED); - fakedOutput = (FakedOutput)output; - - synchronized (lock) { - buffer.addWatermark(new Watermark(1L)); - collector1 = buffer.addStreamRecord(new StreamRecord<>(111, 2L)); - buffer.addLatencyMarker(new LatencyMarker(3L, 0, 0)); - collector2 = buffer.addStreamRecord(new StreamRecord<>(222, 4L)); - } - - fakedOutput.expect(1); - - buffer.startEmitterThread(); - - fakedOutput.waitToFinish(); - - // in ORDERED mode, the result of completed SR2 will not be emitted right now. - collector2.collect(Collections.singletonList(222)); - - Thread.sleep(1000); - - Assert.assertEquals("", fakedOutput.getValue()); - Assert.assertEquals("1,", fakedOutput.getTimestamp()); - - fakedOutput.expect(3); - - collector1.collect(Collections.singletonList(111)); - - fakedOutput.waitToFinish(); - - Assert.assertEquals("111,222,", fakedOutput.getValue()); - Assert.assertEquals("1,2,3,4,", fakedOutput.getTimestamp()); - - synchronized (lock) { - buffer.waitEmpty(); - buffer.stopEmitterThread(); - } - - // 3. buffer layout: WM -> SR1 -> LM -> S2, where SR1 completes first, then SR2. - buffer = getBuffer(5, AsyncDataStream.OutputMode.ORDERED); - fakedOutput = (FakedOutput)output; - - synchronized (lock) { - buffer.addWatermark(new Watermark(1L)); - collector1 = buffer.addStreamRecord(new StreamRecord<>(111, 2L)); - buffer.addLatencyMarker(new LatencyMarker(3L, 0, 0)); - collector2 = buffer.addStreamRecord(new StreamRecord<>(222, 4L)); - } - - fakedOutput.expect(1); - - buffer.startEmitterThread(); - - fakedOutput.waitToFinish(); - - fakedOutput.expect(2); - - // in ORDERED mode, the result of completed SR1 will be emitted asap. 
- collector1.collect(Collections.singletonList(111)); - - fakedOutput.waitToFinish(); - - Assert.assertEquals("111,", fakedOutput.getValue()); - Assert.assertEquals("1,2,3,", fakedOutput.getTimestamp()); - - fakedOutput.expect(1); - - collector2.collect(Collections.singletonList(222)); - - fakedOutput.waitToFinish(); - - Assert.assertEquals("111,222,", fakedOutput.getValue()); - Assert.assertEquals("1,2,3,4,", fakedOutput.getTimestamp()); - - synchronized (lock) { - buffer.waitEmpty(); - buffer.stopEmitterThread(); - } - - // 4. buffer layout: SR1 -> SR2 -> WM -> LM, where SR2 finishes first. - buffer = getBuffer(5, AsyncDataStream.OutputMode.ORDERED); - fakedOutput = (FakedOutput)output; - - synchronized (lock) { - collector1 = buffer.addStreamRecord(new StreamRecord<>(111, 1L)); - collector2 = buffer.addStreamRecord(new StreamRecord<>(222, 2L)); - buffer.addWatermark(new Watermark(3L)); - buffer.addLatencyMarker(new LatencyMarker(4L, 0, 0)); - } - - buffer.startEmitterThread(); - - // in ORDERED mode, the result of completed SR2 will not be emitted right now. - collector2.collect(Collections.singletonList(222)); - - Thread.sleep(1000); - - Assert.assertEquals("", fakedOutput.getValue()); - Assert.assertEquals("", fakedOutput.getTimestamp()); - - fakedOutput.expect(4); - - collector1.collect(Collections.singletonList(111)); - - fakedOutput.waitToFinish(); - - Assert.assertEquals("111,222,", fakedOutput.getValue()); - Assert.assertEquals("1,2,3,4,", fakedOutput.getTimestamp()); - - synchronized (lock) { - buffer.waitEmpty(); - buffer.stopEmitterThread(); - } - } - - @Test - public void testUnorderedWithManualTriggering() throws Exception { - // verify the output in UNORDERED mode by manual triggering. - - FakedOutput fakedOutput; - AsyncCollector collector1, collector2, collector3; - - // 1. 
head element is a Watermark or LatencyMarker - buffer = getBuffer(5, AsyncDataStream.OutputMode.UNORDERED); - fakedOutput = (FakedOutput)output; - - fakedOutput.expect(1); - - buffer.startEmitterThread(); - - synchronized (lock) { - buffer.addWatermark(new Watermark(1L)); - } - - fakedOutput.waitToFinish(); - - Assert.assertEquals("", fakedOutput.getValue()); - Assert.assertEquals("1,", fakedOutput.getTimestamp()); - - - fakedOutput.expect(1); - - synchronized (lock) { - buffer.addLatencyMarker(new LatencyMarker(2L, 0, 0)); - } - - fakedOutput.waitToFinish(); - - Assert.assertEquals("", fakedOutput.getValue()); - Assert.assertEquals("1,2,", fakedOutput.getTimestamp()); - - synchronized (lock) { - buffer.waitEmpty(); - buffer.stopEmitterThread(); - } - - - // 2. buffer layout: LM -> SR1 -> SR2 -> WM1 -> SR3 -> WM2, where the order of completion is SR3, SR2, SR1 - buffer = getBuffer(6, AsyncDataStream.OutputMode.UNORDERED); - fakedOutput = (FakedOutput)output; - - synchronized (lock) { - buffer.addLatencyMarker(new LatencyMarker(1L, 0, 0)); - collector1 = buffer.addStreamRecord(new StreamRecord<>(111, 2L)); - collector2 = buffer.addStreamRecord(new StreamRecord<>(222, 3L)); - buffer.addWatermark(new Watermark(4L)); - collector3 = buffer.addStreamRecord(new StreamRecord<>(333, 5L)); - buffer.addWatermark(new Watermark(6L)); - } - - fakedOutput.expect(1); - - buffer.startEmitterThread(); - - fakedOutput.waitToFinish(); - - // in UNORDERED mode, the result of completed SR3 will not be emitted right now. 
- collector3.collect(Collections.singletonList(333)); - - Thread.sleep(1000); - - Assert.assertEquals("", fakedOutput.getValue()); - Assert.assertEquals("1,", fakedOutput.getTimestamp()); - - fakedOutput.expect(1); - - // SR2 will be emitted - collector2.collect(Collections.singletonList(222)); - - fakedOutput.waitToFinish(); - - Assert.assertEquals("222,", fakedOutput.getValue()); - Assert.assertEquals("1,3,", fakedOutput.getTimestamp()); - - // SR1 will be emitted first, then WM, and then SR3 and WM2 - fakedOutput.expect(4); - collector1.collect(Collections.singletonList(111)); - - fakedOutput.waitToFinish(); - - Assert.assertEquals("222,111,333,", fakedOutput.getValue()); - Assert.assertEquals("1,3,2,4,5,6,", fakedOutput.getTimestamp()); - - synchronized (lock) { - buffer.waitEmpty(); - buffer.stopEmitterThread(); - } - - // 3. buffer layout: WM1 -> SR1 -> SR2 -> LM -> SR3 -> WM2, where the order of completion is SR2, SR1, SR3 - buffer = getBuffer(6, AsyncDataStream.OutputMode.UNORDERED); - fakedOutput = (FakedOutput)output; - - synchronized (lock) { - buffer.addWatermark(new Watermark(1L)); - collector1 = buffer.addStreamRecord(new StreamRecord<>(111, 2L)); - collector2 = buffer.addStreamRecord(new StreamRecord<>(222, 3L)); - buffer.addLatencyMarker(new LatencyMarker(4L, 0, 0)); - collector3 = buffer.addStreamRecord(new StreamRecord<>(333, 5L)); - buffer.addWatermark(new Watermark(6L)); - } - - // the result of SR2 will be emitted following WM1 - collector2.collect(Collections.singletonList(222)); - - fakedOutput.expect(2); - - buffer.startEmitterThread(); - - fakedOutput.waitToFinish(); - - Assert.assertEquals("222,", fakedOutput.getValue()); - Assert.assertEquals("1,3,", fakedOutput.getTimestamp()); - - // SR1 and LM will be emitted - fakedOutput.expect(2); - collector1.collect(Collections.singletonList(111)); - - fakedOutput.waitToFinish(); - - Assert.assertEquals("222,111,", fakedOutput.getValue()); - Assert.assertEquals("1,3,2,4,", 
fakedOutput.getTimestamp()); - - // SR3 and WM2 will be emitted - fakedOutput.expect(2); - collector3.collect(Collections.singletonList(333)); - - fakedOutput.waitToFinish(); - - Assert.assertEquals("222,111,333,", fakedOutput.getValue()); - Assert.assertEquals("1,3,2,4,5,6,", fakedOutput.getTimestamp()); - - synchronized (lock) { - buffer.waitEmpty(); - buffer.stopEmitterThread(); - } - - } - - - - @Test - public void testBufferWithException() throws Exception { - buffer = getBuffer(3, AsyncDataStream.OutputMode.UNORDERED); - - buffer.startEmitterThread(); - - IOException expected = null; - try { - work(true); - } - catch (IOException e) { - expected = e; - } - - Assert.assertNotNull(expected); - Assert.assertEquals(expected.getMessage(), "wahahahaha..."); - - synchronized (lock) { - buffer.waitEmpty(); - } - - buffer.stopEmitterThread(); - } - - public class FakedOutput implements Output> { - private List outputs; - private List timestamps; - - private CountDownLatch latch; - - public FakedOutput() { - this.outputs = new ArrayList<>(); - this.timestamps = new ArrayList<>(); - } - - @Override - public void collect(StreamRecord record) { - outputs.add(record.getValue().longValue()); - if (record.hasTimestamp()) { - timestamps.add(record.getTimestamp()); - } - - if (latch != null) { - latch.countDown(); - } - } - - @Override - public void emitWatermark(Watermark mark) { - timestamps.add(mark.getTimestamp()); - - if (latch != null) { - latch.countDown(); - } - } - - @Override - public void emitLatencyMarker(LatencyMarker latencyMarker) { - timestamps.add(latencyMarker.getMarkedTime()); - - if (latch != null) { - latch.countDown(); - } - } - - @Override - public void close() { - } - - public String getValue() { - StringBuilder sb = new StringBuilder(); - for (Long i : outputs) { - sb.append(i).append(","); - } - return sb.toString(); - } - - public String getTimestamp() { - StringBuilder sb = new StringBuilder(); - for (Long i : timestamps) { - 
sb.append(i).append(","); - } - return sb.toString(); - } - - public List getRawValue() { - return outputs; - } - - public List getRawTimestamp() { - return timestamps; - } - - public void expect(int count) { - latch = new CountDownLatch(count); - } - - public void waitToFinish() throws InterruptedException { - latch.await(); - } - } -} diff --git a/flink-streaming-java/src/test/java/org/apache/flink/streaming/api/operators/async/AsyncWaitOperatorTest.java b/flink-streaming-java/src/test/java/org/apache/flink/streaming/api/operators/async/AsyncWaitOperatorTest.java index 560ee5a40b2ba..10ee14f9d38db 100644 --- a/flink-streaming-java/src/test/java/org/apache/flink/streaming/api/operators/async/AsyncWaitOperatorTest.java +++ b/flink-streaming-java/src/test/java/org/apache/flink/streaming/api/operators/async/AsyncWaitOperatorTest.java @@ -19,6 +19,7 @@ package org.apache.flink.streaming.api.operators.async; import org.apache.flink.api.common.ExecutionConfig; +import org.apache.flink.api.common.functions.MapFunction; import org.apache.flink.api.common.functions.RichMapFunction; import org.apache.flink.api.common.typeinfo.BasicTypeInfo; import org.apache.flink.api.common.typeutils.base.IntSerializer; @@ -36,8 +37,10 @@ import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; import org.apache.flink.streaming.api.functions.async.RichAsyncFunction; import org.apache.flink.streaming.api.functions.async.collector.AsyncCollector; -import org.apache.flink.streaming.api.functions.sink.SinkFunction; +import org.apache.flink.streaming.api.functions.sink.DiscardingSink; import org.apache.flink.streaming.api.graph.StreamConfig; +import org.apache.flink.streaming.api.operators.async.queue.StreamElementQueue; +import org.apache.flink.streaming.api.operators.async.queue.StreamElementQueueEntry; import org.apache.flink.streaming.api.watermark.Watermark; import org.apache.flink.streaming.runtime.streamrecord.StreamRecord; import 
org.apache.flink.streaming.runtime.tasks.OneInputStreamTask; @@ -45,17 +48,15 @@ import org.apache.flink.streaming.runtime.tasks.StreamMockEnvironment; import org.apache.flink.streaming.util.OneInputStreamOperatorTestHarness; import org.apache.flink.streaming.util.TestHarnessUtil; +import org.apache.flink.util.TestLogger; import org.junit.Assert; import org.junit.Test; -import java.util.ArrayList; +import java.util.ArrayDeque; import java.util.Collections; import java.util.Comparator; import java.util.Iterator; -import java.util.List; import java.util.Queue; -import java.util.Random; -import java.util.concurrent.ConcurrentLinkedDeque; import java.util.concurrent.ConcurrentLinkedQueue; import java.util.concurrent.CountDownLatch; import java.util.concurrent.ExecutorService; @@ -74,19 +75,16 @@ *
  • Snapshot state and restore state
  • * */ -public class AsyncWaitOperatorTest { - - // hold sink result - private static Queue sinkResult; +public class AsyncWaitOperatorTest extends TestLogger { private static class MyAsyncFunction extends RichAsyncFunction { - final int SLEEP_FACTOR = 100; - final int THREAD_POOL_SIZE = 10; + private static final long serialVersionUID = 8522411971886428444L; - transient static ExecutorService executorService; - static int counter = 0; + private static final long TIMEOUT = 5000L; + private static final int THREAD_POOL_SIZE = 10; - static Random random = new Random(); + static ExecutorService executorService; + static int counter = 0; @Override public void open(Configuration parameters) throws Exception { @@ -105,33 +103,35 @@ public void open(Configuration parameters) throws Exception { public void close() throws Exception { super.close(); + freeExecutor(); + } + + private void freeExecutor() { synchronized (MyAsyncFunction.class) { --counter; if (counter == 0) { executorService.shutdown(); - executorService.awaitTermination(SLEEP_FACTOR * THREAD_POOL_SIZE, TimeUnit.MILLISECONDS); + + try { + if (!executorService.awaitTermination(TIMEOUT, TimeUnit.MILLISECONDS)) { + executorService.shutdownNow(); + } + } catch (InterruptedException interrupted) { + executorService.shutdownNow(); + + Thread.currentThread().interrupt(); + } } } } @Override public void asyncInvoke(final Integer input, final AsyncCollector collector) throws Exception { - this.executorService.submit(new Runnable() { + executorService.submit(new Runnable() { @Override public void run() { - // wait for while to simulate async operation here - int sleep = (int) (random.nextFloat() * SLEEP_FACTOR); - - try { - Thread.sleep(sleep); - List ret = new ArrayList<>(); - ret.add(input*2); - collector.collect(ret); - } - catch (InterruptedException e) { - // do nothing - } + collector.collect(Collections.singletonList(input * 2)); } }); } @@ -141,11 +141,13 @@ public void run() { * A special {@link 
org.apache.flink.streaming.api.functions.async.AsyncFunction} without issuing * {@link AsyncCollector#collect} until the latch counts to zero. * This function is used in the testStateSnapshotAndRestore, ensuring - * that {@link org.apache.flink.streaming.api.functions.async.buffer.StreamElementEntry} can stay - * in the {@link org.apache.flink.streaming.api.functions.async.buffer.AsyncCollectorBuffer} to be + * that {@link StreamElementQueueEntry} can stay + * in the {@link StreamElementQueue} to be * snapshotted while checkpointing. */ private static class LazyAsyncFunction extends MyAsyncFunction { + private static final long serialVersionUID = 3537791752703154670L; + private static CountDownLatch latch; public LazyAsyncFunction() { @@ -200,17 +202,23 @@ public int compare(Object o1, Object o2) { } } + /** + * Test the AsyncWaitOperator with ordered mode and event time. + */ @Test - public void testWaterMarkOrdered() throws Exception { - testWithWatermark(AsyncDataStream.OutputMode.ORDERED); + public void testEventTimeOrdered() throws Exception { + testEventTime(AsyncDataStream.OutputMode.ORDERED); } + /** + * Test the AsyncWaitOperator with unordered mode and event time. + */ @Test public void testWaterMarkUnordered() throws Exception { - testWithWatermark(AsyncDataStream.OutputMode.UNORDERED); + testEventTime(AsyncDataStream.OutputMode.UNORDERED); } - private void testWithWatermark(AsyncDataStream.OutputMode mode) throws Exception { + private void testEventTime(AsyncDataStream.OutputMode mode) throws Exception { final AsyncWaitOperator operator = new AsyncWaitOperator<>(new MyAsyncFunction(), 2, mode); final OneInputStreamOperatorTestHarness testHarness = @@ -255,40 +263,42 @@ private void testWithWatermark(AsyncDataStream.OutputMode mode) throws Exception } } + /** + * Test the AsyncWaitOperator with ordered mode and processing time. 
+ */ @Test - public void testOrdered() throws Exception { - testRun(AsyncDataStream.OutputMode.ORDERED); + public void testProcessingTimeOrdered() throws Exception { + testProcessingTime(AsyncDataStream.OutputMode.ORDERED); } + /** + * Test the AsyncWaitOperator with unordered mode and processing time. + */ @Test - public void testUnordered() throws Exception { - testRun(AsyncDataStream.OutputMode.UNORDERED); + public void testProcessingUnordered() throws Exception { + testProcessingTime(AsyncDataStream.OutputMode.UNORDERED); } - private void testRun(AsyncDataStream.OutputMode mode) throws Exception { - final OneInputStreamTask task = new OneInputStreamTask<>(); - final OneInputStreamTaskTestHarness testHarness = - new OneInputStreamTaskTestHarness<>(task, 1, 1, BasicTypeInfo.INT_TYPE_INFO, BasicTypeInfo.INT_TYPE_INFO); - + private void testProcessingTime(AsyncDataStream.OutputMode mode) throws Exception { final AsyncWaitOperator operator = new AsyncWaitOperator<>(new MyAsyncFunction(), 6, mode); - final StreamConfig streamConfig = testHarness.getStreamConfig(); - streamConfig.setStreamOperator(operator); - - testHarness.invoke(); - testHarness.waitForTaskRunning(); + final OneInputStreamOperatorTestHarness testHarness = new OneInputStreamOperatorTestHarness<>(operator, IntSerializer.INSTANCE); final long initialTime = 0L; - final ConcurrentLinkedQueue expectedOutput = new ConcurrentLinkedQueue(); + final Queue expectedOutput = new ArrayDeque<>(); - testHarness.processElement(new StreamRecord<>(1, initialTime + 1)); - testHarness.processElement(new StreamRecord<>(2, initialTime + 2)); - testHarness.processElement(new StreamRecord<>(3, initialTime + 3)); - testHarness.processElement(new StreamRecord<>(4, initialTime + 4)); - testHarness.processElement(new StreamRecord<>(5, initialTime + 5)); - testHarness.processElement(new StreamRecord<>(6, initialTime + 6)); - testHarness.processElement(new StreamRecord<>(7, initialTime + 7)); - testHarness.processElement(new 
StreamRecord<>(8, initialTime + 8)); + testHarness.open(); + + synchronized (testHarness.getCheckpointLock()) { + testHarness.processElement(new StreamRecord<>(1, initialTime + 1)); + testHarness.processElement(new StreamRecord<>(2, initialTime + 2)); + testHarness.processElement(new StreamRecord<>(3, initialTime + 3)); + testHarness.processElement(new StreamRecord<>(4, initialTime + 4)); + testHarness.processElement(new StreamRecord<>(5, initialTime + 5)); + testHarness.processElement(new StreamRecord<>(6, initialTime + 6)); + testHarness.processElement(new StreamRecord<>(7, initialTime + 7)); + testHarness.processElement(new StreamRecord<>(8, initialTime + 8)); + } expectedOutput.add(new StreamRecord<>(2, initialTime + 1)); expectedOutput.add(new StreamRecord<>(4, initialTime + 2)); @@ -299,11 +309,9 @@ private void testRun(AsyncDataStream.OutputMode mode) throws Exception { expectedOutput.add(new StreamRecord<>(14, initialTime + 7)); expectedOutput.add(new StreamRecord<>(16, initialTime + 8)); - testHarness.waitForInputProcessing(); - - testHarness.endInput(); - - testHarness.waitForTaskCompletion(); + synchronized (testHarness.getCheckpointLock()) { + testHarness.close(); + } if (mode == AsyncDataStream.OutputMode.ORDERED) { TestHarnessUtil.assertOutputEquals("ORDERED Output was not correct.", expectedOutput, testHarness.getOutput()); @@ -317,6 +325,54 @@ private void testRun(AsyncDataStream.OutputMode mode) throws Exception { } } + /** + * Tests that the AsyncWaitOperator works together with chaining + */ + @Test + public void testOperatorChainWithProcessingTime() throws Exception { + + JobVertex chainedVertex = createChainedVertex(false); + + final OneInputStreamTask task = new OneInputStreamTask<>(); + final OneInputStreamTaskTestHarness testHarness = + new OneInputStreamTaskTestHarness<>(task, 1, 1, BasicTypeInfo.INT_TYPE_INFO, BasicTypeInfo.INT_TYPE_INFO); + + testHarness.taskConfig = chainedVertex.getConfiguration(); + + final StreamConfig streamConfig = 
testHarness.getStreamConfig(); + final StreamConfig operatorChainStreamConfig = new StreamConfig(chainedVertex.getConfiguration()); + final AsyncWaitOperator headOperator = + operatorChainStreamConfig.getStreamOperator(AsyncWaitOperatorTest.class.getClassLoader()); + streamConfig.setStreamOperator(headOperator); + + testHarness.invoke(); + testHarness.waitForTaskRunning(); + + long initialTimestamp = 0L; + + testHarness.processElement(new StreamRecord<>(5, initialTimestamp)); + testHarness.processElement(new StreamRecord<>(6, initialTimestamp + 1L)); + testHarness.processElement(new StreamRecord<>(7, initialTimestamp + 2L)); + testHarness.processElement(new StreamRecord<>(8, initialTimestamp + 3L)); + testHarness.processElement(new StreamRecord<>(9, initialTimestamp + 4L)); + + testHarness.endInput(); + testHarness.waitForTaskCompletion(); + + ConcurrentLinkedQueue expectedOutput = new ConcurrentLinkedQueue<>(); + expectedOutput.add(new StreamRecord<>(22, initialTimestamp)); + expectedOutput.add(new StreamRecord<>(26, initialTimestamp + 1L)); + expectedOutput.add(new StreamRecord<>(30, initialTimestamp + 2L)); + expectedOutput.add(new StreamRecord<>(34, initialTimestamp + 3L)); + expectedOutput.add(new StreamRecord<>(38, initialTimestamp + 4L)); + + TestHarnessUtil.assertOutputEqualsSorted( + "Test for chained operator with AsyncWaitOperator failed", + expectedOutput, + testHarness.getOutput(), + new StreamRecordComparator()); + } + private JobVertex createChainedVertex(boolean withLazyFunction) { StreamExecutionEnvironment chainEnv = StreamExecutionEnvironment.getExecutionEnvironment(); @@ -353,144 +409,23 @@ public Integer map(Integer value) throws Exception { input = AsyncDataStream.unorderedWait(input, new MyAsyncFunction(), 3); - input.addSink(new SinkFunction() { - private static final long serialVersionUID = 1L; + input.map(new MapFunction() { + private static final long serialVersionUID = 5162085254238405527L; @Override - public void invoke(Integer value) 
throws Exception { - sinkResult.add(value); + public Integer map(Integer value) throws Exception { + return value; } - }); + }).startNewChain().addSink(new DiscardingSink()); // be build our own OperatorChain final JobGraph jobGraph = chainEnv.getStreamGraph().getJobGraph(); - Assert.assertTrue(jobGraph.getVerticesSortedTopologicallyFromSources().size() == 2); + Assert.assertTrue(jobGraph.getVerticesSortedTopologicallyFromSources().size() == 3); return jobGraph.getVerticesSortedTopologicallyFromSources().get(1); } - /** - * Get the {@link SubtaskState} for the operator chain. The state will keep several inputs. - * - * @return A {@link SubtaskState} - * @throws Exception - */ - private SubtaskState createTaskState() throws Exception { - sinkResult = new ConcurrentLinkedDeque<>(); - - final OneInputStreamTask task = new OneInputStreamTask<>(); - final OneInputStreamTaskTestHarness testHarness = - new OneInputStreamTaskTestHarness<>(task, 1, 1, BasicTypeInfo.INT_TYPE_INFO, BasicTypeInfo.INT_TYPE_INFO); - - JobVertex chainedVertex = createChainedVertex(true); - - testHarness.taskConfig = chainedVertex.getConfiguration(); - - final AcknowledgeStreamMockEnvironment env = new AcknowledgeStreamMockEnvironment( - testHarness.jobConfig, - testHarness.taskConfig, - testHarness.getExecutionConfig(), - testHarness.memorySize, - new MockInputSplitProvider(), - testHarness.bufferSize); - - final StreamConfig streamConfig = testHarness.getStreamConfig(); - final StreamConfig operatorChainStreamConfig = new StreamConfig(chainedVertex.getConfiguration()); - final AsyncWaitOperator headOperator = - operatorChainStreamConfig.getStreamOperator(Thread.currentThread().getContextClassLoader()); - streamConfig.setStreamOperator(headOperator); - - testHarness.invoke(env); - testHarness.waitForTaskRunning(); - - testHarness.processElement(new StreamRecord<>(1)); - testHarness.processElement(new StreamRecord<>(2)); - testHarness.processElement(new StreamRecord<>(3)); - 
testHarness.processElement(new StreamRecord<>(4)); - - testHarness.waitForInputProcessing(); - - final CheckpointMetaData checkpointMetaData = new CheckpointMetaData(1L, 1L); - - task.triggerCheckpoint(checkpointMetaData); - - env.getCheckpointLatch().await(); - - assertEquals(1L, env.getCheckpointId()); - - LazyAsyncFunction.countDown(); - - testHarness.endInput(); - testHarness.waitForTaskCompletion(); - - return env.getCheckpointStateHandles(); - } - - @Test - public void testOperatorChain() throws Exception { - - JobVertex chainedVertex = createChainedVertex(false); - - final OneInputStreamTask task = new OneInputStreamTask<>(); - final OneInputStreamTaskTestHarness testHarness = - new OneInputStreamTaskTestHarness<>(task, 1, 1, BasicTypeInfo.INT_TYPE_INFO, BasicTypeInfo.INT_TYPE_INFO); - - task.setInitialState(new TaskStateHandles(createTaskState())); - - sinkResult = new ConcurrentLinkedDeque<>(); - - testHarness.taskConfig = chainedVertex.getConfiguration(); - - final AcknowledgeStreamMockEnvironment env = new AcknowledgeStreamMockEnvironment( - testHarness.jobConfig, - testHarness.taskConfig, - testHarness.getExecutionConfig(), - testHarness.memorySize, - new MockInputSplitProvider(), - testHarness.bufferSize); - - final StreamConfig streamConfig = testHarness.getStreamConfig(); - final StreamConfig operatorChainStreamConfig = new StreamConfig(chainedVertex.getConfiguration()); - final AsyncWaitOperator headOperator = - operatorChainStreamConfig.getStreamOperator(Thread.currentThread().getContextClassLoader()); - streamConfig.setStreamOperator(headOperator); - - testHarness.invoke(env); - testHarness.waitForTaskRunning(); - - testHarness.processElement(new StreamRecord<>(5)); - testHarness.processElement(new StreamRecord<>(6)); - testHarness.processElement(new StreamRecord<>(7)); - testHarness.processElement(new StreamRecord<>(8)); - testHarness.processElement(new StreamRecord<>(9)); - - testHarness.endInput(); - testHarness.waitForTaskCompletion(); - - 
ConcurrentLinkedQueue expectedOutput = new ConcurrentLinkedQueue<>(); - expectedOutput.add(6); - expectedOutput.add(10); - expectedOutput.add(14); - expectedOutput.add(18); - expectedOutput.add(22); - expectedOutput.add(26); - expectedOutput.add(30); - expectedOutput.add(34); - expectedOutput.add(38); - - TestHarnessUtil.assertOutputEqualsSorted( - "Test for chained operator with AsyncWaitOperator failed", - expectedOutput, - sinkResult, - new Comparator() { - @Override - public int compare(Object o1, Object o2) { - return (Integer)o1 - (Integer)o2; - } - }); - } - @Test public void testStateSnapshotAndRestore() throws Exception { final OneInputStreamTask task = new OneInputStreamTask<>(); @@ -498,7 +433,7 @@ public void testStateSnapshotAndRestore() throws Exception { new OneInputStreamTaskTestHarness<>(task, 1, 1, BasicTypeInfo.INT_TYPE_INFO, BasicTypeInfo.INT_TYPE_INFO); AsyncWaitOperator operator = - new AsyncWaitOperator<>(new LazyAsyncFunction(), 6, AsyncDataStream.OutputMode.ORDERED); + new AsyncWaitOperator<>(new LazyAsyncFunction(), 3, AsyncDataStream.OutputMode.ORDERED); final StreamConfig streamConfig = testHarness.getStreamConfig(); streamConfig.setStreamOperator(operator); diff --git a/flink-streaming-java/src/test/java/org/apache/flink/streaming/api/operators/async/EmitterTest.java b/flink-streaming-java/src/test/java/org/apache/flink/streaming/api/operators/async/EmitterTest.java new file mode 100644 index 0000000000000..c3a47aa5e09e3 --- /dev/null +++ b/flink-streaming-java/src/test/java/org/apache/flink/streaming/api/operators/async/EmitterTest.java @@ -0,0 +1,193 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.flink.streaming.api.operators.async; + +import org.apache.flink.streaming.api.functions.async.collector.AsyncCollector; +import org.apache.flink.streaming.api.operators.Output; +import org.apache.flink.streaming.api.operators.async.queue.OrderedStreamElementQueue; +import org.apache.flink.streaming.api.operators.async.queue.StreamElementQueue; +import org.apache.flink.streaming.api.operators.async.queue.StreamRecordQueueEntry; +import org.apache.flink.streaming.api.operators.async.queue.WatermarkQueueEntry; +import org.apache.flink.streaming.api.watermark.Watermark; +import org.apache.flink.streaming.runtime.streamrecord.StreamElement; +import org.apache.flink.streaming.runtime.streamrecord.StreamRecord; +import org.apache.flink.streaming.util.CollectorOutput; +import org.apache.flink.util.TestLogger; +import org.junit.AfterClass; +import org.junit.Assert; +import org.junit.BeforeClass; +import org.junit.Test; +import org.mockito.ArgumentCaptor; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.TimeUnit; + +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.verify; + +public class EmitterTest extends TestLogger { + + private static final long 
timeout = 10000L; + private static ExecutorService executor; + + @BeforeClass + public static void setup() { + executor = Executors.newFixedThreadPool(3); + } + + @AfterClass + public static void shutdown() { + executor.shutdown(); + + try { + if (!executor.awaitTermination(timeout, TimeUnit.MILLISECONDS)) { + executor.shutdownNow(); + } + } catch (InterruptedException interrupted) { + executor.shutdownNow(); + + Thread.currentThread().interrupt(); + } + } + + /** + * Tests that the emitter outputs completed stream element queue entries. + */ + @Test + public void testEmitterWithOrderedQueue() throws Exception { + Object lock = new Object(); + List list = new ArrayList<>(); + Output> output = new CollectorOutput<>(list); + + List expected = Arrays.asList( + new StreamRecord<>(1, 0L), + new StreamRecord<>(2, 0L), + new StreamRecord<>(3, 1L), + new StreamRecord<>(4, 1L), + new Watermark(3L), + new StreamRecord<>(5, 4L), + new StreamRecord<>(6, 4L)); + + OperatorActions operatorActions = mock(OperatorActions.class); + + final int capacity = 5; + + StreamElementQueue queue = new OrderedStreamElementQueue(capacity, executor, operatorActions); + + final Emitter emitter = new Emitter<>(lock, output, queue, operatorActions); + + final Thread emitterThread = new Thread(emitter); + emitterThread.start(); + + try { + StreamRecordQueueEntry record1 = new StreamRecordQueueEntry<>(new StreamRecord<>(1, 0L)); + StreamRecordQueueEntry record2 = new StreamRecordQueueEntry<>(new StreamRecord<>(2, 1L)); + WatermarkQueueEntry watermark1 = new WatermarkQueueEntry(new Watermark(3L)); + StreamRecordQueueEntry record3 = new StreamRecordQueueEntry<>(new StreamRecord<>(3, 4L)); + + queue.put(record1); + queue.put(record2); + queue.put(watermark1); + queue.put(record3); + + record2.collect(Arrays.asList(3, 4)); + record1.collect(Arrays.asList(1, 2)); + record3.collect(Arrays.asList(5, 6)); + + synchronized (lock) { + while (!queue.isEmpty()) { + lock.wait(); + } + } + + 
Assert.assertEquals(expected, list); + } finally { + emitter.stop(); + emitterThread.interrupt(); + } + } + + /** + * Tests that the emitter handles exceptions occurring in the {@link AsyncCollector} correctly. + */ + @Test + public void testEmitterWithExceptions() throws Exception { + Object lock = new Object(); + List list = new ArrayList<>(); + Output> output = new CollectorOutput<>(list); + + List expected = Arrays.asList( + new StreamRecord<>(1, 0L), + new Watermark(3L)); + + OperatorActions operatorActions = mock(OperatorActions.class); + + final int capacity = 3; + + StreamElementQueue queue = new OrderedStreamElementQueue(capacity, executor, operatorActions); + + final Emitter emitter = new Emitter<>(lock, output, queue, operatorActions); + + final Thread emitterThread = new Thread(emitter); + emitterThread.start(); + + final Exception testException = new Exception("Test exception"); + + try { + StreamRecordQueueEntry record1 = new StreamRecordQueueEntry<>(new StreamRecord<>(1, 0L)); + StreamRecordQueueEntry record2 = new StreamRecordQueueEntry<>(new StreamRecord<>(2, 1L)); + WatermarkQueueEntry watermark1 = new WatermarkQueueEntry(new Watermark(3L)); + + queue.put(record1); + queue.put(record2); + queue.put(watermark1); + + record2.collect(testException); + record1.collect(Arrays.asList(1)); + + synchronized (lock) { + while (!queue.isEmpty()) { + lock.wait(); + } + } + + Assert.assertEquals(expected, list); + + ArgumentCaptor argumentCaptor = ArgumentCaptor.forClass(Throwable.class); + + verify(operatorActions).failOperator(argumentCaptor.capture()); + + Throwable failureCause = argumentCaptor.getValue(); + + Assert.assertNotNull(failureCause.getCause()); + Assert.assertTrue(failureCause.getCause() instanceof ExecutionException); + + Assert.assertNotNull(failureCause.getCause().getCause()); + Assert.assertEquals(testException, failureCause.getCause().getCause()); + } finally { + emitter.stop(); + emitterThread.interrupt(); + } + } +} diff --git 
a/flink-streaming-java/src/test/java/org/apache/flink/streaming/api/operators/async/queue/OrderedStreamElementQueueTest.java b/flink-streaming-java/src/test/java/org/apache/flink/streaming/api/operators/async/queue/OrderedStreamElementQueueTest.java new file mode 100644 index 0000000000000..038051246a626 --- /dev/null +++ b/flink-streaming-java/src/test/java/org/apache/flink/streaming/api/operators/async/queue/OrderedStreamElementQueueTest.java @@ -0,0 +1,124 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.flink.streaming.api.operators.async.queue; + +import org.apache.flink.runtime.concurrent.Future; +import org.apache.flink.runtime.concurrent.impl.FlinkFuture; +import org.apache.flink.streaming.api.operators.async.OperatorActions; +import org.apache.flink.streaming.api.watermark.Watermark; +import org.apache.flink.streaming.runtime.streamrecord.StreamRecord; +import org.apache.flink.util.TestLogger; +import org.junit.AfterClass; +import org.junit.Assert; +import org.junit.BeforeClass; +import org.junit.Test; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; +import java.util.concurrent.Callable; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.TimeUnit; + +import static org.mockito.Matchers.any; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.never; +import static org.mockito.Mockito.verify; + +/** + * {@link OrderedStreamElementQueue} specific tests + */ +public class OrderedStreamElementQueueTest extends TestLogger { + + private static final long timeout = 10000L; + private static ExecutorService executor; + + @BeforeClass + public static void setup() { + executor = Executors.newFixedThreadPool(3); + } + + @AfterClass + public static void shutdown() { + executor.shutdown(); + + try { + if (!executor.awaitTermination(timeout, TimeUnit.MILLISECONDS)) { + executor.shutdownNow(); + } + } catch (InterruptedException interrupted) { + executor.shutdownNow(); + + Thread.currentThread().interrupt(); + } + } + + /** + * Tests that only the head element is pulled from the ordered queue if it has been + * completed. 
+ */ + @Test + public void testCompletionOrder() throws Exception { + OperatorActions operatorActions = mock(OperatorActions.class); + final OrderedStreamElementQueue queue = new OrderedStreamElementQueue(4, executor, operatorActions); + + StreamRecordQueueEntry entry1 = new StreamRecordQueueEntry<>(new StreamRecord<>(1, 0L)); + StreamRecordQueueEntry entry2 = new StreamRecordQueueEntry<>(new StreamRecord<>(2, 1L)); + WatermarkQueueEntry entry3 = new WatermarkQueueEntry(new Watermark(2L)); + StreamRecordQueueEntry entry4 = new StreamRecordQueueEntry<>(new StreamRecord<>(3, 3L)); + + List> expected = Arrays.asList(entry1, entry2, entry3, entry4); + + for (StreamElementQueueEntry entry : expected) { + queue.put(entry); + } + + Future> pollOperation = FlinkFuture.supplyAsync(new Callable>() { + @Override + public List call() throws Exception { + List result = new ArrayList<>(4); + while (!queue.isEmpty()) { + result.add(queue.poll()); + } + + return result; + } + }, executor); + + Thread.sleep(10L); + + Assert.assertFalse(pollOperation.isDone()); + + entry2.collect(Collections.emptyList()); + + entry4.collect(Collections.emptyList()); + + Thread.sleep(10L); + + Assert.assertEquals(4, queue.size()); + + entry1.collect(Collections.emptyList()); + + Assert.assertEquals(expected, pollOperation.get()); + + verify(operatorActions, never()).failOperator(any(Exception.class)); + } +} diff --git a/flink-streaming-java/src/test/java/org/apache/flink/streaming/api/operators/async/queue/StreamElementQueueTest.java b/flink-streaming-java/src/test/java/org/apache/flink/streaming/api/operators/async/queue/StreamElementQueueTest.java new file mode 100644 index 0000000000000..c9e59c794e4bf --- /dev/null +++ b/flink-streaming-java/src/test/java/org/apache/flink/streaming/api/operators/async/queue/StreamElementQueueTest.java @@ -0,0 +1,263 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.flink.streaming.api.operators.async.queue; + +import org.apache.flink.runtime.concurrent.Future; +import org.apache.flink.runtime.concurrent.impl.FlinkFuture; +import org.apache.flink.streaming.api.operators.async.OperatorActions; +import org.apache.flink.streaming.api.watermark.Watermark; +import org.apache.flink.streaming.runtime.streamrecord.StreamRecord; +import org.apache.flink.util.Preconditions; +import org.apache.flink.util.TestLogger; +import org.junit.AfterClass; +import org.junit.Assert; +import org.junit.BeforeClass; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +import java.util.Arrays; +import java.util.Collection; +import java.util.Collections; +import java.util.List; +import java.util.concurrent.Callable; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.TimeUnit; + +import static org.apache.flink.streaming.api.operators.async.queue.StreamElementQueueTest.StreamElementQueueType.OrderedStreamElementQueueType; +import static org.apache.flink.streaming.api.operators.async.queue.StreamElementQueueTest.StreamElementQueueType.UnorderedStreamElementQueueType; +import static org.mockito.Matchers.any; +import static 
org.mockito.Mockito.mock; +import static org.mockito.Mockito.never; +import static org.mockito.Mockito.verify; + +/** + * Tests for the basic functionality of {@link StreamElementQueue}. The basic operations consist + * of putting and polling elements from the queue. + */ +@RunWith(Parameterized.class) +public class StreamElementQueueTest extends TestLogger { + + private static final long timeout = 10000L; + private static ExecutorService executor; + + @BeforeClass + public static void setup() { + executor = Executors.newFixedThreadPool(3); + } + + @AfterClass + public static void shutdown() { + executor.shutdown(); + + try { + if (!executor.awaitTermination(timeout, TimeUnit.MILLISECONDS)) { + executor.shutdownNow(); + } + } catch (InterruptedException interrupted) { + executor.shutdownNow(); + + Thread.currentThread().interrupt(); + } + } + + enum StreamElementQueueType { + OrderedStreamElementQueueType, + UnorderedStreamElementQueueType + } + + @Parameterized.Parameters + public static Collection streamElementQueueTypes() { + return Arrays.asList(OrderedStreamElementQueueType, UnorderedStreamElementQueueType); + } + + private final StreamElementQueueType streamElementQueueType; + + public StreamElementQueueTest(StreamElementQueueType streamElementQueueType) { + this.streamElementQueueType = Preconditions.checkNotNull(streamElementQueueType); + } + + public StreamElementQueue createStreamElementQueue(int capacity, OperatorActions operatorActions) { + switch (streamElementQueueType) { + case OrderedStreamElementQueueType: + return new OrderedStreamElementQueue(capacity, executor, operatorActions); + case UnorderedStreamElementQueueType: + return new UnorderedStreamElementQueue(capacity, executor, operatorActions); + default: + throw new IllegalStateException("Unknown stream element queue type: " + streamElementQueueType); + } + } + + @Test + public void testPut() throws InterruptedException { + OperatorActions operatorActions = mock(OperatorActions.class); + 
StreamElementQueue queue = createStreamElementQueue(2, operatorActions); + + final Watermark watermark = new Watermark(0L); + final StreamRecord streamRecord = new StreamRecord<>(42, 1L); + final Watermark nextWatermark = new Watermark(2L); + + final WatermarkQueueEntry watermarkQueueEntry = new WatermarkQueueEntry(watermark); + final StreamRecordQueueEntry streamRecordQueueEntry = new StreamRecordQueueEntry<>(streamRecord); + + queue.put(watermarkQueueEntry); + queue.put(streamRecordQueueEntry); + + Assert.assertEquals(2, queue.size()); + + Assert.assertFalse(queue.tryPut(new WatermarkQueueEntry(nextWatermark))); + + Collection> actualValues = queue.values(); + + List> expectedValues = Arrays.asList(watermarkQueueEntry, streamRecordQueueEntry); + + Assert.assertEquals(expectedValues, actualValues); + + verify(operatorActions, never()).failOperator(any(Exception.class)); + } + + @Test + public void testPoll() throws InterruptedException { + OperatorActions operatorActions = mock(OperatorActions.class); + StreamElementQueue queue = createStreamElementQueue(2, operatorActions); + + WatermarkQueueEntry watermarkQueueEntry = new WatermarkQueueEntry(new Watermark(0L)); + StreamRecordQueueEntry streamRecordQueueEntry = new StreamRecordQueueEntry<>(new StreamRecord<>(42, 1L)); + + queue.put(watermarkQueueEntry); + queue.put(streamRecordQueueEntry); + + Assert.assertEquals(watermarkQueueEntry, queue.peekBlockingly()); + Assert.assertEquals(2, queue.size()); + + Assert.assertEquals(watermarkQueueEntry, queue.poll()); + Assert.assertEquals(1, queue.size()); + + streamRecordQueueEntry.collect(Collections.emptyList()); + + Assert.assertEquals(streamRecordQueueEntry, queue.poll()); + + Assert.assertEquals(0, queue.size()); + Assert.assertTrue(queue.isEmpty()); + + verify(operatorActions, never()).failOperator(any(Exception.class)); + } + + /** + * Tests that a put operation blocks if the queue is full. 
+ */ + @Test + public void testBlockingPut() throws Exception { + OperatorActions operatorActions = mock(OperatorActions.class); + final StreamElementQueue queue = createStreamElementQueue(1, operatorActions); + + StreamRecordQueueEntry streamRecordQueueEntry = new StreamRecordQueueEntry<>(new StreamRecord<>(42, 0L)); + final StreamRecordQueueEntry streamRecordQueueEntry2 = new StreamRecordQueueEntry<>(new StreamRecord<>(43, 1L)); + + queue.put(streamRecordQueueEntry); + + Assert.assertEquals(1, queue.size()); + + Future putOperation = FlinkFuture.supplyAsync(new Callable() { + @Override + public Void call() throws Exception { + queue.put(streamRecordQueueEntry2); + + return null; + } + }, executor); + + // give the future a chance to complete + Thread.sleep(10L); + + // but it shouldn't ;-) + Assert.assertFalse(putOperation.isDone()); + + streamRecordQueueEntry.collect(Collections.emptyList()); + + // polling the completed head element frees the queue again + Assert.assertEquals(streamRecordQueueEntry, queue.poll()); + + // now the put operation should complete + putOperation.get(); + + verify(operatorActions, never()).failOperator(any(Exception.class)); + } + + /** + * Test that a poll operation on an empty queue blocks. 
+ */ + @Test + public void testBlockingPoll() throws Exception { + OperatorActions operatorActions = mock(OperatorActions.class); + final StreamElementQueue queue = createStreamElementQueue(1, operatorActions); + + WatermarkQueueEntry watermarkQueueEntry = new WatermarkQueueEntry(new Watermark(1L)); + StreamRecordQueueEntry streamRecordQueueEntry = new StreamRecordQueueEntry<>(new StreamRecord<>(1, 2L)); + + Assert.assertTrue(queue.isEmpty()); + + Future peekOperation = FlinkFuture.supplyAsync(new Callable() { + @Override + public AsyncResult call() throws Exception { + return queue.peekBlockingly(); + } + }, executor); + + Thread.sleep(10L); + + Assert.assertFalse(peekOperation.isDone()); + + queue.put(watermarkQueueEntry); + + AsyncResult watermarkResult = peekOperation.get(); + + Assert.assertEquals(watermarkQueueEntry, watermarkResult); + Assert.assertEquals(1, queue.size()); + + Assert.assertEquals(watermarkQueueEntry, queue.poll()); + Assert.assertTrue(queue.isEmpty()); + + Future pollOperation = FlinkFuture.supplyAsync(new Callable() { + @Override + public AsyncResult call() throws Exception { + return queue.poll(); + } + }, executor); + + Thread.sleep(10L); + + Assert.assertFalse(pollOperation.isDone()); + + queue.put(streamRecordQueueEntry); + + Thread.sleep(10L); + + Assert.assertFalse(pollOperation.isDone()); + + streamRecordQueueEntry.collect(Collections.emptyList()); + + Assert.assertEquals(streamRecordQueueEntry, pollOperation.get()); + + Assert.assertTrue(queue.isEmpty()); + + verify(operatorActions, never()).failOperator(any(Exception.class)); + } +} diff --git a/flink-streaming-java/src/test/java/org/apache/flink/streaming/api/operators/async/queue/UnorderedStreamElementQueueTest.java b/flink-streaming-java/src/test/java/org/apache/flink/streaming/api/operators/async/queue/UnorderedStreamElementQueueTest.java new file mode 100644 index 0000000000000..0a57f92bbcaff --- /dev/null +++ 
b/flink-streaming-java/src/test/java/org/apache/flink/streaming/api/operators/async/queue/UnorderedStreamElementQueueTest.java @@ -0,0 +1,182 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.flink.streaming.api.operators.async.queue; + +import org.apache.flink.runtime.concurrent.Future; +import org.apache.flink.runtime.concurrent.impl.FlinkFuture; +import org.apache.flink.streaming.api.operators.async.OperatorActions; +import org.apache.flink.streaming.api.watermark.Watermark; +import org.apache.flink.streaming.runtime.streamrecord.StreamRecord; +import org.apache.flink.util.TestLogger; +import org.junit.AfterClass; +import org.junit.Assert; +import org.junit.BeforeClass; +import org.junit.Test; + +import java.util.Arrays; +import java.util.Collections; +import java.util.HashSet; +import java.util.List; +import java.util.Set; +import java.util.concurrent.Callable; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.TimeUnit; + +import static org.mockito.Matchers.any; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.never; +import static org.mockito.Mockito.verify; + +/** + * {@link 
UnorderedStreamElementQueue} specific tests + */ +public class UnorderedStreamElementQueueTest extends TestLogger { + private static final long timeout = 10000L; + private static ExecutorService executor; + + @BeforeClass + public static void setup() { + executor = Executors.newFixedThreadPool(3); + } + + @AfterClass + public static void shutdown() { + executor.shutdown(); + + try { + if (!executor.awaitTermination(timeout, TimeUnit.MILLISECONDS)) { + executor.shutdownNow(); + } + } catch (InterruptedException interrupted) { + executor.shutdownNow(); + + Thread.currentThread().interrupt(); + } + } + + /** + * Tests that only elements before the oldest watermark are returned if they are completed. + */ + @Test + public void testCompletionOrder() throws Exception { + OperatorActions operatorActions = mock(OperatorActions.class); + + final UnorderedStreamElementQueue queue = new UnorderedStreamElementQueue(8, executor, operatorActions); + + StreamRecordQueueEntry record1 = new StreamRecordQueueEntry<>(new StreamRecord<>(1, 0L)); + StreamRecordQueueEntry record2 = new StreamRecordQueueEntry<>(new StreamRecord<>(2, 1L)); + WatermarkQueueEntry watermark1 = new WatermarkQueueEntry(new Watermark(2L)); + StreamRecordQueueEntry record3 = new StreamRecordQueueEntry<>(new StreamRecord<>(3, 3L)); + StreamRecordQueueEntry record4 = new StreamRecordQueueEntry<>(new StreamRecord<>(4, 4L)); + WatermarkQueueEntry watermark2 = new WatermarkQueueEntry(new Watermark(5L)); + StreamRecordQueueEntry record5 = new StreamRecordQueueEntry<>(new StreamRecord<>(5, 6L)); + StreamRecordQueueEntry record6 = new StreamRecordQueueEntry<>(new StreamRecord<>(6, 7L)); + + List> entries = Arrays.asList(record1, record2, watermark1, record3, + record4, watermark2, record5, record6); + + // The queue should look like R1, R2, W1, R3, R4, W2, R5, R6 + for (StreamElementQueueEntry entry : entries) { + queue.put(entry); + } + + Assert.assertTrue(8 == queue.size()); + + Future firstPoll = 
FlinkFuture.supplyAsync(new Callable() { + @Override + public AsyncResult call() throws Exception { + return queue.poll(); + } + }, executor); + + // this should not fulfill the poll, because R3 is behind W1 + record3.collect(Collections.emptyList()); + + Thread.sleep(10L); + + Assert.assertFalse(firstPoll.isDone()); + + record2.collect(Collections.emptyList()); + + Assert.assertEquals(record2, firstPoll.get()); + + Future secondPoll = FlinkFuture.supplyAsync(new Callable() { + @Override + public AsyncResult call() throws Exception { + return queue.poll(); + } + }, executor); + + record6.collect(Collections.emptyList()); + record4.collect(Collections.emptyList()); + + Thread.sleep(10L); + + // The future should not be completed because R1 has not been completed yet + Assert.assertFalse(secondPoll.isDone()); + + record1.collect(Collections.emptyList()); + + Assert.assertEquals(record1, secondPoll.get()); + + // Now W1, R3, R4 and W2 are completed and should be pollable + Assert.assertEquals(watermark1, queue.poll()); + + // The order of R3 and R4 is not specified + Set expected = new HashSet<>(2); + expected.add(record3); + expected.add(record4); + + Set actual = new HashSet<>(2); + + actual.add(queue.poll()); + actual.add(queue.poll()); + + Assert.assertEquals(expected, actual); + + Assert.assertEquals(watermark2, queue.poll()); + + // since R6 has been completed before and W2 has been consumed, we should be able to poll + // this record as well + Assert.assertEquals(record6, queue.poll()); + + // only R5 left in the queue + Assert.assertTrue(1 == queue.size()); + + Future thirdPoll = FlinkFuture.supplyAsync(new Callable() { + @Override + public AsyncResult call() throws Exception { + return queue.poll(); + } + }, executor); + + Thread.sleep(10L); + + Assert.assertFalse(thirdPoll.isDone()); + + record5.collect(Collections.emptyList()); + + Assert.assertEquals(record5, thirdPoll.get()); + + Assert.assertTrue(queue.isEmpty()); + + verify(operatorActions, 
never()).failOperator(any(Exception.class)); + } +} diff --git a/flink-streaming-java/src/test/java/org/apache/flink/streaming/runtime/operators/StreamSourceOperatorTest.java b/flink-streaming-java/src/test/java/org/apache/flink/streaming/runtime/operators/StreamSourceOperatorTest.java index f87b5ef9b34e9..e6004202ee857 100644 --- a/flink-streaming-java/src/test/java/org/apache/flink/streaming/runtime/operators/StreamSourceOperatorTest.java +++ b/flink-streaming-java/src/test/java/org/apache/flink/streaming/runtime/operators/StreamSourceOperatorTest.java @@ -40,6 +40,7 @@ import org.apache.flink.streaming.runtime.tasks.TestProcessingTimeService; import org.apache.flink.streaming.runtime.tasks.ProcessingTimeService; +import org.apache.flink.streaming.util.CollectorOutput; import org.junit.Assert; import org.junit.Test; import org.mockito.invocation.InvocationOnMock; @@ -340,33 +341,4 @@ public void stop() { running = false; } } - - // ------------------------------------------------------------------------ - - private static class CollectorOutput implements Output> { - - private final List list; - - private CollectorOutput(List list) { - this.list = list; - } - - @Override - public void emitWatermark(Watermark mark) { - list.add(mark); - } - - @Override - public void emitLatencyMarker(LatencyMarker latencyMarker) { - list.add(latencyMarker); - } - - @Override - public void collect(StreamRecord record) { - list.add(record); - } - - @Override - public void close() {} - } } diff --git a/flink-streaming-java/src/test/java/org/apache/flink/streaming/util/CollectorOutput.java b/flink-streaming-java/src/test/java/org/apache/flink/streaming/util/CollectorOutput.java new file mode 100644 index 0000000000000..fcc8a6cbf70a7 --- /dev/null +++ b/flink-streaming-java/src/test/java/org/apache/flink/streaming/util/CollectorOutput.java @@ -0,0 +1,57 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.flink.streaming.util; + +import org.apache.commons.lang3.SerializationUtils; +import org.apache.flink.streaming.api.operators.Output; +import org.apache.flink.streaming.api.watermark.Watermark; +import org.apache.flink.streaming.runtime.streamrecord.LatencyMarker; +import org.apache.flink.streaming.runtime.streamrecord.StreamElement; +import org.apache.flink.streaming.runtime.streamrecord.StreamRecord; + +import java.io.Serializable; +import java.util.List; + +public class CollectorOutput implements Output> { + + private final List list; + + public CollectorOutput(List list) { + this.list = list; + } + + @Override + public void emitWatermark(Watermark mark) { + list.add(mark); + } + + @Override + public void emitLatencyMarker(LatencyMarker latencyMarker) { + list.add(latencyMarker); + } + + @Override + public void collect(StreamRecord record) { + T copied = SerializationUtils.deserialize(SerializationUtils.serialize((Serializable) record.getValue())); + list.add(record.copy(copied)); + } + + @Override + public void close() {} +} diff --git a/flink-tests/src/test/java/org/apache/flink/test/streaming/api/StreamingOperatorsITCase.java b/flink-tests/src/test/java/org/apache/flink/test/streaming/api/StreamingOperatorsITCase.java index 
ea99fe3d3157f..363196552c8cb 100644 --- a/flink-tests/src/test/java/org/apache/flink/test/streaming/api/StreamingOperatorsITCase.java +++ b/flink-tests/src/test/java/org/apache/flink/test/streaming/api/StreamingOperatorsITCase.java @@ -38,6 +38,7 @@ import org.junit.*; import java.util.*; +import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; @@ -94,6 +95,9 @@ public Iterable select(Tuple2 value) { final MemorySinkFunction sinkFunction1 = new MemorySinkFunction(0); + final List actualResult1 = new ArrayList<>(); + MemorySinkFunction.registerCollection(0, actualResult1); + splittedResult.select("0").map(new MapFunction, Integer>() { private static final long serialVersionUID = 2114608668010092995L; @@ -103,9 +107,11 @@ public Integer map(Tuple2 value) throws Exception { } }).addSink(sinkFunction1); - final MemorySinkFunction sinkFunction2 = new MemorySinkFunction(1); + final List actualResult2 = new ArrayList<>(); + MemorySinkFunction.registerCollection(1, actualResult2); + splittedResult.select("1").map(new MapFunction, Integer>() { private static final long serialVersionUID = 5631104389744681308L; @@ -132,13 +138,11 @@ public Integer map(Tuple2 value) throws Exception { env.execute(); - Collection result1 = sinkFunction1.getResult(); - Collections.sort((ArrayList)result1); - Collection result2 = sinkFunction2.getResult(); - Collections.sort((ArrayList)result2); + Collections.sort(actualResult1); + Collections.sort(actualResult2); - Assert.assertArrayEquals(result1.toArray(), expected1.toArray()); - Assert.assertArrayEquals(result2.toArray(), expected2.toArray()); + Assert.assertEquals(expected1, actualResult1); + Assert.assertEquals(expected2, actualResult2); MemorySinkFunction.clear(); } @@ -155,6 +159,8 @@ public void testFoldOperationWithNonJavaSerializableType() throws Exception { DataStream> input = env.addSource(new NonSerializableTupleSource(numElements)); final MemorySinkFunction 
sinkFunction = new MemorySinkFunction(0); + final ArrayList actualResult = new ArrayList<>(); + MemorySinkFunction.registerCollection(0, actualResult); input .keyBy(0) @@ -186,23 +192,28 @@ public Integer map(NonSerializable value) throws Exception { env.execute(); - Collection result = sinkFunction.getResult(); - Collections.sort((ArrayList)result); + Collections.sort(actualResult); - Assert.assertArrayEquals(result.toArray(), expected.toArray()); + Assert.assertEquals(expected, actualResult); MemorySinkFunction.clear(); } + /** + * Tests the basic functionality of the AsyncWaitOperator: Processing a limited stream of + * elements by doubling their value. This is tested for both the ordered and the unordered mode. + */ @Test public void testAsyncWaitOperator() throws Exception { - final int numElements = 10; + final int numElements = 5; StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); DataStream> input = env.addSource(new NonSerializableTupleSource(numElements)); AsyncFunction, Integer> function = new RichAsyncFunction, Integer>() { + private static final long serialVersionUID = 7000343199829487985L; + transient ExecutorService executorService; @Override @@ -214,26 +225,16 @@ public void open(Configuration parameters) throws Exception { @Override public void close() throws Exception { super.close(); - executorService.shutdown(); + executorService.shutdownNow(); } @Override public void asyncInvoke(final Tuple2 input, final AsyncCollector collector) throws Exception { - this.executorService.submit(new Runnable() { + executorService.submit(new Runnable() { @Override public void run() { - // wait for while to simulate async operation here - int sleep = (int) (new Random().nextFloat() * 10); - try { - Thread.sleep(sleep); - List ret = new ArrayList<>(); - ret.add(input.f0+input.f0); - collector.collect(ret); - } - catch (InterruptedException e) { - collector.collect(new ArrayList(0)); - } + 
collector.collect(Collections.singletonList(input.f0 + input.f0)); } }); } @@ -243,6 +244,8 @@ public void run() { // save result from ordered process final MemorySinkFunction sinkFunction1 = new MemorySinkFunction(0); + final List actualResult1 = new ArrayList<>(numElements); + MemorySinkFunction.registerCollection(0, actualResult1); orderedResult.addSink(sinkFunction1).setParallelism(1); @@ -251,6 +254,8 @@ public void run() { // save result from unordered process final MemorySinkFunction sinkFunction2 = new MemorySinkFunction(1); + final List actualResult2 = new ArrayList<>(numElements); + MemorySinkFunction.registerCollection(1, actualResult2); unorderedResult.addSink(sinkFunction2); @@ -263,11 +268,10 @@ public void run() { env.execute(); - Assert.assertArrayEquals(expected.toArray(), sinkFunction1.getResult().toArray()); + Assert.assertEquals(expected, actualResult1); - Collection result = sinkFunction2.getResult(); - Collections.sort((ArrayList)result); - Assert.assertArrayEquals(expected.toArray(), result.toArray()); + Collections.sort(actualResult2); + Assert.assertEquals(expected, actualResult2); MemorySinkFunction.clear(); } @@ -331,43 +335,31 @@ public void cancel() { } private static class MemorySinkFunction implements SinkFunction { - private final static Collection collection1 = new ArrayList<>(10); + private static Map> collections = new ConcurrentHashMap<>(); - private final static Collection collection2 = new ArrayList<>(10); + private static final long serialVersionUID = -8815570195074103860L; - private final long serialVersionUID = -8815570195074103860L; + private final int key; - private final int idx; - - public MemorySinkFunction(int idx) { - this.idx = idx; + public MemorySinkFunction(int key) { + this.key = key; } @Override public void invoke(Integer value) throws Exception { - if (idx == 0) { - synchronized (collection1) { - collection1.add(value); - } - } - else { - synchronized (collection2) { - collection2.add(value); - } - } - } + 
Collection collection = collections.get(key); - public Collection getResult() { - if (idx == 0) { - return collection1; + synchronized (collection) { + collection.add(value); } + } - return collection2; + public static void registerCollection(int key, Collection collection) { + collections.put(key, collection); } public static void clear() { - collection1.clear(); - collection2.clear(); + collections.clear(); } } }