[FLINK-36742][cdc-base][oracle] Filter unacked splits for tables no longer captured when the task restores from state
parent
0037c4379e
commit
fed73b2a46
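The commit adds the test below, which restores a reader with a narrower table list and expects finished-but-unacknowledged snapshot splits of no-longer-captured tables to be dropped. A minimal illustration of that filtering idea (not the connector's actual implementation; the class, method, and parameter names here are assumptions for the sketch):

// Illustrative sketch only; not the connector's code.
import io.debezium.relational.TableId;
import org.apache.flink.cdc.connectors.base.source.meta.split.SnapshotSplit;

import java.util.List;
import java.util.Set;
import java.util.stream.Collectors;

class SplitFilterSketch {
    // Keep only the restored snapshot splits whose table is still in the capture list.
    static List<SnapshotSplit> keepCapturedOnly(
            List<SnapshotSplit> restoredSplits, Set<TableId> capturedTables) {
        return restoredSplits.stream()
                .filter(split -> capturedTables.contains(split.getTableId()))
                .collect(Collectors.toList());
    }
}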
@@ -0,0 +1,357 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.flink.cdc.connectors.oracle.source.reader;

import org.apache.flink.api.common.eventtime.Watermark;
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.api.connector.source.ReaderOutput;
import org.apache.flink.api.connector.source.SourceOutput;
import org.apache.flink.api.connector.source.SourceReaderContext;
import org.apache.flink.cdc.connectors.base.config.JdbcSourceConfig;
import org.apache.flink.cdc.connectors.base.options.StartupOptions;
import org.apache.flink.cdc.connectors.base.source.meta.offset.OffsetFactory;
import org.apache.flink.cdc.connectors.base.source.meta.split.SnapshotSplit;
import org.apache.flink.cdc.connectors.base.source.meta.split.SourceRecords;
import org.apache.flink.cdc.connectors.base.source.meta.split.SourceSplitBase;
import org.apache.flink.cdc.connectors.base.source.meta.split.SourceSplitSerializer;
import org.apache.flink.cdc.connectors.base.source.meta.split.SourceSplitState;
import org.apache.flink.cdc.connectors.base.source.metrics.SourceReaderMetrics;
import org.apache.flink.cdc.connectors.base.source.reader.IncrementalSourceReader;
import org.apache.flink.cdc.connectors.base.source.reader.IncrementalSourceReaderContext;
import org.apache.flink.cdc.connectors.base.source.reader.IncrementalSourceRecordEmitter;
import org.apache.flink.cdc.connectors.base.source.reader.IncrementalSourceSplitReader;
import org.apache.flink.cdc.connectors.base.source.utils.hooks.SnapshotPhaseHooks;
import org.apache.flink.cdc.connectors.oracle.source.OracleDialect;
import org.apache.flink.cdc.connectors.oracle.source.OracleSourceTestBase;
import org.apache.flink.cdc.connectors.oracle.source.config.OracleSourceConfig;
import org.apache.flink.cdc.connectors.oracle.source.config.OracleSourceConfigFactory;
import org.apache.flink.cdc.connectors.oracle.source.meta.offset.RedoLogOffsetFactory;
import org.apache.flink.cdc.connectors.oracle.testutils.RecordsFormatter;
import org.apache.flink.cdc.debezium.DebeziumDeserializationSchema;
import org.apache.flink.connector.base.source.reader.RecordEmitter;
import org.apache.flink.connector.base.source.reader.RecordsWithSplitIds;
import org.apache.flink.connector.base.source.reader.synchronization.FutureCompletingBlockingQueue;
import org.apache.flink.connector.testutils.source.reader.TestingReaderContext;
import org.apache.flink.core.io.InputStatus;
import org.apache.flink.metrics.groups.SourceReaderMetricGroup;
import org.apache.flink.table.api.DataTypes;
import org.apache.flink.table.types.DataType;
import org.apache.flink.table.types.logical.LogicalType;
import org.apache.flink.table.types.logical.RowType;
import org.apache.flink.util.Collector;

import io.debezium.relational.TableId;
import io.debezium.relational.history.TableChanges.TableChange;
import org.apache.kafka.connect.source.SourceRecord;
import org.junit.Test;

import java.time.ZoneId;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.function.Supplier;

import static org.apache.flink.core.io.InputStatus.MORE_AVAILABLE;
import static org.junit.Assert.assertEquals;

/** Tests for {@link IncrementalSourceReader}. */
public class OracleSourceReaderTest extends OracleSourceTestBase {

    @Test
    public void testFinishedUnackedSplitsCleanInvalidSplitAccordingToNewFilter() throws Exception {
        createAndInitialize("customer.sql");
        final OracleSourceConfig sourceConfig =
                getConfig(new String[] {"CUSTOMERS", "CUSTOMERS_1"});
        final DataType dataType =
                DataTypes.ROW(
                        DataTypes.FIELD("ID", DataTypes.BIGINT()),
                        DataTypes.FIELD("NAME", DataTypes.STRING()),
                        DataTypes.FIELD("ADDRESS", DataTypes.STRING()),
                        DataTypes.FIELD("PHONE_NUMBER", DataTypes.STRING()));
        List<SourceSplitBase> snapshotSplits;
        TableId tableId = new TableId(ORACLE_DATABASE, ORACLE_SCHEMA, "CUSTOMERS");
        TableId tableId1 = new TableId(ORACLE_DATABASE, ORACLE_SCHEMA, "CUSTOMERS_1");
        OracleDialect oracleDialect = new OracleDialect();
        Map<TableId, TableChange> tableSchemas =
                oracleDialect.discoverDataCollectionSchemas(sourceConfig);

        RowType splitType =
                RowType.of(
                        new LogicalType[] {DataTypes.INT().getLogicalType()}, new String[] {"id"});
        snapshotSplits =
                Arrays.asList(
                        new SnapshotSplit(
                                tableId,
                                tableId + ":0",
                                splitType,
                                null,
                                new Integer[] {200},
                                null,
                                tableSchemas),
                        new SnapshotSplit(
                                tableId,
                                tableId + ":1",
                                splitType,
                                new Integer[] {200},
                                new Integer[] {1500},
                                null,
                                tableSchemas),
                        new SnapshotSplit(
                                tableId,
                                tableId + ":2",
                                splitType,
                                new Integer[] {1500},
                                null,
                                null,
                                tableSchemas),
                        new SnapshotSplit(
                                tableId1,
                                tableId1 + ":0",
                                splitType,
                                null,
                                new Integer[] {200},
                                null,
                                tableSchemas),
                        new SnapshotSplit(
                                tableId1,
                                tableId1 + ":1",
                                splitType,
                                new Integer[] {200},
                                new Integer[] {1500},
                                null,
                                tableSchemas),
                        new SnapshotSplit(
                                tableId1,
                                tableId1 + ":2",
                                splitType,
                                new Integer[] {1500},
                                null,
                                null,
                                tableSchemas));

        // Step 1: start source reader and assign snapshot splits
        IncrementalSourceReader<SourceRecord, JdbcSourceConfig> reader = createReader(sourceConfig);
        reader.start();
        reader.addSplits(snapshotSplits);
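        // At this point the reader holds six snapshot splits, three per table; once the
        // snapshot phase completes they are all expected to become finished-but-unacknowledged.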

        String[] expectedRecords =
                new String[] {
                    "+I[111, user_6, Shanghai, 123567891234]",
                    "+I[111, user_6, Shanghai, 123567891234]",
                    "+I[110, user_5, Shanghai, 123567891234]",
                    "+I[110, user_5, Shanghai, 123567891234]",
                    "+I[101, user_1, Shanghai, 123567891234]",
                    "+I[101, user_1, Shanghai, 123567891234]",
                    "+I[103, user_3, Shanghai, 123567891234]",
                    "+I[103, user_3, Shanghai, 123567891234]",
                    "+I[102, user_2, Shanghai, 123567891234]",
                    "+I[102, user_2, Shanghai, 123567891234]",
                    "+I[118, user_7, Shanghai, 123567891234]",
                    "+I[118, user_7, Shanghai, 123567891234]",
                    "+I[121, user_8, Shanghai, 123567891234]",
                    "+I[121, user_8, Shanghai, 123567891234]",
                    "+I[123, user_9, Shanghai, 123567891234]",
                    "+I[123, user_9, Shanghai, 123567891234]",
                    "+I[109, user_4, Shanghai, 123567891234]",
                    "+I[109, user_4, Shanghai, 123567891234]",
                    "+I[1009, user_10, Shanghai, 123567891234]",
                    "+I[1009, user_10, Shanghai, 123567891234]",
                    "+I[1011, user_12, Shanghai, 123567891234]",
                    "+I[1011, user_12, Shanghai, 123567891234]",
                    "+I[1010, user_11, Shanghai, 123567891234]",
                    "+I[1010, user_11, Shanghai, 123567891234]",
                    "+I[1013, user_14, Shanghai, 123567891234]",
                    "+I[1013, user_14, Shanghai, 123567891234]",
                    "+I[1012, user_13, Shanghai, 123567891234]",
                    "+I[1012, user_13, Shanghai, 123567891234]",
                    "+I[1015, user_16, Shanghai, 123567891234]",
                    "+I[1015, user_16, Shanghai, 123567891234]",
                    "+I[1014, user_15, Shanghai, 123567891234]",
                    "+I[1014, user_15, Shanghai, 123567891234]",
                    "+I[1017, user_18, Shanghai, 123567891234]",
                    "+I[1017, user_18, Shanghai, 123567891234]",
                    "+I[1016, user_17, Shanghai, 123567891234]",
                    "+I[1016, user_17, Shanghai, 123567891234]",
                    "+I[1019, user_20, Shanghai, 123567891234]",
                    "+I[1019, user_20, Shanghai, 123567891234]",
                    "+I[1018, user_19, Shanghai, 123567891234]",
                    "+I[1018, user_19, Shanghai, 123567891234]",
                    "+I[2000, user_21, Shanghai, 123567891234]",
                    "+I[2000, user_21, Shanghai, 123567891234]"
                };
        // Step 2: wait for the snapshot splits to finish reading
        Thread.sleep(10000L);
        List<String> actualRecords = consumeRecords(reader, dataType, 42);
        assertEqualsInAnyOrder(Arrays.asList(expectedRecords), actualRecords);

        // Step 3: snapshot the reader's state
        List<SourceSplitBase> splitsState = reader.snapshotState(1L);

        // Step 4: restart a reader from the restored state, now capturing only CUSTOMERS
        final OracleSourceConfig sourceConfig1 = getConfig(new String[] {"CUSTOMERS"});
        IncrementalSourceReader<SourceRecord, JdbcSourceConfig> restartReader =
                createReader(sourceConfig1);
        restartReader.start();
        restartReader.addSplits(splitsState);

        // Step 5: check the finished unacked splits retained by the restarted reader
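        // All six restored splits were finished but unacknowledged; since the restarted reader
        // captures only CUSTOMERS, the three CUSTOMERS_1 splits should be filtered out on
        // restore, leaving three (the behaviour added by FLINK-36742).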
        assertEquals(3, restartReader.getFinishedUnackedSplits().size());
        reader.close();
        restartReader.close();
    }

    private IncrementalSourceReader<SourceRecord, JdbcSourceConfig> createReader(
            OracleSourceConfig configuration) {
        return createReader(configuration, new TestingReaderContext());
    }

    private IncrementalSourceReader<SourceRecord, JdbcSourceConfig> createReader(
            OracleSourceConfig configuration, SourceReaderContext readerContext) {
        final FutureCompletingBlockingQueue<RecordsWithSplitIds<SourceRecords>> elementsQueue =
                new FutureCompletingBlockingQueue<>();
        final SourceReaderMetricGroup sourceReaderMetricGroup = readerContext.metricGroup();
        final SourceReaderMetrics sourceReaderMetrics =
                new SourceReaderMetrics(sourceReaderMetricGroup);
        RedoLogOffsetFactory offsetFactory = new RedoLogOffsetFactory();
        final RecordEmitter<SourceRecords, SourceRecord, SourceSplitState> recordEmitter =
                new IncrementalSourceRecordEmitter<>(
                        new ForwardDeserializeSchema(),
                        sourceReaderMetrics,
                        configuration.isIncludeSchemaChanges(),
                        offsetFactory);
        final IncrementalSourceReaderContext incrementalSourceReaderContext =
                new IncrementalSourceReaderContext(readerContext);
        OracleDialect dialect = new OracleDialect();
        Supplier<IncrementalSourceSplitReader<JdbcSourceConfig>> splitReaderSupplier =
                () ->
                        new IncrementalSourceSplitReader<>(
                                readerContext.getIndexOfSubtask(),
                                dialect,
                                configuration,
                                incrementalSourceReaderContext,
                                SnapshotPhaseHooks.empty());
        return new IncrementalSourceReader<>(
                elementsQueue,
                splitReaderSupplier,
                recordEmitter,
                readerContext.getConfiguration(),
                incrementalSourceReaderContext,
                configuration,
                new SourceSplitSerializer() {
                    @Override
                    public OffsetFactory getOffsetFactory() {
                        return offsetFactory;
                    }
                },
                dialect);
    }

    private OracleSourceConfig getConfig(String[] captureTables) {
        String[] captureTableIds =
                Arrays.stream(captureTables)
                        .map(tableName -> ORACLE_SCHEMA + "." + tableName)
                        .toArray(String[]::new);
        return (OracleSourceConfig)
                new OracleSourceConfigFactory()
                        .startupOptions(StartupOptions.initial())
                        .databaseList(ORACLE_DATABASE)
                        .tableList(captureTableIds)
                        .includeSchemaChanges(false)
                        .hostname(ORACLE_CONTAINER.getHost())
                        .port(ORACLE_CONTAINER.getOraclePort())
                        .splitSize(10)
                        .fetchSize(2)
                        .username(ORACLE_CONTAINER.getUsername())
                        .password(ORACLE_CONTAINER.getPassword())
                        .serverTimeZone(ZoneId.of("UTC").toString())
                        .create(0);
    }

    private List<String> consumeRecords(
            IncrementalSourceReader<SourceRecord, JdbcSourceConfig> sourceReader,
            DataType recordType,
            int size)
            throws Exception {
        // Poll the reader until at least `size` records have been collected.
        final SimpleReaderOutput output = new SimpleReaderOutput();
        InputStatus status = MORE_AVAILABLE;
        while (MORE_AVAILABLE == status || output.getResults().size() < size) {
            status = sourceReader.pollNext(output);
        }
        final RecordsFormatter formatter = new RecordsFormatter(recordType);
        return formatter.format(output.getResults());
    }

    // ------------------------------------------------------------------------
    // test utilities
    // ------------------------------------------------------------------------
    private static class SimpleReaderOutput implements ReaderOutput<SourceRecord> {

        private final List<SourceRecord> results = new ArrayList<>();

        @Override
        public void collect(SourceRecord record) {
            results.add(record);
        }

        public List<SourceRecord> getResults() {
            return results;
        }

        @Override
        public void collect(SourceRecord record, long timestamp) {
            collect(record);
        }

        @Override
        public void emitWatermark(Watermark watermark) {}

        @Override
        public void markIdle() {}

        @Override
        public void markActive() {
            throw new UnsupportedOperationException();
        }

        @Override
        public SourceOutput<SourceRecord> createOutputForSplit(String splitId) {
            return this;
        }

        @Override
        public void releaseOutputForSplit(String splitId) {}
    }

    private static class ForwardDeserializeSchema
            implements DebeziumDeserializationSchema<SourceRecord> {

        private static final long serialVersionUID = 1L;

        @Override
        public void deserialize(SourceRecord record, Collector<SourceRecord> out) throws Exception {
            out.collect(record);
        }

        @Override
        public TypeInformation<SourceRecord> getProducedType() {
            return TypeInformation.of(SourceRecord.class);
        }
    }
}