|
|
|
@ -17,13 +17,24 @@
|
|
|
|
|
package com.ververica.cdc.connectors.sqlserver.source;
|
|
|
|
|
|
|
|
|
|
import org.apache.flink.api.common.JobID;
|
|
|
|
|
import org.apache.flink.api.common.eventtime.WatermarkStrategy;
|
|
|
|
|
import org.apache.flink.api.common.restartstrategy.RestartStrategies;
|
|
|
|
|
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
|
|
|
|
|
import org.apache.flink.table.api.TableResult;
|
|
|
|
|
import org.apache.flink.table.api.bridge.java.StreamTableEnvironment;
|
|
|
|
|
import org.apache.flink.table.catalog.ResolvedSchema;
|
|
|
|
|
import org.apache.flink.table.catalog.UniqueConstraint;
|
|
|
|
|
import org.apache.flink.table.data.RowData;
|
|
|
|
|
import org.apache.flink.types.Row;
|
|
|
|
|
import org.apache.flink.util.CloseableIterator;
|
|
|
|
|
|
|
|
|
|
import com.ververica.cdc.connectors.base.config.JdbcSourceConfig;
|
|
|
|
|
import com.ververica.cdc.connectors.base.source.utils.hooks.SnapshotPhaseHook;
|
|
|
|
|
import com.ververica.cdc.connectors.base.source.utils.hooks.SnapshotPhaseHooks;
|
|
|
|
|
import com.ververica.cdc.connectors.sqlserver.source.config.SqlServerSourceConfig;
|
|
|
|
|
import com.ververica.cdc.connectors.sqlserver.source.dialect.SqlServerDialect;
|
|
|
|
|
import com.ververica.cdc.connectors.sqlserver.testutils.TestTable;
|
|
|
|
|
import io.debezium.jdbc.JdbcConnection;
|
|
|
|
|
import org.apache.commons.lang3.StringUtils;
|
|
|
|
|
import org.junit.Rule;
|
|
|
|
|
import org.junit.Test;
|
|
|
|
@ -33,10 +44,16 @@ import org.slf4j.LoggerFactory;
|
|
|
|
|
|
|
|
|
|
import java.util.ArrayList;
|
|
|
|
|
import java.util.Arrays;
|
|
|
|
|
import java.util.Collections;
|
|
|
|
|
import java.util.Iterator;
|
|
|
|
|
import java.util.List;
|
|
|
|
|
import java.util.function.Function;
|
|
|
|
|
import java.util.stream.Collectors;
|
|
|
|
|
|
|
|
|
|
import static java.lang.String.format;
|
|
|
|
|
import static org.apache.flink.table.api.DataTypes.BIGINT;
|
|
|
|
|
import static org.apache.flink.table.api.DataTypes.STRING;
|
|
|
|
|
import static org.apache.flink.table.catalog.Column.physical;
|
|
|
|
|
import static org.apache.flink.util.Preconditions.checkState;
|
|
|
|
|
import static org.testcontainers.containers.MSSQLServerContainer.MS_SQL_SERVER_PORT;
|
|
|
|
|
|
|
|
|
@ -47,6 +64,9 @@ public class SqlServerSourceITCase extends SqlServerSourceTestBase {
|
|
|
|
|
|
|
|
|
|
/** Per-test time limit: a JUnit {@code Timeout} rule configured with 300 seconds. */
@Rule public final Timeout timeoutPerTest = Timeout.seconds(300);
|
|
|
|
|
|
|
|
|
|
/**
 * Hook selector for {@code testBackfillWhenWritingEvents}: register the DML hook through
 * {@code SnapshotPhaseHooks#setPostLowWatermarkAction}.
 */
private static final int USE_POST_LOWWATERMARK_HOOK = 1;
|
|
|
|
|
/**
 * Hook selector for {@code testBackfillWhenWritingEvents}: register the DML hook through
 * {@code SnapshotPhaseHooks#setPreHighWatermarkAction}.
 */
private static final int USE_PRE_HIGHWATERMARK_HOOK = 2;
|
|
|
|
|
|
|
|
|
|
@Test
|
|
|
|
|
public void testReadSingleTableWithSingleParallelism() throws Exception {
|
|
|
|
|
testSqlServerParallelSource(
|
|
|
|
@ -90,6 +110,234 @@ public class SqlServerSourceITCase extends SqlServerSourceTestBase {
|
|
|
|
|
1, FailoverType.JM, FailoverPhase.SNAPSHOT, new String[] {"dbo.customers"});
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
@Test
|
|
|
|
|
public void testReadSingleTableWithSingleParallelismAndSkipBackfill() throws Exception {
|
|
|
|
|
testSqlServerParallelSource(
|
|
|
|
|
DEFAULT_PARALLELISM,
|
|
|
|
|
FailoverType.TM,
|
|
|
|
|
FailoverPhase.SNAPSHOT,
|
|
|
|
|
new String[] {"dbo.customers"},
|
|
|
|
|
true);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
@Test
|
|
|
|
|
public void testEnableBackfillWithDMLPreHighWaterMark() throws Exception {
|
|
|
|
|
|
|
|
|
|
List<String> records = testBackfillWhenWritingEvents(false, 21, USE_PRE_HIGHWATERMARK_HOOK);
|
|
|
|
|
|
|
|
|
|
List<String> expectedRecords =
|
|
|
|
|
Arrays.asList(
|
|
|
|
|
"+I[101, user_1, Shanghai, 123567891234]",
|
|
|
|
|
"+I[102, user_2, Shanghai, 123567891234]",
|
|
|
|
|
"+I[103, user_3, Shanghai, 123567891234]",
|
|
|
|
|
"+I[109, user_4, Shanghai, 123567891234]",
|
|
|
|
|
"+I[110, user_5, Shanghai, 123567891234]",
|
|
|
|
|
"+I[111, user_6, Shanghai, 123567891234]",
|
|
|
|
|
"+I[118, user_7, Shanghai, 123567891234]",
|
|
|
|
|
"+I[121, user_8, Shanghai, 123567891234]",
|
|
|
|
|
"+I[123, user_9, Shanghai, 123567891234]",
|
|
|
|
|
"+I[1009, user_10, Shanghai, 123567891234]",
|
|
|
|
|
"+I[1010, user_11, Shanghai, 123567891234]",
|
|
|
|
|
"+I[1011, user_12, Shanghai, 123567891234]",
|
|
|
|
|
"+I[1012, user_13, Shanghai, 123567891234]",
|
|
|
|
|
"+I[1013, user_14, Shanghai, 123567891234]",
|
|
|
|
|
"+I[1014, user_15, Shanghai, 123567891234]",
|
|
|
|
|
"+I[1015, user_16, Shanghai, 123567891234]",
|
|
|
|
|
"+I[1016, user_17, Shanghai, 123567891234]",
|
|
|
|
|
"+I[1017, user_18, Shanghai, 123567891234]",
|
|
|
|
|
"+I[1018, user_19, Shanghai, 123567891234]",
|
|
|
|
|
"+I[2000, user_21, Pittsburgh, 123567891234]",
|
|
|
|
|
"+I[15213, user_15213, Shanghai, 123567891234]");
|
|
|
|
|
// when enable backfill, the wal log between [snapshot, high_watermark) will be
|
|
|
|
|
// applied as snapshot image
|
|
|
|
|
assertEqualsInAnyOrder(expectedRecords, records);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
@Test
|
|
|
|
|
public void testEnableBackfillWithDMLPostLowWaterMark() throws Exception {
|
|
|
|
|
|
|
|
|
|
List<String> records = testBackfillWhenWritingEvents(false, 21, USE_POST_LOWWATERMARK_HOOK);
|
|
|
|
|
|
|
|
|
|
List<String> expectedRecords =
|
|
|
|
|
Arrays.asList(
|
|
|
|
|
"+I[101, user_1, Shanghai, 123567891234]",
|
|
|
|
|
"+I[102, user_2, Shanghai, 123567891234]",
|
|
|
|
|
"+I[103, user_3, Shanghai, 123567891234]",
|
|
|
|
|
"+I[109, user_4, Shanghai, 123567891234]",
|
|
|
|
|
"+I[110, user_5, Shanghai, 123567891234]",
|
|
|
|
|
"+I[111, user_6, Shanghai, 123567891234]",
|
|
|
|
|
"+I[118, user_7, Shanghai, 123567891234]",
|
|
|
|
|
"+I[121, user_8, Shanghai, 123567891234]",
|
|
|
|
|
"+I[123, user_9, Shanghai, 123567891234]",
|
|
|
|
|
"+I[1009, user_10, Shanghai, 123567891234]",
|
|
|
|
|
"+I[1010, user_11, Shanghai, 123567891234]",
|
|
|
|
|
"+I[1011, user_12, Shanghai, 123567891234]",
|
|
|
|
|
"+I[1012, user_13, Shanghai, 123567891234]",
|
|
|
|
|
"+I[1013, user_14, Shanghai, 123567891234]",
|
|
|
|
|
"+I[1014, user_15, Shanghai, 123567891234]",
|
|
|
|
|
"+I[1015, user_16, Shanghai, 123567891234]",
|
|
|
|
|
"+I[1016, user_17, Shanghai, 123567891234]",
|
|
|
|
|
"+I[1017, user_18, Shanghai, 123567891234]",
|
|
|
|
|
"+I[1018, user_19, Shanghai, 123567891234]",
|
|
|
|
|
"+I[2000, user_21, Pittsburgh, 123567891234]",
|
|
|
|
|
"+I[15213, user_15213, Shanghai, 123567891234]");
|
|
|
|
|
// when enable backfill, the wal log between [low_watermark, snapshot) will be applied
|
|
|
|
|
// as snapshot image
|
|
|
|
|
assertEqualsInAnyOrder(expectedRecords, records);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
@Test
|
|
|
|
|
public void testSkipBackfillWithDMLPreHighWaterMark() throws Exception {
|
|
|
|
|
|
|
|
|
|
List<String> records = testBackfillWhenWritingEvents(true, 25, USE_PRE_HIGHWATERMARK_HOOK);
|
|
|
|
|
|
|
|
|
|
List<String> expectedRecords =
|
|
|
|
|
Arrays.asList(
|
|
|
|
|
"+I[101, user_1, Shanghai, 123567891234]",
|
|
|
|
|
"+I[102, user_2, Shanghai, 123567891234]",
|
|
|
|
|
"+I[103, user_3, Shanghai, 123567891234]",
|
|
|
|
|
"+I[109, user_4, Shanghai, 123567891234]",
|
|
|
|
|
"+I[110, user_5, Shanghai, 123567891234]",
|
|
|
|
|
"+I[111, user_6, Shanghai, 123567891234]",
|
|
|
|
|
"+I[118, user_7, Shanghai, 123567891234]",
|
|
|
|
|
"+I[121, user_8, Shanghai, 123567891234]",
|
|
|
|
|
"+I[123, user_9, Shanghai, 123567891234]",
|
|
|
|
|
"+I[1009, user_10, Shanghai, 123567891234]",
|
|
|
|
|
"+I[1010, user_11, Shanghai, 123567891234]",
|
|
|
|
|
"+I[1011, user_12, Shanghai, 123567891234]",
|
|
|
|
|
"+I[1012, user_13, Shanghai, 123567891234]",
|
|
|
|
|
"+I[1013, user_14, Shanghai, 123567891234]",
|
|
|
|
|
"+I[1014, user_15, Shanghai, 123567891234]",
|
|
|
|
|
"+I[1015, user_16, Shanghai, 123567891234]",
|
|
|
|
|
"+I[1016, user_17, Shanghai, 123567891234]",
|
|
|
|
|
"+I[1017, user_18, Shanghai, 123567891234]",
|
|
|
|
|
"+I[1018, user_19, Shanghai, 123567891234]",
|
|
|
|
|
"+I[1019, user_20, Shanghai, 123567891234]",
|
|
|
|
|
"+I[2000, user_21, Shanghai, 123567891234]",
|
|
|
|
|
"+I[15213, user_15213, Shanghai, 123567891234]",
|
|
|
|
|
"-U[2000, user_21, Shanghai, 123567891234]",
|
|
|
|
|
"+U[2000, user_21, Pittsburgh, 123567891234]",
|
|
|
|
|
"-D[1019, user_20, Shanghai, 123567891234]");
|
|
|
|
|
// when skip backfill, the wal log between (snapshot, high_watermark) will be seen as
|
|
|
|
|
// stream event.
|
|
|
|
|
assertEqualsInAnyOrder(expectedRecords, records);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
@Test
|
|
|
|
|
public void testSkipBackfillWithDMLPostLowWaterMark() throws Exception {
|
|
|
|
|
|
|
|
|
|
List<String> records = testBackfillWhenWritingEvents(true, 25, USE_POST_LOWWATERMARK_HOOK);
|
|
|
|
|
|
|
|
|
|
List<String> expectedRecords =
|
|
|
|
|
Arrays.asList(
|
|
|
|
|
"+I[101, user_1, Shanghai, 123567891234]",
|
|
|
|
|
"+I[102, user_2, Shanghai, 123567891234]",
|
|
|
|
|
"+I[103, user_3, Shanghai, 123567891234]",
|
|
|
|
|
"+I[109, user_4, Shanghai, 123567891234]",
|
|
|
|
|
"+I[110, user_5, Shanghai, 123567891234]",
|
|
|
|
|
"+I[111, user_6, Shanghai, 123567891234]",
|
|
|
|
|
"+I[118, user_7, Shanghai, 123567891234]",
|
|
|
|
|
"+I[121, user_8, Shanghai, 123567891234]",
|
|
|
|
|
"+I[123, user_9, Shanghai, 123567891234]",
|
|
|
|
|
"+I[1009, user_10, Shanghai, 123567891234]",
|
|
|
|
|
"+I[1010, user_11, Shanghai, 123567891234]",
|
|
|
|
|
"+I[1011, user_12, Shanghai, 123567891234]",
|
|
|
|
|
"+I[1012, user_13, Shanghai, 123567891234]",
|
|
|
|
|
"+I[1013, user_14, Shanghai, 123567891234]",
|
|
|
|
|
"+I[1014, user_15, Shanghai, 123567891234]",
|
|
|
|
|
"+I[1015, user_16, Shanghai, 123567891234]",
|
|
|
|
|
"+I[1016, user_17, Shanghai, 123567891234]",
|
|
|
|
|
"+I[1017, user_18, Shanghai, 123567891234]",
|
|
|
|
|
"+I[1018, user_19, Shanghai, 123567891234]",
|
|
|
|
|
"+I[2000, user_21, Pittsburgh, 123567891234]",
|
|
|
|
|
"+I[15213, user_15213, Shanghai, 123567891234]",
|
|
|
|
|
"+I[15213, user_15213, Shanghai, 123567891234]",
|
|
|
|
|
"-U[2000, user_21, Shanghai, 123567891234]",
|
|
|
|
|
"+U[2000, user_21, Pittsburgh, 123567891234]",
|
|
|
|
|
"-D[1019, user_20, Shanghai, 123567891234]");
|
|
|
|
|
// when skip backfill, the wal log between (snapshot, high_watermark) will still be
|
|
|
|
|
// seen as stream event. This will occur data duplicate. For example, user_20 will be
|
|
|
|
|
// deleted twice, and user_15213 will be inserted twice.
|
|
|
|
|
assertEqualsInAnyOrder(expectedRecords, records);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
private List<String> testBackfillWhenWritingEvents(
|
|
|
|
|
boolean skipSnapshotBackfill, int fetchSize, int hookType) throws Exception {
|
|
|
|
|
|
|
|
|
|
String databaseName = "customer";
|
|
|
|
|
|
|
|
|
|
initializeSqlServerTable(databaseName);
|
|
|
|
|
|
|
|
|
|
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
|
|
|
|
|
env.enableCheckpointing(1000);
|
|
|
|
|
env.setParallelism(1);
|
|
|
|
|
|
|
|
|
|
ResolvedSchema customersSchame =
|
|
|
|
|
new ResolvedSchema(
|
|
|
|
|
Arrays.asList(
|
|
|
|
|
physical("id", BIGINT().notNull()),
|
|
|
|
|
physical("name", STRING()),
|
|
|
|
|
physical("address", STRING()),
|
|
|
|
|
physical("phone_number", STRING())),
|
|
|
|
|
new ArrayList<>(),
|
|
|
|
|
UniqueConstraint.primaryKey("pk", Collections.singletonList("id")));
|
|
|
|
|
TestTable customerTable = new TestTable(databaseName, "dbo", "customers", customersSchame);
|
|
|
|
|
String tableId = customerTable.getTableId();
|
|
|
|
|
|
|
|
|
|
SqlServerSourceBuilder.SqlServerIncrementalSource source =
|
|
|
|
|
SqlServerSourceBuilder.SqlServerIncrementalSource.<RowData>builder()
|
|
|
|
|
.hostname(MSSQL_SERVER_CONTAINER.getHost())
|
|
|
|
|
.port(MSSQL_SERVER_CONTAINER.getMappedPort(MS_SQL_SERVER_PORT))
|
|
|
|
|
.username(MSSQL_SERVER_CONTAINER.getUsername())
|
|
|
|
|
.password(MSSQL_SERVER_CONTAINER.getPassword())
|
|
|
|
|
.databaseList(databaseName)
|
|
|
|
|
.tableList(getTableNameRegex(new String[] {"dbo.customers"}))
|
|
|
|
|
.deserializer(customerTable.getDeserializer())
|
|
|
|
|
.skipSnapshotBackfill(skipSnapshotBackfill)
|
|
|
|
|
.build();
|
|
|
|
|
|
|
|
|
|
// Do some database operations during hook in snapshot period.
|
|
|
|
|
SnapshotPhaseHooks hooks = new SnapshotPhaseHooks();
|
|
|
|
|
String[] statements =
|
|
|
|
|
new String[] {
|
|
|
|
|
String.format(
|
|
|
|
|
"INSERT INTO %s VALUES (15213, 'user_15213', 'Shanghai', '123567891234')",
|
|
|
|
|
tableId),
|
|
|
|
|
String.format("UPDATE %s SET address='Pittsburgh' WHERE id=2000", tableId),
|
|
|
|
|
String.format("DELETE FROM %s WHERE id=1019", tableId)
|
|
|
|
|
};
|
|
|
|
|
SnapshotPhaseHook snapshotPhaseHook =
|
|
|
|
|
(sourceConfig, split) -> {
|
|
|
|
|
SqlServerDialect dialect =
|
|
|
|
|
new SqlServerDialect((SqlServerSourceConfig) sourceConfig);
|
|
|
|
|
JdbcConnection postgresConnection =
|
|
|
|
|
dialect.openJdbcConnection((JdbcSourceConfig) sourceConfig);
|
|
|
|
|
postgresConnection.execute(statements);
|
|
|
|
|
postgresConnection.commit();
|
|
|
|
|
try {
|
|
|
|
|
Thread.sleep(1000L);
|
|
|
|
|
} catch (InterruptedException e) {
|
|
|
|
|
throw new RuntimeException(e);
|
|
|
|
|
}
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
if (hookType == USE_POST_LOWWATERMARK_HOOK) {
|
|
|
|
|
hooks.setPostLowWatermarkAction(snapshotPhaseHook);
|
|
|
|
|
} else if (hookType == USE_PRE_HIGHWATERMARK_HOOK) {
|
|
|
|
|
hooks.setPreHighWatermarkAction(snapshotPhaseHook);
|
|
|
|
|
}
|
|
|
|
|
source.setSnapshotHooks(hooks);
|
|
|
|
|
|
|
|
|
|
List<String> records = new ArrayList<>();
|
|
|
|
|
try (CloseableIterator<RowData> iterator =
|
|
|
|
|
env.fromSource(source, WatermarkStrategy.noWatermarks(), "Backfill Skipped Source")
|
|
|
|
|
.executeAndCollect()) {
|
|
|
|
|
records = fetchRowData(iterator, fetchSize, customerTable::stringify);
|
|
|
|
|
env.close();
|
|
|
|
|
}
|
|
|
|
|
return records;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
private void testSqlServerParallelSource(
|
|
|
|
|
FailoverType failoverType, FailoverPhase failoverPhase, String[] captureCustomerTables)
|
|
|
|
|
throws Exception {
|
|
|
|
@ -103,6 +351,17 @@ public class SqlServerSourceITCase extends SqlServerSourceTestBase {
|
|
|
|
|
FailoverPhase failoverPhase,
|
|
|
|
|
String[] captureCustomerTables)
|
|
|
|
|
throws Exception {
|
|
|
|
|
testSqlServerParallelSource(
|
|
|
|
|
parallelism, failoverType, failoverPhase, captureCustomerTables, false);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
private void testSqlServerParallelSource(
|
|
|
|
|
int parallelism,
|
|
|
|
|
FailoverType failoverType,
|
|
|
|
|
FailoverPhase failoverPhase,
|
|
|
|
|
String[] captureCustomerTables,
|
|
|
|
|
boolean skipSnapshotBackfill)
|
|
|
|
|
throws Exception {
|
|
|
|
|
|
|
|
|
|
String databaseName = "customer";
|
|
|
|
|
|
|
|
|
@ -131,14 +390,16 @@ public class SqlServerSourceITCase extends SqlServerSourceTestBase {
|
|
|
|
|
+ " 'database-name' = '%s',"
|
|
|
|
|
+ " 'table-name' = '%s',"
|
|
|
|
|
+ " 'scan.incremental.snapshot.enabled' = 'true',"
|
|
|
|
|
+ " 'scan.incremental.snapshot.chunk.size' = '4'"
|
|
|
|
|
+ " 'scan.incremental.snapshot.chunk.size' = '4',"
|
|
|
|
|
+ " 'scan.incremental.snapshot.backfill.skip' = '%s'"
|
|
|
|
|
+ ")",
|
|
|
|
|
MSSQL_SERVER_CONTAINER.getHost(),
|
|
|
|
|
MSSQL_SERVER_CONTAINER.getMappedPort(MS_SQL_SERVER_PORT),
|
|
|
|
|
MSSQL_SERVER_CONTAINER.getUsername(),
|
|
|
|
|
MSSQL_SERVER_CONTAINER.getPassword(),
|
|
|
|
|
databaseName,
|
|
|
|
|
getTableNameRegex(captureCustomerTables));
|
|
|
|
|
getTableNameRegex(captureCustomerTables),
|
|
|
|
|
skipSnapshotBackfill);
|
|
|
|
|
|
|
|
|
|
// first step: check the snapshot data
|
|
|
|
|
String[] snapshotForSingleTable =
|
|
|
|
@ -239,6 +500,17 @@ public class SqlServerSourceITCase extends SqlServerSourceTestBase {
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
public static List<String> fetchRowData(
|
|
|
|
|
Iterator<RowData> iter, int size, Function<RowData, String> stringifier) {
|
|
|
|
|
List<RowData> rows = new ArrayList<>(size);
|
|
|
|
|
while (size > 0 && iter.hasNext()) {
|
|
|
|
|
RowData row = iter.next();
|
|
|
|
|
rows.add(row);
|
|
|
|
|
size--;
|
|
|
|
|
}
|
|
|
|
|
return rows.stream().map(stringifier).collect(Collectors.toList());
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
private static List<String> fetchRows(Iterator<Row> iter, int size) {
|
|
|
|
|
List<String> rows = new ArrayList<>(size);
|
|
|
|
|
while (size > 0 && iter.hasNext()) {
|
|
|
|
|