From 5264e80a7b7619b497c14a65f81a7cb5db32a567 Mon Sep 17 00:00:00 2001 From: Jark Wu Date: Thu, 25 Feb 2021 21:15:56 +0800 Subject: [PATCH] [mysql] Support read binlog from specific timestamp --- flink-connector-mysql-cdc/pom.xml | 8 -- .../cdc/connectors/mysql/MySQLSource.java | 8 ++ .../mysql/SeekBinlogToTimestampFilter.java | 82 +++++++++++++++++++ .../mysql/table/MySQLConnectorITCase.java | 60 ++++++++++++++ 4 files changed, 150 insertions(+), 8 deletions(-) create mode 100644 flink-connector-mysql-cdc/src/main/java/com/alibaba/ververica/cdc/connectors/mysql/SeekBinlogToTimestampFilter.java diff --git a/flink-connector-mysql-cdc/pom.xml b/flink-connector-mysql-cdc/pom.xml index 091f09f88..381ce6790 100644 --- a/flink-connector-mysql-cdc/pom.xml +++ b/flink-connector-mysql-cdc/pom.xml @@ -141,14 +141,6 @@ under the License. - - - org.slf4j - slf4j-api - ${slf4j.version} - test - - org.apache.logging.log4j log4j-slf4j-impl diff --git a/flink-connector-mysql-cdc/src/main/java/com/alibaba/ververica/cdc/connectors/mysql/MySQLSource.java b/flink-connector-mysql-cdc/src/main/java/com/alibaba/ververica/cdc/connectors/mysql/MySQLSource.java index 126b58c3a..3b0279585 100644 --- a/flink-connector-mysql-cdc/src/main/java/com/alibaba/ververica/cdc/connectors/mysql/MySQLSource.java +++ b/flink-connector-mysql-cdc/src/main/java/com/alibaba/ververica/cdc/connectors/mysql/MySQLSource.java @@ -212,6 +212,14 @@ public class MySQLSource { specificOffset.setSourceOffset(sourceOffset); break; + case TIMESTAMP: + checkNotNull(deserializer); + props.setProperty("snapshot.mode", "never"); + deserializer = new SeekBinlogToTimestampFilter<>( + startupOptions.startupTimestampMillis, + deserializer); + break; + default: throw new UnsupportedOperationException(); } diff --git a/flink-connector-mysql-cdc/src/main/java/com/alibaba/ververica/cdc/connectors/mysql/SeekBinlogToTimestampFilter.java b/flink-connector-mysql-cdc/src/main/java/com/alibaba/ververica/cdc/connectors/mysql/SeekBinlogToTimestampFilter.java new file mode 100644 index 000000000..60d287b5c --- /dev/null +++ b/flink-connector-mysql-cdc/src/main/java/com/alibaba/ververica/cdc/connectors/mysql/SeekBinlogToTimestampFilter.java @@ -0,0 +1,82 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.alibaba.ververica.cdc.connectors.mysql; + +import org.apache.flink.api.common.typeinfo.TypeInformation; +import org.apache.flink.util.Collector; + +import com.alibaba.ververica.cdc.debezium.DebeziumDeserializationSchema; +import io.debezium.data.Envelope; +import org.apache.kafka.connect.data.Struct; +import org.apache.kafka.connect.source.SourceRecord; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * A {@link DebeziumDeserializationSchema} which wraps a real {@link DebeziumDeserializationSchema} + * to seek binlog to the specific timestamp. + */ +public class SeekBinlogToTimestampFilter implements DebeziumDeserializationSchema { + private static final long serialVersionUID = -4450118969976653497L; + protected static final Logger LOG = LoggerFactory.getLogger(SeekBinlogToTimestampFilter.class); + + private final long startupTimestampMillis; + private final DebeziumDeserializationSchema serializer; + + private transient boolean find = false; + private transient long filtered = 0L; + + public SeekBinlogToTimestampFilter(long startupTimestampMillis, DebeziumDeserializationSchema serializer) { + this.startupTimestampMillis = startupTimestampMillis; + this.serializer = serializer; + } + + @Override + public void deserialize(SourceRecord record, Collector out) throws Exception { + if (find) { + serializer.deserialize(record, out); + return; + } + + if (filtered == 0) { + LOG.info("Begin to seek binlog to the specific timestamp {}.", startupTimestampMillis); + } + + Struct value = (Struct) record.value(); + Struct source = value.getStruct(Envelope.FieldName.SOURCE); + Long ts = source.getInt64(Envelope.FieldName.TIMESTAMP); + if (ts != null && ts >= startupTimestampMillis) { + serializer.deserialize(record, out); + find = true; + LOG.info("Successfully seek to the specific timestamp {} with filtered {} change events.", + startupTimestampMillis, + filtered); + } else { + filtered++; + if (filtered % 10000 == 0) { + LOG.info("Seeking binlog to specific timestamp with filtered {} change events.", filtered); + } + } + } + + @Override + public TypeInformation getProducedType() { + return serializer.getProducedType(); + } +} diff --git a/flink-connector-mysql-cdc/src/test/java/com/alibaba/ververica/cdc/connectors/mysql/table/MySQLConnectorITCase.java b/flink-connector-mysql-cdc/src/test/java/com/alibaba/ververica/cdc/connectors/mysql/table/MySQLConnectorITCase.java index 2284b5df9..d16ad02a7 100644 --- a/flink-connector-mysql-cdc/src/test/java/com/alibaba/ververica/cdc/connectors/mysql/table/MySQLConnectorITCase.java +++ b/flink-connector-mysql-cdc/src/test/java/com/alibaba/ververica/cdc/connectors/mysql/table/MySQLConnectorITCase.java @@ -474,6 +474,66 @@ public class MySQLConnectorITCase extends MySQLTestBase { result.getJobClient().get().cancel().get(); } + @Test + public void testStartupFromTimestamp() throws Exception { + inventoryDatabase.createAndInitialize(); + String sourceDDL = String.format( + "CREATE TABLE debezium_source (" + + " id INT NOT NULL," + + " name STRING," + + " description STRING," + + " weight DECIMAL(10,3)" + + ") WITH (" + + " 'connector' = 'mysql-cdc'," + + " 'hostname' = '%s'," + + " 'port' = '%s'," + + " 'username' = '%s'," + + " 'password' = '%s'," + + " 'database-name' = '%s'," + + " 'table-name' = '%s'," + + " 'scan.startup.mode' = 'timestamp'," + + " 'scan.startup.timestamp-millis' = '%s'" + + ")", + MYSQL_CONTAINER.getHost(), + MYSQL_CONTAINER.getDatabasePort(), + inventoryDatabase.getUsername(), + inventoryDatabase.getPassword(), + inventoryDatabase.getDatabaseName(), + "products", + System.currentTimeMillis()); + String sinkDDL = "CREATE TABLE sink " + + " WITH (" + + " 'connector' = 'values'," + + " 'sink-insert-only' = 'false'" + + ") LIKE debezium_source (EXCLUDING OPTIONS)"; + tEnv.executeSql(sourceDDL); + tEnv.executeSql(sinkDDL); + + // async submit job + TableResult result = tEnv.executeSql("INSERT INTO sink SELECT * FROM debezium_source"); + // wait for the source startup, we don't have a better way to wait it, use sleep for now + Thread.sleep(5000L); + + try (Connection connection = inventoryDatabase.getJdbcConnection(); + Statement statement = connection.createStatement()) { + + statement.execute("INSERT INTO products VALUES (default,'jacket','water resistent white wind breaker',0.2);"); // 110 + statement.execute("INSERT INTO products VALUES (default,'scooter','Big 2-wheel scooter ',5.18);"); + statement.execute("UPDATE products SET description='new water resistent white wind breaker', weight='0.5' WHERE id=110;"); + statement.execute("UPDATE products SET weight='5.17' WHERE id=111;"); + statement.execute("DELETE FROM products WHERE id=111;"); + } + + waitForSinkSize("sink", 7); + + String[] expected = new String[]{"110,jacket,new water resistent white wind breaker,0.500"}; + + List actual = TestValuesTableFactory.getResults("sink"); + assertThat(actual, containsInAnyOrder(expected)); + + result.getJobClient().get().cancel().get(); + } + // ------------------------------------------------------------------------------------ private static void waitForSnapshotStarted(String sinkName) throws InterruptedException {