[cdc-connector][postgres] Fix data loss when a new LSN is committed to the slot between snapshotState and notifyCheckpointComplete (#2539)
This closes #2538. Co-authored-by: sammieliu <sammieliu@tencent.com>pull/3021/head
parent
e3d6c7e0aa
commit
9ce36569fb
@ -0,0 +1,100 @@
|
||||
/*
|
||||
* Copyright 2023 Ververica Inc.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package com.ververica.cdc.connectors.base.source.reader;
|
||||
|
||||
import org.apache.flink.api.connector.source.SourceReaderContext;
|
||||
import org.apache.flink.configuration.Configuration;
|
||||
import org.apache.flink.connector.base.source.reader.RecordEmitter;
|
||||
import org.apache.flink.connector.base.source.reader.synchronization.FutureCompletingBlockingQueue;
|
||||
|
||||
import com.ververica.cdc.connectors.base.config.SourceConfig;
|
||||
import com.ververica.cdc.connectors.base.dialect.DataSourceDialect;
|
||||
import com.ververica.cdc.connectors.base.source.meta.offset.Offset;
|
||||
import com.ververica.cdc.connectors.base.source.meta.split.SourceSplitBase;
|
||||
import com.ververica.cdc.connectors.base.source.meta.split.SourceSplitSerializer;
|
||||
import com.ververica.cdc.connectors.base.source.meta.split.StreamSplit;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.TreeMap;
|
||||
import java.util.function.Supplier;
|
||||
|
||||
/**
|
||||
* Record the LSN of checkpoint {@link StreamSplit}, which can be used to submit to the CDC source.
|
||||
*/
|
||||
public class IncrementalSourceReaderWithCommit extends IncrementalSourceReader {
|
||||
private static final Logger LOG =
|
||||
LoggerFactory.getLogger(IncrementalSourceReaderWithCommit.class);
|
||||
|
||||
private final TreeMap<Long, Offset> lastCheckPointOffset;
|
||||
private long maxCompletedCheckpointId;
|
||||
|
||||
public IncrementalSourceReaderWithCommit(
|
||||
FutureCompletingBlockingQueue elementQueue,
|
||||
Supplier supplier,
|
||||
RecordEmitter recordEmitter,
|
||||
Configuration config,
|
||||
SourceReaderContext context,
|
||||
SourceConfig sourceConfig,
|
||||
SourceSplitSerializer sourceSplitSerializer,
|
||||
DataSourceDialect dialect) {
|
||||
super(
|
||||
elementQueue,
|
||||
supplier,
|
||||
recordEmitter,
|
||||
config,
|
||||
context,
|
||||
sourceConfig,
|
||||
sourceSplitSerializer,
|
||||
dialect);
|
||||
this.lastCheckPointOffset = new TreeMap<>();
|
||||
this.maxCompletedCheckpointId = 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<SourceSplitBase> snapshotState(long checkpointId) {
|
||||
final List<SourceSplitBase> stateSplits = super.snapshotState(checkpointId);
|
||||
|
||||
stateSplits.stream()
|
||||
.filter(SourceSplitBase::isStreamSplit)
|
||||
.findAny()
|
||||
.map(SourceSplitBase::asStreamSplit)
|
||||
.ifPresent(
|
||||
streamSplit -> {
|
||||
lastCheckPointOffset.put(checkpointId, streamSplit.getStartingOffset());
|
||||
LOG.debug(
|
||||
"Starting offset of stream split is: {}, and checkpoint id is {}.",
|
||||
streamSplit.getStartingOffset(),
|
||||
checkpointId);
|
||||
});
|
||||
|
||||
return stateSplits;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void notifyCheckpointComplete(long checkpointId) throws Exception {
|
||||
// checkpointId might be for a checkpoint that was triggered earlier. see
|
||||
// CheckpointListener#notifyCheckpointComplete(long).
|
||||
if (checkpointId > maxCompletedCheckpointId) {
|
||||
Offset offset = lastCheckPointOffset.get(checkpointId);
|
||||
dialect.notifyCheckpointComplete(checkpointId, offset);
|
||||
lastCheckPointOffset.headMap(checkpointId, true).clear();
|
||||
maxCompletedCheckpointId = checkpointId;
|
||||
}
|
||||
}
|
||||
}
|
@ -0,0 +1,46 @@
|
||||
/*
|
||||
* Copyright 2023 Ververica Inc.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package com.ververica.cdc.connectors.postgres.source;
|
||||
|
||||
import org.apache.flink.util.Preconditions;
|
||||
|
||||
import com.ververica.cdc.connectors.base.source.meta.offset.Offset;
|
||||
import com.ververica.cdc.connectors.postgres.source.config.PostgresSourceConfig;
|
||||
|
||||
import java.util.function.Consumer;
|
||||
|
||||
/** Mock postgres dialect used to test changelog when checkpoint. */
|
||||
public class MockPostgresDialect extends PostgresDialect {
|
||||
|
||||
private static Consumer<Long> callback = null;
|
||||
|
||||
public MockPostgresDialect(PostgresSourceConfig sourceConfig) {
|
||||
super(sourceConfig);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void notifyCheckpointComplete(long checkpointId, Offset offset) throws Exception {
|
||||
if (callback != null) {
|
||||
callback.accept(checkpointId);
|
||||
}
|
||||
super.notifyCheckpointComplete(checkpointId, offset);
|
||||
}
|
||||
|
||||
public static void setNotifyCheckpointCompleteCallback(Consumer<Long> callback) {
|
||||
MockPostgresDialect.callback = Preconditions.checkNotNull(callback);
|
||||
}
|
||||
}
|
@ -0,0 +1,37 @@
|
||||
/*
|
||||
* Copyright 2023 Ververica Inc.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package com.ververica.cdc.connectors.postgres.table;
|
||||
|
||||
import org.apache.flink.table.connector.source.DynamicTableSource;
|
||||
|
||||
/** Mock {@link PostgreSQLTableFactory}. */
|
||||
public class MockPostgreSQLTableFactory extends PostgreSQLTableFactory {
|
||||
public static final String IDENTIFIER = "postgres-cdc-mock";
|
||||
|
||||
@Override
|
||||
public DynamicTableSource createDynamicTableSource(Context context) {
|
||||
PostgreSQLTableSource postgreSQLTableSource =
|
||||
(PostgreSQLTableSource) super.createDynamicTableSource(context);
|
||||
|
||||
return new MockPostgreSQLTableSource(postgreSQLTableSource);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String factoryIdentifier() {
|
||||
return IDENTIFIER;
|
||||
}
|
||||
}
|
@ -0,0 +1,111 @@
|
||||
/*
|
||||
* Copyright 2023 Ververica Inc.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package com.ververica.cdc.connectors.postgres.table;
|
||||
|
||||
import org.apache.flink.api.connector.source.Source;
|
||||
import org.apache.flink.table.catalog.ResolvedSchema;
|
||||
import org.apache.flink.table.connector.source.SourceProvider;
|
||||
import org.apache.flink.table.data.RowData;
|
||||
import org.apache.flink.util.FlinkRuntimeException;
|
||||
import org.apache.flink.util.Preconditions;
|
||||
|
||||
import com.ververica.cdc.connectors.base.options.StartupOptions;
|
||||
import com.ververica.cdc.connectors.base.source.IncrementalSource;
|
||||
import com.ververica.cdc.connectors.postgres.source.MockPostgresDialect;
|
||||
import com.ververica.cdc.connectors.postgres.source.PostgresSourceBuilder;
|
||||
import com.ververica.cdc.connectors.postgres.source.config.PostgresSourceConfigFactory;
|
||||
import com.ververica.cdc.debezium.table.DebeziumChangelogMode;
|
||||
|
||||
import java.lang.reflect.Field;
|
||||
import java.time.Duration;
|
||||
import java.util.Properties;
|
||||
|
||||
/** Mock {@link PostgreSQLTableSource}. */
|
||||
public class MockPostgreSQLTableSource extends PostgreSQLTableSource {
|
||||
public MockPostgreSQLTableSource(PostgreSQLTableSource postgreSQLTableSource) {
|
||||
super(
|
||||
(ResolvedSchema) get(postgreSQLTableSource, "physicalSchema"),
|
||||
(int) get(postgreSQLTableSource, "port"),
|
||||
(String) get(postgreSQLTableSource, "hostname"),
|
||||
(String) get(postgreSQLTableSource, "database"),
|
||||
(String) get(postgreSQLTableSource, "schemaName"),
|
||||
(String) get(postgreSQLTableSource, "tableName"),
|
||||
(String) get(postgreSQLTableSource, "username"),
|
||||
(String) get(postgreSQLTableSource, "password"),
|
||||
(String) get(postgreSQLTableSource, "pluginName"),
|
||||
(String) get(postgreSQLTableSource, "slotName"),
|
||||
(DebeziumChangelogMode) get(postgreSQLTableSource, "changelogMode"),
|
||||
(Properties) get(postgreSQLTableSource, "dbzProperties"),
|
||||
(boolean) get(postgreSQLTableSource, "enableParallelRead"),
|
||||
(int) get(postgreSQLTableSource, "splitSize"),
|
||||
(int) get(postgreSQLTableSource, "splitMetaGroupSize"),
|
||||
(int) get(postgreSQLTableSource, "fetchSize"),
|
||||
(Duration) get(postgreSQLTableSource, "connectTimeout"),
|
||||
(int) get(postgreSQLTableSource, "connectMaxRetries"),
|
||||
(int) get(postgreSQLTableSource, "connectionPoolSize"),
|
||||
(double) get(postgreSQLTableSource, "distributionFactorUpper"),
|
||||
(double) get(postgreSQLTableSource, "distributionFactorLower"),
|
||||
(Duration) get(postgreSQLTableSource, "heartbeatInterval"),
|
||||
(StartupOptions) get(postgreSQLTableSource, "startupOptions"),
|
||||
(String) get(postgreSQLTableSource, "chunkKeyColumn"),
|
||||
(boolean) get(postgreSQLTableSource, "closeIdleReaders"),
|
||||
(boolean) get(postgreSQLTableSource, "skipSnapshotBackfill"));
|
||||
}
|
||||
|
||||
@Override
|
||||
public ScanRuntimeProvider getScanRuntimeProvider(ScanContext scanContext) {
|
||||
ScanRuntimeProvider scanRuntimeProvider = super.getScanRuntimeProvider(scanContext);
|
||||
|
||||
if (scanRuntimeProvider instanceof SourceProvider) {
|
||||
Source<RowData, ?, ?> source = ((SourceProvider) scanRuntimeProvider).createSource();
|
||||
Preconditions.checkState(
|
||||
source instanceof PostgresSourceBuilder.PostgresIncrementalSource);
|
||||
|
||||
PostgresSourceBuilder.PostgresIncrementalSource incrementalSource =
|
||||
(PostgresSourceBuilder.PostgresIncrementalSource) source;
|
||||
|
||||
try {
|
||||
Field configFactoryField =
|
||||
IncrementalSource.class.getDeclaredField("configFactory");
|
||||
configFactoryField.setAccessible(true);
|
||||
PostgresSourceConfigFactory configFactory =
|
||||
(PostgresSourceConfigFactory) configFactoryField.get(incrementalSource);
|
||||
MockPostgresDialect mockPostgresDialect =
|
||||
new MockPostgresDialect(configFactory.create(0));
|
||||
|
||||
Field dataSourceDialectField =
|
||||
IncrementalSource.class.getDeclaredField("dataSourceDialect");
|
||||
dataSourceDialectField.setAccessible(true);
|
||||
dataSourceDialectField.set(incrementalSource, mockPostgresDialect);
|
||||
} catch (NoSuchFieldException | IllegalArgumentException | IllegalAccessException e) {
|
||||
throw new FlinkRuntimeException(e);
|
||||
}
|
||||
}
|
||||
|
||||
return scanRuntimeProvider;
|
||||
}
|
||||
|
||||
private static Object get(PostgreSQLTableSource postgreSQLTableSource, String name) {
|
||||
try {
|
||||
Field field = postgreSQLTableSource.getClass().getDeclaredField(name);
|
||||
field.setAccessible(true);
|
||||
return field.get(postgreSQLTableSource);
|
||||
} catch (NoSuchFieldException | IllegalArgumentException | IllegalAccessException e) {
|
||||
throw new FlinkRuntimeException(e);
|
||||
}
|
||||
}
|
||||
}
|
@ -0,0 +1,14 @@
|
||||
# Copyright 2023 Ververica Inc.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
com.ververica.cdc.connectors.postgres.table.MockPostgreSQLTableFactory
|
Loading…
Reference in New Issue