[build] Format code with Spotless/google-java-format

pull/334/head
Jark Wu 4 years ago
parent 882c9204a1
commit 8925d943bf

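For reference, a Spotless setup that applies google-java-format in a Maven build is typically declared in the project pom.xml along the following lines. This is a minimal sketch; the plugin version and the AOSP style setting shown here are assumptions, not taken from this commit:

<plugin>
    <groupId>com.diffplug.spotless</groupId>
    <artifactId>spotless-maven-plugin</artifactId>
    <!-- assumed plugin version; adjust to match the project -->
    <version>2.4.2</version>
    <configuration>
        <java>
            <googleJavaFormat>
                <!-- assumed formatter version and style (AOSP = 4-space indent) -->
                <version>1.7</version>
                <style>AOSP</style>
            </googleJavaFormat>
        </java>
    </configuration>
    <executions>
        <execution>
            <goals>
                <goal>check</goal>
            </goals>
        </execution>
    </executions>
</plugin>

Running mvn spotless:apply rewrites the sources in place, which is what produces the re-wrapped Javadoc and argument formatting shown in the diff below.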
@@ -27,16 +27,14 @@ import org.apache.kafka.connect.source.SourceRecord;
import java.io.Serializable;
/**
* The deserialization schema describes how to turn the Debezium SourceRecord
* into data types (Java/Scala objects) that are processed by Flink.
* The deserialization schema describes how to turn the Debezium SourceRecord into data types
* (Java/Scala objects) that are processed by Flink.
*
* @param <T> The type created by the deserialization schema.
*/
@PublicEvolving
public interface DebeziumDeserializationSchema<T> extends Serializable, ResultTypeQueryable<T> {
/**
* Deserialize the Debezium record, it is represented in Kafka {@link SourceRecord}.
*/
void deserialize(SourceRecord record, Collector<T> out) throws Exception;
/** Deserialize the Debezium record, it is represented in Kafka {@link SourceRecord}. */
void deserialize(SourceRecord record, Collector<T> out) throws Exception;
}

@@ -69,408 +69,422 @@ import java.util.concurrent.TimeUnit;
* The {@link DebeziumSourceFunction} is a streaming data source that pulls captured change data
* from databases into Flink.
*
* <p>The source function participates in checkpointing and guarantees that no data is lost
* during a failure, and that the computation processes elements "exactly once".
* <p>The source function participates in checkpointing and guarantees that no data is lost during a
* failure, and that the computation processes elements "exactly once".
*
* <p>Note: currently, the source function can't run in multiple parallel instances.
*
* <p>Please refer to Debezium's documentation for the available configuration properties:
* https://debezium.io/documentation/reference/1.2/development/engine.html#engine-properties</p>
* https://debezium.io/documentation/reference/1.2/development/engine.html#engine-properties
*/
@PublicEvolving
public class DebeziumSourceFunction<T> extends RichSourceFunction<T> implements
CheckpointedFunction,
CheckpointListener,
ResultTypeQueryable<T> {
private static final long serialVersionUID = -5808108641062931623L;
protected static final Logger LOG = LoggerFactory.getLogger(DebeziumSourceFunction.class);
/** State name of the consumer's partition offset states. */
public static final String OFFSETS_STATE_NAME = "offset-states";
/** State name of the consumer's history records state. */
public static final String HISTORY_RECORDS_STATE_NAME = "history-records-states";
/** The maximum number of pending non-committed checkpoints to track, to avoid memory leaks. */
public static final int MAX_NUM_PENDING_CHECKPOINTS = 100;
// -------------------------------------------------------------------------------------------
/** The schema to convert from Debezium's messages into Flink's objects. */
private final DebeziumDeserializationSchema<T> deserializer;
/** User-supplied properties for Kafka. **/
private final Properties properties;
/** The specific binlog offset to read from when the first startup. */
private final @Nullable DebeziumOffset specificOffset;
/** Data for pending but uncommitted offsets. */
private final LinkedMap pendingOffsetsToCommit = new LinkedMap();
private ExecutorService executor;
private DebeziumEngine<?> engine;
/** The error from {@link #engine} thread. */
private transient volatile Throwable error;
/** Flag indicating whether the consumer is still running. */
private volatile boolean running = true;
/** The consumer to fetch records from {@link DebeziumEngine}. */
private transient volatile DebeziumChangeConsumer<T> debeziumConsumer;
/**
* The offsets to restore to, if the consumer restores state from a checkpoint.
*
* <p>This map will be populated by the {@link #initializeState(FunctionInitializationContext)} method.
*
* <p>Using a String because we are encoding the offset state in JSON bytes.
*/
private transient volatile String restoredOffsetState;
/** Accessor for state in the operator state backend. */
private transient ListState<byte[]> offsetState;
/**
* State to store the history records, i.e. schema changes.
*
* @see FlinkDatabaseHistory
*/
private transient ListState<String> historyRecordsState;
/**
* Unique name of this Debezium Engine instance across all the jobs. Currently we randomly generate a UUID for it.
* This is used for {@link FlinkDatabaseHistory}.
*/
private transient String engineInstanceName;
public DebeziumSourceFunction(
DebeziumDeserializationSchema<T> deserializer,
Properties properties,
@Nullable DebeziumOffset specificOffset) {
this.deserializer = deserializer;
this.properties = properties;
this.specificOffset = specificOffset;
}
@Override
public void open(Configuration parameters) throws Exception {
super.open(parameters);
ThreadFactory threadFactory = new ThreadFactoryBuilder()
.setNameFormat("debezium-engine")
.build();
this.executor = Executors.newSingleThreadExecutor(threadFactory);
}
// ------------------------------------------------------------------------
// Checkpoint and restore
// ------------------------------------------------------------------------
@Override
public void initializeState(FunctionInitializationContext context) throws Exception {
OperatorStateStore stateStore = context.getOperatorStateStore();
this.offsetState = stateStore.getUnionListState(new ListStateDescriptor<>(
OFFSETS_STATE_NAME,
PrimitiveArrayTypeInfo.BYTE_PRIMITIVE_ARRAY_TYPE_INFO));
this.historyRecordsState = stateStore.getUnionListState(new ListStateDescriptor<>(
HISTORY_RECORDS_STATE_NAME,
BasicTypeInfo.STRING_TYPE_INFO));
if (context.isRestored()) {
restoreOffsetState();
restoreHistoryRecordsState();
} else {
if (specificOffset != null) {
byte[] serializedOffset = DebeziumOffsetSerializer.INSTANCE.serialize(specificOffset);
restoredOffsetState = new String(serializedOffset, StandardCharsets.UTF_8);
LOG.info(
"Consumer subtask {} starts to read from specified offset {}.",
getRuntimeContext().getIndexOfThisSubtask(),
restoredOffsetState);
} else {
LOG.info("Consumer subtask {} has no restore state.", getRuntimeContext().getIndexOfThisSubtask());
}
}
}
private void restoreOffsetState() throws Exception {
for (byte[] serializedOffset : offsetState.get()) {
if (restoredOffsetState == null) {
restoredOffsetState = new String(serializedOffset, StandardCharsets.UTF_8);
} else {
throw new RuntimeException("Debezium Source only support single task, " +
"however, this is restored from multiple tasks.");
}
}
LOG.info("Consumer subtask {} restored offset state: {}.", getRuntimeContext().getIndexOfThisSubtask(), restoredOffsetState);
}
private void restoreHistoryRecordsState() throws Exception {
DocumentReader reader = DocumentReader.defaultReader();
ConcurrentLinkedQueue<HistoryRecord> historyRecords = new ConcurrentLinkedQueue<>();
int recordsCount = 0;
boolean firstEntry = true;
for (String record : historyRecordsState.get()) {
if (firstEntry) {
// we store the engine instance name in the first element
this.engineInstanceName = record;
firstEntry = false;
} else {
historyRecords.add(new HistoryRecord(reader.read(record)));
recordsCount++;
}
}
if (engineInstanceName != null) {
FlinkDatabaseHistory.registerHistoryRecords(engineInstanceName, historyRecords);
}
LOG.info("Consumer subtask {} restored history records state: {} with {} records.",
getRuntimeContext().getIndexOfThisSubtask(),
engineInstanceName,
recordsCount);
}
@Override
public void snapshotState(FunctionSnapshotContext functionSnapshotContext) throws Exception {
if (!running) {
LOG.debug("snapshotState() called on closed source");
} else {
snapshotOffsetState(functionSnapshotContext.getCheckpointId());
snapshotHistoryRecordsState();
}
}
private void snapshotOffsetState(long checkpointId) throws Exception {
offsetState.clear();
final DebeziumChangeConsumer<?> consumer = this.debeziumConsumer;
byte[] serializedOffset = null;
if (consumer == null) {
// the consumer has not yet been initialized, which means we need to return the
// originally restored offsets
if (restoredOffsetState != null) {
serializedOffset = restoredOffsetState.getBytes(StandardCharsets.UTF_8);
}
} else {
byte[] currentState = consumer.snapshotCurrentState();
if (currentState == null) {
// the consumer has been initialized, but has not yet received any data,
// which means we need to return the originally restored offsets
serializedOffset = restoredOffsetState.getBytes(StandardCharsets.UTF_8);
}
else {
serializedOffset = currentState;
}
}
if (serializedOffset != null) {
offsetState.add(serializedOffset);
// the map cannot be asynchronously updated, because only one checkpoint call
// can happen on this function at a time: either snapshotState() or
// notifyCheckpointComplete()
pendingOffsetsToCommit.put(checkpointId, serializedOffset);
// truncate the map of pending offsets to commit, to prevent infinite growth
while (pendingOffsetsToCommit.size() > MAX_NUM_PENDING_CHECKPOINTS) {
pendingOffsetsToCommit.remove(0);
}
}
}
private void snapshotHistoryRecordsState() throws Exception {
historyRecordsState.clear();
if (engineInstanceName != null) {
historyRecordsState.add(engineInstanceName);
ConcurrentLinkedQueue<HistoryRecord> historyRecords = FlinkDatabaseHistory.getRegisteredHistoryRecord(engineInstanceName);
if (historyRecords != null) {
DocumentWriter writer = DocumentWriter.defaultWriter();
for (HistoryRecord record : historyRecords) {
historyRecordsState.add(writer.write(record.document()));
}
}
}
}
@Override
public void run(SourceContext<T> sourceContext) throws Exception {
properties.setProperty("name", "engine");
properties.setProperty("offset.storage", FlinkOffsetBackingStore.class.getCanonicalName());
if (restoredOffsetState != null) {
// restored from state
properties.setProperty(FlinkOffsetBackingStore.OFFSET_STATE_VALUE, restoredOffsetState);
}
// DO NOT include schema payload in change event
properties.setProperty("key.converter.schemas.enable", "false");
properties.setProperty("value.converter.schemas.enable", "false");
// DO NOT include schema change, e.g. DDL
properties.setProperty("include.schema.changes", "false");
// disable the offset flush totally
properties.setProperty("offset.flush.interval.ms", String.valueOf(Long.MAX_VALUE));
// disable tombstones
properties.setProperty("tombstones.on.delete", "false");
// we have to use a persisted DatabaseHistory implementation, otherwise, recovery can't continue to read binlog
// see https://stackoverflow.com/questions/57147584/debezium-error-schema-isnt-know-to-this-connector
// and https://debezium.io/blog/2018/03/16/note-on-database-history-topic-configuration/
properties.setProperty("database.history", FlinkDatabaseHistory.class.getCanonicalName());
if (engineInstanceName == null) {
// not restore from recovery
engineInstanceName = UUID.randomUUID().toString();
FlinkDatabaseHistory.registerEmptyHistoryRecord(engineInstanceName);
}
// history instance name to initialize FlinkDatabaseHistory
properties.setProperty(FlinkDatabaseHistory.DATABASE_HISTORY_INSTANCE_NAME, engineInstanceName);
// we have to filter out the heartbeat events, otherwise the deserializer will fail
String dbzHeartbeatPrefix = properties.getProperty(
Heartbeat.HEARTBEAT_TOPICS_PREFIX.name(),
Heartbeat.HEARTBEAT_TOPICS_PREFIX.defaultValueAsString());
this.debeziumConsumer = new DebeziumChangeConsumer<>(
sourceContext,
deserializer,
restoredOffsetState == null, // DB snapshot phase if restore state is null
this::reportError,
dbzHeartbeatPrefix);
// create the engine with this configuration ...
this.engine = DebeziumEngine.create(Connect.class)
.using(properties)
.notifying(debeziumConsumer)
.using(OffsetCommitPolicy.always())
.using((success, message, error) -> {
if (!success && error != null) {
this.reportError(error);
}
})
.build();
if (!running) {
return;
}
// run the engine asynchronously
executor.execute(engine);
// on a clean exit, wait for the runner thread
try {
while (running) {
if (executor.awaitTermination(5, TimeUnit.SECONDS)) {
break;
}
if (error != null) {
running = false;
shutdownEngine();
// rethrow the error from Debezium consumer
ExceptionUtils.rethrow(error);
}
}
}
catch (InterruptedException e) {
// may be the result of a wake-up interruption after an exception.
// we ignore this here and only restore the interruption state
Thread.currentThread().interrupt();
}
}
@Override
public void notifyCheckpointComplete(long checkpointId) throws Exception {
if (!running) {
LOG.debug("notifyCheckpointComplete() called on closed source");
return;
}
final DebeziumChangeConsumer<T> consumer = this.debeziumConsumer;
if (consumer == null) {
LOG.debug("notifyCheckpointComplete() called on uninitialized source");
return;
}
try {
final int posInMap = pendingOffsetsToCommit.indexOf(checkpointId);
if (posInMap == -1) {
LOG.warn(
"Consumer subtask {} received confirmation for unknown checkpoint id {}",
getRuntimeContext().getIndexOfThisSubtask(),
checkpointId);
return;
}
byte[] serializedOffsets = (byte[]) pendingOffsetsToCommit.remove(posInMap);
// remove older checkpoints in map
for (int i = 0; i < posInMap; i++) {
pendingOffsetsToCommit.remove(0);
}
if (serializedOffsets == null || serializedOffsets.length == 0) {
LOG.debug(
"Consumer subtask {} has empty checkpoint state.",
getRuntimeContext().getIndexOfThisSubtask());
return;
}
DebeziumOffset offset = DebeziumOffsetSerializer.INSTANCE.deserialize(serializedOffsets);
consumer.commitOffset(offset);
} catch (Exception e) {
// ignore exception if we are no longer running
LOG.warn("Ignore error when committing offset to database.", e);
}
}
@Override
public void cancel() {
// flag the main thread to exit. A thread interrupt will come anyways.
running = false;
// safely and gracefully stop the engine
shutdownEngine();
}
@Override
public void close() throws Exception {
cancel();
if (executor != null) {
executor.awaitTermination(Long.MAX_VALUE, TimeUnit.SECONDS);
}
super.close();
}
// --------------------------------------------------------------------------------
// Error callbacks
// --------------------------------------------------------------------------------
private void reportError(Throwable error) {
LOG.error("Reporting error:", error);
this.error = error;
}
/**
* Safely and gracefully stop the Debezium engine.
*/
private void shutdownEngine() {
try {
if (engine != null) {
engine.close();
}
} catch (IOException e) {
ExceptionUtils.rethrow(e);
} finally {
if (executor != null) {
executor.shutdown();
}
}
}
@Override
public TypeInformation<T> getProducedType() {
return deserializer.getProducedType();
}
@VisibleForTesting
public LinkedMap getPendingOffsetsToCommit() {
return pendingOffsetsToCommit;
}
public class DebeziumSourceFunction<T> extends RichSourceFunction<T>
implements CheckpointedFunction, CheckpointListener, ResultTypeQueryable<T> {
private static final long serialVersionUID = -5808108641062931623L;
protected static final Logger LOG = LoggerFactory.getLogger(DebeziumSourceFunction.class);
/** State name of the consumer's partition offset states. */
public static final String OFFSETS_STATE_NAME = "offset-states";
/** State name of the consumer's history records state. */
public static final String HISTORY_RECORDS_STATE_NAME = "history-records-states";
/** The maximum number of pending non-committed checkpoints to track, to avoid memory leaks. */
public static final int MAX_NUM_PENDING_CHECKPOINTS = 100;
// -------------------------------------------------------------------------------------------
/** The schema to convert from Debezium's messages into Flink's objects. */
private final DebeziumDeserializationSchema<T> deserializer;
/** User-supplied properties for Kafka. * */
private final Properties properties;
/** The specific binlog offset to read from when the first startup. */
private final @Nullable DebeziumOffset specificOffset;
/** Data for pending but uncommitted offsets. */
private final LinkedMap pendingOffsetsToCommit = new LinkedMap();
private ExecutorService executor;
private DebeziumEngine<?> engine;
/** The error from {@link #engine} thread. */
private transient volatile Throwable error;
/** Flag indicating whether the consumer is still running. */
private volatile boolean running = true;
/** The consumer to fetch records from {@link DebeziumEngine}. */
private transient volatile DebeziumChangeConsumer<T> debeziumConsumer;
/**
* The offsets to restore to, if the consumer restores state from a checkpoint.
*
* <p>This map will be populated by the {@link #initializeState(FunctionInitializationContext)}
* method.
*
* <p>Using a String because we are encoding the offset state in JSON bytes.
*/
private transient volatile String restoredOffsetState;
/** Accessor for state in the operator state backend. */
private transient ListState<byte[]> offsetState;
/**
* State to store the history records, i.e. schema changes.
*
* @see FlinkDatabaseHistory
*/
private transient ListState<String> historyRecordsState;
/**
* Unique name of this Debezium Engine instance across all the jobs. Currently we randomly
* generate a UUID for it. This is used for {@link FlinkDatabaseHistory}.
*/
private transient String engineInstanceName;
public DebeziumSourceFunction(
DebeziumDeserializationSchema<T> deserializer,
Properties properties,
@Nullable DebeziumOffset specificOffset) {
this.deserializer = deserializer;
this.properties = properties;
this.specificOffset = specificOffset;
}
@Override
public void open(Configuration parameters) throws Exception {
super.open(parameters);
ThreadFactory threadFactory =
new ThreadFactoryBuilder().setNameFormat("debezium-engine").build();
this.executor = Executors.newSingleThreadExecutor(threadFactory);
}
// ------------------------------------------------------------------------
// Checkpoint and restore
// ------------------------------------------------------------------------
@Override
public void initializeState(FunctionInitializationContext context) throws Exception {
OperatorStateStore stateStore = context.getOperatorStateStore();
this.offsetState =
stateStore.getUnionListState(
new ListStateDescriptor<>(
OFFSETS_STATE_NAME,
PrimitiveArrayTypeInfo.BYTE_PRIMITIVE_ARRAY_TYPE_INFO));
this.historyRecordsState =
stateStore.getUnionListState(
new ListStateDescriptor<>(
HISTORY_RECORDS_STATE_NAME, BasicTypeInfo.STRING_TYPE_INFO));
if (context.isRestored()) {
restoreOffsetState();
restoreHistoryRecordsState();
} else {
if (specificOffset != null) {
byte[] serializedOffset =
DebeziumOffsetSerializer.INSTANCE.serialize(specificOffset);
restoredOffsetState = new String(serializedOffset, StandardCharsets.UTF_8);
LOG.info(
"Consumer subtask {} starts to read from specified offset {}.",
getRuntimeContext().getIndexOfThisSubtask(),
restoredOffsetState);
} else {
LOG.info(
"Consumer subtask {} has no restore state.",
getRuntimeContext().getIndexOfThisSubtask());
}
}
}
private void restoreOffsetState() throws Exception {
for (byte[] serializedOffset : offsetState.get()) {
if (restoredOffsetState == null) {
restoredOffsetState = new String(serializedOffset, StandardCharsets.UTF_8);
} else {
throw new RuntimeException(
"Debezium Source only support single task, "
+ "however, this is restored from multiple tasks.");
}
}
LOG.info(
"Consumer subtask {} restored offset state: {}.",
getRuntimeContext().getIndexOfThisSubtask(),
restoredOffsetState);
}
private void restoreHistoryRecordsState() throws Exception {
DocumentReader reader = DocumentReader.defaultReader();
ConcurrentLinkedQueue<HistoryRecord> historyRecords = new ConcurrentLinkedQueue<>();
int recordsCount = 0;
boolean firstEntry = true;
for (String record : historyRecordsState.get()) {
if (firstEntry) {
// we store the engine instance name in the first element
this.engineInstanceName = record;
firstEntry = false;
} else {
historyRecords.add(new HistoryRecord(reader.read(record)));
recordsCount++;
}
}
if (engineInstanceName != null) {
FlinkDatabaseHistory.registerHistoryRecords(engineInstanceName, historyRecords);
}
LOG.info(
"Consumer subtask {} restored history records state: {} with {} records.",
getRuntimeContext().getIndexOfThisSubtask(),
engineInstanceName,
recordsCount);
}
@Override
public void snapshotState(FunctionSnapshotContext functionSnapshotContext) throws Exception {
if (!running) {
LOG.debug("snapshotState() called on closed source");
} else {
snapshotOffsetState(functionSnapshotContext.getCheckpointId());
snapshotHistoryRecordsState();
}
}
private void snapshotOffsetState(long checkpointId) throws Exception {
offsetState.clear();
final DebeziumChangeConsumer<?> consumer = this.debeziumConsumer;
byte[] serializedOffset = null;
if (consumer == null) {
// the consumer has not yet been initialized, which means we need to return the
// originally restored offsets
if (restoredOffsetState != null) {
serializedOffset = restoredOffsetState.getBytes(StandardCharsets.UTF_8);
}
} else {
byte[] currentState = consumer.snapshotCurrentState();
if (currentState == null) {
// the consumer has been initialized, but has not yet received any data,
// which means we need to return the originally restored offsets
serializedOffset = restoredOffsetState.getBytes(StandardCharsets.UTF_8);
} else {
serializedOffset = currentState;
}
}
if (serializedOffset != null) {
offsetState.add(serializedOffset);
// the map cannot be asynchronously updated, because only one checkpoint call
// can happen on this function at a time: either snapshotState() or
// notifyCheckpointComplete()
pendingOffsetsToCommit.put(checkpointId, serializedOffset);
// truncate the map of pending offsets to commit, to prevent infinite growth
while (pendingOffsetsToCommit.size() > MAX_NUM_PENDING_CHECKPOINTS) {
pendingOffsetsToCommit.remove(0);
}
}
}
private void snapshotHistoryRecordsState() throws Exception {
historyRecordsState.clear();
if (engineInstanceName != null) {
historyRecordsState.add(engineInstanceName);
ConcurrentLinkedQueue<HistoryRecord> historyRecords =
FlinkDatabaseHistory.getRegisteredHistoryRecord(engineInstanceName);
if (historyRecords != null) {
DocumentWriter writer = DocumentWriter.defaultWriter();
for (HistoryRecord record : historyRecords) {
historyRecordsState.add(writer.write(record.document()));
}
}
}
}
@Override
public void run(SourceContext<T> sourceContext) throws Exception {
properties.setProperty("name", "engine");
properties.setProperty("offset.storage", FlinkOffsetBackingStore.class.getCanonicalName());
if (restoredOffsetState != null) {
// restored from state
properties.setProperty(FlinkOffsetBackingStore.OFFSET_STATE_VALUE, restoredOffsetState);
}
// DO NOT include schema payload in change event
properties.setProperty("key.converter.schemas.enable", "false");
properties.setProperty("value.converter.schemas.enable", "false");
// DO NOT include schema change, e.g. DDL
properties.setProperty("include.schema.changes", "false");
// disable the offset flush totally
properties.setProperty("offset.flush.interval.ms", String.valueOf(Long.MAX_VALUE));
// disable tombstones
properties.setProperty("tombstones.on.delete", "false");
// we have to use a persisted DatabaseHistory implementation, otherwise, recovery can't
// continue to read binlog
// see
// https://stackoverflow.com/questions/57147584/debezium-error-schema-isnt-know-to-this-connector
// and https://debezium.io/blog/2018/03/16/note-on-database-history-topic-configuration/
properties.setProperty("database.history", FlinkDatabaseHistory.class.getCanonicalName());
if (engineInstanceName == null) {
// not restore from recovery
engineInstanceName = UUID.randomUUID().toString();
FlinkDatabaseHistory.registerEmptyHistoryRecord(engineInstanceName);
}
// history instance name to initialize FlinkDatabaseHistory
properties.setProperty(
FlinkDatabaseHistory.DATABASE_HISTORY_INSTANCE_NAME, engineInstanceName);
// we have to filter out the heartbeat events, otherwise the deserializer will fail
String dbzHeartbeatPrefix =
properties.getProperty(
Heartbeat.HEARTBEAT_TOPICS_PREFIX.name(),
Heartbeat.HEARTBEAT_TOPICS_PREFIX.defaultValueAsString());
this.debeziumConsumer =
new DebeziumChangeConsumer<>(
sourceContext,
deserializer,
restoredOffsetState == null, // DB snapshot phase if restore state is null
this::reportError,
dbzHeartbeatPrefix);
// create the engine with this configuration ...
this.engine =
DebeziumEngine.create(Connect.class)
.using(properties)
.notifying(debeziumConsumer)
.using(OffsetCommitPolicy.always())
.using(
(success, message, error) -> {
if (!success && error != null) {
this.reportError(error);
}
})
.build();
if (!running) {
return;
}
// run the engine asynchronously
executor.execute(engine);
// on a clean exit, wait for the runner thread
try {
while (running) {
if (executor.awaitTermination(5, TimeUnit.SECONDS)) {
break;
}
if (error != null) {
running = false;
shutdownEngine();
// rethrow the error from Debezium consumer
ExceptionUtils.rethrow(error);
}
}
} catch (InterruptedException e) {
// may be the result of a wake-up interruption after an exception.
// we ignore this here and only restore the interruption state
Thread.currentThread().interrupt();
}
}
@Override
public void notifyCheckpointComplete(long checkpointId) throws Exception {
if (!running) {
LOG.debug("notifyCheckpointComplete() called on closed source");
return;
}
final DebeziumChangeConsumer<T> consumer = this.debeziumConsumer;
if (consumer == null) {
LOG.debug("notifyCheckpointComplete() called on uninitialized source");
return;
}
try {
final int posInMap = pendingOffsetsToCommit.indexOf(checkpointId);
if (posInMap == -1) {
LOG.warn(
"Consumer subtask {} received confirmation for unknown checkpoint id {}",
getRuntimeContext().getIndexOfThisSubtask(),
checkpointId);
return;
}
byte[] serializedOffsets = (byte[]) pendingOffsetsToCommit.remove(posInMap);
// remove older checkpoints in map
for (int i = 0; i < posInMap; i++) {
pendingOffsetsToCommit.remove(0);
}
if (serializedOffsets == null || serializedOffsets.length == 0) {
LOG.debug(
"Consumer subtask {} has empty checkpoint state.",
getRuntimeContext().getIndexOfThisSubtask());
return;
}
DebeziumOffset offset =
DebeziumOffsetSerializer.INSTANCE.deserialize(serializedOffsets);
consumer.commitOffset(offset);
} catch (Exception e) {
// ignore exception if we are no longer running
LOG.warn("Ignore error when committing offset to database.", e);
}
}
@Override
public void cancel() {
// flag the main thread to exit. A thread interrupt will come anyways.
running = false;
// safely and gracefully stop the engine
shutdownEngine();
}
@Override
public void close() throws Exception {
cancel();
if (executor != null) {
executor.awaitTermination(Long.MAX_VALUE, TimeUnit.SECONDS);
}
super.close();
}
// --------------------------------------------------------------------------------
// Error callbacks
// --------------------------------------------------------------------------------
private void reportError(Throwable error) {
LOG.error("Reporting error:", error);
this.error = error;
}
/** Safely and gracefully stop the Debezium engine. */
private void shutdownEngine() {
try {
if (engine != null) {
engine.close();
}
} catch (IOException e) {
ExceptionUtils.rethrow(e);
} finally {
if (executor != null) {
executor.shutdown();
}
}
}
@Override
public TypeInformation<T> getProducedType() {
return deserializer.getProducedType();
}
@VisibleForTesting
public LinkedMap getPendingOffsetsToCommit() {
return pendingOffsetsToCommit;
}
}

@@ -29,15 +29,15 @@ import org.apache.kafka.connect.source.SourceRecord;
* {@link SourceRecord} into String.
*/
public class StringDebeziumDeserializationSchema implements DebeziumDeserializationSchema<String> {
private static final long serialVersionUID = -3168848963265670603L;
private static final long serialVersionUID = -3168848963265670603L;
@Override
public void deserialize(SourceRecord record, Collector<String> out) throws Exception {
out.collect(record.toString());
}
@Override
public void deserialize(SourceRecord record, Collector<String> out) throws Exception {
out.collect(record.toString());
}
@Override
public TypeInformation<String> getProducedType() {
return BasicTypeInfo.STRING_TYPE_INFO;
}
@Override
public TypeInformation<String> getProducedType() {
return BasicTypeInfo.STRING_TYPE_INFO;
}
}

@@ -46,213 +46,217 @@ import java.util.Queue;
* @param <T> The type of elements produced by the consumer.
*/
@Internal
public class DebeziumChangeConsumer<T> implements DebeziumEngine.ChangeConsumer<ChangeEvent<SourceRecord, SourceRecord>> {
private static final Logger LOG = LoggerFactory.getLogger(DebeziumChangeConsumer.class);
public static final String LAST_COMPLETELY_PROCESSED_LSN_KEY = "lsn_proc";
public static final String LAST_COMMIT_LSN_KEY = "lsn_commit";
private final SourceFunction.SourceContext<T> sourceContext;
/** The lock that guarantees that record emission and state updates are atomic,
* from the view of taking a checkpoint. */
private final Object checkpointLock;
/** The schema to convert from Debezium's messages into Flink's objects. */
private final DebeziumDeserializationSchema<T> deserialization;
/** A collector to emit records in batch (bundle). **/
private final DebeziumCollector debeziumCollector;
private final ErrorReporter errorReporter;
private final DebeziumOffset debeziumOffset;
private final DebeziumOffsetSerializer stateSerializer;
private final String heartbeatTopicPrefix;
private boolean isInDbSnapshotPhase;
private boolean lockHold = false;
private DebeziumEngine.RecordCommitter<ChangeEvent<SourceRecord, SourceRecord>> currentCommitter;
// ------------------------------------------------------------------------
public DebeziumChangeConsumer(
SourceFunction.SourceContext<T> sourceContext,
DebeziumDeserializationSchema<T> deserialization,
boolean isInDbSnapshotPhase,
ErrorReporter errorReporter,
String heartbeatTopicPrefix) {
this.sourceContext = sourceContext;
this.checkpointLock = sourceContext.getCheckpointLock();
this.deserialization = deserialization;
this.isInDbSnapshotPhase = isInDbSnapshotPhase;
this.heartbeatTopicPrefix = heartbeatTopicPrefix;
this.debeziumCollector = new DebeziumCollector();
this.errorReporter = errorReporter;
this.debeziumOffset = new DebeziumOffset();
this.stateSerializer = DebeziumOffsetSerializer.INSTANCE;
}
@Override
public void handleBatch(
List<ChangeEvent<SourceRecord, SourceRecord>> changeEvents,
DebeziumEngine.RecordCommitter<ChangeEvent<SourceRecord, SourceRecord>> committer) throws InterruptedException {
this.currentCommitter = committer;
try {
for (ChangeEvent<SourceRecord, SourceRecord> event : changeEvents) {
SourceRecord record = event.value();
if (isHeartbeatEvent(record)) {
// keep offset update
synchronized (checkpointLock) {
debeziumOffset.setSourcePartition(record.sourcePartition());
debeziumOffset.setSourceOffset(record.sourceOffset());
}
// drop heartbeat events
continue;
}
deserialization.deserialize(record, debeziumCollector);
if (isInDbSnapshotPhase) {
if (!lockHold) {
MemoryUtils.UNSAFE.monitorEnter(checkpointLock);
lockHold = true;
LOG.info("Database snapshot phase can't perform checkpoint, acquired Checkpoint lock.");
}
if (!isSnapshotRecord(record)) {
MemoryUtils.UNSAFE.monitorExit(checkpointLock);
isInDbSnapshotPhase = false;
LOG.info("Received record from streaming binlog phase, released checkpoint lock.");
}
}
// emit the actual records. this also updates offset state atomically
emitRecordsUnderCheckpointLock(debeziumCollector.records, record.sourcePartition(), record.sourceOffset());
}
} catch (Exception e) {
LOG.error("Error happens when consuming change messages.", e);
errorReporter.reportError(e);
}
}
private boolean isHeartbeatEvent(SourceRecord record) {
String topic = record.topic();
return topic != null && topic.startsWith(heartbeatTopicPrefix);
}
private boolean isSnapshotRecord(SourceRecord record) {
Struct value = (Struct) record.value();
if (value != null) {
Struct source = value.getStruct(Envelope.FieldName.SOURCE);
SnapshotRecord snapshotRecord = SnapshotRecord.fromSource(source);
// even if it is the last record of snapshot, i.e. SnapshotRecord.LAST
// we can still recover from checkpoint and continue to read the binlog,
// because the checkpoint contains binlog position
return SnapshotRecord.TRUE == snapshotRecord;
}
return false;
}
private void emitRecordsUnderCheckpointLock(
Queue<T> records,
Map<String, ?> sourcePartition,
Map<String, ?> sourceOffset) throws InterruptedException {
if (isInDbSnapshotPhase) {
// lockHolderThread holds the lock, don't need to hold it again
emitRecords(records, sourcePartition, sourceOffset);
} else {
// emit the records, using the checkpoint lock to guarantee
// atomicity of record emission and offset state update
synchronized (checkpointLock) {
emitRecords(records, sourcePartition, sourceOffset);
}
}
}
/**
* Emits a batch of records.
*/
private void emitRecords(
Queue<T> records,
Map<String, ?> sourcePartition,
Map<String, ?> sourceOffset) {
T record;
while ((record = records.poll()) != null) {
sourceContext.collect(record);
}
// update offset to state
debeziumOffset.setSourcePartition(sourcePartition);
debeziumOffset.setSourceOffset(sourceOffset);
}
/**
* Takes a snapshot of the Debezium Consumer state.
*
* <p>Important: This method must be called under the checkpoint lock.
*/
public byte[] snapshotCurrentState() throws Exception {
// this method assumes that the checkpoint lock is held
assert Thread.holdsLock(checkpointLock);
if (debeziumOffset.sourceOffset == null || debeziumOffset.sourcePartition == null) {
return null;
}
return stateSerializer.serialize(debeziumOffset);
}
@SuppressWarnings("unchecked")
public void commitOffset(DebeziumOffset offset) throws InterruptedException {
if (currentCommitter == null) {
LOG.info("commitOffset() called on Debezium ChangeConsumer which doesn't receive records yet.");
return;
}
// only the offset is used
SourceRecord recordWrapper = new SourceRecord(
offset.sourcePartition,
adjustSourceOffset((Map<String, Object>) offset.sourceOffset),
"DUMMY",
Schema.BOOLEAN_SCHEMA,
true);
EmbeddedEngineChangeEvent<SourceRecord, SourceRecord> changeEvent = new EmbeddedEngineChangeEvent<>(
null, recordWrapper, recordWrapper);
currentCommitter.markProcessed(changeEvent);
currentCommitter.markBatchFinished();
}
/**
* We have to adjust type of LSN values to Long, because it might be Integer after deserialization,
* however {@code io.debezium.connector.postgresql.PostgresStreamingChangeEventSource#commitOffset(java.util.Map)}
* requires Long.
*/
private Map<String, Object> adjustSourceOffset(Map<String, Object> sourceOffset) {
if (sourceOffset.containsKey(LAST_COMPLETELY_PROCESSED_LSN_KEY)) {
String value = sourceOffset.get(LAST_COMPLETELY_PROCESSED_LSN_KEY).toString();
sourceOffset.put(LAST_COMPLETELY_PROCESSED_LSN_KEY, Long.parseLong(value));
}
if (sourceOffset.containsKey(LAST_COMMIT_LSN_KEY)) {
String value = sourceOffset.get(LAST_COMMIT_LSN_KEY).toString();
sourceOffset.put(LAST_COMMIT_LSN_KEY, Long.parseLong(value));
}
return sourceOffset;
}
private class DebeziumCollector implements Collector<T> {
private final Queue<T> records = new ArrayDeque<>();
@Override
public void collect(T record) {
records.add(record);
}
@Override
public void close() {
}
}
public class DebeziumChangeConsumer<T>
implements DebeziumEngine.ChangeConsumer<ChangeEvent<SourceRecord, SourceRecord>> {
private static final Logger LOG = LoggerFactory.getLogger(DebeziumChangeConsumer.class);
public static final String LAST_COMPLETELY_PROCESSED_LSN_KEY = "lsn_proc";
public static final String LAST_COMMIT_LSN_KEY = "lsn_commit";
private final SourceFunction.SourceContext<T> sourceContext;
/**
* The lock that guarantees that record emission and state updates are atomic, from the view of
* taking a checkpoint.
*/
private final Object checkpointLock;
/** The schema to convert from Debezium's messages into Flink's objects. */
private final DebeziumDeserializationSchema<T> deserialization;
/** A collector to emit records in batch (bundle). * */
private final DebeziumCollector debeziumCollector;
private final ErrorReporter errorReporter;
private final DebeziumOffset debeziumOffset;
private final DebeziumOffsetSerializer stateSerializer;
private final String heartbeatTopicPrefix;
private boolean isInDbSnapshotPhase;
private boolean lockHold = false;
private DebeziumEngine.RecordCommitter<ChangeEvent<SourceRecord, SourceRecord>>
currentCommitter;
// ------------------------------------------------------------------------
public DebeziumChangeConsumer(
SourceFunction.SourceContext<T> sourceContext,
DebeziumDeserializationSchema<T> deserialization,
boolean isInDbSnapshotPhase,
ErrorReporter errorReporter,
String heartbeatTopicPrefix) {
this.sourceContext = sourceContext;
this.checkpointLock = sourceContext.getCheckpointLock();
this.deserialization = deserialization;
this.isInDbSnapshotPhase = isInDbSnapshotPhase;
this.heartbeatTopicPrefix = heartbeatTopicPrefix;
this.debeziumCollector = new DebeziumCollector();
this.errorReporter = errorReporter;
this.debeziumOffset = new DebeziumOffset();
this.stateSerializer = DebeziumOffsetSerializer.INSTANCE;
}
@Override
public void handleBatch(
List<ChangeEvent<SourceRecord, SourceRecord>> changeEvents,
DebeziumEngine.RecordCommitter<ChangeEvent<SourceRecord, SourceRecord>> committer)
throws InterruptedException {
this.currentCommitter = committer;
try {
for (ChangeEvent<SourceRecord, SourceRecord> event : changeEvents) {
SourceRecord record = event.value();
if (isHeartbeatEvent(record)) {
// keep offset update
synchronized (checkpointLock) {
debeziumOffset.setSourcePartition(record.sourcePartition());
debeziumOffset.setSourceOffset(record.sourceOffset());
}
// drop heartbeat events
continue;
}
deserialization.deserialize(record, debeziumCollector);
if (isInDbSnapshotPhase) {
if (!lockHold) {
MemoryUtils.UNSAFE.monitorEnter(checkpointLock);
lockHold = true;
LOG.info(
"Database snapshot phase can't perform checkpoint, acquired Checkpoint lock.");
}
if (!isSnapshotRecord(record)) {
MemoryUtils.UNSAFE.monitorExit(checkpointLock);
isInDbSnapshotPhase = false;
LOG.info(
"Received record from streaming binlog phase, released checkpoint lock.");
}
}
// emit the actual records. this also updates offset state atomically
emitRecordsUnderCheckpointLock(
debeziumCollector.records, record.sourcePartition(), record.sourceOffset());
}
} catch (Exception e) {
LOG.error("Error happens when consuming change messages.", e);
errorReporter.reportError(e);
}
}
private boolean isHeartbeatEvent(SourceRecord record) {
String topic = record.topic();
return topic != null && topic.startsWith(heartbeatTopicPrefix);
}
private boolean isSnapshotRecord(SourceRecord record) {
Struct value = (Struct) record.value();
if (value != null) {
Struct source = value.getStruct(Envelope.FieldName.SOURCE);
SnapshotRecord snapshotRecord = SnapshotRecord.fromSource(source);
// even if it is the last record of snapshot, i.e. SnapshotRecord.LAST
// we can still recover from checkpoint and continue to read the binlog,
// because the checkpoint contains binlog position
return SnapshotRecord.TRUE == snapshotRecord;
}
return false;
}
private void emitRecordsUnderCheckpointLock(
Queue<T> records, Map<String, ?> sourcePartition, Map<String, ?> sourceOffset)
throws InterruptedException {
if (isInDbSnapshotPhase) {
// lockHolderThread holds the lock, don't need to hold it again
emitRecords(records, sourcePartition, sourceOffset);
} else {
// emit the records, using the checkpoint lock to guarantee
// atomicity of record emission and offset state update
synchronized (checkpointLock) {
emitRecords(records, sourcePartition, sourceOffset);
}
}
}
/** Emits a batch of records. */
private void emitRecords(
Queue<T> records, Map<String, ?> sourcePartition, Map<String, ?> sourceOffset) {
T record;
while ((record = records.poll()) != null) {
sourceContext.collect(record);
}
// update offset to state
debeziumOffset.setSourcePartition(sourcePartition);
debeziumOffset.setSourceOffset(sourceOffset);
}
/**
* Takes a snapshot of the Debezium Consumer state.
*
* <p>Important: This method must be called under the checkpoint lock.
*/
public byte[] snapshotCurrentState() throws Exception {
// this method assumes that the checkpoint lock is held
assert Thread.holdsLock(checkpointLock);
if (debeziumOffset.sourceOffset == null || debeziumOffset.sourcePartition == null) {
return null;
}
return stateSerializer.serialize(debeziumOffset);
}
@SuppressWarnings("unchecked")
public void commitOffset(DebeziumOffset offset) throws InterruptedException {
if (currentCommitter == null) {
LOG.info(
"commitOffset() called on Debezium ChangeConsumer which doesn't receive records yet.");
return;
}
// only the offset is used
SourceRecord recordWrapper =
new SourceRecord(
offset.sourcePartition,
adjustSourceOffset((Map<String, Object>) offset.sourceOffset),
"DUMMY",
Schema.BOOLEAN_SCHEMA,
true);
EmbeddedEngineChangeEvent<SourceRecord, SourceRecord> changeEvent =
new EmbeddedEngineChangeEvent<>(null, recordWrapper, recordWrapper);
currentCommitter.markProcessed(changeEvent);
currentCommitter.markBatchFinished();
}
/**
* We have to adjust type of LSN values to Long, because it might be Integer after
* deserialization, however {@code
* io.debezium.connector.postgresql.PostgresStreamingChangeEventSource#commitOffset(java.util.Map)}
* requires Long.
*/
private Map<String, Object> adjustSourceOffset(Map<String, Object> sourceOffset) {
if (sourceOffset.containsKey(LAST_COMPLETELY_PROCESSED_LSN_KEY)) {
String value = sourceOffset.get(LAST_COMPLETELY_PROCESSED_LSN_KEY).toString();
sourceOffset.put(LAST_COMPLETELY_PROCESSED_LSN_KEY, Long.parseLong(value));
}
if (sourceOffset.containsKey(LAST_COMMIT_LSN_KEY)) {
String value = sourceOffset.get(LAST_COMMIT_LSN_KEY).toString();
sourceOffset.put(LAST_COMMIT_LSN_KEY, Long.parseLong(value));
}
return sourceOffset;
}
private class DebeziumCollector implements Collector<T> {
private final Queue<T> records = new ArrayDeque<>();
@Override
public void collect(T record) {
records.add(record);
}
@Override
public void close() {}
}
}

@@ -26,14 +26,12 @@ import java.util.Map;
/**
* The state that the Flink Debezium Consumer holds for each instance.
*
* <p>This class describes the most basic state that Debezium used for recovering based
* on Kafka Connect mechanism. It includes a sourcePartition and sourceOffset.
* <p>This class describes the most basic state that Debezium used for recovering based on Kafka
* Connect mechanism. It includes a sourcePartition and sourceOffset.
*
* <p>
* The sourcePartition represents a single input sourcePartition that the record came from (e.g. a filename, table
* name, or topic-partition). The sourceOffset represents a position in that sourcePartition which can be used
* to resume consumption of data.
* </p>
* <p>The sourcePartition represents a single input sourcePartition that the record came from (e.g.
* a filename, table name, or topic-partition). The sourceOffset represents a position in that
* sourcePartition which can be used to resume consumption of data.
*
* <p>These values can have arbitrary structure and should be represented using
* org.apache.kafka.connect.data objects (or primitive values). For example, a database connector
@@ -42,24 +40,26 @@ import java.util.Map;
*/
@Internal
public class DebeziumOffset implements Serializable {
private static final long serialVersionUID = 1L;
private static final long serialVersionUID = 1L;
public Map<String, ?> sourcePartition;
public Map<String, ?> sourceOffset;
public Map<String, ?> sourcePartition;
public Map<String, ?> sourceOffset;
public void setSourcePartition(Map<String, ?> sourcePartition) {
this.sourcePartition = sourcePartition;
}
public void setSourcePartition(Map<String, ?> sourcePartition) {
this.sourcePartition = sourcePartition;
}
public void setSourceOffset(Map<String, ?> sourceOffset) {
this.sourceOffset = sourceOffset;
}
public void setSourceOffset(Map<String, ?> sourceOffset) {
this.sourceOffset = sourceOffset;
}
@Override
public String toString() {
return "DebeziumOffset{" +
"sourcePartition=" + sourcePartition +
", sourceOffset=" + sourceOffset +
'}';
}
@Override
public String toString() {
return "DebeziumOffset{"
+ "sourcePartition="
+ sourcePartition
+ ", sourceOffset="
+ sourceOffset
+ '}';
}
}

@@ -24,22 +24,20 @@ import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.ObjectMapper;
import java.io.IOException;
/**
* Serializer implementation for a {@link DebeziumOffset}.
*/
/** Serializer implementation for a {@link DebeziumOffset}. */
@Internal
public class DebeziumOffsetSerializer {
public static final DebeziumOffsetSerializer INSTANCE = new DebeziumOffsetSerializer();
public static final DebeziumOffsetSerializer INSTANCE = new DebeziumOffsetSerializer();
public byte[] serialize(DebeziumOffset debeziumOffset) throws IOException {
// we currently use JSON serialization for simplification, as the state is very small.
// we can improve this in the future if needed
ObjectMapper objectMapper = new ObjectMapper();
return objectMapper.writeValueAsBytes(debeziumOffset);
}
public byte[] serialize(DebeziumOffset debeziumOffset) throws IOException {
// we currently use JSON serialization for simplification, as the state is very small.
// we can improve this in the future if needed
ObjectMapper objectMapper = new ObjectMapper();
return objectMapper.writeValueAsBytes(debeziumOffset);
}
public DebeziumOffset deserialize(byte[] bytes) throws IOException {
ObjectMapper objectMapper = new ObjectMapper();
return objectMapper.readValue(bytes, DebeziumOffset.class);
}
public DebeziumOffset deserialize(byte[] bytes) throws IOException {
ObjectMapper objectMapper = new ObjectMapper();
return objectMapper.readValue(bytes, DebeziumOffset.class);
}
}

@@ -20,11 +20,9 @@ package com.alibaba.ververica.cdc.debezium.internal;
import org.apache.flink.annotation.Internal;
/**
* A reporter that can report errors to handler.
*/
/** A reporter that can report errors to handler. */
@Internal
public interface ErrorReporter {
void reportError(Throwable error);
void reportError(Throwable error);
}

@@ -36,8 +36,8 @@ import java.util.concurrent.ConcurrentLinkedQueue;
import java.util.function.Consumer;
/**
* Inspired from {@link io.debezium.relational.history.MemoryDatabaseHistory} but we will store
* the HistoryRecords in Flink's state for persistence.
* Inspired from {@link io.debezium.relational.history.MemoryDatabaseHistory} but we will store the
* HistoryRecords in Flink's state for persistence.
*
* <p>Note: This is not a clean solution because we depends on a global variable and all the history
* records will be stored in state (grow infinitely). We may need to come up with a
@@ -45,100 +45,107 @@ import java.util.function.Consumer;
*/
public class FlinkDatabaseHistory extends AbstractDatabaseHistory {
public static final String DATABASE_HISTORY_INSTANCE_NAME = "database.history.instance.name";
/**
* We will synchronize the records into Flink's state during snapshot.
* We have to use a global variable to communicate with Flink's source function,
* because Debezium will construct the instance of {@link DatabaseHistory} itself.
* Maybe we can improve this in the future.
*
* <p>NOTE: we just use Flink's state as a durable persistent storage as a replacement of
* {@link FileDatabaseHistory} and {@link KafkaDatabaseHistory}. It doesn't need to guarantee
* the exactly-once semantic for the history records. The history records shouldn't be super
* large, because we only monitor the schema changes for one single table.
*
* @see com.alibaba.ververica.cdc.debezium.DebeziumSourceFunction#snapshotState(FunctionSnapshotContext)
*/
public static final Map<String, ConcurrentLinkedQueue<HistoryRecord>> ALL_RECORDS = new HashMap<>();
private ConcurrentLinkedQueue<HistoryRecord> records;
private String instanceName;
/**
* Registers the given HistoryRecords into global variable under the given instance name,
* in order to be accessed by instance of {@link FlinkDatabaseHistory}.
*/
public static void registerHistoryRecords(String instanceName, ConcurrentLinkedQueue<HistoryRecord> historyRecords) {
synchronized (FlinkDatabaseHistory.ALL_RECORDS) {
FlinkDatabaseHistory.ALL_RECORDS.put(instanceName, historyRecords);
}
}
/**
* Registers an empty HistoryRecords into global variable under the given instance name,
* in order to be accessed by instance of {@link FlinkDatabaseHistory}.
*/
public static void registerEmptyHistoryRecord(String instanceName) {
registerHistoryRecords(instanceName, new ConcurrentLinkedQueue<>());
}
/**
* Gets the registered HistoryRecords under the given instance name.
*/
public static ConcurrentLinkedQueue<HistoryRecord> getRegisteredHistoryRecord(String instanceName) {
synchronized (ALL_RECORDS) {
if (ALL_RECORDS.containsKey(instanceName)) {
return ALL_RECORDS.get(instanceName);
}
}
return null;
}
@Override
public void configure(Configuration config, HistoryRecordComparator comparator, DatabaseHistoryListener listener, boolean useCatalogBeforeSchema) {
super.configure(config, comparator, listener, useCatalogBeforeSchema);
this.instanceName = config.getString(DATABASE_HISTORY_INSTANCE_NAME);
this.records = getRegisteredHistoryRecord(instanceName);
if (records == null) {
throw new IllegalStateException(
String.format("Couldn't find engine instance %s in the global records.", instanceName));
}
}
@Override
public void stop() {
super.stop();
if (instanceName != null) {
synchronized (ALL_RECORDS) {
// clear memory
ALL_RECORDS.remove(instanceName);
}
}
}
@Override
protected void storeRecord(HistoryRecord record) throws DatabaseHistoryException {
this.records.add(record);
}
@Override
protected void recoverRecords(Consumer<HistoryRecord> records) {
this.records.forEach(records);
}
@Override
public boolean exists() {
return !records.isEmpty();
}
@Override
public boolean storageExists() {
return true;
}
@Override
public String toString() {
return "Flink Database History";
}
public static final String DATABASE_HISTORY_INSTANCE_NAME = "database.history.instance.name";
/**
* We will synchronize the records into Flink's state during snapshot. We have to use a global
* variable to communicate with Flink's source function, because Debezium will construct the
* instance of {@link DatabaseHistory} itself. Maybe we can improve this in the future.
*
* <p>NOTE: we just use Flink's state as a durable persistent storage as a replacement of {@link
* FileDatabaseHistory} and {@link KafkaDatabaseHistory}. It doesn't need to guarantee the
* exactly-once semantic for the history records. The history records shouldn't be super large,
* because we only monitor the schema changes for one single table.
*
* @see
* com.alibaba.ververica.cdc.debezium.DebeziumSourceFunction#snapshotState(FunctionSnapshotContext)
*/
public static final Map<String, ConcurrentLinkedQueue<HistoryRecord>> ALL_RECORDS =
new HashMap<>();
private ConcurrentLinkedQueue<HistoryRecord> records;
private String instanceName;
/**
* Registers the given HistoryRecords into global variable under the given instance name, in
* order to be accessed by instance of {@link FlinkDatabaseHistory}.
*/
public static void registerHistoryRecords(
String instanceName, ConcurrentLinkedQueue<HistoryRecord> historyRecords) {
synchronized (FlinkDatabaseHistory.ALL_RECORDS) {
FlinkDatabaseHistory.ALL_RECORDS.put(instanceName, historyRecords);
}
}
/**
* Registers an empty HistoryRecords into global variable under the given instance name, in
* order to be accessed by instance of {@link FlinkDatabaseHistory}.
*/
public static void registerEmptyHistoryRecord(String instanceName) {
registerHistoryRecords(instanceName, new ConcurrentLinkedQueue<>());
}
/** Gets the registered HistoryRecords under the given instance name. */
public static ConcurrentLinkedQueue<HistoryRecord> getRegisteredHistoryRecord(
String instanceName) {
synchronized (ALL_RECORDS) {
if (ALL_RECORDS.containsKey(instanceName)) {
return ALL_RECORDS.get(instanceName);
}
}
return null;
}
@Override
public void configure(
Configuration config,
HistoryRecordComparator comparator,
DatabaseHistoryListener listener,
boolean useCatalogBeforeSchema) {
super.configure(config, comparator, listener, useCatalogBeforeSchema);
this.instanceName = config.getString(DATABASE_HISTORY_INSTANCE_NAME);
this.records = getRegisteredHistoryRecord(instanceName);
if (records == null) {
throw new IllegalStateException(
String.format(
"Couldn't find engine instance %s in the global records.",
instanceName));
}
}
@Override
public void stop() {
super.stop();
if (instanceName != null) {
synchronized (ALL_RECORDS) {
// clear memory
ALL_RECORDS.remove(instanceName);
}
}
}
@Override
protected void storeRecord(HistoryRecord record) throws DatabaseHistoryException {
this.records.add(record);
}
@Override
protected void recoverRecords(Consumer<HistoryRecord> records) {
this.records.forEach(records);
}
@Override
public boolean exists() {
return !records.isEmpty();
}
@Override
public boolean storageExists() {
return true;
}
@Override
public String toString() {
return "Flink Database History";
}
}

@@ -48,143 +48,155 @@ import java.util.concurrent.TimeoutException;
/**
* A implementation of {@link OffsetBackingStore} backed on Flink's state mechanism.
*
* <p>The {@link #OFFSET_STATE_VALUE} in the {@link WorkerConfig} is the raw position
* and offset data in JSON format. It is set into the config when recovery from failover by
* {@link DebeziumSourceFunction} before startup the {@link DebeziumEngine}. If it is not
* a restoration, the {@link #OFFSET_STATE_VALUE} is empty. {@link DebeziumEngine} relies on
* the {@link OffsetBackingStore} for failover recovery.
* <p>The {@link #OFFSET_STATE_VALUE} in the {@link WorkerConfig} is the raw position and offset
* data in JSON format. It is set into the config when recovery from failover by {@link
* DebeziumSourceFunction} before startup the {@link DebeziumEngine}. If it is not a restoration,
* the {@link #OFFSET_STATE_VALUE} is empty. {@link DebeziumEngine} relies on the {@link
* OffsetBackingStore} for failover recovery.
*
* @see DebeziumSourceFunction
*/
public class FlinkOffsetBackingStore implements OffsetBackingStore {
private static final Logger LOG = LoggerFactory.getLogger(FlinkOffsetBackingStore.class);
public static final String OFFSET_STATE_VALUE = "offset.storage.flink.state.value";
public static final int FLUSH_TIMEOUT_SECONDS = 10;
protected Map<ByteBuffer, ByteBuffer> data = new HashMap<>();
protected ExecutorService executor;
@Override
public void configure(WorkerConfig config) {
// eagerly initialize the executor, because OffsetStorageWriter will use it later
start();
Map<String, ?> conf = config.originals();
if (!conf.containsKey(OFFSET_STATE_VALUE)) {
// a normal startup from clean state, not need to initialize the offset
return;
}
String stateJson = (String) conf.get(OFFSET_STATE_VALUE);
DebeziumOffsetSerializer serializer = new DebeziumOffsetSerializer();
DebeziumOffset debeziumOffset;
try {
debeziumOffset = serializer.deserialize(stateJson.getBytes(StandardCharsets.UTF_8));
} catch (IOException e) {
LOG.error("Can't deserialize debezium offset state from JSON: " + stateJson, e);
throw new RuntimeException(e);
}
String engineName = (String) conf.get(EmbeddedEngine.ENGINE_NAME.name());
Converter keyConverter = new JsonConverter();
Converter valueConverter = new JsonConverter();
keyConverter.configure(config.originals(), true);
Map<String, Object> valueConfigs = new HashMap<>(conf);
valueConfigs.put("schemas.enable", false);
valueConverter.configure(valueConfigs, true);
OffsetStorageWriter offsetWriter = new OffsetStorageWriter(
this,
// must use engineName as namespace to align with Debezium Engine implementation
engineName,
keyConverter,
valueConverter);
offsetWriter.offset(debeziumOffset.sourcePartition, debeziumOffset.sourceOffset);
// flush immediately
if (!offsetWriter.beginFlush()) {
// if nothing is needed to be flushed, there must be something wrong with the initialization
LOG.warn("Initialize FlinkOffsetBackingStore from empty offset state, this shouldn't happen.");
return;
}
// trigger flushing
Future<Void> flushFuture = offsetWriter.doFlush((error, result) -> {
if (error != null) {
LOG.error("Failed to flush initial offset.", error);
} else {
LOG.debug("Successfully flush initial offset.");
}
});
// wait until flushing finished
try {
flushFuture.get(FLUSH_TIMEOUT_SECONDS, TimeUnit.SECONDS);
LOG.info("Flush offsets successfully, partition: {}, offsets: {}",
debeziumOffset.sourcePartition,
debeziumOffset.sourceOffset);
} catch (InterruptedException e) {
LOG.warn("Flush offsets interrupted, cancelling.", e);
offsetWriter.cancelFlush();
} catch (ExecutionException e) {
LOG.error("Flush offsets threw an unexpected exception.", e);
offsetWriter.cancelFlush();
} catch (TimeoutException e) {
LOG.error("Timed out waiting to flush offsets to storage.", e);
offsetWriter.cancelFlush();
}
}
@Override
public void start() {
if (executor == null) {
executor = Executors.newFixedThreadPool(1, ThreadUtils.createThreadFactory(
this.getClass().getSimpleName() + "-%d", false));
}
}
@Override
public void stop() {
if (executor != null) {
executor.shutdown();
// Best effort wait for any get() and set() tasks (and caller's callbacks) to complete.
try {
executor.awaitTermination(30, TimeUnit.SECONDS);
} catch (InterruptedException e) {
Thread.currentThread().interrupt();
}
if (!executor.shutdownNow().isEmpty()) {
throw new ConnectException("Failed to stop FlinkOffsetBackingStore. Exiting without cleanly " +
"shutting down pending tasks and/or callbacks.");
}
executor = null;
}
}
@Override
public Future<Map<ByteBuffer, ByteBuffer>> get(final Collection<ByteBuffer> keys) {
return executor.submit(() -> {
Map<ByteBuffer, ByteBuffer> result = new HashMap<>();
for (ByteBuffer key : keys) {
result.put(key, data.get(key));
}
return result;
});
}
@Override
public Future<Void> set(final Map<ByteBuffer, ByteBuffer> values,
final Callback<Void> callback) {
return executor.submit(() -> {
for (Map.Entry<ByteBuffer, ByteBuffer> entry : values.entrySet()) {
data.put(entry.getKey(), entry.getValue());
}
if (callback != null) {
callback.onCompletion(null, null);
}
return null;
});
}
private static final Logger LOG = LoggerFactory.getLogger(FlinkOffsetBackingStore.class);
public static final String OFFSET_STATE_VALUE = "offset.storage.flink.state.value";
public static final int FLUSH_TIMEOUT_SECONDS = 10;
protected Map<ByteBuffer, ByteBuffer> data = new HashMap<>();
protected ExecutorService executor;
@Override
public void configure(WorkerConfig config) {
// eagerly initialize the executor, because OffsetStorageWriter will use it later
start();
Map<String, ?> conf = config.originals();
if (!conf.containsKey(OFFSET_STATE_VALUE)) {
// a normal startup from clean state, no need to initialize the offset
return;
}
String stateJson = (String) conf.get(OFFSET_STATE_VALUE);
DebeziumOffsetSerializer serializer = new DebeziumOffsetSerializer();
DebeziumOffset debeziumOffset;
try {
debeziumOffset = serializer.deserialize(stateJson.getBytes(StandardCharsets.UTF_8));
} catch (IOException e) {
LOG.error("Can't deserialize debezium offset state from JSON: " + stateJson, e);
throw new RuntimeException(e);
}
String engineName = (String) conf.get(EmbeddedEngine.ENGINE_NAME.name());
Converter keyConverter = new JsonConverter();
Converter valueConverter = new JsonConverter();
keyConverter.configure(config.originals(), true);
Map<String, Object> valueConfigs = new HashMap<>(conf);
valueConfigs.put("schemas.enable", false);
valueConverter.configure(valueConfigs, true);
OffsetStorageWriter offsetWriter =
new OffsetStorageWriter(
this,
// must use engineName as namespace to align with Debezium Engine
// implementation
engineName,
keyConverter,
valueConverter);
offsetWriter.offset(debeziumOffset.sourcePartition, debeziumOffset.sourceOffset);
// flush immediately
if (!offsetWriter.beginFlush()) {
// if nothing is needed to be flushed, there must be something wrong with the
// initialization
LOG.warn(
"Initialize FlinkOffsetBackingStore from empty offset state, this shouldn't happen.");
return;
}
// trigger flushing
Future<Void> flushFuture =
offsetWriter.doFlush(
(error, result) -> {
if (error != null) {
LOG.error("Failed to flush initial offset.", error);
} else {
LOG.debug("Successfully flush initial offset.");
}
});
// wait until flushing finished
try {
flushFuture.get(FLUSH_TIMEOUT_SECONDS, TimeUnit.SECONDS);
LOG.info(
"Flush offsets successfully, partition: {}, offsets: {}",
debeziumOffset.sourcePartition,
debeziumOffset.sourceOffset);
} catch (InterruptedException e) {
LOG.warn("Flush offsets interrupted, cancelling.", e);
offsetWriter.cancelFlush();
} catch (ExecutionException e) {
LOG.error("Flush offsets threw an unexpected exception.", e);
offsetWriter.cancelFlush();
} catch (TimeoutException e) {
LOG.error("Timed out waiting to flush offsets to storage.", e);
offsetWriter.cancelFlush();
}
}
@Override
public void start() {
if (executor == null) {
executor =
Executors.newFixedThreadPool(
1,
ThreadUtils.createThreadFactory(
this.getClass().getSimpleName() + "-%d", false));
}
}
@Override
public void stop() {
if (executor != null) {
executor.shutdown();
// Best effort wait for any get() and set() tasks (and caller's callbacks) to complete.
try {
executor.awaitTermination(30, TimeUnit.SECONDS);
} catch (InterruptedException e) {
Thread.currentThread().interrupt();
}
if (!executor.shutdownNow().isEmpty()) {
throw new ConnectException(
"Failed to stop FlinkOffsetBackingStore. Exiting without cleanly "
+ "shutting down pending tasks and/or callbacks.");
}
executor = null;
}
}
@Override
public Future<Map<ByteBuffer, ByteBuffer>> get(final Collection<ByteBuffer> keys) {
return executor.submit(
() -> {
Map<ByteBuffer, ByteBuffer> result = new HashMap<>();
for (ByteBuffer key : keys) {
result.put(key, data.get(key));
}
return result;
});
}
@Override
public Future<Void> set(
final Map<ByteBuffer, ByteBuffer> values, final Callback<Void> callback) {
return executor.submit(
() -> {
for (Map.Entry<ByteBuffer, ByteBuffer> entry : values.entrySet()) {
data.put(entry.getKey(), entry.getValue());
}
if (callback != null) {
callback.onCompletion(null, null);
}
return null;
});
}
}
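
For context, a minimal sketch of how a restored offset can reach this store: the source function serializes its checkpointed DebeziumOffset to JSON and places it into the engine properties under OFFSET_STATE_VALUE before the engine is built, which is exactly what configure() above replays. The package locations and the serialize() signature of DebeziumOffsetSerializer are assumptions here, not part of this change.

import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.Properties;

// assumed package locations; adjust to the actual module layout
import com.alibaba.ververica.cdc.debezium.internal.DebeziumOffset;
import com.alibaba.ververica.cdc.debezium.internal.DebeziumOffsetSerializer;
import com.alibaba.ververica.cdc.debezium.internal.FlinkOffsetBackingStore;

public class RestoredOffsetExample {

    /** Puts a restored offset into the engine properties so configure() can replay it. */
    public static Properties withRestoredOffset(Properties props, DebeziumOffset restoredOffset)
            throws IOException {
        // assumed: DebeziumOffsetSerializer#serialize returns the offset as JSON bytes
        byte[] json = new DebeziumOffsetSerializer().serialize(restoredOffset);
        props.setProperty(
                FlinkOffsetBackingStore.OFFSET_STATE_VALUE,
                new String(json, StandardCharsets.UTF_8));
        // standard Debezium embedded engine option: use this class as the offset store
        props.setProperty("offset.storage", FlinkOffsetBackingStore.class.getCanonicalName());
        return props;
    }
}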

@ -23,25 +23,31 @@ import java.util.Properties;
/** Option utils for Debezium options. */
public class DebeziumOptions {
public static final String DEBEZIUM_OPTIONS_PREFIX = "debezium.";
public static final String DEBEZIUM_OPTIONS_PREFIX = "debezium.";
public static Properties getDebeziumProperties(Map<String, String> properties) {
final Properties debeziumProperties = new Properties();
public static Properties getDebeziumProperties(Map<String, String> properties) {
final Properties debeziumProperties = new Properties();
if (hasDebeziumProperties(properties)) {
properties.keySet().stream()
.filter(key -> key.startsWith(DEBEZIUM_OPTIONS_PREFIX))
.forEach(key -> {
final String value = properties.get(key);
final String subKey = key.substring((DEBEZIUM_OPTIONS_PREFIX).length());
debeziumProperties.put(subKey, value);
});
}
return debeziumProperties;
}
if (hasDebeziumProperties(properties)) {
properties.keySet().stream()
.filter(key -> key.startsWith(DEBEZIUM_OPTIONS_PREFIX))
.forEach(
key -> {
final String value = properties.get(key);
final String subKey =
key.substring((DEBEZIUM_OPTIONS_PREFIX).length());
debeziumProperties.put(subKey, value);
});
}
return debeziumProperties;
}
/** Decides if the table options contains Debezium client properties that start with prefix 'debezium'. */
private static boolean hasDebeziumProperties(Map<String, String> debeziumOptions) {
return debeziumOptions.keySet().stream().anyMatch(k -> k.startsWith(DEBEZIUM_OPTIONS_PREFIX));
}
/**
 * Decides if the table options contain Debezium client properties that start with the prefix
 * 'debezium.'.
*/
private static boolean hasDebeziumProperties(Map<String, String> debeziumOptions) {
return debeziumOptions.keySet().stream()
.anyMatch(k -> k.startsWith(DEBEZIUM_OPTIONS_PREFIX));
}
}
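
A small usage sketch of the prefix stripping above; the table option keys and values are made up for illustration, and the import of DebeziumOptions (same package assumed) is omitted.

import java.util.HashMap;
import java.util.Map;
import java.util.Properties;

public class DebeziumOptionsExample {

    public static void main(String[] args) {
        Map<String, String> tableOptions = new HashMap<>();
        tableOptions.put("hostname", "localhost"); // no "debezium." prefix, not forwarded
        tableOptions.put("debezium.snapshot.mode", "never");
        tableOptions.put("debezium.decimal.handling.mode", "string");

        // the prefix is stripped, so the engine sees plain Debezium property names
        Properties dbzProps = DebeziumOptions.getDebeziumProperties(tableOptions);
        System.out.println(dbzProps); // contains snapshot.mode=never, decimal.handling.mode=string
    }
}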

@ -55,339 +55,343 @@ import java.time.LocalDateTime;
import java.time.ZoneId;
/**
* Deserialization schema from Debezium object to Flink Table/SQL internal data structure {@link RowData}.
* Deserialization schema from Debezium object to Flink Table/SQL internal data structure {@link
* RowData}.
*/
public final class RowDataDebeziumDeserializeSchema implements DebeziumDeserializationSchema<RowData> {
private static final long serialVersionUID = -4852684966051743776L;
/**
* Custom validator to validate the row value.
*/
public interface ValueValidator extends Serializable {
void validate(RowData rowData, RowKind rowKind) throws Exception;
}
/** TypeInformation of the produced {@link RowData}. **/
private final TypeInformation<RowData> resultTypeInfo;
/**
* Runtime converter that converts {@link JsonNode}s into
* objects of Flink SQL internal data structures. **/
private final DeserializationRuntimeConverter runtimeConverter;
/**
* Time zone of the database server.
*/
private final ZoneId serverTimeZone;
/**
* Validator to validate the row value.
*/
private final ValueValidator validator;
public RowDataDebeziumDeserializeSchema(RowType rowType, TypeInformation<RowData> resultTypeInfo, ValueValidator validator, ZoneId serverTimeZone) {
this.runtimeConverter = createConverter(rowType);
this.resultTypeInfo = resultTypeInfo;
this.validator = validator;
this.serverTimeZone = serverTimeZone;
}
@Override
public void deserialize(SourceRecord record, Collector<RowData> out) throws Exception {
Envelope.Operation op = Envelope.operationFor(record);
Struct value = (Struct) record.value();
Schema valueSchema = record.valueSchema();
if (op == Envelope.Operation.CREATE || op == Envelope.Operation.READ) {
GenericRowData insert = extractAfterRow(value, valueSchema);
validator.validate(insert, RowKind.INSERT);
insert.setRowKind(RowKind.INSERT);
out.collect(insert);
} else if (op == Envelope.Operation.DELETE) {
GenericRowData delete = extractBeforeRow(value, valueSchema);
validator.validate(delete, RowKind.DELETE);
delete.setRowKind(RowKind.DELETE);
out.collect(delete);
} else {
GenericRowData before = extractBeforeRow(value, valueSchema);
validator.validate(before, RowKind.UPDATE_BEFORE);
before.setRowKind(RowKind.UPDATE_BEFORE);
out.collect(before);
GenericRowData after = extractAfterRow(value, valueSchema);
validator.validate(after, RowKind.UPDATE_AFTER);
after.setRowKind(RowKind.UPDATE_AFTER);
out.collect(after);
}
}
private GenericRowData extractAfterRow(Struct value, Schema valueSchema) throws Exception {
Schema afterSchema = valueSchema.field(Envelope.FieldName.AFTER).schema();
Struct after = value.getStruct(Envelope.FieldName.AFTER);
return (GenericRowData) runtimeConverter.convert(after, afterSchema);
}
private GenericRowData extractBeforeRow(Struct value, Schema valueSchema) throws Exception {
Schema afterSchema = valueSchema.field(Envelope.FieldName.BEFORE).schema();
Struct after = value.getStruct(Envelope.FieldName.BEFORE);
return (GenericRowData) runtimeConverter.convert(after, afterSchema);
}
@Override
public TypeInformation<RowData> getProducedType() {
return resultTypeInfo;
}
// -------------------------------------------------------------------------------------
// Runtime Converters
// -------------------------------------------------------------------------------------
/**
* Runtime converter that converts objects of Debezium into objects of Flink Table & SQL internal data structures.
*/
@FunctionalInterface
private interface DeserializationRuntimeConverter extends Serializable {
Object convert(Object dbzObj, Schema schema) throws Exception;
}
/**
* Creates a runtime converter which is null safe.
*/
private DeserializationRuntimeConverter createConverter(LogicalType type) {
return wrapIntoNullableConverter(createNotNullConverter(type));
}
/**
* Creates a runtime converter which assuming input object is not null.
*/
private DeserializationRuntimeConverter createNotNullConverter(LogicalType type) {
switch (type.getTypeRoot()) {
case NULL:
return (dbzObj, schema) -> null;
case BOOLEAN:
return this::convertToBoolean;
case TINYINT:
return (dbzObj, schema) -> Byte.parseByte(dbzObj.toString());
case SMALLINT:
return (dbzObj, schema) -> Short.parseShort(dbzObj.toString());
case INTEGER:
case INTERVAL_YEAR_MONTH:
return this::convertToInt;
case BIGINT:
case INTERVAL_DAY_TIME:
return this::convertToLong;
case DATE:
return this::convertToDate;
case TIME_WITHOUT_TIME_ZONE:
return this::convertToTime;
case TIMESTAMP_WITHOUT_TIME_ZONE:
return this::convertToTimestamp;
case TIMESTAMP_WITH_LOCAL_TIME_ZONE:
return this::convertToLocalTimeZoneTimestamp;
case FLOAT:
return this::convertToFloat;
case DOUBLE:
return this::convertToDouble;
case CHAR:
case VARCHAR:
return this::convertToString;
case BINARY:
case VARBINARY:
return this::convertToBinary;
case DECIMAL:
return createDecimalConverter((DecimalType) type);
case ROW:
return createRowConverter((RowType) type);
case ARRAY:
case MAP:
case MULTISET:
case RAW:
default:
throw new UnsupportedOperationException("Unsupported type: " + type);
}
}
private boolean convertToBoolean(Object dbzObj, Schema schema) {
if (dbzObj instanceof Boolean) {
return (boolean) dbzObj;
} else if (dbzObj instanceof Byte) {
return (byte) dbzObj == 1;
} else if (dbzObj instanceof Short) {
return (short) dbzObj == 1;
} else {
return Boolean.parseBoolean(dbzObj.toString());
}
}
private int convertToInt(Object dbzObj, Schema schema) {
if (dbzObj instanceof Integer) {
return (int) dbzObj;
} else if (dbzObj instanceof Long) {
return ((Long) dbzObj).intValue();
} else {
return Integer.parseInt(dbzObj.toString());
}
}
private long convertToLong(Object dbzObj, Schema schema) {
if (dbzObj instanceof Integer) {
return (long) dbzObj;
} else if (dbzObj instanceof Long) {
return (long) dbzObj;
} else {
return Long.parseLong(dbzObj.toString());
}
}
private double convertToDouble(Object dbzObj, Schema schema) {
if (dbzObj instanceof Float) {
return (double) dbzObj;
} else if (dbzObj instanceof Double) {
return (double) dbzObj;
} else {
return Double.parseDouble(dbzObj.toString());
}
}
private float convertToFloat(Object dbzObj, Schema schema) {
if (dbzObj instanceof Float) {
return (float) dbzObj;
} else if (dbzObj instanceof Double) {
return ((Double) dbzObj).floatValue();
} else {
return Float.parseFloat(dbzObj.toString());
}
}
private int convertToDate(Object dbzObj, Schema schema) {
return (int) TemporalConversions.toLocalDate(dbzObj).toEpochDay();
}
private int convertToTime(Object dbzObj, Schema schema) {
if (dbzObj instanceof Long) {
switch (schema.name()) {
case MicroTime.SCHEMA_NAME:
return (int) ((long) dbzObj / 1000);
case NanoTime.SCHEMA_NAME:
return (int) ((long) dbzObj / 1000_000);
}
} else if (dbzObj instanceof Integer) {
return (int) dbzObj;
}
// get number of milliseconds of the day
return TemporalConversions.toLocalTime(dbzObj).toSecondOfDay() * 1000;
}
private TimestampData convertToTimestamp(Object dbzObj, Schema schema) {
if (dbzObj instanceof Long) {
switch (schema.name()) {
case Timestamp.SCHEMA_NAME:
return TimestampData.fromEpochMillis((Long) dbzObj);
case MicroTimestamp.SCHEMA_NAME:
long micro = (long) dbzObj;
return TimestampData.fromEpochMillis(micro / 1000, (int) (micro % 1000 * 1000));
case NanoTimestamp.SCHEMA_NAME:
long nano = (long) dbzObj;
return TimestampData.fromEpochMillis(nano / 1000_000, (int) (nano % 1000_000));
}
}
LocalDateTime localDateTime = TemporalConversions.toLocalDateTime(dbzObj, serverTimeZone);
return TimestampData.fromLocalDateTime(localDateTime);
}
private TimestampData convertToLocalTimeZoneTimestamp(Object dbzObj, Schema schema) {
if (dbzObj instanceof String) {
String str = (String) dbzObj;
// TIMESTAMP type is encoded in string type
Instant instant = Instant.parse(str);
return TimestampData.fromLocalDateTime(LocalDateTime.ofInstant(instant, serverTimeZone));
}
throw new IllegalArgumentException("Unable to convert to TimestampData from unexpected value '" + dbzObj + "' of type " + dbzObj.getClass().getName());
}
private StringData convertToString(Object dbzObj, Schema schema) {
return StringData.fromString(dbzObj.toString());
}
private byte[] convertToBinary(Object dbzObj, Schema schema) {
if (dbzObj instanceof byte[]) {
return (byte[]) dbzObj;
} else if (dbzObj instanceof ByteBuffer) {
ByteBuffer byteBuffer = (ByteBuffer) dbzObj;
byte[] bytes = new byte[byteBuffer.remaining()];
byteBuffer.get(bytes);
return bytes;
} else {
throw new UnsupportedOperationException("Unsupported BYTES value type: " + dbzObj.getClass().getSimpleName());
}
}
private DeserializationRuntimeConverter createDecimalConverter(DecimalType decimalType) {
final int precision = decimalType.getPrecision();
final int scale = decimalType.getScale();
return (dbzObj, schema) -> {
BigDecimal bigDecimal;
if (dbzObj instanceof byte[]) {
// decimal.handling.mode=precise
bigDecimal = Decimal.toLogical(schema, (byte[]) dbzObj);
} else if (dbzObj instanceof String) {
// decimal.handling.mode=string
bigDecimal = new BigDecimal((String) dbzObj);
} else if (dbzObj instanceof Double) {
// decimal.handling.mode=double
bigDecimal = BigDecimal.valueOf((Double) dbzObj);
} else {
if (VariableScaleDecimal.LOGICAL_NAME.equals(schema.name())) {
SpecialValueDecimal decimal = VariableScaleDecimal.toLogical((Struct) dbzObj);
bigDecimal = decimal.getDecimalValue().orElse(BigDecimal.ZERO);
} else {
// fallback to string
bigDecimal = new BigDecimal(dbzObj.toString());
}
}
return DecimalData.fromBigDecimal(bigDecimal, precision, scale);
};
}
private DeserializationRuntimeConverter createRowConverter(RowType rowType) {
final DeserializationRuntimeConverter[] fieldConverters = rowType.getFields().stream()
.map(RowType.RowField::getType)
.map(this::createConverter)
.toArray(DeserializationRuntimeConverter[]::new);
final String[] fieldNames = rowType.getFieldNames().toArray(new String[0]);
return (dbzObj, schema) -> {
Struct struct = (Struct) dbzObj;
int arity = fieldNames.length;
GenericRowData row = new GenericRowData(arity);
for (int i = 0; i < arity; i++) {
String fieldName = fieldNames[i];
Object fieldValue = struct.get(fieldName);
Schema fieldSchema = schema.field(fieldName).schema();
Object convertedField = convertField(fieldConverters[i], fieldValue, fieldSchema);
row.setField(i, convertedField);
}
return row;
};
}
private Object convertField(
DeserializationRuntimeConverter fieldConverter,
Object fieldValue,
Schema fieldSchema) throws Exception {
if (fieldValue == null) {
return null;
} else {
return fieldConverter.convert(fieldValue, fieldSchema);
}
}
private DeserializationRuntimeConverter wrapIntoNullableConverter(
DeserializationRuntimeConverter converter) {
return (dbzObj, schema) -> {
if (dbzObj == null) {
return null;
}
return converter.convert(dbzObj, schema);
};
}
public final class RowDataDebeziumDeserializeSchema
implements DebeziumDeserializationSchema<RowData> {
private static final long serialVersionUID = -4852684966051743776L;
/** Custom validator to validate the row value. */
public interface ValueValidator extends Serializable {
void validate(RowData rowData, RowKind rowKind) throws Exception;
}
/** TypeInformation of the produced {@link RowData}. * */
private final TypeInformation<RowData> resultTypeInfo;
/**
* Runtime converter that converts {@link JsonNode}s into objects of Flink SQL internal data
* structures. *
*/
private final DeserializationRuntimeConverter runtimeConverter;
/** Time zone of the database server. */
private final ZoneId serverTimeZone;
/** Validator to validate the row value. */
private final ValueValidator validator;
public RowDataDebeziumDeserializeSchema(
RowType rowType,
TypeInformation<RowData> resultTypeInfo,
ValueValidator validator,
ZoneId serverTimeZone) {
this.runtimeConverter = createConverter(rowType);
this.resultTypeInfo = resultTypeInfo;
this.validator = validator;
this.serverTimeZone = serverTimeZone;
}
@Override
public void deserialize(SourceRecord record, Collector<RowData> out) throws Exception {
Envelope.Operation op = Envelope.operationFor(record);
Struct value = (Struct) record.value();
Schema valueSchema = record.valueSchema();
if (op == Envelope.Operation.CREATE || op == Envelope.Operation.READ) {
GenericRowData insert = extractAfterRow(value, valueSchema);
validator.validate(insert, RowKind.INSERT);
insert.setRowKind(RowKind.INSERT);
out.collect(insert);
} else if (op == Envelope.Operation.DELETE) {
GenericRowData delete = extractBeforeRow(value, valueSchema);
validator.validate(delete, RowKind.DELETE);
delete.setRowKind(RowKind.DELETE);
out.collect(delete);
} else {
GenericRowData before = extractBeforeRow(value, valueSchema);
validator.validate(before, RowKind.UPDATE_BEFORE);
before.setRowKind(RowKind.UPDATE_BEFORE);
out.collect(before);
GenericRowData after = extractAfterRow(value, valueSchema);
validator.validate(after, RowKind.UPDATE_AFTER);
after.setRowKind(RowKind.UPDATE_AFTER);
out.collect(after);
}
}
private GenericRowData extractAfterRow(Struct value, Schema valueSchema) throws Exception {
Schema afterSchema = valueSchema.field(Envelope.FieldName.AFTER).schema();
Struct after = value.getStruct(Envelope.FieldName.AFTER);
return (GenericRowData) runtimeConverter.convert(after, afterSchema);
}
private GenericRowData extractBeforeRow(Struct value, Schema valueSchema) throws Exception {
Schema beforeSchema = valueSchema.field(Envelope.FieldName.BEFORE).schema();
Struct before = value.getStruct(Envelope.FieldName.BEFORE);
return (GenericRowData) runtimeConverter.convert(before, beforeSchema);
}
@Override
public TypeInformation<RowData> getProducedType() {
return resultTypeInfo;
}
// -------------------------------------------------------------------------------------
// Runtime Converters
// -------------------------------------------------------------------------------------
/**
* Runtime converter that converts objects of Debezium into objects of Flink Table & SQL
* internal data structures.
*/
@FunctionalInterface
private interface DeserializationRuntimeConverter extends Serializable {
Object convert(Object dbzObj, Schema schema) throws Exception;
}
/** Creates a runtime converter which is null safe. */
private DeserializationRuntimeConverter createConverter(LogicalType type) {
return wrapIntoNullableConverter(createNotNullConverter(type));
}
/** Creates a runtime converter which assumes the input object is not null. */
private DeserializationRuntimeConverter createNotNullConverter(LogicalType type) {
switch (type.getTypeRoot()) {
case NULL:
return (dbzObj, schema) -> null;
case BOOLEAN:
return this::convertToBoolean;
case TINYINT:
return (dbzObj, schema) -> Byte.parseByte(dbzObj.toString());
case SMALLINT:
return (dbzObj, schema) -> Short.parseShort(dbzObj.toString());
case INTEGER:
case INTERVAL_YEAR_MONTH:
return this::convertToInt;
case BIGINT:
case INTERVAL_DAY_TIME:
return this::convertToLong;
case DATE:
return this::convertToDate;
case TIME_WITHOUT_TIME_ZONE:
return this::convertToTime;
case TIMESTAMP_WITHOUT_TIME_ZONE:
return this::convertToTimestamp;
case TIMESTAMP_WITH_LOCAL_TIME_ZONE:
return this::convertToLocalTimeZoneTimestamp;
case FLOAT:
return this::convertToFloat;
case DOUBLE:
return this::convertToDouble;
case CHAR:
case VARCHAR:
return this::convertToString;
case BINARY:
case VARBINARY:
return this::convertToBinary;
case DECIMAL:
return createDecimalConverter((DecimalType) type);
case ROW:
return createRowConverter((RowType) type);
case ARRAY:
case MAP:
case MULTISET:
case RAW:
default:
throw new UnsupportedOperationException("Unsupported type: " + type);
}
}
private boolean convertToBoolean(Object dbzObj, Schema schema) {
if (dbzObj instanceof Boolean) {
return (boolean) dbzObj;
} else if (dbzObj instanceof Byte) {
return (byte) dbzObj == 1;
} else if (dbzObj instanceof Short) {
return (short) dbzObj == 1;
} else {
return Boolean.parseBoolean(dbzObj.toString());
}
}
private int convertToInt(Object dbzObj, Schema schema) {
if (dbzObj instanceof Integer) {
return (int) dbzObj;
} else if (dbzObj instanceof Long) {
return ((Long) dbzObj).intValue();
} else {
return Integer.parseInt(dbzObj.toString());
}
}
private long convertToLong(Object dbzObj, Schema schema) {
if (dbzObj instanceof Integer) {
return (long) dbzObj;
} else if (dbzObj instanceof Long) {
return (long) dbzObj;
} else {
return Long.parseLong(dbzObj.toString());
}
}
private double convertToDouble(Object dbzObj, Schema schema) {
if (dbzObj instanceof Float) {
return (double) dbzObj;
} else if (dbzObj instanceof Double) {
return (double) dbzObj;
} else {
return Double.parseDouble(dbzObj.toString());
}
}
private float convertToFloat(Object dbzObj, Schema schema) {
if (dbzObj instanceof Float) {
return (float) dbzObj;
} else if (dbzObj instanceof Double) {
return ((Double) dbzObj).floatValue();
} else {
return Float.parseFloat(dbzObj.toString());
}
}
private int convertToDate(Object dbzObj, Schema schema) {
return (int) TemporalConversions.toLocalDate(dbzObj).toEpochDay();
}
private int convertToTime(Object dbzObj, Schema schema) {
if (dbzObj instanceof Long) {
switch (schema.name()) {
case MicroTime.SCHEMA_NAME:
return (int) ((long) dbzObj / 1000);
case NanoTime.SCHEMA_NAME:
return (int) ((long) dbzObj / 1000_000);
}
} else if (dbzObj instanceof Integer) {
return (int) dbzObj;
}
// get number of milliseconds of the day
return TemporalConversions.toLocalTime(dbzObj).toSecondOfDay() * 1000;
}
private TimestampData convertToTimestamp(Object dbzObj, Schema schema) {
if (dbzObj instanceof Long) {
switch (schema.name()) {
case Timestamp.SCHEMA_NAME:
return TimestampData.fromEpochMillis((Long) dbzObj);
case MicroTimestamp.SCHEMA_NAME:
long micro = (long) dbzObj;
return TimestampData.fromEpochMillis(micro / 1000, (int) (micro % 1000 * 1000));
case NanoTimestamp.SCHEMA_NAME:
long nano = (long) dbzObj;
return TimestampData.fromEpochMillis(nano / 1000_000, (int) (nano % 1000_000));
}
}
LocalDateTime localDateTime = TemporalConversions.toLocalDateTime(dbzObj, serverTimeZone);
return TimestampData.fromLocalDateTime(localDateTime);
}
private TimestampData convertToLocalTimeZoneTimestamp(Object dbzObj, Schema schema) {
if (dbzObj instanceof String) {
String str = (String) dbzObj;
// TIMESTAMP type is encoded in string type
Instant instant = Instant.parse(str);
return TimestampData.fromLocalDateTime(
LocalDateTime.ofInstant(instant, serverTimeZone));
}
throw new IllegalArgumentException(
"Unable to convert to TimestampData from unexpected value '"
+ dbzObj
+ "' of type "
+ dbzObj.getClass().getName());
}
private StringData convertToString(Object dbzObj, Schema schema) {
return StringData.fromString(dbzObj.toString());
}
private byte[] convertToBinary(Object dbzObj, Schema schema) {
if (dbzObj instanceof byte[]) {
return (byte[]) dbzObj;
} else if (dbzObj instanceof ByteBuffer) {
ByteBuffer byteBuffer = (ByteBuffer) dbzObj;
byte[] bytes = new byte[byteBuffer.remaining()];
byteBuffer.get(bytes);
return bytes;
} else {
throw new UnsupportedOperationException(
"Unsupported BYTES value type: " + dbzObj.getClass().getSimpleName());
}
}
private DeserializationRuntimeConverter createDecimalConverter(DecimalType decimalType) {
final int precision = decimalType.getPrecision();
final int scale = decimalType.getScale();
return (dbzObj, schema) -> {
BigDecimal bigDecimal;
if (dbzObj instanceof byte[]) {
// decimal.handling.mode=precise
bigDecimal = Decimal.toLogical(schema, (byte[]) dbzObj);
} else if (dbzObj instanceof String) {
// decimal.handling.mode=string
bigDecimal = new BigDecimal((String) dbzObj);
} else if (dbzObj instanceof Double) {
// decimal.handling.mode=double
bigDecimal = BigDecimal.valueOf((Double) dbzObj);
} else {
if (VariableScaleDecimal.LOGICAL_NAME.equals(schema.name())) {
SpecialValueDecimal decimal = VariableScaleDecimal.toLogical((Struct) dbzObj);
bigDecimal = decimal.getDecimalValue().orElse(BigDecimal.ZERO);
} else {
// fallback to string
bigDecimal = new BigDecimal(dbzObj.toString());
}
}
return DecimalData.fromBigDecimal(bigDecimal, precision, scale);
};
}
private DeserializationRuntimeConverter createRowConverter(RowType rowType) {
final DeserializationRuntimeConverter[] fieldConverters =
rowType.getFields().stream()
.map(RowType.RowField::getType)
.map(this::createConverter)
.toArray(DeserializationRuntimeConverter[]::new);
final String[] fieldNames = rowType.getFieldNames().toArray(new String[0]);
return (dbzObj, schema) -> {
Struct struct = (Struct) dbzObj;
int arity = fieldNames.length;
GenericRowData row = new GenericRowData(arity);
for (int i = 0; i < arity; i++) {
String fieldName = fieldNames[i];
Object fieldValue = struct.get(fieldName);
Schema fieldSchema = schema.field(fieldName).schema();
Object convertedField = convertField(fieldConverters[i], fieldValue, fieldSchema);
row.setField(i, convertedField);
}
return row;
};
}
private Object convertField(
DeserializationRuntimeConverter fieldConverter, Object fieldValue, Schema fieldSchema)
throws Exception {
if (fieldValue == null) {
return null;
} else {
return fieldConverter.convert(fieldValue, fieldSchema);
}
}
private DeserializationRuntimeConverter wrapIntoNullableConverter(
DeserializationRuntimeConverter converter) {
return (dbzObj, schema) -> {
if (dbzObj == null) {
return null;
}
return converter.convert(dbzObj, schema);
};
}
}
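
A sketch of constructing the deserializer above for a simple (id BIGINT, name STRING) table. In the Table/SQL path the planner supplies the real TypeInformation and validator, so the values used below are only placeholders; the import of RowDataDebeziumDeserializeSchema (same package assumed) is omitted.

import java.time.ZoneId;

import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.table.data.RowData;
import org.apache.flink.table.types.logical.BigIntType;
import org.apache.flink.table.types.logical.LogicalType;
import org.apache.flink.table.types.logical.RowType;
import org.apache.flink.table.types.logical.VarCharType;

public class RowDataDeserializerExample {

    public static RowDataDebeziumDeserializeSchema forIdNameTable() {
        RowType rowType =
                RowType.of(
                        new LogicalType[] {
                            new BigIntType(), new VarCharType(VarCharType.MAX_LENGTH)
                        },
                        new String[] {"id", "name"});
        return new RowDataDebeziumDeserializeSchema(
                rowType,
                TypeInformation.of(RowData.class), // placeholder; the planner normally provides it
                (rowData, rowKind) -> {},          // no extra validation
                ZoneId.of("UTC"));
    }
}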

@ -28,180 +28,188 @@ import java.time.ZoneId;
import java.time.ZoneOffset;
import java.util.concurrent.TimeUnit;
/**
* Temporal conversion constants.
*/
/** Temporal conversion constants. */
public final class TemporalConversions {
static final long MILLISECONDS_PER_SECOND = TimeUnit.SECONDS.toMillis(1);
static final long MICROSECONDS_PER_SECOND = TimeUnit.SECONDS.toMicros(1);
static final long MICROSECONDS_PER_MILLISECOND = TimeUnit.MILLISECONDS.toMicros(1);
static final long NANOSECONDS_PER_MILLISECOND = TimeUnit.MILLISECONDS.toNanos(1);
static final long NANOSECONDS_PER_MICROSECOND = TimeUnit.MICROSECONDS.toNanos(1);
static final long NANOSECONDS_PER_SECOND = TimeUnit.SECONDS.toNanos(1);
static final long NANOSECONDS_PER_DAY = TimeUnit.DAYS.toNanos(1);
static final long SECONDS_PER_DAY = TimeUnit.DAYS.toSeconds(1);
static final long MICROSECONDS_PER_DAY = TimeUnit.DAYS.toMicros(1);
static final LocalDate EPOCH = LocalDate.ofEpochDay(0);
static final long MILLISECONDS_PER_SECOND = TimeUnit.SECONDS.toMillis(1);
static final long MICROSECONDS_PER_SECOND = TimeUnit.SECONDS.toMicros(1);
static final long MICROSECONDS_PER_MILLISECOND = TimeUnit.MILLISECONDS.toMicros(1);
static final long NANOSECONDS_PER_MILLISECOND = TimeUnit.MILLISECONDS.toNanos(1);
static final long NANOSECONDS_PER_MICROSECOND = TimeUnit.MICROSECONDS.toNanos(1);
static final long NANOSECONDS_PER_SECOND = TimeUnit.SECONDS.toNanos(1);
static final long NANOSECONDS_PER_DAY = TimeUnit.DAYS.toNanos(1);
static final long SECONDS_PER_DAY = TimeUnit.DAYS.toSeconds(1);
static final long MICROSECONDS_PER_DAY = TimeUnit.DAYS.toMicros(1);
static final LocalDate EPOCH = LocalDate.ofEpochDay(0);
private TemporalConversions() {
}
private TemporalConversions() {}
public static LocalDate toLocalDate(Object obj) {
if (obj == null) {
return null;
}
if (obj instanceof LocalDate) {
return (LocalDate) obj;
}
if (obj instanceof LocalDateTime) {
return ((LocalDateTime) obj).toLocalDate();
}
if (obj instanceof java.sql.Date) {
return ((java.sql.Date) obj).toLocalDate();
}
if (obj instanceof java.sql.Time) {
throw new IllegalArgumentException("Unable to convert to LocalDate from a java.sql.Time value '" + obj + "'");
}
if (obj instanceof java.util.Date) {
java.util.Date date = (java.util.Date) obj;
return LocalDate.of(date.getYear() + 1900,
date.getMonth() + 1,
date.getDate());
}
if (obj instanceof Long) {
// Assume the value is the epoch day number
return LocalDate.ofEpochDay((Long) obj);
}
if (obj instanceof Integer) {
// Assume the value is the epoch day number
return LocalDate.ofEpochDay((Integer) obj);
}
throw new IllegalArgumentException("Unable to convert to LocalDate from unexpected value '" + obj + "' of type " + obj.getClass().getName());
}
public static LocalDate toLocalDate(Object obj) {
if (obj == null) {
return null;
}
if (obj instanceof LocalDate) {
return (LocalDate) obj;
}
if (obj instanceof LocalDateTime) {
return ((LocalDateTime) obj).toLocalDate();
}
if (obj instanceof java.sql.Date) {
return ((java.sql.Date) obj).toLocalDate();
}
if (obj instanceof java.sql.Time) {
throw new IllegalArgumentException(
"Unable to convert to LocalDate from a java.sql.Time value '" + obj + "'");
}
if (obj instanceof java.util.Date) {
java.util.Date date = (java.util.Date) obj;
return LocalDate.of(date.getYear() + 1900, date.getMonth() + 1, date.getDate());
}
if (obj instanceof Long) {
// Assume the value is the epoch day number
return LocalDate.ofEpochDay((Long) obj);
}
if (obj instanceof Integer) {
// Assume the value is the epoch day number
return LocalDate.ofEpochDay((Integer) obj);
}
throw new IllegalArgumentException(
"Unable to convert to LocalDate from unexpected value '"
+ obj
+ "' of type "
+ obj.getClass().getName());
}
public static LocalTime toLocalTime(Object obj) {
if (obj == null) {
return null;
}
if (obj instanceof LocalTime) {
return (LocalTime) obj;
}
if (obj instanceof LocalDateTime) {
return ((LocalDateTime) obj).toLocalTime();
}
if (obj instanceof java.sql.Date) {
throw new IllegalArgumentException("Unable to convert to LocalDate from a java.sql.Date value '" + obj + "'");
}
if (obj instanceof java.sql.Time) {
java.sql.Time time = (java.sql.Time) obj;
long millis = (int) (time.getTime() % MILLISECONDS_PER_SECOND);
int nanosOfSecond = (int) (millis * NANOSECONDS_PER_MILLISECOND);
return LocalTime.of(time.getHours(),
time.getMinutes(),
time.getSeconds(),
nanosOfSecond);
}
if (obj instanceof java.sql.Timestamp) {
java.sql.Timestamp timestamp = (java.sql.Timestamp) obj;
return LocalTime.of(timestamp.getHours(),
timestamp.getMinutes(),
timestamp.getSeconds(),
timestamp.getNanos());
}
if (obj instanceof java.util.Date) {
java.util.Date date = (java.util.Date) obj;
long millis = (int) (date.getTime() % MILLISECONDS_PER_SECOND);
int nanosOfSecond = (int) (millis * NANOSECONDS_PER_MILLISECOND);
return LocalTime.of(date.getHours(),
date.getMinutes(),
date.getSeconds(),
nanosOfSecond);
}
if (obj instanceof Duration) {
Long value = ((Duration) obj).toNanos();
if (value >= 0 && value <= NANOSECONDS_PER_DAY) {
return LocalTime.ofNanoOfDay(value);
}
else {
throw new IllegalArgumentException("Time values must use number of milliseconds greater than 0 and less than 86400000000000");
}
}
throw new IllegalArgumentException("Unable to convert to LocalTime from unexpected value '" + obj + "' of type " + obj.getClass().getName());
}
public static LocalTime toLocalTime(Object obj) {
if (obj == null) {
return null;
}
if (obj instanceof LocalTime) {
return (LocalTime) obj;
}
if (obj instanceof LocalDateTime) {
return ((LocalDateTime) obj).toLocalTime();
}
if (obj instanceof java.sql.Date) {
throw new IllegalArgumentException(
"Unable to convert to LocalDate from a java.sql.Date value '" + obj + "'");
}
if (obj instanceof java.sql.Time) {
java.sql.Time time = (java.sql.Time) obj;
long millis = (int) (time.getTime() % MILLISECONDS_PER_SECOND);
int nanosOfSecond = (int) (millis * NANOSECONDS_PER_MILLISECOND);
return LocalTime.of(
time.getHours(), time.getMinutes(), time.getSeconds(), nanosOfSecond);
}
if (obj instanceof java.sql.Timestamp) {
java.sql.Timestamp timestamp = (java.sql.Timestamp) obj;
return LocalTime.of(
timestamp.getHours(),
timestamp.getMinutes(),
timestamp.getSeconds(),
timestamp.getNanos());
}
if (obj instanceof java.util.Date) {
java.util.Date date = (java.util.Date) obj;
long millis = (int) (date.getTime() % MILLISECONDS_PER_SECOND);
int nanosOfSecond = (int) (millis * NANOSECONDS_PER_MILLISECOND);
return LocalTime.of(
date.getHours(), date.getMinutes(), date.getSeconds(), nanosOfSecond);
}
if (obj instanceof Duration) {
Long value = ((Duration) obj).toNanos();
if (value >= 0 && value <= NANOSECONDS_PER_DAY) {
return LocalTime.ofNanoOfDay(value);
} else {
throw new IllegalArgumentException(
"Time values must use number of milliseconds greater than 0 and less than 86400000000000");
}
}
throw new IllegalArgumentException(
"Unable to convert to LocalTime from unexpected value '"
+ obj
+ "' of type "
+ obj.getClass().getName());
}
public static LocalDateTime toLocalDateTime(Object obj, ZoneId serverTimeZone) {
if (obj == null) {
return null;
}
if (obj instanceof OffsetDateTime) {
return ((OffsetDateTime) obj).toLocalDateTime();
}
if (obj instanceof Instant) {
return ((Instant) obj).atOffset(ZoneOffset.UTC).toLocalDateTime();
}
if (obj instanceof LocalDateTime) {
return (LocalDateTime) obj;
}
if (obj instanceof LocalDate) {
LocalDate date = (LocalDate) obj;
return LocalDateTime.of(date, LocalTime.MIDNIGHT);
}
if (obj instanceof LocalTime) {
LocalTime time = (LocalTime) obj;
return LocalDateTime.of(EPOCH, time);
}
if (obj instanceof java.sql.Date) {
java.sql.Date sqlDate = (java.sql.Date) obj;
LocalDate date = sqlDate.toLocalDate();
return LocalDateTime.of(date, LocalTime.MIDNIGHT);
}
if (obj instanceof java.sql.Time) {
LocalTime localTime = toLocalTime(obj);
return LocalDateTime.of(EPOCH, localTime);
}
if (obj instanceof java.sql.Timestamp) {
java.sql.Timestamp timestamp = (java.sql.Timestamp) obj;
return LocalDateTime.of(timestamp.getYear() + 1900,
timestamp.getMonth() + 1,
timestamp.getDate(),
timestamp.getHours(),
timestamp.getMinutes(),
timestamp.getSeconds(),
timestamp.getNanos());
}
if (obj instanceof java.util.Date) {
java.util.Date date = (java.util.Date) obj;
long millis = (int) (date.getTime() % MILLISECONDS_PER_SECOND);
if (millis < 0) {
millis = MILLISECONDS_PER_SECOND + millis;
}
int nanosOfSecond = (int) (millis * NANOSECONDS_PER_MILLISECOND);
return LocalDateTime.of(date.getYear() + 1900,
date.getMonth() + 1,
date.getDate(),
date.getHours(),
date.getMinutes(),
date.getSeconds(),
nanosOfSecond);
}
if (obj instanceof String) {
String str = (String) obj;
// TIMESTAMP type is encoded in string type
Instant instant = Instant.parse(str);
return LocalDateTime.ofInstant(instant, serverTimeZone);
}
throw new IllegalArgumentException("Unable to convert to LocalDateTime from unexpected value '" + obj + "' of type " + obj.getClass().getName());
}
public static LocalDateTime toLocalDateTime(Object obj, ZoneId serverTimeZone) {
if (obj == null) {
return null;
}
if (obj instanceof OffsetDateTime) {
return ((OffsetDateTime) obj).toLocalDateTime();
}
if (obj instanceof Instant) {
return ((Instant) obj).atOffset(ZoneOffset.UTC).toLocalDateTime();
}
if (obj instanceof LocalDateTime) {
return (LocalDateTime) obj;
}
if (obj instanceof LocalDate) {
LocalDate date = (LocalDate) obj;
return LocalDateTime.of(date, LocalTime.MIDNIGHT);
}
if (obj instanceof LocalTime) {
LocalTime time = (LocalTime) obj;
return LocalDateTime.of(EPOCH, time);
}
if (obj instanceof java.sql.Date) {
java.sql.Date sqlDate = (java.sql.Date) obj;
LocalDate date = sqlDate.toLocalDate();
return LocalDateTime.of(date, LocalTime.MIDNIGHT);
}
if (obj instanceof java.sql.Time) {
LocalTime localTime = toLocalTime(obj);
return LocalDateTime.of(EPOCH, localTime);
}
if (obj instanceof java.sql.Timestamp) {
java.sql.Timestamp timestamp = (java.sql.Timestamp) obj;
return LocalDateTime.of(
timestamp.getYear() + 1900,
timestamp.getMonth() + 1,
timestamp.getDate(),
timestamp.getHours(),
timestamp.getMinutes(),
timestamp.getSeconds(),
timestamp.getNanos());
}
if (obj instanceof java.util.Date) {
java.util.Date date = (java.util.Date) obj;
long millis = (int) (date.getTime() % MILLISECONDS_PER_SECOND);
if (millis < 0) {
millis = MILLISECONDS_PER_SECOND + millis;
}
int nanosOfSecond = (int) (millis * NANOSECONDS_PER_MILLISECOND);
return LocalDateTime.of(
date.getYear() + 1900,
date.getMonth() + 1,
date.getDate(),
date.getHours(),
date.getMinutes(),
date.getSeconds(),
nanosOfSecond);
}
if (obj instanceof String) {
String str = (String) obj;
// TIMESTAMP type is encoded in string type
Instant instant = Instant.parse(str);
return LocalDateTime.ofInstant(instant, serverTimeZone);
}
throw new IllegalArgumentException(
"Unable to convert to LocalDateTime from unexpected value '"
+ obj
+ "' of type "
+ obj.getClass().getName());
}
public static long toEpochMicros(Instant instant) {
return TimeUnit.SECONDS.toMicros(instant.getEpochSecond()) + TimeUnit.NANOSECONDS.toMicros(instant.getNano());
}
public static long toEpochMicros(Instant instant) {
return TimeUnit.SECONDS.toMicros(instant.getEpochSecond())
+ TimeUnit.NANOSECONDS.toMicros(instant.getNano());
}
public static Instant toInstantFromMicros(long microsSinceEpoch) {
return Instant.ofEpochSecond(
TimeUnit.MICROSECONDS.toSeconds(microsSinceEpoch),
TimeUnit.MICROSECONDS.toNanos(microsSinceEpoch % TimeUnit.SECONDS.toMicros(1)));
}
public static Instant toInstantFromMicros(long microsSinceEpoch) {
return Instant.ofEpochSecond(
TimeUnit.MICROSECONDS.toSeconds(microsSinceEpoch),
TimeUnit.MICROSECONDS.toNanos(microsSinceEpoch % TimeUnit.SECONDS.toMicros(1)));
}
}
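
A small usage sketch for the conversions above: the epoch-micros helpers round-trip at microsecond precision, and string-encoded TIMESTAMP values are parsed as an Instant and rendered in the given server time zone. The import of TemporalConversions (same package assumed) is omitted.

import java.time.Instant;
import java.time.LocalDateTime;
import java.time.ZoneId;

public class TemporalConversionsExample {

    public static void main(String[] args) {
        Instant instant = Instant.parse("2020-08-01T12:34:56.123456Z");

        // round-trip through microseconds since epoch
        long micros = TemporalConversions.toEpochMicros(instant);
        Instant back = TemporalConversions.toInstantFromMicros(micros);
        System.out.println(back); // 2020-08-01T12:34:56.123456Z

        // string-encoded TIMESTAMP values are parsed as an Instant, then shifted to the zone
        LocalDateTime local =
                TemporalConversions.toLocalDateTime(
                        "2020-08-01T12:34:56.123456Z", ZoneId.of("UTC"));
        System.out.println(local); // 2020-08-01T12:34:56.123456
    }
}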

@ -11,47 +11,53 @@ import io.debezium.engine.RecordChangeEvent;
import org.apache.kafka.connect.source.SourceRecord;
/**
* Copied from Debezium project. Make it public to be accessible from
* {@link com.alibaba.ververica.cdc.debezium.internal.DebeziumChangeConsumer}.
* Copied from Debezium project. Make it public to be accessible from {@link
* com.alibaba.ververica.cdc.debezium.internal.DebeziumChangeConsumer}.
*/
public class EmbeddedEngineChangeEvent<K, V> implements ChangeEvent<K, V>, RecordChangeEvent<V> {
private final K key;
private final V value;
private final SourceRecord sourceRecord;
public EmbeddedEngineChangeEvent(K key, V value, SourceRecord sourceRecord) {
this.key = key;
this.value = value;
this.sourceRecord = sourceRecord;
}
@Override
public K key() {
return key;
}
@Override
public V value() {
return value;
}
@Override
public V record() {
return value;
}
@Override
public String destination() {
return sourceRecord.topic();
}
public SourceRecord sourceRecord() {
return sourceRecord;
}
@Override
public String toString() {
return "EmbeddedEngineChangeEvent [key=" + key + ", value=" + value + ", sourceRecord=" + sourceRecord + "]";
}
private final K key;
private final V value;
private final SourceRecord sourceRecord;
public EmbeddedEngineChangeEvent(K key, V value, SourceRecord sourceRecord) {
this.key = key;
this.value = value;
this.sourceRecord = sourceRecord;
}
@Override
public K key() {
return key;
}
@Override
public V value() {
return value;
}
@Override
public V record() {
return value;
}
@Override
public String destination() {
return sourceRecord.topic();
}
public SourceRecord sourceRecord() {
return sourceRecord;
}
@Override
public String toString() {
return "EmbeddedEngineChangeEvent [key="
+ key
+ ", value="
+ value
+ ", sourceRecord="
+ sourceRecord
+ "]";
}
}
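
A sketch of why this class is made public: a change consumer can unwrap the Kafka Connect SourceRecord from the engine's ChangeEvent by casting, roughly as below. This is a simplified illustration, not the actual DebeziumChangeConsumer code.

import io.debezium.engine.ChangeEvent;
import org.apache.kafka.connect.source.SourceRecord;

public class ChangeEventUnwrapExample {

    /** Unwraps the underlying SourceRecord carried by the embedded engine's event. */
    public static SourceRecord toSourceRecord(ChangeEvent<SourceRecord, SourceRecord> event) {
        return ((EmbeddedEngineChangeEvent<SourceRecord, SourceRecord>) event).sourceRecord();
    }
}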

@ -30,208 +30,194 @@ import java.util.Properties;
import static org.apache.flink.util.Preconditions.checkNotNull;
/**
* A builder to build a SourceFunction which can read snapshot and continue to consume binlog.
*/
/** A builder to build a SourceFunction which can read snapshot and continue to consume binlog. */
public class MySQLSource {
private static final String DATABASE_SERVER_NAME = "mysql_binlog_source";
public static <T> Builder<T> builder() {
return new Builder<>();
}
/**
* Builder class of {@link MySQLSource}.
*/
public static class Builder<T> {
private int port = 3306; // default 3306 port
private String hostname;
private String[] databaseList;
private String username;
private String password;
private Integer serverId;
private String serverTimeZone;
private String[] tableList;
private Properties dbzProperties;
private StartupOptions startupOptions = StartupOptions.initial();
private DebeziumDeserializationSchema<T> deserializer;
public Builder<T> hostname(String hostname) {
this.hostname = hostname;
return this;
}
/**
* Integer port number of the MySQL database server.
*/
public Builder<T> port(int port) {
this.port = port;
return this;
}
/**
* An optional list of regular expressions that match database names to be monitored;
* any database name not included in the whitelist will be excluded from monitoring.
* By default all databases will be monitored.
*/
public Builder<T> databaseList(String... databaseList) {
this.databaseList = databaseList;
return this;
}
/**
* An optional list of regular expressions that match fully-qualified table identifiers
* for tables to be monitored; any table not included in the list will be excluded from
* monitoring. Each identifier is of the form databaseName.tableName.
* By default the connector will monitor every non-system table in each monitored database.
*/
public Builder<T> tableList(String... tableList) {
this.tableList = tableList;
return this;
}
/**
* Name of the MySQL database to use when connecting to the MySQL database server.
*/
public Builder<T> username(String username) {
this.username = username;
return this;
}
/**
* Password to use when connecting to the MySQL database server.
*/
public Builder<T> password(String password) {
this.password = password;
return this;
}
/**
* The session time zone in database server, e.g. "America/Los_Angeles".
* It controls how the TIMESTAMP type in MYSQL converted to STRING.
* See more https://debezium.io/documentation/reference/1.2/connectors/mysql.html#_temporal_values
*/
public Builder<T> serverTimeZone(String timeZone) {
this.serverTimeZone = timeZone;
return this;
}
/**
* A numeric ID of this database client, which must be unique across all currently-running
* database processes in the MySQL cluster. This connector joins the MySQL database cluster
* as another server (with this unique ID) so it can read the binlog. By default, a random
* number is generated between 5400 and 6400, though we recommend setting an explicit value.
*/
public Builder<T> serverId(int serverId) {
this.serverId = serverId;
return this;
}
/**
* The Debezium MySQL connector properties. For example, "snapshot.mode".
*/
public Builder<T> debeziumProperties(Properties properties) {
this.dbzProperties = properties;
return this;
}
/**
* The deserializer used to convert from consumed {@link org.apache.kafka.connect.source.SourceRecord}.
*/
public Builder<T> deserializer(DebeziumDeserializationSchema<T> deserializer) {
this.deserializer = deserializer;
return this;
}
/**
* Specifies the startup options.
*/
public Builder<T> startupOptions(StartupOptions startupOptions) {
this.startupOptions = startupOptions;
return this;
}
public DebeziumSourceFunction<T> build() {
Properties props = new Properties();
props.setProperty("connector.class", MySqlConnector.class.getCanonicalName());
// hard code server name, because we don't need to distinguish it, docs:
// Logical name that identifies and provides a namespace for the particular MySQL database
// server/cluster being monitored. The logical name should be unique across all other connectors,
// since it is used as a prefix for all Kafka topic names emanating from this connector.
// Only alphanumeric characters and underscores should be used.
props.setProperty("database.server.name", DATABASE_SERVER_NAME);
props.setProperty("database.hostname", checkNotNull(hostname));
props.setProperty("database.user", checkNotNull(username));
props.setProperty("database.password", checkNotNull(password));
props.setProperty("database.port", String.valueOf(port));
props.setProperty("database.history.skip.unparseable.ddl", String.valueOf(true));
if (serverId != null) {
props.setProperty("database.server.id", String.valueOf(serverId));
}
if (databaseList != null) {
props.setProperty("database.whitelist", String.join(",", databaseList));
}
if (tableList != null) {
props.setProperty("table.whitelist", String.join(",", tableList));
}
if (serverTimeZone != null) {
props.setProperty("database.serverTimezone", serverTimeZone);
}
DebeziumOffset specificOffset = null;
switch (startupOptions.startupMode) {
case INITIAL:
props.setProperty("snapshot.mode", "initial");
break;
case EARLIEST_OFFSET:
props.setProperty("snapshot.mode", "never");
break;
case LATEST_OFFSET:
props.setProperty("snapshot.mode", "schema_only");
break;
case SPECIFIC_OFFSETS:
// if binlog offset is specified, 'snapshot.mode=schema_only_recovery' must
// be configured. It only snapshots the schemas, not the data,
// and continue binlog reading from the specified offset
props.setProperty("snapshot.mode", "schema_only_recovery");
specificOffset = new DebeziumOffset();
Map<String, String> sourcePartition = new HashMap<>();
sourcePartition.put("server", DATABASE_SERVER_NAME);
specificOffset.setSourcePartition(sourcePartition);
Map<String, Object> sourceOffset = new HashMap<>();
sourceOffset.put("file", startupOptions.specificOffsetFile);
sourceOffset.put("pos", startupOptions.specificOffsetPos);
specificOffset.setSourceOffset(sourceOffset);
break;
case TIMESTAMP:
checkNotNull(deserializer);
props.setProperty("snapshot.mode", "never");
deserializer = new SeekBinlogToTimestampFilter<>(
startupOptions.startupTimestampMillis,
deserializer);
break;
default:
throw new UnsupportedOperationException();
}
if (dbzProperties != null) {
dbzProperties.forEach(props::put);
}
return new DebeziumSourceFunction<>(
deserializer,
props,
specificOffset);
}
}
private static final String DATABASE_SERVER_NAME = "mysql_binlog_source";
public static <T> Builder<T> builder() {
return new Builder<>();
}
/** Builder class of {@link MySQLSource}. */
public static class Builder<T> {
private int port = 3306; // default 3306 port
private String hostname;
private String[] databaseList;
private String username;
private String password;
private Integer serverId;
private String serverTimeZone;
private String[] tableList;
private Properties dbzProperties;
private StartupOptions startupOptions = StartupOptions.initial();
private DebeziumDeserializationSchema<T> deserializer;
public Builder<T> hostname(String hostname) {
this.hostname = hostname;
return this;
}
/** Integer port number of the MySQL database server. */
public Builder<T> port(int port) {
this.port = port;
return this;
}
/**
* An optional list of regular expressions that match database names to be monitored; any
* database name not included in the whitelist will be excluded from monitoring. By default
* all databases will be monitored.
*/
public Builder<T> databaseList(String... databaseList) {
this.databaseList = databaseList;
return this;
}
/**
* An optional list of regular expressions that match fully-qualified table identifiers for
* tables to be monitored; any table not included in the list will be excluded from
* monitoring. Each identifier is of the form databaseName.tableName. By default the
* connector will monitor every non-system table in each monitored database.
*/
public Builder<T> tableList(String... tableList) {
this.tableList = tableList;
return this;
}
/** Name of the MySQL user to use when connecting to the MySQL database server. */
public Builder<T> username(String username) {
this.username = username;
return this;
}
/** Password to use when connecting to the MySQL database server. */
public Builder<T> password(String password) {
this.password = password;
return this;
}
/**
* The session time zone of the database server, e.g. "America/Los_Angeles". It controls how the
* TIMESTAMP type in MySQL is converted to STRING. See more at
* https://debezium.io/documentation/reference/1.2/connectors/mysql.html#_temporal_values
*/
public Builder<T> serverTimeZone(String timeZone) {
this.serverTimeZone = timeZone;
return this;
}
/**
* A numeric ID of this database client, which must be unique across all currently-running
* database processes in the MySQL cluster. This connector joins the MySQL database cluster
* as another server (with this unique ID) so it can read the binlog. By default, a random
* number is generated between 5400 and 6400, though we recommend setting an explicit value.
*/
public Builder<T> serverId(int serverId) {
this.serverId = serverId;
return this;
}
/** The Debezium MySQL connector properties. For example, "snapshot.mode". */
public Builder<T> debeziumProperties(Properties properties) {
this.dbzProperties = properties;
return this;
}
/**
* The deserializer used to convert from consumed {@link
* org.apache.kafka.connect.source.SourceRecord}.
*/
public Builder<T> deserializer(DebeziumDeserializationSchema<T> deserializer) {
this.deserializer = deserializer;
return this;
}
/** Specifies the startup options. */
public Builder<T> startupOptions(StartupOptions startupOptions) {
this.startupOptions = startupOptions;
return this;
}
public DebeziumSourceFunction<T> build() {
Properties props = new Properties();
props.setProperty("connector.class", MySqlConnector.class.getCanonicalName());
// hard code server name, because we don't need to distinguish it, docs:
// Logical name that identifies and provides a namespace for the particular MySQL
// database
// server/cluster being monitored. The logical name should be unique across all other
// connectors,
// since it is used as a prefix for all Kafka topic names emanating from this connector.
// Only alphanumeric characters and underscores should be used.
props.setProperty("database.server.name", DATABASE_SERVER_NAME);
props.setProperty("database.hostname", checkNotNull(hostname));
props.setProperty("database.user", checkNotNull(username));
props.setProperty("database.password", checkNotNull(password));
props.setProperty("database.port", String.valueOf(port));
props.setProperty("database.history.skip.unparseable.ddl", String.valueOf(true));
if (serverId != null) {
props.setProperty("database.server.id", String.valueOf(serverId));
}
if (databaseList != null) {
props.setProperty("database.whitelist", String.join(",", databaseList));
}
if (tableList != null) {
props.setProperty("table.whitelist", String.join(",", tableList));
}
if (serverTimeZone != null) {
props.setProperty("database.serverTimezone", serverTimeZone);
}
DebeziumOffset specificOffset = null;
switch (startupOptions.startupMode) {
case INITIAL:
props.setProperty("snapshot.mode", "initial");
break;
case EARLIEST_OFFSET:
props.setProperty("snapshot.mode", "never");
break;
case LATEST_OFFSET:
props.setProperty("snapshot.mode", "schema_only");
break;
case SPECIFIC_OFFSETS:
// if binlog offset is specified, 'snapshot.mode=schema_only_recovery' must
// be configured. It only snapshots the schemas, not the data,
// and continues binlog reading from the specified offset
props.setProperty("snapshot.mode", "schema_only_recovery");
specificOffset = new DebeziumOffset();
Map<String, String> sourcePartition = new HashMap<>();
sourcePartition.put("server", DATABASE_SERVER_NAME);
specificOffset.setSourcePartition(sourcePartition);
Map<String, Object> sourceOffset = new HashMap<>();
sourceOffset.put("file", startupOptions.specificOffsetFile);
sourceOffset.put("pos", startupOptions.specificOffsetPos);
specificOffset.setSourceOffset(sourceOffset);
break;
case TIMESTAMP:
checkNotNull(deserializer);
props.setProperty("snapshot.mode", "never");
deserializer =
new SeekBinlogToTimestampFilter<>(
startupOptions.startupTimestampMillis, deserializer);
break;
default:
throw new UnsupportedOperationException();
}
if (dbzProperties != null) {
dbzProperties.forEach(props::put);
}
return new DebeziumSourceFunction<>(deserializer, props, specificOffset);
}
}
}
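
A minimal DataStream job wired with the builder above, as a hedged sketch: StringDebeziumDeserializationSchema, the connection values, and the package names are taken from the rest of the project and are not part of this diff.

import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

import com.alibaba.ververica.cdc.connectors.mysql.MySQLSource;
import com.alibaba.ververica.cdc.debezium.DebeziumSourceFunction;
import com.alibaba.ververica.cdc.debezium.StringDebeziumDeserializationSchema;

public class MySqlBinlogSourceExample {

    public static void main(String[] args) throws Exception {
        DebeziumSourceFunction<String> sourceFunction =
                MySQLSource.<String>builder()
                        .hostname("localhost")
                        .port(3306)
                        .databaseList("inventory") // monitor all tables under "inventory"
                        .username("flinkuser")
                        .password("flinkpw")
                        .deserializer(new StringDebeziumDeserializationSchema())
                        .build();

        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        // print the raw SourceRecord strings emitted by the deserializer
        env.addSource(sourceFunction).print().setParallelism(1);
        env.execute("MySQL binlog source");
    }
}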

@ -33,50 +33,54 @@ import org.slf4j.LoggerFactory;
* to seek binlog to the specific timestamp.
*/
public class SeekBinlogToTimestampFilter<T> implements DebeziumDeserializationSchema<T> {
private static final long serialVersionUID = -4450118969976653497L;
protected static final Logger LOG = LoggerFactory.getLogger(SeekBinlogToTimestampFilter.class);
private static final long serialVersionUID = -4450118969976653497L;
protected static final Logger LOG = LoggerFactory.getLogger(SeekBinlogToTimestampFilter.class);
private final long startupTimestampMillis;
private final DebeziumDeserializationSchema<T> serializer;
private final long startupTimestampMillis;
private final DebeziumDeserializationSchema<T> serializer;
private transient boolean find = false;
private transient long filtered = 0L;
private transient boolean find = false;
private transient long filtered = 0L;
public SeekBinlogToTimestampFilter(long startupTimestampMillis, DebeziumDeserializationSchema<T> serializer) {
this.startupTimestampMillis = startupTimestampMillis;
this.serializer = serializer;
}
public SeekBinlogToTimestampFilter(
long startupTimestampMillis, DebeziumDeserializationSchema<T> serializer) {
this.startupTimestampMillis = startupTimestampMillis;
this.serializer = serializer;
}
@Override
public void deserialize(SourceRecord record, Collector<T> out) throws Exception {
if (find) {
serializer.deserialize(record, out);
return;
}
@Override
public void deserialize(SourceRecord record, Collector<T> out) throws Exception {
if (find) {
serializer.deserialize(record, out);
return;
}
if (filtered == 0) {
LOG.info("Begin to seek binlog to the specific timestamp {}.", startupTimestampMillis);
}
if (filtered == 0) {
LOG.info("Begin to seek binlog to the specific timestamp {}.", startupTimestampMillis);
}
Struct value = (Struct) record.value();
Struct source = value.getStruct(Envelope.FieldName.SOURCE);
Long ts = source.getInt64(Envelope.FieldName.TIMESTAMP);
if (ts != null && ts >= startupTimestampMillis) {
serializer.deserialize(record, out);
find = true;
LOG.info("Successfully seek to the specific timestamp {} with filtered {} change events.",
startupTimestampMillis,
filtered);
} else {
filtered++;
if (filtered % 10000 == 0) {
LOG.info("Seeking binlog to specific timestamp with filtered {} change events.", filtered);
}
}
}
Struct value = (Struct) record.value();
Struct source = value.getStruct(Envelope.FieldName.SOURCE);
Long ts = source.getInt64(Envelope.FieldName.TIMESTAMP);
if (ts != null && ts >= startupTimestampMillis) {
serializer.deserialize(record, out);
find = true;
LOG.info(
"Successfully seek to the specific timestamp {} with filtered {} change events.",
startupTimestampMillis,
filtered);
} else {
filtered++;
if (filtered % 10000 == 0) {
LOG.info(
"Seeking binlog to specific timestamp with filtered {} change events.",
filtered);
}
}
}
@Override
public TypeInformation<T> getProducedType() {
return serializer.getProducedType();
}
@Override
public TypeInformation<T> getProducedType() {
return serializer.getProducedType();
}
}
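
A similar sketch (again, not part of this commit) for the timestamp startup mode, which is the path that exercises SeekBinlogToTimestampFilter; the timestamp and connection settings are hypothetical placeholders.

import com.alibaba.ververica.cdc.connectors.mysql.MySQLSource;
import com.alibaba.ververica.cdc.connectors.mysql.table.StartupOptions;
import com.alibaba.ververica.cdc.debezium.DebeziumSourceFunction;
import com.alibaba.ververica.cdc.debezium.StringDebeziumDeserializationSchema;

public class TimestampStartupExample {
    public static DebeziumSourceFunction<String> buildSource() {
        // With StartupOptions.timestamp(...) the builder sets snapshot.mode=never and wraps
        // the user deserializer in SeekBinlogToTimestampFilter, so change events whose source
        // timestamp is older than the given epoch millis are skipped before deserialization.
        long startupTimestampMillis = 1_600_000_000_000L; // hypothetical point in time
        return MySQLSource.<String>builder()
                .hostname("localhost") // hypothetical connection settings
                .port(3306)
                .databaseList("inventory")
                .tableList("inventory.products")
                .username("flinkuser")
                .password("flinkpw")
                .startupOptions(StartupOptions.timestamp(startupTimestampMillis))
                .deserializer(new StringDebeziumDeserializationSchema())
                .build();
    }
}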

@ -48,125 +48,135 @@ import static org.apache.flink.util.Preconditions.checkNotNull;
*/
public class MySQLTableSource implements ScanTableSource {
private final TableSchema physicalSchema;
private final int port;
private final String hostname;
private final String database;
private final String username;
private final String password;
private final Integer serverId;
private final String tableName;
private final ZoneId serverTimeZone;
private final Properties dbzProperties;
private final StartupOptions startupOptions;
public MySQLTableSource(
TableSchema physicalSchema,
int port,
String hostname,
String database,
String tableName,
String username,
String password,
ZoneId serverTimeZone,
Properties dbzProperties,
@Nullable Integer serverId,
StartupOptions startupOptions) {
this.physicalSchema = physicalSchema;
this.port = port;
this.hostname = checkNotNull(hostname);
this.database = checkNotNull(database);
this.tableName = checkNotNull(tableName);
this.username = checkNotNull(username);
this.password = checkNotNull(password);
this.serverId = serverId;
this.serverTimeZone = serverTimeZone;
this.dbzProperties = dbzProperties;
this.startupOptions = startupOptions;
}
@Override
public ChangelogMode getChangelogMode() {
return ChangelogMode.newBuilder()
.addContainedKind(RowKind.INSERT)
.addContainedKind(RowKind.UPDATE_BEFORE)
.addContainedKind(RowKind.UPDATE_AFTER)
.addContainedKind(RowKind.DELETE)
.build();
}
@Override
public ScanRuntimeProvider getScanRuntimeProvider(ScanContext scanContext) {
RowType rowType = (RowType) physicalSchema.toRowDataType().getLogicalType();
TypeInformation<RowData> typeInfo = scanContext.createTypeInformation(physicalSchema.toRowDataType());
DebeziumDeserializationSchema<RowData> deserializer = new RowDataDebeziumDeserializeSchema(
rowType,
typeInfo,
((rowData, rowKind) -> {}),
serverTimeZone);
MySQLSource.Builder<RowData> builder = MySQLSource.<RowData>builder()
.hostname(hostname)
.port(port)
.databaseList(database)
.tableList(database + "." + tableName)
.username(username)
.password(password)
.serverTimeZone(serverTimeZone.toString())
.debeziumProperties(dbzProperties)
.startupOptions(startupOptions)
.deserializer(deserializer);
Optional.ofNullable(serverId).ifPresent(builder::serverId);
DebeziumSourceFunction<RowData> sourceFunction = builder.build();
return SourceFunctionProvider.of(sourceFunction, false);
}
@Override
public DynamicTableSource copy() {
return new MySQLTableSource(
physicalSchema,
port,
hostname,
database,
tableName,
username,
password,
serverTimeZone,
dbzProperties,
serverId,
startupOptions
);
}
@Override
public boolean equals(Object o) {
if (this == o) {
return true;
}
if (o == null || getClass() != o.getClass()) {
return false;
}
MySQLTableSource that = (MySQLTableSource) o;
return port == that.port &&
Objects.equals(physicalSchema, that.physicalSchema) &&
Objects.equals(hostname, that.hostname) &&
Objects.equals(database, that.database) &&
Objects.equals(username, that.username) &&
Objects.equals(password, that.password) &&
Objects.equals(serverId, that.serverId) &&
Objects.equals(tableName, that.tableName) &&
Objects.equals(serverTimeZone, that.serverTimeZone) &&
Objects.equals(dbzProperties, that.dbzProperties) &&
Objects.equals(startupOptions, that.startupOptions);
}
@Override
public int hashCode() {
return Objects.hash(physicalSchema, port, hostname, database, username, password, serverId, tableName, serverTimeZone, dbzProperties, startupOptions);
}
@Override
public String asSummaryString() {
return "MySQL-CDC";
}
private final TableSchema physicalSchema;
private final int port;
private final String hostname;
private final String database;
private final String username;
private final String password;
private final Integer serverId;
private final String tableName;
private final ZoneId serverTimeZone;
private final Properties dbzProperties;
private final StartupOptions startupOptions;
public MySQLTableSource(
TableSchema physicalSchema,
int port,
String hostname,
String database,
String tableName,
String username,
String password,
ZoneId serverTimeZone,
Properties dbzProperties,
@Nullable Integer serverId,
StartupOptions startupOptions) {
this.physicalSchema = physicalSchema;
this.port = port;
this.hostname = checkNotNull(hostname);
this.database = checkNotNull(database);
this.tableName = checkNotNull(tableName);
this.username = checkNotNull(username);
this.password = checkNotNull(password);
this.serverId = serverId;
this.serverTimeZone = serverTimeZone;
this.dbzProperties = dbzProperties;
this.startupOptions = startupOptions;
}
@Override
public ChangelogMode getChangelogMode() {
return ChangelogMode.newBuilder()
.addContainedKind(RowKind.INSERT)
.addContainedKind(RowKind.UPDATE_BEFORE)
.addContainedKind(RowKind.UPDATE_AFTER)
.addContainedKind(RowKind.DELETE)
.build();
}
@Override
public ScanRuntimeProvider getScanRuntimeProvider(ScanContext scanContext) {
RowType rowType = (RowType) physicalSchema.toRowDataType().getLogicalType();
TypeInformation<RowData> typeInfo =
scanContext.createTypeInformation(physicalSchema.toRowDataType());
DebeziumDeserializationSchema<RowData> deserializer =
new RowDataDebeziumDeserializeSchema(
rowType, typeInfo, ((rowData, rowKind) -> {}), serverTimeZone);
MySQLSource.Builder<RowData> builder =
MySQLSource.<RowData>builder()
.hostname(hostname)
.port(port)
.databaseList(database)
.tableList(database + "." + tableName)
.username(username)
.password(password)
.serverTimeZone(serverTimeZone.toString())
.debeziumProperties(dbzProperties)
.startupOptions(startupOptions)
.deserializer(deserializer);
Optional.ofNullable(serverId).ifPresent(builder::serverId);
DebeziumSourceFunction<RowData> sourceFunction = builder.build();
return SourceFunctionProvider.of(sourceFunction, false);
}
@Override
public DynamicTableSource copy() {
return new MySQLTableSource(
physicalSchema,
port,
hostname,
database,
tableName,
username,
password,
serverTimeZone,
dbzProperties,
serverId,
startupOptions);
}
@Override
public boolean equals(Object o) {
if (this == o) {
return true;
}
if (o == null || getClass() != o.getClass()) {
return false;
}
MySQLTableSource that = (MySQLTableSource) o;
return port == that.port
&& Objects.equals(physicalSchema, that.physicalSchema)
&& Objects.equals(hostname, that.hostname)
&& Objects.equals(database, that.database)
&& Objects.equals(username, that.username)
&& Objects.equals(password, that.password)
&& Objects.equals(serverId, that.serverId)
&& Objects.equals(tableName, that.tableName)
&& Objects.equals(serverTimeZone, that.serverTimeZone)
&& Objects.equals(dbzProperties, that.dbzProperties)
&& Objects.equals(startupOptions, that.startupOptions);
}
@Override
public int hashCode() {
return Objects.hash(
physicalSchema,
port,
hostname,
database,
username,
password,
serverId,
tableName,
serverTimeZone,
dbzProperties,
startupOptions);
}
@Override
public String asSummaryString() {
return "MySQL-CDC";
}
}

@ -36,196 +36,208 @@ import java.util.Set;
import static com.alibaba.ververica.cdc.debezium.table.DebeziumOptions.getDebeziumProperties;
/**
* Factory for creating configured instance of {@link MySQLTableSource}.
*/
/** Factory for creating configured instance of {@link MySQLTableSource}. */
public class MySQLTableSourceFactory implements DynamicTableSourceFactory {
private static final String IDENTIFIER = "mysql-cdc";
private static final ConfigOption<String> HOSTNAME = ConfigOptions.key("hostname")
.stringType()
.noDefaultValue()
.withDescription("IP address or hostname of the MySQL database server.");
private static final ConfigOption<Integer> PORT = ConfigOptions.key("port")
.intType()
.defaultValue(3306)
.withDescription("Integer port number of the MySQL database server.");
private static final ConfigOption<String> USERNAME = ConfigOptions.key("username")
.stringType()
.noDefaultValue()
.withDescription("Name of the MySQL database to use when connecting to the MySQL database server.");
private static final ConfigOption<String> PASSWORD = ConfigOptions.key("password")
.stringType()
.noDefaultValue()
.withDescription("Password to use when connecting to the MySQL database server.");
private static final ConfigOption<String> DATABASE_NAME = ConfigOptions.key("database-name")
.stringType()
.noDefaultValue()
.withDescription("Database name of the MySQL server to monitor.");
private static final ConfigOption<String> TABLE_NAME = ConfigOptions.key("table-name")
.stringType()
.noDefaultValue()
.withDescription("Table name of the MySQL database to monitor.");
private static final ConfigOption<String> SERVER_TIME_ZONE = ConfigOptions.key("server-time-zone")
.stringType()
.defaultValue("UTC")
.withDescription("The session time zone in database server.");
private static final ConfigOption<Integer> SERVER_ID = ConfigOptions.key("server-id")
.intType()
.noDefaultValue()
.withDescription("A numeric ID of this database client, which must be unique across all " +
"currently-running database processes in the MySQL cluster. This connector joins the " +
"MySQL database cluster as another server (with this unique ID) so it can read the binlog. " +
"By default, a random number is generated between 5400 and 6400, though we recommend setting an explicit value.");
public static final ConfigOption<String> SCAN_STARTUP_MODE =
ConfigOptions.key("scan.startup.mode")
.stringType()
.defaultValue("initial")
.withDescription(
"Optional startup mode for Kafka consumer, valid enumerations are "
+ "\"initial\", \"earliest-offset\", \"latest-offset\", \"timestamp\"\n"
+ "or \"specific-offset\"");
public static final ConfigOption<String> SCAN_STARTUP_SPECIFIC_OFFSET_FILE =
ConfigOptions.key("scan.startup.specific-offset.file")
.stringType()
.noDefaultValue()
.withDescription(
"Optional offsets used in case of \"specific-offset\" startup mode");
public static final ConfigOption<Integer> SCAN_STARTUP_SPECIFIC_OFFSET_POS =
ConfigOptions.key("scan.startup.specific-offset.pos")
.intType()
.noDefaultValue()
.withDescription(
"Optional offsets used in case of \"specific-offset\" startup mode");
public static final ConfigOption<Long> SCAN_STARTUP_TIMESTAMP_MILLIS =
ConfigOptions.key("scan.startup.timestamp-millis")
.longType()
.noDefaultValue()
.withDescription(
"Optional timestamp used in case of \"timestamp\" startup mode");
private static final ConfigOption<String> SOURCE_OFFSET_FILE = ConfigOptions.key("source-offset-file")
.stringType()
.noDefaultValue()
.withDescription("File Name of the MySQL binlog.");
private static final ConfigOption<Integer> SOURCE_OFFSET_POSITION = ConfigOptions.key("source-offset-pos")
.intType()
.noDefaultValue()
.withDescription("Position of the MySQL binlog.");
@Override
public DynamicTableSource createDynamicTableSource(Context context) {
final FactoryUtil.TableFactoryHelper helper = FactoryUtil.createTableFactoryHelper(this, context);
helper.validateExcept(DebeziumOptions.DEBEZIUM_OPTIONS_PREFIX);
final ReadableConfig config = helper.getOptions();
String hostname = config.get(HOSTNAME);
String username = config.get(USERNAME);
String password = config.get(PASSWORD);
String databaseName = config.get(DATABASE_NAME);
String tableName = config.get(TABLE_NAME);
int port = config.get(PORT);
Integer serverId = config.getOptional(SERVER_ID).orElse(null);
ZoneId serverTimeZone = ZoneId.of(config.get(SERVER_TIME_ZONE));
StartupOptions startupOptions = getStartupOptions(config);
TableSchema physicalSchema = TableSchemaUtils.getPhysicalSchema(context.getCatalogTable().getSchema());
return new MySQLTableSource(
physicalSchema,
port,
hostname,
databaseName,
tableName,
username,
password,
serverTimeZone,
getDebeziumProperties(context.getCatalogTable().getOptions()),
serverId,
startupOptions
);
}
@Override
public String factoryIdentifier() {
return IDENTIFIER;
}
@Override
public Set<ConfigOption<?>> requiredOptions() {
Set<ConfigOption<?>> options = new HashSet<>();
options.add(HOSTNAME);
options.add(USERNAME);
options.add(PASSWORD);
options.add(DATABASE_NAME);
options.add(TABLE_NAME);
return options;
}
@Override
public Set<ConfigOption<?>> optionalOptions() {
Set<ConfigOption<?>> options = new HashSet<>();
options.add(PORT);
options.add(SERVER_TIME_ZONE);
options.add(SERVER_ID);
options.add(SCAN_STARTUP_MODE);
options.add(SCAN_STARTUP_SPECIFIC_OFFSET_FILE);
options.add(SCAN_STARTUP_SPECIFIC_OFFSET_POS);
options.add(SCAN_STARTUP_TIMESTAMP_MILLIS);
return options;
}
private static final String SCAN_STARTUP_MODE_VALUE_INITIAL = "initial";
private static final String SCAN_STARTUP_MODE_VALUE_EARLIEST = "earliest-offset";
private static final String SCAN_STARTUP_MODE_VALUE_LATEST = "latest-offset";
private static final String SCAN_STARTUP_MODE_VALUE_SPECIFIC_OFFSET = "specific-offset";
private static final String SCAN_STARTUP_MODE_VALUE_TIMESTAMP = "timestamp";
private static StartupOptions getStartupOptions(ReadableConfig config) {
String modeString = config.get(SCAN_STARTUP_MODE);
switch (modeString.toLowerCase()) {
case SCAN_STARTUP_MODE_VALUE_INITIAL:
return StartupOptions.initial();
case SCAN_STARTUP_MODE_VALUE_EARLIEST:
return StartupOptions.earliest();
case SCAN_STARTUP_MODE_VALUE_LATEST:
return StartupOptions.latest();
case SCAN_STARTUP_MODE_VALUE_SPECIFIC_OFFSET:
String offsetFile = config.get(SCAN_STARTUP_SPECIFIC_OFFSET_FILE);
int offsetPos = config.get(SCAN_STARTUP_SPECIFIC_OFFSET_POS);
return StartupOptions.specificOffset(offsetFile, offsetPos);
case SCAN_STARTUP_MODE_VALUE_TIMESTAMP:
long millis = config.get(SCAN_STARTUP_TIMESTAMP_MILLIS);
return StartupOptions.timestamp(millis);
default:
throw new ValidationException(
String.format(
"Invalid value for option '%s'. Supported values are [%s, %s, %s, %s, %s], but was: %s",
SCAN_STARTUP_MODE.key(),
SCAN_STARTUP_MODE_VALUE_INITIAL,
SCAN_STARTUP_MODE_VALUE_EARLIEST,
SCAN_STARTUP_MODE_VALUE_LATEST,
SCAN_STARTUP_MODE_VALUE_SPECIFIC_OFFSET,
SCAN_STARTUP_MODE_VALUE_TIMESTAMP,
modeString));
}
}
private static final String IDENTIFIER = "mysql-cdc";
private static final ConfigOption<String> HOSTNAME =
ConfigOptions.key("hostname")
.stringType()
.noDefaultValue()
.withDescription("IP address or hostname of the MySQL database server.");
private static final ConfigOption<Integer> PORT =
ConfigOptions.key("port")
.intType()
.defaultValue(3306)
.withDescription("Integer port number of the MySQL database server.");
private static final ConfigOption<String> USERNAME =
ConfigOptions.key("username")
.stringType()
.noDefaultValue()
.withDescription(
"Name of the MySQL database to use when connecting to the MySQL database server.");
private static final ConfigOption<String> PASSWORD =
ConfigOptions.key("password")
.stringType()
.noDefaultValue()
.withDescription(
"Password to use when connecting to the MySQL database server.");
private static final ConfigOption<String> DATABASE_NAME =
ConfigOptions.key("database-name")
.stringType()
.noDefaultValue()
.withDescription("Database name of the MySQL server to monitor.");
private static final ConfigOption<String> TABLE_NAME =
ConfigOptions.key("table-name")
.stringType()
.noDefaultValue()
.withDescription("Table name of the MySQL database to monitor.");
private static final ConfigOption<String> SERVER_TIME_ZONE =
ConfigOptions.key("server-time-zone")
.stringType()
.defaultValue("UTC")
.withDescription("The session time zone in database server.");
private static final ConfigOption<Integer> SERVER_ID =
ConfigOptions.key("server-id")
.intType()
.noDefaultValue()
.withDescription(
"A numeric ID of this database client, which must be unique across all "
+ "currently-running database processes in the MySQL cluster. This connector joins the "
+ "MySQL database cluster as another server (with this unique ID) so it can read the binlog. "
+ "By default, a random number is generated between 5400 and 6400, though we recommend setting an explicit value.");
public static final ConfigOption<String> SCAN_STARTUP_MODE =
ConfigOptions.key("scan.startup.mode")
.stringType()
.defaultValue("initial")
.withDescription(
"Optional startup mode for Kafka consumer, valid enumerations are "
+ "\"initial\", \"earliest-offset\", \"latest-offset\", \"timestamp\"\n"
+ "or \"specific-offset\"");
public static final ConfigOption<String> SCAN_STARTUP_SPECIFIC_OFFSET_FILE =
ConfigOptions.key("scan.startup.specific-offset.file")
.stringType()
.noDefaultValue()
.withDescription(
"Optional offsets used in case of \"specific-offset\" startup mode");
public static final ConfigOption<Integer> SCAN_STARTUP_SPECIFIC_OFFSET_POS =
ConfigOptions.key("scan.startup.specific-offset.pos")
.intType()
.noDefaultValue()
.withDescription(
"Optional offsets used in case of \"specific-offset\" startup mode");
public static final ConfigOption<Long> SCAN_STARTUP_TIMESTAMP_MILLIS =
ConfigOptions.key("scan.startup.timestamp-millis")
.longType()
.noDefaultValue()
.withDescription(
"Optional timestamp used in case of \"timestamp\" startup mode");
private static final ConfigOption<String> SOURCE_OFFSET_FILE =
ConfigOptions.key("source-offset-file")
.stringType()
.noDefaultValue()
.withDescription("File Name of the MySQL binlog.");
private static final ConfigOption<Integer> SOURCE_OFFSET_POSITION =
ConfigOptions.key("source-offset-pos")
.intType()
.noDefaultValue()
.withDescription("Position of the MySQL binlog.");
@Override
public DynamicTableSource createDynamicTableSource(Context context) {
final FactoryUtil.TableFactoryHelper helper =
FactoryUtil.createTableFactoryHelper(this, context);
helper.validateExcept(DebeziumOptions.DEBEZIUM_OPTIONS_PREFIX);
final ReadableConfig config = helper.getOptions();
String hostname = config.get(HOSTNAME);
String username = config.get(USERNAME);
String password = config.get(PASSWORD);
String databaseName = config.get(DATABASE_NAME);
String tableName = config.get(TABLE_NAME);
int port = config.get(PORT);
Integer serverId = config.getOptional(SERVER_ID).orElse(null);
ZoneId serverTimeZone = ZoneId.of(config.get(SERVER_TIME_ZONE));
StartupOptions startupOptions = getStartupOptions(config);
TableSchema physicalSchema =
TableSchemaUtils.getPhysicalSchema(context.getCatalogTable().getSchema());
return new MySQLTableSource(
physicalSchema,
port,
hostname,
databaseName,
tableName,
username,
password,
serverTimeZone,
getDebeziumProperties(context.getCatalogTable().getOptions()),
serverId,
startupOptions);
}
@Override
public String factoryIdentifier() {
return IDENTIFIER;
}
@Override
public Set<ConfigOption<?>> requiredOptions() {
Set<ConfigOption<?>> options = new HashSet<>();
options.add(HOSTNAME);
options.add(USERNAME);
options.add(PASSWORD);
options.add(DATABASE_NAME);
options.add(TABLE_NAME);
return options;
}
@Override
public Set<ConfigOption<?>> optionalOptions() {
Set<ConfigOption<?>> options = new HashSet<>();
options.add(PORT);
options.add(SERVER_TIME_ZONE);
options.add(SERVER_ID);
options.add(SCAN_STARTUP_MODE);
options.add(SCAN_STARTUP_SPECIFIC_OFFSET_FILE);
options.add(SCAN_STARTUP_SPECIFIC_OFFSET_POS);
options.add(SCAN_STARTUP_TIMESTAMP_MILLIS);
return options;
}
private static final String SCAN_STARTUP_MODE_VALUE_INITIAL = "initial";
private static final String SCAN_STARTUP_MODE_VALUE_EARLIEST = "earliest-offset";
private static final String SCAN_STARTUP_MODE_VALUE_LATEST = "latest-offset";
private static final String SCAN_STARTUP_MODE_VALUE_SPECIFIC_OFFSET = "specific-offset";
private static final String SCAN_STARTUP_MODE_VALUE_TIMESTAMP = "timestamp";
private static StartupOptions getStartupOptions(ReadableConfig config) {
String modeString = config.get(SCAN_STARTUP_MODE);
switch (modeString.toLowerCase()) {
case SCAN_STARTUP_MODE_VALUE_INITIAL:
return StartupOptions.initial();
case SCAN_STARTUP_MODE_VALUE_EARLIEST:
return StartupOptions.earliest();
case SCAN_STARTUP_MODE_VALUE_LATEST:
return StartupOptions.latest();
case SCAN_STARTUP_MODE_VALUE_SPECIFIC_OFFSET:
String offsetFile = config.get(SCAN_STARTUP_SPECIFIC_OFFSET_FILE);
int offsetPos = config.get(SCAN_STARTUP_SPECIFIC_OFFSET_POS);
return StartupOptions.specificOffset(offsetFile, offsetPos);
case SCAN_STARTUP_MODE_VALUE_TIMESTAMP:
long millis = config.get(SCAN_STARTUP_TIMESTAMP_MILLIS);
return StartupOptions.timestamp(millis);
default:
throw new ValidationException(
String.format(
"Invalid value for option '%s'. Supported values are [%s, %s, %s, %s, %s], but was: %s",
SCAN_STARTUP_MODE.key(),
SCAN_STARTUP_MODE_VALUE_INITIAL,
SCAN_STARTUP_MODE_VALUE_EARLIEST,
SCAN_STARTUP_MODE_VALUE_LATEST,
SCAN_STARTUP_MODE_VALUE_SPECIFIC_OFFSET,
SCAN_STARTUP_MODE_VALUE_TIMESTAMP,
modeString));
}
}
}
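
For reference, a sketch (not part of this commit) of a Flink SQL table definition that exercises this factory and the new scan.startup.* options; the host, credentials, schema, and option values are hypothetical placeholders.

import org.apache.flink.table.api.EnvironmentSettings;
import org.apache.flink.table.api.TableEnvironment;

public class MySqlCdcDdlExample {
    public static void main(String[] args) {
        TableEnvironment tEnv =
                TableEnvironment.create(
                        EnvironmentSettings.newInstance().inStreamingMode().build());
        // 'mysql-cdc' and the option keys below are the identifiers declared by this factory.
        tEnv.executeSql(
                "CREATE TABLE orders (\n"
                        + "  order_id INT,\n"
                        + "  order_ts TIMESTAMP(3),\n"
                        + "  PRIMARY KEY (order_id) NOT ENFORCED\n"
                        + ") WITH (\n"
                        + "  'connector' = 'mysql-cdc',\n"
                        + "  'hostname' = 'localhost',\n"
                        + "  'port' = '3306',\n"
                        + "  'username' = 'flinkuser',\n"
                        + "  'password' = 'flinkpw',\n"
                        + "  'database-name' = 'inventory',\n"
                        + "  'table-name' = 'orders',\n"
                        + "  'server-time-zone' = 'UTC',\n"
                        + "  'scan.startup.mode' = 'latest-offset'\n"
                        + ")");
    }
}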

@ -24,15 +24,13 @@ package com.alibaba.ververica.cdc.connectors.mysql.table;
* @see StartupOptions
*/
public enum StartupMode {
INITIAL,
INITIAL,
EARLIEST_OFFSET,
EARLIEST_OFFSET,
LATEST_OFFSET,
LATEST_OFFSET,
SPECIFIC_OFFSETS,
TIMESTAMP
SPECIFIC_OFFSETS,
TIMESTAMP
}

@ -22,107 +22,105 @@ import java.util.Objects;
import static org.apache.flink.util.Preconditions.checkNotNull;
/**
* Debezium startup options.
*/
/** Debezium startup options. */
public final class StartupOptions {
public final StartupMode startupMode;
public final String specificOffsetFile;
public final Integer specificOffsetPos;
public final Long startupTimestampMillis;
public final StartupMode startupMode;
public final String specificOffsetFile;
public final Integer specificOffsetPos;
public final Long startupTimestampMillis;
/**
* Performs an initial snapshot on the monitored database tables upon first startup,
* and continues to read the latest binlog.
*/
public static StartupOptions initial() {
return new StartupOptions(StartupMode.INITIAL, null, null, null);
}
/**
* Performs an initial snapshot on the monitored database tables upon first startup, and
* continues to read the latest binlog.
*/
public static StartupOptions initial() {
return new StartupOptions(StartupMode.INITIAL, null, null, null);
}
/**
* Never to perform snapshot on the monitored database tables upon first startup,
* just read from the beginning of the binlog.
* This should be used with care, as it is only valid when the binlog is guaranteed to contain
* the entire history of the database.
*/
public static StartupOptions earliest() {
return new StartupOptions(StartupMode.EARLIEST_OFFSET, null, null, null);
}
/**
* Never to perform snapshot on the monitored database tables upon first startup, just read from
* the beginning of the binlog. This should be used with care, as it is only valid when the
* binlog is guaranteed to contain the entire history of the database.
*/
public static StartupOptions earliest() {
return new StartupOptions(StartupMode.EARLIEST_OFFSET, null, null, null);
}
/**
* Never to perform snapshot on the monitored database tables upon first startup,
* just read from the end of the binlog, which means only the changes made since the connector
* was started are captured.
*/
public static StartupOptions latest() {
return new StartupOptions(StartupMode.LATEST_OFFSET, null, null, null);
}
/**
* Never to perform snapshot on the monitored database tables upon first startup, just read from
* the end of the binlog, which means only the changes made since the connector was started are
* captured.
*/
public static StartupOptions latest() {
return new StartupOptions(StartupMode.LATEST_OFFSET, null, null, null);
}
/**
* Never to perform snapshot on the monitored database tables upon first startup,
* and directly read binlog from the specified offset.
*/
public static StartupOptions specificOffset(String specificOffsetFile, int specificOffsetPos) {
return new StartupOptions(StartupMode.SPECIFIC_OFFSETS, specificOffsetFile, specificOffsetPos, null);
}
/**
* Never to perform snapshot on the monitored database tables upon first startup, and directly
* read binlog from the specified offset.
*/
public static StartupOptions specificOffset(String specificOffsetFile, int specificOffsetPos) {
return new StartupOptions(
StartupMode.SPECIFIC_OFFSETS, specificOffsetFile, specificOffsetPos, null);
}
/**
* Never to perform snapshot on the monitored database tables upon first startup,
* and directly read binlog from the specified timestamp.
*
* <p>The consumer will traverse the binlog from the beginning and ignore change events whose
* timestamp is smaller than the specified timestamp.
*
* @param startupTimestampMillis timestamp for the startup offsets, as milliseconds from epoch.
*/
public static StartupOptions timestamp(long startupTimestampMillis) {
return new StartupOptions(StartupMode.TIMESTAMP, null, null, startupTimestampMillis);
}
/**
* Never to perform snapshot on the monitored database tables upon first startup, and directly
* read binlog from the specified timestamp.
*
* <p>The consumer will traverse the binlog from the beginning and ignore change events whose
* timestamp is smaller than the specified timestamp.
*
* @param startupTimestampMillis timestamp for the startup offsets, as milliseconds from epoch.
*/
public static StartupOptions timestamp(long startupTimestampMillis) {
return new StartupOptions(StartupMode.TIMESTAMP, null, null, startupTimestampMillis);
}
private StartupOptions(
StartupMode startupMode,
String specificOffsetFile,
Integer specificOffsetPos,
Long startupTimestampMillis) {
this.startupMode = startupMode;
this.specificOffsetFile = specificOffsetFile;
this.specificOffsetPos = specificOffsetPos;
this.startupTimestampMillis = startupTimestampMillis;
private StartupOptions(
StartupMode startupMode,
String specificOffsetFile,
Integer specificOffsetPos,
Long startupTimestampMillis) {
this.startupMode = startupMode;
this.specificOffsetFile = specificOffsetFile;
this.specificOffsetPos = specificOffsetPos;
this.startupTimestampMillis = startupTimestampMillis;
switch (startupMode) {
case INITIAL:
case EARLIEST_OFFSET:
case LATEST_OFFSET:
break;
case SPECIFIC_OFFSETS:
checkNotNull(specificOffsetFile, "specificOffsetFile shouldn't be null");
checkNotNull(specificOffsetPos, "specificOffsetPos shouldn't be null");
break;
case TIMESTAMP:
checkNotNull(startupTimestampMillis, "startupTimestampMillis shouldn't be null");
break;
default:
throw new UnsupportedOperationException(startupMode + " mode is not supported.");
}
}
switch (startupMode) {
case INITIAL:
case EARLIEST_OFFSET:
case LATEST_OFFSET:
break;
case SPECIFIC_OFFSETS:
checkNotNull(specificOffsetFile, "specificOffsetFile shouldn't be null");
checkNotNull(specificOffsetPos, "specificOffsetPos shouldn't be null");
break;
case TIMESTAMP:
checkNotNull(startupTimestampMillis, "startupTimestampMillis shouldn't be null");
break;
default:
throw new UnsupportedOperationException(startupMode + " mode is not supported.");
}
}
@Override
public boolean equals(Object o) {
if (this == o) {
return true;
}
if (o == null || getClass() != o.getClass()) {
return false;
}
StartupOptions that = (StartupOptions) o;
return startupMode == that.startupMode &&
Objects.equals(specificOffsetFile, that.specificOffsetFile) &&
Objects.equals(specificOffsetPos, that.specificOffsetPos) &&
Objects.equals(startupTimestampMillis, that.startupTimestampMillis);
}
@Override
public boolean equals(Object o) {
if (this == o) {
return true;
}
if (o == null || getClass() != o.getClass()) {
return false;
}
StartupOptions that = (StartupOptions) o;
return startupMode == that.startupMode
&& Objects.equals(specificOffsetFile, that.specificOffsetFile)
&& Objects.equals(specificOffsetPos, that.specificOffsetPos)
&& Objects.equals(startupTimestampMillis, that.startupTimestampMillis);
}
@Override
public int hashCode() {
return Objects.hash(startupMode, specificOffsetFile, specificOffsetPos, startupTimestampMillis);
}
@Override
public int hashCode() {
return Objects.hash(
startupMode, specificOffsetFile, specificOffsetPos, startupTimestampMillis);
}
}
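
A short sketch (not part of this commit) showing the factory methods documented above side by side; the binlog file, position, and timestamp are hypothetical placeholders.

import com.alibaba.ververica.cdc.connectors.mysql.table.StartupOptions;

public class StartupOptionsExamples {
    public static void main(String[] args) {
        // Snapshot the monitored tables first, then keep reading the latest binlog (the default).
        StartupOptions initial = StartupOptions.initial();

        // No snapshot; read the binlog from its beginning (requires the full binlog history).
        StartupOptions earliest = StartupOptions.earliest();

        // No snapshot; only changes made after the connector starts are read.
        StartupOptions latest = StartupOptions.latest();

        // No snapshot; resume from an explicit binlog file and position.
        StartupOptions fromOffset = StartupOptions.specificOffset("mysql-bin.000003", 4);

        // No snapshot; traverse the binlog and ignore events older than the given epoch millis.
        StartupOptions fromTimestamp = StartupOptions.timestamp(1_600_000_000_000L);
    }
}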

@ -26,33 +26,32 @@ import com.alibaba.ververica.cdc.debezium.StringDebeziumDeserializationSchema;
import org.junit.Ignore;
import org.junit.Test;
/**
* Integration tests for {@link MySQLSource}.
*/
/** Integration tests for {@link MySQLSource}. */
@Ignore
public class MySQLSourceITCase extends MySQLTestBase {
private final UniqueDatabase inventoryDatabase = new UniqueDatabase(
MYSQL_CONTAINER,
"inventory",
"mysqluser",
"mysqlpw");
@Test
public void testConsumingAllEvents() throws Exception {
inventoryDatabase.createAndInitialize();
SourceFunction<String> sourceFunction = MySQLSource.<String>builder()
.hostname(MYSQL_CONTAINER.getHost())
.port(MYSQL_CONTAINER.getDatabasePort())
.databaseList(inventoryDatabase.getDatabaseName()) // monitor all tables under inventory database
.username(inventoryDatabase.getUsername())
.password(inventoryDatabase.getPassword())
.deserializer(new StringDebeziumDeserializationSchema())
.build();
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
env.addSource(sourceFunction).print().setParallelism(1);
env.execute("Print MySQL Snapshot + Binlog");
}
private final UniqueDatabase inventoryDatabase =
new UniqueDatabase(MYSQL_CONTAINER, "inventory", "mysqluser", "mysqlpw");
@Test
public void testConsumingAllEvents() throws Exception {
inventoryDatabase.createAndInitialize();
SourceFunction<String> sourceFunction =
MySQLSource.<String>builder()
.hostname(MYSQL_CONTAINER.getHost())
.port(MYSQL_CONTAINER.getDatabasePort())
.databaseList(
inventoryDatabase
.getDatabaseName()) // monitor all tables under inventory
// database
.username(inventoryDatabase.getUsername())
.password(inventoryDatabase.getPassword())
.deserializer(new StringDebeziumDeserializationSchema())
.build();
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
env.addSource(sourceFunction).print().setParallelism(1);
env.execute("Print MySQL Snapshot + Binlog");
}
}

@ -33,31 +33,34 @@ import java.sql.SQLException;
import java.util.stream.Stream;
/**
* Basic class for testing the MySQL binlog source; it contains a MySQL container with the binlog enabled.
* Basic class for testing the MySQL binlog source; it contains a MySQL container with the binlog
* enabled.
*/
public abstract class MySQLTestBase extends AbstractTestBase {
private static final Logger LOG = LoggerFactory.getLogger(MySQLTestBase.class);
protected static final MySQLContainer MYSQL_CONTAINER = (MySQLContainer) new MySQLContainer()
.withConfigurationOverride("docker/my.cnf")
.withSetupSQL("docker/setup.sql")
.withDatabaseName("flink-test")
.withUsername("flinkuser")
.withPassword("flinkpw")
.withLogConsumer(new Slf4jLogConsumer(LOG));
@BeforeClass
public static void startContainers() {
LOG.info("Starting containers...");
Startables.deepStart(Stream.of(MYSQL_CONTAINER)).join();
LOG.info("Containers are started.");
}
protected Connection getJdbcConnection() throws SQLException {
return DriverManager.getConnection(
MYSQL_CONTAINER.getJdbcUrl(),
MYSQL_CONTAINER.getUsername(),
MYSQL_CONTAINER.getPassword());
}
private static final Logger LOG = LoggerFactory.getLogger(MySQLTestBase.class);
protected static final MySQLContainer MYSQL_CONTAINER =
(MySQLContainer)
new MySQLContainer()
.withConfigurationOverride("docker/my.cnf")
.withSetupSQL("docker/setup.sql")
.withDatabaseName("flink-test")
.withUsername("flinkuser")
.withPassword("flinkpw")
.withLogConsumer(new Slf4jLogConsumer(LOG));
@BeforeClass
public static void startContainers() {
LOG.info("Starting containers...");
Startables.deepStart(Stream.of(MYSQL_CONTAINER)).join();
LOG.info("Containers are started.");
}
protected Connection getJdbcConnection() throws SQLException {
return DriverManager.getConnection(
MYSQL_CONTAINER.getJdbcUrl(),
MYSQL_CONTAINER.getUsername(),
MYSQL_CONTAINER.getPassword());
}
}

@ -41,259 +41,267 @@ import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
/**
* Test for {@link MySQLTableSource} created by {@link MySQLTableSourceFactory}.
*/
/** Test for {@link MySQLTableSource} created by {@link MySQLTableSourceFactory}. */
public class MySQLTableSourceFactoryTest {
private static final TableSchema SCHEMA = TableSchema.builder()
.field("aaa", DataTypes.INT().notNull())
.field("bbb", DataTypes.STRING().notNull())
.field("ccc", DataTypes.DOUBLE())
.field("ddd", DataTypes.DECIMAL(31, 18))
.field("eee", DataTypes.TIMESTAMP(3))
.primaryKey("bbb", "aaa")
.build();
private static final String MY_LOCALHOST = "localhost";
private static final String MY_USERNAME = "flinkuser";
private static final String MY_PASSWORD = "flinkpw";
private static final String MY_DATABASE = "myDB";
private static final String MY_TABLE = "myTable";
private static final Properties PROPERTIES = new Properties();
@Test
public void testCommonProperties() {
Map<String, String> properties = getAllOptions();
// validation for source
DynamicTableSource actualSource = createTableSource(properties);
MySQLTableSource expectedSource = new MySQLTableSource(
TableSchemaUtils.getPhysicalSchema(SCHEMA),
3306,
MY_LOCALHOST,
MY_DATABASE,
MY_TABLE,
MY_USERNAME,
MY_PASSWORD,
ZoneId.of("UTC"),
PROPERTIES,
null,
StartupOptions.initial()
);
assertEquals(expectedSource, actualSource);
}
@Test
public void testOptionalProperties() {
Map<String, String> options = getAllOptions();
options.put("port", "3307");
options.put("server-id", "4321");
options.put("server-time-zone", "Asia/Shanghai");
options.put("debezium.snapshot.mode", "never");
DynamicTableSource actualSource = createTableSource(options);
Properties dbzProperties = new Properties();
dbzProperties.put("snapshot.mode", "never");
MySQLTableSource expectedSource = new MySQLTableSource(
TableSchemaUtils.getPhysicalSchema(SCHEMA),
3307,
MY_LOCALHOST,
MY_DATABASE,
MY_TABLE,
MY_USERNAME,
MY_PASSWORD,
ZoneId.of("Asia/Shanghai"),
dbzProperties,
4321,
StartupOptions.initial()
);
assertEquals(expectedSource, actualSource);
}
@Test
public void testStartupFromSpecificOffset() {
final String offsetFile = "mysql-bin.000003";
final int offsetPos = 100203;
Map<String, String> options = getAllOptions();
options.put("port", "3307");
options.put("server-id", "4321");
options.put("scan.startup.mode", "specific-offset");
options.put("scan.startup.specific-offset.file", offsetFile);
options.put("scan.startup.specific-offset.pos", String.valueOf(offsetPos));
DynamicTableSource actualSource = createTableSource(options);
MySQLTableSource expectedSource = new MySQLTableSource(
TableSchemaUtils.getPhysicalSchema(SCHEMA),
3307,
MY_LOCALHOST,
MY_DATABASE,
MY_TABLE,
MY_USERNAME,
MY_PASSWORD,
ZoneId.of("UTC"),
PROPERTIES,
4321,
StartupOptions.specificOffset(offsetFile, offsetPos)
);
assertEquals(expectedSource, actualSource);
}
@Test
public void testStartupFromInitial() {
Map<String, String> properties = getAllOptions();
properties.put("scan.startup.mode", "initial");
// validation for source
DynamicTableSource actualSource = createTableSource(properties);
MySQLTableSource expectedSource = new MySQLTableSource(
TableSchemaUtils.getPhysicalSchema(SCHEMA),
3306,
MY_LOCALHOST,
MY_DATABASE,
MY_TABLE,
MY_USERNAME,
MY_PASSWORD,
ZoneId.of("UTC"),
PROPERTIES,
null,
StartupOptions.initial()
);
assertEquals(expectedSource, actualSource);
}
@Test
public void testStartupFromEarliestOffset() {
Map<String, String> properties = getAllOptions();
properties.put("scan.startup.mode", "earliest-offset");
// validation for source
DynamicTableSource actualSource = createTableSource(properties);
MySQLTableSource expectedSource = new MySQLTableSource(
TableSchemaUtils.getPhysicalSchema(SCHEMA),
3306,
MY_LOCALHOST,
MY_DATABASE,
MY_TABLE,
MY_USERNAME,
MY_PASSWORD,
ZoneId.of("UTC"),
PROPERTIES,
null,
StartupOptions.earliest()
);
assertEquals(expectedSource, actualSource);
}
@Test
public void testStartupFromLatestOffset() {
Map<String, String> properties = getAllOptions();
properties.put("scan.startup.mode", "latest-offset");
// validation for source
DynamicTableSource actualSource = createTableSource(properties);
MySQLTableSource expectedSource = new MySQLTableSource(
TableSchemaUtils.getPhysicalSchema(SCHEMA),
3306,
MY_LOCALHOST,
MY_DATABASE,
MY_TABLE,
MY_USERNAME,
MY_PASSWORD,
ZoneId.of("UTC"),
PROPERTIES,
null,
StartupOptions.latest()
);
assertEquals(expectedSource, actualSource);
}
@Test
public void testValidation() {
// validate illegal port
try {
Map<String, String> properties = getAllOptions();
properties.put("port", "123b");
createTableSource(properties);
fail("exception expected");
} catch (Throwable t) {
assertTrue(ExceptionUtils.findThrowableWithMessage(t,
"Could not parse value '123b' for key 'port'.").isPresent());
}
// validate illegal server id
try {
Map<String, String> properties = getAllOptions();
properties.put("server-id", "123b");
createTableSource(properties);
fail("exception expected");
} catch (Throwable t) {
assertTrue(ExceptionUtils.findThrowableWithMessage(t,
"Could not parse value '123b' for key 'server-id'.").isPresent());
}
// validate missing required
Factory factory = new MySQLTableSourceFactory();
for (ConfigOption<?> requiredOption : factory.requiredOptions()) {
Map<String, String> properties = getAllOptions();
properties.remove(requiredOption.key());
try {
createTableSource(properties);
fail("exception expected");
} catch (Throwable t) {
assertTrue(ExceptionUtils.findThrowableWithMessage(t,
"Missing required options are:\n\n" + requiredOption.key()).isPresent());
}
}
// validate unsupported option
try {
Map<String, String> properties = getAllOptions();
properties.put("unknown", "abc");
createTableSource(properties);
fail("exception expected");
} catch (Throwable t) {
assertTrue(ExceptionUtils.findThrowableWithMessage(t,
"Unsupported options:\n\nunknown").isPresent());
}
// validate unsupported option
try {
Map<String, String> properties = getAllOptions();
properties.put("scan.startup.mode", "abc");
createTableSource(properties);
fail("exception expected");
} catch (Throwable t) {
String msg = "Invalid value for option 'scan.startup.mode'. Supported values are " +
"[initial, earliest-offset, latest-offset, specific-offset, timestamp], " +
"but was: abc";
assertTrue(ExceptionUtils.findThrowableWithMessage(t, msg).isPresent());
}
}
private Map<String, String> getAllOptions() {
Map<String, String> options = new HashMap<>();
options.put("connector", "mysql-cdc");
options.put("hostname", MY_LOCALHOST);
options.put("database-name", MY_DATABASE);
options.put("table-name", MY_TABLE);
options.put("username", MY_USERNAME);
options.put("password", MY_PASSWORD);
return options;
}
private static DynamicTableSource createTableSource(Map<String, String> options) {
return FactoryUtil.createTableSource(
null,
ObjectIdentifier.of("default", "default", "t1"),
new CatalogTableImpl(SCHEMA, options, "mock source"),
new Configuration(),
MySQLTableSourceFactoryTest.class.getClassLoader(),
false);
}
private static final TableSchema SCHEMA =
TableSchema.builder()
.field("aaa", DataTypes.INT().notNull())
.field("bbb", DataTypes.STRING().notNull())
.field("ccc", DataTypes.DOUBLE())
.field("ddd", DataTypes.DECIMAL(31, 18))
.field("eee", DataTypes.TIMESTAMP(3))
.primaryKey("bbb", "aaa")
.build();
private static final String MY_LOCALHOST = "localhost";
private static final String MY_USERNAME = "flinkuser";
private static final String MY_PASSWORD = "flinkpw";
private static final String MY_DATABASE = "myDB";
private static final String MY_TABLE = "myTable";
private static final Properties PROPERTIES = new Properties();
@Test
public void testCommonProperties() {
Map<String, String> properties = getAllOptions();
// validation for source
DynamicTableSource actualSource = createTableSource(properties);
MySQLTableSource expectedSource =
new MySQLTableSource(
TableSchemaUtils.getPhysicalSchema(SCHEMA),
3306,
MY_LOCALHOST,
MY_DATABASE,
MY_TABLE,
MY_USERNAME,
MY_PASSWORD,
ZoneId.of("UTC"),
PROPERTIES,
null,
StartupOptions.initial());
assertEquals(expectedSource, actualSource);
}
@Test
public void testOptionalProperties() {
Map<String, String> options = getAllOptions();
options.put("port", "3307");
options.put("server-id", "4321");
options.put("server-time-zone", "Asia/Shanghai");
options.put("debezium.snapshot.mode", "never");
DynamicTableSource actualSource = createTableSource(options);
Properties dbzProperties = new Properties();
dbzProperties.put("snapshot.mode", "never");
MySQLTableSource expectedSource =
new MySQLTableSource(
TableSchemaUtils.getPhysicalSchema(SCHEMA),
3307,
MY_LOCALHOST,
MY_DATABASE,
MY_TABLE,
MY_USERNAME,
MY_PASSWORD,
ZoneId.of("Asia/Shanghai"),
dbzProperties,
4321,
StartupOptions.initial());
assertEquals(expectedSource, actualSource);
}
@Test
public void testStartupFromSpecificOffset() {
final String offsetFile = "mysql-bin.000003";
final int offsetPos = 100203;
Map<String, String> options = getAllOptions();
options.put("port", "3307");
options.put("server-id", "4321");
options.put("scan.startup.mode", "specific-offset");
options.put("scan.startup.specific-offset.file", offsetFile);
options.put("scan.startup.specific-offset.pos", String.valueOf(offsetPos));
DynamicTableSource actualSource = createTableSource(options);
MySQLTableSource expectedSource =
new MySQLTableSource(
TableSchemaUtils.getPhysicalSchema(SCHEMA),
3307,
MY_LOCALHOST,
MY_DATABASE,
MY_TABLE,
MY_USERNAME,
MY_PASSWORD,
ZoneId.of("UTC"),
PROPERTIES,
4321,
StartupOptions.specificOffset(offsetFile, offsetPos));
assertEquals(expectedSource, actualSource);
}
@Test
public void testStartupFromInitial() {
Map<String, String> properties = getAllOptions();
properties.put("scan.startup.mode", "initial");
// validation for source
DynamicTableSource actualSource = createTableSource(properties);
MySQLTableSource expectedSource =
new MySQLTableSource(
TableSchemaUtils.getPhysicalSchema(SCHEMA),
3306,
MY_LOCALHOST,
MY_DATABASE,
MY_TABLE,
MY_USERNAME,
MY_PASSWORD,
ZoneId.of("UTC"),
PROPERTIES,
null,
StartupOptions.initial());
assertEquals(expectedSource, actualSource);
}
@Test
public void testStartupFromEarliestOffset() {
Map<String, String> properties = getAllOptions();
properties.put("scan.startup.mode", "earliest-offset");
// validation for source
DynamicTableSource actualSource = createTableSource(properties);
MySQLTableSource expectedSource =
new MySQLTableSource(
TableSchemaUtils.getPhysicalSchema(SCHEMA),
3306,
MY_LOCALHOST,
MY_DATABASE,
MY_TABLE,
MY_USERNAME,
MY_PASSWORD,
ZoneId.of("UTC"),
PROPERTIES,
null,
StartupOptions.earliest());
assertEquals(expectedSource, actualSource);
}
@Test
public void testStartupFromLatestOffset() {
Map<String, String> properties = getAllOptions();
properties.put("scan.startup.mode", "latest-offset");
// validation for source
DynamicTableSource actualSource = createTableSource(properties);
MySQLTableSource expectedSource =
new MySQLTableSource(
TableSchemaUtils.getPhysicalSchema(SCHEMA),
3306,
MY_LOCALHOST,
MY_DATABASE,
MY_TABLE,
MY_USERNAME,
MY_PASSWORD,
ZoneId.of("UTC"),
PROPERTIES,
null,
StartupOptions.latest());
assertEquals(expectedSource, actualSource);
}
@Test
public void testValidation() {
// validate illegal port
try {
Map<String, String> properties = getAllOptions();
properties.put("port", "123b");
createTableSource(properties);
fail("exception expected");
} catch (Throwable t) {
assertTrue(
ExceptionUtils.findThrowableWithMessage(
t, "Could not parse value '123b' for key 'port'.")
.isPresent());
}
// validate illegal server id
try {
Map<String, String> properties = getAllOptions();
properties.put("server-id", "123b");
createTableSource(properties);
fail("exception expected");
} catch (Throwable t) {
assertTrue(
ExceptionUtils.findThrowableWithMessage(
t, "Could not parse value '123b' for key 'server-id'.")
.isPresent());
}
// validate missing required
Factory factory = new MySQLTableSourceFactory();
for (ConfigOption<?> requiredOption : factory.requiredOptions()) {
Map<String, String> properties = getAllOptions();
properties.remove(requiredOption.key());
try {
createTableSource(properties);
fail("exception expected");
} catch (Throwable t) {
assertTrue(
ExceptionUtils.findThrowableWithMessage(
t,
"Missing required options are:\n\n" + requiredOption.key())
.isPresent());
}
}
// validate unsupported option
try {
Map<String, String> properties = getAllOptions();
properties.put("unknown", "abc");
createTableSource(properties);
fail("exception expected");
} catch (Throwable t) {
assertTrue(
ExceptionUtils.findThrowableWithMessage(t, "Unsupported options:\n\nunknown")
.isPresent());
}
// validate unsupported option
try {
Map<String, String> properties = getAllOptions();
properties.put("scan.startup.mode", "abc");
createTableSource(properties);
fail("exception expected");
} catch (Throwable t) {
String msg =
"Invalid value for option 'scan.startup.mode'. Supported values are "
+ "[initial, earliest-offset, latest-offset, specific-offset, timestamp], "
+ "but was: abc";
assertTrue(ExceptionUtils.findThrowableWithMessage(t, msg).isPresent());
}
}
private Map<String, String> getAllOptions() {
Map<String, String> options = new HashMap<>();
options.put("connector", "mysql-cdc");
options.put("hostname", MY_LOCALHOST);
options.put("database-name", MY_DATABASE);
options.put("table-name", MY_TABLE);
options.put("username", MY_USERNAME);
options.put("password", MY_PASSWORD);
return options;
}
private static DynamicTableSource createTableSource(Map<String, String> options) {
return FactoryUtil.createTableSource(
null,
ObjectIdentifier.of("default", "default", "t1"),
new CatalogTableImpl(SCHEMA, options, "mock source"),
new Configuration(),
MySQLTableSourceFactoryTest.class.getClassLoader(),
false);
}
}

@ -25,150 +25,152 @@ import java.util.HashSet;
import java.util.Set;
/**
* Docker container for MySQL. The difference between this class and
* {@link org.testcontainers.containers.MySQLContainer} is that the Testcontainers MySQLContainer has problems
* when overriding the MySQL config file, i.e. my.cnf.
* Docker container for MySQL. The difference between this class and {@link
* org.testcontainers.containers.MySQLContainer} is that the Testcontainers MySQLContainer has
* problems when overriding the MySQL config file, i.e. my.cnf.
*/
@SuppressWarnings("rawtypes")
public class MySQLContainer extends JdbcDatabaseContainer {
public static final String IMAGE = "mysql";
public static final String DEFAULT_TAG = "5.7";
public static final Integer MYSQL_PORT = 3306;
private static final String MY_CNF_CONFIG_OVERRIDE_PARAM_NAME = "MY_CNF";
private static final String SETUP_SQL_PARAM_NAME = "SETUP_SQL";
private static final String MYSQL_ROOT_USER = "root";
private String databaseName = "test";
private String username = "test";
private String password = "test";
public MySQLContainer() {
super(IMAGE + ":" + DEFAULT_TAG);
addExposedPort(MYSQL_PORT);
}
@Override
protected Set<Integer> getLivenessCheckPorts() {
return new HashSet<>(getMappedPort(MYSQL_PORT));
}
@Override
protected void configure() {
optionallyMapResourceParameterAsVolume(
MY_CNF_CONFIG_OVERRIDE_PARAM_NAME,
"/etc/mysql/",
"mysql-default-conf");
if (parameters.containsKey(SETUP_SQL_PARAM_NAME)) {
optionallyMapResourceParameterAsVolume(
SETUP_SQL_PARAM_NAME,
"/docker-entrypoint-initdb.d/",
"N/A");
}
addEnv("MYSQL_DATABASE", databaseName);
addEnv("MYSQL_USER", username);
if (password != null && !password.isEmpty()) {
addEnv("MYSQL_PASSWORD", password);
addEnv("MYSQL_ROOT_PASSWORD", password);
} else if (MYSQL_ROOT_USER.equalsIgnoreCase(username)) {
addEnv("MYSQL_ALLOW_EMPTY_PASSWORD", "yes");
} else {
throw new ContainerLaunchException("Empty password can be used only with the root user");
}
setStartupAttempts(3);
}
@Override
public String getDriverClassName() {
try {
Class.forName("com.mysql.cj.jdbc.Driver");
return "com.mysql.cj.jdbc.Driver";
} catch (ClassNotFoundException e) {
return "com.mysql.jdbc.Driver";
}
}
public String getJdbcUrl(String databaseName) {
String additionalUrlParams = constructUrlParameters("?", "&");
return "jdbc:mysql://" + getHost() + ":" + getDatabasePort() +
"/" + databaseName + additionalUrlParams;
}
@Override
public String getJdbcUrl() {
return getJdbcUrl(databaseName);
}
public int getDatabasePort() {
return getMappedPort(MYSQL_PORT);
}
@Override
protected String constructUrlForConnection(String queryString) {
String url = super.constructUrlForConnection(queryString);
if (!url.contains("useSSL=")) {
String separator = url.contains("?") ? "&" : "?";
url = url + separator + "useSSL=false";
}
if (!url.contains("allowPublicKeyRetrieval=")) {
url = url + "&allowPublicKeyRetrieval=true";
}
return url;
}
@Override
public String getDatabaseName() {
return databaseName;
}
@Override
public String getUsername() {
return username;
}
@Override
public String getPassword() {
return password;
}
@Override
protected String getTestQueryString() {
return "SELECT 1";
}
@SuppressWarnings("unchecked")
public MySQLContainer withConfigurationOverride(String s) {
parameters.put(MY_CNF_CONFIG_OVERRIDE_PARAM_NAME, s);
return this;
}
@SuppressWarnings("unchecked")
public MySQLContainer withSetupSQL(String sqlPath) {
parameters.put(SETUP_SQL_PARAM_NAME, sqlPath);
return this;
}
@Override
public MySQLContainer withDatabaseName(final String databaseName) {
this.databaseName = databaseName;
return this;
}
@Override
public MySQLContainer withUsername(final String username) {
this.username = username;
return this;
}
@Override
public MySQLContainer withPassword(final String password) {
this.password = password;
return this;
}
public static final String IMAGE = "mysql";
public static final String DEFAULT_TAG = "5.7";
public static final Integer MYSQL_PORT = 3306;
private static final String MY_CNF_CONFIG_OVERRIDE_PARAM_NAME = "MY_CNF";
private static final String SETUP_SQL_PARAM_NAME = "SETUP_SQL";
private static final String MYSQL_ROOT_USER = "root";
private String databaseName = "test";
private String username = "test";
private String password = "test";
public MySQLContainer() {
super(IMAGE + ":" + DEFAULT_TAG);
addExposedPort(MYSQL_PORT);
}
@Override
protected Set<Integer> getLivenessCheckPorts() {
return new HashSet<>(getMappedPort(MYSQL_PORT));
}
@Override
protected void configure() {
optionallyMapResourceParameterAsVolume(
MY_CNF_CONFIG_OVERRIDE_PARAM_NAME, "/etc/mysql/", "mysql-default-conf");
if (parameters.containsKey(SETUP_SQL_PARAM_NAME)) {
optionallyMapResourceParameterAsVolume(
SETUP_SQL_PARAM_NAME, "/docker-entrypoint-initdb.d/", "N/A");
}
addEnv("MYSQL_DATABASE", databaseName);
addEnv("MYSQL_USER", username);
if (password != null && !password.isEmpty()) {
addEnv("MYSQL_PASSWORD", password);
addEnv("MYSQL_ROOT_PASSWORD", password);
} else if (MYSQL_ROOT_USER.equalsIgnoreCase(username)) {
addEnv("MYSQL_ALLOW_EMPTY_PASSWORD", "yes");
} else {
throw new ContainerLaunchException(
"Empty password can be used only with the root user");
}
setStartupAttempts(3);
}
@Override
public String getDriverClassName() {
try {
Class.forName("com.mysql.cj.jdbc.Driver");
return "com.mysql.cj.jdbc.Driver";
} catch (ClassNotFoundException e) {
return "com.mysql.jdbc.Driver";
}
}
public String getJdbcUrl(String databaseName) {
String additionalUrlParams = constructUrlParameters("?", "&");
return "jdbc:mysql://"
+ getHost()
+ ":"
+ getDatabasePort()
+ "/"
+ databaseName
+ additionalUrlParams;
}
@Override
public String getJdbcUrl() {
return getJdbcUrl(databaseName);
}
public int getDatabasePort() {
return getMappedPort(MYSQL_PORT);
}
@Override
protected String constructUrlForConnection(String queryString) {
String url = super.constructUrlForConnection(queryString);
if (!url.contains("useSSL=")) {
String separator = url.contains("?") ? "&" : "?";
url = url + separator + "useSSL=false";
}
if (!url.contains("allowPublicKeyRetrieval=")) {
url = url + "&allowPublicKeyRetrieval=true";
}
return url;
}
@Override
public String getDatabaseName() {
return databaseName;
}
@Override
public String getUsername() {
return username;
}
@Override
public String getPassword() {
return password;
}
@Override
protected String getTestQueryString() {
return "SELECT 1";
}
@SuppressWarnings("unchecked")
public MySQLContainer withConfigurationOverride(String s) {
parameters.put(MY_CNF_CONFIG_OVERRIDE_PARAM_NAME, s);
return this;
}
@SuppressWarnings("unchecked")
public MySQLContainer withSetupSQL(String sqlPath) {
parameters.put(SETUP_SQL_PARAM_NAME, sqlPath);
return this;
}
@Override
public MySQLContainer withDatabaseName(final String databaseName) {
this.databaseName = databaseName;
return this;
}
@Override
public MySQLContainer withUsername(final String username) {
this.username = username;
return this;
}
@Override
public MySQLContainer withPassword(final String password) {
this.password = password;
return this;
}
}
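
A usage sketch (not part of this commit) mirroring how MySQLTestBase wires this container; it assumes the example lives in the same test package as MySQLContainer and that docker/my.cnf and docker/setup.sql exist on the test classpath.

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.testcontainers.containers.output.Slf4jLogConsumer;

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.SQLException;

public class MySQLContainerUsageExample {
    private static final Logger LOG = LoggerFactory.getLogger(MySQLContainerUsageExample.class);

    public static Connection startAndConnect() throws SQLException {
        // my.cnf enables the binlog; setup.sql prepares the test users, as in MySQLTestBase above.
        MySQLContainer mysql =
                (MySQLContainer)
                        new MySQLContainer()
                                .withConfigurationOverride("docker/my.cnf")
                                .withSetupSQL("docker/setup.sql")
                                .withDatabaseName("flink-test")
                                .withUsername("flinkuser")
                                .withPassword("flinkpw")
                                .withLogConsumer(new Slf4jLogConsumer(LOG));
        mysql.start();
        return DriverManager.getConnection(
                mysql.getJdbcUrl(), mysql.getUsername(), mysql.getPassword());
    }
}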

@ -36,99 +36,110 @@ import java.util.stream.Stream;
import static org.junit.Assert.assertNotNull;
/**
* Create and populate a unique instance of a MySQL database for each run of a JUnit test. A user of this class
* needs to provide a logical name for Debezium and a database name. It is expected that there is an init file
* in <code>src/test/resources/ddl/&lt;database_name&gt;.sql</code>.
* The database name is enriched with a unique suffix that guarantees complete isolation between runs:
* <code>&lt;database_name&gt;_&lt;suffix&gt;</code>.
* Create and populate a unique instance of a MySQL database for each run of a JUnit test. A user
* of this class needs to provide a logical name for Debezium and a database name. It is expected
* that there is an init file in <code>src/test/resources/ddl/&lt;database_name&gt;.sql</code>. The
* database name is enriched with a unique suffix that guarantees complete isolation between runs:
* <code>&lt;database_name&gt;_&lt;suffix&gt;</code>.
*
* <p>This class is inspired by the Debezium project.
*
*/
public class UniqueDatabase {
private static final String[] CREATE_DATABASE_DDL = new String[]{
"CREATE DATABASE $DBNAME$;",
"USE $DBNAME$;"
};
private static final Pattern COMMENT_PATTERN = Pattern.compile("^(.*)--.*$");
private static final String[] CREATE_DATABASE_DDL =
new String[] {"CREATE DATABASE $DBNAME$;", "USE $DBNAME$;"};
private static final Pattern COMMENT_PATTERN = Pattern.compile("^(.*)--.*$");
private final MySQLContainer container;
private final String databaseName;
private final String templateName;
private final String username;
private final String password;
private final MySQLContainer container;
private final String databaseName;
private final String templateName;
private final String username;
private final String password;
public UniqueDatabase(MySQLContainer container, String databaseName, String username, String password) {
this(container, databaseName, Integer.toUnsignedString(new Random().nextInt(), 36), username, password);
}
public UniqueDatabase(
MySQLContainer container, String databaseName, String username, String password) {
this(
container,
databaseName,
Integer.toUnsignedString(new Random().nextInt(), 36),
username,
password);
}
private UniqueDatabase(MySQLContainer container, String databaseName, final String identifier, String username, String password) {
this.container = container;
this.databaseName = databaseName + "_" + identifier;
this.templateName = databaseName;
this.username = username;
this.password = password;
}
private UniqueDatabase(
MySQLContainer container,
String databaseName,
final String identifier,
String username,
String password) {
this.container = container;
this.databaseName = databaseName + "_" + identifier;
this.templateName = databaseName;
this.username = username;
this.password = password;
}
public String getDatabaseName() {
return databaseName;
}
public String getDatabaseName() {
return databaseName;
}
public String getUsername() {
return username;
}
public String getUsername() {
return username;
}
public String getPassword() {
return password;
}
public String getPassword() {
return password;
}
/**
* @return Fully qualified table name <code>&lt;databaseName&gt;.&lt;tableName&gt;</code>
*/
public String qualifiedTableName(final String tableName) {
return String.format("%s.%s", databaseName, tableName);
}
/** @return Fully qualified table name <code>&lt;databaseName&gt;.&lt;tableName&gt;</code> */
public String qualifiedTableName(final String tableName) {
return String.format("%s.%s", databaseName, tableName);
}
/**
* Creates the database and populates it with initialization SQL script.
*/
public void createAndInitialize() {
final String ddlFile = String.format("ddl/%s.sql", templateName);
final URL ddlTestFile = UniqueDatabase.class.getClassLoader().getResource(ddlFile);
assertNotNull("Cannot locate " + ddlFile, ddlTestFile);
try {
try (Connection connection = DriverManager.getConnection(container.getJdbcUrl(), username, password);
Statement statement = connection.createStatement()) {
final List<String> statements = Arrays.stream(
Stream.concat(
Arrays.stream(CREATE_DATABASE_DDL),
Files.readAllLines(Paths.get(ddlTestFile.toURI())).stream())
.map(String::trim)
.filter(x -> !x.startsWith("--") && !x.isEmpty())
.map(x -> {
final Matcher m = COMMENT_PATTERN.matcher(x);
return m.matches() ? m.group(1) : x;
})
.map(this::convertSQL)
.collect(Collectors.joining("\n")).split(";"))
.map(x -> x.replace("$$", ";"))
.collect(Collectors.toList());
for (String stmt : statements) {
statement.execute(stmt);
}
}
}
catch (final Exception e) {
throw new IllegalStateException(e);
}
}
/** Creates the database and populates it with initialization SQL script. */
public void createAndInitialize() {
final String ddlFile = String.format("ddl/%s.sql", templateName);
final URL ddlTestFile = UniqueDatabase.class.getClassLoader().getResource(ddlFile);
assertNotNull("Cannot locate " + ddlFile, ddlTestFile);
try {
try (Connection connection =
DriverManager.getConnection(
container.getJdbcUrl(), username, password);
Statement statement = connection.createStatement()) {
final List<String> statements =
Arrays.stream(
Stream.concat(
Arrays.stream(CREATE_DATABASE_DDL),
Files.readAllLines(
Paths.get(ddlTestFile.toURI()))
.stream())
.map(String::trim)
.filter(x -> !x.startsWith("--") && !x.isEmpty())
.map(
x -> {
final Matcher m =
COMMENT_PATTERN.matcher(x);
return m.matches() ? m.group(1) : x;
})
.map(this::convertSQL)
.collect(Collectors.joining("\n"))
.split(";"))
.map(x -> x.replace("$$", ";"))
.collect(Collectors.toList());
for (String stmt : statements) {
statement.execute(stmt);
}
}
} catch (final Exception e) {
throw new IllegalStateException(e);
}
}
public Connection getJdbcConnection() throws SQLException {
return DriverManager.getConnection(container.getJdbcUrl(databaseName), username, password);
}
public Connection getJdbcConnection() throws SQLException {
return DriverManager.getConnection(container.getJdbcUrl(databaseName), username, password);
}
private String convertSQL(final String sql) {
return sql.replace("$DBNAME$", databaseName);
}
private String convertSQL(final String sql) {
return sql.replace("$DBNAME$", databaseName);
}
}
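For orientation, here is a minimal usage sketch of this helper (not part of this commit). It assumes a started MySQLContainer from this project, a "mysqluser"/"mysqlpw" test account, and an "inventory" template script under src/test/resources/ddl/, all of which are illustrative assumptions.

// Hypothetical usage sketch of UniqueDatabase; imports of java.sql.Connection,
// java.sql.Statement, and the project's MySQLContainer are assumed to be in place.
public class UniqueDatabaseUsageExample {
    static void createAndQuery(MySQLContainer container) throws Exception {
        // Each call produces an isolated database named inventory_<suffix>,
        // initialized from src/test/resources/ddl/inventory.sql.
        UniqueDatabase inventoryDatabase =
                new UniqueDatabase(container, "inventory", "mysqluser", "mysqlpw");
        inventoryDatabase.createAndInitialize();
        try (Connection connection = inventoryDatabase.getJdbcConnection();
                Statement statement = connection.createStatement()) {
            // qualifiedTableName() prepends the suffixed database name.
            statement.executeQuery(
                    "SELECT * FROM " + inventoryDatabase.qualifiedTableName("products"));
        }
    }
}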

@ -34,28 +34,29 @@ import static org.apache.flink.util.Preconditions.checkNotNull;
* @see Heartbeat
*/
public class HeartbeatEventFilter<T> implements DebeziumDeserializationSchema<T> {
private static final long serialVersionUID = -4450118969976653497L;
private final String heartbeatTopicPrefix;
private final DebeziumDeserializationSchema<T> serializer;
public HeartbeatEventFilter(String heartbeatTopicPrefix, DebeziumDeserializationSchema<T> serializer) {
this.heartbeatTopicPrefix = checkNotNull(heartbeatTopicPrefix);
this.serializer = checkNotNull(serializer);
}
@Override
public void deserialize(SourceRecord record, Collector<T> out) throws Exception {
String topic = record.topic();
if (topic != null && topic.startsWith(heartbeatTopicPrefix)) {
// drop heartbeat events
return;
}
serializer.deserialize(record, out);
}
@Override
public TypeInformation<T> getProducedType() {
return serializer.getProducedType();
}
private static final long serialVersionUID = -4450118969976653497L;
private final String heartbeatTopicPrefix;
private final DebeziumDeserializationSchema<T> serializer;
public HeartbeatEventFilter(
String heartbeatTopicPrefix, DebeziumDeserializationSchema<T> serializer) {
this.heartbeatTopicPrefix = checkNotNull(heartbeatTopicPrefix);
this.serializer = checkNotNull(serializer);
}
@Override
public void deserialize(SourceRecord record, Collector<T> out) throws Exception {
String topic = record.topic();
if (topic != null && topic.startsWith(heartbeatTopicPrefix)) {
// drop heartbeat events
return;
}
serializer.deserialize(record, out);
}
@Override
public TypeInformation<T> getProducedType() {
return serializer.getProducedType();
}
}
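As a rough wiring sketch (not part of this change), the filter wraps another deserialization schema so Debezium heartbeat records are dropped before they reach it; the topic prefix below is an assumption and must match the connector's heartbeat configuration.

// Hypothetical helper: wrap a deserializer so heartbeat events are skipped.
// StringDebeziumDeserializationSchema comes from this project; imports omitted.
public class HeartbeatFilterExample {
    static DebeziumDeserializationSchema<String> filteredStringDeserializer() {
        // "__debezium-heartbeat" is Debezium's default heartbeat.topics.prefix;
        // treat it as a placeholder for whatever prefix the source is configured with.
        return new HeartbeatEventFilter<>(
                "__debezium-heartbeat", new StringDebeziumDeserializationSchema());
    }
}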

@ -28,167 +28,156 @@ import java.util.Properties;
import static org.apache.flink.util.Preconditions.checkNotNull;
/**
* A builder to build a SourceFunction which can read a snapshot and continue to consume change events for PostgreSQL.
* A builder to build a SourceFunction which can read a snapshot and continue to consume change
* events for PostgreSQL.
*/
public class PostgreSQLSource {
private static final long DEFAULT_HEARTBEAT_MS = Duration.ofMinutes(5).toMillis();
public static <T> Builder<T> builder() {
return new Builder<>();
}
/**
* Builder class of {@link PostgreSQLSource}.
*/
public static class Builder<T> {
private String pluginName = "decoderbufs";
private String slotName = "flink";
private int port = 5432; // default 5432 port
private String hostname;
private String database;
private String username;
private String password;
private String[] schemaList;
private String[] tableList;
private Properties dbzProperties;
private DebeziumDeserializationSchema<T> deserializer;
/**
* The name of the Postgres logical decoding plug-in installed on the server.
* Supported values are decoderbufs, wal2json, wal2json_rds, wal2json_streaming,
* wal2json_rds_streaming and pgoutput.
*/
public Builder<T> decodingPluginName(String name) {
this.pluginName = name;
return this;
}
public Builder<T> hostname(String hostname) {
this.hostname = hostname;
return this;
}
/**
* Integer port number of the PostgreSQL database server.
*/
public Builder<T> port(int port) {
this.port = port;
return this;
}
/**
* The name of the PostgreSQL database from which to stream the changes.
*/
public Builder<T> database(String database) {
this.database = database;
return this;
}
/**
* An optional list of regular expressions that match schema names to be monitored;
* any schema name not included in the whitelist will be excluded from monitoring.
* By default all non-system schemas will be monitored.
*/
public Builder<T> schemaList(String... schemaList) {
this.schemaList = schemaList;
return this;
}
/**
* An optional list of regular expressions that match fully-qualified table identifiers
* for tables to be monitored; any table not included in the whitelist will be excluded
* from monitoring. Each identifier is of the form schemaName.tableName.
* By default the connector will monitor every non-system table in each monitored schema.
*/
public Builder<T> tableList(String... tableList) {
this.tableList = tableList;
return this;
}
/**
* Name of the PostgreSQL database user to use when connecting to the PostgreSQL database server.
*/
public Builder<T> username(String username) {
this.username = username;
return this;
}
/**
* Password to use when connecting to the PostgreSQL database server.
*/
public Builder<T> password(String password) {
this.password = password;
return this;
}
/**
* The name of the PostgreSQL logical decoding slot that was created for streaming changes
* from a particular plug-in for a particular database/schema. The server uses this slot
* to stream events to the connector that you are configuring. Default is "flink".
*
* <p>Slot names must conform to <a href="https://www.postgresql.org/docs/current/static/warm-standby.html#STREAMING-REPLICATION-SLOTS-MANIPULATION">PostgreSQL replication slot naming rules</a>,
* which state: "Each replication slot has a name, which can contain lower-case letters,
* numbers, and the underscore character."
*/
public Builder<T> slotName(String slotName) {
this.slotName = slotName;
return this;
}
/**
* The Debezium Postgres connector properties.
*/
public Builder<T> debeziumProperties(Properties properties) {
this.dbzProperties = properties;
return this;
}
/**
* The deserializer used to convert from consumed {@link org.apache.kafka.connect.source.SourceRecord}.
*/
public Builder<T> deserializer(DebeziumDeserializationSchema<T> deserializer) {
this.deserializer = deserializer;
return this;
}
public DebeziumSourceFunction<T> build() {
Properties props = new Properties();
props.setProperty("connector.class", PostgresConnector.class.getCanonicalName());
props.setProperty("plugin.name", pluginName);
// hard-code the server name, because we don't need to distinguish it; from the docs:
// Logical name that identifies and provides a namespace for the particular PostgreSQL
// database server/cluster being monitored. The logical name should be unique across
// all other connectors, since it is used as a prefix for all Kafka topic names coming
// from this connector. Only alphanumeric characters and underscores should be used.
props.setProperty("database.server.name", "postgres_cdc_source");
props.setProperty("database.hostname", checkNotNull(hostname));
props.setProperty("database.dbname", checkNotNull(database));
props.setProperty("database.user", checkNotNull(username));
props.setProperty("database.password", checkNotNull(password));
props.setProperty("database.port", String.valueOf(port));
props.setProperty("slot.name", slotName);
// we have to enable heartbeat for PG to make sure DebeziumChangeConsumer#handleBatch
// is invoked after job restart
props.setProperty("heartbeat.interval.ms", String.valueOf(DEFAULT_HEARTBEAT_MS));
if (schemaList != null) {
props.setProperty("schema.whitelist", String.join(",", schemaList));
}
if (tableList != null) {
props.setProperty("table.whitelist", String.join(",", tableList));
}
if (dbzProperties != null) {
dbzProperties.forEach(props::put);
}
return new DebeziumSourceFunction<>(
deserializer,
props,
null);
}
}
private static final long DEFAULT_HEARTBEAT_MS = Duration.ofMinutes(5).toMillis();
public static <T> Builder<T> builder() {
return new Builder<>();
}
/** Builder class of {@link PostgreSQLSource}. */
public static class Builder<T> {
private String pluginName = "decoderbufs";
private String slotName = "flink";
private int port = 5432; // default 5432 port
private String hostname;
private String database;
private String username;
private String password;
private String[] schemaList;
private String[] tableList;
private Properties dbzProperties;
private DebeziumDeserializationSchema<T> deserializer;
/**
* The name of the Postgres logical decoding plug-in installed on the server. Supported
* values are decoderbufs, wal2json, wal2json_rds, wal2json_streaming,
* wal2json_rds_streaming and pgoutput.
*/
public Builder<T> decodingPluginName(String name) {
this.pluginName = name;
return this;
}
public Builder<T> hostname(String hostname) {
this.hostname = hostname;
return this;
}
/** Integer port number of the PostgreSQL database server. */
public Builder<T> port(int port) {
this.port = port;
return this;
}
/** The name of the PostgreSQL database from which to stream the changes. */
public Builder<T> database(String database) {
this.database = database;
return this;
}
/**
* An optional list of regular expressions that match schema names to be monitored; any
* schema name not included in the whitelist will be excluded from monitoring. By default
* all non-system schemas will be monitored.
*/
public Builder<T> schemaList(String... schemaList) {
this.schemaList = schemaList;
return this;
}
/**
* An optional list of regular expressions that match fully-qualified table identifiers for
* tables to be monitored; any table not included in the whitelist will be excluded from
* monitoring. Each identifier is of the form schemaName.tableName. By default the connector
* will monitor every non-system table in each monitored schema.
*/
public Builder<T> tableList(String... tableList) {
this.tableList = tableList;
return this;
}
/**
* Name of the PostgreSQL database user to use when connecting to the PostgreSQL database server.
*/
public Builder<T> username(String username) {
this.username = username;
return this;
}
/** Password to use when connecting to the PostgreSQL database server. */
public Builder<T> password(String password) {
this.password = password;
return this;
}
/**
* The name of the PostgreSQL logical decoding slot that was created for streaming changes
* from a particular plug-in for a particular database/schema. The server uses this slot to
* stream events to the connector that you are configuring. Default is "flink".
*
* <p>Slot names must conform to <a
* href="https://www.postgresql.org/docs/current/static/warm-standby.html#STREAMING-REPLICATION-SLOTS-MANIPULATION">PostgreSQL
* replication slot naming rules</a>, which state: "Each replication slot has a name, which
* can contain lower-case letters, numbers, and the underscore character."
*/
public Builder<T> slotName(String slotName) {
this.slotName = slotName;
return this;
}
/** The Debezium Postgres connector properties. */
public Builder<T> debeziumProperties(Properties properties) {
this.dbzProperties = properties;
return this;
}
/**
* The deserializer used to convert from consumed {@link
* org.apache.kafka.connect.source.SourceRecord}.
*/
public Builder<T> deserializer(DebeziumDeserializationSchema<T> deserializer) {
this.deserializer = deserializer;
return this;
}
public DebeziumSourceFunction<T> build() {
Properties props = new Properties();
props.setProperty("connector.class", PostgresConnector.class.getCanonicalName());
props.setProperty("plugin.name", pluginName);
// hard-code the server name, because we don't need to distinguish it; from the docs:
// Logical name that identifies and provides a namespace for the particular PostgreSQL
// database server/cluster being monitored. The logical name should be unique across
// all other connectors, since it is used as a prefix for all Kafka topic names coming
// from this connector. Only alphanumeric characters and underscores should be used.
props.setProperty("database.server.name", "postgres_cdc_source");
props.setProperty("database.hostname", checkNotNull(hostname));
props.setProperty("database.dbname", checkNotNull(database));
props.setProperty("database.user", checkNotNull(username));
props.setProperty("database.password", checkNotNull(password));
props.setProperty("database.port", String.valueOf(port));
props.setProperty("slot.name", slotName);
// we have to enable heartbeat for PG to make sure DebeziumChangeConsumer#handleBatch
// is invoked after job restart
props.setProperty("heartbeat.interval.ms", String.valueOf(DEFAULT_HEARTBEAT_MS));
if (schemaList != null) {
props.setProperty("schema.whitelist", String.join(",", schemaList));
}
if (tableList != null) {
props.setProperty("table.whitelist", String.join(",", tableList));
}
if (dbzProperties != null) {
dbzProperties.forEach(props::put);
}
return new DebeziumSourceFunction<>(deserializer, props, null);
}
}
}
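For orientation (not part of this diff), a DataStream job would typically use the builder along these lines; hostname, credentials, and table names are placeholders:

// Hypothetical usage sketch of PostgreSQLSource.builder(); PostgreSQLSource and
// StringDebeziumDeserializationSchema come from this project, imports omitted.
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.source.SourceFunction;

public class PostgreSQLSourceExample {
    public static void main(String[] args) throws Exception {
        SourceFunction<String> sourceFunction =
                PostgreSQLSource.<String>builder()
                        .hostname("localhost")
                        .port(5432)
                        .database("postgres")
                        .schemaList("inventory")
                        .tableList("inventory.products")
                        .username("postgres")
                        .password("postgres")
                        .deserializer(new StringDebeziumDeserializationSchema())
                        .build();

        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        // Print raw change records; a real job would apply its own transformations/sinks.
        env.addSource(sourceFunction).print();
        env.execute("postgres-cdc-example");
    }
}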

@ -34,115 +34,128 @@ import java.util.Set;
import static com.alibaba.ververica.cdc.debezium.table.DebeziumOptions.DEBEZIUM_OPTIONS_PREFIX;
import static com.alibaba.ververica.cdc.debezium.table.DebeziumOptions.getDebeziumProperties;
/**
* Factory for creating a configured instance of {@link PostgreSQLTableSource}.
*/
/** Factory for creating a configured instance of {@link PostgreSQLTableSource}. */
public class PostgreSQLTableFactory implements DynamicTableSourceFactory {
private static final String IDENTIFIER = "postgres-cdc";
private static final ConfigOption<String> HOSTNAME = ConfigOptions.key("hostname")
.stringType()
.noDefaultValue()
.withDescription("IP address or hostname of the PostgreSQL database server.");
private static final ConfigOption<Integer> PORT = ConfigOptions.key("port")
.intType()
.defaultValue(5432)
.withDescription("Integer port number of the PostgreSQL database server.");
private static final ConfigOption<String> USERNAME = ConfigOptions.key("username")
.stringType()
.noDefaultValue()
.withDescription("Name of the PostgreSQL database to use when connecting to the PostgreSQL database server.");
private static final ConfigOption<String> PASSWORD = ConfigOptions.key("password")
.stringType()
.noDefaultValue()
.withDescription("Password to use when connecting to the PostgreSQL database server.");
private static final ConfigOption<String> DATABASE_NAME = ConfigOptions.key("database-name")
.stringType()
.noDefaultValue()
.withDescription("Database name of the PostgreSQL server to monitor.");
private static final ConfigOption<String> SCHEMA_NAME = ConfigOptions.key("schema-name")
.stringType()
.noDefaultValue()
.withDescription("Schema name of the PostgreSQL database to monitor.");
private static final ConfigOption<String> TABLE_NAME = ConfigOptions.key("table-name")
.stringType()
.noDefaultValue()
.withDescription("Table name of the PostgreSQL database to monitor.");
private static final ConfigOption<String> DECODING_PLUGIN_NAME = ConfigOptions.key("decoding.plugin.name")
.stringType()
.defaultValue("decoderbufs")
.withDescription("The name of the Postgres logical decoding plug-in installed on the server.\n" +
"Supported values are decoderbufs, wal2json, wal2json_rds, wal2json_streaming,\n" +
"wal2json_rds_streaming and pgoutput.");
private static final ConfigOption<String> SLOT_NAME = ConfigOptions.key("slot.name")
.stringType()
.defaultValue("flink")
.withDescription("The name of the PostgreSQL logical decoding slot that was created for streaming changes " +
"from a particular plug-in for a particular database/schema. The server uses this slot " +
"to stream events to the connector that you are configuring. Default is \"flink\".");
@Override
public DynamicTableSource createDynamicTableSource(DynamicTableFactory.Context context) {
final FactoryUtil.TableFactoryHelper helper = FactoryUtil.createTableFactoryHelper(this, context);
helper.validateExcept(DEBEZIUM_OPTIONS_PREFIX);
final ReadableConfig config = helper.getOptions();
String hostname = config.get(HOSTNAME);
String username = config.get(USERNAME);
String password = config.get(PASSWORD);
String databaseName = config.get(DATABASE_NAME);
String schemaName = config.get(SCHEMA_NAME);
String tableName = config.get(TABLE_NAME);
int port = config.get(PORT);
String pluginName = config.get(DECODING_PLUGIN_NAME);
String slotName = config.get(SLOT_NAME);
TableSchema physicalSchema = TableSchemaUtils.getPhysicalSchema(context.getCatalogTable().getSchema());
return new PostgreSQLTableSource(
physicalSchema,
port,
hostname,
databaseName,
schemaName,
tableName,
username,
password,
pluginName,
slotName,
getDebeziumProperties(context.getCatalogTable().getOptions()));
}
@Override
public String factoryIdentifier() {
return IDENTIFIER;
}
@Override
public Set<ConfigOption<?>> requiredOptions() {
Set<ConfigOption<?>> options = new HashSet<>();
options.add(HOSTNAME);
options.add(USERNAME);
options.add(PASSWORD);
options.add(DATABASE_NAME);
options.add(SCHEMA_NAME);
options.add(TABLE_NAME);
return options;
}
@Override
public Set<ConfigOption<?>> optionalOptions() {
Set<ConfigOption<?>> options = new HashSet<>();
options.add(PORT);
options.add(DECODING_PLUGIN_NAME);
return options;
}
private static final String IDENTIFIER = "postgres-cdc";
private static final ConfigOption<String> HOSTNAME =
ConfigOptions.key("hostname")
.stringType()
.noDefaultValue()
.withDescription("IP address or hostname of the PostgreSQL database server.");
private static final ConfigOption<Integer> PORT =
ConfigOptions.key("port")
.intType()
.defaultValue(5432)
.withDescription("Integer port number of the PostgreSQL database server.");
private static final ConfigOption<String> USERNAME =
ConfigOptions.key("username")
.stringType()
.noDefaultValue()
.withDescription(
"Name of the PostgreSQL database to use when connecting to the PostgreSQL database server.");
private static final ConfigOption<String> PASSWORD =
ConfigOptions.key("password")
.stringType()
.noDefaultValue()
.withDescription(
"Password to use when connecting to the PostgreSQL database server.");
private static final ConfigOption<String> DATABASE_NAME =
ConfigOptions.key("database-name")
.stringType()
.noDefaultValue()
.withDescription("Database name of the PostgreSQL server to monitor.");
private static final ConfigOption<String> SCHEMA_NAME =
ConfigOptions.key("schema-name")
.stringType()
.noDefaultValue()
.withDescription("Schema name of the PostgreSQL database to monitor.");
private static final ConfigOption<String> TABLE_NAME =
ConfigOptions.key("table-name")
.stringType()
.noDefaultValue()
.withDescription("Table name of the PostgreSQL database to monitor.");
private static final ConfigOption<String> DECODING_PLUGIN_NAME =
ConfigOptions.key("decoding.plugin.name")
.stringType()
.defaultValue("decoderbufs")
.withDescription(
"The name of the Postgres logical decoding plug-in installed on the server.\n"
+ "Supported values are decoderbufs, wal2json, wal2json_rds, wal2json_streaming,\n"
+ "wal2json_rds_streaming and pgoutput.");
private static final ConfigOption<String> SLOT_NAME =
ConfigOptions.key("slot.name")
.stringType()
.defaultValue("flink")
.withDescription(
"The name of the PostgreSQL logical decoding slot that was created for streaming changes "
+ "from a particular plug-in for a particular database/schema. The server uses this slot "
+ "to stream events to the connector that you are configuring. Default is \"flink\".");
@Override
public DynamicTableSource createDynamicTableSource(DynamicTableFactory.Context context) {
final FactoryUtil.TableFactoryHelper helper =
FactoryUtil.createTableFactoryHelper(this, context);
helper.validateExcept(DEBEZIUM_OPTIONS_PREFIX);
final ReadableConfig config = helper.getOptions();
String hostname = config.get(HOSTNAME);
String username = config.get(USERNAME);
String password = config.get(PASSWORD);
String databaseName = config.get(DATABASE_NAME);
String schemaName = config.get(SCHEMA_NAME);
String tableName = config.get(TABLE_NAME);
int port = config.get(PORT);
String pluginName = config.get(DECODING_PLUGIN_NAME);
String slotName = config.get(SLOT_NAME);
TableSchema physicalSchema =
TableSchemaUtils.getPhysicalSchema(context.getCatalogTable().getSchema());
return new PostgreSQLTableSource(
physicalSchema,
port,
hostname,
databaseName,
schemaName,
tableName,
username,
password,
pluginName,
slotName,
getDebeziumProperties(context.getCatalogTable().getOptions()));
}
@Override
public String factoryIdentifier() {
return IDENTIFIER;
}
@Override
public Set<ConfigOption<?>> requiredOptions() {
Set<ConfigOption<?>> options = new HashSet<>();
options.add(HOSTNAME);
options.add(USERNAME);
options.add(PASSWORD);
options.add(DATABASE_NAME);
options.add(SCHEMA_NAME);
options.add(TABLE_NAME);
return options;
}
@Override
public Set<ConfigOption<?>> optionalOptions() {
Set<ConfigOption<?>> options = new HashSet<>();
options.add(PORT);
options.add(DECODING_PLUGIN_NAME);
return options;
}
}
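To show how this factory is reached in practice (a sketch, not part of this diff; connection settings and the table layout are placeholders), a Table API program can declare a 'postgres-cdc' table and query it:

import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.table.api.EnvironmentSettings;
import org.apache.flink.table.api.bridge.java.StreamTableEnvironment;

public class PostgresCdcSqlExample {
    public static void main(String[] args) {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        StreamTableEnvironment tEnv =
                StreamTableEnvironment.create(
                        env,
                        EnvironmentSettings.newInstance()
                                .useBlinkPlanner()
                                .inStreamingMode()
                                .build());

        // The option keys below match the ConfigOptions defined in PostgreSQLTableFactory;
        // host, credentials, and names are placeholders.
        tEnv.executeSql(
                "CREATE TABLE shipments ("
                        + " shipment_id INT,"
                        + " order_id INT,"
                        + " origin STRING"
                        + ") WITH ("
                        + " 'connector' = 'postgres-cdc',"
                        + " 'hostname' = 'localhost',"
                        + " 'port' = '5432',"
                        + " 'username' = 'postgres',"
                        + " 'password' = 'postgres',"
                        + " 'database-name' = 'postgres',"
                        + " 'schema-name' = 'public',"
                        + " 'table-name' = 'shipments'"
                        + ")");

        tEnv.executeSql("SELECT * FROM shipments").print();
    }
}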

@ -45,123 +45,137 @@ import static org.apache.flink.util.Preconditions.checkNotNull;
*/
public class PostgreSQLTableSource implements ScanTableSource {
private final TableSchema physicalSchema;
private final int port;
private final String hostname;
private final String database;
private final String schemaName;
private final String tableName;
private final String username;
private final String password;
private final String pluginName;
private final String slotName;
private final Properties dbzProperties;
private final TableSchema physicalSchema;
private final int port;
private final String hostname;
private final String database;
private final String schemaName;
private final String tableName;
private final String username;
private final String password;
private final String pluginName;
private final String slotName;
private final Properties dbzProperties;
public PostgreSQLTableSource(
TableSchema physicalSchema,
int port,
String hostname,
String database,
String schemaName,
String tableName,
String username,
String password,
String pluginName,
String slotName,
Properties dbzProperties) {
this.physicalSchema = physicalSchema;
this.port = port;
this.hostname = checkNotNull(hostname);
this.database = checkNotNull(database);
this.schemaName = checkNotNull(schemaName);
this.tableName = checkNotNull(tableName);
this.username = checkNotNull(username);
this.password = checkNotNull(password);
this.pluginName = checkNotNull(pluginName);
this.slotName = slotName;
this.dbzProperties = dbzProperties;
}
public PostgreSQLTableSource(
TableSchema physicalSchema,
int port,
String hostname,
String database,
String schemaName,
String tableName,
String username,
String password,
String pluginName,
String slotName,
Properties dbzProperties) {
this.physicalSchema = physicalSchema;
this.port = port;
this.hostname = checkNotNull(hostname);
this.database = checkNotNull(database);
this.schemaName = checkNotNull(schemaName);
this.tableName = checkNotNull(tableName);
this.username = checkNotNull(username);
this.password = checkNotNull(password);
this.pluginName = checkNotNull(pluginName);
this.slotName = slotName;
this.dbzProperties = dbzProperties;
}
@Override
public ChangelogMode getChangelogMode() {
return ChangelogMode.newBuilder()
.addContainedKind(RowKind.INSERT)
.addContainedKind(RowKind.UPDATE_BEFORE)
.addContainedKind(RowKind.UPDATE_AFTER)
.addContainedKind(RowKind.DELETE)
.build();
}
@Override
public ChangelogMode getChangelogMode() {
return ChangelogMode.newBuilder()
.addContainedKind(RowKind.INSERT)
.addContainedKind(RowKind.UPDATE_BEFORE)
.addContainedKind(RowKind.UPDATE_AFTER)
.addContainedKind(RowKind.DELETE)
.build();
}
@Override
public ScanRuntimeProvider getScanRuntimeProvider(ScanContext scanContext) {
RowType rowType = (RowType) physicalSchema.toRowDataType().getLogicalType();
TypeInformation<RowData> typeInfo = scanContext.createTypeInformation(physicalSchema.toRowDataType());
DebeziumDeserializationSchema<RowData> deserializer = new RowDataDebeziumDeserializeSchema(
rowType,
typeInfo,
new PostgresValueValidator(schemaName, tableName),
ZoneId.of("UTC"));
DebeziumSourceFunction<RowData> sourceFunction = PostgreSQLSource.<RowData>builder()
.hostname(hostname)
.port(port)
.database(database)
.schemaList(schemaName)
.tableList(schemaName + "." + tableName)
.username(username)
.password(password)
.decodingPluginName(pluginName)
.slotName(slotName)
.debeziumProperties(dbzProperties)
.deserializer(deserializer)
.build();
return SourceFunctionProvider.of(sourceFunction, false);
}
@Override
public ScanRuntimeProvider getScanRuntimeProvider(ScanContext scanContext) {
RowType rowType = (RowType) physicalSchema.toRowDataType().getLogicalType();
TypeInformation<RowData> typeInfo =
scanContext.createTypeInformation(physicalSchema.toRowDataType());
DebeziumDeserializationSchema<RowData> deserializer =
new RowDataDebeziumDeserializeSchema(
rowType,
typeInfo,
new PostgresValueValidator(schemaName, tableName),
ZoneId.of("UTC"));
DebeziumSourceFunction<RowData> sourceFunction =
PostgreSQLSource.<RowData>builder()
.hostname(hostname)
.port(port)
.database(database)
.schemaList(schemaName)
.tableList(schemaName + "." + tableName)
.username(username)
.password(password)
.decodingPluginName(pluginName)
.slotName(slotName)
.debeziumProperties(dbzProperties)
.deserializer(deserializer)
.build();
return SourceFunctionProvider.of(sourceFunction, false);
}
@Override
public DynamicTableSource copy() {
return new PostgreSQLTableSource(
physicalSchema,
port,
hostname,
database,
schemaName,
tableName,
username,
password,
pluginName,
slotName,
dbzProperties);
}
@Override
public DynamicTableSource copy() {
return new PostgreSQLTableSource(
physicalSchema,
port,
hostname,
database,
schemaName,
tableName,
username,
password,
pluginName,
slotName,
dbzProperties);
}
@Override
public boolean equals(Object o) {
if (this == o) {
return true;
}
if (o == null || getClass() != o.getClass()) {
return false;
}
PostgreSQLTableSource that = (PostgreSQLTableSource) o;
return port == that.port &&
Objects.equals(physicalSchema, that.physicalSchema) &&
Objects.equals(hostname, that.hostname) &&
Objects.equals(database, that.database) &&
Objects.equals(schemaName, that.schemaName) &&
Objects.equals(tableName, that.tableName) &&
Objects.equals(username, that.username) &&
Objects.equals(password, that.password) &&
Objects.equals(pluginName, that.pluginName) &&
Objects.equals(slotName, that.slotName) &&
Objects.equals(dbzProperties, that.dbzProperties);
}
@Override
public boolean equals(Object o) {
if (this == o) {
return true;
}
if (o == null || getClass() != o.getClass()) {
return false;
}
PostgreSQLTableSource that = (PostgreSQLTableSource) o;
return port == that.port
&& Objects.equals(physicalSchema, that.physicalSchema)
&& Objects.equals(hostname, that.hostname)
&& Objects.equals(database, that.database)
&& Objects.equals(schemaName, that.schemaName)
&& Objects.equals(tableName, that.tableName)
&& Objects.equals(username, that.username)
&& Objects.equals(password, that.password)
&& Objects.equals(pluginName, that.pluginName)
&& Objects.equals(slotName, that.slotName)
&& Objects.equals(dbzProperties, that.dbzProperties);
}
@Override
public int hashCode() {
return Objects.hash(physicalSchema, port, hostname, database, schemaName, tableName, username, password, pluginName, slotName, dbzProperties);
}
@Override
public int hashCode() {
return Objects.hash(
physicalSchema,
port,
hostname,
database,
schemaName,
tableName,
username,
password,
pluginName,
slotName,
dbzProperties);
}
@Override
public String asSummaryString() {
return "PostgreSQL-CDC";
}
@Override
public String asSummaryString() {
return "PostgreSQL-CDC";
}
}

@ -23,27 +23,27 @@ import org.apache.flink.types.RowKind;
import com.alibaba.ververica.cdc.debezium.table.RowDataDebeziumDeserializeSchema;
/**
* The {@link RowDataDebeziumDeserializeSchema.ValueValidator} for the Postgres connector.
*/
public final class PostgresValueValidator implements RowDataDebeziumDeserializeSchema.ValueValidator {
private static final long serialVersionUID = -1870679469578028765L;
private static final String REPLICA_IDENTITY_EXCEPTION = "The \"before\" field of UPDATE/DELETE message is null, " +
"please check the Postgres table has been set REPLICA IDENTITY to FULL level. " +
"You can update the setting by running the command in Postgres 'ALTER TABLE %s REPLICA IDENTITY FULL'. " +
"Please see more in Debezium documentation: https://debezium.io/documentation/reference/1.2/connectors/postgresql.html#postgresql-replica-identity";
private final String schemaTable;
public PostgresValueValidator(String schema, String table) {
this.schemaTable = schema + "." + table;
}
@Override
public void validate(RowData rowData, RowKind rowKind) throws Exception {
if (rowData == null) {
throw new IllegalStateException(String.format(REPLICA_IDENTITY_EXCEPTION, schemaTable));
}
}
/** The {@link RowDataDebeziumDeserializeSchema.ValueValidator} for the Postgres connector. */
public final class PostgresValueValidator
implements RowDataDebeziumDeserializeSchema.ValueValidator {
private static final long serialVersionUID = -1870679469578028765L;
private static final String REPLICA_IDENTITY_EXCEPTION =
"The \"before\" field of UPDATE/DELETE message is null, "
+ "please check the Postgres table has been set REPLICA IDENTITY to FULL level. "
+ "You can update the setting by running the command in Postgres 'ALTER TABLE %s REPLICA IDENTITY FULL'. "
+ "Please see more in Debezium documentation: https://debezium.io/documentation/reference/1.2/connectors/postgresql.html#postgresql-replica-identity";
private final String schemaTable;
public PostgresValueValidator(String schema, String table) {
this.schemaTable = schema + "." + table;
}
@Override
public void validate(RowData rowData, RowKind rowKind) throws Exception {
if (rowData == null) {
throw new IllegalStateException(String.format(REPLICA_IDENTITY_EXCEPTION, schemaTable));
}
}
}

@ -45,63 +45,66 @@ import java.util.stream.Stream;
import static org.junit.Assert.assertNotNull;
/**
* Basic class for testing the PostgreSQL source; it contains a PostgreSQL container that enables logical decoding.
* Basic class for testing the PostgreSQL source; it contains a PostgreSQL container that enables
* logical decoding.
*/
public abstract class PostgresTestBase extends AbstractTestBase {
private static final Logger LOG = LoggerFactory.getLogger(PostgresTestBase.class);
private static final Pattern COMMENT_PATTERN = Pattern.compile("^(.*)--.*$");
private static final Logger LOG = LoggerFactory.getLogger(PostgresTestBase.class);
private static final Pattern COMMENT_PATTERN = Pattern.compile("^(.*)--.*$");
private static final DockerImageName PG_IMAGE = DockerImageName
.parse("debezium/postgres:9.6")
.asCompatibleSubstituteFor("postgres");
private static final DockerImageName PG_IMAGE =
DockerImageName.parse("debezium/postgres:9.6").asCompatibleSubstituteFor("postgres");
protected static final PostgreSQLContainer<?> POSTGERS_CONTAINER = new PostgreSQLContainer<>(PG_IMAGE)
.withDatabaseName("postgres")
.withUsername("postgres")
.withPassword("postgres")
.withLogConsumer(new Slf4jLogConsumer(LOG));
protected static final PostgreSQLContainer<?> POSTGERS_CONTAINER =
new PostgreSQLContainer<>(PG_IMAGE)
.withDatabaseName("postgres")
.withUsername("postgres")
.withPassword("postgres")
.withLogConsumer(new Slf4jLogConsumer(LOG));
@BeforeClass
public static void startContainers() {
LOG.info("Starting containers...");
Startables.deepStart(Stream.of(POSTGERS_CONTAINER)).join();
LOG.info("Containers are started.");
}
@BeforeClass
public static void startContainers() {
LOG.info("Starting containers...");
Startables.deepStart(Stream.of(POSTGERS_CONTAINER)).join();
LOG.info("Containers are started.");
}
protected Connection getJdbcConnection() throws SQLException {
return DriverManager.getConnection(
POSTGERS_CONTAINER.getJdbcUrl(),
POSTGERS_CONTAINER.getUsername(),
POSTGERS_CONTAINER.getPassword()
);
}
/**
* Executes a JDBC statement using the default jdbc config without autocommitting the connection.
*/
protected void initializePostgresTable(String sqlFile) {
final String ddlFile = String.format("ddl/%s.sql", sqlFile);
final URL ddlTestFile = PostgresTestBase.class.getClassLoader().getResource(ddlFile);
assertNotNull("Cannot locate " + ddlFile, ddlTestFile);
try (Connection connection = getJdbcConnection();
Statement statement = connection.createStatement()) {
final List<String> statements = Arrays.stream(
Files.readAllLines(Paths.get(ddlTestFile.toURI())).stream()
.map(String::trim)
.filter(x -> !x.startsWith("--") && !x.isEmpty())
.map(x -> {
final Matcher m = COMMENT_PATTERN.matcher(x);
return m.matches() ? m.group(1) : x;
})
.collect(Collectors.joining("\n")).split(";"))
.collect(Collectors.toList());
for (String stmt : statements) {
statement.execute(stmt);
}
}
catch (Exception e) {
throw new RuntimeException(e);
}
}
protected Connection getJdbcConnection() throws SQLException {
return DriverManager.getConnection(
POSTGERS_CONTAINER.getJdbcUrl(),
POSTGERS_CONTAINER.getUsername(),
POSTGERS_CONTAINER.getPassword());
}
/**
* Executes a JDBC statement using the default jdbc config without autocommitting the
* connection.
*/
protected void initializePostgresTable(String sqlFile) {
final String ddlFile = String.format("ddl/%s.sql", sqlFile);
final URL ddlTestFile = PostgresTestBase.class.getClassLoader().getResource(ddlFile);
assertNotNull("Cannot locate " + ddlFile, ddlTestFile);
try (Connection connection = getJdbcConnection();
Statement statement = connection.createStatement()) {
final List<String> statements =
Arrays.stream(
Files.readAllLines(Paths.get(ddlTestFile.toURI())).stream()
.map(String::trim)
.filter(x -> !x.startsWith("--") && !x.isEmpty())
.map(
x -> {
final Matcher m =
COMMENT_PATTERN.matcher(x);
return m.matches() ? m.group(1) : x;
})
.collect(Collectors.joining("\n"))
.split(";"))
.collect(Collectors.toList());
for (String stmt : statements) {
statement.execute(stmt);
}
} catch (Exception e) {
throw new RuntimeException(e);
}
}
}

@ -42,286 +42,325 @@ import static org.junit.Assert.assertThat;
import static org.junit.Assert.assertTrue;
import static org.testcontainers.containers.PostgreSQLContainer.POSTGRESQL_PORT;
/**
* Integration tests for the PostgreSQL CDC SQL source.
*/
/** Integration tests for the PostgreSQL CDC SQL source. */
public class PostgreSQLConnectorITCase extends PostgresTestBase {
private final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
private final StreamTableEnvironment tEnv = StreamTableEnvironment.create(
env,
EnvironmentSettings.newInstance().useBlinkPlanner().inStreamingMode().build()
);
@Before
public void before() {
TestValuesTableFactory.clearAllData();
env.setParallelism(1);
}
@Test
public void testConsumingAllEvents() throws SQLException, ExecutionException, InterruptedException {
initializePostgresTable("inventory");
String sourceDDL = String.format(
"CREATE TABLE debezium_source (" +
" id INT NOT NULL," +
" name STRING," +
" description STRING," +
" weight DECIMAL(10,3)" +
") WITH (" +
" 'connector' = 'postgres-cdc'," +
" 'hostname' = '%s'," +
" 'port' = '%s'," +
" 'username' = '%s'," +
" 'password' = '%s'," +
" 'database-name' = '%s'," +
" 'schema-name' = '%s'," +
" 'table-name' = '%s'" +
")",
POSTGERS_CONTAINER.getHost(),
POSTGERS_CONTAINER.getMappedPort(POSTGRESQL_PORT),
POSTGERS_CONTAINER.getUsername(),
POSTGERS_CONTAINER.getPassword(),
POSTGERS_CONTAINER.getDatabaseName(),
"inventory",
"products");
String sinkDDL = "CREATE TABLE sink (" +
" name STRING," +
" weightSum DECIMAL(10,3)," +
" PRIMARY KEY (name) NOT ENFORCED" +
") WITH (" +
" 'connector' = 'values'," +
" 'sink-insert-only' = 'false'," +
" 'sink-expected-messages-num' = '20'" +
")";
tEnv.executeSql(sourceDDL);
tEnv.executeSql(sinkDDL);
// async submit job
TableResult result = tEnv.executeSql("INSERT INTO sink SELECT name, SUM(weight) FROM debezium_source GROUP BY name");
waitForSnapshotStarted("sink");
try (Connection connection = getJdbcConnection();
Statement statement = connection.createStatement()) {
statement.execute("UPDATE inventory.products SET description='18oz carpenter hammer' WHERE id=106;");
statement.execute("UPDATE inventory.products SET weight='5.1' WHERE id=107;");
statement.execute("INSERT INTO inventory.products VALUES (default,'jacket','water resistent white wind breaker',0.2);"); // 110
statement.execute("INSERT INTO inventory.products VALUES (default,'scooter','Big 2-wheel scooter ',5.18);");
statement.execute("UPDATE inventory.products SET description='new water resistent white wind breaker', weight='0.5' WHERE id=110;");
statement.execute("UPDATE inventory.products SET weight='5.17' WHERE id=111;");
statement.execute("DELETE FROM inventory.products WHERE id=111;");
}
waitForSinkSize("sink", 20);
// The final database table looks like this:
//
// > SELECT * FROM inventory.products;
// +-----+--------------------+---------------------------------------------------------+--------+
// | id | name | description | weight |
// +-----+--------------------+---------------------------------------------------------+--------+
// | 101 | scooter | Small 2-wheel scooter | 3.14 |
// | 102 | car battery | 12V car battery | 8.1 |
// | 103 | 12-pack drill bits | 12-pack of drill bits with sizes ranging from #40 to #3 | 0.8 |
// | 104 | hammer | 12oz carpenter's hammer | 0.75 |
// | 105 | hammer | 14oz carpenter's hammer | 0.875 |
// | 106 | hammer | 18oz carpenter hammer | 1 |
// | 107 | rocks | box of assorted rocks | 5.1 |
// | 108 | jacket | water resistent black wind breaker | 0.1 |
// | 109 | spare tire | 24 inch spare tire | 22.2 |
// | 110 | jacket | new water resistent white wind breaker | 0.5 |
// +-----+--------------------+---------------------------------------------------------+--------+
String[] expected = new String[]{
"scooter,3.140", "car battery,8.100", "12-pack drill bits,0.800",
"hammer,2.625", "rocks,5.100", "jacket,0.600", "spare tire,22.200"};
List<String> actual = TestValuesTableFactory.getResults("sink");
assertThat(actual, containsInAnyOrder(expected));
result.getJobClient().get().cancel().get();
}
@Test
public void testExceptionForReplicaIdentity() throws Exception {
initializePostgresTable("replica_identity");
String sourceDDL = String.format(
"CREATE TABLE debezium_source (" +
" id INT NOT NULL," +
" name STRING," +
" description STRING," +
" weight DECIMAL(10,3)" +
") WITH (" +
" 'connector' = 'postgres-cdc'," +
" 'hostname' = '%s'," +
" 'port' = '%s'," +
" 'username' = '%s'," +
" 'password' = '%s'," +
" 'database-name' = '%s'," +
" 'schema-name' = '%s'," +
" 'table-name' = '%s'," +
" 'debezium.slot.name' = '%s'" +
")",
POSTGERS_CONTAINER.getHost(),
POSTGERS_CONTAINER.getMappedPort(POSTGRESQL_PORT),
POSTGERS_CONTAINER.getUsername(),
POSTGERS_CONTAINER.getPassword(),
POSTGERS_CONTAINER.getDatabaseName(),
"inventory",
"products",
"replica_identity_slot");
String sinkDDL = "CREATE TABLE sink (" +
" name STRING," +
" weightSum DECIMAL(10,3)," +
" PRIMARY KEY (name) NOT ENFORCED" +
") WITH (" +
" 'connector' = 'values'," +
" 'sink-insert-only' = 'false'," +
" 'sink-expected-messages-num' = '20'" +
")";
tEnv.executeSql(sourceDDL);
tEnv.executeSql(sinkDDL);
// async submit job
TableResult result = tEnv.executeSql("INSERT INTO sink SELECT name, SUM(weight) FROM debezium_source GROUP BY name");
waitForSnapshotStarted("sink");
try (Connection connection = getJdbcConnection();
Statement statement = connection.createStatement()) {
statement.execute("UPDATE inventory.products SET description='18oz carpenter hammer' WHERE id=106;");
statement.execute("UPDATE inventory.products SET weight='5.1' WHERE id=107;");
statement.execute("INSERT INTO inventory.products VALUES (default,'jacket','water resistent white wind breaker',0.2);"); // 110
statement.execute("INSERT INTO inventory.products VALUES (default,'scooter','Big 2-wheel scooter ',5.18);");
statement.execute("UPDATE inventory.products SET description='new water resistent white wind breaker', weight='0.5' WHERE id=110;");
statement.execute("UPDATE inventory.products SET weight='5.17' WHERE id=111;");
statement.execute("DELETE FROM inventory.products WHERE id=111;");
}
try {
result.await();
} catch (Exception e) {
assertTrue(ExceptionUtils.findThrowableWithMessage(e,
"The \"before\" field of UPDATE/DELETE message is null, " +
"please check the Postgres table has been set REPLICA IDENTITY to FULL level.").isPresent());
}
}
@Test
public void testAllTypes() throws Throwable {
initializePostgresTable("column_type_test");
String sourceDDL = String.format(
"CREATE TABLE full_types (\n" +
" id INTEGER NOT NULL,\n" +
" bytea_c BYTES,\n" +
" small_c SMALLINT,\n" +
" int_c INTEGER,\n" +
" big_c BIGINT,\n" +
" real_c FLOAT,\n" +
" double_precision DOUBLE,\n" +
" numeric_c DECIMAL(10, 5),\n" +
" decimal_c DECIMAL(10, 1),\n" +
" boolean_c BOOLEAN,\n" +
" text_c STRING,\n" +
" char_c STRING,\n" +
" character_c STRING,\n" +
" character_varying_c STRING,\n" +
" timestamp3_c TIMESTAMP(3),\n" +
" timestamp6_c TIMESTAMP(6),\n" +
" date_c DATE,\n" +
" time_c TIME(0),\n" +
" default_numeric_c DECIMAL\n" +
") WITH (" +
" 'connector' = 'postgres-cdc'," +
" 'hostname' = '%s'," +
" 'port' = '%s'," +
" 'username' = '%s'," +
" 'password' = '%s'," +
" 'database-name' = '%s'," +
" 'schema-name' = '%s'," +
" 'table-name' = '%s'" +
")",
POSTGERS_CONTAINER.getHost(),
POSTGERS_CONTAINER.getMappedPort(POSTGRESQL_PORT),
POSTGERS_CONTAINER.getUsername(),
POSTGERS_CONTAINER.getPassword(),
POSTGERS_CONTAINER.getDatabaseName(),
"public",
"full_types");
String sinkDDL =
"CREATE TABLE sink (\n" +
" id INTEGER NOT NULL,\n" +
" bytea_c BYTES,\n" +
" small_c SMALLINT,\n" +
" int_c INTEGER,\n" +
" big_c BIGINT,\n" +
" real_c FLOAT,\n" +
" double_precision DOUBLE,\n" +
" numeric_c DECIMAL(10, 5),\n" +
" decimal_c DECIMAL(10, 1),\n" +
" boolean_c BOOLEAN,\n" +
" text_c STRING,\n" +
" char_c STRING,\n" +
" character_c STRING,\n" +
" character_varying_c STRING,\n" +
" timestamp3_c TIMESTAMP(3),\n" +
" timestamp6_c TIMESTAMP(6),\n" +
" date_c DATE,\n" +
" time_c TIME(0),\n" +
" default_numeric_c DECIMAL\n" +
") WITH (" +
" 'connector' = 'values'," +
" 'sink-insert-only' = 'false'" +
")";
tEnv.executeSql(sourceDDL);
tEnv.executeSql(sinkDDL);
// async submit job
TableResult result = tEnv.executeSql("INSERT INTO sink SELECT * FROM full_types");
waitForSnapshotStarted("sink");
try (Connection connection = getJdbcConnection();
Statement statement = connection.createStatement()) {
statement.execute("UPDATE full_types SET small_c=0 WHERE id=1;");
}
waitForSinkSize("sink", 3);
List<String> expected = Arrays.asList(
"+I(1,[50],32767,65535,2147483647,5.5,6.6,123.12345,404.4,true,Hello World,a,abc,abcd..xyz,2020-07-17T18:00:22.123,2020-07-17T18:00:22.123456,2020-07-17,18:00:22,500)",
"-U(1,[50],32767,65535,2147483647,5.5,6.6,123.12345,404.4,true,Hello World,a,abc,abcd..xyz,2020-07-17T18:00:22.123,2020-07-17T18:00:22.123456,2020-07-17,18:00:22,500)",
"+U(1,[50],0,65535,2147483647,5.5,6.6,123.12345,404.4,true,Hello World,a,abc,abcd..xyz,2020-07-17T18:00:22.123,2020-07-17T18:00:22.123456,2020-07-17,18:00:22,500)");
List<String> actual = TestValuesTableFactory.getRawResults("sink");
assertEquals(expected, actual);
result.getJobClient().get().cancel().get();
}
private static void waitForSnapshotStarted(String sinkName) throws InterruptedException {
while (sinkSize(sinkName) == 0) {
Thread.sleep(100);
}
}
private static void waitForSinkSize(String sinkName, int expectedSize) throws InterruptedException {
while (sinkSize(sinkName) < expectedSize) {
Thread.sleep(100);
}
}
private static int sinkSize(String sinkName) {
synchronized (TestValuesTableFactory.class) {
try {
return TestValuesTableFactory.getRawResults(sinkName).size();
} catch (IllegalArgumentException e) {
// job is not started yet
return 0;
}
}
}
private final StreamExecutionEnvironment env =
StreamExecutionEnvironment.getExecutionEnvironment();
private final StreamTableEnvironment tEnv =
StreamTableEnvironment.create(
env,
EnvironmentSettings.newInstance().useBlinkPlanner().inStreamingMode().build());
@Before
public void before() {
TestValuesTableFactory.clearAllData();
env.setParallelism(1);
}
@Test
public void testConsumingAllEvents()
throws SQLException, ExecutionException, InterruptedException {
initializePostgresTable("inventory");
String sourceDDL =
String.format(
"CREATE TABLE debezium_source ("
+ " id INT NOT NULL,"
+ " name STRING,"
+ " description STRING,"
+ " weight DECIMAL(10,3)"
+ ") WITH ("
+ " 'connector' = 'postgres-cdc',"
+ " 'hostname' = '%s',"
+ " 'port' = '%s',"
+ " 'username' = '%s',"
+ " 'password' = '%s',"
+ " 'database-name' = '%s',"
+ " 'schema-name' = '%s',"
+ " 'table-name' = '%s'"
+ ")",
POSTGERS_CONTAINER.getHost(),
POSTGERS_CONTAINER.getMappedPort(POSTGRESQL_PORT),
POSTGERS_CONTAINER.getUsername(),
POSTGERS_CONTAINER.getPassword(),
POSTGERS_CONTAINER.getDatabaseName(),
"inventory",
"products");
String sinkDDL =
"CREATE TABLE sink ("
+ " name STRING,"
+ " weightSum DECIMAL(10,3),"
+ " PRIMARY KEY (name) NOT ENFORCED"
+ ") WITH ("
+ " 'connector' = 'values',"
+ " 'sink-insert-only' = 'false',"
+ " 'sink-expected-messages-num' = '20'"
+ ")";
tEnv.executeSql(sourceDDL);
tEnv.executeSql(sinkDDL);
// async submit job
TableResult result =
tEnv.executeSql(
"INSERT INTO sink SELECT name, SUM(weight) FROM debezium_source GROUP BY name");
waitForSnapshotStarted("sink");
try (Connection connection = getJdbcConnection();
Statement statement = connection.createStatement()) {
statement.execute(
"UPDATE inventory.products SET description='18oz carpenter hammer' WHERE id=106;");
statement.execute("UPDATE inventory.products SET weight='5.1' WHERE id=107;");
statement.execute(
"INSERT INTO inventory.products VALUES (default,'jacket','water resistent white wind breaker',0.2);"); // 110
statement.execute(
"INSERT INTO inventory.products VALUES (default,'scooter','Big 2-wheel scooter ',5.18);");
statement.execute(
"UPDATE inventory.products SET description='new water resistent white wind breaker', weight='0.5' WHERE id=110;");
statement.execute("UPDATE inventory.products SET weight='5.17' WHERE id=111;");
statement.execute("DELETE FROM inventory.products WHERE id=111;");
}
waitForSinkSize("sink", 20);
// The final database table looks like this:
//
// > SELECT * FROM inventory.products;
// +-----+--------------------+---------------------------------------------------------+--------+
// | id | name | description |
// weight |
// +-----+--------------------+---------------------------------------------------------+--------+
// | 101 | scooter | Small 2-wheel scooter |
// 3.14 |
// | 102 | car battery | 12V car battery |
// 8.1 |
// | 103 | 12-pack drill bits | 12-pack of drill bits with sizes ranging from #40 to #3 |
// 0.8 |
// | 104 | hammer | 12oz carpenter's hammer |
// 0.75 |
// | 105 | hammer | 14oz carpenter's hammer |
// 0.875 |
// | 106 | hammer | 18oz carpenter hammer |
// 1 |
// | 107 | rocks | box of assorted rocks |
// 5.1 |
// | 108 | jacket | water resistent black wind breaker |
// 0.1 |
// | 109 | spare tire | 24 inch spare tire |
// 22.2 |
// | 110 | jacket | new water resistent white wind breaker |
// 0.5 |
// +-----+--------------------+---------------------------------------------------------+--------+
String[] expected =
new String[] {
"scooter,3.140",
"car battery,8.100",
"12-pack drill bits,0.800",
"hammer,2.625",
"rocks,5.100",
"jacket,0.600",
"spare tire,22.200"
};
List<String> actual = TestValuesTableFactory.getResults("sink");
assertThat(actual, containsInAnyOrder(expected));
result.getJobClient().get().cancel().get();
}
@Test
public void testExceptionForReplicaIdentity() throws Exception {
initializePostgresTable("replica_identity");
String sourceDDL =
String.format(
"CREATE TABLE debezium_source ("
+ " id INT NOT NULL,"
+ " name STRING,"
+ " description STRING,"
+ " weight DECIMAL(10,3)"
+ ") WITH ("
+ " 'connector' = 'postgres-cdc',"
+ " 'hostname' = '%s',"
+ " 'port' = '%s',"
+ " 'username' = '%s',"
+ " 'password' = '%s',"
+ " 'database-name' = '%s',"
+ " 'schema-name' = '%s',"
+ " 'table-name' = '%s',"
+ " 'debezium.slot.name' = '%s'"
+ ")",
POSTGERS_CONTAINER.getHost(),
POSTGERS_CONTAINER.getMappedPort(POSTGRESQL_PORT),
POSTGERS_CONTAINER.getUsername(),
POSTGERS_CONTAINER.getPassword(),
POSTGERS_CONTAINER.getDatabaseName(),
"inventory",
"products",
"replica_identity_slot");
String sinkDDL =
"CREATE TABLE sink ("
+ " name STRING,"
+ " weightSum DECIMAL(10,3),"
+ " PRIMARY KEY (name) NOT ENFORCED"
+ ") WITH ("
+ " 'connector' = 'values',"
+ " 'sink-insert-only' = 'false',"
+ " 'sink-expected-messages-num' = '20'"
+ ")";
tEnv.executeSql(sourceDDL);
tEnv.executeSql(sinkDDL);
// async submit job
TableResult result =
tEnv.executeSql(
"INSERT INTO sink SELECT name, SUM(weight) FROM debezium_source GROUP BY name");
waitForSnapshotStarted("sink");
try (Connection connection = getJdbcConnection();
Statement statement = connection.createStatement()) {
statement.execute(
"UPDATE inventory.products SET description='18oz carpenter hammer' WHERE id=106;");
statement.execute("UPDATE inventory.products SET weight='5.1' WHERE id=107;");
statement.execute(
"INSERT INTO inventory.products VALUES (default,'jacket','water resistent white wind breaker',0.2);"); // 110
statement.execute(
"INSERT INTO inventory.products VALUES (default,'scooter','Big 2-wheel scooter ',5.18);");
statement.execute(
"UPDATE inventory.products SET description='new water resistent white wind breaker', weight='0.5' WHERE id=110;");
statement.execute("UPDATE inventory.products SET weight='5.17' WHERE id=111;");
statement.execute("DELETE FROM inventory.products WHERE id=111;");
}
try {
result.await();
} catch (Exception e) {
assertTrue(
ExceptionUtils.findThrowableWithMessage(
e,
"The \"before\" field of UPDATE/DELETE message is null, "
+ "please check the Postgres table has been set REPLICA IDENTITY to FULL level.")
.isPresent());
}
}
@Test
public void testAllTypes() throws Throwable {
initializePostgresTable("column_type_test");
String sourceDDL =
String.format(
"CREATE TABLE full_types (\n"
+ " id INTEGER NOT NULL,\n"
+ " bytea_c BYTES,\n"
+ " small_c SMALLINT,\n"
+ " int_c INTEGER,\n"
+ " big_c BIGINT,\n"
+ " real_c FLOAT,\n"
+ " double_precision DOUBLE,\n"
+ " numeric_c DECIMAL(10, 5),\n"
+ " decimal_c DECIMAL(10, 1),\n"
+ " boolean_c BOOLEAN,\n"
+ " text_c STRING,\n"
+ " char_c STRING,\n"
+ " character_c STRING,\n"
+ " character_varying_c STRING,\n"
+ " timestamp3_c TIMESTAMP(3),\n"
+ " timestamp6_c TIMESTAMP(6),\n"
+ " date_c DATE,\n"
+ " time_c TIME(0),\n"
+ " default_numeric_c DECIMAL\n"
+ ") WITH ("
+ " 'connector' = 'postgres-cdc',"
+ " 'hostname' = '%s',"
+ " 'port' = '%s',"
+ " 'username' = '%s',"
+ " 'password' = '%s',"
+ " 'database-name' = '%s',"
+ " 'schema-name' = '%s',"
+ " 'table-name' = '%s'"
+ ")",
POSTGERS_CONTAINER.getHost(),
POSTGERS_CONTAINER.getMappedPort(POSTGRESQL_PORT),
POSTGERS_CONTAINER.getUsername(),
POSTGERS_CONTAINER.getPassword(),
POSTGERS_CONTAINER.getDatabaseName(),
"public",
"full_types");
String sinkDDL =
"CREATE TABLE sink (\n"
+ " id INTEGER NOT NULL,\n"
+ " bytea_c BYTES,\n"
+ " small_c SMALLINT,\n"
+ " int_c INTEGER,\n"
+ " big_c BIGINT,\n"
+ " real_c FLOAT,\n"
+ " double_precision DOUBLE,\n"
+ " numeric_c DECIMAL(10, 5),\n"
+ " decimal_c DECIMAL(10, 1),\n"
+ " boolean_c BOOLEAN,\n"
+ " text_c STRING,\n"
+ " char_c STRING,\n"
+ " character_c STRING,\n"
+ " character_varying_c STRING,\n"
+ " timestamp3_c TIMESTAMP(3),\n"
+ " timestamp6_c TIMESTAMP(6),\n"
+ " date_c DATE,\n"
+ " time_c TIME(0),\n"
+ " default_numeric_c DECIMAL\n"
+ ") WITH ("
+ " 'connector' = 'values',"
+ " 'sink-insert-only' = 'false'"
+ ")";
tEnv.executeSql(sourceDDL);
tEnv.executeSql(sinkDDL);
// async submit job
TableResult result = tEnv.executeSql("INSERT INTO sink SELECT * FROM full_types");
waitForSnapshotStarted("sink");
try (Connection connection = getJdbcConnection();
Statement statement = connection.createStatement()) {
statement.execute("UPDATE full_types SET small_c=0 WHERE id=1;");
}
waitForSinkSize("sink", 3);
List<String> expected =
Arrays.asList(
"+I(1,[50],32767,65535,2147483647,5.5,6.6,123.12345,404.4,true,Hello World,a,abc,abcd..xyz,2020-07-17T18:00:22.123,2020-07-17T18:00:22.123456,2020-07-17,18:00:22,500)",
"-U(1,[50],32767,65535,2147483647,5.5,6.6,123.12345,404.4,true,Hello World,a,abc,abcd..xyz,2020-07-17T18:00:22.123,2020-07-17T18:00:22.123456,2020-07-17,18:00:22,500)",
"+U(1,[50],0,65535,2147483647,5.5,6.6,123.12345,404.4,true,Hello World,a,abc,abcd..xyz,2020-07-17T18:00:22.123,2020-07-17T18:00:22.123456,2020-07-17,18:00:22,500)");
List<String> actual = TestValuesTableFactory.getRawResults("sink");
assertEquals(expected, actual);
result.getJobClient().get().cancel().get();
}
private static void waitForSnapshotStarted(String sinkName) throws InterruptedException {
while (sinkSize(sinkName) == 0) {
Thread.sleep(100);
}
}
private static void waitForSinkSize(String sinkName, int expectedSize)
throws InterruptedException {
while (sinkSize(sinkName) < expectedSize) {
Thread.sleep(100);
}
}
private static int sinkSize(String sinkName) {
synchronized (TestValuesTableFactory.class) {
try {
return TestValuesTableFactory.getRawResults(sinkName).size();
} catch (IllegalArgumentException e) {
// job is not started yet
return 0;
}
}
}
}

@ -40,134 +40,141 @@ import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
/**
* Test for {@link PostgreSQLTableSource} created by {@link PostgreSQLTableFactory}.
*/
/** Test for {@link PostgreSQLTableSource} created by {@link PostgreSQLTableFactory}. */
public class PostgreSQLTableFactoryTest {
private static final TableSchema SCHEMA = TableSchema.builder()
.field("aaa", DataTypes.INT().notNull())
.field("bbb", DataTypes.STRING().notNull())
.field("ccc", DataTypes.DOUBLE())
.field("ddd", DataTypes.DECIMAL(31, 18))
.field("eee", DataTypes.TIMESTAMP(3))
.primaryKey("bbb", "aaa")
.build();
private static final String MY_LOCALHOST = "localhost";
private static final String MY_USERNAME = "flinkuser";
private static final String MY_PASSWORD = "flinkpw";
private static final String MY_DATABASE = "myDB";
private static final String MY_TABLE = "myTable";
private static final String MY_SCHEMA = "public";
private static final Properties PROPERTIES = new Properties();
@Test
public void testCommonProperties() {
Map<String, String> properties = getAllOptions();
// validation for source
DynamicTableSource actualSource = createTableSource(properties);
PostgreSQLTableSource expectedSource = new PostgreSQLTableSource(
TableSchemaUtils.getPhysicalSchema(SCHEMA),
5432,
MY_LOCALHOST,
MY_DATABASE,
MY_SCHEMA,
MY_TABLE,
MY_USERNAME,
MY_PASSWORD,
"decoderbufs",
"flink",
PROPERTIES);
assertEquals(expectedSource, actualSource);
}
@Test
public void testOptionalProperties() {
Map<String, String> options = getAllOptions();
options.put("port", "5444");
options.put("decoding.plugin.name", "wal2json");
options.put("debezium.snapshot.mode", "never");
DynamicTableSource actualSource = createTableSource(options);
Properties dbzProperties = new Properties();
dbzProperties.put("snapshot.mode", "never");
PostgreSQLTableSource expectedSource = new PostgreSQLTableSource(
TableSchemaUtils.getPhysicalSchema(SCHEMA),
5444,
MY_LOCALHOST,
MY_DATABASE,
MY_SCHEMA,
MY_TABLE,
MY_USERNAME,
MY_PASSWORD,
"wal2json",
"flink",
dbzProperties);
assertEquals(expectedSource, actualSource);
}
@Test
public void testValidation() {
// validate illegal port
try {
Map<String, String> properties = getAllOptions();
properties.put("port", "123b");
createTableSource(properties);
fail("exception expected");
} catch (Throwable t) {
assertTrue(ExceptionUtils.findThrowableWithMessage(t,
"Could not parse value '123b' for key 'port'.").isPresent());
}
// validate missing required
Factory factory = new PostgreSQLTableFactory();
for (ConfigOption<?> requiredOption : factory.requiredOptions()) {
Map<String, String> properties = getAllOptions();
properties.remove(requiredOption.key());
try {
createTableSource(properties);
fail("exception expected");
} catch (Throwable t) {
assertTrue(ExceptionUtils.findThrowableWithMessage(t,
"Missing required options are:\n\n" + requiredOption.key()).isPresent());
}
}
// validate unsupported option
try {
Map<String, String> properties = getAllOptions();
properties.put("unknown", "abc");
createTableSource(properties);
fail("exception expected");
} catch (Throwable t) {
assertTrue(ExceptionUtils.findThrowableWithMessage(t,
"Unsupported options:\n\nunknown").isPresent());
}
}
private Map<String, String> getAllOptions() {
Map<String, String> options = new HashMap<>();
options.put("connector", "postgres-cdc");
options.put("hostname", MY_LOCALHOST);
options.put("database-name", MY_DATABASE);
options.put("schema-name", MY_SCHEMA);
options.put("table-name", MY_TABLE);
options.put("username", MY_USERNAME);
options.put("password", MY_PASSWORD);
return options;
}
private static DynamicTableSource createTableSource(Map<String, String> options) {
return FactoryUtil.createTableSource(
null,
ObjectIdentifier.of("default", "default", "t1"),
new CatalogTableImpl(SCHEMA, options, "mock source"),
new Configuration(),
PostgreSQLTableFactoryTest.class.getClassLoader(),
false);
}
private static final TableSchema SCHEMA =
TableSchema.builder()
.field("aaa", DataTypes.INT().notNull())
.field("bbb", DataTypes.STRING().notNull())
.field("ccc", DataTypes.DOUBLE())
.field("ddd", DataTypes.DECIMAL(31, 18))
.field("eee", DataTypes.TIMESTAMP(3))
.primaryKey("bbb", "aaa")
.build();
private static final String MY_LOCALHOST = "localhost";
private static final String MY_USERNAME = "flinkuser";
private static final String MY_PASSWORD = "flinkpw";
private static final String MY_DATABASE = "myDB";
private static final String MY_TABLE = "myTable";
private static final String MY_SCHEMA = "public";
private static final Properties PROPERTIES = new Properties();
@Test
public void testCommonProperties() {
Map<String, String> properties = getAllOptions();
// validation for source
DynamicTableSource actualSource = createTableSource(properties);
PostgreSQLTableSource expectedSource =
new PostgreSQLTableSource(
TableSchemaUtils.getPhysicalSchema(SCHEMA),
5432,
MY_LOCALHOST,
MY_DATABASE,
MY_SCHEMA,
MY_TABLE,
MY_USERNAME,
MY_PASSWORD,
"decoderbufs",
"flink",
PROPERTIES);
assertEquals(expectedSource, actualSource);
}
@Test
public void testOptionalProperties() {
Map<String, String> options = getAllOptions();
options.put("port", "5444");
options.put("decoding.plugin.name", "wal2json");
options.put("debezium.snapshot.mode", "never");
DynamicTableSource actualSource = createTableSource(options);
Properties dbzProperties = new Properties();
dbzProperties.put("snapshot.mode", "never");
PostgreSQLTableSource expectedSource =
new PostgreSQLTableSource(
TableSchemaUtils.getPhysicalSchema(SCHEMA),
5444,
MY_LOCALHOST,
MY_DATABASE,
MY_SCHEMA,
MY_TABLE,
MY_USERNAME,
MY_PASSWORD,
"wal2json",
"flink",
dbzProperties);
assertEquals(expectedSource, actualSource);
}
@Test
public void testValidation() {
// validate illegal port
try {
Map<String, String> properties = getAllOptions();
properties.put("port", "123b");
createTableSource(properties);
fail("exception expected");
} catch (Throwable t) {
assertTrue(
ExceptionUtils.findThrowableWithMessage(
t, "Could not parse value '123b' for key 'port'.")
.isPresent());
}
// validate missing required
Factory factory = new PostgreSQLTableFactory();
for (ConfigOption<?> requiredOption : factory.requiredOptions()) {
Map<String, String> properties = getAllOptions();
properties.remove(requiredOption.key());
try {
createTableSource(properties);
fail("exception expected");
} catch (Throwable t) {
assertTrue(
ExceptionUtils.findThrowableWithMessage(
t,
"Missing required options are:\n\n" + requiredOption.key())
.isPresent());
}
}
// validate unsupported option
try {
Map<String, String> properties = getAllOptions();
properties.put("unknown", "abc");
createTableSource(properties);
fail("exception expected");
} catch (Throwable t) {
assertTrue(
ExceptionUtils.findThrowableWithMessage(t, "Unsupported options:\n\nunknown")
.isPresent());
}
}
private Map<String, String> getAllOptions() {
Map<String, String> options = new HashMap<>();
options.put("connector", "postgres-cdc");
options.put("hostname", MY_LOCALHOST);
options.put("database-name", MY_DATABASE);
options.put("schema-name", MY_SCHEMA);
options.put("table-name", MY_TABLE);
options.put("username", MY_USERNAME);
options.put("password", MY_PASSWORD);
return options;
}
private static DynamicTableSource createTableSource(Map<String, String> options) {
return FactoryUtil.createTableSource(
null,
ObjectIdentifier.of("default", "default", "t1"),
new CatalogTableImpl(SCHEMA, options, "mock source"),
new Configuration(),
PostgreSQLTableFactoryTest.class.getClassLoader(),
false);
}
}

@ -26,194 +26,198 @@ import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertNull;
/**
* Utilities for asserting {@link SourceRecord}.
*/
/** Utilities for asserting {@link SourceRecord}. */
public class AssertUtils {
/**
* Verify that the given {@link SourceRecord} is a {@link Envelope.Operation#CREATE INSERT/CREATE} record.
*
* @param record the source record; may not be null
*/
public static void assertInsert(SourceRecord record, boolean keyExpected) {
if (keyExpected) {
assertNotNull(record.key());
assertNotNull(record.keySchema());
}
else {
assertNull(record.key());
assertNull(record.keySchema());
}
assertNotNull(record.valueSchema());
Struct value = (Struct) record.value();
assertNotNull(value);
assertEquals(Envelope.Operation.CREATE.code(), value.getString(Envelope.FieldName.OPERATION));
assertNotNull(value.get(Envelope.FieldName.AFTER));
assertNull(value.get(Envelope.FieldName.BEFORE));
}
/**
* Verify that the given {@link SourceRecord} is a {@link Envelope.Operation#READ READ} record.
*
* @param record the source record; may not be null
*/
public static void assertRead(SourceRecord record) {
assertNotNull(record.key());
assertNotNull(record.keySchema());
assertNotNull(record.valueSchema());
Struct value = (Struct) record.value();
assertNotNull(value);
assertEquals(Envelope.Operation.READ.code(), value.getString(Envelope.FieldName.OPERATION));
assertNotNull(value.get(Envelope.FieldName.AFTER));
assertNull(value.get(Envelope.FieldName.BEFORE));
}
/**
* Verify that the given {@link SourceRecord} is a {@link Envelope.Operation#UPDATE UPDATE} record.
*
* @param record the source record; may not be null
*/
public static void assertUpdate(SourceRecord record, boolean keyExpected) {
if (keyExpected) {
assertNotNull(record.key());
assertNotNull(record.keySchema());
}
else {
assertNull(record.key());
assertNull(record.keySchema());
}
assertNotNull(record.valueSchema());
Struct value = (Struct) record.value();
assertNotNull(value);
assertEquals(Envelope.Operation.UPDATE.code(), value.getString(Envelope.FieldName.OPERATION));
assertNotNull(value.get(Envelope.FieldName.AFTER));
}
/**
* Verify that the given {@link SourceRecord} is a {@link Envelope.Operation#DELETE DELETE} record.
*
* @param record the source record; may not be null
*/
public static void assertDelete(SourceRecord record, boolean keyExpected) {
if (keyExpected) {
assertNotNull(record.key());
assertNotNull(record.keySchema());
}
else {
assertNull(record.key());
assertNull(record.keySchema());
}
assertNotNull(record.valueSchema());
Struct value = (Struct) record.value();
assertNotNull(value);
assertEquals(Envelope.Operation.DELETE.code(), value.getString(Envelope.FieldName.OPERATION));
assertNotNull(value.get(Envelope.FieldName.BEFORE));
assertNull(value.get(Envelope.FieldName.AFTER));
}
/**
* Verify that the given {@link SourceRecord} is a valid tombstone, meaning it has a non-null key and key schema but null
* value and value schema.
*
* @param record the source record; may not be null
*/
public static void assertTombstone(SourceRecord record) {
assertNotNull(record.key());
assertNotNull(record.keySchema());
assertNull(record.value());
assertNull(record.valueSchema());
}
/**
* Verify that the given {@link SourceRecord} has a valid non-null integer key that matches the expected integer value.
*
* @param record the source record; may not be null
* @param pkField the single field defining the primary key of the struct; may not be null
* @param pk the expected integer value of the primary key in the struct
*/
public static void hasValidKey(SourceRecord record, String pkField, int pk) {
Struct key = (Struct) record.key();
assertEquals(pk, key.get(pkField));
}
/**
* Verify that the given {@link SourceRecord} is a {@link Envelope.Operation#CREATE INSERT/CREATE} record without primary key.
*
* @param record the source record; may not be null
*/
public static void assertInsert(SourceRecord record) {
assertInsert(record, false);
}
/**
* Verify that the given {@link SourceRecord} is a {@link Envelope.Operation#CREATE INSERT/CREATE} record, and that the integer key
* matches the expected value.
*
* @param record the source record; may not be null
* @param pkField the single field defining the primary key of the struct; may not be null
* @param pk the expected integer value of the primary key in the struct
*/
public static void assertInsert(SourceRecord record, String pkField, int pk) {
hasValidKey(record, pkField, pk);
assertInsert(record, true);
}
/**
     * Verify that the given {@link SourceRecord} is a {@link Envelope.Operation#READ READ} record, and that the integer key
* matches the expected value.
*
* @param record the source record; may not be null
* @param pkField the single field defining the primary key of the struct; may not be null
* @param pk the expected integer value of the primary key in the struct
*/
public static void assertRead(SourceRecord record, String pkField, int pk) {
hasValidKey(record, pkField, pk);
assertRead(record);
}
/**
* Verify that the given {@link SourceRecord} is a {@link Envelope.Operation#UPDATE UPDATE} record without PK.
*
* @param record the source record; may not be null
*/
public static void assertUpdate(SourceRecord record) {
assertUpdate(record, false);
}
/**
* Verify that the given {@link SourceRecord} is a {@link Envelope.Operation#UPDATE UPDATE} record, and that the integer key
* matches the expected value.
*
* @param record the source record; may not be null
* @param pkField the single field defining the primary key of the struct; may not be null
* @param pk the expected integer value of the primary key in the struct
*/
public static void assertUpdate(SourceRecord record, String pkField, int pk) {
hasValidKey(record, pkField, pk);
assertUpdate(record, true);
}
/**
     * Verify that the given {@link SourceRecord} is a {@link Envelope.Operation#DELETE DELETE} record without PK.
*
* @param record the source record; may not be null
*/
public static void assertDelete(SourceRecord record) {
assertDelete(record, false);
}
/**
* Verify that the given {@link SourceRecord} is a {@link Envelope.Operation#DELETE DELETE} record, and that the integer key
* matches the expected value.
*
* @param record the source record; may not be null
* @param pkField the single field defining the primary key of the struct; may not be null
* @param pk the expected integer value of the primary key in the struct
*/
public static void assertDelete(SourceRecord record, String pkField, int pk) {
hasValidKey(record, pkField, pk);
assertDelete(record, true);
}
/**
* Verify that the given {@link SourceRecord} is a {@link Envelope.Operation#CREATE
* INSERT/CREATE} record.
*
* @param record the source record; may not be null
*/
public static void assertInsert(SourceRecord record, boolean keyExpected) {
if (keyExpected) {
assertNotNull(record.key());
assertNotNull(record.keySchema());
} else {
assertNull(record.key());
assertNull(record.keySchema());
}
assertNotNull(record.valueSchema());
Struct value = (Struct) record.value();
assertNotNull(value);
assertEquals(
Envelope.Operation.CREATE.code(), value.getString(Envelope.FieldName.OPERATION));
assertNotNull(value.get(Envelope.FieldName.AFTER));
assertNull(value.get(Envelope.FieldName.BEFORE));
}
/**
* Verify that the given {@link SourceRecord} is a {@link Envelope.Operation#READ READ} record.
*
* @param record the source record; may not be null
*/
public static void assertRead(SourceRecord record) {
assertNotNull(record.key());
assertNotNull(record.keySchema());
assertNotNull(record.valueSchema());
Struct value = (Struct) record.value();
assertNotNull(value);
assertEquals(Envelope.Operation.READ.code(), value.getString(Envelope.FieldName.OPERATION));
assertNotNull(value.get(Envelope.FieldName.AFTER));
assertNull(value.get(Envelope.FieldName.BEFORE));
}
/**
* Verify that the given {@link SourceRecord} is a {@link Envelope.Operation#UPDATE UPDATE}
* record.
*
* @param record the source record; may not be null
*/
public static void assertUpdate(SourceRecord record, boolean keyExpected) {
if (keyExpected) {
assertNotNull(record.key());
assertNotNull(record.keySchema());
} else {
assertNull(record.key());
assertNull(record.keySchema());
}
assertNotNull(record.valueSchema());
Struct value = (Struct) record.value();
assertNotNull(value);
assertEquals(
Envelope.Operation.UPDATE.code(), value.getString(Envelope.FieldName.OPERATION));
assertNotNull(value.get(Envelope.FieldName.AFTER));
}
/**
* Verify that the given {@link SourceRecord} is a {@link Envelope.Operation#DELETE DELETE}
* record.
*
* @param record the source record; may not be null
*/
public static void assertDelete(SourceRecord record, boolean keyExpected) {
if (keyExpected) {
assertNotNull(record.key());
assertNotNull(record.keySchema());
} else {
assertNull(record.key());
assertNull(record.keySchema());
}
assertNotNull(record.valueSchema());
Struct value = (Struct) record.value();
assertNotNull(value);
assertEquals(
Envelope.Operation.DELETE.code(), value.getString(Envelope.FieldName.OPERATION));
assertNotNull(value.get(Envelope.FieldName.BEFORE));
assertNull(value.get(Envelope.FieldName.AFTER));
}
/**
* Verify that the given {@link SourceRecord} is a valid tombstone, meaning it has a non-null
* key and key schema but null value and value schema.
*
* @param record the source record; may not be null
*/
public static void assertTombstone(SourceRecord record) {
assertNotNull(record.key());
assertNotNull(record.keySchema());
assertNull(record.value());
assertNull(record.valueSchema());
}
/**
* Verify that the given {@link SourceRecord} has a valid non-null integer key that matches the
* expected integer value.
*
* @param record the source record; may not be null
* @param pkField the single field defining the primary key of the struct; may not be null
* @param pk the expected integer value of the primary key in the struct
*/
public static void hasValidKey(SourceRecord record, String pkField, int pk) {
Struct key = (Struct) record.key();
assertEquals(pk, key.get(pkField));
}
/**
* Verify that the given {@link SourceRecord} is a {@link Envelope.Operation#CREATE
* INSERT/CREATE} record without primary key.
*
* @param record the source record; may not be null
*/
public static void assertInsert(SourceRecord record) {
assertInsert(record, false);
}
/**
* Verify that the given {@link SourceRecord} is a {@link Envelope.Operation#CREATE
* INSERT/CREATE} record, and that the integer key matches the expected value.
*
* @param record the source record; may not be null
* @param pkField the single field defining the primary key of the struct; may not be null
* @param pk the expected integer value of the primary key in the struct
*/
public static void assertInsert(SourceRecord record, String pkField, int pk) {
hasValidKey(record, pkField, pk);
assertInsert(record, true);
}
/**
     * Verify that the given {@link SourceRecord} is a {@link Envelope.Operation#READ READ}
* record, and that the integer key matches the expected value.
*
* @param record the source record; may not be null
* @param pkField the single field defining the primary key of the struct; may not be null
* @param pk the expected integer value of the primary key in the struct
*/
public static void assertRead(SourceRecord record, String pkField, int pk) {
hasValidKey(record, pkField, pk);
assertRead(record);
}
/**
* Verify that the given {@link SourceRecord} is a {@link Envelope.Operation#UPDATE UPDATE}
* record without PK.
*
* @param record the source record; may not be null
*/
public static void assertUpdate(SourceRecord record) {
assertUpdate(record, false);
}
/**
* Verify that the given {@link SourceRecord} is a {@link Envelope.Operation#UPDATE UPDATE}
* record, and that the integer key matches the expected value.
*
* @param record the source record; may not be null
* @param pkField the single field defining the primary key of the struct; may not be null
* @param pk the expected integer value of the primary key in the struct
*/
public static void assertUpdate(SourceRecord record, String pkField, int pk) {
hasValidKey(record, pkField, pk);
assertUpdate(record, true);
}
/**
* Verify that the given {@link SourceRecord} is a {@link Envelope.Operation#DELETE DELETE}
     * record without PK.
*
* @param record the source record; may not be null
*/
public static void assertDelete(SourceRecord record) {
assertDelete(record, false);
}
/**
* Verify that the given {@link SourceRecord} is a {@link Envelope.Operation#DELETE DELETE}
* record, and that the integer key matches the expected value.
*
* @param record the source record; may not be null
* @param pkField the single field defining the primary key of the struct; may not be null
* @param pk the expected integer value of the primary key in the struct
*/
public static void assertDelete(SourceRecord record, String pkField, int pk) {
hasValidKey(record, pkField, pk);
assertDelete(record, true);
}
}
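For orientation, a minimal, hypothetical usage sketch of the utilities above (not part of this commit). It assumes a Debezium {@code SourceRecord} captured from a table whose integer primary key column is named "id"; the class and method names are illustrative only.

// Hypothetical usage sketch, not part of this commit.
import org.apache.kafka.connect.source.SourceRecord;

public class AssertUtilsUsageSketch {
    static void checkCapturedRecords(SourceRecord insert, SourceRecord tombstone) {
        // asserts the CREATE operation code, a non-null AFTER image, and key field "id" == 101
        AssertUtils.assertInsert(insert, "id", 101);
        // asserts a non-null key and key schema together with a null value and value schema
        AssertUtils.assertTombstone(tombstone);
    }
}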

@ -23,46 +23,44 @@ import org.apache.flink.streaming.runtime.streamrecord.StreamRecord;
import java.util.concurrent.LinkedBlockingQueue;
/**
* A testable {@link SourceFunction.SourceContext}.
*/
/** A testable {@link SourceFunction.SourceContext}. */
public class TestSourceContext<T> implements SourceFunction.SourceContext<T> {
private final Object checkpointLock = new Object();
private final Object checkpointLock = new Object();
private LinkedBlockingQueue<StreamRecord<T>> collectedOutputs = new LinkedBlockingQueue<>();
private LinkedBlockingQueue<StreamRecord<T>> collectedOutputs = new LinkedBlockingQueue<>();
@Override
public void collect(T element) {
this.collectedOutputs.add(new StreamRecord<>(element));
}
@Override
public void collect(T element) {
this.collectedOutputs.add(new StreamRecord<>(element));
}
@Override
public void collectWithTimestamp(T element, long timestamp) {
this.collectedOutputs.offer(new StreamRecord<>(element, timestamp));
}
@Override
public void collectWithTimestamp(T element, long timestamp) {
this.collectedOutputs.offer(new StreamRecord<>(element, timestamp));
}
@Override
public void emitWatermark(Watermark mark) {
throw new UnsupportedOperationException();
}
@Override
public void emitWatermark(Watermark mark) {
throw new UnsupportedOperationException();
}
@Override
public void markAsTemporarilyIdle() {}
@Override
public void markAsTemporarilyIdle() {}
@Override
public Object getCheckpointLock() {
return checkpointLock;
}
@Override
public Object getCheckpointLock() {
return checkpointLock;
}
@Override
public void close() {}
@Override
public void close() {}
public StreamRecord<T> removeLatestOutput() {
return collectedOutputs.poll();
}
public StreamRecord<T> removeLatestOutput() {
return collectedOutputs.poll();
}
public LinkedBlockingQueue<StreamRecord<T>> getCollectedOutputs() {
return collectedOutputs;
}
public LinkedBlockingQueue<StreamRecord<T>> getCollectedOutputs() {
return collectedOutputs;
}
}
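As a point of reference, the context is meant to be handed to a source under test and then drained through {@code removeLatestOutput()}. A minimal, hypothetical sketch (not part of this commit); the class and method names below are illustrative only.

// Hypothetical usage sketch, not part of this commit.
import org.apache.flink.streaming.runtime.streamrecord.StreamRecord;

public class TestSourceContextUsageSketch {
    static void example() {
        TestSourceContext<String> ctx = new TestSourceContext<>();
        // buffered as a StreamRecord without a timestamp
        ctx.collect("hello");
        // poll the buffered output; returns null once the queue is empty
        StreamRecord<String> latest = ctx.removeLatestOutput();
        // latest.getValue() is "hello"; getCollectedOutputs() is now empty
    }
}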

@ -36,110 +36,115 @@ import java.util.Objects;
import static java.lang.String.format;
import static org.apache.flink.table.types.utils.TypeConversions.fromLogicalToDataType;
/**
* Deserialization schema from Changelog Json to Flink Table/SQL internal data structure {@link RowData}.
* Deserialization schema from Changelog Json to Flink Table/SQL internal data structure {@link
* RowData}.
*/
public class ChangelogJsonDeserializationSchema implements DeserializationSchema<RowData> {
private static final long serialVersionUID = -2084214292622004460L;
/** The deserializer to deserialize Debezium JSON data. */
private final JsonRowDataDeserializationSchema jsonDeserializer;
/** TypeInformation of the produced {@link RowData}. **/
private final TypeInformation<RowData> resultTypeInfo;
/** Flag indicating whether to ignore invalid fields/rows (default: throw an exception). */
private final boolean ignoreParseErrors;
public ChangelogJsonDeserializationSchema(
RowType rowType,
TypeInformation<RowData> resultTypeInfo,
boolean ignoreParseErrors,
TimestampFormat timestampFormatOption) {
this.resultTypeInfo = resultTypeInfo;
this.ignoreParseErrors = ignoreParseErrors;
this.jsonDeserializer = new JsonRowDataDeserializationSchema(
createJsonRowType(fromLogicalToDataType(rowType)),
// the result type is never used, so it's fine to pass in Debezium's result type
resultTypeInfo,
false, // ignoreParseErrors already contains the functionality of failOnMissingField
ignoreParseErrors,
timestampFormatOption);
}
@Override
public RowData deserialize(byte[] message) throws IOException {
throw new RuntimeException(
"Please invoke DeserializationSchema#deserialize(byte[], Collector<RowData>) instead.");
}
@Override
public void deserialize(byte[] bytes, Collector<RowData> out) throws IOException {
try {
GenericRowData row = (GenericRowData) jsonDeserializer.deserialize(bytes);
GenericRowData data = (GenericRowData) row.getField(0);
String op = row.getString(1).toString();
RowKind rowKind = parseRowKind(op);
data.setRowKind(rowKind);
out.collect(data);
} catch (Throwable t) {
// a big try catch to protect the processing.
if (!ignoreParseErrors) {
throw new IOException(format(
"Corrupt Debezium JSON message '%s'.", new String(bytes)), t);
}
}
}
private static RowKind parseRowKind(String op) {
switch (op) {
case "+I":
return RowKind.INSERT;
case "-U":
return RowKind.UPDATE_BEFORE;
case "+U":
return RowKind.UPDATE_AFTER;
case "-D":
return RowKind.DELETE;
default:
throw new UnsupportedOperationException("Unsupported operation '" + op + "' for row kind.");
}
}
@Override
public boolean isEndOfStream(RowData rowData) {
return false;
}
@Override
public TypeInformation<RowData> getProducedType() {
return resultTypeInfo;
}
@Override
public boolean equals(Object o) {
if (this == o) {
return true;
}
if (o == null || getClass() != o.getClass()) {
return false;
}
ChangelogJsonDeserializationSchema that = (ChangelogJsonDeserializationSchema) o;
return ignoreParseErrors == that.ignoreParseErrors &&
Objects.equals(jsonDeserializer, that.jsonDeserializer) &&
Objects.equals(resultTypeInfo, that.resultTypeInfo);
}
@Override
public int hashCode() {
return Objects.hash(jsonDeserializer, resultTypeInfo, ignoreParseErrors);
}
private static RowType createJsonRowType(DataType databaseSchema) {
DataType payload = DataTypes.ROW(
DataTypes.FIELD("data", databaseSchema),
DataTypes.FIELD("op", DataTypes.STRING()));
return (RowType) payload.getLogicalType();
}
private static final long serialVersionUID = -2084214292622004460L;
/** The deserializer to deserialize Debezium JSON data. */
private final JsonRowDataDeserializationSchema jsonDeserializer;
/** TypeInformation of the produced {@link RowData}. * */
private final TypeInformation<RowData> resultTypeInfo;
/** Flag indicating whether to ignore invalid fields/rows (default: throw an exception). */
private final boolean ignoreParseErrors;
public ChangelogJsonDeserializationSchema(
RowType rowType,
TypeInformation<RowData> resultTypeInfo,
boolean ignoreParseErrors,
TimestampFormat timestampFormatOption) {
this.resultTypeInfo = resultTypeInfo;
this.ignoreParseErrors = ignoreParseErrors;
this.jsonDeserializer =
new JsonRowDataDeserializationSchema(
createJsonRowType(fromLogicalToDataType(rowType)),
// the result type is never used, so it's fine to pass in Debezium's result
// type
resultTypeInfo,
false, // ignoreParseErrors already contains the functionality of
// failOnMissingField
ignoreParseErrors,
timestampFormatOption);
}
@Override
public RowData deserialize(byte[] message) throws IOException {
throw new RuntimeException(
"Please invoke DeserializationSchema#deserialize(byte[], Collector<RowData>) instead.");
}
@Override
public void deserialize(byte[] bytes, Collector<RowData> out) throws IOException {
try {
GenericRowData row = (GenericRowData) jsonDeserializer.deserialize(bytes);
GenericRowData data = (GenericRowData) row.getField(0);
String op = row.getString(1).toString();
RowKind rowKind = parseRowKind(op);
data.setRowKind(rowKind);
out.collect(data);
} catch (Throwable t) {
// a big try catch to protect the processing.
if (!ignoreParseErrors) {
throw new IOException(
format("Corrupt Debezium JSON message '%s'.", new String(bytes)), t);
}
}
}
private static RowKind parseRowKind(String op) {
switch (op) {
case "+I":
return RowKind.INSERT;
case "-U":
return RowKind.UPDATE_BEFORE;
case "+U":
return RowKind.UPDATE_AFTER;
case "-D":
return RowKind.DELETE;
default:
throw new UnsupportedOperationException(
"Unsupported operation '" + op + "' for row kind.");
}
}
@Override
public boolean isEndOfStream(RowData rowData) {
return false;
}
@Override
public TypeInformation<RowData> getProducedType() {
return resultTypeInfo;
}
@Override
public boolean equals(Object o) {
if (this == o) {
return true;
}
if (o == null || getClass() != o.getClass()) {
return false;
}
ChangelogJsonDeserializationSchema that = (ChangelogJsonDeserializationSchema) o;
return ignoreParseErrors == that.ignoreParseErrors
&& Objects.equals(jsonDeserializer, that.jsonDeserializer)
&& Objects.equals(resultTypeInfo, that.resultTypeInfo);
}
@Override
public int hashCode() {
return Objects.hash(jsonDeserializer, resultTypeInfo, ignoreParseErrors);
}
private static RowType createJsonRowType(DataType databaseSchema) {
DataType payload =
DataTypes.ROW(
DataTypes.FIELD("data", databaseSchema),
DataTypes.FIELD("op", DataTypes.STRING()));
return (RowType) payload.getLogicalType();
}
}
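To illustrate the wire format this schema consumes, a minimal, hypothetical sketch (not part of this commit) that feeds one changelog-json document through the deserializer. It assumes a schema instance built from the (id, name, description, weight) row type used by ChangelogJsonSerDeTest further below; the class and method names are illustrative only.

// Hypothetical usage sketch, not part of this commit.
import java.nio.charset.StandardCharsets;

import org.apache.flink.table.data.RowData;
import org.apache.flink.util.Collector;

public class ChangelogJsonDeserializationSketch {
    static void feedOne(ChangelogJsonDeserializationSchema schema, Collector<RowData> out)
            throws Exception {
        // each message is a complete document: {"data": {...}, "op": "+I" | "-U" | "+U" | "-D"}
        String line =
                "{\"data\":{\"id\":101,\"name\":\"scooter\",\"description\":\"Small 2-wheel scooter\",\"weight\":3.14},\"op\":\"+I\"}";
        // emits one RowData whose RowKind is INSERT because the op code is "+I"
        schema.deserialize(line.getBytes(StandardCharsets.UTF_8), out);
    }
}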

@ -44,91 +44,90 @@ import java.util.HashSet;
import java.util.Set;
/**
* Format factory for providing configured instances of Debezium JSON to RowData {@link DeserializationSchema}.
* Format factory for providing configured instances of Debezium JSON to RowData {@link
* DeserializationSchema}.
*/
public class ChangelogJsonFormatFactory implements DeserializationFormatFactory, SerializationFormatFactory {
public static final String IDENTIFIER = "changelog-json";
public static final ConfigOption<Boolean> IGNORE_PARSE_ERRORS = JsonOptions.IGNORE_PARSE_ERRORS;
public static final ConfigOption<String> TIMESTAMP_FORMAT = JsonOptions.TIMESTAMP_FORMAT;
@Override
public DecodingFormat<DeserializationSchema<RowData>> createDecodingFormat(
DynamicTableFactory.Context context, ReadableConfig formatOptions) {
FactoryUtil.validateFactoryOptions(this, formatOptions);
final boolean ignoreParseErrors = formatOptions.get(IGNORE_PARSE_ERRORS);
TimestampFormat timestampFormat = JsonOptions.getTimestampFormat(formatOptions);
return new DecodingFormat<DeserializationSchema<RowData>>() {
@Override
public DeserializationSchema<RowData> createRuntimeDecoder(
DynamicTableSource.Context context, DataType producedDataType) {
final RowType rowType = (RowType) producedDataType.getLogicalType();
final TypeInformation<RowData> rowDataTypeInfo = context.createTypeInformation(producedDataType);
return new ChangelogJsonDeserializationSchema(
rowType,
rowDataTypeInfo,
ignoreParseErrors,
timestampFormat);
}
@Override
public ChangelogMode getChangelogMode() {
return ChangelogMode.newBuilder()
.addContainedKind(RowKind.INSERT)
.addContainedKind(RowKind.UPDATE_BEFORE)
.addContainedKind(RowKind.UPDATE_AFTER)
.addContainedKind(RowKind.DELETE)
.build();
}
};
}
@Override
public EncodingFormat<SerializationSchema<RowData>> createEncodingFormat(
DynamicTableFactory.Context context, ReadableConfig formatOptions) {
FactoryUtil.validateFactoryOptions(this, formatOptions);
TimestampFormat timestampFormat = JsonOptions.getTimestampFormat(formatOptions);
return new EncodingFormat<SerializationSchema<RowData>>() {
@Override
public ChangelogMode getChangelogMode() {
return ChangelogMode.newBuilder()
.addContainedKind(RowKind.INSERT)
.addContainedKind(RowKind.UPDATE_BEFORE)
.addContainedKind(RowKind.UPDATE_AFTER)
.addContainedKind(RowKind.DELETE)
.build();
}
@Override
public SerializationSchema<RowData> createRuntimeEncoder(DynamicTableSink.Context context, DataType consumedDataType) {
final RowType rowType = (RowType) consumedDataType.getLogicalType();
return new ChangelogJsonSerializationSchema(
rowType,
timestampFormat);
}
};
}
@Override
public String factoryIdentifier() {
return IDENTIFIER;
}
@Override
public Set<ConfigOption<?>> requiredOptions() {
return Collections.emptySet();
}
@Override
public Set<ConfigOption<?>> optionalOptions() {
Set<ConfigOption<?>> options = new HashSet<>();
options.add(IGNORE_PARSE_ERRORS);
options.add(TIMESTAMP_FORMAT);
return options;
}
public class ChangelogJsonFormatFactory
implements DeserializationFormatFactory, SerializationFormatFactory {
public static final String IDENTIFIER = "changelog-json";
public static final ConfigOption<Boolean> IGNORE_PARSE_ERRORS = JsonOptions.IGNORE_PARSE_ERRORS;
public static final ConfigOption<String> TIMESTAMP_FORMAT = JsonOptions.TIMESTAMP_FORMAT;
@Override
public DecodingFormat<DeserializationSchema<RowData>> createDecodingFormat(
DynamicTableFactory.Context context, ReadableConfig formatOptions) {
FactoryUtil.validateFactoryOptions(this, formatOptions);
final boolean ignoreParseErrors = formatOptions.get(IGNORE_PARSE_ERRORS);
TimestampFormat timestampFormat = JsonOptions.getTimestampFormat(formatOptions);
return new DecodingFormat<DeserializationSchema<RowData>>() {
@Override
public DeserializationSchema<RowData> createRuntimeDecoder(
DynamicTableSource.Context context, DataType producedDataType) {
final RowType rowType = (RowType) producedDataType.getLogicalType();
final TypeInformation<RowData> rowDataTypeInfo =
context.createTypeInformation(producedDataType);
return new ChangelogJsonDeserializationSchema(
rowType, rowDataTypeInfo, ignoreParseErrors, timestampFormat);
}
@Override
public ChangelogMode getChangelogMode() {
return ChangelogMode.newBuilder()
.addContainedKind(RowKind.INSERT)
.addContainedKind(RowKind.UPDATE_BEFORE)
.addContainedKind(RowKind.UPDATE_AFTER)
.addContainedKind(RowKind.DELETE)
.build();
}
};
}
@Override
public EncodingFormat<SerializationSchema<RowData>> createEncodingFormat(
DynamicTableFactory.Context context, ReadableConfig formatOptions) {
FactoryUtil.validateFactoryOptions(this, formatOptions);
TimestampFormat timestampFormat = JsonOptions.getTimestampFormat(formatOptions);
return new EncodingFormat<SerializationSchema<RowData>>() {
@Override
public ChangelogMode getChangelogMode() {
return ChangelogMode.newBuilder()
.addContainedKind(RowKind.INSERT)
.addContainedKind(RowKind.UPDATE_BEFORE)
.addContainedKind(RowKind.UPDATE_AFTER)
.addContainedKind(RowKind.DELETE)
.build();
}
@Override
public SerializationSchema<RowData> createRuntimeEncoder(
DynamicTableSink.Context context, DataType consumedDataType) {
final RowType rowType = (RowType) consumedDataType.getLogicalType();
return new ChangelogJsonSerializationSchema(rowType, timestampFormat);
}
};
}
@Override
public String factoryIdentifier() {
return IDENTIFIER;
}
@Override
public Set<ConfigOption<?>> requiredOptions() {
return Collections.emptySet();
}
@Override
public Set<ConfigOption<?>> optionalOptions() {
Set<ConfigOption<?>> options = new HashSet<>();
options.add(IGNORE_PARSE_ERRORS);
options.add(TIMESTAMP_FORMAT);
return options;
}
}

@ -35,83 +35,85 @@ import java.util.Objects;
import static org.apache.flink.table.types.utils.TypeConversions.fromLogicalToDataType;
/**
* Serialization schema from Flink Table/SQL internal data structure {@link RowData} to Changelog Json.
* Serialization schema from Flink Table/SQL internal data structure {@link RowData} to Changelog
* Json.
*/
public class ChangelogJsonSerializationSchema implements SerializationSchema<RowData> {
private static final long serialVersionUID = -3999450457829887684L;
private static final StringData OP_INSERT = StringData.fromString("+I");
private static final StringData OP_UPDATE_BEFORE = StringData.fromString("-U");
private static final StringData OP_UPDATE_AFTER = StringData.fromString("+U");
private static final StringData OP_DELETE = StringData.fromString("-D");
private final JsonRowDataSerializationSchema jsonSerializer;
/** Timestamp format specification which is used to parse timestamp. */
private final TimestampFormat timestampFormat;
private transient GenericRowData reuse;
public ChangelogJsonSerializationSchema(
RowType rowType,
TimestampFormat timestampFormat) {
this.jsonSerializer = new JsonRowDataSerializationSchema(
createJsonRowType(fromLogicalToDataType(rowType)),
timestampFormat,
JsonOptions.MapNullKeyMode.FAIL,
JsonOptions.MAP_NULL_KEY_LITERAL.defaultValue());
this.timestampFormat = timestampFormat;
}
@Override
public void open(InitializationContext context) throws Exception {
this.reuse = new GenericRowData(2);
}
@Override
public byte[] serialize(RowData rowData) {
reuse.setField(0, rowData);
reuse.setField(1, stringifyRowKind(rowData.getRowKind()));
return jsonSerializer.serialize(reuse);
}
private static StringData stringifyRowKind(RowKind rowKind) {
switch (rowKind) {
case INSERT:
return OP_INSERT;
case UPDATE_BEFORE:
return OP_UPDATE_BEFORE;
case UPDATE_AFTER:
return OP_UPDATE_AFTER;
case DELETE:
return OP_DELETE;
default:
throw new UnsupportedOperationException("Unsupported operation '" + rowKind + "' for row kind.");
}
}
@Override
public boolean equals(Object o) {
if (this == o) {
return true;
}
if (o == null || getClass() != o.getClass()) {
return false;
}
ChangelogJsonSerializationSchema that = (ChangelogJsonSerializationSchema) o;
return Objects.equals(jsonSerializer, that.jsonSerializer) &&
timestampFormat == that.timestampFormat;
}
@Override
public int hashCode() {
return Objects.hash(jsonSerializer, timestampFormat);
}
private static RowType createJsonRowType(DataType databaseSchema) {
DataType payload = DataTypes.ROW(
DataTypes.FIELD("data", databaseSchema),
DataTypes.FIELD("op", DataTypes.STRING()));
return (RowType) payload.getLogicalType();
}
private static final long serialVersionUID = -3999450457829887684L;
private static final StringData OP_INSERT = StringData.fromString("+I");
private static final StringData OP_UPDATE_BEFORE = StringData.fromString("-U");
private static final StringData OP_UPDATE_AFTER = StringData.fromString("+U");
private static final StringData OP_DELETE = StringData.fromString("-D");
private final JsonRowDataSerializationSchema jsonSerializer;
/** Timestamp format specification which is used to parse timestamp. */
private final TimestampFormat timestampFormat;
private transient GenericRowData reuse;
public ChangelogJsonSerializationSchema(RowType rowType, TimestampFormat timestampFormat) {
this.jsonSerializer =
new JsonRowDataSerializationSchema(
createJsonRowType(fromLogicalToDataType(rowType)),
timestampFormat,
JsonOptions.MapNullKeyMode.FAIL,
JsonOptions.MAP_NULL_KEY_LITERAL.defaultValue());
this.timestampFormat = timestampFormat;
}
@Override
public void open(InitializationContext context) throws Exception {
this.reuse = new GenericRowData(2);
}
@Override
public byte[] serialize(RowData rowData) {
reuse.setField(0, rowData);
reuse.setField(1, stringifyRowKind(rowData.getRowKind()));
return jsonSerializer.serialize(reuse);
}
private static StringData stringifyRowKind(RowKind rowKind) {
switch (rowKind) {
case INSERT:
return OP_INSERT;
case UPDATE_BEFORE:
return OP_UPDATE_BEFORE;
case UPDATE_AFTER:
return OP_UPDATE_AFTER;
case DELETE:
return OP_DELETE;
default:
throw new UnsupportedOperationException(
"Unsupported operation '" + rowKind + "' for row kind.");
}
}
@Override
public boolean equals(Object o) {
if (this == o) {
return true;
}
if (o == null || getClass() != o.getClass()) {
return false;
}
ChangelogJsonSerializationSchema that = (ChangelogJsonSerializationSchema) o;
return Objects.equals(jsonSerializer, that.jsonSerializer)
&& timestampFormat == that.timestampFormat;
}
@Override
public int hashCode() {
return Objects.hash(jsonSerializer, timestampFormat);
}
private static RowType createJsonRowType(DataType databaseSchema) {
DataType payload =
DataTypes.ROW(
DataTypes.FIELD("data", databaseSchema),
DataTypes.FIELD("op", DataTypes.STRING()));
return (RowType) payload.getLogicalType();
}
}
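Conversely, a minimal, hypothetical sketch (not part of this commit) of the serialization direction. It assumes the same (id, name, description, weight) row type; note that {@code open(...)} must run before {@code serialize(...)} so the reusable (data, op) wrapper row exists, exactly as the tests below do. The class and method names are illustrative only.

// Hypothetical usage sketch, not part of this commit.
import org.apache.flink.formats.json.TimestampFormat;
import org.apache.flink.table.data.GenericRowData;
import org.apache.flink.table.data.StringData;
import org.apache.flink.table.types.logical.RowType;
import org.apache.flink.types.RowKind;

public class ChangelogJsonSerializationSketch {
    static byte[] serializeDelete(RowType rowType) throws Exception {
        ChangelogJsonSerializationSchema schema =
                new ChangelogJsonSerializationSchema(rowType, TimestampFormat.SQL);
        schema.open(null); // initializes the reusable (data, op) wrapper row
        GenericRowData row =
                GenericRowData.of(
                        111,
                        StringData.fromString("scooter"),
                        StringData.fromString("Big 2-wheel scooter "),
                        5.17f);
        row.setRowKind(RowKind.DELETE);
        // yields a document of the form {"data":{"id":111,...},"op":"-D"}
        return schema.serialize(row);
    }
}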

@ -49,116 +49,112 @@ import java.util.function.Consumer;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
/**
 * Tests for {@link ChangelogJsonFormatFactory}.
*/
/** Tests for {@link ChangelogJsonFormatFactory}. */
public class ChangelogJsonFormatFactoryTest extends TestLogger {
@Rule
public ExpectedException thrown = ExpectedException.none();
private static final TableSchema SCHEMA = TableSchema.builder()
.field("a", DataTypes.STRING())
.field("b", DataTypes.INT())
.field("c", DataTypes.BOOLEAN())
.build();
private static final RowType ROW_TYPE = (RowType) SCHEMA.toRowDataType().getLogicalType();
@Test
public void testSeDeSchema() {
final ChangelogJsonDeserializationSchema expectedDeser = new ChangelogJsonDeserializationSchema(
ROW_TYPE,
InternalTypeInfo.of(ROW_TYPE),
true,
TimestampFormat.ISO_8601);
final ChangelogJsonSerializationSchema expectedSer = new ChangelogJsonSerializationSchema(
ROW_TYPE,
TimestampFormat.ISO_8601);
final Map<String, String> options = getAllOptions();
final DynamicTableSource actualSource = createTableSource(options);
assert actualSource instanceof TestDynamicTableFactory.DynamicTableSourceMock;
TestDynamicTableFactory.DynamicTableSourceMock scanSourceMock =
(TestDynamicTableFactory.DynamicTableSourceMock) actualSource;
DeserializationSchema<RowData> actualDeser = scanSourceMock.valueFormat
.createRuntimeDecoder(
ScanRuntimeProviderContext.INSTANCE,
SCHEMA.toRowDataType());
assertEquals(expectedDeser, actualDeser);
final DynamicTableSink actualSink = createTableSink(options);
assert actualSink instanceof TestDynamicTableFactory.DynamicTableSinkMock;
TestDynamicTableFactory.DynamicTableSinkMock sinkMock = (TestDynamicTableFactory.DynamicTableSinkMock) actualSink;
SerializationSchema<RowData> actualSer = sinkMock.valueFormat
.createRuntimeEncoder(
new SinkRuntimeProviderContext(false),
SCHEMA.toRowDataType());
assertEquals(expectedSer, actualSer);
}
@Test
public void testInvalidIgnoreParseError() {
final Map<String, String> options =
getModifiedOptions(opts -> opts.put("changelog-json.ignore-parse-errors", "abc"));
try {
createTableSource(options);
} catch (Exception e) {
assertTrue(ExceptionUtils.findThrowableWithMessage(
e,
"Unrecognized option for boolean: abc. Expected either true or false(case insensitive)").isPresent());
}
}
// ------------------------------------------------------------------------
// Utilities
// ------------------------------------------------------------------------
/**
* Returns the full options modified by the given consumer {@code optionModifier}.
*
* @param optionModifier Consumer to modify the options
*/
private Map<String, String> getModifiedOptions(Consumer<Map<String, String>> optionModifier) {
Map<String, String> options = getAllOptions();
optionModifier.accept(options);
return options;
}
private Map<String, String> getAllOptions() {
final Map<String, String> options = new HashMap<>();
options.put("connector", TestDynamicTableFactory.IDENTIFIER);
options.put("target", "MyTarget");
options.put("buffer-size", "1000");
options.put("format", "changelog-json");
options.put("changelog-json.ignore-parse-errors", "true");
options.put("changelog-json.timestamp-format.standard", "ISO-8601");
return options;
}
private static DynamicTableSource createTableSource(Map<String, String> options) {
return FactoryUtil.createTableSource(
null,
ObjectIdentifier.of("default", "default", "t1"),
new CatalogTableImpl(SCHEMA, options, "mock source"),
new Configuration(),
ChangelogJsonFormatFactoryTest.class.getClassLoader(),
false);
}
private static DynamicTableSink createTableSink(Map<String, String> options) {
return FactoryUtil.createTableSink(
null,
ObjectIdentifier.of("default", "default", "t1"),
new CatalogTableImpl(SCHEMA, options, "mock sink"),
new Configuration(),
ChangelogJsonFormatFactoryTest.class.getClassLoader(),
false);
}
@Rule public ExpectedException thrown = ExpectedException.none();
private static final TableSchema SCHEMA =
TableSchema.builder()
.field("a", DataTypes.STRING())
.field("b", DataTypes.INT())
.field("c", DataTypes.BOOLEAN())
.build();
private static final RowType ROW_TYPE = (RowType) SCHEMA.toRowDataType().getLogicalType();
@Test
public void testSeDeSchema() {
final ChangelogJsonDeserializationSchema expectedDeser =
new ChangelogJsonDeserializationSchema(
ROW_TYPE, InternalTypeInfo.of(ROW_TYPE), true, TimestampFormat.ISO_8601);
final ChangelogJsonSerializationSchema expectedSer =
new ChangelogJsonSerializationSchema(ROW_TYPE, TimestampFormat.ISO_8601);
final Map<String, String> options = getAllOptions();
final DynamicTableSource actualSource = createTableSource(options);
assert actualSource instanceof TestDynamicTableFactory.DynamicTableSourceMock;
TestDynamicTableFactory.DynamicTableSourceMock scanSourceMock =
(TestDynamicTableFactory.DynamicTableSourceMock) actualSource;
DeserializationSchema<RowData> actualDeser =
scanSourceMock.valueFormat.createRuntimeDecoder(
ScanRuntimeProviderContext.INSTANCE, SCHEMA.toRowDataType());
assertEquals(expectedDeser, actualDeser);
final DynamicTableSink actualSink = createTableSink(options);
assert actualSink instanceof TestDynamicTableFactory.DynamicTableSinkMock;
TestDynamicTableFactory.DynamicTableSinkMock sinkMock =
(TestDynamicTableFactory.DynamicTableSinkMock) actualSink;
SerializationSchema<RowData> actualSer =
sinkMock.valueFormat.createRuntimeEncoder(
new SinkRuntimeProviderContext(false), SCHEMA.toRowDataType());
assertEquals(expectedSer, actualSer);
}
@Test
public void testInvalidIgnoreParseError() {
final Map<String, String> options =
getModifiedOptions(opts -> opts.put("changelog-json.ignore-parse-errors", "abc"));
try {
createTableSource(options);
} catch (Exception e) {
assertTrue(
ExceptionUtils.findThrowableWithMessage(
e,
"Unrecognized option for boolean: abc. Expected either true or false(case insensitive)")
.isPresent());
}
}
// ------------------------------------------------------------------------
// Utilities
// ------------------------------------------------------------------------
/**
* Returns the full options modified by the given consumer {@code optionModifier}.
*
* @param optionModifier Consumer to modify the options
*/
private Map<String, String> getModifiedOptions(Consumer<Map<String, String>> optionModifier) {
Map<String, String> options = getAllOptions();
optionModifier.accept(options);
return options;
}
private Map<String, String> getAllOptions() {
final Map<String, String> options = new HashMap<>();
options.put("connector", TestDynamicTableFactory.IDENTIFIER);
options.put("target", "MyTarget");
options.put("buffer-size", "1000");
options.put("format", "changelog-json");
options.put("changelog-json.ignore-parse-errors", "true");
options.put("changelog-json.timestamp-format.standard", "ISO-8601");
return options;
}
private static DynamicTableSource createTableSource(Map<String, String> options) {
return FactoryUtil.createTableSource(
null,
ObjectIdentifier.of("default", "default", "t1"),
new CatalogTableImpl(SCHEMA, options, "mock source"),
new Configuration(),
ChangelogJsonFormatFactoryTest.class.getClassLoader(),
false);
}
private static DynamicTableSink createTableSink(Map<String, String> options) {
return FactoryUtil.createTableSink(
null,
ObjectIdentifier.of("default", "default", "t1"),
new CatalogTableImpl(SCHEMA, options, "mock sink"),
new Configuration(),
ChangelogJsonFormatFactoryTest.class.getClassLoader(),
false);
}
}

@ -45,140 +45,141 @@ import static org.apache.flink.table.api.DataTypes.STRING;
import static org.junit.Assert.assertEquals;
/**
* Tests for {@link ChangelogJsonSerializationSchema} and {@link ChangelogJsonDeserializationSchema}.
* Tests for {@link ChangelogJsonSerializationSchema} and {@link
* ChangelogJsonDeserializationSchema}.
*/
public class ChangelogJsonSerDeTest {
private static final RowType SCHEMA = (RowType) ROW(
FIELD("id", INT().notNull()),
FIELD("name", STRING()),
FIELD("description", STRING()),
FIELD("weight", FLOAT())
).getLogicalType();
@Test
public void testSerializationDeserialization() throws Exception {
List<String> lines = readLines("changelog-json-data.txt");
ChangelogJsonDeserializationSchema deserializationSchema = new ChangelogJsonDeserializationSchema(
SCHEMA,
InternalTypeInfo.of(SCHEMA),
false,
TimestampFormat.SQL);
deserializationSchema.open(null);
SimpleCollector collector = new SimpleCollector();
for (String line : lines) {
deserializationSchema.deserialize(line.getBytes(StandardCharsets.UTF_8), collector);
}
// CREATE TABLE product (
// id INTEGER NOT NULL AUTO_INCREMENT PRIMARY KEY,
// name VARCHAR(255),
// description VARCHAR(512),
// weight FLOAT
// );
// ALTER TABLE product AUTO_INCREMENT = 101;
//
// INSERT INTO product
// VALUES (default,"scooter","Small 2-wheel scooter",3.14),
// (default,"car battery","12V car battery",8.1),
// (default,"12-pack drill bits","12-pack of drill bits with sizes ranging from #40 to #3",0.8),
// (default,"hammer","12oz carpenter's hammer",0.75),
// (default,"hammer","14oz carpenter's hammer",0.875),
// (default,"hammer","16oz carpenter's hammer",1.0),
// (default,"rocks","box of assorted rocks",5.3),
// (default,"jacket","water resistent black wind breaker",0.1),
// (default,"spare tire","24 inch spare tire",22.2);
// UPDATE product SET description='18oz carpenter hammer' WHERE id=106;
// UPDATE product SET weight='5.1' WHERE id=107;
// INSERT INTO product VALUES (default,"jacket","water resistent white wind breaker",0.2);
// INSERT INTO product VALUES (default,"scooter","Big 2-wheel scooter ",5.18);
// UPDATE product SET description='new water resistent white wind breaker', weight='0.5' WHERE id=110;
// UPDATE product SET weight='5.17' WHERE id=111;
// DELETE FROM product WHERE id=111;
List<String> expected = Arrays.asList(
"+I(101,scooter,Small 2-wheel scooter,3.14)",
"+I(102,car battery,12V car battery,8.1)",
"+I(103,12-pack drill bits,12-pack of drill bits with sizes ranging from #40 to #3,0.8)",
"+I(104,hammer,12oz carpenter's hammer,0.75)",
"+I(105,hammer,14oz carpenter's hammer,0.875)",
"+I(106,hammer,16oz carpenter's hammer,1.0)",
"+I(107,rocks,box of assorted rocks,5.3)",
"+I(108,jacket,water resistent black wind breaker,0.1)",
"+I(109,spare tire,24 inch spare tire,22.2)",
"-U(106,hammer,16oz carpenter's hammer,1.0)",
"+U(106,hammer,18oz carpenter hammer,1.0)",
"-U(107,rocks,box of assorted rocks,5.3)",
"+U(107,rocks,box of assorted rocks,5.1)",
"+I(110,jacket,water resistent white wind breaker,0.2)",
"+I(111,scooter,Big 2-wheel scooter ,5.18)",
"-U(110,jacket,water resistent white wind breaker,0.2)",
"+U(110,jacket,new water resistent white wind breaker,0.5)",
"-U(111,scooter,Big 2-wheel scooter ,5.18)",
"+U(111,scooter,Big 2-wheel scooter ,5.17)",
"-D(111,scooter,Big 2-wheel scooter ,5.17)"
);
List<String> actual = collector.list.stream()
.map(Object::toString)
.collect(Collectors.toList());
assertEquals(expected, actual);
ChangelogJsonSerializationSchema serializationSchema = new ChangelogJsonSerializationSchema(
SCHEMA,
TimestampFormat.SQL);
serializationSchema.open(null);
List<String> result = new ArrayList<>();
for (RowData rowData : collector.list) {
result.add(new String(serializationSchema.serialize(rowData), StandardCharsets.UTF_8));
}
List<String> expectedResult = Arrays.asList(
"{\"data\":{\"id\":101,\"name\":\"scooter\",\"description\":\"Small 2-wheel scooter\",\"weight\":3.14},\"op\":\"+I\"}",
"{\"data\":{\"id\":102,\"name\":\"car battery\",\"description\":\"12V car battery\",\"weight\":8.1},\"op\":\"+I\"}",
"{\"data\":{\"id\":103,\"name\":\"12-pack drill bits\",\"description\":\"12-pack of drill bits with sizes ranging from #40 to #3\",\"weight\":0.8},\"op\":\"+I\"}",
"{\"data\":{\"id\":104,\"name\":\"hammer\",\"description\":\"12oz carpenter's hammer\",\"weight\":0.75},\"op\":\"+I\"}",
"{\"data\":{\"id\":105,\"name\":\"hammer\",\"description\":\"14oz carpenter's hammer\",\"weight\":0.875},\"op\":\"+I\"}",
"{\"data\":{\"id\":106,\"name\":\"hammer\",\"description\":\"16oz carpenter's hammer\",\"weight\":1.0},\"op\":\"+I\"}",
"{\"data\":{\"id\":107,\"name\":\"rocks\",\"description\":\"box of assorted rocks\",\"weight\":5.3},\"op\":\"+I\"}",
"{\"data\":{\"id\":108,\"name\":\"jacket\",\"description\":\"water resistent black wind breaker\",\"weight\":0.1},\"op\":\"+I\"}",
"{\"data\":{\"id\":109,\"name\":\"spare tire\",\"description\":\"24 inch spare tire\",\"weight\":22.2},\"op\":\"+I\"}",
"{\"data\":{\"id\":106,\"name\":\"hammer\",\"description\":\"16oz carpenter's hammer\",\"weight\":1.0},\"op\":\"-U\"}",
"{\"data\":{\"id\":106,\"name\":\"hammer\",\"description\":\"18oz carpenter hammer\",\"weight\":1.0},\"op\":\"+U\"}",
"{\"data\":{\"id\":107,\"name\":\"rocks\",\"description\":\"box of assorted rocks\",\"weight\":5.3},\"op\":\"-U\"}",
"{\"data\":{\"id\":107,\"name\":\"rocks\",\"description\":\"box of assorted rocks\",\"weight\":5.1},\"op\":\"+U\"}",
"{\"data\":{\"id\":110,\"name\":\"jacket\",\"description\":\"water resistent white wind breaker\",\"weight\":0.2},\"op\":\"+I\"}",
"{\"data\":{\"id\":111,\"name\":\"scooter\",\"description\":\"Big 2-wheel scooter \",\"weight\":5.18},\"op\":\"+I\"}",
"{\"data\":{\"id\":110,\"name\":\"jacket\",\"description\":\"water resistent white wind breaker\",\"weight\":0.2},\"op\":\"-U\"}",
"{\"data\":{\"id\":110,\"name\":\"jacket\",\"description\":\"new water resistent white wind breaker\",\"weight\":0.5},\"op\":\"+U\"}",
"{\"data\":{\"id\":111,\"name\":\"scooter\",\"description\":\"Big 2-wheel scooter \",\"weight\":5.18},\"op\":\"-U\"}",
"{\"data\":{\"id\":111,\"name\":\"scooter\",\"description\":\"Big 2-wheel scooter \",\"weight\":5.17},\"op\":\"+U\"}",
"{\"data\":{\"id\":111,\"name\":\"scooter\",\"description\":\"Big 2-wheel scooter \",\"weight\":5.17},\"op\":\"-D\"}"
);
assertEquals(expectedResult, result);
}
// --------------------------------------------------------------------------------------------
// Utilities
// --------------------------------------------------------------------------------------------
private static List<String> readLines(String resource) throws IOException {
final URL url = ChangelogJsonSerDeTest.class.getClassLoader().getResource(resource);
assert url != null;
Path path = new File(url.getFile()).toPath();
return Files.readAllLines(path);
}
private static class SimpleCollector implements Collector<RowData> {
private List<RowData> list = new ArrayList<>();
@Override
public void collect(RowData record) {
list.add(record);
}
@Override
public void close() {
// do nothing
}
}
private static final RowType SCHEMA =
(RowType)
ROW(
FIELD("id", INT().notNull()),
FIELD("name", STRING()),
FIELD("description", STRING()),
FIELD("weight", FLOAT()))
.getLogicalType();
@Test
public void testSerializationDeserialization() throws Exception {
List<String> lines = readLines("changelog-json-data.txt");
ChangelogJsonDeserializationSchema deserializationSchema =
new ChangelogJsonDeserializationSchema(
SCHEMA, InternalTypeInfo.of(SCHEMA), false, TimestampFormat.SQL);
deserializationSchema.open(null);
SimpleCollector collector = new SimpleCollector();
for (String line : lines) {
deserializationSchema.deserialize(line.getBytes(StandardCharsets.UTF_8), collector);
}
// CREATE TABLE product (
// id INTEGER NOT NULL AUTO_INCREMENT PRIMARY KEY,
// name VARCHAR(255),
// description VARCHAR(512),
// weight FLOAT
// );
// ALTER TABLE product AUTO_INCREMENT = 101;
//
// INSERT INTO product
// VALUES (default,"scooter","Small 2-wheel scooter",3.14),
// (default,"car battery","12V car battery",8.1),
// (default,"12-pack drill bits","12-pack of drill bits with sizes ranging from #40
// to #3",0.8),
// (default,"hammer","12oz carpenter's hammer",0.75),
// (default,"hammer","14oz carpenter's hammer",0.875),
// (default,"hammer","16oz carpenter's hammer",1.0),
// (default,"rocks","box of assorted rocks",5.3),
// (default,"jacket","water resistent black wind breaker",0.1),
// (default,"spare tire","24 inch spare tire",22.2);
// UPDATE product SET description='18oz carpenter hammer' WHERE id=106;
// UPDATE product SET weight='5.1' WHERE id=107;
// INSERT INTO product VALUES (default,"jacket","water resistent white wind breaker",0.2);
// INSERT INTO product VALUES (default,"scooter","Big 2-wheel scooter ",5.18);
// UPDATE product SET description='new water resistent white wind breaker', weight='0.5'
// WHERE id=110;
// UPDATE product SET weight='5.17' WHERE id=111;
// DELETE FROM product WHERE id=111;
List<String> expected =
Arrays.asList(
"+I(101,scooter,Small 2-wheel scooter,3.14)",
"+I(102,car battery,12V car battery,8.1)",
"+I(103,12-pack drill bits,12-pack of drill bits with sizes ranging from #40 to #3,0.8)",
"+I(104,hammer,12oz carpenter's hammer,0.75)",
"+I(105,hammer,14oz carpenter's hammer,0.875)",
"+I(106,hammer,16oz carpenter's hammer,1.0)",
"+I(107,rocks,box of assorted rocks,5.3)",
"+I(108,jacket,water resistent black wind breaker,0.1)",
"+I(109,spare tire,24 inch spare tire,22.2)",
"-U(106,hammer,16oz carpenter's hammer,1.0)",
"+U(106,hammer,18oz carpenter hammer,1.0)",
"-U(107,rocks,box of assorted rocks,5.3)",
"+U(107,rocks,box of assorted rocks,5.1)",
"+I(110,jacket,water resistent white wind breaker,0.2)",
"+I(111,scooter,Big 2-wheel scooter ,5.18)",
"-U(110,jacket,water resistent white wind breaker,0.2)",
"+U(110,jacket,new water resistent white wind breaker,0.5)",
"-U(111,scooter,Big 2-wheel scooter ,5.18)",
"+U(111,scooter,Big 2-wheel scooter ,5.17)",
"-D(111,scooter,Big 2-wheel scooter ,5.17)");
List<String> actual =
collector.list.stream().map(Object::toString).collect(Collectors.toList());
assertEquals(expected, actual);
ChangelogJsonSerializationSchema serializationSchema =
new ChangelogJsonSerializationSchema(SCHEMA, TimestampFormat.SQL);
serializationSchema.open(null);
List<String> result = new ArrayList<>();
for (RowData rowData : collector.list) {
result.add(new String(serializationSchema.serialize(rowData), StandardCharsets.UTF_8));
}
List<String> expectedResult =
Arrays.asList(
"{\"data\":{\"id\":101,\"name\":\"scooter\",\"description\":\"Small 2-wheel scooter\",\"weight\":3.14},\"op\":\"+I\"}",
"{\"data\":{\"id\":102,\"name\":\"car battery\",\"description\":\"12V car battery\",\"weight\":8.1},\"op\":\"+I\"}",
"{\"data\":{\"id\":103,\"name\":\"12-pack drill bits\",\"description\":\"12-pack of drill bits with sizes ranging from #40 to #3\",\"weight\":0.8},\"op\":\"+I\"}",
"{\"data\":{\"id\":104,\"name\":\"hammer\",\"description\":\"12oz carpenter's hammer\",\"weight\":0.75},\"op\":\"+I\"}",
"{\"data\":{\"id\":105,\"name\":\"hammer\",\"description\":\"14oz carpenter's hammer\",\"weight\":0.875},\"op\":\"+I\"}",
"{\"data\":{\"id\":106,\"name\":\"hammer\",\"description\":\"16oz carpenter's hammer\",\"weight\":1.0},\"op\":\"+I\"}",
"{\"data\":{\"id\":107,\"name\":\"rocks\",\"description\":\"box of assorted rocks\",\"weight\":5.3},\"op\":\"+I\"}",
"{\"data\":{\"id\":108,\"name\":\"jacket\",\"description\":\"water resistent black wind breaker\",\"weight\":0.1},\"op\":\"+I\"}",
"{\"data\":{\"id\":109,\"name\":\"spare tire\",\"description\":\"24 inch spare tire\",\"weight\":22.2},\"op\":\"+I\"}",
"{\"data\":{\"id\":106,\"name\":\"hammer\",\"description\":\"16oz carpenter's hammer\",\"weight\":1.0},\"op\":\"-U\"}",
"{\"data\":{\"id\":106,\"name\":\"hammer\",\"description\":\"18oz carpenter hammer\",\"weight\":1.0},\"op\":\"+U\"}",
"{\"data\":{\"id\":107,\"name\":\"rocks\",\"description\":\"box of assorted rocks\",\"weight\":5.3},\"op\":\"-U\"}",
"{\"data\":{\"id\":107,\"name\":\"rocks\",\"description\":\"box of assorted rocks\",\"weight\":5.1},\"op\":\"+U\"}",
"{\"data\":{\"id\":110,\"name\":\"jacket\",\"description\":\"water resistent white wind breaker\",\"weight\":0.2},\"op\":\"+I\"}",
"{\"data\":{\"id\":111,\"name\":\"scooter\",\"description\":\"Big 2-wheel scooter \",\"weight\":5.18},\"op\":\"+I\"}",
"{\"data\":{\"id\":110,\"name\":\"jacket\",\"description\":\"water resistent white wind breaker\",\"weight\":0.2},\"op\":\"-U\"}",
"{\"data\":{\"id\":110,\"name\":\"jacket\",\"description\":\"new water resistent white wind breaker\",\"weight\":0.5},\"op\":\"+U\"}",
"{\"data\":{\"id\":111,\"name\":\"scooter\",\"description\":\"Big 2-wheel scooter \",\"weight\":5.18},\"op\":\"-U\"}",
"{\"data\":{\"id\":111,\"name\":\"scooter\",\"description\":\"Big 2-wheel scooter \",\"weight\":5.17},\"op\":\"+U\"}",
"{\"data\":{\"id\":111,\"name\":\"scooter\",\"description\":\"Big 2-wheel scooter \",\"weight\":5.17},\"op\":\"-D\"}");
assertEquals(expectedResult, result);
}
// --------------------------------------------------------------------------------------------
// Utilities
// --------------------------------------------------------------------------------------------
private static List<String> readLines(String resource) throws IOException {
final URL url = ChangelogJsonSerDeTest.class.getClassLoader().getResource(resource);
assert url != null;
Path path = new File(url.getFile()).toPath();
return Files.readAllLines(path);
}
private static class SimpleCollector implements Collector<RowData> {
private List<RowData> list = new ArrayList<>();
@Override
public void collect(RowData record) {
list.add(record);
}
@Override
public void close() {
// do nothing
}
}
}

@ -18,8 +18,5 @@
package com.alibaba.ververica.cdc.connectors.mysql;
/**
* This is used to generate a dummy docs jar for this module to pass OSS repository rule.
*/
public class DummyDocs {
}
/** This is used to generate a dummy docs jar for this module to pass OSS repository rule. */
public class DummyDocs {}

@ -18,8 +18,5 @@
package com.alibaba.ververica.cdc.connectors.postgres;
/**
* This is used to generate a dummy docs jar for this module to pass OSS repository rule.
*/
public class DummyDocs {
}
/** This is used to generate a dummy docs jar for this module to pass OSS repository rule. */
public class DummyDocs {}

File diff suppressed because it is too large.

@ -23,6 +23,20 @@ under the License.
"http://www.puppycrawl.com/dtds/suppressions_1_1.dtd">
<suppressions>
<!-- Star import is used for all the expressions -->
<suppress files="BaseExpressions.java" checks="AvoidStarImport"/>
<!-- These use star import for all the generated Tuple classes -->
<suppress files="CsvReader.java" checks="AvoidStarImport"/>
<suppress files="NoticeFileChecker.java" checks="Regexp"/>
<suppress files="NoticeFileChecker.java" checks="IllegalImport"/>
<suppress files="JoinOperator.java" checks="FileLength"/>
<suppress files="WindowOperatorTest.java" checks="FileLength"/>
<suppress files="WindowOperatorContractTest.java" checks="FileLength"/>
<suppress files="NFAITCase.java" checks="FileLength"/>
<suppress files="org[\\/]apache[\\/]flink[\\/]formats[\\/]avro[\\/]generated[\\/].*.java" checks="[a-zA-Z0-9]*"/>
<suppress files="org[\\/]apache[\\/]flink[\\/]formats[\\/]parquet[\\/]generated[\\/].*.java" checks="[a-zA-Z0-9]*"/>
<!-- Sometimes we have to temporarily fix very long, different formatted Calcite files. -->
@ -43,7 +57,11 @@ under the License.
<suppress
files="FlinkKinesisProducer.java|FlinkKinesisProducerTest.java"
checks="IllegalImport"/>
<!-- Classes copied from Hadoop -->
<!-- Kinesis EFO consumer required to handle Netty ReadTimeoutException -->
<suppress
files="FanOutRecordPublisherTest.java|FanOutShardSubscriber.java|FanOutShardSubscriberTest.java"
checks="IllegalImport"/>
<!-- Classes copied from Hadoop -->
<suppress
files="org[\\/]apache[\\/]hadoop[\\/]conf[\\/]Configuration.java"
checks=".*"/>
