[mysql] Split snapshot chunks asynchronously (#931)

ehui 3 years ago committed by GitHub
parent 7b16dc2b98
commit 41a5463437

@@ -21,6 +21,8 @@ package com.ververica.cdc.connectors.mysql.source.assigners;
import org.apache.flink.util.FlinkRuntimeException;
import org.apache.flink.util.Preconditions;
import org.apache.flink.shaded.guava18.com.google.common.util.concurrent.ThreadFactoryBuilder;
import com.ververica.cdc.connectors.mysql.debezium.DebeziumUtils;
import com.ververica.cdc.connectors.mysql.schema.MySqlSchema;
import com.ververica.cdc.connectors.mysql.source.assigners.state.SnapshotPendingSplitsState;
@@ -42,10 +44,13 @@ import java.util.Collection;
import java.util.Comparator;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.concurrent.CopyOnWriteArrayList;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.ThreadFactory;
import java.util.stream.Collectors;
import static com.ververica.cdc.connectors.mysql.debezium.DebeziumUtils.discoverCapturedTables;
@@ -68,13 +73,16 @@ public class MySqlSnapshotSplitAssigner implements MySqlSplitAssigner {
private final Map<String, BinlogOffset> splitFinishedOffsets;
private final MySqlSourceConfig sourceConfig;
private final int currentParallelism;
- private final LinkedList<TableId> remainingTables;
+ private final List<TableId> remainingTables;
private final boolean isRemainingTablesCheckpointed;
private AssignerStatus assignerStatus;
private ChunkSplitter chunkSplitter;
private boolean isTableIdCaseSensitive;
private ExecutorService executor;
private Object lock;
@Nullable private Long checkpointIdToFinish;
public MySqlSnapshotSplitAssigner(
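The two new fields are the heart of the change: `executor` runs the chunk-splitting work on a background thread, and `lock` is the monitor on which that thread and `getNext()` coordinate. Using a dedicated private lock object (rather than synchronizing on `this`) keeps the wait/notify protocol internal to the assigner. A minimal, self-contained sketch of that idiom (hypothetical `SplitBuffer` class and `String` splits, not code from this commit):

```java
import java.util.ArrayList;
import java.util.List;

// Minimal sketch: a dedicated lock object keeps the wait/notify protocol private,
// so external code that synchronizes on the owning instance cannot interfere.
class SplitBuffer {
    private final Object lock = new Object();           // monitor owned by this class only
    private final List<String> splits = new ArrayList<>();

    void add(String split) {
        synchronized (lock) {                            // must hold the monitor to notify
            splits.add(split);
            lock.notifyAll();                            // wake any thread blocked in take()
        }
    }

    String take() throws InterruptedException {
        synchronized (lock) {
            while (splits.isEmpty()) {                   // loop guards against spurious wakeups
                lock.wait();                             // releases the monitor while waiting
            }
            return splits.remove(0);
        }
    }
}
```

Holding the monitor is what makes the hand-off safe: it is required both for `wait()`/`notify()` and for the newly buffered splits to be visible to the consuming thread.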
@@ -126,17 +134,18 @@ public class MySqlSnapshotSplitAssigner implements MySqlSplitAssigner {
this.sourceConfig = sourceConfig;
this.currentParallelism = currentParallelism;
this.alreadyProcessedTables = alreadyProcessedTables;
- this.remainingSplits = remainingSplits;
+ this.remainingSplits = new CopyOnWriteArrayList<>(remainingSplits);
this.assignedSplits = assignedSplits;
this.splitFinishedOffsets = splitFinishedOffsets;
this.assignerStatus = assignerStatus;
- this.remainingTables = new LinkedList<>(remainingTables);
+ this.remainingTables = new CopyOnWriteArrayList<>(remainingTables);
this.isRemainingTablesCheckpointed = isRemainingTablesCheckpointed;
this.isTableIdCaseSensitive = isTableIdCaseSensitive;
}
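The constructor now wraps the restored `remainingSplits` and `remainingTables` in `CopyOnWriteArrayList`, because both lists are about to be shared between the enumerator thread and the splitting thread; a plain `LinkedList` offers neither safe concurrent iteration nor cross-thread visibility. A small demo of the iteration semantics that makes this safe (illustrative only, plain strings instead of tables and splits):

```java
import java.util.List;
import java.util.concurrent.CopyOnWriteArrayList;

public class CowIterationDemo {
    public static void main(String[] args) {
        List<String> tables = new CopyOnWriteArrayList<>(List.of("t1", "t2"));

        // The iterator works on a snapshot taken when it was created, so additions
        // made while iterating never throw ConcurrentModificationException.
        for (String table : tables) {
            tables.add(table + "_copy");   // safe: each write creates a new backing array
            System.out.println("visiting " + table);
        }

        System.out.println(tables);        // [t1, t2, t1_copy, t2_copy]
    }
}
```

The trade-off is that every write copies the backing array, which is acceptable here because writes happen once per table or chunk rather than per row.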
@Override
public void open() {
lock = new Object();
chunkSplitter = createChunkSplitter(sourceConfig, isTableIdCaseSensitive);
// the legacy state didn't snapshot remaining tables, discover remaining tables here
@@ -152,6 +161,7 @@ public class MySqlSnapshotSplitAssigner implements MySqlSplitAssigner {
}
}
captureNewlyAddedTables();
startAsynchronouslySplit();
}
private void captureNewlyAddedTables() {
@@ -180,25 +190,54 @@ public class MySqlSnapshotSplitAssigner implements MySqlSplitAssigner {
}
}
private void startAsynchronouslySplit() {
if (!remainingTables.isEmpty()) {
if (executor == null) {
ThreadFactory threadFactory =
new ThreadFactoryBuilder().setNameFormat("snapshot-splitting").build();
this.executor = Executors.newSingleThreadExecutor(threadFactory);
}
executor.submit(
() -> {
Iterator<TableId> iterator = remainingTables.iterator();
while (iterator.hasNext()) {
TableId nextTable = iterator.next();
// split the given table into chunks (snapshot splits)
Collection<MySqlSnapshotSplit> splits =
chunkSplitter.generateSplits(nextTable);
synchronized (lock) {
remainingSplits.addAll(splits);
remainingTables.remove(nextTable);
lock.notify();
}
}
});
}
}
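`startAsynchronouslySplit()` is the producer half of the new design: a single `snapshot-splitting` thread walks the remaining tables, generates the chunks for each table, publishes them under the lock, and wakes any reader blocked in `getNext()`. A stripped-down sketch of the same flow, with plain strings standing in for `TableId`/`MySqlSnapshotSplit` and a trivial `generateSplits` in place of `ChunkSplitter` (class and method names here are illustrative, not the connector's):

```java
import java.util.List;
import java.util.concurrent.CopyOnWriteArrayList;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;

class AsyncSplitterSketch {
    private final Object lock = new Object();
    private final List<String> remainingTables = new CopyOnWriteArrayList<>(List.of("a", "b"));
    private final List<String> remainingSplits = new CopyOnWriteArrayList<>();

    // Single worker thread, named so it is easy to spot in thread dumps.
    private final ExecutorService executor =
            Executors.newSingleThreadExecutor(r -> new Thread(r, "snapshot-splitting"));

    void startAsynchronouslySplit() {
        executor.submit(() -> {
            for (String table : remainingTables) {
                // Potentially slow: in the real assigner this reads schema and split boundaries.
                List<String> splits = generateSplits(table);
                synchronized (lock) {
                    remainingSplits.addAll(splits);
                    remainingTables.remove(table);
                    lock.notifyAll();        // wake a consumer waiting in getNext()
                }
            }
        });
    }

    private List<String> generateSplits(String table) {
        return List.of(table + "-split-0", table + "-split-1"); // stand-in for ChunkSplitter
    }
}
```

The commit signals with `lock.notify()`, which suffices with a single consumer (the enumerator thread); `notifyAll()` as in the sketch is the more defensive choice if additional waiters were ever introduced.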
@Override
public Optional<MySqlSplit> getNext() {
- if (!remainingSplits.isEmpty()) {
- // return remaining splits firstly
- Iterator<MySqlSnapshotSplit> iterator = remainingSplits.iterator();
- MySqlSnapshotSplit split = iterator.next();
- iterator.remove();
- assignedSplits.put(split.splitId(), split);
- return Optional.of(split);
- } else {
- // it's turn for next table
- TableId nextTable = remainingTables.pollFirst();
- if (nextTable != null) {
- // split the given table into chunks (snapshot splits)
- Collection<MySqlSnapshotSplit> splits = chunkSplitter.generateSplits(nextTable);
- remainingSplits.addAll(splits);
- alreadyProcessedTables.add(nextTable);
+ synchronized (lock) {
+ if (!remainingSplits.isEmpty()) {
+ // return remaining splits first
+ Iterator<MySqlSnapshotSplit> iterator = remainingSplits.iterator();
+ MySqlSnapshotSplit split = iterator.next();
+ remainingSplits.remove(split);
+ assignedSplits.put(split.splitId(), split);
+ addAlreadyProcessedTablesIfNotExists(split.getTableId());
+ return Optional.of(split);
+ } else if (!remainingTables.isEmpty()) {
+ try {
+ // wait for the asynchronous split to complete
+ lock.wait();
+ } catch (InterruptedException e) {
+ throw new FlinkRuntimeException(
+ "InterruptedException while waiting for asynchronous snapshot split");
+ }
+ return getNext();
+ } else {
+ closeExecutorService();
+ return Optional.empty();
+ }
}
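`getNext()` is the consumer half: under the same lock it hands out a ready split if one exists, waits while splitting is still in progress, and only reports exhaustion (and shuts the executor down) once both lists are empty. Because `Object.wait()` releases the monitor while blocked, the splitting thread can enter its own `synchronized (lock)` block and notify. The commit retries via recursion (`return getNext()`); a sketch of the equivalent loop form, which also tolerates spurious wakeups (hypothetical class, plain strings for splits, not the connector's code):

```java
import java.util.List;
import java.util.Optional;
import java.util.concurrent.CopyOnWriteArrayList;

class SplitConsumerSketch {
    private final Object lock = new Object();
    private final List<String> remainingSplits = new CopyOnWriteArrayList<>();
    private final List<String> remainingTables = new CopyOnWriteArrayList<>(List.of("a"));

    Optional<String> getNext() throws InterruptedException {
        synchronized (lock) {
            while (true) {
                if (!remainingSplits.isEmpty()) {
                    return Optional.of(remainingSplits.remove(0)); // hand out a ready split
                } else if (!remainingTables.isEmpty()) {
                    lock.wait();            // splitting still in progress: block until notified
                } else {
                    return Optional.empty(); // all tables split and all splits handed out
                }
            }
        }
    }
}
```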
@@ -319,7 +358,21 @@ public class MySqlSnapshotSplitAssigner implements MySqlSplitAssigner {
}
@Override
- public void close() {}
+ public void close() {
+ closeExecutorService();
+ }
private void closeExecutorService() {
if (executor != null) {
executor.shutdown();
}
}
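`closeExecutorService()` relies on `ExecutorService.shutdown()`, which stops accepting new work but lets an in-flight splitting task run to completion. If a bounded wait or forced interruption were ever wanted on close, the standard pattern looks like this (a generic sketch, not what the commit does; the 10-second timeout is arbitrary):

```java
import java.util.concurrent.ExecutorService;
import java.util.concurrent.TimeUnit;

final class ExecutorShutdownSketch {
    static void closeQuietly(ExecutorService executor) {
        if (executor == null) {
            return;
        }
        executor.shutdown();                                   // no new tasks; running task may finish
        try {
            if (!executor.awaitTermination(10, TimeUnit.SECONDS)) {
                executor.shutdownNow();                        // interrupt the splitting task on timeout
            }
        } catch (InterruptedException e) {
            executor.shutdownNow();
            Thread.currentThread().interrupt();                // restore the interrupt flag
        }
    }
}
```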
private void addAlreadyProcessedTablesIfNotExists(TableId tableId) {
if (!alreadyProcessedTables.contains(tableId)) {
alreadyProcessedTables.add(tableId);
}
}
/** Indicates there are no more splits available in this assigner. */
public boolean noMoreSplits() {
