[3.0][cdc-runtime] Provide SchemaOperator and SchemaRegistry to handle schema changes (#2685)
parent
4770f360d0
commit
ace6080c93
@ -1,89 +0,0 @@
|
||||
/*
|
||||
* Copyright 2023 Ververica Inc.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package com.ververica.cdc.runtime.operators.schema.coordinator;
|
||||
|
||||
import org.apache.flink.runtime.operators.coordination.CoordinationRequest;
|
||||
import org.apache.flink.runtime.operators.coordination.CoordinationRequestHandler;
|
||||
import org.apache.flink.runtime.operators.coordination.CoordinationResponse;
|
||||
import org.apache.flink.runtime.operators.coordination.OperatorCoordinator;
|
||||
import org.apache.flink.runtime.operators.coordination.OperatorEvent;
|
||||
|
||||
import com.ververica.cdc.runtime.operators.schema.SchemaOperator;
|
||||
import com.ververica.cdc.runtime.operators.schema.event.FlushSuccessEvent;
|
||||
import com.ververica.cdc.runtime.operators.schema.event.SchemaChangeRequest;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import javax.annotation.Nullable;
|
||||
|
||||
import java.util.concurrent.CompletableFuture;
|
||||
|
||||
/**
|
||||
* The implementation of the {@link OperatorCoordinator} for the {@link SchemaOperator}.
|
||||
*
|
||||
* <p>The <code>SchemaOperatorCoordinator</code> provides an event loop style thread model to
|
||||
* interact with the Flink runtime. The coordinator ensures that all the state manipulations are
|
||||
* made by its event loop thread.
|
||||
*
|
||||
* <p>This coordinator is responsible for:
|
||||
*
|
||||
* <ul>
|
||||
* <li>Apply schema changes when receiving the {@link SchemaChangeRequest} from {@link
|
||||
* SchemaOperator}
|
||||
* <li>Notify {@link SchemaOperator} to continue to push data for the table after receiving {@link
|
||||
* FlushSuccessEvent} from its registered sink writer
|
||||
* </ul>
|
||||
*/
|
||||
public class SchemaOperatorCoordinator implements OperatorCoordinator, CoordinationRequestHandler {
|
||||
|
||||
private static final Logger LOG = LoggerFactory.getLogger(SchemaOperatorCoordinator.class);
|
||||
|
||||
@Override
|
||||
public CompletableFuture<CoordinationResponse> handleCoordinationRequest(
|
||||
CoordinationRequest coordinationRequest) {
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void start() throws Exception {}
|
||||
|
||||
@Override
|
||||
public void close() throws Exception {}
|
||||
|
||||
@Override
|
||||
public void handleEventFromOperator(int i, int i1, OperatorEvent operatorEvent)
|
||||
throws Exception {}
|
||||
|
||||
@Override
|
||||
public void checkpointCoordinator(long l, CompletableFuture<byte[]> completableFuture)
|
||||
throws Exception {}
|
||||
|
||||
@Override
|
||||
public void notifyCheckpointComplete(long l) {}
|
||||
|
||||
@Override
|
||||
public void resetToCheckpoint(long l, @Nullable byte[] bytes) throws Exception {}
|
||||
|
||||
@Override
|
||||
public void subtaskReset(int i, long l) {}
|
||||
|
||||
@Override
|
||||
public void executionAttemptFailed(int i, int i1, @Nullable Throwable throwable) {}
|
||||
|
||||
@Override
|
||||
public void executionAttemptReady(int i, int i1, SubtaskGateway subtaskGateway) {}
|
||||
}
|
@ -0,0 +1,166 @@
|
||||
/*
|
||||
* Copyright 2023 Ververica Inc.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package com.ververica.cdc.runtime.operators.schema.coordinator;
|
||||
|
||||
import org.apache.flink.runtime.operators.coordination.CoordinationRequest;
|
||||
import org.apache.flink.runtime.operators.coordination.CoordinationRequestHandler;
|
||||
import org.apache.flink.runtime.operators.coordination.CoordinationResponse;
|
||||
import org.apache.flink.runtime.operators.coordination.OperatorCoordinator;
|
||||
import org.apache.flink.runtime.operators.coordination.OperatorEvent;
|
||||
import org.apache.flink.util.FlinkException;
|
||||
|
||||
import com.ververica.cdc.common.event.TableId;
|
||||
import com.ververica.cdc.common.sink.MetadataApplier;
|
||||
import com.ververica.cdc.runtime.operators.schema.SchemaOperator;
|
||||
import com.ververica.cdc.runtime.operators.schema.event.FlushSuccessEvent;
|
||||
import com.ververica.cdc.runtime.operators.schema.event.ReleaseUpstreamRequest;
|
||||
import com.ververica.cdc.runtime.operators.schema.event.SchemaChangeRequest;
|
||||
import com.ververica.cdc.runtime.operators.schema.event.SinkWriterRegisterEvent;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import javax.annotation.Nullable;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.concurrent.CompletableFuture;
|
||||
|
||||
/**
|
||||
* The implementation of the {@link OperatorCoordinator} for the {@link SchemaOperator}.
|
||||
*
|
||||
* <p>The <code>SchemaRegister</code> provides an event loop style thread model to interact with the
|
||||
* Flink runtime. The coordinator ensures that all the state manipulations are made by its event
|
||||
* loop thread.
|
||||
*
|
||||
* <p>This <code>SchemaRegister</code> is responsible for:
|
||||
*
|
||||
* <ul>
|
||||
* <li>Apply schema changes when receiving the {@link SchemaChangeRequest} from {@link
|
||||
* SchemaOperator}
|
||||
* <li>Notify {@link SchemaOperator} to continue to push data for the table after receiving {@link
|
||||
* FlushSuccessEvent} from its registered sink writer
|
||||
* </ul>
|
||||
*/
|
||||
public class SchemaRegistry implements OperatorCoordinator, CoordinationRequestHandler {
|
||||
|
||||
private static final Logger LOG = LoggerFactory.getLogger(SchemaRegistry.class);
|
||||
|
||||
/** The context of the coordinator. */
|
||||
private final OperatorCoordinator.Context context;
|
||||
/** The name of the operator this SchemaOperatorCoordinator is associated with. */
|
||||
private final String operatorName;
|
||||
|
||||
/**
|
||||
* Tracks the subtask failed reason to throw a more meaningful exception in {@link
|
||||
* #subtaskReset}.
|
||||
*/
|
||||
private final Map<Integer, Throwable> failedReasons;
|
||||
|
||||
/** The request handler that handle all requests and events. */
|
||||
private final SchemaRegistryRequestHandler requestHandler;
|
||||
|
||||
public SchemaRegistry(
|
||||
String operatorName,
|
||||
OperatorCoordinator.Context context,
|
||||
Map<TableId, List<MetadataApplier>> metadataAppliers) {
|
||||
this.context = context;
|
||||
this.operatorName = operatorName;
|
||||
this.failedReasons = new HashMap<>();
|
||||
this.requestHandler = new SchemaRegistryRequestHandler(metadataAppliers);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void start() throws Exception {
|
||||
LOG.info("Starting SchemaRegistry for {}.", operatorName);
|
||||
this.failedReasons.clear();
|
||||
LOG.info("Started SchemaRegistry for {}.", operatorName);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() throws Exception {
|
||||
LOG.info("SchemaRegistry for {} closed.", operatorName);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void handleEventFromOperator(int subtask, int attemptNumber, OperatorEvent event)
|
||||
throws Exception {
|
||||
if (event instanceof FlushSuccessEvent) {
|
||||
FlushSuccessEvent flushSuccessEvent = (FlushSuccessEvent) event;
|
||||
LOG.info(
|
||||
"Sink subtask {} succeed flushing for table {}.",
|
||||
flushSuccessEvent.getSubtask(),
|
||||
flushSuccessEvent.getTableId().toString());
|
||||
requestHandler.flushSuccess(
|
||||
flushSuccessEvent.getTableId(), flushSuccessEvent.getSubtask());
|
||||
} else if (event instanceof SinkWriterRegisterEvent) {
|
||||
requestHandler.registerSinkWriter(((SinkWriterRegisterEvent) event).getSubtask());
|
||||
} else {
|
||||
throw new FlinkException("Unrecognized Operator Event: " + event);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void checkpointCoordinator(long checkpointId, CompletableFuture<byte[]> resultFuture)
|
||||
throws Exception {
|
||||
// do nothing
|
||||
}
|
||||
|
||||
@Override
|
||||
public void notifyCheckpointComplete(long checkpointId) {
|
||||
// do nothing
|
||||
}
|
||||
|
||||
@Override
|
||||
public CompletableFuture<CoordinationResponse> handleCoordinationRequest(
|
||||
CoordinationRequest request) {
|
||||
if (request instanceof SchemaChangeRequest) {
|
||||
SchemaChangeRequest schemaChangeRequest = (SchemaChangeRequest) request;
|
||||
return requestHandler.handleSchemaChangeRequest(schemaChangeRequest);
|
||||
} else if (request instanceof ReleaseUpstreamRequest) {
|
||||
return requestHandler.handleReleaseUpstreamRequest((ReleaseUpstreamRequest) request);
|
||||
} else {
|
||||
throw new IllegalArgumentException("Unrecognized CoordinationRequest type: " + request);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void resetToCheckpoint(long checkpointId, @Nullable byte[] checkpointData)
|
||||
throws Exception {
|
||||
// do nothing
|
||||
}
|
||||
|
||||
@Override
|
||||
public void subtaskReset(int subtask, long checkpointId) {
|
||||
Throwable rootCause = failedReasons.get(subtask);
|
||||
LOG.error(
|
||||
String.format("Subtask %d reset at checkpoint %d.", subtask, checkpointId),
|
||||
rootCause);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void executionAttemptFailed(
|
||||
int subtask, int attemptNumber, @Nullable Throwable throwable) {
|
||||
failedReasons.put(subtask, throwable);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void executionAttemptReady(
|
||||
int subtask, int attemptNumber, SubtaskGateway subtaskGateway) {
|
||||
// do nothing
|
||||
}
|
||||
}
|
@ -0,0 +1,194 @@
|
||||
/*
|
||||
* Copyright 2023 Ververica Inc.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package com.ververica.cdc.runtime.operators.schema.coordinator;
|
||||
|
||||
import org.apache.flink.runtime.operators.coordination.CoordinationResponse;
|
||||
|
||||
import com.ververica.cdc.common.annotation.Internal;
|
||||
import com.ververica.cdc.common.event.SchemaChangeEvent;
|
||||
import com.ververica.cdc.common.event.TableId;
|
||||
import com.ververica.cdc.common.sink.MetadataApplier;
|
||||
import com.ververica.cdc.runtime.operators.schema.event.ReleaseUpstreamRequest;
|
||||
import com.ververica.cdc.runtime.operators.schema.event.ReleaseUpstreamResponse;
|
||||
import com.ververica.cdc.runtime.operators.schema.event.SchemaChangeRequest;
|
||||
import com.ververica.cdc.runtime.operators.schema.event.SchemaChangeResponse;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import javax.annotation.concurrent.NotThreadSafe;
|
||||
|
||||
import java.util.HashSet;
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.concurrent.CompletableFuture;
|
||||
|
||||
/** A handler to deal with all requests and events for {@link SchemaRegistry}. */
|
||||
@Internal
|
||||
@NotThreadSafe
|
||||
public class SchemaRegistryRequestHandler {
|
||||
private static final Logger LOG = LoggerFactory.getLogger(SchemaRegistryRequestHandler.class);
|
||||
|
||||
/** The {@link MetadataApplier}s for every table. */
|
||||
private final Map<TableId, List<MetadataApplier>> metadataAppliers;
|
||||
/** All active sink writers. */
|
||||
private final Set<Integer> activeSinkWriters;
|
||||
|
||||
/**
|
||||
* Not applied SchemaChangeRequest's future before receiving all flush success events for its
|
||||
* table from sink writers.
|
||||
*/
|
||||
private PendingSchemaChange waitFlushSuccess;
|
||||
/**
|
||||
* Not applied SchemaChangeRequest before receiving all flush success events for its table from
|
||||
* sink writers.
|
||||
*/
|
||||
private final List<PendingSchemaChange> pendingSchemaChanges;
|
||||
/** Sink writers which have sent flush success events for the request. */
|
||||
private final Set<Integer> flushedSinkWriters;
|
||||
|
||||
public SchemaRegistryRequestHandler(Map<TableId, List<MetadataApplier>> metadataAppliers) {
|
||||
this.metadataAppliers = metadataAppliers;
|
||||
this.activeSinkWriters = new HashSet<>();
|
||||
this.flushedSinkWriters = new HashSet<>();
|
||||
this.pendingSchemaChanges = new LinkedList<>();
|
||||
}
|
||||
|
||||
/**
|
||||
* Apply the schema change to the external system.
|
||||
*
|
||||
* @param tableId the table need to change schema
|
||||
* @param changeEvent the schema change
|
||||
*/
|
||||
private void applySchemaChange(TableId tableId, SchemaChangeEvent changeEvent) {
|
||||
List<MetadataApplier> appliers = metadataAppliers.get(tableId);
|
||||
if (appliers == null || appliers.isEmpty()) {
|
||||
LOG.warn("There is no MetadataApplier for table {}.", tableId);
|
||||
throw new UnsupportedOperationException(
|
||||
"Cannot find a metadata applier for the table changes in table "
|
||||
+ tableId.toString());
|
||||
}
|
||||
|
||||
for (MetadataApplier applier : appliers) {
|
||||
LOG.debug("Apply schema change {} to table {}.", changeEvent, tableId);
|
||||
applier.applySchemaChange(changeEvent);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Handle the {@link SchemaChangeRequest} and wait for all sink subtasks flushing.
|
||||
*
|
||||
* @param request the received SchemaChangeRequest
|
||||
*/
|
||||
public CompletableFuture<CoordinationResponse> handleSchemaChangeRequest(
|
||||
SchemaChangeRequest request) {
|
||||
CompletableFuture<CoordinationResponse> response;
|
||||
if (pendingSchemaChanges.isEmpty() && waitFlushSuccess == null) {
|
||||
LOG.info(
|
||||
"Received schema change event request from table {}. Start to pend requests for others.",
|
||||
request.getTableId().toString());
|
||||
// TODO : skip flushing and please not put it to pendingSchemaChanges
|
||||
response = CompletableFuture.completedFuture(new SchemaChangeResponse(true));
|
||||
pendingSchemaChanges.add(new PendingSchemaChange(request, response));
|
||||
} else {
|
||||
LOG.info("There are already processing requests. Wait for processing.");
|
||||
response = new CompletableFuture<>();
|
||||
pendingSchemaChanges.add(new PendingSchemaChange(request, response));
|
||||
}
|
||||
return response;
|
||||
}
|
||||
|
||||
/**
|
||||
* Handle the {@link ReleaseUpstreamRequest} and wait for all sink subtasks flushing.
|
||||
*
|
||||
* @param request the received SchemaChangeRequest
|
||||
*/
|
||||
public CompletableFuture<CoordinationResponse> handleReleaseUpstreamRequest(
|
||||
ReleaseUpstreamRequest request) {
|
||||
this.waitFlushSuccess = pendingSchemaChanges.remove(0).startToWaitForFlushSuccess();
|
||||
return waitFlushSuccess.getResponseFuture();
|
||||
}
|
||||
|
||||
/**
|
||||
* Register a sink subtask.
|
||||
*
|
||||
* @param sinkSubtask the sink subtask to register
|
||||
*/
|
||||
public void registerSinkWriter(int sinkSubtask) {
|
||||
LOG.info("Register sink subtask {}.", sinkSubtask);
|
||||
activeSinkWriters.add(sinkSubtask);
|
||||
}
|
||||
|
||||
/**
|
||||
* Record flushed sink subtasks after receiving FlushSuccessEvent.
|
||||
*
|
||||
* @param tableId the subtask in SchemaOperator and table that the FlushEvent is about
|
||||
* @param sinkSubtask the sink subtask succeed flushing
|
||||
*/
|
||||
public void flushSuccess(TableId tableId, int sinkSubtask) {
|
||||
flushedSinkWriters.add(sinkSubtask);
|
||||
if (flushedSinkWriters.equals(activeSinkWriters)) {
|
||||
LOG.info(
|
||||
"All sink subtask have flushed for table {}. Start to apply schema change.",
|
||||
tableId.toString());
|
||||
applySchemaChange(tableId, waitFlushSuccess.getChangeRequest().getSchemaChangeEvent());
|
||||
waitFlushSuccess.getResponseFuture().complete(new ReleaseUpstreamResponse());
|
||||
startNextSchemaChangeRequest();
|
||||
}
|
||||
}
|
||||
|
||||
private void startNextSchemaChangeRequest() {
|
||||
flushedSinkWriters.clear();
|
||||
waitFlushSuccess = null;
|
||||
if (!pendingSchemaChanges.isEmpty()) {
|
||||
// TODO : if no need to flush, remove it from pendingSchemaChanges
|
||||
pendingSchemaChanges
|
||||
.get(0)
|
||||
.getResponseFuture()
|
||||
.complete(new SchemaChangeResponse(true));
|
||||
}
|
||||
}
|
||||
|
||||
class PendingSchemaChange {
|
||||
private final SchemaChangeRequest changeRequest;
|
||||
private final CompletableFuture<CoordinationResponse> responseFuture;
|
||||
|
||||
public PendingSchemaChange(
|
||||
SchemaChangeRequest changeRequest,
|
||||
CompletableFuture<CoordinationResponse> responseFuture) {
|
||||
this.changeRequest = changeRequest;
|
||||
this.responseFuture = responseFuture;
|
||||
}
|
||||
|
||||
public SchemaChangeRequest getChangeRequest() {
|
||||
return changeRequest;
|
||||
}
|
||||
|
||||
public CompletableFuture<CoordinationResponse> getResponseFuture() {
|
||||
return responseFuture;
|
||||
}
|
||||
|
||||
public PendingSchemaChange startToWaitForFlushSuccess() {
|
||||
if (!responseFuture.isDone()) {
|
||||
throw new IllegalStateException(
|
||||
"Cannot start to wait for flush success before the SchemaChangeRequest is done.");
|
||||
}
|
||||
return new PendingSchemaChange(changeRequest, new CompletableFuture<>());
|
||||
}
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue