[FLINK-34180] Migrate doc website from ververica to flink (#3028)
parent 86272bf102
commit 1dc201f9b3
@@ -0,0 +1,59 @@
#!/usr/bin/env bash
################################################################################
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
################################################################################

set -e

mvn --version
java -version
javadoc -J-version

# workaround for a git security patch
git config --global --add safe.directory /root/flink-cdc
git submodule update --init --recursive

HUGO_REPO=https://github.com/gohugoio/hugo/releases/download/v0.80.0/hugo_extended_0.80.0_Linux-64bit.tar.gz
HUGO_ARTIFACT=hugo_extended_0.80.0_Linux-64bit.tar.gz
if ! curl --fail -OL $HUGO_REPO ; then
  echo "Failed to download Hugo binary"
  exit 1
fi
tar -zxvf $HUGO_ARTIFACT

# Build the docs
hugo --source docs

# generate docs into docs/target
hugo -v --source docs --destination target
if [ $? -ne 0 ]; then
  echo "Error building the docs"
  exit 1
fi

# build Flink; required for Javadoc step
mvn clean install -B -DskipTests -Dfast

# build java/scala docs
mkdir -p docs/target/api
mvn javadoc:aggregate -B \
  -DadditionalJOption="-Xdoclint:none --allow-script-in-comments" \
  -Dmaven.javadoc.failOnError=false \
  -Dcheckstyle.skip=true \
  -Dspotless.check.skip=true \
  -Denforcer.skip=true \
  -Dheader="<a href=\"http://flink.apache.org/\" target=\"_top\"><h1>Back to Flink Website</h1></a> <script>var _paq=window._paq=window._paq||[];_paq.push([\"disableCookies\"]),_paq.push([\"setDomains\",[\"*.flink.apache.org\",\"*.nightlies.apache.org/flink\"]]),_paq.push([\"trackPageView\"]),_paq.push([\"enableLinkTracking\"]),function(){var u=\"//matomo.privacy.apache.org/\";_paq.push([\"setTrackerUrl\",u+\"matomo.php\"]),_paq.push([\"setSiteId\",\"1\"]);var d=document, g=d.createElement('script'), s=d.getElementsByTagName('script')[0];g.async=true; g.src=u+'matomo.js'; s.parentNode.insertBefore(g,s)}();</script>"
mv target/site/apidocs docs/target/api/java
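The script above leaves the rendered site in `docs/target`. As a rough sketch (not part of the script), the output can be inspected locally with any static file server, for example:

```sh
# Hypothetical local check of the CI output; any static file server works.
python3 -m http.server 8000 --directory docs/target
# then open http://localhost:8000/
```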
@@ -0,0 +1,3 @@
[submodule "docs/themes/book"]
	path = docs/themes/book
	url = https://github.com/alex-shpak/hugo-book
@@ -0,0 +1,10 @@
.bundle/
.jekyll-metadata
.jekyll-cache/
.rubydeps/
ruby2/.bundle/
ruby2/.rubydeps/
public/
resources/
.hugo_build.lock
.DS_Store
@@ -1,6 +0,0 @@
FROM python:3.7-slim
RUN apt-get update
RUN apt-get -y install git
RUN pip3 install -U sphinx==4.1.1 myst-parser==0.15.2 pygments==2.10.0 sphinx-rtd-theme==0.5.2 sphinx-autobuild==2021.3.14 gitpython==3.1.18 pyyaml==6.0
EXPOSE 8001
CMD ["sphinx-autobuild", "--host", "0.0.0.0", "--port", "8001", "/home/flink-cdc/docs", "/home/flink-cdc/docs/_build/html"]
@@ -1,19 +0,0 @@
# Minimal makefile for Sphinx documentation
#

# You can set these variables from the command line.
SPHINXOPTS    =
SPHINXBUILD   = sphinx-build
SOURCEDIR     = .
BUILDDIR      = _build

# Put it first so that "make" without argument is like "make help".
help:
	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)

.PHONY: help Makefile

# Catch-all target: route all unknown targets to Sphinx using the new
# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
%: Makefile
	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
@@ -1,36 +1,269 @@
This README gives an overview of how to build and contribute to the documentation of Apache Flink.

The documentation is included with the source of Apache Flink in order to ensure that you always
have docs corresponding to your checked out version. The online documentation at
https://flink.apache.org/ is also generated from the files found here.

# Requirements

### Build the documentation and serve it locally

The Flink documentation uses [Hugo](https://gohugo.io/getting-started/installing/) to generate HTML files. More specifically, it uses the *extended version* of Hugo with Sass/SCSS support.

To build the documentation, you can install Hugo locally or use a Docker image.

Both methods require you to execute commands in the directory of this module (`docs/`). The built site is served at http://localhost:1313/.

#### Using Hugo Docker image:

```sh
$ git submodule update --init --recursive
$ docker pull jakejarvis/hugo-extended:latest
$ docker run -v $(pwd):/src -p 1313:1313 jakejarvis/hugo-extended:latest server --buildDrafts --buildFuture --bind 0.0.0.0
```
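The Docker image is the only route spelled out above; as a minimal sketch of the local alternative, assuming the *extended* Hugo binary (0.80.0 or later, with Sass/SCSS support) is already installed and on the `PATH`, the same preview can be served directly from the `docs/` directory:

```sh
# Hypothetical local equivalent of the Docker command above.
git submodule update --init --recursive
hugo server --buildDrafts --buildFuture --bind 0.0.0.0   # serves http://localhost:1313/
```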
## Include externally hosted documentation

With the ongoing efforts to move Flink's connectors from this repository to individual, dedicated
repositories, this also requires the documentation to be hosted outside this repo. However,
we still want to serve all documentation as a whole on the Flink documentation website.

Adding new externally hosted documentation requires the following steps to be taken:

1. (If necessary) Move the existing documentation to the new repository

2. In the Flink repository, edit the `docs/setup_docs.sh` file and add a reference to your now
   externally hosted documentation. The reference will look like `integrate_connector_docs <connector_name> <branch_or_tag>`.
   Replace `<connector_name>` with the name of your connector, e.g., `elasticsearch` for `flink-connector-elasticsearch`.

## Generate configuration tables

Configuration descriptions are auto generated from code. To trigger the generation you need to run in the project root:

```
mvn -Pgenerate-config-docs install -Dfast -DskipTests
```

The resulting html files will be written to `layouts/shortcodes/generated`. Tables are regenerated each time the command is invoked.
These tables can be directly included into the documentation:

```
{{< generated/file_name >}}
```

# Contribute

## Markdown

The documentation pages are written in [Markdown](http://daringfireball.net/projects/markdown/syntax). It is possible to use [GitHub flavored syntax](http://github.github.com/github-flavored-markdown) and intermix plain html.

## Front matter

In addition to Markdown, every page contains a Jekyll front matter, which specifies the title of the page and the layout to use. The title is used as the top-level heading for the page. The default layout is `plain` (found in `_layouts`).

    ---
    title: "Title of the Page"
    ---

    ---
    title: "Title of the Page" <-- Title rendered in the side nav
    weight: 1 <-- Weight controls the ordering of pages in the side nav.
    type: docs <-- required
    aliases: <-- Alias to setup redirect from removed page to this one
      - /alias/to/removed/page.html
    ---

## Structure

### Page

#### Headings

All documents are structured with headings. From these headings, you can automatically generate a page table of contents (see below).

```
# Level-1 Heading <- Used for the title of the page
## Level-2 Heading <- Start with this one for content
### Level-3 heading
#### Level-4 heading
##### Level-5 heading
```

Please stick to the "logical order" when using the headlines, e.g. start with level-2 headings and use level-3 headings for subsections, etc. Don't use a different ordering, because you don't like how a headline looks.

#### Table of Contents

Table of contents are added automatically to every page, based on heading levels 2 - 4.
The ToC can be omitted by adding the following to the front matter of the page:

    ---
    bookToc: false
    ---

### ShortCodes

Flink uses [shortcodes](https://gohugo.io/content-management/shortcodes/) to add custom functionality
to its documentation markdown. The following are available for use:

#### Flink Artifact

    {{< artifact flink-streaming-scala withScalaVersion >}}

This will be replaced by the maven artifact for flink-streaming-scala that users should copy into their pom.xml file. It will render out to:

```xml
<dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-streaming-scala_2.12</artifactId>
    <version><!-- current flink version --></version>
</dependency>
```

It includes a number of optional flags:

* withScalaVersion: Includes the scala version suffix to the artifact id
* withTestScope: Includes `<scope>test</scope>` to the module. Useful for marking test dependencies.
* withTestClassifier: Includes `<classifier>tests</classifier>`. Useful when users should be pulling in Flink tests dependencies. This is mostly for the test harnesses and probably not what you want.

You can also use the shortcodes (with same flags) instead:

* `artifact_gradle` to show the Gradle syntax
* `artifact_tabs` to create a tabbed view, showing both Maven and Gradle syntax

#### Flink Connector Artifact

    {{< connector_artifact flink-connector-elasticsearch 3.0.0 >}}

This will be replaced by the maven artifact for flink-connector-elasticsearch that users should copy into their pom.xml file. It will render out to:

```xml
<dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-connector-elasticsearch</artifactId>
    <version>3.0.0</version>
</dependency>
```

#### Back to Top

    {{< top >}}

This will be replaced by a back to top link. It is recommended to use these links at least at the end of each level-2 section.

#### Info Hints

    {{< hint info >}}
    Some interesting information
    {{< /hint >}}

The hint will be rendered in a blue box. This hint is useful when providing
additional information for the user that does not fit into the flow of the documentation.

#### Info Warning

    {{< hint warning >}}
    Something to watch out for.
    {{< /hint >}}

The hint will be rendered in a yellow box. This hint is useful when highlighting
information users should watch out for to prevent errors.

#### Info Danger

    {{< hint danger >}}
    Something to avoid
    {{< /hint >}}

The hint will be rendered in a red box. This hint is useful when highlighting
information users need to know to avoid data loss or to point out broken
functionality.

#### Label

    {{< label "My Label" >}}

The label will be rendered in an inlined blue box. This is useful for labeling functionality
such as whether a SQL feature works for only batch or streaming execution.

#### Flink version

    {{< version >}}

Interpolates the current Flink version

#### Scala Version

    {{< scala_version >}}

Interpolates the default scala version

#### Stable

    {{< stable >}}
    Some content
    {{< /stable >}}

This shortcode will only render its content if the site is marked as stable.

#### Unstable

    {{< unstable >}}
    Some content
    {{< /unstable >}}

This shortcode will only render its content if the site is marked as unstable.

#### Query State Warning

    {{< query_state_warning >}}

Will render a warning the current SQL feature may have unbounded state requirements.

#### tab

    {{< tabs "sometab" >}}
    {{< tab "Java" >}}
    ```java
    System.out.println("Hello World!");
    ```
    {{< /tab >}}
    {{< tab "Scala" >}}
    ```scala
    println("Hello World!");
    ```
    {{< /tab >}}
    {{< /tabs >}}

Prints the content in tabs. IMPORTANT: The label in the outermost "tabs" shortcode must
be unique for the page.

#### Github Repo

    {{< github_repo >}}

Renders a link to the apache flink repo.

#### Github Link

    {{< gh_link file="/some/file.java" name="Some file" >}}

Renders a link to a file in the Apache Flink repo with a given name.

#### JavaDocs Link

    {{< javadoc file="some/file" name="Some file" >}}

Renders a link to a file in the Apache Flink Java Documentation.

#### PythonDocs Link

    {{< pythondoc file="some/file" name="Some file" >}}

Renders a link to a file in the Apache Flink Python Documentation.

#### FlinkDownloads Link

```
{{< downloads >}}
```

Renders a link to the apache flink download page.
File diff suppressed because one or more lines are too long.
Binary files not shown: three images removed (134 KiB, 66 KiB, 301 KiB).
@@ -1,42 +0,0 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/* override table width restrictions */
.wy-table-responsive table td, .wy-table-responsive table th {
  white-space: normal;
}

.wy-table-responsive {
  margin-bottom: 24px;
  max-width: 100%;
  overflow: visible;
}

/* override style of li under ul */
.wy-nav-content ul li {
  list-style: disc;
  margin-left: 36px;
}

.wy-nav-content ul li p {
  margin: 0 0 8px;
}

/* override max-width of content */
.wy-nav-content {
  max-width: 80%;
}
@@ -1,51 +0,0 @@
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at

   http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->

<!-- Extend the RTD template to support user defined "Edit on Github" URL -->
{%- extends "sphinx_rtd_theme/breadcrumbs.html" %}

{% if page_source_suffix %}
{% set suffix = page_source_suffix %}
{% else %}
{% set suffix = source_suffix %}
{% endif %}

{% if meta is defined and meta is not none %}
{% set check_meta = True %}
{% else %}
{% set check_meta = False %}
{% endif %}

{% if check_meta and 'github_url' in meta %}
{% set display_github = True %}
{% endif %}

<div role="navigation" aria-label="breadcrumbs navigation">
  <ul class="wy-breadcrumbs">
    {% block breadcrumbs_aside %}
      <li class="wy-breadcrumbs-aside">
        {% if pagename != "search" %}
          {% if display_github %}
            <a href="http://{{ github_host|default("github.com") }}/{{ github_user }}/{{ github_repo }}/blob/{{ github_version }}/docs/{{ pagename }}{{ suffix }}" class="fa fa-github"> {{ _('Edit on GitHub') }}</a>
          {% endif %}
        {% endif %}
      </li>
    {% endblock %}
  </ul>
  <hr/>
</div>
@@ -1,59 +0,0 @@
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at

   http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->

{% if READTHEDOCS or display_lower_left %}
{# Add rst-badge after rst-versions for small badge style. #}
<div class="rst-versions" data-toggle="rst-versions" role="note" aria-label="versions">
  <span class="rst-current-version" data-toggle="rst-current-version">
    <span class="fa fa-book"> Read the Docs</span>
    version: {{ current_version }}
    <span class="fa fa-caret-down"></span>
  </span>
  <div class="rst-other-versions">
    {% if versions %}
    <dl>
      <dt>{{ _('Versions') }}</dt>
      {% for slug, url in versions %}
      {% if slug == current_version %} <strong> {% endif %}
      <dd><a href="{{ url }}">{{ slug }}</a></dd>
      {% if slug == current_version %} </strong> {% endif %}
      {% endfor %}
    </dl>
    {% endif %}
    {% if READTHEDOCS %}
    <dl>
      <dt>{{ _('On Read the Docs') }}</dt>
      <dd>
        <a href="//{{ PRODUCTION_DOMAIN }}/projects/{{ slug }}/?fromdocs={{ slug }}">{{ _('Project Home') }}</a>
      </dd>
      <dd>
        <a href="//{{ PRODUCTION_DOMAIN }}/builds/{{ slug }}/?fromdocs={{ slug }}">{{ _('Builds') }}</a>
      </dd>
    </dl>
    {% endif %}
    <hr/>
    {% trans %}Free document hosting provided by <a href="http://www.readthedocs.org">Read the Docs</a>.{% endtrans %}
  </div>
</div>
{% endif %}
<!-- Place this tag in your head or just before your close body tag. -->
<script async defer
  src="https://ververica.github.io/{{ github_repo }}/{{ current_version }}/_static/button.js"></script>
@@ -0,0 +1,239 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

@import "github";

.link {
  padding-bottom: 5px;
}

.appetizer {
  color: #FBB142;
}

.maindish {
  color: #7E4F89;
}

.dessert {
  color: #E6526F;
}

.book-menu nav {
  background: #f8f8f8;
}

.book-page {
  padding: 2rem 2rem;
}

.book-search input {
  background: white;
}

.markdown a {
  text-decoration: none;
  color: #05b;
}

.markdown a:visited {
  text-decoration: none;
  color: #05b;
}

.markdown {
  line-height: 1.43;

  h1,
  h2,
  h3,
  h4,
  h5,
  h6 {
    font-weight: 500;
    padding-top: 0;
    margin-top: 1em;
  }
}

body {
  letter-spacing: normal;
  -webkit-font-smoothing: auto;
}

aside nav ul {
  li {
    margin: 0.5em 0;
  }
}

.book-search {
  border: 2px solid #ebebeb;
}

@media screen and (max-width: 768px) {
  .toc {
    display: none;
  }
}

aside.book-menu nav {
  a:hover {
    font-weight: bold;
    opacity: 1.0;
  }

  a.active {
    font-weight: bold;
    color: var(--body-font-color);
  }
}

aside.book-menu > li {
  padding: 10px 5px 5px 5px;
}

aside.book-toc {
  h3 {
    margin-top: 0;
    padding-top: 0;
    font-size: 1.2em;
  }
}

html {
  line-height: 1.43;
}

h1, h2, h3, h4, h5, h6 {
  line-height: 1.1;
}

h1, h2, h3 {
  margin-top: 20px;
  margin-bottom: 10px;
}

h2, h3, h4 {
  padding-top: 1em;
}

h1 {
  font-size: 36px;
}

h2 {
  font-size: 30px;
  border-bottom: 1px solid #e5e5e5;
}

h3 {
  font-size: 24px;
}

h4 {
  font-size: 18px;
}

.markdown code {
  background: white;
  padding: 0;
  border-radius: 0;
}

pre.chroma code {
  line-height: 1.43;
}

.book-languages {
  border: 2px solid black;
}

.menu-break {
  opacity: 0.1;
}

#book-search-results {
  padding: 2px;
  background-color: white;
}

.label {
  display: inline;
  padding: .2em .6em .3em;
  font-size: 75%;
  font-weight: 700;
  line-height: 1;
  color: #fff;
  text-align: center;
  white-space: nowrap;
  vertical-align: baseline;
  border-radius: .25em;
  background-color: #337ab7;
}

.expand-toc {
  position: fixed;
  top: 2em;
  right: 5em;
  display: none;
}

.container {
  max-width: 90rem;
}

#book-search-input:focus {
  outline: none;
}

.rest-api h5 {
  margin-top: .5em;
  margin-bottom: .5em;
  font-size: 1em;
}

.rest-api tbody {
  display: table;
  width: 100%;
  background: white;
}

.rest-api td {
  background: white;
}

.rest-api .book-expand label {
  padding: 0rem 0rem;
  background: white;
}

.rest-api .book-expand {
  background: white;
}

.rest-api .book-expand .book-expand-head {
  background: white;
}

.configuration td {
  background: white;
}

.markdown table tr:nth-child(2n) {
  background: white;
}
@@ -0,0 +1,25 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

body {
  font-family: "Helvetica Neue",Helvetica,Arial,sans-serif;
  font-size: 14px;
}

code {
  font-family: "Menlo", "Lucida Console", monospace;
}
@@ -0,0 +1,87 @@
/**
 * Syntax highlighting generated via
 * hugo gen chromastyles --style=github > chroma.css
 */

/* Background */ .chroma { background-color: #ffffff }
/* Other */ .chroma .x {  }
/* Error */ .chroma .err { color: #a61717; background-color: #e3d2d2 }
/* LineTableTD */ .chroma .lntd { vertical-align: top; padding: 0; margin: 0; border: 0; }
/* LineTable */ .chroma .lntable { border-spacing: 0; padding: 0; margin: 0; border: 0; width: auto; overflow: auto; display: block; }
/* LineHighlight */ .chroma .hl { display: block; width: 100%; background-color: #ffffcc }
/* LineNumbersTable */ .chroma .lnt { margin-right: 0.4em; padding: 0 0.4em 0 0.4em; color: #7f7f7f }
/* LineNumbers */ .chroma .ln { margin-right: 0.4em; padding: 0 0.4em 0 0.4em; color: #7f7f7f }
/* Keyword */ .chroma .k { color: #000000; font-weight: bold }
/* KeywordConstant */ .chroma .kc { color: #000000; font-weight: bold }
/* KeywordDeclaration */ .chroma .kd { color: #000000; font-weight: bold }
/* KeywordNamespace */ .chroma .kn { color: #000000; font-weight: bold }
/* KeywordPseudo */ .chroma .kp { color: #000000; font-weight: bold }
/* KeywordReserved */ .chroma .kr { color: #000000; font-weight: bold }
/* KeywordType */ .chroma .kt { color: #445588; font-weight: bold }
/* Name */ .chroma .n {  }
/* NameAttribute */ .chroma .na { color: #008080 }
/* NameBuiltin */ .chroma .nb { color: #0086b3 }
/* NameBuiltinPseudo */ .chroma .bp { color: #999999 }
/* NameClass */ .chroma .nc { color: #445588; font-weight: bold }
/* NameConstant */ .chroma .no { color: #008080 }
/* NameDecorator */ .chroma .nd { color: #3c5d5d; font-weight: bold }
/* NameEntity */ .chroma .ni { color: #800080 }
/* NameException */ .chroma .ne { color: #990000; font-weight: bold }
/* NameFunction */ .chroma .nf { color: #990000; font-weight: bold }
/* NameFunctionMagic */ .chroma .fm {  }
/* NameLabel */ .chroma .nl { color: #990000; font-weight: bold }
/* NameNamespace */ .chroma .nn { color: #555555 }
/* NameOther */ .chroma .nx {  }
/* NameProperty */ .chroma .py {  }
/* NameTag */ .chroma .nt { color: #000080 }
/* NameVariable */ .chroma .nv { color: #008080 }
/* NameVariableClass */ .chroma .vc { color: #008080 }
/* NameVariableGlobal */ .chroma .vg { color: #008080 }
/* NameVariableInstance */ .chroma .vi { color: #008080 }
/* NameVariableMagic */ .chroma .vm {  }
/* Literal */ .chroma .l {  }
/* LiteralDate */ .chroma .ld {  }
/* LiteralString */ .chroma .s { color: #dd1144 }
/* LiteralStringAffix */ .chroma .sa { color: #dd1144 }
/* LiteralStringBacktick */ .chroma .sb { color: #dd1144 }
/* LiteralStringChar */ .chroma .sc { color: #dd1144 }
/* LiteralStringDelimiter */ .chroma .dl { color: #dd1144 }
/* LiteralStringDoc */ .chroma .sd { color: #dd1144 }
/* LiteralStringDouble */ .chroma .s2 { color: #dd1144 }
/* LiteralStringEscape */ .chroma .se { color: #dd1144 }
/* LiteralStringHeredoc */ .chroma .sh { color: #dd1144 }
/* LiteralStringInterpol */ .chroma .si { color: #dd1144 }
/* LiteralStringOther */ .chroma .sx { color: #dd1144 }
/* LiteralStringRegex */ .chroma .sr { color: #009926 }
/* LiteralStringSingle */ .chroma .s1 { color: #dd1144 }
/* LiteralStringSymbol */ .chroma .ss { color: #990073 }
/* LiteralNumber */ .chroma .m { color: #009999 }
/* LiteralNumberBin */ .chroma .mb { color: #009999 }
/* LiteralNumberFloat */ .chroma .mf { color: #009999 }
/* LiteralNumberHex */ .chroma .mh { color: #009999 }
/* LiteralNumberInteger */ .chroma .mi { color: #009999 }
/* LiteralNumberIntegerLong */ .chroma .il { color: #009999 }
/* LiteralNumberOct */ .chroma .mo { color: #009999 }
/* Operator */ .chroma .o { color: #000000; font-weight: bold }
/* OperatorWord */ .chroma .ow { color: #000000; font-weight: bold }
/* Punctuation */ .chroma .p {  }
/* Comment */ .chroma .c { color: #999988; font-style: italic }
/* CommentHashbang */ .chroma .ch { color: #999988; font-style: italic }
/* CommentMultiline */ .chroma .cm { color: #999988; font-style: italic }
/* CommentSingle */ .chroma .c1 { color: #999988; font-style: italic }
/* CommentSpecial */ .chroma .cs { color: #999999; font-weight: bold; font-style: italic }
/* CommentPreproc */ .chroma .cp { color: #999999; font-weight: bold; font-style: italic }
/* CommentPreprocFile */ .chroma .cpf { color: #999999; font-weight: bold; font-style: italic }
/* Generic */ .chroma .g {  }
/* GenericDeleted */ .chroma .gd { color: #000000; background-color: #ffdddd }
/* GenericEmph */ .chroma .ge { color: #000000; font-style: italic }
/* GenericError */ .chroma .gr { color: #aa0000 }
/* GenericHeading */ .chroma .gh { color: #999999 }
/* GenericInserted */ .chroma .gi { color: #000000; background-color: #ddffdd }
/* GenericOutput */ .chroma .go { color: #888888 }
/* GenericPrompt */ .chroma .gp { color: #555555 }
/* GenericStrong */ .chroma .gs { font-weight: bold }
/* GenericSubheading */ .chroma .gu { color: #aaaaaa }
/* GenericTraceback */ .chroma .gt { color: #aa0000 }
/* GenericUnderline */ .chroma .gl { text-decoration: underline }
/* TextWhitespace */ .chroma .w { color: #bbbbbb }
@@ -0,0 +1,50 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

'use strict';

(function () {
  const indexCfg = {{ with i18n "bookSearchConfig" }}
    {{ . }};
  {{ else }}
   {};
  {{ end }}

  indexCfg.doc = {
    id: 'id',
    field: ['title', 'content'],
    store: ['title', 'href', 'section'],
  };

  const index = FlexSearch.create('balance', indexCfg);
  window.bookSearchIndex = index;

  {{- $pages := where .Site.Pages "Kind" "in" (slice "page" "section") -}}
  {{- $pages = where $pages "Params.booksearchexclude" "!=" true -}}
  {{- $pages = where $pages "Content" "not in" (slice nil "") -}}

  {{ range $index, $page := $pages }}
  index.add({
    'id': {{ $index }},
    'href': '{{ $page.RelPermalink }}',
    'title': {{ (partial "docs/simple-title" $page) | jsonify }},
    'section': {{ (partial "docs/simple-title" $page.Parent) | jsonify }},
    'content': {{ $page.Plain | jsonify }}
  });
  {{- end -}}
})();
@@ -1,76 +0,0 @@
#!/bin/bash
################################################################################
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
################################################################################

set -x

# step-1: install dependencies
apt-get update
apt-get -y install git rsync python3-pip python3-git python3-stemmer python3-virtualenv python3-setuptools
python3 -m pip install -U sphinx==4.1.1 myst-parser==0.15.2 pygments==2.10.0 sphinx-rtd-theme==0.5.2 pyyaml==6.0

export REPO_NAME="${GITHUB_REPOSITORY##*/}"

git config --global --add safe.directory /__w/${REPO_NAME}/${REPO_NAME}
export SOURCE_DATE_EPOCH=$(git log -1 --pretty=%ct)
temp_docs_root=`mktemp -d`

ls

# step-1.5: copy main site content to temp dir
# this must be done before `make -C docs clean` otherwise the contents will be removed
rsync -avz "docs/site/" "${temp_docs_root}/"

# step-2: build sites for all branches (for multiple versioned docs), excludes 'HEAD' and 'gh-pages'
make -C docs clean
branches="`git for-each-ref '--format=%(refname:lstrip=-1)' refs/remotes/origin/ | grep -viE '^(HEAD|gh-pages|release-1.0|release-1.1|release-1.2|release-1.3)$'| grep -iE '^(release-|master)'`"
for current_branch in ${branches}; do
  export current_version=${current_branch}
  git checkout ${current_branch}

  # skip the branch that has no docs
  if [ ! -e 'docs/conf.py' ]; then
    echo -e "\tINFO: Couldn't find 'docs/conf.py' for branch: ${current_branch}, just skip this branch"
    continue
  fi
  echo "INFO: Building sites for branch: ${current_branch}"
  sphinx-build -b html docs/ docs/_build/html/${current_branch}

  # copy the build content to temp dir
  rsync -av "docs/_build/html/" "${temp_docs_root}/"

done

git checkout master
git config --global user.name "${GITHUB_ACTOR}"
git config --global user.email "${GITHUB_ACTOR}@users.noreply.github.com"

# step-3: push build sites to gh-pages branch
pushd "${temp_docs_root}"
git init
git remote add deploy "https://token:${GITHUB_TOKEN}@github.com/${GITHUB_REPOSITORY}.git"
git checkout -b gh-pages

touch .nojekyll

git add .
git commit -m "Generated docs from commit ${GITHUB_SHA}"
git push deploy gh-pages --force

# pop back and exit
popd
exit 0
@@ -1,135 +0,0 @@
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Maven
# Build your Java project and run tests with Apache Maven.
# Add steps that analyze code, save build artifacts, deploy, and more:
# https://docs.microsoft.com/azure/devops/pipelines/languages/java

# -*- coding: utf-8 -*-
#
# Configuration file for the Sphinx documentation builder.
#
# This file does only contain a selection of the most common options. For a
# full list see the documentation:
# http://www.sphinx-doc.org/en/master/config

# -- Path setup --------------------------------------------------------------

# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.
#
import os
import sys
sys.path.insert(0, os.path.abspath('.'))


# -- Project information -----------------------------------------------------
project = 'CDC Connectors for Apache Flink®'
copyright = '2022, Ververica GmbH; Apache Flink, Flink®, Apache®, the squirrel logo, and the Apache feather logo are either registered trademarks or trademarks of The Apache Software Foundation'
author = 'ververica'


# -- General configuration ---------------------------------------------------

# If your documentation needs a minimal Sphinx version, state it here.
#
# needs_sphinx = '1.0'

# Add any Sphinx extension module names here, as strings. They can be
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
# ones.
extensions = [
    'sphinx_rtd_theme',
    'sphinx.ext.autodoc',
    'sphinx.ext.viewcode',
    'sphinx.ext.githubpages',
    'myst_parser',
]

# Add any paths that contain templates here, relative to this directory.
templates_path = ['_templates']

# The theme to use for HTML and HTML Help pages.  See the documentation for
# a list of builtin themes.
#
html_theme = 'sphinx_rtd_theme'

# The name of an image file (within the static path) to use as favicon of the
# docs.  This file should be a Windows icon file (.ico) being 16x16 or 32x32
# pixels large.
html_favicon = '_static/fig/favicon.png'

import myst_parser

source_parsers = {
    '.md': myst_parser
}
source_suffix = ['.md']

# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
# This pattern also affects html_static_path and html_extra_path.
exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']

# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
html_static_path = ['_static']

html_context = {
    'css_files': [
        '_static/theme_overrides.css',  # overrides for wide tables in RTD theme
    ],
}

try:
    html_context
except NameError:
    html_context = dict()
html_context['display_lower_left'] = True

if 'REPO_NAME' in os.environ:
    REPO_NAME = os.environ['REPO_NAME']
else:
    REPO_NAME = ''

from git import Repo
repo = Repo( search_parent_directories=True )
remote_refs = repo.remote().refs

if 'current_version' in os.environ:
    current_version = os.environ['current_version']
else:
    current_version = repo.active_branch.name

html_context['current_version'] = current_version
html_context['version'] = current_version
html_context['github_version'] = current_version

html_context['versions'] = list()
branches = [branch.name for branch in remote_refs]
for branch in branches:
    if 'origin/' in branch and ('master' in branch or 'release-' in branch)\
            and 'HEAD' not in branch and 'gh-pages' not in branch \
            and 'release-1.0' not in branch and 'release-1.1' not in branch\
            and 'release-1.2' not in branch and 'release-1.3' not in branch:
        version = branch[7:]
        html_context['versions'].append( (version, '/' +REPO_NAME+ '/' +version+ '/') )

html_context['display_github'] = True
html_context['github_user'] = 'ververica'
html_context['github_repo'] = 'flink-cdc-connectors'
@@ -0,0 +1,93 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

baseURL = '//nightlies.apache.org/flink/flink-cdc-docs-master'
languageCode = 'en-us'
title = 'Apache Flink CDC'
enableGitInfo = false
theme = "book"
pygmentsUseClasses = true

[params]
  # Flag whether this is a stable version or not.
  # Used for the quickstart page.
  IsStable = false

  # Flag to indicate whether an outdated warning should be shown.
  ShowOutDatedWarning = false

  # This is the version referenced in the docs. Please only use these variables
  # to reference a specific Flink version, because this is the only place where
  # we change the version for the complete docs when forking of a release branch
  # etc.
  # The full version string as referenced in Maven (e.g. 1.2.1)
  Version = "3.1-SNAPSHOT"

  # For stable releases, leave the bugfix version out (e.g. 1.2). For snapshot
  # release this should be the same as the regular version
  VersionTitle = "3.1-SNAPSHOT"

  # The branch for this version of Apache Flink CDC
  Branch = "master"

  # The GitHub repository for Apache Flink CDC
  Repo = "//github.com/apache/flink-cdc"

  GithubRepo = "https://github.com/apache/flink-cdc.git"

  ProjectHomepage = "//flink.apache.org"

  # External links at the bottom
  # of the menu
  MenuLinks = [
    ["Project Homepage", "//flink.apache.org"],
    ["JavaDocs", "//nightlies.apache.org/flink/flink-cdc-docs-master/api/java/"],
  ]

  PreviousDocs = [
    ["3.0", "https://nightlies.apache.org/flink-cdc/flink-cdc-docs-release-3.0"],
  ]

[markup]
[markup.goldmark.renderer]
  unsafe = true

[languages]
[languages.en]
  languageName = 'English'
  contentDir = 'content'
  weight = 1

[languages.zh]
  languageName = '中文版'
  contentDir = 'content.zh'
  weight = 2

[module]
[[module.imports.mounts]]
  source = 'content'
  target = 'content'
  lang = 'en'
[[module.imports.mounts]]
  source = 'content.zh'
  target = 'content'
  lang = 'zh'
[[module.imports.mounts]]
  source = 'layouts'
  target = 'layouts'
[[module.imports.mounts]]
  source = 'data'
  target = 'data'
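The `baseURL` above is hard-coded for the master docs. As a sketch of how a release branch could reuse the same config (an assumption, not something this file mandates), Hugo's `-b`/`--baseURL` flag can override it for a single build:

```sh
# Hypothetical release-branch build; -b overrides baseURL from config.toml for this run only.
hugo -v --source docs --destination target -b "//nightlies.apache.org/flink/flink-cdc-docs-release-3.0"
```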
@@ -0,0 +1,25 @@
---
title: Connectors
icon: <i class="fa fa-random title maindish" aria-hidden="true"></i>
bold: true
bookCollapseSection: true
weight: 3
---
<!--
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at

  http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
-->
|
|||||||
|
---
|
||||||
|
title: "Overview"
|
||||||
|
weight: 1
|
||||||
|
type: docs
|
||||||
|
aliases:
|
||||||
|
- /connectors/pipeline-connectors/
|
||||||
|
---
|
||||||
|
<!--
|
||||||
|
Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
or more contributor license agreements. See the NOTICE file
|
||||||
|
distributed with this work for additional information
|
||||||
|
regarding copyright ownership. The ASF licenses this file
|
||||||
|
to you under the Apache License, Version 2.0 (the
|
||||||
|
"License"); you may not use this file except in compliance
|
||||||
|
with the License. You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing,
|
||||||
|
software distributed under the License is distributed on an
|
||||||
|
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||||
|
KIND, either express or implied. See the License for the
|
||||||
|
specific language governing permissions and limitations
|
||||||
|
under the License.
|
||||||
|
-->
|
||||||
|
|
||||||
|
# Pipeline Connectors Of CDC Streaming ELT Framework
|
||||||
|
|
||||||
|
## Supported Connectors
|
||||||
|
|
||||||
|
| Connector | Database |
|
||||||
|
|---------------------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
|
||||||
|
| [doris-pipeline](doris-pipeline.md) | <li> [Doris](https://doris.apache.org/): 1.2.x, 2.x.x |
|
||||||
|
| [mysql-pipeline](mysql-pipeline.md) | <li> [MySQL](https://dev.mysql.com/doc): 5.6, 5.7, 8.0.x <li> [RDS MySQL](https://www.aliyun.com/product/rds/mysql): 5.6, 5.7, 8.0.x <li> [PolarDB MySQL](https://www.aliyun.com/product/polardb): 5.6, 5.7, 8.0.x <li> [Aurora MySQL](https://aws.amazon.com/cn/rds/aurora): 5.6, 5.7, 8.0.x <li> [MariaDB](https://mariadb.org): 10.x <li> [PolarDB X](https://github.com/ApsaraDB/galaxysql): 2.0.1 |
|
||||||
|
| [starrocks-pipeline](starrocks-pipeline.md) | <li> [StarRocks](https://www.starrocks.io/): 2.x, 3.x |
|
||||||
|
|
||||||
|
## Supported Flink Versions
|
||||||
|
The following table shows the version mapping between Flink<sup>®</sup> CDC Pipeline and Flink<sup>®</sup>:
|
||||||
|
|
||||||
|
| Flink<sup>®</sup> CDC Version | Flink<sup>®</sup> Version |
|
||||||
|
|:-----------------------------------:|:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------:|
|
||||||
|
| <font color="DarkCyan">3.0.*</font> | <font color="MediumVioletRed">1.14.\*</font>, <font color="MediumVioletRed">1.15.\*</font>, <font color="MediumVioletRed">1.16.\*</font>, <font color="MediumVioletRed">1.17.\*</font>, <font color="MediumVioletRed">1.18.\*</font> |
|
||||||
|
|
||||||
|
{{< top >}}
|
@ -0,0 +1,26 @@
|
|||||||
|
---
|
||||||
|
title: Development
|
||||||
|
icon: <i class="fa fa-code title maindish" aria-hidden="true"></i>
|
||||||
|
bold: true
|
||||||
|
sectionBreak: true
|
||||||
|
bookCollapseSection: true
|
||||||
|
weight: 2
|
||||||
|
---
|
||||||
|
<!--
|
||||||
|
Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
or more contributor license agreements. See the NOTICE file
|
||||||
|
distributed with this work for additional information
|
||||||
|
regarding copyright ownership. The ASF licenses this file
|
||||||
|
to you under the Apache License, Version 2.0 (the
|
||||||
|
"License"); you may not use this file except in compliance
|
||||||
|
with the License. You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing,
|
||||||
|
software distributed under the License is distributed on an
|
||||||
|
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||||
|
KIND, either express or implied. See the License for the
|
||||||
|
specific language governing permissions and limitations
|
||||||
|
under the License.
|
||||||
|
-->
|
@ -0,0 +1,25 @@
|
|||||||
|
---
|
||||||
|
title: "FAQ"
|
||||||
|
icon: <i class="fa fa-question title appetizer" aria-hidden="true"></i>
|
||||||
|
bold: true
|
||||||
|
bookCollapseSection: true
|
||||||
|
weight: 4
|
||||||
|
---
|
||||||
|
<!--
|
||||||
|
Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
or more contributor license agreements. See the NOTICE file
|
||||||
|
distributed with this work for additional information
|
||||||
|
regarding copyright ownership. The ASF licenses this file
|
||||||
|
to you under the Apache License, Version 2.0 (the
|
||||||
|
"License"); you may not use this file except in compliance
|
||||||
|
with the License. You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing,
|
||||||
|
software distributed under the License is distributed on an
|
||||||
|
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||||
|
KIND, either express or implied. See the License for the
|
||||||
|
specific language governing permissions and limitations
|
||||||
|
under the License.
|
||||||
|
-->
|
@ -0,0 +1,25 @@
|
|||||||
|
---
|
||||||
|
title: "Try Flink CDC"
|
||||||
|
icon: <i class="fa fa-rocket title appetizer" aria-hidden="true"></i>
|
||||||
|
bold: true
|
||||||
|
bookCollapseSection: true
|
||||||
|
weight: 1
|
||||||
|
---
|
||||||
|
<!--
|
||||||
|
Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
or more contributor license agreements. See the NOTICE file
|
||||||
|
distributed with this work for additional information
|
||||||
|
regarding copyright ownership. The ASF licenses this file
|
||||||
|
to you under the Apache License, Version 2.0 (the
|
||||||
|
"License"); you may not use this file except in compliance
|
||||||
|
with the License. You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing,
|
||||||
|
software distributed under the License is distributed on an
|
||||||
|
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||||
|
KIND, either express or implied. See the License for the
|
||||||
|
specific language governing permissions and limitations
|
||||||
|
under the License.
|
||||||
|
-->
|
@ -0,0 +1,25 @@
|
|||||||
|
---
|
||||||
|
title: CDC Connectors
|
||||||
|
bookCollapseSection: true
|
||||||
|
weight: 2
|
||||||
|
aliases:
|
||||||
|
- /try-flink-cdc/cdc-connectors/
|
||||||
|
---
|
||||||
|
<!--
|
||||||
|
Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
or more contributor license agreements. See the NOTICE file
|
||||||
|
distributed with this work for additional information
|
||||||
|
regarding copyright ownership. The ASF licenses this file
|
||||||
|
to you under the Apache License, Version 2.0 (the
|
||||||
|
"License"); you may not use this file except in compliance
|
||||||
|
with the License. You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing,
|
||||||
|
software distributed under the License is distributed on an
|
||||||
|
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||||
|
KIND, either express or implied. See the License for the
|
||||||
|
specific language governing permissions and limitations
|
||||||
|
under the License.
|
||||||
|
-->
|
@ -0,0 +1,25 @@
|
|||||||
|
---
|
||||||
|
title: Pipeline Connectors
|
||||||
|
bookCollapseSection: true
|
||||||
|
weight: 1
|
||||||
|
aliases:
|
||||||
|
- /try-flink-cdc/pipeline-connectors/
|
||||||
|
---
|
||||||
|
<!--
|
||||||
|
Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
or more contributor license agreements. See the NOTICE file
|
||||||
|
distributed with this work for additional information
|
||||||
|
regarding copyright ownership. The ASF licenses this file
|
||||||
|
to you under the Apache License, Version 2.0 (the
|
||||||
|
"License"); you may not use this file except in compliance
|
||||||
|
with the License. You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing,
|
||||||
|
software distributed under the License is distributed on an
|
||||||
|
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||||
|
KIND, either express or implied. See the License for the
|
||||||
|
specific language governing permissions and limitations
|
||||||
|
under the License.
|
||||||
|
-->
|
File diff suppressed because it is too large
@ -0,0 +1,25 @@
|
|||||||
|
---
|
||||||
|
title: Connectors
|
||||||
|
icon: <i class="fa fa-random title maindish" aria-hidden="true"></i>
|
||||||
|
bold: true
|
||||||
|
bookCollapseSection: true
|
||||||
|
weight: 3
|
||||||
|
---
|
||||||
|
<!--
|
||||||
|
Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
or more contributor license agreements. See the NOTICE file
|
||||||
|
distributed with this work for additional information
|
||||||
|
regarding copyright ownership. The ASF licenses this file
|
||||||
|
to you under the Apache License, Version 2.0 (the
|
||||||
|
"License"); you may not use this file except in compliance
|
||||||
|
with the License. You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing,
|
||||||
|
software distributed under the License is distributed on an
|
||||||
|
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||||
|
KIND, either express or implied. See the License for the
|
||||||
|
specific language governing permissions and limitations
|
||||||
|
under the License.
|
||||||
|
-->
|
@ -0,0 +1,381 @@
|
|||||||
|
---
|
||||||
|
title: "Db2 CDC Connector"
|
||||||
|
weight: 9
|
||||||
|
type: docs
|
||||||
|
aliases:
|
||||||
|
- /connectors/cdc-connectors/db2-cdc.html
|
||||||
|
---
|
||||||
|
<!--
|
||||||
|
Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
or more contributor license agreements. See the NOTICE file
|
||||||
|
distributed with this work for additional information
|
||||||
|
regarding copyright ownership. The ASF licenses this file
|
||||||
|
to you under the Apache License, Version 2.0 (the
|
||||||
|
"License"); you may not use this file except in compliance
|
||||||
|
with the License. You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing,
|
||||||
|
software distributed under the License is distributed on an
|
||||||
|
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||||
|
KIND, either express or implied. See the License for the
|
||||||
|
specific language governing permissions and limitations
|
||||||
|
under the License.
|
||||||
|
-->
|
||||||
|
|
||||||
|
# Db2 CDC Connector
|
||||||
|
|
||||||
|
The Db2 CDC connector allows for reading snapshot data and incremental data from the Db2 database. This document
|
||||||
|
describes how to set up the Db2 CDC connector to run SQL queries against Db2 databases.
|
||||||
|
|
||||||
|
|
||||||
|
## Supported Databases
|
||||||
|
|
||||||
|
| Connector | Database | Driver |
|
||||||
|
|-----------------------|----------------------------------------------------|----------------------|
|
||||||
|
| [Db2-cdc](db2-cdc.md) | <li> [Db2](https://www.ibm.com/products/db2): 11.5 | Db2 Driver: 11.5.0.0 |
|
||||||
|
|
||||||
|
Dependencies
|
||||||
|
------------
|
||||||
|
|
||||||
|
In order to set up the Db2 CDC connector, the following table provides dependency information for both projects
|
||||||
|
using a build automation tool (such as Maven or SBT) and SQL Client with SQL JAR bundles.
|
||||||
|
|
||||||
|
### Maven dependency
|
||||||
|
|
||||||
|
{{< artifact flink-connector-db2-cdc >}}
|
||||||
|
|
||||||
|
### SQL Client JAR
|
||||||
|
|
||||||
|
**Download link is available only for stable releases.**
|
||||||
|
|
||||||
|
Download flink-sql-connector-db2-cdc-3.0-SNAPSHOT.jar and
|
||||||
|
put it under `<FLINK_HOME>/lib/`.
|
||||||
|
|
||||||
|
**Note:** The flink-sql-connector-db2-cdc-XXX-SNAPSHOT version corresponds to the code on the development branch. Users
|
||||||
|
need to download the source code and compile the corresponding jar themselves. Please use a released version instead, such as
|
||||||
|
[flink-sql-connector-db2-cdc-2.3.0.jar](https://mvnrepository.com/artifact/org.apache.flink/flink-connector-db2-cdc),
|
||||||
|
which is available in the Maven Central repository.
|
||||||
|
|
||||||
|
Setup Db2 server
|
||||||
|
----------------
|
||||||
|
|
||||||
|
Follow the steps in the [Debezium Db2 Connector](https://debezium.io/documentation/reference/1.9/connectors/db2.html#setting-up-db2).
|
||||||
|
|
||||||
|
|
||||||
|
Notes
|
||||||
|
----------------
|
||||||
|
|
||||||
|
### BOOLEAN type is not supported by SQL Replication on Db2
|
||||||
|
|
||||||
|
Only snapshots can be taken from tables with BOOLEAN type columns. Currently, SQL Replication on Db2 does not support BOOLEAN, so Debezium cannot perform CDC on those tables.
|
||||||
|
Consider using a different type in place of the BOOLEAN type, as sketched below.
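For illustration only, here is a minimal sketch of that workaround; the table and column names (`myschema.products`, `in_stock`) are hypothetical, and SMALLINT is just one possible replacement type:

```sql
-- Db2 source table sketch: store the flag as SMALLINT (0/1) instead of BOOLEAN,
-- so that SQL Replication (and therefore Debezium) can capture its changes.
CREATE TABLE myschema.products (
    id       INTEGER NOT NULL PRIMARY KEY,
    name     VARCHAR(255),
    in_stock SMALLINT NOT NULL DEFAULT 0   -- 0 = false, 1 = true
);
```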
|
||||||
|
|
||||||
|
|
||||||
|
How to create a Db2 CDC table
|
||||||
|
----------------
|
||||||
|
|
||||||
|
The Db2 CDC table can be defined as follows:
|
||||||
|
|
||||||
|
```sql
|
||||||
|
-- checkpoint every 3 seconds
|
||||||
|
Flink SQL> SET 'execution.checkpointing.interval' = '3s';
|
||||||
|
|
||||||
|
-- register a Db2 table 'products' in Flink SQL
|
||||||
|
Flink SQL> CREATE TABLE products (
|
||||||
|
ID INT NOT NULL,
|
||||||
|
NAME STRING,
|
||||||
|
DESCRIPTION STRING,
|
||||||
|
WEIGHT DECIMAL(10,3)
|
||||||
|
) WITH (
|
||||||
|
'connector' = 'db2-cdc',
|
||||||
|
'hostname' = 'localhost',
|
||||||
|
'port' = '50000',
|
||||||
|
'username' = 'root',
|
||||||
|
'password' = '123456',
|
||||||
|
'database-name' = 'mydb',
|
||||||
|
'schema-name' = 'myschema',
|
||||||
|
'table-name' = 'products');
|
||||||
|
|
||||||
|
-- read snapshot and binlogs from products table
|
||||||
|
Flink SQL> SELECT * FROM products;
|
||||||
|
```
|
||||||
|
|
||||||
|
Connector Options
|
||||||
|
----------------
|
||||||
|
|
||||||
|
<div class="highlight">
|
||||||
|
<table class="colwidths-auto docutils">
|
||||||
|
<thead>
|
||||||
|
<tr>
|
||||||
|
<th class="text-left" style="width: 10%">Option</th>
|
||||||
|
<th class="text-left" style="width: 8%">Required</th>
|
||||||
|
<th class="text-left" style="width: 7%">Default</th>
|
||||||
|
<th class="text-left" style="width: 10%">Type</th>
|
||||||
|
<th class="text-left" style="width: 65%">Description</th>
|
||||||
|
</tr>
|
||||||
|
</thead>
|
||||||
|
<tbody>
|
||||||
|
<tr>
|
||||||
|
<td>connector</td>
|
||||||
|
<td>required</td>
|
||||||
|
<td style="word-wrap: break-word;">(none)</td>
|
||||||
|
<td>String</td>
|
||||||
|
<td>Specify what connector to use, here should be <code>'db2-cdc'</code>.</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>hostname</td>
|
||||||
|
<td>required</td>
|
||||||
|
<td style="word-wrap: break-word;">(none)</td>
|
||||||
|
<td>String</td>
|
||||||
|
<td>IP address or hostname of the Db2 database server.</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>username</td>
|
||||||
|
<td>required</td>
|
||||||
|
<td style="word-wrap: break-word;">(none)</td>
|
||||||
|
<td>String</td>
|
||||||
|
<td>Username to use when connecting to the Db2 database server.</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>password</td>
|
||||||
|
<td>required</td>
|
||||||
|
<td style="word-wrap: break-word;">(none)</td>
|
||||||
|
<td>String</td>
|
||||||
|
<td>Password to use when connecting to the Db2 database server.</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>database-name</td>
|
||||||
|
<td>required</td>
|
||||||
|
<td style="word-wrap: break-word;">(none)</td>
|
||||||
|
<td>String</td>
|
||||||
|
<td>Database name of the Db2 server to monitor.</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>schema-name</td>
|
||||||
|
<td>required</td>
|
||||||
|
<td style="word-wrap: break-word;">(none)</td>
|
||||||
|
<td>String</td>
|
||||||
|
<td>Schema name of the Db2 database to monitor.</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>table-name</td>
|
||||||
|
<td>required</td>
|
||||||
|
<td style="word-wrap: break-word;">(none)</td>
|
||||||
|
<td>String</td>
|
||||||
|
<td>Table name of the Db2 database to monitor.</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>port</td>
|
||||||
|
<td>optional</td>
|
||||||
|
<td style="word-wrap: break-word;">50000</td>
|
||||||
|
<td>Integer</td>
|
||||||
|
<td>Integer port number of the Db2 database server.</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>scan.startup.mode</td>
|
||||||
|
<td>optional</td>
|
||||||
|
<td style="word-wrap: break-word;">initial</td>
|
||||||
|
<td>String</td>
|
||||||
|
<td>Optional startup mode for Db2 CDC consumer, valid enumerations are "initial"
|
||||||
|
and "latest-offset". Please see <a href="#startup-reading-position">Startup Reading Position</a> section
|
||||||
|
for more detailed information.</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>server-time-zone</td>
|
||||||
|
<td>optional</td>
|
||||||
|
<td style="word-wrap: break-word;">(none)</td>
|
||||||
|
<td>String</td>
|
||||||
|
<td>The session time zone in database server, e.g. "Asia/Shanghai".
|
||||||
|
It controls how the TIMESTAMP type in Db2 is converted to STRING.
|
||||||
|
See more <a href="https://debezium.io/documentation/reference/1.9/connectors/db2.html#db2-temporal-types">here</a>.
|
||||||
|
If not set, then ZoneId.systemDefault() is used to determine the server time zone.
|
||||||
|
</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>debezium.*</td>
|
||||||
|
<td>optional</td>
|
||||||
|
<td style="word-wrap: break-word;">(none)</td>
|
||||||
|
<td>String</td>
|
||||||
|
<td>Pass-through Debezium's properties to Debezium Embedded Engine which is used to capture data changes from
|
||||||
|
Db2 server.
|
||||||
|
For example: <code>'debezium.snapshot.mode' = 'never'</code>.
|
||||||
|
See more about the <a href="https://debezium.io/documentation/reference/1.9/connectors/db2.html#db2-connector-properties">Debezium's Db2 Connector properties</a></td>
|
||||||
|
</tr>
|
||||||
|
</tbody>
|
||||||
|
</table>
|
||||||
|
</div>
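To make the optional options above concrete, the following is a non-authoritative sketch that reuses the `products` table from the earlier example and adds `server-time-zone` together with one pass-through Debezium engine property (`debezium.poll.interval.ms`); the concrete values are illustrative assumptions, not recommendations:

```sql
CREATE TABLE products_with_options (
    ID INT NOT NULL,
    NAME STRING,
    DESCRIPTION STRING,
    WEIGHT DECIMAL(10,3)
) WITH (
    'connector' = 'db2-cdc',
    'hostname' = 'localhost',
    'port' = '50000',
    'username' = 'root',
    'password' = '123456',
    'database-name' = 'mydb',
    'schema-name' = 'myschema',
    'table-name' = 'products',
    -- optional: interpret Db2 TIMESTAMP values in this session time zone
    'server-time-zone' = 'Asia/Shanghai',
    -- optional: handed through to the embedded Debezium engine
    'debezium.poll.interval.ms' = '1000'
);
```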
|
||||||
|
|
||||||
|
Features
|
||||||
|
--------
|
||||||
|
### Startup Reading Position
|
||||||
|
|
||||||
|
The config option `scan.startup.mode` specifies the startup mode for the Db2 CDC consumer. The valid enumerations are:
|
||||||
|
|
||||||
|
- `initial` (default): Performs an initial snapshot on the monitored database tables upon first startup, and then continues to read the latest changes.
|
||||||
|
- `latest-offset`: Never performs a snapshot of the monitored database tables upon first startup; it only reads from
|
||||||
|
the end of the change stream, which means it only captures changes made after the connector was started.
|
||||||
|
|
||||||
|
_Note: the `scan.startup.mode` option relies on Debezium's `snapshot.mode` configuration under the hood, so please do not use the two together. If you specify both the `scan.startup.mode` and `debezium.snapshot.mode` options in the table DDL, `scan.startup.mode` may not take effect._
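A minimal sketch of picking the startup mode through `scan.startup.mode` alone (the connection values are the same placeholders as in the earlier example, and `debezium.snapshot.mode` is deliberately left unset):

```sql
CREATE TABLE products_from_latest (
    ID INT NOT NULL,
    NAME STRING
) WITH (
    'connector' = 'db2-cdc',
    'hostname' = 'localhost',
    'port' = '50000',
    'username' = 'root',
    'password' = '123456',
    'database-name' = 'mydb',
    'schema-name' = 'myschema',
    'table-name' = 'products',
    -- start from the current end of the change stream; no initial snapshot
    'scan.startup.mode' = 'latest-offset'
);
```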
|
||||||
|
|
||||||
|
### DataStream Source
|
||||||
|
|
||||||
|
```java
|
||||||
|
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
|
||||||
|
import org.apache.flink.streaming.api.functions.source.SourceFunction;
|
||||||
|
|
||||||
|
import org.apache.flink.cdc.debezium.JsonDebeziumDeserializationSchema;
|
||||||
|
|
||||||
|
public class Db2SourceExample {
|
||||||
|
public static void main(String[] args) throws Exception {
|
||||||
|
SourceFunction<String> db2Source =
|
||||||
|
Db2Source.<String>builder()
|
||||||
|
.hostname("yourHostname")
|
||||||
|
.port(50000)
|
||||||
|
.database("yourDatabaseName") // set captured database
|
||||||
|
.tableList("yourSchemaName.yourTableName") // set captured table
|
||||||
|
.username("yourUsername")
|
||||||
|
.password("yourPassword")
|
||||||
|
.deserializer(
|
||||||
|
new JsonDebeziumDeserializationSchema()) // converts SourceRecord to
|
||||||
|
// JSON String
|
||||||
|
.build();
|
||||||
|
|
||||||
|
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
|
||||||
|
|
||||||
|
// enable checkpoint
|
||||||
|
env.enableCheckpointing(3000);
|
||||||
|
|
||||||
|
env.addSource(db2Source)
|
||||||
|
.print()
|
||||||
|
.setParallelism(1); // use parallelism 1 for sink to keep message ordering
|
||||||
|
|
||||||
|
env.execute("Print Db2 Snapshot + Change Stream");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
Data Type Mapping
|
||||||
|
----------------
|
||||||
|
|
||||||
|
<div class="wy-table-responsive">
|
||||||
|
<table class="colwidths-auto docutils">
|
||||||
|
<thead>
|
||||||
|
<tr>
|
||||||
|
<th class="text-left" style="width:30%;"><a href="https://www.ibm.com/docs/en/db2/11.5?topic=elements-data-types">Db2 type</a></th>
|
||||||
|
<th class="text-left" style="width:10%;">Flink SQL type<a href="{% link dev/table/types.md %}"></a></th>
|
||||||
|
<th class="text-left" style="width:60%;">NOTE</th>
|
||||||
|
</tr>
|
||||||
|
</thead>
|
||||||
|
<tbody>
|
||||||
|
<tr>
|
||||||
|
<td>
|
||||||
|
SMALLINT<br>
|
||||||
|
</td>
|
||||||
|
<td>SMALLINT</td>
|
||||||
|
<td></td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>
|
||||||
|
INTEGER
|
||||||
|
</td>
|
||||||
|
<td>INT</td>
|
||||||
|
<td></td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>
|
||||||
|
BIGINT
|
||||||
|
</td>
|
||||||
|
<td>BIGINT</td>
|
||||||
|
<td></td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>
|
||||||
|
REAL
|
||||||
|
</td>
|
||||||
|
<td>FLOAT</td>
|
||||||
|
<td></td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>
|
||||||
|
DOUBLE
|
||||||
|
</td>
|
||||||
|
<td>DOUBLE</td>
|
||||||
|
<td></td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>
|
||||||
|
NUMERIC(p, s)<br>
|
||||||
|
DECIMAL(p, s)
|
||||||
|
</td>
|
||||||
|
<td>DECIMAL(p, s)</td>
|
||||||
|
<td></td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>DATE</td>
|
||||||
|
<td>DATE</td>
|
||||||
|
<td></td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>TIME</td>
|
||||||
|
<td>TIME</td>
|
||||||
|
<td></td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>TIMESTAMP [(p)]
|
||||||
|
</td>
|
||||||
|
<td>TIMESTAMP [(p)]
|
||||||
|
</td>
|
||||||
|
<td></td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>
|
||||||
|
CHARACTER(n)
|
||||||
|
</td>
|
||||||
|
<td>CHAR(n)</td>
|
||||||
|
<td></td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>
|
||||||
|
VARCHAR(n)
|
||||||
|
</td>
|
||||||
|
<td>VARCHAR(n)</td>
|
||||||
|
<td></td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>
|
||||||
|
BINARY(n)
|
||||||
|
</td>
|
||||||
|
<td>BINARY(n)</td>
|
||||||
|
<td></td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>
|
||||||
|
VARBINARY(N)
|
||||||
|
</td>
|
||||||
|
<td>VARBINARY(N)</td>
|
||||||
|
<td></td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>
|
||||||
|
BLOB<br>
|
||||||
|
CLOB<br>
|
||||||
|
DBCLOB<br>
|
||||||
|
</td>
|
||||||
|
<td>BYTES</td>
|
||||||
|
<td></td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>
|
||||||
|
VARGRAPHIC<br>
|
||||||
|
XML
|
||||||
|
</td>
|
||||||
|
<td>STRING</td>
|
||||||
|
<td></td>
|
||||||
|
</tr>
|
||||||
|
</tbody>
|
||||||
|
</table>
|
||||||
|
</div>
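As a worked illustration of this mapping, the sketch below declares a Flink table for a hypothetical Db2 table `myschema.orders(id INTEGER, amount DECIMAL(10,2), note VARCHAR(200), created TIMESTAMP)`; the column set is chosen only to exercise a few rows of the table above:

```sql
CREATE TABLE orders (
    ID      INT,
    AMOUNT  DECIMAL(10,2),
    NOTE    VARCHAR(200),
    CREATED TIMESTAMP
) WITH (
    'connector' = 'db2-cdc',
    'hostname' = 'localhost',
    'port' = '50000',
    'username' = 'root',
    'password' = '123456',
    'database-name' = 'mydb',
    'schema-name' = 'myschema',
    'table-name' = 'orders'
);
```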
|
||||||
|
|
||||||
|
{{< top >}}
|
File diff suppressed because it is too large
@ -0,0 +1,701 @@
|
|||||||
|
---
|
||||||
|
title: "Oracle CDC Connector"
|
||||||
|
weight: 5
|
||||||
|
type: docs
|
||||||
|
aliases:
|
||||||
|
- /connectors/cdc-connectors/oracle-cdc.html
|
||||||
|
---
|
||||||
|
<!--
|
||||||
|
Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
or more contributor license agreements. See the NOTICE file
|
||||||
|
distributed with this work for additional information
|
||||||
|
regarding copyright ownership. The ASF licenses this file
|
||||||
|
to you under the Apache License, Version 2.0 (the
|
||||||
|
"License"); you may not use this file except in compliance
|
||||||
|
with the License. You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing,
|
||||||
|
software distributed under the License is distributed on an
|
||||||
|
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||||
|
KIND, either express or implied. See the License for the
|
||||||
|
specific language governing permissions and limitations
|
||||||
|
under the License.
|
||||||
|
-->
|
||||||
|
|
||||||
|
# Oracle CDC Connector
|
||||||
|
|
||||||
|
The Oracle CDC connector allows for reading snapshot data and incremental data from the Oracle database. This document describes how to set up the Oracle CDC connector to run SQL queries against Oracle databases.
|
||||||
|
|
||||||
|
Dependencies
|
||||||
|
------------
|
||||||
|
|
||||||
|
In order to set up the Oracle CDC connector, the following table provides dependency information for both projects using a build automation tool (such as Maven or SBT) and SQL Client with SQL JAR bundles.
|
||||||
|
|
||||||
|
### Maven dependency
|
||||||
|
|
||||||
|
{{< artifact flink-connector-oracle-cdc >}}
|
||||||
|
|
||||||
|
### SQL Client JAR
|
||||||
|
|
||||||
|
**Download link is available only for stable releases.**
|
||||||
|
|
||||||
|
Download [flink-sql-connector-oracle-cdc-3.0-SNAPSHOT.jar](https://repo1.maven.org/maven2/org/apache/flink/flink-sql-connector-oracle-cdc/3.0-SNAPSHOT/flink-sql-connector-oracle-cdc-3.0-SNAPSHOT.jar) and put it under `<FLINK_HOME>/lib/`.
|
||||||
|
|
||||||
|
**Note:** The flink-sql-connector-oracle-cdc-XXX-SNAPSHOT version corresponds to the code on the development branch; users need to download the source code and compile the corresponding jar themselves. Please use a released version instead, such as [flink-sql-connector-oracle-cdc-2.3.0.jar](https://mvnrepository.com/artifact/org.apache.flink/flink-sql-connector-oracle-cdc), which is available in the Maven Central repository.
|
||||||
|
|
||||||
|
Setup Oracle
|
||||||
|
----------------
|
||||||
|
You have to enable log archiving for the Oracle database and define an Oracle user with appropriate permissions on all databases that the Debezium Oracle connector monitors.
|
||||||
|
|
||||||
|
### For Non-CDB database
|
||||||
|
|
||||||
|
1. Enable log archiving
|
||||||
|
|
||||||
|
(1.1). Connect to the database as DBA
|
||||||
|
```sql
|
||||||
|
ORACLE_SID=SID
|
||||||
|
export ORACLE_SID
|
||||||
|
sqlplus /nolog
|
||||||
|
CONNECT sys/password AS SYSDBA
|
||||||
|
```
|
||||||
|
|
||||||
|
(1.2). Enable log archiving
|
||||||
|
```sql
|
||||||
|
alter system set db_recovery_file_dest_size = 10G;
|
||||||
|
alter system set db_recovery_file_dest = '/opt/oracle/oradata/recovery_area' scope=spfile;
|
||||||
|
shutdown immediate;
|
||||||
|
startup mount;
|
||||||
|
alter database archivelog;
|
||||||
|
alter database open;
|
||||||
|
```
|
||||||
|
**Note:**
|
||||||
|
|
||||||
|
- Enabling log archiving requires a database restart, so plan for it carefully
|
||||||
|
- The archived logs will occupy a large amount of disk space, so consider cleaning up expired logs periodically
|
||||||
|
|
||||||
|
(1.3). Check whether log archiving is enabled
|
||||||
|
```sql
|
||||||
|
-- Should show "Database log mode: Archive Mode"
|
||||||
|
archive log list;
|
||||||
|
```
|
||||||
|
**Note:**
|
||||||
|
|
||||||
|
Supplemental logging must be enabled for captured tables or the database in order for data changes to capture the <em>before</em> state of changed database rows.
|
||||||
|
The following illustrates how to configure this on the table/database level.
|
||||||
|
```sql
|
||||||
|
-- Enable supplemental logging for a specific table:
|
||||||
|
ALTER TABLE inventory.customers ADD SUPPLEMENTAL LOG DATA (ALL) COLUMNS;
|
||||||
|
```
|
||||||
|
```sql
|
||||||
|
-- Enable supplemental logging for database
|
||||||
|
ALTER DATABASE ADD SUPPLEMENTAL LOG DATA;
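-- (Not in the original guide) Optional sanity check, assuming SELECT access on V$DATABASE:
-- after the statement above, SUPPLEMENTAL_LOG_DATA_MIN should report YES.
SELECT supplemental_log_data_min, supplemental_log_data_all FROM v$database;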
|
||||||
|
```
|
||||||
|
|
||||||
|
2. Create an Oracle user with permissions
|
||||||
|
|
||||||
|
(2.1). Create Tablespace
|
||||||
|
```sql
|
||||||
|
sqlplus sys/password@host:port/SID AS SYSDBA;
|
||||||
|
CREATE TABLESPACE logminer_tbs DATAFILE '/opt/oracle/oradata/SID/logminer_tbs.dbf' SIZE 25M REUSE AUTOEXTEND ON MAXSIZE UNLIMITED;
|
||||||
|
exit;
|
||||||
|
```
|
||||||
|
|
||||||
|
(2.2). Create a user and grant permissions
|
||||||
|
```sql
|
||||||
|
sqlplus sys/password@host:port/SID AS SYSDBA;
|
||||||
|
CREATE USER flinkuser IDENTIFIED BY flinkpw DEFAULT TABLESPACE LOGMINER_TBS QUOTA UNLIMITED ON LOGMINER_TBS;
|
||||||
|
GRANT CREATE SESSION TO flinkuser;
|
||||||
|
GRANT SET CONTAINER TO flinkuser;
|
||||||
|
GRANT SELECT ON V_$DATABASE to flinkuser;
|
||||||
|
GRANT FLASHBACK ANY TABLE TO flinkuser;
|
||||||
|
GRANT SELECT ANY TABLE TO flinkuser;
|
||||||
|
GRANT SELECT_CATALOG_ROLE TO flinkuser;
|
||||||
|
GRANT EXECUTE_CATALOG_ROLE TO flinkuser;
|
||||||
|
GRANT SELECT ANY TRANSACTION TO flinkuser;
|
||||||
|
GRANT LOGMINING TO flinkuser;
|
||||||
|
GRANT ANALYZE ANY TO flinkuser;
|
||||||
|
|
||||||
|
GRANT CREATE TABLE TO flinkuser;
|
||||||
|
-- no need to execute the following if scan.incremental.snapshot.enabled=true (default)
|
||||||
|
GRANT LOCK ANY TABLE TO flinkuser;
|
||||||
|
GRANT ALTER ANY TABLE TO flinkuser;
|
||||||
|
GRANT CREATE SEQUENCE TO flinkuser;
|
||||||
|
|
||||||
|
GRANT EXECUTE ON DBMS_LOGMNR TO flinkuser;
|
||||||
|
GRANT EXECUTE ON DBMS_LOGMNR_D TO flinkuser;
|
||||||
|
|
||||||
|
GRANT SELECT ON V_$LOG TO flinkuser;
|
||||||
|
GRANT SELECT ON V_$LOG_HISTORY TO flinkuser;
|
||||||
|
GRANT SELECT ON V_$LOGMNR_LOGS TO flinkuser;
|
||||||
|
GRANT SELECT ON V_$LOGMNR_CONTENTS TO flinkuser;
|
||||||
|
GRANT SELECT ON V_$LOGMNR_PARAMETERS TO flinkuser;
|
||||||
|
GRANT SELECT ON V_$LOGFILE TO flinkuser;
|
||||||
|
GRANT SELECT ON V_$ARCHIVED_LOG TO flinkuser;
|
||||||
|
GRANT SELECT ON V_$ARCHIVE_DEST_STATUS TO flinkuser;
|
||||||
|
exit;
|
||||||
|
```
|
||||||
|
|
||||||
|
### For CDB database
|
||||||
|
|
||||||
|
Overall, the steps for configuring a CDB database are quite similar to those for a non-CDB database, but the commands may differ.
|
||||||
|
1. Enable log archiving
|
||||||
|
```sql
|
||||||
|
ORACLE_SID=ORCLCDB
|
||||||
|
export ORACLE_SID
|
||||||
|
sqlplus /nolog
|
||||||
|
CONNECT sys/password AS SYSDBA
|
||||||
|
alter system set db_recovery_file_dest_size = 10G;
|
||||||
|
-- should exist
|
||||||
|
alter system set db_recovery_file_dest = '/opt/oracle/oradata/recovery_area' scope=spfile;
|
||||||
|
shutdown immediate
|
||||||
|
startup mount
|
||||||
|
alter database archivelog;
|
||||||
|
alter database open;
|
||||||
|
-- Should show "Database log mode: Archive Mode"
|
||||||
|
archive log list
|
||||||
|
exit;
|
||||||
|
```
|
||||||
|
**Note:**
|
||||||
|
You can also use the following commands to enable supplemental logging:
|
||||||
|
```sql
|
||||||
|
-- Enable supplemental logging for a specific table:
|
||||||
|
ALTER TABLE inventory.customers ADD SUPPLEMENTAL LOG DATA (ALL) COLUMNS;
|
||||||
|
-- Enable supplemental logging for database
|
||||||
|
ALTER DATABASE ADD SUPPLEMENTAL LOG DATA;
|
||||||
|
```
|
||||||
|
|
||||||
|
2. Create an Oracle user with permissions
|
||||||
|
```sql
|
||||||
|
sqlplus sys/password@//localhost:1521/ORCLCDB as sysdba
|
||||||
|
CREATE TABLESPACE logminer_tbs DATAFILE '/opt/oracle/oradata/ORCLCDB/logminer_tbs.dbf' SIZE 25M REUSE AUTOEXTEND ON MAXSIZE UNLIMITED;
|
||||||
|
exit
|
||||||
|
```
|
||||||
|
```sql
|
||||||
|
sqlplus sys/password@//localhost:1521/ORCLPDB1 as sysdba
|
||||||
|
CREATE TABLESPACE logminer_tbs DATAFILE '/opt/oracle/oradata/ORCLCDB/ORCLPDB1/logminer_tbs.dbf' SIZE 25M REUSE AUTOEXTEND ON MAXSIZE UNLIMITED;
|
||||||
|
exit
|
||||||
|
```
|
||||||
|
```sql
|
||||||
|
sqlplus sys/password@//localhost:1521/ORCLCDB as sysdba
|
||||||
|
CREATE USER flinkuser IDENTIFIED BY flinkpw DEFAULT TABLESPACE logminer_tbs QUOTA UNLIMITED ON logminer_tbs CONTAINER=ALL;
|
||||||
|
GRANT CREATE SESSION TO flinkuser CONTAINER=ALL;
|
||||||
|
GRANT SET CONTAINER TO flinkuser CONTAINER=ALL;
|
||||||
|
GRANT SELECT ON V_$DATABASE to flinkuser CONTAINER=ALL;
|
||||||
|
GRANT FLASHBACK ANY TABLE TO flinkuser CONTAINER=ALL;
|
||||||
|
GRANT SELECT ANY TABLE TO flinkuser CONTAINER=ALL;
|
||||||
|
GRANT SELECT_CATALOG_ROLE TO flinkuser CONTAINER=ALL;
|
||||||
|
GRANT EXECUTE_CATALOG_ROLE TO flinkuser CONTAINER=ALL;
|
||||||
|
GRANT SELECT ANY TRANSACTION TO flinkuser CONTAINER=ALL;
|
||||||
|
GRANT LOGMINING TO flinkuser CONTAINER=ALL;
|
||||||
|
GRANT CREATE TABLE TO flinkuser CONTAINER=ALL;
|
||||||
|
-- no need to execute the following if scan.incremental.snapshot.enabled=true (default)
|
||||||
|
GRANT LOCK ANY TABLE TO flinkuser CONTAINER=ALL;
|
||||||
|
GRANT CREATE SEQUENCE TO flinkuser CONTAINER=ALL;
|
||||||
|
|
||||||
|
GRANT EXECUTE ON DBMS_LOGMNR TO flinkuser CONTAINER=ALL;
|
||||||
|
GRANT EXECUTE ON DBMS_LOGMNR_D TO flinkuser CONTAINER=ALL;
|
||||||
|
|
||||||
|
GRANT SELECT ON V_$LOG TO flinkuser CONTAINER=ALL;
|
||||||
|
GRANT SELECT ON V_$LOG_HISTORY TO flinkuser CONTAINER=ALL;
|
||||||
|
GRANT SELECT ON V_$LOGMNR_LOGS TO flinkuser CONTAINER=ALL;
|
||||||
|
GRANT SELECT ON V_$LOGMNR_CONTENTS TO flinkuser CONTAINER=ALL;
|
||||||
|
GRANT SELECT ON V_$LOGMNR_PARAMETERS TO flinkuser CONTAINER=ALL;
|
||||||
|
GRANT SELECT ON V_$LOGFILE TO flinkuser CONTAINER=ALL;
|
||||||
|
GRANT SELECT ON V_$ARCHIVED_LOG TO flinkuser CONTAINER=ALL;
|
||||||
|
GRANT SELECT ON V_$ARCHIVE_DEST_STATUS TO flinkuser CONTAINER=ALL;
|
||||||
|
exit
|
||||||
|
```
|
||||||
|
|
||||||
|
For more details, see [Setting up Oracle](https://debezium.io/documentation/reference/1.9/connectors/oracle.html#setting-up-oracle).
|
||||||
|
|
||||||
|
How to create an Oracle CDC table
|
||||||
|
----------------
|
||||||
|
|
||||||
|
The Oracle CDC table can be defined as follows:
|
||||||
|
|
||||||
|
```sql
|
||||||
|
-- register an Oracle table 'products' in Flink SQL
|
||||||
|
Flink SQL> CREATE TABLE products (
|
||||||
|
ID INT NOT NULL,
|
||||||
|
NAME STRING,
|
||||||
|
DESCRIPTION STRING,
|
||||||
|
WEIGHT DECIMAL(10, 3),
|
||||||
|
PRIMARY KEY(id) NOT ENFORCED
|
||||||
|
) WITH (
|
||||||
|
'connector' = 'oracle-cdc',
|
||||||
|
'hostname' = 'localhost',
|
||||||
|
'port' = '1521',
|
||||||
|
'username' = 'flinkuser',
|
||||||
|
'password' = 'flinkpw',
|
||||||
|
'database-name' = 'ORCLCDB',
|
||||||
|
'schema-name' = 'inventory',
|
||||||
|
'table-name' = 'products');
|
||||||
|
|
||||||
|
-- read snapshot and redo logs from products table
|
||||||
|
Flink SQL> SELECT * FROM products;
|
||||||
|
```
|
||||||
|
**Note:**
|
||||||
|
When working with the CDB + PDB model, you are expected to add an extra option `'debezium.database.pdb.name' = 'xxx'` in the Flink DDL to specify the name of the PDB to connect to.
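A hedged sketch of such a DDL; the PDB name `ORCLPDB1` and the connection values are placeholders borrowed from the setup examples above, not values the connector mandates:

```sql
CREATE TABLE products_cdb (
    ID INT NOT NULL,
    NAME STRING,
    PRIMARY KEY (ID) NOT ENFORCED
) WITH (
    'connector' = 'oracle-cdc',
    'hostname' = 'localhost',
    'port' = '1521',
    'username' = 'flinkuser',
    'password' = 'flinkpw',
    -- connect to the CDB, but capture tables that live in this PDB
    'database-name' = 'ORCLCDB',
    'schema-name' = 'inventory',
    'table-name' = 'products',
    'debezium.database.pdb.name' = 'ORCLPDB1'
);
```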
|
||||||
|
|
||||||
|
**Note:**
|
||||||
|
While the connector might work with a variety of Oracle versions and editions, only Oracle 9i, 10g, 11g and 12c have been tested.
|
||||||
|
|
||||||
|
Connector Options
|
||||||
|
----------------
|
||||||
|
<div class="highlight">
|
||||||
|
<table class="colwidths-auto docutils">
|
||||||
|
<thead>
|
||||||
|
<tr>
|
||||||
|
<th class="text-left" style="width: 25%">Option</th>
|
||||||
|
<th class="text-left" style="width: 8%">Required</th>
|
||||||
|
<th class="text-left" style="width: 7%">Default</th>
|
||||||
|
<th class="text-left" style="width: 10%">Type</th>
|
||||||
|
<th class="text-left" style="width: 50%">Description</th>
|
||||||
|
</tr>
|
||||||
|
</thead>
|
||||||
|
<tbody>
|
||||||
|
<tr>
|
||||||
|
<td>connector</td>
|
||||||
|
<td>required</td>
|
||||||
|
<td style="word-wrap: break-word;">(none)</td>
|
||||||
|
<td>String</td>
|
||||||
|
<td>Specify what connector to use, here should be <code>'oracle-cdc'</code>.</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>hostname</td>
|
||||||
|
<td>optional</td>
|
||||||
|
<td style="word-wrap: break-word;">(none)</td>
|
||||||
|
<td>String</td>
|
||||||
|
<td>IP address or hostname of the Oracle database server. If url is configured, hostname can be omitted; otherwise hostname must not be empty.</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>username</td>
|
||||||
|
<td>required</td>
|
||||||
|
<td style="word-wrap: break-word;">(none)</td>
|
||||||
|
<td>String</td>
|
||||||
|
<td>Username to use when connecting to the Oracle database server.</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>password</td>
|
||||||
|
<td>required</td>
|
||||||
|
<td style="word-wrap: break-word;">(none)</td>
|
||||||
|
<td>String</td>
|
||||||
|
<td>Password to use when connecting to the Oracle database server.</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>database-name</td>
|
||||||
|
<td>required</td>
|
||||||
|
<td style="word-wrap: break-word;">(none)</td>
|
||||||
|
<td>String</td>
|
||||||
|
<td>Database name of the Oracle server to monitor.</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>schema-name</td>
|
||||||
|
<td>required</td>
|
||||||
|
<td style="word-wrap: break-word;">(none)</td>
|
||||||
|
<td>String</td>
|
||||||
|
<td>Schema name of the Oracle database to monitor.</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>table-name</td>
|
||||||
|
<td>required</td>
|
||||||
|
<td style="word-wrap: break-word;">(none)</td>
|
||||||
|
<td>String</td>
|
||||||
|
<td>Table name of the Oracle database to monitor.</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>port</td>
|
||||||
|
<td>optional</td>
|
||||||
|
<td style="word-wrap: break-word;">1521</td>
|
||||||
|
<td>Integer</td>
|
||||||
|
<td>Integer port number of the Oracle database server.</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>url</td>
|
||||||
|
<td>optional</td>
|
||||||
|
<td style="word-wrap: break-word;">jdbc:oracle:thin:@{hostname}:{port}:{database-name}</td>
|
||||||
|
<td>String</td>
|
||||||
|
<td>JDBC URL of the Oracle database server. If the hostname and port parameters are configured, the URL is assembled from hostname, port, and database-name in SID format by default. Otherwise, you need to configure the url parameter explicitly.</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>scan.startup.mode</td>
|
||||||
|
<td>optional</td>
|
||||||
|
<td style="word-wrap: break-word;">initial</td>
|
||||||
|
<td>String</td>
|
||||||
|
<td>Optional startup mode for Oracle CDC consumer, valid enumerations are "initial"
|
||||||
|
and "latest-offset".
|
||||||
|
Please see <a href="#startup-reading-position">Startup Reading Position</a> section for more detailed information.</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>scan.incremental.snapshot.enabled</td>
|
||||||
|
<td>optional</td>
|
||||||
|
<td style="word-wrap: break-word;">true</td>
|
||||||
|
<td>Boolean</td>
|
||||||
|
<td>Incremental snapshot is a new mechanism for reading the snapshot of a table. Compared to the old snapshot mechanism,
|
||||||
|
the incremental snapshot has many advantages, including:
|
||||||
|
(1) the source can read in parallel during snapshot reading,
|
||||||
|
(2) the source can perform checkpoints at chunk granularity during snapshot reading,
|
||||||
|
(3) the source doesn't need to acquire a ROW SHARE MODE lock before snapshot reading.
|
||||||
|
</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>scan.incremental.snapshot.chunk.size</td>
|
||||||
|
<td>optional</td>
|
||||||
|
<td style="word-wrap: break-word;">8096</td>
|
||||||
|
<td>Integer</td>
|
||||||
|
<td>The chunk size (number of rows) of the table snapshot; captured tables are split into multiple chunks when reading the snapshot of a table.</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>scan.snapshot.fetch.size</td>
|
||||||
|
<td>optional</td>
|
||||||
|
<td style="word-wrap: break-word;">1024</td>
|
||||||
|
<td>Integer</td>
|
||||||
|
<td>The maximum fetch size per poll when reading the table snapshot.</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>connect.max-retries</td>
|
||||||
|
<td>optional</td>
|
||||||
|
<td style="word-wrap: break-word;">3</td>
|
||||||
|
<td>Integer</td>
|
||||||
|
<td>The maximum number of times the connector should retry building a connection to the Oracle database server.</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>connection.pool.size</td>
|
||||||
|
<td>optional</td>
|
||||||
|
<td style="word-wrap: break-word;">20</td>
|
||||||
|
<td>Integer</td>
|
||||||
|
<td>The connection pool size.</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>debezium.*</td>
|
||||||
|
<td>optional</td>
|
||||||
|
<td style="word-wrap: break-word;">(none)</td>
|
||||||
|
<td>String</td>
|
||||||
|
<td>Pass-through Debezium's properties to Debezium Embedded Engine which is used to capture data changes from Oracle server.
|
||||||
|
For example: <code>'debezium.snapshot.mode' = 'never'</code>.
|
||||||
|
See more about the <a href="https://debezium.io/documentation/reference/1.9/connectors/oracle.html#oracle-connector-properties">Debezium's Oracle Connector properties</a></td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>scan.incremental.close-idle-reader.enabled</td>
|
||||||
|
<td>optional</td>
|
||||||
|
<td style="word-wrap: break-word;">false</td>
|
||||||
|
<td>Boolean</td>
|
||||||
|
<td>Whether to close idle readers at the end of the snapshot phase. <br>
|
||||||
|
The Flink version is required to be greater than or equal to 1.14 when 'execution.checkpointing.checkpoints-after-tasks-finish.enabled' is set to true.<br>
|
||||||
|
If the Flink version is greater than or equal to 1.15, the default value of 'execution.checkpointing.checkpoints-after-tasks-finish.enabled' has been changed to true,
|
||||||
|
so 'execution.checkpointing.checkpoints-after-tasks-finish.enabled' = 'true' does not need to be configured explicitly.
|
||||||
|
</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>scan.incremental.snapshot.chunk.key-column</td>
|
||||||
|
<td>optional</td>
|
||||||
|
<td style="word-wrap: break-word;">(none)</td>
|
||||||
|
<td>String</td>
|
||||||
|
<td>The chunk key column of the table snapshot; captured tables are split into multiple chunks by this column when reading the snapshot of a table.
|
||||||
|
By default, the chunk key is 'ROWID'. This column must be a column of the primary key.</td>
|
||||||
|
</tr>
|
||||||
|
</tbody>
|
||||||
|
</table>
|
||||||
|
</div>
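To make the less-common options above concrete, here is a non-authoritative sketch that connects via `url` instead of `hostname`/`port` and tunes the incremental snapshot; all concrete values are illustrative assumptions, not recommendations:

```sql
CREATE TABLE products_tuned (
    ID INT NOT NULL,
    NAME STRING,
    PRIMARY KEY (ID) NOT ENFORCED
) WITH (
    'connector' = 'oracle-cdc',
    -- use an explicit JDBC URL; hostname/port can then be omitted
    'url' = 'jdbc:oracle:thin:@localhost:1521:ORCLCDB',
    'username' = 'flinkuser',
    'password' = 'flinkpw',
    'database-name' = 'ORCLCDB',
    'schema-name' = 'inventory',
    'table-name' = 'products',
    -- incremental snapshot tuning (illustrative values)
    'scan.incremental.snapshot.enabled' = 'true',
    'scan.incremental.snapshot.chunk.size' = '4096',
    'scan.snapshot.fetch.size' = '1024'
);
```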
|
||||||
|
|
||||||
|
Limitation
|
||||||
|
--------
|
||||||
|
|
||||||
|
### Can't perform checkpoint during scanning snapshot of tables
|
||||||
|
While scanning the snapshot of database tables, there is no recoverable position, so checkpoints cannot be performed. In order not to perform checkpoints, the Oracle CDC source keeps the checkpoint waiting until it times out. The timed-out checkpoint will be recognized as a failed checkpoint, which by default triggers a failover of the Flink job. So if the database table is large, it is recommended to add the following Flink configurations to avoid failover caused by timed-out checkpoints:
|
||||||
|
|
||||||
|
```
|
||||||
|
execution.checkpointing.interval: 10min
|
||||||
|
execution.checkpointing.tolerable-failed-checkpoints: 100
|
||||||
|
restart-strategy: fixed-delay
|
||||||
|
restart-strategy.fixed-delay.attempts: 2147483647
|
||||||
|
```
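When running ad-hoc tests from the SQL client, the same options can presumably be set per session with `SET` statements instead of editing the cluster configuration; a sketch, assuming a SQL client that accepts these keys dynamically:

```sql
SET 'execution.checkpointing.interval' = '10min';
SET 'execution.checkpointing.tolerable-failed-checkpoints' = '100';
SET 'restart-strategy' = 'fixed-delay';
SET 'restart-strategy.fixed-delay.attempts' = '2147483647';
```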
|
||||||
|
|
||||||
|
Available Metadata
|
||||||
|
----------------
|
||||||
|
|
||||||
|
The following metadata can be exposed as read-only (VIRTUAL) columns in a table definition.
|
||||||
|
|
||||||
|
<table class="colwidths-auto docutils">
|
||||||
|
<thead>
|
||||||
|
<tr>
|
||||||
|
<th class="text-left" style="width: 15%">Key</th>
|
||||||
|
<th class="text-left" style="width: 30%">DataType</th>
|
||||||
|
<th class="text-left" style="width: 55%">Description</th>
|
||||||
|
</tr>
|
||||||
|
</thead>
|
||||||
|
<tbody>
|
||||||
|
<tr>
|
||||||
|
<td>table_name</td>
|
||||||
|
<td>STRING NOT NULL</td>
|
||||||
|
<td>Name of the table that contains the row.</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>schema_name</td>
|
||||||
|
<td>STRING NOT NULL</td>
|
||||||
|
<td>Name of the schema that contains the row.</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>database_name</td>
|
||||||
|
<td>STRING NOT NULL</td>
|
||||||
|
<td>Name of the database that contains the row.</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>op_ts</td>
|
||||||
|
<td>TIMESTAMP_LTZ(3) NOT NULL</td>
|
||||||
|
<td>It indicates the time that the change was made in the database. <br>If the record is read from snapshot of the table instead of the change stream, the value is always 0.</td>
|
||||||
|
</tr>
|
||||||
|
</tbody>
|
||||||
|
</table>
|
||||||
|
|
||||||
|
The following extended CREATE TABLE example demonstrates the syntax for exposing these metadata fields:
|
||||||
|
```sql
|
||||||
|
CREATE TABLE products (
|
||||||
|
db_name STRING METADATA FROM 'database_name' VIRTUAL,
|
||||||
|
schema_name STRING METADATA FROM 'schema_name' VIRTUAL,
|
||||||
|
table_name STRING METADATA FROM 'table_name' VIRTUAL,
|
||||||
|
operation_ts TIMESTAMP_LTZ(3) METADATA FROM 'op_ts' VIRTUAL,
|
||||||
|
ID INT NOT NULL,
|
||||||
|
NAME STRING,
|
||||||
|
DESCRIPTION STRING,
|
||||||
|
WEIGHT DECIMAL(10, 3),
|
||||||
|
PRIMARY KEY(id) NOT ENFORCED
|
||||||
|
) WITH (
|
||||||
|
'connector' = 'oracle-cdc',
|
||||||
|
'hostname' = 'localhost',
|
||||||
|
'port' = '1521',
|
||||||
|
'username' = 'flinkuser',
|
||||||
|
'password' = 'flinkpw',
|
||||||
|
'database-name' = 'ORCLCDB',
|
||||||
|
'schema-name' = 'inventory',
|
||||||
|
'table-name' = 'products',
|
||||||
|
'debezium.log.mining.strategy' = 'online_catalog',
|
||||||
|
'debezium.log.mining.continuous.mine' = 'true'
|
||||||
|
);
|
||||||
|
```
|
||||||
|
|
||||||
|
**Note**: The Oracle dialect is case-sensitive; it converts field names to uppercase if they are not quoted, while Flink SQL does not convert field names. Thus, for physical columns from the Oracle database, we should use the field name as converted by Oracle when defining an `oracle-cdc` table in Flink SQL.
|
||||||
|
|
||||||
|
Features
|
||||||
|
--------
|
||||||
|
|
||||||
|
### Exactly-Once Processing
|
||||||
|
|
||||||
|
The Oracle CDC connector is a Flink Source connector which will read the database snapshot first and then continue to read change events with **exactly-once processing**, even if failures happen. Please read [How the connector works](https://debezium.io/documentation/reference/1.9/connectors/oracle.html#how-the-oracle-connector-works).
|
||||||
|
|
||||||
|
### Startup Reading Position
|
||||||
|
|
||||||
|
The config option `scan.startup.mode` specifies the startup mode for the Oracle CDC consumer. The valid enumerations are:
|
||||||
|
|
||||||
|
- `initial` (default): Performs an initial snapshot on the monitored database tables upon first startup, and then continues to read the latest redo log.
|
||||||
|
- `latest-offset`: Never performs a snapshot of the monitored database tables upon first startup; it only reads
|
||||||
|
the changes made since the connector was started.
|
||||||
|
|
||||||
|
_Note: the `scan.startup.mode` option relies on Debezium's `snapshot.mode` configuration under the hood, so please do not use the two together. If you specify both the `scan.startup.mode` and `debezium.snapshot.mode` options in the table DDL, `scan.startup.mode` may not take effect._
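A minimal sketch of pinning the startup mode with `scan.startup.mode` alone (placeholder connection values reused from the example above; `debezium.snapshot.mode` is deliberately left unset):

```sql
CREATE TABLE products_latest (
    ID INT NOT NULL,
    NAME STRING,
    PRIMARY KEY (ID) NOT ENFORCED
) WITH (
    'connector' = 'oracle-cdc',
    'hostname' = 'localhost',
    'port' = '1521',
    'username' = 'flinkuser',
    'password' = 'flinkpw',
    'database-name' = 'ORCLCDB',
    'schema-name' = 'inventory',
    'table-name' = 'products',
    -- skip the initial snapshot and read only changes made after startup
    'scan.startup.mode' = 'latest-offset'
);
```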
|
||||||
|
|
||||||
|
### Single Thread Reading
|
||||||
|
|
||||||
|
The Oracle CDC source can't work in parallel reading, because there is only one task that can receive change events.
|
||||||
|
|
||||||
|
### DataStream Source
|
||||||
|
|
||||||
|
The Oracle CDC connector can also be a DataStream source. There are two modes for the DataStream source:
|
||||||
|
|
||||||
|
- incremental snapshot based, which allows parallel reading
|
||||||
|
- SourceFunction based, which only supports single thread reading
|
||||||
|
|
||||||
|
#### Incremental Snapshot based DataStream (Experimental)
|
||||||
|
|
||||||
|
```java
|
||||||
|
import org.apache.flink.cdc.connectors.base.options.StartupOptions;
|
||||||
|
import org.apache.flink.cdc.connectors.base.source.jdbc.JdbcIncrementalSource;
|
||||||
|
import org.apache.flink.cdc.connectors.oracle.source.OracleSourceBuilder;
|
||||||
|
import org.apache.flink.cdc.debezium.JsonDebeziumDeserializationSchema;
|
||||||
|
import org.apache.flink.api.common.eventtime.WatermarkStrategy;
|
||||||
|
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
|
||||||
|
|
||||||
|
import java.util.Properties;
|
||||||
|
|
||||||
|
public class OracleParallelSourceExample {
|
||||||
|
|
||||||
|
public static void main(String[] args) throws Exception {
|
||||||
|
Properties debeziumProperties = new Properties();
|
||||||
|
debeziumProperties.setProperty("log.mining.strategy", "online_catalog");
|
||||||
|
|
||||||
|
JdbcIncrementalSource<String> oracleChangeEventSource =
|
||||||
|
new OracleSourceBuilder()
|
||||||
|
.hostname("host")
|
||||||
|
.port(1521)
|
||||||
|
.databaseList("ORCLCDB")
|
||||||
|
.schemaList("DEBEZIUM")
|
||||||
|
.tableList("DEBEZIUM.PRODUCTS")
|
||||||
|
.username("username")
|
||||||
|
.password("password")
|
||||||
|
.deserializer(new JsonDebeziumDeserializationSchema())
|
||||||
|
.includeSchemaChanges(true) // output the schema changes as well
|
||||||
|
.startupOptions(StartupOptions.initial())
|
||||||
|
.debeziumProperties(debeziumProperties)
|
||||||
|
.splitSize(2)
|
||||||
|
.build();
|
||||||
|
|
||||||
|
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
|
||||||
|
// enable checkpoint
|
||||||
|
env.enableCheckpointing(3000L);
|
||||||
|
// set the source parallelism to 4
|
||||||
|
env.fromSource(
|
||||||
|
oracleChangeEventSource,
|
||||||
|
WatermarkStrategy.noWatermarks(),
|
||||||
|
"OracleParallelSource")
|
||||||
|
.setParallelism(4)
|
||||||
|
.print()
|
||||||
|
.setParallelism(1);
|
||||||
|
env.execute("Print Oracle Snapshot + RedoLog");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
#### SourceFunction-based DataStream
|
||||||
|
|
||||||
|
```java
|
||||||
|
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
|
||||||
|
import org.apache.flink.streaming.api.functions.source.SourceFunction;
|
||||||
|
import org.apache.flink.cdc.debezium.JsonDebeziumDeserializationSchema;
|
||||||
|
import org.apache.flink.cdc.connectors.oracle.OracleSource;
|
||||||
|
|
||||||
|
public class OracleSourceExample {
|
||||||
|
public static void main(String[] args) throws Exception {
|
||||||
|
SourceFunction<String> sourceFunction = OracleSource.<String>builder()
|
||||||
|
.url("jdbc:oracle:thin:@{hostname}:{port}:{database}")
|
||||||
|
.port(1521)
|
||||||
|
.database("ORCLCDB") // monitor XE database
|
||||||
|
.schemaList("inventory") // monitor inventory schema
|
||||||
|
.tableList("inventory.products") // monitor products table
|
||||||
|
.username("flinkuser")
|
||||||
|
.password("flinkpw")
|
||||||
|
.deserializer(new JsonDebeziumDeserializationSchema()) // converts SourceRecord to JSON String
|
||||||
|
.build();
|
||||||
|
|
||||||
|
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
|
||||||
|
|
||||||
|
env
|
||||||
|
.addSource(sourceFunction)
|
||||||
|
.print().setParallelism(1); // use parallelism 1 for sink to keep message ordering
|
||||||
|
|
||||||
|
env.execute();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
Data Type Mapping
|
||||||
|
----------------
|
||||||
|
<div class="wy-table-responsive">
|
||||||
|
<table class="colwidths-auto docutils">
|
||||||
|
<thead>
|
||||||
|
<tr>
|
||||||
|
<th class="text-left"><a href="https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/Data-Types.html">Oracle type</a></th>
|
||||||
|
<th class="text-left">Flink SQL type<a href="{% link dev/table/types.md %}"></a></th>
|
||||||
|
</tr>
|
||||||
|
</thead>
|
||||||
|
<tbody>
|
||||||
|
<tr>
|
||||||
|
<td>NUMBER(p, s <= 0), p - s < 3
|
||||||
|
</td>
|
||||||
|
<td>TINYINT</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>NUMBER(p, s <= 0), p - s < 5
|
||||||
|
</td>
|
||||||
|
<td>SMALLINT</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>NUMBER(p, s <= 0), p - s < 10
|
||||||
|
</td>
|
||||||
|
<td>INT</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>NUMBER(p, s <= 0), p - s < 19
|
||||||
|
</td>
|
||||||
|
<td>BIGINT</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>NUMBER(p, s <= 0), 19 <= p - s <= 38 <br>
|
||||||
|
</td>
|
||||||
|
<td>DECIMAL(p - s, 0)</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>NUMBER(p, s > 0)
|
||||||
|
</td>
|
||||||
|
<td>DECIMAL(p, s)</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>NUMBER(p, s <= 0), p - s > 38
|
||||||
|
</td>
|
||||||
|
<td>STRING</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>
|
||||||
|
FLOAT<br>
|
||||||
|
BINARY_FLOAT
|
||||||
|
</td>
|
||||||
|
<td>FLOAT</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>
|
||||||
|
DOUBLE PRECISION<br>
|
||||||
|
BINARY_DOUBLE
|
||||||
|
</td>
|
||||||
|
<td>DOUBLE</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>NUMBER(1)</td>
|
||||||
|
<td>BOOLEAN</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>
|
||||||
|
DATE<br>
|
||||||
|
TIMESTAMP [(p)]
|
||||||
|
</td>
|
||||||
|
<td>TIMESTAMP [(p)] [WITHOUT TIMEZONE]</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>TIMESTAMP [(p)] WITH TIME ZONE</td>
|
||||||
|
<td>TIMESTAMP [(p)] WITH TIME ZONE</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>TIMESTAMP [(p)] WITH LOCAL TIME ZONE</td>
|
||||||
|
<td>TIMESTAMP_LTZ [(p)]</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>
|
||||||
|
CHAR(n)<br>
|
||||||
|
NCHAR(n)<br>
|
||||||
|
NVARCHAR2(n)<br>
|
||||||
|
VARCHAR(n)<br>
|
||||||
|
VARCHAR2(n)<br>
|
||||||
|
CLOB<br>
|
||||||
|
NCLOB<br>
|
||||||
|
XMLType<br>
|
||||||
|
SYS.XMLTYPE
|
||||||
|
</td>
|
||||||
|
<td>STRING</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>BLOB<br>
|
||||||
|
ROWID
|
||||||
|
</td>
|
||||||
|
<td>BYTES</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>
|
||||||
|
INTERVAL DAY TO SECOND<br>
|
||||||
|
INTERVAL YEAR TO MONTH
|
||||||
|
</td>
|
||||||
|
<td>BIGINT</td>
|
||||||
|
</tr>
|
||||||
|
</tbody>
|
||||||
|
</table>
|
||||||
|
</div>
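As a worked illustration of the NUMBER rules above (the Oracle table is hypothetical; only the type choices matter): an Oracle column `NUMBER(9,0)` satisfies p - s < 10 and maps to `INT`, `NUMBER(10,2)` has s > 0 and maps to `DECIMAL(10,2)`, and `NUMBER(1)` maps to `BOOLEAN`:

```sql
-- Hypothetical Oracle table:
-- CREATE TABLE inventory.items (
--     id      NUMBER(9,0) PRIMARY KEY,
--     price   NUMBER(10,2),
--     active  NUMBER(1),
--     label   VARCHAR2(100),
--     created TIMESTAMP(3)
-- );

-- Corresponding Flink SQL declaration following the mapping table above
-- (column names are uppercase, matching Oracle's unquoted-identifier behavior)
CREATE TABLE items (
    ID      INT,
    PRICE   DECIMAL(10,2),
    ACTIVE  BOOLEAN,
    LABEL   STRING,
    CREATED TIMESTAMP(3),
    PRIMARY KEY (ID) NOT ENFORCED
) WITH (
    'connector' = 'oracle-cdc',
    'hostname' = 'localhost',
    'port' = '1521',
    'username' = 'flinkuser',
    'password' = 'flinkpw',
    'database-name' = 'ORCLCDB',
    'schema-name' = 'inventory',
    'table-name' = 'items'
);
```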
|
||||||
|
|
||||||
|
{{< top >}}
|
@ -0,0 +1,307 @@
|
|||||||
|
---
|
||||||
|
title: "Overview"
|
||||||
|
weight: 1
|
||||||
|
type: docs
|
||||||
|
aliases:
|
||||||
|
- /connectors/cdc-connectors/
|
||||||
|
---
|
||||||
|
<!--
|
||||||
|
Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
or more contributor license agreements. See the NOTICE file
|
||||||
|
distributed with this work for additional information
|
||||||
|
regarding copyright ownership. The ASF licenses this file
|
||||||
|
to you under the Apache License, Version 2.0 (the
|
||||||
|
"License"); you may not use this file except in compliance
|
||||||
|
with the License. You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing,
|
||||||
|
software distributed under the License is distributed on an
|
||||||
|
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||||
|
KIND, either express or implied. See the License for the
|
||||||
|
specific language governing permissions and limitations
|
||||||
|
under the License.
|
||||||
|
-->
|
||||||
|
|
||||||
|
# CDC Connectors for Apache Flink
|
||||||
|
|
||||||
|
CDC Connectors for Apache Flink<sup>®</sup> is a set of source connectors for <a href="https://flink.apache.org/">Apache Flink<sup>®</sup></a>, ingesting changes from different databases using change data capture (CDC).
|
||||||
|
The CDC Connectors for Apache Flink<sup>®</sup> integrate Debezium as the engine to capture data changes, so they can fully leverage the ability of Debezium. See more about what [Debezium](https://github.com/debezium/debezium) is.
|
||||||
|
|
||||||
|
{{< img src="/fig/cdc-flow.png" width="600px" alt="Flink CDC" >}}
|
||||||
|
|
||||||
|
## Supported Connectors
|
||||||
|
|
||||||
|
| Connector | Database | Driver |
|
||||||
|
|-----------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|---------------------------|
|
||||||
|
| [mongodb-cdc](mongodb-cdc.md) | <li> [MongoDB](https://www.mongodb.com): 3.6, 4.x, 5.0 | MongoDB Driver: 4.3.4 |
|
||||||
|
| [mysql-cdc](mysql-cdc.md) | <li> [MySQL](https://dev.mysql.com/doc): 5.6, 5.7, 8.0.x <li> [RDS MySQL](https://www.aliyun.com/product/rds/mysql): 5.6, 5.7, 8.0.x <li> [PolarDB MySQL](https://www.aliyun.com/product/polardb): 5.6, 5.7, 8.0.x <li> [Aurora MySQL](https://aws.amazon.com/cn/rds/aurora): 5.6, 5.7, 8.0.x <li> [MariaDB](https://mariadb.org): 10.x <li> [PolarDB X](https://github.com/ApsaraDB/galaxysql): 2.0.1 | JDBC Driver: 8.0.28 |
|
||||||
|
| [oceanbase-cdc](oceanbase-cdc.md) | <li> [OceanBase CE](https://open.oceanbase.com): 3.1.x, 4.x <li> [OceanBase EE](https://www.oceanbase.com/product/oceanbase): 2.x, 3.x, 4.x | OceanBase Driver: 2.4.x |
|
||||||
|
| [oracle-cdc](oracle-cdc.md) | <li> [Oracle](https://www.oracle.com/index.html): 11, 12, 19, 21 | Oracle Driver: 19.3.0.0 |
|
||||||
|
| [postgres-cdc](postgres-cdc.md) | <li> [PostgreSQL](https://www.postgresql.org): 9.6, 10, 11, 12, 13, 14 | JDBC Driver: 42.5.1 |
|
||||||
|
| [sqlserver-cdc](sqlserver-cdc.md) | <li> [Sqlserver](https://www.microsoft.com/sql-server): 2012, 2014, 2016, 2017, 2019 | JDBC Driver: 9.4.1.jre8 |
|
||||||
|
| [tidb-cdc](tidb-cdc.md) | <li> [TiDB](https://www.pingcap.com/): 5.1.x, 5.2.x, 5.3.x, 5.4.x, 6.0.0 | JDBC Driver: 8.0.27 |
|
||||||
|
| [db2-cdc](db2-cdc.md) | <li> [Db2](https://www.ibm.com/products/db2): 11.5 | Db2 Driver: 11.5.0.0 |
|
||||||
|
| [vitess-cdc](vitess-cdc.md) | <li> [Vitess](https://vitess.io/): 8.0.x, 9.0.x | MySql JDBC Driver: 8.0.26 |
|
||||||
|
|
||||||
|
## Supported Flink Versions
|
||||||
|
The following table shows the version mapping between Flink<sup>®</sup> CDC Connectors and Flink<sup>®</sup>:
|
||||||
|
|
||||||
|
| Flink<sup>®</sup> CDC Version | Flink<sup>®</sup> Version |
|
||||||
|
|:-----------------------------------:|:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------:|
|
||||||
|
| <font color="DarkCyan">1.0.0</font> | <font color="MediumVioletRed">1.11.*</font> |
|
||||||
|
| <font color="DarkCyan">1.1.0</font> | <font color="MediumVioletRed">1.11.*</font> |
|
||||||
|
| <font color="DarkCyan">1.2.0</font> | <font color="MediumVioletRed">1.12.*</font> |
|
||||||
|
| <font color="DarkCyan">1.3.0</font> | <font color="MediumVioletRed">1.12.*</font> |
|
||||||
|
| <font color="DarkCyan">1.4.0</font> | <font color="MediumVioletRed">1.13.*</font> |
|
||||||
|
| <font color="DarkCyan">2.0.*</font> | <font color="MediumVioletRed">1.13.*</font> |
|
||||||
|
| <font color="DarkCyan">2.1.*</font> | <font color="MediumVioletRed">1.13.*</font> |
|
||||||
|
| <font color="DarkCyan">2.2.*</font> | <font color="MediumVioletRed">1.13.\*</font>, <font color="MediumVioletRed">1.14.\*</font> |
|
||||||
|
| <font color="DarkCyan">2.3.*</font> | <font color="MediumVioletRed">1.13.\*</font>, <font color="MediumVioletRed">1.14.\*</font>, <font color="MediumVioletRed">1.15.\*</font>, <font color="MediumVioletRed">1.16.\*</font> |
|
||||||
|
| <font color="DarkCyan">2.4.*</font> | <font color="MediumVioletRed">1.13.\*</font>, <font color="MediumVioletRed">1.14.\*</font>, <font color="MediumVioletRed">1.15.\*</font>, <font color="MediumVioletRed">1.16.\*</font>, <font color="MediumVioletRed">1.17.\*</font> |
|
||||||
|
| <font color="DarkCyan">3.0.*</font> | <font color="MediumVioletRed">1.14.\*</font>, <font color="MediumVioletRed">1.15.\*</font>, <font color="MediumVioletRed">1.16.\*</font>, <font color="MediumVioletRed">1.17.\*</font>, <font color="MediumVioletRed">1.18.\*</font> |
|
||||||
|
|
||||||
|
## Features
|
||||||
|
|
||||||
|
1. Supports reading database snapshots and continues to read binlogs with **exactly-once processing** even when failures happen.
|
||||||
|
2. CDC connectors for the DataStream API: users can consume changes from multiple databases and tables in a single job without having Debezium and Kafka deployed.
|
||||||
|
3. CDC connectors for the Table/SQL API: users can use SQL DDL to create a CDC source to monitor changes on a single table.
|
||||||
|
|
||||||
|
The following table shows the current features of the connector:
|
||||||
|
|
||||||
|
| Connector | No-lock Read | Parallel Read | Exactly-once Read | Incremental Snapshot Read |
|
||||||
|
|-----------------------------------|--------------|---------------|-------------------|---------------------------|
|
||||||
|
| [mongodb-cdc](mongodb-cdc.md) | ✅ | ✅ | ✅ | ✅ |
|
||||||
|
| [mysql-cdc](mysql-cdc.md) | ✅ | ✅ | ✅ | ✅ |
|
||||||
|
| [oracle-cdc](oracle-cdc.md) | ✅ | ✅ | ✅ | ✅ |
|
||||||
|
| [postgres-cdc](postgres-cdc.md) | ✅ | ✅ | ✅ | ✅ |
|
||||||
|
| [sqlserver-cdc](sqlserver-cdc.md) | ✅ | ✅ | ✅ | ✅ |
|
||||||
|
| [oceanbase-cdc](oceanbase-cdc.md) | ❌ | ❌ | ❌ | ❌ |
|
||||||
|
| [tidb-cdc](tidb-cdc.md) | ✅ | ❌ | ✅ | ❌ |
|
||||||
|
| [db2-cdc](db2-cdc.md) | ❌ | ❌ | ✅ | ❌ |
|
||||||
|
| [vitess-cdc](vitess-cdc.md) | ✅ | ❌ | ✅ | ❌ |
|
||||||
|
|
||||||
|
## Usage for Table/SQL API
|
||||||
|
|
||||||
|
Several steps are needed to set up a Flink cluster with the provided connector.
|
||||||
|
|
||||||
|
1. Set up a Flink cluster with version 1.12+ and Java 8+ installed.
|
||||||
|
2. Download the connector SQL jars from the [Downloads](../downloads.md) page (or [build yourself](#building-from-source)).
|
||||||
|
3. Put the downloaded jars under `FLINK_HOME/lib/`.
|
||||||
|
4. Restart the Flink cluster.
|
||||||
|
|
||||||
|
The following example shows how to create a MySQL CDC source in the [Flink SQL Client](https://nightlies.apache.org/flink/flink-docs-stable/docs/dev/table/sqlclient/) and execute queries on it.
|
||||||
|
|
||||||
|
```sql
|
||||||
|
-- creates a mysql cdc table source
|
||||||
|
CREATE TABLE mysql_binlog (
|
||||||
|
id INT NOT NULL,
|
||||||
|
name STRING,
|
||||||
|
description STRING,
|
||||||
|
weight DECIMAL(10,3),
|
||||||
|
PRIMARY KEY(id) NOT ENFORCED
|
||||||
|
) WITH (
|
||||||
|
'connector' = 'mysql-cdc',
|
||||||
|
'hostname' = 'localhost',
|
||||||
|
'port' = '3306',
|
||||||
|
'username' = 'flinkuser',
|
||||||
|
'password' = 'flinkpw',
|
||||||
|
'database-name' = 'inventory',
|
||||||
|
'table-name' = 'products'
|
||||||
|
);
|
||||||
|
|
||||||
|
-- read snapshot and binlog data from mysql, and do some transformation, and show on the client
|
||||||
|
SELECT id, UPPER(name), description, weight FROM mysql_binlog;
|
||||||
|
```
|
||||||
|
|
||||||
|
## Usage for DataStream API
|
||||||
|
|
||||||
|
Include following Maven dependency (available through Maven Central):
|
||||||
|
|
||||||
|
```xml
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.apache.flink</groupId>
|
||||||
|
<!-- add the dependency matching your database -->
|
||||||
|
<artifactId>flink-connector-mysql-cdc</artifactId>
|
||||||
|
<!-- The dependency is available only for stable releases, SNAPSHOT dependencies need to be built based on master or release branches by yourself. -->
|
||||||
|
<version>3.0-SNAPSHOT</version>
|
||||||
|
</dependency>
|
||||||
|
```
|
||||||
|
|
||||||
|
```java
|
||||||
|
import org.apache.flink.api.common.eventtime.WatermarkStrategy;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.cdc.debezium.JsonDebeziumDeserializationSchema;
import org.apache.flink.cdc.connectors.mysql.source.MySqlSource;
|
||||||
|
|
||||||
|
public class MySqlBinlogSourceExample {
|
||||||
|
public static void main(String[] args) throws Exception {
|
||||||
|
MySqlSource<String> mySqlSource = MySqlSource.<String>builder()
|
||||||
|
.hostname("yourHostname")
|
||||||
|
.port(yourPort)
|
||||||
|
.databaseList("yourDatabaseName") // set captured database
|
||||||
|
.tableList("yourDatabaseName.yourTableName") // set captured table
|
||||||
|
.username("yourUsername")
|
||||||
|
.password("yourPassword")
|
||||||
|
.deserializer(new JsonDebeziumDeserializationSchema()) // converts SourceRecord to JSON String
|
||||||
|
.build();
|
||||||
|
|
||||||
|
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
|
||||||
|
|
||||||
|
// enable checkpoint
|
||||||
|
env.enableCheckpointing(3000);
|
||||||
|
|
||||||
|
env
|
||||||
|
.fromSource(mySqlSource, WatermarkStrategy.noWatermarks(), "MySQL Source")
|
||||||
|
// set 4 parallel source tasks
|
||||||
|
.setParallelism(4)
|
||||||
|
.print().setParallelism(1); // use parallelism 1 for sink to keep message ordering
|
||||||
|
|
||||||
|
env.execute("Print MySQL Snapshot + Binlog");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
### Deserialization
|
||||||
|
The following JSON shows a change event in Debezium JSON format.
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"before": {
|
||||||
|
"id": 111,
|
||||||
|
"name": "scooter",
|
||||||
|
"description": "Big 2-wheel scooter",
|
||||||
|
"weight": 5.18
|
||||||
|
},
|
||||||
|
"after": {
|
||||||
|
"id": 111,
|
||||||
|
"name": "scooter",
|
||||||
|
"description": "Big 2-wheel scooter",
|
||||||
|
"weight": 5.15
|
||||||
|
},
|
||||||
|
"source": {...},
|
||||||
|
"op": "u", // the operation type, "u" means this this is an update event
|
||||||
|
"ts_ms": 1589362330904, // the time at which the connector processed the event
|
||||||
|
"transaction": null
|
||||||
|
}
|
||||||
|
```
|
||||||
|
**Note:** Please refer to the [Debezium documentation](https://debezium.io/documentation/reference/1.9/connectors/mysql.html#mysql-events) for the meaning of each field.
|
||||||
|
|
||||||
|
In some cases, users can use the `JsonDebeziumDeserializationSchema(true)` constructor to include the schema in the message. The Debezium JSON message may then look like this:
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"schema": {
|
||||||
|
"type": "struct",
|
||||||
|
"fields": [
|
||||||
|
{
|
||||||
|
"type": "struct",
|
||||||
|
"fields": [
|
||||||
|
{
|
||||||
|
"type": "int32",
|
||||||
|
"optional": false,
|
||||||
|
"field": "id"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "string",
|
||||||
|
"optional": false,
|
||||||
|
"default": "flink",
|
||||||
|
"field": "name"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "string",
|
||||||
|
"optional": true,
|
||||||
|
"field": "description"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "double",
|
||||||
|
"optional": true,
|
||||||
|
"field": "weight"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"optional": true,
|
||||||
|
"name": "mysql_binlog_source.inventory_1pzxhca.products.Value",
|
||||||
|
"field": "before"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "struct",
|
||||||
|
"fields": [
|
||||||
|
{
|
||||||
|
"type": "int32",
|
||||||
|
"optional": false,
|
||||||
|
"field": "id"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "string",
|
||||||
|
"optional": false,
|
||||||
|
"default": "flink",
|
||||||
|
"field": "name"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "string",
|
||||||
|
"optional": true,
|
||||||
|
"field": "description"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "double",
|
||||||
|
"optional": true,
|
||||||
|
"field": "weight"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"optional": true,
|
||||||
|
"name": "mysql_binlog_source.inventory_1pzxhca.products.Value",
|
||||||
|
"field": "after"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "struct",
|
||||||
|
"fields": {...},
|
||||||
|
"optional": false,
|
||||||
|
"name": "io.debezium.connector.mysql.Source",
|
||||||
|
"field": "source"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "string",
|
||||||
|
"optional": false,
|
||||||
|
"field": "op"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "int64",
|
||||||
|
"optional": true,
|
||||||
|
"field": "ts_ms"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"optional": false,
|
||||||
|
"name": "mysql_binlog_source.inventory_1pzxhca.products.Envelope"
|
||||||
|
},
|
||||||
|
"payload": {
|
||||||
|
"before": {
|
||||||
|
"id": 111,
|
||||||
|
"name": "scooter",
|
||||||
|
"description": "Big 2-wheel scooter",
|
||||||
|
"weight": 5.18
|
||||||
|
},
|
||||||
|
"after": {
|
||||||
|
"id": 111,
|
||||||
|
"name": "scooter",
|
||||||
|
"description": "Big 2-wheel scooter",
|
||||||
|
"weight": 5.15
|
||||||
|
},
|
||||||
|
"source": {...},
|
||||||
|
"op": "u", // the operation type, "u" means this this is an update event
|
||||||
|
"ts_ms": 1589362330904, // the time at which the connector processed the event
|
||||||
|
"transaction": null
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
Usually, it is recommended to exclude the schema because the schema fields make the messages very verbose, which reduces parsing performance.
|
||||||
|
|
||||||
|
The `JsonDebeziumDeserializationSchema` can also accept a custom configuration of the `JsonConverter`. For example, if you want to obtain numeric output for decimal data, you can construct the `JsonDebeziumDeserializationSchema` as follows:
|
||||||
|
|
||||||
|
```java
|
||||||
|
Map<String, Object> customConverterConfigs = new HashMap<>();
|
||||||
|
customConverterConfigs.put(JsonConverterConfig.DECIMAL_FORMAT_CONFIG, "numeric");
|
||||||
|
JsonDebeziumDeserializationSchema schema =
|
||||||
|
new JsonDebeziumDeserializationSchema(true, customConverterConfigs);
|
||||||
|
```
|
||||||
|
|
||||||
|
{{< top >}}
|
---
|
||||||
|
title: "Postgres CDC Connector"
|
||||||
|
weight: 6
|
||||||
|
type: docs
|
||||||
|
aliases:
|
||||||
|
- /connectors/cdc-connectors/postgres-cdc.html
|
||||||
|
---
|
||||||
|
<!--
|
||||||
|
Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
or more contributor license agreements. See the NOTICE file
|
||||||
|
distributed with this work for additional information
|
||||||
|
regarding copyright ownership. The ASF licenses this file
|
||||||
|
to you under the Apache License, Version 2.0 (the
|
||||||
|
"License"); you may not use this file except in compliance
|
||||||
|
with the License. You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing,
|
||||||
|
software distributed under the License is distributed on an
|
||||||
|
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||||
|
KIND, either express or implied. See the License for the
|
||||||
|
specific language governing permissions and limitations
|
||||||
|
under the License.
|
||||||
|
-->
|
||||||
|
|
||||||
|
# Postgres CDC Connector
|
||||||
|
|
||||||
|
The Postgres CDC connector allows for reading snapshot data and incremental data from a PostgreSQL database. This document describes how to set up the Postgres CDC connector to run SQL queries against PostgreSQL databases.
|
||||||
|
|
||||||
|
Dependencies
|
||||||
|
------------
|
||||||
|
|
||||||
|
In order to set up the Postgres CDC connector, the following table provides dependency information for both projects using a build automation tool (such as Maven or SBT) and SQL Client with SQL JAR bundles.
|
||||||
|
|
||||||
|
### Maven dependency
|
||||||
|
|
||||||
|
{{< artifact flink-connector-postgres-cdc >}}
|
||||||
|
|
||||||
|
### SQL Client JAR
|
||||||
|
|
||||||
|
```Download link is available only for stable releases.```
|
||||||
|
|
||||||
|
Download flink-sql-connector-postgres-cdc-3.0-SNAPSHOT.jar and put it under `<FLINK_HOME>/lib/`.
|
||||||
|
|
||||||
|
**Note:** The flink-sql-connector-postgres-cdc-XXX-SNAPSHOT version corresponds to the development branch, so users need to download the source code and compile the corresponding jar themselves. Users should instead use a released version, such as [flink-sql-connector-postgres-cdc-2.3.0.jar](https://mvnrepository.com/artifact/org.apache.flink/flink-sql-connector-postgres-cdc), which is available from the Maven central repository.
|
||||||
|
|
||||||
|
How to create a Postgres CDC table
|
||||||
|
----------------
|
||||||
|
|
||||||
|
The Postgres CDC table can be defined as follows:
|
||||||
|
|
||||||
|
```sql
|
||||||
|
-- register a PostgreSQL table 'shipments' in Flink SQL
|
||||||
|
CREATE TABLE shipments (
|
||||||
|
shipment_id INT,
|
||||||
|
order_id INT,
|
||||||
|
origin STRING,
|
||||||
|
destination STRING,
|
||||||
|
is_arrived BOOLEAN
|
||||||
|
) WITH (
|
||||||
|
'connector' = 'postgres-cdc',
|
||||||
|
'hostname' = 'localhost',
|
||||||
|
'port' = '5432',
|
||||||
|
'username' = 'postgres',
|
||||||
|
'password' = 'postgres',
|
||||||
|
'database-name' = 'postgres',
|
||||||
|
'schema-name' = 'public',
|
||||||
|
'table-name' = 'shipments',
|
||||||
|
'slot.name' = 'flink',
|
||||||
|
-- experimental feature: incremental snapshot (default off)
|
||||||
|
'scan.incremental.snapshot.enabled' = 'true'
|
||||||
|
);
|
||||||
|
|
||||||
|
-- read snapshot and change events from the shipments table
|
||||||
|
SELECT * FROM shipments;
|
||||||
|
```
|
||||||
|
|
||||||
|
Connector Options
|
||||||
|
----------------
|
||||||
|
|
||||||
|
<div class="highlight">
|
||||||
|
<table class="colwidths-auto docutils">
|
||||||
|
<thead>
|
||||||
|
<tr>
|
||||||
|
<th class="text-left" style="width: 25%">Option</th>
|
||||||
|
<th class="text-left" style="width: 8%">Required</th>
|
||||||
|
<th class="text-left" style="width: 7%">Default</th>
|
||||||
|
<th class="text-left" style="width: 10%">Type</th>
|
||||||
|
<th class="text-left" style="width: 50%">Description</th>
|
||||||
|
</tr>
|
||||||
|
</thead>
|
||||||
|
<tbody>
|
||||||
|
<tr>
|
||||||
|
<td>connector</td>
|
||||||
|
<td>required</td>
|
||||||
|
<td style="word-wrap: break-word;">(none)</td>
|
||||||
|
<td>String</td>
|
||||||
|
<td>Specify what connector to use, here should be <code>'postgres-cdc'</code>.</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>hostname</td>
|
||||||
|
<td>required</td>
|
||||||
|
<td style="word-wrap: break-word;">(none)</td>
|
||||||
|
<td>String</td>
|
||||||
|
<td>IP address or hostname of the PostgreSQL database server.</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>username</td>
|
||||||
|
<td>required</td>
|
||||||
|
<td style="word-wrap: break-word;">(none)</td>
|
||||||
|
<td>String</td>
|
||||||
|
<td>Name of the PostgreSQL user to use when connecting to the PostgreSQL database server.</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>password</td>
|
||||||
|
<td>required</td>
|
||||||
|
<td style="word-wrap: break-word;">(none)</td>
|
||||||
|
<td>String</td>
|
||||||
|
<td>Password to use when connecting to the PostgreSQL database server.</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>database-name</td>
|
||||||
|
<td>required</td>
|
||||||
|
<td style="word-wrap: break-word;">(none)</td>
|
||||||
|
<td>String</td>
|
||||||
|
<td>Database name of the PostgreSQL server to monitor.</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>schema-name</td>
|
||||||
|
<td>required</td>
|
||||||
|
<td style="word-wrap: break-word;">(none)</td>
|
||||||
|
<td>String</td>
|
||||||
|
<td>Schema name of the PostgreSQL database to monitor.</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>table-name</td>
|
||||||
|
<td>required</td>
|
||||||
|
<td style="word-wrap: break-word;">(none)</td>
|
||||||
|
<td>String</td>
|
||||||
|
<td>Table name of the PostgreSQL database to monitor.</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>port</td>
|
||||||
|
<td>optional</td>
|
||||||
|
<td style="word-wrap: break-word;">5432</td>
|
||||||
|
<td>Integer</td>
|
||||||
|
<td>Integer port number of the PostgreSQL database server.</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>slot.name</td>
|
||||||
|
<td>required</td>
|
||||||
|
<td style="word-wrap: break-word;">(none)</td>
|
||||||
|
<td>String</td>
|
||||||
|
<td>The name of the PostgreSQL logical decoding slot that was created for streaming changes from a particular plug-in
|
||||||
|
for a particular database/schema. The server uses this slot to stream events to the connector that you are configuring.
|
||||||
|
<br/>Slot names must conform to <a href="https://www.postgresql.org/docs/current/static/warm-standby.html#STREAMING-REPLICATION-SLOTS-MANIPULATION">PostgreSQL replication slot naming rules</a>, which state: "Each replication slot has a name, which can contain lower-case letters, numbers, and the underscore character."</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>decoding.plugin.name</td>
|
||||||
|
<td>optional</td>
|
||||||
|
<td style="word-wrap: break-word;">decoderbufs</td>
|
||||||
|
<td>String</td>
|
||||||
|
<td>The name of the Postgres logical decoding plug-in installed on the server.
|
||||||
|
Supported values are decoderbufs, wal2json, wal2json_rds, wal2json_streaming, wal2json_rds_streaming and pgoutput.</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>changelog-mode</td>
|
||||||
|
<td>optional</td>
|
||||||
|
<td style="word-wrap: break-word;">all</td>
|
||||||
|
<td>String</td>
|
||||||
|
<td>The changelog mode used for encoding streaming changes. Supported values are <code>all</code> (which encodes changes as retract stream using all RowKinds) and <code>upsert</code> (which encodes changes as upsert stream that describes idempotent updates on a key).
|
||||||
|
<br/> <code>upsert</code> mode can be used for tables with primary keys when replica identity <code>FULL</code> is not an option. Primary keys must be set to use <code>upsert</code> mode.</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>heartbeat.interval.ms</td>
|
||||||
|
<td>optional</td>
|
||||||
|
<td style="word-wrap: break-word;">30s</td>
|
||||||
|
<td>Duration</td>
|
||||||
|
<td>The interval of sending heartbeat events, used for tracking the latest available replication slot offsets.</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>debezium.*</td>
|
||||||
|
<td>optional</td>
|
||||||
|
<td style="word-wrap: break-word;">(none)</td>
|
||||||
|
<td>String</td>
|
||||||
|
<td>Pass-through Debezium's properties to Debezium Embedded Engine which is used to capture data changes from Postgres server.
|
||||||
|
For example: <code>'debezium.snapshot.mode' = 'never'</code>.
|
||||||
|
See more about the <a href="https://debezium.io/documentation/reference/1.9/connectors/postgresql.html#postgresql-connector-properties">Debezium's Postgres Connector properties</a></td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>debezium.snapshot.select.statement.overrides</td>
|
||||||
|
<td>optional</td>
|
||||||
|
<td style="word-wrap: break-word;">(none)</td>
|
||||||
|
<td>String</td>
|
||||||
|
<td>If the table contains a large amount of data and you do not need all the historical data, you can specify this underlying Debezium configuration to select the data range you want to snapshot. This parameter only affects the snapshot phase and does not affect subsequent change data reading.
<br/> Note: PostgreSQL requires both the schema name and the table name.
<br/> For example: <code>'debezium.snapshot.select.statement.overrides' = 'schema.table'</code>.
<br/> After specifying the above attribute, you must also add the following attribute:
<code> debezium.snapshot.select.statement.overrides.[schema].[table] </code>
|
||||||
|
</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>debezium.snapshot.select.statement.overrides.[schema].[table]</td>
|
||||||
|
<td>optional</td>
|
||||||
|
<td style="word-wrap: break-word;">(none)</td>
|
||||||
|
<td>String</td>
|
||||||
|
<td>You can specify SQL statements to limit the data range of the snapshot.
<br/> Note 1: The schema and table need to be specified in the SQL statement, and the SQL must conform to the syntax of the data source.
<br/> For example: <code>'debezium.snapshot.select.statement.overrides.schema.table' = 'select * from schema.table where 1 != 1'</code>.
<br/> Note 2: Tasks submitted through the Flink SQL client do not support statements that contain single quotation marks, such as function calls with string literals.
<br/> For example, the following does not work: <code>'debezium.snapshot.select.statement.overrides.schema.table' = 'select * from schema.table where to_char(rq, 'yyyy-MM-dd')'</code>.
|
||||||
|
</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>scan.incremental.snapshot.enabled</td>
|
||||||
|
<td>optional</td>
|
||||||
|
<td style="word-wrap: break-word;">false</td>
|
||||||
|
<td>Boolean</td>
|
||||||
|
<td>Incremental snapshot is a new mechanism to read the snapshot of a table. Compared to the old snapshot mechanism, the incremental snapshot has many advantages, including:
(1) the source can read in parallel during snapshot reading,
(2) the source can perform checkpoints at chunk granularity during snapshot reading,
(3) the source doesn't need to acquire a global read lock before snapshot reading.
Please see the <a href="#incremental-snapshot-reading">Incremental Snapshot Reading</a> section for more detailed information.
|
||||||
|
</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>scan.incremental.close-idle-reader.enabled</td>
|
||||||
|
<td>optional</td>
|
||||||
|
<td style="word-wrap: break-word;">false</td>
|
||||||
|
<td>Boolean</td>
|
||||||
|
<td>Whether to close idle readers at the end of the snapshot phase. <br>
This requires Flink version 1.14 or later, and 'execution.checkpointing.checkpoints-after-tasks-finish.enabled' must be set to true.<br>
Since Flink 1.15, the default value of 'execution.checkpointing.checkpoints-after-tasks-finish.enabled' is already true, so it does not need to be configured explicitly.
|
||||||
|
</td>
|
||||||
|
</tr>
|
||||||
|
</tbody>
|
||||||
|
</table>
|
||||||
|
</div>
|
||||||
|
<div>
|
||||||
|
|
||||||
|
Note: it is recommended to set a different `slot.name` for each table to avoid the potential `PSQLException: ERROR: replication slot "flink" is active for PID 974` error. See more details [here](https://debezium.io/documentation/reference/1.9/connectors/postgresql.html#postgresql-property-slot-name).
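As an illustration, here is a minimal sketch (table definitions and slot names are hypothetical, not taken from this document) of two Postgres CDC sources that each use their own replication slot:

```sql
-- each CDC source table gets its own replication slot
CREATE TABLE shipments_src (
  shipment_id INT,
  origin STRING
) WITH (
  'connector' = 'postgres-cdc',
  'hostname' = 'localhost',
  'port' = '5432',
  'username' = 'postgres',
  'password' = 'postgres',
  'database-name' = 'postgres',
  'schema-name' = 'public',
  'table-name' = 'shipments',
  -- slot dedicated to this source
  'slot.name' = 'flink_shipments'
);

CREATE TABLE orders_src (
  order_id INT,
  purchaser INT
) WITH (
  'connector' = 'postgres-cdc',
  'hostname' = 'localhost',
  'port' = '5432',
  'username' = 'postgres',
  'password' = 'postgres',
  'database-name' = 'postgres',
  'schema-name' = 'public',
  'table-name' = 'orders',
  -- a different slot avoids the "replication slot is active" error
  'slot.name' = 'flink_orders'
);
```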
|
||||||
|
|
||||||
|
### Incremental Snapshot Options
|
||||||
|
|
||||||
|
The following options are available only when `scan.incremental.snapshot.enabled=true`:
|
||||||
|
|
||||||
|
<div class="highlight">
|
||||||
|
<table class="colwidths-auto docutils">
|
||||||
|
<thead>
|
||||||
|
<tr>
|
||||||
|
<th class="text-left" style="width: 25%">Option</th>
|
||||||
|
<th class="text-left" style="width: 8%">Required</th>
|
||||||
|
<th class="text-left" style="width: 7%">Default</th>
|
||||||
|
<th class="text-left" style="width: 10%">Type</th>
|
||||||
|
<th class="text-left" style="width: 50%">Description</th>
|
||||||
|
</tr>
|
||||||
|
</thead>
|
||||||
|
<tbody>
|
||||||
|
<tr>
|
||||||
|
<td>scan.incremental.snapshot.chunk.size</td>
|
||||||
|
<td>optional</td>
|
||||||
|
<td style="word-wrap: break-word;">8096</td>
|
||||||
|
<td>Integer</td>
|
||||||
|
<td>The chunk size (number of rows) of the table snapshot. Captured tables are split into multiple chunks when reading the snapshot of a table.</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>scan.startup.mode</td>
|
||||||
|
<td>optional</td>
|
||||||
|
<td style="word-wrap: break-word;">initial</td>
|
||||||
|
<td>String</td>
|
||||||
|
<td>Optional startup mode for Postgres CDC consumer, valid enumerations are "initial"
|
||||||
|
and "latest-offset".
|
||||||
|
Please see <a href="#startup-reading-position">Startup Reading Position</a> section for more detailed information.</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>chunk-meta.group.size</td>
|
||||||
|
<td>optional</td>
|
||||||
|
<td style="word-wrap: break-word;">1000</td>
|
||||||
|
<td>Integer</td>
|
||||||
|
<td>The group size of the chunk metadata. If the metadata size exceeds the group size, the metadata will be divided into multiple groups.</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>connect.timeout</td>
|
||||||
|
<td>optional</td>
|
||||||
|
<td style="word-wrap: break-word;">30s</td>
|
||||||
|
<td>Duration</td>
|
||||||
|
<td>The maximum time that the connector should wait after trying to connect to the PostgreSQL database server before timing out.</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>connect.pool.size</td>
|
||||||
|
<td>optional</td>
|
||||||
|
<td style="word-wrap: break-word;">30</td>
|
||||||
|
<td>Integer</td>
|
||||||
|
<td>The connection pool size.</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>connect.max-retries</td>
|
||||||
|
<td>optional</td>
|
||||||
|
<td style="word-wrap: break-word;">3</td>
|
||||||
|
<td>Integer</td>
|
||||||
|
<td>The maximum number of times that the connector should retry building a database server connection.</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>scan.snapshot.fetch.size</td>
|
||||||
|
<td>optional</td>
|
||||||
|
<td style="word-wrap: break-word;">1024</td>
|
||||||
|
<td>Integer</td>
|
||||||
|
<td>The maximum fetch size per poll when reading the table snapshot.</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>scan.incremental.snapshot.chunk.key-column</td>
|
||||||
|
<td>optional</td>
|
||||||
|
<td style="word-wrap: break-word;">(none)</td>
|
||||||
|
<td>String</td>
|
||||||
|
<td>The chunk key column of the table snapshot. Captured tables are split into multiple chunks by the chunk key when reading the snapshot of a table.
By default, the chunk key is the first column of the primary key. This column must be a column of the primary key.</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>chunk-key.even-distribution.factor.lower-bound</td>
|
||||||
|
<td>optional</td>
|
||||||
|
<td style="word-wrap: break-word;">0.05d</td>
|
||||||
|
<td>Double</td>
|
||||||
|
<td>The lower bound of the chunk key distribution factor. The distribution factor is used to determine whether the table is evenly distributed or not.
The table chunks would use an evenly-distributed calculation optimization when the data distribution is even, and the splitting query would be used when it is uneven.
The distribution factor could be calculated by (MAX(id) - MIN(id) + 1) / rowCount.</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>chunk-key.even-distribution.factor.upper-bound</td>
|
||||||
|
<td>optional</td>
|
||||||
|
<td style="word-wrap: break-word;">1000.0d</td>
|
||||||
|
<td>Double</td>
|
||||||
|
<td>The upper bound of the chunk key distribution factor. The distribution factor is used to determine whether the table is evenly distributed or not.
The table chunks would use an evenly-distributed calculation optimization when the data distribution is even, and the splitting query would be used when it is uneven.
The distribution factor could be calculated by (MAX(id) - MIN(id) + 1) / rowCount.</td>
|
||||||
|
</tr>
|
||||||
|
</tbody>
|
||||||
|
</table>
|
||||||
|
</div>
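For illustration, here is a minimal sketch (option values are assumptions, not recommendations) that combines several of the incremental snapshot options above in one table definition:

```sql
-- hypothetical tuning of incremental snapshot reading
CREATE TABLE shipments_parallel (
  shipment_id INT,
  order_id INT,
  is_arrived BOOLEAN,
  PRIMARY KEY (shipment_id) NOT ENFORCED
) WITH (
  'connector' = 'postgres-cdc',
  'hostname' = 'localhost',
  'port' = '5432',
  'username' = 'postgres',
  'password' = 'postgres',
  'database-name' = 'postgres',
  'schema-name' = 'public',
  'table-name' = 'shipments',
  'slot.name' = 'flink_parallel',
  'scan.incremental.snapshot.enabled' = 'true',
  -- smaller chunks mean more, finer-grained splits during the snapshot phase
  'scan.incremental.snapshot.chunk.size' = '4096',
  -- must be a column of the primary key
  'scan.incremental.snapshot.chunk.key-column' = 'shipment_id'
);
```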
|
||||||
|
|
||||||
|
Available Metadata
|
||||||
|
----------------
|
||||||
|
|
||||||
|
The following format metadata can be exposed as read-only (VIRTUAL) columns in a table definition.
|
||||||
|
|
||||||
|
<table class="colwidths-auto docutils">
|
||||||
|
<thead>
|
||||||
|
<tr>
|
||||||
|
<th class="text-left" style="width: 15%">Key</th>
|
||||||
|
<th class="text-left" style="width: 30%">DataType</th>
|
||||||
|
<th class="text-left" style="width: 55%">Description</th>
|
||||||
|
</tr>
|
||||||
|
</thead>
|
||||||
|
<tbody>
|
||||||
|
<tr>
|
||||||
|
<td>table_name</td>
|
||||||
|
<td>STRING NOT NULL</td>
|
||||||
|
<td>Name of the table that contains the row.</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>schema_name</td>
|
||||||
|
<td>STRING NOT NULL</td>
|
||||||
|
<td>Name of the schema that contains the row.</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>database_name</td>
|
||||||
|
<td>STRING NOT NULL</td>
|
||||||
|
<td>Name of the database that contains the row.</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>op_ts</td>
|
||||||
|
<td>TIMESTAMP_LTZ(3) NOT NULL</td>
|
||||||
|
<td>It indicates the time that the change was made in the database. <br>If the record is read from snapshot of the table instead of the change stream, the value is always 0.</td>
|
||||||
|
</tr>
|
||||||
|
</tbody>
|
||||||
|
</table>
|
||||||
|
|
||||||
|
Limitation
|
||||||
|
--------
|
||||||
|
|
||||||
|
### Can't perform checkpoint during scanning snapshot of tables when incremental snapshot is disabled
|
||||||
|
|
||||||
|
When `scan.incremental.snapshot.enabled=false`, we have the following limitation.
|
||||||
|
|
||||||
|
While scanning the snapshot of database tables, there is no recoverable position, so checkpoints cannot be performed. In order not to perform checkpoints, the Postgres CDC source will keep the checkpoint waiting until it times out. A timed-out checkpoint will be recognized as a failed checkpoint, which by default triggers a failover of the Flink job. So if the database table is large, it is recommended to add the following Flink configurations to avoid failover caused by timed-out checkpoints:
|
||||||
|
|
||||||
|
```
|
||||||
|
execution.checkpointing.interval: 10min
|
||||||
|
execution.checkpointing.tolerable-failed-checkpoints: 100
|
||||||
|
restart-strategy: fixed-delay
|
||||||
|
restart-strategy.fixed-delay.attempts: 2147483647
|
||||||
|
```
|
||||||
|
|
||||||
|
The following extended CREATE TABLE example demonstrates the syntax for exposing these metadata fields:
|
||||||
|
```sql
|
||||||
|
CREATE TABLE products (
|
||||||
|
db_name STRING METADATA FROM 'database_name' VIRTUAL,
|
||||||
|
table_name STRING METADATA FROM 'table_name' VIRTUAL,
|
||||||
|
operation_ts TIMESTAMP_LTZ(3) METADATA FROM 'op_ts' VIRTUAL,
|
||||||
|
shipment_id INT,
|
||||||
|
order_id INT,
|
||||||
|
origin STRING,
|
||||||
|
destination STRING,
|
||||||
|
is_arrived BOOLEAN
|
||||||
|
) WITH (
|
||||||
|
'connector' = 'postgres-cdc',
|
||||||
|
'hostname' = 'localhost',
|
||||||
|
'port' = '5432',
|
||||||
|
'username' = 'postgres',
|
||||||
|
'password' = 'postgres',
|
||||||
|
'database-name' = 'postgres',
|
||||||
|
'schema-name' = 'public',
|
||||||
|
'table-name' = 'shipments',
|
||||||
|
'slot.name' = 'flink'
|
||||||
|
);
|
||||||
|
```
|
||||||
|
|
||||||
|
Features
|
||||||
|
--------
|
||||||
|
|
||||||
|
### Incremental Snapshot Reading (Experimental)
|
||||||
|
|
||||||
|
Incremental snapshot reading is a new mechanism to read the snapshot of a table. Compared to the old snapshot mechanism, the incremental snapshot has many advantages, including:
|
||||||
|
* (1) PostgreSQL CDC Source can be parallel during snapshot reading
|
||||||
|
* (2) PostgreSQL CDC Source can perform checkpoints in the chunk granularity during snapshot reading
|
||||||
|
* (3) PostgreSQL CDC Source doesn't need to acquire global read lock before snapshot reading
|
||||||
|
|
||||||
|
During incremental snapshot reading, the PostgreSQL CDC source first splits the snapshot into chunks (splits) by the primary key of the table, and then assigns the chunks to multiple readers to read the data of each snapshot chunk.
|
||||||
|
|
||||||
|
### Exactly-Once Processing
|
||||||
|
|
||||||
|
The Postgres CDC connector is a Flink Source connector which will read the database snapshot first and then continue to read change events with **exactly-once processing** even when failures happen. Please read [How the connector works](https://debezium.io/documentation/reference/1.9/connectors/postgresql.html#how-the-postgresql-connector-works).
|
||||||
|
|
||||||
|
### DataStream Source
|
||||||
|
|
||||||
|
The Postgres CDC connector can also be a DataStream source. There are two modes for the DataStream source:
|
||||||
|
|
||||||
|
- incremental snapshot based, which allows parallel reading
|
||||||
|
- SourceFunction based, which only supports single thread reading
|
||||||
|
|
||||||
|
#### Incremental Snapshot based DataStream (Experimental)
|
||||||
|
|
||||||
|
```java
|
||||||
|
import org.apache.flink.cdc.connectors.base.source.jdbc.JdbcIncrementalSource;
|
||||||
|
import org.apache.flink.cdc.connectors.postgres.source.PostgresSourceBuilder;
|
||||||
|
import org.apache.flink.cdc.debezium.DebeziumDeserializationSchema;
|
||||||
|
import org.apache.flink.cdc.debezium.JsonDebeziumDeserializationSchema;
|
||||||
|
import org.apache.flink.api.common.eventtime.WatermarkStrategy;
|
||||||
|
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
|
||||||
|
|
||||||
|
public class PostgresParallelSourceExample {
|
||||||
|
|
||||||
|
public static void main(String[] args) throws Exception {
|
||||||
|
|
||||||
|
DebeziumDeserializationSchema<String> deserializer =
|
||||||
|
new JsonDebeziumDeserializationSchema();
|
||||||
|
|
||||||
|
JdbcIncrementalSource<String> postgresIncrementalSource =
|
||||||
|
PostgresSourceBuilder.PostgresIncrementalSource.<String>builder()
|
||||||
|
.hostname("localhost")
|
||||||
|
.port(5432)
|
||||||
|
.database("postgres")
|
||||||
|
.schemaList("inventory")
|
||||||
|
.tableList("inventory.products")
|
||||||
|
.username("postgres")
|
||||||
|
.password("postgres")
|
||||||
|
.slotName("flink")
|
||||||
|
.decodingPluginName("decoderbufs") // use pgoutput for PostgreSQL 10+
|
||||||
|
.deserializer(deserializer)
|
||||||
|
.includeSchemaChanges(true) // output the schema changes as well
|
||||||
|
.splitSize(2) // the split size of each snapshot split
|
||||||
|
.build();
|
||||||
|
|
||||||
|
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
|
||||||
|
|
||||||
|
env.enableCheckpointing(3000);
|
||||||
|
|
||||||
|
env.fromSource(
|
||||||
|
postgresIncrementalSource,
|
||||||
|
WatermarkStrategy.noWatermarks(),
|
||||||
|
"PostgresParallelSource")
|
||||||
|
.setParallelism(2)
|
||||||
|
.print();
|
||||||
|
|
||||||
|
env.execute("Output Postgres Snapshot");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
#### SourceFunction-based DataStream
|
||||||
|
|
||||||
|
```java
|
||||||
|
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
|
||||||
|
import org.apache.flink.streaming.api.functions.source.SourceFunction;
|
||||||
|
import org.apache.flink.cdc.debezium.JsonDebeziumDeserializationSchema;
|
||||||
|
import org.apache.flink.cdc.connectors.postgres.PostgreSQLSource;
|
||||||
|
|
||||||
|
public class PostgreSQLSourceExample {
|
||||||
|
public static void main(String[] args) throws Exception {
|
||||||
|
SourceFunction<String> sourceFunction = PostgreSQLSource.<String>builder()
|
||||||
|
.hostname("localhost")
|
||||||
|
.port(5432)
|
||||||
|
.database("postgres") // monitor postgres database
|
||||||
|
.schemaList("inventory") // monitor inventory schema
|
||||||
|
.tableList("inventory.products") // monitor products table
|
||||||
|
.username("flinkuser")
|
||||||
|
.password("flinkpw")
|
||||||
|
.deserializer(new JsonDebeziumDeserializationSchema()) // converts SourceRecord to JSON String
|
||||||
|
.build();
|
||||||
|
|
||||||
|
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
|
||||||
|
|
||||||
|
env
|
||||||
|
.addSource(sourceFunction)
|
||||||
|
.print().setParallelism(1); // use parallelism 1 for sink to keep message ordering
|
||||||
|
|
||||||
|
env.execute();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
Data Type Mapping
|
||||||
|
----------------
|
||||||
|
|
||||||
|
<div class="wy-table-responsive">
|
||||||
|
<table class="colwidths-auto docutils">
|
||||||
|
<thead>
|
||||||
|
<tr>
|
||||||
|
<th class="text-left">PostgreSQL type<a href="https://www.postgresql.org/docs/12/datatype.html"></a></th>
|
||||||
|
<th class="text-left">Flink SQL type<a href="{% link dev/table/types.md %}"></a></th>
|
||||||
|
</tr>
|
||||||
|
</thead>
|
||||||
|
<tbody>
|
||||||
|
<tr>
|
||||||
|
<td></td>
|
||||||
|
<td>TINYINT</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>
|
||||||
|
SMALLINT<br>
|
||||||
|
INT2<br>
|
||||||
|
SMALLSERIAL<br>
|
||||||
|
SERIAL2</td>
|
||||||
|
<td>SMALLINT</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>
|
||||||
|
INTEGER<br>
|
||||||
|
SERIAL</td>
|
||||||
|
<td>INT</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>
|
||||||
|
BIGINT<br>
|
||||||
|
BIGSERIAL</td>
|
||||||
|
<td>BIGINT</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td></td>
|
||||||
|
<td>DECIMAL(20, 0)</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>BIGINT</td>
|
||||||
|
<td>BIGINT</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>
|
||||||
|
REAL<br>
|
||||||
|
FLOAT4</td>
|
||||||
|
<td>FLOAT</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>
|
||||||
|
FLOAT8<br>
|
||||||
|
DOUBLE PRECISION</td>
|
||||||
|
<td>DOUBLE</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>
|
||||||
|
NUMERIC(p, s)<br>
|
||||||
|
DECIMAL(p, s)</td>
|
||||||
|
<td>DECIMAL(p, s)</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>BOOLEAN</td>
|
||||||
|
<td>BOOLEAN</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>DATE</td>
|
||||||
|
<td>DATE</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>TIME [(p)] [WITHOUT TIMEZONE]</td>
|
||||||
|
<td>TIME [(p)] [WITHOUT TIMEZONE]</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>TIMESTAMP [(p)] [WITHOUT TIMEZONE]</td>
|
||||||
|
<td>TIMESTAMP [(p)] [WITHOUT TIMEZONE]</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>
|
||||||
|
CHAR(n)<br>
|
||||||
|
CHARACTER(n)<br>
|
||||||
|
VARCHAR(n)<br>
|
||||||
|
CHARACTER VARYING(n)<br>
|
||||||
|
TEXT</td>
|
||||||
|
<td>STRING</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>BYTEA</td>
|
||||||
|
<td>BYTES</td>
|
||||||
|
</tr>
|
||||||
|
</tbody>
|
||||||
|
</table>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
{{< top >}}
|
---
|
||||||
|
title: "SQLServer CDC Connector"
|
||||||
|
weight: 7
|
||||||
|
type: docs
|
||||||
|
aliases:
|
||||||
|
- /connectors/cdc-connectors/sqlserver-cdc.html
|
||||||
|
---
|
||||||
|
<!--
|
||||||
|
Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
or more contributor license agreements. See the NOTICE file
|
||||||
|
distributed with this work for additional information
|
||||||
|
regarding copyright ownership. The ASF licenses this file
|
||||||
|
to you under the Apache License, Version 2.0 (the
|
||||||
|
"License"); you may not use this file except in compliance
|
||||||
|
with the License. You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing,
|
||||||
|
software distributed under the License is distributed on an
|
||||||
|
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||||
|
KIND, either express or implied. See the License for the
|
||||||
|
specific language governing permissions and limitations
|
||||||
|
under the License.
|
||||||
|
-->
|
||||||
|
|
||||||
|
# SQLServer CDC Connector
|
||||||
|
|
||||||
|
The SQLServer CDC connector allows for reading snapshot data and incremental data from a SQLServer database. This document describes how to set up the SQLServer CDC connector to run SQL queries against SQLServer databases.
|
||||||
|
|
||||||
|
Dependencies
|
||||||
|
------------
|
||||||
|
|
||||||
|
In order to set up the SQLServer CDC connector, the following table provides dependency information for both projects using a build automation tool (such as Maven or SBT) and SQL Client with SQL JAR bundles.
|
||||||
|
|
||||||
|
### Maven dependency
|
||||||
|
|
||||||
|
{{< artifact flink-connector-sqlserver-cdc >}}
|
||||||
|
|
||||||
|
### SQL Client JAR
|
||||||
|
|
||||||
|
```Download link is available only for stable releases.```
|
||||||
|
|
||||||
|
Download [flink-sql-connector-sqlserver-cdc-3.0-SNAPSHOT.jar](https://repo1.maven.org/maven2/org/apache/flink/flink-sql-connector-sqlserver-cdc/3.0-SNAPSHOT/flink-sql-connector-sqlserver-cdc-3.0-SNAPSHOT.jar) and put it under `<FLINK_HOME>/lib/`.
|
||||||
|
|
||||||
|
**Note:** The flink-sql-connector-sqlserver-cdc-XXX-SNAPSHOT version corresponds to the development branch, so users need to download the source code and compile the corresponding jar themselves. Users should instead use a released version, such as [flink-sql-connector-sqlserver-cdc-2.2.1.jar](https://mvnrepository.com/artifact/org.apache.flink/flink-sql-connector-sqlserver-cdc), which is available from the Maven central repository.
|
||||||
|
|
||||||
|
Setup SQLServer Database
|
||||||
|
----------------
|
||||||
|
A SQL Server administrator must enable change data capture on the source tables that you want to capture. The database must already be enabled for CDC. To enable CDC on a table, a SQL Server administrator runs the stored procedure ```sys.sp_cdc_enable_table``` for the table.
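Enabling CDC at the database level can be done with SQL Server's `sys.sp_cdc_enable_db` system stored procedure; here is a minimal sketch (the database name `MyDB` matches the table example below):

```sql
-- enable CDC at the database level first
USE MyDB
GO

EXEC sys.sp_cdc_enable_db
GO
```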
|
||||||
|
|
||||||
|
**Prerequisites:**
|
||||||
|
* CDC is enabled on the SQL Server database.
|
||||||
|
* The SQL Server Agent is running.
|
||||||
|
* You are a member of the db_owner fixed database role for the database.
|
||||||
|
|
||||||
|
**Procedure:**
|
||||||
|
* Connect to the SQL Server database by database management studio.
|
||||||
|
* Run the following SQL statement to enable CDC on the table.
|
||||||
|
```sql
|
||||||
|
USE MyDB
|
||||||
|
GO
|
||||||
|
|
||||||
|
EXEC sys.sp_cdc_enable_table
|
||||||
|
@source_schema = N'dbo', -- Specifies the schema of the source table.
|
||||||
|
@source_name = N'MyTable', -- Specifies the name of the table that you want to capture.
|
||||||
|
@role_name = N'MyRole', -- Specifies a role MyRole to which you can add users to whom you want to grant SELECT permission on the captured columns of the source table. Users in the sysadmin or db_owner role also have access to the specified change tables. Set the value of @role_name to NULL, to allow only members in the sysadmin or db_owner to have full access to captured information.
|
||||||
|
@filegroup_name = N'MyDB_CT',-- Specifies the filegroup where SQL Server places the change table for the captured table. The named filegroup must already exist. It is best not to locate change tables in the same filegroup that you use for source tables.
|
||||||
|
@supports_net_changes = 0
|
||||||
|
GO
|
||||||
|
```
|
||||||
|
* Verify that the user has access to the CDC table.
|
||||||
|
```sql
|
||||||
|
--The following example runs the stored procedure sys.sp_cdc_help_change_data_capture on the database MyDB:
|
||||||
|
USE MyDB;
|
||||||
|
GO
|
||||||
|
EXEC sys.sp_cdc_help_change_data_capture
|
||||||
|
GO
|
||||||
|
```
|
||||||
|
The query returns configuration information for each table in the database that is enabled for CDC and that contains change data that the caller is authorized to access. If the result is empty, verify that the user has privileges to access both the capture instance and the CDC tables.
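If access is missing, one hedged option (role and user names are illustrative) is to add the connector's login user to the gating role that was specified as `@role_name` when the table was enabled for CDC:

```sql
-- grant the connector user access to the captured data via the gating role
USE MyDB
GO

ALTER ROLE MyRole ADD MEMBER flinkuser;
GO
```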
|
||||||
|
|
||||||
|
How to create a SQLServer CDC table
|
||||||
|
----------------
|
||||||
|
|
||||||
|
The SqlServer CDC table can be defined as follows:
|
||||||
|
|
||||||
|
```sql
|
||||||
|
-- register a SqlServer table 'orders' in Flink SQL
|
||||||
|
CREATE TABLE orders (
|
||||||
|
id INT,
|
||||||
|
order_date DATE,
|
||||||
|
purchaser INT,
|
||||||
|
quantity INT,
|
||||||
|
product_id INT,
|
||||||
|
PRIMARY KEY (id) NOT ENFORCED
|
||||||
|
) WITH (
|
||||||
|
'connector' = 'sqlserver-cdc',
|
||||||
|
'hostname' = 'localhost',
|
||||||
|
'port' = '1433',
|
||||||
|
'username' = 'sa',
|
||||||
|
'password' = 'Password!',
|
||||||
|
'database-name' = 'inventory',
|
||||||
|
'table-name' = 'dbo.orders'
|
||||||
|
);
|
||||||
|
|
||||||
|
-- read snapshot and change events from the orders table
|
||||||
|
SELECT * FROM orders;
|
||||||
|
```
|
||||||
|
|
||||||
|
Connector Options
|
||||||
|
----------------
|
||||||
|
|
||||||
|
<div class="highlight">
|
||||||
|
<table class="colwidths-auto docutils">
|
||||||
|
<thead>
|
||||||
|
<tr>
|
||||||
|
<th class="text-left" style="width: 25%">Option</th>
|
||||||
|
<th class="text-left" style="width: 8%">Required</th>
|
||||||
|
<th class="text-left" style="width: 7%">Default</th>
|
||||||
|
<th class="text-left" style="width: 10%">Type</th>
|
||||||
|
<th class="text-left" style="width: 50%">Description</th>
|
||||||
|
</tr>
|
||||||
|
</thead>
|
||||||
|
<tbody>
|
||||||
|
<tr>
|
||||||
|
<td>connector</td>
|
||||||
|
<td>required</td>
|
||||||
|
<td style="word-wrap: break-word;">(none)</td>
|
||||||
|
<td>String</td>
|
||||||
|
<td>Specify what connector to use, here should be <code>'sqlserver-cdc'</code>.</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>hostname</td>
|
||||||
|
<td>required</td>
|
||||||
|
<td style="word-wrap: break-word;">(none)</td>
|
||||||
|
<td>String</td>
|
||||||
|
<td>IP address or hostname of the SQLServer database.</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>username</td>
|
||||||
|
<td>required</td>
|
||||||
|
<td style="word-wrap: break-word;">(none)</td>
|
||||||
|
<td>String</td>
|
||||||
|
<td>Username to use when connecting to the SQLServer database.</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>password</td>
|
||||||
|
<td>required</td>
|
||||||
|
<td style="word-wrap: break-word;">(none)</td>
|
||||||
|
<td>String</td>
|
||||||
|
<td>Password to use when connecting to the SQLServer database.</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>database-name</td>
|
||||||
|
<td>required</td>
|
||||||
|
<td style="word-wrap: break-word;">(none)</td>
|
||||||
|
<td>String</td>
|
||||||
|
<td>Database name of the SQLServer database to monitor.</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>table-name</td>
|
||||||
|
<td>required</td>
|
||||||
|
<td style="word-wrap: break-word;">(none)</td>
|
||||||
|
<td>String</td>
|
||||||
|
<td>Table name of the SQLServer database to monitor, e.g.: "db1.table1"</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>port</td>
|
||||||
|
<td>optional</td>
|
||||||
|
<td style="word-wrap: break-word;">1433</td>
|
||||||
|
<td>Integer</td>
|
||||||
|
<td>Integer port number of the SQLServer database.</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>server-time-zone</td>
|
||||||
|
<td>optional</td>
|
||||||
|
<td style="word-wrap: break-word;">UTC</td>
|
||||||
|
<td>String</td>
|
||||||
|
<td>The session time zone in database server, e.g. "Asia/Shanghai".</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>scan.incremental.snapshot.enabled</td>
|
||||||
|
<td>optional</td>
|
||||||
|
<td style="word-wrap: break-word;">true</td>
|
||||||
|
<td>Boolean</td>
|
||||||
|
<td>Whether to enable the incremental snapshot mechanism for parallel reading.</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>chunk-meta.group.size</td>
|
||||||
|
<td>optional</td>
|
||||||
|
<td style="word-wrap: break-word;">1000</td>
|
||||||
|
<td>Integer</td>
|
||||||
|
<td>The group size of the chunk metadata. If the metadata size exceeds the group size, the metadata will be divided into multiple groups.</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>chunk-key.even-distribution.factor.lower-bound</td>
|
||||||
|
<td>optional</td>
|
||||||
|
<td style="word-wrap: break-word;">0.05d</td>
|
||||||
|
<td>Double</td>
|
||||||
|
<td>The lower bound of the chunk key distribution factor. The distribution factor is used to determine whether the table is evenly distributed or not.
The table chunks would use an evenly-distributed calculation optimization when the data distribution is even, and the splitting query would be used when it is uneven.
The distribution factor could be calculated by (MAX(id) - MIN(id) + 1) / rowCount.</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>chunk-key.even-distribution.factor.upper-bound</td>
|
||||||
|
<td>optional</td>
|
||||||
|
<td style="word-wrap: break-word;">1000.0d</td>
|
||||||
|
<td>Double</td>
|
||||||
|
<td>The upper bound of the chunk key distribution factor. The distribution factor is used to determine whether the table is evenly distributed or not.
The table chunks would use an evenly-distributed calculation optimization when the data distribution is even, and the splitting query would be used when it is uneven.
The distribution factor could be calculated by (MAX(id) - MIN(id) + 1) / rowCount.</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>debezium.*</td>
|
||||||
|
<td>optional</td>
|
||||||
|
<td style="word-wrap: break-word;">(none)</td>
|
||||||
|
<td>String</td>
|
||||||
|
<td>Pass-through Debezium's properties to Debezium Embedded Engine which is used to capture data changes from SQLServer.
|
||||||
|
For example: <code>'debezium.snapshot.mode' = 'initial_only'</code>.
|
||||||
|
See more about the <a href="https://debezium.io/documentation/reference/1.9/connectors/sqlserver.html#sqlserver-required-connector-configuration-properties">Debezium's SQLServer Connector properties</a></td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>scan.incremental.close-idle-reader.enabled</td>
|
||||||
|
<td>optional</td>
|
||||||
|
<td style="word-wrap: break-word;">false</td>
|
||||||
|
<td>Boolean</td>
|
||||||
|
<td>Whether to close idle readers at the end of the snapshot phase. <br>
This requires Flink version 1.14 or later, and 'execution.checkpointing.checkpoints-after-tasks-finish.enabled' must be set to true.<br>
Since Flink 1.15, the default value of 'execution.checkpointing.checkpoints-after-tasks-finish.enabled' is already true, so it does not need to be configured explicitly.
|
||||||
|
</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>scan.incremental.snapshot.chunk.key-column</td>
|
||||||
|
<td>optional</td>
|
||||||
|
<td style="word-wrap: break-word;">(none)</td>
|
||||||
|
<td>String</td>
|
||||||
|
<td>The chunk key column of the table snapshot. Captured tables are split into multiple chunks by the chunk key when reading the snapshot of a table.
By default, the chunk key is the first column of the primary key. This column must be a column of the primary key.</td>
|
||||||
|
</tr>
|
||||||
|
</tbody>
|
||||||
|
</table>
|
||||||
|
</div>
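For illustration, here is a minimal sketch (schema and values are assumptions) of a table definition that combines a few of the options above:

```sql
-- hypothetical example combining several connector options
CREATE TABLE orders_tuned (
  id INT,
  order_date DATE,
  quantity INT,
  PRIMARY KEY (id) NOT ENFORCED
) WITH (
  'connector' = 'sqlserver-cdc',
  'hostname' = 'localhost',
  'port' = '1433',
  'username' = 'sa',
  'password' = 'Password!',
  'database-name' = 'inventory',
  'table-name' = 'dbo.orders',
  -- session time zone of the database server
  'server-time-zone' = 'UTC',
  -- parallel incremental snapshot reading (enabled by default)
  'scan.incremental.snapshot.enabled' = 'true',
  -- must be a column of the primary key
  'scan.incremental.snapshot.chunk.key-column' = 'id'
);
```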
|
||||||
|
|
||||||
|
Available Metadata
|
||||||
|
----------------
|
||||||
|
|
||||||
|
The following format metadata can be exposed as read-only (VIRTUAL) columns in a table definition.
|
||||||
|
|
||||||
|
<table class="colwidths-auto docutils">
|
||||||
|
<thead>
|
||||||
|
<tr>
|
||||||
|
<th class="text-left" style="width: 15%">Key</th>
|
||||||
|
<th class="text-left" style="width: 30%">DataType</th>
|
||||||
|
<th class="text-left" style="width: 55%">Description</th>
|
||||||
|
</tr>
|
||||||
|
</thead>
|
||||||
|
<tbody>
|
||||||
|
<tr>
|
||||||
|
<td>table_name</td>
|
||||||
|
<td>STRING NOT NULL</td>
|
||||||
|
<td>Name of the table that contains the row.</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>schema_name</td>
|
||||||
|
<td>STRING NOT NULL</td>
|
||||||
|
<td>Name of the schema that contains the row.</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>database_name</td>
|
||||||
|
<td>STRING NOT NULL</td>
|
||||||
|
<td>Name of the database that contains the row.</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>op_ts</td>
|
||||||
|
<td>TIMESTAMP_LTZ(3) NOT NULL</td>
|
||||||
|
<td>It indicates the time that the change was made in the database. <br>If the record is read from snapshot of the table instead of the change stream, the value is always 0.</td>
|
||||||
|
</tr>
|
||||||
|
</tbody>
|
||||||
|
</table>
|
||||||
|
|
||||||
|
Limitation
|
||||||
|
--------
|
||||||
|
|
||||||
|
### Can't perform checkpoint during scanning snapshot of tables
|
||||||
|
While scanning the snapshot of database tables, there is no recoverable position, so checkpoints cannot be performed. In order not to perform checkpoints, the SqlServer CDC source will keep the checkpoint waiting until it times out. A timed-out checkpoint will be recognized as a failed checkpoint, which by default triggers a failover of the Flink job. So if the database table is large, it is recommended to add the following Flink configurations to avoid failover caused by timed-out checkpoints:
|
||||||
|
|
||||||
|
```
|
||||||
|
execution.checkpointing.interval: 10min
|
||||||
|
execution.checkpointing.tolerable-failed-checkpoints: 100
|
||||||
|
restart-strategy: fixed-delay
|
||||||
|
restart-strategy.fixed-delay.attempts: 2147483647
|
||||||
|
```
|
||||||
|
|
||||||
|
The following extended CREATE TABLE example demonstrates the syntax for exposing these metadata fields:
|
||||||
|
```sql
|
||||||
|
CREATE TABLE products (
|
||||||
|
table_name STRING METADATA FROM 'table_name' VIRTUAL,
|
||||||
|
schema_name STRING METADATA FROM 'schema_name' VIRTUAL,
|
||||||
|
db_name STRING METADATA FROM 'database_name' VIRTUAL,
|
||||||
|
operation_ts TIMESTAMP_LTZ(3) METADATA FROM 'op_ts' VIRTUAL,
|
||||||
|
id INT NOT NULL,
|
||||||
|
name STRING,
|
||||||
|
description STRING,
|
||||||
|
weight DECIMAL(10,3)
|
||||||
|
) WITH (
|
||||||
|
'connector' = 'sqlserver-cdc',
|
||||||
|
'hostname' = 'localhost',
|
||||||
|
'port' = '1433',
|
||||||
|
'username' = 'sa',
|
||||||
|
'password' = 'Password!',
|
||||||
|
'database-name' = 'inventory',
|
||||||
|
'table-name' = 'dbo.products'
|
||||||
|
);
|
||||||
|
```
|
||||||
|
|
||||||
|
Features
|
||||||
|
--------
|
||||||
|
|
||||||
|
### Exactly-Once Processing
|
||||||
|
|
||||||
|
The SQLServer CDC connector is a Flink Source connector which will read the database snapshot first and then continue to read change events with **exactly-once processing** even when failures happen. Please read [How the connector works](https://debezium.io/documentation/reference/1.9/connectors/sqlserver.html#how-the-sqlserver-connector-works).
|
||||||
|
|
||||||
|
### Startup Reading Position
|
||||||
|
|
||||||
|
The config option `scan.startup.mode` specifies the startup mode for SQLServer CDC consumer. The valid enumerations are:
|
||||||
|
|
||||||
|
- `initial` (default): Takes a snapshot of structure and data of captured tables; useful if topics should be populated with a complete representation of the data from the captured tables.
|
||||||
|
- `initial-only`: Takes a snapshot of structure and data like initial but instead does not transition into streaming changes once the snapshot has completed.
|
||||||
|
- `latest-offset`: Takes a snapshot of the structure of captured tables only; useful if only changes happening from now onwards should be propagated to topics.
|
||||||
|
|
||||||
|
_Note: the mechanism of the `scan.startup.mode` option relies on Debezium's `snapshot.mode` configuration, so please do not use them together. If you specify both the `scan.startup.mode` and `debezium.snapshot.mode` options in the table DDL, `scan.startup.mode` may not take effect._
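As a hedged sketch (the column list and connection values are illustrative), a startup mode can be passed in the table DDL like this:

```sql
-- hypothetical example: only propagate changes happening from now onwards
CREATE TABLE orders_latest (
  id INT,
  order_date DATE,
  purchaser INT,
  PRIMARY KEY (id) NOT ENFORCED
) WITH (
  'connector' = 'sqlserver-cdc',
  'hostname' = 'localhost',
  'port' = '1433',
  'username' = 'sa',
  'password' = 'Password!',
  'database-name' = 'inventory',
  'table-name' = 'dbo.orders',
  -- per the note above, do not combine this with 'debezium.snapshot.mode'
  'scan.startup.mode' = 'latest-offset'
);
```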
|
||||||
|
|
||||||
|
### Single Thread Reading
|
||||||
|
|
||||||
|
The SQLServer CDC source can't read in parallel, because only one task can receive change events.
|
||||||
|
|
||||||
|
### DataStream Source
|
||||||
|
|
||||||
|
The SQLServer CDC connector can also be used as a DataStream source. You can create a SourceFunction as shown below:
|
||||||
|
|
||||||
|
```java
|
||||||
|
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
|
||||||
|
import org.apache.flink.streaming.api.functions.source.SourceFunction;
|
||||||
|
import org.apache.flink.cdc.debezium.JsonDebeziumDeserializationSchema;
|
||||||
|
import org.apache.flink.cdc.connectors.sqlserver.SqlServerSource;
|
||||||
|
|
||||||
|
public class SqlServerSourceExample {
|
||||||
|
public static void main(String[] args) throws Exception {
|
||||||
|
SourceFunction<String> sourceFunction = SqlServerSource.<String>builder()
|
||||||
|
.hostname("localhost")
|
||||||
|
.port(1433)
|
||||||
|
.database("sqlserver") // monitor sqlserver database
|
||||||
|
.tableList("dbo.products") // monitor products table
|
||||||
|
.username("sa")
|
||||||
|
.password("Password!")
|
||||||
|
.deserializer(new JsonDebeziumDeserializationSchema()) // converts SourceRecord to JSON String
|
||||||
|
.build();
|
||||||
|
|
||||||
|
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
|
||||||
|
|
||||||
|
env
|
||||||
|
.addSource(sourceFunction)
|
||||||
|
.print().setParallelism(1); // use parallelism 1 for sink to keep message ordering
|
||||||
|
|
||||||
|
env.execute();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
The SQLServer CDC incremental connector (since 2.4.0) can be used as shown below:
|
||||||
|
```java
|
||||||
|
import org.apache.flink.api.common.eventtime.WatermarkStrategy;
|
||||||
|
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
|
||||||
|
|
||||||
|
import org.apache.flink.cdc.connectors.base.options.StartupOptions;
|
||||||
|
import org.apache.flink.cdc.connectors.sqlserver.source.SqlServerSourceBuilder;
|
||||||
|
import org.apache.flink.cdc.connectors.sqlserver.source.SqlServerSourceBuilder.SqlServerIncrementalSource;
|
||||||
|
import org.apache.flink.cdc.debezium.JsonDebeziumDeserializationSchema;
|
||||||
|
|
||||||
|
public class SqlServerIncrementalSourceExample {
|
||||||
|
public static void main(String[] args) throws Exception {
|
||||||
|
SqlServerIncrementalSource<String> sqlServerSource =
|
||||||
|
new SqlServerSourceBuilder()
|
||||||
|
.hostname("localhost")
|
||||||
|
.port(1433)
|
||||||
|
.databaseList("inventory")
|
||||||
|
.tableList("dbo.products")
|
||||||
|
.username("sa")
|
||||||
|
.password("Password!")
|
||||||
|
.deserializer(new JsonDebeziumDeserializationSchema())
|
||||||
|
.startupOptions(StartupOptions.initial())
|
||||||
|
.build();
|
||||||
|
|
||||||
|
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
|
||||||
|
// enable checkpoint
|
||||||
|
env.enableCheckpointing(3000);
|
||||||
|
// set the source parallelism to 2
|
||||||
|
env.fromSource(
|
||||||
|
sqlServerSource,
|
||||||
|
WatermarkStrategy.noWatermarks(),
|
||||||
|
"SqlServerIncrementalSource")
|
||||||
|
.setParallelism(2)
|
||||||
|
.print()
|
||||||
|
.setParallelism(1);
|
||||||
|
|
||||||
|
env.execute("Print SqlServer Snapshot + Change Stream");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
Data Type Mapping
|
||||||
|
----------------
|
||||||
|
|
||||||
|
<div class="wy-table-responsive">
|
||||||
|
<table class="colwidths-auto docutils">
|
||||||
|
<thead>
|
||||||
|
<tr>
|
||||||
|
<th class="text-left">SQLServer type<a href="https://docs.microsoft.com/en-us/sql/t-sql/data-types/data-types-transact-sql"></a></th>
|
||||||
|
<th class="text-left">Flink SQL type<a href="{% link dev/table/types.md %}"></a></th>
|
||||||
|
</tr>
|
||||||
|
</thead>
|
||||||
|
<tbody>
|
||||||
|
<tr>
|
||||||
|
<td>char(n)</td>
|
||||||
|
<td>CHAR(n)</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>
|
||||||
|
varchar(n)<br>
|
||||||
|
nvarchar(n)<br>
|
||||||
|
nchar(n)
|
||||||
|
</td>
|
||||||
|
<td>VARCHAR(n)</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>
|
||||||
|
text<br>
|
||||||
|
ntext<br>
|
||||||
|
xml
|
||||||
|
</td>
|
||||||
|
<td>STRING</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>
|
||||||
|
decimal(p, s)<br>
|
||||||
|
money<br>
|
||||||
|
smallmoney
|
||||||
|
</td>
|
||||||
|
<td>DECIMAL(p, s)</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>numeric</td>
|
||||||
|
<td>NUMERIC</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>
|
||||||
|
float<br>
|
||||||
|
real
|
||||||
|
</td>
|
||||||
|
<td>DOUBLE</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>bit</td>
|
||||||
|
<td>BOOLEAN</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>int</td>
|
||||||
|
<td>INT</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>tinyint</td>
|
||||||
|
<td>SMALLINT</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>smallint</td>
|
||||||
|
<td>SMALLINT</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>bigint</td>
|
||||||
|
<td>BIGINT</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>date</td>
|
||||||
|
<td>DATE</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>time(n)</td>
|
||||||
|
<td>TIME(n)</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>
|
||||||
|
datetime2<br>
|
||||||
|
datetime<br>
|
||||||
|
smalldatetime
|
||||||
|
</td>
|
||||||
|
<td>TIMESTAMP(n)</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>datetimeoffset</td>
|
||||||
|
<td>TIMESTAMP_LTZ(3)</td>
|
||||||
|
</tr>
|
||||||
|
</tbody>
|
||||||
|
</table>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
{{< top >}}
|
@ -0,0 +1,496 @@
|
|||||||
|
---
|
||||||
|
title: "TiDB CDC Connector"
|
||||||
|
weight: 8
|
||||||
|
type: docs
|
||||||
|
aliases:
|
||||||
|
- /connectors/cdc-connectors/tidb-cdc.html
|
||||||
|
---
|
||||||
|
<!--
|
||||||
|
Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
or more contributor license agreements. See the NOTICE file
|
||||||
|
distributed with this work for additional information
|
||||||
|
regarding copyright ownership. The ASF licenses this file
|
||||||
|
to you under the Apache License, Version 2.0 (the
|
||||||
|
"License"); you may not use this file except in compliance
|
||||||
|
with the License. You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing,
|
||||||
|
software distributed under the License is distributed on an
|
||||||
|
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||||
|
KIND, either express or implied. See the License for the
|
||||||
|
specific language governing permissions and limitations
|
||||||
|
under the License.
|
||||||
|
-->
|
||||||
|
|
||||||
|
# TiDB CDC Connector
|
||||||
|
|
||||||
|
The TiDB CDC connector allows for reading snapshot data and incremental data from the TiDB database. This document describes how to set up the TiDB CDC connector to run SQL queries against TiDB databases.
|
||||||
|
|
||||||
|
Dependencies
|
||||||
|
------------
|
||||||
|
|
||||||
|
In order to set up the TiDB CDC connector, the following table provides dependency information for both projects using a build automation tool (such as Maven or SBT) and SQL Client with SQL JAR bundles.
|
||||||
|
|
||||||
|
### Maven dependency
|
||||||
|
|
||||||
|
{{< artifact flink-connector-tidb-cdc >}}
|
||||||
|
|
||||||
|
### SQL Client JAR
|
||||||
|
|
||||||
|
```Download link is available only for stable releases.```
|
||||||
|
|
||||||
|
Download [flink-sql-connector-tidb-cdc-3.0-SNAPSHOT.jar](https://repo1.maven.org/maven2/org/apache/flink/flink-sql-connector-tidb-cdc/3.0-SNAPSHOT/flink-sql-connector-tidb-cdc-3.0-SNAPSHOT.jar) and put it under `<FLINK_HOME>/lib/`.
|
||||||
|
|
||||||
|
**Note:** flink-sql-connector-tidb-cdc-XXX-SNAPSHOT versions correspond to the code of the development branch; users need to download the source code and compile the corresponding jar themselves. Users are recommended to use released versions instead, such as [flink-sql-connector-tidb-cdc-2.2.1.jar](https://mvnrepository.com/artifact/org.apache.flink/flink-sql-connector-tidb-cdc), which are available in the Maven central repository.
|
||||||
|
|
||||||
|
How to create a TiDB CDC table
|
||||||
|
----------------
|
||||||
|
|
||||||
|
The TiDB CDC table can be defined as follows:
|
||||||
|
|
||||||
|
```sql
|
||||||
|
-- checkpoint every 3000 milliseconds
|
||||||
|
Flink SQL> SET 'execution.checkpointing.interval' = '3s';
|
||||||
|
|
||||||
|
-- register a TiDB table 'orders' in Flink SQL
|
||||||
|
Flink SQL> CREATE TABLE orders (
|
||||||
|
order_id INT,
|
||||||
|
order_date TIMESTAMP(3),
|
||||||
|
customer_name STRING,
|
||||||
|
price DECIMAL(10, 5),
|
||||||
|
product_id INT,
|
||||||
|
order_status BOOLEAN,
|
||||||
|
PRIMARY KEY(order_id) NOT ENFORCED
|
||||||
|
) WITH (
|
||||||
|
'connector' = 'tidb-cdc',
|
||||||
|
'tikv.grpc.timeout_in_ms' = '20000',
|
||||||
|
'pd-addresses' = 'localhost:2379',
|
||||||
|
'database-name' = 'mydb',
|
||||||
|
'table-name' = 'orders'
|
||||||
|
);
|
||||||
|
|
||||||
|
-- read snapshot and binlogs from orders table
|
||||||
|
Flink SQL> SELECT * FROM orders;
|
||||||
|
```
|
||||||
|
|
||||||
|
Connector Options
|
||||||
|
----------------
|
||||||
|
|
||||||
|
<div class="highlight">
|
||||||
|
<table class="colwidths-auto docutils">
|
||||||
|
<thead>
|
||||||
|
<tr>
|
||||||
|
<th class="text-left" style="width: 10%">Option</th>
|
||||||
|
<th class="text-left" style="width: 8%">Required</th>
|
||||||
|
<th class="text-left" style="width: 7%">Default</th>
|
||||||
|
<th class="text-left" style="width: 10%">Type</th>
|
||||||
|
<th class="text-left" style="width: 65%">Description</th>
|
||||||
|
</tr>
|
||||||
|
</thead>
|
||||||
|
<tbody>
|
||||||
|
<tr>
|
||||||
|
<td>connector</td>
|
||||||
|
<td>required</td>
|
||||||
|
<td style="word-wrap: break-word;">(none)</td>
|
||||||
|
<td>String</td>
|
||||||
|
<td>Specify what connector to use, here should be <code>'tidb-cdc'</code>.</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>database-name</td>
|
||||||
|
<td>required</td>
|
||||||
|
<td style="word-wrap: break-word;">(none)</td>
|
||||||
|
<td>String</td>
|
||||||
|
<td>Database name of the TiDB server to monitor.</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>table-name</td>
|
||||||
|
<td>required</td>
|
||||||
|
<td style="word-wrap: break-word;">(none)</td>
|
||||||
|
<td>String</td>
|
||||||
|
<td>Table name of the TiDB database to monitor.</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>scan.startup.mode</td>
|
||||||
|
<td>optional</td>
|
||||||
|
<td style="word-wrap: break-word;">initial</td>
|
||||||
|
<td>String</td>
|
||||||
|
<td>Optional startup mode for TiDB CDC consumer, valid enumerations are "initial" and "latest-offset".</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>pd-addresses</td>
|
||||||
|
<td>required</td>
|
||||||
|
<td style="word-wrap: break-word;">(none)</td>
|
||||||
|
<td>String</td>
|
||||||
|
<td>TiKV cluster's PD address.</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>tikv.grpc.timeout_in_ms</td>
|
||||||
|
<td>optional</td>
|
||||||
|
<td style="word-wrap: break-word;">(none)</td>
|
||||||
|
<td>Long</td>
|
||||||
|
<td>TiKV GRPC timeout in ms.</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>tikv.grpc.scan_timeout_in_ms</td>
|
||||||
|
<td>optional</td>
|
||||||
|
<td style="word-wrap: break-word;">(none)</td>
|
||||||
|
<td>Long</td>
|
||||||
|
<td>TiKV GRPC scan timeout in ms.</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>tikv.batch_get_concurrency</td>
|
||||||
|
<td>optional</td>
|
||||||
|
<td style="word-wrap: break-word;">20</td>
|
||||||
|
<td>Integer</td>
|
||||||
|
<td>TiKV GRPC batch get concurrency.</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>tikv.*</td>
|
||||||
|
<td>optional</td>
|
||||||
|
<td style="word-wrap: break-word;">(none)</td>
|
||||||
|
<td>String</td>
|
||||||
|
<td>Pass-through TiDB client's properties.</td>
|
||||||
|
</tr>
|
||||||
|
</tbody>
|
||||||
|
</table>
|
||||||
|
</div>
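
When the connector is used from the DataStream API, the `tikv.*` pass-through options can be supplied through the map that is handed to `TDBSourceOptions.getTiConfiguration(...)`, the same call that appears in the DataStream example below. The following is a minimal sketch; the PD address, the option values, and the assumption that the call returns an `org.tikv.common.TiConfiguration` are illustrative rather than authoritative.

```java
import java.util.HashMap;

import org.apache.flink.cdc.connectors.tidb.TDBSourceOptions;
import org.tikv.common.TiConfiguration;

public class TiKvOptionsExample {
    public static void main(String[] args) {
        // pass-through TiKV client options, using the same keys as in the table above
        HashMap<String, String> tikvOptions = new HashMap<>();
        tikvOptions.put("tikv.grpc.timeout_in_ms", "20000");
        tikvOptions.put("tikv.grpc.scan_timeout_in_ms", "20000");
        tikvOptions.put("tikv.batch_get_concurrency", "20");

        // the resulting configuration is what TiDBSource.builder().tiConf(...) expects
        TiConfiguration tiConf =
                TDBSourceOptions.getTiConfiguration("localhost:2379", tikvOptions);
        System.out.println(tiConf);
    }
}
```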
|
||||||
|
|
||||||
|
Available Metadata
|
||||||
|
----------------
|
||||||
|
|
||||||
|
The following format metadata can be exposed as read-only (VIRTUAL) columns in a table definition.
|
||||||
|
|
||||||
|
<table class="colwidths-auto docutils">
|
||||||
|
<thead>
|
||||||
|
<tr>
|
||||||
|
<th class="text-left" style="width: 15%">Key</th>
|
||||||
|
<th class="text-left" style="width: 30%">DataType</th>
|
||||||
|
<th class="text-left" style="width: 55%">Description</th>
|
||||||
|
</tr>
|
||||||
|
</thead>
|
||||||
|
<tbody>
|
||||||
|
<tr>
|
||||||
|
<td>table_name</td>
|
||||||
|
<td>STRING NOT NULL</td>
|
||||||
|
<td>Name of the table that contains the row.</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>database_name</td>
|
||||||
|
<td>STRING NOT NULL</td>
|
||||||
|
<td>Name of the database that contains the row.</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>op_ts</td>
|
||||||
|
<td>TIMESTAMP_LTZ(3) NOT NULL</td>
|
||||||
|
<td>It indicates the time that the change was made in the database. <br>If the record is read from the snapshot of the table instead of the binlog, the value is always 0.</td>
|
||||||
|
</tr>
|
||||||
|
</tbody>
|
||||||
|
</table>
|
||||||
|
|
||||||
|
The extended CREATE TABLE example demonstrates the syntax for exposing these metadata fields:
|
||||||
|
```sql
|
||||||
|
CREATE TABLE products (
|
||||||
|
db_name STRING METADATA FROM 'database_name' VIRTUAL,
|
||||||
|
table_name STRING METADATA FROM 'table_name' VIRTUAL,
|
||||||
|
operation_ts TIMESTAMP_LTZ(3) METADATA FROM 'op_ts' VIRTUAL,
|
||||||
|
order_id INT,
|
||||||
|
order_date TIMESTAMP(0),
|
||||||
|
customer_name STRING,
|
||||||
|
price DECIMAL(10, 5),
|
||||||
|
product_id INT,
|
||||||
|
order_status BOOLEAN,
|
||||||
|
PRIMARY KEY(order_id) NOT ENFORCED
|
||||||
|
) WITH (
|
||||||
|
'connector' = 'tidb-cdc',
|
||||||
|
'tikv.grpc.timeout_in_ms' = '20000',
|
||||||
|
'pd-addresses' = 'localhost:2379',
|
||||||
|
'database-name' = 'mydb',
|
||||||
|
'table-name' = 'orders'
|
||||||
|
);
|
||||||
|
```
|
||||||
|
|
||||||
|
Features
|
||||||
|
--------
|
||||||
|
### Exactly-Once Processing
|
||||||
|
|
||||||
|
The TiDB CDC connector is a Flink Source connector that reads the database snapshot first and then continues to read change events with **exactly-once processing**, even if failures happen.
|
||||||
|
|
||||||
|
### Startup Reading Position
|
||||||
|
|
||||||
|
The config option `scan.startup.mode` specifies the startup mode for TiDB CDC consumer. The valid enumerations are:
|
||||||
|
|
||||||
|
- `initial` (default): Takes a snapshot of structure and data of captured tables; useful if you want to fetch a complete representation of the data from the captured tables.
|
||||||
|
- `latest-offset`: Takes a snapshot of the structure of captured tables only; useful if only changes happening from now onwards should be fetched.
|
||||||
|
|
||||||
|
### Multi Thread Reading
|
||||||
|
|
||||||
|
The TiDB CDC source can read in parallel, because multiple tasks can receive change events.
|
||||||
|
|
||||||
|
### DataStream Source
|
||||||
|
|
||||||
|
The TiDB CDC connector can also be used as a DataStream source. You can create a SourceFunction as shown below:
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
```java
|
||||||
|
import org.apache.flink.api.common.typeinfo.BasicTypeInfo;
|
||||||
|
import org.apache.flink.api.common.typeinfo.TypeInformation;
|
||||||
|
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
|
||||||
|
import org.apache.flink.streaming.api.functions.source.SourceFunction;
|
||||||
|
import org.apache.flink.util.Collector;
|
||||||
|
|
||||||
|
import org.apache.flink.cdc.connectors.tidb.TDBSourceOptions;
|
||||||
|
import org.apache.flink.cdc.connectors.tidb.TiDBSource;
|
||||||
|
import org.apache.flink.cdc.connectors.tidb.TiKVChangeEventDeserializationSchema;
|
||||||
|
import org.apache.flink.cdc.connectors.tidb.TiKVSnapshotEventDeserializationSchema;
|
||||||
|
import org.tikv.kvproto.Cdcpb;
|
||||||
|
import org.tikv.kvproto.Kvrpcpb;
|
||||||
|
|
||||||
|
import java.util.HashMap;
|
||||||
|
|
||||||
|
public class TiDBSourceExample {
|
||||||
|
|
||||||
|
public static void main(String[] args) throws Exception {
|
||||||
|
|
||||||
|
SourceFunction<String> tidbSource =
|
||||||
|
TiDBSource.<String>builder()
|
||||||
|
.database("mydb") // set captured database
|
||||||
|
.tableName("products") // set captured table
|
||||||
|
.tiConf(
|
||||||
|
TDBSourceOptions.getTiConfiguration(
|
||||||
|
"localhost:2399", new HashMap<>()))
|
||||||
|
.snapshotEventDeserializer(
|
||||||
|
new TiKVSnapshotEventDeserializationSchema<String>() {
|
||||||
|
@Override
|
||||||
|
public void deserialize(
|
||||||
|
Kvrpcpb.KvPair record, Collector<String> out)
|
||||||
|
throws Exception {
|
||||||
|
out.collect(record.toString());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public TypeInformation<String> getProducedType() {
|
||||||
|
return BasicTypeInfo.STRING_TYPE_INFO;
|
||||||
|
}
|
||||||
|
})
|
||||||
|
.changeEventDeserializer(
|
||||||
|
new TiKVChangeEventDeserializationSchema<String>() {
|
||||||
|
@Override
|
||||||
|
public void deserialize(
|
||||||
|
Cdcpb.Event.Row record, Collector<String> out)
|
||||||
|
throws Exception {
|
||||||
|
out.collect(record.toString());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public TypeInformation<String> getProducedType() {
|
||||||
|
return BasicTypeInfo.STRING_TYPE_INFO;
|
||||||
|
}
|
||||||
|
})
|
||||||
|
.build();
|
||||||
|
|
||||||
|
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
|
||||||
|
|
||||||
|
// enable checkpoint
|
||||||
|
env.enableCheckpointing(3000);
|
||||||
|
env.addSource(tidbSource).print().setParallelism(1);
|
||||||
|
|
||||||
|
env.execute("Print TiDB Snapshot + Binlog");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
Data Type Mapping
|
||||||
|
----------------
|
||||||
|
|
||||||
|
<div class="wy-table-responsive">
|
||||||
|
<table class="colwidths-auto docutils">
|
||||||
|
<thead>
|
||||||
|
<tr>
|
||||||
|
<th class="text-left">TiDB type<a href="https://dev.tidb.com/doc/man/8.0/en/data-types.html"></a></th>
|
||||||
|
<th class="text-left">Flink SQL type<a href="{% link dev/table/types.md %}"></a></th>
|
||||||
|
<th class="text-left">NOTE</th>
|
||||||
|
</tr>
|
||||||
|
</thead>
|
||||||
|
<tbody>
|
||||||
|
<tr>
|
||||||
|
<td>TINYINT</td>
|
||||||
|
<td>TINYINT</td>
|
||||||
|
<td></td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>
|
||||||
|
SMALLINT<br>
|
||||||
|
TINYINT UNSIGNED</td>
|
||||||
|
<td>SMALLINT</td>
|
||||||
|
<td></td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>
|
||||||
|
INT<br>
|
||||||
|
MEDIUMINT<br>
|
||||||
|
SMALLINT UNSIGNED</td>
|
||||||
|
<td>INT</td>
|
||||||
|
<td></td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>
|
||||||
|
BIGINT<br>
|
||||||
|
INT UNSIGNED</td>
|
||||||
|
<td>BIGINT</td>
|
||||||
|
<td></td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>BIGINT UNSIGNED</td>
|
||||||
|
<td>DECIMAL(20, 0)</td>
|
||||||
|
<td></td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>
|
||||||
|
FLOAT<br>
|
||||||
|
</td>
|
||||||
|
<td>FLOAT</td>
|
||||||
|
<td></td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>
|
||||||
|
REAL<br>
|
||||||
|
DOUBLE
|
||||||
|
</td>
|
||||||
|
<td>DOUBLE</td>
|
||||||
|
<td></td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>
|
||||||
|
NUMERIC(p, s)<br>
|
||||||
|
DECIMAL(p, s)<br>
|
||||||
|
where p <= 38<br>
|
||||||
|
</td>
|
||||||
|
<td>DECIMAL(p, s)</td>
|
||||||
|
<td></td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>
|
||||||
|
NUMERIC(p, s)<br>
|
||||||
|
DECIMAL(p, s)<br>
|
||||||
|
where 38 < p <= 65<br>
|
||||||
|
</td>
|
||||||
|
<td>STRING</td>
|
||||||
|
<td>The precision for DECIMAL data type is up to 65 in TiDB, but the precision for DECIMAL is limited to 38 in Flink.
|
||||||
|
So if you define a decimal column whose precision is greater than 38, you should map it to STRING to avoid precision loss.</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>
|
||||||
|
BOOLEAN<br>
|
||||||
|
TINYINT(1)<br>
|
||||||
|
BIT(1)
|
||||||
|
</td>
|
||||||
|
<td>BOOLEAN</td>
|
||||||
|
<td></td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>DATE</td>
|
||||||
|
<td>DATE</td>
|
||||||
|
<td></td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>TIME [(p)]</td>
|
||||||
|
<td>TIME [(p)]</td>
|
||||||
|
<td></td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>TIMESTAMP [(p)]</td>
|
||||||
|
<td>TIMESTAMP_LTZ [(p)]</td>
|
||||||
|
<td></td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>DATETIME [(p)]</td>
|
||||||
|
<td>TIMESTAMP [(p)]
|
||||||
|
</td>
|
||||||
|
<td></td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>
|
||||||
|
CHAR(n)
|
||||||
|
</td>
|
||||||
|
<td>CHAR(n)</td>
|
||||||
|
<td></td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>
|
||||||
|
VARCHAR(n)
|
||||||
|
</td>
|
||||||
|
<td>VARCHAR(n)</td>
|
||||||
|
<td></td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>
|
||||||
|
BIT(n)
|
||||||
|
</td>
|
||||||
|
<td>BINARY(⌈n/8⌉)</td>
|
||||||
|
<td></td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>
|
||||||
|
BINARY(n)
|
||||||
|
</td>
|
||||||
|
<td>BINARY(n)</td>
|
||||||
|
<td></td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>
|
||||||
|
TINYTEXT<br>
|
||||||
|
TEXT<br>
|
||||||
|
MEDIUMTEXT<br>
|
||||||
|
LONGTEXT<br>
|
||||||
|
</td>
|
||||||
|
<td>STRING</td>
|
||||||
|
<td></td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>
|
||||||
|
TINYBLOB<br>
|
||||||
|
BLOB<br>
|
||||||
|
MEDIUMBLOB<br>
|
||||||
|
LONGBLOB<br>
|
||||||
|
</td>
|
||||||
|
<td>BYTES</td>
|
||||||
|
<td>Currently, for the BLOB data type in TiDB, only blobs whose length is no greater than 2,147,483,647 (2 ** 31 - 1) are supported.</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>
|
||||||
|
YEAR
|
||||||
|
</td>
|
||||||
|
<td>INT</td>
|
||||||
|
<td></td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>
|
||||||
|
ENUM
|
||||||
|
</td>
|
||||||
|
<td>STRING</td>
|
||||||
|
<td></td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>
|
||||||
|
JSON
|
||||||
|
</td>
|
||||||
|
<td>STRING</td>
|
||||||
|
<td>The JSON data type will be converted into STRING with JSON format in Flink.</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>
|
||||||
|
SET
|
||||||
|
</td>
|
||||||
|
<td>ARRAY<STRING></td>
|
||||||
|
<td>As the SET data type in TiDB is a string object that can have zero or more values,
|
||||||
|
it should always be mapped to an array of strings.
|
||||||
|
</td>
|
||||||
|
</tr>
|
||||||
|
</tbody>
|
||||||
|
</table>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
{{< top >}}
|
@ -0,0 +1,23 @@
|
|||||||
|
---
|
||||||
|
title: Pipeline Connectors
|
||||||
|
bookCollapseSection: true
|
||||||
|
weight: 1
|
||||||
|
---
|
||||||
|
<!--
|
||||||
|
Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
or more contributor license agreements. See the NOTICE file
|
||||||
|
distributed with this work for additional information
|
||||||
|
regarding copyright ownership. The ASF licenses this file
|
||||||
|
to you under the Apache License, Version 2.0 (the
|
||||||
|
"License"); you may not use this file except in compliance
|
||||||
|
with the License. You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing,
|
||||||
|
software distributed under the License is distributed on an
|
||||||
|
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||||
|
KIND, either express or implied. See the License for the
|
||||||
|
specific language governing permissions and limitations
|
||||||
|
under the License.
|
||||||
|
-->
|
@ -0,0 +1,287 @@
|
|||||||
|
---
|
||||||
|
title: "Doris Pipeline Connector"
|
||||||
|
weight: 2
|
||||||
|
type: docs
|
||||||
|
aliases:
|
||||||
|
- /pipelines/doris-pipeline.html
|
||||||
|
---
|
||||||
|
<!--
|
||||||
|
Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
or more contributor license agreements. See the NOTICE file
|
||||||
|
distributed with this work for additional information
|
||||||
|
regarding copyright ownership. The ASF licenses this file
|
||||||
|
to you under the Apache License, Version 2.0 (the
|
||||||
|
"License"); you may not use this file except in compliance
|
||||||
|
with the License. You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing,
|
||||||
|
software distributed under the License is distributed on an
|
||||||
|
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||||
|
KIND, either express or implied. See the License for the
|
||||||
|
specific language governing permissions and limitations
|
||||||
|
under the License.
|
||||||
|
-->
|
||||||
|
|
||||||
|
# Doris Pipeline Connector
|
||||||
|
|
||||||
|
This article introduces the Doris Pipeline Connector.
|
||||||
|
|
||||||
|
|
||||||
|
## Example
|
||||||
|
----------------
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
source:
|
||||||
|
type: values
|
||||||
|
name: ValuesSource
|
||||||
|
|
||||||
|
sink:
|
||||||
|
type: doris
|
||||||
|
name: Doris Sink
|
||||||
|
fenodes: 127.0.0.1:8030
|
||||||
|
username: root
|
||||||
|
password: ""
|
||||||
|
table.create.properties.replication_num: 1
|
||||||
|
|
||||||
|
pipeline:
|
||||||
|
parallelism: 1
|
||||||
|
|
||||||
|
```
|
||||||
|
|
||||||
|
## Pipeline options
|
||||||
|
----------------
|
||||||
|
|
||||||
|
<div class="highlight">
|
||||||
|
<table class="colwidths-auto docutils">
|
||||||
|
<thead>
|
||||||
|
<tr>
|
||||||
|
<th class="text-left" style="width: 10%">Option</th>
|
||||||
|
<th class="text-left" style="width: 8%">Required</th>
|
||||||
|
<th class="text-left" style="width: 7%">Default</th>
|
||||||
|
<th class="text-left" style="width: 10%">Type</th>
|
||||||
|
<th class="text-left" style="width: 65%">Description</th>
|
||||||
|
</tr>
|
||||||
|
</thead>
|
||||||
|
<tbody>
|
||||||
|
<tr>
|
||||||
|
<td>type</td>
|
||||||
|
<td>required</td>
|
||||||
|
<td style="word-wrap: break-word;">(none)</td>
|
||||||
|
<td>String</td>
|
||||||
|
<td>Specify the Sink to use, here is <code>'doris'</code>.</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>name</td>
|
||||||
|
<td>optional</td>
|
||||||
|
<td style="word-wrap: break-word;">(none)</td>
|
||||||
|
<td>String</td>
|
||||||
|
<td>Name of the pipeline.</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>fenodes</td>
|
||||||
|
<td>required</td>
|
||||||
|
<td style="word-wrap: break-word;">(none)</td>
|
||||||
|
<td>String</td>
|
||||||
|
<td>Http address of Doris cluster FE, such as 127.0.0.1:8030 </td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>benodes</td>
|
||||||
|
<td>optional</td>
|
||||||
|
<td style="word-wrap: break-word;">(none)</td>
|
||||||
|
<td>String</td>
|
||||||
|
<td>Http address of Doris cluster BE, such as 127.0.0.1:8040 </td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>jdbc-url</td>
|
||||||
|
<td>optional</td>
|
||||||
|
<td style="word-wrap: break-word;">(none)</td>
|
||||||
|
<td>String</td>
|
||||||
|
<td>JDBC address of Doris cluster, for example: jdbc:mysql://127.0.0.1:9030/db</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>username</td>
|
||||||
|
<td>required</td>
|
||||||
|
<td style="word-wrap: break-word;">(none)</td>
|
||||||
|
<td>String</td>
|
||||||
|
<td>Username of Doris cluster</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>password</td>
|
||||||
|
<td>optional</td>
|
||||||
|
<td style="word-wrap: break-word;">(none)</td>
|
||||||
|
<td>String</td>
|
||||||
|
<td>Password for Doris cluster</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>auto-redirect</td>
|
||||||
|
<td>optional</td>
|
||||||
|
<td style="word-wrap: break-word;">false</td>
|
||||||
|
<td>String</td>
|
||||||
|
<td>Whether to write through FE redirection rather than connecting to BE directly.</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>sink.enable.batch-mode</td>
|
||||||
|
<td>optional</td>
|
||||||
|
<td style="word-wrap: break-word;">true</td>
|
||||||
|
<td>Boolean</td>
|
||||||
|
<td> Whether to use the batch method to write to Doris </td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>sink.flush.queue-size</td>
|
||||||
|
<td>optional</td>
|
||||||
|
<td style="word-wrap: break-word;">2</td>
|
||||||
|
<td>Integer</td>
|
||||||
|
<td> Queue size for batch writing
|
||||||
|
</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>sink.buffer-flush.max-rows</td>
|
||||||
|
<td>optional</td>
|
||||||
|
<td style="word-wrap: break-word;">50000</td>
|
||||||
|
<td>Integer</td>
|
||||||
|
<td>Maximum number of Flush records in a single batch</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>sink.buffer-flush.max-bytes</td>
|
||||||
|
<td>optional</td>
|
||||||
|
<td style="word-wrap: break-word;">10485760(10MB)</td>
|
||||||
|
<td>Integer</td>
|
||||||
|
<td>Maximum number of bytes flushed in a single batch</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>sink.buffer-flush.interval</td>
|
||||||
|
<td>optional</td>
|
||||||
|
<td style="word-wrap: break-word;">10s</td>
|
||||||
|
<td>String</td>
|
||||||
|
<td>Flush interval duration. If this time is exceeded, the data will be flushed asynchronously</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>sink.properties.</td>
|
||||||
|
<td>optional</td>
|
||||||
|
<td style="word-wrap: break-word;">(none)</td>
|
||||||
|
<td>String</td>
|
||||||
|
<td> Parameters of StreamLoad.
|
||||||
|
For example: <code> sink.properties.strict_mode: true</code>.
|
||||||
|
See more about <a href="https://doris.apache.org/zh-CN/docs/dev/sql-manual/sql-reference/Data-Manipulation-Statements/Load/STREAM-LOAD/">Stream Load properties</a></td>
|
||||||
|
</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>table.create.properties.*</td>
|
||||||
|
<td>optional</td>
|
||||||
|
<td style="word-wrap: break-word;">(none)</td>
|
||||||
|
<td>String</td>
|
||||||
|
<td>Properties configuration used when creating the table.
|
||||||
|
For example: <code> table.create.properties.replication_num: 1</code>.
|
||||||
|
See more about <a href="https://doris.apache.org/zh-CN/docs/dev/sql-manual/sql-reference/Data-Definition-Statements/Create/CREATE-TABLE/">Doris table properties</a></td>
|
||||||
|
</td>
|
||||||
|
</tr>
|
||||||
|
</tbody>
|
||||||
|
</table>
|
||||||
|
</div>
|
||||||
|
## Data Type Mapping
|
||||||
|
|
||||||
|
----------------
|
||||||
|
|
||||||
|
<div class="wy-table-responsive">
|
||||||
|
<table class="colwidths-auto docutils">
|
||||||
|
<thead>
|
||||||
|
<tr>
|
||||||
|
<th class="text-left" style="width:10%;">CDC type</th>
|
||||||
|
<th class="text-left" style="width:30%;">Doris type<a href="https://doris.apache.org/docs/dev/sql-manual/sql-reference/Data-Types/BOOLEAN/"></a></th>
|
||||||
|
<th class="text-left" style="width:60%;">NOTE</th>
|
||||||
|
</tr>
|
||||||
|
</thead>
|
||||||
|
<tbody>
|
||||||
|
<tr>
|
||||||
|
<td>TINYINT</td>
|
||||||
|
<td>TINYINT</td>
|
||||||
|
<td></td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>SMALLINT</td>
|
||||||
|
<td>SMALLINT</td>
|
||||||
|
<td></td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>INT</td>
|
||||||
|
<td>INT</td>
|
||||||
|
<td></td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>BIGINT</td>
|
||||||
|
<td>BIGINT</td>
|
||||||
|
<td></td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>DECIMAL</td>
|
||||||
|
<td>DECIMAL</td>
|
||||||
|
<td></td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>FLOAT</td>
|
||||||
|
<td>FLOAT</td>
|
||||||
|
<td></td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>DOUBLE</td>
|
||||||
|
<td>DOUBLE</td>
|
||||||
|
<td></td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>BOOLEAN</td>
|
||||||
|
<td>BOOLEAN</td>
|
||||||
|
<td></td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>DATE</td>
|
||||||
|
<td>DATE</td>
|
||||||
|
<td></td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>TIMESTAMP [(p)]</td>
|
||||||
|
<td>DATETIME [(p)]</td>
|
||||||
|
<td></td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>TIMESTAMP_LTZ [(p)]
|
||||||
|
</td>
|
||||||
|
<td>DATETIME [(p)]
|
||||||
|
</td>
|
||||||
|
<td></td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>CHAR(n)</td>
|
||||||
|
<td>CHAR(n*3)</td>
|
||||||
|
<td>In Doris, strings are stored in UTF-8 encoding, so English characters occupy 1 byte and Chinese characters occupy 3 bytes. The length here is multiplied by 3. The maximum length of CHAR is 255. Once exceeded, it will automatically be converted to VARCHAR type.</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>VARCHAR(n)</td>
|
||||||
|
<td>VARCHAR(n*3)</td>
|
||||||
|
<td>Same as above. The length here is multiplied by 3. The maximum length of VARCHAR is 65533. Once exceeded, it will automatically be converted to STRING type.</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>
|
||||||
|
BINARY(n)
|
||||||
|
</td>
|
||||||
|
<td>STRING</td>
|
||||||
|
<td></td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>
|
||||||
|
VARBINARY(N)
|
||||||
|
</td>
|
||||||
|
<td>STRING</td>
|
||||||
|
<td></td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>STRING</td>
|
||||||
|
<td>STRING</td>
|
||||||
|
<td></td>
|
||||||
|
</tr>
|
||||||
|
</tbody>
|
||||||
|
</table>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
{{< top >}}
|
@ -0,0 +1,44 @@
|
|||||||
|
---
|
||||||
|
title: "Overview"
|
||||||
|
weight: 1
|
||||||
|
type: docs
|
||||||
|
aliases:
|
||||||
|
- /connectors/pipeline-connectors/
|
||||||
|
---
|
||||||
|
<!--
|
||||||
|
Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
or more contributor license agreements. See the NOTICE file
|
||||||
|
distributed with this work for additional information
|
||||||
|
regarding copyright ownership. The ASF licenses this file
|
||||||
|
to you under the Apache License, Version 2.0 (the
|
||||||
|
"License"); you may not use this file except in compliance
|
||||||
|
with the License. You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing,
|
||||||
|
software distributed under the License is distributed on an
|
||||||
|
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||||
|
KIND, either express or implied. See the License for the
|
||||||
|
specific language governing permissions and limitations
|
||||||
|
under the License.
|
||||||
|
-->
|
||||||
|
|
||||||
|
# Pipeline Connectors Of CDC Streaming ELT Framework
|
||||||
|
|
||||||
|
## Supported Connectors
|
||||||
|
|
||||||
|
| Connector | Database |
|
||||||
|
|---------------------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
|
||||||
|
| [doris-pipeline](doris-pipeline.md) | <li> [Doris](https://doris.apache.org/): 1.2.x, 2.x.x |
|
||||||
|
| [mysql-pipeline](mysql-pipeline.md) | <li> [MySQL](https://dev.mysql.com/doc): 5.6, 5.7, 8.0.x <li> [RDS MySQL](https://www.aliyun.com/product/rds/mysql): 5.6, 5.7, 8.0.x <li> [PolarDB MySQL](https://www.aliyun.com/product/polardb): 5.6, 5.7, 8.0.x <li> [Aurora MySQL](https://aws.amazon.com/cn/rds/aurora): 5.6, 5.7, 8.0.x <li> [MariaDB](https://mariadb.org): 10.x <li> [PolarDB X](https://github.com/ApsaraDB/galaxysql): 2.0.1 |
|
||||||
|
| [starrocks-pipeline](starrocks-pipeline.md) | <li> [StarRocks](https://www.starrocks.io/): 2.x, 3.x |
|
||||||
|
|
||||||
|
## Supported Flink Versions
|
||||||
|
The following table shows the version mapping between Flink<sup>®</sup> CDC Pipeline and Flink<sup>®</sup>:
|
||||||
|
|
||||||
|
| Flink<sup>®</sup> CDC Version | Flink<sup>®</sup> Version |
|
||||||
|
|:-----------------------------------:|:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------:|
|
||||||
|
| <font color="DarkCyan">3.0.*</font> | <font color="MediumVioletRed">1.14.\*</font>, <font color="MediumVioletRed">1.15.\*</font>, <font color="MediumVioletRed">1.16.\*</font>, <font color="MediumVioletRed">1.17.\*</font>, <font color="MediumVioletRed">1.18.\*</font> |
|
||||||
|
|
||||||
|
{{< top >}}
|
@ -0,0 +1,26 @@
|
|||||||
|
---
|
||||||
|
title: Development
|
||||||
|
icon: <i class="fa fa-code title maindish" aria-hidden="true"></i>
|
||||||
|
bold: true
|
||||||
|
sectionBreak: true
|
||||||
|
bookCollapseSection: true
|
||||||
|
weight: 2
|
||||||
|
---
|
||||||
|
<!--
|
||||||
|
Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
or more contributor license agreements. See the NOTICE file
|
||||||
|
distributed with this work for additional information
|
||||||
|
regarding copyright ownership. The ASF licenses this file
|
||||||
|
to you under the Apache License, Version 2.0 (the
|
||||||
|
"License"); you may not use this file except in compliance
|
||||||
|
with the License. You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing,
|
||||||
|
software distributed under the License is distributed on an
|
||||||
|
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||||
|
KIND, either express or implied. See the License for the
|
||||||
|
specific language governing permissions and limitations
|
||||||
|
under the License.
|
||||||
|
-->
|
@ -0,0 +1,25 @@
|
|||||||
|
---
|
||||||
|
title: "FAQ"
|
||||||
|
icon: <i class="fa fa-question title appetizer" aria-hidden="true"></i>
|
||||||
|
bold: true
|
||||||
|
bookCollapseSection: true
|
||||||
|
weight: 4
|
||||||
|
---
|
||||||
|
<!--
|
||||||
|
Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
or more contributor license agreements. See the NOTICE file
|
||||||
|
distributed with this work for additional information
|
||||||
|
regarding copyright ownership. The ASF licenses this file
|
||||||
|
to you under the Apache License, Version 2.0 (the
|
||||||
|
"License"); you may not use this file except in compliance
|
||||||
|
with the License. You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing,
|
||||||
|
software distributed under the License is distributed on an
|
||||||
|
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||||
|
KIND, either express or implied. See the License for the
|
||||||
|
specific language governing permissions and limitations
|
||||||
|
under the License.
|
||||||
|
-->
|
@ -0,0 +1,25 @@
|
|||||||
|
---
|
||||||
|
title: "Try Flink CDC"
|
||||||
|
icon: <i class="fa fa-rocket title appetizer" aria-hidden="true"></i>
|
||||||
|
bold: true
|
||||||
|
bookCollapseSection: true
|
||||||
|
weight: 1
|
||||||
|
---
|
||||||
|
<!--
|
||||||
|
Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
or more contributor license agreements. See the NOTICE file
|
||||||
|
distributed with this work for additional information
|
||||||
|
regarding copyright ownership. The ASF licenses this file
|
||||||
|
to you under the Apache License, Version 2.0 (the
|
||||||
|
"License"); you may not use this file except in compliance
|
||||||
|
with the License. You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing,
|
||||||
|
software distributed under the License is distributed on an
|
||||||
|
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||||
|
KIND, either express or implied. See the License for the
|
||||||
|
specific language governing permissions and limitations
|
||||||
|
under the License.
|
||||||
|
-->
|
@ -0,0 +1,25 @@
|
|||||||
|
---
|
||||||
|
title: CDC Connectors
|
||||||
|
bookCollapseSection: true
|
||||||
|
weight: 2
|
||||||
|
aliases:
|
||||||
|
- /try-flink-cdc/cdc-connectors/
|
||||||
|
---
|
||||||
|
<!--
|
||||||
|
Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
or more contributor license agreements. See the NOTICE file
|
||||||
|
distributed with this work for additional information
|
||||||
|
regarding copyright ownership. The ASF licenses this file
|
||||||
|
to you under the Apache License, Version 2.0 (the
|
||||||
|
"License"); you may not use this file except in compliance
|
||||||
|
with the License. You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing,
|
||||||
|
software distributed under the License is distributed on an
|
||||||
|
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||||
|
KIND, either express or implied. See the License for the
|
||||||
|
specific language governing permissions and limitations
|
||||||
|
under the License.
|
||||||
|
-->
|
@ -0,0 +1,331 @@
|
|||||||
|
---
|
||||||
|
title: "Building a Real-time Data Lake with Flink CDC"
|
||||||
|
weight: 999
|
||||||
|
type: docs
|
||||||
|
aliases:
|
||||||
|
- /development/build-real-time-data-lake-tutorial.html
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
<!--
|
||||||
|
Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
or more contributor license agreements. See the NOTICE file
|
||||||
|
distributed with this work for additional information
|
||||||
|
regarding copyright ownership. The ASF licenses this file
|
||||||
|
to you under the Apache License, Version 2.0 (the
|
||||||
|
"License"); you may not use this file except in compliance
|
||||||
|
with the License. You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing,
|
||||||
|
software distributed under the License is distributed on an
|
||||||
|
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||||
|
KIND, either express or implied. See the License for the
|
||||||
|
specific language governing permissions and limitations
|
||||||
|
under the License.
|
||||||
|
-->
|
||||||
|
|
||||||
|
# Using Flink CDC to synchronize data from MySQL sharding tables and build real-time data lake
|
||||||
|
|
||||||
|
For OLTP databases, to deal with a huge amount of data in a single table, we usually do database and table sharding to get better throughput.
|
||||||
|
But sometimes, for convenience of analysis, we need to merge them into one table when loading them into the data warehouse or data lake.
|
||||||
|
|
||||||
|
This tutorial will show how to use Flink CDC to build a real-time data lake for such a scenario.
|
||||||
|
You can walk through the tutorial easily in the docker environment. The entire process uses standard SQL syntax without a single line of Java/Scala code or IDE installation.
|
||||||
|
|
||||||
|
The following sections will take the pipeline from MySQL to [Iceberg](https://iceberg.apache.org/) as an example. The overview of the architecture is as follows:
|
||||||
|
|
||||||
|
{{< img src="/fig/real-time-data-lake-tutorial/real-time-data-lake-tutorial.png" alt="Real-time data lake with Flink CDC" >}}
|
||||||
|
|
||||||
|
You can also use other data sources like Oracle/Postgres and sinks like Hudi to build your own pipeline.
|
||||||
|
|
||||||
|
## Preparation
|
||||||
|
Prepare a Linux or MacOS computer with Docker installed.
|
||||||
|
|
||||||
|
## Preparing the required JAR packages
|
||||||
|
**Download links are available only for stable releases; SNAPSHOT dependencies need to be built from the master or release branches by yourself.**
|
||||||
|
- flink-sql-connector-mysql-cdc-3.0-SNAPSHOT.jar
|
||||||
|
- [flink-shaded-hadoop-2-uber-2.7.5-10.0.jar](https://repo.maven.apache.org/maven2/org/apache/flink/flink-shaded-hadoop-2-uber/2.7.5-10.0/flink-shaded-hadoop-2-uber-2.7.5-10.0.jar)
|
||||||
|
- [iceberg-flink-runtime-1.16-1.3.1.jar](https://repo.maven.apache.org/maven2/org/apache/iceberg/iceberg-flink-runtime-1.16/1.3.1/iceberg-flink-runtime-1.16-1.3.1.jar)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
### Starting the required components
|
||||||
|
The components required in this tutorial are all managed in containers, so we will use `docker-compose` to start them.
|
||||||
|
|
||||||
|
1. Create a `Dockerfile` using the following contents:
|
||||||
|
```dockerfile
|
||||||
|
FROM flink:1.16.0-scala_2.12
|
||||||
|
# Place the downloaded jar packages in the lib directory at the same level.
|
||||||
|
COPY ./lib /opt/flink/lib
|
||||||
|
   RUN apt-get update && apt-get install -y tree
|
||||||
|
```
|
||||||
|
|
||||||
|
2. Create a `docker-compose.yml` file using the following contents:
|
||||||
|
```yml
|
||||||
|
version: '2.1'
|
||||||
|
services:
|
||||||
|
sql-client:
|
||||||
|
user: flink:flink
|
||||||
|
build: .
|
||||||
|
command: bin/sql-client.sh
|
||||||
|
depends_on:
|
||||||
|
- jobmanager
|
||||||
|
- mysql
|
||||||
|
environment:
|
||||||
|
- MYSQL_HOST=mysql
|
||||||
|
- |
|
||||||
|
FLINK_PROPERTIES=
|
||||||
|
jobmanager.rpc.address: jobmanager
|
||||||
|
rest.address: jobmanager
|
||||||
|
volumes:
|
||||||
|
- shared-tmpfs:/tmp/iceberg
|
||||||
|
jobmanager:
|
||||||
|
user: flink:flink
|
||||||
|
build: .
|
||||||
|
ports:
|
||||||
|
- "8081:8081"
|
||||||
|
command: jobmanager
|
||||||
|
environment:
|
||||||
|
- |
|
||||||
|
FLINK_PROPERTIES=
|
||||||
|
jobmanager.rpc.address: jobmanager
|
||||||
|
volumes:
|
||||||
|
- shared-tmpfs:/tmp/iceberg
|
||||||
|
taskmanager:
|
||||||
|
user: flink:flink
|
||||||
|
build: .
|
||||||
|
depends_on:
|
||||||
|
- jobmanager
|
||||||
|
command: taskmanager
|
||||||
|
environment:
|
||||||
|
- |
|
||||||
|
FLINK_PROPERTIES=
|
||||||
|
jobmanager.rpc.address: jobmanager
|
||||||
|
taskmanager.numberOfTaskSlots: 2
|
||||||
|
volumes:
|
||||||
|
- shared-tmpfs:/tmp/iceberg
|
||||||
|
mysql:
|
||||||
|
image: debezium/example-mysql:1.1
|
||||||
|
ports:
|
||||||
|
- "3306:3306"
|
||||||
|
environment:
|
||||||
|
- MYSQL_ROOT_PASSWORD=123456
|
||||||
|
- MYSQL_USER=mysqluser
|
||||||
|
- MYSQL_PASSWORD=mysqlpw
|
||||||
|
|
||||||
|
volumes:
|
||||||
|
shared-tmpfs:
|
||||||
|
driver: local
|
||||||
|
driver_opts:
|
||||||
|
type: "tmpfs"
|
||||||
|
device: "tmpfs"
|
||||||
|
```
|
||||||
|
|
||||||
|
The Docker Compose environment consists of the following containers:
|
||||||
|
- SQL-Client: Flink SQL Client, used to submit queries and visualize their results.
|
||||||
|
- Flink Cluster: a Flink JobManager and a Flink TaskManager container to execute queries.
|
||||||
|
- MySQL: mainly used as a data source to store the sharding table.
|
||||||
|
|
||||||
|
3. To start all containers, run the following command in the directory that contains the `docker-compose.yml` file:
|
||||||
|
```shell
|
||||||
|
docker-compose up -d
|
||||||
|
```
|
||||||
|
   This command automatically starts all the containers defined in the Docker Compose configuration in detached mode. Run `docker ps` to check whether these containers are running properly.
|
||||||
|
We can also visit [http://localhost:8081/](http://localhost:8081/) to see if Flink is running normally.
|
||||||
|
|
||||||
|
|
||||||
|
***Note:***
|
||||||
|
* If you want to run with your own Flink environment, remember to download the jar packages and then put them to `FLINK_HOME/lib/`.
|
||||||
|
* All the following commands involving `docker-compose` should be executed in the directory of the `docker-compose.yml` file.
|
||||||
|
|
||||||
|
{{< img src="/fig/real-time-data-lake-tutorial/flink-ui.png" alt="Flink UI" >}}
|
||||||
|
|
||||||
|
### Preparing data in databases
|
||||||
|
1. Enter MySQL's container:
|
||||||
|
```shell
|
||||||
|
docker-compose exec mysql mysql -uroot -p123456
|
||||||
|
```
|
||||||
|
2. Create databases/tables and populate data:
|
||||||
|
|
||||||
|
Create a logical sharding table `user` sharded in different databases and tables physically.
|
||||||
|
```sql
|
||||||
|
CREATE DATABASE db_1;
|
||||||
|
USE db_1;
|
||||||
|
CREATE TABLE user_1 (
|
||||||
|
id INTEGER NOT NULL PRIMARY KEY,
|
||||||
|
name VARCHAR(255) NOT NULL DEFAULT 'flink',
|
||||||
|
address VARCHAR(1024),
|
||||||
|
phone_number VARCHAR(512),
|
||||||
|
email VARCHAR(255)
|
||||||
|
);
|
||||||
|
INSERT INTO user_1 VALUES (110,"user_110","Shanghai","123567891234","user_110@foo.com");
|
||||||
|
|
||||||
|
CREATE TABLE user_2 (
|
||||||
|
id INTEGER NOT NULL PRIMARY KEY,
|
||||||
|
name VARCHAR(255) NOT NULL DEFAULT 'flink',
|
||||||
|
address VARCHAR(1024),
|
||||||
|
phone_number VARCHAR(512),
|
||||||
|
email VARCHAR(255)
|
||||||
|
);
|
||||||
|
INSERT INTO user_2 VALUES (120,"user_120","Shanghai","123567891234","user_120@foo.com");
|
||||||
|
```
|
||||||
|
```sql
|
||||||
|
CREATE DATABASE db_2;
|
||||||
|
USE db_2;
|
||||||
|
CREATE TABLE user_1 (
|
||||||
|
id INTEGER NOT NULL PRIMARY KEY,
|
||||||
|
name VARCHAR(255) NOT NULL DEFAULT 'flink',
|
||||||
|
address VARCHAR(1024),
|
||||||
|
phone_number VARCHAR(512),
|
||||||
|
email VARCHAR(255)
|
||||||
|
);
|
||||||
|
INSERT INTO user_1 VALUES (110,"user_110","Shanghai","123567891234", NULL);
|
||||||
|
|
||||||
|
CREATE TABLE user_2 (
|
||||||
|
id INTEGER NOT NULL PRIMARY KEY,
|
||||||
|
name VARCHAR(255) NOT NULL DEFAULT 'flink',
|
||||||
|
address VARCHAR(1024),
|
||||||
|
phone_number VARCHAR(512),
|
||||||
|
email VARCHAR(255)
|
||||||
|
);
|
||||||
|
INSERT INTO user_2 VALUES (220,"user_220","Shanghai","123567891234","user_220@foo.com");
|
||||||
|
```
|
||||||
|
|
||||||
|
## Creating tables using Flink DDL in Flink SQL CLI
|
||||||
|
First, use the following command to enter the Flink SQL CLI Container:
|
||||||
|
```shell
|
||||||
|
docker-compose run sql-client
|
||||||
|
```
|
||||||
|
|
||||||
|
We should see the welcome screen of the CLI client:
|
||||||
|
|
||||||
|
{{< img src="/fig/real-time-data-lake-tutorial/flink-sql-client.png" alt="Flink SQL Client" >}}
|
||||||
|
|
||||||
|
Then do the following steps in Flink SQL CLI:
|
||||||
|
|
||||||
|
1. Enable checkpoints every 3 seconds
|
||||||
|
|
||||||
|
   Checkpointing is disabled by default, so we need to enable it to commit Iceberg transactions.
|
||||||
|
   Besides, the mysql-cdc source needs to wait for a complete checkpoint before the binlog reading phase starts, to avoid disorder of binlog records.
|
||||||
|
```sql
|
||||||
|
-- Flink SQL
|
||||||
|
Flink SQL> SET execution.checkpointing.interval = 3s;
|
||||||
|
```
|
||||||
|
2. Create MySQL sharding source table
|
||||||
|
|
||||||
|
Create a source table that captures the data from the logical sharding table `user`. Here, we use regex to match all the physical tables.
|
||||||
|
   Besides, the table defines a metadata column to identify which database/table the record comes from.
|
||||||
|
```sql
|
||||||
|
-- Flink SQL
|
||||||
|
Flink SQL> CREATE TABLE user_source (
|
||||||
|
database_name STRING METADATA VIRTUAL,
|
||||||
|
table_name STRING METADATA VIRTUAL,
|
||||||
|
`id` DECIMAL(20, 0) NOT NULL,
|
||||||
|
name STRING,
|
||||||
|
address STRING,
|
||||||
|
phone_number STRING,
|
||||||
|
email STRING,
|
||||||
|
PRIMARY KEY (`id`) NOT ENFORCED
|
||||||
|
) WITH (
|
||||||
|
'connector' = 'mysql-cdc',
|
||||||
|
'hostname' = 'mysql',
|
||||||
|
'port' = '3306',
|
||||||
|
'username' = 'root',
|
||||||
|
'password' = '123456',
|
||||||
|
'database-name' = 'db_[0-9]+',
|
||||||
|
'table-name' = 'user_[0-9]+'
|
||||||
|
);
|
||||||
|
```
|
||||||
|
3. Create Iceberg sink table
|
||||||
|
|
||||||
|
Create a sink table `all_users_sink` used to load data to Iceberg.
|
||||||
|
   We define `database_name`, `table_name` and `id` as a combined primary key, because `id` may not be unique across different databases and tables.
|
||||||
|
```sql
|
||||||
|
-- Flink SQL
|
||||||
|
Flink SQL> CREATE TABLE all_users_sink (
|
||||||
|
database_name STRING,
|
||||||
|
table_name STRING,
|
||||||
|
`id` DECIMAL(20, 0) NOT NULL,
|
||||||
|
name STRING,
|
||||||
|
address STRING,
|
||||||
|
phone_number STRING,
|
||||||
|
email STRING,
|
||||||
|
PRIMARY KEY (database_name, table_name, `id`) NOT ENFORCED
|
||||||
|
) WITH (
|
||||||
|
'connector'='iceberg',
|
||||||
|
'catalog-name'='iceberg_catalog',
|
||||||
|
'catalog-type'='hadoop',
|
||||||
|
'warehouse'='file:///tmp/iceberg/warehouse',
|
||||||
|
'format-version'='2'
|
||||||
|
);
|
||||||
|
```
|
||||||
|
|
||||||
|
## Streaming to Iceberg
|
||||||
|
1. Use the following Flink SQL to stream data from MySQL into Iceberg:
|
||||||
|
```sql
|
||||||
|
-- Flink SQL
|
||||||
|
Flink SQL> INSERT INTO all_users_sink select * from user_source;
|
||||||
|
```
|
||||||
|
It will start a streaming job which will synchronize historical and incremental data from MySQL to Iceberg continuously.
|
||||||
|
The running job can be found in [Flink UI](http://localhost:8081/#/job/running), and it looks like:
|
||||||
|
|
||||||
|
|
||||||
|
{{< img src="/fig/real-time-data-lake-tutorial/flink-cdc-iceberg-running-job.png" alt="CDC to Iceberg Running Job" >}}
|
||||||
|
|
||||||
|
Then, we can use the following command to see the files written to Iceberg:
|
||||||
|
```shell
|
||||||
|
docker-compose exec sql-client tree /tmp/iceberg/warehouse/default_database/
|
||||||
|
```
|
||||||
|
It should look like:
|
||||||
|
|
||||||
|
{{< img src="/fig/real-time-data-lake-tutorial/files-in-iceberg.png" alt="Files in Iceberg" >}}
|
||||||
|
|
||||||
|
The actual files may differ in your environment, but the structure of the directory should be similar.
|
||||||
|
|
||||||
|
2. Use the following Flink SQL to query the data written to `all_users_sink`:
|
||||||
|
```sql
|
||||||
|
-- Flink SQL
|
||||||
|
Flink SQL> SELECT * FROM all_users_sink;
|
||||||
|
```
|
||||||
|
We can see the data queried in the Flink SQL CLI:
|
||||||
|
|
||||||
|
{{< img src="/fig/real-time-data-lake-tutorial/data_in_iceberg.png" alt="Data in Iceberg" >}}
|
||||||
|
|
||||||
|
3. Make some changes in the MySQL databases, and then the data in Iceberg table `all_users_sink` will also change in real time.
|
||||||
|
|
||||||
|
(3.1) Insert a new user in table `db_1.user_1`
|
||||||
|
```sql
|
||||||
|
--- db_1
|
||||||
|
INSERT INTO db_1.user_1 VALUES (111,"user_111","Shanghai","123567891234","user_111@foo.com");
|
||||||
|
```
|
||||||
|
|
||||||
|
(3.2) Update a user in table `db_1.user_2`
|
||||||
|
```sql
|
||||||
|
--- db_1
|
||||||
|
UPDATE db_1.user_2 SET address='Beijing' WHERE id=120;
|
||||||
|
```
|
||||||
|
|
||||||
|
(3.3) Delete a user in table `db_2.user_2`
|
||||||
|
```sql
|
||||||
|
--- db_2
|
||||||
|
DELETE FROM db_2.user_2 WHERE id=220;
|
||||||
|
```
|
||||||
|
|
||||||
|
After executing each step, we can query the table `all_users_sink` using `SELECT * FROM all_users_sink` in Flink SQL CLI to see the changes.
|
||||||
|
|
||||||
|
The final query result is as follows:
|
||||||
|
|
||||||
|
{{< img src="/fig/real-time-data-lake-tutorial/final-data-in-iceberg.png" alt="Final Data in Iceberg" >}}
|
||||||
|
|
||||||
|
From the latest result in Iceberg, we can see that there is a new record of `(db_1, user_1, 111)`, and the address of `(db_1, user_2, 120)` has been updated to `Beijing`.
|
||||||
|
Besides, the record of `(db_2, user_2, 220)` has been deleted. The result is exactly the same as the changes we made in MySQL.
|
||||||
|
|
||||||
|
## Clean up
|
||||||
|
After finishing the tutorial, run the following command in the directory of `docker-compose.yml` to stop all containers:
|
||||||
|
```shell
|
||||||
|
docker-compose down
|
||||||
|
```
|
||||||
|
|
||||||
|
{{< top >}}
|
@ -0,0 +1,289 @@
|
|||||||
|
---
|
||||||
|
title: "PolarDB-X Tutorial"
|
||||||
|
weight: 5
|
||||||
|
type: docs
|
||||||
|
aliases:
|
||||||
|
- /try-flink-cdc/cdc-connectors/mongodb-tutorial.html
|
||||||
|
---
<!--
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
-->

# Demo: PolarDB-X CDC to Elasticsearch

This tutorial shows how to quickly build a streaming ETL pipeline for PolarDB-X with Flink CDC.

Assume we are running an e-commerce business. The product and order data is stored in PolarDB-X.
We want to enrich the orders with the product table, and then load the enriched orders into Elasticsearch in real time.

In the following sections, we will describe how to use Flink PolarDB-X CDC to implement this.
All exercises in this tutorial are performed in the Flink SQL CLI, and the entire process uses standard SQL syntax, without a single line of Java/Scala code or IDE installation.

## Preparation
Prepare a Linux or MacOS computer with Docker installed.

### Starting components required
The components required in this demo are all managed in containers, so we will use `docker-compose` to start them.

Create a `docker-compose.yml` file with the following contents:
```yaml
version: '2.1'
services:
  polardbx:
    image: polardbx/polardb-x:2.0.1
    container_name: polardbx
    ports:
      - "8527:8527"
  elasticsearch:
    image: 'elastic/elasticsearch:7.6.0'
    container_name: elasticsearch
    environment:
      - cluster.name=docker-cluster
      - bootstrap.memory_lock=true
      - ES_JAVA_OPTS=-Xms512m -Xmx512m
      - discovery.type=single-node
    ports:
      - '9200:9200'
      - '9300:9300'
    ulimits:
      memlock:
        soft: -1
        hard: -1
      nofile:
        soft: 65536
        hard: 65536
  kibana:
    image: 'elastic/kibana:7.6.0'
    container_name: kibana
    ports:
      - '5601:5601'
    volumes:
      - '/var/run/docker.sock:/var/run/docker.sock'
```

The Docker Compose environment consists of the following containers:
- PolarDB-X: the `products` and `orders` tables will be stored in this database. They will be joined to enrich the orders.
- Elasticsearch: mainly used as a data sink to store the enriched orders.
- Kibana: used to visualize the data in Elasticsearch.

To start all containers, run the following command in the directory that contains the `docker-compose.yml` file.
```shell
docker-compose up -d
```
This command automatically starts all the containers defined in the Docker Compose configuration in detached mode. Run `docker ps` to check whether these containers are running properly.
We can also visit [http://localhost:5601/](http://localhost:5601/) to see if Kibana is running normally.

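As an optional extra check beyond `docker ps` and the Kibana page, you can query Elasticsearch's standard cluster-health endpoint directly. This is just a sketch and assumes port 9200 is published on localhost as configured in the `docker-compose.yml` above.

```shell
# Confirm Elasticsearch answers on the published port (9200 per docker-compose.yml above)
curl "http://localhost:9200/_cluster/health?pretty"
```
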
### Preparing Flink and JAR package required
1. Download [Flink 1.18.0](https://archive.apache.org/dist/flink/flink-1.18.0/flink-1.18.0-bin-scala_2.12.tgz) and unzip it to the directory `flink-1.18.0`.
2. Download the following JAR packages and put them under `flink-1.18.0/lib/` (see the sketch after this list):

**Download links are only available for stable releases; SNAPSHOT dependencies need to be built from the master or release branches by yourself.**
- flink-sql-connector-mysql-cdc-3.0-SNAPSHOT.jar
- [flink-sql-connector-elasticsearch7-3.0.1-1.17.jar](https://repo.maven.apache.org/maven2/org/apache/flink/flink-sql-connector-elasticsearch7/3.0.1-1.17/flink-sql-connector-elasticsearch7-3.0.1-1.17.jar)

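For the stable Elasticsearch connector, downloading and placing the JAR can be scripted. The commands below are only a sketch: they assume `wget` is available and that Flink was unpacked into `flink-1.18.0` as in step 1; the SNAPSHOT MySQL CDC jar still has to be built and copied from your own build output.

```shell
# Fetch the stable Elasticsearch connector straight into Flink's lib directory
wget -P flink-1.18.0/lib/ \
  https://repo.maven.apache.org/maven2/org/apache/flink/flink-sql-connector-elasticsearch7/3.0.1-1.17/flink-sql-connector-elasticsearch7-3.0.1-1.17.jar

# Copy the locally built SNAPSHOT CDC connector (the source path is an example; adjust to your build output)
cp path/to/flink-sql-connector-mysql-cdc-3.0-SNAPSHOT.jar flink-1.18.0/lib/
```
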
### Preparing data in databases
#### Preparing data in PolarDB-X
1. Enter the PolarDB-X database:
```shell
mysql -h127.0.0.1 -P8527 -upolardbx_root -p"123456"
```
2. Create tables and populate data (a sanity-check sketch follows below):
```sql
-- PolarDB-X
CREATE TABLE products (
  id INTEGER NOT NULL AUTO_INCREMENT PRIMARY KEY,
  name VARCHAR(255) NOT NULL,
  description VARCHAR(512)
) AUTO_INCREMENT = 101;

INSERT INTO products
VALUES (default,"scooter","Small 2-wheel scooter"),
       (default,"car battery","12V car battery"),
       (default,"12-pack drill bits","12-pack of drill bits with sizes ranging from #40 to #3"),
       (default,"hammer","12oz carpenter's hammer"),
       (default,"hammer","14oz carpenter's hammer"),
       (default,"hammer","16oz carpenter's hammer"),
       (default,"rocks","box of assorted rocks"),
       (default,"jacket","water resistent black wind breaker"),
       (default,"spare tire","24 inch spare tire");

CREATE TABLE orders (
  order_id INTEGER NOT NULL AUTO_INCREMENT PRIMARY KEY,
  order_date DATETIME NOT NULL,
  customer_name VARCHAR(255) NOT NULL,
  price DECIMAL(10, 5) NOT NULL,
  product_id INTEGER NOT NULL,
  order_status BOOLEAN NOT NULL -- Whether order has been placed
) AUTO_INCREMENT = 10001;

INSERT INTO orders
VALUES (default, '2020-07-30 10:08:22', 'Jark', 50.50, 102, false),
       (default, '2020-07-30 10:11:09', 'Sally', 15.00, 105, false),
       (default, '2020-07-30 12:00:30', 'Edward', 25.25, 106, false);
```

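Before moving on, it can be worth confirming that the sample data landed as expected. The quick checks below are only a sketch run in the same `mysql` session; the expected counts (9 products, 3 orders) follow from the INSERT statements above.

```sql
-- PolarDB-X
-- Quick sanity check of the sample data inserted above
SELECT COUNT(*) FROM products;  -- expected: 9
SELECT COUNT(*) FROM orders;    -- expected: 3
```
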
## Starting Flink cluster and Flink SQL CLI

1. Use the following command to change to the Flink directory:
```shell
cd flink-1.18.0
```

2. Use the following command to start a Flink cluster:
```shell
./bin/start-cluster.sh
```

Then we can visit [http://localhost:8081/](http://localhost:8081/) to see if Flink is running normally (see the REST check sketched after these steps); the web page looks like:

{{< img src="/fig/mysql-postgres-tutorial/flink-ui.png" alt="Flink UI" >}}

3. Use the following command to start a Flink SQL CLI:
```shell
./bin/sql-client.sh
```
We should see the welcome screen of the CLI client.

{{< img src="/fig/mysql-postgres-tutorial/flink-sql-client.png" alt="Flink SQL Client" >}}

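If you prefer a terminal check over the web UI, Flink's REST API exposes a cluster overview on the same port. The snippet below is just a sketch assuming the default REST port 8081 used above.

```shell
# Ask the JobManager's REST API for a cluster overview (task managers, slots, running jobs)
curl http://localhost:8081/overview
```
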
## Creating tables using Flink DDL in Flink SQL CLI
First, enable checkpointing every 3 seconds.
```sql
-- Flink SQL
Flink SQL> SET execution.checkpointing.interval = 3s;
```

Then, create tables that capture the change data from the corresponding database tables.
```sql
-- Flink SQL

-- create source table - orders
Flink SQL> CREATE TABLE orders (
  order_id INT,
  order_date TIMESTAMP(0),
  customer_name STRING,
  price DECIMAL(10, 5),
  product_id INT,
  order_status BOOLEAN,
  PRIMARY KEY (order_id) NOT ENFORCED
) WITH (
  'connector' = 'mysql-cdc',
  'hostname' = '127.0.0.1',
  'port' = '8527',
  'username' = 'polardbx_root',
  'password' = '123456',
  'database-name' = 'mydb',
  'table-name' = 'orders'
);

-- create source table - products
CREATE TABLE products (
  id INT,
  name STRING,
  description STRING,
  PRIMARY KEY (id) NOT ENFORCED
) WITH (
  'connector' = 'mysql-cdc',
  'hostname' = '127.0.0.1',
  'port' = '8527',
  'username' = 'polardbx_root',
  'password' = '123456',
  'database-name' = 'mydb',
  'table-name' = 'products'
);
```

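At this point you can verify that both source tables are registered in the current catalog before wiring up the sink. The statements below are only a sketch using standard Flink SQL commands.

```sql
-- Flink SQL
-- List the tables registered so far and inspect the schema of one of them
Flink SQL> SHOW TABLES;
Flink SQL> DESCRIBE orders;
```
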
Finally, create an `enriched_orders` table that is used to load data to Elasticsearch.
```sql
-- Flink SQL
-- create sink table - enriched_orders
Flink SQL> CREATE TABLE enriched_orders (
  order_id INT,
  order_date TIMESTAMP(0),
  customer_name STRING,
  price DECIMAL(10, 5),
  product_id INT,
  order_status BOOLEAN,
  product_name STRING,
  product_description STRING,
  PRIMARY KEY (order_id) NOT ENFORCED
) WITH (
  'connector' = 'elasticsearch-7',
  'hosts' = 'http://localhost:9200',
  'index' = 'enriched_orders'
);
```

## Enriching orders and loading to Elasticsearch

Use Flink SQL to join the `orders` table with the `products` table to enrich the orders and write the result to Elasticsearch.
```sql
-- Flink SQL
Flink SQL> INSERT INTO enriched_orders
  SELECT o.order_id,
         o.order_date,
         o.customer_name,
         o.price,
         o.product_id,
         o.order_status,
         p.name,
         p.description
  FROM orders AS o
  LEFT JOIN products AS p ON o.product_id = p.id;
```

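Before switching to Kibana, you can also confirm directly in Elasticsearch that documents are arriving in the `enriched_orders` index. This is just a sketch against the standard search API on the port published by docker-compose.

```shell
# Peek at the documents written by the Flink job (standard Elasticsearch search API)
curl "http://localhost:9200/enriched_orders/_search?pretty"
```
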
Now, the enriched orders should be shown in Kibana.
Visit [http://localhost:5601/app/kibana#/management/kibana/index_pattern](http://localhost:5601/app/kibana#/management/kibana/index_pattern) to create an index pattern `enriched_orders`.

{{< img src="/fig/mysql-postgres-tutorial/kibana-create-index-pattern.png" alt="Create Index Pattern" >}}

Visit [http://localhost:5601/app/kibana#/discover](http://localhost:5601/app/kibana#/discover) to find the enriched orders.

{{< img src="/fig/mysql-postgres-tutorial/kibana-detailed-orders.png" alt="Find enriched Orders" >}}

Next, make some changes in the database, and the enriched orders shown in Kibana will be updated after each step in real time.
1. Insert a new order in PolarDB-X
```sql
--PolarDB-X
INSERT INTO orders
VALUES (default, '2020-07-30 15:22:00', 'Jark', 29.71, 104, false);
```
2. Update the order status in PolarDB-X
```sql
--PolarDB-X
UPDATE orders SET order_status = true WHERE order_id = 10004;
```
3. Delete the order in PolarDB-X
```sql
--PolarDB-X
DELETE FROM orders WHERE order_id = 10004;
```
The changes of the enriched orders in Kibana are as follows:
{{< img src="/fig/mysql-postgres-tutorial/kibana-detailed-orders-changes.gif" alt="Enriched Orders Changes" >}}

## Clean up
After finishing the tutorial, run the following command in the directory of `docker-compose.yml` to stop all containers:
```shell
docker-compose down
```
Run the following command in the Flink directory `flink-1.18.0` to stop the Flink cluster:
```shell
./bin/stop-cluster.sh
```

{{< top >}}

@ -0,0 +1,25 @@
---
title: Pipeline Connectors
bookCollapseSection: true
weight: 1
aliases:
- /try-flink-cdc/pipeline-connectors/
---
<!--
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
-->