[docs] Add checkpoint and primary key settings to the examples in the tutorials

pull/365/head
frey66 authored 4 years ago, committed by GitHub
parent 492fa4f437
commit ce4c0f7f39

@@ -136,19 +136,22 @@ VALUES (default,10001,'Beijing','Shanghai',false),
 4. Download the following JAR packages to `<FLINK_HOME>/lib/`:
-   - [flink-sql-connector-elasticsearch7_2.11-1.11.1.jar](https://repo.maven.apache.org/maven2/org/apache/flink/flink-sql-connector-elasticsearch7_2.11/1.11.1/flink-sql-connector-elasticsearch7_2.11-1.11.1.jar)
-   - [flink-sql-connector-mysql-cdc-1.0.0.jar](https://repo1.maven.org/maven2/com/alibaba/ververica/flink-sql-connector-mysql-cdc/1.0.0/flink-sql-connector-mysql-cdc-1.0.0.jar)
-   - [flink-sql-connector-postgres-cdc-1.0.0.jar](https://repo1.maven.org/maven2/com/alibaba/ververica/flink-sql-connector-postgres-cdc/1.0.0/flink-sql-connector-postgres-cdc-1.0.0.jar)
+   - [flink-sql-connector-elasticsearch7_2.11-1.13.2.jar](https://repo.maven.apache.org/maven2/org/apache/flink/flink-sql-connector-elasticsearch7_2.11/1.13.2/flink-sql-connector-elasticsearch7_2.11-1.13.2.jar)
+   - [flink-sql-connector-mysql-cdc-2.0.0.jar](https://repo1.maven.org/maven2/com/ververica/flink-sql-connector-mysql-cdc/2.0.0/flink-sql-connector-mysql-cdc-2.0.0.jar)
+   - [flink-sql-connector-postgres-cdc-2.0.0.jar](https://repo1.maven.org/maven2/com/ververica/flink-sql-connector-postgres-cdc/2.0.0/flink-sql-connector-postgres-cdc-2.0.0.jar)
 5. Then launch the Flink cluster and start the SQL CLI.
 ```sql
---FlinkSQL
-CREATE TABLE products (
+--Flink SQL
+-- set the checkpoint interval to 3 seconds
+Flink SQL> SET 'execution.checkpointing.interval' = '3s';
+Flink SQL> CREATE TABLE products (
   id INT,
   name STRING,
-  description STRING
+  description STRING,
+  PRIMARY KEY (id) NOT ENFORCED
 ) WITH (
   'connector' = 'mysql-cdc',
   'hostname' = 'localhost',
   'port' = '3306',
@@ -156,16 +159,17 @@ CREATE TABLE products (
   'password' = '123456',
   'database-name' = 'mydb',
   'table-name' = 'products'
 );
-CREATE TABLE orders (
+Flink SQL> CREATE TABLE orders (
   order_id INT,
   order_date TIMESTAMP(0),
   customer_name STRING,
   price DECIMAL(10, 5),
   product_id INT,
-  order_status BOOLEAN
+  order_status BOOLEAN,
+  PRIMARY KEY (order_id) NOT ENFORCED
 ) WITH (
   'connector' = 'mysql-cdc',
   'hostname' = 'localhost',
   'port' = '3306',
@@ -173,15 +177,16 @@ CREATE TABLE orders (
   'password' = '123456',
   'database-name' = 'mydb',
   'table-name' = 'orders'
 );
-CREATE TABLE shipments (
+Flink SQL> CREATE TABLE shipments (
   shipment_id INT,
   order_id INT,
   origin STRING,
   destination STRING,
-  is_arrived BOOLEAN
+  is_arrived BOOLEAN,
+  PRIMARY KEY (shipment_id) NOT ENFORCED
 ) WITH (
   'connector' = 'postgres-cdc',
   'hostname' = 'localhost',
   'port' = '5432',
@@ -190,9 +195,9 @@ CREATE TABLE shipments (
   'database-name' = 'postgres',
   'schema-name' = 'public',
   'table-name' = 'shipments'
 );
-CREATE TABLE enriched_orders (
+Flink SQL> CREATE TABLE enriched_orders (
   order_id INT,
   order_date TIMESTAMP(0),
   customer_name STRING,
@@ -206,17 +211,17 @@ CREATE TABLE enriched_orders (
   destination STRING,
   is_arrived BOOLEAN,
   PRIMARY KEY (order_id) NOT ENFORCED
 ) WITH (
   'connector' = 'elasticsearch-7',
   'hosts' = 'http://localhost:9200',
   'index' = 'enriched_orders'
 );
-INSERT INTO enriched_orders
+Flink SQL> INSERT INTO enriched_orders
 SELECT o.*, p.name, p.description, s.shipment_id, s.origin, s.destination, s.is_arrived
 FROM orders AS o
 LEFT JOIN products AS p ON o.product_id = p.id
 LEFT JOIN shipments AS s ON o.order_id = s.order_id;
 ```
 6. Modify the data in MySQL and Postgres, and observe the result in Elasticsearch.
@@ -246,25 +251,25 @@ DELETE FROM orders WHERE order_id = 10004;
 ```sql
 --Flink SQL
-CREATE TABLE kafka_gmv (
+Flink SQL> CREATE TABLE kafka_gmv (
   day_str STRING,
   gmv DECIMAL(10, 5)
 ) WITH (
   'connector' = 'kafka',
   'topic' = 'kafka_gmv',
   'scan.startup.mode' = 'earliest-offset',
   'properties.bootstrap.servers' = 'localhost:9092',
   'format' = 'changelog-json'
 );
-INSERT INTO kafka_gmv
+Flink SQL> INSERT INTO kafka_gmv
 SELECT DATE_FORMAT(order_date, 'yyyy-MM-dd') as day_str, SUM(price) as gmv
 FROM orders
 WHERE order_status = true
 GROUP BY DATE_FORMAT(order_date, 'yyyy-MM-dd');
 -- Read the changelog data from Kafka and observe the materialized result
-SELECT * FROM kafka_gmv;
+Flink SQL> SELECT * FROM kafka_gmv;
 ```
 Observe the output in Kafka:
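
Reading the hunks above together, the patched session now opens as in the following minimal consolidated sketch. One caveat: the `'username'` property falls in the gap between the first two hunks, so its `root` value here is assumed from the rest of the tutorial rather than shown in this commit:

```sql
-- Enable periodic checkpoints; the 2.x CDC connectors rely on them, and
-- the sinks in this tutorial commit results on checkpoint completion.
-- '3s' is the tutorial's illustrative choice, not a requirement.
SET 'execution.checkpointing.interval' = '3s';

-- NOT ENFORCED: Flink does not own the MySQL data and cannot verify the
-- key constraint itself; it trusts the declaration for changelog upserts.
CREATE TABLE products (
  id INT,
  name STRING,
  description STRING,
  PRIMARY KEY (id) NOT ENFORCED
) WITH (
  'connector' = 'mysql-cdc',
  'hostname' = 'localhost',
  'port' = '3306',
  'username' = 'root',  -- assumed: this line is elided between the hunks
  'password' = '123456',
  'database-name' = 'mydb',
  'table-name' = 'products'
);
```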

@@ -136,19 +136,22 @@ VALUES (default,10001,'Beijing','Shanghai',false),
 4. Download the following JAR packages to `<FLINK_HOME>/lib/`:
-   - [flink-sql-connector-elasticsearch7_2.11-1.11.1.jar](https://repo.maven.apache.org/maven2/org/apache/flink/flink-sql-connector-elasticsearch7_2.11/1.11.1/flink-sql-connector-elasticsearch7_2.11-1.11.1.jar)
-   - [flink-sql-connector-mysql-cdc-1.0.0.jar](https://repo1.maven.org/maven2/com/alibaba/ververica/flink-sql-connector-mysql-cdc/1.0.0/flink-sql-connector-mysql-cdc-1.0.0.jar)
-   - [flink-sql-connector-postgres-cdc-1.0.0.jar](https://repo1.maven.org/maven2/com/alibaba/ververica/flink-sql-connector-postgres-cdc/1.0.0/flink-sql-connector-postgres-cdc-1.0.0.jar)
+   - [flink-sql-connector-elasticsearch7_2.11-1.13.2.jar](https://repo.maven.apache.org/maven2/org/apache/flink/flink-sql-connector-elasticsearch7_2.11/1.13.2/flink-sql-connector-elasticsearch7_2.11-1.13.2.jar)
+   - [flink-sql-connector-mysql-cdc-2.0.0.jar](https://repo1.maven.org/maven2/com/ververica/flink-sql-connector-mysql-cdc/2.0.0/flink-sql-connector-mysql-cdc-2.0.0.jar)
+   - [flink-sql-connector-postgres-cdc-2.0.0.jar](https://repo1.maven.org/maven2/com/ververica/flink-sql-connector-postgres-cdc/2.0.0/flink-sql-connector-postgres-cdc-2.0.0.jar)
 5. Launch a Flink cluster, then start the Flink SQL CLI and execute the following SQL statements in it:
 ```sql
 -- Flink SQL
-CREATE TABLE products (
+-- checkpoint every 3000 milliseconds
+Flink SQL> SET 'execution.checkpointing.interval' = '3s';
+Flink SQL> CREATE TABLE products (
   id INT,
   name STRING,
-  description STRING
+  description STRING,
+  PRIMARY KEY (id) NOT ENFORCED
 ) WITH (
   'connector' = 'mysql-cdc',
   'hostname' = 'localhost',
   'port' = '3306',
@@ -156,16 +159,17 @@ CREATE TABLE products (
   'password' = '123456',
   'database-name' = 'mydb',
   'table-name' = 'products'
 );
-CREATE TABLE orders (
+Flink SQL> CREATE TABLE orders (
   order_id INT,
   order_date TIMESTAMP(0),
   customer_name STRING,
   price DECIMAL(10, 5),
   product_id INT,
-  order_status BOOLEAN
+  order_status BOOLEAN,
+  PRIMARY KEY (order_id) NOT ENFORCED
 ) WITH (
   'connector' = 'mysql-cdc',
   'hostname' = 'localhost',
   'port' = '3306',
@@ -173,15 +177,16 @@ CREATE TABLE orders (
   'password' = '123456',
   'database-name' = 'mydb',
   'table-name' = 'orders'
 );
-CREATE TABLE shipments (
+Flink SQL> CREATE TABLE shipments (
   shipment_id INT,
   order_id INT,
   origin STRING,
   destination STRING,
-  is_arrived BOOLEAN
+  is_arrived BOOLEAN,
+  PRIMARY KEY (shipment_id) NOT ENFORCED
 ) WITH (
   'connector' = 'postgres-cdc',
   'hostname' = 'localhost',
   'port' = '5432',
@@ -190,9 +195,9 @@ CREATE TABLE shipments (
   'database-name' = 'postgres',
   'schema-name' = 'public',
   'table-name' = 'shipments'
 );
-CREATE TABLE enriched_orders (
+Flink SQL> CREATE TABLE enriched_orders (
   order_id INT,
   order_date TIMESTAMP(0),
   customer_name STRING,
@@ -206,17 +211,17 @@ CREATE TABLE enriched_orders (
   destination STRING,
   is_arrived BOOLEAN,
   PRIMARY KEY (order_id) NOT ENFORCED
 ) WITH (
   'connector' = 'elasticsearch-7',
   'hosts' = 'http://localhost:9200',
   'index' = 'enriched_orders'
 );
-INSERT INTO enriched_orders
+Flink SQL> INSERT INTO enriched_orders
 SELECT o.*, p.name, p.description, s.shipment_id, s.origin, s.destination, s.is_arrived
 FROM orders AS o
 LEFT JOIN products AS p ON o.product_id = p.id
 LEFT JOIN shipments AS s ON o.order_id = s.order_id;
 ```
 6. Make some changes in MySQL and Postgres, then check the result in Elasticsearch:
@@ -246,25 +251,25 @@ Execute the following SQL in the Flink SQL CLI:
 ```sql
 -- Flink SQL
-CREATE TABLE kafka_gmv (
+Flink SQL> CREATE TABLE kafka_gmv (
   day_str STRING,
   gmv DECIMAL(10, 5)
 ) WITH (
   'connector' = 'kafka',
   'topic' = 'kafka_gmv',
   'scan.startup.mode' = 'earliest-offset',
   'properties.bootstrap.servers' = 'localhost:9092',
   'format' = 'changelog-json'
 );
-INSERT INTO kafka_gmv
+Flink SQL> INSERT INTO kafka_gmv
 SELECT DATE_FORMAT(order_date, 'yyyy-MM-dd') as day_str, SUM(price) as gmv
 FROM orders
 WHERE order_status = true
 GROUP BY DATE_FORMAT(order_date, 'yyyy-MM-dd');
 -- Consume changelog data from Kafka, and check the result of the materialized view:
-SELECT * FROM kafka_gmv;
+Flink SQL> SELECT * FROM kafka_gmv;
 ```
 To consume records in Kafka using `kafka-console-consumer`:
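
To make step 6 concrete, here is a sketch of changes one might run on the MySQL side; every value below is hypothetical except the `order_id = 10004` delete, which appears in the hunk context above. Each statement reaches the `orders` CDC table as a changelog event and, through the join, updates the `enriched_orders` index:

```sql
-- Run these in the MySQL client, not in the Flink SQL CLI.
INSERT INTO orders                           -- hypothetical new order
VALUES (default, '2020-07-30 15:22:00', 'Alice', 29.71, 104, false);
UPDATE orders SET order_status = true        -- hypothetical status flip
WHERE order_id = 10004;
DELETE FROM orders WHERE order_id = 10004;   -- as in the tutorial text above
```

Within a checkpoint interval or so, the corresponding documents in the `enriched_orders` index should appear, change, and disappear.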

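One small cross-check that applies to both versions of the tutorial: in the SQL client of this Flink generation, `SET` with no argument lists the current session configuration, which is a quick way to confirm the new checkpoint interval took effect (client behavior assumed from the Flink 1.13 era):

```sql
Flink SQL> SET;
-- among the printed options, execution.checkpointing.interval should show 3s
```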