[docs] Add checkpoint and primary key settings for examples in tutorials

pull/365/head
frey66 4 years ago committed by GitHub
parent 492fa4f437
commit ce4c0f7f39

@@ -136,87 +136,92 @@ VALUES (default,10001,'Beijing','Shanghai',false),
4. Download the following jar packages to `<FLINK_HOME>/lib/`:
   - [flink-sql-connector-elasticsearch7_2.11-1.13.2.jar](https://repo.maven.apache.org/maven2/org/apache/flink/flink-sql-connector-elasticsearch7_2.11/1.13.2/flink-sql-connector-elasticsearch7_2.11-1.13.2.jar)
   - [flink-sql-connector-mysql-cdc-2.0.0.jar](https://repo1.maven.org/maven2/com/ververica/flink-sql-connector-mysql-cdc/2.0.0/flink-sql-connector-mysql-cdc-2.0.0.jar)
   - [flink-sql-connector-postgres-cdc-2.0.0.jar](https://repo1.maven.org/maven2/com/ververica/flink-sql-connector-postgres-cdc/2.0.0/flink-sql-connector-postgres-cdc-2.0.0.jar)
5. Then launch the Flink cluster and start the SQL CLI:
```sql
-- Flink SQL
-- set the checkpoint interval to 3 seconds
Flink SQL> SET 'execution.checkpointing.interval' = '3s';

Flink SQL> CREATE TABLE products (
    id INT,
    name STRING,
    description STRING,
    PRIMARY KEY (id) NOT ENFORCED
  ) WITH (
    'connector' = 'mysql-cdc',
    'hostname' = 'localhost',
    'port' = '3306',
    'username' = 'root',
    'password' = '123456',
    'database-name' = 'mydb',
    'table-name' = 'products'
  );

Flink SQL> CREATE TABLE orders (
    order_id INT,
    order_date TIMESTAMP(0),
    customer_name STRING,
    price DECIMAL(10, 5),
    product_id INT,
    order_status BOOLEAN,
    PRIMARY KEY (order_id) NOT ENFORCED
  ) WITH (
    'connector' = 'mysql-cdc',
    'hostname' = 'localhost',
    'port' = '3306',
    'username' = 'root',
    'password' = '123456',
    'database-name' = 'mydb',
    'table-name' = 'orders'
  );

Flink SQL> CREATE TABLE shipments (
    shipment_id INT,
    order_id INT,
    origin STRING,
    destination STRING,
    is_arrived BOOLEAN,
    PRIMARY KEY (shipment_id) NOT ENFORCED
  ) WITH (
    'connector' = 'postgres-cdc',
    'hostname' = 'localhost',
    'port' = '5432',
    'username' = 'postgres',
    'password' = 'postgres',
    'database-name' = 'postgres',
    'schema-name' = 'public',
    'table-name' = 'shipments'
  );

Flink SQL> CREATE TABLE enriched_orders (
    order_id INT,
    order_date TIMESTAMP(0),
    customer_name STRING,
    price DECIMAL(10, 5),
    product_id INT,
    order_status BOOLEAN,
    product_name STRING,
    product_description STRING,
    shipment_id INT,
    origin STRING,
    destination STRING,
    is_arrived BOOLEAN,
    PRIMARY KEY (order_id) NOT ENFORCED
  ) WITH (
    'connector' = 'elasticsearch-7',
    'hosts' = 'http://localhost:9200',
    'index' = 'enriched_orders'
  );

Flink SQL> INSERT INTO enriched_orders
  SELECT o.*, p.name, p.description, s.shipment_id, s.origin, s.destination, s.is_arrived
  FROM orders AS o
  LEFT JOIN products AS p ON o.product_id = p.id
  LEFT JOIN shipments AS s ON o.order_id = s.order_id;
```
6. Modify the data in MySQL and Postgres, then observe the result in Elasticsearch (example statements are sketched below).
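The tutorial lists its concrete statements for step 6 further down; as a minimal hedged sketch of the kind of changes meant, assuming the schemas above and auto-increment primary keys (the ids, names, and prices here are illustrative):

```sql
-- MySQL: insert a new order; order_id is auto-generated
-- (10004 below assumes three sample orders already exist)
INSERT INTO orders
VALUES (default, '2020-07-30 15:22:00', 'Jark', 29.71, 104, false);

-- MySQL: mark the order as paid
UPDATE orders SET order_status = true WHERE order_id = 10004;

-- Postgres: add a shipment for the order
-- (columns: shipment_id, order_id, origin, destination, is_arrived)
INSERT INTO shipments
VALUES (default, 10004, 'Beijing', 'Shanghai', false);

-- MySQL: delete the order; its document disappears from Elasticsearch
DELETE FROM orders WHERE order_id = 10004;
```

Each change is captured from the MySQL binlog or Postgres WAL and should appear in the `enriched_orders` index shortly afterwards.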
@@ -246,25 +251,25 @@ DELETE FROM orders WHERE order_id = 10004;
```sql
-- Flink SQL
Flink SQL> CREATE TABLE kafka_gmv (
    day_str STRING,
    gmv DECIMAL(10, 5)
  ) WITH (
    'connector' = 'kafka',
    'topic' = 'kafka_gmv',
    'scan.startup.mode' = 'earliest-offset',
    'properties.bootstrap.servers' = 'localhost:9092',
    'format' = 'changelog-json'
  );

Flink SQL> INSERT INTO kafka_gmv
  SELECT DATE_FORMAT(order_date, 'yyyy-MM-dd') as day_str, SUM(price) as gmv
  FROM orders
  WHERE order_status = true
  GROUP BY DATE_FORMAT(order_date, 'yyyy-MM-dd');

-- read the changelog from Kafka and observe the materialized result
Flink SQL> SELECT * FROM kafka_gmv;
```
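To make the aggregate change before reading the topic, one can toggle an order's status in MySQL; a hedged sketch reusing an order id from the sample data (the tutorial's own follow-up statements, elided here, may use different rows):

```sql
-- MySQL: each status flip moves the order out of and back into the
-- daily GMV sum, so kafka_gmv receives a retraction of the old total
-- followed by the new one
UPDATE orders SET order_status = false WHERE order_id = 10001;
UPDATE orders SET order_status = true WHERE order_id = 10001;
```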
Observe the output in Kafka:

@@ -136,87 +136,92 @@ VALUES (default,10001,'Beijing','Shanghai',false),
4. Download the following JAR packages to `<FLINK_HOME>/lib/`:
   - [flink-sql-connector-elasticsearch7_2.11-1.13.2.jar](https://repo.maven.apache.org/maven2/org/apache/flink/flink-sql-connector-elasticsearch7_2.11/1.13.2/flink-sql-connector-elasticsearch7_2.11-1.13.2.jar)
   - [flink-sql-connector-mysql-cdc-2.0.0.jar](https://repo1.maven.org/maven2/com/ververica/flink-sql-connector-mysql-cdc/2.0.0/flink-sql-connector-mysql-cdc-2.0.0.jar)
   - [flink-sql-connector-postgres-cdc-2.0.0.jar](https://repo1.maven.org/maven2/com/ververica/flink-sql-connector-postgres-cdc/2.0.0/flink-sql-connector-postgres-cdc-2.0.0.jar)
5. Launch a Flink cluster, then start a Flink SQL CLI and execute the following SQL statements inside:
```sql
-- Flink SQL
-- checkpoint every 3000 milliseconds
Flink SQL> SET 'execution.checkpointing.interval' = '3s';

Flink SQL> CREATE TABLE products (
    id INT,
    name STRING,
    description STRING,
    PRIMARY KEY (id) NOT ENFORCED
  ) WITH (
    'connector' = 'mysql-cdc',
    'hostname' = 'localhost',
    'port' = '3306',
    'username' = 'root',
    'password' = '123456',
    'database-name' = 'mydb',
    'table-name' = 'products'
  );

Flink SQL> CREATE TABLE orders (
    order_id INT,
    order_date TIMESTAMP(0),
    customer_name STRING,
    price DECIMAL(10, 5),
    product_id INT,
    order_status BOOLEAN,
    PRIMARY KEY (order_id) NOT ENFORCED
  ) WITH (
    'connector' = 'mysql-cdc',
    'hostname' = 'localhost',
    'port' = '3306',
    'username' = 'root',
    'password' = '123456',
    'database-name' = 'mydb',
    'table-name' = 'orders'
  );

Flink SQL> CREATE TABLE shipments (
    shipment_id INT,
    order_id INT,
    origin STRING,
    destination STRING,
    is_arrived BOOLEAN,
    PRIMARY KEY (shipment_id) NOT ENFORCED
  ) WITH (
    'connector' = 'postgres-cdc',
    'hostname' = 'localhost',
    'port' = '5432',
    'username' = 'postgres',
    'password' = 'postgres',
    'database-name' = 'postgres',
    'schema-name' = 'public',
    'table-name' = 'shipments'
  );

Flink SQL> CREATE TABLE enriched_orders (
    order_id INT,
    order_date TIMESTAMP(0),
    customer_name STRING,
    price DECIMAL(10, 5),
    product_id INT,
    order_status BOOLEAN,
    product_name STRING,
    product_description STRING,
    shipment_id INT,
    origin STRING,
    destination STRING,
    is_arrived BOOLEAN,
    PRIMARY KEY (order_id) NOT ENFORCED
  ) WITH (
    'connector' = 'elasticsearch-7',
    'hosts' = 'http://localhost:9200',
    'index' = 'enriched_orders'
  );

Flink SQL> INSERT INTO enriched_orders
  SELECT o.*, p.name, p.description, s.shipment_id, s.origin, s.destination, s.is_arrived
  FROM orders AS o
  LEFT JOIN products AS p ON o.product_id = p.id
  LEFT JOIN shipments AS s ON o.order_id = s.order_id;
```
6. Make some changes in MySQL and Postgres, then check the result in Elasticsearch (example statements are sketched below):
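A hedged sketch of the kind of statements step 6 refers to, under the schemas defined above; the ids, names, and prices are illustrative and the tutorial's own statements may differ:

```sql
-- MySQL: create a new order, then mark it as paid
INSERT INTO orders
VALUES (default, '2020-07-30 17:33:00', 'Sally', 88.88, 103, false);
UPDATE orders SET order_status = true WHERE order_id = 10004;

-- Postgres: ship the order, then mark it as arrived
INSERT INTO shipments
VALUES (default, 10004, 'Shanghai', 'Hangzhou', false);
UPDATE shipments SET is_arrived = true WHERE order_id = 10004;

-- MySQL: deleting the order removes the enriched document as well
DELETE FROM orders WHERE order_id = 10004;
```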
@@ -246,25 +251,25 @@ Execute following SQL in Flink SQL CLI:
```sql
-- Flink SQL
Flink SQL> CREATE TABLE kafka_gmv (
    day_str STRING,
    gmv DECIMAL(10, 5)
  ) WITH (
    'connector' = 'kafka',
    'topic' = 'kafka_gmv',
    'scan.startup.mode' = 'earliest-offset',
    'properties.bootstrap.servers' = 'localhost:9092',
    'format' = 'changelog-json'
  );

Flink SQL> INSERT INTO kafka_gmv
  SELECT DATE_FORMAT(order_date, 'yyyy-MM-dd') as day_str, SUM(price) as gmv
  FROM orders
  WHERE order_status = true
  GROUP BY DATE_FORMAT(order_date, 'yyyy-MM-dd');

-- Consume the changelog data from Kafka, and check the result of the materialized view:
Flink SQL> SELECT * FROM kafka_gmv;
```
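To see the changelog stream in action, one can toggle an order's status first; a minimal sketch with an illustrative order id from the sample data:

```sql
-- MySQL: each status flip retracts the old daily GMV value on the
-- kafka_gmv topic and emits an updated one
UPDATE orders SET order_status = false WHERE order_id = 10001;
UPDATE orders SET order_status = true WHERE order_id = 10001;
```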
To consume records from Kafka using `kafka-console-consumer`:
