[FLINK-35985][transform] Correct the substring function in transform rule

This closes #3702.

Co-authored-by: MOBIN <18814118038@163.com>
pull/3737/head
yuxiqian 2 months ago committed by GitHub
parent 908949bc72
commit bd2aa0aaa1
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

@ -126,7 +126,8 @@ Flink CDC uses [Calcite](https://calcite.apache.org/) to parse expressions and [
| LOWER(string) | lower(string) | Returns string in lowercase. |
| TRIM(string1) | trim('BOTH',string1) | Returns STRING1 with leading and trailing whitespace removed. |
| REGEXP_REPLACE(string1, string2, string3) | regexpReplace(string1, string2, string3) | Returns a string from STRING1 with all the substrings that match a regular expression STRING2 consecutively being replaced with STRING3. E.g., 'foobar'.regexpReplace('oo\|ar', '') returns "fb". |
| SUBSTRING(string FROM integer1 [ FOR integer2 ]) | substring(string,integer1,integer2) | Returns a substring of STRING starting from position INT1 with length INT2 (to the end by default). |
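| SUBSTR(string, integer1[, integer2]) | substr(string,integer1,integer2) | Returns the substring of STRING that starts at position integer1 and has length integer2 (to the end of the string by default). Positions are 1-based; 0 is treated as 1, and a negative integer1 counts backwards from the end of the string. |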
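| SUBSTRING(string FROM integer1 [ FOR integer2 ]) | substring(string,integer1,integer2) | Returns the substring of STRING that starts at position integer1 and has length integer2 (to the end of the string by default). Positions are 1-based; 0 is treated as 1, and a negative integer1 counts backwards from the end of the string. |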
| CONCAT(string1, string2,…) | concat(string1, string2,…) | Returns a string that concatenates string1, string2, …. E.g., CONCAT('AA', 'BB', 'CC') returns 'AABBCC'. |
## Temporal Functions

@ -118,16 +118,17 @@ Flink CDC uses [Calcite](https://calcite.apache.org/) to parse expressions and [
## String Functions
| Function | Janino Code | Description |
| -------------------- | ------------------------ | ------------------------------------------------- |
| string1 &#124;&#124; string2 | concat(string1, string2) | Returns the concatenation of STRING1 and STRING2. |
| CHAR_LENGTH(string) | charLength(string) | Returns the number of characters in STRING. |
| UPPER(string) | upper(string) | Returns string in uppercase. |
| LOWER(string) | lower(string) | Returns string in lowercase. |
| TRIM(string1) | trim('BOTH',string1) | Returns STRING1 with leading and trailing whitespace removed. |
| Function | Janino Code | Description |
| -------------------- | ------------------------ |---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| string1 &#124;&#124; string2 | concat(string1, string2) | Returns the concatenation of STRING1 and STRING2. |
| CHAR_LENGTH(string) | charLength(string) | Returns the number of characters in STRING. |
| UPPER(string) | upper(string) | Returns string in uppercase. |
| LOWER(string) | lower(string) | Returns string in lowercase. |
| TRIM(string1) | trim('BOTH',string1) | Returns STRING1 with leading and trailing whitespace removed. |
| REGEXP_REPLACE(string1, string2, string3) | regexpReplace(string1, string2, string3) | Returns a string from STRING1 with all the substrings that match a regular expression STRING2 consecutively being replaced with STRING3. E.g., 'foobar'.regexpReplace('oo\|ar', '') returns "fb". |
| SUBSTRING(string FROM integer1 [ FOR integer2 ]) | substring(string,integer1,integer2) | Returns a substring of STRING starting from position INT1 with length INT2 (to the end by default). |
| CONCAT(string1, string2,…) | concat(string1, string2,…) | Returns a string that concatenates string1, string2, …. E.g., CONCAT('AA', 'BB', 'CC') returns 'AABBCC'. |
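| SUBSTR(string, integer1[, integer2]) | substr(string,integer1,integer2) | Returns the substring of STRING that starts at position integer1 and has length integer2 (to the end of the string by default). Positions are 1-based; 0 is treated as 1, and a negative integer1 counts backwards from the end of the string. |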
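| SUBSTRING(string FROM integer1 [ FOR integer2 ]) | substring(string,integer1,integer2) | Returns the substring of STRING that starts at position integer1 and has length integer2 (to the end of the string by default). Positions are 1-based; 0 is treated as 1, and a negative integer1 counts backwards from the end of the string. |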
| CONCAT(string1, string2,…) | concat(string1, string2,…) | Returns a string that concatenates string1, string2, …. E.g., CONCAT('AA', 'BB', 'CC') returns 'AABBCC'. |
## Temporal Functions

@ -361,11 +361,64 @@ public class SystemFunctionUtils {
}
/**
 * SQL-style {@code SUBSTR(str, beginIndex)}: returns the tail of {@code str} that starts at
 * {@code beginIndex}.
 *
 * <p>This is an alias of {@code substring(str, beginIndex)} and shares its position rules
 * (1-based positions, {@code 0} treated as the first character, negative values counted from
 * the end of the string). The previous direct call to {@link String#substring(int)} used
 * Java's 0-based indexing and rejected negative positions, so it has been dropped in favour of
 * the shared implementation.
 *
 * @param str the source string
 * @param beginIndex SQL-style start position
 * @return the selected tail of {@code str}
 */
public static String substr(String str, int beginIndex) {
    return substring(str, beginIndex);
}
/**
 * SQL-style {@code SUBSTR(str, beginIndex, length)}: returns at most {@code length} characters
 * of {@code str} starting at {@code beginIndex}.
 *
 * <p>This is an alias of {@code substring(str, beginIndex, length)} and shares its position
 * rules (1-based positions, {@code 0} treated as the first character, negative values counted
 * from the end of the string, length clamped to the end of the string). The previous direct
 * call to {@link String#substring(int, int)} used Java's 0-based indexing and threw when
 * {@code beginIndex + length} ran past the end, so it has been dropped in favour of the shared
 * implementation.
 *
 * @param str the source string
 * @param beginIndex SQL-style start position
 * @param length maximum number of characters to return
 * @return the selected part of {@code str}
 */
public static String substr(String str, int beginIndex, int length) {
    return substring(str, beginIndex, length);
}
/**
 * SQL-style {@code SUBSTRING(str FROM beginIndex)}: returns everything from {@code beginIndex}
 * to the end of {@code str}.
 *
 * @param str the source string
 * @param beginIndex SQL-style start position, interpreted by the three-argument overload
 * @return the selected tail of {@code str}
 */
public static String substring(String str, int beginIndex) {
    // The three-argument overload clamps the end position to the string length, so the largest
    // possible length simply means "up to the end of the string".
    final int untilEnd = Integer.MAX_VALUE;
    return substring(str, beginIndex, untilEnd);
}
/**
 * SQL-style {@code SUBSTRING(str FROM beginIndex FOR length)}.
 *
 * <p>Position rules:
 *
 * <ul>
 *   <li>{@code beginIndex > 0}: 1-based position counted from the start of the string.
 *   <li>{@code beginIndex == 0}: treated as the first character.
 *   <li>{@code beginIndex < 0}: counted backwards from the end ({@code -1} is the last
 *       character).
 * </ul>
 *
 * <p>A start position outside the string yields an empty string. A {@code length} that runs
 * past the end of the string is clamped to the end.
 *
 * @param str the source string; must not be {@code null}
 * @param beginIndex SQL-style start position
 * @param length maximum number of characters to return; must be {@code >= 0}
 * @return the selected part of {@code str}, possibly empty
 * @throws RuntimeException if {@code length} is negative
 */
public static String substring(String str, int beginIndex, int length) {
    if (length < 0) {
        LOG.error(
                "length of 'substring(str, beginIndex, length)' must be >= 0 and Int type, but length = {}",
                length);
        throw new RuntimeException(
                "length of 'substring(str, beginIndex, length)' must be >= 0 and Int type, but length = "
                        + length);
    }
    // No upper-bound check is needed here: both arguments are ints and can never exceed
    // Integer.MAX_VALUE. The former guard was unreachable, and its messages reported
    // beginIndex and length in swapped order.
    if (str.isEmpty()) {
        return "";
    }

    final int strLength = str.length();
    final int startPos;
    if (beginIndex > 0) {
        // Convert the 1-based SQL position to a 0-based Java index.
        startPos = beginIndex - 1;
    } else if (beginIndex < 0) {
        // Negative positions count backwards from the end of the string.
        startPos = strLength + beginIndex;
    } else {
        startPos = 0;
    }
    if (startPos < 0 || startPos >= strLength) {
        return "";
    }

    // Clamp against the remaining characters first so that startPos + length cannot overflow.
    final int charsToTake = Math.min(length, strLength - startPos);
    return str.substring(startPos, startPos + charsToTake);
}
public static String upper(String str) {

@ -191,6 +191,7 @@ public class TransformSqlOperatorTable extends ReflectiveSqlOperatorTable {
SqlTypeFamily.INTEGER,
SqlTypeFamily.INTEGER)),
SqlFunctionCategory.STRING);
// Exposes SqlStdOperatorTable.SUBSTRING under this table's SUBSTRING field.
// NOTE(review): presumably this is what lets both SUBSTRING(str, pos[, len]) and
// SUBSTRING(str FROM pos [FOR len]) resolve in transform expressions — confirm against the
// parser and the Janino translation.
public static final SqlFunction SUBSTRING = SqlStdOperatorTable.SUBSTRING;
// ------------------
// Temporal Functions

@ -1499,7 +1499,24 @@ public class PostTransformOperatorTest {
testExpressionConditionTransform("concat('123', 'abc') = '123abc'");
testExpressionConditionTransform("upper('abc') = 'ABC'");
testExpressionConditionTransform("lower('ABC') = 'abc'");
testExpressionConditionTransform("SUBSTR('ABC', 1, 1) = 'B'");
testExpressionConditionTransform("SUBSTR('ABC', -1) = 'C'");
testExpressionConditionTransform("SUBSTR('ABC', -2, 2) = 'BC'");
testExpressionConditionTransform("SUBSTR('ABC', 0) = 'ABC'");
testExpressionConditionTransform("SUBSTR('ABC', 1) = 'ABC'");
testExpressionConditionTransform("SUBSTR('ABC', 2, 2) = 'BC'");
testExpressionConditionTransform("SUBSTR('ABC', 2, 100) = 'BC'");
testExpressionConditionTransform("SUBSTRING('ABC', -1) = 'C'");
testExpressionConditionTransform("SUBSTRING('ABC', -2, 2) = 'BC'");
testExpressionConditionTransform("SUBSTRING('ABC', 0) = 'ABC'");
testExpressionConditionTransform("SUBSTRING('ABC', 1) = 'ABC'");
testExpressionConditionTransform("SUBSTRING('ABC', 2, 2) = 'BC'");
testExpressionConditionTransform("SUBSTRING('ABC', 2, 100) = 'BC'");
testExpressionConditionTransform("SUBSTRING('ABC' FROM -1) = 'C'");
testExpressionConditionTransform("SUBSTRING('ABC' FROM -2 FOR 2) = 'BC'");
testExpressionConditionTransform("SUBSTRING('ABC' FROM 0) = 'ABC'");
testExpressionConditionTransform("SUBSTRING('ABC' FROM 1) = 'ABC'");
testExpressionConditionTransform("SUBSTRING('ABC' FROM 2 FOR 2) = 'BC'");
testExpressionConditionTransform("SUBSTRING('ABC' FROM 2 FOR 100) = 'BC'");
testExpressionConditionTransform("'ABC' like '^[a-zA-Z]'");
testExpressionConditionTransform("'123' not like '^[a-zA-Z]'");
testExpressionConditionTransform("abs(2) = 2");

Loading…
Cancel
Save