From 58531e748ea95f6f455bedb8c612e00d74fff623 Mon Sep 17 00:00:00 2001
From: yux <34335406+yuxiqian@users.noreply.github.com>
Date: Wed, 24 Apr 2024 15:35:28 +0800
Subject: [PATCH] [FLINK-35077][cdc] Add license checking CI scripts (#3218)
---
.github/workflows/flink_cdc.yml | 16 ++++
tools/ci/license_check.rb | 135 ++++++++++++++++++++++++++++++++
2 files changed, 151 insertions(+)
create mode 100755 tools/ci/license_check.rb
diff --git a/.github/workflows/flink_cdc.yml b/.github/workflows/flink_cdc.yml
index ad842d7b4..1700d9a74 100644
--- a/.github/workflows/flink_cdc.yml
+++ b/.github/workflows/flink_cdc.yml
@@ -89,6 +89,22 @@ env:
flink-cdc-e2e-tests/flink-cdc-source-e2e-tests"
jobs:
+ license_check:
+ runs-on: ubuntu-latest
+ steps:
+ - name: Check out repository code
+ uses: actions/checkout@v4
+ with:
+ submodules: true
+ - name: Set up Ruby environment
+ uses: ruby/setup-ruby@v1
+ with:
+ ruby-version: '3.3'
+ - name: Compiling jar packages
+ run: mvn --no-snapshot-updates -B package -DskipTests
+ - name: Run license check
+ run: gem install rubyzip -v 2.3.0 && ./tools/ci/license_check.rb
+
compile_and_test:
# Only run the CI pipeline for the flink-cdc-connectors repository
# if: github.repository == 'apache/flink-cdc-connectors'
diff --git a/tools/ci/license_check.rb b/tools/ci/license_check.rb
new file mode 100755
index 000000000..ac74c52ca
--- /dev/null
+++ b/tools/ci/license_check.rb
@@ -0,0 +1,135 @@
+#!/usr/bin/env ruby
+# frozen_string_literal: true
+
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+require 'zip'
+
+# These maven modules don't need to be checked
+EXCLUDED_MODULES = %w[flink-cdc-dist flink-cdc-e2e-tests].freeze
+
+# Questionable license statements which shouldn't occur in packaged jar files
+QUESTIONABLE_STATEMENTS = [
+ 'Binary Code License',
+ 'Intel Simplified Software License',
+ 'JSR 275',
+ 'Microsoft Limited Public License',
+ 'Amazon Software License',
+ # Java SDK for Satori RTM license
+ 'as necessary for your use of Satori services',
+ 'REDIS SOURCE AVAILABLE LICENSE',
+ 'Booz Allen Public License',
+ 'Confluent Community License Agreement Version 1.0',
+ # “Commons Clause” License Condition v1.0
+ 'the License does not grant to you, the right to Sell the Software.',
+ 'Sun Community Source License Version 3.0',
+ 'GNU General Public License',
+ 'GNU Affero General Public License',
+ 'GNU Lesser General Public License',
+ 'Q Public License',
+ 'Sleepycat License',
+ 'Server Side Public License',
+ 'Code Project Open License',
+ # BSD 4-Clause
+ ' All advertising materials mentioning features or use of this software must display the following acknowledgement',
+ # Facebook Patent clause v1
+ 'The license granted hereunder will terminate, automatically and without notice, for anyone that makes any claim',
+ # Facebook Patent clause v2
+ 'The license granted hereunder will terminate, automatically and without notice, if you (or any of your subsidiaries, corporate affiliates or agents) initiate directly or indirectly, or take a direct financial interest in, any Patent Assertion: (i) against Facebook',
+ 'Netscape Public License',
+ 'SOLIPSISTIC ECLIPSE PUBLIC LICENSE',
+ # DON'T BE A DICK PUBLIC LICENSE
+ "Do whatever you like with the original work, just don't be a dick.",
+ # JSON License
+ 'The Software shall be used for Good, not Evil.',
+ # can sometimes be found in "funny" licenses
+ 'Don’t be evil',
+ # IBM's non-FOSS license
+ 'International Program License Agreement',
+ # Oracle's non-FOSS license
+ 'Oracle Free Use Terms and Conditions'
+].freeze
+
+# These file extensions are binary-formatted. No check needed.
+BINARY_FILE_EXTENSIONS = %w[.class .dylib .so .dll .gif .ico].freeze
+
+# These packages are licensed under "Weak Copyleft" licenses.
+# According to Apache official guidelines, such software could be
+# packaged in jar if appropriately labelled.
+# See https://www.apache.org/legal/resolved.html for more details.
+EXCEPTION_PACKAGES = [
+ 'org/glassfish/jersey/', # dual-licensed under GPL 2 and EPL 2.0
+ 'org.glassfish.jersey', # dual-licensed under GPL 2 and EPL 2.0
+ 'org.glassfish.hk2', # dual-licensed under GPL 2 and EPL 2.0
+ 'javax.ws.rs-api', # dual-licensed under GPL 2 and EPL 2.0
+ 'jakarta.ws.rs' # dual-licensed under GPL 2 and EPL 2.0
+].freeze
+
+puts 'Start license check...'
+
+# Extract Flink CDC revision number from global pom.xml
+begin
+ REVISION_NUMBER = File.read('pom.xml').scan(%r{(.*)}).last[0]
+rescue NoMethodError
+ abort 'Could not extract Flink CDC revision number from pom.xml'
+end
+
+puts "Flink CDC version: '#{REVISION_NUMBER}'"
+
+# Traversing maven module in given path
+def traverse_module(path)
+ module_name = File.basename path
+ return if EXCLUDED_MODULES.include?(module_name)
+
+ jar_file = File.join path, 'target', "#{module_name}-#{REVISION_NUMBER}.jar"
+ check_jar_license jar_file if File.exist? jar_file
+
+ File.read(File.join(path, 'pom.xml')).scan(%r{(.*)}).map(&:first).each do |submodule|
+ traverse_module File.join(path, submodule.to_s) unless submodule.nil?
+ end
+end
+
+@tainted_records = []
+
+# Check license issues in given jar file
+def check_jar_license(jar_file)
+ puts "Checking jar file #{jar_file}"
+ Zip::File.open(jar_file) do |jar|
+ jar.filter { |e| e.ftype == :file }
+ .filter { |e| !File.basename(e.name).downcase.end_with?(*BINARY_FILE_EXTENSIONS) }
+ .filter { |e| !File.basename(e.name).downcase.start_with? 'license', 'dependencies' }
+ .filter { |e| EXCEPTION_PACKAGES.none? { |ex| e.name.include? ex } }
+ .map do |e|
+ content = e.get_input_stream.read.force_encoding('UTF-8')
+ next unless QUESTIONABLE_STATEMENTS.map { |stmt| content.include?(stmt) }.any?
+
+ @tainted_records.push({
+ jar_file: File.basename(jar_file),
+ suspicious_file: e.name
+ })
+ end
+ end
+end
+
+traverse_module '.'
+
+unless @tainted_records.empty?
+ puts "\nError: packaged jar contains files with incompatible licenses:"
+ puts @tainted_records.map { |e| " -> In #{e[:jar_file]}: #{e[:suspicious_file]}" }.join("\n")
+ abort 'See https://www.apache.org/legal/resolved.html for more details.'
+end
+
+puts 'License check passed.'