diff --git a/.ci/docker-compose-file/docker-compose.yaml b/.ci/docker-compose-file/docker-compose.yaml index 83b58b0b9..2612eb8d8 100644 --- a/.ci/docker-compose-file/docker-compose.yaml +++ b/.ci/docker-compose-file/docker-compose.yaml @@ -3,7 +3,7 @@ version: '3.9' services: erlang24: container_name: erlang24 - image: ghcr.io/emqx/emqx-builder/5.0-16:1.13.4-24.2.1-1-ubuntu20.04 + image: ghcr.io/emqx/emqx-builder/5.0-17:1.13.4-24.2.1-1-ubuntu20.04 env_file: - conf.env environment: diff --git a/.ci/fvt_tests/http_server/src/http_server.erl b/.ci/fvt_tests/http_server/src/http_server.erl index b66b72939..4aaa25b95 100644 --- a/.ci/fvt_tests/http_server/src/http_server.erl +++ b/.ci/fvt_tests/http_server/src/http_server.erl @@ -1,50 +1,63 @@ -module(http_server). --import(minirest, [ return/0 - , return/1 - ]). +-import(minirest, [ + return/0, + return/1 +]). --export([ start/0 - , stop/0 - ]). +-export([ + start/0, + stop/0 +]). --rest_api(#{ name => get_counter - , method => 'GET' - , path => "/counter" - , func => get_counter - , descr => "Check counter" - }). --rest_api(#{ name => add_counter - , method => 'POST' - , path => "/counter" - , func => add_counter - , descr => "Counter plus one" - }). +-rest_api(#{ + name => get_counter, + method => 'GET', + path => "/counter", + func => get_counter, + descr => "Check counter" +}). +-rest_api(#{ + name => add_counter, + method => 'POST', + path => "/counter", + func => add_counter, + descr => "Counter plus one" +}). --export([ get_counter/2 - , add_counter/2 - ]). +-export([ + get_counter/2, + add_counter/2 +]). start() -> application:ensure_all_started(minirest), - ets:new(relup_test_message, [named_table, public]), + _ = spawn(fun ets_owner/0), Handlers = [{"/", minirest:handler(#{modules => [?MODULE]})}], Dispatch = [{"/[...]", minirest, Handlers}], - minirest:start_http(?MODULE, #{socket_opts => [inet, {port, 8080}]}, Dispatch). + minirest:start_http(?MODULE, #{socket_opts => [inet, {port, 7077}]}, Dispatch). 
stop() -> ets:delete(relup_test_message), minirest:stop_http(?MODULE). get_counter(_Binding, _Params) -> - return({ok, ets:info(relup_test_message, size)}). + V = ets:info(relup_test_message, size), + return({ok, V}). -add_counter(_Binding, Params) -> +add_counter(_Binding, Params) -> case lists:keymember(<<"payload">>, 1, Params) of - true -> + true -> {value, {<<"id">>, ID}, Params1} = lists:keytake(<<"id">>, 1, Params), ets:insert(relup_test_message, {ID, Params1}); - _ -> + _ -> + io:format("discarded: ~p\n", [Params]), ok end, return(). + +ets_owner() -> + ets:new(relup_test_message, [named_table, public]), + receive + stop -> ok + end. diff --git a/.ci/fvt_tests/relup.lux b/.ci/fvt_tests/relup.lux deleted file mode 100644 index f5b47b89e..000000000 --- a/.ci/fvt_tests/relup.lux +++ /dev/null @@ -1,225 +0,0 @@ -[config var=PROFILE] -[config var=PACKAGE_PATH] -[config var=BENCH_PATH] -[config var=ONE_MORE_EMQX_PATH] -[config var=VSN] -[config var=OLD_VSNS] - -[config shell_cmd=/bin/bash] -[config timeout=600000] - -[loop old_vsn $OLD_VSNS] - -[shell http_server] - !cd http_server - !rebar3 shell - ???Eshell - ???> - !http_server:start(). - ?Start http_server listener on 8080 successfully. - ?ok - ?> - -[shell emqx] - !cd $PACKAGE_PATH - mkdir -p emqx - !tar -C emqx -zxf ${PROFILE}-$(echo $old_vsn | sed -r 's/[v|e]//g')-*-ubuntu20.04-amd64.tar.gz - ?SH-PROMPT - - !cd emqx - !sed -i 's|listener.wss.external[ \t]*=.*|listener.wss.external = 8085|g' etc/emqx.conf - !sed -i '/emqx_telemetry/d' data/loaded_plugins - - !./bin/emqx start - ?EMQ X .* is started successfully! - ?SH-PROMPT - -[shell emqx2] - !cd $PACKAGE_PATH - !cp -f $ONE_MORE_EMQX_PATH/one_more_$(echo $PROFILE | sed 's/-/_/g').sh . - !./one_more_$(echo $PROFILE | sed 's/-/_/g').sh emqx2 - ?SH-PROMPT - !cd emqx2 - - !sed -i '/emqx_telemetry/d' data/loaded_plugins - - !./bin/emqx start - ?EMQ X .* is started successfully! 
- ?SH-PROMPT - - !./bin/emqx_ctl cluster join emqx@127.0.0.1 - ???Join the cluster successfully. - ?SH-PROMPT - - !./bin/emqx_ctl cluster status - """??? - Cluster status: #{running_nodes => ['emqx2@127.0.0.1','emqx@127.0.0.1'], - stopped_nodes => []} - """ - ?SH-PROMPT - - !./bin/emqx_ctl resources create 'web_hook' -i 'resource:691c29ba' -c '{"url": "http://127.0.0.1:8080/counter", "method": "POST"}' - ?created - ?SH-PROMPT - !./bin/emqx_ctl rules create 'SELECT * FROM "t/#"' '[{"name":"data_to_webserver", "params": {"$$resource": "resource:691c29ba"}}]' - ?created - ?SH-PROMPT - -[shell emqx] - !./bin/emqx_ctl resources list - ?691c29ba - ?SH-PROMPT - !./bin/emqx_ctl rules list - ?691c29ba - ?SH-PROMPT - -[shell bench] - !cd $BENCH_PATH - - !./emqtt_bench pub -c 10 -I 1000 -t t/%i -s 64 -L 300 - ???sent - -[shell emqx] - !echo "" > log/emqx.log.1 - ?SH-PROMPT - - !cp -f ../$PROFILE-$VSN-*-ubuntu20.04-amd64.tar.gz releases/ - - ## 1. upgrade to the new version - !./bin/emqx install $VSN - ?Made release permanent: "$VSN" - ?SH-PROMPT - - !./bin/emqx versions |grep permanent - ?(.*)$VSN - ?SH-PROMPT - - ## 2. downgrade to the old version - !./bin/emqx install $old_vsn - ?Made release permanent:.* - ?SH-PROMPT - - !./bin/emqx versions |grep permanent | grep -qs "$old_vsn" - ?SH-PROMPT: - !echo ==$$?== - ?^==0== - ?SH-PROMPT: - - ## 3. again, upgrade to the new version - !./bin/emqx install $VSN - ?Made release permanent: "$VSN" - ?SH-PROMPT - - !./bin/emqx versions |grep permanent - ?(.*)$VSN - ?SH-PROMPT - - !./bin/emqx_ctl cluster status - """??? - Cluster status: #{running_nodes => ['emqx2@127.0.0.1','emqx@127.0.0.1'], - stopped_nodes => []} - """ - ?SH-PROMPT - - !./bin/emqx_ctl plugins list | grep emqx_management - ?Plugin\(emqx_management.*active=true\) - ?SH-PROMPT - -[shell emqx2] - !echo "" > log/emqx.log.1 - ?SH-PROMPT - - !cp -f ../$PROFILE-$VSN-*-ubuntu20.04-amd64.tar.gz releases/ - - ## 1. 
upgrade to the new version - !./bin/emqx install $VSN - ?Made release permanent: "$VSN" - ?SH-PROMPT - - !./bin/emqx versions |grep permanent - ?(.*)$VSN - ?SH-PROMPT - - ## 2. downgrade to the old version - !./bin/emqx install $old_vsn - ?Made release permanent:.* - ?SH-PROMPT - - !./bin/emqx versions |grep permanent | grep -qs "$old_vsn" - ?SH-PROMPT: - !echo ==$$?== - ?^==0== - ?SH-PROMPT: - - ## 3. again, upgrade to the new version - !./bin/emqx install $VSN - ?Made release permanent: "$VSN" - ?SH-PROMPT - - !./bin/emqx versions |grep permanent - ?(.*)$VSN - ?SH-PROMPT - - !./bin/emqx_ctl cluster status - """??? - Cluster status: #{running_nodes => ['emqx2@127.0.0.1','emqx@127.0.0.1'], - stopped_nodes => []} - """ - ?SH-PROMPT - - !./bin/emqx_ctl plugins list | grep emqx_management - ?Plugin\(emqx_management.*active=true\) - ?SH-PROMPT - -## We don't guarantee not to lose a single message! -## So even if we received 290~300 messages, we consider it as success -[shell bench] - ???publish complete - ??SH-PROMPT: - !sleep 5 - ?SH-PROMPT - - !curl --user admin:public --silent --show-error http://localhost:18083/api/v5/rules | jq --raw-output ".[0].node_metrics[] | select(.node==\"emqx@127.0.0.1\") | .metrics.matched" - ?300 - ?SH-PROMPT - - !curl --user admin:public --silent --show-error http://localhost:18083/api/v5/rules | jq --raw-output ".[0].node_metrics[] | select(.node==\"emqx@127.0.0.1\") | .metrics.\"actions.success\"" - ?\{"data":(29[0-9])|(300),"code":0\} - ?SH-PROMPT - -[shell emqx2] - !cat log/emqx.log.1 |grep -v 691c29ba |tail -n 100 - -error - ??SH-PROMPT: - - !./bin/emqx stop - ?ok - ?SH-PROMPT: - - !rm -rf $PACKAGE_PATH/emqx2 - ?SH-PROMPT: - -[shell emqx] - !cat log/emqx.log.1 |grep -v 691c29ba |tail -n 100 - -error - ??SH-PROMPT: - - !./bin/emqx stop - ?ok - ?SH-PROMPT: - - !rm -rf $PACKAGE_PATH/emqx - ?SH-PROMPT: - -[shell http_server] - !http_server:stop(). - ?ok - ?> - !halt(3). 
- ?SH-PROMPT: - -[endloop] - -[cleanup] - !echo ==$$?== - ?==0== diff --git a/.github/workflows/build_and_push_docker_images.yaml b/.github/workflows/build_and_push_docker_images.yaml index 92ec10f49..c6b9c5a8d 100644 --- a/.github/workflows/build_and_push_docker_images.yaml +++ b/.github/workflows/build_and_push_docker_images.yaml @@ -20,7 +20,7 @@ jobs: prepare: runs-on: ubuntu-20.04 # prepare source with any OTP version, no need for a matrix - container: "ghcr.io/emqx/emqx-builder/5.0-16:1.13.4-24.2.1-1-ubuntu20.04" + container: "ghcr.io/emqx/emqx-builder/5.0-17:1.13.4-24.2.1-1-ubuntu20.04" outputs: BUILD_PROFILE: ${{ steps.get_profile.outputs.BUILD_PROFILE }} @@ -211,7 +211,7 @@ jobs: tags: ${{ steps.meta.outputs.tags }} labels: ${{ steps.meta.outputs.labels }} build-args: | - BUILD_FROM=ghcr.io/emqx/emqx-builder/5.0-16:${{ matrix.elixir }}-${{ matrix.otp }}-${{ matrix.os[0] }} + BUILD_FROM=ghcr.io/emqx/emqx-builder/5.0-17:${{ matrix.elixir }}-${{ matrix.otp }}-${{ matrix.os[0] }} RUN_FROM=${{ matrix.os[1] }} EMQX_NAME=${{ steps.pre-meta.outputs.emqx_name }} file: source/${{ matrix.os[2] }} diff --git a/.github/workflows/build_packages.yaml b/.github/workflows/build_packages.yaml index f99184002..2f378bfa2 100644 --- a/.github/workflows/build_packages.yaml +++ b/.github/workflows/build_packages.yaml @@ -213,7 +213,7 @@ jobs: needs: prepare runs-on: ${{ matrix.build_machine }} container: - image: "ghcr.io/emqx/emqx-builder/5.0-16:${{ matrix.elixir }}-${{ matrix.otp }}-${{ matrix.os }}" + image: "ghcr.io/emqx/emqx-builder/5.0-17:${{ matrix.elixir }}-${{ matrix.otp }}-${{ matrix.os }}" strategy: fail-fast: false @@ -328,7 +328,7 @@ jobs: --pkgtype "${PKGTYPE}" \ --arch "${ARCH}" \ --elixir "${IsElixir}" \ - --builder "ghcr.io/emqx/emqx-builder/5.0-16:${ELIXIR}-${OTP}-${SYSTEM}" + --builder "ghcr.io/emqx/emqx-builder/5.0-17:${ELIXIR}-${OTP}-${SYSTEM}" done - uses: actions/upload-artifact@v1 with: diff --git a/.github/workflows/build_slim_packages.yaml 
b/.github/workflows/build_slim_packages.yaml index 6cfcb5ee0..ba27c1b38 100644 --- a/.github/workflows/build_slim_packages.yaml +++ b/.github/workflows/build_slim_packages.yaml @@ -39,7 +39,7 @@ jobs: - ubuntu20.04 - el8 - container: "ghcr.io/emqx/emqx-builder/5.0-16:${{ matrix.elixir }}-${{ matrix.otp }}-${{ matrix.os }}" + container: "ghcr.io/emqx/emqx-builder/5.0-17:${{ matrix.elixir }}-${{ matrix.otp }}-${{ matrix.os }}" steps: - uses: AutoModality/action-clean@v1 diff --git a/.github/workflows/check_deps_integrity.yaml b/.github/workflows/check_deps_integrity.yaml index f696406af..4a6c31b5e 100644 --- a/.github/workflows/check_deps_integrity.yaml +++ b/.github/workflows/check_deps_integrity.yaml @@ -5,7 +5,7 @@ on: [pull_request, push] jobs: check_deps_integrity: runs-on: ubuntu-20.04 - container: ghcr.io/emqx/emqx-builder/5.0-16:1.13.4-24.2.1-1-ubuntu20.04 + container: ghcr.io/emqx/emqx-builder/5.0-17:1.13.4-24.2.1-1-ubuntu20.04 steps: - uses: actions/checkout@v2 diff --git a/.github/workflows/code_style_check.yaml b/.github/workflows/code_style_check.yaml index 973312d76..5fbf91236 100644 --- a/.github/workflows/code_style_check.yaml +++ b/.github/workflows/code_style_check.yaml @@ -5,7 +5,7 @@ on: [pull_request] jobs: code_style_check: runs-on: ubuntu-20.04 - container: "ghcr.io/emqx/emqx-builder/5.0-16:1.13.4-24.2.1-1-ubuntu20.04" + container: "ghcr.io/emqx/emqx-builder/5.0-17:1.13.4-24.2.1-1-ubuntu20.04" steps: - uses: actions/checkout@v2 with: diff --git a/.github/workflows/elixir_apps_check.yaml b/.github/workflows/elixir_apps_check.yaml index 524943d43..440c91545 100644 --- a/.github/workflows/elixir_apps_check.yaml +++ b/.github/workflows/elixir_apps_check.yaml @@ -8,7 +8,7 @@ jobs: elixir_apps_check: runs-on: ubuntu-latest # just use the latest builder - container: "ghcr.io/emqx/emqx-builder/5.0-16:1.13.4-24.2.1-1-ubuntu20.04" + container: "ghcr.io/emqx/emqx-builder/5.0-17:1.13.4-24.2.1-1-ubuntu20.04" strategy: fail-fast: false diff --git 
a/.github/workflows/elixir_deps_check.yaml b/.github/workflows/elixir_deps_check.yaml index 49d16d4bf..312278caa 100644 --- a/.github/workflows/elixir_deps_check.yaml +++ b/.github/workflows/elixir_deps_check.yaml @@ -7,7 +7,7 @@ on: [pull_request, push] jobs: elixir_deps_check: runs-on: ubuntu-20.04 - container: ghcr.io/emqx/emqx-builder/5.0-16:1.13.4-24.2.1-1-ubuntu20.04 + container: ghcr.io/emqx/emqx-builder/5.0-17:1.13.4-24.2.1-1-ubuntu20.04 steps: - name: Checkout diff --git a/.github/workflows/elixir_release.yml b/.github/workflows/elixir_release.yml index e2380c21f..006d6aba8 100644 --- a/.github/workflows/elixir_release.yml +++ b/.github/workflows/elixir_release.yml @@ -12,7 +12,7 @@ on: jobs: elixir_release_build: runs-on: ubuntu-latest - container: ghcr.io/emqx/emqx-builder/5.0-16:1.13.4-24.2.1-1-ubuntu20.04 + container: ghcr.io/emqx/emqx-builder/5.0-17:1.13.4-24.2.1-1-ubuntu20.04 steps: - name: Checkout diff --git a/.github/workflows/run_emqx_app_tests.yaml b/.github/workflows/run_emqx_app_tests.yaml index 7539d6d8b..b2f13e8be 100644 --- a/.github/workflows/run_emqx_app_tests.yaml +++ b/.github/workflows/run_emqx_app_tests.yaml @@ -24,7 +24,7 @@ jobs: - amd64 runs-on: aws-amd64 - container: "ghcr.io/emqx/emqx-builder/5.0-16:${{ matrix.elixir}}-${{ matrix.otp }}-${{ matrix.os }}" + container: "ghcr.io/emqx/emqx-builder/5.0-17:${{ matrix.elixir}}-${{ matrix.otp }}-${{ matrix.os }}" defaults: run: diff --git a/.github/workflows/run_fvt_tests.yaml b/.github/workflows/run_fvt_tests.yaml index 2703630f2..8d9d65d2e 100644 --- a/.github/workflows/run_fvt_tests.yaml +++ b/.github/workflows/run_fvt_tests.yaml @@ -16,7 +16,7 @@ jobs: prepare: runs-on: ubuntu-20.04 # prepare source with any OTP version, no need for a matrix - container: ghcr.io/emqx/emqx-builder/5.0-16:1.13.4-24.2.1-1-alpine3.15.1 + container: ghcr.io/emqx/emqx-builder/5.0-17:1.13.4-24.2.1-1-alpine3.15.1 steps: - uses: actions/checkout@v2 @@ -68,7 +68,7 @@ jobs: - name: make docker image 
working-directory: source env: - EMQX_BUILDER: ghcr.io/emqx/emqx-builder/5.0-16:${{ matrix.elixir }}-${{ matrix.otp }}-${{ matrix.os[0] }} + EMQX_BUILDER: ghcr.io/emqx/emqx-builder/5.0-17:${{ matrix.elixir }}-${{ matrix.otp }}-${{ matrix.os[0] }} EMQX_RUNNER: ${{ matrix.os[1] }} run: | make ${{ matrix.profile }}-docker @@ -140,7 +140,7 @@ jobs: - name: make docker image working-directory: source env: - EMQX_BUILDER: ghcr.io/emqx/emqx-builder/5.0-16:${{ matrix.elixir }}-${{ matrix.otp }}-${{ matrix.os[0] }} + EMQX_BUILDER: ghcr.io/emqx/emqx-builder/5.0-17:${{ matrix.elixir }}-${{ matrix.otp }}-${{ matrix.os[0] }} EMQX_RUNNER: ${{ matrix.os[1] }} run: | make ${{ matrix.profile }}-docker diff --git a/.github/workflows/run_relup_tests.yaml b/.github/workflows/run_relup_tests.yaml index 97c33f229..ddb8380b2 100644 --- a/.github/workflows/run_relup_tests.yaml +++ b/.github/workflows/run_relup_tests.yaml @@ -14,101 +14,117 @@ on: pull_request: jobs: - relup_test: - strategy: - matrix: - profile: - - emqx - - emqx-enterprise - otp: - - 24.2.1-1 - # no need to use more than 1 version of Elixir, since tests - # run using only Erlang code. This is needed just to specify - # the base image. 
- elixir: - - 1.13.4 - os: - - ubuntu20.04 - arch: - - amd64 - + relup_test_plan: runs-on: ubuntu-20.04 - container: "ghcr.io/emqx/emqx-builder/5.0-16:${{ matrix.elixir }}-${{ matrix.otp }}-${{ matrix.os }}" - + container: "ghcr.io/emqx/emqx-builder/5.0-17:1.13.4-24.2.1-1-ubuntu20.04" + outputs: + CUR_CE_VSN: ${{ steps.find-versions.outputs.CUR_CE_VSN }} + CUR_EE_VSN: ${{ steps.find-versions.outputs.CUR_EE_VSN }} + OLD_VERSIONS: ${{ steps.find-versions.outputs.OLD_VERSIONS }} defaults: run: shell: bash steps: - - uses: actions/setup-python@v2 - with: - python-version: '3.8' - architecture: 'x64' - uses: actions/checkout@v2 + name: Checkout with: - repository: emqx/paho.mqtt.testing - ref: develop-4.0 - path: paho.mqtt.testing - - uses: actions/checkout@v2 + path: emqx + fetch-depth: 0 + - name: Find versions + id: find-versions + run: | + set -x + cd emqx + ce_vsn="$(./pkg-vsn.sh opensource)" + ee_vsn="$(./pkg-vsn.sh enterprise)" + old_ce_vsns="$(./scripts/relup-base-vsns.sh opensource | xargs)" + old_ee_vsns="$(./scripts/relup-base-vsns.sh enterprise | xargs)" + old_vsns=$(echo -n "${old_ce_vsns} ${old_ee_vsns}" | sed 's/ $//g' | jq -R -s -c 'split(" ")') + echo "::set-output name=CUR_CE_VSN::$ce_vsn" + echo "::set-output name=CUR_EE_VSN::$ee_vsn" + echo "::set-output name=OLD_VERSIONS::$old_vsns" + - name: build emqx + run: | + set -x + cd emqx + make emqx-tgz + make emqx-enterprise-tgz + - uses: actions/upload-artifact@v2 + name: Upload built emqx and test scenario with: - repository: terry-xiaoyu/one_more_emqx - ref: master - path: one_more_emqx - - uses: actions/checkout@v2 + name: emqx_built + path: | + emqx/_upgrade_base + emqx/_packages + emqx/scripts + emqx/.ci + + relup_test_run: + needs: + - relup_test_plan + runs-on: ubuntu-20.04 + strategy: + fail-fast: false + matrix: + old_vsn: ${{ fromJson(needs.relup_test_plan.outputs.OLD_VERSIONS) }} + env: + OLD_VSN: "${{ matrix.old_vsn }}" + CUR_CE_VSN: "${{ needs.relup_test_plan.outputs.CUR_CE_VSN }}" + 
CUR_EE_VSN: "${{ needs.relup_test_plan.outputs.CUR_EE_VSN }}" + defaults: + run: + shell: bash + steps: + # setup Erlang to run lux + - uses: erlef/setup-beam@v1 with: - repository: emqx/emqtt-bench - ref: 0.3.4 - path: emqtt-bench + otp-version: "24.2" - uses: actions/checkout@v2 with: repository: hawk/lux - ref: lux-2.6 + ref: lux-2.8.1 path: lux - - uses: actions/checkout@v2 - with: - repository: ${{ github.repository }} - path: emqx - fetch-depth: 0 - - - name: Get old vsn - run: echo "OLD_VSNS=$(emqx/scripts/relup-base-vsns.sh ${{ matrix.profile }} | xargs echo -n)" >> $GITHUB_ENV - run: echo "VSN=$(emqx/pkg-vsn.sh ${{ matrix.profile }})" >> $GITHUB_ENV - - - name: build emqx - env: - PROFILE: ${{ matrix.profile }} - run: make -C emqx ${PROFILE}-tgz - - name: build emqtt-bench - run: make -C emqtt-bench - - name: build lux + - name: Install lux run: | set -e -u -x cd lux autoconf ./configure make - make install + echo "$(pwd)/bin" >> $GITHUB_PATH + - uses: actions/download-artifact@v2 + name: Download built emqx and test scenario + with: + name: emqx_built + path: . - name: run relup test - env: - PROFILE: ${{ matrix.profile }} - timeout-minutes: 20 run: | set -e -x -u - if [ -n "$OLD_VSNS" ]; then - mkdir -p packages - cp emqx/_packages/${PROFILE}/*.tar.gz packages - cp emqx/_upgrade_base/*.tar.gz packages - lux \ - --case_timeout infinity \ - --var PROFILE=$PROFILE \ - --var PACKAGE_PATH=$(pwd)/packages \ - --var BENCH_PATH=$(pwd)/emqtt-bench \ - --var ONE_MORE_EMQX_PATH=$(pwd)/one_more_emqx \ - --var VSN="$VSN" \ - --var OLD_VSNS="$OLD_VSNS" \ - emqx/.ci/fvt_tests/relup.lux + chmod a+x scripts/**/*.sh + ls -l scripts + ls -l scripts/relup-test + case "$OLD_VSN" in + e*) + export CUR_VSN="$CUR_EE_VSN" + ;; + v*) + export CUR_VSN="$CUR_CE_VSN" + ;; + *) + echo "unknown old version $OLD_VSN" + exit 1 + ;; + esac + mkdir -p lux_logs + if ! 
./scripts/relup-test/run-relup-lux.sh $OLD_VSN; then + docker logs node1.emqx.io | tee lux_logs/emqx1.log + docker logs node2.emqx.io | tee lux_logs/emqx2.log + exit 1 fi - - uses: actions/upload-artifact@v1 + - uses: actions/upload-artifact@v2 + name: Save debug data if: failure() with: - name: lux_logs - path: lux_logs + name: debug_data + path: | + lux_logs diff --git a/.github/workflows/run_test_cases.yaml b/.github/workflows/run_test_cases.yaml index 1d7c7a979..d38acaf31 100644 --- a/.github/workflows/run_test_cases.yaml +++ b/.github/workflows/run_test_cases.yaml @@ -17,7 +17,7 @@ jobs: prepare: runs-on: ubuntu-20.04 # prepare source with any OTP version, no need for a matrix - container: "ghcr.io/emqx/emqx-builder/5.0-16:1.13.4-24.2.1-1-ubuntu20.04" + container: "ghcr.io/emqx/emqx-builder/5.0-17:1.13.4-24.2.1-1-ubuntu20.04" outputs: fast_ct_apps: ${{ steps.run_find_apps.outputs.fast_ct_apps }} docker_ct_apps: ${{ steps.run_find_apps.outputs.docker_ct_apps }} @@ -54,7 +54,7 @@ jobs: defaults: run: shell: bash - container: "ghcr.io/emqx/emqx-builder/5.0-16:1.13.4-24.2.1-1-ubuntu20.04" + container: "ghcr.io/emqx/emqx-builder/5.0-17:1.13.4-24.2.1-1-ubuntu20.04" steps: - uses: AutoModality/action-clean@v1 @@ -144,7 +144,7 @@ jobs: matrix: app_name: ${{ fromJson(needs.prepare.outputs.fast_ct_apps) }} runs-on: aws-amd64 - container: "ghcr.io/emqx/emqx-builder/5.0-16:1.13.4-24.2.1-1-ubuntu20.04" + container: "ghcr.io/emqx/emqx-builder/5.0-17:1.13.4-24.2.1-1-ubuntu20.04" defaults: run: shell: bash @@ -179,7 +179,7 @@ jobs: - ct - ct_docker runs-on: ubuntu-20.04 - container: "ghcr.io/emqx/emqx-builder/5.0-16:1.13.4-24.2.1-1-ubuntu20.04" + container: "ghcr.io/emqx/emqx-builder/5.0-17:1.13.4-24.2.1-1-ubuntu20.04" steps: - uses: AutoModality/action-clean@v1 - uses: actions/download-artifact@v2 diff --git a/.gitignore b/.gitignore index f4472b9ab..f0e55ad28 100644 --- a/.gitignore +++ b/.gitignore @@ -66,3 +66,4 @@ mix.lock 
apps/emqx/test/emqx_static_checks_data/master.bpapi # rendered configurations *.conf.rendered +lux_logs/ diff --git a/Makefile b/Makefile index ceb5a9934..6be3f76c9 100644 --- a/Makefile +++ b/Makefile @@ -3,7 +3,7 @@ REBAR = $(CURDIR)/rebar3 BUILD = $(CURDIR)/build SCRIPTS = $(CURDIR)/scripts export EMQX_RELUP ?= true -export EMQX_DEFAULT_BUILDER = ghcr.io/emqx/emqx-builder/5.0-16:1.13.4-24.2.1-1-debian11 +export EMQX_DEFAULT_BUILDER = ghcr.io/emqx/emqx-builder/5.0-17:1.13.4-24.2.1-1-debian11 export EMQX_DEFAULT_RUNNER = debian:11-slim export OTP_VSN ?= $(shell $(CURDIR)/scripts/get-otp-vsn.sh) export ELIXIR_VSN ?= $(shell $(CURDIR)/scripts/get-elixir-vsn.sh) diff --git a/apps/emqx_management/src/emqx_mgmt_cli.erl b/apps/emqx_management/src/emqx_mgmt_cli.erl index 50f4b21af..0f60317fa 100644 --- a/apps/emqx_management/src/emqx_mgmt_cli.erl +++ b/apps/emqx_management/src/emqx_mgmt_cli.erl @@ -135,14 +135,35 @@ cluster(["force-leave", SNode]) -> end; cluster(["status"]) -> emqx_ctl:print("Cluster status: ~p~n", [ekka_cluster:info()]); +cluster(["status", "--json"]) -> + Info = sort_map_list_fields(ekka_cluster:info()), + emqx_ctl:print("~ts~n", [emqx_logger_jsonfmt:best_effort_json(Info)]); cluster(_) -> emqx_ctl:usage([ {"cluster join ", "Join the cluster"}, {"cluster leave", "Leave the cluster"}, {"cluster force-leave ", "Force the node leave from cluster"}, - {"cluster status", "Cluster status"} + {"cluster status [--json]", "Cluster status"} ]). +%% sort lists for deterministic output +sort_map_list_fields(Map) when is_map(Map) -> + lists:foldl( + fun(Field, Acc) -> + sort_map_list_field(Field, Acc) + end, + Map, + maps:keys(Map) + ); +sort_map_list_fields(NotMap) -> + NotMap. + +sort_map_list_field(Field, Map) -> + case maps:get(Field, Map) of + [_ | _] = L -> Map#{Field := lists:sort(L)}; + _ -> Map + end. 
+ %%-------------------------------------------------------------------- %% @doc Query clients diff --git a/deploy/docker/Dockerfile b/deploy/docker/Dockerfile index 5213579a7..0fb259082 100644 --- a/deploy/docker/Dockerfile +++ b/deploy/docker/Dockerfile @@ -1,4 +1,4 @@ -ARG BUILD_FROM=ghcr.io/emqx/emqx-builder/5.0-16:1.13.4-24.2.1-1-debian11 +ARG BUILD_FROM=ghcr.io/emqx/emqx-builder/5.0-17:1.13.4-24.2.1-1-debian11 ARG RUN_FROM=debian:11-slim FROM ${BUILD_FROM} AS builder diff --git a/scripts/buildx.sh b/scripts/buildx.sh index c87ea44d2..96e48d9eb 100755 --- a/scripts/buildx.sh +++ b/scripts/buildx.sh @@ -9,7 +9,7 @@ ## example: ## ./scripts/buildx.sh --profile emqx --pkgtype tgz --arch arm64 \ -## --builder ghcr.io/emqx/emqx-builder/5.0-16:1.13.4-24.2.1-1-debian10 +## --builder ghcr.io/emqx/emqx-builder/5.0-17:1.13.4-24.2.1-1-debian10 set -euo pipefail @@ -24,7 +24,7 @@ help() { echo "--arch amd64|arm64: Target arch to build the EMQX package for" echo "--src_dir : EMQX source ode in this dir, default to PWD" echo "--builder : Builder image to pull" - echo " E.g. ghcr.io/emqx/emqx-builder/5.0-16:1.13.4-24.2.1-1-debian10" + echo " E.g. ghcr.io/emqx/emqx-builder/5.0-17:1.13.4-24.2.1-1-debian10" } while [ "$#" -gt 0 ]; do diff --git a/scripts/relup-test/README.md b/scripts/relup-test/README.md new file mode 100644 index 000000000..a0a13f4fa --- /dev/null +++ b/scripts/relup-test/README.md @@ -0,0 +1,11 @@ +# Hot-upgrade test + +This collection of scripts is used in CI. + +It can also be used to run the test locally, but limited to ubuntu 20.04 so far. 
+ +How to: + +``` +./scripts/relup-test/run-relup-lux.sh v5.0.0 +``` diff --git a/scripts/relup-test/check-results.sh b/scripts/relup-test/check-results.sh new file mode 100755 index 000000000..f6e20bda9 --- /dev/null +++ b/scripts/relup-test/check-results.sh @@ -0,0 +1,37 @@ +#!/usr/bin/env bash + +set -euo pipefail + +matched_node1="$(curl --user admin:public -sf http://localhost:18083/api/v5/rules | jq --raw-output ".[0].node_metrics[] | select(.node==\"emqx@node1.emqx.io\") | .metrics.matched")" +matched_node2="$(curl --user admin:public -sf http://localhost:18084/api/v5/rules | jq --raw-output ".[0].node_metrics[] | select(.node==\"emqx@node2.emqx.io\") | .metrics.matched")" +success_node1="$(curl --user admin:public -sf http://localhost:18083/api/v5/rules | jq --raw-output ".[0].node_metrics[] | select(.node==\"emqx@node1.emqx.io\") | .metrics.\"actions.success\"")" +success_node2="$(curl --user admin:public -sf http://localhost:18084/api/v5/rules | jq --raw-output ".[0].node_metrics[] | select(.node==\"emqx@node2.emqx.io\") | .metrics.\"actions.success\"")" +webhook="$(curl -sf http://localhost:7077/counter | jq '.data')" + +MATCHED_TOTAL="$(( matched_node1 + matched_node2 ))" +SUCCESS_TOTAL="$(( success_node1 + success_node2 ))" +COLLECTED_TOTAL="$webhook" + +is_number() { + re='^[0-9]+$' + if ! 
[[ $2 =~ $re ]] ; then + echo "error: $1=$2 is not a number" >&2; exit 1 + fi +} + +is_number MATCHED_TOTAL "$MATCHED_TOTAL" +is_number SUCCESS_TOTAL "$SUCCESS_TOTAL" +is_number COLLECTED_TOTAL "$COLLECTED_TOTAL" + +if [ "$MATCHED_TOTAL" -lt 290 ] || \ + [ "$SUCCESS_TOTAL" -lt 290 ] || \ + [ "$COLLECTED_TOTAL" -lt 290 ]; then + echo "FAILED" + echo "MATCHED_TOTAL=$MATCHED_TOTAL" + echo "SUCCESS_TOTAL=$SUCCESS_TOTAL" + echo "COLLECTED_TOTAL=$COLLECTED_TOTAL" + exit 1 +else + echo "ALL_IS_WELL" + exit 0 +fi diff --git a/scripts/relup-test/relup.lux b/scripts/relup-test/relup.lux new file mode 100644 index 000000000..8db8169f8 --- /dev/null +++ b/scripts/relup-test/relup.lux @@ -0,0 +1,130 @@ +[config var=PROJ_ROOT] +[config var=VSN] +[config var=CUR_PKG] +[config var=OLD_VSN] +[config var=NODE1] +[config var=NODE2] +[config var=BENCH] + +[config shell_cmd=/bin/bash] +[config timeout=600000] + +[shell emqx1] + !docker exec -it $NODE1 emqx_ctl cluster status + ???running_nodes => ['emqx@node1.emqx.io','emqx@node2.emqx.io'] + ?SH-PROMPT + + ## create a webhook data bridge with id "my_webhook" + !curl --user admin:public --silent --show-error 'http://localhost:18083/api/v5/bridges' -X 'POST' -H 'Content-Type: application/json' --data-binary '{"name":"my_webhook","body":"","method":"post","url":"http://webhook.emqx.io:7077/counter","headers":{"content-type":"application/json"},"pool_size":4,"enable_pipelining":100,"connect_timeout":"5s","request_timeout":"5s","max_retries":3,"type":"webhook","ssl":{"enable":false,"verify":"verify_none"}}' | jq '.status' + ?connected + ?SH-PROMPT + + ## create a rule that uses the webhook as action, the rule id = "rule_edsy" + !curl --user admin:public --silent --show-error 'http://localhost:18083/api/v5/rules' -X 'POST' -H 'Content-Type: application/json' --data-binary '{"id":"rule_edsy","sql":"SELECT\n *\nFROM\n \"t/#\"","actions":["webhook:my_webhook"]}' | jq '.id' + ?rule_edsy + ?SH-PROMPT + +[shell emqx2] + ## verify the bridges and 
rules are sync to the other node + !curl --user admin:public --silent --show-error 'http://localhost:18084/api/v5/bridges/webhook:my_webhook' -X 'GET' -H 'Content-Type: application/json' | jq '.name' + ?my_webhook + ?SH-PROMPT + !curl --user admin:public --silent --show-error 'http://localhost:18084/api/v5/rules/rule_edsy' -X 'GET' -H 'Content-Type: application/json' | jq '.id' + ?rule_edsy + ?SH-PROMPT + +[shell bench] + !docker exec -it $BENCH emqtt_bench pub --host 'node1.emqx.io' --port 1883 -c 10 -I 1000 -t t/%i -s 64 -L 300 + +[shell emqx1] + !docker cp $CUR_PKG $NODE1:/emqx/releases/ + + ## 1. upgrade to the new version + !docker exec -it $NODE1 emqx install $VSN + ?Made release permanent: "$VSN" + ?SH-PROMPT + + !docker exec -it $NODE1 emqx versions | grep permanent + ?(.*)$VSN + ?SH-PROMPT + + ## 2. downgrade to the old version + !docker exec -it $NODE1 emqx install $OLD_VSN + ?Made release permanent:.* + ?SH-PROMPT + + !docker exec -it $NODE1 emqx versions | grep permanent | grep -qs "$OLD_VSN" + ?SH-PROMPT: + !echo ==$$?== + ?^==0== + ?SH-PROMPT: + + ## 3. again, upgrade to the new version + !docker exec -it $NODE1 emqx install $VSN + ?Made release permanent: "$VSN" + ?SH-PROMPT + + !docker exec -it $NODE1 emqx versions | grep permanent + ?(.*)$VSN + ?SH-PROMPT + + !docker exec -it $NODE1 emqx_ctl cluster status + ???running_nodes => ['emqx@node1.emqx.io','emqx@node2.emqx.io'] + ?SH-PROMPT + +[shell emqx2] + !docker cp $CUR_PKG $NODE2:/emqx/releases/ + + ## 1. upgrade to the new version + !docker exec -it $NODE2 emqx install $VSN + ?Made release permanent: "$VSN" + ?SH-PROMPT + + !docker exec -it $NODE2 emqx versions | grep permanent + ?(.*)$VSN + ?SH-PROMPT + + ## 2. downgrade to the old version + !docker exec -it $NODE2 emqx install $OLD_VSN + ?Made release permanent:.* + ?SH-PROMPT + + !docker exec -it $NODE2 emqx versions | grep permanent | grep -qs "$OLD_VSN" + ?SH-PROMPT: + !echo ==$$?== + ?^==0== + ?SH-PROMPT: + + ## 3. 
again, upgrade to the new version + !docker exec -it $NODE2 emqx install $VSN + ?Made release permanent: "$VSN" + ?SH-PROMPT + + !docker exec -it $NODE2 emqx versions | grep permanent + ?(.*)$VSN + ?SH-PROMPT + + !docker exec -it $NODE1 emqx_ctl cluster status + ???running_nodes => ['emqx@node1.emqx.io','emqx@node2.emqx.io'] + ?SH-PROMPT + +## We don't guarantee not to lose a single message! +## So even if we received 290~300 messages, we consider it as success +[shell bench] + ???publish complete + ??SH-PROMPT: + !sleep 5 + ?SH-PROMPT + + !$PROJ_ROOT/scripts/relup-test/check-results.sh + !echo ==$$?== + ???ALL_IS_WELL + ?SH-PROMPT: + + !echo ==$$?== + ?^==0== + ?SH-PROMPT: + +[cleanup] + !echo ==$$?== + ?==0== diff --git a/scripts/relup-test/run-pkg.sh b/scripts/relup-test/run-pkg.sh new file mode 100755 index 000000000..0a7fda049 --- /dev/null +++ b/scripts/relup-test/run-pkg.sh @@ -0,0 +1,16 @@ +#!/usr/bin/env bash + +## This script is intended to run in docker +## extracts a .tar.gz package and runs EMQX in console mode + +set -euo pipefail + +PKG="$1" + +mkdir -p emqx +tar -C emqx -zxf "$PKG" + +ln -s "$(pwd)/emqx/bin/emqx" /usr/bin/emqx +ln -s "$(pwd)/emqx/bin/emqx_ctl" /usr/bin/emqx_ctl + +emqx console diff --git a/scripts/relup-test/run-relup-lux.sh b/scripts/relup-test/run-relup-lux.sh new file mode 100755 index 000000000..54be86444 --- /dev/null +++ b/scripts/relup-test/run-relup-lux.sh @@ -0,0 +1,69 @@ +#!/usr/bin/env bash + +## This script needs the 'lux' command in PATH +## it runs the scripts/relup-test/relup.lux script + +set -euo pipefail + +old_vsn="${1:-}" +if [ -z "$old_vsn" ]; then + echo "arg1 should be the upgrade base version" + exit 1 +fi + +# ensure dir +cd -P -- "$(dirname -- "$0")/../.." + +set -x + +if [ ! 
-d '.git' ] && [ -z "${CUR_VSN:-}" ]; then + echo "Unable to resolve current version, because it's not a git repo, and CUR_VSN is not set" + exit 1 +fi + +case "$old_vsn" in + e*) + cur_vsn="${CUR_VSN:-$(./pkg-vsn.sh emqx-enterprise)}" + profile='emqx-enterprise' + ;; + v*) + cur_vsn="${CUR_VSN:-$(./pkg-vsn.sh emqx)}" + profile='emqx' + ;; + *) + echo "unknown old version $old_vsn" + exit 1 + ;; +esac + +# From now on, no need for the v|e prefix +OLD_VSN="${old_vsn#[e|v]}" + +OLD_PKG="$(pwd)/_upgrade_base/${profile}-${OLD_VSN}-otp24.2.1-1-ubuntu20.04-amd64.tar.gz" +CUR_PKG="$(pwd)/_packages/${profile}/${profile}-${cur_vsn}-otp24.2.1-1-ubuntu20.04-amd64.tar.gz" + +if [ ! -f "$OLD_PKG" ]; then + echo "$OLD_PKG not found" + exit 1 +fi + +if [ ! -f "$CUR_PKG" ]; then + echo "$CUR_PKG not found" + exit 1 +fi + +# start two nodes and their friends (webhook server and a bench) in docker +./scripts/relup-test/start-relup-test-cluster.sh 'ubuntu:20.04' "$OLD_PKG" + +# run relup tests +lux \ + --progress verbose \ + --case_timeout infinity \ + --var PROJ_ROOT="$(pwd)" \ + --var VSN="$cur_vsn" \ + --var CUR_PKG="$CUR_PKG" \ + --var OLD_VSN="$OLD_VSN" \ + --var NODE1="node1.emqx.io" \ + --var NODE2="node2.emqx.io" \ + --var BENCH="bench.emqx.io" \ + ./scripts/relup-test/relup.lux diff --git a/scripts/relup-test/start-relup-test-cluster.sh b/scripts/relup-test/start-relup-test-cluster.sh new file mode 100755 index 000000000..10bb25a60 --- /dev/null +++ b/scripts/relup-test/start-relup-test-cluster.sh @@ -0,0 +1,111 @@ +#!/usr/bin/env bash + +set -euo pipefail + +## EMQX can only start with longname (https://erlang.org/doc/reference_manual/distributed.html) +## The host name part of EMQX's node name has to be static, this means we should either +## pre-assign static IP for containers, or ensure containers can communicate with each other by name +## this is why a docker network is created, and the containers' names have a dot. + +# ensure dir +cd -P -- "$(dirname -- "$0")/../.." 
+
+set -x # trace the commands that create the containers
+
+IMAGE="${1:?usage: $0 <image> <emqx-package.tar.gz>}"
+PKG="$(readlink -f "${2:?usage: $0 <image> <emqx-package.tar.gz>}")"
+
+NET='emqx.io'
+NODE1="node1.$NET"
+NODE2="node2.$NET"
+WEBHOOK="webhook.$NET"
+BENCH="bench.$NET"
+COOKIE='this-is-a-secret'
+## Erlang image is needed to run webhook server and emqtt-bench
+ERLANG_IMAGE="ghcr.io/emqx/emqx-builder/5.0-17:1.13.4-24.2.1-1-ubuntu20.04"
+# builder has emqtt-bench installed
+BENCH_IMAGE="$ERLANG_IMAGE"
+
+## clean up leftovers of a previous run (failures are ignored, e.g. when nothing exists yet)
+docker rm -f "$BENCH" >/dev/null 2>&1 || true
+docker rm -f "$WEBHOOK" >/dev/null 2>&1 || true
+docker rm -f "$NODE1" >/dev/null 2>&1 || true
+docker rm -f "$NODE2" >/dev/null 2>&1 || true
+docker network rm "$NET" >/dev/null 2>&1 || true
+
+docker network create "$NET"
+
+docker run -d -t --name "$NODE1" \
+    --net "$NET" \
+    -e EMQX_LOG__CONSOLE_HANDLER__LEVEL=warning \
+    -e EMQX_NODE_NAME="emqx@$NODE1" \
+    -e EMQX_NODE_COOKIE="$COOKIE" \
+    -p 18083:18083 \
+    -v "$PKG:/emqx.tar.gz" \
+    -v "$(pwd)/scripts/relup-test/run-pkg.sh:/run-pkg.sh" \
+    "$IMAGE" /run-pkg.sh emqx.tar.gz
+
+docker run -d -t --name "$NODE2" \
+    --net "$NET" \
+    -e EMQX_LOG__CONSOLE_HANDLER__LEVEL=warning \
+    -e EMQX_NODE_NAME="emqx@$NODE2" \
+    -e EMQX_NODE_COOKIE="$COOKIE" \
+    -p 18084:18083 \
+    -v "$PKG:/emqx.tar.gz" \
+    -v "$(pwd)/scripts/relup-test/run-pkg.sh:/run-pkg.sh" \
+    "$IMAGE" /run-pkg.sh emqx.tar.gz
+
+docker run -d -t --name "$WEBHOOK" \
+    --net "$NET" \
+    -v "$(pwd)/.ci/fvt_tests/http_server:/http_server" \
+    -w /http_server \
+    -p 7077:7077 \
+    "$ERLANG_IMAGE" bash -c 'rebar3 compile && erl -pa _build/default/lib/*/ebin -eval "http_server:start()"'
+
+docker run -d -t --name "$BENCH" \
+    --net "$NET" \
+    "$BENCH_IMAGE" \
+    bash -c 'sleep 10000; exit 1'
+
+wait_limit=60 # default limit in seconds; wait_for_emqx takes its own limit as $2
+wait_for_emqx() { # $1: container name, $2: max seconds to wait; exits 1 on timeout
+    local wait_sec=0
+    local container="$1"
+    local wait_limit="$2"
+    set +x # stop tracing so the polling loop only prints progress dots
+    while ! docker exec "$container" emqx_ctl status >/dev/null 2>&1; do
+        wait_sec=$(( wait_sec + 1 ))
+        if [ $wait_sec -gt "$wait_limit" ]; then
+            echo "timeout wait for EMQX"
+            exit 1
+        fi
+        echo -n '.'
+        sleep 1
+    done
+}
+
+wait_for_webhook() { # $1: max seconds to wait; polls localhost:7077 once per second, exits 1 on timeout
+    local wait_sec=0
+    local wait_limit="$1"
+    set +x # stop tracing so the polling loop only prints progress dots
+    while ! curl -f -s localhost:7077; do
+        wait_sec=$(( wait_sec + 1 ))
+        if [ $wait_sec -gt "$wait_limit" ]; then
+            echo "timeout wait for webhook"
+            exit 1
+        fi
+        echo -n '.'
+        sleep 1
+    done
+}
+
+# wait for webhook http server to start,
+# it may take a while because it needs to compile from source code
+wait_for_webhook 120
+# after webhook start, it should not cost more than 30 seconds
+wait_for_emqx "$NODE1" 30
+# after node1 is up, it should not cost more than 10 seconds
+wait_for_emqx "$NODE2" 10
+echo
+
+docker exec "$NODE1" emqx_ctl cluster join "emqx@$NODE2"
diff --git a/scripts/start-two-nodes-in-docker.sh b/scripts/start-two-nodes-in-docker.sh
index e9d5b27de..ea9029752 100755
--- a/scripts/start-two-nodes-in-docker.sh
+++ b/scripts/start-two-nodes-in-docker.sh
@@ -42,7 +42,7 @@ docker run -d -t --restart=always --name "$NODE2" \
   -p 18084:18083 \
   "$IMAGE"
 
-wait (){
+wait () {
   container="$1"
   while ! docker exec "$container" emqx_ctl status >/dev/null 2>&1; do
     echo -n '.'