From 8b7fc490aeb674e4720c8d2976196ed35839d93d Mon Sep 17 00:00:00 2001 From: "Zaiming (Stone) Shi" Date: Tue, 28 Jun 2022 22:06:00 +0200 Subject: [PATCH] refactor: run relup test nodes in individual docker containers When running EMQX in lux shell inside a docker container the node won't boot due to load_failed for redbug modules reason is still unknown --- .ci/fvt_tests/http_server/src/http_server.erl | 12 +- .ci/fvt_tests/relup.lux | 215 ------------------ .github/workflows/run_relup_tests.yaml | 79 +++---- .gitignore | 1 + scripts/relup/check-results.sh | 41 ++++ scripts/relup/relup.lux | 130 +++++++++++ scripts/relup/run-pkg.sh | 16 ++ scripts/relup/run-relup-lux.sh | 64 ++++++ scripts/relup/start-relup-test-cluster.sh | 111 +++++++++ scripts/run-relup-lux.sh | 42 ---- 10 files changed, 400 insertions(+), 311 deletions(-) delete mode 100644 .ci/fvt_tests/relup.lux create mode 100755 scripts/relup/check-results.sh create mode 100644 scripts/relup/relup.lux create mode 100755 scripts/relup/run-pkg.sh create mode 100755 scripts/relup/run-relup-lux.sh create mode 100755 scripts/relup/start-relup-test-cluster.sh delete mode 100755 scripts/run-relup-lux.sh diff --git a/.ci/fvt_tests/http_server/src/http_server.erl b/.ci/fvt_tests/http_server/src/http_server.erl index 4b1981de4..4aaa25b95 100644 --- a/.ci/fvt_tests/http_server/src/http_server.erl +++ b/.ci/fvt_tests/http_server/src/http_server.erl @@ -32,7 +32,7 @@ start() -> application:ensure_all_started(minirest), - ets:new(relup_test_message, [named_table, public]), + _ = spawn(fun ets_owner/0), Handlers = [{"/", minirest:handler(#{modules => [?MODULE]})}], Dispatch = [{"/[...]", minirest, Handlers}], minirest:start_http(?MODULE, #{socket_opts => [inet, {port, 7077}]}, Dispatch). @@ -42,7 +42,8 @@ stop() -> minirest:stop_http(?MODULE). get_counter(_Binding, _Params) -> - return({ok, ets:info(relup_test_message, size)}). + V = ets:info(relup_test_message, size), + return({ok, V}). 
add_counter(_Binding, Params) -> case lists:keymember(<<"payload">>, 1, Params) of @@ -50,6 +51,13 @@ add_counter(_Binding, Params) -> {value, {<<"id">>, ID}, Params1} = lists:keytake(<<"id">>, 1, Params), ets:insert(relup_test_message, {ID, Params1}); _ -> + io:format("discarded: ~p\n", [Params]), ok end, return(). + +ets_owner() -> + ets:new(relup_test_message, [named_table, public]), + receive + stop -> ok + end. diff --git a/.ci/fvt_tests/relup.lux b/.ci/fvt_tests/relup.lux deleted file mode 100644 index de48a723a..000000000 --- a/.ci/fvt_tests/relup.lux +++ /dev/null @@ -1,215 +0,0 @@ -[config var=PROJ_ROOT] -[config var=PROFILE] -[config var=VSN] -[config var=CUR_PKG] -[config var=OLD_VSN] -[config var=OLD_PKG] - -[config shell_cmd=/bin/bash] -[config timeout=600000] - -[shell http_server] - !cd http_server - !rebar3 shell - ???Eshell - ???> - !http_server:start(). - ?Start http_server listener on 7077 successfully. - ?ok - ?> - -[shell emqx1] - !cd $PROJ_ROOT - !mkdir -p emqx1 - !tar -C emqx1 -zxf "$OLD_PKG" - ?SH-PROMPT - - !cd emqx1 - !export EMQX_NODE_NAME='emqx1@127.0.0.1' - !./bin/emqx start - ?EMQX .* is started successfully! - ?SH-PROMPT - -[shell emqx2] - !cd $PROJ_ROOT - !mkdir -p emqx2 - !tar -C emqx2 -zxf "$OLD_PKG" - ?SH-PROMPT - - !cd emqx2 - !export EMQX_NODE__NAME='emqx2@127.0.0.1' - !export EMQX_STATSD__SERVER='127.0.0.1:8124' - !export EMQX_LISTENERS__TCP__DEFAULT__BIND='0.0.0.0:1882' - !export EMQX_LISTENERS__SSL__DEFAULT__BIND='0.0.0.0:8882' - !export EMQX_LISTENERS__WS__DEFAULT__BIND='0.0.0.0:8082' - !export EMQX_LISTENERS__WSS__DEFAULT__BIND='0.0.0.0:8085' - !export EMQX_DASHBOARD__LISTENERS__HTTP__BIND='0.0.0.0:18082' - !./bin/emqx start - ?EMQX .* is started successfully! - ?SH-PROMPT - - !./bin/emqx_ctl cluster join emqx1@127.0.0.1 - ???Join the cluster successfully. 
- ?SH-PROMPT - - !./bin/emqx_ctl cluster status --json | jq -c .running_nodes - ???["emqx1@127.0.0.1","emqx2@127.0.0.1"] - ?SH-PROMPT - - ## create a webhook data bridge with id "my_webhook" - !curl --user admin:public --silent --show-error 'http://localhost:18082/api/v5/bridges' -X 'POST' -H 'Content-Type: application/json' --data-binary '{"name":"my_webhook","method":"post","url":"http://127.0.0.1:7077/counter","headers":{"content-type":"application/json"},"pool_size":4,"enable_pipelining":100,"connect_timeout":"5s","request_timeout":"5s","max_retries":3,"type":"webhook","ssl":{"enable":false,"verify":"verify_none"}}' | jq '.status' - ?connected - ?SH-PROMPT - - ## create a rule that uses the webhook as action, the rule id = "rule_edsy" - !curl --user admin:public --silent --show-error 'http://localhost:18082/api/v5/rules' -X 'POST' -H 'Content-Type: application/json' --data-binary '{"id":"rule_edsy","sql":"SELECT\n *\nFROM\n \"t/#\"","actions":["webhook:my_webhook"]}' | jq '.id' - ?rule_edsy - ?SH-PROMPT - -[shell emqx1] - ## verify the bridges and rules are sync to the other node - !curl --user admin:public --silent --show-error 'http://localhost:18083/api/v5/bridges/webhook:my_webhook' -X 'GET' -H 'Content-Type: application/json' | jq '.name' - ?my_webhook - ?SH-PROMPT - !curl --user admin:public --silent --show-error 'http://localhost:18083/api/v5/rules/rule_edsy' -X 'GET' -H 'Content-Type: application/json' | jq '.id' - ?rule_edsy - ?SH-PROMPT - -[shell bench] - - !emqtt_bench pub -c 10 -I 1000 -t t/%i -s 64 -L 300 - ???sent - -[shell emqx1] - !echo "" > log/emqx.log.1 - ?SH-PROMPT - - !cp -f ../$CUR_PKG releases/ - - ## 1. upgrade to the new version - !./bin/emqx install $VSN - ?Made release permanent: "$VSN" - ?SH-PROMPT - - !./bin/emqx versions | grep permanent - ?(.*)$VSN - ?SH-PROMPT - - ## 2. 
downgrade to the old version - !./bin/emqx install $OLD_VSN - ?Made release permanent:.* - ?SH-PROMPT - - !./bin/emqx versions | grep permanent | grep -qs "$OLD_VSN" - ?SH-PROMPT: - !echo ==$$?== - ?^==0== - ?SH-PROMPT: - - ## 3. again, upgrade to the new version - !./bin/emqx install $VSN - ?Made release permanent: "$VSN" - ?SH-PROMPT - - !./bin/emqx versions | grep permanent - ?(.*)$VSN - ?SH-PROMPT - - !./bin/emqx_ctl cluster status --json | jq -c .running_nodes - ???["emqx1@127.0.0.1","emqx2@127.0.0.1"] - ?SH-PROMPT - -[shell emqx2] - !echo "" > log/emqx.log.1 - ?SH-PROMPT - - !cp -f ../$CUR_PKG releases/ - - ## 1. upgrade to the new version - !./bin/emqx install $VSN - ?Made release permanent: "$VSN" - ?SH-PROMPT - - !./bin/emqx versions |grep permanent - ?(.*)$VSN - ?SH-PROMPT - - ## 2. downgrade to the old version - !./bin/emqx install $OLD_VSN - ?Made release permanent:.* - ?SH-PROMPT - - !./bin/emqx versions | grep permanent | grep -qs "$OLD_VSN" - ?SH-PROMPT: - !echo ==$$?== - ?^==0== - ?SH-PROMPT: - - ## 3. again, upgrade to the new version - !./bin/emqx install $VSN - ?Made release permanent: "$VSN" - ?SH-PROMPT - - !./bin/emqx versions |grep permanent - ?(.*)$VSN - ?SH-PROMPT - - !./bin/emqx_ctl cluster status --json | jq -c .running_nodes - ???["emqx1@127.0.0.1","emqx2@127.0.0.1"] - ?SH-PROMPT - -## We don't guarantee not to lose a single message! 
-## So even if we received 290~300 messages, we consider it as success -[shell bench] - ???publish complete - ??SH-PROMPT: - !sleep 5 - ?SH-PROMPT - - !curl --user admin:public --silent --show-error http://localhost:18083/api/v5/rules | jq --raw-output ".[0].node_metrics[] | select(.node==\"emqx1@127.0.0.1\") | .metrics.matched" - ?300 - ?SH-PROMPT - - !curl --user admin:public --silent --show-error http://localhost:18083/api/v5/rules | jq --raw-output ".[0].node_metrics[] | select(.node==\"emqx1@127.0.0.1\") | .metrics.\"actions.success\"" - ?\{"data":(29[0-9])|(300),"code":0\} - ?SH-PROMPT - - ## The /counter API is provided by .ci/fvt_test/http_server - !curl http://127.0.0.1:7077/counter - ?\{"data":(29[0-9])|(300),"code":0\} - ?SH-PROMPT - -[shell emqx2] - !cat log/emqx.log.1 | tail -n 100 - -error - ??SH-PROMPT: - - !./bin/emqx stop - ?ok - ?SH-PROMPT: - - !rm -rf emqx2/ - ?SH-PROMPT: - -[shell emqx1] - !cat log/emqx.log.1 | tail -n 100 - -error - ??SH-PROMPT: - - !./bin/emqx stop - ?ok - ?SH-PROMPT: - - !rm -rf emqx1/ - ?SH-PROMPT: - -[shell http_server] - !http_server:stop(). - ?ok - ?> - !halt(3). 
- ?SH-PROMPT: - -[cleanup] - !echo ==$$?== - ?==0== diff --git a/.github/workflows/run_relup_tests.yaml b/.github/workflows/run_relup_tests.yaml index d5c140763..3d3223eb8 100644 --- a/.github/workflows/run_relup_tests.yaml +++ b/.github/workflows/run_relup_tests.yaml @@ -54,14 +54,12 @@ jobs: with: name: emqx_built path: | - emqx/_packages/*/*.tar.gz - emqx/.ci/fvt_tests + emqx/* relup_test_run: needs: - relup_test_plan runs-on: ubuntu-20.04 - container: "ghcr.io/emqx/emqx-builder/5.0-17:1.13.4-24.2.1-1-ubuntu20.04" strategy: fail-fast: false matrix: @@ -74,64 +72,41 @@ jobs: run: shell: bash steps: + # setup Erlang to run lux + - uses: erlef/setup-beam@v1 + with: + otp-version: "24.2" + - uses: actions/checkout@v2 + with: + repository: hawk/lux + ref: lux-2.8.1 + path: lux + - name: Install lux + run: | + set -e -u -x + cd lux + autoconf + ./configure + make + echo "$(pwd)/bin" >> $GITHUB_PATH - uses: actions/download-artifact@v2 name: Download built emqx and test scenario with: name: emqx_built - path: emqx_built - - name: Prepare packages + path: . 
+ - name: run relup test run: | set -e -x -u - mkdir -p packages - cp emqx_built/_packages/*/*.tar.gz packages - cd packages - case "$OLD_VSN" in - e*) - profile='emqx-enterprise' - s3dir='emqx-ee' - ;; - v*) - profile='emqx' - s3dir='emqx-ce' - ;; - *) - echo "unknown old version $OLD_VSN" - exit 1 - ;; - esac - wget --no-verbose https://s3-us-west-2.amazonaws.com/packages.emqx/${s3dir}/$OLD_VSN/${profile}-${OLD_VSN#[e|v]}-otp24.2.1-1-ubuntu20.04-amd64.tar.gz - - name: Run relup test scenario - timeout-minutes: 5 - run: | - set -x - case "$OLD_VSN" in - e*) - cur_vsn=$CUR_EE_VSN - profile='emqx-enterprise' - ;; - v*) - cur_vsn=$CUR_CE_VSN - profile='emqx' - ;; - esac - old_pkg="${profile}-${OLD_VSN#[e|v]}-otp24.2.1-1-ubuntu20.04-amd64.tar.gz" - cur_pkg="${profile}-${cur_vsn}-otp24.2.1-1-ubuntu20.04-amd64.tar.gz" - lux \ - --progress verbose \ - --case_timeout infinity \ - --var PROJ_ROOT="$(pwd)" \ - --var PROFILE="$profile" \ - --var VSN="$cur_vsn" \ - --var OLD_VSN="$OLD_VSN" \ - --var CUR_PKG="$cur_pkg" \ - --var OLD_PKG="$old_pkg" \ - emqx_built/.ci/fvt_tests/relup.lux + cd emqx + if ! 
./scripts/relup/run-relup-lux.sh $OLD_VSN; then + docker logs node1.emqx.io | tee lux_logs/emqx1.log + docker logs node2.emqx.io | tee lux_logs/emqx2.log + exit 1 + fi - uses: actions/upload-artifact@v2 name: Save debug data if: failure() with: name: debug_data path: | - packages/emqx1/* - packages/emqx2/* - lux_logs + wd/lux_logs diff --git a/.gitignore b/.gitignore index f4472b9ab..f0e55ad28 100644 --- a/.gitignore +++ b/.gitignore @@ -66,3 +66,4 @@ mix.lock apps/emqx/test/emqx_static_checks_data/master.bpapi # rendered configurations *.conf.rendered +lux_logs/ diff --git a/scripts/relup/check-results.sh b/scripts/relup/check-results.sh new file mode 100755 index 000000000..446ebeca5 --- /dev/null +++ b/scripts/relup/check-results.sh @@ -0,0 +1,41 @@ +#!/usr/bin/env bash + +set -euo pipefail + +matched_node1="$(curl --user admin:public -sf http://localhost:18083/api/v5/rules | jq --raw-output ".[0].node_metrics[] | select(.node==\"emqx@node1.emqx.io\") | .metrics.matched")" +# TODO +matched_node2=0 +#matched_node2="$(curl --user admin:public -sf http://localhost:18084/api/v5/rules | jq --raw-output ".[0].node_metrics[] | select(.node==\"emqx@node2.emqx.io\") | .metrics.matched")" +success_node1="$(curl --user admin:public -sf http://localhost:18083/api/v5/rules | jq --raw-output ".[0].node_metrics[] | select(.node==\"emqx@node1.emqx.io\") | .metrics.\"actions.success\"")" +# TODO +success_node2=0 +#success_node2="$(curl --user admin:public -sf http://localhost:18084/api/v5/rules | jq --raw-output ".[0].node_metrics[] | select(.node==\"emqx@node2.emqx.io\") | .metrics.\"actions.success\"")" +webhook="$(curl -sf http://localhost:7077/counter | jq '.data')" + +MATCHED_TOTAL="$(( matched_node1 + matched_node2 ))" +SUCCESS_TOTAL="$(( success_node1 + success_node2 ))" +COLLECTED_TOTAL="$webhook" + +is_number() { + re='^[0-9]+$' + if ! 
[[ $2 =~ $re ]] ; then + echo "error: $1=$2 is not a number" >&2; exit 1 + fi +} + +is_number MATCHED_TOTAL "$MATCHED_TOTAL" +is_number SUCCESS_TOTAL "$SUCCESS_TOTAL" +is_number COLLECTED_TOTAL "$COLLECTED_TOTAL" + +if [ "$MATCHED_TOTAL" -lt 290 ] || \ + [ "$SUCCESS_TOTAL" -lt 290 ] || \ + [ "$COLLECTED_TOTAL" -lt 290 ]; then + echo "FAILED" + echo "MATCHED_TOTAL=$MATCHED_TOTAL" + echo "SUCCESS_TOTAL=$SUCCESS_TOTAL" + echo "COLLECTED_TOTAL=$COLLECTED_TOTAL" + exit 1 +else + echo "ALL_IS_WELL" + exit 0 +fi diff --git a/scripts/relup/relup.lux b/scripts/relup/relup.lux new file mode 100644 index 000000000..88a504cb8 --- /dev/null +++ b/scripts/relup/relup.lux @@ -0,0 +1,130 @@ +[config var=PROJ_ROOT] +[config var=VSN] +[config var=CUR_PKG] +[config var=OLD_VSN] +[config var=NODE1] +[config var=NODE2] +[config var=BENCH] + +[config shell_cmd=/bin/bash] +[config timeout=600000] + +[shell emqx1] + !docker exec -it $NODE1 emqx_ctl cluster status + ???running_nodes => ['emqx@node1.emqx.io','emqx@node2.emqx.io'] + ?SH-PROMPT + + ## create a webhook data bridge with id "my_webhook" + !curl --user admin:public --silent --show-error 'http://localhost:18083/api/v5/bridges' -X 'POST' -H 'Content-Type: application/json' --data-binary '{"name":"my_webhook","body":"","method":"post","url":"http://webhook.emqx.io:7077/counter","headers":{"content-type":"application/json"},"pool_size":4,"enable_pipelining":100,"connect_timeout":"5s","request_timeout":"5s","max_retries":3,"type":"webhook","ssl":{"enable":false,"verify":"verify_none"}}' | jq '.status' + ?connected + ?SH-PROMPT + + ## create a rule that uses the webhook as action, the rule id = "rule_edsy" + !curl --user admin:public --silent --show-error 'http://localhost:18083/api/v5/rules' -X 'POST' -H 'Content-Type: application/json' --data-binary '{"id":"rule_edsy","sql":"SELECT\n *\nFROM\n \"t/#\"","actions":["webhook:my_webhook"]}' | jq '.id' + ?rule_edsy + ?SH-PROMPT + +[shell emqx2] + ## verify the bridges and rules are sync 
to the other node + !curl --user admin:public --silent --show-error 'http://localhost:18084/api/v5/bridges/webhook:my_webhook' -X 'GET' -H 'Content-Type: application/json' | jq '.name' + ?my_webhook + ?SH-PROMPT + !curl --user admin:public --silent --show-error 'http://localhost:18084/api/v5/rules/rule_edsy' -X 'GET' -H 'Content-Type: application/json' | jq '.id' + ?rule_edsy + ?SH-PROMPT + +[shell bench] + !docker exec -it $BENCH emqtt_bench pub --host 'node1.emqx.io' --port 1883 -c 10 -I 1000 -t t/%i -s 64 -L 300 + +[shell emqx1] + !docker cp $CUR_PKG $NODE1:/emqx/releases/ + + ## 1. upgrade to the new version + !docker exec -it $NODE1 emqx install $VSN + ?Made release permanent: "$VSN" + ?SH-PROMPT + + !docker exec -it $NODE1 emqx versions | grep permanent + ?(.*)$VSN + ?SH-PROMPT + + ## 2. downgrade to the old version + !docker exec -it $NODE1 emqx install $OLD_VSN + ?Made release permanent:.* + ?SH-PROMPT + + !docker exec -it $NODE1 emqx versions | grep permanent | grep -qs "$OLD_VSN" + ?SH-PROMPT: + !echo ==$$?== + ?^==0== + ?SH-PROMPT: + + ## 3. again, upgrade to the new version + !docker exec -it $NODE1 emqx install $VSN + ?Made release permanent: "$VSN" + ?SH-PROMPT + + !docker exec -it $NODE1 emqx versions | grep permanent + ?(.*)$VSN + ?SH-PROMPT + + !docker exec -it $NODE1 emqx_ctl cluster status + ???running_nodes => ['emqx@node1.emqx.io','emqx@node2.emqx.io'] + ?SH-PROMPT + +[shell emqx2] + !docker cp $CUR_PKG $NODE2:/emqx/releases/ + + ## 1. upgrade to the new version + !docker exec -it $NODE2 emqx install $VSN + ?Made release permanent: "$VSN" + ?SH-PROMPT + + !docker exec -it $NODE2 emqx versions | grep permanent + ?(.*)$VSN + ?SH-PROMPT + + ## 2. downgrade to the old version + !docker exec -it $NODE2 emqx install $OLD_VSN + ?Made release permanent:.* + ?SH-PROMPT + + !docker exec -it $NODE2 emqx versions | grep permanent | grep -qs "$OLD_VSN" + ?SH-PROMPT: + !echo ==$$?== + ?^==0== + ?SH-PROMPT: + + ## 3. 
again, upgrade to the new version + !docker exec -it $NODE2 emqx install $VSN + ?Made release permanent: "$VSN" + ?SH-PROMPT + + !docker exec -it $NODE2 emqx versions | grep permanent + ?(.*)$VSN + ?SH-PROMPT + + !docker exec -it $NODE1 emqx_ctl cluster status + ???running_nodes => ['emqx@node1.emqx.io','emqx@node2.emqx.io'] + ?SH-PROMPT + +## We don't guarantee not to lose a single message! +## So even if we received 290~300 messages, we consider it as success +[shell bench] + ???publish complete + ??SH-PROMPT: + !sleep 5 + ?SH-PROMPT + + !$PROJ_ROOT/scripts/relup/check-results.sh + !echo ==$$?== + ???ALL_IS_WELL + ?SH-PROMPT: + + !echo ==$$?== + ?^==0== + ?SH-PROMPT: + +[cleanup] + !echo ==$$?== + ?==0== diff --git a/scripts/relup/run-pkg.sh b/scripts/relup/run-pkg.sh new file mode 100755 index 000000000..0a7fda049 --- /dev/null +++ b/scripts/relup/run-pkg.sh @@ -0,0 +1,16 @@ +#!/usr/bin/env bash + +## This script is intended to run in docker +## extracts a .tar.gz package and runs EMQX in console mode + +set -euo pipefail + +PKG="$1" + +mkdir -p emqx +tar -C emqx -zxf "$PKG" + +ln -s "$(pwd)/emqx/bin/emqx" /usr/bin/emqx +ln -s "$(pwd)/emqx/bin/emqx_ctl" /usr/bin/emqx_ctl + +emqx console diff --git a/scripts/relup/run-relup-lux.sh b/scripts/relup/run-relup-lux.sh new file mode 100755 index 000000000..af0ecf4c7 --- /dev/null +++ b/scripts/relup/run-relup-lux.sh @@ -0,0 +1,64 @@ +#!/usr/bin/env bash + +## This script needs the 'lux' command in PATH +## it runs the scripts/relup/relup.lux script + +set -euo pipefail + +old_vsn="${1:-}" +if [ -z "$old_vsn" ]; then + echo "arg1 should be the upgrade base version" + exit 1 +fi + +# ensure dir +cd -P -- "$(dirname -- "$0")/../.." 
+ +set -x + +case "$old_vsn" in + e*) + cur_vsn="$(./pkg-vsn.sh emqx-enterprise)" + profile='emqx-enterprise' + ;; + v*) + cur_vsn="$(./pkg-vsn.sh emqx)" + profile='emqx' + ;; + *) + echo "unknown old version $old_vsn" + exit 1 + ;; +esac + +# From now on, no need for the v|e prefix +OLD_VSN="${old_vsn#[e|v]}" + +OLD_PKG="$(pwd)/_upgrade_base/${profile}-${OLD_VSN}-otp24.2.1-1-ubuntu20.04-amd64.tar.gz" +CUR_PKG="$(pwd)/_packages/${profile}/${profile}-${cur_vsn}-otp24.2.1-1-ubuntu20.04-amd64.tar.gz" + +if [ ! -f "$OLD_PKG" ]; then + echo "$OLD_PKG not found" + exit 1 +fi + +if [ ! -f "$CUR_PKG" ]; then + echo "$CUR_PKG not found" + exit 1 +fi + +# start two nodes and their friends (webhook server and a bench) in docker +./scripts/relup/start-relup-test-cluster.sh 'ubuntu:20.04' "$OLD_PKG" + +# run relup tests +lux \ + --progress verbose \ + --case_timeout infinity \ + --var PROJ_ROOT="$(pwd)" \ + --var VSN="$cur_vsn" \ + --var CUR_PKG="$CUR_PKG" \ + --var OLD_VSN="$OLD_VSN" \ + --var NODE1="node1.emqx.io" \ + --var NODE2="node2.emqx.io" \ + --var BENCH="bench.emqx.io" \ + ./scripts/relup/relup.lux diff --git a/scripts/relup/start-relup-test-cluster.sh b/scripts/relup/start-relup-test-cluster.sh new file mode 100755 index 000000000..deaec4ad1 --- /dev/null +++ b/scripts/relup/start-relup-test-cluster.sh @@ -0,0 +1,111 @@ +#!/usr/bin/env bash + +set -euo pipefail + +## EMQX can only start with longname (https://erlang.org/doc/reference_manual/distributed.html) +## The host name part of EMQX's node name has to be static, this means we should either +## pre-assign static IP for containers, or ensure containers can communicate with each other by name +## this is why a docker network is created, and the containers' names have a dot. + +# ensure dir +cd -P -- "$(dirname -- "$0")/../.." 
+ +set -x + +IMAGE="${1}" +PKG="$(readlink -f "${2}")" + +NET='emqx.io' +NODE1="node1.$NET" +NODE2="node2.$NET" +WEBHOOK="webhook.$NET" +BENCH="bench.$NET" +COOKIE='this-is-a-secret' +## Erlang image is needed to run webhook server and emqtt-bench +ERLANG_IMAGE="ghcr.io/emqx/emqx-builder/5.0-17:1.13.4-24.2.1-1-ubuntu20.04" +# builder has emqtt-bench installed +BENCH_IMAGE="$ERLANG_IMAGE" + +## clean up +docker rm -f "$BENCH" >/dev/null 2>&1 || true +docker rm -f "$WEBHOOK" >/dev/null 2>&1 || true +docker rm -f "$NODE1" >/dev/null 2>&1 || true +docker rm -f "$NODE2" >/dev/null 2>&1 || true +docker network rm "$NET" >/dev/null 2>&1 || true + +docker network create "$NET" + +docker run -d -t --name "$NODE1" \ + --net "$NET" \ + -e EMQX_LOG__CONSOLE_HANDLER__LEVEL=warning \ + -e EMQX_NODE_NAME="emqx@$NODE1" \ + -e EMQX_NODE_COOKIE="$COOKIE" \ + -p 18083:18083 \ + -v "$PKG:/emqx.tar.gz" \ + -v "$(pwd)/scripts/relup/run-pkg.sh:/run-pkg.sh" \ + "$IMAGE" /run-pkg.sh emqx.tar.gz + +docker run -d -t --name "$NODE2" \ + --net "$NET" \ + -e EMQX_LOG__CONSOLE_HANDLER__LEVEL=warning \ + -e EMQX_NODE_NAME="emqx@$NODE2" \ + -e EMQX_NODE_COOKIE="$COOKIE" \ + -p 18084:18083 \ + -v "$PKG:/emqx.tar.gz" \ + -v "$(pwd)/scripts/relup/run-pkg.sh:/run-pkg.sh" \ + "$IMAGE" /run-pkg.sh emqx.tar.gz + +docker run -d -t --name "$WEBHOOK" \ + --net "$NET" \ + -v "$(pwd)/.ci/fvt_tests/http_server:/http_server" \ + -w /http_server \ + -p 7077:7077 \ + "$ERLANG_IMAGE" bash -c 'rebar3 compile; erl -pa _build/default/lib/*/ebin -eval "http_server:start()"' + +docker run -d -t --name "$BENCH" \ + --net "$NET" \ + "$BENCH_IMAGE" \ + bash -c 'sleep 10000; exit 1' + +wait_limit=60 +wait_for_emqx() { + wait_sec=0 + container="$1" + wait_limit="$2" + set +x + while ! docker exec "$container" emqx_ctl status >/dev/null 2>&1; do + wait_sec=$(( wait_sec + 1 )) + if [ $wait_sec -gt "$wait_limit" ]; then + echo "timeout wait for EMQX" + exit 1 + fi + echo -n '.' 
+ sleep 1 + done +} + +wait_for_webhook() { + wait_sec=0 + wait_limit="$1" + set +x + while ! curl -f -s localhost:7077; do + wait_sec=$(( wait_sec + 1 )) + if [ $wait_sec -gt "$wait_limit" ]; then + echo "timeout wait for EMQX" + exit 1 + fi + echo -n '.' + sleep 1 + done +} + +# wait for webhook http server to start, +# it may take a while because it needs to compile from source code +wait_for_webhook 120 +# after webhook start, it should not cost more than 30 seconds +wait_for_emqx $NODE1 30 +# after node1 is up, it should not cost more than 10 seconds +wait_for_emqx $NODE2 10 +echo + +docker exec $NODE1 emqx_ctl cluster join "emqx@$NODE2" diff --git a/scripts/run-relup-lux.sh b/scripts/run-relup-lux.sh deleted file mode 100755 index 5b859c2c0..000000000 --- a/scripts/run-relup-lux.sh +++ /dev/null @@ -1,42 +0,0 @@ -#!/usr/bin/env bash - -## This script needs the 'lux' command in PATH -## it runs the .ci/fvt_tests/relup.lux script - -set -euo pipefail - -old_vsn="${1}" - -# ensure dir -cd -P -- "$(dirname -- "$0")/.." - -set -x - -case "$old_vsn" in - e*) - cur_vsn="$(./pkg-vsn.sh emqx-enterprise)" - profile='emqx-enterprise' - ;; - v*) - cur_vsn="$(./pkg-vsn.sh emqx)" - profile='emqx' - ;; - *) - echo "unknown old version $old_vsn" - exit 1 - ;; -esac - -old_pkg="$(pwd)/_upgrade_base/${profile}-${old_vsn#[e|v]}-otp24.2.1-1-ubuntu20.04-amd64.tar.gz" -cur_pkg="$(pwd)/_packages/${profile}/${profile}-${cur_vsn}-otp24.2.1-1-ubuntu20.04-amd64.tar.gz" - -lux \ - --progress verbose \ - --case_timeout infinity \ - --var PROJ_ROOT="$(pwd)" \ - --var PROFILE="$profile" \ - --var VSN="$cur_vsn" \ - --var OLD_VSN="$old_vsn" \ - --var CUR_PKG="$cur_pkg" \ - --var OLD_PKG="$old_pkg" \ - .ci/fvt_tests/relup.lux