From f8fc67b313a10ee5eda16be2d97b67ac94cf7cb7 Mon Sep 17 00:00:00 2001 From: Thales Macedo Garitezi Date: Sun, 7 Nov 2021 16:54:38 -0300 Subject: [PATCH] fix(lag): target only replica if rlog core+replicant there seem to be race conditions in some tests whose sessions alternately hit the core and the replicant when the rlog backend is used. for instance, if there is some delay in this replication, a session just created on the core may not yet have been replicated to the replicant when a new connection is made to the replicant, resulting in a test failure if the test expects the session to be present. since such replication lags are inherent to the core-replicant topology, we can try to target only the replicant to avoid seeing this inconsistent view of the system during the tests. --- ...er-compose-emqx-cluster-rlog.override.yaml | 6 ++++++ .ci/docker-compose-file/python/pytest.sh | 14 ++++++++++--- .ci/docker-compose-file/scripts/run-emqx.sh | 20 +++++++++++++++++-- .github/workflows/run_fvt_tests.yaml | 5 ++++- 4 files changed, 39 insertions(+), 6 deletions(-) diff --git a/.ci/docker-compose-file/docker-compose-emqx-cluster-rlog.override.yaml b/.ci/docker-compose-file/docker-compose-emqx-cluster-rlog.override.yaml index 3d8b86dd3..8be146eb5 100644 --- a/.ci/docker-compose-file/docker-compose-emqx-cluster-rlog.override.yaml +++ b/.ci/docker-compose-file/docker-compose-emqx-cluster-rlog.override.yaml @@ -11,17 +11,23 @@ x-default-emqx: &default-emqx services: emqx1: <<: *default-emqx + container_name: node1.emqx.io environment: - "EMQX_HOST=node1.emqx.io" - "EMQX_CLUSTER__DB_BACKEND=rlog" - "EMQX_CLUSTER__RLOG__ROLE=core" - "EMQX_CLUSTER__STATIC__SEEDS=[emqx@node1.emqx.io]" + - "EMQX_LISTENERS__TCP__DEFAULT__PROXY_PROTOCOL=false" + - "EMQX_LISTENERS__WS__DEFAULT__PROXY_PROTOCOL=false" emqx2: <<: *default-emqx + container_name: node2.emqx.io environment: - "EMQX_HOST=node2.emqx.io" - "EMQX_CLUSTER__DB_BACKEND=rlog" - "EMQX_CLUSTER__RLOG__ROLE=replicant" - 
"EMQX_CLUSTER__RLOG__CORE_NODES=emqx@node1.emqx.io" - "EMQX_CLUSTER__STATIC__SEEDS=[emqx@node1.emqx.io]" + - "EMQX_LISTENERS__TCP__DEFAULT__PROXY_PROTOCOL=false" + - "EMQX_LISTENERS__WS__DEFAULT__PROXY_PROTOCOL=false" diff --git a/.ci/docker-compose-file/python/pytest.sh b/.ci/docker-compose-file/python/pytest.sh index 75f6441b5..4579691b3 100755 --- a/.ci/docker-compose-file/python/pytest.sh +++ b/.ci/docker-compose-file/python/pytest.sh @@ -6,16 +6,24 @@ set -x set +e -LB="haproxy" +EMQX_TEST_DB_BACKEND=$1 +if [ "$EMQX_TEST_DB_BACKEND" = "rlog" ] +then + # target only replica to avoid replication races + TARGET_HOST="node2.emqx.io" +else + # use loadbalancer + TARGET_HOST="haproxy" +fi apk update && apk add git curl git clone -b develop-4.0 https://github.com/emqx/paho.mqtt.testing.git /paho.mqtt.testing pip install pytest -pytest -v /paho.mqtt.testing/interoperability/test_client/V5/test_connect.py -k test_basic --host "$LB" +pytest -v /paho.mqtt.testing/interoperability/test_client/V5/test_connect.py -k test_basic --host "$TARGET_HOST" RESULT=$? -pytest -v /paho.mqtt.testing/interoperability/test_client --host "$LB" +pytest -v /paho.mqtt.testing/interoperability/test_client --host "$TARGET_HOST" RESULT=$(( RESULT + $? 
)) # pytest -v /paho.mqtt.testing/interoperability/test_cluster --host1 "node1.emqx.io" --host2 "node2.emqx.io" diff --git a/.ci/docker-compose-file/scripts/run-emqx.sh b/.ci/docker-compose-file/scripts/run-emqx.sh index ebb07b8b6..1465cb655 100755 --- a/.ci/docker-compose-file/scripts/run-emqx.sh +++ b/.ci/docker-compose-file/scripts/run-emqx.sh @@ -14,11 +14,27 @@ fi echo "EMQX_ZONES__DEFAULT__MQTT__MAX_TOPIC_ALIAS=10" } >> .ci/docker-compose-file/conf.cluster.env -is_cluster_up() { - docker exec -i node1.emqx.io \ +is_node_up() { + local node + node="$1" + docker exec -i "$node" \ bash -c "emqx eval \"['emqx@node1.emqx.io','emqx@node2.emqx.io'] = maps:get(running_nodes, ekka_cluster:info()).\"" > /dev/null 2>&1 } +is_node_listening() { + local node + node="$1" + docker exec -i "$node" \ + emqx eval "ok = case gen_tcp:connect(\"localhost\", 1883, []) of {ok, P} -> gen_tcp:close(P), ok; _ -> exit(1) end." > /dev/null 2>&1 +} + +is_cluster_up() { + is_node_up node1.emqx.io && \ + is_node_up node2.emqx.io && \ + is_node_listening node1.emqx.io && \ + is_node_listening node2.emqx.io +} + docker-compose \ -f .ci/docker-compose-file/docker-compose-emqx-cluster.yaml \ $CLUSTER_OVERRIDES \ diff --git a/.github/workflows/run_fvt_tests.yaml b/.github/workflows/run_fvt_tests.yaml index e696ade29..46ce95dab 100644 --- a/.github/workflows/run_fvt_tests.yaml +++ b/.github/workflows/run_fvt_tests.yaml @@ -99,10 +99,13 @@ jobs: ./.ci/docker-compose-file/scripts/run-emqx.sh - name: make paho tests run: | - if ! docker exec -i python /scripts/pytest.sh; then + if ! docker exec -i python /scripts/pytest.sh "${{ matrix.cluster_db_backend }}"; then echo "DUMP_CONTAINER_LOGS_BGN" + echo "============== haproxy ==============" docker logs haproxy + echo "============== node1 ==============" docker logs node1.emqx.io + echo "============== node2 ==============" docker logs node2.emqx.io echo "DUMP_CONTAINER_LOGS_END" exit 1