fix(lag): target only replica if rlog core+replicant

there seem to be race conditions in some tests whose sessions hit the
core and the replicant alternately when the rlog backend is used.

for instance, if there is some delay in this replication, a session
just created in the core may not yet have been replicated to the
replicant when a new connection is made to the replica, resulting in a
test failure if the test expects the session to be present.

since such replication lags are inherent to the core-replicant
topology, we can try to target only the replicant to avoid seeing this
inconsistent view of the system during the tests.
This commit is contained in:
Thales Macedo Garitezi 2021-11-07 16:54:38 -03:00
parent c60feaaad2
commit f8fc67b313
No known key found for this signature in database
GPG Key ID: DD279F8152A9B6DD
4 changed files with 39 additions and 6 deletions

View File

@ -11,17 +11,23 @@ x-default-emqx: &default-emqx
services: services:
emqx1: emqx1:
<<: *default-emqx <<: *default-emqx
container_name: node1.emqx.io
environment: environment:
- "EMQX_HOST=node1.emqx.io" - "EMQX_HOST=node1.emqx.io"
- "EMQX_CLUSTER__DB_BACKEND=rlog" - "EMQX_CLUSTER__DB_BACKEND=rlog"
- "EMQX_CLUSTER__RLOG__ROLE=core" - "EMQX_CLUSTER__RLOG__ROLE=core"
- "EMQX_CLUSTER__STATIC__SEEDS=[emqx@node1.emqx.io]" - "EMQX_CLUSTER__STATIC__SEEDS=[emqx@node1.emqx.io]"
- "EMQX_LISTENERS__TCP__DEFAULT__PROXY_PROTOCOL=false"
- "EMQX_LISTENERS__WS__DEFAULT__PROXY_PROTOCOL=false"
emqx2: emqx2:
<<: *default-emqx <<: *default-emqx
container_name: node2.emqx.io
environment: environment:
- "EMQX_HOST=node2.emqx.io" - "EMQX_HOST=node2.emqx.io"
- "EMQX_CLUSTER__DB_BACKEND=rlog" - "EMQX_CLUSTER__DB_BACKEND=rlog"
- "EMQX_CLUSTER__RLOG__ROLE=replicant" - "EMQX_CLUSTER__RLOG__ROLE=replicant"
- "EMQX_CLUSTER__RLOG__CORE_NODES=emqx@node1.emqx.io" - "EMQX_CLUSTER__RLOG__CORE_NODES=emqx@node1.emqx.io"
- "EMQX_CLUSTER__STATIC__SEEDS=[emqx@node1.emqx.io]" - "EMQX_CLUSTER__STATIC__SEEDS=[emqx@node1.emqx.io]"
- "EMQX_LISTENERS__TCP__DEFAULT__PROXY_PROTOCOL=false"
- "EMQX_LISTENERS__WS__DEFAULT__PROXY_PROTOCOL=false"

View File

@ -6,16 +6,24 @@
set -x set -x
set +e set +e
LB="haproxy" EMQX_TEST_DB_BACKEND=$1
if [ "$EMQX_TEST_DB_BACKEND" = "rlog" ]
then
# target only replica to avoid replication races
TARGET_HOST="node2.emqx.io"
else
# use loadbalancer
TARGET_HOST="haproxy"
fi
apk update && apk add git curl apk update && apk add git curl
git clone -b develop-4.0 https://github.com/emqx/paho.mqtt.testing.git /paho.mqtt.testing git clone -b develop-4.0 https://github.com/emqx/paho.mqtt.testing.git /paho.mqtt.testing
pip install pytest pip install pytest
pytest -v /paho.mqtt.testing/interoperability/test_client/V5/test_connect.py -k test_basic --host "$LB" pytest -v /paho.mqtt.testing/interoperability/test_client/V5/test_connect.py -k test_basic --host "$TARGET_HOST"
RESULT=$? RESULT=$?
pytest -v /paho.mqtt.testing/interoperability/test_client --host "$LB" pytest -v /paho.mqtt.testing/interoperability/test_client --host "$TARGET_HOST"
RESULT=$(( RESULT + $? )) RESULT=$(( RESULT + $? ))
# pytest -v /paho.mqtt.testing/interoperability/test_cluster --host1 "node1.emqx.io" --host2 "node2.emqx.io" # pytest -v /paho.mqtt.testing/interoperability/test_cluster --host1 "node1.emqx.io" --host2 "node2.emqx.io"

View File

@ -14,11 +14,27 @@ fi
echo "EMQX_ZONES__DEFAULT__MQTT__MAX_TOPIC_ALIAS=10" echo "EMQX_ZONES__DEFAULT__MQTT__MAX_TOPIC_ALIAS=10"
} >> .ci/docker-compose-file/conf.cluster.env } >> .ci/docker-compose-file/conf.cluster.env
is_cluster_up() { is_node_up() {
docker exec -i node1.emqx.io \ local node
node="$1"
docker exec -i "$node" \
bash -c "emqx eval \"['emqx@node1.emqx.io','emqx@node2.emqx.io'] = maps:get(running_nodes, ekka_cluster:info()).\"" > /dev/null 2>&1 bash -c "emqx eval \"['emqx@node1.emqx.io','emqx@node2.emqx.io'] = maps:get(running_nodes, ekka_cluster:info()).\"" > /dev/null 2>&1
} }
# Check whether port 1883 accepts TCP connections inside a container.
#   $1 - name of the docker container in which to run the check.
# Runs `emqx eval` in that container with an Erlang expression that tries
# gen_tcp:connect to localhost:1883: on {ok, P} the socket is closed again,
# any other result calls exit(1). stdout and stderr are discarded, so the
# caller only sees the exit status of `docker exec`.
# NOTE(review): this relies on `emqx eval` exiting non-zero when the
# expression fails -- confirm.
is_node_listening() {
local node
node="$1"
docker exec -i "$node" \
emqx eval "ok = case gen_tcp:connect(\"localhost\", 1883, []) of {ok, P} -> gen_tcp:close(P), ok; _ -> exit(1) end." > /dev/null 2>&1
}
# Succeed only when both node1.emqx.io and node2.emqx.io pass is_node_up
# and both pass is_node_listening. The checks are chained with &&, so the
# first failing check stops the chain and its status is returned.
is_cluster_up() {
is_node_up node1.emqx.io && \
is_node_up node2.emqx.io && \
is_node_listening node1.emqx.io && \
is_node_listening node2.emqx.io
}
docker-compose \ docker-compose \
-f .ci/docker-compose-file/docker-compose-emqx-cluster.yaml \ -f .ci/docker-compose-file/docker-compose-emqx-cluster.yaml \
$CLUSTER_OVERRIDES \ $CLUSTER_OVERRIDES \

View File

@ -99,10 +99,13 @@ jobs:
./.ci/docker-compose-file/scripts/run-emqx.sh ./.ci/docker-compose-file/scripts/run-emqx.sh
- name: make paho tests - name: make paho tests
run: | run: |
if ! docker exec -i python /scripts/pytest.sh; then if ! docker exec -i python /scripts/pytest.sh "${{ matrix.cluster_db_backend }}"; then
echo "DUMP_CONTAINER_LOGS_BGN" echo "DUMP_CONTAINER_LOGS_BGN"
echo "============== haproxy =============="
docker logs haproxy docker logs haproxy
echo "============== node1 =============="
docker logs node1.emqx.io docker logs node1.emqx.io
echo "============== node2 =============="
docker logs node2.emqx.io docker logs node2.emqx.io
echo "DUMP_CONTAINER_LOGS_END" echo "DUMP_CONTAINER_LOGS_END"
exit 1 exit 1